git.saurik.com Git - apple/xnu.git/commitdiff
xnu-2050.7.9.tar.gz (snapshot) · mac-os-x-108 (branch) · v2050.7.9 (tag)
author    Apple <opensource@apple.com>
          Fri, 24 Aug 2012 20:25:02 +0000 (20:25 +0000)
committer Apple <opensource@apple.com>
          Fri, 24 Aug 2012 20:25:02 +0000 (20:25 +0000)
1192 files changed:
.lldbinit [new file with mode: 0644]
EXTERNAL_HEADERS/Availability.h
EXTERNAL_HEADERS/AvailabilityInternal.h
EXTERNAL_HEADERS/AvailabilityMacros.h
EXTERNAL_HEADERS/Makefile
EXTERNAL_HEADERS/corecrypto/cc.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/cc_config.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/cc_priv.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccaes.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccder.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccdes.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccdigest.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/cchmac.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccmd5.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccmode.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccmode_factory.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccmode_impl.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccn.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccpad.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccrc4.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccrng.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccrng_system.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccsha1.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/ccsha2.h [new file with mode: 0644]
EXTERNAL_HEADERS/mach-o/Makefile
EXTERNAL_HEADERS/mach-o/kld.h [deleted file]
EXTERNAL_HEADERS/mach-o/loader.h
EXTERNAL_HEADERS/mach-o/nlist.h
EXTERNAL_HEADERS/mach-o/reloc.h
EXTERNAL_HEADERS/mach-o/x86_64/reloc.h
EXTERNAL_HEADERS/stdint.h
Makefile
README
SETUP/Makefile
SETUP/config/config.h
SETUP/config/doconf
SETUP/config/mkioconf.c
SETUP/config/mkmakefile.c
SETUP/decomment/Makefile [new file with mode: 0644]
SETUP/decomment/decomment.c [new file with mode: 0644]
SETUP/kextsymboltool/Makefile
SETUP/md/Makefile [new file with mode: 0644]
SETUP/md/md.1 [new file with mode: 0644]
SETUP/md/md.c [new file with mode: 0644]
bsd/bsm/audit.h
bsd/bsm/audit_errno.h
bsd/bsm/audit_kevents.h
bsd/conf/MASTER
bsd/conf/MASTER.i386
bsd/conf/MASTER.x86_64
bsd/conf/Makefile
bsd/conf/Makefile.i386
bsd/conf/Makefile.template
bsd/conf/Makefile.x86_64
bsd/conf/files
bsd/conf/files.i386
bsd/conf/files.x86_64
bsd/crypto/Makefile
bsd/crypto/aes.h [new file with mode: 0644]
bsd/crypto/aes/Assert.c [deleted file]
bsd/crypto/aes/Makefile [deleted file]
bsd/crypto/aes/aes.h [deleted file]
bsd/crypto/aes/gen/Makefile [deleted file]
bsd/crypto/aes/gen/aescrypt.c [deleted file]
bsd/crypto/aes/gen/aeskey.c [deleted file]
bsd/crypto/aes/gen/aesopt.h [deleted file]
bsd/crypto/aes/gen/aestab.c [deleted file]
bsd/crypto/aes/gen/aestab.h [deleted file]
bsd/crypto/aes/i386/AES.s [deleted file]
bsd/crypto/aes/i386/Context.h [deleted file]
bsd/crypto/aes/i386/Data.mk [deleted file]
bsd/crypto/aes/i386/Data.s [deleted file]
bsd/crypto/aes/i386/EncryptDecrypt.s [deleted file]
bsd/crypto/aes/i386/ExpandKeyForDecryption.s [deleted file]
bsd/crypto/aes/i386/ExpandKeyForEncryption.s [deleted file]
bsd/crypto/aes/i386/MakeData.c [deleted file]
bsd/crypto/aes/i386/Makefile [deleted file]
bsd/crypto/aes/i386/ReadMe.txt [deleted file]
bsd/crypto/aes/i386/aes_crypt_hw.s [deleted file]
bsd/crypto/aes/i386/aes_key_hw.s [deleted file]
bsd/crypto/aes/i386/aes_modes_asm.s [deleted file]
bsd/crypto/aes/i386/aes_modes_hw.s [deleted file]
bsd/crypto/aes/i386/aesxts.c [deleted file]
bsd/crypto/aes/i386/aesxts.h [deleted file]
bsd/crypto/aes/i386/aesxts_asm.s [deleted file]
bsd/crypto/aesxts.h [new file with mode: 0644]
bsd/crypto/des.h [new file with mode: 0644]
bsd/crypto/des/Makefile [deleted file]
bsd/crypto/des/des.h [deleted file]
bsd/crypto/des/des_ecb.c [deleted file]
bsd/crypto/des/des_enc.c [deleted file]
bsd/crypto/des/des_locl.h [deleted file]
bsd/crypto/des/des_setkey.c [deleted file]
bsd/crypto/des/podd.h [deleted file]
bsd/crypto/des/sk.h [deleted file]
bsd/crypto/des/spr.h [deleted file]
bsd/crypto/sha2.h [new file with mode: 0644]
bsd/crypto/sha2/Makefile [deleted file]
bsd/crypto/sha2/intel/sha256.s [deleted file]
bsd/crypto/sha2/intel/sha256nossse3.s [deleted file]
bsd/crypto/sha2/sha2.c [deleted file]
bsd/crypto/sha2/sha2.h [deleted file]
bsd/dev/dtrace/dtrace.c
bsd/dev/dtrace/dtrace_glue.c
bsd/dev/dtrace/dtrace_ptss.c
bsd/dev/dtrace/dtrace_subr.c
bsd/dev/dtrace/lockstat.c
bsd/dev/dtrace/profile_prvd.c
bsd/dev/dtrace/sdt.c
bsd/dev/dtrace/systrace.c
bsd/dev/dtrace/systrace.h
bsd/dev/i386/conf.c
bsd/dev/i386/dtrace_isa.c
bsd/dev/i386/fbt_x86.c
bsd/dev/i386/kern_machdep.c
bsd/dev/i386/mem.c
bsd/dev/i386/sysctl.c
bsd/dev/i386/systemcalls.c
bsd/dev/i386/unix_signal.c
bsd/dev/memdev.c
bsd/dev/random/randomdev.c
bsd/dev/unix_startup.c
bsd/hfs/hfs.h
bsd/hfs/hfs_attrlist.c
bsd/hfs/hfs_attrlist.h
bsd/hfs/hfs_btreeio.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_catalog.h
bsd/hfs/hfs_chash.c
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_cprotect.c
bsd/hfs/hfs_format.h
bsd/hfs/hfs_fsctl.h
bsd/hfs/hfs_hotfiles.c
bsd/hfs/hfs_kdebug.h
bsd/hfs/hfs_link.c
bsd/hfs/hfs_lookup.c
bsd/hfs/hfs_notification.c
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_search.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/hfs/hfscommon/Catalog/CatalogUtilities.c
bsd/hfs/hfscommon/Catalog/FileIDsServices.c
bsd/hfs/hfscommon/Misc/BTreeWrapper.c
bsd/hfs/hfscommon/Misc/FileExtentMapping.c
bsd/hfs/hfscommon/Misc/VolumeAllocation.c
bsd/hfs/hfscommon/headers/FileMgrInternal.h
bsd/kern/bsd_init.c
bsd/kern/bsd_stubs.c
bsd/kern/decmpfs.c
bsd/kern/imageboot.c
bsd/kern/kdebug.c
bsd/kern/kern_authorization.c
bsd/kern/kern_callout.c [deleted file]
bsd/kern/kern_control.c
bsd/kern/kern_core.c
bsd/kern/kern_credential.c
bsd/kern/kern_descrip.c
bsd/kern/kern_event.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/kern_fork.c
bsd/kern/kern_lockf.c
bsd/kern/kern_malloc.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_mib.c
bsd/kern/kern_mman.c
bsd/kern/kern_proc.c
bsd/kern/kern_resource.c
bsd/kern/kern_sig.c
bsd/kern/kern_subr.c
bsd/kern/kern_symfile.c
bsd/kern/kern_synch.c
bsd/kern/kern_sysctl.c
bsd/kern/kpi_mbuf.c
bsd/kern/kpi_socket.c
bsd/kern/kpi_socketfilter.c
bsd/kern/mach_fat.c
bsd/kern/mach_loader.c
bsd/kern/mach_loader.h
bsd/kern/mach_process.c
bsd/kern/makesyscalls.sh
bsd/kern/mcache.c
bsd/kern/netboot.c
bsd/kern/policy_check.c
bsd/kern/posix_shm.c
bsd/kern/proc_info.c
bsd/kern/process_policy.c
bsd/kern/pthread_support.c
bsd/kern/pthread_synch.c
bsd/kern/socket_info.c
bsd/kern/subr_prf.c
bsd/kern/subr_prof.c
bsd/kern/sys_generic.c
bsd/kern/sys_pipe.c
bsd/kern/sys_socket.c
bsd/kern/syscalls.master
bsd/kern/sysv_sem.c
bsd/kern/trace.codes
bsd/kern/tty.c
bsd/kern/tty_tty.c
bsd/kern/ubc_subr.c
bsd/kern/uipc_domain.c
bsd/kern/uipc_mbuf.c
bsd/kern/uipc_mbuf2.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_socket2.c
bsd/kern/uipc_syscalls.c
bsd/kern/uipc_usrreq.c
bsd/kern/vm_pressure.c
bsd/kern/vm_pressure.h
bsd/libkern/libkern.h
bsd/machine/exec.h
bsd/machine/setjmp.h
bsd/man/man2/getattrlist.2
bsd/man/man2/getaudit.2
bsd/man/man2/getaudit_addr.2
bsd/man/man2/getgroups.2
bsd/man/man2/getrusage.2
bsd/man/man2/getsockopt.2
bsd/man/man2/searchfs.2
bsd/man/man2/setaudit.2
bsd/man/man2/setaudit_addr.2
bsd/man/man2/setxattr.2
bsd/man/man2/statfs.2
bsd/man/man3/posix_spawnattr_setspecialport_np.3
bsd/man/man4/Makefile
bsd/man/man4/inet6.4
bsd/man/man4/ip6.4
bsd/miscfs/specfs/spec_vnops.c
bsd/miscfs/specfs/specdev.h
bsd/net/Makefile
bsd/net/altq/Makefile [new file with mode: 0644]
bsd/net/altq/altq.h [new file with mode: 0644]
bsd/net/altq/altq_cbq.c [new file with mode: 0644]
bsd/net/altq/altq_cbq.h [new file with mode: 0644]
bsd/net/altq/altq_fairq.c [new file with mode: 0644]
bsd/net/altq/altq_fairq.h [new file with mode: 0644]
bsd/net/altq/altq_hfsc.c [new file with mode: 0644]
bsd/net/altq/altq_hfsc.h [new file with mode: 0644]
bsd/net/altq/altq_priq.c [new file with mode: 0644]
bsd/net/altq/altq_priq.h [new file with mode: 0644]
bsd/net/altq/altq_qfq.c [new file with mode: 0644]
bsd/net/altq/altq_qfq.h [new file with mode: 0644]
bsd/net/altq/altq_subr.c [new file with mode: 0644]
bsd/net/altq/altq_var.h [new file with mode: 0644]
bsd/net/altq/if_altq.h [new file with mode: 0644]
bsd/net/bpf.c
bsd/net/bpf.h
bsd/net/bpf_filter.c
bsd/net/bpfdesc.h
bsd/net/bridgestp.c
bsd/net/bridgestp.h
bsd/net/classq/Makefile [new file with mode: 0644]
bsd/net/classq/classq.c [new file with mode: 0644]
bsd/net/classq/classq.h [new file with mode: 0644]
bsd/net/classq/classq_blue.c [new file with mode: 0644]
bsd/net/classq/classq_blue.h [new file with mode: 0644]
bsd/net/classq/classq_red.c [new file with mode: 0644]
bsd/net/classq/classq_red.h [new file with mode: 0644]
bsd/net/classq/classq_rio.c [new file with mode: 0644]
bsd/net/classq/classq_rio.h [new file with mode: 0644]
bsd/net/classq/classq_sfb.c [new file with mode: 0644]
bsd/net/classq/classq_sfb.h [new file with mode: 0644]
bsd/net/classq/classq_subr.c [new file with mode: 0644]
bsd/net/classq/classq_util.c [new file with mode: 0644]
bsd/net/classq/if_classq.h [new file with mode: 0644]
bsd/net/dlil.c
bsd/net/dlil.h
bsd/net/dlil_pvt.h [deleted file]
bsd/net/ether_at_pr_module.c
bsd/net/ether_if_module.c
bsd/net/ether_inet6_pr_module.c
bsd/net/ether_inet_pr_module.c
bsd/net/flowadv.h [new file with mode: 0644]
bsd/net/flowhash.c [new file with mode: 0644]
bsd/net/flowhash.h [new file with mode: 0644]
bsd/net/if.c
bsd/net/if.h
bsd/net/if_bond.c
bsd/net/if_bond_internal.h [new file with mode: 0644]
bsd/net/if_bond_var.h
bsd/net/if_bridge.c
bsd/net/if_bridgevar.h
bsd/net/if_dl.h
bsd/net/if_ether.h
bsd/net/if_gif.c
bsd/net/if_llreach.c
bsd/net/if_llreach.h
bsd/net/if_loop.c
bsd/net/if_media.h
bsd/net/if_mib.c
bsd/net/if_mib.h
bsd/net/if_pflog.c
bsd/net/if_stf.c
bsd/net/if_utun.c
bsd/net/if_utun.h
bsd/net/if_utun_crypto.c [new file with mode: 0644]
bsd/net/if_utun_crypto.h [new file with mode: 0644]
bsd/net/if_utun_crypto_ipsec.c [new file with mode: 0644]
bsd/net/if_utun_crypto_ipsec.h [new file with mode: 0644]
bsd/net/if_var.h
bsd/net/if_vlan.c
bsd/net/iptap.c [new file with mode: 0644]
bsd/net/iptap.h [new file with mode: 0644]
bsd/net/kpi_interface.c
bsd/net/kpi_interface.h
bsd/net/kpi_protocol.c
bsd/net/lacp.h
bsd/net/ndrv.c
bsd/net/ndrv.h
bsd/net/ndrv_var.h
bsd/net/net_str_id.c
bsd/net/netsrc.c
bsd/net/netsrc.h
bsd/net/ntstat.c
bsd/net/ntstat.h
bsd/net/pf.c
bsd/net/pf_if.c
bsd/net/pf_ioctl.c
bsd/net/pf_mtag.h [deleted file]
bsd/net/pf_norm.c
bsd/net/pf_osfp.c
bsd/net/pf_ruleset.c
bsd/net/pf_table.c
bsd/net/pfkeyv2.h
bsd/net/pfvar.h
bsd/net/pktsched/Makefile [new file with mode: 0644]
bsd/net/pktsched/pktsched.c [new file with mode: 0644]
bsd/net/pktsched/pktsched.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_cbq.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_cbq.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_fairq.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_fairq.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_hfsc.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_hfsc.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_priq.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_priq.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_qfq.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_qfq.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_rmclass.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_rmclass.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_rmclass_debug.h [new file with mode: 0644]
bsd/net/pktsched/pktsched_tcq.c [new file with mode: 0644]
bsd/net/pktsched/pktsched_tcq.h [new file with mode: 0644]
bsd/net/radix.c
bsd/net/raw_usrreq.c
bsd/net/route.c
bsd/net/route.h
bsd/net/rtsock.c
bsd/netat/drv_dep.c
bsd/netinet/Makefile
bsd/netinet/icmp6.h
bsd/netinet/if_ether.h
bsd/netinet/igmp.c
bsd/netinet/igmp_var.h
bsd/netinet/in.c
bsd/netinet/in.h
bsd/netinet/in_arp.c
bsd/netinet/in_arp.h
bsd/netinet/in_cksum.c
bsd/netinet/in_dhcp.c
bsd/netinet/in_gif.c
bsd/netinet/in_mcast.c
bsd/netinet/in_pcb.c
bsd/netinet/in_pcb.h
bsd/netinet/in_pcblist.c
bsd/netinet/in_proto.c
bsd/netinet/in_rmx.c
bsd/netinet/in_tclass.c
bsd/netinet/in_var.h
bsd/netinet/ip_divert.c
bsd/netinet/ip_dummynet.c
bsd/netinet/ip_dummynet.h
bsd/netinet/ip_encap.c
bsd/netinet/ip_flowid.h [new file with mode: 0644]
bsd/netinet/ip_fw2.c
bsd/netinet/ip_fw2.h
bsd/netinet/ip_fw2_compat.c
bsd/netinet/ip_icmp.c
bsd/netinet/ip_input.c
bsd/netinet/ip_output.c
bsd/netinet/ip_var.h
bsd/netinet/kpi_ipfilter.c
bsd/netinet/kpi_ipfilter.h
bsd/netinet/lro_ext.h [new file with mode: 0644]
bsd/netinet/raw_ip.c
bsd/netinet/tcp.h
bsd/netinet/tcp_cc.h
bsd/netinet/tcp_input.c
bsd/netinet/tcp_ledbat.c
bsd/netinet/tcp_lro.c [new file with mode: 0644]
bsd/netinet/tcp_lro.h [new file with mode: 0644]
bsd/netinet/tcp_newreno.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_seq.h
bsd/netinet/tcp_subr.c
bsd/netinet/tcp_timer.c
bsd/netinet/tcp_timer.h
bsd/netinet/tcp_usrreq.c
bsd/netinet/tcp_var.h
bsd/netinet/udp_usrreq.c
bsd/netinet/udp_var.h
bsd/netinet6/Makefile
bsd/netinet6/ah_core.c
bsd/netinet6/ah_input.c
bsd/netinet6/ah_output.c
bsd/netinet6/esp_core.c
bsd/netinet6/esp_input.c
bsd/netinet6/esp_output.c
bsd/netinet6/esp_rijndael.c
bsd/netinet6/frag6.c
bsd/netinet6/icmp6.c
bsd/netinet6/in6.c
bsd/netinet6/in6.h
bsd/netinet6/in6_cksum.c
bsd/netinet6/in6_gif.c
bsd/netinet6/in6_ifattach.c
bsd/netinet6/in6_mcast.c
bsd/netinet6/in6_pcb.c
bsd/netinet6/in6_pcb.h
bsd/netinet6/in6_proto.c
bsd/netinet6/in6_rmx.c
bsd/netinet6/in6_src.c
bsd/netinet6/in6_var.h
bsd/netinet6/ip6_forward.c
bsd/netinet6/ip6_fw.c
bsd/netinet6/ip6_input.c
bsd/netinet6/ip6_mroute.c
bsd/netinet6/ip6_output.c
bsd/netinet6/ip6_var.h
bsd/netinet6/ipcomp_input.c
bsd/netinet6/ipsec.c
bsd/netinet6/ipsec.h
bsd/netinet6/mld6.c
bsd/netinet6/mld6_var.h
bsd/netinet6/nd6.c
bsd/netinet6/nd6.h
bsd/netinet6/nd6_nbr.c
bsd/netinet6/nd6_prproxy.c [new file with mode: 0644]
bsd/netinet6/nd6_rtr.c
bsd/netinet6/raw_ip6.c
bsd/netinet6/route6.c
bsd/netinet6/scope6.c
bsd/netinet6/scope6_var.h
bsd/netinet6/udp6_output.c
bsd/netinet6/udp6_usrreq.c
bsd/netkey/key.c
bsd/netkey/key.h
bsd/netkey/keydb.h
bsd/nfs/nfs.h
bsd/nfs/nfs4_subs.c
bsd/nfs/nfs4_vnops.c
bsd/nfs/nfs_bio.c
bsd/nfs/nfs_gss.c
bsd/nfs/nfs_gss.h
bsd/nfs/nfs_gss_crypto.c
bsd/nfs/nfs_gss_crypto.h
bsd/nfs/nfs_lock.c
bsd/nfs/nfs_node.c
bsd/nfs/nfs_serv.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_srvcache.c
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfs_upcall.c [new file with mode: 0644]
bsd/nfs/nfs_vfsops.c
bsd/nfs/nfs_vnops.c
bsd/nfs/nfsmount.h
bsd/nfs/nfsnode.h
bsd/security/audit/audit_bsd.h
bsd/security/audit/audit_bsm_errno.c
bsd/security/audit/audit_pipe.c
bsd/security/audit/audit_session.c
bsd/security/audit/audit_syscalls.c
bsd/security/audit/audit_worker.c
bsd/sys/Makefile
bsd/sys/attr.h
bsd/sys/bsdtask_info.h
bsd/sys/buf.h
bsd/sys/buf_internal.h
bsd/sys/cdefs.h
bsd/sys/codesign.h
bsd/sys/conf.h
bsd/sys/cprotect.h
bsd/sys/decmpfs.h
bsd/sys/disk.h
bsd/sys/domain.h
bsd/sys/dtrace.h
bsd/sys/dtrace_impl.h
bsd/sys/errno.h
bsd/sys/event.h
bsd/sys/fcntl.h
bsd/sys/file.h
bsd/sys/file_internal.h
bsd/sys/imgact.h
bsd/sys/kas_info.h [new file with mode: 0644]
bsd/sys/kauth.h
bsd/sys/kdebug.h
bsd/sys/kern_callout.h [deleted file]
bsd/sys/kern_memorystatus.h
bsd/sys/kpi_mbuf.h
bsd/sys/kpi_socketfilter.h
bsd/sys/lockf.h
bsd/sys/lockstat.h
bsd/sys/make_posix_availability.sh
bsd/sys/make_symbol_aliasing.sh
bsd/sys/malloc.h
bsd/sys/mbuf.h
bsd/sys/mcache.h
bsd/sys/mount.h
bsd/sys/mount_internal.h
bsd/sys/munge.h [new file with mode: 0644]
bsd/sys/namei.h
bsd/sys/pipe.h
bsd/sys/priv.h
bsd/sys/proc.h
bsd/sys/proc_info.h
bsd/sys/proc_internal.h
bsd/sys/process_policy.h
bsd/sys/pthread_internal.h
bsd/sys/reboot.h
bsd/sys/resource.h
bsd/sys/sem.h
bsd/sys/sem_internal.h
bsd/sys/signal.h
bsd/sys/signalvar.h
bsd/sys/socket.h
bsd/sys/socketvar.h
bsd/sys/sockio.h
bsd/sys/spawn.h
bsd/sys/spawn_internal.h
bsd/sys/sysctl.h
bsd/sys/sysent.h
bsd/sys/syslimits.h
bsd/sys/syslog.h
bsd/sys/systm.h
bsd/sys/tty.h
bsd/sys/ubc.h
bsd/sys/un.h
bsd/sys/unpcb.h
bsd/sys/user.h
bsd/sys/vnode.h
bsd/sys/vnode_internal.h
bsd/sys/xattr.h
bsd/uxkern/ux_exception.c
bsd/vfs/kpi_vfs.c
bsd/vfs/vfs_attrlist.c
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_conf.c
bsd/vfs/vfs_fsevents.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_journal.h
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
bsd/vfs/vfs_vnops.c
bsd/vfs/vfs_xattr.c
bsd/vm/dp_backing_file.c
bsd/vm/vm_unix.c
bsd/vm/vnode_pager.c
config/BSDKernel.exports
config/IOKit.exports
config/IOKit.i386.exports
config/IOKit.x86_64.exports
config/Libkern.exports
config/Libkern.i386.exports
config/Libkern.x86_64.exports
config/MACFramework.exports
config/MACFramework.i386.exports
config/MACFramework.x86_64.exports
config/Makefile
config/MasterVersion
config/Private.exports
config/Private.i386.exports
config/Private.x86_64.exports
config/System6.0.exports
config/System6.0.i386.exports
config/System6.0.x86_64.exports
config/Unsupported.exports
config/Unsupported.i386.exports
config/Unsupported.x86_64.exports
config/newvers.pl
iokit/IOKit/IOCatalogue.h
iokit/IOKit/IODeviceTreeSupport.h
iokit/IOKit/IOHibernatePrivate.h
iokit/IOKit/IOKitServer.h
iokit/IOKit/IOLib.h
iokit/IOKit/IOMemoryDescriptor.h
iokit/IOKit/IOService.h
iokit/IOKit/IOTypes.h
iokit/IOKit/Makefile
iokit/IOKit/i386/Makefile
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/IOKit/pwr_mgt/Makefile
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/IOKit/x86_64/Makefile [new file with mode: 0644]
iokit/Kernel/IOBufferMemoryDescriptor.cpp
iokit/Kernel/IOCPU.cpp
iokit/Kernel/IOCatalogue.cpp
iokit/Kernel/IOCommandGate.cpp
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IODataQueue.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOFilterInterruptEventSource.cpp
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOHibernateInternal.h
iokit/Kernel/IOHibernateRestoreKernel.c
iokit/Kernel/IOInterruptEventSource.cpp
iokit/Kernel/IOKitDebug.cpp
iokit/Kernel/IOKitKernelInternal.h
iokit/Kernel/IOLib.cpp
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IONVRAM.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IORegistryEntry.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
iokit/Kernel/IOServicePrivate.h
iokit/Kernel/IOStatistics.cpp
iokit/Kernel/IOSubMemoryDescriptor.cpp
iokit/Kernel/IOUserClient.cpp
iokit/Kernel/IOWorkLoop.cpp
iokit/Kernel/RootDomainUserClient.cpp
iokit/bsddev/IOKitBSDInit.cpp
iokit/conf/MASTER
iokit/conf/MASTER.i386
iokit/conf/MASTER.x86_64
iokit/conf/Makefile
iokit/conf/Makefile.i386
iokit/conf/Makefile.x86_64
iokit/conf/files
iokit/conf/files.i386
iokit/conf/files.x86_64
kgmacros
libkern/Makefile
libkern/OSKextLib.cpp
libkern/c++/OSData.cpp
libkern/c++/OSDictionary.cpp
libkern/c++/OSKext.cpp
libkern/c++/OSMetaClass.cpp
libkern/c++/OSRuntime.cpp
libkern/c++/OSSet.cpp
libkern/c++/OSSymbol.cpp
libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/project.pbxproj
libkern/conf/MASTER
libkern/conf/MASTER.i386
libkern/conf/MASTER.x86_64
libkern/conf/Makefile
libkern/conf/Makefile.i386
libkern/conf/Makefile.template
libkern/conf/Makefile.x86_64
libkern/conf/files
libkern/conf/files.i386
libkern/conf/files.x86_64
libkern/crypto/corecrypto_aes.c [new file with mode: 0644]
libkern/crypto/corecrypto_aesxts.c [new file with mode: 0644]
libkern/crypto/corecrypto_des.c [new file with mode: 0644]
libkern/crypto/corecrypto_md5.c [new file with mode: 0644]
libkern/crypto/corecrypto_sha1.c [new file with mode: 0644]
libkern/crypto/corecrypto_sha2.c [new file with mode: 0644]
libkern/crypto/intel/sha1edp.s
libkern/crypto/md5.c [deleted file]
libkern/crypto/register_crypto.c [new file with mode: 0644]
libkern/crypto/sha1.c [deleted file]
libkern/gen/OSAtomicOperations.c
libkern/gen/OSDebug.cpp
libkern/kernel_mach_header.c
libkern/kmod/Makefile [deleted file]
libkern/kmod/Makefile.kmod [deleted file]
libkern/kmod/libkmod.xcodeproj/project.pbxproj [new file with mode: 0644]
libkern/kmod/libkmodtest/libkmodtest-Info.plist [new file with mode: 0644]
libkern/kmod/libkmodtest/libkmodtest.cpp [new file with mode: 0644]
libkern/kmod/libkmodtest/libkmodtest.h [new file with mode: 0644]
libkern/kxld/Makefile
libkern/kxld/WKdmCompress.c
libkern/kxld/WKdmDecompress.c
libkern/kxld/kxld.c
libkern/kxld/kxld_kext.c
libkern/kxld/kxld_kext.h
libkern/kxld/kxld_object.c
libkern/kxld/kxld_object.h
libkern/kxld/kxld_reloc.c
libkern/kxld/kxld_reloc.h
libkern/kxld/kxld_sect.c
libkern/kxld/kxld_sect.h
libkern/kxld/kxld_seg.c
libkern/kxld/kxld_seg.h
libkern/kxld/kxld_srcversion.c [new file with mode: 0644]
libkern/kxld/kxld_srcversion.h [new file with mode: 0644]
libkern/kxld/kxld_sym.c
libkern/kxld/kxld_sym.h
libkern/kxld/kxld_symtab.c
libkern/kxld/kxld_symtab.h
libkern/kxld/kxld_util.c
libkern/kxld/kxld_util.h
libkern/kxld/kxld_uuid.c
libkern/kxld/kxld_versionmin.c [new file with mode: 0644]
libkern/kxld/kxld_versionmin.h [new file with mode: 0644]
libkern/kxld/kxld_vtable.c
libkern/libkern/Makefile
libkern/libkern/OSAtomic.h
libkern/libkern/OSKextLib.h
libkern/libkern/OSKextLibPrivate.h
libkern/libkern/OSTypes.h
libkern/libkern/WKdm.h
libkern/libkern/c++/Makefile
libkern/libkern/c++/OSCollection.h
libkern/libkern/c++/OSData.h
libkern/libkern/c++/OSKext.h
libkern/libkern/c++/OSMetaClass.h
libkern/libkern/c++/OSObject.h
libkern/libkern/c++/OSSymbol.h
libkern/libkern/crypto/Makefile
libkern/libkern/crypto/aes.h [new file with mode: 0644]
libkern/libkern/crypto/aesxts.h [new file with mode: 0644]
libkern/libkern/crypto/crypto_internal.h [new file with mode: 0644]
libkern/libkern/crypto/des.h [new file with mode: 0644]
libkern/libkern/crypto/register_crypto.h [new file with mode: 0644]
libkern/libkern/crypto/sha2.h [new file with mode: 0644]
libkern/libkern/kernel_mach_header.h
libkern/libkern/kext_request_keys.h
libkern/libkern/kxld_types.h
libkern/libkern/machine/Makefile
libkern/libkern/stack_protector.h [new file with mode: 0644]
libkern/libkern/tree.h
libkern/stack_protector.c
libkern/uuid/Makefile
libkern/uuid/uuid.c
libkern/x86_64/OSAtomic.s
libkern/zlib/zutil.h
libsa/bootstrap.cpp
libsa/conf/MASTER
libsa/conf/MASTER.i386
libsa/conf/MASTER.x86_64
libsa/conf/Makefile
libsa/libsa/Makefile
libsyscall/Libsyscall.xcconfig
libsyscall/Libsyscall.xcodeproj/project.pbxproj
libsyscall/Platforms/MacOSX/i386/syscall.map
libsyscall/Platforms/MacOSX/x86_64/syscall.map
libsyscall/custom/SYS.h
libsyscall/custom/__getpid.s
libsyscall/custom/__gettimeofday.s
libsyscall/custom/__pipe.s
libsyscall/custom/__psynch_cvbroad.s [deleted file]
libsyscall/custom/__psynch_cvwait.s [deleted file]
libsyscall/custom/__ptrace.s
libsyscall/custom/custom.s
libsyscall/mach/mach/mach_interface.h
libsyscall/mach/mach_msg.c
libsyscall/mach/mach_port.c [new file with mode: 0644]
libsyscall/mach/mach_vm.c [new file with mode: 0644]
libsyscall/mach/string.h
libsyscall/mach/vm_map.defs
libsyscall/wrappers/__get_cpu_capabilities.s
libsyscall/wrappers/cancelable/fcntl-base.c
libsyscall/wrappers/legacy/getaudit.c [new file with mode: 0644]
libsyscall/wrappers/memcpy.c
libsyscall/wrappers/open_dprotected_np.c [new file with mode: 0644]
libsyscall/xcodescripts/create-syscalls.pl
libsyscall/xcodescripts/mach_install_mig.sh
lldbmacros.py [new file with mode: 0644]
makedefs/MakeInc.cmd
makedefs/MakeInc.def
makedefs/MakeInc.dir
makedefs/MakeInc.rule
osfmk/Makefile
osfmk/chud/chud_glue.c
osfmk/chud/chud_thread.c
osfmk/chud/chud_xnu.h
osfmk/chud/i386/chud_osfmk_callback_i386.c
osfmk/chud/i386/chud_thread_i386.c
osfmk/conf/MASTER
osfmk/conf/MASTER.i386
osfmk/conf/MASTER.x86_64
osfmk/conf/Makefile
osfmk/conf/Makefile.i386
osfmk/conf/Makefile.x86_64
osfmk/conf/files
osfmk/conf/files.i386
osfmk/conf/files.x86_64
osfmk/console/i386/serial_console.c
osfmk/console/serial_general.c
osfmk/console/video_console.c
osfmk/console/video_console.h
osfmk/ddb/Makefile [deleted file]
osfmk/ddb/db_access.c [deleted file]
osfmk/ddb/db_access.h [deleted file]
osfmk/ddb/db_aout.c [deleted file]
osfmk/ddb/db_aout.h [deleted file]
osfmk/ddb/db_break.c [deleted file]
osfmk/ddb/db_break.h [deleted file]
osfmk/ddb/db_coff.h [deleted file]
osfmk/ddb/db_command.c [deleted file]
osfmk/ddb/db_command.h [deleted file]
osfmk/ddb/db_cond.c [deleted file]
osfmk/ddb/db_cond.h [deleted file]
osfmk/ddb/db_examine.c [deleted file]
osfmk/ddb/db_examine.h [deleted file]
osfmk/ddb/db_expr.c [deleted file]
osfmk/ddb/db_expr.h [deleted file]
osfmk/ddb/db_ext_symtab.c [deleted file]
osfmk/ddb/db_input.c [deleted file]
osfmk/ddb/db_input.h [deleted file]
osfmk/ddb/db_lex.c [deleted file]
osfmk/ddb/db_lex.h [deleted file]
osfmk/ddb/db_macro.c [deleted file]
osfmk/ddb/db_macro.h [deleted file]
osfmk/ddb/db_output.c [deleted file]
osfmk/ddb/db_output.h [deleted file]
osfmk/ddb/db_print.c [deleted file]
osfmk/ddb/db_print.h [deleted file]
osfmk/ddb/db_run.c [deleted file]
osfmk/ddb/db_run.h [deleted file]
osfmk/ddb/db_sym.c [deleted file]
osfmk/ddb/db_sym.h [deleted file]
osfmk/ddb/db_task_thread.c [deleted file]
osfmk/ddb/db_task_thread.h [deleted file]
osfmk/ddb/db_trap.c [deleted file]
osfmk/ddb/db_trap.h [deleted file]
osfmk/ddb/db_variables.c [deleted file]
osfmk/ddb/db_variables.h [deleted file]
osfmk/ddb/db_watch.c [deleted file]
osfmk/ddb/db_watch.h [deleted file]
osfmk/ddb/db_write_cmd.c [deleted file]
osfmk/ddb/db_write_cmd.h [deleted file]
osfmk/ddb/makedis.c [deleted file]
osfmk/ddb/nlist.h [deleted file]
osfmk/ddb/orig/db_print.c [deleted file]
osfmk/ddb/stab.h [deleted file]
osfmk/default_pager/default_pager.c
osfmk/default_pager/default_pager_internal.h
osfmk/default_pager/dp_backing_store.c
osfmk/default_pager/dp_memory_object.c
osfmk/device/device.defs
osfmk/device/device_init.c
osfmk/device/iokit_rpc.c
osfmk/device/subrs.c
osfmk/gssd/gssd_mach.defs
osfmk/gssd/gssd_mach_types.h
osfmk/i386/AT386/model_dep.c
osfmk/i386/Diagnostics.c
osfmk/i386/Diagnostics.h
osfmk/i386/asm.h
osfmk/i386/asm64.h
osfmk/i386/bsd_i386.c
osfmk/i386/commpage/commpage.c
osfmk/i386/commpage/fifo_queues.s
osfmk/i386/commpage/pthreads.s
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpu_data.h
osfmk/i386/cpu_threads.c
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/db_disasm.c [deleted file]
osfmk/i386/db_gcc_aout.c [deleted file]
osfmk/i386/db_interface.c [deleted file]
osfmk/i386/db_machdep.h [deleted file]
osfmk/i386/db_trace.c [deleted file]
osfmk/i386/etimer.c
osfmk/i386/fpu.c
osfmk/i386/gdt.c
osfmk/i386/genassym.c
osfmk/i386/hibernate_restore.c
osfmk/i386/hpet.c
osfmk/i386/i386_init.c
osfmk/i386/i386_lock.s
osfmk/i386/i386_lowmem.h
osfmk/i386/i386_vm_init.c
osfmk/i386/idle_pt.c
osfmk/i386/idt.s
osfmk/i386/idt64.s
osfmk/i386/ktss.c
osfmk/i386/lapic.h
osfmk/i386/lapic_native.c
osfmk/i386/locks.h
osfmk/i386/locks_i386.c
osfmk/i386/locore.s
osfmk/i386/loose_ends.c
osfmk/i386/lowmem_vectors.s
osfmk/i386/machdep_call.c
osfmk/i386/machdep_call.h
osfmk/i386/machine_check.c
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/machine_routines_asm.s
osfmk/i386/machine_task.c
osfmk/i386/misc_protos.h
osfmk/i386/mp.c
osfmk/i386/mp.h
osfmk/i386/mp_desc.c
osfmk/i386/mp_desc.h
osfmk/i386/mp_native.c
osfmk/i386/mtrr.c
osfmk/i386/pal_hibernate.h
osfmk/i386/pal_routines.c
osfmk/i386/pcb.c
osfmk/i386/phys.c
osfmk/i386/pmCPU.c
osfmk/i386/pmap.c
osfmk/i386/pmap.h
osfmk/i386/pmap_common.c
osfmk/i386/pmap_internal.h
osfmk/i386/pmap_x86_common.c
osfmk/i386/postcode.h
osfmk/i386/proc_reg.h
osfmk/i386/rtclock.c
osfmk/i386/rtclock_native.c
osfmk/i386/seg.h
osfmk/i386/start.s
osfmk/i386/start64.s
osfmk/i386/startup64.c
osfmk/i386/trap.c
osfmk/i386/trap_native.c
osfmk/i386/tsc.c
osfmk/i386/vmx/vmx_asm.h
osfmk/i386/vmx/vmx_cpu.c
osfmk/i386/vmx/vmx_cpu.h
osfmk/ipc/ipc_entry.c
osfmk/ipc/ipc_entry.h
osfmk/ipc/ipc_hash.c
osfmk/ipc/ipc_hash.h
osfmk/ipc/ipc_init.c
osfmk/ipc/ipc_init.h
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_kmsg.h
osfmk/ipc/ipc_labelh.c
osfmk/ipc/ipc_labelh.h
osfmk/ipc/ipc_mqueue.c
osfmk/ipc/ipc_mqueue.h
osfmk/ipc/ipc_object.c
osfmk/ipc/ipc_object.h
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/ipc_print.h [deleted file]
osfmk/ipc/ipc_pset.c
osfmk/ipc/ipc_pset.h
osfmk/ipc/ipc_right.c
osfmk/ipc/ipc_right.h
osfmk/ipc/ipc_space.c
osfmk/ipc/ipc_space.h
osfmk/ipc/ipc_splay.c [deleted file]
osfmk/ipc/ipc_splay.h [deleted file]
osfmk/ipc/ipc_table.c
osfmk/ipc/ipc_table.h
osfmk/ipc/ipc_types.h
osfmk/ipc/mach_debug.c
osfmk/ipc/mach_kernelrpc.c [new file with mode: 0644]
osfmk/ipc/mach_msg.c
osfmk/ipc/mach_port.c
osfmk/kdp/Makefile
osfmk/kdp/kdp.c
osfmk/kdp/kdp_dyld.h
osfmk/kdp/kdp_udp.c
osfmk/kdp/ml/i386/kdp_machdep.c
osfmk/kdp/ml/i386/kdp_vm.c
osfmk/kdp/ml/i386/kdp_x86_common.c
osfmk/kdp/ml/i386/kdp_x86_common.h [new file with mode: 0644]
osfmk/kdp/ml/x86_64/kdp_machdep.c
osfmk/kdp/ml/x86_64/kdp_vm.c
osfmk/kern/Makefile
osfmk/kern/affinity.c
osfmk/kern/ast.c
osfmk/kern/ast.h
osfmk/kern/audit_sessionport.c
osfmk/kern/bsd_kern.c
osfmk/kern/clock.c
osfmk/kern/clock.h
osfmk/kern/debug.c
osfmk/kern/debug.h
osfmk/kern/exception.c
osfmk/kern/exception.h
osfmk/kern/gzalloc.c [new file with mode: 0644]
osfmk/kern/host.c
osfmk/kern/ipc_kobject.c
osfmk/kern/ipc_mig.c
osfmk/kern/ipc_mig.h
osfmk/kern/ipc_misc.c
osfmk/kern/ipc_tt.c
osfmk/kern/ipc_tt.h
osfmk/kern/kalloc.c
osfmk/kern/kalloc.h
osfmk/kern/kern_print.h [deleted file]
osfmk/kern/kext_alloc.c
osfmk/kern/ledger.c
osfmk/kern/ledger.h
osfmk/kern/locks.c
osfmk/kern/mach_clock.c [deleted file]
osfmk/kern/mach_param.h
osfmk/kern/misc_protos.h
osfmk/kern/mk_sp.c
osfmk/kern/mk_timer.c
osfmk/kern/mk_timer.h
osfmk/kern/printf.c
osfmk/kern/priority.c
osfmk/kern/processor.c
osfmk/kern/queue.h
osfmk/kern/sched.h
osfmk/kern/sched_average.c
osfmk/kern/sched_fixedpriority.c
osfmk/kern/sched_grrr.c
osfmk/kern/sched_prim.c
osfmk/kern/sched_prim.h
osfmk/kern/security.c
osfmk/kern/stack.c
osfmk/kern/startup.c
osfmk/kern/sync_lock.c
osfmk/kern/sync_sema.c
osfmk/kern/syscall_subr.c
osfmk/kern/syscall_sw.c
osfmk/kern/syscall_sw.h
osfmk/kern/task.c
osfmk/kern/task.h
osfmk/kern/task_policy.c
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/thread_act.c
osfmk/kern/thread_call.c
osfmk/kern/thread_call.h
osfmk/kern/thread_policy.c
osfmk/kern/timer.c
osfmk/kern/timer.h
osfmk/kern/timer_call.c
osfmk/kern/wait_queue.c
osfmk/kern/wait_queue.h
osfmk/kern/xpr.c
osfmk/kern/zalloc.c
osfmk/kern/zalloc.h
osfmk/kperf/Makefile [new file with mode: 0644]
osfmk/kperf/action.c [new file with mode: 0644]
osfmk/kperf/action.h [new file with mode: 0644]
osfmk/kperf/ast.h [new file with mode: 0644]
osfmk/kperf/buffer.h [new file with mode: 0644]
osfmk/kperf/callstack.c [new file with mode: 0644]
osfmk/kperf/callstack.h [new file with mode: 0644]
osfmk/kperf/context.h [new file with mode: 0644]
osfmk/kperf/filter.c [new file with mode: 0644]
osfmk/kperf/filter.h [new file with mode: 0644]
osfmk/kperf/kperf.c [new file with mode: 0644]
osfmk/kperf/kperf.h [new file with mode: 0644]
osfmk/kperf/kperf_arch.h [new file with mode: 0644]
osfmk/kperf/kperfbsd.c [new file with mode: 0644]
osfmk/kperf/kperfbsd.h [new file with mode: 0644]
osfmk/kperf/pet.c [new file with mode: 0644]
osfmk/kperf/pet.h [new file with mode: 0644]
osfmk/kperf/sample.h [new file with mode: 0644]
osfmk/kperf/threadinfo.c [new file with mode: 0644]
osfmk/kperf/threadinfo.h [new file with mode: 0644]
osfmk/kperf/timetrigger.c [new file with mode: 0644]
osfmk/kperf/timetrigger.h [new file with mode: 0644]
osfmk/kperf/x86_64/kperf_arch.h [new file with mode: 0644]
osfmk/kperf/x86_64/kperf_mp.c [new file with mode: 0644]
osfmk/mach/Makefile
osfmk/mach/Makefile.template
osfmk/mach/exception_types.h
osfmk/mach/host_priv.defs
osfmk/mach/host_special_ports.h
osfmk/mach/i386/exception.h
osfmk/mach/i386/machine_types.defs
osfmk/mach/i386/sdt_isa.h
osfmk/mach/i386/vm_param.h
osfmk/mach/i386/vm_types.h
osfmk/mach/kmod.h
osfmk/mach/ledger.defs
osfmk/mach/mach_host.defs
osfmk/mach/mach_interface.h
osfmk/mach/mach_port.defs
osfmk/mach/mach_traps.h
osfmk/mach/mach_types.defs
osfmk/mach/mach_types.h
osfmk/mach/mach_vm.defs
osfmk/mach/machine.h
osfmk/mach/memory_object_types.h
osfmk/mach/message.h
osfmk/mach/mig.h
osfmk/mach/ndr.h
osfmk/mach/shared_region.h
osfmk/mach/syscall_sw.h
osfmk/mach/task_info.h
osfmk/mach/task_special_ports.h
osfmk/mach/vm_map.defs
osfmk/mach/vm_param.h
osfmk/mach/vm_statistics.h
osfmk/machine/Makefile
osfmk/machine/commpage.h
osfmk/machine/db_machdep.h [deleted file]
osfmk/machine/machine_cpuid.h [new file with mode: 0644]
osfmk/pmc/pmc.c
osfmk/pmc/pmc.h
osfmk/profiling/Makefile
osfmk/vm/bsd_vm.c
osfmk/vm/cpm.h
osfmk/vm/default_freezer.c
osfmk/vm/default_freezer.h
osfmk/vm/memory_object.c
osfmk/vm/pmap.h
osfmk/vm/vm_apple_protect.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_fault.h
osfmk/vm/vm_init.c
osfmk/vm/vm_kern.c
osfmk/vm/vm_kern.h
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_map_store.c
osfmk/vm/vm_object.c
osfmk/vm/vm_object.h
osfmk/vm/vm_page.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_pageout.h
osfmk/vm/vm_print.h [deleted file]
osfmk/vm/vm_protos.h
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_purgeable_internal.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_shared_region.c
osfmk/vm/vm_shared_region.h
osfmk/vm/vm_swapfile_pager.c
osfmk/vm/vm_user.c
osfmk/x86_64/boot_pt.c [new file with mode: 0644]
osfmk/x86_64/idt64.s
osfmk/x86_64/idt_table.h
osfmk/x86_64/locore.s
osfmk/x86_64/loose_ends.c
osfmk/x86_64/lowglobals.h
osfmk/x86_64/lowmem_vectors.c [new file with mode: 0644]
osfmk/x86_64/lowmem_vectors.s [deleted file]
osfmk/x86_64/machine_routines_asm.s
osfmk/x86_64/pmap.c
osfmk/x86_64/start.s
pexpert/conf/MASTER
pexpert/conf/MASTER.x86_64
pexpert/conf/Makefile
pexpert/conf/files
pexpert/gen/bootargs.c
pexpert/gen/device_tree.c
pexpert/i386/pe_init.c
pexpert/pexpert/device_tree.h
pexpert/pexpert/i386/boot.h
pexpert/pexpert/pexpert.h
security/conf/MASTER
security/conf/MASTER.i386
security/conf/MASTER.x86_64
security/conf/Makefile
security/mac.h
security/mac_base.c
security/mac_framework.h
security/mac_inet.c
security/mac_internal.h
security/mac_mach_internal.h
security/mac_policy.h
security/mac_posix_shm.c
security/mac_process.c
security/mac_socket.c
security/mac_system.c
security/mac_vfs.c
tools/tests/MPMMTest/KQMPMMtest.c
tools/tests/MPMMTest/MPMMtest.c
tools/tests/execperf/exit.c
tools/tests/execperf/printexecinfo.c
tools/tests/execperf/run.c
tools/tests/libMicro/AppleReadMe
tools/tests/libMicro/Makefile
tools/tests/libMicro/Makefile.Darwin
tools/tests/libMicro/apple/Makefile.Darwin
tools/tests/libMicro/apple/Makefile.benchmarks
tools/tests/libMicro/coreos_bench.sh
tools/tests/libMicro/embd_bench.sh [new file with mode: 0644]
tools/tests/xnu_quick_test/32bit_inode_tests.c
tools/tests/xnu_quick_test/commpage_tests.c
tools/tests/xnu_quick_test/content_protection_test.c [new file with mode: 0644]
tools/tests/xnu_quick_test/helpers/data_exec.c
tools/tests/xnu_quick_test/helpers/launch.c
tools/tests/xnu_quick_test/main.c
tools/tests/xnu_quick_test/makefile
tools/tests/xnu_quick_test/memory_tests.c
tools/tests/xnu_quick_test/misc.c
tools/tests/xnu_quick_test/pipes_tests.c [new file with mode: 0644]
tools/tests/xnu_quick_test/socket_tests.c
tools/tests/xnu_quick_test/tests.c
tools/tests/xnu_quick_test/tests.h

diff --git a/.lldbinit b/.lldbinit
new file mode 100644 (file)
index 0000000..6fd12a4
--- /dev/null
+++ b/.lldbinit
@@ -0,0 +1,17 @@
+# Import python macros
+script import lldbmacros
+
+# Basic types
+type summary add --regex --summary-string "${var%s}" "char \[[0-9]*\]"
+type summary add --summary-string "${var[0]%y}${var[1]%y}${var[2]%y}${var[3]%y}-${var[4]%y}${var[5]%y}-${var[6]%y}${var[7]%y}-${var[8]%y}${var[9]%y}-${var[10]%y}${var[11]%y}${var[12]%y}${var[13]%y}${var[14]%y}${var[15]%y}" uuid_t
+
+# Kexts
+type summary add --summary-string "${var->loadTag%u} ${var->address%x} ${var->size%x} ${var->version%u} ${var->name%s}" OSKextLoadedKextSummary
+type summary add -v --python-function lldbmacros.showallkexts_summary OSKextLoadedKextSummaryHeader
+command script add -f lldbmacros.showallkexts_command showallkexts
+
+#KGMacros
+command script add -f lldbmacros.zprint_command zprint
+command script add -f lldbmacros.memstats_command memstats
+command script add -f lldbmacros.showioalloc_command showioalloc
+
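
[The uuid_t summary above indexes var[0]..var[15] with lldb's %y (hex byte)
format and inserts dashes to print the canonical 8-4-4-4-12 form. That works
because Darwin's uuid_t is a bare 16-byte array, declared roughly as below
(from the system headers, not part of this commit); the command script add
lines register Python functions from the new lldbmacros.py.]

    /* Darwin's uuid_t, approximately as the system headers declare it: */
    typedef unsigned char __darwin_uuid_t[16];  /* <sys/_types.h> */
    typedef __darwin_uuid_t uuid_t;             /* <uuid/uuid.h>  */
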
diff --git a/EXTERNAL_HEADERS/Availability.h b/EXTERNAL_HEADERS/Availability.h
index e811335c111497efda462ffdcd08c8752efd9dd0..5c6ccf781d280bdc9c1eddf38469d8261b6cc28c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2010 by Apple Inc.. All rights reserved.
+ * Copyright (c) 2007-2011 by Apple Inc.. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -55,7 +55,7 @@
     
     For these macros to function properly, a program must specify the OS version range 
     it is targeting.  The min OS version is specified as an option to the compiler:
-    -mmacosx-version-min=10.x when building for Mac OS X, and -miphone-version-min=1.x.x
+    -mmacosx-version-min=10.x when building for Mac OS X, and -miphoneos-version-min=x.x
     when building for the iPhone.  The upper bound for the OS version is rarely needed,
     but it can be set on the command line via: -D__MAC_OS_X_VERSION_MAX_ALLOWED=10xx for
     Mac OS X and __IPHONE_OS_VERSION_MAX_ALLOWED = 1xxx for iPhone.  
 #define __MAC_10_5      1050
 #define __MAC_10_6      1060
 #define __MAC_10_7      1070
+#define __MAC_10_8      1080
 #define __MAC_NA        9999   /* not available */
 
 #define __IPHONE_2_0     20000
 #define __IPHONE_3_0     30000
 #define __IPHONE_3_1     30100
 #define __IPHONE_3_2     30200
+#define __IPHONE_4_0     40000
+#define __IPHONE_4_1     40100
+#define __IPHONE_4_2     40200
+#define __IPHONE_4_3     40300
+#define __IPHONE_5_0     50000
+#define __IPHONE_5_1     50100
 #define __IPHONE_NA      99999  /* not available */
 
 #include <AvailabilityInternal.h>
 
 
 #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
-    #define __OSX_AVAILABLE_STARTING(_mac, _iphone) __AVAILABILITY_INTERNAL##_iphone
-    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) \
-                                                    __AVAILABILITY_INTERNAL##_iphoneIntro##_DEP##_iphoneDep
+    #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_ios
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) \
+                                                    __AVAILABILITY_INTERNAL##_iosIntro##_DEP##_iosDep
 
 #elif defined(__MAC_OS_X_VERSION_MIN_REQUIRED)
-    #define __OSX_AVAILABLE_STARTING(_mac, _iphone) __AVAILABILITY_INTERNAL##_mac
-    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) \
-                                                    __AVAILABILITY_INTERNAL##_macIntro##_DEP##_macDep
+    #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_osx
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) \
+                                                    __AVAILABILITY_INTERNAL##_osxIntro##_DEP##_osxDep
 
 #else
-    #define __OSX_AVAILABLE_STARTING(_mac, _iphone)
-    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) 
+    #define __OSX_AVAILABLE_STARTING(_osx, _ios)
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) 
 #endif
 
 
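[As the context above explains, these macros key off whichever min-version
flag the compiler sets (-mmacosx-version-min or -miphoneos-version-min). A
minimal usage sketch follows; my_new_call is a hypothetical symbol, not from
this commit.]

    #include <Availability.h>

    /* Annotated with both version columns; __OSX_AVAILABLE_STARTING keeps
     * only the column for the platform being built, token-pasting e.g.
     * __AVAILABILITY_INTERNAL__MAC_10_8 on an OS X build. */
    extern int my_new_call(void)                /* hypothetical symbol */
        __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_5_1);
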
diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h
index a4524708ed20d2bc841cc8c1c215b48bb5526c7e..d94e55d0832c0fa73454da48736d834d7a35ffcb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2010 by Apple Inc.. All rights reserved.
+ * Copyright (c) 2007-2011 by Apple Inc.. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -55,7 +55,7 @@
 #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
     /* make sure a default max version is set */
     #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED
-        #define __IPHONE_OS_VERSION_MAX_ALLOWED     __IPHONE_3_2
+        #define __IPHONE_OS_VERSION_MAX_ALLOWED     __IPHONE_5_1
     #endif
     /* make sure a valid min is set */
     #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
         #define __IPHONE_OS_VERSION_MIN_REQUIRED    __IPHONE_2_0 
     #endif
 
-    /* set up internal macros (up to 2.0) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_REGULAR
+    #ifdef __has_attribute
+        #if __has_attribute(availability)
+            /* use better attributes if possible */
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0                    __attribute__((availability(ios,introduced=2.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0    __attribute__((availability(ios,introduced=2.0,deprecated=2.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __attribute__((availability(ios,introduced=2.0,deprecated=2.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __attribute__((availability(ios,introduced=2.0,deprecated=2.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __attribute__((availability(ios,introduced=2.0,deprecated=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __attribute__((availability(ios,introduced=2.0,deprecated=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=2.0,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=2.0,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=2.0,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=2.0,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=2.0,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=2.0,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=2.0,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=2.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1                    __attribute__((availability(ios,introduced=2.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __attribute__((availability(ios,introduced=2.1,deprecated=2.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __attribute__((availability(ios,introduced=2.1,deprecated=2.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __attribute__((availability(ios,introduced=2.1,deprecated=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __attribute__((availability(ios,introduced=2.1,deprecated=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=2.1,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=2.1,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=2.1,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=2.1,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=2.1,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=2.1,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=2.1,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=2.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2                    __attribute__((availability(ios,introduced=2.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __attribute__((availability(ios,introduced=2.2,deprecated=2.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __attribute__((availability(ios,introduced=2.2,deprecated=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __attribute__((availability(ios,introduced=2.2,deprecated=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=2.2,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=2.2,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=2.2,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=2.2,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=2.2,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=2.2,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=2.2,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=2.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0                    __attribute__((availability(ios,introduced=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __attribute__((availability(ios,introduced=3.0,deprecated=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __attribute__((availability(ios,introduced=3.0,deprecated=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=3.0,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=3.0,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=3.0,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=3.0,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=3.0,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=3.0,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=3.0,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=3.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1                    __attribute__((availability(ios,introduced=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __attribute__((availability(ios,introduced=3.1,deprecated=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=3.1,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=3.1,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=3.1,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=3.1,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=3.1,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=3.1,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=3.1,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=3.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2                    __attribute__((availability(ios,introduced=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __attribute__((availability(ios,introduced=3.2,deprecated=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=3.2,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=3.2,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=3.2,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=3.2,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=3.2,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=3.2,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=3.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0                    __attribute__((availability(ios,introduced=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __attribute__((availability(ios,introduced=4.0,deprecated=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=4.0,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=4.0,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=4.0,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=4.0,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=4.0,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=4.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1                    __attribute__((availability(ios,introduced=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __attribute__((availability(ios,introduced=4.1,deprecated=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=4.1,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=4.1,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=4.1,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=4.1,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=4.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2                    __attribute__((availability(ios,introduced=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __attribute__((availability(ios,introduced=4.2,deprecated=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=4.2,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=4.2,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=4.2,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=4.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3                    __attribute__((availability(ios,introduced=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __attribute__((availability(ios,introduced=4.3,deprecated=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=4.3,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=4.3,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=4.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0                    __attribute__((availability(ios,introduced=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __attribute__((availability(ios,introduced=5.0,deprecated=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=5.0,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=5.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1                    __attribute__((availability(ios,introduced=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __attribute__((availability(ios,introduced=5.1,deprecated=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA     __attribute__((availability(ios,introduced=5.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_NA                     __attribute__((availability(ios,unavailable)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA      __attribute__((availability(ios,unavailable)))
+        #endif
     #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_0
-    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0    __AVAILABILITY_INTERNAL_DEPRECATED
-    /* set up internal macros (up to 2.1) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
-    #endif
-    /* set up internal macros (up to 2.2) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_2
-    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
-    #endif
-    /* set up internal macros (up to 3.0) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_0
-    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
-    #endif
-    /* set up internal macros (up to 3.1) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_1
-    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
-    #endif
-    /* set up internal macros (up to 3.2) */
-    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_2
-    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
-    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
-    #else
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+
+    #ifndef __AVAILABILITY_INTERNAL__IPHONE_2_0
+        /* set up old style internal macros (up to 2.0) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0    __AVAILABILITY_INTERNAL_DEPRECATED
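
(Aside, illustrative only and not part of the commit: every "set up old
style internal macros" block below repeats the three-way choice seen in
the 2.0 case above, which reads as

    /*
     * max allowed  < 2.0  ->  __AVAILABILITY_INTERNAL_UNAVAILABLE
     *                         (the symbol cannot be used with this SDK)
     * min required < 2.0  ->  __AVAILABILITY_INTERNAL_WEAK_IMPORT
     *                         (linked weakly; may be absent on older OSes)
     * otherwise           ->  __AVAILABILITY_INTERNAL_REGULAR
     *                         (a normal, strong reference)
     */

The three helper macros are defined elsewhere in this header; the
semantics sketched in the comment are inferred from their names and are
not shown in this hunk.)
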
+        /* set up old style internal macros (up to 2.1) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 2.2) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 3.0) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 3.1) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 3.2) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 4.0) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 4.1) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 4.2) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
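
(Worked example, illustrative only and not part of the commit: consider a
build with __IPHONE_OS_VERSION_MIN_REQUIRED == __IPHONE_3_0 and
__IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_4_2. In the 4.2 ladder just
above, the "min required < __IPHONE_3_1" branch is taken, so

    __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2  ->  __AVAILABILITY_INTERNAL_REGULAR
        /* introduced before the deployment target, not yet deprecated there */
    __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2  ->  __AVAILABILITY_INTERNAL__IPHONE_3_1
                                                         ->  __AVAILABILITY_INTERNAL_WEAK_IMPORT
        /* introduced after the deployment target, so weakly linked */

which is the same outcome a single availability attribute expresses
directly in the new-style block at the top of this hunk.)
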
+        /* set up old style internal macros (up to 4.3) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 5.0) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up old style internal macros (up to 5.1) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1                __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #endif
+        /* set up internal macros (n/a) */
+        #define __AVAILABILITY_INTERNAL__IPHONE_NA                     __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA      __AVAILABILITY_INTERNAL_UNAVAILABLE
     #endif
-    /* set up internal macros (n/a) */
-    #define __AVAILABILITY_INTERNAL__IPHONE_NA                     __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA      __AVAILABILITY_INTERNAL_UNAVAILABLE
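None of the __AVAILABILITY_INTERNAL__IPHONE_<intro>_DEP__IPHONE_<dep> names above are meant to be spelled out by hand: Availability.h (also under EXTERNAL_HEADERS) builds them by token pasting from __OSX_AVAILABLE_BUT_DEPRECATED(). A minimal sketch of how a declaration reaches this table, assuming that pasting scheme — my_func is a hypothetical example, not part of this commit:

    #include <Availability.h>

    /* Introduced in iOS 4.0, deprecated in iOS 5.0 (10.6..10.8 on OS X).
     * On an iOS build this pastes into
     * __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0, which the table
     * above has already resolved against the deployment target:
     * weak-import below 4.0, regular from 4.0 up to (but not including)
     * 5.0, and deprecated at 5.0 or later. */
    extern void my_func(void)
        __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_8,
                                       __IPHONE_4_0, __IPHONE_5_0);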
 
 #elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
     /* compiler for Mac OS X sets __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ */
     #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
     /* make sure a default max version is set */
     #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED
-        #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_7
-    #endif
-    /* set up internal macros */
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_7
-        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_7
-        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_6
-        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_6
-        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_5
-        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_5
-        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_REGULAR
+        #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_8
     #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_4
-        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_4
-        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_WEAK_IMPORT
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_REGULAR
-    #endif
-    #define __AVAILABILITY_INTERNAL__MAC_NA             __AVAILABILITY_INTERNAL_UNAVAILABLE
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL__MAC_10_0
-    #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_1
-    #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_2
-    #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_4
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_3
-    #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_5
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_4
-    #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_4
-        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_5
+
+    #ifdef __has_attribute
+        #if __has_attribute(availability)
+            /* use better attributes if possible */
+            #define __AVAILABILITY_INTERNAL__MAC_10_0                  __attribute__((availability(macosx,introduced=10.0)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_0    __attribute__((availability(macosx,introduced=10.0,deprecated=10.0)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1    __attribute__((availability(macosx,introduced=10.0,deprecated=10.1)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2    __attribute__((availability(macosx,introduced=10.0,deprecated=10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3    __attribute__((availability(macosx,introduced=10.0,deprecated=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4    __attribute__((availability(macosx,introduced=10.0,deprecated=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.0,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.0,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.0,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.0,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.0)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1                  __attribute__((availability(macosx,introduced=10.1)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1    __attribute__((availability(macosx,introduced=10.1,deprecated=10.1)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2    __attribute__((availability(macosx,introduced=10.1,deprecated=10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3    __attribute__((availability(macosx,introduced=10.1,deprecated=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4    __attribute__((availability(macosx,introduced=10.1,deprecated=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.1,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.1,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.1,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.1,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.1)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2                  __attribute__((availability(macosx,introduced=10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2    __attribute__((availability(macosx,introduced=10.2,deprecated=10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3    __attribute__((availability(macosx,introduced=10.2,deprecated=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4    __attribute__((availability(macosx,introduced=10.2,deprecated=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.2,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.2,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.2,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.2,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3                  __attribute__((availability(macosx,introduced=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3    __attribute__((availability(macosx,introduced=10.3,deprecated=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4    __attribute__((availability(macosx,introduced=10.3,deprecated=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.3,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.3,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.3,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.3,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4                  __attribute__((availability(macosx,introduced=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4    __attribute__((availability(macosx,introduced=10.4,deprecated=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.4,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.4,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.4,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.4,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.4)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5                  __attribute__((availability(macosx,introduced=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5    __attribute__((availability(macosx,introduced=10.5,deprecated=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.5,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.5,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.5,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.5)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_6                  __attribute__((availability(macosx,introduced=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6    __attribute__((availability(macosx,introduced=10.6,deprecated=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.6,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.6,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.6)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_7                  __attribute__((availability(macosx,introduced=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7    __attribute__((availability(macosx,introduced=10.7,deprecated=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.7,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.7)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_8                  __attribute__((availability(macosx,introduced=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8    __attribute__((availability(macosx,introduced=10.8,deprecated=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA      __attribute__((availability(macosx,introduced=10.8)))
+            #define __AVAILABILITY_INTERNAL__MAC_NA                    __attribute__((availability(macosx,unavailable)))
+            #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA        __attribute__((availability(macosx,unavailable)))
+        #endif
     #endif
-    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_7
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-        #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
-    #else
-        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_0
-        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_1
-        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_2
-        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_3
-        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_4
-        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_5
-        #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_6
+
+    #ifndef __AVAILABILITY_INTERNAL__MAC_10_0
+        /* use old style attributes */
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_8        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_8        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_8        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__MAC_NA             __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1        __AVAILABILITY_INTERNAL__MAC_10_1
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_2
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_3
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_4
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_5
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_6
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_7
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8        __AVAILABILITY_INTERNAL_DEPRECATED
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8        __AVAILABILITY_INTERNAL__MAC_10_8
+        #endif
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_5
+        #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_6
+        #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_7
+        #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_8
+        #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA               __AVAILABILITY_INTERNAL_UNAVAILABLE
     #endif
-    #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_0
-    #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_1
-    #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_2
-    #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_3
-    #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_4
-    #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_5
-    #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_6
-    #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_7
-    #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA               __AVAILABILITY_INTERNAL_UNAVAILABLE
 #endif
 
 #endif /* __AVAILABILITY_INTERNAL__ */
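The three-way choice above (unavailable / weak import / regular, or the equivalent availability() attributes) is what a declaration ultimately picks up. A minimal sketch, assuming a hypothetical function introduced in 10.8 and a build whose deployment target is older than its SDK:

    /* Hypothetical symbol: with -mmacosx-version-min=10.7 against a 10.8 SDK,
       __AVAILABILITY_INTERNAL__MAC_10_8 resolves to the weak-import case, so
       the symbol can be NULL at run time on 10.7 and should be checked. */
    extern int my_108_only_call(void) __AVAILABILITY_INTERNAL__MAC_10_8;

    if (my_108_only_call != NULL)
        (void)my_108_only_call();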
index 02981bd135333b2b5a10c862105e1ddbcd35b897..25587d84939ba924afa0ce21a9c1c775cb29b122 100644 (file)
@@ -97,7 +97,7 @@
 #define MAC_OS_X_VERSION_10_5 1050
 #define MAC_OS_X_VERSION_10_6 1060
 #define MAC_OS_X_VERSION_10_7 1070
-
+#define MAC_OS_X_VERSION_10_8 1080
 
 /* 
  * If min OS not specified, assume 10.1 for ppc and 10.4 for all others
 #endif
 
 /*
- * if max OS not specified, assume largerof(10.6, min)
+ * if max OS not specified, assume larger of (10.8, min)
  */
 #ifndef MAC_OS_X_VERSION_MAX_ALLOWED
-    #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_7
+    #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_8
         #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED
     #else
-        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_7
+        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_8
     #endif
 #endif
 
 #endif
 
 
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
  * 
  */
 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER     DEPRECATED_ATTRIBUTE
 
-
-
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * and deprecated in Mac OS X 10.1
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.1
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.1 
+ *
+ * Used on types deprecated in Mac OS X 10.1
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * and deprecated in Mac OS X 10.2
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.2
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.2
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.2 
+ *
+ * Used on types deprecated in Mac OS X 10.2
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.3, 
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
  * and deprecated in Mac OS X 10.3
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.3
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.3
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * but later deprecated in Mac OS X 10.3
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.3 
+ *
+ * Used on types deprecated in Mac OS X 10.3
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.4, 
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
  * and deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * but later deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
- * 
- * Used on declarations introduced in Mac OS X 10.3, 
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
  * but later deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.4 
+ *
+ * Used on types deprecated in Mac OS X 10.4
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.5, 
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
  * and deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * but later deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
- * 
- * Used on declarations introduced in Mac OS X 10.3, 
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
  * but later deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
- * 
- * Used on declarations introduced in Mac OS X 10.4, 
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
  * but later deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.5 
+ *
+ * Used on types deprecated in Mac OS X 10.5
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.6, 
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
  * and deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.3, 
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.4, 
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
- * 
- * Used on declarations introduced in Mac OS X 10.5, 
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
  * but later deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.6 
+ *
+ * Used on types deprecated in Mac OS X 10.6
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER    DEPRECATED_ATTRIBUTE
 #endif
 
 
-
-
-
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
  * 
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED
- * 
- * Used on declarations introduced in Mac OS X 10.7, 
+ *
+ * Used on declarations introduced in Mac OS X 10.7,
  * and deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.0, 
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.1, 
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.2, 
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.3, 
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.4, 
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.5, 
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- * 
- * Used on declarations introduced in Mac OS X 10.6, 
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
  * but later deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
 
 /*
  * DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER
- * 
- * Used on types deprecated in Mac OS X 10.7 
+ *
+ * Used on types deprecated in Mac OS X 10.7
  */
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER    DEPRECATED_ATTRIBUTE
     #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER
 #endif
 
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ *
+ * Used on declarations introduced in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED
+ *
+ * Used on declarations introduced in Mac OS X 10.8,
+ * and deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ *
+ * Used on declarations introduced in Mac OS X 10.7,
+ * but later deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8    AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER
+ *
+ * Used on types deprecated in Mac OS X 10.8
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
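Taken together, a short sketch of how these macros decorate declarations; both function names below are hypothetical:

    /* Unavailable, weak-imported, or plain, depending on
       MAC_OS_X_VERSION_MAX_ALLOWED / MAC_OS_X_VERSION_MIN_REQUIRED. */
    extern void my_new_func(void) AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER;

    /* Becomes DEPRECATED_ATTRIBUTE (a compile-time warning for callers) once
       the deployment target reaches 10.8; a plain 10.0 annotation before that. */
    extern void my_old_func(void) AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8;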
+
+
+
+
 #endif  /* __AVAILABILITYMACROS__ */
 
 
index 46ee40f904bdef8163cb4ad77c9791f1a8be397d..61680a39492a3719d8e0dd87dd798721f90b3c34 100644 (file)
@@ -22,7 +22,6 @@ INSTINC_SUBDIRS_ARM = \
 
 
 EXPORT_FILES = \
-       AppleSecureBootEpoch.h \
        Availability.h  \
        AvailabilityInternal.h  \
        AvailabilityMacros.h    \
diff --git a/EXTERNAL_HEADERS/corecrypto/cc.h b/EXTERNAL_HEADERS/corecrypto/cc.h
new file mode 100644 (file)
index 0000000..ecf0531
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ *  cc.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/16/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CC_H_
+#define _CORECRYPTO_CC_H_
+
+#include <corecrypto/cc_config.h>
+#include <string.h>
+#include <stdint.h>
+
+#if KERNEL
+#include <kern/assert.h>
+#else
+#include <assert.h>
+#endif
+
+/* Declare a struct element with a guaranteed alignment of _alignment_.
+   The resulting struct can be used to create arrays that are aligned by
+   a certain amount.  */
+#define cc_aligned_struct(_alignment_)  \
+    typedef struct { \
+        uint8_t b[_alignment_]; \
+    } __attribute__((aligned(_alignment_)))
+
+/* number of array elements used in a cc_ctx_decl */
+#define cc_ctx_n(_type_, _size_) ((_size_ + sizeof(_type_) - 1) / sizeof(_type_))
+
+/* sizeof of a context declared with cc_ctx_decl */
+#define cc_ctx_sizeof(_type_, _size_) sizeof(_type_[cc_ctx_n(_type_, _size_)])
+
+#define cc_ctx_decl(_type_, _size_, _name_)  \
+    _type_ _name_[cc_ctx_n(_type_, _size_)]
+
+#define cc_zero(_size_,_data_) bzero((_data_), (_size_))
+
+#define cc_copy(_size_, _dst_, _src_) memcpy(_dst_, _src_, _size_)
+
+#define cc_ctx_clear(_type_, _size_, _name_)  \
+    cc_zero(cc_ctx_sizeof(_type_, _size_), _name_)
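A minimal sketch of how the context macros combine; the 16-byte alignment and the 100-byte context size are arbitrary illustration values:

    cc_aligned_struct(16) my_unit;          /* 16-byte, 16-aligned unit type */

    static void ctx_example(void) {
        cc_ctx_decl(my_unit, 100, ctx);     /* ceil(100/16) = 7 units on the stack */
        cc_ctx_clear(my_unit, 100, ctx);    /* zero the whole context before use */
    }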
+
+CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4
+void cc_xor(size_t size, void *r, const void *s, const void *t) {
+    uint8_t *_r=(uint8_t *)r;
+    const uint8_t *_s=(uint8_t *)s;
+    const uint8_t *_t=(uint8_t *)t;
+    while (size--) {
+        _r[size] = _s[size] ^ _t[size];
+    }
+}
+
+/* Exchange S and T of any type.  NOTE: Both S and T are evaluated
+   multiple times and MUST NOT be expressions with side effects. */
+#define CC_SWAP(S,T)  do { \
+    __typeof__(S) _cc_swap_tmp = S; S = T; T = _cc_swap_tmp; \
+} while(0)
+
+/* Return the maximum value between S and T. */
+#define CC_MAX(S, T) ({__typeof__(S) _cc_max_s = S; __typeof__(T) _cc_max_t = T; _cc_max_s > _cc_max_t ? _cc_max_s : _cc_max_t;})
+
+/* Return the minimum value between S and T. */
+#define CC_MIN(S, T) ({__typeof__(S) _cc_min_s = S; __typeof__(T) _cc_min_t = T; _cc_min_s <= _cc_min_t ? _cc_min_s : _cc_min_t;})
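Because S and T are expanded textually, side effects run more than once; a quick illustration of safe and unsafe uses:

    int a = 1, b = 2;
    CC_SWAP(a, b);          /* fine: a == 2, b == 1 afterwards */
    /* CC_SWAP(v[i++], b) would increment i several times: pass plain lvalues. */
    int m = CC_MAX(a, b);   /* statement expression: m == 2, operands evaluated once */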
+
+#endif /* _CORECRYPTO_CC_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/cc_config.h b/EXTERNAL_HEADERS/corecrypto/cc_config.h
new file mode 100644 (file)
index 0000000..7b0f2ed
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ *  cc_config.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 10/18/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+#ifndef _CORECRYPTO_CC_CONFIG_H_
+#define _CORECRYPTO_CC_CONFIG_H_
+
+#if !defined(CCN_UNIT_SIZE)
+#if defined(__x86_64__)
+#define CCN_UNIT_SIZE  8
+#elif defined(__arm__) || defined(__i386__)
+#define CCN_UNIT_SIZE  4
+#else
+#define CCN_UNIT_SIZE  2
+#endif
+#endif /* !defined(CCN_UNIT_SIZE) */
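CCN_UNIT_SIZE is the width in bytes of one multi-precision limb; the !defined() guard lets a build override it before this header is first included. A hypothetical build tweak:

    /* Force 32-bit limbs even on x86_64; the CCN_*_ASM gates below then
       disable the unit-size-specific assembly paths automatically. */
    #define CCN_UNIT_SIZE 4
    #include <corecrypto/cc_config.h>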
+
+/* No dynamic linking allowed in L4, i.e. avoid non-lazy symbols */
+/* For the corecrypto kext, CC_STATIC should be 0 */
+
+#if   defined(__x86_64__) || defined(__i386__)
+
+/* These assembly routines only work for a single CCN_UNIT_SIZE. */
+#if (defined(__x86_64__) && CCN_UNIT_SIZE == 8) || (defined(__i386__) && CCN_UNIT_SIZE == 4)
+#define CCN_ADD_ASM            1
+#define CCN_SUB_ASM            1
+#define CCN_MUL_ASM            1
+#else
+#define CCN_ADD_ASM            0
+#define CCN_SUB_ASM            0
+#define CCN_MUL_ASM            0
+#endif
+
+#define CCN_ADDMUL1_ASM        0
+#define CCN_MUL1_ASM           0
+#define CCN_CMP_ASM            0
+#define CCN_ADD1_ASM           0
+#define CCN_SUB1_ASM           0
+#define CCN_N_ASM              0
+#define CCN_SET_ASM            0
+#define CCAES_ARM              0
+#define CCAES_INTEL            1
+#define CCN_USE_BUILTIN_CLZ    0
+#define CCSHA1_VNG_INTEL       1
+#define CCSHA2_VNG_INTEL       1
+#define CCSHA1_VNG_ARMV7NEON   0
+#define CCSHA2_VNG_ARMV7NEON   0
+
+#else
+
+#define CCN_ADD_ASM            0
+#define CCN_SUB_ASM            0
+#define CCN_MUL_ASM            0
+#define CCN_ADDMUL1_ASM        0
+#define CCN_MUL1_ASM           0
+#define CCN_CMP_ASM            0
+#define CCN_ADD1_ASM           0
+#define CCN_SUB1_ASM           0
+#define CCN_N_ASM              0
+#define CCN_SET_ASM            0
+#define CCAES_ARM              0
+#define CCAES_INTEL            0
+#define CCN_USE_BUILTIN_CLZ    0
+#define CCSHA1_VNG_INTEL       0
+#define CCSHA2_VNG_INTEL       0
+#define CCSHA1_VNG_ARMV7NEON   0
+#define CCSHA2_VNG_ARMV7NEON   0
+
+#endif /* defined(__x86_64__) || defined(__i386__) */
+
+#define CCN_N_INLINE           0
+#define CCN_CMP_INLINE         0
+
+#define CC_INLINE static inline
+
+#ifdef __GNUC__
+#define CC_NORETURN __attribute__((__noreturn__))
+#define CC_NOTHROW __attribute__((__nothrow__))
+#define CC_NONNULL(N) __attribute__((__nonnull__ N))
+#define CC_NONNULL1 __attribute__((__nonnull__(1)))
+#define CC_NONNULL2 __attribute__((__nonnull__(2)))
+#define CC_NONNULL3 __attribute__((__nonnull__(3)))
+#define CC_NONNULL4 __attribute__((__nonnull__(4)))
+#define CC_NONNULL5 __attribute__((__nonnull__(5)))
+#define CC_NONNULL6 __attribute__((__nonnull__(6)))
+#define CC_NONNULL7 __attribute__((__nonnull__(7)))
+#define CC_NONNULL_ALL __attribute__((__nonnull__))
+#define CC_SENTINEL __attribute__((__sentinel__))
+#define CC_CONST __attribute__((__const__))
+#define CC_PURE __attribute__((__pure__))
+#define CC_WARN_RESULT __attribute__((__warn_unused_result__))
+#define CC_MALLOC __attribute__((__malloc__))
+#define CC_UNUSED __attribute__((unused))
+#else /* !__GNUC__ */
+/*! @parseOnly */
+#define CC_NORETURN
+/*! @parseOnly */
+#define CC_NOTHROW
+/*! @parseOnly */
+#define CC_NONNULL(N)
+/*! @parseOnly */
+#define CC_NONNULL1
+/*! @parseOnly */
+#define CC_NONNULL2
+/*! @parseOnly */
+#define CC_NONNULL3
+/*! @parseOnly */
+#define CC_NONNULL4
+/*! @parseOnly */
+#define CC_NONNULL5
+/*! @parseOnly */
+#define CC_NONNULL6
+/*! @parseOnly */
+#define CC_NONNULL7
+/*! @parseOnly */
+#define CC_NONNULL_ALL
+/*! @parseOnly */
+#define CC_SENTINEL
+/*! @parseOnly */
+#define CC_CONST
+/*! @parseOnly */
+#define CC_PURE
+/*! @parseOnly */
+#define CC_WARN_RESULT
+/*! @parseOnly */
+#define CC_MALLOC
+/*! @parseOnly */
+#define CC_UNUSED
+#endif /* !__GNUC__ */
+
+#endif /* _CORECRYPTO_CC_CONFIG_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/cc_priv.h b/EXTERNAL_HEADERS/corecrypto/cc_priv.h
new file mode 100644 (file)
index 0000000..db962d4
--- /dev/null
@@ -0,0 +1,362 @@
+/*
+ *  cc_priv.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/1/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CC_PRIV_H_
+#define _CORECRYPTO_CC_PRIV_H_
+
+#include <corecrypto/cc.h>
+#include <stdint.h>
+
+/* Defines the following macros:
+
+ CC_MEMCPY  : optimized memcpy.
+ CC_MEMMOVE : optimized memmove.
+ CC_MEMSET  : optimized memset.
+ CC_BZERO   : optimized bzero.
+
+ CC_STORE32_BE : store a 32-bit value in big-endian order to an unaligned buffer.
+ CC_STORE32_LE : store a 32-bit value in little-endian order to an unaligned buffer.
+ CC_STORE64_BE : store a 64-bit value in big-endian order to an unaligned buffer.
+ CC_STORE64_LE : store a 64-bit value in little-endian order to an unaligned buffer.
+
+ CC_LOAD32_BE : load a 32-bit value in big-endian order from an unaligned buffer.
+ CC_LOAD32_LE : load a 32-bit value in little-endian order from an unaligned buffer.
+ CC_LOAD64_BE : load a 64-bit value in big-endian order from an unaligned buffer.
+ CC_LOAD64_LE : load a 64-bit value in little-endian order from an unaligned buffer.
+
+ CC_ROR  : rotate right 32 bits. Rotate count can be a variable.
+ CC_ROL  : rotate left 32 bits. Rotate count can be a variable.
+ CC_RORc : rotate right 32 bits. Rotate count must be a constant.
+ CC_ROLc : rotate left 32 bits. Rotate count must be a constant.
+
+ CC_ROR64  : rotate right 64 bits. Rotate count can be a variable.
+ CC_ROL64  : rotate left 64 bits. Rotate count can be a variable.
+ CC_ROR64c : rotate right 64 bits. Rotate count must be a constant.
+ CC_ROL64c : rotate left 64 bits. Rotate count must be a constant.
+
+ CC_BSWAP  : byte-swap a 32-bit variable.
+
+ CC_H2BE32 : convert a 32-bit value between host and big-endian order.
+ CC_H2LE32 : convert a 32-bit value between host and little-endian order.
+
+The following are not defined yet; define them if needed.
+
+ CC_BSWAPc   : byte-swap a 32-bit constant
+
+ CC_BSWAP64  : byte-swap a 64-bit variable
+ CC_BSWAP64c : byte-swap a 64-bit constant
+
+ CC_READ_LE32 : read a 32-bit little-endian value
+ CC_READ_LE64 : read a 64-bit little-endian value
+ CC_READ_BE32 : read a 32-bit big-endian value
+ CC_READ_BE64 : read a 64-bit big-endian value
+
+ CC_WRITE_LE32 : write a 32-bit little-endian value
+ CC_WRITE_LE64 : write a 64-bit little-endian value
+ CC_WRITE_BE32 : write a 32-bit big-endian value
+ CC_WRITE_BE64 : write a 64-bit big-endian value
+
+ CC_H2BE64 : convert a 64-bit value between host and big-endian order
+ CC_H2LE64 : convert a 64-bit value between host and little-endian order
+*/
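A quick sketch of the round trip the load/store macros are meant to guarantee, independent of host byte order:

    uint8_t buf[4];
    uint32_t v = 0x12345678, w;

    CC_STORE32_BE(v, buf);  /* buf = { 0x12, 0x34, 0x56, 0x78 } */
    CC_LOAD32_BE(w, buf);   /* w == 0x12345678 on any host */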
+
+/* TODO: optimized versions */
+#define CC_MEMCPY(D,S,L) memcpy((D),(S),(L))
+#define CC_MEMMOVE(D,S,L) memmove((D),(S),(L))
+#define CC_MEMSET(D,V,L) memset((D),(V),(L))
+#define CC_BZERO(D,L) memset((D),0,(L))
+
+
+#pragma mark - Loads and Stores
+
+#pragma mark -- 32 bits - little endian
+
+#pragma mark --- Default version
+
+#define        CC_STORE32_LE(x, y) do {                                    \
+    ((unsigned char *)(y))[3] = (unsigned char)(((x)>>24)&255);                \
+    ((unsigned char *)(y))[2] = (unsigned char)(((x)>>16)&255);                \
+    ((unsigned char *)(y))[1] = (unsigned char)(((x)>>8)&255);         \
+    ((unsigned char *)(y))[0] = (unsigned char)((x)&255);                      \
+} while(0)
+
+#define        CC_LOAD32_LE(x, y) do {                                     \
+x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) |                            \
+    ((uint32_t)(((unsigned char *)(y))[2] & 255)<<16) |                            \
+    ((uint32_t)(((unsigned char *)(y))[1] & 255)<<8)  |                            \
+    ((uint32_t)(((unsigned char *)(y))[0] & 255));                                 \
+} while(0)
+
+#pragma mark -- 64 bits - little endian
+
+#define        CC_STORE64_LE(x, y) do {                                    \
+    ((unsigned char *)(y))[7] = (unsigned char)(((x)>>56)&255);     \
+    ((unsigned char *)(y))[6] = (unsigned char)(((x)>>48)&255);                \
+    ((unsigned char *)(y))[5] = (unsigned char)(((x)>>40)&255);                \
+    ((unsigned char *)(y))[4] = (unsigned char)(((x)>>32)&255);                \
+    ((unsigned char *)(y))[3] = (unsigned char)(((x)>>24)&255);                \
+    ((unsigned char *)(y))[2] = (unsigned char)(((x)>>16)&255);                \
+    ((unsigned char *)(y))[1] = (unsigned char)(((x)>>8)&255);         \
+    ((unsigned char *)(y))[0] = (unsigned char)((x)&255);                      \
+} while(0)
+
+#define        CC_LOAD64_LE(x, y) do {                                     \
+x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) |           \
+    (((uint64_t)(((unsigned char *)(y))[6] & 255))<<48) |           \
+    (((uint64_t)(((unsigned char *)(y))[5] & 255))<<40) |           \
+    (((uint64_t)(((unsigned char *)(y))[4] & 255))<<32) |           \
+    (((uint64_t)(((unsigned char *)(y))[3] & 255))<<24) |           \
+    (((uint64_t)(((unsigned char *)(y))[2] & 255))<<16) |           \
+    (((uint64_t)(((unsigned char *)(y))[1] & 255))<<8)  |           \
+    (((uint64_t)(((unsigned char *)(y))[0] & 255)));                \
+} while(0)
+
+#pragma mark -- 32 bits - big endian
+#pragma mark --- intel version
+
+#if (defined(__i386__) || defined(__x86_64__))
+
+#define CC_STORE32_BE(x, y)     \
+    __asm__ __volatile__ (      \
+    "bswapl %0     \n\t"        \
+    "movl   %0,(%1)\n\t"        \
+    "bswapl %0     \n\t"        \
+    ::"r"(x), "r"(y))
+
+#define CC_LOAD32_BE(x, y)      \
+    __asm__ __volatile__ (      \
+    "movl (%1),%0\n\t"          \
+    "bswapl %0\n\t"             \
+    :"=r"(x): "r"(y))
+
+#else
+#pragma mark --- default version
+#define        CC_STORE32_BE(x, y) do {                                \
+    ((unsigned char *)(y))[0] = (unsigned char)(((x)>>24)&255);        \
+    ((unsigned char *)(y))[1] = (unsigned char)(((x)>>16)&255);        \
+    ((unsigned char *)(y))[2] = (unsigned char)(((x)>>8)&255); \
+    ((unsigned char *)(y))[3] = (unsigned char)((x)&255);       \
+} while(0)
+
+#define        CC_LOAD32_BE(x, y) do {                             \
+x = ((uint32_t)(((unsigned char *)(y))[0] & 255)<<24) |            \
+    ((uint32_t)(((unsigned char *)(y))[1] & 255)<<16) |                \
+    ((uint32_t)(((unsigned char *)(y))[2] & 255)<<8)  |                \
+    ((uint32_t)(((unsigned char *)(y))[3] & 255));          \
+} while(0)
+
+#endif
+
+#pragma mark -- 64 bits - big endian
+
+#pragma mark --- intel 64 bits version
+
+#if defined(__x86_64__)
+
+#define        CC_STORE64_BE(x, y)   \
+__asm__ __volatile__ (        \
+"bswapq %0     \n\t"          \
+"movq   %0,(%1)\n\t"          \
+"bswapq %0     \n\t"          \
+::"r"(x), "r"(y))
+
+#define        CC_LOAD64_BE(x, y)    \
+__asm__ __volatile__ (        \
+"movq (%1),%0\n\t"            \
+"bswapq %0\n\t"               \
+:"=r"(x): "r"(y))
+
+#else
+
+#pragma mark --- default version
+
+#define CC_STORE64_BE(x, y) do {                                    \
+    ((unsigned char *)(y))[0] = (unsigned char)(((x)>>56)&255);                \
+    ((unsigned char *)(y))[1] = (unsigned char)(((x)>>48)&255);                \
+    ((unsigned char *)(y))[2] = (unsigned char)(((x)>>40)&255);                \
+    ((unsigned char *)(y))[3] = (unsigned char)(((x)>>32)&255);                \
+    ((unsigned char *)(y))[4] = (unsigned char)(((x)>>24)&255);                \
+    ((unsigned char *)(y))[5] = (unsigned char)(((x)>>16)&255);                \
+    ((unsigned char *)(y))[6] = (unsigned char)(((x)>>8)&255);         \
+    ((unsigned char *)(y))[7] = (unsigned char)((x)&255);                      \
+} while(0)
+
+#define        CC_LOAD64_BE(x, y) do {                                     \
+x = (((uint64_t)(((unsigned char *)(y))[0] & 255))<<56) |           \
+    (((uint64_t)(((unsigned char *)(y))[1] & 255))<<48) |           \
+    (((uint64_t)(((unsigned char *)(y))[2] & 255))<<40) |           \
+    (((uint64_t)(((unsigned char *)(y))[3] & 255))<<32) |           \
+    (((uint64_t)(((unsigned char *)(y))[4] & 255))<<24) |           \
+    (((uint64_t)(((unsigned char *)(y))[5] & 255))<<16) |           \
+    (((uint64_t)(((unsigned char *)(y))[6] & 255))<<8)  |              \
+    (((uint64_t)(((unsigned char *)(y))[7] & 255)));               \
+} while(0)
+
+#endif
+
+#pragma mark - 32-bit Rotates
+
+#if defined(_MSC_VER)
+#pragma mark -- MSVC version
+
+#include <stdlib.h>
+#pragma intrinsic(_lrotr,_lrotl)
+#define        CC_ROR(x,n) _lrotr(x,n)
+#define        CC_ROL(x,n) _lrotl(x,n)
+#define        CC_RORc(x,n) _lrotr(x,n)
+#define        CC_ROLc(x,n) _lrotl(x,n)
+
+#elif (defined(__i386__) || defined(__x86_64__))
+#pragma mark -- intel asm version
+
+static inline uint32_t CC_ROL(uint32_t word, int i)
+{
+    __asm__ ("roll %%cl,%0"
+         :"=r" (word)
+         :"0" (word),"c" (i));
+    return word;
+}
+
+static inline uint32_t CC_ROR(uint32_t word, int i)
+{
+    __asm__ ("rorl %%cl,%0"
+         :"=r" (word)
+         :"0" (word),"c" (i));
+    return word;
+}
+
+/* This needs to be a macro, because 'i' must be an immediate (compile-time constant) */
+#define CC_ROLc(word, i)                \
+({  uint32_t _word=(word);              \
+    __asm__ __volatile__ ("roll %2,%0"  \
+        :"=r" (_word)                   \
+        :"0" (_word),"I" (i));          \
+    _word;                              \
+})
+
+
+#define CC_RORc(word, i)                \
+({  uint32_t _word=(word);              \
+    __asm__ __volatile__ ("rorl %2,%0"  \
+        :"=r" (_word)                   \
+        :"0" (_word),"I" (i));          \
+    _word;                              \
+})
+
+#else
+
+#pragma mark -- default version
+
+static inline uint32_t CC_ROL(uint32_t word, int i)
+{
+    return ( (word<<(i&31)) | (word>>(32-(i&31))) );
+}
+
+static inline uint32_t CC_ROR(uint32_t word, int i)
+{
+    return ( (word>>(i&31)) | (word<<(32-(i&31))) );
+}
+
+#define        CC_ROLc(x, y) CC_ROL(x, y)
+#define        CC_RORc(x, y) CC_ROR(x, y)
+
+#endif
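The two spellings exist only because the inline-asm immediate constraint ("I") needs a compile-time constant; the results are identical:

    uint32_t x = 0x80000001;
    int n = 4;
    uint32_t a = CC_ROL(x, n);      /* variable count: 0x00000018 */
    uint32_t b = CC_ROLc(x, 1);     /* constant count: 0x00000003 */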
+
+#pragma mark - 64 bits rotates
+
+#if defined(__x86_64__)
+#pragma mark -- intel 64 asm version
+
+static inline uint64_t CC_ROL64(uint64_t word, int i)
+{
+    __asm__("rolq %%cl,%0"
+        :"=r" (word)
+        :"0" (word),"c" (i));
+    return word;
+}
+
+static inline uint64_t CC_ROR64(uint64_t word, int i)
+{
+    __asm__("rorq %%cl,%0"
+        :"=r" (word)
+        :"0" (word),"c" (i));
+    return word;
+}
+
+/* This needs to be a macro, because 'i' must be an immediate (compile-time constant) */
+#define CC_ROL64c(word, i)      \
+({                              \
+    uint64_t _word=(word);      \
+    __asm__("rolq %2,%0"        \
+        :"=r" (_word)           \
+        :"0" (_word),"J" (i));  \
+    _word;                      \
+})
+
+#define CC_ROR64c(word, i)      \
+({                              \
+    uint64_t _word=(word);      \
+    __asm__("rorq %2,%0"        \
+        :"=r" (_word)           \
+        :"0" (_word),"J" (i));  \
+    _word;                      \
+})
+
+
+#else /* Not x86_64  */
+
+#pragma mark -- default C version
+
+static inline uint64_t CC_ROL64(uint64_t word, int i)
+{
+    return ( (word<<(i&63)) | (word>>(64-(i&63))) );
+}
+
+static inline uint64_t CC_ROR64(uint64_t word, int i)
+{
+    return ( (word>>(i&63)) | (word<<(64-(i&63))) );
+}
+
+#define        CC_ROL64c(x, y) CC_ROL64(x, y)
+#define        CC_ROR64c(x, y) CC_ROR64(x, y)
+
+#endif
+
+
+#pragma mark - Byte Swaps
+
+static inline uint32_t CC_BSWAP(uint32_t x)
+{
+    return (
+        ((x>>24)&0x000000FF) |
+        ((x<<24)&0xFF000000) |
+        ((x>>8) &0x0000FF00) |
+        ((x<<8) &0x00FF0000)
+    );
+}
+
+#ifdef __LITTLE_ENDIAN__
+#define CC_H2BE32(x) CC_BSWAP(x)
+#define CC_H2LE32(x) (x)
+#else
+#error Big-endian targets are not supported here.
+#define CC_H2BE32(x) (x)
+#define CC_H2LE32(x) CC_BSWAP(x)
+#endif
+
+
+/* extract a byte portably */
+#ifdef _MSC_VER
+#define cc_byte(x, n) ((unsigned char)((x) >> (8 * (n))))
+#else
+#define cc_byte(x, n) (((x) >> (8 * (n))) & 255)
+#endif
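+
+/* Worked examples (illustrative; not part of the original header):
+   CC_BSWAP(0x11223344) == 0x44332211, and cc_byte(0x11223344, 0) == 0x44,
+   i.e. n counts bytes from the least significant end. */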
+
+#endif /* _CORECRYPTO_CC_PRIV_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccaes.h b/EXTERNAL_HEADERS/corecrypto/ccaes.h
new file mode 100644 (file)
index 0000000..9dca39b
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ *  ccaes.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/10/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCAES_H_
+#define _CORECRYPTO_CCAES_H_
+
+#include <corecrypto/cc_config.h>
+#include <corecrypto/ccmode.h>
+
+#define CCAES_BLOCK_SIZE 16
+#define CCAES_KEY_SIZE_128 16
+#define CCAES_KEY_SIZE_192 24
+#define CCAES_KEY_SIZE_256 32
+
+extern const struct ccmode_ecb ccaes_ltc_ecb_decrypt_mode;
+extern const struct ccmode_ecb ccaes_ltc_ecb_encrypt_mode;
+
+extern const struct ccmode_cbc ccaes_gladman_cbc_encrypt_mode;
+extern const struct ccmode_cbc ccaes_gladman_cbc_decrypt_mode;
+
+#if CCAES_ARM
+extern const struct ccmode_ecb ccaes_arm_ecb_encrypt_mode;
+extern const struct ccmode_ecb ccaes_arm_ecb_decrypt_mode;
+
+extern const struct ccmode_cbc ccaes_arm_cbc_encrypt_mode;
+extern const struct ccmode_cbc ccaes_arm_cbc_decrypt_mode;
+#endif
+
+#if CCAES_INTEL
+//extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_mode;
+//extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_mode;
+
+extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_opt_mode;
+extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_aesni_mode;
+
+extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_opt_mode;
+extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_aesni_mode;
+
+//extern const struct ccmode_cbc ccaes_intel_cbc_encrypt_mode;
+//extern const struct ccmode_cbc ccaes_intel_cbc_decrypt_mode;
+
+extern const struct ccmode_cbc ccaes_intel_cbc_encrypt_opt_mode;
+extern const struct ccmode_cbc ccaes_intel_cbc_encrypt_aesni_mode;
+
+extern const struct ccmode_cbc ccaes_intel_cbc_decrypt_opt_mode;
+extern const struct ccmode_cbc ccaes_intel_cbc_decrypt_aesni_mode;
+
+//extern const struct ccmode_xts ccaes_intel_xts_encrypt_mode;
+//extern const struct ccmode_xts ccaes_intel_xts_decrypt_mode;
+
+extern const struct ccmode_xts ccaes_intel_xts_encrypt_opt_mode;
+extern const struct ccmode_xts ccaes_intel_xts_encrypt_aesni_mode;
+
+extern const struct ccmode_xts ccaes_intel_xts_decrypt_opt_mode;
+extern const struct ccmode_xts ccaes_intel_xts_decrypt_aesni_mode;
+#endif
+
+
+/* Implementation Selectors: */
+const struct ccmode_ecb *ccaes_ecb_encrypt_mode(void);
+const struct ccmode_cbc *ccaes_cbc_encrypt_mode(void);
+const struct ccmode_cfb *ccaes_cfb_encrypt_mode(void);
+const struct ccmode_cfb8 *ccaes_cfb8_encrypt_mode(void);
+const struct ccmode_xts *ccaes_xts_encrypt_mode(void);
+const struct ccmode_gcm *ccaes_gcm_encrypt_mode(void);
+
+const struct ccmode_ecb *ccaes_ecb_decrypt_mode(void);
+const struct ccmode_cbc *ccaes_cbc_decrypt_mode(void);
+const struct ccmode_cfb *ccaes_cfb_decrypt_mode(void);
+const struct ccmode_cfb8 *ccaes_cfb8_decrypt_mode(void);
+const struct ccmode_xts *ccaes_xts_decrypt_mode(void);
+const struct ccmode_gcm *ccaes_gcm_decrypt_mode(void);
+
+const struct ccmode_ctr *ccaes_ctr_crypt_mode(void);
+const struct ccmode_ofb *ccaes_ofb_crypt_mode(void);
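+
+/* Usage sketch (illustrative; not part of the original header): one-shot
+   AES-CBC encryption via the selectors above, using cccbc_one_shot()
+   from ccmode.h:
+
+     static void aes_cbc_encrypt(const void *key, const void *iv,
+                                 unsigned long nblocks,
+                                 const void *in, void *out)
+     {
+         const struct ccmode_cbc *cbc = ccaes_cbc_encrypt_mode();
+         cccbc_one_shot(cbc, CCAES_KEY_SIZE_128, key, iv, nblocks, in, out);
+     }
+*/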
+
+#endif /* _CORECRYPTO_CCAES_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccder.h b/EXTERNAL_HEADERS/corecrypto/ccder.h
new file mode 100644 (file)
index 0000000..756afd2
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+ *  ccder.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 2/28/12.
+ *  Copyright 2012 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCDER_H_
+#define _CORECRYPTO_CCDER_H_
+
+#include <corecrypto/ccasn1.h>
+#include <corecrypto/ccn.h>
+
+#define CCDER_MULTIBYTE_TAGS  1
+
+#ifdef CCDER_MULTIBYTE_TAGS
+typedef unsigned long ccder_tag;
+#else
+typedef uint8_t ccder_tag;
+#endif
+
+/* DER types to be used with ccder_decode and ccder_encode functions. */
+enum {
+    CCDER_EOL               = CCASN1_EOL,
+    CCDER_BOOLEAN           = CCASN1_BOOLEAN,
+    CCDER_INTEGER           = CCASN1_INTEGER,
+    CCDER_BIT_STRING        = CCASN1_BIT_STRING,
+    CCDER_OCTET_STRING      = CCASN1_OCTET_STRING,
+    CCDER_NULL              = CCASN1_NULL,
+    CCDER_OBJECT_IDENTIFIER = CCASN1_OBJECT_IDENTIFIER,
+    CCDER_OBJECT_DESCRIPTOR = CCASN1_OBJECT_DESCRIPTOR,
+    /* External or instance-of 0x08 */
+    CCDER_REAL              = CCASN1_REAL,
+    CCDER_ENUMERATED        = CCASN1_ENUMERATED,
+    CCDER_EMBEDDED_PDV      = CCASN1_EMBEDDED_PDV,
+    CCDER_UTF8_STRING       = CCASN1_UTF8_STRING,
+    /*                         0x0d */
+    /*                         0x0e */
+    /*                         0x0f */
+    CCDER_SEQUENCE          = CCASN1_SEQUENCE,
+    CCDER_SET               = CCASN1_SET,
+    CCDER_NUMERIC_STRING    = CCASN1_NUMERIC_STRING,
+    CCDER_PRINTABLE_STRING  = CCASN1_PRINTABLE_STRING,
+    CCDER_T61_STRING        = CCASN1_T61_STRING,
+    CCDER_VIDEOTEX_STRING   = CCASN1_VIDEOTEX_STRING,
+    CCDER_IA5_STRING        = CCASN1_IA5_STRING,
+    CCDER_UTC_TIME          = CCASN1_UTC_TIME,
+    CCDER_GENERALIZED_TIME  = CCASN1_GENERALIZED_TIME,
+    CCDER_GRAPHIC_STRING    = CCASN1_GRAPHIC_STRING,
+    CCDER_VISIBLE_STRING    = CCASN1_VISIBLE_STRING,
+    CCDER_GENERAL_STRING    = CCASN1_GENERAL_STRING,
+    CCDER_UNIVERSAL_STRING  = CCASN1_UNIVERSAL_STRING,
+    /*                         0x1d */
+    CCDER_BMP_STRING        = CCASN1_BMP_STRING,
+    CCDER_HIGH_TAG_NUMBER   = CCASN1_HIGH_TAG_NUMBER,
+    CCDER_TELETEX_STRING    = CCDER_T61_STRING,
+
+#ifdef CCDER_MULTIBYTE_TAGS
+    CCDER_TAG_MASK          = ((ccder_tag)~0),
+    CCDER_TAGNUM_MASK       = ((ccder_tag)~((ccder_tag)7 << (sizeof(ccder_tag) * 8 - 3))),
+
+    CCDER_METHOD_MASK       = ((ccder_tag)1 << (sizeof(ccder_tag) * 8 - 3)),
+    CCDER_PRIMITIVE         = ((ccder_tag)0 << (sizeof(ccder_tag) * 8 - 3)),
+    CCDER_CONSTRUCTED       = ((ccder_tag)1 << (sizeof(ccder_tag) * 8 - 3)),
+
+    CCDER_CLASS_MASK        = ((ccder_tag)3 << (sizeof(ccder_tag) * 8 - 2)),
+    CCDER_UNIVERSAL         = ((ccder_tag)0 << (sizeof(ccder_tag) * 8 - 2)),
+    CCDER_APPLICATION       = ((ccder_tag)1 << (sizeof(ccder_tag) * 8 - 2)),
+    CCDER_CONTEXT_SPECIFIC  = ((ccder_tag)2 << (sizeof(ccder_tag) * 8 - 2)),
+    CCDER_PRIVATE           = ((ccder_tag)3 << (sizeof(ccder_tag) * 8 - 2)),
+#else
+    CCDER_TAG_MASK          = CCASN1_TAG_MASK,
+    CCDER_TAGNUM_MASK       = CCASN1_TAGNUM_MASK,
+
+    CCDER_METHOD_MASK       = CCASN1_METHOD_MASK,
+    CCDER_PRIMITIVE         = CCASN1_PRIMITIVE,
+    CCDER_CONSTRUCTED       = CCASN1_CONSTRUCTED,
+
+    CCDER_CLASS_MASK        = CCASN1_CLASS_MASK,
+    CCDER_UNIVERSAL         = CCASN1_UNIVERSAL,
+    CCDER_APPLICATION       = CCASN1_APPLICATION,
+    CCDER_CONTEXT_SPECIFIC  = CCASN1_CONTEXT_SPECIFIC,
+    CCDER_PRIVATE           = CCASN1_PRIVATE,
+#endif
+    CCDER_CONSTRUCTED_SET   = CCDER_SET | CCDER_CONSTRUCTED,
+    CCDER_CONSTRUCTED_SEQUENCE = CCDER_SEQUENCE | CCDER_CONSTRUCTED,
+};
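+
+/* Worked example (illustrative; not part of the original header): with
+   CCDER_MULTIBYTE_TAGS, an implicit constructed context-specific tag [2]
+   is built as
+
+     CCDER_CONTEXT_SPECIFIC | CCDER_CONSTRUCTED | 2
+
+   i.e. the class in the top two bits, the method bit just below them, and
+   the tag number in the remaining low bits. */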
+
+
+#pragma mark ccder_sizeof_ functions
+
+inline CC_CONST
+size_t ccder_sizeof_tag(ccder_tag tag);
+
+inline CC_CONST
+size_t ccder_sizeof_len(size_t len);
+
+/* Returns the size in bytes of an asn1 encoded item with tag _tag_ and
+ content length _len_. */
+inline CC_CONST
+size_t ccder_sizeof(ccder_tag tag, size_t len);
+
+inline CC_CONST
+size_t ccder_sizeof_oid(ccoid_t oid);
+
+#pragma mark ccder_encode_ functions.
+
+/* Encode a tag backwards; der_end should point one byte past the end of
+   the destination for the tag.  Returns a pointer to the first byte of
+   the tag, or NULL if there is an encoding error. */
+inline CC_NONNULL2
+uint8_t *ccder_encode_tag(ccder_tag tag, const uint8_t *der, uint8_t *der_end);
+
+/* Returns a pointer to the start of the len field, or NULL if there is
+ an encoding error. */
+inline CC_NONNULL2
+uint8_t *
+ccder_encode_len(size_t len, const uint8_t *der, uint8_t *der_end);
+
+/* der_end should point to the first byte of the content of this der item. */
+inline CC_NONNULL3
+uint8_t *
+ccder_encode_tl(ccder_tag tag, size_t len, const uint8_t *der, uint8_t *der_end);
+
+inline CC_PURE CC_NONNULL2
+uint8_t *
+ccder_encode_body_nocopy(size_t size, const uint8_t *der, uint8_t *der_end);
+
+/* Encode the tag and length of a constructed object.  der is the lower
+   bound, der_end is one byte past where we want to write the length and
+   body_end is one byte past the end of the body of the der object we are
+   encoding the tag and length of. */
+inline CC_NONNULL((2,3))
+uint8_t *
+ccder_encode_constructed_tl(ccder_tag tag, const uint8_t *body_end,
+                            const uint8_t *der, uint8_t *der_end);
+
+/* Encodes oid into der and returns
+ der + ccder_sizeof_oid(oid). */
+inline CC_NONNULL1 CC_NONNULL2
+uint8_t *ccder_encode_oid(ccoid_t oid, const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL((3,4))
+uint8_t *ccder_encode_implicit_integer(ccder_tag implicit_tag,
+                                       cc_size n, const cc_unit *s,
+                                       const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL((2,3))
+uint8_t *ccder_encode_integer(cc_size n, const cc_unit *s,
+                              const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL3
+uint8_t *ccder_encode_implicit_uint64(ccder_tag implicit_tag,
+                                      uint64_t value,
+                                      const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL3
+uint8_t *ccder_encode_uint64(uint64_t value,
+                             const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL((3,4))
+uint8_t *ccder_encode_implicit_octet_string(ccder_tag implicit_tag,
+                                            cc_size n, const cc_unit *s,
+                                            const uint8_t *der,
+                                            uint8_t *der_end);
+
+inline CC_NONNULL((2,3))
+uint8_t *ccder_encode_octet_string(cc_size n, const cc_unit *s,
+                                   const uint8_t *der, uint8_t *der_end);
+
+inline CC_NONNULL((3,4))
+uint8_t *ccder_encode_implicit_raw_octet_string(ccder_tag implicit_tag,
+                                                size_t s_size, const uint8_t *s,
+                                                const uint8_t *der,
+                                                uint8_t *der_end);
+
+inline CC_NONNULL((2,3))
+uint8_t *ccder_encode_raw_octet_string(size_t s_size, const uint8_t *s,
+                                       const uint8_t *der, uint8_t *der_end);
+
+/* ccder_encode_body COPIES the body into the der.
+   It's inefficient, especially when you already have to convert to get to
+   the form for the body; see ccder_encode_integer for the right way to
+   unify conversion and insertion. */
+inline CC_NONNULL3
+uint8_t *
+ccder_encode_body(size_t size, const uint8_t* body,
+                  const uint8_t *der, uint8_t *der_end);
+
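+/* Encoding sketch (illustrative; not part of the original header).  All
+   encoders work back to front: der_end starts one byte past the end of
+   the buffer and each call's return value feeds the next one.  For
+   example, SEQUENCE { r, s }:
+
+     static uint8_t *encode_pair(uint8_t *buf, size_t buf_size,
+                                 uint64_t r, uint64_t s)
+     {
+         uint8_t *der_end = buf + buf_size;
+         uint8_t *p = der_end;
+         p = ccder_encode_uint64(s, buf, p);
+         p = ccder_encode_uint64(r, buf, p);
+         return ccder_encode_constructed_tl(CCDER_CONSTRUCTED_SEQUENCE,
+                                            der_end, buf, p);
+     }
+*/
+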
+#pragma mark ccder_decode_ functions.
+
+/* Returns a pointer to the start of the length field and stores the decoded
+ tag in *tagp.  Returns NULL if there is a decoding error. */
+inline CC_NONNULL((1,3))
+const uint8_t *ccder_decode_tag(ccder_tag *tagp, const uint8_t *der, const uint8_t *der_end);
+
+inline CC_NONNULL((1,3))
+const uint8_t *ccder_decode_len(size_t *lenp, const uint8_t *der, const uint8_t *der_end);
+
+/* Returns a pointer to the start of the der object and stores its length in
+ *lenp.  Returns NULL if there is a decoding error. */
+inline CC_NONNULL((2,4))
+const uint8_t *ccder_decode_tl(ccder_tag expected_tag, size_t *lenp,
+                               const uint8_t *der, const uint8_t *der_end);
+
+inline CC_NONNULL((2,3))
+const uint8_t *
+ccder_decode_constructed_tl(ccder_tag expected_tag, const uint8_t **body_end,
+                            const uint8_t *der, const uint8_t *der_end);
+
+inline CC_NONNULL((1,3))
+const uint8_t *
+ccder_decode_sequence_tl(const uint8_t **body_end,
+                         const uint8_t *der, const uint8_t *der_end);
+
+inline CC_NONNULL((2,4))
+const uint8_t *ccder_decode_uint(cc_size n, cc_unit *r,
+                                 const uint8_t *der, const uint8_t *der_end);
+
+inline CC_NONNULL((1,3))
+const uint8_t *ccder_decode_uint64(uint64_t* r,
+                                   const uint8_t *der, const uint8_t *der_end);
+
+/* Decode SEQUENCE { r, s -- (unsigned)integer } in der into r and s.
+   Returns NULL on decode errors, returns pointer just past the end of the
+   sequence of integers otherwise. */
+inline CC_NONNULL((2,3,5))
+const uint8_t *ccder_decode_seqii(cc_size n, cc_unit *r, cc_unit *s,
+                                  const uint8_t *der, const uint8_t *der_end);
+inline CC_NONNULL_ALL
+const uint8_t *ccder_decode_oid(ccoid_t *oidp,
+                                const uint8_t *der, const uint8_t *der_end);
+
+#ifndef CCDER_MULTIBYTE_TAGS
+#include <corecrypto/ccder_decode_constructed_tl.h>
+#include <corecrypto/ccder_decode_len.h>
+#include <corecrypto/ccder_decode_oid.h>
+#include <corecrypto/ccder_decode_seqii.h>
+#include <corecrypto/ccder_decode_sequence_tl.h>
+#include <corecrypto/ccder_decode_tag.h>
+#include <corecrypto/ccder_decode_tl.h>
+#include <corecrypto/ccder_decode_uint.h>
+#include <corecrypto/ccder_encode_body_nocopy.h>
+#include <corecrypto/ccder_encode_constructed_tl.h>
+#include <corecrypto/ccder_encode_implicit_integer.h>
+#include <corecrypto/ccder_encode_implicit_octet_string.h>
+#include <corecrypto/ccder_encode_implicit_uint64.h>
+#include <corecrypto/ccder_encode_integer.h>
+#include <corecrypto/ccder_encode_len.h>
+#include <corecrypto/ccder_encode_octet_string.h>
+#include <corecrypto/ccder_encode_oid.h>
+#include <corecrypto/ccder_encode_tag.h>
+#include <corecrypto/ccder_encode_tl.h>
+#include <corecrypto/ccder_encode_uint64.h>
+#include <corecrypto/ccder_sizeof.h>
+#include <corecrypto/ccder_sizeof_len.h>
+#include <corecrypto/ccder_sizeof_oid.h>
+#include <corecrypto/ccder_sizeof_tag.h>
+#endif
+
+#endif /* _CORECRYPTO_CCDER_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccdes.h b/EXTERNAL_HEADERS/corecrypto/ccdes.h
new file mode 100644 (file)
index 0000000..aff622b
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ *  ccdes.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/20/10.
+ *  Copyright 2010 Apple, Inc. All rights reserved.
+ *
+ */
+
+
+#ifndef _CORECRYPTO_CCDES_H_
+#define _CORECRYPTO_CCDES_H_
+
+#include <corecrypto/ccmode.h>
+
+#define CCDES_BLOCK_SIZE 8
+#define CCDES_KEY_SIZE 8
+
+extern const struct ccmode_ecb ccdes_ltc_ecb_decrypt_mode;
+extern const struct ccmode_ecb ccdes_ltc_ecb_encrypt_mode;
+
+extern const struct ccmode_ecb ccdes3_ltc_ecb_decrypt_mode;
+extern const struct ccmode_ecb ccdes3_ltc_ecb_encrypt_mode;
+extern const struct ccmode_ecb ccdes168_ltc_ecb_encrypt_mode;
+
+const struct ccmode_ecb *ccdes_ecb_decrypt_mode(void);
+const struct ccmode_ecb *ccdes_ecb_encrypt_mode(void);
+
+const struct ccmode_cbc *ccdes_cbc_decrypt_mode(void);
+const struct ccmode_cbc *ccdes_cbc_encrypt_mode(void);
+
+const struct ccmode_cfb *ccdes_cfb_decrypt_mode(void);
+const struct ccmode_cfb *ccdes_cfb_encrypt_mode(void);
+
+const struct ccmode_cfb8 *ccdes_cfb8_decrypt_mode(void);
+const struct ccmode_cfb8 *ccdes_cfb8_encrypt_mode(void);
+
+const struct ccmode_ctr *ccdes_ctr_crypt_mode(void);
+
+const struct ccmode_ofb *ccdes_ofb_crypt_mode(void);
+
+
+const struct ccmode_ecb *ccdes3_ecb_decrypt_mode(void);
+const struct ccmode_ecb *ccdes3_ecb_encrypt_mode(void);
+
+const struct ccmode_cbc *ccdes3_cbc_decrypt_mode(void);
+const struct ccmode_cbc *ccdes3_cbc_encrypt_mode(void);
+
+const struct ccmode_cfb *ccdes3_cfb_decrypt_mode(void);
+const struct ccmode_cfb *ccdes3_cfb_encrypt_mode(void);
+
+const struct ccmode_cfb8 *ccdes3_cfb8_decrypt_mode(void);
+const struct ccmode_cfb8 *ccdes3_cfb8_encrypt_mode(void);
+
+const struct ccmode_ctr *ccdes3_ctr_crypt_mode(void);
+
+const struct ccmode_ofb *ccdes3_ofb_crypt_mode(void);
+
+int ccdes_key_is_weak(void *key, unsigned long length);
+void ccdes_key_set_odd_parity(void *key, unsigned long length);
+
+uint32_t
+ccdes_cbc_cksum(void *in, void *out, unsigned long length,
+                void *key, unsigned long keylen, void *ivec);
+
+
+#endif /* _CORECRYPTO_CCDES_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccdigest.h b/EXTERNAL_HEADERS/corecrypto/ccdigest.h
new file mode 100644 (file)
index 0000000..7aa8ada
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ *  ccdigest.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 11/30/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCDIGEST_H_
+#define _CORECRYPTO_CCDIGEST_H_
+
+#include <corecrypto/cc.h>
+#include <corecrypto/ccn.h>
+
+/* To malloc a digest context for a given di, use malloc(ccdigest_di_size(di))
+   and assign the result to a pointer to a struct ccdigest_ctx. */
+struct ccdigest_ctx {
+    union {
+        uint8_t u8;
+        uint32_t u32;
+        uint64_t u64;
+        cc_unit ccn;
+    } state;
+} __attribute((aligned(8)));
+
+typedef union {
+    struct ccdigest_ctx *hdr;
+} ccdigest_ctx_t __attribute__((transparent_union));
+
+struct ccdigest_state {
+    union {
+        uint8_t u8;
+        uint32_t u32;
+        uint64_t u64;
+        cc_unit ccn;
+    } state;
+} __attribute((aligned(8)));
+
+typedef union {
+    struct ccdigest_state *hdr;
+    struct ccdigest_ctx *_ctx;
+    ccdigest_ctx_t _ctxt;
+} ccdigest_state_t __attribute__((transparent_union));
+
+struct ccdigest_info {
+    unsigned long output_size;
+    unsigned long state_size;
+    unsigned long block_size;
+    unsigned long oid_size;
+    unsigned char *oid;
+    const void *initial_state;
+    void(*compress)(ccdigest_state_t state, unsigned long nblocks,
+                    const void *data);
+    void(*final)(const struct ccdigest_info *di, ccdigest_ctx_t ctx,
+                 unsigned char *digest);
+};
+
+/* Return sizeof a ccdigest_ctx for a given size_t _state_size_ and
+   size_t _block_size_. */
+#define ccdigest_ctx_size(_state_size_, _block_size_)  ((_state_size_) + sizeof(uint64_t) + (_block_size_) + sizeof(unsigned int))
+/* Return sizeof a ccdigest_ctx for a given struct ccdigest_info *_di_. */
+#define ccdigest_di_size(_di_)  (ccdigest_ctx_size((_di_)->state_size, (_di_)->block_size))
+
+/* Declare a ccdigest_ctx for a given size_t _state_size_ and
+   size_t _block_size_, named _name_.  Can be used in structs or on the
+   stack. */
+#define ccdigest_ctx_decl(_state_size_, _block_size_, _name_)  cc_ctx_decl(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_)
+#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_ctx_clear(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_)
+/* Declare a ccdigest_ctx for a given size_t _state_size_ and
+   size_t _block_size_, named _name_.  Can be used on the stack. */
+#define ccdigest_di_decl(_di_, _name_)  cc_ctx_decl(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_)
+#define ccdigest_di_clear(_di_, _name_) cc_ctx_clear(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_)
+
+/* Digest context field accessors.  Consider the implementation private. */
+#define ccdigest_state(_di_, _ctx_)      ((ccdigest_state_t)(_ctx_))
+#define ccdigest_state_u8(_di_, _ctx_)   (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8)
+#define ccdigest_state_u32(_di_, _ctx_)  (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u32)
+#define ccdigest_state_u64(_di_, _ctx_)  (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u64)
+#define ccdigest_state_ccn(_di_, _ctx_)  (&((ccdigest_ctx_t)(_ctx_)).hdr->state.ccn)
+#define ccdigest_nbits(_di_, _ctx_)      (((uint64_t *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size))[0])
+#define ccdigest_data(_di_, _ctx_)       (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size + sizeof(uint64_t))
+#define ccdigest_num(_di_, _ctx_)        (((unsigned int *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size + sizeof(uint64_t) + (_di_)->block_size))[0])
+
+/* Digest state field accessors.  Consider the implementation private. */
+#define ccdigest_u8(_state_)             (&((ccdigest_state_t)(_state_)).hdr->state.u8)
+#define ccdigest_u32(_state_)            (&((ccdigest_state_t)(_state_)).hdr->state.u32)
+#define ccdigest_u64(_state_)            (&((ccdigest_state_t)(_state_)).hdr->state.u64)
+#define ccdigest_ccn(_state_)            (&((ccdigest_state_t)(_state_)).hdr->state.ccn)
+
+/* We could just use memcpy instead of this special macro, but this allows us
+   to use the optimized ccn_set() assembly routine if we have one, which for
+   32-bit ARM is about 200% quicker than generic memcpy(). */
+#if CCN_SET_ASM && CCN_UNIT_SIZE <= 4
+#define ccdigest_copy_state(_di_, _dst_, _src_) ccn_set((_di_)->state_size / CCN_UNIT_SIZE, _dst_, _src_)
+#else
+#define ccdigest_copy_state(_di_, _dst_, _src_) CC_MEMCPY(_dst_, _src_, (_di_)->state_size)
+#endif
+
+void ccdigest_init(const struct ccdigest_info *di, ccdigest_ctx_t ctx);
+void ccdigest_update(const struct ccdigest_info *di, ccdigest_ctx_t ctx,
+                     unsigned long len, const void *data);
+
+CC_INLINE
+void ccdigest_final(const struct ccdigest_info *di, ccdigest_ctx_t ctx, unsigned char *digest)
+{
+    di->final(di,ctx,digest);
+}
+
+void ccdigest(const struct ccdigest_info *di, unsigned long len,
+              const void *data, void *digest);
+
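+/* Usage sketch (illustrative; not part of the original header), with the
+   digest chosen at run time (here ccmd5_di() from ccmd5.h):
+
+     static void md5_example(unsigned long len, const void *msg,
+                             unsigned char *md)  // md: di->output_size bytes
+     {
+         const struct ccdigest_info *di = ccmd5_di();
+         ccdigest_di_decl(di, ctx);
+         ccdigest_init(di, ctx);
+         ccdigest_update(di, ctx, len, msg);
+         ccdigest_final(di, ctx, md);
+         ccdigest_di_clear(di, ctx);
+     }
+
+   or simply ccdigest(di, len, msg, md) for the one-shot form. */
+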
+/* test functions */
+int ccdigest_test(const struct ccdigest_info *di, unsigned long len,
+              const void *data, const void *digest);
+
+int ccdigest_test_chunk(const struct ccdigest_info *di, unsigned long len,
+                        const void *data, const void *digest, unsigned long chunk);
+
+struct ccdigest_vector {
+    unsigned long len;
+    const void *message;
+    const void *digest;
+};
+
+int ccdigest_test_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v);
+int ccdigest_test_chunk_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v, unsigned long chunk);
+
+#endif /* _CORECRYPTO_CCDIGEST_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/cchmac.h b/EXTERNAL_HEADERS/corecrypto/cchmac.h
new file mode 100644 (file)
index 0000000..b6fd0dc
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ *  cchmac.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/7/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCHMAC_H_
+#define _CORECRYPTO_CCHMAC_H_
+
+#include <corecrypto/cc.h>
+#include <corecrypto/ccdigest.h>
+
+/* A cchmac_ctx_t is normally allocated as an array of these. */
+struct cchmac_ctx {
+    uint8_t b[8];
+} __attribute__((aligned(8)));
+
+typedef union {
+    struct cchmac_ctx *hdr;
+    ccdigest_ctx_t digest;
+} cchmac_ctx_t __attribute__((transparent_union));
+
+#define cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE)  (ccdigest_ctx_size(STATE_SIZE, BLOCK_SIZE) + (STATE_SIZE))
+#define cchmac_di_size(_di_)  (cchmac_ctx_size((_di_)->state_size, (_di_)->block_size))
+
+#define cchmac_ctx_n(STATE_SIZE, BLOCK_SIZE)  ccn_nof_size(cchmac_ctx_size((STATE_SIZE), (BLOCK_SIZE)))
+
+#define cchmac_ctx_decl(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_decl(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_)
+#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_clear(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_)
+#define cchmac_di_decl(_di_, _name_) cchmac_ctx_decl((_di_)->state_size, (_di_)->block_size, _name_)
+#define cchmac_di_clear(_di_, _name_) cchmac_ctx_clear((_di_)->state_size, (_di_)->block_size, _name_)
+
+/* Return a ccdigest_ctx_t which can be accessed with the macros in ccdigest.h */
+#define cchmac_digest_ctx(_di_, HC)    (((cchmac_ctx_t)(HC)).digest)
+
+/* Accessors for ostate fields; this is all cchmac_ctx_t adds to the ccdigest_ctx_t. */
+#define cchmac_ostate(_di_, HC)    ((struct ccdigest_state *)(((cchmac_ctx_t)(HC)).hdr->b + ccdigest_di_size(_di_)))
+#define cchmac_ostate8(_di_, HC)   (ccdigest_u8(cchmac_ostate(_di_, HC)))
+#define cchmac_ostate32(_di_, HC)  (ccdigest_u32(cchmac_ostate(_di_, HC)))
+#define cchmac_ostate64(_di_, HC)  (ccdigest_u64(cchmac_ostate(_di_, HC)))
+#define cchmac_ostateccn(_di_, HC) (ccdigest_ccn(cchmac_ostate(_di_, HC)))
+
+/* Convenience accessors for ccdigest_ctx_t fields. */
+#define cchmac_istate(_di_, HC)    ((ccdigest_state_t)(((cchmac_ctx_t)(HC)).digest))
+#define cchmac_istate8(_di_, HC)   (ccdigest_u8(cchmac_istate(_di_, HC)))
+#define cchmac_istate32(_di_, HC)  (ccdigest_u32(cchmac_istate(_di_, HC)))
+#define cchmac_istate64(_di_, HC)  (ccdigest_u64(cchmac_istate(_di_, HC)))
+#define cchmac_istateccn(_di_, HC) (ccdigest_ccn(cchmac_istate(_di_, HC)))
+#define cchmac_data(_di_, HC)      ccdigest_data(_di_, ((cchmac_ctx_t)(HC)).digest)
+#define cchmac_num(_di_, HC)       ccdigest_num(_di_, ((cchmac_ctx_t)(HC)).digest)
+#define cchmac_nbits(_di_, HC)     ccdigest_nbits(_di_, ((cchmac_ctx_t)(HC)).digest)
+
+void cchmac_init(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                 unsigned long key_len, const void *key);
+void cchmac_update(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                   unsigned long data_len, const void *data);
+void cchmac_final(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                  unsigned char *mac);
+
+void cchmac(const struct ccdigest_info *di, unsigned long key_len,
+            const void *key, unsigned long data_len, const void *data,
+            unsigned char *mac);
+
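+/* Usage sketch (illustrative; not part of the original header): one-shot
+   HMAC with a runtime-selected digest (here ccmd5_di() from ccmd5.h):
+
+     static void hmac_example(unsigned long key_len, const void *key,
+                              unsigned long data_len, const void *data,
+                              unsigned char *mac)  // di->output_size bytes
+     {
+         const struct ccdigest_info *di = ccmd5_di();
+         cchmac(di, key_len, key, data_len, data, mac);
+     }
+*/
+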
+/* Test functions */
+
+struct cchmac_test_input {
+    const struct ccdigest_info *di;
+    unsigned long key_len;
+    const void *key;
+    unsigned long data_len;
+    const void *data;
+    unsigned long mac_len;
+    const void *expected_mac;
+};
+
+int cchmac_test(const struct cchmac_test_input *input);
+int cchmac_test_chunks(const struct cchmac_test_input *input, unsigned long chunk_size);
+
+
+#endif /* _CORECRYPTO_CCHMAC_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccmd5.h b/EXTERNAL_HEADERS/corecrypto/ccmd5.h
new file mode 100644 (file)
index 0000000..1285225
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ *  ccmd5.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/3/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCMD5_H_
+#define _CORECRYPTO_CCMD5_H_
+
+#include <corecrypto/ccdigest.h>
+
+#define CCMD5_BLOCK_SIZE   64
+#define CCMD5_OUTPUT_SIZE  16
+#define CCMD5_STATE_SIZE   16
+
+extern const uint32_t ccmd5_initial_state[4];
+
+/* Selector */
+const struct ccdigest_info *ccmd5_di(void);
+
+/* Implementations */
+extern const struct ccdigest_info ccmd5_ltc_di;
+
+#endif /* _CORECRYPTO_CCMD5_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode.h b/EXTERNAL_HEADERS/corecrypto/ccmode.h
new file mode 100644 (file)
index 0000000..3224069
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ *  ccmode.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/6/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCMODE_H_
+#define _CORECRYPTO_CCMODE_H_
+
+#include <corecrypto/cc.h>
+#include <corecrypto/ccmode_impl.h>
+
+/* ECB mode. */
+
+/* Declare a ecb key named _name_.  Pass the size field of a struct ccmode_ecb
+   for _size_. */
+#define ccecb_ctx_decl(_size_, _name_) cc_ctx_decl(ccecb_ctx, _size_, _name_)
+#define ccecb_ctx_clear(_size_, _name_) cc_ctx_clear(ccecb_ctx, _size_, _name_)
+
+CC_INLINE size_t ccecb_context_size(const struct ccmode_ecb *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccecb_block_size(const struct ccmode_ecb *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccecb_init(const struct ccmode_ecb *mode, ccecb_ctx *ctx,
+                           unsigned long key_len, const void *key)
+{
+    mode->init(mode, ctx, key_len, key);
+}
+
+CC_INLINE void ccecb_update(const struct ccmode_ecb *mode, const ccecb_ctx *ctx,
+       unsigned long in_len, const void *in, void *out)
+{
+       unsigned long numBlocks = (in_len / mode->block_size);
+       mode->ecb(ctx, numBlocks, in, out);
+}
+
+CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode,
+       unsigned long key_len, const void *key, unsigned long in_len,
+       const void *in, void *out)
+{
+       unsigned long numBlocks = (in_len / mode->block_size);
+       ccecb_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key);
+       mode->ecb(ctx, numBlocks, in, out);
+       ccecb_ctx_clear(mode->size, ctx);
+}
+
+/* CBC mode. */
+
+/* The CBC interface changed due to rdar://11468135.  This macro indicates
+   to clients which CBC API is implemented, so clients can support older
+   versions of corecrypto at build time.
+ */
+#define __CC_HAS_FIX_FOR_11468135__ 1
+
+/* Declare a cbc key named _name_.  Pass the size field of a struct ccmode_cbc
+   for _size_. */
+#define cccbc_ctx_decl(_size_, _name_) cc_ctx_decl(cccbc_ctx, _size_, _name_)
+#define cccbc_ctx_clear(_size_, _name_) cc_ctx_clear(cccbc_ctx, _size_, _name_)
+
+/* Declare a cbc iv tweak named _name_.  Pass the blocksize field of a struct ccmode_cbc
+   for _size_. */
+#define cccbc_iv_decl(_size_, _name_) cc_ctx_decl(cccbc_iv, _size_, _name_)
+#define cccbc_iv_clear(_size_, _name_) cc_ctx_clear(cccbc_iv, _size_, _name_)
+
+/* An actual symmetric algorithm implementation can provide you one of these.
+
+   Alternatively you can create a ccmode_cbc instance from any ccmode_ecb
+   cipher.  To do so, statically initialize a struct ccmode_cbc using the
+   CCMODE_FACTORY_CBC_DECRYPT or CCMODE_FACTORY_CBC_ENCRYPT macros, or
+   dynamically initialize one with ccmode_factory_cbc_decrypt() or
+   ccmode_factory_cbc_encrypt(). */
+
+CC_INLINE size_t cccbc_context_size(const struct ccmode_cbc *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long cccbc_block_size(const struct ccmode_cbc *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void cccbc_init(const struct ccmode_cbc *mode, cccbc_ctx *ctx,
+                           unsigned long key_len, const void *key)
+{
+    mode->init(mode, ctx, key_len, key);
+}
+
+CC_INLINE void cccbc_set_iv(const struct ccmode_cbc *mode, cccbc_iv *iv_ctx, const void *iv)
+{
+    if(iv)
+        cc_copy(mode->block_size, iv_ctx, iv);
+    else
+        cc_zero(mode->block_size, iv_ctx);
+}
+
+CC_INLINE void cccbc_update(const struct ccmode_cbc *mode,  cccbc_ctx *ctx, cccbc_iv *iv,
+       unsigned long nblocks, const void *in, void *out)
+{
+       mode->cbc(ctx, iv, nblocks, in, out);
+}
+
+CC_INLINE void cccbc_one_shot(const struct ccmode_cbc *mode,
+       unsigned long key_len, const void *key, const void *iv, unsigned long nblocks,
+       const void *in, void *out)
+{
+       cccbc_ctx_decl(mode->size, ctx);
+       cccbc_iv_decl(mode->block_size, iv_ctx);
+       mode->init(mode, ctx, key_len, key);
+    if(iv)
+        cccbc_set_iv      (mode, iv_ctx, iv);
+    else
+        cc_zero(mode->block_size, iv_ctx);
+    mode->cbc(ctx, iv_ctx, nblocks, in, out);
+       cccbc_ctx_clear(mode->size, ctx);
+}
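+
+/* Incremental usage sketch (illustrative; not part of the original
+   header), mirroring what cccbc_one_shot() does internally:
+
+     cccbc_ctx_decl(mode->size, ctx);
+     cccbc_iv_decl(mode->block_size, iv_ctx);
+     cccbc_init(mode, ctx, key_len, key);
+     cccbc_set_iv(mode, iv_ctx, iv);
+     cccbc_update(mode, ctx, iv_ctx, nblocks, in, out);  // repeatable
+     cccbc_ctx_clear(mode->size, ctx);
+*/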
+
+/* CFB mode. */
+
+/* Declare a cfb key named _name_.  Pass the size field of a struct ccmode_cfb
+ for _size_. */
+#define cccfb_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb_ctx, _size_, _name_)
+#define cccfb_ctx_clear(_size_, _name_) cc_ctx_clear(cccfb_ctx, _size_, _name_)
+
+CC_INLINE size_t cccfb_context_size(const struct ccmode_cfb *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long cccfb_block_size(const struct ccmode_cfb *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void cccfb_init(const struct ccmode_cfb *mode, cccfb_ctx *ctx,
+                           unsigned long key_len, const void *key, const void *iv)
+{
+    mode->init(mode, ctx, key_len, key, iv);
+}
+
+CC_INLINE void cccfb_update(const struct ccmode_cfb *mode, cccfb_ctx *ctx,
+       unsigned long in_len, const void *in, void *out)
+{
+       mode->cfb(ctx, in_len, in, out);
+}
+
+CC_INLINE void cccfb_one_shot(const struct ccmode_cfb *mode,
+       unsigned long key_len, const void *key, const void *iv,
+    unsigned long in_len, const void *in, void *out)
+{
+       cccfb_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key, iv);
+       mode->cfb(ctx, in_len, in, out);
+       cccfb_ctx_clear(mode->size, ctx);
+}
+
+/* CFB8 mode. */
+
+/* Declare a cfb8 key named _name_.  Pass the size field of a struct ccmode_cfb8
+ for _size_. */
+#define cccfb8_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb8_ctx, _size_, _name_)
+#define cccfb8_ctx_clear(_size_, _name_) cc_ctx_clear(cccfb8_ctx, _size_, _name_)
+
+CC_INLINE size_t cccfb8_context_size(const struct ccmode_cfb8 *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long cccfb8_block_size(const struct ccmode_cfb8 *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void cccfb8_init(const struct ccmode_cfb8 *mode, cccfb8_ctx *ctx,
+                           unsigned long key_len, const void *key, const void *iv)
+{
+    mode->init(mode, ctx, key_len, key, iv);
+}
+
+CC_INLINE void cccfb8_update(const struct ccmode_cfb8 *mode,  cccfb8_ctx *ctx,
+       unsigned long in_len, const void *in, void *out)
+{
+       mode->cfb8(ctx, in_len, in, out);
+}
+
+CC_INLINE void cccfb8_one_shot(const struct ccmode_cfb8 *mode,
+       unsigned long key_len, const void *key, const void *iv,
+    unsigned long in_len, const void *in, void *out)
+{
+       cccfb8_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key, iv);
+       mode->cfb8(ctx, in_len, in, out);
+       cccfb8_ctx_clear(mode->size, ctx);
+}
+
+/* CTR mode. */
+
+/* Declare a ctr key named _name_.  Pass the size field of a struct ccmode_ctr
+ for _size_. */
+#define ccctr_ctx_decl(_size_, _name_) cc_ctx_decl(ccctr_ctx, _size_, _name_)
+#define ccctr_ctx_clear(_size_, _name_) cc_ctx_clear(ccctr_ctx, _size_, _name_)
+
+/* This is Integer Counter Mode: The IV is the initial value of the counter
+ that is incremented by 1 for each new block. Use the mode flags to select
+ if the IV/Counter is stored in big or little endian. */
+
+CC_INLINE size_t ccctr_context_size(const struct ccmode_ctr *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccctr_block_size(const struct ccmode_ctr *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccctr_init(const struct ccmode_ctr *mode, ccctr_ctx *ctx,
+                           unsigned long key_len, const void *key, const void *iv)
+{
+    mode->init(mode, ctx, key_len, key, iv);
+}
+
+CC_INLINE void ccctr_update(const struct ccmode_ctr *mode, ccctr_ctx *ctx,
+       unsigned long in_len, const void *in, void *out)
+{
+       unsigned long numBlocks = (in_len / mode->block_size);
+       mode->ctr(ctx, numBlocks, in, out);
+}
+
+CC_INLINE void ccctr_one_shot(const struct ccmode_ctr *mode,
+       unsigned long key_len, const void *key, const void *iv,
+    unsigned long in_len, const void *in, void *out)
+{
+       unsigned long numBlocks = (in_len / mode->block_size);
+       ccctr_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key, iv);
+       mode->ctr(ctx, numBlocks, in, out);
+       ccctr_ctx_clear(mode->size, ctx);
+}
+
+
+/* OFB mode. */
+
+/* Declare a ofb key named _name_.  Pass the size field of a struct ccmode_ofb
+ for _size_. */
+#define ccofb_ctx_decl(_size_, _name_) cc_ctx_decl(ccofb_ctx, _size_, _name_)
+#define ccofb_ctx_clear(_size_, _name_) cc_ctx_clear(ccofb_ctx, _size_, _name_)
+
+CC_INLINE size_t ccofb_context_size(const struct ccmode_ofb *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccofb_block_size(const struct ccmode_ofb *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccofb_init(const struct ccmode_ofb *mode, ccofb_ctx *ctx,
+                           unsigned long key_len, const void *key, const void *iv)
+{
+    mode->init(mode, ctx, key_len, key, iv);
+}
+
+CC_INLINE void ccofb_update(const struct ccmode_ofb *mode, ccofb_ctx *ctx,
+       unsigned long in_len, const void *in, void *out)
+{
+       mode->ofb(ctx, in_len, in, out);
+}
+
+CC_INLINE void ccofb_one_shot(const struct ccmode_ofb *mode,
+       unsigned long key_len, const void *key, const void *iv,
+    unsigned long in_len, const void *in, void *out)
+{
+       ccofb_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key, iv);
+       mode->ofb(ctx, in_len, in, out);
+       ccofb_ctx_clear(mode->size, ctx);
+}
+
+/* Authenticated cipher modes. */
+
+/* XTS mode. */
+
+/* Declare a xts key named _name_.  Pass the size field of a struct ccmode_xts
+ for _size_. */
+#define ccxts_ctx_decl(_size_, _name_) cc_ctx_decl(ccxts_ctx, _size_, _name_)
+#define ccxts_ctx_clear(_size_, _name_) cc_ctx_clear(ccxts_ctx, _size_, _name_)
+
+/* Declare a xts tweak named _name_.  Pass the tweak_size field of a struct ccmode_xts
+ for _size_. */
+#define ccxts_tweak_decl(_size_, _name_) cc_ctx_decl(ccxts_tweak, _size_, _name_)
+#define ccxts_tweak_clear(_size_, _name_) cc_ctx_clear(ccxts_tweak, _size_, _name_)
+
+/* An actual symmetric algorithm implementation can provide you one of these.
+
+ Alternatively you can create a ccmode_xts instance from any ccmode_ecb
+ cipher.  To do so, statically initialize a struct ccmode_xts using the
+ CCMODE_FACTORY_XTS_DECRYPT or CCMODE_FACTORY_XTS_ENCRYPT macros, or
+ dynamically initialize one with ccmode_factory_xts_decrypt() or
+ ccmode_factory_xts_encrypt(). */
+
+/* NOTE that xts mode does not do cts padding.  It's really an xex mode.
+   If you need cts padding use the ccpad_xts_encrypt and ccpad_xts_decrypt
+   functions.   Also note that xts only works for ecb modes with a block_size
+   of 16.  */
+
+CC_INLINE size_t ccxts_context_size(const struct ccmode_xts *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccxts_block_size(const struct ccmode_xts *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccxts_init(const struct ccmode_xts *mode, ccxts_ctx *ctx,
+                           unsigned long key_len, const void *key, const void *tweak_key)
+{
+    mode->init(mode, ctx, key_len, key, tweak_key);
+}
+
+CC_INLINE void ccxts_set_tweak(const struct ccmode_xts *mode, ccxts_ctx *ctx, ccxts_tweak *tweak, const void *iv)
+{
+       mode->set_tweak(ctx, tweak, iv);
+}
+
+CC_INLINE void *ccxts_update(const struct ccmode_xts *mode, ccxts_ctx *ctx,
+       ccxts_tweak *tweak, unsigned long in_len, const void *in, void *out)
+{
+       return mode->xts(ctx, tweak, in_len, in, out);
+}
+
+CC_INLINE void ccxts_one_shot(const struct ccmode_xts *mode,
+       unsigned long key_len, const void *key, const void *tweak_key,
+    const void* iv,
+       unsigned long in_len, const void *in, void *out)
+{
+       ccxts_ctx_decl(mode->size, ctx);
+    ccxts_tweak_decl(mode->tweak_size, tweak);
+       mode->init(mode, ctx, key_len, key, tweak_key);
+    mode->set_tweak(ctx, tweak, iv);
+       mode->xts(ctx, tweak, in_len, in, out);
+       ccxts_ctx_clear(mode->size, ctx);
+    ccxts_tweak_clear(mode->tweak_size, tweak);
+}
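+
+/* Usage sketch (illustrative; not part of the original header): here iv
+   is the 16-byte tweak for the data unit, e.g. a sector number encoded
+   per the caller's convention:
+
+     const struct ccmode_xts *xts = ccaes_xts_encrypt_mode();  // ccaes.h
+     ccxts_one_shot(xts, CCAES_KEY_SIZE_256, key, tweak_key,
+                    iv, in_len, in, out);
+*/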
+
+/* GCM mode. */
+
+/* Declare a gcm key named _name_.  Pass the size field of a struct ccmode_gcm
+ for _size_. */
+#define ccgcm_ctx_decl(_size_, _name_) cc_ctx_decl(ccgcm_ctx, _size_, _name_)
+#define ccgcm_ctx_clear(_size_, _name_) cc_ctx_clear(ccgcm_ctx, _size_, _name_)
+
+CC_INLINE size_t ccgcm_context_size(const struct ccmode_gcm *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccgcm_block_size(const struct ccmode_gcm *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccgcm_init(const struct ccmode_gcm *mode, ccgcm_ctx *ctx,
+                           unsigned long key_len, const void *key)
+{
+    mode->init(mode, ctx, key_len, key);
+}
+
+CC_INLINE void ccgcm_set_iv(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, size_t iv_size, const void *iv)
+{
+       mode->set_iv(ctx, iv_size, iv);
+}
+
+CC_INLINE void ccgcm_gmac(const struct ccmode_gcm *mode, ccgcm_ctx *ctx,
+       unsigned long nbytes, const void *in)
+{
+       mode->gmac(ctx, nbytes, in);
+}
+
+CC_INLINE void ccgcm_update(const struct ccmode_gcm *mode, ccgcm_ctx *ctx,
+       unsigned long nbytes, const void *in, void *out)
+{
+       mode->gcm(ctx, nbytes, in, out);
+}
+
+CC_INLINE void ccgcm_finalize(const struct ccmode_gcm *mode, ccgcm_ctx *ctx,
+       size_t tag_size, void *tag)
+{
+       mode->finalize(ctx, tag_size, tag);
+}
+
+CC_INLINE void ccgcm_reset(const struct ccmode_gcm *mode, ccgcm_ctx *ctx)
+{
+    mode->reset(ctx);
+}
+
+
+CC_INLINE void ccgcm_one_shot(const struct ccmode_gcm *mode,
+       unsigned long key_len, const void *key,
+       unsigned long iv_len, const void *iv,
+       unsigned long nbytes, const void *in, void *out,
+       unsigned long adata_len, const void* adata,
+       size_t tag_len, void *tag)
+{
+       ccgcm_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, key_len, key);
+       mode->set_iv(ctx, iv_len, iv);
+       mode->gmac(ctx, adata_len, adata);
+       mode->gcm(ctx, nbytes, in, out);
+       mode->finalize(ctx, tag_len, tag);
+       ccgcm_ctx_clear(mode->size, ctx);
+}
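+
+/* Usage sketch (illustrative; not part of the original header):
+   authenticated encryption with additional authenticated data and a
+   16-byte tag:
+
+     const struct ccmode_gcm *gcm = ccaes_gcm_encrypt_mode();  // ccaes.h
+     uint8_t tag[16];
+     ccgcm_one_shot(gcm, key_len, key, iv_len, iv,
+                    nbytes, in, out, adata_len, adata,
+                    sizeof(tag), tag);
+*/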
+
+/* OMAC mode. */
+
+
+/* Declare a omac key named _name_.  Pass the size field of a struct ccmode_omac
+ for _size_. */
+#define ccomac_ctx_decl(_size_, _name_) cc_ctx_decl(ccomac_ctx, _size_, _name_)
+#define ccomac_ctx_clear(_size_, _name_) cc_ctx_clear(ccomac_ctx, _size_, _name_)
+
+CC_INLINE size_t ccomac_context_size(const struct ccmode_omac *mode)
+{
+    return mode->size;
+}
+
+CC_INLINE unsigned long ccomac_block_size(const struct ccmode_omac *mode)
+{
+       return mode->block_size;
+}
+
+CC_INLINE void ccomac_init(const struct ccmode_omac *mode, ccomac_ctx *ctx,
+                           unsigned long tweak_len, unsigned long key_len, const void *key)
+{
+    return mode->init(mode, ctx, tweak_len, key_len, key);
+}
+
+CC_INLINE int ccomac_update(const struct ccmode_omac *mode, ccomac_ctx *ctx,
+       unsigned long in_len, const void *tweak, const void *in, void *out)
+{
+       return mode->omac(ctx, in_len, tweak, in, out);
+}
+
+CC_INLINE int ccomac_one_shot(const struct ccmode_omac *mode,
+       unsigned long tweak_len, unsigned long key_len, const void *key,
+       const void *tweak, unsigned long in_len, const void *in, void *out)
+{
+       ccomac_ctx_decl(mode->size, ctx);
+       mode->init(mode, ctx, tweak_len, key_len, key);
+       int result = mode->omac(ctx, in_len, tweak, in, out);
+       ccomac_ctx_clear(mode->size, ctx);
+    return result;
+}
+
+
+#endif /* _CORECRYPTO_CCMODE_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h b/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h
new file mode 100644 (file)
index 0000000..3233c99
--- /dev/null
@@ -0,0 +1,571 @@
+/*
+ *  ccmode_factory.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 1/21/11.
+ *  Copyright 2011 Apple, Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCMODE_FACTORY_H_
+#define _CORECRYPTO_CCMODE_FACTORY_H_
+
+#include <corecrypto/ccn.h>  /* TODO: Remove dependency on this header. */
+#include <corecrypto/ccmode_impl.h>
+
+/* For CBC, direction of underlying ecb is the same as the cbc direction */
+#define CCMODE_CBC_FACTORY(_cipher_, _dir_)                                     \
+static struct ccmode_cbc cbc_##_cipher_##_##_dir_;                              \
+                                                                                \
+const struct ccmode_cbc *cc##_cipher_##_cbc_##_dir_##_mode(void)                \
+{                                                                               \
+    const struct ccmode_ecb *ecb=cc##_cipher_##_ecb_##_dir_##_mode();           \
+    ccmode_factory_cbc_##_dir_(&cbc_##_cipher_##_##_dir_, ecb);                 \
+    return &cbc_##_cipher_##_##_dir_;                                           \
+}
+
+/* For CTR, only one direction, underlying ecb is always encrypt */
+#define CCMODE_CTR_FACTORY(_cipher_)                                            \
+static struct ccmode_ctr ctr_##_cipher_;                                        \
+                                                                                \
+const struct ccmode_ctr *cc##_cipher_##_ctr_crypt_mode(void)                    \
+{                                                                               \
+    const struct ccmode_ecb *ecb=cc##_cipher_##_ecb_encrypt_mode();             \
+    ccmode_factory_ctr_crypt(&ctr_##_cipher_, ecb);                             \
+    return &ctr_##_cipher_;                                                     \
+}
+
+/* OFB, same as CTR */
+#define CCMODE_OFB_FACTORY(_cipher_)                                            \
+static struct ccmode_ofb ofb_##_cipher_;                                        \
+                                                                                \
+const struct ccmode_ofb *cc##_cipher_##_ofb_crypt_mode(void)                    \
+{                                                                               \
+    const struct ccmode_ecb *ecb=cc##_cipher_##_ecb_encrypt_mode();             \
+    ccmode_factory_ofb_crypt(&ofb_##_cipher_, ecb);                             \
+    return &ofb_##_cipher_;                                                     \
+}
+
+
+/* For CFB, the underlying ecb operation is encrypt for both directions */
+#define CCMODE_CFB_FACTORY(_cipher_, _mode_, _dir_)                             \
+static struct ccmode_##_mode_ _mode_##_##_cipher_##_##_dir_;                    \
+                                                                                \
+const struct ccmode_##_mode_ *cc##_cipher_##_##_mode_##_##_dir_##_mode(void)    \
+{                                                                               \
+    const struct ccmode_ecb *ecb=cc##_cipher_##_ecb_encrypt_mode();             \
+    ccmode_factory_##_mode_##_##_dir_(&_mode_##_##_cipher_##_##_dir_, ecb);     \
+    return &_mode_##_##_cipher_##_##_dir_;                                      \
+}
+
+/* For GCM, same as CFB */
+#define CCMODE_GCM_FACTORY(_cipher_, _dir_) CCMODE_CFB_FACTORY(_cipher_, gcm, _dir_)
+
+
+/* For XTS, you always need an ecb encrypt */
+#define CCMODE_XTS_FACTORY(_cipher_ , _dir_)                                    \
+static struct ccmode_xts xts##_cipher_##_##_dir_;                               \
+                                                                                \
+const struct ccmode_xts *cc##_cipher_##_xts_##_dir_##_mode(void)                \
+{                                                                               \
+    const struct ccmode_ecb *ecb=cc##_cipher_##_ecb_##_dir_##_mode();           \
+    const struct ccmode_ecb *ecb_enc=cc##_cipher_##_ecb_encrypt_mode();         \
+                                                                                \
+    ccmode_factory_xts_##_dir_(&xts##_cipher_##_##_dir_, ecb, ecb_enc);         \
+    return &xts##_cipher_##_##_dir_;                                            \
+}
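+
+/* Illustrative expansion (not part of the original header):
+   CCMODE_CBC_FACTORY(aes, encrypt) defines
+
+     static struct ccmode_cbc cbc_aes_encrypt;
+
+     const struct ccmode_cbc *ccaes_cbc_encrypt_mode(void)
+     {
+         const struct ccmode_ecb *ecb = ccaes_ecb_encrypt_mode();
+         ccmode_factory_cbc_encrypt(&cbc_aes_encrypt, ecb);
+         return &cbc_aes_encrypt;
+     }
+*/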
+
+#if 0
+
+/* example of how to make the selection function thread safe */
+
+struct ccmode_cbc cc3des_cbc_mode_encrypt;
+dispatch_once_t cc3des_mode_encrypt_init_once;
+
+void cc3des_mode_encrypt_init(void *ctx) {
+    const struct ccmode_ecb *ecb = cc3des_ecb_encrypt_mode();
+    ccmode_factory_cbc_encrypt(&cc3des_cbc_mode_encrypt, ecb);
+}
+
+const struct ccmode_cbc *cc3des_cbc_encrypt_mode(void) {
+    dispatch_once_f(&cc3des_mode_encrypt_init_once, NULL, cc3des_mode_encrypt_init);
+    return &cc3des_cbc_mode_encrypt;
+}
+
+/* ...or, alternatively, fully statically initialized: */
+
+struct ccmode_cbc cc3des_cbc_mode_encrypt = {
+    .n = CC3DES_LTC_ECB_ENCRYPT_N,
+    .init = ccmode_cbc_init,
+    .cbc = ccmode_cbc_encrypt,
+    .custom = &cc3des_ltc_ecb_encrypt
+};
+
+const struct ccmode_cbc *cc3des_cbc_encrypt_mode(void) {
+    return &cc3des_cbc_mode_encrypt;
+}
+
+#endif
+
+
+
+void *ccmode_cbc_init(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                      unsigned long rawkey_len, const void *rawkey,
+                      const void *iv);
+void *ccmode_cbc_decrypt(cccbc_ctx *ctx, unsigned long nblocks,
+                         const void *in, void *out);
+void *ccmode_cbc_encrypt(cccbc_ctx *ctx, unsigned long nblocks,
+                         const void *in, void *out);
+
+struct _ccmode_cbc_key {
+    const struct ccmode_ecb *ecb;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_cbc object for decryption. */
+#define CCMODE_FACTORY_CBC_DECRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cbc_key)) + ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = (ECB)->block_size, \
+.init = ccmode_cbc_init, \
+.cbc = ccmode_cbc_decrypt, \
+.custom = (ECB) \
+}
+
+/* Use this to statically initialize a ccmode_cbc object for encryption. */
+#define CCMODE_FACTORY_CBC_ENCRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cbc_key)) + ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = (ECB)->block_size, \
+.init = ccmode_cbc_init, \
+.cbc = ccmode_cbc_encrypt, \
+.custom = (ECB) \
+}
+
+/* Use this function to initialize a ccmode_cbc decrypt object at run time
+ (for example if it's part of a larger structure).  Normally you would pass
+ an ecb decrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_cbc_decrypt(struct ccmode_cbc *cbc,
+                                const struct ccmode_ecb *ecb) {
+    struct ccmode_cbc cbc_decrypt = CCMODE_FACTORY_CBC_DECRYPT(ecb);
+    *cbc = cbc_decrypt;
+}
+
+/* Use this function to initialize a ccmode_cbc encrypt object at run time
+ (for example if it's part of a larger structure).  Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_cbc_encrypt(struct ccmode_cbc *cbc,
+                                const struct ccmode_ecb *ecb) {
+    struct ccmode_cbc cbc_encrypt = CCMODE_FACTORY_CBC_ENCRYPT(ecb);
+    *cbc = cbc_encrypt;
+}
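+
+/* Usage sketch (illustrative; not part of the original header): build a
+   cbc encrypt mode around an existing ecb encrypt mode at run time:
+
+     struct ccmode_cbc cbc;
+     ccmode_factory_cbc_encrypt(&cbc, ccaes_ecb_encrypt_mode());
+     // cbc can now be used with the cccbc_* functions in ccmode.h
+*/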
+
+
+void ccmode_cfb_init(const struct ccmode_cfb *cfb, cccfb_ctx *ctx,
+                     unsigned long rawkey_len, const void *rawkey,
+                     const void *iv);
+void ccmode_cfb_decrypt(cccfb_ctx *ctx, unsigned long nblocks,
+                        const void *in, void *out);
+void ccmode_cfb_encrypt(cccfb_ctx *ctx, unsigned long nblocks,
+                        const void *in, void *out);
+
+struct _ccmode_cfb_key {
+    const struct ccmode_ecb *ecb;
+    size_t pad_len;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_cfb object for decryption. */
+#define CCMODE_FACTORY_CFB_DECRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cfb_key)) + 2 * ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = 1, \
+.init = ccmode_cfb_init, \
+.cfb = ccmode_cfb_decrypt, \
+.custom = (ECB) \
+}
+
+/* Use this to statically initialize a ccmode_cfb object for encryption. */
+#define CCMODE_FACTORY_CFB_ENCRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cfb_key)) + 2 * ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = 1, \
+.init = ccmode_cfb_init, \
+.cfb = ccmode_cfb_encrypt, \
+.custom = (ECB) \
+}
+
+/* Use this function to initialize a ccmode_cfb decrypt object at run time
+ (for example if it's part of a larger structure).  Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_cfb_decrypt(struct ccmode_cfb *cfb,
+                                const struct ccmode_ecb *ecb) {
+    struct ccmode_cfb cfb_decrypt = CCMODE_FACTORY_CFB_DECRYPT(ecb);
+    *cfb = cfb_decrypt;
+}
+
+/* Use this function to initialize a ccmode_cfb encrypt object at run time
+ (for example if it's part of a larger structure).  Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_cfb_encrypt(struct ccmode_cfb *cfb,
+                             const struct ccmode_ecb *ecb) {
+    struct ccmode_cfb cfb_encrypt = CCMODE_FACTORY_CFB_ENCRYPT(ecb);
+    *cfb = cfb_encrypt;
+}
+
+
+void ccmode_cfb8_init(const struct ccmode_cfb8 *cfb8, cccfb8_ctx *ctx,
+                      unsigned long rawkey_len, const void *rawkey,
+                      const void *iv);
+void ccmode_cfb8_decrypt(cccfb8_ctx *ctx, unsigned long nbytes,
+                         const void *in, void *out);
+void ccmode_cfb8_encrypt(cccfb8_ctx *ctx, unsigned long nbytes,
+                         const void *in, void *out);
+
+struct _ccmode_cfb8_key {
+    const struct ccmode_ecb *ecb;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_cfb8 object for decryption. */
+#define CCMODE_FACTORY_CFB8_DECRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cfb8_key)) + 2 * ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = 1, \
+.init = ccmode_cfb8_init, \
+.cfb8 = ccmode_cfb8_decrypt, \
+.custom = (ECB) \
+}
+
+/* Use this to statically initialize a ccmode_cfb8 object for encryption. */
+#define CCMODE_FACTORY_CFB8_ENCRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_cfb8_key)) + 2 * ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = 1, \
+.init = ccmode_cfb8_init, \
+.cfb8 = ccmode_cfb8_encrypt, \
+.custom = (ECB) \
+}
+
+/* Use this function to initialize a ccmode_cfb8 decrypt object at run time
+ (for example if it's part of a larger structure).  As with CFB, you would
+ normally pass an ecb encrypt mode implementation of some underlying
+ algorithm as the ecb parameter, since CFB8 uses the encrypt direction for
+ both encryption and decryption. */
+CC_INLINE
+void ccmode_factory_cfb8_decrypt(struct ccmode_cfb8 *cfb8,
+                              const struct ccmode_ecb *ecb) {
+    struct ccmode_cfb8 cfb8_decrypt = CCMODE_FACTORY_CFB8_DECRYPT(ecb);
+    *cfb8 = cfb8_decrypt;
+}
+
+/* Use this function to initialize a ccmode_cfb8 encrypt object at run time
+ (for example if it's part of a larger structure).  Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_cfb8_encrypt(struct ccmode_cfb8 *cfb8,
+                              const struct ccmode_ecb *ecb) {
+    struct ccmode_cfb8 cfb8_encrypt = CCMODE_FACTORY_CFB8_ENCRYPT(ecb);
+    *cfb8 = cfb8_encrypt;
+}
+
+void ccmode_ctr_init(const struct ccmode_ctr *ctr, ccctr_ctx *ctx,
+                     unsigned long rawkey_len, const void *rawkey,
+                     const void *iv);
+void ccmode_ctr_crypt(ccctr_ctx *ctx, unsigned long nblocks,
+                      const void *in, void *out);
+
+struct _ccmode_ctr_key {
+    const struct ccmode_ecb *ecb;
+    size_t pad_len;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_ctr object; CTR is the same
+ operation for encryption and decryption. */
+#define CCMODE_FACTORY_CTR_CRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_ctr_key)) + 2 * ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.block_size = 1, \
+.init = ccmode_ctr_init, \
+.ctr = ccmode_ctr_crypt, \
+.custom = (ECB_ENCRYPT) \
+}
+
+/* Use this function to initialize a ccmode_ctr object at run time (for
+ example if it's part of a larger structure).  Normally you would pass an
+ ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter. */
+CC_INLINE
+void ccmode_factory_ctr_crypt(struct ccmode_ctr *ctr,
+                             const struct ccmode_ecb *ecb) {
+    struct ccmode_ctr ctr_crypt = CCMODE_FACTORY_CTR_CRYPT(ecb);
+    *ctr = ctr_crypt;
+}
+
+/* GCM FEATURES. */
+//#define CCMODE_GCM_TABLES  1
+#define CCMODE_GCM_FAST  1
+
+#ifdef CCMODE_GCM_FAST
+#define CCMODE_GCM_FAST_TYPE cc_unit
+#endif
+
+#ifdef CCMODE_GCM_TABLES
+
+//#define CCMODE_GCM_TABLES_SSE2  1
+
+extern const unsigned char gcm_shift_table[256*2];
+#endif
+
+/* Create a gcm key from a gcm mode object.
+ key must point to at least sizeof(CCMODE_GCM_KEY(ecb)) bytes of free
+ storage. */
+void ccmode_gcm_init(const struct ccmode_gcm *gcm, ccgcm_ctx *ctx,
+                     unsigned long rawkey_len, const void *rawkey);
+void ccmode_gcm_set_iv(ccgcm_ctx *ctx, size_t iv_size, const void *iv);
+void ccmode_gcm_gmac(ccgcm_ctx *ctx, unsigned long nbytes, const void *in);
+void ccmode_gcm_decrypt(ccgcm_ctx *ctx, unsigned long nbytes, const void *in,
+                        void *out);
+void ccmode_gcm_encrypt(ccgcm_ctx *ctx, unsigned long nbytes, const void *in,
+                        void *out);
+void ccmode_gcm_finalize(ccgcm_ctx *key, size_t tag_size, void *tag);
+void ccmode_gcm_reset(ccgcm_ctx *key);
+
+struct _ccmode_gcm_key {
+    // 5 blocks of temp space.
+    unsigned char H[16];       /* multiplier */
+    unsigned char X[16];       /* accumulator */
+    unsigned char Y[16];       /* counter */
+    unsigned char Y_0[16];     /* initial counter */
+    unsigned char buf[16];     /* scratch buffer for partial blocks */
+
+    const struct ccmode_ecb *ecb;
+    uint32_t ivmode;       /* Which mode is the IV in? */
+    uint32_t mode;         /* mode the GCM code is in */
+    uint32_t buflen;       /* length of data in buf */
+
+    uint64_t totlen;       /* 64-bit counter used for IV and AAD */
+    uint64_t pttotlen;     /* 64-bit counter for the PT */
+
+#ifdef CCMODE_GCM_TABLES
+    /* TODO: Make table based gcm a separate mode object. */
+    unsigned char       PC[16][256][16]  /* 16 tables of 8x128 */
+#ifdef CCMODE_GCM_TABLES_SSE2
+    __attribute__ ((aligned (16)))
+#endif /* CCMODE_GCM_TABLES_SSE2 */
+    ;
+#endif /* CCMODE_GCM_TABLES */
+
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_gcm object for decryption. */
+#define CCMODE_FACTORY_GCM_DECRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_gcm_key)) + 5 * ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.block_size = 1, \
+.init = ccmode_gcm_init, \
+.set_iv = ccmode_gcm_set_iv, \
+.gmac = ccmode_gcm_gmac, \
+.gcm = ccmode_gcm_decrypt, \
+.finalize = ccmode_gcm_finalize, \
+.reset = ccmode_gcm_reset, \
+.custom = (ECB_ENCRYPT) \
+}
+
+/* Use this to statically initialize a ccmode_gcm object for encryption. */
+#define CCMODE_FACTORY_GCM_ENCRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_gcm_key)) + 5 * ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.block_size = 1, \
+.init = ccmode_gcm_init, \
+.set_iv = ccmode_gcm_set_iv, \
+.gmac = ccmode_gcm_gmac, \
+.gcm = ccmode_gcm_encrypt, \
+.finalize = ccmode_gcm_finalize, \
+.reset = ccmode_gcm_reset, \
+.custom = (ECB_ENCRYPT) \
+}
+
+/* Use this function to initialize a ccmode_gcm decrypt object at runtime
+ (for example if it's part of a larger structure). For GCM you always pass
+ an ecb encrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_gcm_decrypt(struct ccmode_gcm *gcm,
+                             const struct ccmode_ecb *ecb_encrypt) {
+    struct ccmode_gcm gcm_decrypt = CCMODE_FACTORY_GCM_DECRYPT(ecb_encrypt);
+    *gcm = gcm_decrypt;
+}
+
+/* Use this function to initialize a ccmode_gcm encrypt object at runtime
+ (for example if it's part of a larger structure). For GCM you always pass
+ an ecb encrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_gcm_encrypt(struct ccmode_gcm *gcm,
+                             const struct ccmode_ecb *ecb_encrypt) {
+    struct ccmode_gcm gcm_encrypt = CCMODE_FACTORY_GCM_ENCRYPT(ecb_encrypt);
+    *gcm = gcm_encrypt;
+}
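+
+/* Editor's sketch (not part of the original header): unlike CBC or XTS,
+ both GCM directions take an ECB *encrypt* implementation, because GCM
+ only ever runs the block cipher forward.  Assuming an AES ECB encrypt
+ mode from <corecrypto/ccaes.h>:
+
+     struct ccmode_gcm aes_gcm_enc, aes_gcm_dec;
+     ccmode_factory_gcm_encrypt(&aes_gcm_enc, &ccaes_ltc_ecb_encrypt_mode);
+     ccmode_factory_gcm_decrypt(&aes_gcm_dec, &ccaes_ltc_ecb_encrypt_mode);
+ */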
+
+
+void ccmode_ofb_init(const struct ccmode_ofb *ofb, ccofb_ctx *ctx,
+                     unsigned long rawkey_len, const void *rawkey,
+                     const void *iv);
+void ccmode_ofb_crypt(ccofb_ctx *ctx, unsigned long nblocks,
+                      const void *in, void *out);
+
+struct _ccmode_ofb_key {
+    const struct ccmode_ecb *ecb;
+    size_t pad_len;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_ofb object. */
+#define CCMODE_FACTORY_OFB_CRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_ofb_key)) + ccn_sizeof_size((ECB)->block_size) + ccn_sizeof_size((ECB)->size), \
+.block_size = 1, \
+.init = ccmode_ofb_init, \
+.ofb = ccmode_ofb_crypt, \
+.custom = (ECB) \
+}
+
+/* Use this function to initialize a ccmode_ofb object at runtime (for
+ example if it's part of a larger structure). Normally you would pass an
+ ecb encrypt mode implementation of some underlying algorithm as the ecb
+ parameter; OFB uses the same operation for encryption and decryption. */
+CC_INLINE
+void ccmode_factory_ofb_crypt(struct ccmode_ofb *ofb,
+                             const struct ccmode_ecb *ecb) {
+    struct ccmode_ofb ofb_crypt = CCMODE_FACTORY_OFB_CRYPT(ecb);
+    *ofb = ofb_crypt;
+}
+
+
+int ccmode_omac_decrypt(ccomac_ctx *ctx, unsigned long nblocks,
+                        const void *tweak, const void *in, void *out);
+int ccmode_omac_encrypt(ccomac_ctx *ctx, unsigned long nblocks,
+                        const void *tweak, const void *in, void *out);
+
+/* Create an omac key from an omac mode object.  The tweak_len here
+ determines how long the tweak is in bytes for each subsequent call to
+ ccmode_omac->omac().
+ key must point to at least sizeof(CCMODE_OMAC_KEY(ecb)) bytes of free
+ storage. */
+void ccmode_omac_init(const struct ccmode_omac *omac, ccomac_ctx *ctx,
+                      cc_size tweak_len, unsigned long rawkey_len,
+                      const void *rawkey);
+
+struct _ccmode_omac_key {
+    const struct ccmode_ecb *ecb;
+    size_t tweak_len;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_omac object for decryption. */
+#define CCMODE_FACTORY_OMAC_DECRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_omac_key)) + 2 * ccn_sizeof_size((ECB)->size), \
+.block_size = (ECB)->block_size, \
+.init = ccmode_omac_init, \
+.omac = ccmode_omac_decrypt, \
+.custom = (ECB) \
+}
+
+/* Use this to statically initialize a ccmode_omac object for encryption. */
+#define CCMODE_FACTORY_OMAC_ENCRYPT(ECB) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_omac_key)) + 2 * ccn_sizeof_size((ECB)->size), \
+.block_size = (ECB)->block_size, \
+.init = ccmode_omac_init, \
+.omac = ccmode_omac_encrypt, \
+.custom = (ECB) \
+}
+
+/* Use this function to initialize a ccmode_omac decrypt object at runtime
+ (for example if it's part of a larger structure). Normally you would pass
+ an ecb decrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_omac_decrypt(struct ccmode_omac *omac,
+                              const struct ccmode_ecb *ecb) {
+    struct ccmode_omac omac_decrypt = CCMODE_FACTORY_OMAC_DECRYPT(ecb);
+    *omac = omac_decrypt;
+}
+
+/* Use this function to initialize a ccmode_omac encrypt object at runtime
+ (for example if it's part of a larger structure). Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_omac_encrypt(struct ccmode_omac *omac,
+                              const struct ccmode_ecb *ecb) {
+    struct ccmode_omac omac_encrypt = CCMODE_FACTORY_OMAC_ENCRYPT(ecb);
+    *omac = omac_encrypt;
+}
+
+
+/* Function prototypes used by the macros below, do not call directly. */
+void ccmode_xts_init(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                     unsigned long key_len, const void *data_key,
+                     const void *tweak_key);
+void *ccmode_xts_crypt(ccxts_ctx *ctx, unsigned long nblocks,
+                       const void *in, void *out);
+void ccmode_xts_set_tweak(ccxts_ctx *ctx, const void *tweak);
+
+
+struct _ccmode_xts_key {
+    const struct ccmode_ecb *ecb;
+    const struct ccmode_ecb *ecb_encrypt;
+       // FIPS requires that for XTS no more than 2^20 AES blocks may be processed for any given
+       // key, tweak key, and tweak combination.
+       // The blocks_processed field in the context accumulates the number of blocks processed and
+       // the encrypt/decrypt will fail if the limit is violated.  This counter is reset to 0
+       // when set_tweak is called.
+       unsigned long  blocks_processed;
+    cc_unit u[];
+};
+
+/* Use this to statically initialize a ccmode_xts object for decryption. */
+#define CCMODE_FACTORY_XTS_DECRYPT(ECB, ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size) + ccn_sizeof_size(16), \
+.block_size = 16, \
+.init = ccmode_xts_init, \
+.set_tweak = ccmode_xts_set_tweak, \
+.xts = ccmode_xts_crypt, \
+.custom = (ECB), \
+.custom1 = (ECB_ENCRYPT) \
+}
+
+/* Use this to statically initialize a ccmode_xts object for encryption. */
+#define CCMODE_FACTORY_XTS_ENCRYPT(ECB, ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size) + ccn_sizeof_size(16), \
+.block_size = 16, \
+.init = ccmode_xts_init, \
+.set_tweak = ccmode_xts_set_tweak, \
+.xts = ccmode_xts_crypt, \
+.custom = (ECB), \
+.custom1 = (ECB_ENCRYPT) \
+}
+
+/* Use this function to initialize a ccmode_xts decrypt object at runtime
+ (for example if it's part of a larger structure). Normally you would pass
+ an ecb decrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_xts_decrypt(struct ccmode_xts *xts,
+                             const struct ccmode_ecb *ecb,
+                             const struct ccmode_ecb *ecb_encrypt) {
+    struct ccmode_xts xts_decrypt = CCMODE_FACTORY_XTS_DECRYPT(ecb, ecb_encrypt);
+    *xts = xts_decrypt;
+}
+
+/* Use this function to initialize a ccmode_xts encrypt object at runtime
+ (for example if it's part of a larger structure). Normally you would pass
+ an ecb encrypt mode implementation of some underlying algorithm as the
+ ecb parameter. */
+CC_INLINE
+void ccmode_factory_xts_encrypt(struct ccmode_xts *xts,
+                             const struct ccmode_ecb *ecb,
+                             const struct ccmode_ecb *ecb_encrypt) {
+    struct ccmode_xts xts_encrypt = CCMODE_FACTORY_XTS_ENCRYPT(ecb, ecb_encrypt);
+    *xts = xts_encrypt;
+}
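+
+/* Editor's sketch (not part of the original header): XTS decryption takes
+ two ECB objects -- data units are deciphered with an ECB decrypt mode,
+ while the tweak is always derived with an ECB encrypt mode.  The AES mode
+ names are assumptions, expected from <corecrypto/ccaes.h>:
+
+     struct ccmode_xts aes_xts_dec;
+     ccmode_factory_xts_decrypt(&aes_xts_dec,
+                                &ccaes_ltc_ecb_decrypt_mode,
+                                &ccaes_ltc_ecb_encrypt_mode);
+ */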
+
+#endif /* _CORECRYPTO_CCMODE_FACTORY_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h b/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h
new file mode 100644 (file)
index 0000000..3e35f54
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ *  ccmode_impl.h
+ *  corecrypto
+ *
+ *  Created by James Murphy on 12/9/11.
+ *  Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCMODE_IMPL_H_
+#define _CORECRYPTO_CCMODE_IMPL_H_
+
+#include <corecrypto/cc.h>
+
+/* ECB mode. */
+cc_aligned_struct(16) ccecb_ctx;
+
+
+/* An actual symmetric algorithm implementation should provide you with one of these. */
+struct ccmode_ecb {
+    size_t size;        /* first argument to ccecb_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_ecb *ecb, ccecb_ctx *ctx,
+                 unsigned long key_len, const void *key);
+    void (*ecb)(const ccecb_ctx *ctx, unsigned long nblocks, const void *in,
+                void *out);
+};
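+
+/* Editor's sketch (not part of the original header): each mode struct in
+ this file is a small dispatch table, so callers can be written against
+ the interface rather than a concrete cipher.  cc_ctx_decl() is assumed
+ to come from <corecrypto/cc.h>:
+
+     static void ecb_one_block(const struct ccmode_ecb *m,
+                               unsigned long key_len, const void *key,
+                               const void *in, void *out) {
+         cc_ctx_decl(ccecb_ctx, m->size, ctx);   // stack space of m->size bytes
+         m->init(m, ctx, key_len, key);
+         m->ecb(ctx, 1, in, out);                // process a single block
+     }
+ */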
+
+/* CBC mode. */
+cc_aligned_struct(16) cccbc_ctx;
+cc_aligned_struct(16) cccbc_iv;
+
+struct ccmode_cbc {
+    size_t size;        /* first argument to cccbc_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                     unsigned long key_len, const void *key);
+    /* cbc encrypt or decrypt nblocks from in to out, iv will be used and updated. */
+    void (*cbc)(const cccbc_ctx *ctx, cccbc_iv *iv, unsigned long nblocks,
+                const void *in, void *out);
+    const void *custom;
+};
+
+/* CFB mode. */
+cc_aligned_struct(16) cccfb_ctx;
+
+struct ccmode_cfb {
+    size_t size;        /* first argument to cccfb_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_cfb *cfb, cccfb_ctx *ctx,
+                 unsigned long key_len, const void *key,
+                 const void *iv);
+    void (*cfb)(cccfb_ctx *ctx, unsigned long nblocks,
+                const void *in, void *out);
+    const void *custom;
+};
+
+/* CFB8 mode. */
+
+cc_aligned_struct(16) cccfb8_ctx;
+
+struct ccmode_cfb8 {
+    size_t size;        /* first argument to cccfb8_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_cfb8 *cfb8, cccfb8_ctx *ctx,
+                 unsigned long key_len, const void *key,
+                 const void *iv);
+    void (*cfb8)(cccfb8_ctx *ctx, unsigned long nbytes,
+                 const void *in, void *out);
+    const void *custom;
+};
+
+/* CTR mode. */
+
+cc_aligned_struct(16) ccctr_ctx;
+
+struct ccmode_ctr {
+    size_t size;        /* first argument to ccctr_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_ctr *ctr, ccctr_ctx *ctx,
+                 unsigned long key_len, const void *key,
+                 const void *iv);
+    void (*ctr)(ccctr_ctx *ctx, unsigned long nblocks,
+                const void *in, void *out);
+    const void *custom;
+};
+
+/* OFB mode. */
+
+cc_aligned_struct(16) ccofb_ctx;
+
+struct ccmode_ofb {
+    size_t size;        /* first argument to ccofb_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_ofb *ofb, ccofb_ctx *ctx,
+                 unsigned long key_len, const void *key,
+                 const void *iv);
+    void (*ofb)(ccofb_ctx *ctx, unsigned long nblocks,
+                const void *in, void *out);
+    const void *custom;
+};
+
+/* XTS mode. */
+
+cc_aligned_struct(16) ccxts_ctx;
+cc_aligned_struct(16) ccxts_tweak;
+
+struct ccmode_xts {
+    size_t size;        /* first argument to ccxts_ctx_decl(). */
+    size_t tweak_size;  /* first argument to ccxts_tweak_decl(). */
+    unsigned long block_size;
+
+    /* Create an xts key from an xts mode object.  The tweak_len here
+     determines how long the tweak is in bytes for each subsequent call to
+     ccmode_xts->xts().
+     key must point to at least 'size' cc_units of free storage.
+     tweak_key must point to at least 'tweak_size' cc_units of free storage. */
+    void (*init)(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                 unsigned long key_len, const void *key,
+                 const void *tweak_key);
+
+    /* Set the tweak (sector number); the position is reset to block zero within the sector. */
+    void (*set_tweak)(const ccxts_ctx *ctx, ccxts_tweak *tweak, const void *iv);
+
+    /* Encrypt blocks for a sector; clients must call set_tweak before calling
+       this function.  Returns a pointer to the tweak buffer. */
+    void *(*xts)(const ccxts_ctx *ctx, ccxts_tweak *tweak, unsigned long nblocks,
+                    const void *in, void *out);
+
+    const void *custom;
+    const void *custom1;
+};
+
+/* GCM mode. */
+
+cc_aligned_struct(16) ccgcm_ctx;
+
+struct ccmode_gcm {
+    size_t size;        /* first argument to ccgcm_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_gcm *gcm, ccgcm_ctx *ctx,
+                 unsigned long key_len, const void *key);
+    void (*set_iv)(ccgcm_ctx *ctx, size_t iv_size, const void *iv);
+    void (*gmac)(ccgcm_ctx *ctx, unsigned long nbytes, const void *in);  // could just be gcm with NULL out
+    void (*gcm)(ccgcm_ctx *ctx, unsigned long nbytes, const void *in, void *out);
+    void (*finalize)(ccgcm_ctx *key, size_t tag_size, void *tag);
+    void (*reset)(ccgcm_ctx *ctx);
+    const void *custom;
+};
+
+/* OMAC mode. */
+
+cc_aligned_struct(16) ccomac_ctx;
+
+struct ccmode_omac {
+    size_t size;        /* first argument to ccomac_ctx_decl(). */
+    unsigned long block_size;
+    void (*init)(const struct ccmode_omac *omac, ccomac_ctx *ctx,
+                 unsigned long tweak_len, unsigned long key_len,
+                 const void *key);
+    int (*omac)(ccomac_ctx *ctx, unsigned long nblocks,
+                const void *tweak, const void *in, void *out);
+    const void *custom;
+};
+
+#endif /* _CORECRYPTO_CCMODE_IMPL_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccn.h b/EXTERNAL_HEADERS/corecrypto/ccn.h
new file mode 100644 (file)
index 0000000..dd10e97
--- /dev/null
@@ -0,0 +1,636 @@
+/*
+ *  ccn.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 7/25/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCN_H_
+#define _CORECRYPTO_CCN_H_
+
+#include <corecrypto/cc_config.h>
+#include <corecrypto/cc_priv.h>  /* TODO: Get rid of this include in this header. */
+#include <stdint.h>
+
+typedef uint8_t cc_byte;
+typedef size_t cc_size;
+
+#if  CCN_UNIT_SIZE == 8
+typedef uint64_t cc_unit;          // 64 bit unit
+//typedef uint128_t cc_dunit;         // 128 bit double width unit
+#define CCN_LOG2_BITS_PER_UNIT  6  // 2^6 = 64 bits
+#define CC_UNIT_C(x) UINT64_C(x)
+#elif  CCN_UNIT_SIZE == 4
+typedef uint32_t cc_unit;          // 32 bit unit
+typedef uint64_t cc_dunit;         // 64 bit double width unit
+#define CCN_LOG2_BITS_PER_UNIT  5  // 2^5 = 32 bits
+#define CC_UNIT_C(x) UINT32_C(x)
+#elif CCN_UNIT_SIZE == 2
+typedef uint16_t cc_unit;          // 16 bit unit
+typedef uint32_t cc_dunit;         // 32 bit double width unit
+#define CCN_LOG2_BITS_PER_UNIT  4  // 2^4 = 16 bits
+#define CC_UNIT_C(x) UINT16_C(x)
+#elif CCN_UNIT_SIZE == 1
+typedef uint8_t cc_unit;           // 8 bit unit
+typedef uint16_t cc_dunit;         // 16 bit double width unit
+#define CCN_LOG2_BITS_PER_UNIT  3  // 2^3 = 8 bits
+#define CC_UNIT_C(x) UINT8_C(x)
+#else
+#error invalid CCN_UNIT_SIZE
+#endif
+
+// All mp types have units in little endian unit order.
+typedef cc_unit *ccn_t;                // n unit long mp
+typedef cc_unit *ccnp1_t;              // n + 1 unit long mp
+typedef cc_unit *cc2n_t;               // 2 * n unit long mp
+typedef cc_unit *cc2np2_t;             // 2 * n + 2 unit long mp
+typedef const cc_unit *ccn_in_t;       // n unit long mp
+typedef const cc_unit *ccnp1_in_t;     // n + 1 unit long mp
+typedef const cc_unit *cc2n_in_t;      // 2 * n unit long mp
+typedef const cc_unit *cc2np2_in_t;    // 2 * n + 2 unit long mp
+
+#define CCN_UNIT_BITS  (sizeof(cc_unit) * 8)
+#define CCN_UNIT_MASK  ((cc_unit)~0)
+
+
+/* Conversions between n sizeof and bits */
+
+/* Returns the sizeof a ccn vector of length _n_ units. */
+#define ccn_sizeof_n(_n_)  (sizeof(cc_unit) * (_n_))
+
+/* Returns the count (n) of a ccn vector that can represent _bits_. */
+#define ccn_nof(_bits_)  (((_bits_) + CCN_UNIT_BITS - 1) / CCN_UNIT_BITS)
+
+/* Returns the sizeof a ccn vector that can represent _bits_. */
+#define ccn_sizeof(_bits_)  (ccn_sizeof_n(ccn_nof(_bits_)))
+
+/* Returns the count (n) of a ccn vector that can represent _size_ bytes. */
+#define ccn_nof_size(_size_)  (((_size_) + CCN_UNIT_SIZE - 1) / CCN_UNIT_SIZE)
+
+/* Return the max number of bits a ccn vector of _n_ units can hold. */
+#define ccn_bitsof_n(_n_)  ((_n_) * CCN_UNIT_BITS)
+
+/* Return the max number of bits a ccn vector of _size_ bytes can hold. */
+#define ccn_bitsof_size(_size_)  ((_size_) * 8)
+
+/* Return the size of a ccn of size bytes in bytes. */
+#define ccn_sizeof_size(_size_)  ccn_sizeof_n(ccn_nof_size(_size_))
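+
+/* Editor's worked example: with CCN_UNIT_SIZE == 8 (64-bit units),
+ ccn_nof(521) == (521 + 63) / 64 == 9, so ccn_sizeof(521) == 72 bytes;
+ ccn_nof_size(20) == 3, so a 20-byte big endian input needs 3 units
+ (ccn_sizeof_size(20) == 24 bytes) of cc_unit storage. */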
+
+/* Returns the value of bit _k_ of _ccn_; both are evaluated only once.  */
+#define ccn_bit(_ccn_, _k_) ({__typeof__ (_k_) __k = (_k_); \
+    1 & ((_ccn_)[__k / CCN_UNIT_BITS] >> (__k & (CCN_UNIT_BITS - 1)));})
+
+#define ccn_set_bit(_ccn_, _k_, _v_) ({__typeof__ (_k_) __k = (_k_);        \
+    if (_v_)                                                                \
+        (_ccn_)[__k/CCN_UNIT_BITS] |= CC_UNIT_C(1) << (__k & (CCN_UNIT_BITS - 1));     \
+    else                                                                    \
+        (_ccn_)[__k/CCN_UNIT_BITS] &= ~(CC_UNIT_C(1) << (__k & (CCN_UNIT_BITS - 1)));  \
+    })
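+
+/* Editor's example of the bit macros above:
+
+     cc_unit x[ccn_nof(192)] = { CC_UNIT_C(5) };  // value 0b101
+     // ccn_bit(x, 2) == 1
+     ccn_set_bit(x, 0, 0);                        // x now holds 0b100
+ */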
+
+/* Macros for making ccn constants.  You must use a list of CCN64_C()
+ instances separated by commas, with an optional smaller sized CCN32_C,
+ CCN16_C, or CCN8_C() instance at the end of the list, when making macros
+ to declare larger sized constants. */
+#define CCN8_C(a0) CC_UNIT_C(0x##a0)
+
+#if CCN_UNIT_SIZE >= 2
+#define CCN16_C(a1,a0) CC_UNIT_C(0x##a1##a0)
+#define ccn16_v(a0)  (a0)
+#elif CCN_UNIT_SIZE == 1
+#define CCN16_C(a1,a0) CCN8_C(a0),CCN8_C(a1)
+#define ccn16_v(a0)  (a0 & UINT8_C(0xff)),(a0 >> 8)
+#endif
+
+#if CCN_UNIT_SIZE >= 4
+#define CCN32_C(a3,a2,a1,a0) CC_UNIT_C(0x##a3##a2##a1##a0)
+#define ccn32_v(a0)  (a0)
+#else
+#define CCN32_C(a3,a2,a1,a0) CCN16_C(a1,a0),CCN16_C(a3,a2)
+#define ccn32_v(a0)  ccn16_v(a0 & UINT16_C(0xffff)),ccn16_v(a0 >> 16)
+#endif
+
+#if CCN_UNIT_SIZE == 8
+#define CCN64_C(a7,a6,a5,a4,a3,a2,a1,a0) CC_UNIT_C(0x##a7##a6##a5##a4##a3##a2##a1##a0)
+#define CCN40_C(a4,a3,a2,a1,a0) CC_UNIT_C(0x##a4##a3##a2##a1##a0)
+#define ccn64_v(a0)  (a0)
+//#define ccn64_32(a1,a0)  ((a1 << 32) | a0)
+//#define ccn_uint64(a,i) (a[i])
+#else
+#define CCN64_C(a7,a6,a5,a4,a3,a2,a1,a0) CCN32_C(a3,a2,a1,a0),CCN32_C(a7,a6,a5,a4)
+#define CCN40_C(a4,a3,a2,a1,a0) CCN32_C(a3,a2,a1,a0),CCN8_C(a4)
+#define ccn64_v(a0)  ccn32_v((uint64_t)a0 & UINT32_C(0xffffffff)),ccn32_v((uint64_t)a0 >> 32)
+//#define ccn64_32(a1,a0)  ccn32_v(a0),ccn32_v(a1)
+//#define ccn_uint64(a,i) ((uint64_t)ccn_uint32(a, i << 1 + 1) << 32 | (uint64_t)ccn_uint32(a, i << 1))
+#endif
+
+/* Macros for reading uint32_t and uint64_t from ccns, the index is in 32 or
+   64 bit units respectively. */
+#if CCN_UNIT_SIZE == 8
+//#define ccn_uint16(a,i) ((i & 3) == 3 ? ((uint16_t)(a[i >> 2] >> 48)) : \
+//    (i & 3) == 2 ? ((uint16_t)(a[i >> 2] >> 32) & UINT16_C(0xffff)) : \
+//    (i & 3) == 1 ? ((uint16_t)(a[i >> 2] >> 16) & UINT16_C(0xffff)) : \
+//    ((uint16_t)(a[i >> 1] & UINT16_C(0xffff))))
+//#define ccn_uint32(a,i) (i & 1 ? ((uint32_t)(a[i >> 1] >> 32)) : ((uint32_t)(a[i >> 1] & UINT32_C(0xffffffff))))
+#elif CCN_UNIT_SIZE == 4
+//#define ccn16_v(a0)  (a0)
+//#define ccn32_v(a0)  (a0)
+//#define ccn_uint16(a,i) (i & 1 ? ((uint16_t)(a[i >> 1] >> 16)) : ((uint16_t)(a[i >> 1] & UINT16_C(0xffff))))
+//#define ccn_uint32(a,i) (a[i])
+#elif CCN_UNIT_SIZE == 2
+//#define ccn16_v(a0)  (a0)
+//#define ccn32_v(a0,a1)  (a1,a0)
+//#define ccn_uint16(a,i) (a[i])
+//#define ccn_uint32(a,i) (((uint32_t)a[i << 1 + 1]) << 16 | (uint32_t)a[i << 1]))
+#elif CCN_UNIT_SIZE == 1
+//#define ccn16_v(a0)  (a0 & UINT8_C(0xff)),(a0 >> 8)
+//#define ccn_uint16(a,i) ((uint16_t)((a[i << 1 + 1] << 8) | a[i << 1]))
+//#define ccn_uint32(a,i) ((uint32_t)ccn_uint16(a, i << 1 + 1) << 16 | (uint32_t)ccn_uint16(a, i << 1))
+#endif
+
+/* Macros for assembling ccn unit sequences from 32 and 64 bit quantities;
+ the layout depends on CCN_UNIT_SIZE. */
+#if CCN_UNIT_SIZE == 8
+
+#define ccn64_32(a1,a0) (((cc_unit)a1) << 32 | ((cc_unit)a0))
+#define ccn32_32(a0) a0
+#if __LITTLE_ENDIAN__
+#define ccn32_32_parse(p,i) (((uint32_t *)p)[i])
+#else
+#define ccn32_32_parse(p,i) (((uint32_t *)p)[i^1])
+#endif
+#define ccn32_32_null 0
+
+#define ccn64_64(a0) a0
+#define ccn64_64_parse(p,i) p[i]
+#define ccn64_64_null 0
+
+#elif CCN_UNIT_SIZE == 4
+
+#define ccn32_32(a0) a0
+#define ccn32_32_parse(p,i) p[i]
+#define ccn32_32_null 0
+#define ccn64_32(a1,a0) ccn32_32(a0),ccn32_32(a1)
+
+#define ccn64_64(a1,a0) a0,a1
+#define ccn64_64_parse(p,i) p[1+(i<<1)],p[i<<1]
+#define ccn64_64_null 0,0
+
+#elif CCN_UNIT_SIZE == 2
+
+#define ccn32_32(a1,a0) a0,a1
+#define ccn32_32_parse(p,i) p[1+(i<<1)],p[i<<1]
+#define ccn32_32_null 0,0
+#define ccn64_32(a3,a2,a1,a0) ccn32_32(a1,a0),ccn32_32(a3,a2)
+
+#define ccn64_64(a3,a2,a1,a0) a0,a1,a2,a3
+#define ccn64_64_parse(p,i) p[3+(i<<2)],p[2+(i<<2)],p[1+(i<<2)],p[i<<2]
+#define ccn64_64_null 0,0,0,0
+
+#elif CCN_UNIT_SIZE == 1
+
+#define ccn32_32(a3,a2,a1,a0) a0,a1,a2,a3
+#define ccn32_32_parse(p,i) p[3+(i<<2)],p[2+(i<<2)],p[1+(i<<2)],p[i<<2]
+#define ccn32_32_null 0,0,0,0
+#define ccn64_32(a7,a6,a5,a4,a3,a2,a1,a0) ccn32_32(a3,a2,a1,a0),ccn32_32(a7,a6,a5,a4)
+
+#define ccn64_64(a7,a6,a5,a4,a3,a2,a1,a0) a0,a1,a2,a3,a4,a5,a6,a7
+#define ccn64_64_parse(p,i)  p[7+(i<<3)],p[6+(i<<3)],p[5+(i<<3)],p[4+(i<<3)],p[3+(i<<3)],p[2+(i<<3)],p[1+(i<<3)],p[i<<3]
+#define ccn64_64_null  0,0,0,0,0,0,0,0
+
+#endif
+
+
+/* Macros to construct fixed size ccn arrays from 64 or 32 bit quantities. */
+#define ccn192_64(a2,a1,a0) ccn64_64(a0),ccn64_64(a1),ccn64_64(a2)
+#define ccn224_32(a6,a5,a4,a3,a2,a1,a0) ccn64_32(a1,a0),ccn64_32(a3,a2),ccn64_32(a5,a4),ccn32_32(a6)
+#define ccn256_32(a7,a6,a5,a4,a3,a2,a1,a0) ccn64_32(a1,a0),ccn64_32(a3,a2),ccn64_32(a5,a4),ccn64_32(a7,a6)
+#define ccn384_32(a11,a10,a9,a8,a7,a6,a5,a4,a3,a2,a1,a0) ccn64_32(a1,a0),ccn64_32(a3,a2),ccn64_32(a5,a4),ccn64_32(a7,a6),ccn64_32(a9,a8),ccn64_32(a11,a10)
+
+
+#define CCN192_C(c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN64_C(a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN64_C(b7,b6,b5,b4,b3,b2,b1,b0),\
+    CCN64_C(c7,c6,c5,c4,c3,c2,c1,c0)
+
+#define CCN200_C(d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN192_C(c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN8_C(d0)
+
+#define CCN224_C(d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN192_C(c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN32_C(d3,d2,d1,d0)
+
+#define CCN232_C(d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN192_C(c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN40_C(d4,d3,d2,d1,d0)
+
+#define CCN256_C(d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN192_C(c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN64_C(d7,d6,d5,d4,d3,d2,d1,d0)
+
+#define CCN264_C(e0,d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN256_C(d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN8_C(e0)
+
+#define CCN384_C(f7,f6,f5,f4,f3,f2,f1,f0,e7,e6,e5,e4,e3,e2,e1,e0,d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN256_C(d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN64_C(e7,e6,e5,e4,e3,e2,e1,e0),\
+    CCN64_C(f7,f6,f5,f4,f3,f2,f1,f0)
+
+#define CCN392_C(g0,f7,f6,f5,f4,f3,f2,f1,f0,e7,e6,e5,e4,e3,e2,e1,e0,d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN384_C(f7,f6,f5,f4,f3,f2,f1,f0,e7,e6,e5,e4,e3,e2,e1,e0,d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN8_C(g0)
+
+#define CCN528_C(i1,i0,h7,h6,h5,h4,h3,h2,h1,h0,g7,g6,g5,g4,g3,g2,g1,g0,f7,f6,f5,f4,f3,f2,f1,f0,e7,e6,e5,e4,e3,e2,e1,e0,d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0) \
+    CCN256_C(d7,d6,d5,d4,d3,d2,d1,d0,c7,c6,c5,c4,c3,c2,c1,c0,b7,b6,b5,b4,b3,b2,b1,b0,a7,a6,a5,a4,a3,a2,a1,a0),\
+    CCN256_C(h7,h6,h5,h4,h3,h2,h1,h0,g7,g6,g5,g4,g3,g2,g1,g0,f7,f6,f5,f4,f3,f2,f1,f0,e7,e6,e5,e4,e3,e2,e1,e0),\
+    CCN16_C(i1,i0)
+
+#define CCN192_N  ccn_nof(192)
+#define CCN224_N  ccn_nof(224)
+#define CCN256_N  ccn_nof(256)
+#define CCN384_N  ccn_nof(384)
+#define CCN521_N  ccn_nof(521)
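+
+/* Editor's sketch: declaring a constant with the macros above, independent
+ of CCN_UNIT_SIZE (hex bytes are written most significant first).  The
+ value shown is the NIST P-192 prime 2^192 - 2^64 - 1:
+
+     static const cc_unit p192[CCN192_N] = {
+         CCN192_C(ff,ff,ff,ff,ff,ff,ff,ff,
+                  ff,ff,ff,ff,ff,ff,ff,fe,
+                  ff,ff,ff,ff,ff,ff,ff,ff)
+     };
+ */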
+
+#if defined(_ARM_ARCH_6) || defined(_ARM_ARCH_7)
+#if CCN_USE_BUILTIN_CLZ
+CC_INLINE CC_CONST
+cc_unit cc_clz(cc_unit data)
+{
+    return __builtin_clzl(data);
+}
+#else
+CC_INLINE CC_CONST
+cc_unit cc_clz(cc_unit data)
+{
+    __asm__ ("clz %0, %1\n" : "=l" (data) : "l" (data));
+    return data;
+}
+#endif /* CCN_USE_BUILTIN_CLZ */
+#endif /* defined(_ARM_ARCH_6) || defined(_ARM_ARCH_7) */
+
+
+#if CCN_N_INLINE
+/* Return the number of used units after stripping leading 0 units.  */
+CC_INLINE CC_PURE CC_NONNULL2 
+cc_size ccn_n(cc_size n, const cc_unit *s) {
+#if 1
+    while (n-- && s[n] == 0) {}
+    return n + 1;
+#elif 0
+    while (n && s[n - 1] == 0) {
+        n -= 1;
+    }
+    return n;
+#else
+    if (n & 1) {
+        if (s[n - 1])
+            return n;
+        n &= ~1;
+    }
+    if (n & 2) {
+        cc_unit a[2] = { s[n - 1], s[n - 2] };
+        if (a[0])
+            return n - 1;
+        if (a[1])
+            return n - 2;
+        n &= ~2;
+    }
+    while (n) {
+        cc_unit a[4] = { s[n - 1], s[n - 2], s[n - 3], s[n - 4] };
+        if (a[0])
+            return n - 1;
+        if (a[1])
+            return n - 2;
+        if (a[2])
+            return n - 3;
+        if (a[3])
+            return n - 4;
+        n -= 4;
+    }
+    return n;
+#endif
+}
+#else
+/* Return the number of used units after stripping leading 0 units.  */
+CC_PURE CC_NONNULL2
+cc_size ccn_n(cc_size n, const cc_unit *s);
+#endif
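+
+/* Editor's example: for n == 3 and s == { 7, 0, 0 } (little endian units)
+ ccn_n(3, s) == 1, and for an all-zero s it returns 0 -- which is why
+ ccn_is_zero() below can be defined as !ccn_n(). */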
+
+/* s >> k -> r return bits shifted out of least significant word in bits [0, n>
+ { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8
+ the _multi version doesn't return the shifted bits, but does support multiple
+ word shifts.  */
+CC_NONNULL((2,3))
+cc_unit ccn_shift_right(cc_size n, cc_unit *r, const cc_unit *s, size_t k);
+CC_NONNULL((2,3))
+void ccn_shift_right_multi(cc_size n, cc_unit *r,const cc_unit *s, size_t k);
+
+/* s << k -> r return bits shifted out of most significant word in bits [0, n>
+ { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8
+ the _multi version doesn't return the shifted bits, but does support multiple
+ word shifts */
+CC_NONNULL((2,3))
+cc_unit ccn_shift_left(cc_size n, cc_unit *r, const cc_unit *s, size_t k);
+CC_NONNULL((2,3))
+void ccn_shift_left_multi(cc_size n, cc_unit *r, const cc_unit *s, size_t k);
+
+/* s == 0 -> return 0 | s > 0 -> return index (starting at 1) of most
+ significant bit that is 1.
+ { N bit } N = n * sizeof(cc_unit) * 8 */
+CC_NONNULL2
+size_t ccn_bitlen(cc_size n, const cc_unit *s);
+
+/* Returns the number of bits which are zero before the first one bit
+   counting from least to most significant bit. */
+size_t ccn_trailing_zeros(cc_size n, const cc_unit *s);
+
+/* s == 0 -> return true | s != 0 -> return false
+ { N bit } N = n * sizeof(cc_unit) * 8 */
+#define ccn_is_zero(_n_, _s_) (!ccn_n(_n_, _s_))
+
+/* s == 1 -> return true | s != 1 -> return false
+ { N bit } N = n * sizeof(cc_unit) * 8 */
+#define ccn_is_one(_n_, _s_) (ccn_n(_n_, _s_) == 1 && _s_[0] == 1)
+
+#if CCN_CMP_INLINE
+CC_INLINE CC_PURE CC_NONNULL((2,3))
+int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t) {
+       while (n) {
+        n--;
+        cc_unit si = s[n];
+        cc_unit ti = t[n];
+        if (si != ti)
+            return si > ti ? 1 : -1;
+       }
+       return n;
+}
+#else
+/* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1
+ { N bit, N bit -> int } N = n * sizeof(cc_unit) * 8 */
+CC_PURE CC_NONNULL((2,3))
+int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t);
+#endif
+
+/* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1
+ { N bit, M bit -> int } N = ns * sizeof(cc_unit) * 8  M = nt * sizeof(cc_unit) * 8 */
+CC_INLINE
+int ccn_cmpn(cc_size ns, const cc_unit *s,
+             cc_size nt, const cc_unit *t) {
+    if (ns > nt) {
+        return 1;
+    } else if (ns < nt) {
+        return -1;
+    }
+    return ccn_cmp(ns, s, t);
+}
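+
+/* Editor's note: ccn_cmpn compares the unit counts first, so it expects
+ normalized lengths (e.g. from ccn_n()); otherwise { 5, 0 } with ns == 2
+ would compare greater than { 5 } with nt == 1 despite equal values. */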
+
+/* s - t -> r return 1 iff t > s
+ { N bit, N bit -> N bit } N = n * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3,4))
+cc_unit ccn_sub(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t);
+
+/* s - v -> r return 1 iff v > s, return 0 otherwise.
+ { N bit, sizeof(cc_unit) * 8 bit -> N bit } N = n * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3))
+cc_unit ccn_sub1(cc_size n, cc_unit *r, const cc_unit *s, cc_unit v);
+
+/* s - t -> r return 1 iff t > s
+ { N bit, NT bit -> N bit  NT <= N} N = n * sizeof(cc_unit) * 8 */
+CC_INLINE
+CC_NONNULL((2,3,5))
+cc_unit ccn_subn(cc_size n, cc_unit *r,const cc_unit *s,
+             cc_size nt, const cc_unit *t) {
+    return ccn_sub1(n - nt, r + nt, s + nt, ccn_sub(nt, r, s, t));
+}
+
+
+/* s + t -> r return carry if result doesn't fit in n bits.
+ { N bit, N bit -> N bit } N = n * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3,4))
+cc_unit ccn_add(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t);
+
+/* s + v -> r return carry if result doesn't fit in n bits.
+ { N bit, sizeof(cc_unit) * 8 bit -> N bit } N = n * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3))
+cc_unit ccn_add1(cc_size n, cc_unit *r, const cc_unit *s, cc_unit v);
+
+/* s + t -> r return carry if result doesn't fit in n bits
+ { N bit, NT bit -> N bit  NT <= N} N = n * sizeof(cc_unit) * 8 */
+CC_INLINE
+CC_NONNULL((2,3,5))
+cc_unit ccn_addn(cc_size n, cc_unit *r, const cc_unit *s,
+                 cc_size nt, const cc_unit *t) {
+    return ccn_add1(n - nt, r + nt, s + nt, ccn_add(nt, r, s, t));
+}
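+
+/* Editor's example: adding a 1-unit value into a wider accumulator, with
+ the carry propagated through the high units by ccn_add1():
+
+     cc_unit s[2] = { CCN_UNIT_MASK, CCN_UNIT_MASK };  // largest 2-unit value
+     cc_unit one[1] = { 1 };
+     cc_unit r[2];
+     cc_unit carry = ccn_addn(2, r, s, 1, one);  // r == { 0, 0 }, carry == 1
+ */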
+
+CC_NONNULL((4,5))
+void ccn_divmod(cc_size n, cc_unit *q, cc_unit *r, const cc_unit *s, const cc_unit *t);
+
+
+CC_NONNULL((2,3,4))
+void ccn_lcm(cc_size n, cc_unit *r2n, const cc_unit *s, const cc_unit *t);
+
+
+/* s * t -> r
+ { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3,4))
+void ccn_mul(cc_size n, cc_unit *r_2n, const cc_unit *s, const cc_unit *t);
+
+CC_NONNULL((2,3))
+cc_unit ccn_mul1(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit v);
+CC_NONNULL((2,3))
+cc_unit ccn_addmul1(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit v);
+
+#if 0
+/* a % d -> r
+   {2 * n bit, n bit -> n bit } n = count * sizeof(cc_unit) * 8 */
+CC_NONNULL((2,3,4))
+void ccn_mod(cc_size n, cc_unit *r, const cc_unit *a_2n, const cc_unit *d);
+#endif
+
+/* r = gcd(s, t).
+   N bit, N bit -> N bit */
+CC_NONNULL((2,3,4))
+void ccn_gcd(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t);
+
+/* r = gcd(s, t).
+ N bit, N bit -> O bit */
+CC_NONNULL((2,4,6))
+void ccn_gcdn(cc_size rn, cc_unit *r, cc_size sn, const cc_unit *s, cc_size tn, const cc_unit *t);
+
+/* r = (data, data_size) treated as a big endian byte array, return -1 if
+ data doesn't fit in r, return 0 otherwise. */
+CC_NONNULL((2,4))
+int ccn_read_uint(cc_size n, cc_unit *r, size_t data_size, const uint8_t *data);
+
+/* r = (data, data_size) treated as a big endian byte array, return -1 if
+ data doesn't fit in r, return 0 otherwise.
+ ccn_read_uint strips leading zeroes and doesn't care about sign. */
+#define ccn_read_int(n, r, data_size, data) ccn_read_uint(n, r, data_size, data)
+
+/* Return actual size in bytes needed to serialize s. */
+CC_PURE CC_NONNULL2
+size_t ccn_write_uint_size(cc_size n, const cc_unit *s);
+
+/* Serialize s, to out.
+   First byte of byte stream is the m.s. byte of s,
+   regardless of the size of cc_unit.
+
+   No assumption is made about the alignment of out.
+
+   The out_size argument should be the value returned from ccn_write_uint_size,
+   and is also the exact number of bytes this function will write to out.
+   If out_size is less than the value returned by ccn_write_uint_size, only the
+   first out_size non-zero most significant octets of s will be written. */
+CC_NONNULL((2,4))
+void ccn_write_uint(cc_size n, const cc_unit *s, size_t out_size, void *out);
+
+
+CC_INLINE CC_NONNULL((2,4))
+cc_size ccn_write_uint_padded(cc_size n, const cc_unit* s, size_t out_size, uint8_t* to)
+{
+    size_t bytesInKey = ccn_write_uint_size(n, s);
+    cc_size offset = (out_size > bytesInKey) ? out_size - bytesInKey : 0;
+
+    cc_zero(offset, to);
+    ccn_write_uint(n, s, out_size - offset, to + offset);
+
+    return offset;
+}
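+
+/* Editor's sketch: serializing a 256-bit value s (CCN256_N units) into a
+ fixed-width big endian buffer, as wire formats typically require; the
+ value is right-aligned and left-padded with zeros:
+
+     uint8_t out[32];
+     ccn_write_uint_padded(CCN256_N, s, sizeof(out), out);
+ */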
+
+
+/*  Return actual size in bytes needed to serialize s as int 
+    (adding leading zero if high bit is set). */
+CC_PURE CC_NONNULL2
+size_t ccn_write_int_size(cc_size n, const cc_unit *s);
+
+/*  Serialize s, to out.
+    First byte of byte stream is the m.s. byte of s,
+    regardless of the size of cc_unit.
+
+    No assumption is made about the alignment of out.
+
+    The out_size argument should be the value returned from ccn_write_int_size,
+    and is also the exact number of bytes this function will write to out.
+    If out_size is less than the value returned by ccn_write_int_size, only the
+    first out_size non-zero most significant octets of s will be written. */
+CC_NONNULL((2,4))
+void ccn_write_int(cc_size n, const cc_unit *s, size_t out_size, void *out);
+
+
+/* s^2 -> r
+ { n bit -> 2 * n bit } */
+CC_INLINE CC_NONNULL((2,3))
+void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s) {
+    ccn_mul(n, r, s, s);
+}
+
+/* s -> r
+ { n bit -> n bit } */
+CC_NONNULL((2,3))
+void ccn_set(cc_size n, cc_unit *r, const cc_unit *s);
+
+CC_INLINE CC_NONNULL2
+void ccn_zero(cc_size n, cc_unit *r) {
+    CC_BZERO(r, ccn_sizeof_n(n));
+}
+
+/* Burn (zero fill or otherwise overwrite) n cc_units of stack space. */
+void ccn_burn_stack(cc_size n);
+
+CC_INLINE CC_NONNULL2
+void ccn_seti(cc_size n, cc_unit *r, cc_unit v) {
+    /* assert(n > 0); */
+    r[0] = v;
+    ccn_zero(n - 1, r + 1);
+}
+
+CC_INLINE CC_NONNULL((2,4))
+void ccn_setn(cc_size n, cc_unit *r, CC_UNUSED const cc_size s_size, const cc_unit *s) {
+    /* FIXME: assert not available in kernel.
+    assert(n > 0);
+    assert(s_size > 0);
+    assert(s_size <= n);
+    */
+    ccn_set(s_size, r, s);
+    ccn_zero(n - s_size, r + s_size);
+}
+
+#define CC_SWAP_HOST_BIG_64(x) \
+    ((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \
+    (((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \
+    (((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \
+    (((uint64_t)(x) & 0x000000ff00000000ULL) >>  8) | \
+    (((uint64_t)(x) & 0x00000000ff000000ULL) <<  8) | \
+    (((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \
+    (((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \
+    (((uint64_t)(x) & 0x00000000000000ffULL) << 56)))
+#define CC_SWAP_HOST_BIG_32(x) \
+    ((((x) & 0xff000000) >> 24) | \
+    (((x) & 0x00ff0000) >>  8) | \
+    (((x) & 0x0000ff00) <<  8) | \
+    (((x) & 0x000000ff) <<  24) )
+#define CC_SWAP_HOST_BIG_16(x) \
+    ((((x) & 0xff00) >>  8) | \
+     (((x) & 0x00ff) <<  8))
+
+/* This should probably move if we move ccn_swap out of line. */
+#if CCN_UNIT_SIZE == 8
+#define CC_UNIT_TO_BIG(x) CC_SWAP_HOST_BIG_64(x)
+#elif CCN_UNIT_SIZE == 4
+#define CC_UNIT_TO_BIG(x) CC_SWAP_HOST_BIG_32(x)
+#elif CCN_UNIT_SIZE == 2
+#define CC_UNIT_TO_BIG(x) CC_SWAP_HOST_BIG_16(x)
+#elif CCN_UNIT_SIZE == 1
+#define CC_UNIT_TO_BIG(x) (x)
+#else
+#error unsupported CCN_UNIT_SIZE
+#endif
+
+/* Swap units in r in place from cc_unit vector byte order to big endian byte order (or back). */
+CC_INLINE CC_NONNULL2
+void ccn_swap(cc_size n, cc_unit *r) {
+    cc_unit *e;
+    for (e = r + n - 1; r < e; ++r, --e) {
+        cc_unit t = CC_UNIT_TO_BIG(*r);
+        *r = CC_UNIT_TO_BIG(*e);
+        *e = t;
+    }
+    if (n & 1)
+        *r = CC_UNIT_TO_BIG(*r);
+}
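+
+/* Editor's note: ccn_swap is an involution -- applying it twice restores
+ the original value -- and is the bridge between the little endian unit
+ order used internally and big endian byte order on the wire. */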
+
+CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4
+void ccn_xor(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t) {
+    while (n--) {
+        r[n] = s[n] ^ t[n];
+    }
+}
+
+/* Debugging */
+CC_NONNULL2
+void ccn_print(cc_size n, const cc_unit *s);
+CC_NONNULL3
+void ccn_lprint(cc_size n, const char *label, const cc_unit *s);
+
+/* Forward declaration so we don't depend on ccrng.h. */
+struct ccrng_state;
+
+#if 0
+CC_INLINE CC_NONNULL((2,3))
+int ccn_random(cc_size n, cc_unit *r, struct ccrng_state *rng) {
+    return rng->generate(rng, ccn_sizeof_n(n), (unsigned char *)r);
+}
+#else
+#define ccn_random(_n_,_r_,_ccrng_ctx_) \
+    ccrng_generate(_ccrng_ctx_, ccn_sizeof_n(_n_), (unsigned char *)_r_)
+#endif
+
+/* Make a ccn of size ccn_nof(nbits) units with up to nbits sized random value. */
+CC_NONNULL((2,3))
+int ccn_random_bits(cc_size nbits, cc_unit *r, struct ccrng_state *rng);
+
+#endif /* _CORECRYPTO_CCN_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccpad.h b/EXTERNAL_HEADERS/corecrypto/ccpad.h
new file mode 100644 (file)
index 0000000..71789e0
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ *  ccpad.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/6/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCPAD_H_
+#define _CORECRYPTO_CCPAD_H_
+
+#include <corecrypto/ccmode.h>
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts1_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts1_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts2_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts2_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts3_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also, in is nbytes long and out is nbytes long. */
+void ccpad_cts3_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is non-zero and a multiple of block_size.  Furthermore, in is nbytes long and out is nbytes long.  Returns the number of bytes written to out (technically we always write nbytes to out, but the returned value is the number of bytes decrypted after removal of padding).
+
+    To be safe we remove the entire offending block if the pkcs7 padding checks failed.  However, we purposely don't report the failure to decode the padding, since any use of this error leads to potential security exploits.  So currently there is no way to distinguish between a full block of padding and bad padding.
+ */
+unsigned long ccpad_pkcs7_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                                  unsigned long nbytes, const void *in,
+                                  void *out);
+
+/* Contract is that in is nbytes long.  Writes ((nbytes / block_size) + 1) * block_size bytes to out.  In other words, out must be nbytes rounded down to the closest multiple of block_size, plus block_size bytes. */
+void ccpad_pkcs7_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx,
+                         unsigned long nbytes, const void *in, void *out);
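+
+/* Editor's worked example: with a 16-byte block, nbytes == 20 writes
+ ((20 / 16) + 1) * 16 == 32 bytes, and nbytes == 32 writes 48 -- an exact
+ multiple of the block size always gains a full block of padding. */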
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also in is nbytes long out is nbytes long. */
+void ccpad_xts_decrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+/* Contract is nbytes is at least 1 block + 1 byte.  Also in is nbytes long out is nbytes long. */
+void ccpad_xts_encrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                       unsigned long nbytes, const void *in, void *out);
+
+#endif /* _CORECRYPTO_CCPAD_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h b/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h
new file mode 100644 (file)
index 0000000..15b94da
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ *  ccpbkdf2.h
+ *  corecrypto
+ *
+ *  Copyright 1999-2001, 2010 Apple Inc. All rights reserved.
+ *
+ *  Derived from pbkdf2.h by Mitch Adler on 09-12-2010. 
+ *
+ */
+
+#ifndef _CORECRYPTO_CCPBKDF2_H_
+#define _CORECRYPTO_CCPBKDF2_H_
+
+
+#include <corecrypto/ccdigest.h>
+
+/*! @function ccpbkdf2_hmac
+    @abstract perform a pbkdf2 using HMAC(di) for the PRF (see PKCS#5 for specification)
+    @discussion This performs a standard PBKDF2 transformation of password and salt through 
+an HMAC PRF of the caller's selection (any digest, typically SHA-1), returning dkLen bytes
+containing the entropy.
+
+Considerations:
+The salt used should be at least 8 bytes long.  Each session should use its own salt.
+We use the password as the key for the HMAC and the running data as the text for the HMAC to make a PRF.
+SHA-1 is a good hash to use for the core of the HMAC PRF.
+    @param di           digest info defining the digest type to use in the PRF.
+    @param passwordLen  amount of data to be fed in
+    @param password     data to be fed into the PBKDF
+    @param saltLen      length of the salt
+    @param salt         salt to be used in pbkdf
+    @param iterations   iterations to perform
+    @param dkLen        length of the results
+    @param dk           buffer for the results of the PBKDF transformation, must be dkLen bytes long
+ */
+int ccpbkdf2_hmac(const struct ccdigest_info *di,
+                   unsigned long passwordLen, const void *password,
+                   unsigned long saltLen, const void *salt,
+                   unsigned long iterations,
+                   unsigned long dkLen, void *dk);
+
+#endif /* _CORECRYPTO_CCPBKDF2_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccrc4.h b/EXTERNAL_HEADERS/corecrypto/ccrc4.h
new file mode 100644 (file)
index 0000000..a177f86
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ *  ccrc4.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/22/10.
+ *  Copyright 2010,2011 Apple, Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCRC4_H_
+#define _CORECRYPTO_CCRC4_H_
+
+#include <corecrypto/ccmode.h>
+
+cc_aligned_struct(16) ccrc4_ctx;
+
+/* Declare an rc4 context named _name_.  Pass the size field of a struct
+ ccrc4_info for _size_. */
+#define ccrc4_ctx_decl(_size_, _name_) cc_ctx_decl(ccrc4_ctx, _size_, _name_)
+#define ccrc4_ctx_clear(_size_, _name_) cc_ctx_clear(ccrc4_ctx, _size_, _name_)
+
+struct ccrc4_info {
+    size_t size;        /* first argument to ccrc4_ctx_decl(). */
+    void (*init)(ccrc4_ctx *ctx, unsigned long key_len, const void *key);
+    void (*crypt)(ccrc4_ctx *ctx, unsigned long nbytes, const void *in, void *out);
+};
+
+
+const struct ccrc4_info *ccrc4(void);
+
+extern const struct ccrc4_info ccrc4_eay;
+
+struct ccrc4_vector {
+    unsigned long keylen;
+    const void *key;
+    unsigned long datalen;
+    const void *pt;
+    const void *ct;
+};
+
+int ccrc4_test(const struct ccrc4_info *rc4, const struct ccrc4_vector *v);
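+
+/* Editor's sketch: one-shot use through the selector.  RC4 is a stream
+ cipher, so the same crypt call performs encryption and decryption:
+
+     const struct ccrc4_info *rc4 = ccrc4();
+     ccrc4_ctx_decl(rc4->size, ctx);
+     rc4->init(ctx, key_len, key);
+     rc4->crypt(ctx, nbytes, in, out);
+     ccrc4_ctx_clear(rc4->size, ctx);
+ */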
+
+#endif /* _CORECRYPTO_CCRC4_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng.h b/EXTERNAL_HEADERS/corecrypto/ccrng.h
new file mode 100644 (file)
index 0000000..8a31d5a
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ *  ccrng.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/13/10.
+ *  Copyright 2010 Apple, Inc. All rights reserved.
+ *
+ */
+
+
+#ifndef _CORECRYPTO_CCRNG_H_
+#define _CORECRYPTO_CCRNG_H_
+
+#include <stdint.h>
+
+#define CCRNG_STATE_COMMON                                                          \
+    int (*generate)(struct ccrng_state *rng, unsigned long outlen, void *out);
+
+/* default state structure - do not instantiate, instead use the specific one you need */
+struct ccrng_state {
+    CCRNG_STATE_COMMON
+};
+
+#define ccrng_generate(ctx, outlen, out) ((ctx)->generate((ctx), (outlen), (out)))
+
+#endif /* _CORECRYPTO_CCRNG_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng_system.h b/EXTERNAL_HEADERS/corecrypto/ccrng_system.h
new file mode 100644 (file)
index 0000000..049970d
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ *  ccrng_system.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/13/10.
+ *  Copyright 2010 Apple, Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCRNG_SYSTEM_H_
+#define _CORECRYPTO_CCRNG_SYSTEM_H_
+
+#include <corecrypto/ccrng.h>
+
+struct ccrng_system_state {
+    CCRNG_STATE_COMMON
+    int fd;
+};
+
+int ccrng_system_init(struct ccrng_system_state *rng);
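+
+/* Editor's sketch (assumed usage; the fd member suggests the state is
+ backed by a system random device):
+
+     struct ccrng_system_state rng;
+     ccrng_system_init(&rng);
+     uint8_t buf[16];
+     ccrng_generate((struct ccrng_state *)&rng, sizeof(buf), buf);
+ */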
+
+#endif /* _CORECRYPTO_CCRNG_SYSTEM_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha1.h b/EXTERNAL_HEADERS/corecrypto/ccsha1.h
new file mode 100644 (file)
index 0000000..fbb258f
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ *  ccsha1.h
+ *  corecrypto
+ *
+ *  Created by Michael Brouwer on 12/1/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCSHA1_H_
+#define _CORECRYPTO_CCSHA1_H_
+
+#include <corecrypto/ccdigest.h>
+#include <corecrypto/cc_config.h>
+
+#define CCSHA1_BLOCK_SIZE   64
+#define CCSHA1_OUTPUT_SIZE  20
+#define CCSHA1_STATE_SIZE   20
+
+/* sha1 selector */
+const struct ccdigest_info *ccsha1_di(void);
+
+extern const uint32_t ccsha1_initial_state[5];
+
+/* shared between several implementations */
+void ccsha1_final(const struct ccdigest_info *di, ccdigest_ctx_t,
+                  unsigned char *digest);
+
+
+/* Implementations */
+extern const struct ccdigest_info ccsha1_ltc_di;
+extern const struct ccdigest_info ccsha1_eay_di;
+
+#if CCSHA1_VNG_INTEL
+extern const struct ccdigest_info ccsha1_vng_intel_SSE3_di;
+extern const struct ccdigest_info ccsha1_vng_intel_NOSSE3_di;
+#endif
+
+#if CCSHA1_VNG_ARMV7NEON
+extern const struct ccdigest_info ccsha1_vng_armv7neon_di;
+#endif
+
+/* TODO: Placeholders */
+#define ccoid_sha1 ((unsigned char *)"\x06\x05\x2b\x0e\x03\x02\x1a")
+#define ccoid_sha1_len 7
+
+#endif /* _CORECRYPTO_CCSHA1_H_ */
diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha2.h b/EXTERNAL_HEADERS/corecrypto/ccsha2.h
new file mode 100644 (file)
index 0000000..4385b89
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ *  ccsha2.h
+ *  corecrypto
+ *
+ *  Created by Fabrice Gautier on 12/3/10.
+ *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CCSHA2_H_
+#define _CORECRYPTO_CCSHA2_H_
+
+#include <corecrypto/ccdigest.h>
+
+/* sha2 selectors */
+const struct ccdigest_info *ccsha224_di(void);
+const struct ccdigest_info *ccsha256_di(void);
+const struct ccdigest_info *ccsha384_di(void);
+const struct ccdigest_info *ccsha512_di(void);
+
+/* TODO: Placeholders */
+#define ccoid_sha224 ((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")
+#define ccoid_sha224_len 11
+
+#define ccoid_sha256 ((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")
+#define ccoid_sha256_len 11
+
+#define ccoid_sha384 ((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")
+#define ccoid_sha384_len 11
+
+#define ccoid_sha512 ((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")
+#define ccoid_sha512_len 11
+
+
+/* SHA256 */
+#define CCSHA256_BLOCK_SIZE  64
+#define        CCSHA256_OUTPUT_SIZE 32
+#define        CCSHA256_STATE_SIZE  32
+extern const struct ccdigest_info ccsha256_ltc_di;
+extern const struct ccdigest_info ccsha256_vng_intel_SSE3_di;
+extern const struct ccdigest_info ccsha256_vng_intel_NOSSE3_di;
+extern const struct ccdigest_info ccsha256_vng_armv7neon_di;
+extern const uint32_t ccsha256_K[64];
+
+/* SHA224 */
+#define        CCSHA224_OUTPUT_SIZE 28
+extern const struct ccdigest_info ccsha224_ltc_di;
+extern const struct ccdigest_info ccsha224_vng_intel_SSE3_di;
+extern const struct ccdigest_info ccsha224_vng_intel_NOSSE3_di;
+extern const struct ccdigest_info ccsha224_vng_armv7neon_di;
+
+/* SHA512 */
+#define CCSHA512_BLOCK_SIZE  128
+#define        CCSHA512_OUTPUT_SIZE  64
+#define        CCSHA512_STATE_SIZE   64
+extern const struct ccdigest_info ccsha512_ltc_di;
+
+/* SHA384 */
+#define        CCSHA384_OUTPUT_SIZE  48
+extern const struct ccdigest_info ccsha384_ltc_di;
+
+#endif /* _CORECRYPTO_CCSHA2_H_ */
index 1ce373da06b81dead92d5ab55c3fc78b3202817a..e54b58c6dd40d278dab600a171372c59d3ff3783 100644 (file)
@@ -11,7 +11,6 @@ INSTINC_SUBDIRS =
 
 EXPORT_FILES = \
        fat.h           \
-       kld.h           \
        loader.h        \
        nlist.h         \
        reloc.h
diff --git a/EXTERNAL_HEADERS/mach-o/kld.h b/EXTERNAL_HEADERS/mach-o/kld.h
deleted file mode 100644 (file)
index 6b15999..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#ifndef _MACHO_KLD_H_
-#define _MACHO_KLD_H_
-
-#include <mach-o/loader.h>
-#include <stdarg.h>
-
-/*
- * These API's are in libkld.  Both kextload(8) and /mach_kernel should
- * link with -lkld and then ld(1) will expand -lkld to libkld.dylib or
- * libkld.a depending on if -dynamic or -static is in effect.
- *
- * Note: we are using the __DYNAMIC__ flag to indicate user space kernel
- * linking and __STATIC__ as a synonym of KERNEL.
- */
-
-/*
- * Note that you must supply the following function for error reporting when
- * using any of the functions listed here.
- */
-extern void kld_error_vprintf(const char *format, va_list ap);
-
-/*
- * These two are only in libkld.dylib for use by kextload(8) (user code compiled
- * with the default -dynamic).
- */
-#ifdef __DYNAMIC__
-extern long kld_load_basefile(
-    const char *base_filename);
-
-/* Note: this takes only one object file name */
-extern long kld_load(
-    struct mach_header **header_addr,
-    const char *object_filename,
-    const char *output_filename);
-
-extern long kld_load_from_memory(
-    struct mach_header **header_addr,
-    const char *object_name,
-    char *object_addr,
-    long object_size,
-    const char *output_filename);
-#endif /* __DYNAMIC__ */
-
-/*
- * This one is only in libkld.a use by /mach_kernel (kernel code compiled with
- * -static).
- */
-#ifdef __STATIC__
-/* Note: this api does not write an output file */
-extern long kld_load_from_memory(
-    struct mach_header **header_addr,
-    const char *object_name,
-    char *object_addr,
-    long object_size);
-#endif /* __STATIC__ */
-
-extern long kld_load_basefile_from_memory(
-    const char *base_filename,
-    char *base_addr,
-    long base_size);
-
-extern long kld_unload_all(
-    long deallocate_sets);
-
-extern long kld_lookup(
-    const char *symbol_name,
-    unsigned long *value);
-
-extern long kld_forget_symbol(
-    const char *symbol_name);
-
-extern void kld_address_func(
-    unsigned long (*func)(unsigned long size, unsigned long headers_size));
-
-#define KLD_STRIP_ALL  0x00000000
-#define KLD_STRIP_NONE 0x00000001
-
-extern void kld_set_link_options(
-    unsigned long link_options);
-
-#endif /* _MACHO_KLD_H_ */
index 9fecf2b4af6fd41202c1fd680bb2c1de83fa78ae..f41664e54d744512933c6267687d0e9fe41a0e58 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Inc.  All Rights Reserved.
+ * Copyright (c) 1999-2010 Apple Inc.  All Rights Reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -174,13 +174,6 @@ struct mach_header_64 {
                                           in the task will be given stack
                                           execution privilege.  Only used in
                                           MH_EXECUTE filetypes. */
-#define        MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs.  When
-                                            linking against a dylib that
-                                            has this bit set, the static linker
-                                            will automatically not create a
-                                            LC_LOAD_DYLIB load command to the
-                                            dylib if no symbols are being
-                                            referenced from the dylib. */
 #define MH_ROOT_SAFE 0x40000           /* When this bit is set, the binary 
                                          declares it is safe for use in
                                          processes with uid zero */
@@ -197,6 +190,16 @@ struct mach_header_64 {
                                           load the main executable at a
                                           random address.  Only used in
                                           MH_EXECUTE filetypes. */
+#define        MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs.  When
+                                            linking against a dylib that
+                                            has this bit set, the static linker
+                                            will automatically not create a
+                                            LC_LOAD_DYLIB load command to the
+                                            dylib if no symbols are being
+                                            referenced from the dylib. */
+#define MH_HAS_TLV_DESCRIPTORS 0x800000 /* Contains a section of type 
+                                           S_THREAD_LOCAL_VARIABLES */
+
 #define MH_NO_HEAP_EXECUTION 0x1000000 /* When this bit is set, the OS will
                                           run the main executable with
                                           a non-executable heap even on
@@ -281,6 +284,17 @@ struct load_command {
 #define        LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */
 #define        LC_DYLD_INFO    0x22    /* compressed dyld information */
 #define        LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD)    /* compressed dyld information only */
+#define        LC_LOAD_UPWARD_DYLIB (0x23 | LC_REQ_DYLD) /* load upward dylib */
+#define LC_VERSION_MIN_MACOSX 0x24   /* build for MacOSX min OS version */
+#define LC_VERSION_MIN_IPHONEOS 0x25 /* build for iPhoneOS min OS version */
+#define LC_FUNCTION_STARTS 0x26 /* compressed table of function start addresses */
+#define LC_DYLD_ENVIRONMENT 0x27 /* string for dyld to treat
+                                   like environment variable */
+#define LC_MAIN (0x28|LC_REQ_DYLD) /* replacement for LC_UNIXTHREAD */
+#define LC_DATA_IN_CODE 0x29 /* table of non-instructions in __text */
+#define LC_SOURCE_VERSION 0x2A /* source version used to build binary */
+#define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */
+
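All of the commands above are located by the same walk over the variable-length load-command area that follows the mach_header. A minimal sketch for a 64-bit image, assuming mh points at a validly mapped image:

    #include <mach-o/loader.h>
    #include <stdio.h>

    static void dump_new_commands(const struct mach_header_64 *mh)
    {
            /* Load commands start immediately after the header. */
            const struct load_command *lc = (const struct load_command *)(mh + 1);
            uint32_t i;

            for (i = 0; i < mh->ncmds; i++) {
                    switch (lc->cmd) {
                    case LC_MAIN:
                            printf("LC_MAIN\n");
                            break;
                    case LC_FUNCTION_STARTS:
                    case LC_DATA_IN_CODE:
                    case LC_DYLIB_CODE_SIGN_DRS:
                            /* These three all use linkedit_data_command. */
                            printf("linkedit data command 0x%x\n", lc->cmd);
                            break;
                    }
                    lc = (const struct load_command *)((const char *)lc + lc->cmdsize);
            }
    }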
 
 /*
  * A variable length string in a load command is represented by an lc_str
@@ -470,6 +484,20 @@ struct section_64 { /* for 64-bit architectures */
 #define        S_LAZY_DYLIB_SYMBOL_POINTERS    0x10    /* section with only lazy
                                                   symbol pointers to lazy
                                                   loaded dylibs */
+/*
+ * Section types to support thread local variables
+ */
+#define S_THREAD_LOCAL_REGULAR                   0x11  /* template of initial 
+                                                         values for TLVs */
+#define S_THREAD_LOCAL_ZEROFILL                  0x12  /* template of initial 
+                                                         values for TLVs */
+#define S_THREAD_LOCAL_VARIABLES                 0x13  /* TLV descriptors */
+#define S_THREAD_LOCAL_VARIABLE_POINTERS         0x14  /* pointers to TLV 
+                                                          descriptors */
+#define S_THREAD_LOCAL_INIT_FUNCTION_POINTERS    0x15  /* functions to call
+                                                         to initialize TLV
+                                                         values */
+
 /*
  * Constants for the section attributes part of the flags field of a section
  * structure.
@@ -716,9 +744,12 @@ struct prebound_dylib_command {
  * the name of the dynamic linker (LC_LOAD_DYLINKER).  And a dynamic linker
  * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
  * A file can have at most one of these.
+ * This struct is also used for the LC_DYLD_ENVIRONMENT load command and
+ * contains a string for dyld to treat like an environment variable.
  */
 struct dylinker_command {
-       uint32_t        cmd;            /* LC_ID_DYLINKER or LC_LOAD_DYLINKER */
+       uint32_t        cmd;            /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
+                                          LC_DYLD_ENVIRONMENT */
        uint32_t        cmdsize;        /* includes pathname string */
        union lc_str    name;           /* dynamic linker's path name */
 };
@@ -1122,7 +1153,9 @@ struct rpath_command {
  * of data in the __LINKEDIT segment.  
  */
 struct linkedit_data_command {
-    uint32_t   cmd;            /* LC_CODE_SIGNATURE or LC_SEGMENT_SPLIT_INFO */
+    uint32_t   cmd;            /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
+                                   LC_FUNCTION_STARTS, LC_DATA_IN_CODE,
+                                  or LC_DYLIB_CODE_SIGN_DRS */
     uint32_t   cmdsize;        /* sizeof(struct linkedit_data_command) */
     uint32_t   dataoff;        /* file offset of data in __LINKEDIT segment */
     uint32_t   datasize;       /* file size of data in __LINKEDIT segment  */
@@ -1141,6 +1174,18 @@ struct encryption_info_command {
                                   0 means not-encrypted yet */
 };
 
+/*
+ * The version_min_command contains the min OS version on which this 
+ * binary was built to run.
+ */
+struct version_min_command {
+    uint32_t   cmd;            /* LC_VERSION_MIN_MACOSX or
+                                  LC_VERSION_MIN_IPHONEOS  */
+    uint32_t   cmdsize;        /* sizeof(struct version_min_command) */
+    uint32_t   version;        /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+    uint32_t   sdk;            /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+};
+
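The nibble packing above puts X in the upper 16 bits and Y and Z in one byte each, so 10.8.0 encodes as 0x000A0800. A sketch of the decode:

    #include <stdio.h>
    #include <stdint.h>

    /* Decode the xxxx.yy.zz packing of the version and sdk fields;
     * e.g. 0x000A0800 prints as 10.8.0. */
    static void print_min_version(uint32_t v)
    {
            printf("%u.%u.%u\n", v >> 16, (v >> 8) & 0xff, v & 0xff);
    }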
 /*
  * The dyld_info_command contains the file offsets and sizes of 
  * the new compressed form of the information dyld needs to 
@@ -1170,7 +1215,7 @@ struct dyld_info_command {
     /*
      * Dyld binds an image during the loading process, if the image
      * requires any pointers to be initialized to symbols in other images.  
-     * The rebase information is a stream of byte sized 
+     * The bind information is a stream of byte sized 
      * opcodes whose symbolic names start with BIND_OPCODE_.
      * Conceptually the bind information is a table of tuples:
      *    <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend>
@@ -1223,19 +1268,27 @@ struct dyld_info_command {
      * The export area is a stream of nodes.  The first node sequentially
      * is the start node for the trie.  
      *
-     * Nodes for a symbol start with a byte that is the length of
+     * Nodes for a symbol start with a uleb128 that is the length of
      * the exported symbol information for the string so far.
-     * If there is no exported symbol, the byte is zero. If there
-     * is exported info, it follows the length byte.  The exported
-     * info normally consists of a flags and offset both encoded
-     * in uleb128.  The offset is location of the content named
-     * by the symbol.  It is the offset from the mach_header for
-     * the image.  
+     * If there is no exported symbol, the node starts with a zero byte.
+     * If there is exported info, it follows the length.
+     *
+     * First is a uleb128 containing flags.  Normally, it is followed by
+     * a uleb128 encoded offset which is the location of the content named
+     * by the symbol from the mach_header for the image.  If the flags
+     * field is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
+     * a uleb128 encoded library ordinal, then a zero terminated
+     * UTF8 string.  If the string is zero length, then the symbol
+     * is re-exported from the specified dylib with the same name.
+     * If the flags field is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then
+     * following the flags are two uleb128s: the stub offset and the
+     * resolver offset.  The stub is used by non-lazy pointers.  The
+     * resolver is used by lazy pointers and must be called to get the
+     * actual address to use.
      *
-     * After the initial byte and optional exported symbol information
-     * is a byte of how many edges (0-255) that this node has leaving
-     * it, followed by each edge.
-     * Each edge is a zero terminated cstring of the addition chars
+     * After the optional exported symbol information is a byte giving
+     * the number of edges (0-255) leaving this node,
+     * followed by each edge.
+     * Each edge is a zero terminated UTF8 string of the additional chars
      * in the symbol, followed by a uleb128 offset for the node that
      * edge points to.
      *  
@@ -1303,8 +1356,8 @@ struct dyld_info_command {
 #define EXPORT_SYMBOL_FLAGS_KIND_REGULAR                       0x00
 #define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL                  0x01
 #define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION                    0x04
-#define EXPORT_SYMBOL_FLAGS_INDIRECT_DEFINITION                        0x08
-#define EXPORT_SYMBOL_FLAGS_HAS_SPECIALIZATIONS                        0x10
+#define EXPORT_SYMBOL_FLAGS_REEXPORT                           0x08
+#define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER                  0x10
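The bind/rebase opcode streams and the export trie above are built on uleb128-encoded integers. A minimal sketch of the decoding primitive a trie walker needs:

    #include <stdint.h>

    /* Read one unsigned LEB128 value: 7 payload bits per byte, low bits
     * first; the high bit marks a continuation byte. */
    static uint64_t read_uleb128(const uint8_t **pp, const uint8_t *end)
    {
            uint64_t result = 0;
            int shift = 0;
            const uint8_t *p = *pp;

            while (p < end) {
                    uint8_t byte = *p++;
                    result |= (uint64_t)(byte & 0x7f) << shift;
                    shift += 7;
                    if ((byte & 0x80) == 0)
                            break;
            }
            *pp = p;
            return result;
    }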
 
 /*
  * The symseg_command contains the offset and size of the GNU style
@@ -1346,4 +1399,60 @@ struct fvmfile_command {
        uint32_t        header_addr;    /* files virtual address */
 };
 
+
+/*
+ * The entry_point_command is a replacement for thread_command.
+ * It is used for main executables to specify the location (file offset)
+ * of main().  If -stack_size was used at link time, the stacksize
+ * field will contain the stack size needed for the main thread.
+ */
+struct entry_point_command {
+    uint32_t  cmd;     /* LC_MAIN only used in MH_EXECUTE filetypes */
+    uint32_t  cmdsize; /* 24 */
+    uint64_t  entryoff;        /* file (__TEXT) offset of main() */
+    uint64_t  stacksize;/* if not zero, initial stack size */
+};
+
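Because the mach_header sits at the start of __TEXT, a loader can turn entryoff into an entry address by simple addition once the image is mapped. A sketch (the four-argument main signature reflects how dyld-style loaders invoke it):

    #include <stdint.h>
    #include <mach-o/loader.h>

    typedef int (*main_func_t)(int argc, char **argv, char **envp, char **apple);

    /* entryoff is a __TEXT offset, so header address + entryoff = main(). */
    static main_func_t entry_from_lc_main(const struct mach_header_64 *mh,
                                          const struct entry_point_command *ep)
    {
            return (main_func_t)((uintptr_t)mh + (uintptr_t)ep->entryoff);
    }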
+
+/*
+ * The source_version_command is an optional load command containing
+ * the version of the sources used to build the binary.
+ */
+struct source_version_command {
+    uint32_t  cmd;     /* LC_SOURCE_VERSION */
+    uint32_t  cmdsize; /* 16 */
+    uint64_t  version; /* A.B.C.D.E packed as a24.b10.c10.d10.e10 */
+};
+
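A worked decode of the a24.b10.c10.d10.e10 packing: A occupies the top 24 bits and B through E ten bits each, so 2050.7.9.0.0 encodes as (2050ULL << 40) | (7ULL << 30) | (9ULL << 20). A sketch:

    #include <stdio.h>
    #include <stdint.h>

    static void print_source_version(uint64_t v)
    {
            printf("%llu.%llu.%llu.%llu.%llu\n",
                   (unsigned long long)(v >> 40),
                   (unsigned long long)((v >> 30) & 0x3ff),
                   (unsigned long long)((v >> 20) & 0x3ff),
                   (unsigned long long)((v >> 10) & 0x3ff),
                   (unsigned long long)(v & 0x3ff));
    }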
+
+/*
+ * The LC_DATA_IN_CODE load command uses a linkedit_data_command
+ * to point to an array of data_in_code_entry entries. Each entry
+ * describes a range of data in a code section.  This load command
+ * is only used in final linked images.
+ */
+struct data_in_code_entry {
+    uint32_t   offset;  /* from mach_header to start of data range */
+    uint16_t   length;  /* number of bytes in data range */
+    uint16_t   kind;    /* a DICE_KIND_* value  */
+};
+#define DICE_KIND_DATA              0x0001  /* L$start$data$...  label */
+#define DICE_KIND_JUMP_TABLE8       0x0002  /* L$start$jt8$...   label */
+#define DICE_KIND_JUMP_TABLE16      0x0003  /* L$start$jt16$...  label */
+#define DICE_KIND_JUMP_TABLE32      0x0004  /* L$start$jt32$...  label */
+#define DICE_KIND_ABS_JUMP_TABLE32  0x0005  /* L$start$jta32$... label */
+
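The table is reached through the LC_DATA_IN_CODE command's linkedit_data_command (dataoff/datasize, see above). A sketch over a raw file buffer; the names are illustrative:

    #include <stdio.h>
    #include <stdint.h>
    #include <mach-o/loader.h>

    /* 'file' points at the mach_header of an unmapped file image and
     * 'lc' is its LC_DATA_IN_CODE command. */
    static void walk_data_in_code(const uint8_t *file,
                                  const struct linkedit_data_command *lc)
    {
            const struct data_in_code_entry *dice =
                (const struct data_in_code_entry *)(file + lc->dataoff);
            uint32_t i, n = lc->datasize / sizeof(*dice);

            for (i = 0; i < n; i++)
                    printf("data at +0x%x, %u bytes, kind 0x%x\n",
                           dice[i].offset, dice[i].length, dice[i].kind);
    }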
+
+
+/*
+ * Sections of type S_THREAD_LOCAL_VARIABLES contain an array 
+ * of tlv_descriptor structures.
+ */
+struct tlv_descriptor
+{
+       void*           (*thunk)(struct tlv_descriptor*);
+       unsigned long   key;
+       unsigned long   offset;
+};
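Conceptually, every access the compiler emits for a __thread variable funnels through the descriptor's thunk, which returns the address of the calling thread's instance of the variable. A sketch:

    /* What a compiled read of '__thread int x' reduces to. */
    static int get_tlv_value(struct tlv_descriptor *d)
    {
            int *p = (int *)d->thunk(d);
            return *p;
    }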
+
 #endif /* _MACHO_LOADER_H_ */
index 868ec2046b5f1757b750d482f6a064e515623871..1c1941012e9edc663bdfaabba9a7afdd2c087adb 100644 (file)
@@ -214,8 +214,10 @@ struct nlist_64 {
  * determined by the static link editor.  Which library an undefined symbol is
  * bound to is recorded by the static linker in the high 8 bits of the n_desc
  * field using the SET_LIBRARY_ORDINAL macro below.  The ordinal recorded
- * references the libraries listed in the Mach-O's LC_LOAD_DYLIB load commands
- * in the order they appear in the headers.   The library ordinals start from 1.
+ * references the libraries listed in the Mach-O's LC_LOAD_DYLIB,
+ * LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB, and
+ * LC_LAZY_LOAD_DYLIB, etc. load commands in the order they appear in the
+ * headers.   The library ordinals start from 1.
  * For a dynamic library that is built as a two-level namespace image the
 * undefined references from a module to one defined in another use the same
 * nlist struct, and in that case SELF_LIBRARY_ORDINAL is used as the library
 * ordinal.  For
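The SET_LIBRARY_ORDINAL/GET_LIBRARY_ORDINAL macros in this header do the packing and unpacking; the unpack side is equivalent to this sketch:

    #include <stdint.h>

    /* The ordinal occupies the high 8 bits of n_desc. */
    static uint8_t library_ordinal(uint16_t n_desc)
    {
            return (uint8_t)((n_desc >> 8) & 0xff);
    }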
@@ -286,6 +288,14 @@ struct nlist_64 {
  */
 #define N_ARM_THUMB_DEF        0x0008 /* symbol is a Thumb function (ARM) */
 
+/*
+ * The N_SYMBOL_RESOLVER bit of the n_desc field indicates that
+ * the function is actually a resolver function and should
+ * be called to get the address of the real function to use.
+ * This bit is only available in .o files (MH_OBJECT filetype)
+ */
+#define N_SYMBOL_RESOLVER  0x0100 
+
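A sketch of the resolver pattern this bit supports: a resolver is an ordinary function whose return value is the address of the implementation to bind. Setting the bit itself is the toolchain's job (e.g. via the assembler's .symbol_resolver directive); the feature test below is hypothetical:

    #include <stdbool.h>

    static bool cpu_has_sse(void) { return false; }    /* hypothetical feature test */
    static int add_generic(int a, int b) { return a + b; }
    static int add_sse(int a, int b) { return a + b; } /* stand-in tuned version */

    /* Called once at bind time; its return value becomes the bound address. */
    static void *add_resolver(void)
    {
            return cpu_has_sse() ? (void *)add_sse : (void *)add_generic;
    }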
 #ifndef __STRICT_BSD__
 #if __cplusplus
 extern "C" {
index e36f4f734d10d98d1e11162aaa6522812db97651..d5741efa3fe1428fd36eb26f6c494ce425d4dd92 100644 (file)
@@ -196,7 +196,8 @@ enum reloc_type_generic
     GENERIC_RELOC_PAIR,                /* Only follows a GENERIC_RELOC_SECTDIFF */
     GENERIC_RELOC_SECTDIFF,
     GENERIC_RELOC_PB_LA_PTR,   /* prebound lazy pointer */
-    GENERIC_RELOC_LOCAL_SECTDIFF
+    GENERIC_RELOC_LOCAL_SECTDIFF,
+    GENERIC_RELOC_TLV          /* thread local variables */
 };
 
 #endif /* _MACHO_RELOC_H_ */
index 74edf082c70de5122f34e005a985c53f8dd8b4a4..d3466d8fe80f1f540df91f5e0e69d22044109c11 100644 (file)
  *     lea L0(%rip), %rax
  *             r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=0, r_pcrel=1, r_symbolnum=3
  *             48 8d 05 56 00 00 00
- *             // assumes L0 is in third section, has an address of 0x00000056 in .o
- *             // file, and there is no previous non-local label
- * 
+ *             // assumes L0 is in third section and there is no previous non-local label.
+ *             // The rip-relative-offset of 0x00000056 is L0-address_of_next_instruction.
+ *             // address_of_next_instruction is the address of the relocation + 4.
+ *
+ *     add     $6,L0(%rip)
+ *             r_type=X86_64_RELOC_SIGNED_1, r_length=2, r_extern=0, r_pcrel=1, r_symbolnum=3
+ *             83 05 18 00 00 00 06
+ *             // assumes L0 is in third section and there is no previous non-local label.
+ *             // The rip-relative-offset of 0x00000018 is L0-address_of_next_instruction.
+ *             // address_of_next_instruction is the address of the relocation + 4 + 1.
+ *             // The +1 comes from SIGNED_1.  This is used because the relocation is not
+ *             // at the end of the instruction.
+ *
  *     .quad L1
  *             r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_prev
  *             12 00 00 00 00 00 00 00
@@ -171,4 +181,5 @@ enum reloc_type_x86_64
        X86_64_RELOC_SIGNED_1,          // for signed 32-bit displacement with a -1 addend
        X86_64_RELOC_SIGNED_2,          // for signed 32-bit displacement with a -2 addend
        X86_64_RELOC_SIGNED_4,          // for signed 32-bit displacement with a -4 addend
+       X86_64_RELOC_TLV,               // for thread local variables
 };
index 90164c0af15178dd04bd685473ca0bd4cf5cfb44..ca048597abf0ecc00af6c7c79a90c821c22e6887 100644 (file)
@@ -162,7 +162,9 @@ typedef unsigned long long      uintmax_t;
 
 #define SIZE_MAX          UINT32_MAX
 
+#ifndef WCHAR_MAX
 #define WCHAR_MAX         INT32_MAX
+#endif
 
 /* We have no wint_t yet, so no WINT_{MIN,MAX}.
    Should end up being {U}INT32_{MIN,MAX}, depending.  */
index acd493419413401365a3be9294c83c771bd5aaa2..3e9a72e7eb6c52b98f1dad39657c48c233032ca4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -65,6 +65,8 @@ INSTALL_KERNEL_FILE = mach_kernel
 
 INSTALL_KERNEL_DIR = /
 
+INSTALL_KERNEL_SYM_DIR = $(INSTALL_KERNEL_DIR)/System/Library/Extensions/KDK/
+
 
 INSTMAN_SUBDIRS = \
        bsd
diff --git a/README b/README
index b9e102527f8ab5153018aa9ff3bd16cadc58e474..b71d70f725114ca3c443bedbc1c7b7d4445c4e06 100644 (file)
--- a/README
+++ b/README
@@ -131,6 +131,8 @@ A. How to build XNU:
 
   -project libkxld      # user-space version of kernel linker
 
+  -project libkmod     # static library automatically linked into kexts
+
   -project Libsyscall   # automatically generate BSD syscall stubs
 
 
@@ -154,7 +156,9 @@ A. How to build XNU:
 
    $ make -w              # trace recursive make invocations. Useful in combination with VERBOSE=YES
 
-   $ make BUILD_LTO=1    # built with LLVM Link Time Optimization (experimental)
+   $ make BUILD_LTO=1    # build with LLVM Link Time Optimization (experimental)
+
+   $ make BUILD_INTEGRATED_ASSEMBLER=1 # build with LLVM integrated assembler (experimental)
 
 =============================================
 B. How to install a new header file from XNU
index 7a0e5c5b4aa99f15fda0fc1919ffdbcef243e2d5..97c11fe613ff59fc4c7bedb46b7e84189d67aa6a 100644 (file)
@@ -10,7 +10,9 @@ include $(MakeInc_def)
 SETUP_SUBDIRS =        \
        config          \
        kextsymboltool  \
-       setsegname
+       setsegname      \
+       decomment       \
+       md
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 54219e1db45a48149cbc4c3296cc9c6df12146af..bcb0d3eebe8413598cf8739400605bcb6bb71d9b 100644 (file)
@@ -250,10 +250,10 @@ extern int        maxusers;
 #ifdef mips
 #define DEV_MASK 0xf
 #define        DEV_SHIFT  4
-#else  mips
+#else  /* mips */
 #define DEV_MASK 0x7
 #define        DEV_SHIFT  3
-#endif mips
+#endif /* mips */
 
 /* External function references */
 char *get_rest(FILE *fp);
index 2d4e952e98a9fea0bd08268f11123f643d7095d1..33612c0231e37d0112119965f7371676d4a75877 100755 (executable)
@@ -300,8 +300,7 @@ part != 0 {\
 # kernel binaries are put).
 #
     echo 'builddir     "."'                    >> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir    "'$OBJROOT'/'$OBJRELDIR'"'              >> $SYSCONF.new
+    echo 'objectdir    "'$OBJDIR'"'            >> $SYSCONF.new
     set SRCDIR=`dirname $SOURCE`
     echo 'sourcedir    "'$SRCROOT'"'           >> $SYSCONF.new
     if (-f $SYSCONF) then
index 90b6c2f97872ce1faa18d1481f1c479029cd0a84..2b4ff4a6516568f034956eac3dfc0694c6e19efa 100644 (file)
@@ -554,7 +554,7 @@ pseudo_inits(FILE *fp)
                        continue;
                fprintf(fp, "extern int %s(int);\n", dp->d_init);
        }
-#endif notdef
+#endif /* notdef */
        fprintf(fp, "struct pseudo_init {\n");
        fprintf(fp, "\tint\tps_count;\n\tint\t(*ps_func)();\n");
        fprintf(fp, "} pseudo_inits[] = {\n");
@@ -567,7 +567,7 @@ pseudo_inits(FILE *fp)
                        count = 1;
                fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
        }
-#endif notdef
+#endif /* notdef */
        fprintf(fp, "\t{0,\t0},\n};\n");
 }
 #endif
@@ -731,7 +731,7 @@ romp_ioconf(void)
        (void) fclose(fp);
 } 
 
-#endif MACHINE_ROMP
+#endif /* MACHINE_ROMP */
 
 #if    MACHINE_MMAX
 void
@@ -875,7 +875,7 @@ mmax_ioconf(void)
        (void) fclose(fp);
 }
 
-#endif MACHINE_MMAX
+#endif /* MACHINE_MMAX */
 
 #if    MACHINE_SQT
 
@@ -1207,7 +1207,7 @@ sqt_ioconf(void)
        (void) fclose(fp);
 }
 
-#endif MACHINE_SQT
+#endif /* MACHINE_SQT */
 #if    MACHINE_I386
 void
 i386_ioconf(void)
@@ -1228,7 +1228,7 @@ i386_ioconf(void)
        i386_pseudo_inits (fp);
        (void) fclose(fp);
 }
-#endif MACHINE_I386
+#endif /* MACHINE_I386 */
 
 #if MACHINE_MIPSY || MACHINE_MIPS
 
@@ -1513,7 +1513,7 @@ is_declared(const char *cp)
        }
        return(0);
 }
-#endif MACHINE_MIPSY || MACHINE_MIPS
+#endif /* MACHINE_MIPSY || MACHINE_MIPS */
 
 #if    MACHINE_M68K
 char   *m68k_dn(const char *name);
@@ -1709,7 +1709,7 @@ m68k_dn(const char *name)
 {
        sprintf(errbuf, "&%sdriver", name); return ns(errbuf);
 }
-#endif MACHINE_M68K
+#endif /* MACHINE_M68K */
 
 #if    MACHINE_M88K || MACHINE_M98K
 char   *nrw_dn(char *name);
@@ -1800,7 +1800,7 @@ m98k_dn(char *name)
 }
 
 
-#endif MACHINE_M88K || MACHINE_M98K
+#endif /* MACHINE_M88K || MACHINE_M98K */
 
 #ifdef MACHINE_HPPA
 char   *hppa_dn(char *name);
@@ -1855,7 +1855,7 @@ hppa_dn(char *name)
        return (errbuf);
 }
 
-#endif MACHINE_HPPA
+#endif /* MACHINE_HPPA */
 
 #ifdef MACHINE_SPARC
 char   *sparc_dn(char *name);
@@ -1909,7 +1909,7 @@ sparc_dn(char *name)
        return (errbuf);
 }
 
-#endif MACHINE_SPARC
+#endif /* MACHINE_SPARC */
 
 #ifdef MACHINE_PPC
 char   *ppc_dn(char *name);
@@ -1964,7 +1964,7 @@ ppc_dn(name)
        return (errbuf);
 }
 
-#endif MACHINE_PPC
+#endif /* MACHINE_PPC */
 
 #ifdef MACHINE_ARM
 void   arm_pseudo_inits(FILE *fp);
index 6ac9aa099fe82d575c086854da887ed8d0c5dc1d..4bf5602fdce4d5cdafd6aca800448c75c5b9f6c5 100644 (file)
@@ -659,7 +659,7 @@ do_objs(FILE *fp, const char *msg, int ext)
 #if    DO_SWAPFILE
        register struct file_list *fl;
        char swapname[32];
-#endif DO_SWAPFILE
+#endif /* DO_SWAPFILE */
 
        fprintf(fp, "%s", msg);
        lpos = strlen(msg);
@@ -693,7 +693,7 @@ do_objs(FILE *fp, const char *msg, int ext)
                        if (eq(sp, swapname))
                                goto cont;
                }
-#endif DO_SWAPFILE
+#endif /* DO_SWAPFILE */
                cp = (char *)sp + (len = strlen(sp)) - 1;
                och = *cp;
                *cp = 'o';
@@ -707,7 +707,7 @@ do_objs(FILE *fp, const char *msg, int ext)
 #if    DO_SWAPFILE
 cont:
                ;
-#endif DO_SWAPFILE
+#endif /* DO_SWAPFILE */
        }
        if (lpos != 8)
                putc('\n', fp);
@@ -955,10 +955,11 @@ do_rules(FILE *f)
                                }
                                break;
                        default:
-                       fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n",
-                                       source_dir, (int)(tp-np), np, nl);
-                       fprintf(f, "\t${S_RULE_2}%s\n", nl);
-                       fprintf(f, "\t${S_RULE_3}\n\n");
+                               fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n",
+                                               source_dir, (int)(tp-np), np, nl);
+                               fprintf(f, "\t${S_RULE_2}%s\n", nl);
+                               fprintf(f, "\t${S_RULE_3}\n\n");
+                               break;
                        }
                        continue;
                }
@@ -989,7 +990,7 @@ do_rules(FILE *f)
                                                source_dir, extras, np);
                                }
                                break;
-       #endif  0
+       #endif  /* 0 */
                        default:
                                goto common;
                        }
@@ -1091,9 +1092,9 @@ do_load(FILE *f)
                        fprintf(f, " %s", fl->f_needs);
 #ifdef multimax
        fprintf(f, "\n\nall .ORDER: includelinks ${LOAD}\n");
-#else  multimax
+#else  /* multimax */
        fprintf(f, "\n\nall: includelinks ${LOAD}\n");
-#endif multimax
+#endif /* multimax */
        fprintf(f, "\n");
 }
 
@@ -1138,7 +1139,7 @@ do_swapspec(__unused FILE *f, __unused const char *name, __unused char *sysname)
                fprintf(f, "\t${C_RULE_3}\n");
                fprintf(f, "\t${C_RULE_4}\n\n");
        }
-#endif DO_SWAPFILE
+#endif /* DO_SWAPFILE */
 }
 
 char *
diff --git a/SETUP/decomment/Makefile b/SETUP/decomment/Makefile
new file mode 100644 (file)
index 0000000..05cf5b8
--- /dev/null
@@ -0,0 +1,31 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = decomment.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT)
+
+decomment: $(OBJS)
+       $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+       @echo HOST_LD $@
+       $(_v)$(HOST_CODESIGN) -s - $@
+       @echo HOST_CODESIGN $@
+
+.c.o:
+       $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+       @echo HOST_CC $@
+
+do_build_setup: decomment
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/decomment/decomment.c b/SETUP/decomment/decomment.c
new file mode 100644 (file)
index 0000000..f95bdb6
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ * decomment.c
+ *
+ * Removes all comments and (optionally) whitespace from an input file. 
+ * Writes result on stdout.
+ */
+#include <stdio.h>
+#include <ctype.h>     /* for isspace */
+#include <libc.h>
+
+/*
+ * State of input scanner.
+ */
+typedef enum {
+       IS_NORMAL,
+       IS_SLASH,               // encountered opening '/'
+       IS_IN_COMMENT,          // within / * * / comment
+       IS_STAR,                // encountered closing '*'
+       IS_IN_END_COMMENT       // within / / comment
+} input_state_t;
+
+static void usage(char **argv);
+
+int main(int argc, char **argv)
+{
+       FILE *fp;
+       int bufchar;            /* int, not char: getc_unlocked() returns EOF as an int */
+       input_state_t input_state = IS_NORMAL;
+       int exit_code = 0;
+       int remove_whitespace = 0;
+       int arg;
+       
+       if(argc < 2)
+               usage(argv);
+       for(arg=2; arg<argc; arg++) {
+               switch(argv[arg][0]) {
+                   case 'r':
+                       remove_whitespace++;
+                       break;
+                   default:
+                       usage(argv);
+               }
+       }       
+       
+       fp = fopen(argv[1], "r");
+       if(!fp) {
+               fprintf(stderr, "Error opening %s\n", argv[1]);
+               perror("fopen");
+               exit(1);
+       }
+       for(;;) {
+               bufchar = getc_unlocked(fp);
+               if (bufchar == EOF)
+                       break;
+
+               switch(input_state) {
+               
+                   case IS_NORMAL:
+                       if(bufchar == '/') {
+                               /*
+                                * Might be start of a comment.
+                                */
+                               input_state = IS_SLASH;
+                       }
+                       else {
+                               if(!(remove_whitespace && isspace(bufchar))) {
+                                       putchar_unlocked(bufchar);
+                               }
+                       }
+                       break;
+                       
+                   case IS_SLASH:
+                       switch(bufchar) {
+                           case '*':
+                               /*
+                                * Start of normal comment.
+                                */
+                               input_state = IS_IN_COMMENT;
+                               break;
+                               
+                           case '/':
+                               /*
+                                * Start of 'to-end-of-line' comment.
+                                */
+                               input_state = IS_IN_END_COMMENT;
+                               break;
+                               
+                           default:
+                               /*
+                                * Not the start of comment. Emit the '/'
+                                * we skipped last char in case we were
+                                * entering a comment this time, then the
+                                * current char.
+                                */
+                               putchar_unlocked('/');
+                               if(!(remove_whitespace && isspace(bufchar))) {
+                                       putchar_unlocked(bufchar);
+                               }
+                               input_state = IS_NORMAL;
+                               break;
+                       }
+                       break;
+                       
+                   case IS_IN_COMMENT:
+                       if(bufchar == '*') {
+                               /*
+                                * Maybe ending comment...
+                                */
+                               input_state = IS_STAR;
+                       }
+                       break;
+       
+       
+                   case IS_STAR:
+                       switch(bufchar) {
+                           case '/':
+                               /*
+                                * End of normal comment.
+                                */
+                               input_state = IS_NORMAL;
+                               break;
+                               
+                           case '*':
+                               /*
+                                * Still could be one char away from end
+                                * of comment.
+                                */
+                               break;
+                               
+                           default:
+                               /*
+                                * Still inside comment, no end in sight.
+                                */
+                               input_state = IS_IN_COMMENT;
+                               break;
+                       }
+                       break;
+                       
+                   case IS_IN_END_COMMENT:
+                       if(bufchar == '\n') {
+                               /*
+                                * End of comment. Emit the newline if 
+                                * appropriate.
+                                */
+                               if(!remove_whitespace) {
+                                       putchar_unlocked(bufchar);
+                               }
+                               input_state = IS_NORMAL;
+                       }
+                       break;
+               
+               } /* switch input_state */
+       }         /* main read loop */
+       
+       /*
+        * Done.
+        */
+       return(exit_code);
+}
+
+static void usage(char **argv)
+{
+       printf("usage: %s infile [r(emove whitespace)]\n", argv[0]);
+       exit(1);
+}
index 137f253d27975334e113e0176dd3d182bb0e754d..607ef0839315c0c4c183ea2a5bac0739bf8fdc52 100644 (file)
@@ -13,7 +13,10 @@ CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
 
 WARNFLAGS = -Wall
 
-LDFLAGS = -isysroot $(HOST_SDKROOT) -lstdc++
+LDFLAGS = -isysroot $(HOST_SDKROOT) -L$(HOST_SPARSE_SDKROOT)/usr/local/lib/system -lstdc++
+ifneq ($(HOST_SPARSE_SDKROOT),/)
+LDFLAGS += -lmacho
+endif
 
 kextsymboltool: $(OBJS)
        $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
diff --git a/SETUP/md/Makefile b/SETUP/md/Makefile
new file mode 100644 (file)
index 0000000..b2741a7
--- /dev/null
@@ -0,0 +1,31 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = md.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT)
+
+md: $(OBJS)
+       $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+       @echo HOST_LD $@
+       $(_v)$(HOST_CODESIGN) -s - $@
+       @echo HOST_CODESIGN $@
+
+.c.o:
+       $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+       @echo HOST_CC $@
+
+do_build_setup: md
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/md/md.1 b/SETUP/md/md.1
new file mode 100644 (file)
index 0000000..9505f88
--- /dev/null
@@ -0,0 +1,121 @@
+.\" Man page Copyright (c) 2002
+.\"    Apple Computer
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS DOCUMENTATION IS PROVIDED BY THE APPLE ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL APPLE BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"
+.Dd "December 25, 2002"
+.Dt md 1
+.Os
+.Sh NAME
+.Nm md
+.Nd process raw dependency files produced by cpp -MD
+.Sh SYNOPSIS
+.Nm
+.Op Fl d
+.Op Fl f
+.Op Fl m Ar makefile
+.Op Fl u Ar makefile
+.Op Fl o Ar outputfile
+.Op Fl v
+.Op Fl x
+.Op Fl D Ar c|d|m|o|t|D
+.Sh DESCRIPTION
+The
+.Nm
+command basically does two things:
+.Pp
+Process the raw dependency files produced by the cpp -MD option.
+There is one line in the file for every #include encountered, but
+there are repeats, and patterns like .../dir1/../dir2 appear that
+should reduce to .../dir2.
+.Nm
+canonicalizes and flushes repeats from the dependency list.
+It also sorts the file names and "fills" them to a 78 character line.
+.Pp
+.Nm
+also updates the makefile directly with the dependency information,
+so the .d file can be thrown away (see
+.Ar d
+option).  This is done to save space.
+.Nm
+assumes that dependency information in the makefile is sorted by .o
+file name and it proceeds to merge in (add or replace, as appropriate)
+the new dependency lines that it has generated.
+For time efficiency,
+.Nm
+assumes that any .d files it is given that were created
+before the creation date of the "makefile" were processed
+already.  It ignores them unless the force flag
+.Op f
+is given.
+.Pp
+.Sh FLAG SUMMARY
+.Bl -tag -width indent
+.It Fl D Ar c|D|d|m|o|t
+Specify debugging option(s):
+.Bl -tag -width indent
+.It c
+show file contents
+.It D
+show very low level debugging
+.It d
+show new dependency crunching
+.It m
+show generation of makefile
+.It o
+show files being opened
+.It t
+show time comparisons
+.El
+.It Fl d
+Delete the .d file after it is processed
+.It Fl f
+Force an update of the dependencies in the makefile,
+even if the makefile is more recent than the .n file.
+(This implies that
+.Nm
+has been run already.)
+.It Fl m Ar makefile
+Specify the makefile to be upgraded.  The defaults are
+.Ar makefile
+and then
+.Ar Makefile .
+.It Fl o Ar outputfile
+Specify an output file (other than a makefile) for the dependencies.
+.It Fl u Ar makefile
+Like
+.Fl m ,
+but the file will be created if necessary.
+.It Fl v
+Set the verbose flag.
+.It Fl x
+Expunge old dependency information from the makefile.
+.El
+.Sh SEE ALSO
+.Xr make 1
+.Sh BUGS
+Old, possibly not used by anyone.
+.Sh HISTORY
+The
+.Nm
+utility was written by Robert V. Baron at Carnegie-Mellon University.
diff --git a/SETUP/md/md.c b/SETUP/md/md.c
new file mode 100644 (file)
index 0000000..f253bf5
--- /dev/null
@@ -0,0 +1,654 @@
+/* ************************************************************************ *\
+ *                                                                          *
+ * File:        md.c                                                        *
+ *                                                                          *
+ *      Updates makefiles from the .n dependency files generated by the     *
+ *      -MD option to "cc" (and "cpp").                                     *
+ *                                                                          *
+ * Abstract:                                                                *
+ *                                                                          *
+ *      Basically, "md" does two things:                                    *
+ *      1) It processes the raw dependency files produced by the cpp -MD     *
+ *         option.  There is one line in the file for every #include        *
+ *         encountered, but there are repeats and patterns like             *
+ *         .../dir1/../dir2 appear which should reduce to .../dir2          *
+ *         Md canonicalizes and flushes repeats from the dependency         *
+ *         list.  It also sorts the file names and "fills" them to a 78     *
+ *         character line.                                                  *
+ *      2) Md also updates the makefile directly with the dependency        *
+ *         information, so the .d file can be thrown away (-- -d option)    *
+ *         This is done to save space.  Md assumes that dependency          *
+ *         information in the makefile is sorted by .o file name and it     *
+ *         proceeds to merge in (add/or replace [as appropriate])  the new  *
+ *         dependency lines that it has generated.  For time efficiency,    *
+ *         Md assumes that any .d files it is given that were created       *
+ *         before the creation date of the "makefile" were processed        *
+ *         already.  It ignores them unless the force flag (-f) is given.   *
+ *                                                                          *
+ * Arguments:                                                               *
+ *                                                                          *
+ *      -d      delete the .d file after it is processed                    *
+ *      -f      force an update of the dependencies in the makefile         *
+ *              even though the makefile is more recent than the .n file    *
+ *              (This implies that md has been run already.)                *
+ *      -m      specify the makefile to be upgraded.  The defaults are      *
+ *              "makefile" and then "Makefile".                             *
+ *      -u      like -m above, but the file will be created if necessary    *
+ *      -o      specify an output file for the dependencies other than a    *
+ *              makefile                                                    *
+ *      -v      set the verbose flag                                        *
+ *      -x      expunge old dependency info from makefile                   *
+ *      -D      subswitch for debugging.  can be followed by any of         *
+ *              "c", "d", "m", "o", "t", "D" meaning:                       *
+ *              c       show file contents                                  *
+ *              d       show new dependency crunching                       *
+ *              m       show generation of makefile                         *
+ *              o       show files being opened                             *
+ *              t       show time comparisons                               *
+ *              D       show very low level debugging                       *
+ *                                                                          *
+ * Author:      Robert V. Baron                                             *
+ *              Copyright (c) 1986 by Robert V. Baron                       *
+ *                                                                          *
+ * HISTORY                                                                  *
+ * 29-Apr-87  Robert Baron (rvb) at Carnegie-Mellon University
+ *      If specified -u file does not exist, assume it is empty and
+ *      generate one.  As a sanity check, it must be possible to create
+ *      the output file.
+ *      Also, generalized fix below to handle any case of . as a
+ *      file name.
+ *
+ * 25-Mar-87  Mary Thompson (mrt) at Carnegie Mellon
+ *      Fixed up pathnamecanonicalization to recognize .// and
+ *      drop the second / as well. mmax cpp generates this form.
+ *
+ *  6-Jan-87  Robert Baron (rvb) at Carnegie-Mellon University
+ *      Fixed up pathname canonicalization to that ../../, etc would be
+ *      handled correctly.
+ *      Also made "force" on by default.
+ *
+ * 16-Mar-86  Robert Baron (rvb) at Carnegie-Mellon University
+ *              Created 4/16/86                                             *
+ *                                                                          *
+\* ************************************************************************ */
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define LINESIZE 65536  //  NeXT_MOD
+
+#define OUTLINELEN 79
+#define IObuffer 50000
+#define SALUTATION "# Dependencies for File:"
+#define SALUTATIONLEN (sizeof SALUTATION - 1)
+#define OLDSALUTATION "# DO NOT DELETE THIS LINE"
+#define OLDSALUTATIONLEN (sizeof OLDSALUTATION - 1)
+
+char file_array[IObuffer];      /* read file and store crunched names */
+char dep_line[LINESIZE];        /* line being processed */
+char dot_o[LINESIZE];           /* <foo.o>: prefix */
+char *path_component[100];      /* stores components for a path while being
+                                   crunched */
+
+struct dep {                    /* stores paths that a file depends on */
+        int len;
+        char *str;
+} dep_files[1000];
+int dep_file_index;
+
+qsort_strcmp(a, b)
+struct dep *a, *b;
+{
+extern int strcmp();
+        return strcmp(a->str, b->str);
+}
+
+char *outfile = (char *) 0;     /* generate dependency file */
+FILE *out;
+
+char *makefile = (char *) 0;    /* user supplied makefile name */
+char *real_mak_name;            /* actual makefile name (if not supplied) */
+char shadow_mak_name[LINESIZE]; /* changes done here then renamed */
+FILE *mak;                      /* for reading makefile */
+FILE *makout;                   /* for writing shadow */
+char makbuf[LINESIZE];          /* one line buffer for makefile */
+struct stat makstat;            /* stat of makefile for time comparisons */
+int mak_eof = 0;                        /* eof seen on makefile */
+FILE *find_mak(), *temp_mak();
+
+int delete = 0;                 /* -d delete dependency file */
+int debug = 0;
+int     D_contents = 0;         /* print file contents */
+int     D_depend = 0;           /* print dependency processing info */
+int     D_make = 0;             /* print makefile processing info */
+int     D_open = 0;             /* print after successful open */
+int     D_time = 0;             /* print time comparison info */
+int force = 1;                  /* always update dependency info */
+int update = 0;                 /* it's ok if the -m file does not exist */
+int verbose = 0;                /* tell me something */
+int expunge = 0;                /* first flush dependency stuff from makefile */
+
+
+char *name;
+
+static void scan_mak(FILE *, FILE *, char *);
+static void finish_mak(FILE *, FILE *);
+
+main(argc,argv)
+register char **argv;
+{
+int size;
+
+        name = *argv;
+        {register char *cp =name;
+                while (*cp) if (*cp++ == '/') name = cp;
+        }
+
+        for ( argv++ ; --argc ; argv++ ) { register char *token = *argv;
+                if (*token++ != '-' || !*token)
+                        break;
+                else { register int flag;
+                        for ( ; flag = *token++ ; ) {
+                                switch (flag) {
+                                case 'd':
+                                        delete++;
+                                        break;
+                                case 'f':
+                                        force++;
+                                        break;
+                                case 'u':
+                                        update++;
+                                case 'm':
+                                        makefile = *++argv;
+                                        if (--argc < 0) goto usage;
+                                        break;
+                                case 'o':
+                                        outfile = *++argv;
+                                        if (--argc < 0) goto usage;
+                                        break;
+                                case 'v':
+                                        verbose++;
+                                        break;
+                                case 'x':
+                                        expunge++;
+                                        break;
+                                case 'D':
+                                        for ( ; flag = *token++ ; )
+                                                switch (flag) {
+                                                case 'c':
+                                                        D_contents++;
+                                                        break;
+                                                case 'd':
+                                                        D_depend++;
+                                                        break;
+                                                case 'm':
+                                                        D_make++;
+                                                        break;
+                                                case 'o':
+                                                        D_open++;
+                                                        break;
+                                                case 't':
+                                                        D_time++;
+                                                        break;
+                                                case 'D':
+                                                        debug++;
+                                                        break;
+                                                default:
+                                                        goto letters;
+                                                }
+                                        goto newtoken;
+                                default:
+                                        goto usage;
+                                }
+letters: ;
+                        }
+                }
+newtoken: ;
+        }
+
+        if (!expunge && argc < 1) goto usage;
+        if ((int) outfile && (int) makefile)    /* not both */
+                goto usage;
+
+        if ((int) outfile) {
+                /*
+                 * NeXT_MOD, For SGS stuff, in case still linked to master version
+                 */
+                unlink(outfile);
+
+                if ((out = fopen(outfile, "w")) == NULL) {
+                        fprintf(stderr, "%s: outfile = \"%s\" ", name, outfile);
+                        perror("fopen");
+                        fflush(stdout), fflush(stderr);
+                        exit(1);
+                } else if (D_open)
+                        printf("%s: opened outfile \"%s\"\n", name, outfile);
+        } else if (mak = find_mak(makefile)) {
+                makout = temp_mak();
+                out = makout;
+                if (expunge)
+                        expunge_mak(mak, makout);
+                else
+                        skip_mak(mak, makout);
+        } else if (mak_eof &&  /* non existent file == mt file */
+                   (int)(makout = temp_mak())) { /* but we need to be able */
+                out = makout;                    /* to write here */
+        } else if (makefile) {
+                fprintf(stderr, "%s: makefile \"%s\" can not be opened or stat'ed\n",
+                        name, makefile);
+                exit(2);
+        }
+
+        for (; argc--; argv++) {
+                dep_file_index = 0;
+
+                if (size = read_dep(*argv)) {
+
+                        save_dot_o();
+                        if (D_depend) printf("%s: dot_o = \"%s\"\n", name, dot_o);
+
+                        parse_dep();
+                        if (mak) scan_mak(mak, makout, dot_o);
+                        if (out) output_dep(out);
+
+                        if (delete)
+                                unlink(*argv);
+                }
+        }
+
+        if (mak) finish_mak(mak, makout);
+        rename(shadow_mak_name, real_mak_name);
+        exit(0);
+usage:
+        fprintf(stderr, "usage: md -f -Dcdmot -m makefile -o outputfile -v <file1> ... <filen>\n");
+        exit(1);
+}
+
+
+read_dep(file)
+register char *file;
+{
+register int fd;
+register int size;
+struct stat statbuf;
+
+        if ((fd = open(file, 0)) < 0) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, file);
+                perror("open");
+                fflush(stdout), fflush(stderr);
+                return 0;
+        }
+        if (D_open)
+                printf("%s: opened dependency file \"%s\"\n", name, file);
+        
+        if (fstat(fd, &statbuf) < 0) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, file);
+                perror("stat");
+                fflush(stdout), fflush(stderr);
+                goto out;
+        }
+        switch(statbuf.st_mode & S_IFMT) {
+        case S_IFREG:
+                if (D_time)
+                        printf("%s: file time = %d\n", name, statbuf.st_mtime);
+
+                if (statbuf.st_size > IObuffer) {
+                        fprintf(stderr, "%s: file \"%s\" tooo big for IObuffer\n",
+                                name, file);
+                        goto out;
+                } else if (force)
+                        break;
+                else if ((int) mak && statbuf.st_mtime < makstat.st_mtime) {
+                        if (verbose || D_time)
+                                fprintf(stderr, "%s: skipping \"%s\" %d < %d \"%s\"\n",
+                                        name, file, statbuf.st_mtime, makstat.st_mtime,
+                                        real_mak_name);
+                        goto out;
+                } else /* >=   =>ok */
+                        break;
+        case S_IFDIR:
+        case S_IFLNK:
+        case S_IFCHR:
+        case S_IFBLK:
+        case S_IFSOCK:
+        default:
+                fprintf(stderr, "%s: bad mode: 0%o on \"%s\"\n",
+                        name, statbuf.st_mode, file);
+                fflush(stdout), fflush(stderr);
+                goto out;
+        }
+
+        if ((size = read(fd, file_array, sizeof (file_array))) < 0) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, file);
+                perror("read");
+                fflush(stdout), fflush(stderr);
+                goto out;
+        }
+        file_array[size] = 0;
+        
+        if (close(fd) < 0) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, file);
+                perror("close");
+                fflush(stdout), fflush(stderr);
+                return 0;
+        }
+
+        if (D_depend && D_contents)
+                printf("file_array: \"%s\"\n", file_array);
+        return size;
+out: ;
+        close(fd);
+        return 0;
+}
+
+save_dot_o()
+{
+register char *cp = file_array;
+register char *svp = dot_o;
+register int c;
+
+        while ((*svp++ = (c = *cp++)) && c != ':');
+        *svp = 0;
+}
+
+parse_dep()
+{
+register char *lp = file_array;
+register int c;
+
+        while (*lp) {register char *tlp = lp;
+                     register char *cp = dep_line;
+                     register int i = 0;
+                     int abspath = 0;
+                     char oldc;
+                     char *oldcp;
+
+                        /* get a line to process */
+                while ((c = *lp++) && c != '\n')
+                  {
+                    if (c == '\\')
+                      lp++;             /* skip backslash newline */
+                    else
+                      *cp++ = c;
+                  }
+                  if (!c)
+                    break;
+                *cp = 0;
+                cp = dep_line;
+                lp[-1] = 0;
+                        /* skip .o file name */
+                while ((c = *cp++) && c != ':'); if (!c) continue;
+next_filename:
+                i = 0;
+                abspath = 0;
+                while ((c = *cp) && (c == ' ' || c == '\t')) cp++; if (!c) continue;
+
+                        /* canonicalization processing */
+
+                                        /* initial / is remembered */
+                if (c == '/')
+                        abspath++;
+
+                while (c && c != ' ' && c != '\t') {
+                        if (D_depend) printf("i = %d going \"%s\"\n", i, cp);
+                                        /* kill \'s */
+                        while ((c = *cp) && c == '/') cp++; if (!c) break;
+                        path_component[i] = cp;
+                                        /* swallow chars till next / or null */
+                        while ((c = *cp++) && c != '/' && c != ' ' && c != '\t');
+                        if (c) cp[-1]=0;/* end component C style */
+
+                                        /* ignore . */;
+                        if (!strcmp(path_component[i], "."))
+                                ;       /* if "component" != .. */
+                        else            /* don't reduce /component/.. to nothing */
+                                i++;    /* there could be symbolic links! */
+                }
+                        /* reassemble components */
+                oldc = c;               /* save c */
+                oldcp = cp;             /* save cp */
+                cp = tlp;               /* overwrite line in buffer */
+                if (abspath)
+                        *cp++ = '/';
+                for (c=0; c<i; c++) {register char *ccp = path_component[c];
+                        while (*cp++ = *ccp++);
+                        *--cp = '/';
+                        cp++;
+                }
+                *--cp = 0;
+
+                c=dep_file_index++;
+                dep_files[c].str = tlp;
+                dep_files[c].len = cp - tlp;
+                if (D_depend)
+                        printf("%s: dep_file[%d] = \"%s\" Len %d\n",
+                                name, dep_file_index - 1, tlp, cp - tlp);
+                tlp = cp + 1;
+                if (oldc)
+                  {
+                     cp = oldcp;
+                     goto next_filename;
+                  }
+        }
+}
+
+output_dep(out)
+FILE *out;
+{
+register int j;
+register int size = 1000;
+register int dot_o_len = strlen(dot_o);
+register struct dep *dp = dep_files;
+int written = 0;
+
+        if (D_depend && debug)
+                for(j = 0; j < dep_file_index; j++) {
+                        printf("dep_files[%d] = %s\n", j, dep_files[j].str);
+                }
+
+        qsort(dep_files, dep_file_index, sizeof (struct dep), qsort_strcmp);
+
+        if (D_depend && debug)
+                for(j = 0; j < dep_file_index; j++) {
+                        printf("dep_files[%d] = %s\n", j, dep_files[j].str);
+                }
+
+        fprintf(out, "%s %s", SALUTATION, dot_o);
+        for(j = 0; j < dep_file_index; j++, dp++)
+                                        {register int len = dp->len;
+                                         register char *str = dp->str;
+                if (j && len == (dp-1)->len && !strcmp(str, (dp-1)->str))
+                        continue;
+                written++;
+                if (size + len + 1 > OUTLINELEN) {
+                        fprintf(out, "\n%s %s", dot_o, str);
+                        size = dot_o_len + len + 1;
+                } else {
+                        fprintf(out, " %s", str);
+                        size += len + 1;
+                }
+        }
+        fprintf(out, "\n");
+        if (verbose)
+                fprintf(stdout, "%s: \"%s\" %d => %d\n", name, dot_o, dep_file_index, written);
+}
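
output_dep() depends on the qsort() above making equal entries adjacent, so duplicates can be skipped with a single strcmp against the previous element, and it wraps output lines at OUTLINELEN. The comparator itself is not in this hunk; a plausible qsort_strcmp for struct dep (an assumption, matching only the usage shown above) would be:

    #include <string.h>

    /* Assumed element shape, matching the fields used above. */
    struct dep {
            char    *str;
            int     len;
    };

    /* A plausible qsort_strcmp for the qsort() call in output_dep(). */
    static int
    qsort_strcmp(const void *a, const void *b)
    {
            return strcmp(((const struct dep *)a)->str,
                          ((const struct dep *)b)->str);
    }
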
+
+                /* process makefile */
+FILE *
+find_mak(file)
+char *file;
+{
+FILE *mak;
+
+        if (file) {                     /* explicit makefile name given */
+                if ((mak = fopen(file, "r")) != NULL) {
+                        real_mak_name = file;
+                } else if (update) {
+                        mak_eof = 1;
+                        real_mak_name = file;
+                        return NULL;
+                } else {
+                        fprintf(stderr, "%s: file = \"%s\" ", name, file);
+                        perror("fopen");
+                        fflush(stdout), fflush(stderr);
+                        return NULL;
+                }
+        } else {
+                if ((mak = fopen("makefile", "r")) != NULL) {
+                        real_mak_name = "makefile";
+                } else if ((mak = fopen("Makefile", "r")) != NULL) {
+                        real_mak_name = "Makefile";
+                } else return NULL;
+        }
+
+        if (fstat(fileno(mak), &makstat) < 0) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, real_mak_name);
+                perror("stat");
+                fflush(stdout), fflush(stderr);
+                return NULL;
+        }
+        if (D_open)
+                printf("%s: opened makefile \"%s\"\n", name, real_mak_name);
+        if (D_time)
+                printf("%s: makefile time = %d\n", name, makstat.st_mtime);
+
+        return mak;
+}
+
+FILE *
+temp_mak()
+{
+FILE *mak;
+
+        strcpy(shadow_mak_name, real_mak_name);
+        strcat(shadow_mak_name, ".md");
+
+        /*
+         * For SGS stuff, in case still linked to master version
+         */
+        unlink(shadow_mak_name);
+        if ((mak = fopen(shadow_mak_name, "w")) == NULL) {
+                fprintf(stderr, "%s: file = \"%s\" ", name, shadow_mak_name);
+                perror("fopen");
+                fflush(stdout), fflush(stderr);
+                return NULL;
+        }
+        if (D_open)
+                printf("%s: opened makefile.md \"%s\"\n", name, shadow_mak_name);
+
+        return mak;
+}
+
+skip_mak(makin, makout)
+register FILE *makin, *makout;
+{
+register int len = SALUTATIONLEN;
+
+        if (D_make)
+                printf("skipping in \"%s\"  ", real_mak_name);
+
+        while (fgets(makbuf, LINESIZE, makin) != NULL) {
+                if (D_make && D_contents)
+                        printf("%s: \"%s\"\n", real_mak_name, makbuf);
+                if (strncmp(makbuf, SALUTATION, len)) {
+                        fputs(makbuf, makout);
+                } else
+                        break;
+        }
+        mak_eof = feof(makin);
+        if (mak_eof)
+                fclose(makin);
+        if (D_make)
+                printf("eof = %d str = \"%s\"", mak_eof, makbuf);
+}
+
+expunge_mak(makin, makout)
+register FILE *makin, *makout;
+{
+register int len = SALUTATIONLEN;
+register int oldlen = OLDSALUTATIONLEN;
+
+        if (D_make)
+                printf("expunging in \"%s\"  ", real_mak_name);
+
+        while (fgets(makbuf, LINESIZE, makin) != NULL) {
+                if (D_make && D_contents)
+                        printf("%s: \"%s\"\n", real_mak_name, makbuf);
+                if (! strncmp(makbuf, SALUTATION, len) ||
+                    ! strncmp(makbuf, OLDSALUTATION, oldlen))
+                        break;
+                else
+                        fputs(makbuf, makout);
+        }
+        mak_eof = 1;
+        if (mak_eof)
+                fclose(makin);
+        if (D_make)
+                printf("eof = %d str = \"%s\"", mak_eof, makbuf);
+}
+
+static void
+scan_mak(FILE *makin, FILE *makout, char *file)
+{
+register char *cp = &makbuf[SALUTATIONLEN+1];
+register int len = strlen(file);
+register int ret;
+
+        if (D_make)
+                printf("scanning in \"%s\" for \"%s\"\n", real_mak_name, file);
+
+        do {
+                if (mak_eof)            /* don't scan any more */
+                        return;
+
+                ret = strncmp(cp, file, len);
+                if (D_make)
+                        printf("saw \"%s\" ret = %d\n", cp, ret);
+
+                if (ret < 0) {          /* skip forward till match or greater */
+                        fputs(makbuf, makout);          /* line we're looking at */
+                        while (fgets(makbuf, LINESIZE, makin) != NULL) {
+                                if (strncmp(makbuf, SALUTATION, SALUTATIONLEN)) {
+                                        fputs(makbuf, makout);
+                                } else
+                                        break;
+                        }
+                        mak_eof = feof(makin);
+                        if (mak_eof)
+                                fclose(makin);
+                        continue;
+                } else if (ret == 0) {  /* flush match */
+                        while (fgets(makbuf, LINESIZE, makin) != NULL) {
+                                if (strncmp(makbuf, SALUTATION, SALUTATIONLEN)) {
+                                        ;       /* flush old stuff */
+                                } else
+                                        break;
+                        }
+                        mak_eof = feof(makin);
+                        if (mak_eof)
+                                fclose(makin);
+                        break;
+                } else {                /* no luck this time */
+                        break;
+                }
+        } while (1);
+}
+
+static void
+finish_mak(FILE *makin, FILE *makout)
+{
+        if (mak_eof)            /* don't scan any more */
+                return;
+
+        if (D_make)
+                printf("finishing in \"%s\"\n", real_mak_name);
+
+        fputs(makbuf, makout);          /* line we're looking at */
+        while (fgets(makbuf, LINESIZE, makin) != NULL) {
+                fputs(makbuf, makout);
+        }
+}
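
Taken together, these routines implement md's update cycle: open the makefile (find_mak), open a shadow copy (temp_mak), copy everything up to the SALUTATION marker (skip_mak), splice in freshly sorted dependency lines per object file (scan_mak/output_dep), and copy the remainder (finish_mak). A hedged sketch of how a driver might string them together (update_makefile and read_dep are hypothetical names; the globals are those of the listing above; the real md also renames the .md shadow over the original and honors its options, and error handling is elided here):

    static int
    update_makefile(char *makname, char *depname)
    {
            FILE *mak, *shadow;

            mak = find_mak(makname);
            if (mak == NULL && !mak_eof)
                    return -1;              /* hard open/stat failure */
            if ((shadow = temp_mak()) == NULL)
                    return -1;

            if (mak)
                    skip_mak(mak, shadow);  /* copy up to SALUTATION */
            read_dep(depname);              /* assumed: parse the .d file */
            if (mak)
                    scan_mak(mak, shadow, dot_o);   /* drop stale entries */
            output_dep(shadow);             /* emit sorted, deduped deps */
            if (mak)
                    finish_mak(mak, shadow);        /* copy the tail */
            fclose(shadow);
            return 0;
    }
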
index a24cc88d739184e048d5b0de50e68d0186db3e49..d3bc41fa6432f321e32eaa361d03211c0dd5b38c 100644 (file)
@@ -311,11 +311,29 @@ int       auditon(int, void *, int);
 int    auditctl(const char *);
 int    getauid(au_id_t *);
 int    setauid(const au_id_t *);
-int    getaudit(struct auditinfo *);
-int    setaudit(const struct auditinfo *);
 int    getaudit_addr(struct auditinfo_addr *, int);
 int    setaudit_addr(const struct auditinfo_addr *, int);
 
+#if defined(__APPLE__)
+#include <Availability.h>
+
+/*
+ * getaudit()/setaudit() are deprecated and have been replaced with
+ * wrappers to the getaudit_addr()/setaudit_addr() syscalls above.
+ */
+
+int    getaudit(struct auditinfo *)
+               __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8,
+                   __IPHONE_2_0, __IPHONE_NA);
+int    setaudit(const struct auditinfo *)
+               __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8,
+                   __IPHONE_2_0, __IPHONE_NA);
+#else
+
+int    getaudit(struct auditinfo *);
+int    setaudit(const struct auditinfo *);
+#endif /* defined(__APPLE__) */
+
 #ifdef __APPLE_API_PRIVATE
 #include <mach/port.h>
 mach_port_name_t audit_session_self(void);
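
With getaudit()/setaudit() now marked __OSX_AVAILABLE_BUT_DEPRECATED as of 10.8, callers are expected to move to the _addr variants declared above. A minimal sketch of the replacement call:

    #include <bsm/audit.h>

    /* Sketch: query the current audit session via the non-deprecated API. */
    static int
    get_session_info(struct auditinfo_addr *aia)
    {
            return getaudit_addr(aia, sizeof(*aia));
    }
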
index f7dec8d89b7d5cfd03056db79134cf02f67b02fe..c6f0580170939cf454b17db1ed569bfb1d72c90d 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2008 Apple Inc.
+ * Copyright (c) 2008-2011 Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #define        BSM_ERRNO_EALREADY              149
 #define        BSM_ERRNO_EINPROGRESS           150
 #define        BSM_ERRNO_ESTALE                151
+#define        BSM_ERRNO_EQFULL                152
 
 /*
  * OpenBSM constants for error numbers not defined in Solaris.  In the event
index 25e3eb829ec4f607c5cb83730ca24b6cd1a30ad3..d5e4bac0168494d9cbe7ddb6ef945f4034d63763 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005-2009 Apple Inc.
+ * Copyright (c) 2005-2010 Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #define        AUE_INITGROUPS          AUE_NULL
 #define        AUE_IOPOLICYSYS         AUE_NULL
 #define        AUE_ISSETUGID           AUE_NULL
+#define        AUE_LEDGER              AUE_NULL
 #define        AUE_LIOLISTIO           AUE_NULL
 #define        AUE_LISTXATTR           AUE_NULL
 #define        AUE_LSTATV              AUE_NULL
index bb57c6dae1a5989a20399683bcdff56a0adfcc85..2cbd0b9e8125b6f507bf7dbe58509815037130e7 100644 (file)
@@ -114,6 +114,8 @@ options             NORMA_ETHER     # NORMA across ethernet         # <norma_ether>
 options                SIMPLE_CLOCK    # don't assume fixed tick       # <simple_clock>
 options                XPR_DEBUG       # kernel tracing                # <xpr_debug>
 options                KDEBUG          # kernel tracing                # <kdebug>
+options                IST_KDEBUG      # limited kernel tracing        # <ist_kdebug>
+options                NO_KDEBUG       # no kernel tracing             # <no_kdebug>
 options                DDM_DEBUG       # driverkit-style tracing       # <ddm_debug>
 options                MACH_OLD_VM_COPY # Old vm_copy technology       # <!newvm>
 options                NO_DIRECT_RPC   # for untyped mig servers       # 
@@ -127,8 +129,15 @@ options            ROUTING         # routing                       # <routing>
 options                VLAN            #                               # <vlan>
 options                BOND            #                               # <bond>
 options                PF              # Packet Filter                 # <pf>
-options                PF_PKTHDR       # PF tag inside mbuf pkthdr     # <pf_pkthdr>
+options                PF_ALTQ         # PF ALTQ (Alternate Queueing)  # <pf_altq>
 options                PFLOG           # PF log interface              # <pflog>
+options                PKTSCHED_CBQ    # CBQ packet scheduler          # <pktsched_cbq>
+options                PKTSCHED_HFSC   # H-FSC packet scheduler        # <pktsched_hfsc>
+options                PKTSCHED_PRIQ   # PRIQ packet scheduler         # <pktsched_priq>
+options                PKTSCHED_FAIRQ  # FAIRQ packet scheduler        # <pktsched_fairq>
+options                CLASSQ_BLUE     # BLUE queueing algorithm       # <classq_blue>
+options                CLASSQ_RED      # RED queueing algorithm        # <classq_red>
+options                CLASSQ_RIO      # RIO queueing algorithm        # <classq_rio>
 options                IPDIVERT        # Divert sockets (for NAT)      # <ipdivert>
 options                IPFIREWALL      # IP Firewalling (used by NAT)  # <ipfirewall>
 options                IPFIREWALL_FORWARD      #Transparent proxy      # <ipfirewall>
@@ -182,7 +191,10 @@ options            FDESC           # fdesc_fs support              # <fdesc>
 options                DEVFS           # devfs support                 # <devfs>
 options                JOURNALING      # journaling support    # <journaling>
 options                HFS_COMPRESSION # hfs compression       # <hfs_compression>
-options                CONFIG_HFS_TRIM # HFS trims unused blocks       # <config_hfs_trim>
+options                CONFIG_HFS_STD  # hfs standard support  # <config_hfs_std>
+options                CONFIG_HFS_TRIM # hfs trims unused blocks       # <config_hfs_trim>
+options                CONFIG_HFS_MOUNT_UNMAP  #hfs trims blocks at mount      # <config_hfs_mount_unmap>
+
 
 #
 #      file system features
@@ -193,6 +205,9 @@ options             NAMEDSTREAMS    # named stream vnop support     # <namedstreams>
 options                CONFIG_VOLFS    # volfs path support (legacy)   # <config_volfs>
 options                CONFIG_IMGSRC_ACCESS # source of imageboot dmg  # <config_imgsrc_access>
 options                CONFIG_TRIGGERS # trigger vnodes                # <config_triggers>
+options                CONFIG_VFS_FUNNEL # thread unsafe vfs's # <config_vfs_funnel>
+options                CONFIG_EXT_RESOLVER # e.g. memberd              # <config_ext_resolver>
+options                CONFIG_SEARCHFS # searchfs syscall support      # <config_searchfs>
 
 #
 # NFS support
@@ -234,7 +249,6 @@ options         "IPV6FIREWALL_DEFAULT_TO_ACCEPT"    #IPv6 Firewall Feature          # <ip
 
 pseudo-device   gif     1              # <gif>
 pseudo-device   dummy   2              # <dummy>
-pseudo-device   faith   1              # <faith>
 pseudo-device   stf    1               # <stf>
 
 options                        crypto                  # <ipsec,crypto>
@@ -336,18 +350,18 @@ options        CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)"            # <large,xlarge>
 options        CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)"          # <medium>
 options        CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)"          # <bsmall,xsmall,small>
 
-#
-# set maximum space used for packet buffers
-#
-options CONFIG_USESOCKTHRESHOLD=1      # <large,xlarge,medium>
-options CONFIG_USESOCKTHRESHOLD=0      # <bsmall,xsmall,small>
-
 #
 # Configure size of TCP hash table
 #
 options CONFIG_TCBHASHSIZE=4096                # <medium,large,xlarge>
 options CONFIG_TCBHASHSIZE=128         # <xsmall,small,bsmall>
 
+#
+# Configure bandwidth limiting sysctl
+#
+options CONFIG_ICMP_BANDLIM=250                # <medium,large,xlarge>
+options CONFIG_ICMP_BANDLIM=50         # <xsmall,small,bsmall>
+
 #
 #  configurable async IO options 
 #  CONFIG_AIO_MAX - system wide limit of async IO requests.
@@ -435,18 +449,31 @@ options           CONFIG_CODE_DECRYPTION  # <config_embedded>
 #
 # User Content Protection, used on embedded
 #
-
 options                CONFIG_PROTECT  # <config_protect>
 
 #
-# freeze - support app hibernation, used on embedded
-# CONFIG_FREEZE_SUSPENDED_MIN is the minimum number of suspended  
-# processes to be left unhibernated
+# enable per-process memory priority tracking
+#
+options                CONFIG_MEMORYSTATUS                             # <memorystatus>
+
+#
+# enable jetsam - used on embedded
+#
+options                CONFIG_JETSAM                           # <jetsam>
+
+#
+# enable freezing of suspended processes - used on embedded
 #
 options                CONFIG_FREEZE                                   # <freeze>
 
 options                CHECK_CS_VALIDATION_BITMAP                      # <config_cs_validation_bitmap>
 
+#
+# memory pressure event support
+# must be set in both bsd/conf and osfmk/conf MASTER files
+#
+options                VM_PRESSURE_EVENTS              # <vm_pressure_events>
+
 #
 #  Ethernet (ARP)
 #
@@ -507,4 +534,3 @@ pseudo-device       sdt             1       init    sdt_init        # <config_dtrace>
 pseudo-device  systrace        1       init    systrace_init   # <config_dtrace>
 pseudo-device  fbt             1       init    fbt_init        # <config_dtrace>
 pseudo-device  profile_prvd    1       init    profile_init    # <config_dtrace>
-
index 594f0fb514fb7bce639267269aca8cef67f33250..c2cae3eba2be45ddb19560d6bbd1ca9043eb9831 100644 (file)
 #  
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
-#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs config_hfs_trim hfs_compression config_hfs_alloc_rbtree config_imgsrc_access config_triggers ]
-#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge pf pflog pf_pkthdr ]
+#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_vfs_funnel config_ext_resolver config_searchfs ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge PF ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
+#  PF =          [ pf pflog ]
+#  PKTSCHED =    [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ]
+#  CLASSQ =      [ classq_blue classq_red classq_rio ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
 #  PROFILE =     [ RELEASE profile ]
 #  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
@@ -115,4 +118,3 @@ pseudo-device       nfsmeas                                         # <nfsmeas>
 #  Removable Volume support
 #
 pseudo-device  vol                                             # <vol>
-
index 4bf42910ba9ab7a4d7587cfd3281067ebe45b9f9..a1be0eb1f6011d1ffa6319ff6414bb73b8e3c7d4 100644 (file)
 #  
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
-#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs config_hfs_trim hfs_compression config_hfs_alloc_rbtree config_imgsrc_access config_triggers ]
-#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge pf pflog pf_pkthdr ]
+#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge PF ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
+#  PF =          [ pf pflog ]
+#  PKTSCHED =    [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ]
+#  CLASSQ =      [ classq_blue classq_red classq_rio ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
 #  PROFILE =     [ RELEASE profile ]
 #  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
@@ -95,6 +98,9 @@ options               CONFIG_AUDIT                    # Kernel auditing
 # app-profiling i.e. pre-heating - off? 
 options   CONFIG_APP_PROFILE=0  
 
+# kernel performance tracing
+#options     KPERF                  # <kperf>
+
 #
 # code decryption... used on i386 for DSMOS
 # must be set in all the bsd/conf and osfmk/conf MASTER files
@@ -115,4 +121,3 @@ pseudo-device       nfsmeas                                         # <nfsmeas>
 #  Removable Volume support
 #
 pseudo-device  vol                                             # <vol>
-
index afaf3eb8973150f8a2fb813313c3cf02392344db..610e6d6c598735d87919e3ca88cff609fd91dd5b 100644 (file)
@@ -41,9 +41,11 @@ $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)  \
                MAKEFILES=$(TARGET)/$(BSD_KERNEL_CONFIG)/Makefile       \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(BSD_KERNEL_CONFIG)      \
index a4635458942c893a1b1e267098e5a1484460a94d..59554731c6e7411a43b1585156ebb98af1b00521 100644 (file)
@@ -9,6 +9,11 @@ fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual
 # sha256 Files to build with -DSHA256_USE_ASSEMBLY=1
 sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1
 
+# Inline assembly doesn't interact well with LTO
+fbt_x86.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)
+# Taking the address of labels doesn't work with LTO (9524055)
+dtrace.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)
+
 ######################################################################
 #END    Machine dependent Makefile fragment for i386
 ######################################################################
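
The dtrace.o exemption refers to the GNU C "labels as values" extension (&&label plus computed goto), whose label addresses LTO's cross-module code motion can invalidate; hence CFLAGS_NOLTO_FLAG. A minimal sketch of the construct in question (illustrative only, not code from dtrace.c):

    /* Taking &&lbl yields the label's address; LTO may relocate or
     * merge such code, which is why these objects opt out of it. */
    static int
    dispatch(int op)
    {
            static void *jump[] = { &&op_zero, &&op_one };

            if (op < 0 || op > 1)
                    return -1;
            goto *jump[op];
    op_zero:
            return 0;
    op_one:
            return 1;
    }
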
index 8691ce70522af058027208440060414e49e2ec34..61a088bd85a35ba53f4fbb0dec35965dfb37863a 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+# Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 #
 # @APPLE_LICENSE_HEADER_START@
 # 
@@ -102,6 +102,7 @@ OBJS_NO_SIGN_COMPARE =              \
                mld6.o  \
                nd6.o   \
                nd6_nbr.o       \
+               nd6_prproxy.o   \
                nd6_rtr.o       \
                raw_ip6.o       \
                route6.o        \
@@ -118,6 +119,163 @@ OBJS_NO_SIGN_COMPARE =            \
 
 $(foreach file,$(OBJS_NO_SIGN_COMPARE),$(eval $(call add_perfile_cflags,$(file),-Wno-sign-compare)))
 
+# Objects that want -Wcast-align warning treated as error
+OBJS_ERROR_CAST_ALIGN =                        \
+               kern_control.o          \
+               mcache.o                \
+               sys_socket.o            \
+               uipc_mbuf.o             \
+               uipc_mbuf2.o            \
+               uipc_socket.o           \
+               uipc_socket2.o          \
+               uipc_syscalls.o         \
+               bpf.o                   \
+               bpf_filter.o            \
+               bridgestp.o             \
+               bsd_comp.o              \
+               devtimer.o              \
+               dlil.o                  \
+               ether_if_module.o       \
+               ether_inet_pr_module.o  \
+               ether_inet6_pr_module.o \
+               flowhash.o              \
+               if.o                    \
+               if_bridge.o             \
+               if_gif.o                \
+               if_llreach.o            \
+               if_loop.o               \
+               if_media.o              \
+               if_mib.o                \
+               if_pflog.o              \
+               if_stf.o                \
+               if_utun.o               \
+               if_utun_crypto.o        \
+               if_utun_crypto_ipsec.o  \
+               if_vlan.o               \
+               init.o                  \
+               iptap.o                 \
+               kext_net.o              \
+               kpi_interface.o         \
+               kpi_interfacefilter.o   \
+               kpi_protocol.o          \
+               ndrv.o                  \
+               net_osdep.o             \
+               net_str_id.o            \
+               netsrc.o                \
+               ntstat.o                \
+               pf.o                    \
+               pf_if.o                 \
+               pf_ioctl.o              \
+               pf_norm.o               \
+               pf_osfp.o               \
+               pf_ruleset.o            \
+               pf_table.o              \
+               ppp_deflate.o           \
+               radix.o                 \
+               raw_cb.o                \
+               raw_usrreq.o            \
+               route.o                 \
+               rtsock.o                \
+               dhcp_options.o          \
+               altq_cbq.o              \
+               altq_fairq.o            \
+               altq_hfsc.o             \
+               altq_priq.o             \
+               altq_qfq.o              \
+               altq_subr.o             \
+               pktsched.o              \
+               pktsched_cbq.o          \
+               pktsched_fairq.o        \
+               pktsched_hfsc.o         \
+               pktsched_priq.o         \
+               pktsched_qfq.o          \
+               pktsched_rmclass.o      \
+               pktsched_tcq.o          \
+               classq.o                \
+               classq_blue.o           \
+               classq_red.o            \
+               classq_rio.o            \
+               classq_sfb.o            \
+               classq_subr.o           \
+               classq_util.o           \
+               igmp.o                  \
+               in.o                    \
+               in_arp.o                \
+               in_cksum.o              \
+               in_dhcp.o               \
+               in_gif.o                \
+               in_mcast.o              \
+               in_pcb.o                \
+               in_pcblist.o            \
+               in_proto.o              \
+               in_rmx.o                \
+               in_tclass.o             \
+               ip_divert.o             \
+               ip_ecn.o                \
+               ip_encap.o              \
+               ip_icmp.o               \
+               ip_id.o                 \
+               ip_input.o              \
+               ip_mroute.o             \
+               ip_output.o             \
+               kpi_ipfilter.o          \
+               raw_ip.o                \
+               tcp_debug.o             \
+               tcp_input.o             \
+               tcp_ledbat.o            \
+               tcp_newreno.o           \
+               tcp_output.o            \
+               tcp_sack.o              \
+               tcp_subr.o              \
+               tcp_timer.o             \
+               tcp_usrreq.o            \
+               udp_usrreq.o            \
+               ah_core.o               \
+               ah_input.o              \
+               ah_output.o             \
+               dest6.o                 \
+               esp_core.o              \
+               esp_input.o             \
+               esp_output.o            \
+               esp_rijndael.o          \
+               frag6.o                 \
+               icmp6.o                 \
+               in6.o                   \
+               in6_cksum.o             \
+               in6_gif.o               \
+               in6_ifattach.o          \
+               in6_mcast.o             \
+               in6_pcb.o               \
+               in6_prefix.o            \
+               in6_proto.o             \
+               in6_rmx.o               \
+               in6_src.o               \
+               ip6_forward.o           \
+               ip6_id.o                \
+               ip6_input.o             \
+               ip6_mroute.o            \
+               ip6_output.o            \
+               ipcomp_core.o           \
+               ipcomp_input.o          \
+               ipcomp_output.o         \
+               ipsec.o                 \
+               mld6.o                  \
+               nd6.o                   \
+               nd6_nbr.o               \
+               nd6_rtr.o               \
+               raw_ip6.o               \
+               route6.o                \
+               scope6.o                \
+               udp6_output.o           \
+               udp6_usrreq.o           \
+               key.o                   \
+               key_debug.o             \
+               keydb.o                 \
+               keysock.o
+
+$(foreach file,$(OBJS_ERROR_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Werror=cast-align)))
+
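The objects listed above are now built with -Werror=cast-align, so a pointer cast that increases alignment requirements becomes a hard error. A small illustration of the kind of code the flag rejects (with Clang, or with GCC on strict-alignment targets):

    #include <stdint.h>

    uint32_t
    read_word(const char *buf)
    {
            /* rejected: char * is 1-byte aligned, uint32_t * wants 4 */
            return *(const uint32_t *)buf;
    }
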
 #
 # Directories for mig generated files
 #
index 29811299abe2e962f05d13df0aa105af33cc5e8c..e45baf159f09cd716866da2e0af77dd29e32b7c4 100644 (file)
@@ -9,6 +9,11 @@ fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual
 # sha256 Files to build with -DSHA256_USE_ASSEMBLY=1
 sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1 
 
+# Inline assembly doesn't interact well with LTO
+fbt_x86.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)
+# Taking the address of labels doesn't work with LTO (9524055)
+dtrace.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)
+
 ######################################################################
 #END    Machine dependent Makefile fragment for x86_64
 ######################################################################
index b3a7b10c498d472e0431a70c9e92c7939fb23e25..dd8075219bc9440c432aadaadca449847680ad80 100644 (file)
@@ -26,7 +26,6 @@ OPTIONS/mach_host                     optional mach_host
 OPTIONS/mach_ipc_compat                        optional mach_ipc_compat
 OPTIONS/mach_ipc_debug                 optional mach_ipc_debug
 OPTIONS/mach_ipc_test                  optional mach_ipc_test
-OPTIONS/mach_kdb                       optional mach_kdb
 OPTIONS/mach_ldebug                    optional mach_ldebug
 OPTIONS/mach_load                      optional mach_load
 OPTIONS/mach_machine_routines          optional mach_machine_routines
@@ -108,13 +107,21 @@ OPTIONS/ipfirewall                        optional ipfirewall
 OPTIONS/ipv6firewall           optional ipv6firewall
 OPTIONS/tcpdebug                       optional tcpdebug
 OPTIONS/if_bridge                      optional if_bridge
-OPTIONS/faith                          optional faith
+OPTIONS/bridgestp                      optional bridgestp if_bridge
 OPTIONS/gif                                    optional gif
 OPTIONS/netat                          optional netat
 OPTIONS/sendfile                       optional sendfile
 OPTIONS/randomipid                     optional randomipid
 OPTIONS/pf                             optional pf
-
+OPTIONS/pflog                          optional pflog pf
+OPTIONS/pf_altq                                optional pf_altq pf
+OPTIONS/classq_blue                    optional classq_blue
+OPTIONS/classq_red                     optional classq_red
+OPTIONS/classq_rio                     optional classq_rio
+OPTIONS/pktsched_cbq                   optional pktsched_cbq
+OPTIONS/pktsched_fairq                 optional pktsched_fairq
+OPTIONS/pktsched_hfsc                  optional pktsched_hfsc
+OPTIONS/pktsched_priq                  optional pktsched_priq
 OPTIONS/zlib                           optional zlib
 
 #
@@ -193,7 +200,7 @@ bsd/kern/decmpfs.c                  standard
 bsd/net/bpf.c                          optional bpfilter
 bsd/net/bpf_filter.c                   optional bpfilter
 bsd/net/if_bridge.c                    optional if_bridge
-bsd/net/bridgestp.c                    optional if_bridge
+bsd/net/bridgestp.c                    optional bridgestp
 bsd/net/bsd_comp.c                     optional ppp_bsdcomp
 bsd/net/if.c                           optional networking
 bsd/net/init.c                         optional sockets
@@ -229,7 +236,9 @@ bsd/net/kpi_protocol.c              optional networking
 bsd/net/kpi_interfacefilter.c  optional networking
 bsd/net/net_str_id.c                   optional networking
 bsd/net/if_utun.c                      optional networking
-bsd/net/if_pflog.c                     optional pflog pf
+bsd/net/if_utun_crypto.c               optional networking
+bsd/net/if_utun_crypto_ipsec.c         optional networking
+bsd/net/if_pflog.c                     optional pflog
 bsd/net/pf.c                           optional pf
 bsd/net/pf_if.c                                optional pf
 bsd/net/pf_ioctl.c                     optional pf
@@ -237,7 +246,33 @@ bsd/net/pf_norm.c                  optional pf
 bsd/net/pf_osfp.c                      optional pf
 bsd/net/pf_ruleset.c                   optional pf
 bsd/net/pf_table.c                     optional pf
+bsd/net/iptap.c                                optional networking
 bsd/net/if_llreach.c                   optional networking
+bsd/net/flowhash.c                     optional networking
+
+bsd/net/classq/classq.c                        optional networking
+bsd/net/classq/classq_blue.c           optional classq_blue
+bsd/net/classq/classq_red.c            optional classq_red
+bsd/net/classq/classq_rio.c            optional classq_rio
+bsd/net/classq/classq_sfb.c            optional networking
+bsd/net/classq/classq_subr.c           optional networking
+bsd/net/classq/classq_util.c           optional networking
+
+bsd/net/pktsched/pktsched.c            optional networking
+bsd/net/pktsched/pktsched_cbq.c                optional pktsched_cbq
+bsd/net/pktsched/pktsched_fairq.c      optional pktsched_fairq
+bsd/net/pktsched/pktsched_hfsc.c       optional pktsched_hfsc
+bsd/net/pktsched/pktsched_priq.c       optional pktsched_priq
+bsd/net/pktsched/pktsched_qfq.c                optional networking
+bsd/net/pktsched/pktsched_rmclass.c    optional pktsched_cbq
+bsd/net/pktsched/pktsched_tcq.c                optional networking
+
+bsd/net/altq/altq_cbq.c                        optional pktsched_cbq pf_altq
+bsd/net/altq/altq_fairq.c              optional pktsched_fairq pf_altq
+bsd/net/altq/altq_hfsc.c               optional pktsched_hfsc pf_altq
+bsd/net/altq/altq_priq.c               optional pktsched_priq pf_altq
+bsd/net/altq/altq_qfq.c                        optional pf_altq
+bsd/net/altq/altq_subr.c               optional pf_altq
 
 bsd/netinet/igmp.c                     optional inet
 bsd/netinet/in.c                       optional inet
@@ -268,6 +303,7 @@ bsd/netinet/tcp_subr.c                      optional inet
 bsd/netinet/tcp_timer.c                        optional inet
 bsd/netinet/tcp_usrreq.c               optional inet
 bsd/netinet/tcp_newreno.c              optional inet
+bsd/netinet/tcp_lro.c                   optional inet
 bsd/netinet/tcp_ledbat.c               optional inet
 bsd/netinet/udp_usrreq.c               optional inet
 bsd/netinet/in_gif.c                   optional gif inet
@@ -305,6 +341,7 @@ bsd/netinet6/in6_rmx.c                      optional inet6
 bsd/netinet6/mld6.c                    optional inet6
 bsd/netinet6/nd6.c                     optional inet6
 bsd/netinet6/nd6_nbr.c                 optional inet6
+bsd/netinet6/nd6_prproxy.c                     optional inet6
 bsd/netinet6/nd6_rtr.c                 optional inet6
 bsd/netinet6/raw_ip6.c                 optional inet6
 bsd/netinet6/route6.c                  optional inet6
@@ -318,10 +355,6 @@ bsd/netkey/key_debug.c             optional ipsec
 bsd/netkey/keysock.c                   optional ipsec
 bsd/netkey/keydb.c                     optional ipsec
 
-bsd/crypto/sha2/sha2.c                 optional crypto allcrypto
-bsd/crypto/des/des_ecb.c               optional crypto
-bsd/crypto/des/des_enc.c               optional crypto
-bsd/crypto/des/des_setkey.c            optional crypto
 bsd/crypto/blowfish/bf_enc.c           optional crypto allcrypto
 bsd/crypto/blowfish/bf_skey.c          optional crypto allcrypto
 bsd/crypto/cast128/cast128.c           optional crypto allcrypto
@@ -419,6 +452,7 @@ bsd/nfs/nfs_vfsops.c                        optional nfsclient
 bsd/nfs/nfs_vnops.c                    optional nfsclient
 bsd/nfs/nfs4_subs.c                    optional nfsclient
 bsd/nfs/nfs4_vnops.c                   optional nfsclient
+bsd/nfs/nfs_upcall.c                   optional nfsserver
 
 bsd/kern/netboot.c                     optional nfsclient
 
@@ -489,7 +523,6 @@ bsd/kern/kern_symfile.c                     standard
 bsd/kern/kern_descrip.c                        standard
 bsd/kern/kern_event.c                  standard
 bsd/kern/kern_control.c                        optional networking
-bsd/kern/kern_callout.c                        standard
 bsd/kern/kern_exec.c                   standard
 bsd/kern/kern_exit.c                   standard
 bsd/kern/kern_lockf.c                  standard
@@ -508,7 +541,7 @@ bsd/kern/kern_subr.c                        standard
 bsd/kern/kern_synch.c                  standard
 bsd/kern/kern_sysctl.c                 standard
 bsd/kern/kern_newsysctl.c              standard
-bsd/kern/kern_memorystatus.c           optional config_embedded
+bsd/kern/kern_memorystatus.c   optional config_memorystatus
 bsd/kern/kern_mib.c                    standard
 bsd/kern/kpi_mbuf.c                    optional sockets
 bsd/kern/kern_time.c                   standard
@@ -558,7 +591,7 @@ bsd/kern/pthread_support.c          optional psynch
 bsd/kern/pthread_synch.c               standard
 bsd/kern/proc_info.c                   standard
 bsd/kern/process_policy.c              standard
-bsd/kern/vm_pressure.c                 standard
+bsd/kern/vm_pressure.c                 optional vm_pressure_events
 bsd/kern/socket_info.c                 optional sockets
 
 bsd/vm/vnode_pager.c                   standard
@@ -573,6 +606,8 @@ bsd/conf/param.c                    standard
 bsd/dev/chud/chud_bsd_callback.c       standard
 bsd/dev/chud/chud_process.c            standard
 
+
+
 bsd/dev/dtrace/dtrace.c                        optional config_dtrace
 bsd/dev/dtrace/lockstat.c              optional config_dtrace
 bsd/dev/dtrace/dtrace_ptss.c           optional config_dtrace
@@ -589,3 +624,5 @@ bsd/dev/dtrace/fasttrap.c           optional config_dtrace
 
 bsd/kern/imageboot.c                  optional config_imageboot
 
+osfmk/kperf/kperfbsd.c                 optional kperf
+
index 331f7202d01cfc770dea793f8c5249e0d9d0f79e..5c8852f6cbfd3e3c85ea67b1ef58f14e55b007e1 100644 (file)
@@ -15,16 +15,6 @@ bsd/dev/i386/sysctl.c           standard
 bsd/dev/i386/unix_signal.c     standard
 bsd/dev/i386/munge.s           standard
 
-bsd/crypto/aes/i386/AES.s              optional crypto
-bsd/crypto/aes/i386/aes_modes_asm.s    optional crypto
-bsd/crypto/aes/i386/aes_modes_hw.s     optional crypto
-bsd/crypto/aes/i386/aes_key_hw.s       optional crypto
-bsd/crypto/aes/i386/aes_crypt_hw.s     optional crypto
-bsd/crypto/aes/i386/aesxts_asm.s       optional crypto
-bsd/crypto/aes/i386/aesxts.c   optional crypto
-
-bsd/crypto/sha2/intel/sha256.s optional crypto
-bsd/crypto/sha2/intel/sha256nossse3.s  optional crypto
 
 # Lightly ifdef'd to support K64 DTrace
 bsd/dev/i386/dtrace_isa.c      optional config_dtrace
index fcb3be604a54ff9a1240bfbea851259f55839a84..ed63a4a2f31d4347d516778ddfe80b71e26d311d 100644 (file)
@@ -15,16 +15,6 @@ bsd/dev/i386/sysctl.c           standard
 bsd/dev/i386/unix_signal.c     standard
 bsd/dev/x86_64/munge.s         standard
 
-bsd/crypto/aes/i386/AES.s              optional crypto
-bsd/crypto/aes/i386/aes_modes_asm.s    optional crypto
-bsd/crypto/aes/i386/aes_modes_hw.s     optional crypto
-bsd/crypto/aes/i386/aes_key_hw.s       optional crypto
-bsd/crypto/aes/i386/aes_crypt_hw.s     optional crypto
-bsd/crypto/aes/i386/aesxts_asm.s       optional crypto
-bsd/crypto/aes/i386/aesxts.c   optional crypto
-
-bsd/crypto/sha2/intel/sha256.s  optional crypto
-bsd/crypto/sha2/intel/sha256nossse3.s   optional crypto
 
 # Lightly ifdef'd to support K64 DTrace
 bsd/dev/i386/dtrace_isa.c      optional config_dtrace
index ab0c4b9860ed67aad68479521bd389ff8dc32569..109c4c4cb4e4cccf16d23391740475618cbbab81 100644 (file)
@@ -10,17 +10,11 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = \
        blowfish \
        cast128 \
-       des     \
     rc4 \
-       aes \
-       sha2
-
 
 INSTINC_SUBDIRS_I386 = \
-       aes
 
 INSTINC_SUBDIRS_X86_64 = \
-       aes
 
 INSTINC_SUBDIRS_ARM = \
 
@@ -33,7 +27,10 @@ EXPINC_SUBDIRS_X86_64 = \
 EXPINC_SUBDIRS_ARM = \
 
 PRIVATE_DATAFILES = \
-       sha1.h
+       sha1.h \
+       sha2.h \
+       des.h \
+       aes.h aesxts.h
 
 INSTALL_MI_DIR = crypto
 
@@ -41,6 +38,11 @@ EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
 INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
+# We use this to install aesxts.h in Kernel.framework/PrivateHeaders 
+# in addition to Kernel.framework/PrivateHeaders/crypto
+# This should be removed once all clients are switched to include libkern/crypto/aesxts.h
+INSTALL_KF_MD_LCL_LIST = aesxts.h
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
 
diff --git a/bsd/crypto/aes.h b/bsd/crypto/aes.h
new file mode 100644 (file)
index 0000000..9fd55fd
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * This header file is kept for legacy reasons and may be removed in
+ * future; the interface resides in <libkern/crypto/aes.h>.
+ */
+#include <libkern/crypto/aes.h>
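
So existing kernel sources keep compiling unchanged while the implementation moves behind libkern; new code should include the canonical header directly. A sketch of the two call-site styles (assuming the libkern header is include-guarded, as headers here conventionally are):

    /* Legacy call sites keep working through the shim above: */
    #include <crypto/aes.h>             /* forwards to the header below */
    /* New code should say: */
    #include <libkern/crypto/aes.h>
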
diff --git a/bsd/crypto/aes/Assert.c b/bsd/crypto/aes/Assert.c
deleted file mode 100644 (file)
index 5ba9c44..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*     This module exists solely to check compile-time assertions.  It should be
-       compiled when building the project, and building should be terminated if
-       errors are encountered.  However, any object it produces need not be
-       included in the build.
-*/
-
-
-#include <stddef.h>
-
-#include "crypto/aes.h"
-#include "Context.h"
-
-/*     Declare CheckAssertion so that if any of the declarations below differ
-       from it, the compiler will report an error.
-*/
-extern char CheckAssertion[1];
-
-/*     Ensure that ContextKey is the offset of the ks member of the AES context
-       structures.
-*/
-extern char CheckAssertion[ContextKey == offsetof(aes_encrypt_ctx, ks)];
-extern char CheckAssertion[ContextKey == offsetof(aes_decrypt_ctx, ks)];
-       /*      If these assertions fail, change the definition of ContextKey in
-               Context.h to match the offset of the ks field.
-       */
-
-/*     Ensure that ContextKeyLength is the offset of the inf member of the AES
-       context structures.
-*/
-extern char CheckAssertion[ContextKeyLength == offsetof(aes_encrypt_ctx, inf)];
-extern char CheckAssertion[ContextKeyLength == offsetof(aes_decrypt_ctx, inf)];
-       /*      If these assertions fail, change the definition of ContextKeyLength in
-               Context.h to match the offset of the inf field.
-       */
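The deleted Assert.c used the classic array-size trick: when a condition is false, CheckAssertion is redeclared with size 0, which is invalid C and stops the build. Under C11 the same checks could be written with _Static_assert; a hypothetical modern equivalent (the ContextKey/ContextKeyLength values below are illustrative, not the real Context.h ones):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint32_t ks[60]; uint32_t inf; } example_ctx;

    #define ContextKey        0
    #define ContextKeyLength  240   /* 60 words * 4 bytes */

    _Static_assert(ContextKey == offsetof(example_ctx, ks),
        "ContextKey must equal the offset of ks");
    _Static_assert(ContextKeyLength == offsetof(example_ctx, inf),
        "ContextKeyLength must equal the offset of inf");
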
diff --git a/bsd/crypto/aes/Makefile b/bsd/crypto/aes/Makefile
deleted file mode 100644 (file)
index 6b96dbd..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_I386 = \
-       i386
-
-INSTINC_SUBDIRS_X86_64 = \
-       i386
-
-INSTINC_SUBDIRS_ARM = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS_X86_64 = \
-
-EXPINC_SUBDIRS_ARM = \
-
-PRIVATE_DATAFILES = \
-       aes.h
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-EXPORT_MI_LIST = aes.h
-
-INSTALL_KF_MI_LIST =
-
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/crypto/aes/aes.h b/bsd/crypto/aes/aes.h
deleted file mode 100755 (executable)
index 49c845d..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 31/01/2006
-
- This file contains the definitions required to use AES in C. See aesopt.h
- for optimisation details.
-*/
-
-#ifndef _AES_H
-#define _AES_H
-
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define AES_128     /* define if AES with 128 bit keys is needed    */
-#define AES_192     /* define if AES with 192 bit keys is needed    */
-#define AES_256     /* define if AES with 256 bit keys is needed    */
-#define AES_VAR     /* define if a variable key size is needed      */
-#define AES_MODES   /* define if support is needed for modes        */
-
-/* The following must also be set in assembler files if being used  */
-
-#define AES_ENCRYPT /* if support for encryption is needed          */
-#define AES_DECRYPT /* if support for decryption is needed          */
-#define AES_ERR_CHK /* for parameter checks & error return codes    */
-#define AES_REV_DKS /* define to reverse decryption key schedule    */
-
-#define AES_BLOCK_SIZE  16  /* the AES block size in bytes          */
-#define N_COLS           4  /* the number of columns in the state   */
-
-typedef        unsigned int    uint_32t;
-typedef unsigned char   uint_8t;
-typedef unsigned short  uint_16t;
-typedef unsigned char   aes_08t;
-typedef        unsigned int    aes_32t;
-
-#define void_ret  void
-#define int_ret   int
-
-/* The key schedule length is 11, 13 or 15 16-byte blocks for 128,  */
-/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes  */
-/* or 44, 52 or 60 32-bit words.                                    */
-
-#if defined( AES_VAR ) || defined( AES_256 )
-#define KS_LENGTH       60
-#elif defined( AES_192 )
-#define KS_LENGTH       52
-#else
-#define KS_LENGTH       44
-#endif
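
The numbers follow from (Nr + 1) round keys of 4 words each: AES-128 has 10 rounds, giving 44 words; AES-192 has 12, giving 52; AES-256 has 14, giving 60 words (240 bytes, the 15 16-byte blocks mentioned above). As compile-time checks:

    /* Key-schedule length in 32-bit words: (Nr + 1) round keys * 4 words. */
    #define AES_KS_WORDS(nr)  (4 * ((nr) + 1))

    _Static_assert(AES_KS_WORDS(10) == 44, "AES-128: 11 blocks, 176 bytes");
    _Static_assert(AES_KS_WORDS(12) == 52, "AES-192: 13 blocks, 208 bytes");
    _Static_assert(AES_KS_WORDS(14) == 60, "AES-256: 15 blocks, 240 bytes");
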
-
-
-#if 0 // defined (__i386__) || defined (__x86_64__)
-
-/* 
-       looks like no other code for (i386/x86_64) is using the following definitions any more.
-       I comment this out, so the C code in the directory gen/ can be used to compile for test/development purpose.
-       Note : this is not going to change anything in the i386/x86_64 kernel. 
-                       (source code in i386/, mostly in assembly, does not reference to this header file.) 
-
-       cclee   10-20-2010
-*/
-
-/* the character array 'inf' in the following structures is used    */
-/* to hold AES context information. This AES code uses cx->inf.b[0] */
-/* to hold the number of rounds multiplied by 16. The other three   */
-/* elements can be used by code that implements additional modes    */
-
-#if defined( AES_ERR_CHK )
-#define aes_rval     int_ret
-#else
-#define aes_rval     void_ret
-#endif
-
-typedef union
-{   uint_32t l;
-    uint_8t b[4];
-} aes_inf;
-
-typedef struct
-{   uint_32t ks[KS_LENGTH];
-    aes_inf inf;
-} aes_encrypt_ctx;
-
-typedef struct
-{   uint_32t ks[KS_LENGTH];
-    aes_inf inf;
-} aes_decrypt_ctx;
-
-#else
-
-#if defined( AES_ERR_CHK )
-#define aes_ret     int
-#define aes_good    0
-#define aes_error  -1
-#else
-#define aes_ret     void
-#endif
-
-#define aes_rval    aes_ret
-
-typedef struct
-{   aes_32t ks[KS_LENGTH];
-    aes_32t rn;
-} aes_encrypt_ctx;
-
-typedef struct
-{   aes_32t ks[KS_LENGTH];
-    aes_32t rn;
-} aes_decrypt_ctx;
-
-#endif
-
-typedef struct
-{   
-       aes_decrypt_ctx decrypt;
-    aes_encrypt_ctx encrypt;
-} aes_ctx;
-
-
-/* implemented in case of wrong call for fixed tables */
-
-void gen_tabs(void);
-
-
-/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */
-/* those in the range 128 <= key_len <= 256 are given in bits       */
-
-#if defined( AES_ENCRYPT )
-
-#if defined(AES_128) || defined(AES_VAR)
-aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_VAR)
-aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
-#endif
-
-#if defined (__i386__) || defined (__x86_64__)
-aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]);
-#endif
-
-aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
-                                        unsigned char *out_blk, const aes_encrypt_ctx cx[1]);
-
-#endif
-
-#if defined( AES_DECRYPT )
-
-#if defined(AES_128) || defined(AES_VAR)
-aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined(AES_VAR)
-aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
-#endif
-
-#if defined (__i386__) || defined (__x86_64__)
-aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]);
-#endif
-
-aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
-                                        unsigned char *out_blk, const aes_decrypt_ctx cx[1]);
-
-
-#endif
-
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
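
For reference, the removed Gladman-style interface was used roughly as follows (a hedged sketch against the declarations above; the return-value checks assume AES_ERR_CHK, which this header defines, and the replacement lives in <libkern/crypto/aes.h>):

    /* Sketch: one-shot AES-128 CBC encryption of nblk 16-byte blocks. */
    static int
    cbc_encrypt(const unsigned char key[16], const unsigned char iv[16],
        const unsigned char *in, unsigned char *out, unsigned int nblk)
    {
            aes_encrypt_ctx ctx;

            if (aes_encrypt_key128(key, &ctx) != aes_good)
                    return -1;
            if (aes_encrypt_cbc(in, iv, nblk, out, &ctx) != aes_good)
                    return -1;
            return 0;
    }
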
diff --git a/bsd/crypto/aes/gen/Makefile b/bsd/crypto/aes/gen/Makefile
deleted file mode 100644 (file)
index d32c71c..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_I386 = \
-
-PRIVATE_DATAFILES = \
-       aestab.h aesopt.h
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-INSTALL_KF_MI_LIST =
-
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/bsd/crypto/aes/gen/aescrypt.c b/bsd/crypto/aes/gen/aescrypt.c
deleted file mode 100644 (file)
index 31d4c81..0000000
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- This file contains the code for implementing encryption and decryption
- for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
- can optionally be replaced by code written in assembler using NASM. For
- further details see the file aesopt.h
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
-#define xo(y,x,c) (s(y,c) ^= s(x, c))
-#define si(y,x,c)   (s(y,c) = word_in(x, c))
-#define so(y,x,c)   word_out(y, c, s(x,c))
-
-#if defined(ARRAYS)
-#define locals(y,x)     x[4],y[4]
-#else
-#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
-#endif
-
-#define dtables(tab)     const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
-#define itables(tab)     tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]
-
-#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
-                        s(y,2) = s(x,2); s(y,3) = s(x,3);
-
-#define key_in(y,x,k)   ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3)
-#define cbc(y,x)        xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3)
-#define state_in(y,x)   si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
-#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
-#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
-
-#if defined(ENCRYPTION) && !defined(AES_ASM)
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimisation with small code, but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined(_MSC_VER)
-#pragma optimize( "s", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define fwd_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
-
-#if defined(FT4_SET)
-#undef  dec_fmvars
-#  if defined(ENC_ROUND_CACHE_TABLES)
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
-#  else
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
-#  endif
-#elif defined(FT1_SET)
-#undef  dec_fmvars
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
-#else
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
-#endif
-
-#if defined(FL4_SET)
-#  if defined(LAST_ENC_ROUND_CACHE_TABLES)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
-#  else
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
-#  endif
-#elif defined(FL1_SET)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
-#else
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
-#endif
-
-aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
-                                        unsigned char *out, const aes_encrypt_ctx cx[1])
-{   aes_32t         locals(b0, b1);
-    const aes_32t   *kp;
-    const aes_32t   *kptr = cx->ks;
-#if defined(ENC_ROUND_CACHE_TABLES)
-       dtables(t_fn);
-#endif
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-       dtables(t_fl);
-#endif
-
-#if defined( dec_fmvars )
-    dec_fmvars; /* declare variables for fwd_mcol() if needed */
-#endif
-
-#if defined( AES_ERR_CHK )
-    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
-        return aes_error;
-#endif
-
-       // Load IV into b0.
-       state_in(b0, in_iv);
-
-       for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)
-       {
-               kp = kptr;
-#if 0
-               // Read the plaintext into b1
-               state_in(b1, in);
-               // Do the CBC with b0 which is either the iv or the ciphertext of the previous block.
-               cbc(b1, b0);
-
-               // Xor b1 with the key schedule to get things started.
-               key_in(b0, b1, kp);
-#else
-               // Since xor is associative we mess with the ordering here to get the loads started early
-               key_in(b1, b0, kp);  // Xor b0(IV) with the key schedule and assign to b1
-               state_in(b0, in);    // Load block into b0
-               cbc(b0, b1);         // Xor b0 with b1 and store in b0
-#endif
-
-#if defined(ENC_ROUND_CACHE_TABLES)
-               itables(t_fn);
-#endif
-
-#if (ENC_UNROLL == FULL)
-
-               switch(cx->rn)
-               {
-               case 14:
-                       round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-                       kp += 2 * N_COLS;
-               case 12:
-                       round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-                       kp += 2 * N_COLS;
-               case 10:
-               default:
-                       round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-                       round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
-                       round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
-                       round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
-                       round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
-                       round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-                       itables(t_fl);
-#endif
-                       round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
-               }
-
-#else
-
-               {   aes_32t    rnd;
-#if (ENC_UNROLL == PARTIAL)
-                       for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
-                       {
-                               kp += N_COLS;
-                               round(fwd_rnd, b1, b0, kp);
-                               kp += N_COLS;
-                               round(fwd_rnd, b0, b1, kp);
-                       }
-                       kp += N_COLS;
-                       round(fwd_rnd,  b1, b0, kp);
-#else
-                       for(rnd = 0; rnd < cx->rn - 1; ++rnd)
-                       {
-                               kp += N_COLS;
-                               round(fwd_rnd, b1, b0, kp);
-                               l_copy(b0, b1);
-                       }
-#endif
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-                       itables(t_fl);
-#endif
-                       kp += N_COLS;
-                       round(fwd_lrnd, b0, b1, kp);
-               }
-#endif
-       
-               state_out(out, b0);
-       }
-
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
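
For orientation, here is a minimal sketch of how a caller might drive aes_encrypt_cbc() (placeholder key/IV/plaintext values, assuming an AES-128 build of this tree):

    #include <crypto/aes/aes.h>

    unsigned char key[16] = { 0 };                 /* placeholder 128-bit key */
    unsigned char iv[AES_BLOCK_SIZE] = { 0 };      /* placeholder IV */
    unsigned char pt[4 * AES_BLOCK_SIZE] = { 0 };  /* four plaintext blocks */
    unsigned char ct[4 * AES_BLOCK_SIZE];
    aes_encrypt_ctx ecx[1];

    aes_encrypt_key128(key, ecx);                  /* build the key schedule */
    aes_encrypt_cbc(pt, iv, 4, ct, ecx);           /* CBC-encrypt the 4 blocks */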
-
-#if defined(DECRYPTION) && !defined(AES_ASM)
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimisation with small code but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined(_MSC_VER)
-#pragma optimize( "t", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define inv_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
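
This is the InvShiftRows counterpart of fwd_var above: row r of output column c now comes from input column (c - r) mod 4.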
-
-#if defined(IT4_SET)
-#undef  dec_imvars
-#  if defined(DEC_ROUND_CACHE_TABLES)
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
-#  else
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
-#  endif
-#elif defined(IT1_SET)
-#undef  dec_imvars
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
-#else
-#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
-#endif
-
-#if defined(IL4_SET)
-#  if defined(LAST_DEC_ROUND_CACHE_TABLES)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
-#  else
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
-#  endif
-#elif defined(IL1_SET)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
-#else
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
-#endif
-
-aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
-                                        unsigned char *out, const aes_decrypt_ctx cx[1])
-{   aes_32t        locals(b0, b1);
-    const aes_32t *kptr = cx->ks + cx->rn * N_COLS;
-       const aes_32t *kp;
-#if defined(DEC_ROUND_CACHE_TABLES)
-       dtables(t_in);
-#endif
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-       dtables(t_il);
-#endif
-
-#if defined( dec_imvars )
-    dec_imvars; /* declare variables for inv_mcol() if needed */
-#endif
-       
-#if defined( AES_ERR_CHK )
-    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
-        return aes_error;
-#endif
-
-#if defined(DEC_ROUND_CACHE_TABLES)
-       itables(t_in);
-#endif 
-       
-       in += AES_BLOCK_SIZE * (num_blk - 1);
-       out += AES_BLOCK_SIZE * (num_blk - 1);
-       // Load the last block's ciphertext into b1
-       state_in(b1, in);
-
-       for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)
-       {
-               kp = kptr;
-               // Do the xor part of state_in, where b1 is the previous block's ciphertext.
-               key_in(b0, b1, kp);
-
-#if (DEC_UNROLL == FULL)
-       
-               switch(cx->rn)
-               {
-               case 14:
-                       round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-                       kp -= 2 * N_COLS;
-               case 12:
-                       round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-                       kp -= 2 * N_COLS;
-               case 10:
-               default:
-                       round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-                       round(inv_rnd,  b1, b0, kp -  3 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  4 * N_COLS);
-                       round(inv_rnd,  b1, b0, kp -  5 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  6 * N_COLS);
-                       round(inv_rnd,  b1, b0, kp -  7 * N_COLS);
-                       round(inv_rnd,  b0, b1, kp -  8 * N_COLS);
-                       round(inv_rnd,  b1, b0, kp -  9 * N_COLS);
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-                       itables(t_il);
-#endif 
-                       round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
-               }
-
-#else
-       
-               {   aes_32t    rnd;
-#if (DEC_UNROLL == PARTIAL)
-                       for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
-                       {
-                               kp -= N_COLS;
-                               round(inv_rnd, b1, b0, kp);
-                               kp -= N_COLS;
-                               round(inv_rnd, b0, b1, kp);
-                       }
-                       kp -= N_COLS;
-                       round(inv_rnd, b1, b0, kp);
-#else
-                       for(rnd = 0; rnd < cx->rn - 1; ++rnd)
-                       {
-                               kp -= N_COLS;
-                               round(inv_rnd, b1, b0, kp);
-                               l_copy(b0, b1);
-                       }
-#endif
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-                       itables(t_il);
-#endif 
-                       kp -= N_COLS;
-                       round(inv_lrnd, b0, b1, kp);
-               }
-#endif
-
-               if (num_blk == 1)
-               {
-                       // We are doing the first block so we need the IV rather than the previous
-                       // block for CBC (there is no previous block)
-                       state_in(b1, in_iv);
-               }
-               else
-               {
-                       in -= AES_BLOCK_SIZE;
-                       state_in(b1, in);
-               }
-
-               // Do the CBC with b1 which is either the IV or the ciphertext of the previous block.
-               cbc(b0, b1);
-
-               state_out(out, b0);
-       }
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
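
Decryption mirrors this with the decryption key schedule. Because the loop above walks the ciphertext from the last block back to the first, the previous block's ciphertext is still at hand for the final CBC xor. Continuing the sketch from the encryption side:

    aes_decrypt_ctx dcx[1];
    unsigned char rt[4 * AES_BLOCK_SIZE];          /* recovered plaintext */

    aes_decrypt_key128(key, dcx);
    aes_decrypt_cbc(ct, iv, 4, rt, dcx);           /* rt now matches pt */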
-
-#if defined(__cplusplus)
-}
-#endif
diff --git a/bsd/crypto/aes/gen/aeskey.c b/bsd/crypto/aes/gen/aeskey.c
deleted file mode 100644 (file)
index 5e0a645..0000000
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue Date: 26/08/2003
-
- This file contains the code for implementing the key schedule for AES
- (Rijndael) for block and key sizes of 16, 24, and 32 bytes. See aesopt.h
- for further details including optimisation.
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-/* Initialise the key schedule from the user supplied key. The key
-   length can be specified in bytes, with legal values of 16, 24
-   and 32, or in bits, with legal values of 128, 192 and 256. These
-   values correspond with Nk values of 4, 6 and 8 respectively.
-
-   The following macros implement a single cycle in the key
-   schedule generation process. The number of cycles needed
-   for each cx->n_col and nk value is:
-
-    nk =             4  5  6  7  8
-    ------------------------------
-    cx->n_col = 4   10  9  8  7  7
-    cx->n_col = 5   14 11 10  9  9
-    cx->n_col = 6   19 15 12 11 11
-    cx->n_col = 7   21 19 16 13 14
-    cx->n_col = 8   29 23 19 17 14
-*/
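
For reference, the ke4/kel4 (and ke6/ke8) macros below unroll the standard FIPS-197 expansion; for Nk = 4 the recurrence is

    w_i = w_{i-4} \oplus t_i, \qquad
    t_i = \begin{cases}
        \mathrm{SubWord}(\mathrm{RotWord}(w_{i-1})) \oplus \mathrm{Rcon}_{i/4} & i \equiv 0 \pmod 4 \\
        w_{i-1} & \text{otherwise}
    \end{cases}

where ls_box() supplies the combined RotWord/SubWord step and t_use(r,c) the Rcon round-constant table.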
-
-#define ke4(k,i) \
-{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
-    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-#define kel4(k,i) \
-{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
-    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-
-#define ke6(k,i) \
-{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
-    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-    k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \
-}
-#define kel6(k,i) \
-{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
-    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-}
-
-#define ke8(k,i) \
-{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
-    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
-    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \
-    k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \
-}
-#define kel8(k,i) \
-{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
-    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
-}
-
-#if defined(ENCRYPTION_KEY_SCHEDULE)
-
-#if defined(AES_128) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[4];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < ((11 * N_COLS - 5) / 4); ++i)
-            ke4(cx->ks, i);
-    }
-#else
-    ke4(cx->ks, 0);  ke4(cx->ks, 1);
-    ke4(cx->ks, 2);  ke4(cx->ks, 3);
-    ke4(cx->ks, 4);  ke4(cx->ks, 5);
-    ke4(cx->ks, 6);  ke4(cx->ks, 7);
-    ke4(cx->ks, 8);
-#endif
-    kel4(cx->ks, 9);
-    cx->rn = 10;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[6];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
-            ke6(cx->ks, i);
-    }
-#else
-    ke6(cx->ks, 0);  ke6(cx->ks, 1);
-    ke6(cx->ks, 2);  ke6(cx->ks, 3);
-    ke6(cx->ks, 4);  ke6(cx->ks, 5);
-    ke6(cx->ks, 6);
-#endif
-    kel6(cx->ks, 7);
-    cx->rn = 12;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[8];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    cx->ks[6] = ss[6] = word_in(key, 6);
-    cx->ks[7] = ss[7] = word_in(key, 7);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
-            ke8(cx->ks,  i);
-    }
-#else
-    ke8(cx->ks, 0); ke8(cx->ks, 1);
-    ke8(cx->ks, 2); ke8(cx->ks, 3);
-    ke8(cx->ks, 4); ke8(cx->ks, 5);
-#endif
-    kel8(cx->ks, 6);
-    cx->rn = 14;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_VAR)
-
-aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
-{
-    switch(key_len)
-    {
-#if defined( AES_ERR_CHK )
-    case 16: case 128: return aes_encrypt_key128(key, cx);
-    case 24: case 192: return aes_encrypt_key192(key, cx);
-    case 32: case 256: return aes_encrypt_key256(key, cx);
-    default: return aes_error;
-#else
-    case 16: case 128: aes_encrypt_key128(key, cx); return;
-    case 24: case 192: aes_encrypt_key192(key, cx); return;
-    case 32: case 256: aes_encrypt_key256(key, cx); return;
-#endif
-    }
-}
-
-#endif
-
-#endif
-
-#if defined(DECRYPTION_KEY_SCHEDULE)
-
-#if DEC_ROUND == NO_TABLES
-#define ff(x)   (x)
-#else
-#define ff(x)   inv_mcol(x)
-#if defined( dec_imvars )
-#define d_vars  dec_imvars
-#endif
-#endif
-
-#if 1
-#define kdf4(k,i) \
-{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \
-    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
-    ss[4] ^= k[4*(i)];   k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \
-    ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \
-}
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
-    k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
-    k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
-}
-#define kdl4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
-    k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \
-    k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \
-}
-#else
-#define kdf4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \
-}
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \
-    ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \
-}
-#define kdl4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \
-}
-#endif
-
-#define kdf6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \
-    ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \
-}
-#define kd6(k,i) \
-{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
-    ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
-    ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
-    ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
-}
-#define kdl6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[3]; \
-}
-
-#define kdf8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \
-    ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \
-    ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \
-}
-#define kd8(k,i) \
-{   aes_32t g = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \
-    ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \
-    g = ls_box(ss[3],0); \
-    ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \
-    ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \
-    ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \
-    ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \
-}
-#define kdl8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \
-    ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \
-}
-
-#if defined(AES_128) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[5];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (11 * N_COLS - 5) / 4; ++i)
-            ke4(cx->ks, i);
-        kel4(cx->ks, 9);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 10 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#else
-    kdf4(cx->ks, 0);  kd4(cx->ks, 1);
-     kd4(cx->ks, 2);  kd4(cx->ks, 3);
-     kd4(cx->ks, 4);  kd4(cx->ks, 5);
-     kd4(cx->ks, 6);  kd4(cx->ks, 7);
-     kd4(cx->ks, 8); kdl4(cx->ks, 9);
-#endif
-    cx->rn = 10;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[7];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    {   aes_32t i;
-
-        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
-            ke6(cx->ks, i);
-        kel6(cx->ks, 7);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 12 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#else
-    cx->ks[4] = ff(ss[4] = word_in(key, 4));
-    cx->ks[5] = ff(ss[5] = word_in(key, 5));
-    kdf6(cx->ks, 0); kd6(cx->ks, 1);
-    kd6(cx->ks, 2);  kd6(cx->ks, 3);
-    kd6(cx->ks, 4);  kd6(cx->ks, 5);
-    kd6(cx->ks, 6); kdl6(cx->ks, 7);
-#endif
-    cx->rn = 12;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[8];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    cx->ks[6] = ss[6] = word_in(key, 6);
-    cx->ks[7] = ss[7] = word_in(key, 7);
-    {   aes_32t i;
-
-        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
-            ke8(cx->ks,  i);
-        kel8(cx->ks,  i);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 14 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-
-#endif
-    }
-#else
-    cx->ks[4] = ff(ss[4] = word_in(key, 4));
-    cx->ks[5] = ff(ss[5] = word_in(key, 5));
-    cx->ks[6] = ff(ss[6] = word_in(key, 6));
-    cx->ks[7] = ff(ss[7] = word_in(key, 7));
-    kdf8(cx->ks, 0); kd8(cx->ks, 1);
-    kd8(cx->ks, 2);  kd8(cx->ks, 3);
-    kd8(cx->ks, 4);  kd8(cx->ks, 5);
-    kdl8(cx->ks, 6);
-#endif
-    cx->rn = 14;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_VAR)
-
-aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
-{
-    switch(key_len)
-    {
-#if defined( AES_ERR_CHK )
-    case 16: case 128: return aes_decrypt_key128(key, cx);
-    case 24: case 192: return aes_decrypt_key192(key, cx);
-    case 32: case 256: return aes_decrypt_key256(key, cx);
-    default: return aes_error;
-#else
-    case 16: case 128: aes_decrypt_key128(key, cx); return;
-    case 24: case 192: aes_decrypt_key192(key, cx); return;
-    case 32: case 256: aes_decrypt_key256(key, cx); return;
-#endif
-    }
-}
-
-#endif
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
diff --git a/bsd/crypto/aes/gen/aesopt.h b/bsd/crypto/aes/gen/aesopt.h
deleted file mode 100644 (file)
index a007948..0000000
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- My thanks go to Dag Arne Osvik for devising the schemes used here for key
- length derivation from the form of the key schedule
-
- This file contains the compilation options for AES (Rijndael) and code
- that is common across encryption, key scheduling and table generation.
-
- OPERATION
-
- These source code files implement the AES algorithm Rijndael designed by
- Joan Daemen and Vincent Rijmen. This version is designed for the standard
- block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
- and 32 bytes).
-
- This version is designed for flexibility and speed using operations on
- 32-bit words rather than operations on bytes.  It can be compiled with
- either big or little endian internal byte order but is faster when the
- native byte order for the processor is used.
-
- THE CIPHER INTERFACE
-
- The cipher interface is implemented as an array of bytes in which lower
- AES bit sequence indexes map to higher numeric significance within bytes.
-
-  aes_08t                 (an unsigned  8-bit type)
-  aes_32t                 (an unsigned 32-bit type)
-  struct aes_encrypt_ctx  (structure for the cipher encryption context)
-  struct aes_decrypt_ctx  (structure for the cipher decryption context)
-  aes_rval                the function return type
-
-  C subroutine calls:
-
-  aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_encrypt_ctx cx[1]);
-
-  aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_decrypt_ctx cx[1]);
-
- IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
- you call gen_tabs() before AES is used so that the tables are initialised.
-
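A minimal use of this C interface might look as follows (placeholder values; the 192- and 256-bit variants follow the same pattern):

    unsigned char key[16] = { 0 };   /* placeholder key */
    unsigned char in[16]  = { 0 };   /* one plaintext block */
    unsigned char out[16];
    aes_encrypt_ctx cx[1];

    aes_encrypt_key128(key, cx);
    aes_encrypt(in, out, cx);        /* encrypt a single 16-byte block */
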
- C++ aes class subroutines:
-
-     Class AESencrypt  for encryption
-
-      Constructors:
-          AESencrypt(void)
-          AESencrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval encrypt(const unsigned char *in, unsigned char *out) const
-
-      Class AESdecrypt  for decryption
-      Constructors:
-          AESdecrypt(void)
-          AESdecrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval decrypt(const unsigned char *in, unsigned char *out) const
-
-    COMPILATION
-
-    The files used to provide AES (Rijndael) are
-
-    a. aes.h for the definitions needed for use in C.
-    b. aescpp.h for the definitions needed for use in C++.
-    c. aesopt.h for setting compilation options (also includes common code).
-    d. aescrypt.c for encryption and decryption, or
-    e. aeskey.c for key scheduling.
-    f. aestab.c for table loading or generation.
-    g. aescrypt.asm for encryption and decryption using assembler code.
-    h. aescrypt.mmx.asm for encryption and decryption using MMX assembler.
-
-    To compile AES (Rijndael) for use in C code use aes.h and set the
-    defines here for the facilities you need (key lengths, encryption
-    and/or decryption). Do not define AES_DLL or AES_CPP.  Set the options
-    for optimisations and table sizes here.
-
-    To compile AES (Rijndael) for use in C++ code use aescpp.h but do
-    not define AES_DLL.
-
-    To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use
-    aes.h and include the AES_DLL define.
-
-    CONFIGURATION OPTIONS (here and in aes.h)
-
-    a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL
-    b. You may need to set PLATFORM_BYTE_ORDER to define the byte order.
-    c. If you want the code to run in a specific internal byte order, then
-       ALGORITHM_BYTE_ORDER must be set accordingly.
-    d. set other configuration options described below.
-*/
-
-#if !defined( _AESOPT_H )
-#define _AESOPT_H
-
-#include <crypto/aes/aes.h>
-
-/*  CONFIGURATION - USE OF DEFINES
-
-    Later in this section there are a number of defines that control the
-    operation of the code.  In each section, the purpose of each define is
-    explained so that the relevant form can be included or excluded by
-    setting either 1's or 0's respectively on the branches of the related
-    #if clauses.
-
-    PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS
-
-    To obtain the highest speed on processors with 32-bit words, this code
-    needs to determine the byte order of the target machine. The following
-    block of code is an attempt to capture the most obvious ways in which
-    various environments define byte order. It may well fail, in which case
-    the definitions will need to be set by editing at the points marked
-    **** EDIT HERE IF NECESSARY **** below.  My thanks go to Peter Gutmann
-    for his assistance with this endian detection nightmare.
-*/
-
-#define BRG_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-#define BRG_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-
-#if defined(__GNUC__) || defined(__GNU_LIBRARY__)
-#  if defined(__FreeBSD__) || defined(__OpenBSD__)
-#    include <sys/endian.h>
-#  elif defined( BSD ) && BSD >= 199103
-#      include <machine/endian.h>
-#  elif defined(__APPLE__)
-#    if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN )
-#      define BIG_ENDIAN
-#    elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN )
-#      define LITTLE_ENDIAN
-#    endif
-#  else
-#    include <endian.h>
-#    if defined(__BEOS__)
-#      include <byteswap.h>
-#    endif
-#  endif
-#endif
-
-#if !defined(PLATFORM_BYTE_ORDER)
-#  if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)
-#    if    defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(LITTLE_ENDIAN) &&  defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    endif
-#  elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)
-#    if    defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(_LITTLE_ENDIAN) &&  defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#   endif
-#  elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)
-#    if    defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(__LITTLE_ENDIAN__) &&  defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    endif
-#  endif
-#endif
-
-/*  if the platform is still unknown, try to find its byte order    */
-/*  from commonly used machine defines                              */
-
-#if !defined(PLATFORM_BYTE_ORDER)
-
-#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
-      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
-      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
-      defined( vax )       || defined( vms )     || defined( VMS )        || \
-      defined( __VMS )
-#  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-
-#elif defined( AMIGA )    || defined( applec )  || defined( __AS400__ )  || \
-      defined( _CRAY )    || defined( __hppa )  || defined( __hp9000 )   || \
-      defined( ibm370 )   || defined( mc68000 ) || defined( m68k )       || \
-      defined( __MRC__ )  || defined( __MVS__ ) || defined( __MWERKS__ ) || \
-      defined( sparc )    || defined( __sparc)  || defined( SYMANTEC_C ) || \
-      defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ )
-#  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#else
-#  error Please edit aesopt.h (line 234 or 236) to set the platform byte order
-#endif
-
-#endif
-
-/*  SOME LOCAL DEFINITIONS  */
-
-#define NO_TABLES              0
-#define ONE_TABLE              1
-#define FOUR_TABLES            4
-#define NONE                   0
-#define PARTIAL                1
-#define FULL                   2
-
-#if defined(bswap32)
-#define aes_sw32    bswap32
-#elif defined(bswap_32)
-#define aes_sw32    bswap_32
-#else
-#define brot(x,n)   (((aes_32t)(x) <<  n) | ((aes_32t)(x) >> (32 - n)))
-#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
-#endif
-
-/*  1. FUNCTIONS REQUIRED
-
-    This implementation provides subroutines for encryption, decryption
-    and for setting the three key lengths (separately) for encryption
-    and decryption. When the assembler code is not being used the following
-    definition blocks allow the selection of the routines that are to be
-    included in the compilation.
-*/
-#if defined( AES_ENCRYPT )
-#define ENCRYPTION
-#define ENCRYPTION_KEY_SCHEDULE
-#endif
-
-#if defined( AES_DECRYPT )
-#define DECRYPTION
-#define DECRYPTION_KEY_SCHEDULE
-#endif
-
-/*  2. ASSEMBLER SUPPORT
-
-    This define (AES_ASM, which can be set on the command line) enables the
-    use of the assembler code routines for encryption and decryption, with
-    the C code only providing key scheduling
-*/
-
-/*  3. BYTE ORDER WITHIN 32 BIT WORDS
-
-    The fundamental data processing units in Rijndael are 8-bit bytes. The
-    input, output and key input are all enumerated arrays of bytes in which
-    bytes are numbered starting at zero and increasing to one less than the
-    number of bytes in the array in question. This enumeration is only used
-    for naming bytes and does not imply any adjacency or order relationship
-    from one byte to another. When these inputs and outputs are considered
-    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
-    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
-    In this implementation bits are numbered from 0 to 7 starting at the
-    numerically least significant end of each byte (bit n represents 2^n).
-
-    However, Rijndael can be implemented more efficiently using 32-bit
-    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
-    into word[n]. While in principle these bytes can be assembled into words
-    in any positions, this implementation only supports the two formats in
-    which bytes in adjacent positions within words also have adjacent byte
-    numbers. This order is called big-endian if the lowest numbered bytes
-    in words have the highest numeric significance and little-endian if the
-    opposite applies.
-
-    This code can work in either order irrespective of the order used by the
-    machine on which it runs. Normally the internal byte order will be set
-    to the order of the processor on which the code is to be run but this
-    define can be used to reverse this in special situations
-
-    NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set
-*/
-#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-
-/*  4. FAST INPUT/OUTPUT OPERATIONS.
-
-    On some machines it is possible to improve speed by transferring the
-    bytes in the input and output arrays to and from the internal 32-bit
-    variables by addressing these arrays as if they are arrays of 32-bit
-    words.  On some machines this will always be possible but there may
-    be a large performance penalty if the byte arrays are not aligned on
-    the normal word boundaries. On other machines this technique will
-    lead to memory access errors when such 32-bit word accesses are not
-    properly aligned. The option SAFE_IO avoids such problems but will
-    often be slower on those machines that support misaligned access
-    (especially so if care is taken to align the input  and output byte
-    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
-    assumed that access to byte arrays as if they are arrays of 32-bit
-    words will not cause problems when such accesses are misaligned.
-*/
-
-/*  5. LOOP UNROLLING
-
-    The code for encryption and decryption cycles through a number of rounds
-    that can be implemented either in a loop or by expanding the code into a
-    long sequence of instructions, the latter producing a larger program but
-    one that will often be much faster. The latter is called loop unrolling.
-    There are also potential speed advantages in expanding two iterations in
-    a loop with half the number of iterations, which is called partial loop
-    unrolling.  The following options allow partial or full loop unrolling
-    to be set independently for encryption and decryption
-*/
-#if 1
-#define ENC_UNROLL  FULL
-#elif 0
-#define ENC_UNROLL  PARTIAL
-#else
-#define ENC_UNROLL  NONE
-#endif
-
-#if 1
-#define DEC_UNROLL  FULL
-#elif 0
-#define DEC_UNROLL  PARTIAL
-#else
-#define DEC_UNROLL  NONE
-#endif
-
-/*  6. FAST FINITE FIELD OPERATIONS
-
-    If this section is included, tables are used to provide faster finite
-    field arithmetic (this has no effect if FIXED_TABLES is defined).
-*/
-#if 1
-#define FF_TABLES
-#endif
-
-/*  7. INTERNAL STATE VARIABLE FORMAT
-
-    The internal state of Rijndael is stored in a number of local 32-bit
-    word variables which can be defined either as an array or as individually
-    named variables. Include this section if you want to store these local
-    variables in arrays. Otherwise individual local variables will be used.
-*/
-#if 0
-#define ARRAYS
-#endif
-
-/* In this implementation the columns of the state array are each held in
-   32-bit words. The state array can be held in various ways: in an array
-   of words, in a number of individual word variables or in a number of
-   processor registers. The following define maps a variable name x and
-   a column number c to the way the state array variable is to be held.
-   The first define below maps the state into an array x[c] whereas the
-   second form maps the state into a number of individual variables x0,
-    x1, etc.  Another form could map individual state columns to machine
-   register names.
-*/
-
-#if defined(ARRAYS)
-#define s(x,c) x[c]
-#else
-#define s(x,c) x##c
-#endif
-
-/*  8. FIXED OR DYNAMIC TABLES
-
-    When this section is included the tables used by the code are compiled
-    statically into the binary file.  Otherwise the subroutine gen_tabs()
-    must be called to compute them before the code is first used.
-*/
-#if 1
-#define FIXED_TABLES
-#endif
-
-/*  9. TABLE ALIGNMENT
-
-    On some systems speed will be improved by aligning the AES large lookup
-    tables on particular boundaries. This define should be set to a power of
-    two giving the desired alignment. It can be left undefined if alignment
-    is not needed.  This option is specific to the Microsoft VC++ compiler -
-    it seems to sometimes cause trouble for the VC++ version 6 compiler.
-*/
-
-
-/*  10. INTERNAL TABLE CONFIGURATION
-
-    This cipher proceeds by repeating a number of cycles known as 'rounds',
-    which are implemented by a round function that can optionally be speeded
-    up using tables.  The basic tables are each 256 32-bit words, with either
-    one or four tables being required for each round function depending on
-    how much speed is required. The encryption and decryption round functions
-    are different and the last encryption and decryption round functions are
-    different again making four different round functions in all.
-
-    This means that:
-      1. Normal encryption and decryption rounds can each use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-      2. The last encryption and decryption rounds can also use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-
-    Include or exclude the appropriate definitions below to set the number
-    of tables used by this implementation.
-*/
-
-#if 1   /* set tables for the normal encryption round */
-#define ENC_ROUND   FOUR_TABLES
-#elif 0
-#define ENC_ROUND   ONE_TABLE
-#else
-#define ENC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last encryption round */
-#define LAST_ENC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_ENC_ROUND  ONE_TABLE
-#else
-#define LAST_ENC_ROUND  NO_TABLES
-#endif
-
-#if 1   /* set tables for the normal decryption round */
-#define DEC_ROUND   FOUR_TABLES
-#elif 0
-#define DEC_ROUND   ONE_TABLE
-#else
-#define DEC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last decryption round */
-#define LAST_DEC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_DEC_ROUND  ONE_TABLE
-#else
-#define LAST_DEC_ROUND  NO_TABLES
-#endif
-
-/*  The decryption key schedule can be speeded up with tables in the same
-    way that the round functions can.  Include or exclude the following
-    defines to set this requirement.
-*/
-#if 1
-#define KEY_SCHED   FOUR_TABLES
-#elif 0
-#define KEY_SCHED   ONE_TABLE
-#else
-#define KEY_SCHED   NO_TABLES
-#endif
-
-/*  11. TABLE POINTER CACHING
-
-    Normally tables are referenced directly. Enable this option if you wish to
-    cache pointers to the tables in the encrypt/decrypt code.  Note that this
-       only works if you are using FOUR_TABLES for the ROUND you enable this for.
-*/
-#if 1
-#define ENC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define LAST_ENC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define DEC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define LAST_DEC_ROUND_CACHE_TABLES
-#endif
-
-
-/* END OF CONFIGURATION OPTIONS */
-
-#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
-
-/* Disable or report errors on some combinations of options */
-
-#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  NO_TABLES
-#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  ONE_TABLE
-#endif
-
-#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
-#undef  ENC_UNROLL
-#define ENC_UNROLL  NONE
-#endif
-
-#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  NO_TABLES
-#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  ONE_TABLE
-#endif
-
-#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
-#undef  DEC_UNROLL
-#define DEC_UNROLL  NONE
-#endif
-
-/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
-               higher index positions with wrap around into low positions
-    ups(x,n):  moves bytes by n positions to higher index positions in
-               words but without wrap around
-    bval(x,n): extracts a byte from a word
-
-    NOTE:      The definitions given here are intended only for use with
-               unsigned variables and with shift counts that are compile
-               time constants
-*/
-
-#if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN)
-#define upr(x,n)        (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n))))
-#define ups(x,n)        ((aes_32t) (x) << (8 * (n)))
-#define bval(x,n)       ((aes_08t)((x) >> (8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
-#endif
-
-#if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN)
-#define upr(x,n)        (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n))))
-#define ups(x,n)        ((aes_32t) (x) >> (8 * (n)))
-#define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
-#endif
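
As a quick standalone sanity check of the little-endian mappings (compiled outside the kernel, with aes_32t taken as uint32_t):

    #include <stdio.h>
    #include <stdint.h>

    #define upr(x,n) (((uint32_t)(x) << (8*(n))) | ((uint32_t)(x) >> (32 - 8*(n))))
    #define bytes2word(b0,b1,b2,b3) \
        (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))

    int main(void)
    {
        uint32_t w = bytes2word(0x01, 0x02, 0x03, 0x04);
        printf("%08x\n", w);          /* 04030201: byte 0 is least significant */
        printf("%08x\n", upr(w, 1));  /* 03020104: each byte moved up one index */
        return 0;
    }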
-
-#if defined(SAFE_IO)
-
-#define word_in(x,c)    bytes2word(((aes_08t*)(x)+4*c)[0], ((aes_08t*)(x)+4*c)[1], \
-                                   ((aes_08t*)(x)+4*c)[2], ((aes_08t*)(x)+4*c)[3])
-#define word_out(x,c,v) { ((aes_08t*)(x)+4*c)[0] = bval(v,0); ((aes_08t*)(x)+4*c)[1] = bval(v,1); \
-                          ((aes_08t*)(x)+4*c)[2] = bval(v,2); ((aes_08t*)(x)+4*c)[3] = bval(v,3); }
-
-#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
-
-#define word_in(x,c)    (*((const aes_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v))
-
-#else
-
-#define word_in(x,c)    aes_sw32(*((const aes_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v))
-
-#endif
-
-/* the finite field modular polynomial and elements */
-
-#define WPOLY   0x011b
-#define BPOLY     0x1b
-
-/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
-
-#define m1  0x80808080
-#define m2  0x7f7f7f7f
-#define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
-
-/* The following defines provide alternative definitions of gf_mulx that might
-   give improved performance if a fast 32-bit multiply is not available. Note
-   that a temporary variable u needs to be defined where gf_mulx is used.
-
-#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
-#define m4  (0x01010101 * BPOLY)
-#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
-*/
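
A standalone check of the primary definition (again outside the kernel build): {02}*{57} = {ae} with no reduction, {02}*{80} = {1b} after reduction by the field polynomial, and the masks keep the four byte lanes independent:

    #include <stdio.h>
    #include <stdint.h>

    #define m1 0x80808080u
    #define m2 0x7f7f7f7fu
    #define BPOLY 0x1bu
    #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))

    int main(void)
    {
        printf("%08x\n", gf_mulx(0x00000057u)); /* 000000ae */
        printf("%08x\n", gf_mulx(0x00000080u)); /* 0000001b */
        printf("%08x\n", gf_mulx(0x80575780u)); /* 1baeae1b: all four lanes at once */
        return 0;
    }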
-
-/* Work out which tables are needed for the different options   */
-
-#if defined( AES_ASM )
-#if defined( ENC_ROUND )
-#undef  ENC_ROUND
-#endif
-#define ENC_ROUND   FOUR_TABLES
-#if defined( LAST_ENC_ROUND )
-#undef  LAST_ENC_ROUND
-#endif
-#define LAST_ENC_ROUND  FOUR_TABLES
-#if defined( DEC_ROUND )
-#undef  DEC_ROUND
-#endif
-#define DEC_ROUND   FOUR_TABLES
-#if defined( LAST_DEC_ROUND )
-#undef  LAST_DEC_ROUND
-#endif
-#define LAST_DEC_ROUND  FOUR_TABLES
-#if defined( KEY_SCHED )
-#undef  KEY_SCHED
-#define KEY_SCHED   FOUR_TABLES
-#endif
-#endif
-
-#if defined(ENCRYPTION) || defined(AES_ASM)
-#if ENC_ROUND == ONE_TABLE
-#define FT1_SET
-#elif ENC_ROUND == FOUR_TABLES
-#define FT4_SET
-#else
-#define SBX_SET
-#endif
-#if LAST_ENC_ROUND == ONE_TABLE
-#define FL1_SET
-#elif LAST_ENC_ROUND == FOUR_TABLES
-#define FL4_SET
-#elif !defined(SBX_SET)
-#define SBX_SET
-#endif
-#endif
-
-#if defined(DECRYPTION) || defined(AES_ASM)
-#if DEC_ROUND == ONE_TABLE
-#define IT1_SET
-#elif DEC_ROUND == FOUR_TABLES
-#define IT4_SET
-#else
-#define ISB_SET
-#endif
-#if LAST_DEC_ROUND == ONE_TABLE
-#define IL1_SET
-#elif LAST_DEC_ROUND == FOUR_TABLES
-#define IL4_SET
-#elif !defined(ISB_SET)
-#define ISB_SET
-#endif
-#endif
-
-#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)
-#if KEY_SCHED == ONE_TABLE
-#define LS1_SET
-#define IM1_SET
-#elif KEY_SCHED == FOUR_TABLES
-#define LS4_SET
-#define IM4_SET
-#elif !defined(SBX_SET)
-#define SBX_SET
-#endif
-#endif
-
-/* generic definitions of Rijndael macros that use tables    */
-
-#define no_table(x,box,vf,rf,c) bytes2word( \
-    box[bval(vf(x,0,c),rf(0,c))], \
-    box[bval(vf(x,1,c),rf(1,c))], \
-    box[bval(vf(x,2,c),rf(2,c))], \
-    box[bval(vf(x,3,c),rf(3,c))])
-
-#define one_table(x,op,tab,vf,rf,c) \
- (     tab[bval(vf(x,0,c),rf(0,c))] \
-  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
-  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
-  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
-
-#define four_tables(x,tab,vf,rf,c) \
- (  tab[0][bval(vf(x,0,c),rf(0,c))] \
-  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-
-#define four_cached_tables(x,tab,vf,rf,c) \
-(  tab##0[bval(vf(x,0,c),rf(0,c))] \
-   ^ tab##1[bval(vf(x,1,c),rf(1,c))] \
-   ^ tab##2[bval(vf(x,2,c),rf(2,c))] \
-   ^ tab##3[bval(vf(x,3,c),rf(3,c))])
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((8+r-c)&3)
-
-/* perform forward and inverse column mix operation on four bytes in long word x in */
-/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
-
-#if defined(FM4_SET)    /* not currently used */
-#define fwd_mcol(x)     four_tables(x,t_use(f,m),vf1,rf1,0)
-#elif defined(FM1_SET)  /* not currently used */
-#define fwd_mcol(x)     one_table(x,upr,t_use(f,m),vf1,rf1,0)
-#else
-#define dec_fmvars      aes_32t g2
-#define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
-#endif
-
-#if defined(IM4_SET)
-#define inv_mcol(x)     four_tables(x,t_use(i,m),vf1,rf1,0)
-#elif defined(IM1_SET)
-#define inv_mcol(x)     one_table(x,upr,t_use(i,m),vf1,rf1,0)
-#else
-#define dec_imvars      aes_32t g2, g4, g9
-#define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
-                        (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
-#endif
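
These expressions are just the MixColumns matrices rearranged: in fwd_mcol, g2 = {02}.x, so the terms contribute {02}, {03} (= x ^ g2) and two copies of {01}, rotated into the {02,03,01,01} pattern; in inv_mcol, g2 = {02}.x, g4 = {04}.x, g9 = {09}.x and g4 ^= g9 leaves {0d}.x, giving the inverse coefficients {0e,0b,0d,09} after the rotations.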
-
-#if defined(FL4_SET)
-#define ls_box(x,c)     four_tables(x,t_use(f,l),vf1,rf2,c)
-#elif   defined(LS4_SET)
-#define ls_box(x,c)     four_tables(x,t_use(l,s),vf1,rf2,c)
-#elif defined(FL1_SET)
-#define ls_box(x,c)     one_table(x,upr,t_use(f,l),vf1,rf2,c)
-#elif defined(LS1_SET)
-#define ls_box(x,c)     one_table(x,upr,t_use(l,s),vf1,rf2,c)
-#else
-#define ls_box(x,c)     no_table(x,t_use(s,box),vf1,rf2,c)
-#endif
-
-#endif
diff --git a/bsd/crypto/aes/gen/aestab.c b/bsd/crypto/aes/gen/aestab.c
deleted file mode 100644 (file)
index dfd2ee9..0000000
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
-*/
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define DO_TABLES
-
-#include "aesopt.h"
-
-#if defined(FIXED_TABLES)
-
-#define sb_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
-
-#define isb_data(w) {\
-    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
-    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
-    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
-    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
-    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
-    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
-    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
-    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
-    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
-    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
-    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
-    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
-    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
-    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
-    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
-    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
-    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
-    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
-    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
-    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
-    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
-    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
-    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
-    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
-    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
-    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
-    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
-    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
-    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
-    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
-    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
-    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
-
-#define mm_data(w) {\
-    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
-    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
-    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
-    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
-    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
-    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
-    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
-    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
-    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
-    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
-    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
-    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
-    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
-    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
-    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
-    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
-    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
-    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
-    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
-    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
-    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
-    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
-    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
-    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
-    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
-    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
-    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
-    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
-    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
-    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
-    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
-    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
-
-#define rc_data(w) {\
-    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10), w(0x20), w(0x40), w(0x80),\
-    w(0x1b), w(0x36) }
-
-#define h0(x)   (x)
-
-#define w0(p)   bytes2word(p, 0, 0, 0)
-#define w1(p)   bytes2word(0, p, 0, 0)
-#define w2(p)   bytes2word(0, 0, p, 0)
-#define w3(p)   bytes2word(0, 0, 0, p)
-
-#define u0(p)   bytes2word(f2(p), p, p, f3(p))
-#define u1(p)   bytes2word(f3(p), f2(p), p, p)
-#define u2(p)   bytes2word(p, f3(p), f2(p), p)
-#define u3(p)   bytes2word(p, p, f3(p), f2(p))
-
-#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
-#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
-#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
-#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
-
-#endif
-
-#if defined(FIXED_TABLES) || !defined(FF_TABLES)
-
-#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
-#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
-#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
-                        ^ (((x>>5) & 4) * WPOLY))
-#define f3(x)   (f2(x) ^ x)
-#define f9(x)   (f8(x) ^ x)
-#define fb(x)   (f8(x) ^ f2(x) ^ x)
-#define fd(x)   (f8(x) ^ f4(x) ^ x)
-#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
-
-#else
-
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ? pow[ 255 - log[x]] : 0)
-
-#endif
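-
-/*  Illustrative sketch, not part of the original file: f2() above is the
-    standard AES "xtime" doubling in GF(2^8) modulo the field polynomial
-    (WPOLY, 0x11b), and chaining it reproduces the FIPS-197 worked example
-    {57}*{02} = {ae} -> {47} -> {8e}.
-*/
-#if 0   /* example only */
-static aes_08t xtime_demo(void)
-{   aes_08t x = 0x57;
-    x = (aes_08t)f2(x);     /* 0xae */
-    x = (aes_08t)f2(x);     /* 0x47 */
-    x = (aes_08t)f2(x);     /* 0x8e */
-    return x;
-}
-#endif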
-
-#include "aestab.h"
-
-#if defined(FIXED_TABLES)
-
-/* stub provided so that a stray call to gen_tabs() is harmless when fixed tables are in use */
-
-void gen_tabs(void)
-{
-}
-
-#else   /* dynamic table generation */
-
-#if !defined(FF_TABLES)
-
-/*  Generate the tables for the dynamic table option
-
-    It will generally be sensible to use tables to compute finite
-    field multiplies and inverses, but where memory is scarce this
-    code might sometimes be better.  It only has an effect during
-    initialisation, so it is fairly unimportant in overall terms.
-*/
-
-/*  return 2 ^ (n - 1) where n is the bit number of the highest bit
-    set in x with x in the range 1 < x < 0x00000200.   This form is
-    used so that locals within fi can be bytes rather than words
-*/
-
-static aes_08t hibit(const aes_32t x)
-{   aes_08t r = (aes_08t)((x >> 1) | (x >> 2));
-
-    r |= (r >> 2);
-    r |= (r >> 4);
-    return (r + 1) >> 1;
-}
-
-/* return the inverse of the finite field element x */
-
-static aes_08t fi(const aes_08t x)
-{   aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
-
-    if(x < 2) return x;
-
-    for(;;)
-    {
-        if(!n1) return v1;
-
-        while(n2 >= n1)
-        {
-            n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
-        }
-
-        if(!n2) return v2;
-
-        while(n1 >= n2)
-        {
-            n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
-        }
-    }
-}
-
-#endif
-
-/* The forward and inverse affine transformations used in the S-box */
-
-#define fwd_affine(x) \
-    (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8)))
-
-#define inv_affine(x) \
-    (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8)))
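-
-/*  Illustrative sketch, not part of the original file: composing fi() with
-    fwd_affine() produces forward S-box entries, exactly as the gen_tabs()
-    loop below does.  The fixed sb_data table above lists sbox[0x53] == 0xed,
-    and this computes the same value.
-*/
-#if 0   /* example only */
-static aes_08t sbox_entry_demo(void)
-{   aes_32t w;      /* scratch word used by the affine macros */
-    return fwd_affine(fi((aes_08t)0x53));   /* 0xed */
-}
-#endif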
-
-static int init = 0;
-
-void gen_tabs(void)
-{   aes_32t  i, w;
-
-#if defined(FF_TABLES)
-
-    aes_08t  pow[512], log[256];
-
-    if(init) return;
-    /*  log and power tables for GF(2^8) finite field with
-        WPOLY as modular polynomial - the simplest primitive
-        root is 0x03, used here to generate the tables
-    */
-
-    i = 0; w = 1;
-    do
-    {
-        pow[i] = (aes_08t)w;
-        pow[i + 255] = (aes_08t)w;
-        log[w] = (aes_08t)i++;
-        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-    }
-    while (w != 1);
-
-#else
-    if(init) return;
-#endif
-
-    for(i = 0, w = 1; i < RC_LENGTH; ++i)
-    {
-        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
-        w = f2(w);
-    }
-
-    for(i = 0; i < 256; ++i)
-    {   aes_08t    b;
-
-        b = fwd_affine(fi((aes_08t)i));
-        w = bytes2word(f2(b), b, b, f3(b));
-
-#if defined( SBX_SET )
-        t_set(s,box)[i] = b;
-#endif
-
-#if defined( FT1_SET )                 /* tables for a normal encryption round */
-        t_set(f,n)[i] = w;
-#endif
-#if defined( FT4_SET )
-        t_set(f,n)[0][i] = w;
-        t_set(f,n)[1][i] = upr(w,1);
-        t_set(f,n)[2][i] = upr(w,2);
-        t_set(f,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-
-#if defined( FL1_SET )                 /* tables for last encryption round (may also   */
-        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
-#endif
-#if defined( FL4_SET )
-        t_set(f,l)[0][i] = w;
-        t_set(f,l)[1][i] = upr(w,1);
-        t_set(f,l)[2][i] = upr(w,2);
-        t_set(f,l)[3][i] = upr(w,3);
-#endif
-
-#if defined( LS1_SET )                 /* table for key schedule if t_set(f,l) above is    */
-        t_set(l,s)[i] = w;      /* not of the required form                     */
-#endif
-#if defined( LS4_SET )
-        t_set(l,s)[0][i] = w;
-        t_set(l,s)[1][i] = upr(w,1);
-        t_set(l,s)[2][i] = upr(w,2);
-        t_set(l,s)[3][i] = upr(w,3);
-#endif
-
-        b = fi(inv_affine((aes_08t)i));
-        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-#if defined( IM1_SET )                 /* tables for the inverse mix column operation  */
-        t_set(i,m)[b] = w;
-#endif
-#if defined( IM4_SET )
-        t_set(i,m)[0][b] = w;
-        t_set(i,m)[1][b] = upr(w,1);
-        t_set(i,m)[2][b] = upr(w,2);
-        t_set(i,m)[3][b] = upr(w,3);
-#endif
-
-#if defined( ISB_SET )
-        t_set(i,box)[i] = b;
-#endif
-#if defined( IT1_SET )                 /* tables for a normal decryption round */
-        t_set(i,n)[i] = w;
-#endif
-#if defined( IT4_SET )
-        t_set(i,n)[0][i] = w;
-        t_set(i,n)[1][i] = upr(w,1);
-        t_set(i,n)[2][i] = upr(w,2);
-        t_set(i,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-#if defined( IL1_SET )                 /* tables for last decryption round */
-        t_set(i,l)[i] = w;
-#endif
-#if defined( IL4_SET )
-        t_set(i,l)[0][i] = w;
-        t_set(i,l)[1][i] = upr(w,1);
-        t_set(i,l)[2][i] = upr(w,2);
-        t_set(i,l)[3][i] = upr(w,3);
-#endif
-    }
-    init = 1;
-}
-
-#endif
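-
-/*  Illustrative usage note, not part of the original file: key setup code
-    (the gen_tabs() call sites in aeskey.c mentioned in aestab.h) runs
-    gen_tabs() before first use; the init flag makes repeated calls harmless,
-    and the FIXED_TABLES build reduces the routine to the empty stub above. */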
-
-#if defined(__cplusplus)
-}
-#endif
-
diff --git a/bsd/crypto/aes/gen/aestab.h b/bsd/crypto/aes/gen/aestab.h
deleted file mode 100644 (file)
index 004ef9e..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- This file contains the code for declaring the tables needed to implement
- AES. The file aesopt.h is assumed to be included before this header file.
- If there are no global variables, the definitions here can be used to put
- the AES tables in a structure so that a pointer can then be added to the
- AES context to pass them to the AES routines that need them.   If this
- facility is used, the calling program has to ensure that this pointer is
- managed appropriately.  In particular, the value of the t_dec(in,it) item
- in the table structure must be set to zero in order to ensure that the
- tables are initialised. In practice the three code sequences in aeskey.c
- that control the calls to gen_tabs() and the gen_tabs() routine itself will
- have to be changed for a specific implementation. If global variables are
- available it will generally be preferable to use them with the precomputed
- FIXED_TABLES option that uses static global tables.
-
- The following defines can be used to control the way the tables
- are defined, initialised and used in embedded environments that
- require special features for these purposes
-
-    the 't_dec' construction is used to declare fixed table arrays
-    the 't_set' construction is used to set fixed table values
-    the 't_use' construction is used to access fixed table values
-
-    256 byte tables:
-
-        t_xxx(s,box)    => forward S box
-        t_xxx(i,box)    => inverse S box
-
-    256 32-bit word OR 4 x 256 32-bit word tables:
-
-        t_xxx(f,n)      => forward normal round
-        t_xxx(f,l)      => forward last round
-        t_xxx(i,n)      => inverse normal round
-        t_xxx(i,l)      => inverse last round
-        t_xxx(l,s)      => key schedule table
-        t_xxx(i,m)      => key schedule table
-
-    Other variables and tables:
-
-        t_xxx(r,c)      => the rcon table
-*/
-
-#if !defined( _AESTAB_H )
-#define _AESTAB_H
-
-#define t_dec(m,n) t_##m##n
-#define t_set(m,n) t_##m##n
-#define t_use(m,n) t_##m##n
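-
-/*  Illustrative expansions, not part of the original header: the token
-    pasting above makes t_dec(s,box), t_set(s,box) and t_use(s,box) all name
-    the same identifier, t_sbox; likewise t_dec(f,n) -> t_fn and
-    t_dec(r,c) -> t_rc, so declaration, initialisation and use agree. */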
-
-#if defined(FIXED_TABLES)
-#define Const const
-#else
-#define Const
-#endif
-
-#if defined(DO_TABLES)
-#define Extern
-#else
-#define Extern extern
-#endif
-
-#if defined(_MSC_VER) && defined(TABLE_ALIGN)
-#define Align __declspec(align(TABLE_ALIGN))
-#else
-#define Align
-#endif
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#if defined(DO_TABLES) && defined(FIXED_TABLES)
-#define d_1(t,n,b,e)       Align Const t n[256]    =   b(e)
-#define d_4(t,n,b,e,f,g,h) Align Const t n[4][256] = { b(e), b(f), b(g), b(h) }
-Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
-#else
-#define d_1(t,n,b,e)       Extern Align Const t n[256]
-#define d_4(t,n,b,e,f,g,h) Extern Align Const t n[4][256]
-Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH];
-#endif
-
-#if defined( SBX_SET )
-    d_1(aes_08t, t_dec(s,box), sb_data, h0);
-#endif
-#if defined( ISB_SET )
-    d_1(aes_08t, t_dec(i,box), isb_data, h0);
-#endif
-
-#if defined( FT1_SET )
-    d_1(aes_32t, t_dec(f,n), sb_data, u0);
-#endif
-#if defined( FT4_SET )
-    d_4(aes_32t, t_dec(f,n), sb_data, u0, u1, u2, u3);
-#endif
-
-#if defined( FL1_SET )
-    d_1(aes_32t, t_dec(f,l), sb_data, w0);
-#endif
-#if defined( FL4_SET )
-    d_4(aes_32t, t_dec(f,l), sb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( IT1_SET )
-    d_1(aes_32t, t_dec(i,n), isb_data, v0);
-#endif
-#if defined( IT4_SET )
-    d_4(aes_32t, t_dec(i,n), isb_data, v0, v1, v2, v3);
-#endif
-
-#if defined( IL1_SET )
-    d_1(aes_32t, t_dec(i,l), isb_data, w0);
-#endif
-#if defined( IL4_SET )
-    d_4(aes_32t, t_dec(i,l), isb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( LS1_SET )
-#if defined( FL1_SET )
-#undef  LS1_SET
-#else
-    d_1(aes_32t, t_dec(l,s), sb_data, w0);
-#endif
-#endif
-
-#if defined( LS4_SET )
-#if defined( FL4_SET )
-#undef  LS4_SET
-#else
-    d_4(aes_32t, t_dec(l,s), sb_data, w0, w1, w2, w3);
-#endif
-#endif
-
-#if defined( IM1_SET )
-    d_1(aes_32t, t_dec(i,m), mm_data, v0);
-#endif
-#if defined( IM4_SET )
-    d_4(aes_32t, t_dec(i,m), mm_data, v0, v1, v2, v3);
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
diff --git a/bsd/crypto/aes/i386/AES.s b/bsd/crypto/aes/i386/AES.s
deleted file mode 100644 (file)
index 9bf440a..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*     AES.s -- Core AES routines for Intel processors.
-
-       Written by Eric Postpischil, January 30, 2008.
-*/
-
-
-/*     We build these AES routines as a single module because the routines refer
-       to labels in Data.s and it is easier and faster to refer to them as local
-       labels.  In my implementations of AES for CommonCrypto, both i386 and
-       x86_64 use position-independent code.  For this in-kernel implementation,
-       i386 has been converted to absolute addressing, but x86_64 still uses PIC.
-
-       A local label can be referred to with position-independent assembler
-       expressions such as "label-base(register)", where <base> is a local label
-       whose address has been loaded into <register>.  (On i386, this is typically
-       done with the idiom of a call to the next instruction and a pop of that
-       return address into a register.)  Without local labels, the references must
-	go through memory slots reserved for the addresses of "lazy symbols", which
-	are filled in by the dynamic loader and loaded by the code that wants the
-	address.
-
-       So the various routines in other files are assembled here via #include
-       directives.
-*/
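-
-/*	Illustrative sketch of the pick-up-PC idiom described above, not part of
-	the original file:
-
-		call	1f		// pushes the address of label 1
-	1:	pop	%ebx		// ebx now holds that address
-
-	after which a local label can be referenced as label-1b(%ebx).
-*/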
-#include "Data.s"
-
-
-#define        TableSize       (256*4)
-       /*      Each of the arrays defined in Data.s except for the round constants
-               in _AESRcon is composed of four tables of 256 entries of four bytes
-               each.  TableSize is the number of bytes in one of those four tables.
-       */
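-	/*	Illustrative consequence, not from the original file: with four-byte
-		entries, entry j of table i within such an array lies at byte offset
-		i*TableSize + 4*j from the array's label.
-	*/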
-
-
-// Include constants describing the AES context structures.
-#include "Context.h"
-
-
-/*     Define a macro to select a value based on architecture.  This reduces
-       some of the architecture conditionalization later in the source.
-*/
-#if defined __i386__
-       #define Arch(i386, x86_64)      i386
-#elif defined __x86_64__
-       #define Arch(i386, x86_64)      x86_64
-#endif
-
-
-// Define an instruction for moving pointers.
-#define        movp    Arch(movd, movd)
-	// The latter argument should be "movq", but the assembler spells it "movd".
-
-
-/*     Rename the general registers.  This makes it easier to keep track of them
-       and provides names for the "whole register" that are uniform between i386
-       and x86_64.
-*/
-#if defined __i386__
-       #define r0      %eax    // Available for any use.
-       #define r1      %ecx    // Available for any use, some special purposes (loop).
-       #define r2      %edx    // Available for any use.
-       #define r3      %ebx    // Must be preserved by called routine.
-       #define r4      %esp    // Stack pointer.
-       #define r5      %ebp    // Frame pointer, must preserve, no bare indirect.
-       #define r6      %esi    // Must be preserved by called routine.
-       #define r7      %edi    // Must be preserved by called routine.
-#elif defined __x86_64__
-       #define r0      %rax    // Available for any use.
-       #define r1      %rcx    // Available for any use.
-       #define r2      %rdx    // Available for any use.
-       #define r3      %rbx    // Must be preserved by called routine.
-       #define r4      %rsp    // Stack pointer.
-       #define r5      %rbp    // Frame pointer.  Must be preserved by called routine.
-       #define r6      %rsi    // Available for any use.
-       #define r7      %rdi    // Available for any use.
-       #define r8      %r8             // Available for any use.
-       #define r9      %r9             // Available for any use.
-       #define r10     %r10    // Available for any use.
-       #define r11     %r11    // Available for any use.
-       #define r12     %r12    // Must be preserved by called routine.
-       #define r13     %r13    // Must be preserved by called routine.
-       #define r14     %r14    // Must be preserved by called routine.
-       #define r15     %r15    // Must be preserved by called routine.
-#else
-       #error "Unknown architecture."
-#endif
-
-// Define names for parts of registers.
-
-#define        r0d             %eax    // Low 32 bits of r0.
-#define        r1d             %ecx    // Low 32 bits of r1.
-#define        r2d             %edx    // Low 32 bits of r2.
-#define        r3d             %ebx    // Low 32 bits of r3.
-#define        r5d             %ebp    // Low 32 bits of r5.
-#define        r6d             %esi    // Low 32 bits of r6.
-#define        r7d             %edi    // Low 32 bits of r7.
-#define        r8d             %r8d    // Low 32 bits of r8.
-#define        r9d             %r9d    // Low 32 bits of r9.
-#define        r11d    %r11d   // Low 32 bits of r11.
-
-#define        r0l             %al             // Low byte of r0.
-#define        r1l             %cl             // Low byte of r1.
-#define        r2l             %dl             // Low byte of r2.
-#define        r3l             %bl             // Low byte of r3.
-#define        r5l             %bpl    // Low byte of r5.
-
-#define        r0h             %ah             // Second lowest byte of r0.
-#define        r1h             %ch             // Second lowest byte of r1.
-#define        r2h             %dh             // Second lowest byte of r2.
-#define        r3h             %bh             // Second lowest byte of r3.
-
-
-       .text
-
-
-// Define encryption routine, _AESEncryptWithExpandedKey
-#define        Select  0
-#include "EncryptDecrypt.s"
-#undef Select
-
-
-// Define decryption routine, _AESDecryptWithExpandedKey
-#define        Select  1
-#include "EncryptDecrypt.s"
-#undef Select
-
-// Define a second encryption routine; the Select value chooses which
-// variant EncryptDecrypt.s emits.
-#define        Select  2
-#include "EncryptDecrypt.s"
-#undef Select
-
-
-// Define a second decryption routine; the Select value chooses which
-// variant EncryptDecrypt.s emits.
-#define        Select  3
-#include "EncryptDecrypt.s"
-#undef Select
-
-
-// Define key expansion routine for encryption, _AESExpandKeyForEncryption.
-#include "ExpandKeyForEncryption.s"
-
-
-// Define key expansion for decryption routine, _AESExpandKeyForDecryption.
-#include "ExpandKeyForDecryption.s"
diff --git a/bsd/crypto/aes/i386/Context.h b/bsd/crypto/aes/i386/Context.h
deleted file mode 100644 (file)
index f53cb95..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-// Define byte offset of key within context structure.
-#define        ContextKey                      0
-
-/*     Define byte offset of key length within context structure.  The number
-       stored there is the number of bytes from the start of the first round key
-       to the start of the last round key.  That is 16 less than the number of
-       bytes in the entire key.
-*/
-#define        ContextKeyLength        240
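-
-/*	Illustrative C view of the layout these offsets assume (field names and
-	types are guesses, not taken from the original source):
-
-		struct context_sketch {
-			unsigned char key[240];		// ContextKey == 0
-			unsigned int  key_length;	// ContextKeyLength == 240
-		};
-
-	For a 256-bit key the expanded key is 240 bytes (15 round keys of 16
-	bytes), so the stored length value would be 240 - 16 == 224.
-*/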
diff --git a/bsd/crypto/aes/i386/Data.mk b/bsd/crypto/aes/i386/Data.mk
deleted file mode 100644 (file)
index 4b55d63..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-default:
-       @echo "This makefile builds Data.s, which contains constant data for the"
-       @echo "AES implementation.  This file does not normally need to be rebuilt,"
-       @echo "so it is checked into the source code repository.  It should be"
-       @echo "changed only when the implementation changes and needs data in a"
-       @echo "different format.  (This file can also build a C version, Data.c,"
-       @echo "but that is not currently in use.)"
-       @echo ""
-       @echo "To rebuild the file(s), execute \"make -f Data.mk all\"."
-
-.PHONY:        all clean
-Targets = Data.s
-all:   $(Targets)
-
-CFLAGS += -O3 -std=c99 -Wmost -Werror
-
-.INTERMEDIATE: MakeData
-MakeData:      MakeData.c
-
-# Do not leave bad output files if the build fails.
-.DELETE_ON_ERROR:      $(Targets)
-
-Data.c:        MakeData
-       ./$< >$@ C
-
-Data.s:        MakeData
-       ./$< >$@ Intel
-
-clean:
-       -rm $(Targets)
diff --git a/bsd/crypto/aes/i386/Data.s b/bsd/crypto/aes/i386/Data.s
deleted file mode 100644 (file)
index d330462..0000000
+++ /dev/null
@@ -1,5196 +0,0 @@
-// This file was generated by MakeData.c.
-
-
-       .const
-
-
-// Round constants.
-       .globl  _AESRcon
-       .private_extern _AESRcon
-_AESRcon:
-       .byte   0       // Not used, included for indexing simplicity.
-       .byte   0x01
-       .byte   0x02
-       .byte   0x04
-       .byte   0x08
-       .byte   0x10
-       .byte   0x20
-       .byte   0x40
-       .byte   0x80
-       .byte   0x1b
-       .byte   0x36
-
-
-// Tables for InvMixColumn.
-       .globl  _AESInvMixColumnTable
-       .private_extern _AESInvMixColumnTable
-       .align  2
-_AESInvMixColumnTable:
-       // Table 0.
-       .long   0x00000000
-       .long   0x0b0d090e
-       .long   0x161a121c
-       .long   0x1d171b12
-       .long   0x2c342438
-       .long   0x27392d36
-       .long   0x3a2e3624
-       .long   0x31233f2a
-       .long   0x58684870
-       .long   0x5365417e
-       .long   0x4e725a6c
-       .long   0x457f5362
-       .long   0x745c6c48
-       .long   0x7f516546
-       .long   0x62467e54
-       .long   0x694b775a
-       .long   0xb0d090e0
-       .long   0xbbdd99ee
-       .long   0xa6ca82fc
-       .long   0xadc78bf2
-       .long   0x9ce4b4d8
-       .long   0x97e9bdd6
-       .long   0x8afea6c4
-       .long   0x81f3afca
-       .long   0xe8b8d890
-       .long   0xe3b5d19e
-       .long   0xfea2ca8c
-       .long   0xf5afc382
-       .long   0xc48cfca8
-       .long   0xcf81f5a6
-       .long   0xd296eeb4
-       .long   0xd99be7ba
-       .long   0x7bbb3bdb
-       .long   0x70b632d5
-       .long   0x6da129c7
-       .long   0x66ac20c9
-       .long   0x578f1fe3
-       .long   0x5c8216ed
-       .long   0x41950dff
-       .long   0x4a9804f1
-       .long   0x23d373ab
-       .long   0x28de7aa5
-       .long   0x35c961b7
-       .long   0x3ec468b9
-       .long   0x0fe75793
-       .long   0x04ea5e9d
-       .long   0x19fd458f
-       .long   0x12f04c81
-       .long   0xcb6bab3b
-       .long   0xc066a235
-       .long   0xdd71b927
-       .long   0xd67cb029
-       .long   0xe75f8f03
-       .long   0xec52860d
-       .long   0xf1459d1f
-       .long   0xfa489411
-       .long   0x9303e34b
-       .long   0x980eea45
-       .long   0x8519f157
-       .long   0x8e14f859
-       .long   0xbf37c773
-       .long   0xb43ace7d
-       .long   0xa92dd56f
-       .long   0xa220dc61
-       .long   0xf66d76ad
-       .long   0xfd607fa3
-       .long   0xe07764b1
-       .long   0xeb7a6dbf
-       .long   0xda595295
-       .long   0xd1545b9b
-       .long   0xcc434089
-       .long   0xc74e4987
-       .long   0xae053edd
-       .long   0xa50837d3
-       .long   0xb81f2cc1
-       .long   0xb31225cf
-       .long   0x82311ae5
-       .long   0x893c13eb
-       .long   0x942b08f9
-       .long   0x9f2601f7
-       .long   0x46bde64d
-       .long   0x4db0ef43
-       .long   0x50a7f451
-       .long   0x5baafd5f
-       .long   0x6a89c275
-       .long   0x6184cb7b
-       .long   0x7c93d069
-       .long   0x779ed967
-       .long   0x1ed5ae3d
-       .long   0x15d8a733
-       .long   0x08cfbc21
-       .long   0x03c2b52f
-       .long   0x32e18a05
-       .long   0x39ec830b
-       .long   0x24fb9819
-       .long   0x2ff69117
-       .long   0x8dd64d76
-       .long   0x86db4478
-       .long   0x9bcc5f6a
-       .long   0x90c15664
-       .long   0xa1e2694e
-       .long   0xaaef6040
-       .long   0xb7f87b52
-       .long   0xbcf5725c
-       .long   0xd5be0506
-       .long   0xdeb30c08
-       .long   0xc3a4171a
-       .long   0xc8a91e14
-       .long   0xf98a213e
-       .long   0xf2872830
-       .long   0xef903322
-       .long   0xe49d3a2c
-       .long   0x3d06dd96
-       .long   0x360bd498
-       .long   0x2b1ccf8a
-       .long   0x2011c684
-       .long   0x1132f9ae
-       .long   0x1a3ff0a0
-       .long   0x0728ebb2
-       .long   0x0c25e2bc
-       .long   0x656e95e6
-       .long   0x6e639ce8
-       .long   0x737487fa
-       .long   0x78798ef4
-       .long   0x495ab1de
-       .long   0x4257b8d0
-       .long   0x5f40a3c2
-       .long   0x544daacc
-       .long   0xf7daec41
-       .long   0xfcd7e54f
-       .long   0xe1c0fe5d
-       .long   0xeacdf753
-       .long   0xdbeec879
-       .long   0xd0e3c177
-       .long   0xcdf4da65
-       .long   0xc6f9d36b
-       .long   0xafb2a431
-       .long   0xa4bfad3f
-       .long   0xb9a8b62d
-       .long   0xb2a5bf23
-       .long   0x83868009
-       .long   0x888b8907
-       .long   0x959c9215
-       .long   0x9e919b1b
-       .long   0x470a7ca1
-       .long   0x4c0775af
-       .long   0x51106ebd
-       .long   0x5a1d67b3
-       .long   0x6b3e5899
-       .long   0x60335197
-       .long   0x7d244a85
-       .long   0x7629438b
-       .long   0x1f6234d1
-       .long   0x146f3ddf
-       .long   0x097826cd
-       .long   0x02752fc3
-       .long   0x335610e9
-       .long   0x385b19e7
-       .long   0x254c02f5
-       .long   0x2e410bfb
-       .long   0x8c61d79a
-       .long   0x876cde94
-       .long   0x9a7bc586
-       .long   0x9176cc88
-       .long   0xa055f3a2
-       .long   0xab58faac
-       .long   0xb64fe1be
-       .long   0xbd42e8b0
-       .long   0xd4099fea
-       .long   0xdf0496e4
-       .long   0xc2138df6
-       .long   0xc91e84f8
-       .long   0xf83dbbd2
-       .long   0xf330b2dc
-       .long   0xee27a9ce
-       .long   0xe52aa0c0
-       .long   0x3cb1477a
-       .long   0x37bc4e74
-       .long   0x2aab5566
-       .long   0x21a65c68
-       .long   0x10856342
-       .long   0x1b886a4c
-       .long   0x069f715e
-       .long   0x0d927850
-       .long   0x64d90f0a
-       .long   0x6fd40604
-       .long   0x72c31d16
-       .long   0x79ce1418
-       .long   0x48ed2b32
-       .long   0x43e0223c
-       .long   0x5ef7392e
-       .long   0x55fa3020
-       .long   0x01b79aec
-       .long   0x0aba93e2
-       .long   0x17ad88f0
-       .long   0x1ca081fe
-       .long   0x2d83bed4
-       .long   0x268eb7da
-       .long   0x3b99acc8
-       .long   0x3094a5c6
-       .long   0x59dfd29c
-       .long   0x52d2db92
-       .long   0x4fc5c080
-       .long   0x44c8c98e
-       .long   0x75ebf6a4
-       .long   0x7ee6ffaa
-       .long   0x63f1e4b8
-       .long   0x68fcedb6
-       .long   0xb1670a0c
-       .long   0xba6a0302
-       .long   0xa77d1810
-       .long   0xac70111e
-       .long   0x9d532e34
-       .long   0x965e273a
-       .long   0x8b493c28
-       .long   0x80443526
-       .long   0xe90f427c
-       .long   0xe2024b72
-       .long   0xff155060
-       .long   0xf418596e
-       .long   0xc53b6644
-       .long   0xce366f4a
-       .long   0xd3217458
-       .long   0xd82c7d56
-       .long   0x7a0ca137
-       .long   0x7101a839
-       .long   0x6c16b32b
-       .long   0x671bba25
-       .long   0x5638850f
-       .long   0x5d358c01
-       .long   0x40229713
-       .long   0x4b2f9e1d
-       .long   0x2264e947
-       .long   0x2969e049
-       .long   0x347efb5b
-       .long   0x3f73f255
-       .long   0x0e50cd7f
-       .long   0x055dc471
-       .long   0x184adf63
-       .long   0x1347d66d
-       .long   0xcadc31d7
-       .long   0xc1d138d9
-       .long   0xdcc623cb
-       .long   0xd7cb2ac5
-       .long   0xe6e815ef
-       .long   0xede51ce1
-       .long   0xf0f207f3
-       .long   0xfbff0efd
-       .long   0x92b479a7
-       .long   0x99b970a9
-       .long   0x84ae6bbb
-       .long   0x8fa362b5
-       .long   0xbe805d9f
-       .long   0xb58d5491
-       .long   0xa89a4f83
-       .long   0xa397468d
-       // Table 1.
-       .long   0x00000000
-       .long   0x0d090e0b
-       .long   0x1a121c16
-       .long   0x171b121d
-       .long   0x3424382c
-       .long   0x392d3627
-       .long   0x2e36243a
-       .long   0x233f2a31
-       .long   0x68487058
-       .long   0x65417e53
-       .long   0x725a6c4e
-       .long   0x7f536245
-       .long   0x5c6c4874
-       .long   0x5165467f
-       .long   0x467e5462
-       .long   0x4b775a69
-       .long   0xd090e0b0
-       .long   0xdd99eebb
-       .long   0xca82fca6
-       .long   0xc78bf2ad
-       .long   0xe4b4d89c
-       .long   0xe9bdd697
-       .long   0xfea6c48a
-       .long   0xf3afca81
-       .long   0xb8d890e8
-       .long   0xb5d19ee3
-       .long   0xa2ca8cfe
-       .long   0xafc382f5
-       .long   0x8cfca8c4
-       .long   0x81f5a6cf
-       .long   0x96eeb4d2
-       .long   0x9be7bad9
-       .long   0xbb3bdb7b
-       .long   0xb632d570
-       .long   0xa129c76d
-       .long   0xac20c966
-       .long   0x8f1fe357
-       .long   0x8216ed5c
-       .long   0x950dff41
-       .long   0x9804f14a
-       .long   0xd373ab23
-       .long   0xde7aa528
-       .long   0xc961b735
-       .long   0xc468b93e
-       .long   0xe757930f
-       .long   0xea5e9d04
-       .long   0xfd458f19
-       .long   0xf04c8112
-       .long   0x6bab3bcb
-       .long   0x66a235c0
-       .long   0x71b927dd
-       .long   0x7cb029d6
-       .long   0x5f8f03e7
-       .long   0x52860dec
-       .long   0x459d1ff1
-       .long   0x489411fa
-       .long   0x03e34b93
-       .long   0x0eea4598
-       .long   0x19f15785
-       .long   0x14f8598e
-       .long   0x37c773bf
-       .long   0x3ace7db4
-       .long   0x2dd56fa9
-       .long   0x20dc61a2
-       .long   0x6d76adf6
-       .long   0x607fa3fd
-       .long   0x7764b1e0
-       .long   0x7a6dbfeb
-       .long   0x595295da
-       .long   0x545b9bd1
-       .long   0x434089cc
-       .long   0x4e4987c7
-       .long   0x053eddae
-       .long   0x0837d3a5
-       .long   0x1f2cc1b8
-       .long   0x1225cfb3
-       .long   0x311ae582
-       .long   0x3c13eb89
-       .long   0x2b08f994
-       .long   0x2601f79f
-       .long   0xbde64d46
-       .long   0xb0ef434d
-       .long   0xa7f45150
-       .long   0xaafd5f5b
-       .long   0x89c2756a
-       .long   0x84cb7b61
-       .long   0x93d0697c
-       .long   0x9ed96777
-       .long   0xd5ae3d1e
-       .long   0xd8a73315
-       .long   0xcfbc2108
-       .long   0xc2b52f03
-       .long   0xe18a0532
-       .long   0xec830b39
-       .long   0xfb981924
-       .long   0xf691172f
-       .long   0xd64d768d
-       .long   0xdb447886
-       .long   0xcc5f6a9b
-       .long   0xc1566490
-       .long   0xe2694ea1
-       .long   0xef6040aa
-       .long   0xf87b52b7
-       .long   0xf5725cbc
-       .long   0xbe0506d5
-       .long   0xb30c08de
-       .long   0xa4171ac3
-       .long   0xa91e14c8
-       .long   0x8a213ef9
-       .long   0x872830f2
-       .long   0x903322ef
-       .long   0x9d3a2ce4
-       .long   0x06dd963d
-       .long   0x0bd49836
-       .long   0x1ccf8a2b
-       .long   0x11c68420
-       .long   0x32f9ae11
-       .long   0x3ff0a01a
-       .long   0x28ebb207
-       .long   0x25e2bc0c
-       .long   0x6e95e665
-       .long   0x639ce86e
-       .long   0x7487fa73
-       .long   0x798ef478
-       .long   0x5ab1de49
-       .long   0x57b8d042
-       .long   0x40a3c25f
-       .long   0x4daacc54
-       .long   0xdaec41f7
-       .long   0xd7e54ffc
-       .long   0xc0fe5de1
-       .long   0xcdf753ea
-       .long   0xeec879db
-       .long   0xe3c177d0
-       .long   0xf4da65cd
-       .long   0xf9d36bc6
-       .long   0xb2a431af
-       .long   0xbfad3fa4
-       .long   0xa8b62db9
-       .long   0xa5bf23b2
-       .long   0x86800983
-       .long   0x8b890788
-       .long   0x9c921595
-       .long   0x919b1b9e
-       .long   0x0a7ca147
-       .long   0x0775af4c
-       .long   0x106ebd51
-       .long   0x1d67b35a
-       .long   0x3e58996b
-       .long   0x33519760
-       .long   0x244a857d
-       .long   0x29438b76
-       .long   0x6234d11f
-       .long   0x6f3ddf14
-       .long   0x7826cd09
-       .long   0x752fc302
-       .long   0x5610e933
-       .long   0x5b19e738
-       .long   0x4c02f525
-       .long   0x410bfb2e
-       .long   0x61d79a8c
-       .long   0x6cde9487
-       .long   0x7bc5869a
-       .long   0x76cc8891
-       .long   0x55f3a2a0
-       .long   0x58faacab
-       .long   0x4fe1beb6
-       .long   0x42e8b0bd
-       .long   0x099fead4
-       .long   0x0496e4df
-       .long   0x138df6c2
-       .long   0x1e84f8c9
-       .long   0x3dbbd2f8
-       .long   0x30b2dcf3
-       .long   0x27a9ceee
-       .long   0x2aa0c0e5
-       .long   0xb1477a3c
-       .long   0xbc4e7437
-       .long   0xab55662a
-       .long   0xa65c6821
-       .long   0x85634210
-       .long   0x886a4c1b
-       .long   0x9f715e06
-       .long   0x9278500d
-       .long   0xd90f0a64
-       .long   0xd406046f
-       .long   0xc31d1672
-       .long   0xce141879
-       .long   0xed2b3248
-       .long   0xe0223c43
-       .long   0xf7392e5e
-       .long   0xfa302055
-       .long   0xb79aec01
-       .long   0xba93e20a
-       .long   0xad88f017
-       .long   0xa081fe1c
-       .long   0x83bed42d
-       .long   0x8eb7da26
-       .long   0x99acc83b
-       .long   0x94a5c630
-       .long   0xdfd29c59
-       .long   0xd2db9252
-       .long   0xc5c0804f
-       .long   0xc8c98e44
-       .long   0xebf6a475
-       .long   0xe6ffaa7e
-       .long   0xf1e4b863
-       .long   0xfcedb668
-       .long   0x670a0cb1
-       .long   0x6a0302ba
-       .long   0x7d1810a7
-       .long   0x70111eac
-       .long   0x532e349d
-       .long   0x5e273a96
-       .long   0x493c288b
-       .long   0x44352680
-       .long   0x0f427ce9
-       .long   0x024b72e2
-       .long   0x155060ff
-       .long   0x18596ef4
-       .long   0x3b6644c5
-       .long   0x366f4ace
-       .long   0x217458d3
-       .long   0x2c7d56d8
-       .long   0x0ca1377a
-       .long   0x01a83971
-       .long   0x16b32b6c
-       .long   0x1bba2567
-       .long   0x38850f56
-       .long   0x358c015d
-       .long   0x22971340
-       .long   0x2f9e1d4b
-       .long   0x64e94722
-       .long   0x69e04929
-       .long   0x7efb5b34
-       .long   0x73f2553f
-       .long   0x50cd7f0e
-       .long   0x5dc47105
-       .long   0x4adf6318
-       .long   0x47d66d13
-       .long   0xdc31d7ca
-       .long   0xd138d9c1
-       .long   0xc623cbdc
-       .long   0xcb2ac5d7
-       .long   0xe815efe6
-       .long   0xe51ce1ed
-       .long   0xf207f3f0
-       .long   0xff0efdfb
-       .long   0xb479a792
-       .long   0xb970a999
-       .long   0xae6bbb84
-       .long   0xa362b58f
-       .long   0x805d9fbe
-       .long   0x8d5491b5
-       .long   0x9a4f83a8
-       .long   0x97468da3
-       // Table 2.
-       .long   0x00000000
-       .long   0x090e0b0d
-       .long   0x121c161a
-       .long   0x1b121d17
-       .long   0x24382c34
-       .long   0x2d362739
-       .long   0x36243a2e
-       .long   0x3f2a3123
-       .long   0x48705868
-       .long   0x417e5365
-       .long   0x5a6c4e72
-       .long   0x5362457f
-       .long   0x6c48745c
-       .long   0x65467f51
-       .long   0x7e546246
-       .long   0x775a694b
-       .long   0x90e0b0d0
-       .long   0x99eebbdd
-       .long   0x82fca6ca
-       .long   0x8bf2adc7
-       .long   0xb4d89ce4
-       .long   0xbdd697e9
-       .long   0xa6c48afe
-       .long   0xafca81f3
-       .long   0xd890e8b8
-       .long   0xd19ee3b5
-       .long   0xca8cfea2
-       .long   0xc382f5af
-       .long   0xfca8c48c
-       .long   0xf5a6cf81
-       .long   0xeeb4d296
-       .long   0xe7bad99b
-       .long   0x3bdb7bbb
-       .long   0x32d570b6
-       .long   0x29c76da1
-       .long   0x20c966ac
-       .long   0x1fe3578f
-       .long   0x16ed5c82
-       .long   0x0dff4195
-       .long   0x04f14a98
-       .long   0x73ab23d3
-       .long   0x7aa528de
-       .long   0x61b735c9
-       .long   0x68b93ec4
-       .long   0x57930fe7
-       .long   0x5e9d04ea
-       .long   0x458f19fd
-       .long   0x4c8112f0
-       .long   0xab3bcb6b
-       .long   0xa235c066
-       .long   0xb927dd71
-       .long   0xb029d67c
-       .long   0x8f03e75f
-       .long   0x860dec52
-       .long   0x9d1ff145
-       .long   0x9411fa48
-       .long   0xe34b9303
-       .long   0xea45980e
-       .long   0xf1578519
-       .long   0xf8598e14
-       .long   0xc773bf37
-       .long   0xce7db43a
-       .long   0xd56fa92d
-       .long   0xdc61a220
-       .long   0x76adf66d
-       .long   0x7fa3fd60
-       .long   0x64b1e077
-       .long   0x6dbfeb7a
-       .long   0x5295da59
-       .long   0x5b9bd154
-       .long   0x4089cc43
-       .long   0x4987c74e
-       .long   0x3eddae05
-       .long   0x37d3a508
-       .long   0x2cc1b81f
-       .long   0x25cfb312
-       .long   0x1ae58231
-       .long   0x13eb893c
-       .long   0x08f9942b
-       .long   0x01f79f26
-       .long   0xe64d46bd
-       .long   0xef434db0
-       .long   0xf45150a7
-       .long   0xfd5f5baa
-       .long   0xc2756a89
-       .long   0xcb7b6184
-       .long   0xd0697c93
-       .long   0xd967779e
-       .long   0xae3d1ed5
-       .long   0xa73315d8
-       .long   0xbc2108cf
-       .long   0xb52f03c2
-       .long   0x8a0532e1
-       .long   0x830b39ec
-       .long   0x981924fb
-       .long   0x91172ff6
-       .long   0x4d768dd6
-       .long   0x447886db
-       .long   0x5f6a9bcc
-       .long   0x566490c1
-       .long   0x694ea1e2
-       .long   0x6040aaef
-       .long   0x7b52b7f8
-       .long   0x725cbcf5
-       .long   0x0506d5be
-       .long   0x0c08deb3
-       .long   0x171ac3a4
-       .long   0x1e14c8a9
-       .long   0x213ef98a
-       .long   0x2830f287
-       .long   0x3322ef90
-       .long   0x3a2ce49d
-       .long   0xdd963d06
-       .long   0xd498360b
-       .long   0xcf8a2b1c
-       .long   0xc6842011
-       .long   0xf9ae1132
-       .long   0xf0a01a3f
-       .long   0xebb20728
-       .long   0xe2bc0c25
-       .long   0x95e6656e
-       .long   0x9ce86e63
-       .long   0x87fa7374
-       .long   0x8ef47879
-       .long   0xb1de495a
-       .long   0xb8d04257
-       .long   0xa3c25f40
-       .long   0xaacc544d
-       .long   0xec41f7da
-       .long   0xe54ffcd7
-       .long   0xfe5de1c0
-       .long   0xf753eacd
-       .long   0xc879dbee
-       .long   0xc177d0e3
-       .long   0xda65cdf4
-       .long   0xd36bc6f9
-       .long   0xa431afb2
-       .long   0xad3fa4bf
-       .long   0xb62db9a8
-       .long   0xbf23b2a5
-       .long   0x80098386
-       .long   0x8907888b
-       .long   0x9215959c
-       .long   0x9b1b9e91
-       .long   0x7ca1470a
-       .long   0x75af4c07
-       .long   0x6ebd5110
-       .long   0x67b35a1d
-       .long   0x58996b3e
-       .long   0x51976033
-       .long   0x4a857d24
-       .long   0x438b7629
-       .long   0x34d11f62
-       .long   0x3ddf146f
-       .long   0x26cd0978
-       .long   0x2fc30275
-       .long   0x10e93356
-       .long   0x19e7385b
-       .long   0x02f5254c
-       .long   0x0bfb2e41
-       .long   0xd79a8c61
-       .long   0xde94876c
-       .long   0xc5869a7b
-       .long   0xcc889176
-       .long   0xf3a2a055
-       .long   0xfaacab58
-       .long   0xe1beb64f
-       .long   0xe8b0bd42
-       .long   0x9fead409
-       .long   0x96e4df04
-       .long   0x8df6c213
-       .long   0x84f8c91e
-       .long   0xbbd2f83d
-       .long   0xb2dcf330
-       .long   0xa9ceee27
-       .long   0xa0c0e52a
-       .long   0x477a3cb1
-       .long   0x4e7437bc
-       .long   0x55662aab
-       .long   0x5c6821a6
-       .long   0x63421085
-       .long   0x6a4c1b88
-       .long   0x715e069f
-       .long   0x78500d92
-       .long   0x0f0a64d9
-       .long   0x06046fd4
-       .long   0x1d1672c3
-       .long   0x141879ce
-       .long   0x2b3248ed
-       .long   0x223c43e0
-       .long   0x392e5ef7
-       .long   0x302055fa
-       .long   0x9aec01b7
-       .long   0x93e20aba
-       .long   0x88f017ad
-       .long   0x81fe1ca0
-       .long   0xbed42d83
-       .long   0xb7da268e
-       .long   0xacc83b99
-       .long   0xa5c63094
-       .long   0xd29c59df
-       .long   0xdb9252d2
-       .long   0xc0804fc5
-       .long   0xc98e44c8
-       .long   0xf6a475eb
-       .long   0xffaa7ee6
-       .long   0xe4b863f1
-       .long   0xedb668fc
-       .long   0x0a0cb167
-       .long   0x0302ba6a
-       .long   0x1810a77d
-       .long   0x111eac70
-       .long   0x2e349d53
-       .long   0x273a965e
-       .long   0x3c288b49
-       .long   0x35268044
-       .long   0x427ce90f
-       .long   0x4b72e202
-       .long   0x5060ff15
-       .long   0x596ef418
-       .long   0x6644c53b
-       .long   0x6f4ace36
-       .long   0x7458d321
-       .long   0x7d56d82c
-       .long   0xa1377a0c
-       .long   0xa8397101
-       .long   0xb32b6c16
-       .long   0xba25671b
-       .long   0x850f5638
-       .long   0x8c015d35
-       .long   0x97134022
-       .long   0x9e1d4b2f
-       .long   0xe9472264
-       .long   0xe0492969
-       .long   0xfb5b347e
-       .long   0xf2553f73
-       .long   0xcd7f0e50
-       .long   0xc471055d
-       .long   0xdf63184a
-       .long   0xd66d1347
-       .long   0x31d7cadc
-       .long   0x38d9c1d1
-       .long   0x23cbdcc6
-       .long   0x2ac5d7cb
-       .long   0x15efe6e8
-       .long   0x1ce1ede5
-       .long   0x07f3f0f2
-       .long   0x0efdfbff
-       .long   0x79a792b4
-       .long   0x70a999b9
-       .long   0x6bbb84ae
-       .long   0x62b58fa3
-       .long   0x5d9fbe80
-       .long   0x5491b58d
-       .long   0x4f83a89a
-       .long   0x468da397
-       // Table 3.
-       .long   0x00000000
-       .long   0x0e0b0d09
-       .long   0x1c161a12
-       .long   0x121d171b
-       .long   0x382c3424
-       .long   0x3627392d
-       .long   0x243a2e36
-       .long   0x2a31233f
-       .long   0x70586848
-       .long   0x7e536541
-       .long   0x6c4e725a
-       .long   0x62457f53
-       .long   0x48745c6c
-       .long   0x467f5165
-       .long   0x5462467e
-       .long   0x5a694b77
-       .long   0xe0b0d090
-       .long   0xeebbdd99
-       .long   0xfca6ca82
-       .long   0xf2adc78b
-       .long   0xd89ce4b4
-       .long   0xd697e9bd
-       .long   0xc48afea6
-       .long   0xca81f3af
-       .long   0x90e8b8d8
-       .long   0x9ee3b5d1
-       .long   0x8cfea2ca
-       .long   0x82f5afc3
-       .long   0xa8c48cfc
-       .long   0xa6cf81f5
-       .long   0xb4d296ee
-       .long   0xbad99be7
-       .long   0xdb7bbb3b
-       .long   0xd570b632
-       .long   0xc76da129
-       .long   0xc966ac20
-       .long   0xe3578f1f
-       .long   0xed5c8216
-       .long   0xff41950d
-       .long   0xf14a9804
-       .long   0xab23d373
-       .long   0xa528de7a
-       .long   0xb735c961
-       .long   0xb93ec468
-       .long   0x930fe757
-       .long   0x9d04ea5e
-       .long   0x8f19fd45
-       .long   0x8112f04c
-       .long   0x3bcb6bab
-       .long   0x35c066a2
-       .long   0x27dd71b9
-       .long   0x29d67cb0
-       .long   0x03e75f8f
-       .long   0x0dec5286
-       .long   0x1ff1459d
-       .long   0x11fa4894
-       .long   0x4b9303e3
-       .long   0x45980eea
-       .long   0x578519f1
-       .long   0x598e14f8
-       .long   0x73bf37c7
-       .long   0x7db43ace
-       .long   0x6fa92dd5
-       .long   0x61a220dc
-       .long   0xadf66d76
-       .long   0xa3fd607f
-       .long   0xb1e07764
-       .long   0xbfeb7a6d
-       .long   0x95da5952
-       .long   0x9bd1545b
-       .long   0x89cc4340
-       .long   0x87c74e49
-       .long   0xddae053e
-       .long   0xd3a50837
-       .long   0xc1b81f2c
-       .long   0xcfb31225
-       .long   0xe582311a
-       .long   0xeb893c13
-       .long   0xf9942b08
-       .long   0xf79f2601
-       .long   0x4d46bde6
-       .long   0x434db0ef
-       .long   0x5150a7f4
-       .long   0x5f5baafd
-       .long   0x756a89c2
-       .long   0x7b6184cb
-       .long   0x697c93d0
-       .long   0x67779ed9
-       .long   0x3d1ed5ae
-       .long   0x3315d8a7
-       .long   0x2108cfbc
-       .long   0x2f03c2b5
-       .long   0x0532e18a
-       .long   0x0b39ec83
-       .long   0x1924fb98
-       .long   0x172ff691
-       .long   0x768dd64d
-       .long   0x7886db44
-       .long   0x6a9bcc5f
-       .long   0x6490c156
-       .long   0x4ea1e269
-       .long   0x40aaef60
-       .long   0x52b7f87b
-       .long   0x5cbcf572
-       .long   0x06d5be05
-       .long   0x08deb30c
-       .long   0x1ac3a417
-       .long   0x14c8a91e
-       .long   0x3ef98a21
-       .long   0x30f28728
-       .long   0x22ef9033
-       .long   0x2ce49d3a
-       .long   0x963d06dd
-       .long   0x98360bd4
-       .long   0x8a2b1ccf
-       .long   0x842011c6
-       .long   0xae1132f9
-       .long   0xa01a3ff0
-       .long   0xb20728eb
-       .long   0xbc0c25e2
-       .long   0xe6656e95
-       .long   0xe86e639c
-       .long   0xfa737487
-       .long   0xf478798e
-       .long   0xde495ab1
-       .long   0xd04257b8
-       .long   0xc25f40a3
-       .long   0xcc544daa
-       .long   0x41f7daec
-       .long   0x4ffcd7e5
-       .long   0x5de1c0fe
-       .long   0x53eacdf7
-       .long   0x79dbeec8
-       .long   0x77d0e3c1
-       .long   0x65cdf4da
-       .long   0x6bc6f9d3
-       .long   0x31afb2a4
-       .long   0x3fa4bfad
-       .long   0x2db9a8b6
-       .long   0x23b2a5bf
-       .long   0x09838680
-       .long   0x07888b89
-       .long   0x15959c92
-       .long   0x1b9e919b
-       .long   0xa1470a7c
-       .long   0xaf4c0775
-       .long   0xbd51106e
-       .long   0xb35a1d67
-       .long   0x996b3e58
-       .long   0x97603351
-       .long   0x857d244a
-       .long   0x8b762943
-       .long   0xd11f6234
-       .long   0xdf146f3d
-       .long   0xcd097826
-       .long   0xc302752f
-       .long   0xe9335610
-       .long   0xe7385b19
-       .long   0xf5254c02
-       .long   0xfb2e410b
-       .long   0x9a8c61d7
-       .long   0x94876cde
-       .long   0x869a7bc5
-       .long   0x889176cc
-       .long   0xa2a055f3
-       .long   0xacab58fa
-       .long   0xbeb64fe1
-       .long   0xb0bd42e8
-       .long   0xead4099f
-       .long   0xe4df0496
-       .long   0xf6c2138d
-       .long   0xf8c91e84
-       .long   0xd2f83dbb
-       .long   0xdcf330b2
-       .long   0xceee27a9
-       .long   0xc0e52aa0
-       .long   0x7a3cb147
-       .long   0x7437bc4e
-       .long   0x662aab55
-       .long   0x6821a65c
-       .long   0x42108563
-       .long   0x4c1b886a
-       .long   0x5e069f71
-       .long   0x500d9278
-       .long   0x0a64d90f
-       .long   0x046fd406
-       .long   0x1672c31d
-       .long   0x1879ce14
-       .long   0x3248ed2b
-       .long   0x3c43e022
-       .long   0x2e5ef739
-       .long   0x2055fa30
-       .long   0xec01b79a
-       .long   0xe20aba93
-       .long   0xf017ad88
-       .long   0xfe1ca081
-       .long   0xd42d83be
-       .long   0xda268eb7
-       .long   0xc83b99ac
-       .long   0xc63094a5
-       .long   0x9c59dfd2
-       .long   0x9252d2db
-       .long   0x804fc5c0
-       .long   0x8e44c8c9
-       .long   0xa475ebf6
-       .long   0xaa7ee6ff
-       .long   0xb863f1e4
-       .long   0xb668fced
-       .long   0x0cb1670a
-       .long   0x02ba6a03
-       .long   0x10a77d18
-       .long   0x1eac7011
-       .long   0x349d532e
-       .long   0x3a965e27
-       .long   0x288b493c
-       .long   0x26804435
-       .long   0x7ce90f42
-       .long   0x72e2024b
-       .long   0x60ff1550
-       .long   0x6ef41859
-       .long   0x44c53b66
-       .long   0x4ace366f
-       .long   0x58d32174
-       .long   0x56d82c7d
-       .long   0x377a0ca1
-       .long   0x397101a8
-       .long   0x2b6c16b3
-       .long   0x25671bba
-       .long   0x0f563885
-       .long   0x015d358c
-       .long   0x13402297
-       .long   0x1d4b2f9e
-       .long   0x472264e9
-       .long   0x492969e0
-       .long   0x5b347efb
-       .long   0x553f73f2
-       .long   0x7f0e50cd
-       .long   0x71055dc4
-       .long   0x63184adf
-       .long   0x6d1347d6
-       .long   0xd7cadc31
-       .long   0xd9c1d138
-       .long   0xcbdcc623
-       .long   0xc5d7cb2a
-       .long   0xefe6e815
-       .long   0xe1ede51c
-       .long   0xf3f0f207
-       .long   0xfdfbff0e
-       .long   0xa792b479
-       .long   0xa999b970
-       .long   0xbb84ae6b
-       .long   0xb58fa362
-       .long   0x9fbe805d
-       .long   0x91b58d54
-       .long   0x83a89a4f
-       .long   0x8da39746
-
-
-// Tables for main encryption iterations.
-       .globl  _AESEncryptTable
-       .private_extern _AESEncryptTable
-       .align  2
-_AESEncryptTable:
-       // Table 0.
-       .long   0xa56363c6
-       .long   0x847c7cf8
-       .long   0x997777ee
-       .long   0x8d7b7bf6
-       .long   0x0df2f2ff
-       .long   0xbd6b6bd6
-       .long   0xb16f6fde
-       .long   0x54c5c591
-       .long   0x50303060
-       .long   0x03010102
-       .long   0xa96767ce
-       .long   0x7d2b2b56
-       .long   0x19fefee7
-       .long   0x62d7d7b5
-       .long   0xe6abab4d
-       .long   0x9a7676ec
-       .long   0x45caca8f
-       .long   0x9d82821f
-       .long   0x40c9c989
-       .long   0x877d7dfa
-       .long   0x15fafaef
-       .long   0xeb5959b2
-       .long   0xc947478e
-       .long   0x0bf0f0fb
-       .long   0xecadad41
-       .long   0x67d4d4b3
-       .long   0xfda2a25f
-       .long   0xeaafaf45
-       .long   0xbf9c9c23
-       .long   0xf7a4a453
-       .long   0x967272e4
-       .long   0x5bc0c09b
-       .long   0xc2b7b775
-       .long   0x1cfdfde1
-       .long   0xae93933d
-       .long   0x6a26264c
-       .long   0x5a36366c
-       .long   0x413f3f7e
-       .long   0x02f7f7f5
-       .long   0x4fcccc83
-       .long   0x5c343468
-       .long   0xf4a5a551
-       .long   0x34e5e5d1
-       .long   0x08f1f1f9
-       .long   0x937171e2
-       .long   0x73d8d8ab
-       .long   0x53313162
-       .long   0x3f15152a
-       .long   0x0c040408
-       .long   0x52c7c795
-       .long   0x65232346
-       .long   0x5ec3c39d
-       .long   0x28181830
-       .long   0xa1969637
-       .long   0x0f05050a
-       .long   0xb59a9a2f
-       .long   0x0907070e
-       .long   0x36121224
-       .long   0x9b80801b
-       .long   0x3de2e2df
-       .long   0x26ebebcd
-       .long   0x6927274e
-       .long   0xcdb2b27f
-       .long   0x9f7575ea
-       .long   0x1b090912
-       .long   0x9e83831d
-       .long   0x742c2c58
-       .long   0x2e1a1a34
-       .long   0x2d1b1b36
-       .long   0xb26e6edc
-       .long   0xee5a5ab4
-       .long   0xfba0a05b
-       .long   0xf65252a4
-       .long   0x4d3b3b76
-       .long   0x61d6d6b7
-       .long   0xceb3b37d
-       .long   0x7b292952
-       .long   0x3ee3e3dd
-       .long   0x712f2f5e
-       .long   0x97848413
-       .long   0xf55353a6
-       .long   0x68d1d1b9
-       .long   0x00000000
-       .long   0x2cededc1
-       .long   0x60202040
-       .long   0x1ffcfce3
-       .long   0xc8b1b179
-       .long   0xed5b5bb6
-       .long   0xbe6a6ad4
-       .long   0x46cbcb8d
-       .long   0xd9bebe67
-       .long   0x4b393972
-       .long   0xde4a4a94
-       .long   0xd44c4c98
-       .long   0xe85858b0
-       .long   0x4acfcf85
-       .long   0x6bd0d0bb
-       .long   0x2aefefc5
-       .long   0xe5aaaa4f
-       .long   0x16fbfbed
-       .long   0xc5434386
-       .long   0xd74d4d9a
-       .long   0x55333366
-       .long   0x94858511
-       .long   0xcf45458a
-       .long   0x10f9f9e9
-       .long   0x06020204
-       .long   0x817f7ffe
-       .long   0xf05050a0
-       .long   0x443c3c78
-       .long   0xba9f9f25
-       .long   0xe3a8a84b
-       .long   0xf35151a2
-       .long   0xfea3a35d
-       .long   0xc0404080
-       .long   0x8a8f8f05
-       .long   0xad92923f
-       .long   0xbc9d9d21
-       .long   0x48383870
-       .long   0x04f5f5f1
-       .long   0xdfbcbc63
-       .long   0xc1b6b677
-       .long   0x75dadaaf
-       .long   0x63212142
-       .long   0x30101020
-       .long   0x1affffe5
-       .long   0x0ef3f3fd
-       .long   0x6dd2d2bf
-       .long   0x4ccdcd81
-       .long   0x140c0c18
-       .long   0x35131326
-       .long   0x2fececc3
-       .long   0xe15f5fbe
-       .long   0xa2979735
-       .long   0xcc444488
-       .long   0x3917172e
-       .long   0x57c4c493
-       .long   0xf2a7a755
-       .long   0x827e7efc
-       .long   0x473d3d7a
-       .long   0xac6464c8
-       .long   0xe75d5dba
-       .long   0x2b191932
-       .long   0x957373e6
-       .long   0xa06060c0
-       .long   0x98818119
-       .long   0xd14f4f9e
-       .long   0x7fdcdca3
-       .long   0x66222244
-       .long   0x7e2a2a54
-       .long   0xab90903b
-       .long   0x8388880b
-       .long   0xca46468c
-       .long   0x29eeeec7
-       .long   0xd3b8b86b
-       .long   0x3c141428
-       .long   0x79dedea7
-       .long   0xe25e5ebc
-       .long   0x1d0b0b16
-       .long   0x76dbdbad
-       .long   0x3be0e0db
-       .long   0x56323264
-       .long   0x4e3a3a74
-       .long   0x1e0a0a14
-       .long   0xdb494992
-       .long   0x0a06060c
-       .long   0x6c242448
-       .long   0xe45c5cb8
-       .long   0x5dc2c29f
-       .long   0x6ed3d3bd
-       .long   0xefacac43
-       .long   0xa66262c4
-       .long   0xa8919139
-       .long   0xa4959531
-       .long   0x37e4e4d3
-       .long   0x8b7979f2
-       .long   0x32e7e7d5
-       .long   0x43c8c88b
-       .long   0x5937376e
-       .long   0xb76d6dda
-       .long   0x8c8d8d01
-       .long   0x64d5d5b1
-       .long   0xd24e4e9c
-       .long   0xe0a9a949
-       .long   0xb46c6cd8
-       .long   0xfa5656ac
-       .long   0x07f4f4f3
-       .long   0x25eaeacf
-       .long   0xaf6565ca
-       .long   0x8e7a7af4
-       .long   0xe9aeae47
-       .long   0x18080810
-       .long   0xd5baba6f
-       .long   0x887878f0
-       .long   0x6f25254a
-       .long   0x722e2e5c
-       .long   0x241c1c38
-       .long   0xf1a6a657
-       .long   0xc7b4b473
-       .long   0x51c6c697
-       .long   0x23e8e8cb
-       .long   0x7cdddda1
-       .long   0x9c7474e8
-       .long   0x211f1f3e
-       .long   0xdd4b4b96
-       .long   0xdcbdbd61
-       .long   0x868b8b0d
-       .long   0x858a8a0f
-       .long   0x907070e0
-       .long   0x423e3e7c
-       .long   0xc4b5b571
-       .long   0xaa6666cc
-       .long   0xd8484890
-       .long   0x05030306
-       .long   0x01f6f6f7
-       .long   0x120e0e1c
-       .long   0xa36161c2
-       .long   0x5f35356a
-       .long   0xf95757ae
-       .long   0xd0b9b969
-       .long   0x91868617
-       .long   0x58c1c199
-       .long   0x271d1d3a
-       .long   0xb99e9e27
-       .long   0x38e1e1d9
-       .long   0x13f8f8eb
-       .long   0xb398982b
-       .long   0x33111122
-       .long   0xbb6969d2
-       .long   0x70d9d9a9
-       .long   0x898e8e07
-       .long   0xa7949433
-       .long   0xb69b9b2d
-       .long   0x221e1e3c
-       .long   0x92878715
-       .long   0x20e9e9c9
-       .long   0x49cece87
-       .long   0xff5555aa
-       .long   0x78282850
-       .long   0x7adfdfa5
-       .long   0x8f8c8c03
-       .long   0xf8a1a159
-       .long   0x80898909
-       .long   0x170d0d1a
-       .long   0xdabfbf65
-       .long   0x31e6e6d7
-       .long   0xc6424284
-       .long   0xb86868d0
-       .long   0xc3414182
-       .long   0xb0999929
-       .long   0x772d2d5a
-       .long   0x110f0f1e
-       .long   0xcbb0b07b
-       .long   0xfc5454a8
-       .long   0xd6bbbb6d
-       .long   0x3a16162c
-       // Table 1.
-       .long   0x6363c6a5
-       .long   0x7c7cf884
-       .long   0x7777ee99
-       .long   0x7b7bf68d
-       .long   0xf2f2ff0d
-       .long   0x6b6bd6bd
-       .long   0x6f6fdeb1
-       .long   0xc5c59154
-       .long   0x30306050
-       .long   0x01010203
-       .long   0x6767cea9
-       .long   0x2b2b567d
-       .long   0xfefee719
-       .long   0xd7d7b562
-       .long   0xabab4de6
-       .long   0x7676ec9a
-       .long   0xcaca8f45
-       .long   0x82821f9d
-       .long   0xc9c98940
-       .long   0x7d7dfa87
-       .long   0xfafaef15
-       .long   0x5959b2eb
-       .long   0x47478ec9
-       .long   0xf0f0fb0b
-       .long   0xadad41ec
-       .long   0xd4d4b367
-       .long   0xa2a25ffd
-       .long   0xafaf45ea
-       .long   0x9c9c23bf
-       .long   0xa4a453f7
-       .long   0x7272e496
-       .long   0xc0c09b5b
-       .long   0xb7b775c2
-       .long   0xfdfde11c
-       .long   0x93933dae
-       .long   0x26264c6a
-       .long   0x36366c5a
-       .long   0x3f3f7e41
-       .long   0xf7f7f502
-       .long   0xcccc834f
-       .long   0x3434685c
-       .long   0xa5a551f4
-       .long   0xe5e5d134
-       .long   0xf1f1f908
-       .long   0x7171e293
-       .long   0xd8d8ab73
-       .long   0x31316253
-       .long   0x15152a3f
-       .long   0x0404080c
-       .long   0xc7c79552
-       .long   0x23234665
-       .long   0xc3c39d5e
-       .long   0x18183028
-       .long   0x969637a1
-       .long   0x05050a0f
-       .long   0x9a9a2fb5
-       .long   0x07070e09
-       .long   0x12122436
-       .long   0x80801b9b
-       .long   0xe2e2df3d
-       .long   0xebebcd26
-       .long   0x27274e69
-       .long   0xb2b27fcd
-       .long   0x7575ea9f
-       .long   0x0909121b
-       .long   0x83831d9e
-       .long   0x2c2c5874
-       .long   0x1a1a342e
-       .long   0x1b1b362d
-       .long   0x6e6edcb2
-       .long   0x5a5ab4ee
-       .long   0xa0a05bfb
-       .long   0x5252a4f6
-       .long   0x3b3b764d
-       .long   0xd6d6b761
-       .long   0xb3b37dce
-       .long   0x2929527b
-       .long   0xe3e3dd3e
-       .long   0x2f2f5e71
-       .long   0x84841397
-       .long   0x5353a6f5
-       .long   0xd1d1b968
-       .long   0x00000000
-       .long   0xededc12c
-       .long   0x20204060
-       .long   0xfcfce31f
-       .long   0xb1b179c8
-       .long   0x5b5bb6ed
-       .long   0x6a6ad4be
-       .long   0xcbcb8d46
-       .long   0xbebe67d9
-       .long   0x3939724b
-       .long   0x4a4a94de
-       .long   0x4c4c98d4
-       .long   0x5858b0e8
-       .long   0xcfcf854a
-       .long   0xd0d0bb6b
-       .long   0xefefc52a
-       .long   0xaaaa4fe5
-       .long   0xfbfbed16
-       .long   0x434386c5
-       .long   0x4d4d9ad7
-       .long   0x33336655
-       .long   0x85851194
-       .long   0x45458acf
-       .long   0xf9f9e910
-       .long   0x02020406
-       .long   0x7f7ffe81
-       .long   0x5050a0f0
-       .long   0x3c3c7844
-       .long   0x9f9f25ba
-       .long   0xa8a84be3
-       .long   0x5151a2f3
-       .long   0xa3a35dfe
-       .long   0x404080c0
-       .long   0x8f8f058a
-       .long   0x92923fad
-       .long   0x9d9d21bc
-       .long   0x38387048
-       .long   0xf5f5f104
-       .long   0xbcbc63df
-       .long   0xb6b677c1
-       .long   0xdadaaf75
-       .long   0x21214263
-       .long   0x10102030
-       .long   0xffffe51a
-       .long   0xf3f3fd0e
-       .long   0xd2d2bf6d
-       .long   0xcdcd814c
-       .long   0x0c0c1814
-       .long   0x13132635
-       .long   0xececc32f
-       .long   0x5f5fbee1
-       .long   0x979735a2
-       .long   0x444488cc
-       .long   0x17172e39
-       .long   0xc4c49357
-       .long   0xa7a755f2
-       .long   0x7e7efc82
-       .long   0x3d3d7a47
-       .long   0x6464c8ac
-       .long   0x5d5dbae7
-       .long   0x1919322b
-       .long   0x7373e695
-       .long   0x6060c0a0
-       .long   0x81811998
-       .long   0x4f4f9ed1
-       .long   0xdcdca37f
-       .long   0x22224466
-       .long   0x2a2a547e
-       .long   0x90903bab
-       .long   0x88880b83
-       .long   0x46468cca
-       .long   0xeeeec729
-       .long   0xb8b86bd3
-       .long   0x1414283c
-       .long   0xdedea779
-       .long   0x5e5ebce2
-       .long   0x0b0b161d
-       .long   0xdbdbad76
-       .long   0xe0e0db3b
-       .long   0x32326456
-       .long   0x3a3a744e
-       .long   0x0a0a141e
-       .long   0x494992db
-       .long   0x06060c0a
-       .long   0x2424486c
-       .long   0x5c5cb8e4
-       .long   0xc2c29f5d
-       .long   0xd3d3bd6e
-       .long   0xacac43ef
-       .long   0x6262c4a6
-       .long   0x919139a8
-       .long   0x959531a4
-       .long   0xe4e4d337
-       .long   0x7979f28b
-       .long   0xe7e7d532
-       .long   0xc8c88b43
-       .long   0x37376e59
-       .long   0x6d6ddab7
-       .long   0x8d8d018c
-       .long   0xd5d5b164
-       .long   0x4e4e9cd2
-       .long   0xa9a949e0
-       .long   0x6c6cd8b4
-       .long   0x5656acfa
-       .long   0xf4f4f307
-       .long   0xeaeacf25
-       .long   0x6565caaf
-       .long   0x7a7af48e
-       .long   0xaeae47e9
-       .long   0x08081018
-       .long   0xbaba6fd5
-       .long   0x7878f088
-       .long   0x25254a6f
-       .long   0x2e2e5c72
-       .long   0x1c1c3824
-       .long   0xa6a657f1
-       .long   0xb4b473c7
-       .long   0xc6c69751
-       .long   0xe8e8cb23
-       .long   0xdddda17c
-       .long   0x7474e89c
-       .long   0x1f1f3e21
-       .long   0x4b4b96dd
-       .long   0xbdbd61dc
-       .long   0x8b8b0d86
-       .long   0x8a8a0f85
-       .long   0x7070e090
-       .long   0x3e3e7c42
-       .long   0xb5b571c4
-       .long   0x6666ccaa
-       .long   0x484890d8
-       .long   0x03030605
-       .long   0xf6f6f701
-       .long   0x0e0e1c12
-       .long   0x6161c2a3
-       .long   0x35356a5f
-       .long   0x5757aef9
-       .long   0xb9b969d0
-       .long   0x86861791
-       .long   0xc1c19958
-       .long   0x1d1d3a27
-       .long   0x9e9e27b9
-       .long   0xe1e1d938
-       .long   0xf8f8eb13
-       .long   0x98982bb3
-       .long   0x11112233
-       .long   0x6969d2bb
-       .long   0xd9d9a970
-       .long   0x8e8e0789
-       .long   0x949433a7
-       .long   0x9b9b2db6
-       .long   0x1e1e3c22
-       .long   0x87871592
-       .long   0xe9e9c920
-       .long   0xcece8749
-       .long   0x5555aaff
-       .long   0x28285078
-       .long   0xdfdfa57a
-       .long   0x8c8c038f
-       .long   0xa1a159f8
-       .long   0x89890980
-       .long   0x0d0d1a17
-       .long   0xbfbf65da
-       .long   0xe6e6d731
-       .long   0x424284c6
-       .long   0x6868d0b8
-       .long   0x414182c3
-       .long   0x999929b0
-       .long   0x2d2d5a77
-       .long   0x0f0f1e11
-       .long   0xb0b07bcb
-       .long   0x5454a8fc
-       .long   0xbbbb6dd6
-       .long   0x16162c3a
-       // Table 2.
-       .long   0x63c6a563
-       .long   0x7cf8847c
-       .long   0x77ee9977
-       .long   0x7bf68d7b
-       .long   0xf2ff0df2
-       .long   0x6bd6bd6b
-       .long   0x6fdeb16f
-       .long   0xc59154c5
-       .long   0x30605030
-       .long   0x01020301
-       .long   0x67cea967
-       .long   0x2b567d2b
-       .long   0xfee719fe
-       .long   0xd7b562d7
-       .long   0xab4de6ab
-       .long   0x76ec9a76
-       .long   0xca8f45ca
-       .long   0x821f9d82
-       .long   0xc98940c9
-       .long   0x7dfa877d
-       .long   0xfaef15fa
-       .long   0x59b2eb59
-       .long   0x478ec947
-       .long   0xf0fb0bf0
-       .long   0xad41ecad
-       .long   0xd4b367d4
-       .long   0xa25ffda2
-       .long   0xaf45eaaf
-       .long   0x9c23bf9c
-       .long   0xa453f7a4
-       .long   0x72e49672
-       .long   0xc09b5bc0
-       .long   0xb775c2b7
-       .long   0xfde11cfd
-       .long   0x933dae93
-       .long   0x264c6a26
-       .long   0x366c5a36
-       .long   0x3f7e413f
-       .long   0xf7f502f7
-       .long   0xcc834fcc
-       .long   0x34685c34
-       .long   0xa551f4a5
-       .long   0xe5d134e5
-       .long   0xf1f908f1
-       .long   0x71e29371
-       .long   0xd8ab73d8
-       .long   0x31625331
-       .long   0x152a3f15
-       .long   0x04080c04
-       .long   0xc79552c7
-       .long   0x23466523
-       .long   0xc39d5ec3
-       .long   0x18302818
-       .long   0x9637a196
-       .long   0x050a0f05
-       .long   0x9a2fb59a
-       .long   0x070e0907
-       .long   0x12243612
-       .long   0x801b9b80
-       .long   0xe2df3de2
-       .long   0xebcd26eb
-       .long   0x274e6927
-       .long   0xb27fcdb2
-       .long   0x75ea9f75
-       .long   0x09121b09
-       .long   0x831d9e83
-       .long   0x2c58742c
-       .long   0x1a342e1a
-       .long   0x1b362d1b
-       .long   0x6edcb26e
-       .long   0x5ab4ee5a
-       .long   0xa05bfba0
-       .long   0x52a4f652
-       .long   0x3b764d3b
-       .long   0xd6b761d6
-       .long   0xb37dceb3
-       .long   0x29527b29
-       .long   0xe3dd3ee3
-       .long   0x2f5e712f
-       .long   0x84139784
-       .long   0x53a6f553
-       .long   0xd1b968d1
-       .long   0x00000000
-       .long   0xedc12ced
-       .long   0x20406020
-       .long   0xfce31ffc
-       .long   0xb179c8b1
-       .long   0x5bb6ed5b
-       .long   0x6ad4be6a
-       .long   0xcb8d46cb
-       .long   0xbe67d9be
-       .long   0x39724b39
-       .long   0x4a94de4a
-       .long   0x4c98d44c
-       .long   0x58b0e858
-       .long   0xcf854acf
-       .long   0xd0bb6bd0
-       .long   0xefc52aef
-       .long   0xaa4fe5aa
-       .long   0xfbed16fb
-       .long   0x4386c543
-       .long   0x4d9ad74d
-       .long   0x33665533
-       .long   0x85119485
-       .long   0x458acf45
-       .long   0xf9e910f9
-       .long   0x02040602
-       .long   0x7ffe817f
-       .long   0x50a0f050
-       .long   0x3c78443c
-       .long   0x9f25ba9f
-       .long   0xa84be3a8
-       .long   0x51a2f351
-       .long   0xa35dfea3
-       .long   0x4080c040
-       .long   0x8f058a8f
-       .long   0x923fad92
-       .long   0x9d21bc9d
-       .long   0x38704838
-       .long   0xf5f104f5
-       .long   0xbc63dfbc
-       .long   0xb677c1b6
-       .long   0xdaaf75da
-       .long   0x21426321
-       .long   0x10203010
-       .long   0xffe51aff
-       .long   0xf3fd0ef3
-       .long   0xd2bf6dd2
-       .long   0xcd814ccd
-       .long   0x0c18140c
-       .long   0x13263513
-       .long   0xecc32fec
-       .long   0x5fbee15f
-       .long   0x9735a297
-       .long   0x4488cc44
-       .long   0x172e3917
-       .long   0xc49357c4
-       .long   0xa755f2a7
-       .long   0x7efc827e
-       .long   0x3d7a473d
-       .long   0x64c8ac64
-       .long   0x5dbae75d
-       .long   0x19322b19
-       .long   0x73e69573
-       .long   0x60c0a060
-       .long   0x81199881
-       .long   0x4f9ed14f
-       .long   0xdca37fdc
-       .long   0x22446622
-       .long   0x2a547e2a
-       .long   0x903bab90
-       .long   0x880b8388
-       .long   0x468cca46
-       .long   0xeec729ee
-       .long   0xb86bd3b8
-       .long   0x14283c14
-       .long   0xdea779de
-       .long   0x5ebce25e
-       .long   0x0b161d0b
-       .long   0xdbad76db
-       .long   0xe0db3be0
-       .long   0x32645632
-       .long   0x3a744e3a
-       .long   0x0a141e0a
-       .long   0x4992db49
-       .long   0x060c0a06
-       .long   0x24486c24
-       .long   0x5cb8e45c
-       .long   0xc29f5dc2
-       .long   0xd3bd6ed3
-       .long   0xac43efac
-       .long   0x62c4a662
-       .long   0x9139a891
-       .long   0x9531a495
-       .long   0xe4d337e4
-       .long   0x79f28b79
-       .long   0xe7d532e7
-       .long   0xc88b43c8
-       .long   0x376e5937
-       .long   0x6ddab76d
-       .long   0x8d018c8d
-       .long   0xd5b164d5
-       .long   0x4e9cd24e
-       .long   0xa949e0a9
-       .long   0x6cd8b46c
-       .long   0x56acfa56
-       .long   0xf4f307f4
-       .long   0xeacf25ea
-       .long   0x65caaf65
-       .long   0x7af48e7a
-       .long   0xae47e9ae
-       .long   0x08101808
-       .long   0xba6fd5ba
-       .long   0x78f08878
-       .long   0x254a6f25
-       .long   0x2e5c722e
-       .long   0x1c38241c
-       .long   0xa657f1a6
-       .long   0xb473c7b4
-       .long   0xc69751c6
-       .long   0xe8cb23e8
-       .long   0xdda17cdd
-       .long   0x74e89c74
-       .long   0x1f3e211f
-       .long   0x4b96dd4b
-       .long   0xbd61dcbd
-       .long   0x8b0d868b
-       .long   0x8a0f858a
-       .long   0x70e09070
-       .long   0x3e7c423e
-       .long   0xb571c4b5
-       .long   0x66ccaa66
-       .long   0x4890d848
-       .long   0x03060503
-       .long   0xf6f701f6
-       .long   0x0e1c120e
-       .long   0x61c2a361
-       .long   0x356a5f35
-       .long   0x57aef957
-       .long   0xb969d0b9
-       .long   0x86179186
-       .long   0xc19958c1
-       .long   0x1d3a271d
-       .long   0x9e27b99e
-       .long   0xe1d938e1
-       .long   0xf8eb13f8
-       .long   0x982bb398
-       .long   0x11223311
-       .long   0x69d2bb69
-       .long   0xd9a970d9
-       .long   0x8e07898e
-       .long   0x9433a794
-       .long   0x9b2db69b
-       .long   0x1e3c221e
-       .long   0x87159287
-       .long   0xe9c920e9
-       .long   0xce8749ce
-       .long   0x55aaff55
-       .long   0x28507828
-       .long   0xdfa57adf
-       .long   0x8c038f8c
-       .long   0xa159f8a1
-       .long   0x89098089
-       .long   0x0d1a170d
-       .long   0xbf65dabf
-       .long   0xe6d731e6
-       .long   0x4284c642
-       .long   0x68d0b868
-       .long   0x4182c341
-       .long   0x9929b099
-       .long   0x2d5a772d
-       .long   0x0f1e110f
-       .long   0xb07bcbb0
-       .long   0x54a8fc54
-       .long   0xbb6dd6bb
-       .long   0x162c3a16
-       // Table 3.
-       .long   0xc6a56363
-       .long   0xf8847c7c
-       .long   0xee997777
-       .long   0xf68d7b7b
-       .long   0xff0df2f2
-       .long   0xd6bd6b6b
-       .long   0xdeb16f6f
-       .long   0x9154c5c5
-       .long   0x60503030
-       .long   0x02030101
-       .long   0xcea96767
-       .long   0x567d2b2b
-       .long   0xe719fefe
-       .long   0xb562d7d7
-       .long   0x4de6abab
-       .long   0xec9a7676
-       .long   0x8f45caca
-       .long   0x1f9d8282
-       .long   0x8940c9c9
-       .long   0xfa877d7d
-       .long   0xef15fafa
-       .long   0xb2eb5959
-       .long   0x8ec94747
-       .long   0xfb0bf0f0
-       .long   0x41ecadad
-       .long   0xb367d4d4
-       .long   0x5ffda2a2
-       .long   0x45eaafaf
-       .long   0x23bf9c9c
-       .long   0x53f7a4a4
-       .long   0xe4967272
-       .long   0x9b5bc0c0
-       .long   0x75c2b7b7
-       .long   0xe11cfdfd
-       .long   0x3dae9393
-       .long   0x4c6a2626
-       .long   0x6c5a3636
-       .long   0x7e413f3f
-       .long   0xf502f7f7
-       .long   0x834fcccc
-       .long   0x685c3434
-       .long   0x51f4a5a5
-       .long   0xd134e5e5
-       .long   0xf908f1f1
-       .long   0xe2937171
-       .long   0xab73d8d8
-       .long   0x62533131
-       .long   0x2a3f1515
-       .long   0x080c0404
-       .long   0x9552c7c7
-       .long   0x46652323
-       .long   0x9d5ec3c3
-       .long   0x30281818
-       .long   0x37a19696
-       .long   0x0a0f0505
-       .long   0x2fb59a9a
-       .long   0x0e090707
-       .long   0x24361212
-       .long   0x1b9b8080
-       .long   0xdf3de2e2
-       .long   0xcd26ebeb
-       .long   0x4e692727
-       .long   0x7fcdb2b2
-       .long   0xea9f7575
-       .long   0x121b0909
-       .long   0x1d9e8383
-       .long   0x58742c2c
-       .long   0x342e1a1a
-       .long   0x362d1b1b
-       .long   0xdcb26e6e
-       .long   0xb4ee5a5a
-       .long   0x5bfba0a0
-       .long   0xa4f65252
-       .long   0x764d3b3b
-       .long   0xb761d6d6
-       .long   0x7dceb3b3
-       .long   0x527b2929
-       .long   0xdd3ee3e3
-       .long   0x5e712f2f
-       .long   0x13978484
-       .long   0xa6f55353
-       .long   0xb968d1d1
-       .long   0x00000000
-       .long   0xc12ceded
-       .long   0x40602020
-       .long   0xe31ffcfc
-       .long   0x79c8b1b1
-       .long   0xb6ed5b5b
-       .long   0xd4be6a6a
-       .long   0x8d46cbcb
-       .long   0x67d9bebe
-       .long   0x724b3939
-       .long   0x94de4a4a
-       .long   0x98d44c4c
-       .long   0xb0e85858
-       .long   0x854acfcf
-       .long   0xbb6bd0d0
-       .long   0xc52aefef
-       .long   0x4fe5aaaa
-       .long   0xed16fbfb
-       .long   0x86c54343
-       .long   0x9ad74d4d
-       .long   0x66553333
-       .long   0x11948585
-       .long   0x8acf4545
-       .long   0xe910f9f9
-       .long   0x04060202
-       .long   0xfe817f7f
-       .long   0xa0f05050
-       .long   0x78443c3c
-       .long   0x25ba9f9f
-       .long   0x4be3a8a8
-       .long   0xa2f35151
-       .long   0x5dfea3a3
-       .long   0x80c04040
-       .long   0x058a8f8f
-       .long   0x3fad9292
-       .long   0x21bc9d9d
-       .long   0x70483838
-       .long   0xf104f5f5
-       .long   0x63dfbcbc
-       .long   0x77c1b6b6
-       .long   0xaf75dada
-       .long   0x42632121
-       .long   0x20301010
-       .long   0xe51affff
-       .long   0xfd0ef3f3
-       .long   0xbf6dd2d2
-       .long   0x814ccdcd
-       .long   0x18140c0c
-       .long   0x26351313
-       .long   0xc32fecec
-       .long   0xbee15f5f
-       .long   0x35a29797
-       .long   0x88cc4444
-       .long   0x2e391717
-       .long   0x9357c4c4
-       .long   0x55f2a7a7
-       .long   0xfc827e7e
-       .long   0x7a473d3d
-       .long   0xc8ac6464
-       .long   0xbae75d5d
-       .long   0x322b1919
-       .long   0xe6957373
-       .long   0xc0a06060
-       .long   0x19988181
-       .long   0x9ed14f4f
-       .long   0xa37fdcdc
-       .long   0x44662222
-       .long   0x547e2a2a
-       .long   0x3bab9090
-       .long   0x0b838888
-       .long   0x8cca4646
-       .long   0xc729eeee
-       .long   0x6bd3b8b8
-       .long   0x283c1414
-       .long   0xa779dede
-       .long   0xbce25e5e
-       .long   0x161d0b0b
-       .long   0xad76dbdb
-       .long   0xdb3be0e0
-       .long   0x64563232
-       .long   0x744e3a3a
-       .long   0x141e0a0a
-       .long   0x92db4949
-       .long   0x0c0a0606
-       .long   0x486c2424
-       .long   0xb8e45c5c
-       .long   0x9f5dc2c2
-       .long   0xbd6ed3d3
-       .long   0x43efacac
-       .long   0xc4a66262
-       .long   0x39a89191
-       .long   0x31a49595
-       .long   0xd337e4e4
-       .long   0xf28b7979
-       .long   0xd532e7e7
-       .long   0x8b43c8c8
-       .long   0x6e593737
-       .long   0xdab76d6d
-       .long   0x018c8d8d
-       .long   0xb164d5d5
-       .long   0x9cd24e4e
-       .long   0x49e0a9a9
-       .long   0xd8b46c6c
-       .long   0xacfa5656
-       .long   0xf307f4f4
-       .long   0xcf25eaea
-       .long   0xcaaf6565
-       .long   0xf48e7a7a
-       .long   0x47e9aeae
-       .long   0x10180808
-       .long   0x6fd5baba
-       .long   0xf0887878
-       .long   0x4a6f2525
-       .long   0x5c722e2e
-       .long   0x38241c1c
-       .long   0x57f1a6a6
-       .long   0x73c7b4b4
-       .long   0x9751c6c6
-       .long   0xcb23e8e8
-       .long   0xa17cdddd
-       .long   0xe89c7474
-       .long   0x3e211f1f
-       .long   0x96dd4b4b
-       .long   0x61dcbdbd
-       .long   0x0d868b8b
-       .long   0x0f858a8a
-       .long   0xe0907070
-       .long   0x7c423e3e
-       .long   0x71c4b5b5
-       .long   0xccaa6666
-       .long   0x90d84848
-       .long   0x06050303
-       .long   0xf701f6f6
-       .long   0x1c120e0e
-       .long   0xc2a36161
-       .long   0x6a5f3535
-       .long   0xaef95757
-       .long   0x69d0b9b9
-       .long   0x17918686
-       .long   0x9958c1c1
-       .long   0x3a271d1d
-       .long   0x27b99e9e
-       .long   0xd938e1e1
-       .long   0xeb13f8f8
-       .long   0x2bb39898
-       .long   0x22331111
-       .long   0xd2bb6969
-       .long   0xa970d9d9
-       .long   0x07898e8e
-       .long   0x33a79494
-       .long   0x2db69b9b
-       .long   0x3c221e1e
-       .long   0x15928787
-       .long   0xc920e9e9
-       .long   0x8749cece
-       .long   0xaaff5555
-       .long   0x50782828
-       .long   0xa57adfdf
-       .long   0x038f8c8c
-       .long   0x59f8a1a1
-       .long   0x09808989
-       .long   0x1a170d0d
-       .long   0x65dabfbf
-       .long   0xd731e6e6
-       .long   0x84c64242
-       .long   0xd0b86868
-       .long   0x82c34141
-       .long   0x29b09999
-       .long   0x5a772d2d
-       .long   0x1e110f0f
-       .long   0x7bcbb0b0
-       .long   0xa8fc5454
-       .long   0x6dd6bbbb
-       .long   0x2c3a1616
-
-
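The decryption tables below apply the inverse transform: each Table 0 entry is InvMixColumns of a column holding one inverse-S-box output, again stored byte-swapped for the little-endian i386, with Tables 1-3 as byte rotations of Table 0. A minimal sketch of the entry construction, assuming only the standard AES field arithmetic (this is illustrative, not the generator that produced the data):

    #include <assert.h>
    #include <stdint.h>

    /* Multiply in GF(2^8) modulo the AES polynomial x^8 + x^4 + x^3 + x + 1. */
    static uint8_t gf_mul(uint8_t a, uint8_t b)
    {
        uint8_t p = 0;
        while (b) {
            if (b & 1)
                p ^= a;
            a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0x00));
            b >>= 1;
        }
        return p;
    }

    /* Table 0 entry for an inverse-S-box output s: bytes {0e}*s, {09}*s,
     * {0d}*s, {0b}*s in memory -- the InvMixColumns coefficients. */
    static uint32_t dec_table0(uint8_t s)
    {
        return ((uint32_t)gf_mul(s, 0x0b) << 24) |
               ((uint32_t)gf_mul(s, 0x0d) << 16) |
               ((uint32_t)gf_mul(s, 0x09) <<  8) |
                (uint32_t)gf_mul(s, 0x0e);
    }

    int main(void)
    {
        /* The inverse S-box maps 0x00 to 0x52, giving Table 0's first entry. */
        assert(dec_table0(0x52) == 0x50a7f451);
        return 0;
    }
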
-// Tables for the main decryption rounds.
-       .globl  _AESDecryptTable
-       .private_extern _AESDecryptTable
-       .align  2
-_AESDecryptTable:
-       // Table 0.
-       .long   0x50a7f451
-       .long   0x5365417e
-       .long   0xc3a4171a
-       .long   0x965e273a
-       .long   0xcb6bab3b
-       .long   0xf1459d1f
-       .long   0xab58faac
-       .long   0x9303e34b
-       .long   0x55fa3020
-       .long   0xf66d76ad
-       .long   0x9176cc88
-       .long   0x254c02f5
-       .long   0xfcd7e54f
-       .long   0xd7cb2ac5
-       .long   0x80443526
-       .long   0x8fa362b5
-       .long   0x495ab1de
-       .long   0x671bba25
-       .long   0x980eea45
-       .long   0xe1c0fe5d
-       .long   0x02752fc3
-       .long   0x12f04c81
-       .long   0xa397468d
-       .long   0xc6f9d36b
-       .long   0xe75f8f03
-       .long   0x959c9215
-       .long   0xeb7a6dbf
-       .long   0xda595295
-       .long   0x2d83bed4
-       .long   0xd3217458
-       .long   0x2969e049
-       .long   0x44c8c98e
-       .long   0x6a89c275
-       .long   0x78798ef4
-       .long   0x6b3e5899
-       .long   0xdd71b927
-       .long   0xb64fe1be
-       .long   0x17ad88f0
-       .long   0x66ac20c9
-       .long   0xb43ace7d
-       .long   0x184adf63
-       .long   0x82311ae5
-       .long   0x60335197
-       .long   0x457f5362
-       .long   0xe07764b1
-       .long   0x84ae6bbb
-       .long   0x1ca081fe
-       .long   0x942b08f9
-       .long   0x58684870
-       .long   0x19fd458f
-       .long   0x876cde94
-       .long   0xb7f87b52
-       .long   0x23d373ab
-       .long   0xe2024b72
-       .long   0x578f1fe3
-       .long   0x2aab5566
-       .long   0x0728ebb2
-       .long   0x03c2b52f
-       .long   0x9a7bc586
-       .long   0xa50837d3
-       .long   0xf2872830
-       .long   0xb2a5bf23
-       .long   0xba6a0302
-       .long   0x5c8216ed
-       .long   0x2b1ccf8a
-       .long   0x92b479a7
-       .long   0xf0f207f3
-       .long   0xa1e2694e
-       .long   0xcdf4da65
-       .long   0xd5be0506
-       .long   0x1f6234d1
-       .long   0x8afea6c4
-       .long   0x9d532e34
-       .long   0xa055f3a2
-       .long   0x32e18a05
-       .long   0x75ebf6a4
-       .long   0x39ec830b
-       .long   0xaaef6040
-       .long   0x069f715e
-       .long   0x51106ebd
-       .long   0xf98a213e
-       .long   0x3d06dd96
-       .long   0xae053edd
-       .long   0x46bde64d
-       .long   0xb58d5491
-       .long   0x055dc471
-       .long   0x6fd40604
-       .long   0xff155060
-       .long   0x24fb9819
-       .long   0x97e9bdd6
-       .long   0xcc434089
-       .long   0x779ed967
-       .long   0xbd42e8b0
-       .long   0x888b8907
-       .long   0x385b19e7
-       .long   0xdbeec879
-       .long   0x470a7ca1
-       .long   0xe90f427c
-       .long   0xc91e84f8
-       .long   0x00000000
-       .long   0x83868009
-       .long   0x48ed2b32
-       .long   0xac70111e
-       .long   0x4e725a6c
-       .long   0xfbff0efd
-       .long   0x5638850f
-       .long   0x1ed5ae3d
-       .long   0x27392d36
-       .long   0x64d90f0a
-       .long   0x21a65c68
-       .long   0xd1545b9b
-       .long   0x3a2e3624
-       .long   0xb1670a0c
-       .long   0x0fe75793
-       .long   0xd296eeb4
-       .long   0x9e919b1b
-       .long   0x4fc5c080
-       .long   0xa220dc61
-       .long   0x694b775a
-       .long   0x161a121c
-       .long   0x0aba93e2
-       .long   0xe52aa0c0
-       .long   0x43e0223c
-       .long   0x1d171b12
-       .long   0x0b0d090e
-       .long   0xadc78bf2
-       .long   0xb9a8b62d
-       .long   0xc8a91e14
-       .long   0x8519f157
-       .long   0x4c0775af
-       .long   0xbbdd99ee
-       .long   0xfd607fa3
-       .long   0x9f2601f7
-       .long   0xbcf5725c
-       .long   0xc53b6644
-       .long   0x347efb5b
-       .long   0x7629438b
-       .long   0xdcc623cb
-       .long   0x68fcedb6
-       .long   0x63f1e4b8
-       .long   0xcadc31d7
-       .long   0x10856342
-       .long   0x40229713
-       .long   0x2011c684
-       .long   0x7d244a85
-       .long   0xf83dbbd2
-       .long   0x1132f9ae
-       .long   0x6da129c7
-       .long   0x4b2f9e1d
-       .long   0xf330b2dc
-       .long   0xec52860d
-       .long   0xd0e3c177
-       .long   0x6c16b32b
-       .long   0x99b970a9
-       .long   0xfa489411
-       .long   0x2264e947
-       .long   0xc48cfca8
-       .long   0x1a3ff0a0
-       .long   0xd82c7d56
-       .long   0xef903322
-       .long   0xc74e4987
-       .long   0xc1d138d9
-       .long   0xfea2ca8c
-       .long   0x360bd498
-       .long   0xcf81f5a6
-       .long   0x28de7aa5
-       .long   0x268eb7da
-       .long   0xa4bfad3f
-       .long   0xe49d3a2c
-       .long   0x0d927850
-       .long   0x9bcc5f6a
-       .long   0x62467e54
-       .long   0xc2138df6
-       .long   0xe8b8d890
-       .long   0x5ef7392e
-       .long   0xf5afc382
-       .long   0xbe805d9f
-       .long   0x7c93d069
-       .long   0xa92dd56f
-       .long   0xb31225cf
-       .long   0x3b99acc8
-       .long   0xa77d1810
-       .long   0x6e639ce8
-       .long   0x7bbb3bdb
-       .long   0x097826cd
-       .long   0xf418596e
-       .long   0x01b79aec
-       .long   0xa89a4f83
-       .long   0x656e95e6
-       .long   0x7ee6ffaa
-       .long   0x08cfbc21
-       .long   0xe6e815ef
-       .long   0xd99be7ba
-       .long   0xce366f4a
-       .long   0xd4099fea
-       .long   0xd67cb029
-       .long   0xafb2a431
-       .long   0x31233f2a
-       .long   0x3094a5c6
-       .long   0xc066a235
-       .long   0x37bc4e74
-       .long   0xa6ca82fc
-       .long   0xb0d090e0
-       .long   0x15d8a733
-       .long   0x4a9804f1
-       .long   0xf7daec41
-       .long   0x0e50cd7f
-       .long   0x2ff69117
-       .long   0x8dd64d76
-       .long   0x4db0ef43
-       .long   0x544daacc
-       .long   0xdf0496e4
-       .long   0xe3b5d19e
-       .long   0x1b886a4c
-       .long   0xb81f2cc1
-       .long   0x7f516546
-       .long   0x04ea5e9d
-       .long   0x5d358c01
-       .long   0x737487fa
-       .long   0x2e410bfb
-       .long   0x5a1d67b3
-       .long   0x52d2db92
-       .long   0x335610e9
-       .long   0x1347d66d
-       .long   0x8c61d79a
-       .long   0x7a0ca137
-       .long   0x8e14f859
-       .long   0x893c13eb
-       .long   0xee27a9ce
-       .long   0x35c961b7
-       .long   0xede51ce1
-       .long   0x3cb1477a
-       .long   0x59dfd29c
-       .long   0x3f73f255
-       .long   0x79ce1418
-       .long   0xbf37c773
-       .long   0xeacdf753
-       .long   0x5baafd5f
-       .long   0x146f3ddf
-       .long   0x86db4478
-       .long   0x81f3afca
-       .long   0x3ec468b9
-       .long   0x2c342438
-       .long   0x5f40a3c2
-       .long   0x72c31d16
-       .long   0x0c25e2bc
-       .long   0x8b493c28
-       .long   0x41950dff
-       .long   0x7101a839
-       .long   0xdeb30c08
-       .long   0x9ce4b4d8
-       .long   0x90c15664
-       .long   0x6184cb7b
-       .long   0x70b632d5
-       .long   0x745c6c48
-       .long   0x4257b8d0
-       // Table 1.
-       .long   0xa7f45150
-       .long   0x65417e53
-       .long   0xa4171ac3
-       .long   0x5e273a96
-       .long   0x6bab3bcb
-       .long   0x459d1ff1
-       .long   0x58faacab
-       .long   0x03e34b93
-       .long   0xfa302055
-       .long   0x6d76adf6
-       .long   0x76cc8891
-       .long   0x4c02f525
-       .long   0xd7e54ffc
-       .long   0xcb2ac5d7
-       .long   0x44352680
-       .long   0xa362b58f
-       .long   0x5ab1de49
-       .long   0x1bba2567
-       .long   0x0eea4598
-       .long   0xc0fe5de1
-       .long   0x752fc302
-       .long   0xf04c8112
-       .long   0x97468da3
-       .long   0xf9d36bc6
-       .long   0x5f8f03e7
-       .long   0x9c921595
-       .long   0x7a6dbfeb
-       .long   0x595295da
-       .long   0x83bed42d
-       .long   0x217458d3
-       .long   0x69e04929
-       .long   0xc8c98e44
-       .long   0x89c2756a
-       .long   0x798ef478
-       .long   0x3e58996b
-       .long   0x71b927dd
-       .long   0x4fe1beb6
-       .long   0xad88f017
-       .long   0xac20c966
-       .long   0x3ace7db4
-       .long   0x4adf6318
-       .long   0x311ae582
-       .long   0x33519760
-       .long   0x7f536245
-       .long   0x7764b1e0
-       .long   0xae6bbb84
-       .long   0xa081fe1c
-       .long   0x2b08f994
-       .long   0x68487058
-       .long   0xfd458f19
-       .long   0x6cde9487
-       .long   0xf87b52b7
-       .long   0xd373ab23
-       .long   0x024b72e2
-       .long   0x8f1fe357
-       .long   0xab55662a
-       .long   0x28ebb207
-       .long   0xc2b52f03
-       .long   0x7bc5869a
-       .long   0x0837d3a5
-       .long   0x872830f2
-       .long   0xa5bf23b2
-       .long   0x6a0302ba
-       .long   0x8216ed5c
-       .long   0x1ccf8a2b
-       .long   0xb479a792
-       .long   0xf207f3f0
-       .long   0xe2694ea1
-       .long   0xf4da65cd
-       .long   0xbe0506d5
-       .long   0x6234d11f
-       .long   0xfea6c48a
-       .long   0x532e349d
-       .long   0x55f3a2a0
-       .long   0xe18a0532
-       .long   0xebf6a475
-       .long   0xec830b39
-       .long   0xef6040aa
-       .long   0x9f715e06
-       .long   0x106ebd51
-       .long   0x8a213ef9
-       .long   0x06dd963d
-       .long   0x053eddae
-       .long   0xbde64d46
-       .long   0x8d5491b5
-       .long   0x5dc47105
-       .long   0xd406046f
-       .long   0x155060ff
-       .long   0xfb981924
-       .long   0xe9bdd697
-       .long   0x434089cc
-       .long   0x9ed96777
-       .long   0x42e8b0bd
-       .long   0x8b890788
-       .long   0x5b19e738
-       .long   0xeec879db
-       .long   0x0a7ca147
-       .long   0x0f427ce9
-       .long   0x1e84f8c9
-       .long   0x00000000
-       .long   0x86800983
-       .long   0xed2b3248
-       .long   0x70111eac
-       .long   0x725a6c4e
-       .long   0xff0efdfb
-       .long   0x38850f56
-       .long   0xd5ae3d1e
-       .long   0x392d3627
-       .long   0xd90f0a64
-       .long   0xa65c6821
-       .long   0x545b9bd1
-       .long   0x2e36243a
-       .long   0x670a0cb1
-       .long   0xe757930f
-       .long   0x96eeb4d2
-       .long   0x919b1b9e
-       .long   0xc5c0804f
-       .long   0x20dc61a2
-       .long   0x4b775a69
-       .long   0x1a121c16
-       .long   0xba93e20a
-       .long   0x2aa0c0e5
-       .long   0xe0223c43
-       .long   0x171b121d
-       .long   0x0d090e0b
-       .long   0xc78bf2ad
-       .long   0xa8b62db9
-       .long   0xa91e14c8
-       .long   0x19f15785
-       .long   0x0775af4c
-       .long   0xdd99eebb
-       .long   0x607fa3fd
-       .long   0x2601f79f
-       .long   0xf5725cbc
-       .long   0x3b6644c5
-       .long   0x7efb5b34
-       .long   0x29438b76
-       .long   0xc623cbdc
-       .long   0xfcedb668
-       .long   0xf1e4b863
-       .long   0xdc31d7ca
-       .long   0x85634210
-       .long   0x22971340
-       .long   0x11c68420
-       .long   0x244a857d
-       .long   0x3dbbd2f8
-       .long   0x32f9ae11
-       .long   0xa129c76d
-       .long   0x2f9e1d4b
-       .long   0x30b2dcf3
-       .long   0x52860dec
-       .long   0xe3c177d0
-       .long   0x16b32b6c
-       .long   0xb970a999
-       .long   0x489411fa
-       .long   0x64e94722
-       .long   0x8cfca8c4
-       .long   0x3ff0a01a
-       .long   0x2c7d56d8
-       .long   0x903322ef
-       .long   0x4e4987c7
-       .long   0xd138d9c1
-       .long   0xa2ca8cfe
-       .long   0x0bd49836
-       .long   0x81f5a6cf
-       .long   0xde7aa528
-       .long   0x8eb7da26
-       .long   0xbfad3fa4
-       .long   0x9d3a2ce4
-       .long   0x9278500d
-       .long   0xcc5f6a9b
-       .long   0x467e5462
-       .long   0x138df6c2
-       .long   0xb8d890e8
-       .long   0xf7392e5e
-       .long   0xafc382f5
-       .long   0x805d9fbe
-       .long   0x93d0697c
-       .long   0x2dd56fa9
-       .long   0x1225cfb3
-       .long   0x99acc83b
-       .long   0x7d1810a7
-       .long   0x639ce86e
-       .long   0xbb3bdb7b
-       .long   0x7826cd09
-       .long   0x18596ef4
-       .long   0xb79aec01
-       .long   0x9a4f83a8
-       .long   0x6e95e665
-       .long   0xe6ffaa7e
-       .long   0xcfbc2108
-       .long   0xe815efe6
-       .long   0x9be7bad9
-       .long   0x366f4ace
-       .long   0x099fead4
-       .long   0x7cb029d6
-       .long   0xb2a431af
-       .long   0x233f2a31
-       .long   0x94a5c630
-       .long   0x66a235c0
-       .long   0xbc4e7437
-       .long   0xca82fca6
-       .long   0xd090e0b0
-       .long   0xd8a73315
-       .long   0x9804f14a
-       .long   0xdaec41f7
-       .long   0x50cd7f0e
-       .long   0xf691172f
-       .long   0xd64d768d
-       .long   0xb0ef434d
-       .long   0x4daacc54
-       .long   0x0496e4df
-       .long   0xb5d19ee3
-       .long   0x886a4c1b
-       .long   0x1f2cc1b8
-       .long   0x5165467f
-       .long   0xea5e9d04
-       .long   0x358c015d
-       .long   0x7487fa73
-       .long   0x410bfb2e
-       .long   0x1d67b35a
-       .long   0xd2db9252
-       .long   0x5610e933
-       .long   0x47d66d13
-       .long   0x61d79a8c
-       .long   0x0ca1377a
-       .long   0x14f8598e
-       .long   0x3c13eb89
-       .long   0x27a9ceee
-       .long   0xc961b735
-       .long   0xe51ce1ed
-       .long   0xb1477a3c
-       .long   0xdfd29c59
-       .long   0x73f2553f
-       .long   0xce141879
-       .long   0x37c773bf
-       .long   0xcdf753ea
-       .long   0xaafd5f5b
-       .long   0x6f3ddf14
-       .long   0xdb447886
-       .long   0xf3afca81
-       .long   0xc468b93e
-       .long   0x3424382c
-       .long   0x40a3c25f
-       .long   0xc31d1672
-       .long   0x25e2bc0c
-       .long   0x493c288b
-       .long   0x950dff41
-       .long   0x01a83971
-       .long   0xb30c08de
-       .long   0xe4b4d89c
-       .long   0xc1566490
-       .long   0x84cb7b61
-       .long   0xb632d570
-       .long   0x5c6c4874
-       .long   0x57b8d042
-       // Table 2.
-       .long   0xf45150a7
-       .long   0x417e5365
-       .long   0x171ac3a4
-       .long   0x273a965e
-       .long   0xab3bcb6b
-       .long   0x9d1ff145
-       .long   0xfaacab58
-       .long   0xe34b9303
-       .long   0x302055fa
-       .long   0x76adf66d
-       .long   0xcc889176
-       .long   0x02f5254c
-       .long   0xe54ffcd7
-       .long   0x2ac5d7cb
-       .long   0x35268044
-       .long   0x62b58fa3
-       .long   0xb1de495a
-       .long   0xba25671b
-       .long   0xea45980e
-       .long   0xfe5de1c0
-       .long   0x2fc30275
-       .long   0x4c8112f0
-       .long   0x468da397
-       .long   0xd36bc6f9
-       .long   0x8f03e75f
-       .long   0x9215959c
-       .long   0x6dbfeb7a
-       .long   0x5295da59
-       .long   0xbed42d83
-       .long   0x7458d321
-       .long   0xe0492969
-       .long   0xc98e44c8
-       .long   0xc2756a89
-       .long   0x8ef47879
-       .long   0x58996b3e
-       .long   0xb927dd71
-       .long   0xe1beb64f
-       .long   0x88f017ad
-       .long   0x20c966ac
-       .long   0xce7db43a
-       .long   0xdf63184a
-       .long   0x1ae58231
-       .long   0x51976033
-       .long   0x5362457f
-       .long   0x64b1e077
-       .long   0x6bbb84ae
-       .long   0x81fe1ca0
-       .long   0x08f9942b
-       .long   0x48705868
-       .long   0x458f19fd
-       .long   0xde94876c
-       .long   0x7b52b7f8
-       .long   0x73ab23d3
-       .long   0x4b72e202
-       .long   0x1fe3578f
-       .long   0x55662aab
-       .long   0xebb20728
-       .long   0xb52f03c2
-       .long   0xc5869a7b
-       .long   0x37d3a508
-       .long   0x2830f287
-       .long   0xbf23b2a5
-       .long   0x0302ba6a
-       .long   0x16ed5c82
-       .long   0xcf8a2b1c
-       .long   0x79a792b4
-       .long   0x07f3f0f2
-       .long   0x694ea1e2
-       .long   0xda65cdf4
-       .long   0x0506d5be
-       .long   0x34d11f62
-       .long   0xa6c48afe
-       .long   0x2e349d53
-       .long   0xf3a2a055
-       .long   0x8a0532e1
-       .long   0xf6a475eb
-       .long   0x830b39ec
-       .long   0x6040aaef
-       .long   0x715e069f
-       .long   0x6ebd5110
-       .long   0x213ef98a
-       .long   0xdd963d06
-       .long   0x3eddae05
-       .long   0xe64d46bd
-       .long   0x5491b58d
-       .long   0xc471055d
-       .long   0x06046fd4
-       .long   0x5060ff15
-       .long   0x981924fb
-       .long   0xbdd697e9
-       .long   0x4089cc43
-       .long   0xd967779e
-       .long   0xe8b0bd42
-       .long   0x8907888b
-       .long   0x19e7385b
-       .long   0xc879dbee
-       .long   0x7ca1470a
-       .long   0x427ce90f
-       .long   0x84f8c91e
-       .long   0x00000000
-       .long   0x80098386
-       .long   0x2b3248ed
-       .long   0x111eac70
-       .long   0x5a6c4e72
-       .long   0x0efdfbff
-       .long   0x850f5638
-       .long   0xae3d1ed5
-       .long   0x2d362739
-       .long   0x0f0a64d9
-       .long   0x5c6821a6
-       .long   0x5b9bd154
-       .long   0x36243a2e
-       .long   0x0a0cb167
-       .long   0x57930fe7
-       .long   0xeeb4d296
-       .long   0x9b1b9e91
-       .long   0xc0804fc5
-       .long   0xdc61a220
-       .long   0x775a694b
-       .long   0x121c161a
-       .long   0x93e20aba
-       .long   0xa0c0e52a
-       .long   0x223c43e0
-       .long   0x1b121d17
-       .long   0x090e0b0d
-       .long   0x8bf2adc7
-       .long   0xb62db9a8
-       .long   0x1e14c8a9
-       .long   0xf1578519
-       .long   0x75af4c07
-       .long   0x99eebbdd
-       .long   0x7fa3fd60
-       .long   0x01f79f26
-       .long   0x725cbcf5
-       .long   0x6644c53b
-       .long   0xfb5b347e
-       .long   0x438b7629
-       .long   0x23cbdcc6
-       .long   0xedb668fc
-       .long   0xe4b863f1
-       .long   0x31d7cadc
-       .long   0x63421085
-       .long   0x97134022
-       .long   0xc6842011
-       .long   0x4a857d24
-       .long   0xbbd2f83d
-       .long   0xf9ae1132
-       .long   0x29c76da1
-       .long   0x9e1d4b2f
-       .long   0xb2dcf330
-       .long   0x860dec52
-       .long   0xc177d0e3
-       .long   0xb32b6c16
-       .long   0x70a999b9
-       .long   0x9411fa48
-       .long   0xe9472264
-       .long   0xfca8c48c
-       .long   0xf0a01a3f
-       .long   0x7d56d82c
-       .long   0x3322ef90
-       .long   0x4987c74e
-       .long   0x38d9c1d1
-       .long   0xca8cfea2
-       .long   0xd498360b
-       .long   0xf5a6cf81
-       .long   0x7aa528de
-       .long   0xb7da268e
-       .long   0xad3fa4bf
-       .long   0x3a2ce49d
-       .long   0x78500d92
-       .long   0x5f6a9bcc
-       .long   0x7e546246
-       .long   0x8df6c213
-       .long   0xd890e8b8
-       .long   0x392e5ef7
-       .long   0xc382f5af
-       .long   0x5d9fbe80
-       .long   0xd0697c93
-       .long   0xd56fa92d
-       .long   0x25cfb312
-       .long   0xacc83b99
-       .long   0x1810a77d
-       .long   0x9ce86e63
-       .long   0x3bdb7bbb
-       .long   0x26cd0978
-       .long   0x596ef418
-       .long   0x9aec01b7
-       .long   0x4f83a89a
-       .long   0x95e6656e
-       .long   0xffaa7ee6
-       .long   0xbc2108cf
-       .long   0x15efe6e8
-       .long   0xe7bad99b
-       .long   0x6f4ace36
-       .long   0x9fead409
-       .long   0xb029d67c
-       .long   0xa431afb2
-       .long   0x3f2a3123
-       .long   0xa5c63094
-       .long   0xa235c066
-       .long   0x4e7437bc
-       .long   0x82fca6ca
-       .long   0x90e0b0d0
-       .long   0xa73315d8
-       .long   0x04f14a98
-       .long   0xec41f7da
-       .long   0xcd7f0e50
-       .long   0x91172ff6
-       .long   0x4d768dd6
-       .long   0xef434db0
-       .long   0xaacc544d
-       .long   0x96e4df04
-       .long   0xd19ee3b5
-       .long   0x6a4c1b88
-       .long   0x2cc1b81f
-       .long   0x65467f51
-       .long   0x5e9d04ea
-       .long   0x8c015d35
-       .long   0x87fa7374
-       .long   0x0bfb2e41
-       .long   0x67b35a1d
-       .long   0xdb9252d2
-       .long   0x10e93356
-       .long   0xd66d1347
-       .long   0xd79a8c61
-       .long   0xa1377a0c
-       .long   0xf8598e14
-       .long   0x13eb893c
-       .long   0xa9ceee27
-       .long   0x61b735c9
-       .long   0x1ce1ede5
-       .long   0x477a3cb1
-       .long   0xd29c59df
-       .long   0xf2553f73
-       .long   0x141879ce
-       .long   0xc773bf37
-       .long   0xf753eacd
-       .long   0xfd5f5baa
-       .long   0x3ddf146f
-       .long   0x447886db
-       .long   0xafca81f3
-       .long   0x68b93ec4
-       .long   0x24382c34
-       .long   0xa3c25f40
-       .long   0x1d1672c3
-       .long   0xe2bc0c25
-       .long   0x3c288b49
-       .long   0x0dff4195
-       .long   0xa8397101
-       .long   0x0c08deb3
-       .long   0xb4d89ce4
-       .long   0x566490c1
-       .long   0xcb7b6184
-       .long   0x32d570b6
-       .long   0x6c48745c
-       .long   0xb8d04257
-       // Table 3.
-       .long   0x5150a7f4
-       .long   0x7e536541
-       .long   0x1ac3a417
-       .long   0x3a965e27
-       .long   0x3bcb6bab
-       .long   0x1ff1459d
-       .long   0xacab58fa
-       .long   0x4b9303e3
-       .long   0x2055fa30
-       .long   0xadf66d76
-       .long   0x889176cc
-       .long   0xf5254c02
-       .long   0x4ffcd7e5
-       .long   0xc5d7cb2a
-       .long   0x26804435
-       .long   0xb58fa362
-       .long   0xde495ab1
-       .long   0x25671bba
-       .long   0x45980eea
-       .long   0x5de1c0fe
-       .long   0xc302752f
-       .long   0x8112f04c
-       .long   0x8da39746
-       .long   0x6bc6f9d3
-       .long   0x03e75f8f
-       .long   0x15959c92
-       .long   0xbfeb7a6d
-       .long   0x95da5952
-       .long   0xd42d83be
-       .long   0x58d32174
-       .long   0x492969e0
-       .long   0x8e44c8c9
-       .long   0x756a89c2
-       .long   0xf478798e
-       .long   0x996b3e58
-       .long   0x27dd71b9
-       .long   0xbeb64fe1
-       .long   0xf017ad88
-       .long   0xc966ac20
-       .long   0x7db43ace
-       .long   0x63184adf
-       .long   0xe582311a
-       .long   0x97603351
-       .long   0x62457f53
-       .long   0xb1e07764
-       .long   0xbb84ae6b
-       .long   0xfe1ca081
-       .long   0xf9942b08
-       .long   0x70586848
-       .long   0x8f19fd45
-       .long   0x94876cde
-       .long   0x52b7f87b
-       .long   0xab23d373
-       .long   0x72e2024b
-       .long   0xe3578f1f
-       .long   0x662aab55
-       .long   0xb20728eb
-       .long   0x2f03c2b5
-       .long   0x869a7bc5
-       .long   0xd3a50837
-       .long   0x30f28728
-       .long   0x23b2a5bf
-       .long   0x02ba6a03
-       .long   0xed5c8216
-       .long   0x8a2b1ccf
-       .long   0xa792b479
-       .long   0xf3f0f207
-       .long   0x4ea1e269
-       .long   0x65cdf4da
-       .long   0x06d5be05
-       .long   0xd11f6234
-       .long   0xc48afea6
-       .long   0x349d532e
-       .long   0xa2a055f3
-       .long   0x0532e18a
-       .long   0xa475ebf6
-       .long   0x0b39ec83
-       .long   0x40aaef60
-       .long   0x5e069f71
-       .long   0xbd51106e
-       .long   0x3ef98a21
-       .long   0x963d06dd
-       .long   0xddae053e
-       .long   0x4d46bde6
-       .long   0x91b58d54
-       .long   0x71055dc4
-       .long   0x046fd406
-       .long   0x60ff1550
-       .long   0x1924fb98
-       .long   0xd697e9bd
-       .long   0x89cc4340
-       .long   0x67779ed9
-       .long   0xb0bd42e8
-       .long   0x07888b89
-       .long   0xe7385b19
-       .long   0x79dbeec8
-       .long   0xa1470a7c
-       .long   0x7ce90f42
-       .long   0xf8c91e84
-       .long   0x00000000
-       .long   0x09838680
-       .long   0x3248ed2b
-       .long   0x1eac7011
-       .long   0x6c4e725a
-       .long   0xfdfbff0e
-       .long   0x0f563885
-       .long   0x3d1ed5ae
-       .long   0x3627392d
-       .long   0x0a64d90f
-       .long   0x6821a65c
-       .long   0x9bd1545b
-       .long   0x243a2e36
-       .long   0x0cb1670a
-       .long   0x930fe757
-       .long   0xb4d296ee
-       .long   0x1b9e919b
-       .long   0x804fc5c0
-       .long   0x61a220dc
-       .long   0x5a694b77
-       .long   0x1c161a12
-       .long   0xe20aba93
-       .long   0xc0e52aa0
-       .long   0x3c43e022
-       .long   0x121d171b
-       .long   0x0e0b0d09
-       .long   0xf2adc78b
-       .long   0x2db9a8b6
-       .long   0x14c8a91e
-       .long   0x578519f1
-       .long   0xaf4c0775
-       .long   0xeebbdd99
-       .long   0xa3fd607f
-       .long   0xf79f2601
-       .long   0x5cbcf572
-       .long   0x44c53b66
-       .long   0x5b347efb
-       .long   0x8b762943
-       .long   0xcbdcc623
-       .long   0xb668fced
-       .long   0xb863f1e4
-       .long   0xd7cadc31
-       .long   0x42108563
-       .long   0x13402297
-       .long   0x842011c6
-       .long   0x857d244a
-       .long   0xd2f83dbb
-       .long   0xae1132f9
-       .long   0xc76da129
-       .long   0x1d4b2f9e
-       .long   0xdcf330b2
-       .long   0x0dec5286
-       .long   0x77d0e3c1
-       .long   0x2b6c16b3
-       .long   0xa999b970
-       .long   0x11fa4894
-       .long   0x472264e9
-       .long   0xa8c48cfc
-       .long   0xa01a3ff0
-       .long   0x56d82c7d
-       .long   0x22ef9033
-       .long   0x87c74e49
-       .long   0xd9c1d138
-       .long   0x8cfea2ca
-       .long   0x98360bd4
-       .long   0xa6cf81f5
-       .long   0xa528de7a
-       .long   0xda268eb7
-       .long   0x3fa4bfad
-       .long   0x2ce49d3a
-       .long   0x500d9278
-       .long   0x6a9bcc5f
-       .long   0x5462467e
-       .long   0xf6c2138d
-       .long   0x90e8b8d8
-       .long   0x2e5ef739
-       .long   0x82f5afc3
-       .long   0x9fbe805d
-       .long   0x697c93d0
-       .long   0x6fa92dd5
-       .long   0xcfb31225
-       .long   0xc83b99ac
-       .long   0x10a77d18
-       .long   0xe86e639c
-       .long   0xdb7bbb3b
-       .long   0xcd097826
-       .long   0x6ef41859
-       .long   0xec01b79a
-       .long   0x83a89a4f
-       .long   0xe6656e95
-       .long   0xaa7ee6ff
-       .long   0x2108cfbc
-       .long   0xefe6e815
-       .long   0xbad99be7
-       .long   0x4ace366f
-       .long   0xead4099f
-       .long   0x29d67cb0
-       .long   0x31afb2a4
-       .long   0x2a31233f
-       .long   0xc63094a5
-       .long   0x35c066a2
-       .long   0x7437bc4e
-       .long   0xfca6ca82
-       .long   0xe0b0d090
-       .long   0x3315d8a7
-       .long   0xf14a9804
-       .long   0x41f7daec
-       .long   0x7f0e50cd
-       .long   0x172ff691
-       .long   0x768dd64d
-       .long   0x434db0ef
-       .long   0xcc544daa
-       .long   0xe4df0496
-       .long   0x9ee3b5d1
-       .long   0x4c1b886a
-       .long   0xc1b81f2c
-       .long   0x467f5165
-       .long   0x9d04ea5e
-       .long   0x015d358c
-       .long   0xfa737487
-       .long   0xfb2e410b
-       .long   0xb35a1d67
-       .long   0x9252d2db
-       .long   0xe9335610
-       .long   0x6d1347d6
-       .long   0x9a8c61d7
-       .long   0x377a0ca1
-       .long   0x598e14f8
-       .long   0xeb893c13
-       .long   0xceee27a9
-       .long   0xb735c961
-       .long   0xe1ede51c
-       .long   0x7a3cb147
-       .long   0x9c59dfd2
-       .long   0x553f73f2
-       .long   0x1879ce14
-       .long   0x73bf37c7
-       .long   0x53eacdf7
-       .long   0x5f5baafd
-       .long   0xdf146f3d
-       .long   0x7886db44
-       .long   0xca81f3af
-       .long   0xb93ec468
-       .long   0x382c3424
-       .long   0xc25f40a3
-       .long   0x1672c31d
-       .long   0xbc0c25e2
-       .long   0x288b493c
-       .long   0xff41950d
-       .long   0x397101a8
-       .long   0x08deb30c
-       .long   0xd89ce4b4
-       .long   0x6490c156
-       .long   0x7b6184cb
-       .long   0xd570b632
-       .long   0x48745c6c
-       .long   0xd04257b8
-
-
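The _AESSubBytesWordTable block that follows widens the plain S-box to 32-bit entries, with Table n holding the substituted byte pre-shifted into byte lane n (Table 0 begins 0x00000063, Table 1 begins 0x00006300). In the usual T-table construction these serve the final round, which applies SubBytes and ShiftRows but skips MixColumns, so an output word can be assembled with one lookup and XOR per lane. A hypothetical sketch of the layout and its use (the names and the consuming routine are illustrative, not the shipped assembly):

    #include <assert.h>
    #include <stdint.h>

    /* Build the four word tables from a 256-byte S-box: table n holds
     * sbox[x] pre-shifted into byte lane n. */
    static void build_subbytes_word_tables(const uint8_t sbox[256],
                                           uint32_t table[4][256])
    {
        for (int n = 0; n < 4; n++)
            for (int x = 0; x < 256; x++)
                table[n][x] = (uint32_t)sbox[x] << (8 * n);
    }

    /* Last-round column: SubBytes on four (already ShiftRows-selected)
     * bytes, then AddRoundKey -- no MixColumns in the final round. */
    static uint32_t last_round_column(const uint32_t table[4][256],
                                      uint8_t b0, uint8_t b1,
                                      uint8_t b2, uint8_t b3, uint32_t rk)
    {
        return table[0][b0] ^ table[1][b1] ^ table[2][b2] ^ table[3][b3] ^ rk;
    }

    int main(void)
    {
        static uint8_t sbox[256];    /* stand-in; the real S-box is omitted */
        static uint32_t table[4][256];
        sbox[0] = 0x63;              /* S[0], per Table 0's first entry */
        build_subbytes_word_tables(sbox, table);
        assert(table[0][0] == 0x00000063 && table[1][0] == 0x00006300);
        return 0;
    }
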
-// Tables of SubBytes results embedded in 32-bit words.
-       .globl  _AESSubBytesWordTable
-       .private_extern _AESSubBytesWordTable
-       .align  2
-_AESSubBytesWordTable:
-       // Table 0.
-       .long   0x00000063
-       .long   0x0000007c
-       .long   0x00000077
-       .long   0x0000007b
-       .long   0x000000f2
-       .long   0x0000006b
-       .long   0x0000006f
-       .long   0x000000c5
-       .long   0x00000030
-       .long   0x00000001
-       .long   0x00000067
-       .long   0x0000002b
-       .long   0x000000fe
-       .long   0x000000d7
-       .long   0x000000ab
-       .long   0x00000076
-       .long   0x000000ca
-       .long   0x00000082
-       .long   0x000000c9
-       .long   0x0000007d
-       .long   0x000000fa
-       .long   0x00000059
-       .long   0x00000047
-       .long   0x000000f0
-       .long   0x000000ad
-       .long   0x000000d4
-       .long   0x000000a2
-       .long   0x000000af
-       .long   0x0000009c
-       .long   0x000000a4
-       .long   0x00000072
-       .long   0x000000c0
-       .long   0x000000b7
-       .long   0x000000fd
-       .long   0x00000093
-       .long   0x00000026
-       .long   0x00000036
-       .long   0x0000003f
-       .long   0x000000f7
-       .long   0x000000cc
-       .long   0x00000034
-       .long   0x000000a5
-       .long   0x000000e5
-       .long   0x000000f1
-       .long   0x00000071
-       .long   0x000000d8
-       .long   0x00000031
-       .long   0x00000015
-       .long   0x00000004
-       .long   0x000000c7
-       .long   0x00000023
-       .long   0x000000c3
-       .long   0x00000018
-       .long   0x00000096
-       .long   0x00000005
-       .long   0x0000009a
-       .long   0x00000007
-       .long   0x00000012
-       .long   0x00000080
-       .long   0x000000e2
-       .long   0x000000eb
-       .long   0x00000027
-       .long   0x000000b2
-       .long   0x00000075
-       .long   0x00000009
-       .long   0x00000083
-       .long   0x0000002c
-       .long   0x0000001a
-       .long   0x0000001b
-       .long   0x0000006e
-       .long   0x0000005a
-       .long   0x000000a0
-       .long   0x00000052
-       .long   0x0000003b
-       .long   0x000000d6
-       .long   0x000000b3
-       .long   0x00000029
-       .long   0x000000e3
-       .long   0x0000002f
-       .long   0x00000084
-       .long   0x00000053
-       .long   0x000000d1
-       .long   0x00000000
-       .long   0x000000ed
-       .long   0x00000020
-       .long   0x000000fc
-       .long   0x000000b1
-       .long   0x0000005b
-       .long   0x0000006a
-       .long   0x000000cb
-       .long   0x000000be
-       .long   0x00000039
-       .long   0x0000004a
-       .long   0x0000004c
-       .long   0x00000058
-       .long   0x000000cf
-       .long   0x000000d0
-       .long   0x000000ef
-       .long   0x000000aa
-       .long   0x000000fb
-       .long   0x00000043
-       .long   0x0000004d
-       .long   0x00000033
-       .long   0x00000085
-       .long   0x00000045
-       .long   0x000000f9
-       .long   0x00000002
-       .long   0x0000007f
-       .long   0x00000050
-       .long   0x0000003c
-       .long   0x0000009f
-       .long   0x000000a8
-       .long   0x00000051
-       .long   0x000000a3
-       .long   0x00000040
-       .long   0x0000008f
-       .long   0x00000092
-       .long   0x0000009d
-       .long   0x00000038
-       .long   0x000000f5
-       .long   0x000000bc
-       .long   0x000000b6
-       .long   0x000000da
-       .long   0x00000021
-       .long   0x00000010
-       .long   0x000000ff
-       .long   0x000000f3
-       .long   0x000000d2
-       .long   0x000000cd
-       .long   0x0000000c
-       .long   0x00000013
-       .long   0x000000ec
-       .long   0x0000005f
-       .long   0x00000097
-       .long   0x00000044
-       .long   0x00000017
-       .long   0x000000c4
-       .long   0x000000a7
-       .long   0x0000007e
-       .long   0x0000003d
-       .long   0x00000064
-       .long   0x0000005d
-       .long   0x00000019
-       .long   0x00000073
-       .long   0x00000060
-       .long   0x00000081
-       .long   0x0000004f
-       .long   0x000000dc
-       .long   0x00000022
-       .long   0x0000002a
-       .long   0x00000090
-       .long   0x00000088
-       .long   0x00000046
-       .long   0x000000ee
-       .long   0x000000b8
-       .long   0x00000014
-       .long   0x000000de
-       .long   0x0000005e
-       .long   0x0000000b
-       .long   0x000000db
-       .long   0x000000e0
-       .long   0x00000032
-       .long   0x0000003a
-       .long   0x0000000a
-       .long   0x00000049
-       .long   0x00000006
-       .long   0x00000024
-       .long   0x0000005c
-       .long   0x000000c2
-       .long   0x000000d3
-       .long   0x000000ac
-       .long   0x00000062
-       .long   0x00000091
-       .long   0x00000095
-       .long   0x000000e4
-       .long   0x00000079
-       .long   0x000000e7
-       .long   0x000000c8
-       .long   0x00000037
-       .long   0x0000006d
-       .long   0x0000008d
-       .long   0x000000d5
-       .long   0x0000004e
-       .long   0x000000a9
-       .long   0x0000006c
-       .long   0x00000056
-       .long   0x000000f4
-       .long   0x000000ea
-       .long   0x00000065
-       .long   0x0000007a
-       .long   0x000000ae
-       .long   0x00000008
-       .long   0x000000ba
-       .long   0x00000078
-       .long   0x00000025
-       .long   0x0000002e
-       .long   0x0000001c
-       .long   0x000000a6
-       .long   0x000000b4
-       .long   0x000000c6
-       .long   0x000000e8
-       .long   0x000000dd
-       .long   0x00000074
-       .long   0x0000001f
-       .long   0x0000004b
-       .long   0x000000bd
-       .long   0x0000008b
-       .long   0x0000008a
-       .long   0x00000070
-       .long   0x0000003e
-       .long   0x000000b5
-       .long   0x00000066
-       .long   0x00000048
-       .long   0x00000003
-       .long   0x000000f6
-       .long   0x0000000e
-       .long   0x00000061
-       .long   0x00000035
-       .long   0x00000057
-       .long   0x000000b9
-       .long   0x00000086
-       .long   0x000000c1
-       .long   0x0000001d
-       .long   0x0000009e
-       .long   0x000000e1
-       .long   0x000000f8
-       .long   0x00000098
-       .long   0x00000011
-       .long   0x00000069
-       .long   0x000000d9
-       .long   0x0000008e
-       .long   0x00000094
-       .long   0x0000009b
-       .long   0x0000001e
-       .long   0x00000087
-       .long   0x000000e9
-       .long   0x000000ce
-       .long   0x00000055
-       .long   0x00000028
-       .long   0x000000df
-       .long   0x0000008c
-       .long   0x000000a1
-       .long   0x00000089
-       .long   0x0000000d
-       .long   0x000000bf
-       .long   0x000000e6
-       .long   0x00000042
-       .long   0x00000068
-       .long   0x00000041
-       .long   0x00000099
-       .long   0x0000002d
-       .long   0x0000000f
-       .long   0x000000b0
-       .long   0x00000054
-       .long   0x000000bb
-       .long   0x00000016
-       // Table 1.
-       .long   0x00006300
-       .long   0x00007c00
-       .long   0x00007700
-       .long   0x00007b00
-       .long   0x0000f200
-       .long   0x00006b00
-       .long   0x00006f00
-       .long   0x0000c500
-       .long   0x00003000
-       .long   0x00000100
-       .long   0x00006700
-       .long   0x00002b00
-       .long   0x0000fe00
-       .long   0x0000d700
-       .long   0x0000ab00
-       .long   0x00007600
-       .long   0x0000ca00
-       .long   0x00008200
-       .long   0x0000c900
-       .long   0x00007d00
-       .long   0x0000fa00
-       .long   0x00005900
-       .long   0x00004700
-       .long   0x0000f000
-       .long   0x0000ad00
-       .long   0x0000d400
-       .long   0x0000a200
-       .long   0x0000af00
-       .long   0x00009c00
-       .long   0x0000a400
-       .long   0x00007200
-       .long   0x0000c000
-       .long   0x0000b700
-       .long   0x0000fd00
-       .long   0x00009300
-       .long   0x00002600
-       .long   0x00003600
-       .long   0x00003f00
-       .long   0x0000f700
-       .long   0x0000cc00
-       .long   0x00003400
-       .long   0x0000a500
-       .long   0x0000e500
-       .long   0x0000f100
-       .long   0x00007100
-       .long   0x0000d800
-       .long   0x00003100
-       .long   0x00001500
-       .long   0x00000400
-       .long   0x0000c700
-       .long   0x00002300
-       .long   0x0000c300
-       .long   0x00001800
-       .long   0x00009600
-       .long   0x00000500
-       .long   0x00009a00
-       .long   0x00000700
-       .long   0x00001200
-       .long   0x00008000
-       .long   0x0000e200
-       .long   0x0000eb00
-       .long   0x00002700
-       .long   0x0000b200
-       .long   0x00007500
-       .long   0x00000900
-       .long   0x00008300
-       .long   0x00002c00
-       .long   0x00001a00
-       .long   0x00001b00
-       .long   0x00006e00
-       .long   0x00005a00
-       .long   0x0000a000
-       .long   0x00005200
-       .long   0x00003b00
-       .long   0x0000d600
-       .long   0x0000b300
-       .long   0x00002900
-       .long   0x0000e300
-       .long   0x00002f00
-       .long   0x00008400
-       .long   0x00005300
-       .long   0x0000d100
-       .long   0x00000000
-       .long   0x0000ed00
-       .long   0x00002000
-       .long   0x0000fc00
-       .long   0x0000b100
-       .long   0x00005b00
-       .long   0x00006a00
-       .long   0x0000cb00
-       .long   0x0000be00
-       .long   0x00003900
-       .long   0x00004a00
-       .long   0x00004c00
-       .long   0x00005800
-       .long   0x0000cf00
-       .long   0x0000d000
-       .long   0x0000ef00
-       .long   0x0000aa00
-       .long   0x0000fb00
-       .long   0x00004300
-       .long   0x00004d00
-       .long   0x00003300
-       .long   0x00008500
-       .long   0x00004500
-       .long   0x0000f900
-       .long   0x00000200
-       .long   0x00007f00
-       .long   0x00005000
-       .long   0x00003c00
-       .long   0x00009f00
-       .long   0x0000a800
-       .long   0x00005100
-       .long   0x0000a300
-       .long   0x00004000
-       .long   0x00008f00
-       .long   0x00009200
-       .long   0x00009d00
-       .long   0x00003800
-       .long   0x0000f500
-       .long   0x0000bc00
-       .long   0x0000b600
-       .long   0x0000da00
-       .long   0x00002100
-       .long   0x00001000
-       .long   0x0000ff00
-       .long   0x0000f300
-       .long   0x0000d200
-       .long   0x0000cd00
-       .long   0x00000c00
-       .long   0x00001300
-       .long   0x0000ec00
-       .long   0x00005f00
-       .long   0x00009700
-       .long   0x00004400
-       .long   0x00001700
-       .long   0x0000c400
-       .long   0x0000a700
-       .long   0x00007e00
-       .long   0x00003d00
-       .long   0x00006400
-       .long   0x00005d00
-       .long   0x00001900
-       .long   0x00007300
-       .long   0x00006000
-       .long   0x00008100
-       .long   0x00004f00
-       .long   0x0000dc00
-       .long   0x00002200
-       .long   0x00002a00
-       .long   0x00009000
-       .long   0x00008800
-       .long   0x00004600
-       .long   0x0000ee00
-       .long   0x0000b800
-       .long   0x00001400
-       .long   0x0000de00
-       .long   0x00005e00
-       .long   0x00000b00
-       .long   0x0000db00
-       .long   0x0000e000
-       .long   0x00003200
-       .long   0x00003a00
-       .long   0x00000a00
-       .long   0x00004900
-       .long   0x00000600
-       .long   0x00002400
-       .long   0x00005c00
-       .long   0x0000c200
-       .long   0x0000d300
-       .long   0x0000ac00
-       .long   0x00006200
-       .long   0x00009100
-       .long   0x00009500
-       .long   0x0000e400
-       .long   0x00007900
-       .long   0x0000e700
-       .long   0x0000c800
-       .long   0x00003700
-       .long   0x00006d00
-       .long   0x00008d00
-       .long   0x0000d500
-       .long   0x00004e00
-       .long   0x0000a900
-       .long   0x00006c00
-       .long   0x00005600
-       .long   0x0000f400
-       .long   0x0000ea00
-       .long   0x00006500
-       .long   0x00007a00
-       .long   0x0000ae00
-       .long   0x00000800
-       .long   0x0000ba00
-       .long   0x00007800
-       .long   0x00002500
-       .long   0x00002e00
-       .long   0x00001c00
-       .long   0x0000a600
-       .long   0x0000b400
-       .long   0x0000c600
-       .long   0x0000e800
-       .long   0x0000dd00
-       .long   0x00007400
-       .long   0x00001f00
-       .long   0x00004b00
-       .long   0x0000bd00
-       .long   0x00008b00
-       .long   0x00008a00
-       .long   0x00007000
-       .long   0x00003e00
-       .long   0x0000b500
-       .long   0x00006600
-       .long   0x00004800
-       .long   0x00000300
-       .long   0x0000f600
-       .long   0x00000e00
-       .long   0x00006100
-       .long   0x00003500
-       .long   0x00005700
-       .long   0x0000b900
-       .long   0x00008600
-       .long   0x0000c100
-       .long   0x00001d00
-       .long   0x00009e00
-       .long   0x0000e100
-       .long   0x0000f800
-       .long   0x00009800
-       .long   0x00001100
-       .long   0x00006900
-       .long   0x0000d900
-       .long   0x00008e00
-       .long   0x00009400
-       .long   0x00009b00
-       .long   0x00001e00
-       .long   0x00008700
-       .long   0x0000e900
-       .long   0x0000ce00
-       .long   0x00005500
-       .long   0x00002800
-       .long   0x0000df00
-       .long   0x00008c00
-       .long   0x0000a100
-       .long   0x00008900
-       .long   0x00000d00
-       .long   0x0000bf00
-       .long   0x0000e600
-       .long   0x00004200
-       .long   0x00006800
-       .long   0x00004100
-       .long   0x00009900
-       .long   0x00002d00
-       .long   0x00000f00
-       .long   0x0000b000
-       .long   0x00005400
-       .long   0x0000bb00
-       .long   0x00001600
-       // Table 2.
-       .long   0x00630000
-       .long   0x007c0000
-       .long   0x00770000
-       .long   0x007b0000
-       .long   0x00f20000
-       .long   0x006b0000
-       .long   0x006f0000
-       .long   0x00c50000
-       .long   0x00300000
-       .long   0x00010000
-       .long   0x00670000
-       .long   0x002b0000
-       .long   0x00fe0000
-       .long   0x00d70000
-       .long   0x00ab0000
-       .long   0x00760000
-       .long   0x00ca0000
-       .long   0x00820000
-       .long   0x00c90000
-       .long   0x007d0000
-       .long   0x00fa0000
-       .long   0x00590000
-       .long   0x00470000
-       .long   0x00f00000
-       .long   0x00ad0000
-       .long   0x00d40000
-       .long   0x00a20000
-       .long   0x00af0000
-       .long   0x009c0000
-       .long   0x00a40000
-       .long   0x00720000
-       .long   0x00c00000
-       .long   0x00b70000
-       .long   0x00fd0000
-       .long   0x00930000
-       .long   0x00260000
-       .long   0x00360000
-       .long   0x003f0000
-       .long   0x00f70000
-       .long   0x00cc0000
-       .long   0x00340000
-       .long   0x00a50000
-       .long   0x00e50000
-       .long   0x00f10000
-       .long   0x00710000
-       .long   0x00d80000
-       .long   0x00310000
-       .long   0x00150000
-       .long   0x00040000
-       .long   0x00c70000
-       .long   0x00230000
-       .long   0x00c30000
-       .long   0x00180000
-       .long   0x00960000
-       .long   0x00050000
-       .long   0x009a0000
-       .long   0x00070000
-       .long   0x00120000
-       .long   0x00800000
-       .long   0x00e20000
-       .long   0x00eb0000
-       .long   0x00270000
-       .long   0x00b20000
-       .long   0x00750000
-       .long   0x00090000
-       .long   0x00830000
-       .long   0x002c0000
-       .long   0x001a0000
-       .long   0x001b0000
-       .long   0x006e0000
-       .long   0x005a0000
-       .long   0x00a00000
-       .long   0x00520000
-       .long   0x003b0000
-       .long   0x00d60000
-       .long   0x00b30000
-       .long   0x00290000
-       .long   0x00e30000
-       .long   0x002f0000
-       .long   0x00840000
-       .long   0x00530000
-       .long   0x00d10000
-       .long   0x00000000
-       .long   0x00ed0000
-       .long   0x00200000
-       .long   0x00fc0000
-       .long   0x00b10000
-       .long   0x005b0000
-       .long   0x006a0000
-       .long   0x00cb0000
-       .long   0x00be0000
-       .long   0x00390000
-       .long   0x004a0000
-       .long   0x004c0000
-       .long   0x00580000
-       .long   0x00cf0000
-       .long   0x00d00000
-       .long   0x00ef0000
-       .long   0x00aa0000
-       .long   0x00fb0000
-       .long   0x00430000
-       .long   0x004d0000
-       .long   0x00330000
-       .long   0x00850000
-       .long   0x00450000
-       .long   0x00f90000
-       .long   0x00020000
-       .long   0x007f0000
-       .long   0x00500000
-       .long   0x003c0000
-       .long   0x009f0000
-       .long   0x00a80000
-       .long   0x00510000
-       .long   0x00a30000
-       .long   0x00400000
-       .long   0x008f0000
-       .long   0x00920000
-       .long   0x009d0000
-       .long   0x00380000
-       .long   0x00f50000
-       .long   0x00bc0000
-       .long   0x00b60000
-       .long   0x00da0000
-       .long   0x00210000
-       .long   0x00100000
-       .long   0x00ff0000
-       .long   0x00f30000
-       .long   0x00d20000
-       .long   0x00cd0000
-       .long   0x000c0000
-       .long   0x00130000
-       .long   0x00ec0000
-       .long   0x005f0000
-       .long   0x00970000
-       .long   0x00440000
-       .long   0x00170000
-       .long   0x00c40000
-       .long   0x00a70000
-       .long   0x007e0000
-       .long   0x003d0000
-       .long   0x00640000
-       .long   0x005d0000
-       .long   0x00190000
-       .long   0x00730000
-       .long   0x00600000
-       .long   0x00810000
-       .long   0x004f0000
-       .long   0x00dc0000
-       .long   0x00220000
-       .long   0x002a0000
-       .long   0x00900000
-       .long   0x00880000
-       .long   0x00460000
-       .long   0x00ee0000
-       .long   0x00b80000
-       .long   0x00140000
-       .long   0x00de0000
-       .long   0x005e0000
-       .long   0x000b0000
-       .long   0x00db0000
-       .long   0x00e00000
-       .long   0x00320000
-       .long   0x003a0000
-       .long   0x000a0000
-       .long   0x00490000
-       .long   0x00060000
-       .long   0x00240000
-       .long   0x005c0000
-       .long   0x00c20000
-       .long   0x00d30000
-       .long   0x00ac0000
-       .long   0x00620000
-       .long   0x00910000
-       .long   0x00950000
-       .long   0x00e40000
-       .long   0x00790000
-       .long   0x00e70000
-       .long   0x00c80000
-       .long   0x00370000
-       .long   0x006d0000
-       .long   0x008d0000
-       .long   0x00d50000
-       .long   0x004e0000
-       .long   0x00a90000
-       .long   0x006c0000
-       .long   0x00560000
-       .long   0x00f40000
-       .long   0x00ea0000
-       .long   0x00650000
-       .long   0x007a0000
-       .long   0x00ae0000
-       .long   0x00080000
-       .long   0x00ba0000
-       .long   0x00780000
-       .long   0x00250000
-       .long   0x002e0000
-       .long   0x001c0000
-       .long   0x00a60000
-       .long   0x00b40000
-       .long   0x00c60000
-       .long   0x00e80000
-       .long   0x00dd0000
-       .long   0x00740000
-       .long   0x001f0000
-       .long   0x004b0000
-       .long   0x00bd0000
-       .long   0x008b0000
-       .long   0x008a0000
-       .long   0x00700000
-       .long   0x003e0000
-       .long   0x00b50000
-       .long   0x00660000
-       .long   0x00480000
-       .long   0x00030000
-       .long   0x00f60000
-       .long   0x000e0000
-       .long   0x00610000
-       .long   0x00350000
-       .long   0x00570000
-       .long   0x00b90000
-       .long   0x00860000
-       .long   0x00c10000
-       .long   0x001d0000
-       .long   0x009e0000
-       .long   0x00e10000
-       .long   0x00f80000
-       .long   0x00980000
-       .long   0x00110000
-       .long   0x00690000
-       .long   0x00d90000
-       .long   0x008e0000
-       .long   0x00940000
-       .long   0x009b0000
-       .long   0x001e0000
-       .long   0x00870000
-       .long   0x00e90000
-       .long   0x00ce0000
-       .long   0x00550000
-       .long   0x00280000
-       .long   0x00df0000
-       .long   0x008c0000
-       .long   0x00a10000
-       .long   0x00890000
-       .long   0x000d0000
-       .long   0x00bf0000
-       .long   0x00e60000
-       .long   0x00420000
-       .long   0x00680000
-       .long   0x00410000
-       .long   0x00990000
-       .long   0x002d0000
-       .long   0x000f0000
-       .long   0x00b00000
-       .long   0x00540000
-       .long   0x00bb0000
-       .long   0x00160000
-       // Table 3.
-       .long   0x63000000
-       .long   0x7c000000
-       .long   0x77000000
-       .long   0x7b000000
-       .long   0xf2000000
-       .long   0x6b000000
-       .long   0x6f000000
-       .long   0xc5000000
-       .long   0x30000000
-       .long   0x01000000
-       .long   0x67000000
-       .long   0x2b000000
-       .long   0xfe000000
-       .long   0xd7000000
-       .long   0xab000000
-       .long   0x76000000
-       .long   0xca000000
-       .long   0x82000000
-       .long   0xc9000000
-       .long   0x7d000000
-       .long   0xfa000000
-       .long   0x59000000
-       .long   0x47000000
-       .long   0xf0000000
-       .long   0xad000000
-       .long   0xd4000000
-       .long   0xa2000000
-       .long   0xaf000000
-       .long   0x9c000000
-       .long   0xa4000000
-       .long   0x72000000
-       .long   0xc0000000
-       .long   0xb7000000
-       .long   0xfd000000
-       .long   0x93000000
-       .long   0x26000000
-       .long   0x36000000
-       .long   0x3f000000
-       .long   0xf7000000
-       .long   0xcc000000
-       .long   0x34000000
-       .long   0xa5000000
-       .long   0xe5000000
-       .long   0xf1000000
-       .long   0x71000000
-       .long   0xd8000000
-       .long   0x31000000
-       .long   0x15000000
-       .long   0x04000000
-       .long   0xc7000000
-       .long   0x23000000
-       .long   0xc3000000
-       .long   0x18000000
-       .long   0x96000000
-       .long   0x05000000
-       .long   0x9a000000
-       .long   0x07000000
-       .long   0x12000000
-       .long   0x80000000
-       .long   0xe2000000
-       .long   0xeb000000
-       .long   0x27000000
-       .long   0xb2000000
-       .long   0x75000000
-       .long   0x09000000
-       .long   0x83000000
-       .long   0x2c000000
-       .long   0x1a000000
-       .long   0x1b000000
-       .long   0x6e000000
-       .long   0x5a000000
-       .long   0xa0000000
-       .long   0x52000000
-       .long   0x3b000000
-       .long   0xd6000000
-       .long   0xb3000000
-       .long   0x29000000
-       .long   0xe3000000
-       .long   0x2f000000
-       .long   0x84000000
-       .long   0x53000000
-       .long   0xd1000000
-       .long   0x00000000
-       .long   0xed000000
-       .long   0x20000000
-       .long   0xfc000000
-       .long   0xb1000000
-       .long   0x5b000000
-       .long   0x6a000000
-       .long   0xcb000000
-       .long   0xbe000000
-       .long   0x39000000
-       .long   0x4a000000
-       .long   0x4c000000
-       .long   0x58000000
-       .long   0xcf000000
-       .long   0xd0000000
-       .long   0xef000000
-       .long   0xaa000000
-       .long   0xfb000000
-       .long   0x43000000
-       .long   0x4d000000
-       .long   0x33000000
-       .long   0x85000000
-       .long   0x45000000
-       .long   0xf9000000
-       .long   0x02000000
-       .long   0x7f000000
-       .long   0x50000000
-       .long   0x3c000000
-       .long   0x9f000000
-       .long   0xa8000000
-       .long   0x51000000
-       .long   0xa3000000
-       .long   0x40000000
-       .long   0x8f000000
-       .long   0x92000000
-       .long   0x9d000000
-       .long   0x38000000
-       .long   0xf5000000
-       .long   0xbc000000
-       .long   0xb6000000
-       .long   0xda000000
-       .long   0x21000000
-       .long   0x10000000
-       .long   0xff000000
-       .long   0xf3000000
-       .long   0xd2000000
-       .long   0xcd000000
-       .long   0x0c000000
-       .long   0x13000000
-       .long   0xec000000
-       .long   0x5f000000
-       .long   0x97000000
-       .long   0x44000000
-       .long   0x17000000
-       .long   0xc4000000
-       .long   0xa7000000
-       .long   0x7e000000
-       .long   0x3d000000
-       .long   0x64000000
-       .long   0x5d000000
-       .long   0x19000000
-       .long   0x73000000
-       .long   0x60000000
-       .long   0x81000000
-       .long   0x4f000000
-       .long   0xdc000000
-       .long   0x22000000
-       .long   0x2a000000
-       .long   0x90000000
-       .long   0x88000000
-       .long   0x46000000
-       .long   0xee000000
-       .long   0xb8000000
-       .long   0x14000000
-       .long   0xde000000
-       .long   0x5e000000
-       .long   0x0b000000
-       .long   0xdb000000
-       .long   0xe0000000
-       .long   0x32000000
-       .long   0x3a000000
-       .long   0x0a000000
-       .long   0x49000000
-       .long   0x06000000
-       .long   0x24000000
-       .long   0x5c000000
-       .long   0xc2000000
-       .long   0xd3000000
-       .long   0xac000000
-       .long   0x62000000
-       .long   0x91000000
-       .long   0x95000000
-       .long   0xe4000000
-       .long   0x79000000
-       .long   0xe7000000
-       .long   0xc8000000
-       .long   0x37000000
-       .long   0x6d000000
-       .long   0x8d000000
-       .long   0xd5000000
-       .long   0x4e000000
-       .long   0xa9000000
-       .long   0x6c000000
-       .long   0x56000000
-       .long   0xf4000000
-       .long   0xea000000
-       .long   0x65000000
-       .long   0x7a000000
-       .long   0xae000000
-       .long   0x08000000
-       .long   0xba000000
-       .long   0x78000000
-       .long   0x25000000
-       .long   0x2e000000
-       .long   0x1c000000
-       .long   0xa6000000
-       .long   0xb4000000
-       .long   0xc6000000
-       .long   0xe8000000
-       .long   0xdd000000
-       .long   0x74000000
-       .long   0x1f000000
-       .long   0x4b000000
-       .long   0xbd000000
-       .long   0x8b000000
-       .long   0x8a000000
-       .long   0x70000000
-       .long   0x3e000000
-       .long   0xb5000000
-       .long   0x66000000
-       .long   0x48000000
-       .long   0x03000000
-       .long   0xf6000000
-       .long   0x0e000000
-       .long   0x61000000
-       .long   0x35000000
-       .long   0x57000000
-       .long   0xb9000000
-       .long   0x86000000
-       .long   0xc1000000
-       .long   0x1d000000
-       .long   0x9e000000
-       .long   0xe1000000
-       .long   0xf8000000
-       .long   0x98000000
-       .long   0x11000000
-       .long   0x69000000
-       .long   0xd9000000
-       .long   0x8e000000
-       .long   0x94000000
-       .long   0x9b000000
-       .long   0x1e000000
-       .long   0x87000000
-       .long   0xe9000000
-       .long   0xce000000
-       .long   0x55000000
-       .long   0x28000000
-       .long   0xdf000000
-       .long   0x8c000000
-       .long   0xa1000000
-       .long   0x89000000
-       .long   0x0d000000
-       .long   0xbf000000
-       .long   0xe6000000
-       .long   0x42000000
-       .long   0x68000000
-       .long   0x41000000
-       .long   0x99000000
-       .long   0x2d000000
-       .long   0x0f000000
-       .long   0xb0000000
-       .long   0x54000000
-       .long   0xbb000000
-       .long   0x16000000
-
-
-// InvSubBytes embedded in words tables.
-       .globl  _AESInvSubBytesWordTable
-       .private_extern _AESInvSubBytesWordTable
-       .align  2
-_AESInvSubBytesWordTable:
-       // Table 0.
-       .long   0x00000052
-       .long   0x00000009
-       .long   0x0000006a
-       .long   0x000000d5
-       .long   0x00000030
-       .long   0x00000036
-       .long   0x000000a5
-       .long   0x00000038
-       .long   0x000000bf
-       .long   0x00000040
-       .long   0x000000a3
-       .long   0x0000009e
-       .long   0x00000081
-       .long   0x000000f3
-       .long   0x000000d7
-       .long   0x000000fb
-       .long   0x0000007c
-       .long   0x000000e3
-       .long   0x00000039
-       .long   0x00000082
-       .long   0x0000009b
-       .long   0x0000002f
-       .long   0x000000ff
-       .long   0x00000087
-       .long   0x00000034
-       .long   0x0000008e
-       .long   0x00000043
-       .long   0x00000044
-       .long   0x000000c4
-       .long   0x000000de
-       .long   0x000000e9
-       .long   0x000000cb
-       .long   0x00000054
-       .long   0x0000007b
-       .long   0x00000094
-       .long   0x00000032
-       .long   0x000000a6
-       .long   0x000000c2
-       .long   0x00000023
-       .long   0x0000003d
-       .long   0x000000ee
-       .long   0x0000004c
-       .long   0x00000095
-       .long   0x0000000b
-       .long   0x00000042
-       .long   0x000000fa
-       .long   0x000000c3
-       .long   0x0000004e
-       .long   0x00000008
-       .long   0x0000002e
-       .long   0x000000a1
-       .long   0x00000066
-       .long   0x00000028
-       .long   0x000000d9
-       .long   0x00000024
-       .long   0x000000b2
-       .long   0x00000076
-       .long   0x0000005b
-       .long   0x000000a2
-       .long   0x00000049
-       .long   0x0000006d
-       .long   0x0000008b
-       .long   0x000000d1
-       .long   0x00000025
-       .long   0x00000072
-       .long   0x000000f8
-       .long   0x000000f6
-       .long   0x00000064
-       .long   0x00000086
-       .long   0x00000068
-       .long   0x00000098
-       .long   0x00000016
-       .long   0x000000d4
-       .long   0x000000a4
-       .long   0x0000005c
-       .long   0x000000cc
-       .long   0x0000005d
-       .long   0x00000065
-       .long   0x000000b6
-       .long   0x00000092
-       .long   0x0000006c
-       .long   0x00000070
-       .long   0x00000048
-       .long   0x00000050
-       .long   0x000000fd
-       .long   0x000000ed
-       .long   0x000000b9
-       .long   0x000000da
-       .long   0x0000005e
-       .long   0x00000015
-       .long   0x00000046
-       .long   0x00000057
-       .long   0x000000a7
-       .long   0x0000008d
-       .long   0x0000009d
-       .long   0x00000084
-       .long   0x00000090
-       .long   0x000000d8
-       .long   0x000000ab
-       .long   0x00000000
-       .long   0x0000008c
-       .long   0x000000bc
-       .long   0x000000d3
-       .long   0x0000000a
-       .long   0x000000f7
-       .long   0x000000e4
-       .long   0x00000058
-       .long   0x00000005
-       .long   0x000000b8
-       .long   0x000000b3
-       .long   0x00000045
-       .long   0x00000006
-       .long   0x000000d0
-       .long   0x0000002c
-       .long   0x0000001e
-       .long   0x0000008f
-       .long   0x000000ca
-       .long   0x0000003f
-       .long   0x0000000f
-       .long   0x00000002
-       .long   0x000000c1
-       .long   0x000000af
-       .long   0x000000bd
-       .long   0x00000003
-       .long   0x00000001
-       .long   0x00000013
-       .long   0x0000008a
-       .long   0x0000006b
-       .long   0x0000003a
-       .long   0x00000091
-       .long   0x00000011
-       .long   0x00000041
-       .long   0x0000004f
-       .long   0x00000067
-       .long   0x000000dc
-       .long   0x000000ea
-       .long   0x00000097
-       .long   0x000000f2
-       .long   0x000000cf
-       .long   0x000000ce
-       .long   0x000000f0
-       .long   0x000000b4
-       .long   0x000000e6
-       .long   0x00000073
-       .long   0x00000096
-       .long   0x000000ac
-       .long   0x00000074
-       .long   0x00000022
-       .long   0x000000e7
-       .long   0x000000ad
-       .long   0x00000035
-       .long   0x00000085
-       .long   0x000000e2
-       .long   0x000000f9
-       .long   0x00000037
-       .long   0x000000e8
-       .long   0x0000001c
-       .long   0x00000075
-       .long   0x000000df
-       .long   0x0000006e
-       .long   0x00000047
-       .long   0x000000f1
-       .long   0x0000001a
-       .long   0x00000071
-       .long   0x0000001d
-       .long   0x00000029
-       .long   0x000000c5
-       .long   0x00000089
-       .long   0x0000006f
-       .long   0x000000b7
-       .long   0x00000062
-       .long   0x0000000e
-       .long   0x000000aa
-       .long   0x00000018
-       .long   0x000000be
-       .long   0x0000001b
-       .long   0x000000fc
-       .long   0x00000056
-       .long   0x0000003e
-       .long   0x0000004b
-       .long   0x000000c6
-       .long   0x000000d2
-       .long   0x00000079
-       .long   0x00000020
-       .long   0x0000009a
-       .long   0x000000db
-       .long   0x000000c0
-       .long   0x000000fe
-       .long   0x00000078
-       .long   0x000000cd
-       .long   0x0000005a
-       .long   0x000000f4
-       .long   0x0000001f
-       .long   0x000000dd
-       .long   0x000000a8
-       .long   0x00000033
-       .long   0x00000088
-       .long   0x00000007
-       .long   0x000000c7
-       .long   0x00000031
-       .long   0x000000b1
-       .long   0x00000012
-       .long   0x00000010
-       .long   0x00000059
-       .long   0x00000027
-       .long   0x00000080
-       .long   0x000000ec
-       .long   0x0000005f
-       .long   0x00000060
-       .long   0x00000051
-       .long   0x0000007f
-       .long   0x000000a9
-       .long   0x00000019
-       .long   0x000000b5
-       .long   0x0000004a
-       .long   0x0000000d
-       .long   0x0000002d
-       .long   0x000000e5
-       .long   0x0000007a
-       .long   0x0000009f
-       .long   0x00000093
-       .long   0x000000c9
-       .long   0x0000009c
-       .long   0x000000ef
-       .long   0x000000a0
-       .long   0x000000e0
-       .long   0x0000003b
-       .long   0x0000004d
-       .long   0x000000ae
-       .long   0x0000002a
-       .long   0x000000f5
-       .long   0x000000b0
-       .long   0x000000c8
-       .long   0x000000eb
-       .long   0x000000bb
-       .long   0x0000003c
-       .long   0x00000083
-       .long   0x00000053
-       .long   0x00000099
-       .long   0x00000061
-       .long   0x00000017
-       .long   0x0000002b
-       .long   0x00000004
-       .long   0x0000007e
-       .long   0x000000ba
-       .long   0x00000077
-       .long   0x000000d6
-       .long   0x00000026
-       .long   0x000000e1
-       .long   0x00000069
-       .long   0x00000014
-       .long   0x00000063
-       .long   0x00000055
-       .long   0x00000021
-       .long   0x0000000c
-       .long   0x0000007d
-       // Table 1.
-       .long   0x00005200
-       .long   0x00000900
-       .long   0x00006a00
-       .long   0x0000d500
-       .long   0x00003000
-       .long   0x00003600
-       .long   0x0000a500
-       .long   0x00003800
-       .long   0x0000bf00
-       .long   0x00004000
-       .long   0x0000a300
-       .long   0x00009e00
-       .long   0x00008100
-       .long   0x0000f300
-       .long   0x0000d700
-       .long   0x0000fb00
-       .long   0x00007c00
-       .long   0x0000e300
-       .long   0x00003900
-       .long   0x00008200
-       .long   0x00009b00
-       .long   0x00002f00
-       .long   0x0000ff00
-       .long   0x00008700
-       .long   0x00003400
-       .long   0x00008e00
-       .long   0x00004300
-       .long   0x00004400
-       .long   0x0000c400
-       .long   0x0000de00
-       .long   0x0000e900
-       .long   0x0000cb00
-       .long   0x00005400
-       .long   0x00007b00
-       .long   0x00009400
-       .long   0x00003200
-       .long   0x0000a600
-       .long   0x0000c200
-       .long   0x00002300
-       .long   0x00003d00
-       .long   0x0000ee00
-       .long   0x00004c00
-       .long   0x00009500
-       .long   0x00000b00
-       .long   0x00004200
-       .long   0x0000fa00
-       .long   0x0000c300
-       .long   0x00004e00
-       .long   0x00000800
-       .long   0x00002e00
-       .long   0x0000a100
-       .long   0x00006600
-       .long   0x00002800
-       .long   0x0000d900
-       .long   0x00002400
-       .long   0x0000b200
-       .long   0x00007600
-       .long   0x00005b00
-       .long   0x0000a200
-       .long   0x00004900
-       .long   0x00006d00
-       .long   0x00008b00
-       .long   0x0000d100
-       .long   0x00002500
-       .long   0x00007200
-       .long   0x0000f800
-       .long   0x0000f600
-       .long   0x00006400
-       .long   0x00008600
-       .long   0x00006800
-       .long   0x00009800
-       .long   0x00001600
-       .long   0x0000d400
-       .long   0x0000a400
-       .long   0x00005c00
-       .long   0x0000cc00
-       .long   0x00005d00
-       .long   0x00006500
-       .long   0x0000b600
-       .long   0x00009200
-       .long   0x00006c00
-       .long   0x00007000
-       .long   0x00004800
-       .long   0x00005000
-       .long   0x0000fd00
-       .long   0x0000ed00
-       .long   0x0000b900
-       .long   0x0000da00
-       .long   0x00005e00
-       .long   0x00001500
-       .long   0x00004600
-       .long   0x00005700
-       .long   0x0000a700
-       .long   0x00008d00
-       .long   0x00009d00
-       .long   0x00008400
-       .long   0x00009000
-       .long   0x0000d800
-       .long   0x0000ab00
-       .long   0x00000000
-       .long   0x00008c00
-       .long   0x0000bc00
-       .long   0x0000d300
-       .long   0x00000a00
-       .long   0x0000f700
-       .long   0x0000e400
-       .long   0x00005800
-       .long   0x00000500
-       .long   0x0000b800
-       .long   0x0000b300
-       .long   0x00004500
-       .long   0x00000600
-       .long   0x0000d000
-       .long   0x00002c00
-       .long   0x00001e00
-       .long   0x00008f00
-       .long   0x0000ca00
-       .long   0x00003f00
-       .long   0x00000f00
-       .long   0x00000200
-       .long   0x0000c100
-       .long   0x0000af00
-       .long   0x0000bd00
-       .long   0x00000300
-       .long   0x00000100
-       .long   0x00001300
-       .long   0x00008a00
-       .long   0x00006b00
-       .long   0x00003a00
-       .long   0x00009100
-       .long   0x00001100
-       .long   0x00004100
-       .long   0x00004f00
-       .long   0x00006700
-       .long   0x0000dc00
-       .long   0x0000ea00
-       .long   0x00009700
-       .long   0x0000f200
-       .long   0x0000cf00
-       .long   0x0000ce00
-       .long   0x0000f000
-       .long   0x0000b400
-       .long   0x0000e600
-       .long   0x00007300
-       .long   0x00009600
-       .long   0x0000ac00
-       .long   0x00007400
-       .long   0x00002200
-       .long   0x0000e700
-       .long   0x0000ad00
-       .long   0x00003500
-       .long   0x00008500
-       .long   0x0000e200
-       .long   0x0000f900
-       .long   0x00003700
-       .long   0x0000e800
-       .long   0x00001c00
-       .long   0x00007500
-       .long   0x0000df00
-       .long   0x00006e00
-       .long   0x00004700
-       .long   0x0000f100
-       .long   0x00001a00
-       .long   0x00007100
-       .long   0x00001d00
-       .long   0x00002900
-       .long   0x0000c500
-       .long   0x00008900
-       .long   0x00006f00
-       .long   0x0000b700
-       .long   0x00006200
-       .long   0x00000e00
-       .long   0x0000aa00
-       .long   0x00001800
-       .long   0x0000be00
-       .long   0x00001b00
-       .long   0x0000fc00
-       .long   0x00005600
-       .long   0x00003e00
-       .long   0x00004b00
-       .long   0x0000c600
-       .long   0x0000d200
-       .long   0x00007900
-       .long   0x00002000
-       .long   0x00009a00
-       .long   0x0000db00
-       .long   0x0000c000
-       .long   0x0000fe00
-       .long   0x00007800
-       .long   0x0000cd00
-       .long   0x00005a00
-       .long   0x0000f400
-       .long   0x00001f00
-       .long   0x0000dd00
-       .long   0x0000a800
-       .long   0x00003300
-       .long   0x00008800
-       .long   0x00000700
-       .long   0x0000c700
-       .long   0x00003100
-       .long   0x0000b100
-       .long   0x00001200
-       .long   0x00001000
-       .long   0x00005900
-       .long   0x00002700
-       .long   0x00008000
-       .long   0x0000ec00
-       .long   0x00005f00
-       .long   0x00006000
-       .long   0x00005100
-       .long   0x00007f00
-       .long   0x0000a900
-       .long   0x00001900
-       .long   0x0000b500
-       .long   0x00004a00
-       .long   0x00000d00
-       .long   0x00002d00
-       .long   0x0000e500
-       .long   0x00007a00
-       .long   0x00009f00
-       .long   0x00009300
-       .long   0x0000c900
-       .long   0x00009c00
-       .long   0x0000ef00
-       .long   0x0000a000
-       .long   0x0000e000
-       .long   0x00003b00
-       .long   0x00004d00
-       .long   0x0000ae00
-       .long   0x00002a00
-       .long   0x0000f500
-       .long   0x0000b000
-       .long   0x0000c800
-       .long   0x0000eb00
-       .long   0x0000bb00
-       .long   0x00003c00
-       .long   0x00008300
-       .long   0x00005300
-       .long   0x00009900
-       .long   0x00006100
-       .long   0x00001700
-       .long   0x00002b00
-       .long   0x00000400
-       .long   0x00007e00
-       .long   0x0000ba00
-       .long   0x00007700
-       .long   0x0000d600
-       .long   0x00002600
-       .long   0x0000e100
-       .long   0x00006900
-       .long   0x00001400
-       .long   0x00006300
-       .long   0x00005500
-       .long   0x00002100
-       .long   0x00000c00
-       .long   0x00007d00
-       // Table 2.
-       .long   0x00520000
-       .long   0x00090000
-       .long   0x006a0000
-       .long   0x00d50000
-       .long   0x00300000
-       .long   0x00360000
-       .long   0x00a50000
-       .long   0x00380000
-       .long   0x00bf0000
-       .long   0x00400000
-       .long   0x00a30000
-       .long   0x009e0000
-       .long   0x00810000
-       .long   0x00f30000
-       .long   0x00d70000
-       .long   0x00fb0000
-       .long   0x007c0000
-       .long   0x00e30000
-       .long   0x00390000
-       .long   0x00820000
-       .long   0x009b0000
-       .long   0x002f0000
-       .long   0x00ff0000
-       .long   0x00870000
-       .long   0x00340000
-       .long   0x008e0000
-       .long   0x00430000
-       .long   0x00440000
-       .long   0x00c40000
-       .long   0x00de0000
-       .long   0x00e90000
-       .long   0x00cb0000
-       .long   0x00540000
-       .long   0x007b0000
-       .long   0x00940000
-       .long   0x00320000
-       .long   0x00a60000
-       .long   0x00c20000
-       .long   0x00230000
-       .long   0x003d0000
-       .long   0x00ee0000
-       .long   0x004c0000
-       .long   0x00950000
-       .long   0x000b0000
-       .long   0x00420000
-       .long   0x00fa0000
-       .long   0x00c30000
-       .long   0x004e0000
-       .long   0x00080000
-       .long   0x002e0000
-       .long   0x00a10000
-       .long   0x00660000
-       .long   0x00280000
-       .long   0x00d90000
-       .long   0x00240000
-       .long   0x00b20000
-       .long   0x00760000
-       .long   0x005b0000
-       .long   0x00a20000
-       .long   0x00490000
-       .long   0x006d0000
-       .long   0x008b0000
-       .long   0x00d10000
-       .long   0x00250000
-       .long   0x00720000
-       .long   0x00f80000
-       .long   0x00f60000
-       .long   0x00640000
-       .long   0x00860000
-       .long   0x00680000
-       .long   0x00980000
-       .long   0x00160000
-       .long   0x00d40000
-       .long   0x00a40000
-       .long   0x005c0000
-       .long   0x00cc0000
-       .long   0x005d0000
-       .long   0x00650000
-       .long   0x00b60000
-       .long   0x00920000
-       .long   0x006c0000
-       .long   0x00700000
-       .long   0x00480000
-       .long   0x00500000
-       .long   0x00fd0000
-       .long   0x00ed0000
-       .long   0x00b90000
-       .long   0x00da0000
-       .long   0x005e0000
-       .long   0x00150000
-       .long   0x00460000
-       .long   0x00570000
-       .long   0x00a70000
-       .long   0x008d0000
-       .long   0x009d0000
-       .long   0x00840000
-       .long   0x00900000
-       .long   0x00d80000
-       .long   0x00ab0000
-       .long   0x00000000
-       .long   0x008c0000
-       .long   0x00bc0000
-       .long   0x00d30000
-       .long   0x000a0000
-       .long   0x00f70000
-       .long   0x00e40000
-       .long   0x00580000
-       .long   0x00050000
-       .long   0x00b80000
-       .long   0x00b30000
-       .long   0x00450000
-       .long   0x00060000
-       .long   0x00d00000
-       .long   0x002c0000
-       .long   0x001e0000
-       .long   0x008f0000
-       .long   0x00ca0000
-       .long   0x003f0000
-       .long   0x000f0000
-       .long   0x00020000
-       .long   0x00c10000
-       .long   0x00af0000
-       .long   0x00bd0000
-       .long   0x00030000
-       .long   0x00010000
-       .long   0x00130000
-       .long   0x008a0000
-       .long   0x006b0000
-       .long   0x003a0000
-       .long   0x00910000
-       .long   0x00110000
-       .long   0x00410000
-       .long   0x004f0000
-       .long   0x00670000
-       .long   0x00dc0000
-       .long   0x00ea0000
-       .long   0x00970000
-       .long   0x00f20000
-       .long   0x00cf0000
-       .long   0x00ce0000
-       .long   0x00f00000
-       .long   0x00b40000
-       .long   0x00e60000
-       .long   0x00730000
-       .long   0x00960000
-       .long   0x00ac0000
-       .long   0x00740000
-       .long   0x00220000
-       .long   0x00e70000
-       .long   0x00ad0000
-       .long   0x00350000
-       .long   0x00850000
-       .long   0x00e20000
-       .long   0x00f90000
-       .long   0x00370000
-       .long   0x00e80000
-       .long   0x001c0000
-       .long   0x00750000
-       .long   0x00df0000
-       .long   0x006e0000
-       .long   0x00470000
-       .long   0x00f10000
-       .long   0x001a0000
-       .long   0x00710000
-       .long   0x001d0000
-       .long   0x00290000
-       .long   0x00c50000
-       .long   0x00890000
-       .long   0x006f0000
-       .long   0x00b70000
-       .long   0x00620000
-       .long   0x000e0000
-       .long   0x00aa0000
-       .long   0x00180000
-       .long   0x00be0000
-       .long   0x001b0000
-       .long   0x00fc0000
-       .long   0x00560000
-       .long   0x003e0000
-       .long   0x004b0000
-       .long   0x00c60000
-       .long   0x00d20000
-       .long   0x00790000
-       .long   0x00200000
-       .long   0x009a0000
-       .long   0x00db0000
-       .long   0x00c00000
-       .long   0x00fe0000
-       .long   0x00780000
-       .long   0x00cd0000
-       .long   0x005a0000
-       .long   0x00f40000
-       .long   0x001f0000
-       .long   0x00dd0000
-       .long   0x00a80000
-       .long   0x00330000
-       .long   0x00880000
-       .long   0x00070000
-       .long   0x00c70000
-       .long   0x00310000
-       .long   0x00b10000
-       .long   0x00120000
-       .long   0x00100000
-       .long   0x00590000
-       .long   0x00270000
-       .long   0x00800000
-       .long   0x00ec0000
-       .long   0x005f0000
-       .long   0x00600000
-       .long   0x00510000
-       .long   0x007f0000
-       .long   0x00a90000
-       .long   0x00190000
-       .long   0x00b50000
-       .long   0x004a0000
-       .long   0x000d0000
-       .long   0x002d0000
-       .long   0x00e50000
-       .long   0x007a0000
-       .long   0x009f0000
-       .long   0x00930000
-       .long   0x00c90000
-       .long   0x009c0000
-       .long   0x00ef0000
-       .long   0x00a00000
-       .long   0x00e00000
-       .long   0x003b0000
-       .long   0x004d0000
-       .long   0x00ae0000
-       .long   0x002a0000
-       .long   0x00f50000
-       .long   0x00b00000
-       .long   0x00c80000
-       .long   0x00eb0000
-       .long   0x00bb0000
-       .long   0x003c0000
-       .long   0x00830000
-       .long   0x00530000
-       .long   0x00990000
-       .long   0x00610000
-       .long   0x00170000
-       .long   0x002b0000
-       .long   0x00040000
-       .long   0x007e0000
-       .long   0x00ba0000
-       .long   0x00770000
-       .long   0x00d60000
-       .long   0x00260000
-       .long   0x00e10000
-       .long   0x00690000
-       .long   0x00140000
-       .long   0x00630000
-       .long   0x00550000
-       .long   0x00210000
-       .long   0x000c0000
-       .long   0x007d0000
-       // Table 3.
-       .long   0x52000000
-       .long   0x09000000
-       .long   0x6a000000
-       .long   0xd5000000
-       .long   0x30000000
-       .long   0x36000000
-       .long   0xa5000000
-       .long   0x38000000
-       .long   0xbf000000
-       .long   0x40000000
-       .long   0xa3000000
-       .long   0x9e000000
-       .long   0x81000000
-       .long   0xf3000000
-       .long   0xd7000000
-       .long   0xfb000000
-       .long   0x7c000000
-       .long   0xe3000000
-       .long   0x39000000
-       .long   0x82000000
-       .long   0x9b000000
-       .long   0x2f000000
-       .long   0xff000000
-       .long   0x87000000
-       .long   0x34000000
-       .long   0x8e000000
-       .long   0x43000000
-       .long   0x44000000
-       .long   0xc4000000
-       .long   0xde000000
-       .long   0xe9000000
-       .long   0xcb000000
-       .long   0x54000000
-       .long   0x7b000000
-       .long   0x94000000
-       .long   0x32000000
-       .long   0xa6000000
-       .long   0xc2000000
-       .long   0x23000000
-       .long   0x3d000000
-       .long   0xee000000
-       .long   0x4c000000
-       .long   0x95000000
-       .long   0x0b000000
-       .long   0x42000000
-       .long   0xfa000000
-       .long   0xc3000000
-       .long   0x4e000000
-       .long   0x08000000
-       .long   0x2e000000
-       .long   0xa1000000
-       .long   0x66000000
-       .long   0x28000000
-       .long   0xd9000000
-       .long   0x24000000
-       .long   0xb2000000
-       .long   0x76000000
-       .long   0x5b000000
-       .long   0xa2000000
-       .long   0x49000000
-       .long   0x6d000000
-       .long   0x8b000000
-       .long   0xd1000000
-       .long   0x25000000
-       .long   0x72000000
-       .long   0xf8000000
-       .long   0xf6000000
-       .long   0x64000000
-       .long   0x86000000
-       .long   0x68000000
-       .long   0x98000000
-       .long   0x16000000
-       .long   0xd4000000
-       .long   0xa4000000
-       .long   0x5c000000
-       .long   0xcc000000
-       .long   0x5d000000
-       .long   0x65000000
-       .long   0xb6000000
-       .long   0x92000000
-       .long   0x6c000000
-       .long   0x70000000
-       .long   0x48000000
-       .long   0x50000000
-       .long   0xfd000000
-       .long   0xed000000
-       .long   0xb9000000
-       .long   0xda000000
-       .long   0x5e000000
-       .long   0x15000000
-       .long   0x46000000
-       .long   0x57000000
-       .long   0xa7000000
-       .long   0x8d000000
-       .long   0x9d000000
-       .long   0x84000000
-       .long   0x90000000
-       .long   0xd8000000
-       .long   0xab000000
-       .long   0x00000000
-       .long   0x8c000000
-       .long   0xbc000000
-       .long   0xd3000000
-       .long   0x0a000000
-       .long   0xf7000000
-       .long   0xe4000000
-       .long   0x58000000
-       .long   0x05000000
-       .long   0xb8000000
-       .long   0xb3000000
-       .long   0x45000000
-       .long   0x06000000
-       .long   0xd0000000
-       .long   0x2c000000
-       .long   0x1e000000
-       .long   0x8f000000
-       .long   0xca000000
-       .long   0x3f000000
-       .long   0x0f000000
-       .long   0x02000000
-       .long   0xc1000000
-       .long   0xaf000000
-       .long   0xbd000000
-       .long   0x03000000
-       .long   0x01000000
-       .long   0x13000000
-       .long   0x8a000000
-       .long   0x6b000000
-       .long   0x3a000000
-       .long   0x91000000
-       .long   0x11000000
-       .long   0x41000000
-       .long   0x4f000000
-       .long   0x67000000
-       .long   0xdc000000
-       .long   0xea000000
-       .long   0x97000000
-       .long   0xf2000000
-       .long   0xcf000000
-       .long   0xce000000
-       .long   0xf0000000
-       .long   0xb4000000
-       .long   0xe6000000
-       .long   0x73000000
-       .long   0x96000000
-       .long   0xac000000
-       .long   0x74000000
-       .long   0x22000000
-       .long   0xe7000000
-       .long   0xad000000
-       .long   0x35000000
-       .long   0x85000000
-       .long   0xe2000000
-       .long   0xf9000000
-       .long   0x37000000
-       .long   0xe8000000
-       .long   0x1c000000
-       .long   0x75000000
-       .long   0xdf000000
-       .long   0x6e000000
-       .long   0x47000000
-       .long   0xf1000000
-       .long   0x1a000000
-       .long   0x71000000
-       .long   0x1d000000
-       .long   0x29000000
-       .long   0xc5000000
-       .long   0x89000000
-       .long   0x6f000000
-       .long   0xb7000000
-       .long   0x62000000
-       .long   0x0e000000
-       .long   0xaa000000
-       .long   0x18000000
-       .long   0xbe000000
-       .long   0x1b000000
-       .long   0xfc000000
-       .long   0x56000000
-       .long   0x3e000000
-       .long   0x4b000000
-       .long   0xc6000000
-       .long   0xd2000000
-       .long   0x79000000
-       .long   0x20000000
-       .long   0x9a000000
-       .long   0xdb000000
-       .long   0xc0000000
-       .long   0xfe000000
-       .long   0x78000000
-       .long   0xcd000000
-       .long   0x5a000000
-       .long   0xf4000000
-       .long   0x1f000000
-       .long   0xdd000000
-       .long   0xa8000000
-       .long   0x33000000
-       .long   0x88000000
-       .long   0x07000000
-       .long   0xc7000000
-       .long   0x31000000
-       .long   0xb1000000
-       .long   0x12000000
-       .long   0x10000000
-       .long   0x59000000
-       .long   0x27000000
-       .long   0x80000000
-       .long   0xec000000
-       .long   0x5f000000
-       .long   0x60000000
-       .long   0x51000000
-       .long   0x7f000000
-       .long   0xa9000000
-       .long   0x19000000
-       .long   0xb5000000
-       .long   0x4a000000
-       .long   0x0d000000
-       .long   0x2d000000
-       .long   0xe5000000
-       .long   0x7a000000
-       .long   0x9f000000
-       .long   0x93000000
-       .long   0xc9000000
-       .long   0x9c000000
-       .long   0xef000000
-       .long   0xa0000000
-       .long   0xe0000000
-       .long   0x3b000000
-       .long   0x4d000000
-       .long   0xae000000
-       .long   0x2a000000
-       .long   0xf5000000
-       .long   0xb0000000
-       .long   0xc8000000
-       .long   0xeb000000
-       .long   0xbb000000
-       .long   0x3c000000
-       .long   0x83000000
-       .long   0x53000000
-       .long   0x99000000
-       .long   0x61000000
-       .long   0x17000000
-       .long   0x2b000000
-       .long   0x04000000
-       .long   0x7e000000
-       .long   0xba000000
-       .long   0x77000000
-       .long   0xd6000000
-       .long   0x26000000
-       .long   0xe1000000
-       .long   0x69000000
-       .long   0x14000000
-       .long   0x63000000
-       .long   0x55000000
-       .long   0x21000000
-       .long   0x0c000000
-       .long   0x7d000000
diff --git a/bsd/crypto/aes/i386/EncryptDecrypt.s b/bsd/crypto/aes/i386/EncryptDecrypt.s
deleted file mode 100644 (file)
index 6a6147a..0000000
+++ /dev/null
@@ -1,607 +0,0 @@
-/*     This file defines one of _aes_encrypt, _aes_decrypt,
-       _aes_encrypt_xmm_no_save, or _aes_decrypt_xmm_no_save, according to the
-       value of the Select preprocessor symbol.  This file is designed to be
-       included in another assembly file using the preprocessor #include
-       directive, to benefit
-       from some assembly-time calculations.
-
-       These two routines are nearly identical.  They differ only in the tables
-       they use, the direction they iterate through the key, and the permutation
-       performed on part of the state.
-
-       Written by Eric Postpischil, January 2008.
-*/
-
-/* add AES HW detection and HW-specific program branch cclee 3-12-10 */
-#ifdef KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-
-#if Select == 0
-       #define Name            _aes_encrypt                            // Routine name.
-       #define MTable          _AESEncryptTable                        // Main table.
-       #define FTable          _AESSubBytesWordTable           // Final table.
-       #define P0                      S0                                                      // State permutation.
-       #define P1                      S1
-       #define P2                      S2
-       #define P3                      S3
-       #define Increment       +16                                                     // ExpandedKey increment.
-#elif Select == 1
-       #define Name            _aes_decrypt                            // Routine name.
-       #define MTable          _AESDecryptTable                        // Main table.
-       #define FTable          _AESInvSubBytesWordTable        // Final table.
-       #define P0                      S2                                                      // State permutation.
-       #define P1                      S3
-       #define P2                      S0
-       #define P3                      S1
-       #define Increment       -16                                                     // ExpandedKey increment.
-#elif Select == 2
-       #define Name            _aes_encrypt_xmm_no_save        // Routine name.
-       #define MTable          _AESEncryptTable                        // Main table.
-       #define FTable          _AESSubBytesWordTable           // Final table.
-       #define P0                      S0                                                      // State permutation.
-       #define P1                      S1
-       #define P2                      S2
-       #define P3                      S3
-       #define Increment       +16                                                     // ExpandedKey increment.
-#elif Select == 3
-       #define Name            _aes_decrypt_xmm_no_save        // Routine name.
-       #define MTable          _AESDecryptTable                        // Main table.
-       #define FTable          _AESInvSubBytesWordTable        // Final table.
-       #define P0                      S2                                                      // State permutation.
-       #define P1                      S3
-       #define P2                      S0
-       #define P3                      S1
-       #define Increment       -16                                                     // ExpandedKey increment.
-#endif // Select
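
/* As the header comment notes, this file is instantiated by #including it
   with Select already defined.  A minimal sketch of such a wrapper
   (hypothetical, shown here only to illustrate the pattern) is:

        #define Select 0                // instantiate _aes_encrypt
        #include "EncryptDecrypt.s"
        #undef  Select

        #define Select 1                // instantiate _aes_decrypt
        #include "EncryptDecrypt.s"
        #undef  Select

   Each inclusion assembles the single routine body below under a different
   name, with its tables, key-walk direction, and state permutation bound at
   assembly time. */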
-
-
-/*     Routine:
-
-               The routine named by the Name macro above:  _aes_encrypt (Select 0),
-               _aes_decrypt (Select 1), _aes_encrypt_xmm_no_save (Select 2), or
-               _aes_decrypt_xmm_no_save (Select 3).
-
-       Function:
-
-               Perform the AES cipher or its inverse as defined in Federal Information
-               Processing Standards Publication 197 (FIPS-197), November 26, 2001.
-
-               The inverse cipher here is the "Equivalent Inverse Cipher" in FIPS-197.
-
-       Input:
-
-               Constant data:
-
-                       The following names must be locally defined so the assembler
-                       can calculate certain offsets.
-                               
-                       For encryption:
-
-                               static const Word _AESEncryptTable[4][256].
-
-                                       _AESEncryptTable[i] contains the tables T[i] defined in AES
-                                       Proposal: Rijndael, version 2, 03/09/99, by Joan Daemen and
-                                       Vincent Rijmen, section 5.2.1, page 18.  These tables
-                                       combine the SubBytes and MixColumns operations.
-
-                               static const Word _AESSubBytesWordTable[4][256].
-
-                                       _AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
-                                       SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
-                                       differs from _AESEncryptTable in that it does not include
-                                       the MixColumn operation.  It is used in performing the last
-                                       round, which differs fromm the previous rounds in that it
-                                       does not include the MixColumn operation.
-
-                       For decryption:
-
-                               static const Word _AESDecryptTable[4][256].
-
-                                       The analog of _AESEncryptTable for decryption.
-
-                               static const Word _AESInvSubBytesWordTable[4][256].
-
-                                       _AESInvSubBytesWordTable[i][j] = InvSubBytes(j) << 8*i,
-                                       where InvSubBytes is defined in FIPS-197.
-                                       _AESInvSubBytesWordTable differs from _AESDecryptTable in
-                                       that it does not include the InvMixColumn operation.  It is
-                                       used in performing the last round, which differs from the
-                                       previous rounds in that it does not include the
-                                       InvMixColumn operation.
-
-               Arguments:
-
-                       const Byte *InputText.
-
-                               Address of input, 16 bytes.  Best if four-byte aligned.
-
-                       Byte *OutputText.
-
-                               Address of output, 16 bytes.  Best if four-byte aligned.
-
-                       aes_encrypt_ctx *Context or aes_decrypt_ctx *Context
-
-                               aes_encrypt_ctx and aes_decrypt_ctx are identical except the
-                               former is used for encryption and the latter for decryption.
-
-                               Each is a structure containing the expanded key beginning at
-                               offset ContextKey and a four-byte "key length" beginning at
-                               offset ContextKeyLength.  The "key length" is the number of
-                               bytes from the start of the first round key to the start of the
-                               last round key.  That is 16 less than the number of bytes in
-                               the entire expanded key.
-
-       Output:
-
-               Encrypted or decrypted data is written to *OutputText.
-
-       Return:
-
-               aes_rval        // -1 if "key length" is invalid.  0 otherwise.
-*/
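-
-/*     For reference, a minimal C sketch of the caller-visible shape of this
-       routine.  This is a sketch only; the field names below are illustrative,
-       and the real layout is fixed by the ContextKey and ContextKeyLength
-       offsets defined elsewhere:
-
-               typedef unsigned char Byte;
-               typedef unsigned int Word;
-
-               typedef struct {
-                       Word key[60];           // Expanded key (up to 15 round keys).
-                       Word key_length;        // Bytes from first to last round key.
-               } aes_encrypt_ctx;
-
-               typedef int aes_rval;
-
-               aes_rval aes_encrypt(const Byte *InputText, Byte *OutputText,
-                               aes_encrypt_ctx *Context);
-               // Returns -1 unless Context->key_length is 160, 192, or 224.
-*/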
-
-       .text
-       .globl Name
-Name:
-
-    // detect AES HW, cclee 3-13-10
-#if Select < 2                                                                                         // only for aes_encrypt/aes_decrypt
-#if defined __x86_64__
-    movq    __cpu_capabilities@GOTPCREL(%rip), %rax                    // %rax -> __cpu_capabilities
-    mov     (%rax), %eax                                                                       // %eax = __cpu_capabilities
-#else
-#if defined    KERNEL
-    leal    __cpu_capabilities, %eax                                           // %eax -> __cpu_capabilities
-    mov     (%eax), %eax                                                                       // %eax = __cpu_capabilities
-#else
-       mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
-#endif
-#endif
-    test    $(kHasAES), %eax                                                           // __cpu_capabilities & kHasAES
-#if Select == 0
-    jne     _aes_encrypt_hw                                                                    // if AES HW detected, branch to HW specific code
-#else
-    jne     _aes_decrypt_hw                                                                    // if AES HW detected, branch to HW specific code
-#endif
-#endif         // Select
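-
-/*     In C, the dispatch above amounts to the following (a sketch; kHasAES and
-       __cpu_capabilities are the real symbols tested, while the helper names
-       and types are illustrative, as in the sketch above):
-
-               extern unsigned int _cpu_capabilities;
-
-               aes_rval aes_encrypt(const Byte *in, Byte *out, aes_encrypt_ctx *cx)
-               {
-                       if (_cpu_capabilities & kHasAES)
-                               return aes_encrypt_hw(in, out, cx);     // AES-NI path.
-                       return aes_encrypt_sw(in, out, cx);             // Table path below.
-               }
-*/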
-
-       // Push new stack frame.
-       push    r5
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (3*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       4 (i386) or 0 (x86_64) bytes for ExpandedKeyEnd.
-
-                       5 (i386) or 3 (x86_64) 16-byte spaces to save XMM registers.
-       */
-       #define LocalsSize      (Arch(4, 0) + Arch(5, 3)*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
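-
-       /*      The Padding expression rounds the frame size up to the next multiple
-               of 16, since 15 & -x is x's distance to that multiple (assuming two's
-               complement).  A quick check in C:
-
-                       #include <assert.h>
-                       int main(void)
-                       {
-                               // x86_64: SaveSize = 3*8 = 24, LocalsSize = 3*16 = 48.
-                               assert((15 & -(24 + 48)) == 8);         // 72 + 8 = 80 = 5*16.
-                               // i386: SaveSize = 5*4 = 20, LocalsSize = 4 + 5*16 = 84.
-                               assert((15 & -(20 + 84)) == 8);         // 104 + 8 = 112 = 7*16.
-                               return 0;
-                       }
-       */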
-
-#ifdef KERNEL
-#if    Select < 2
-       // Save XMM registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-#if defined __i386__
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-#endif
-#endif // Select
-#endif // KERNEL
-
-#if defined __i386__
-
-       // Number of bytes from caller's stack pointer to ours.
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Define location of argument i (presuming 4-byte arguments).
-       #define Argument(i)     StackFrame+4*(i)(%esp)
-
-       #define ArgInputText    Argument(0)
-       #define ArgOutputText   Argument(1)
-       #define ArgContext              Argument(2)
-
-#elif defined __x86_64__
-
-       // Arguments.
-       #define InputText               r7      // Used early then overwritten for other use.
-       #define OutputText              r6      // Needed near end of routine.
-       #define ArgContext              r2
-               /*      The argument passed in r2 overlaps registers we need for other
-                       work, so it must be moved early in the routine.
-               */
-
-#endif
-
-#define        BaseP           Arch(r6, r9)    // Base pointer for addressing global data.
-#define        ExpandedKey     Arch(t0, r10)   // Address of expanded key.
-
-/*     The Work registers defined below are used to hold parts of the AES state
-       while we dissect or assemble it.  They must be assigned to the A, B, C, and
-       D registers so that we can access the bytes in %al, %ah, and so on.
-*/
-#define        Work0d  r0d
-#define        Work0l  r0l
-#define        Work0h  r0h
-#define        Work1d  r3d
-#define        Work1l  r3l
-#define        Work1h  r3h
-#define        Work2d  r1d
-#define        Work2l  r1l
-#define        Work2h  r1h
-#define        Work3d  r2d
-#define        Work3l  r2l
-#define        Work3h  r2h
-
-#define        t0              r5
-#define        t0d             r5d             // Low 32 bits of t0.
-#define        t0l             r5l             // Low byte of t0.
-
-#define        t1              r7
-
-/*     S0, S1, S2, and S3 are where we assemble the new AES state when computing
-       a regular round.  S1, S2, and S3 are assigned to the Work registers, but
-       S0 needs to go somewhere else because Work0 holds part of the old state.
-*/
-#define        S0              Arch(t1, r8d)
-#define        S1              Work1d
-#define        S2              Work2d
-#define        S3              Work3d
-
-/*     These XMM registers are used as holding space, because it is faster to
-       spill to these registers than to the stack.  (On x86_64, we do not need
-       to spill, because there are additional general registers available.
-       However, using more general registers requires saving them to the stack
-       and restoring them.  I timed it, and no time was saved.)
-*/
-#define        vS1             %xmm0
-#define        vS2             %xmm1
-#define        vS3             %xmm2
-#if defined __i386__
-       #define vExpandedKey    %xmm3
-       #define vIncrement              %xmm4
-#endif
-
-       // Get address of expanded key.
-       mov     ArgContext, ExpandedKey
-       #if 0 != ContextKey
-               add             $ContextKey, ExpandedKey
-       #endif
-
-/*     Store the sentinel value of ExpandedKey on the stack on i386, in a
-       register on x86_64.
-*/
-#define        ExpandedKeyEnd  Arch(5*16(r4), r11)
-
-       // Get and check "key length".
-       movzx   ContextKeyLength(ExpandedKey), r0
-       cmp             $160, r0
-       je              2f
-       cmp             $192, r0
-       je              2f
-       cmp             $224, r0
-       je              2f
-       mov             $-1, r0         // Return error.
-       jmp             9f
-2:
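-
-       /*      The accepted "key length" values map to the three AES key sizes (a
-               sketch of the check above in C):
-
-                       switch (key_length) {
-                       case 160: break;                // AES-128: 10 rounds, 11 round keys.
-                       case 192: break;                // AES-192: 12 rounds, 13 round keys.
-                       case 224: break;                // AES-256: 14 rounds, 15 round keys.
-                       default:  return -1;            // Matches the jmp to 9f above.
-                       }
-       */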
-
-       #if (Select == 0 || Select == 2)
-               // For encryption, prepare to iterate forward through expanded key.
-               add             ExpandedKey, r0
-               mov             r0, ExpandedKeyEnd
-       #else
-               // For decryption, prepare to iterate backward through expanded key.
-               mov             ExpandedKey, ExpandedKeyEnd
-               add             r0, ExpandedKey
-       #endif
-
-       // Initialize State from input text.
-       #if defined __i386__
-               mov             ArgInputText, BaseP
-               #define InputText       BaseP
-       #endif
-       mov             0*4(InputText), Work0d
-       mov             1*4(InputText), S1
-       mov             2*4(InputText), S2
-       mov             3*4(InputText), S3
-#undef InputText       // Register is reused after this for other purposes.
-
-       // Add round key and save results.
-       xor             0*4(ExpandedKey), Work0d        // S0 is in dissection register.
-       xor             1*4(ExpandedKey), S1
-       movd    S1, vS1                                         // Save S1 to S3 in vector registers.
-       xor             2*4(ExpandedKey), S2
-       movd    S2, vS2
-       xor             3*4(ExpandedKey), S3
-       movd    S3, vS3
-
-       add             $Increment, ExpandedKey          // Advance to next round key.
-
-       #if defined __i386__
-               // Save expanded key address and increment in vector registers.
-               mov             $Increment, t1
-               movp    ExpandedKey, vExpandedKey
-               movp    t1, vIncrement
-       #endif
-
-       // Set up relative addressing.
-       #if defined __i386__
-
-               // Get address of 0 in BaseP.
-                       call    0f                              // Push program counter onto stack.
-               0:
-                       pop             BaseP                   // Get program counter.
-
-               // Define macros to help address data.
-#define        LookupM(table, index)   MTable-0b+(table)*TableSize(BaseP, index, 4)
-#define LookupF(table, index)  FTable-0b+(table)*TableSize(BaseP, index, 4)
-
-       #elif defined __x86_64__
-
-               lea     MTable(%rip), BaseP
-
-               // Define macros to help address data.
-               #define LookupM(table, index)   (table)*TableSize(BaseP, index, 4)
-               #define LookupF(table, index)   (table)*TableSize(BaseP, index, 4)
-
-/*     With these definitions of LookupM and LookupF, BaseP must be loaded with
-       the address of the table at the point where it is used.  So we need an
-       instruction to change BaseP after we are done with MTable and before we
-       start using FTable.  I would prefer to use something like:
-
-               .set    FMinusM, FTable - MTable
-               #define LookupF(table, index)   \
-                       FMinusM+(table)*TableSize(BaseP, index, 4)
-
-       Then BaseP would not need to change.  However, this fails due to an
-       assembler/linker bug, <rdar://problem/5683882>.
-*/
-
-       #endif
-
-       // Get round key.
-       mov             0*4(ExpandedKey), S0
-       mov             1*4(ExpandedKey), S1
-       mov             2*4(ExpandedKey), S2
-       mov             3*4(ExpandedKey), S3
-
-1:
-       /*      Word 0 of the current state must be in Work0 now, and the next round
-               key must be in S0 to S3.
-       */
-
-       // Process previous S0.
-       movzx   Work0l, t0
-       xor             LookupM(0, t0), S0
-       movzx   Work0h, t0d
-       xor             LookupM(1, t0), P3
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(2, t0), S2
-       movzx   Work0h, t0d
-       xor             LookupM(3, t0), P1
-
-       // Process previous S1.
-       movd    vS1, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(0, t0), S1
-       movzx   Work0h, t0d
-       xor             LookupM(1, t0), P0
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(2, t0), S3
-       movzx   Work0h, t0d
-       xor             LookupM(3, t0), P2
-
-       // Process previous S2.
-       movd    vS2, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(0, t0), S2
-       movzx   Work0h, t0d
-       xor             LookupM(1, t0), P1
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(2, t0), S0
-       movzx   Work0h, t0d
-       xor             LookupM(3, t0), P3
-
-       // Process previous S3.
-       movd    vS3, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(0, t0), S3
-       movzx   Work0h, t0d
-       xor             LookupM(1, t0), P2
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupM(2, t0), S1
-       movzx   Work0h, t0d
-       xor             LookupM(3, t0), P0
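-
-       /*      In C, the scatter form of one round used above looks roughly like
-               this for encryption (a sketch; T stands for _AESEncryptTable, K for
-               the current round key, and s0..s3 for the old state words):
-
-                       Word n0 = K[0], n1 = K[1], n2 = K[2], n3 = K[3];
-
-                       // Each byte of old word 0 feeds a different new word (ShiftRows).
-                       n0 ^= T[0][ s0        & 0xff];
-                       n3 ^= T[1][(s0 >>  8) & 0xff];
-                       n2 ^= T[2][(s0 >> 16) & 0xff];
-                       n1 ^= T[3][ s0 >> 24        ];
-                       // ...and likewise for s1, s2, s3, with the destinations rotated
-                       // one position each time.  For decryption, the P0-P3 permutation
-                       // rotates the destinations the other way.
-       */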
-
-       #if defined __i386__
-               paddd   vIncrement, vExpandedKey
-               movp    vExpandedKey, ExpandedKey
-       #else
-               add             $Increment, ExpandedKey
-       #endif
-
-       // Save state for next iteration and load next round key.
-       mov             S0, Work0d
-       mov             0*4(ExpandedKey), S0
-       movd    S1, vS1
-       mov             1*4(ExpandedKey), S1
-       movd    S2, vS2
-       mov             2*4(ExpandedKey), S2
-       movd    S3, vS3
-       mov             3*4(ExpandedKey), S3
-
-       cmp             ExpandedKeyEnd, ExpandedKey
-       jne             1b
-
-       /*      Word 0 of the current state must be in Work0 now, and the next round
-               key must be in S0 to S3.
-       */
-
-       // Work around assembler bug.  See comments above about Radar 5683882.
-       #if defined __x86_64__
-               lea     FTable(%rip), BaseP
-       #endif
-
-       // Process previous S0.
-       movzx   Work0l, t0
-       xor             LookupF(0, t0), S0
-       movzx   Work0h, t0d
-       xor             LookupF(1, t0), P3
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(2, t0), S2
-       movzx   Work0h, t0d
-       xor             LookupF(3, t0), P1
-
-       // Process previous S1.
-       movd    vS1, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(0, t0), S1
-       movzx   Work0h, t0d
-       xor             LookupF(1, t0), P0
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(2, t0), S3
-       movzx   Work0h, t0d
-       xor             LookupF(3, t0), P2
-
-       // Process previous S2.
-       movd    vS2, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(0, t0), S2
-       movzx   Work0h, t0d
-       xor             LookupF(1, t0), P1
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(2, t0), S0
-       movzx   Work0h, t0d
-       xor             LookupF(3, t0), P3
-
-       // Process previous S3.
-       movd    vS3, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(0, t0), S3
-       movzx   Work0h, t0d
-       xor             LookupF(1, t0), P2
-       shr             $16, Work0d
-       movzx   Work0l, t0d
-       xor             LookupF(2, t0), S1
-       movzx   Work0h, t0d
-       xor             LookupF(3, t0), P0
-
-       #if defined __i386__    // Architecture.
-               // Get OutputText address.
-               #define OutputText      BaseP
-               mov             ArgOutputText, OutputText
-       #endif  // Architecture.
-
-       // Write output.
-       mov             S0, 0*4(OutputText)
-       mov             S1, 1*4(OutputText)
-       mov             S2, 2*4(OutputText)
-       mov             S3, 3*4(OutputText)
-
-       xor             r0, r0          // Return success.
-
-9:
-       // Pop stack and restore registers.
-#ifdef KERNEL
-#if    Select < 2
-#if defined __i386__
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-#endif
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-#endif // Select
-#endif // KERNEL
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-       #elif defined __x86_64__
-       #endif
-       pop             r3
-       pop             r5
-
-       ret
-
-
-#undef ArgExpandedKey
-#undef ArgInputText
-#undef ArgNr
-#undef ArgOutputText
-#undef Argument
-#undef BaseP
-#undef ExpandedKey
-#undef ExpandedKeyEnd
-#undef FTable
-#undef InputText
-#undef LocalsSize
-#undef LookupM
-#undef LookupF
-#undef MTable
-#undef OutputText
-#undef Padding
-#undef SaveSize
-#undef S0
-#undef S1
-#undef S2
-#undef S3
-#undef StackFrame
-#undef Work0d
-#undef Work0h
-#undef Work0l
-#undef Work1d
-#undef Work1h
-#undef Work1l
-#undef Work2d
-#undef Work2h
-#undef Work2l
-#undef Work3d
-#undef Work3h
-#undef Work3l
-#undef t0
-#undef t0d
-#undef t0l
-#undef t1
-#undef vExpandedKey
-#undef vS1
-#undef vS2
-#undef vS3
-
-#undef Name
-#undef MTable
-#undef FTable
-#undef P0
-#undef P1
-#undef P2
-#undef P3
-#undef Increment
diff --git a/bsd/crypto/aes/i386/ExpandKeyForDecryption.s b/bsd/crypto/aes/i386/ExpandKeyForDecryption.s
deleted file mode 100644 (file)
index 457508a..0000000
+++ /dev/null
@@ -1,1214 +0,0 @@
-/*     This file defines _aes_decrypt_key, _aes_decrypt_key128,
-       _aes_decrypt_key192, and _aes_decrypt_key256.  It is designed to be
-       included in another assembly file with the preprocessor #include directive,
-       to benefit from some assembly-time calculations.
-
-       Written by Eric Postpischil, January 2008.
-
-       The comments here do not say much about the algorithm; the code just
-       follows the FIPS-197 specification.  I recommend reading the specification
-       before working with this code or examining the C code in the parent
-       directory that illustrates key expansion.
-
-       One complication is that this routine both expands the key and applies
-       InvMixColumn to most of the words in the expanded key.  This modifies the
-       key for use with the Equivalent Inverse Cipher.
-
-       During key expansion, there are sequences of four or six words that are
-       produced like this:
-
-               E[i+0] = E[i+0-Nk] ^ f(E[i-1]), where f is some function.
-               E[i+1] = E[i+1-Nk] ^ E[i+0].
-               E[i+2] = E[i+2-Nk] ^ E[i+1].
-               E[i+3] = E[i+3-Nk] ^ E[i+2].
-
-       When Nk is four or eight, the sequence stops there.  When it is six, it
-       goes on for two more words.  Let I be the InvMixColumn function.  For the
-       Equivalent Inverse Cipher, we want to store I(E[i+0]), I(E[i+1]),
-       I(E[i+2]), I(E[i+3]) (and two more when Nk is six).  However, we do not
-       need to calculate I four times.  In AES' finite field, I is a linear
-       combination of the four bytes of its input.  The ^ operation on the bits
-       that represent field elements is an addition in the Galois field.  So
-       I(a ^ b) = I(a) ^ I(b).  Then we have:
-
-               I(E[i+0]) = I(E[i+0-Nk] ^ f(E[i-1])) = I(E[i+0-Nk]) ^ I(f(E[i-1])).
-               I(E[i+1]) = I(E[i+1-Nk]) ^ I(E[i+0]).
-               I(E[i+2]) = I(E[i+2-Nk]) ^ I(E[i+1]).
-               I(E[i+3]) = I(E[i+3-Nk]) ^ I(E[i+2]).
-
-       To compute this, we compute I(f(E[i-1])) and XOR it with the previously
-       stored I(E[i+0-Nk]) to get I(E[i+0]).  Then we XOR that with the
-       previously stored I(E[i+1-Nk]) to get I(E[i+1]), and so on.
-
-       Note that to compute I(f(E[i-1])), we need to have E[i-1].  So we have to
-       compute the pre-InvMixColumn words of the expanded key; it is not
-       sufficient to have the post-InvMixColumn words.
-*/
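-
-/*     The chaining described above, in C (a sketch for interior blocks of the
-       four-word case; Word is a four-byte unsigned integer, D stands for the
-       InvMixColumn-transformed expanded key, and InvMixColumnWord for the word
-       transform computed by the InvMixColumn routine below):
-
-               static void chain_block(Word D[], int i, Word d)
-               {
-                       // d arrives as I(f(E[i-1])), computed once per block.
-                       for (int j = 0; j < 4; ++j) {
-                               d ^= D[i + j - 4];      // I(E[i+j]) = I(E[i+j-Nk]) ^ prior value.
-                               D[i + j] = d;
-                       }
-               }
-*/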
-
-
-/*     Routine:
-
-               _aes_decrypt_key.
-
-               _aes_decrypt_key128, _aes_decrypt_key192, and _aes_decrypt_key256.
-
-       Function:
-
-               Expand the user's cipher key into the key schedule, as defined in
-               Federal Information Processing Standards Publication 197 (FIPS-197),
-               November 26, 2001.
-
-               For decryption, the key is modified as shown in Figure 15 in FIPS-197,
-               to support the Equivalent Inverse Cipher.
-
-       Input:
-
-               Constant data:
-
-                       The following names must be locally defined so the assembler
-                       can calculate certain offsets.
-
-                       static const Word _AESSubBytesWordTable[4][256].
-
-                               _AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
-                               SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
-                               differs from _AESEncryptTable in that it does not include
-                               the MixColumn operation.  It is used in performing the last
-                               round, which differs from the previous rounds in that it
-                               does not include the MixColumn operation.
-
-                       static const Word _AESInvMixColumnTable[4][256].
-
-                               _AESInvMixColumnTable[i][j] contains the contribution of byte
-                               j to element i of the InvMixColumn operation.
-
-                               The four bytes of the word _AESInvMixColumnTable[0][j] are:
-
-                                       {0xe}*{j}, {0x9}*{j}, {0xd}*{j}, {0xb}*{j},
-
-                               listed in increasing address order, where multiplication is
-                               performed in the Galois field.  {j} designates the element of
-                               the Galois field represented by j.  _AESInvMixColumnTable[i][j]
-                               has the same bytes, rotated right in the order shown above.
-
-                       static const Byte _AESRcon[].
-
-                               Round constants, beginning with _AESRcon[1] for the first round
-                               (_AESRcon[0] is padding).
-       
-               Arguments:
-
-                       const uint8_t *Key
-
-                               Address of user's cipher key.
-
-                       int Length
-
-                               Number of bytes (16, 24, or 32) or bits (128, 192, or 256) in
-                               user's cipher key.
-
-                               This argument is used with _aes_decrypt_key.  It is not
-                               present for the other routines.  In those routines, Context
-                               is the second argument.
-
-                       aes_decrypt_ctx *Context
-
-                               Structure to contain the expanded key beginning at offset
-                               ContextKey and a four-byte "key length" beginning at offset
-                               ContextKeyLength.  The "key length" is the number of bytes from
-                               the start of the first round key to the start of the last round
-                               key.  That is 16 less than the number of bytes in the entire
-                               expanded key.
-
-       Output:
-
-               The expanded key and the "key length" are written to *Context.
-
-       Return:
-
-               aes_rval        // -1 if "key length" is invalid.  0 otherwise.
-*/
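-
-/*     The _AESInvMixColumnTable entries can be reproduced in C from GF(2^8)
-       arithmetic (a sketch; gf_mul is the usual multiply reduced by the AES
-       polynomial x^8 + x^4 + x^3 + x + 1):
-
-               typedef unsigned char Byte;
-               typedef unsigned int Word;
-
-               static Byte gf_mul(Byte a, Byte b)
-               {
-                       Byte p = 0;
-                       while (b) {
-                               if (b & 1)
-                                       p ^= a;
-                               a = (Byte)((a << 1) ^ ((a & 0x80) ? 0x1b : 0));
-                               b >>= 1;
-                       }
-                       return p;
-               }
-
-               // Table 0 entry for byte j, bytes in increasing address order
-               // (little-endian): {0xe}*{j}, {0x9}*{j}, {0xd}*{j}, {0xb}*{j}.
-               Word t = (Word)gf_mul(0x0e, j)
-                      | (Word)gf_mul(0x09, j) << 8
-                      | (Word)gf_mul(0x0d, j) << 16
-                      | (Word)gf_mul(0x0b, j) << 24;
-               // Tables 1-3 hold the same bytes, rotated as described above.
-*/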
-/* add AES HW detection and program branch if AES HW is detected cclee 3-12-10 */
-
-#ifdef KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-
-#define        dr              r0d                             // Dissection register.
-#define        drl             r0l                             // Low 8 bits of dissection register.
-#define        drh             r0h                             // Second-lowest 8 bits of dissection register.
-
-#define        t0              r1
-#define        t0d             r1d                             // Low 32 bits of t0.
-
-#define        STable  r2                              // Address of SubBytes table.  Overlaps Nk.
-#define        ITable  r3                              // Address of InvMixColumn table.
-#define        offset  Arch(r5, r11)   // Address offset and loop sentinel.
-
-#define        R               r7                              // Address of round constant.
-#define        K               r7                              // User key pointer.
-       // R and K overlap.
-
-#define        E               r6                              // Expanded key pointer.
-
-#define        ve0             %xmm0
-#define        ve1             %xmm1
-#define        ve2             %xmm2
-#define        ve3             %xmm3
-#define        ve4             %xmm4
-#define        ve5             %xmm5
-#define        vt1             %xmm6
-#define        vt0             %xmm7
-
-#define        LookupS(table, index)   (table)*TableSize(STable, index, 4)
-#define        LookupI(table, index)   (table)*TableSize(ITable, index, 4)
-
-
-/*     InvMixColumn puts InvMixColumn(dr) into vt0.  This is a non-standard
-       subroutine.  It does not conform to the ABI.  It is an integral part of
-       _ExpandKeyForDecryption and shares register use with it.
-*/
-InvMixColumn:
-       movzx   drl, t0
-       movd    LookupI(0, t0), vt0             // Look up byte 0 in table 0.
-       movzx   drh, t0d
-       movd    LookupI(1, t0), vt1             // Look up byte 1 in table 1.
-       pxor    vt1, vt0
-       shr             $16, dr
-       movzx   drl, t0d
-       movd    LookupI(2, t0), vt1             // Look up byte 2 in table 2.
-       pxor    vt1, vt0
-       movzx   drh, t0d
-       movd    LookupI(3, t0), vt1             // Look up byte 3 in table 3.
-       pxor    vt1, vt0
-       ret
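-
-/*     A C rendering of this subroutine (a sketch; I stands for
-       _AESInvMixColumnTable):
-
-               static Word InvMixColumnWord(Word w)
-               {
-                       return I[0][ w        & 0xff]
-                            ^ I[1][(w >>  8) & 0xff]
-                            ^ I[2][(w >> 16) & 0xff]
-                            ^ I[3][ w >> 24        ];
-               }
-*/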
-
-
-       // SubWordRotWord adds (XORs) SubWord(RotWord(dr)) to vt0.
-       .macro  SubWordRotWord
-               movzx   drl, t0
-               movd    LookupS(3, t0), vt1             // Look up byte 0 in table 3.
-               pxor    vt1, vt0
-               movzx   drh, t0d
-               movd    LookupS(0, t0), vt1             // Look up byte 1 in table 0.
-               pxor    vt1, vt0
-               shr             $$16, dr
-               movzx   drl, t0d
-               movd    LookupS(1, t0), vt1             // Look up byte 2 in table 1.
-               pxor    vt1, vt0
-               movzx   drh, t0d
-               movd    LookupS(2, t0), vt1             // Look up byte 3 in table 2.
-               pxor    vt1, vt0
-       .endmacro
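-
-       /*      In C, SubWordRotWord computes SubWord(RotWord(w)) with one lookup
-               per byte (a sketch; S stands for _AESSubBytesWordTable):
-
-                       static Word SubWordRotWord(Word w)
-                       {
-                               return S[3][ w        & 0xff]   // Byte 0 ends up in byte 3.
-                                    ^ S[0][(w >>  8) & 0xff]
-                                    ^ S[1][(w >> 16) & 0xff]
-                                    ^ S[2][ w >> 24        ];
-                       }
-       */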
-
-
-       // SubWord puts SubWord(dr) into vt0.
-       .macro  SubWord
-               movzx   drl, t0
-               movd    LookupS(0, t0), vt0             // Look up byte 0 in table 0.
-               movzx   drh, t0d
-               movd    LookupS(1, t0), vt1             // Look up byte 1 in table 1.
-               pxor    vt1,vt0
-               shr             $$16, dr
-               movzx   drl, t0d
-               movd    LookupS(2, t0), vt1             // Look up byte 2 in table 2.
-               pxor    vt1,vt0
-               movzx   drh, t0d
-               movd    LookupS(3, t0), vt1             // Look up byte 3 in table 3.
-               pxor    vt1,vt0
-       .endmacro
-
-       .text
-       .globl _aes_decrypt_key
-//     .private_extern _aes_decrypt_key
-_aes_decrypt_key:
-
-       // detect AES HW, cclee 3-13-10
-#if defined __x86_64__
-    movq    __cpu_capabilities@GOTPCREL(%rip), %rax                            // %rax -> __cpu_capabilities
-    mov     (%rax), %eax                                                                               // %eax  = __cpu_capabilities
-#else
-#if defined    KERNEL
-    leal    __cpu_capabilities, %eax                                                   // %eax -> __cpu_capabilities
-    mov     (%eax), %eax                                                                               // %eax  = __cpu_capabilities
-#else
-       mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
-#endif
-
-#endif
-    test    $(kHasAES), %eax                                                                   // __cpu_capabilities & kHasAES
-    jne     _aes_decrypt_key_hw                                                                        // if AES HW detected, branch to _aes_decrypt_key_hw
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-
-                       8 four-byte spaces for work.
-       */
-       #define LocalsSize      (8*16 + 8*4)
-
-       // Define stack offset to storage space for local data.
-       #define Local   (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-#if defined __i386__
-
-       // Define location of argument i.
-       #define Argument(i)     StackFrame+4*(i)(r4)
-
-       #define Nk              t0d
-
-       // Load arguments.
-       mov             Argument(2), E
-       mov             Argument(1), Nk
-       mov             Argument(0), K
-
-#elif defined __x86_64__
-
-       #define Nk              r9d                     // Number of words in key.
-       mov             r6d, Nk                         // Move Nk argument out of way.
-       mov             r2, E                           // Move E argument to common register.
-
-#endif
-
-       // Dispatch on key length.
-       cmp             $128, Nk
-       jge             2f
-       shl             $3, Nk                          // Convert from bytes to bits.
-       cmp             $128, Nk
-2:
-       je              DKeyHas4Words
-       cmp             $192, Nk
-       je              DKeyHas6Words
-       cmp             $256, Nk
-       je              DKeyHas8Words
-       mov             $-1, r0                         // Return error.
-       jmp             9f
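-
-       /*      In C, the dispatch above accepts the length in either bytes or bits
-               (a sketch):
-
-                       if (Nk < 128)
-                               Nk <<= 3;               // 16/24/32 bytes -> 128/192/256 bits.
-                       switch (Nk) {
-                       case 128: goto DKeyHas4Words;
-                       case 192: goto DKeyHas6Words;
-                       case 256: goto DKeyHas8Words;
-                       default:  return -1;
-                       }
-       */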
-
-
-       .globl _aes_decrypt_key128
-//     .private_extern _aes_decrypt_key128
-_aes_decrypt_key128:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-
-                       8 four-byte spaces for work.
-       */
-       #define LocalsSize      (8*16 + 8*4)
-
-       // Define stack offset to storage space for local data.
-       #define Local   (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-#if defined __i386__
-
-       // Load arguments.
-       #define Argument(i)     StackFrame+4*(i)(r4)
-       mov             Argument(1), E
-       mov             Argument(0), K
-
-#endif
-
-// Merge point for _aes_decrypt_key and _aes_decrypt_key128.
-DKeyHas4Words:
-
-       // First words of expanded key are copied from user key.
-       movd    0*4(K), ve0
-       movd    1*4(K), ve1
-       movd    2*4(K), ve2
-       movd    3*4(K), ve3
-
-       movl    $10*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       #if defined __i386__
-
-               lea             _AESRcon, R
-               lea             _AESInvMixColumnTable, ITable
-               lea             _AESSubBytesWordTable, STable
-
-       #elif defined __x86_64__
-
-               lea             _AESRcon(%rip), R
-               lea             _AESInvMixColumnTable(%rip), ITable
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       /*      With a four-word key, there are ten rounds (eleven 16-byte key blocks),
-               nine of which have InvMixColumn applied.
-       */
-       mov             $-9*4*4, offset
-       sub             offset, E
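-
-       /*      A quick count of the expansion (a sketch of the accounting): four
-               words are copied from the user key, the first separate iteration and
-               eight main-loop iterations each produce four words, and the final
-               iteration produces the last four, for 4 + 4 + 8*4 + 4 = 44 words,
-               which is eleven 16-byte key blocks.
-       */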
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 0*4(E, offset)
-       movd    ve1, 1*4(E, offset)
-       movd    ve2, 2*4(E, offset)
-       movd    ve3, 3*4(E, offset)
-
-/*     Here is the first iteration of the key expansion.  It is separate from the
-       main loop below because we need to apply InvMixColumn to each of the
-       outputs, in ve0 through ve3.  In the main loop, the technique described at
-       the top of this file is used to compute the proper outputs while using
-       InvMixColumn only once.
-*/
-       add             $1, R                                   // Advance pointer.
-       movd    ve3, dr                                 // Put previous word into work register.
-       movzx   (R), t0d                                // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       pxor    ve0, ve1
-       pxor    ve1, ve2
-       pxor    ve2, ve3
-
-       add             $4*4, offset
-
-       /*      Apply InvMixColumn to each word.  The transformed values are stored in
-               the expanded key.  The original values are retained in registers for
-               further computation.
-       */
-       movd    ve0, dr
-       call    InvMixColumn
-       movd    vt0, 0*4(E, offset)
-
-       movd    ve1, dr
-       call    InvMixColumn
-       movd    vt0, 1*4(E, offset)
-
-       movd    ve2, dr
-       call    InvMixColumn
-       movd    vt0, 2*4(E, offset)
-
-       movd    ve3, dr
-       call    InvMixColumn
-       movd    vt0, 3*4(E, offset)
-
-//     Here is the main loop.
-1:
-       add             $1, R                                   // Advance pointer.
-       movd    ve3, dr                                 // Put previous word into work register.
-       movzx   (R), t0d                                // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       pxor    ve0, ve1
-       pxor    ve1, ve2
-       pxor    ve2, ve3
-               /*      Dr. Brian Gladman uses a technique with a single XOR here instead
-                       of the previous four.  There is some periodic behavior in the key
-                       expansion, and Gladman maintains E[4*i+3] for the latest four
-                       values of i.  XORing the value in vt0 with one of these yields its
-                       replacement.  However, using this technique requires additional
-                       instructions before the loop (to initialize the values) and after
-                       it (to extract the final values to be stored) and either some way
-                       to rotate or index four values in the loop or a four-fold unrolling
-                       of the loop to provide the indexing.  Experiment suggests the
-                       former is not worthwhile.  Unrolling the loop might give a small
-                       gain, at the cost of increased use of instruction cache, increased
-                       instructions loads the first time the routine is executed, and
-                       increased code complexity, so I decided against it.
-                       instruction loads the first time the routine is executed, and
-
-       // Apply InvMixColumn to the difference.
-       movd    vt0, dr
-       call    InvMixColumn
-
-       add             $4*4, offset
-
-       // Chain the transformed difference to previously transformed outputs.
-       movd    (0-4)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 0*4(E, offset)
-
-       movd    (1-4)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 1*4(E, offset)
-
-       movd    (2-4)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 2*4(E, offset)
-
-       movd    (3-4)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 3*4(E, offset)
-
-       jl              1b
-
-// Here is the final iteration, which does not perform InvMixColumn.
-
-       movd    ve3, dr                                 // Put previous word into work register.
-       movzx   1(R), t0d                               // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       movd    ve0, 4*4(E, offset)
-       pxor    ve0, ve1
-       movd    ve1, 5*4(E, offset)
-       pxor    ve1, ve2
-       movd    ve2, 6*4(E, offset)
-       pxor    ve2, ve3
-       movd    ve3, 7*4(E, offset)
-
-       xor             r0, r0                          // Return success.
-
-9:
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-       .globl _aes_decrypt_key192
-//     .private_extern _aes_decrypt_key192
-_aes_decrypt_key192:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-
-                       8 four-byte spaces for work.
-       */
-       #define LocalsSize      (8*16 + 8*4)
-
-       // Define stack offset to storage space for local data.
-       #define Local   (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-#if defined __i386__
-
-       // Load arguments.
-       #define Argument(i)     StackFrame+4*(i)(r4)
-       mov             Argument(1), E
-       mov             Argument(0), K
-
-#endif
-
-// Merge point for _aes_decrypt_key and _aes_decrypt_key192.
-DKeyHas6Words:
-
-       // First words of expanded key are copied from user key.
-       movd    0*4(K), ve0
-       movd    1*4(K), ve1
-       movd    2*4(K), ve2
-       movd    3*4(K), ve3
-
-       movl    $12*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       movd    4*4(K), ve4
-       movd    5*4(K), ve5
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       #if defined __i386__
-
-               lea             _AESRcon, R
-               lea             _AESInvMixColumnTable, ITable
-               lea             _AESSubBytesWordTable, STable
-
-       #elif defined __x86_64__
-
-               lea             _AESRcon(%rip), R
-               lea             _AESInvMixColumnTable(%rip), ITable
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       /*      With a six-word key, there are twelve rounds (thirteen 16-byte key
-               blocks), eleven of which have InvMixColumn applied.  The key expansion
-               proceeds in iterations of six four-byte words, so the termination
-               condition is a bit complicated.  We set offset to the negative of ten
-               blocks of four four-byte words each, and the loop branch does another
-               iteration whenever offset is still less than or equal to zero after
-               the six-word advance.  Code after the end of the loop computes the
-               last key block, which does not have InvMixColumn applied.
-       */
-       mov             $-10*4*4, offset
-       sub             offset, E
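-
-       /*      The same count for the six-word case (a sketch of the accounting):
-               six words are copied from the user key, the first separate iteration
-               and six main-loop iterations each produce six words, and the final
-               iteration produces the last four, for 6 + 6 + 6*6 + 4 = 52 words,
-               which is thirteen 16-byte key blocks.
-       */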
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 0*4(E, offset)
-       movd    ve1, 1*4(E, offset)
-       movd    ve2, 2*4(E, offset)
-       movd    ve3, 3*4(E, offset)
-
-       /*      The first four words are stored untransformed.  After that, words in
-               the expanded key are transformed by InvMixColumn.
-       */
-       movd    ve4, dr
-       call    InvMixColumn
-       movd    vt0, 4*4(E, offset)
-
-       movd    ve5, dr
-       call    InvMixColumn
-       movd    vt0, 5*4(E, offset)
-
-/*     Here is the first iteration of the key expansion.  It is separate from the
-       main loop below because we need to apply InvMixColumn to each of the
-       outputs, in ve0 through ve5.  In the main loop, the technique described at
-       the top of this file is used to compute the proper outputs while using
-       InvMixColumn only once.
-*/
-       add             $1, R                                   // Advance pointer.
-       movd    ve5, dr                                 // Put previous word into work register.
-       movzx   (R), t0d                                // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       pxor    ve0, ve1
-       pxor    ve1, ve2
-       pxor    ve2, ve3
-       pxor    ve3, ve4
-       pxor    ve4, ve5
-
-       add             $6*4, offset
-
-       /*      Apply InvMixColumn to each word.  The transformed values are stored in
-               the expanded key.  The original values are retained in registers for
-               further computation.
-       */
-       movd    ve0, dr
-       call    InvMixColumn
-       movd    vt0, 0*4(E, offset)
-
-       movd    ve1, dr
-       call    InvMixColumn
-       movd    vt0, 1*4(E, offset)
-
-       movd    ve2, dr
-       call    InvMixColumn
-       movd    vt0, 2*4(E, offset)
-
-       movd    ve3, dr
-       call    InvMixColumn
-       movd    vt0, 3*4(E, offset)
-
-       movd    (4-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 4*4(E, offset)
-
-       movd    (5-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 5*4(E, offset)
-
-//     Here is the main loop.
-1:
-       add             $1, R                                   // Advance pointer.
-       movd    ve5, dr                                 // Put previous word into work register.
-       movzx   (R), t0d                                // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       pxor    ve0, ve1
-       pxor    ve1, ve2
-       pxor    ve2, ve3
-       pxor    ve3, ve4
-       pxor    ve4, ve5
-
-       // Apply InvMixColumn to the difference.
-       movd    vt0, dr
-       call    InvMixColumn
-
-       add             $6*4, offset
-
-       // Chain the transformed difference to previously transformed outputs.
-       movd    (0-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 0*4(E, offset)
-
-       movd    (1-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 1*4(E, offset)
-
-       movd    (2-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 2*4(E, offset)
-
-       movd    (3-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 3*4(E, offset)
-
-       movd    (4-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 4*4(E, offset)
-
-       movd    (5-6)*4(E, offset), vt1
-       pxor    vt1, vt0
-       movd    vt0, 5*4(E, offset)
-
-       jle             1b
-
-// Here is the final iteration, which does not perform InvMixColumn.
-
-       movd    ve5, dr                                 // Put previous word into work register.
-       movzx   1(R), t0d                               // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-       pxor    vt0, ve0
-
-       // Chain to successive words.
-       movd    ve0, 6*4(E, offset)
-       pxor    ve0, ve1
-       movd    ve1, 7*4(E, offset)
-       pxor    ve1, ve2
-       movd    ve2, 8*4(E, offset)
-       pxor    ve2, ve3
-       movd    ve3, 9*4(E, offset)
-
-       xor             r0, r0                          // Return success.
-
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-       .globl _aes_decrypt_key256
-//     .private_extern _aes_decrypt_key256
-_aes_decrypt_key256:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-
-                       8 four-byte spaces for work.
-       */
-       #define LocalsSize      (8*16 + 8*4)
-
-       // Define stack offset to storage space for local data.
-       #define Local   (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-#if defined __i386__
-
-       // Load arguments.
-       #define Argument(i)     StackFrame+4*(i)(r4)
-       mov             Argument(1), E
-       mov             Argument(0), K
-
-#endif
-
-// Merge point for _aes_decrypt_key and _aes_decrypt_key256.
-DKeyHas8Words:
-
-       // First words of expanded key are copied from user key.
-       movd    0*4(K), ve0
-       movd    1*4(K), ve1
-       movd    2*4(K), ve2
-       movd    3*4(K), ve3
-
-       movl    $14*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 0*4(E)
-       movd    ve1, 1*4(E)
-       movd    ve2, 2*4(E)
-       movd    ve3, 3*4(E)
-       movd    4*4(K), ve0
-       movd    5*4(K), ve1
-       movd    6*4(K), ve2
-       movd    7*4(K), ve3
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       #if defined __i386__
-
-               lea             _AESRcon, R
-               lea             _AESInvMixColumnTable, ITable
-               lea             _AESSubBytesWordTable, STable
-
-       #elif defined __x86_64__
-
-               lea             _AESRcon(%rip), R
-               lea             _AESInvMixColumnTable(%rip), ITable
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       /*      With an eight-word key, there are fourteen rounds (fifteen 16-byte key
-               blocks), thirteen of which have InvMixColumn applied.
-       */
-       mov             $-12*4*4, offset
-       sub             offset, E
-
-       // Save untransformed values in stack area.
-       movd    ve0, 4*4+Local(r4)
-       movd    ve1, 5*4+Local(r4)
-       movd    ve2, 6*4+Local(r4)
-       movd    ve3, 7*4+Local(r4)
-
-       /*      Apply InvMixColumn to words 4 through 7.  The transformed values are
-               stored in the expanded key.  The original values are saved in the stack
-               area for further computation.
-       */
-       movd    ve0, dr
-       call    InvMixColumn
-       movd    vt0, 4*4(E, offset)
-
-       movd    ve1, dr
-       call    InvMixColumn
-       movd    vt0, 5*4(E, offset)
-
-       movd    ve2, dr
-       call    InvMixColumn
-       movd    vt0, 6*4(E, offset)
-
-       movd    ve3, dr
-       call    InvMixColumn
-       movd    vt0, 7*4(E, offset)
-
-/*     Here is the first iteration of the key expansion.  It is separate from the
-       main loop below because we need to apply InvMixColumn to each of the
-       outputs, in ve0 through ve3.  In the main loop, the technique described at
-       the top of this file is used to compute the proper outputs while using
-       InvMixColumn only once.
-*/
-       add             $1, R                                   // Advance pointer.
-       movd    ve3, dr                                 // Put previous word into work register.
-       movzx   (R), t0d                                // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-
-       add             $8*4, offset
-
-       movd    (0-8)*4(E, offset), ve0         // Get old word.
-       pxor    vt0, ve0
-       movd    ve0, 0*4+Local(r4)                      // Save on stack.
-       movd    ve0, dr
-       call    InvMixColumn
-       movd    vt0, 0*4(E, offset)                     // Write to expanded key.
-
-       /*       Chain to successive words and apply InvMixColumn to each word.  The
-                transformed values are stored in the expanded key.  The original
-                values are retained in local data for further computation.
-       */
-       movd    (1-8)*4(E, offset), ve1         // Get old word.
-       pxor    ve0, ve1                                        // Chain.
-       movd    ve1, 1*4+Local(r4)                      // Save on stack.
-       movd    ve1, dr
-       call    InvMixColumn
-       movd    vt0, 1*4(E, offset)                     // Write to expanded key.
-
-       movd    (2-8)*4(E, offset), ve2         // Get old word.
-       pxor    ve1, ve2                                        // Chain.
-       movd    ve2, 2*4+Local(r4)                      // Save on stack.
-       movd    ve2, dr
-       call    InvMixColumn
-       movd    vt0, 2*4(E, offset)                     // Write to expanded key.
-
-       movd    (3-8)*4(E, offset), ve3         // Get old word.
-       pxor    ve2, ve3                                        // Chain.
-       movd    ve3, 3*4+Local(r4)                      // Save on stack.
-       movd    ve3, dr
-       call    InvMixColumn
-       movd    vt0, 3*4(E, offset)                     // Write to expanded key.
-
-       movd    ve3, dr                                         // Put previous word into work register.
-       SubWord
-
-       movd    4*4+Local(r4), ve0                      // Get old word.
-       pxor    vt0, ve0                                        // Chain.
-       movd    ve0, 4*4+Local(r4)                      // Save on stack.
-
-       movd    5*4+Local(r4), ve1                      // Get old word.
-       pxor    ve0, ve1                                        // Chain.
-       movd    ve1, 5*4+Local(r4)                      // Save on stack.
-
-       movd    6*4+Local(r4), ve2                      // Get old word.
-       pxor    ve1, ve2                                        // Chain.
-       movd    ve2, 6*4+Local(r4)                      // Save on stack.
-
-       movd    7*4+Local(r4), ve3                      // Get old word.
-       pxor    ve2, ve3                                        // Chain.
-       movd    ve3, 7*4+Local(r4)                      // Save on stack.
-
-       movd    vt0, dr                                         // Move change to work register.
-       call    InvMixColumn
-
-       movd    (4-8)*4(E, offset), vt1         // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, 4*4(E, offset)                     // Write new word to expanded key.
-
-       movd    (5-8)*4(E, offset), vt1         // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, 5*4(E, offset)                     // Write new word to expanded key.
-
-       movd    (6-8)*4(E, offset), vt1         // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, 6*4(E, offset)                     // Write new word to expanded key.
-
-       movd    (7-8)*4(E, offset), vt1         // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, 7*4(E, offset)                     // Write new word to expanded key.
-
-//     Here is the main loop.
-1:
-       add             $1, R                                           // Advance pointer.
-       movd    ve3, dr                                         // Put previous word into work register.
-       movzx   (R), t0d                                        // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-
-       movd    0*4+Local(r4), ve0                      // Get old word.
-       pxor    vt0, ve0
-       movd    ve0, 0*4+Local(r4)                      // Save on stack.
-
-       // Chain to successive words.
-       movd    1*4+Local(r4), ve1                      // Get old word.
-       pxor    ve0, ve1                                        // Chain.
-       movd    ve1, 1*4+Local(r4)                      // Save on stack.
-
-       movd    2*4+Local(r4), ve2                      // Get old word.
-       pxor    ve1, ve2                                        // Chain.
-       movd    ve2, 2*4+Local(r4)                      // Save on stack.
-
-       movd    3*4+Local(r4), ve3                      // Get old word.
-       pxor    ve2, ve3                                        // Chain.
-       movd    ve3, 3*4+Local(r4)                      // Save on stack.
-
-       movd    vt0, dr                                         // Move change to work register.
-       call    InvMixColumn
-
-       movd    0*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (0+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    1*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (1+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    2*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (2+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    3*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (3+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    ve3, dr                                         // Put previous word into work register.
-       SubWord
-
-       movd    4*4+Local(r4), ve0                      // Get old word.
-       pxor    vt0, ve0                                        // Chain.
-       movd    ve0, 4*4+Local(r4)                      // Save on stack.
-
-       movd    5*4+Local(r4), ve1                      // Get old word.
-       pxor    ve0, ve1                                        // Chain.
-       movd    ve1, 5*4+Local(r4)                      // Save on stack.
-
-       movd    6*4+Local(r4), ve2                      // Get old word.
-       pxor    ve1, ve2                                        // Chain.
-       movd    ve2, 6*4+Local(r4)                      // Save on stack.
-
-       movd    7*4+Local(r4), ve3                      // Get old word.
-       pxor    ve2, ve3                                        // Chain.
-       movd    ve3, 7*4+Local(r4)                      // Save on stack.
-
-       movd    vt0, dr                                         // Move change to work register.
-       call    InvMixColumn
-
-       movd    4*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (4+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    5*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (5+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    6*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (6+8)*4(E, offset)         // Write new word to expanded key.
-
-       movd    7*4(E, offset), vt1                     // Get old word.
-       pxor    vt1, vt0                                        // Chain.
-       movd    vt0, (7+8)*4(E, offset)         // Write new word to expanded key.
-
-       add             $8*4, offset
-
-       jl              1b
-
-       movd    ve3, dr                                         // Put previous word into work register.
-       movzx   1(R), t0d                                       // Get round constant.
-       movd    t0d, vt0
-
-       SubWordRotWord
-
-       movd    0*4+Local(r4), ve0                      // Get old word.
-       pxor    vt0, ve0                                        // Chain.
-       movd    ve0, (0+8)*4(E, offset)
-
-       // Chain to successive words.
-       movd    1*4+Local(r4), ve1                      // Get old word.
-       pxor    ve0, ve1                                        // Chain.
-       movd    ve1, (1+8)*4(E, offset)
-
-       movd    2*4+Local(r4), ve2                      // Get old word.
-       pxor    ve1, ve2                                        // Chain.
-       movd    ve2, (2+8)*4(E, offset)
-
-       movd    3*4+Local(r4), ve3                      // Get old word.
-       pxor    ve2, ve3                                        // Chain.
-       movd    ve3, (3+8)*4(E, offset)
-
-       xor             r0, r0                          // Return success.
-
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-#undef Address
-#undef Argument
-#undef E
-#undef ITable
-#undef K
-#undef Local
-#undef LocalsSize
-#undef LookupI
-#undef LookupS
-#undef Nk
-#undef Padding
-#undef R
-#undef SaveSize
-#undef STable
-#undef StackFrame
-#undef dr
-#undef drh
-#undef drl
-#undef offset
-#undef t0
-#undef t0d
-#undef ve0
-#undef ve1
-#undef ve2
-#undef ve3
-#undef ve4
-#undef ve5
-#undef vt0
-#undef vt1
diff --git a/bsd/crypto/aes/i386/ExpandKeyForEncryption.s b/bsd/crypto/aes/i386/ExpandKeyForEncryption.s
deleted file mode 100644 (file)
index 1ce3c95..0000000
+++ /dev/null
@@ -1,801 +0,0 @@
-/*     This file defines _aes_encrypt_key, _aes_encrypt_key128,
-       _aes_encrypt_key192, and _aes_encrypt_key256.  It is designed to be
-       included in another assembly file with the preprocessor #include directive,
-       to benefit from some assembly-time calculations.
-
-       Written by Eric Postpischil, January 2008.
-
-       The comments here do not say much about the algorithm; the code just
-       follows the FIPS-197 specification.  I recommend reading the specification
-       before working with this code or examining the C code in the parent
-       directory that illustrates key expansion.
-*/
-
-
-/*     Routines:
-
-               _aes_encrypt_key.
-
-               _aes_encrypt_key128, _aes_encrypt_key192, and _aes_encrypt_key256.
-
-       Function:
-
-               Expand the user's cipher key into the key schedule, as defined in
-               Federal Information Processing Standards Publication 197 (FIPS-197),
-               November 26, 2001.
-
-       Input:
-
-               Constant data:
-
-                       The following names must be locally defined so the assembler
-                       can calculate certain offsets.
-
-                       static const Word _AESSubBytesWordTable[4][256].
-
-                               _AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
-                               SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
-                               differs from _AESEncryptTable in that it does not include
-                               the MixColumn operation.  It is used in performing the last
-				round, which differs from the previous rounds in that it
-                               does not include the MixColumn operation.
-
-                       static const Byte _AESRcon[].
-
-                               Round constants, beginning with AESRcon[1] for the first round
-				(AESRcon[0] is padding).
-       
-               Arguments:
-
-                       const uint8_t *Key
-
-                               Address of user's cipher key.
-
-                       int Length
-
-                               Number of bytes (16, 24, or 32) or bits (128, 192, or 256) in
-                               user's cipher key.
-
-                               This argument is used with _aes_encrypt_key.  It is not
-                               present for the other routines.  In those routines, Context
-                               is the second argument.
-
-                       aes_encrypt_ctx *Context
-
-                               Structure to contain the expanded key beginning at offset
-                               ContextKey and a four-byte "key length" beginning at offset
-                               ContextKeyLength.  The "key length" is the number of bytes from
-                               the start of the first round key to the start of the last round
-                               key.  That is 16 less than the number of bytes in the entire
-                               key.
-
-       Output:
-
-               The expanded key and the "key length" are written to *Context.
-
-       Return:
-
-               aes_rval        // -1 if "key length" is invalid.  0 otherwise.
-*/
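-/*	For orientation, the FIPS-197 expansion implemented below can be sketched
-	in C (Nk = 4, 6, or 8 words; Nr = Nk + 6 rounds; SubWord, RotWord, and
-	Rcon[] as in the standard; this block is illustrative, not part of the
-	build):
-
-		for (i = Nk; i < 4 * (Nr + 1); ++i) {
-			uint32_t t = w[i-1];
-			if (i % Nk == 0)
-				t = SubWord(RotWord(t)) ^ Rcon[i/Nk];
-			else if (6 < Nk && i % Nk == 4)	// Extra SubWord step, 256-bit keys only.
-				t = SubWord(t);
-			w[i] = w[i-Nk] ^ t;
-		}
-*/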
-
-/* add AES HW detection and program branch if AES HW is detected cclee 3-12-10 */
-#ifdef KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-
-       .text
-       .globl _aes_encrypt_key
-//     .private_extern _aes_encrypt_key
-_aes_encrypt_key:
-
-    // detect AES HW, cclee-3-13-10
-#if defined __x86_64__
-    movq    __cpu_capabilities@GOTPCREL(%rip), %rax                            // %rax -> __cpu_capabilities
-    mov     (%rax), %eax                                                                               // %eax  = __cpu_capabilities
-#else
-#if defined KERNEL
-    leal    __cpu_capabilities, %eax                                                   // %eax -> __cpu_capabilities
-    mov     (%eax), %eax                                                                               // %eax  = __cpu_capabilities
-#else
-       mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
-#endif
-#endif
-    test    $(kHasAES), %eax                                                                   // __cpu_capabilities & kHasAES
-    jne     _aes_encrypt_key_hw                                                                        // if AES HW detected, branch to _aes_encrypt_key_hw
-
-#define        dr              r0d                             // Dissection register.
-#define        drl             r0l                             // Low 8 bits of dissection register.
-#define        drh             r0h                             // Second-lowest 8 bits of dissection register.
-
-#define        t0              r1
-#define        t0d             r1d                             // Low 32 bits of t0.
-
-#define        offset  Arch(r5, r11)   // Address offset and loop sentinel.
-
-#define        R               r7                              // Address of round constant.
-#define        K               r7                              // User key pointer.
-       // R and K overlap.
-
-#define        E               r6                              // Expanded key pointer.
-
-#define        ve0             %xmm0
-#define        ve1             %xmm1
-#define        ve2             %xmm2
-#define        ve3             %xmm3
-#define        vt3             %xmm4
-#define        vt2             %xmm5
-#define        vt1             %xmm6
-#define        vt0             %xmm7
-
-#if defined __i386__
-       #define LookupS(table, index)   \
-               _AESSubBytesWordTable+(table)*TableSize(, index, 4)
-#elif defined __x86_64__
-       #define LookupS(table, index)   (table)*TableSize(STable, index, 4)
-#endif
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-       */
-       #define LocalsSize      (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-#if defined __i386__
-
-       // Define location of argument i.
-       #define Argument(i)     StackFrame+4*(i)(r4)
-
-       #define Nk              t0d
-
-       // Load arguments.
-       mov             Argument(2), E
-       mov             Argument(1), Nk
-       mov             Argument(0), K
-
-#elif defined __x86_64__
-
-       #define Nk              r9d                     // Number of words in key.
-       mov             r6d, Nk                         // Move Nk argument out of way.
-       mov             r2, E                           // Move E argument to common register.
-
-#endif
-
-       // Dispatch on key length.
-       cmp             $128, Nk
-       jge             2f
-       shl             $3, Nk                          // Convert from bytes to bits.
-       cmp             $128, Nk
-2:
-       je              EKeyHas4Words
-       cmp             $192, Nk
-       je              EKeyHas6Words
-       cmp             $256, Nk
-       je              EKeyHas8Words
-       mov             $-1, r0                         // Return error.
-       jmp             9f
-
-// Stop using Nk.
-#undef Nk
-
-       .globl _aes_encrypt_key128
-//     .private_extern _aes_encrypt_key128
-_aes_encrypt_key128:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-       */
-       #define LocalsSize      (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-       #if defined __i386__
-
-               // Load arguments.
-               #define Argument(i)     StackFrame+4*(i)(r4)
-               mov             Argument(1), E
-               mov             Argument(0), K
-
-       #endif
-
-// Merge point for _aes_encrypt_key and _aes_encrypt_key128.
-EKeyHas4Words:
-
-#define        e0      r2d
-#define        e1      r3d
-#define        e2      Arch(r5d, r11d)
-#define        e3      r7d
-
-       // First words of expanded key are copied from user key.
-       mov             0*4(K), e0
-       mov             1*4(K), e1
-       mov             2*4(K), e2
-       mov             3*4(K), e3
-
-       movl    $10*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       // Cache round constants in output buffer.  The last is a sentinel.
-       movb    $0x01,  1*16(E)
-       movb    $0x02,  2*16(E)
-       movb    $0x04,  3*16(E)
-       movb    $0x08,  4*16(E)
-       movb    $0x10,  5*16(E)
-       movb    $0x20,  6*16(E)
-       movb    $0x40,  7*16(E)
-       movb    $0x80,  8*16(E)
-       movb    $0x1b,  9*16(E)
-       movb    $0x36, 10*16(E)
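-	/*	Each constant sits in the first byte of a round-key block that has
-		not been written yet; the loop below reads it just before filling
-		that block, and 0x36 doubles as the loop sentinel.
-	*/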
-
-       #if defined __x86_64__
-
-               #define STable  r8
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       // Store initial words of expanded key, which are copies of user's key.
-       mov             e0, 0*4(E)
-       mov             e1, 1*4(E)
-       mov             e2, 2*4(E)
-       mov             e3, 3*4(E)
-
-1:
-       mov             e3, dr                          // Put previous word into dissection register.
-
-       // Perform SubWord(RotWord(dr)).
-       movzx   drl, t0
-       xor             LookupS(3, t0), e0              // Look up byte 0 in table 3.
-       movzx   drh, t0d
-       xor             LookupS(0, t0), e0              // Look up byte 1 in table 0.
-       shr             $16, dr
-       movzx   drl, t0d
-       xor             LookupS(1, t0), e0              // Look up byte 2 in table 1.
-       movzx   drh, t0d
-       xor             LookupS(2, t0), e0              // Look up byte 3 in table 2.
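-	/*	Each LookupS table holds SubBytes(byte) pre-shifted into one of the
-		four byte positions, so the four XORs above accumulate
-		SubWord(RotWord(dr)) into e0 without shifting the looked-up bytes
-		into place.
-	*/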
-
-       add             $4*4, E
-
-       movzx   (E), t0d                                // Get cached round constant.
-       xor             t0d, e0                                 // XOR with word from four words back.
-
-       // Chain to successive words.
-       mov             e0, 0*4(E)
-       xor             e0, e1
-       mov             e1, 1*4(E)
-       xor             e1, e2
-       mov             e2, 2*4(E)
-       xor             e2, e3
-       mov             e3, 3*4(E)
-
-       cmp             $0x36, t0d                              // Was this the last round constant?
-
-       jne             1b
-
-       xor             r0, r0          // Return success.
-
-9:
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-// Reset definitions for next case.
-#undef e0
-#undef e1
-#undef e2
-#undef e3
-
-#undef vt3
-#undef vt2
-#define        ve4     %xmm4
-#define        ve5     %xmm5
-
-
-       .globl _aes_encrypt_key192
-//     .private_extern _aes_encrypt_key192
-_aes_encrypt_key192:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-       */
-       #define LocalsSize      (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-       #if defined __i386__
-
-               // Load arguments.
-               #define Argument(i)     StackFrame+4*(i)(r4)
-               mov             Argument(1), E
-               mov             Argument(0), K
-
-       #endif
-
-// Merge point for _aes_encrypt_key and _aes_encrypt_key192.
-EKeyHas6Words:
-
-       // First words of expanded key are copied from user key.
-       movd    0*4(K), ve0
-       movd    1*4(K), ve1
-       movd    2*4(K), ve2
-       movd    3*4(K), ve3
-
-       movl    $12*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       movd    4*4(K), ve4
-       movd    5*4(K), ve5
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       #if defined __i386__
-
-               lea             _AESRcon, R
-
-       #elif defined __x86_64__
-
-               lea             _AESRcon(%rip), R
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       /*      With a six-word key, there are twelve rounds (thirteen 16-byte key
-               blocks).
-       */
-       mov             $-12*4*4, offset
-       sub             offset, E
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 0*4(E, offset)
-       movd    ve1, 1*4(E, offset)
-       movd    ve2, 2*4(E, offset)
-       movd    ve3, 3*4(E, offset)
-       movd    ve4, 4*4(E, offset)
-       movd    ve5, 5*4(E, offset)
-
-/*	Jump into the loop body.  The key expansion produces six four-byte words
-	per iteration, and 52 words are needed in all, so only four are produced
-	in the last iteration.
-*/
-       jmp             2f              
-1:
-       // Continue chaining to successive words.
-       pxor    ve3, ve4
-       movd    ve4, 4*4(E, offset)
-       pxor    ve4, ve5
-       movd    ve5, 5*4(E, offset)
-2:
-       add             $1, R                           // Advance pointer.
-       movd    ve5, dr                         // Put previous word into dissection register.
-       movzx   (R), t0                         // Get round constant.
-       movd    t0d, vt1
-       pxor    vt1, ve0                        // XOR with word from six words back.
-
-       // Perform SubWord(RotWord(dr)).
-       movzx   drl, t0d
-       movd    LookupS(3, t0), vt0             // Look up byte 0 in table 3.
-       movzx   drh, t0d
-       movd    LookupS(0, t0), vt1             // Look up byte 1 in table 0.
-       shr             $16, dr
-       movzx   drl, t0d
-       pxor    vt1, vt0
-       pxor    vt0, ve0
-       movd    LookupS(1, t0), vt0             // Look up byte 2 in table 1.
-       movzx   drh, t0d
-       movd    LookupS(2, t0), vt1             // Look up byte 3 in table 2.
-       pxor    vt1, vt0
-       pxor    vt0, ve0
-
-       add             $6*4, offset
-
-       // Chain to successive words.
-       movd    ve0, 0*4(E, offset)
-       pxor    ve0, ve1
-       movd    ve1, 1*4(E, offset)
-       pxor    ve1, ve2
-       movd    ve2, 2*4(E, offset)
-       pxor    ve2, ve3
-       movd    ve3, 3*4(E, offset)
-
-       jne             1b
-
-       xor             r0, r0          // Return success.
-
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-// Reset definitions for next case.
-#undef ve4
-#undef ve5
-#define        vt3     %xmm4
-#define        vt2     %xmm5
-
-
-       .globl _aes_encrypt_key256
-//     .private_extern _aes_encrypt_key256
-_aes_encrypt_key256:
-
-       /*      Save registers and set SaveSize to the number of bytes pushed onto the
-               stack so far, including the caller's return address.
-       */
-       push    r3
-       #if defined __i386__
-               push    r5
-               push    r6
-               push    r7
-               #define SaveSize        (5*4)
-       #else
-               #define SaveSize        (2*8)
-       #endif
-
-       /*      Number of bytes used for local variables:
-
-                       8 16-byte spaces to save XMM registers.
-       */
-       #define LocalsSize      (8*16)
-
-       #if 0 < LocalsSize
-               // Padding to position stack pointer at a multiple of 16 bytes.
-               #define Padding (15 & -(SaveSize + LocalsSize))
-               sub             $Padding + LocalsSize, r4       // Allocate space on stack.
-       #else
-               #define Padding 0
-       #endif
-
-       /*      StackFrame is the number of bytes in our stack frame, from caller's
-               stack pointer to ours (so it includes the return address).
-       */
-       #define StackFrame      (SaveSize + Padding + LocalsSize)
-
-       // Save xmm registers.
-       movaps  %xmm0, 0*16(r4)
-       movaps  %xmm1, 1*16(r4)
-       movaps  %xmm2, 2*16(r4)
-       movaps  %xmm3, 3*16(r4)
-       movaps  %xmm4, 4*16(r4)
-       movaps  %xmm5, 5*16(r4)
-       movaps  %xmm6, 6*16(r4)
-       movaps  %xmm7, 7*16(r4)
-
-       #if defined __i386__
-
-               // Load arguments.
-               #define Argument(i)     StackFrame+4*(i)(r4)
-               mov             Argument(1), E
-               mov             Argument(0), K
-
-       #endif
-
-// Merge point for _aes_encrypt_key and _aes_encrypt_key256.
-EKeyHas8Words:
-
-       // First words of expanded key are copied from user key.
-       movd    0*4(K), ve0
-       movd    1*4(K), ve1
-       movd    2*4(K), ve2
-       movd    3*4(K), ve3
-
-       movl    $14*16, ContextKeyLength(E)     // Set "key length."
-
-       #if 0 != ContextKey
-               add             $ContextKey, E
-       #endif
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 0*4(E)
-       movd    ve1, 1*4(E)
-       movd    ve2, 2*4(E)
-       movd    ve3, 3*4(E)
-       movd    4*4(K), ve0
-       movd    5*4(K), ve1
-       movd    6*4(K), ve2
-       movd    7*4(K), ve3
-
-       // K cannot be used after we write to R, since they use the same register.
-
-       #if defined __i386__
-
-               lea             _AESRcon, R
-
-       #elif defined __x86_64__
-
-               lea             _AESRcon(%rip), R
-               lea             _AESSubBytesWordTable(%rip), STable
-
-       #endif
-
-       /*      With an eight-word key, there are fourteen rounds (fifteen 16-byte key
-               blocks).
-       */
-       mov             $-14*4*4, offset
-       sub             offset, E
-
-       // Store initial words of expanded key, which are copies of user's key.
-       movd    ve0, 4*4(E, offset)
-       movd    ve1, 5*4(E, offset)
-       movd    ve2, 6*4(E, offset)
-       movd    ve3, 7*4(E, offset)
-
-/*	Jump into the loop body.  The key expansion produces eight four-byte words
-	per iteration, and 60 words are needed in all, so only four are produced
-	in the last iteration.
-*/
-       jmp             2f              
-1:
-       movd    ve3, dr                         // Put previous word into dissection register.
-
-       /*      Get word from eight words back (it is four words back from where E
-               currently points, and we use it to prepare the value to be stored
-               four words beyond where E currently points).
-       */
-       movd    -4*4(E, offset), ve0
-
-       // Perform SubWord(dr).
-       movzx   drl, t0
-       movd    LookupS(0, t0), vt0             // Look up byte 0 in table 0.
-       movzx   drh, t0d
-       movd    LookupS(1, t0), vt1             // Look up byte 1 in table 1.
-       shr             $16, dr
-       movzx   drl, t0d
-       movd    LookupS(2, t0), vt2             // Look up byte 2 in table 2.
-       movzx   drh, t0d
-       movd    LookupS(3, t0), vt3             // Look up byte 3 in table 3.
-       pxor    vt1, vt0
-       pxor    vt3, vt2
-       pxor    vt0, ve0
-       pxor    vt2, ve0
-
-       movd    -3*4(E, offset), ve1    // Get words from eight words back.
-       movd    -2*4(E, offset), ve2
-       movd    -1*4(E, offset), ve3
-
-       // Chain to successive words.
-       movd    ve0, 4*4(E, offset)
-       pxor    ve0, ve1
-       movd    ve1, 5*4(E, offset)
-       pxor    ve1, ve2
-       movd    ve2, 6*4(E, offset)
-       pxor    ve2, ve3
-       movd    ve3, 7*4(E, offset)
-
-2:
-       add             $1, R                           // Advance pointer.
-       movd    ve3, dr                         // Put previous word into dissection register.
-       movzx   (R), t0d                        // Get round constant.
-       movd    t0d, vt1
-       movd    0*4(E, offset), ve0     // Get word from eight words back.
-       pxor    vt1, ve0
-
-       // Perform SubWord(RotWord(dr)).
-       movzx   drl, t0
-       movd    LookupS(3, t0), vt0             // Look up byte 0 in table 3.
-       movzx   drh, t0d
-       movd    LookupS(0, t0), vt1             // Look up byte 1 in table 0.
-       shr             $16, dr
-       movzx   drl, t0d
-       movd    LookupS(1, t0), vt2             // Look up byte 2 in table 1.
-       movzx   drh, t0d
-       movd    LookupS(2, t0), vt3             // Look up byte 3 in table 2.
-       pxor    vt1, vt0
-       pxor    vt3, vt2
-       pxor    vt0, ve0
-       pxor    vt2, ve0
-
-       movd    1*4(E, offset), ve1
-       movd    2*4(E, offset), ve2
-       movd    3*4(E, offset), ve3
-
-       add             $8*4, offset
-
-       // Chain to successive words.
-       movd    ve0, 0*4(E, offset)
-       pxor    ve0, ve1
-       movd    ve1, 1*4(E, offset)
-       pxor    ve1, ve2
-       movd    ve2, 2*4(E, offset)
-       pxor    ve2, ve3
-       movd    ve3, 3*4(E, offset)
-
-       jne             1b
-
-       xor             r0, r0          // Return success.
-
-       // Pop stack and restore registers.
-       movaps  7*16(r4), %xmm7
-       movaps  6*16(r4), %xmm6
-       movaps  5*16(r4), %xmm5
-       movaps  4*16(r4), %xmm4
-       movaps  3*16(r4), %xmm3
-       movaps  2*16(r4), %xmm2
-       movaps  1*16(r4), %xmm1
-       movaps  0*16(r4), %xmm0
-       #if 0 < LocalsSize
-               add             $Padding + LocalsSize, r4
-       #endif
-       #if defined __i386__
-               pop             r7
-               pop             r6
-               pop             r5
-       #endif
-       pop             r3
-
-       ret
-
-
-#undef Address
-#undef Argument
-#undef E
-#undef K
-#undef LocalsSize
-#undef LookupS
-#undef Padding
-#undef R
-#undef SaveSize
-#undef STable
-#undef StackFrame
-#undef dr
-#undef drh
-#undef drl
-#undef offset
-#undef t0
-#undef t0d
-#undef ve0
-#undef ve1
-#undef ve2
-#undef ve3
-#undef vt0
-#undef vt1
-#undef vt2
-#undef vt3
diff --git a/bsd/crypto/aes/i386/MakeData.c b/bsd/crypto/aes/i386/MakeData.c
deleted file mode 100644 (file)
index 262dc59..0000000
+++ /dev/null
@@ -1,516 +0,0 @@
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define        MaxRcon 11
-
-typedef uint8_t Byte;
-typedef uint32_t Word;
-
-
-/*     In comments below, {n} designates the Galois field element represented by
-       the byte n.  See notes about Galois field multiplication in ReadMe.txt.
-
-       So 3+5 is addition of ordinary integers, and 3+5 == 8, while {3}+{5} is
-	addition in the field, and {3} + {5} = {3 XOR 5} = {6}.
-*/
-
-
-// Define constants for languages.
-typedef enum { C, IntelAssembly } Language;
-
-
-/*     LogBase3[i] will contain the base-three logarithm of i in the 256-element
-	Galois field defined by AES.  That is, {3}**LogBase3[i] == {i}.
-*/
-static Byte LogBase3[256];
-
-/*     AntilogBase3[i] will contain {3}**i in the 256-element Galois field defined
-       by AES.  It contains extra elements so that the antilog of a+b can be found
-       by looking up a+b directly, without having to reduce modulo the period, for
-       0 <= a, b < 255.
-
-       (254 is the greatest value we encounter.  Each a or b we use is the
-       base-three logarithm of some element.  As a primitive root, the powers of
-       three cycle through all non-zero elements of the field, of which there are
-       255, so the exponents cover 0 to 254 before the powers repeat.)
-*/
-static Byte AntilogBase3[254+254+1];
-
-
-static void InitializeLogTables(void)
-{
-       // log({1}) is zero, so start {p} (power) at {1} and l (logarithm) at 0.
-       Byte p = 1;
-       int l = 0;
-       do
-       {
-               // Record table entries.
-               LogBase3[p] = l;
-               AntilogBase3[l] = p;
-
-		/*	Observe that {2}*{p} is {p << 1 ^ (p & 0x80 ? 0x1b : 0)}, per notes
-                       in ReadMe.txt.  We produce {3}*{p}:
-
-                               {3}*{p}
-                                       = {1}*{p} + {2}*{p}
-					= {1}*{p} + {p << 1 ^ (p & 0x80 ? 0x1b : 0)}
-                                       = {p ^ p << 1 ^ (p & 0x80 ? 0x1b : 0)}.
-               */
-               p ^= p << 1 ^ (p & 0x80 ? 0x1b : 0);
-               ++l;
-
-       } while (p != 1);       // Stop when we have gone around completely.
-
-       /*      The antilogarithms are periodic with a period of 255, and we want to
-               look up elements as high as 254+254 (the largest that a sum of two
-               logarithms could be), so we replicate the table beyond the first
-               period.
-       */
-       for (l = 255; l < 254+254; ++l)
-               AntilogBase3[l] = AntilogBase3[l-255];
-}
-
-
-/*     MultiplyByte(Byte b, Byte c) returns {b}*{c}.  It requires tables that must
-       be initialized before this routine is used.
-*/
-static Byte MultiplyByte(Byte b, Byte c)
-{
-       // Calculate product by adding logarithms, but avoid logarithms of zero.
-       return b == 0 || c == 0 ? 0 : AntilogBase3[LogBase3[b] + LogBase3[c]];
-}
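-
-/*	For example, MultiplyByte(0x57, 0x83) yields 0xc1, matching the worked
-	{57}*{83} = {c1} multiplication example in FIPS-197's discussion of the
-	field.
-*/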
-
-
-// Return {0} if {b} is {0} and the multiplicative inverse of {b} otherwise.
-static Byte InverseByte(Byte b)
-{
-       return b == 0 ? 0 : AntilogBase3[255 - LogBase3[b]];
-}
-
-
-// Perform AES' SubBytes operation on a single byte.
-static Byte SubByte(Byte b)
-{
-       unsigned int r = InverseByte(b);
-
-       // Duplicate r as a proxy for a rotate operation.
-       r = r | r<<8;
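-	/*	With the copy in bits 8..15, r >> k (k < 8) acts as an 8-bit
-		right-rotate of the original byte, which is what the affine
-		transformation below requires.
-	*/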
-
-       // Apply the standard's affine transformation.
-       return r ^ r>>4 ^ r>>5 ^ r>>6 ^ r>>7 ^ 0x63;
-}
-
-
-// Define and populate tables for the SubBytes and InvSubBytes operations.
-static Byte SubBytesTable[256];
-static Byte InvSubBytesTable[256];
-
-
-static void InitializeSubBytesTable(void)
-{
-       for (int i = 0; i < 256; ++i)
-               SubBytesTable[i] = SubByte((Byte) i);
-}
-
-
-static void InitializeInvSubBytesTable(void)
-{
-       for (int i = 0; i < 256; ++i)
-               InvSubBytesTable[SubByte((Byte) i)] = i;
-}
-
-
-/*     Print tables for SubBytes function providing the output byte embedded in
-       various places in a word, so that the table entries can be used with
-       fewer byte manipulations.
-*/
-static void PrintSubBytesWordTable(Language language)
-{
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// SubBytes embedded in words tables.\n"
-                               "const Word AESSubBytesWordTable[4][256] =\n"
-                               "{\n");
-                       for (int j = 0; j < 4; ++j)
-                       {
-                               printf("\t{\n");
-                               for (int i = 0; i < 256; ++i)
-                                       printf("\t\t0x%08x,\n", SubBytesTable[i] << j*8);
-                               printf("\t},\n");
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// SubBytes embedded in words tables.\n"
-                               "\t.globl\t_AESSubBytesWordTable\n"
-                               "\t.private_extern\t_AESSubBytesWordTable\n"
-                               "\t.align\t2\n"
-                               "_AESSubBytesWordTable:\n");
-                       for (int j = 0; j < 4; ++j)
-                       {
-                               printf("\t// Table %d.\n", j);
-                               for (int i = 0; i < 256; ++i)
-                                       printf("\t.long\t0x%08x\n", SubBytesTable[i] << j*8);
-                       }
-                       break;
-       }
-}
-
-
-/*     Print tables for InvSubBytes function providing the output byte embedded in
-       various places in a word, so that the table entries can be used with
-       fewer byte manipulations.
-*/
-static void PrintInvSubBytesWordTable(Language language)
-{
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// InvSubBytes embedded in words tables.\n"
-                               "const Word AESInvSubBytesWordTable[4][256] =\n"
-                               "{\n");
-                       for (int j = 0; j < 4; ++j)
-                       {
-                               printf("\t{\n");
-                               for (int i = 0; i < 256; ++i)
-                                       printf("\t\t0x%08x,\n", InvSubBytesTable[i] << j*8);
-                               printf("\t},\n");
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// InvSubBytes embedded in words tables.\n"
-                               "\t.globl\t_AESInvSubBytesWordTable\n"
-                               "\t.private_extern\t_AESInvSubBytesWordTable\n"
-                               "\t.align\t2\n"
-                               "_AESInvSubBytesWordTable:\n");
-                       for (int j = 0; j < 4; ++j)
-                       {
-                               printf("\t// Table %d.\n", j);
-                               for (int i = 0; i < 256; ++i)
-                                       printf("\t.long\t0x%08x\n", InvSubBytesTable[i] << j*8);
-                       }
-                       break;
-       }
-}
-
-
-// Print the round constants.
-static void PrintRcon(Language language)
-{
-       union { Byte c[4]; Word w; } t = { { 1, 0, 0, 0 } };
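-
-	/*	Printing t.w as the round-constant byte assumes the low byte of t.w
-		is t.c[0], i.e. a little-endian host, which holds for the i386 and
-		x86_64 hosts this generator runs on.
-	*/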
-
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// Round constants.\n"
-                               "const Byte AESRcon[] =\n"
-                               "{\n"
-                               "\t0,\t// Not used, included for indexing simplicity.\n");
-                       for (int i = 1; i < MaxRcon; ++i)
-                       {
-                               printf("\t0x%02x,\n", t.w);
-                               t.c[0] = MultiplyByte(0x2, t.c[0]);
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// Round constants.\n"
-                               "\t.globl\t_AESRcon\n"
-                               "\t.private_extern\t_AESRcon\n"
-                               "_AESRcon:\n"
-                               "\t.byte\t0\t// Not used, included for indexing simplicity.\n");
-                       for (int i = 1; i < MaxRcon; ++i)
-                       {
-                               printf("\t.byte\t0x%02x\n", t.w);
-                               t.c[0] = MultiplyByte(0x2, t.c[0]);
-                       }
-                       break;
-       }
-}
-
-
-// Print tables for the InvMixColumn operation.
-static void PrintInvMixColumnTable(Language language)
-{
-       Word T[4][256];
-
-       for (int i = 0; i < 256; ++i)
-       {
-               union { Byte b[4]; Word w; } c;
-
-               Byte s9 = MultiplyByte(0x9, i);
-               Byte sb = MultiplyByte(0xb, i);
-               Byte sd = MultiplyByte(0xd, i);
-               Byte se = MultiplyByte(0xe, i);
-
-               c.b[0] = se;
-               c.b[1] = s9;
-               c.b[2] = sd;
-               c.b[3] = sb;
-               T[0][i] = c.w;
-
-               c.b[0] = sb;
-               c.b[1] = se;
-               c.b[2] = s9;
-               c.b[3] = sd;
-               T[1][i] = c.w;
-
-               c.b[0] = sd;
-               c.b[1] = sb;
-               c.b[2] = se;
-               c.b[3] = s9;
-               T[2][i] = c.w;
-
-               c.b[0] = s9;
-               c.b[1] = sd;
-               c.b[2] = sb;
-               c.b[3] = se;
-               T[3][i] = c.w;
-       }
-
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// Tables for InvMixColumn.\n"
-                               "const Word AESInvMixColumnTable[4][256] =\n"
-                               "{\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t{\n");
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t\t0x%08x,\n", T[i][j]);
-                               printf("\t},\n");
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// Tables for InvMixColumn.\n"
-                               "\t.globl\t_AESInvMixColumnTable\n"
-                               "\t.private_extern\t_AESInvMixColumnTable\n"
-                               "\t.align\t2\n"
-                               "_AESInvMixColumnTable:\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t// Table %d.\n", i);
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t.long\t0x%08x\n", T[i][j]);
-                       }
-                       break;
-       }
-}
-
-
-/*	Print the tables defined in AES Proposal: Rijndael, amended, 9/04/2003,
-       section 5.2.1.  These combine the MixColumn and SubBytes operations.
-*/
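-/*	Concretely, T[j][i] below is column j of the MixColumns matrix (first
-	column {02}, {01}, {01}, {03}) scaled by SubBytesTable[i], so SubBytes
-	and MixColumn are applied together with one lookup per byte.
-*/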
-static void PrintEncryptTable(Language language)
-{
-       Word T[4][256];
-
-       for (int i = 0; i < 256; ++i)
-       {
-               union { Byte b[4]; Word w; } c;
-
-               Byte s1 = SubBytesTable[i];
-               Byte s2 = MultiplyByte(0x2, s1);
-               Byte s3 = s1 ^ s2;
-
-               c.b[0] = s2;
-               c.b[1] = s1;
-               c.b[2] = s1;
-               c.b[3] = s3;
-               T[0][i] = c.w;
-
-               c.b[0] = s3;
-               c.b[1] = s2;
-               //c.b[2] = s1;
-               c.b[3] = s1;
-               T[1][i] = c.w;
-
-               c.b[0] = s1;
-               c.b[1] = s3;
-               c.b[2] = s2;
-               //c.b[3] = s1;
-               T[2][i] = c.w;
-
-               //c.b[0] = s1;
-               c.b[1] = s1;
-               c.b[2] = s3;
-               c.b[3] = s2;
-               T[3][i] = c.w;
-       }
-
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// Tables for main encryption iterations.\n"
-                               "const Word AESEncryptTable[4][256] =\n"
-                               "{\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t{\n");
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t\t0x%08x,\n", T[i][j]);
-                               printf("\t},\n");
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// Tables for main encryption iterations.\n"
-                               "\t.globl\t_AESEncryptTable\n"
-                               "\t.private_extern\t_AESEncryptTable\n"
-                               "\t.align\t2\n"
-                               "_AESEncryptTable:\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t// Table %d.\n", i);
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t.long\t0x%08x\n", T[i][j]);
-                       }
-                       break;
-       }
-}
-
-
-/*     Print the inverse tables.  These correspond to the tables above, but for
-	decryption.  These combine the InvSubBytes and InvMixColumn operations.
-*/
-static void PrintDecryptTable(Language language)
-{
-       Word T[4][256];
-
-       for (int i = 0; i < 256; ++i)
-       {
-               union { Byte b[4]; Word w; } c;
-
-               Byte si = InvSubBytesTable[i];
-
-               Byte s9 = MultiplyByte(0x9, si);
-               Byte sb = MultiplyByte(0xb, si);
-               Byte sd = MultiplyByte(0xd, si);
-               Byte se = MultiplyByte(0xe, si);
-
-               c.b[0] = se;
-               c.b[1] = s9;
-               c.b[2] = sd;
-               c.b[3] = sb;
-               T[0][i] = c.w;
-
-               c.b[0] = sb;
-               c.b[1] = se;
-               c.b[2] = s9;
-               c.b[3] = sd;
-               T[1][i] = c.w;
-
-               c.b[0] = sd;
-               c.b[1] = sb;
-               c.b[2] = se;
-               c.b[3] = s9;
-               T[2][i] = c.w;
-
-               c.b[0] = s9;
-               c.b[1] = sd;
-               c.b[2] = sb;
-               c.b[3] = se;
-               T[3][i] = c.w;
-       }
-
-       switch (language)
-       {
-               case C:
-                       printf("\n\n"
-                               "// Tables for main decryption iterations.\n"
-                               "const Word AESDecryptTable[4][256] =\n"
-                               "{\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t{\n");
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t\t0x%08x,\n", T[i][j]);
-                               printf("\t},\n");
-                       }
-                       printf("};\n");
-                       break;
-
-               case IntelAssembly:
-                       printf("\n\n"
-                               "// Tables for main decryption iterations.\n"
-                               "\t.globl\t_AESDecryptTable\n"
-                               "\t.private_extern\t_AESDecryptTable\n"
-                               "\t.align\t2\n"
-                               "_AESDecryptTable:\n");
-                       for (int i = 0; i < 4; ++i)
-                       {
-                               printf("\t// Table %d.\n", i);
-                               for (int j = 0; j < 256; ++j)
-                                       printf("\t.long\t0x%08x\n", T[i][j]);
-                       }
-                       break;
-       }
-}
-
-
-static void Usage(const char *ProgramName)
-{
-       fprintf(stderr,
-               "%s:  This program must have exactly one argument, \"C\" to generate\n"
-               "C or \"Intel\" to generate GCC i386/x86_64 assembly.\n", ProgramName);
-       exit(EXIT_FAILURE);
-}
-
-
-int main(int argc, char *argv[])
-{
-       if (argc != 2)
-               Usage(argv[0]);
-
-       Language language;
-
-       // Figure out which language to generate, C or Intel assembly.
-       if (0 == strcmp(argv[1], "C"))
-               language = C;
-       else if (0 == strcmp(argv[1], "Intel"))
-               language = IntelAssembly;
-       else
-               Usage(argv[0]);
-
-       printf("// This file was generated by " __FILE__ ".\n");
-
-       if (language == C)
-               printf("\n\n#include \"AES.h\"\n");
-
-       if (language == IntelAssembly)
-               printf("\n\n\t.const\n");
-
-       InitializeLogTables();
-       InitializeSubBytesTable();
-       InitializeInvSubBytesTable();
-
-       PrintRcon(language);
-       PrintInvMixColumnTable(language);
-       PrintEncryptTable(language);
-       PrintDecryptTable(language);
-       PrintSubBytesWordTable(language);
-       PrintInvSubBytesWordTable(language);
-
-       return 0;
-}
diff --git a/bsd/crypto/aes/i386/Makefile b/bsd/crypto/aes/i386/Makefile
deleted file mode 100644 (file)
index 851f7b2..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_I386 = \
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-PRIVATE_DATAFILES = \
-       aesxts.h
-
-# /System/Library/Frameworks/Kernel.framework/PrivateHeaders
-INSTALL_KF_MD_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/crypto/aes/i386/ReadMe.txt b/bsd/crypto/aes/i386/ReadMe.txt
deleted file mode 100644 (file)
index 7ac8331..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-This directory contains a hybrid AES implementation.  The core AES routines
-(the actual encryption, decryption, and key expansion) are in:
-
-       AES.s
-       Data.mk
-       Data.s
-       EncryptDecrypt.s
-       ExpandKeyForDecryption.s
-       ExpandKeyForEncryption.s
-       MakeData.c
-
-Although the above files do not explicitly include aes.h, they conform to
-certain things defined in it, notably the aes_rval type and the layout of the
-aes_encrypt_ctx and aes_decrypt_ctx structures.  These must be kept
-compatible; the definitions of ContextKey and ContextKeyLength in AES.s must
-match the offsets of the key ("ks") and key_length ("inf") members of
-aes_encrypt_ctx and aes_decrypt_ctx.  (For some reason, aes_inf is a union that
-is written as a 32-bit integer and read as an 8-bit integer.  I do not know
-why but have reproduced that behavior in the new implementation.)
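-
-One way to keep those offsets honest is a compile-time check; a minimal
-sketch, assuming only the member and symbol names described above (with
-C-visible equivalents of ContextKey and ContextKeyLength):
-
-	#include <stddef.h>
-	/* Illustrative only: a negative array size forces a compile error. */
-	typedef char ks_offset_check[offsetof(aes_encrypt_ctx, ks)
-	                             == ContextKey ? 1 : -1];
-	typedef char inf_offset_check[offsetof(aes_encrypt_ctx, inf)
-	                              == ContextKeyLength ? 1 : -1];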
-
-aes_modes.c extends the API, most notably by implementing CBC mode using the
-basic AES block encryption.  It uses aesopt.h and edefs.h.
diff --git a/bsd/crypto/aes/i386/aes_crypt_hw.s b/bsd/crypto/aes/i386/aes_crypt_hw.s
deleted file mode 100644 (file)
index 2edc3e2..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-/*	This file defines _aes_encrypt_hw and _aes_decrypt_hw --- Intel Westmere HW AES-based implementations
-	of _aes_encrypt and _aes_decrypt.
-
-	These 2 functions SHOULD BE entered ONLY after the AES HW is verified to be available.
-	They SHOULD NOT be called without AES HW detection; doing so might cause xnu to crash.
-
-	The AES HW is detected as the first thing in
-		_aes_encrypt (EncryptDecrypt.s)
-		_aes_decrypt (EncryptDecrypt.s)
-	which, if AES HW is detected, branch without link (i.e., jump) to the functions here.
-
-       The implementation here follows the examples in an Intel White Paper
-       "Intel Advanced Encryption Standard (AES) Instruction Set" Rev.2 01
-
-	Note: Rev. 03 Final (2010-01-26) is available; it appears to contain some code changes relative to Rev. 2.01.
-
-       cclee 3-13-10
-*/
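-
-/*	For orientation, that dispatch amounts to the following C sketch (the
-	actual check lives in EncryptDecrypt.s; names follow that file, and the
-	block is illustrative only):
-
-		if (_cpu_capabilities & kHasAES)
-			return aes_encrypt_hw(in, out, ctx);	// AESNI path, this file.
-		// Otherwise continue with the table-driven software path.
-*/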
-
-    .text
-    .align 4,0x90
-.globl _aes_encrypt_hw
-_aes_encrypt_hw:
-
-#if    defined __i386__        
-       movl    4(%esp), %eax   // in
-       movl    12(%esp), %edx  // ctx
-       movl    8(%esp), %ecx   // out
-
-       #define LOCAL_SIZE      (12+16+16)              // 16-byte align (-4 for return address) + 16 (xmm0) + 16 (xmm1)
-       #define in              %eax
-       #define ctx             %edx
-       #define out             %ecx
-       #define r13             %esp
-
-#else          // x86_64
-
-       #define LOCAL_SIZE      (8+16+16)               // 16-byte align (-8 for return address) + 16 (xmm0) + 16 (xmm1)
-       #define in                      %rdi
-       #define ctx                     %rdx
-       #define out                     %rsi
-       #define r13                     %rsp
-
-#endif         // i386 or x86_64
-
-#ifdef KERNEL
-       sub             $LOCAL_SIZE, r13
-       movaps  %xmm0, (r13)
-#endif
-       movups  (in), %xmm0
-
-       // key length identification
-       movl    240(ctx), %eax                  // key length
-       cmp             $160, %eax
-       je              L_AES_128
-       cmp             $192, %eax
-       je              L_AES_192
-       cmp             $224, %eax
-       je              L_AES_256
-       mov             $-1, %eax                                       // return ERROR
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-L_AES_128:
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             0f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    (ctx), %xmm0
-       aesenc  16(ctx), %xmm0
-       aesenc  32(ctx), %xmm0
-       aesenc  48(ctx), %xmm0
-       aesenc  64(ctx), %xmm0
-       aesenc  80(ctx), %xmm0
-       aesenc  96(ctx), %xmm0
-       aesenc  112(ctx), %xmm0
-       aesenc  128(ctx), %xmm0
-       aesenc  144(ctx), %xmm0
-       aesenclast      160(ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-0:                                                                             // special case expanded key is not 16-byte aligned     
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  (ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  160(ctx), %xmm1
-       aesenclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-L_AES_192:
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             0f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    (ctx), %xmm0
-       aesenc  16(ctx), %xmm0
-       aesenc  32(ctx), %xmm0
-       aesenc  48(ctx), %xmm0
-       aesenc  64(ctx), %xmm0
-       aesenc  80(ctx), %xmm0
-       aesenc  96(ctx), %xmm0
-       aesenc  112(ctx), %xmm0
-       aesenc  128(ctx), %xmm0
-       aesenc  144(ctx), %xmm0
-       aesenc  160(ctx), %xmm0
-       aesenc  176(ctx), %xmm0
-       aesenclast      192(ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-0:                                                                             // special case expanded key is not 16-byte aligned     
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  (ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  160(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  176(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  192(ctx), %xmm1
-       aesenclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-L_AES_256:
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             0f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    (ctx), %xmm0
-       aesenc  16(ctx), %xmm0
-       aesenc  32(ctx), %xmm0
-       aesenc  48(ctx), %xmm0
-       aesenc  64(ctx), %xmm0
-       aesenc  80(ctx), %xmm0
-       aesenc  96(ctx), %xmm0
-       aesenc  112(ctx), %xmm0
-       aesenc  128(ctx), %xmm0
-       aesenc  144(ctx), %xmm0
-       aesenc  160(ctx), %xmm0
-       aesenc  176(ctx), %xmm0
-       aesenc  192(ctx), %xmm0
-       aesenc  208(ctx), %xmm0
-       aesenclast      224(ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-0:                                                                             // special case expanded key is not 16-byte aligned     
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  (ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  160(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  176(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  192(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  208(ctx), %xmm1
-       aesenc  %xmm1, %xmm0
-       movups  224(ctx), %xmm1
-       aesenclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-
-    .text
-    .align 4,0x90
-.globl _aes_decrypt_hw
-_aes_decrypt_hw:
-
-#if    defined __i386__        
-       movl    4(%esp), %eax   // in
-       movl    12(%esp), %edx  // ctx
-       movl    8(%esp), %ecx   // out
-
-#endif
-
-#ifdef KERNEL
-       sub             $LOCAL_SIZE, r13
-       movaps  %xmm0, (r13)
-#endif
-       movups  (in), %xmm0
-
-       // key length identification
-       movl    240(ctx), %eax                  // key length
-       cmp             $160, %eax
-       je              0f                                              // AES-128
-       cmp             $192, %eax
-       je              1f                                              // AES-192
-       cmp             $224, %eax
-       je              2f                                              // AES-256
-       mov             $-1, %eax                               // return ERROR
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-0:                                                                     // AES-128
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             9f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    160(ctx), %xmm0
-       aesdec  144(ctx), %xmm0
-       aesdec  128(ctx), %xmm0
-       aesdec  112(ctx), %xmm0
-       aesdec  96(ctx), %xmm0
-       aesdec  80(ctx), %xmm0
-       aesdec  64(ctx), %xmm0
-       aesdec  48(ctx), %xmm0
-       aesdec  32(ctx), %xmm0
-       aesdec  16(ctx), %xmm0
-       aesdeclast      (ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-9:                                                                             // AES-128 Decrypt : special case expanded key is not 16-byte aligned 
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  160(ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  (ctx), %xmm1
-       aesdeclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1  
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-1:                                                             // AES-192
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             9f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    192(ctx), %xmm0
-       aesdec  176(ctx), %xmm0
-       aesdec  160(ctx), %xmm0
-       aesdec  144(ctx), %xmm0
-       aesdec  128(ctx), %xmm0
-       aesdec  112(ctx), %xmm0
-       aesdec  96(ctx), %xmm0
-       aesdec  80(ctx), %xmm0
-       aesdec  64(ctx), %xmm0
-       aesdec  48(ctx), %xmm0
-       aesdec  32(ctx), %xmm0
-       aesdec  16(ctx), %xmm0
-       aesdeclast      (ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-9:                                                                             // AES-192 Decrypt : special case expanded key is not 16-byte aligned 
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  192(ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  176(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  160(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  (ctx), %xmm1
-       aesdeclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1  
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
-2:                                                     // AES-256
-       testb   $15, %dl                                        // check whether expanded key is 16-byte aligned
-       jne             9f                                                      // if not 16-byte aligned, aesenc xmm, m128 won't work  
-       pxor    224(ctx), %xmm0
-       aesdec  208(ctx), %xmm0
-       aesdec  192(ctx), %xmm0
-       aesdec  176(ctx), %xmm0
-       aesdec  160(ctx), %xmm0
-       aesdec  144(ctx), %xmm0
-       aesdec  128(ctx), %xmm0
-       aesdec  112(ctx), %xmm0
-       aesdec  96(ctx), %xmm0
-       aesdec  80(ctx), %xmm0
-       aesdec  64(ctx), %xmm0
-       aesdec  48(ctx), %xmm0
-       aesdec  32(ctx), %xmm0
-       aesdec  16(ctx), %xmm0
-       aesdeclast      (ctx), %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-9:                                                                             // AES-256 Decrypt : special case expanded key is not 16-byte aligned 
-#ifdef KERNEL
-       movaps  %xmm1, 16(r13)                          // save xmm1 into stack
-#endif
-       movups  224(ctx), %xmm1
-       pxor    %xmm1, %xmm0
-       movups  208(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  192(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  176(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  160(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  144(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  128(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  112(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  96(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  80(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  64(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  48(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  32(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  16(ctx), %xmm1
-       aesdec  %xmm1, %xmm0
-       movups  (ctx), %xmm1
-       aesdeclast      %xmm1, %xmm0
-       xorl    %eax, %eax
-       movups  %xmm0, (out)
-#ifdef KERNEL
-       movaps  (r13), %xmm0    
-       movaps  16(r13), %xmm1  
-       add             $LOCAL_SIZE, r13
-#endif
-       ret
-
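(An equivalent, hedged C-intrinsics sketch of the aligned AES-128 encrypt path deleted above -- load the block, XOR with round key 0, nine aesenc rounds, one aesenclast -- using the standard wmmintrin.h intrinsics that compile to the same instructions. The 11-entry key schedule matches the 160-byte case handled at L_AES_128; the function name is illustrative, and the error return and unaligned-key path are omitted.)

    #include <wmmintrin.h>   /* AES-NI intrinsics; build with -maes */

    static void aes128_encrypt_block_hw(const __m128i rk[11],   /* expanded key, 16-byte aligned */
                                        const unsigned char in[16],
                                        unsigned char out[16])
    {
        __m128i s = _mm_loadu_si128((const __m128i *)in);       /* movups (in), %xmm0 */
        s = _mm_xor_si128(s, rk[0]);                            /* pxor   (ctx), %xmm0 */
        for (int i = 1; i < 10; i++)
            s = _mm_aesenc_si128(s, rk[i]);                     /* aesenc 16*i(ctx), %xmm0 */
        s = _mm_aesenclast_si128(s, rk[10]);                    /* aesenclast 160(ctx), %xmm0 */
        _mm_storeu_si128((__m128i *)out, s);                    /* movups %xmm0, (out) */
    }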
diff --git a/bsd/crypto/aes/i386/aes_key_hw.s b/bsd/crypto/aes/i386/aes_key_hw.s
deleted file mode 100644 (file)
index 434fa55..0000000
+++ /dev/null
@@ -1,405 +0,0 @@
-/*     This file defines _aes_encrypt_key_hw and _aes_decrypt_key_hw --- the Intel Westmere HW AES-based implementation
-       of _aes_encrypt_key and _aes_decrypt_key. 
-
-       These 2 functions SHOULD BE entered ONLY after the AES HW has been verified to be available. 
-       They SHOULD NOT be called without AES HW detection; doing so might cause xnu to crash.
-
-       The AES HW is detected as the 1st thing in 
-               _aes_encrypt_key (ExpandKeyForEncryption.s) 
-               _aes_decrypt_key (ExpandKeyForDecryption.s)
-       and, if AES HW is detected, they branch without link (i.e., jump) to the functions here.
-
-       The implementation here follows the examples in an Intel White Paper,
-       "Intel Advanced Encryption Standard (AES) Instruction Set", Rev. 2 01.
-
-       Note: Rev. 03 Final 2010 01 26 is available; it looks like there are some code changes from Rev. 2 01.
-
-       cclee 3-13-10
-*/
-
-       .text   
-       .align  4,0x90
-
-       // aes_encrypt_key_hw(key, klen, hwectx);
-       // klen = 16, 24, or 32, or (128/192/256)
-
-       .globl  _aes_encrypt_key_hw
-_aes_encrypt_key_hw:
-
-#ifdef __i386__
-       push    %ebp
-       mov             %esp, %ebp
-       push    %ebx
-       push    %edi    
-       mov             8(%ebp), %eax           // pointer to key
-       mov             12(%ebp), %ebx          // klen
-       mov             16(%ebp), %edi          // ctx
-       #define pkey    %eax
-       #define klen    %ebx
-       #define ctx             %edi
-       #define sp              %esp
-       #define cx              %ecx
-#else
-       #define pkey    %rdi
-       #define klen    %rsi
-       #define ctx             %rdx
-       #define sp              %rsp
-       #define cx              %rcx
-       push    %rbp
-       mov             %rsp, %rbp
-#endif
-
-#ifdef KERNEL
-       // for xmm registers save and restore
-       sub             $(16*4), sp
-#endif
-
-       cmp             $32, klen
-       jg              0f                                      // klen>32
-       shl             $3, klen                        // convert 16/24/32 to 128/192/256
-0:
-
-       cmp             $128, klen                      // AES-128 ?
-       je              L_AES_128_Encrypt_Key
-       cmp             $192, klen                      // AES-192 ?
-       je              L_AES_192_Encrypt_Key
-       cmp             $256, klen                      // AES-256 ?
-       je              L_AES_256_Encrypt_Key
-       mov             $1, %eax                        // return error for wrong klen 
-L_Encrypt_Key_2_return:
-#ifdef KERNEL
-       add             $(16*4), sp
-#endif
-#ifdef __i386__
-       pop             %edi
-       pop             %ebx
-#endif
-       leave
-       ret
-
-L_AES_128_Encrypt_Key:
-#ifdef KERNEL
-       // save xmm registers
-       movaps  %xmm1, (sp)
-       movaps  %xmm2, 16(sp)
-       movaps  %xmm3, 32(sp)
-#endif // KERNEL
-
-       movl    $160, 240(ctx)          // write expanded key length to ctx
-       xor             cx, cx
-
-       movups  (pkey), %xmm1
-       movups  %xmm1, (ctx)
-       aeskeygenassist $1, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $2, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $4, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $8, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x10, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x20, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x40, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x80, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x1b, %xmm1, %xmm2
-       call    L_key_expansion_128
-       aeskeygenassist $0x36, %xmm1, %xmm2
-       call    L_key_expansion_128
-
-#ifdef KERNEL
-       // restore xmm registers
-       movaps  (sp), %xmm1
-       movaps  16(sp), %xmm2
-       movaps  32(sp), %xmm3
-#endif // KERNEL
-       xor             %eax, %eax                      // return 0 for success
-       jmp             L_Encrypt_Key_2_return
-
-       .align  4, 0x90
-L_key_expansion_128:
-       pshufd  $0xff, %xmm2, %xmm2
-       movaps  %xmm1, %xmm3
-       pslldq  $4, %xmm3
-       pxor    %xmm3, %xmm1
-       movaps  %xmm1, %xmm3
-       pslldq  $4, %xmm3
-       pxor    %xmm3, %xmm1
-       movaps  %xmm1, %xmm3
-       pslldq  $4, %xmm3
-       pxor    %xmm3, %xmm1
-       pxor    %xmm2, %xmm1
-       add             $16, cx
-       movups  %xmm1, (ctx, cx)
-       ret
-
-L_AES_192_Encrypt_Key:
-#ifdef KERNEL
-       // save xmm registers
-       movaps  %xmm1, (sp)
-       movaps  %xmm2, 16(sp)
-       movaps  %xmm3, 32(sp)
-       movaps  %xmm4, 48(sp)
-#endif // KERNEL
-       movl    $192, 240(ctx)          // write expanded key length to ctx
-
-       movups  (pkey), %xmm1
-       movq    16(pkey), %xmm3
-
-       movups  %xmm1, (ctx)
-       movq    %xmm3, 16(ctx)
-
-       lea             24(ctx), cx
-
-       aeskeygenassist $1, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $2, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $4, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $8, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $0x10, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $0x20, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $0x40, %xmm3, %xmm2
-       call    L_key_expansion_192
-       aeskeygenassist $0x80, %xmm3, %xmm2
-       call    L_key_expansion_192
-
-#ifdef KERNEL
-       // restore xmm registers
-       movaps  (sp), %xmm1
-       movaps  16(sp), %xmm2
-       movaps  32(sp), %xmm3
-       movaps  48(sp), %xmm4
-#endif // KERNEL
-       xor             %eax, %eax                      // return 0 for success
-       jmp             L_Encrypt_Key_2_return
-
-       .align  4, 0x90
-L_key_expansion_192:
-       pshufd  $0x55, %xmm2, %xmm2
-
-       movaps  %xmm1, %xmm4
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pxor    %xmm2, %xmm1
-
-       pshufd  $0xff, %xmm1, %xmm2
-
-       movaps  %xmm3, %xmm4
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm3
-       pxor    %xmm2, %xmm3
-
-       movups  %xmm1, (cx)
-       movq    %xmm3, 16(cx)
-
-       add             $24, cx
-       ret
-
-L_AES_256_Encrypt_Key:
-#ifdef KERNEL
-       // save xmm registers
-       movaps  %xmm1, (sp)
-       movaps  %xmm2, 16(sp)
-       movaps  %xmm3, 32(sp)
-       movaps  %xmm4, 48(sp)
-#endif // KERNEL
-       movl    $224, 240(ctx)          // write expanded key length to ctx
-
-       movups  (pkey), %xmm1
-       movups  16(pkey), %xmm3
-       movups  %xmm1, (ctx)
-       movups  %xmm3, 16(ctx)
-
-       lea             32(ctx), cx
-
-       aeskeygenassist $1, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $2, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $4, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $8, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $0x10, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $0x20, %xmm3, %xmm2
-       call    L_key_expansion_256
-       aeskeygenassist $0x40, %xmm3, %xmm2
-       call    L_key_expansion_256_final
-
-#ifdef KERNEL
-       // restore xmm registers
-       movaps  (sp), %xmm1
-       movaps  16(sp), %xmm2
-       movaps  32(sp), %xmm3
-       movaps  48(sp), %xmm4
-#endif // KERNEL
-       xor             %eax, %eax                      // return 0 for success
-       jmp             L_Encrypt_Key_2_return
-
-       .align  4, 0x90
-L_key_expansion_256:
-
-       pshufd  $0xff, %xmm2, %xmm2
-
-       movaps  %xmm1, %xmm4
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pxor    %xmm2, %xmm1
-
-       movups  %xmm1, (cx)
-
-       aeskeygenassist $0, %xmm1, %xmm4
-
-       pshufd  $0xaa, %xmm4, %xmm2
-
-       movaps  %xmm3, %xmm4
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm3
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm3
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm3
-       pxor    %xmm2, %xmm3
-
-       movups  %xmm3, 16(cx)
-
-       add             $32, cx
-       ret
-
-       .align  4, 0x90
-L_key_expansion_256_final:
-
-       pshufd  $0xff, %xmm2, %xmm2
-
-       movaps  %xmm1, %xmm4
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pslldq  $4, %xmm4
-
-       pxor    %xmm4, %xmm1
-       pxor    %xmm2, %xmm1
-
-       movups  %xmm1, (cx)
-       ret 
-
-// _aes_decrypt_key_hw is implemented as
-//     1. call _aes_encrypt_key_hw
-//     2. use aesimc to convert the expanded round keys (except the 1st and last round keys)
-
-       .text   
-       .align  4, 0x90
-       .globl  _aes_decrypt_key_hw
-_aes_decrypt_key_hw:
-
-#ifdef __i386__
-
-       push    %ebp
-       mov             %esp, %ebp
-       sub             $(8+16), %esp
-
-       // copy input arguments for calling aes_encrypt_key_hw
-
-       mov             8(%ebp), %eax
-       mov             %eax, (%esp)
-       mov             12(%ebp), %eax
-       mov             %eax, 4(%esp)
-       mov             16(%ebp), %eax
-       mov             %eax, 8(%esp)
-
-#else
-
-       push    %rbp
-       mov             %rsp, %rbp
-       sub             $16, %rsp
-
-       // calling arguments %rdi/%rsi/%rdx will be used for encrypt_key 
-       // %rdx (ctx) will return unchanged
-       // %rsi (klen) will be shifted left by 3 (<<3) if <= 32
-
-#endif
-       call    _aes_encrypt_key_hw
-       cmp             $0, %eax
-       je              L_decrypt_inv
-L_decrypt_almost_done:
-#ifdef __i386__
-       add             $(8+16), %esp
-#else
-       add             $16, %rsp
-#endif
-       leave
-       ret
-
-L_decrypt_inv:
-#ifdef KERNEL
-       movaps  %xmm0, (sp)
-#endif
-
-#ifdef __i386__        
-       #undef  klen
-       #undef  ctx     
-       mov     12(%ebp), %eax      // klen
-    mov     16(%ebp), %edx      // ctx
-       #define klen    %eax
-       #define ctx             %edx
-       cmp             $32, klen
-       jg              0f                                      // klen>32
-       shl             $3, klen                        // convert 16/24/32 to 128/192/256
-0:
-#endif
-
-       mov             $9, cx                          // default is AES-128
-       cmp             $128, klen
-       je              L_Decrypt_Key
-       add             $2, cx
-       cmp             $192, klen
-       je              L_Decrypt_Key
-       add             $2, cx 
-
-L_Decrypt_Key:
-       add             $16, ctx
-       movups  (ctx), %xmm0
-       aesimc  %xmm0, %xmm0
-       movups  %xmm0, (ctx)
-       sub             $1, cx
-       jg              L_Decrypt_Key
-
-#ifdef KERNEL
-       movaps  (sp), %xmm0
-#endif
-#ifdef __i386__
-       xor             %eax, %eax
-#endif
-       jmp             L_decrypt_almost_done
-
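(A hedged C equivalent of the decrypt-key derivation deleted above: expand the encryption key, then run aesimc over every round key except the first and the last. nr below is the number of rounds -- 10/12/14 for AES-128/192/256 -- so the loop executes 9/11/13 times, matching the cx counts set before L_Decrypt_Key; the function name is illustrative.)

    #include <wmmintrin.h>

    static void aes_invert_key_schedule(__m128i rk[], int nr)
    {
        /* rk[0] and rk[nr] are used as-is by pxor/aesdeclast; invert only the middle keys */
        for (int i = 1; i < nr; i++)
            rk[i] = _mm_aesimc_si128(rk[i]);
    }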
diff --git a/bsd/crypto/aes/i386/aes_modes_asm.s b/bsd/crypto/aes/i386/aes_modes_asm.s
deleted file mode 100644 (file)
index 3b0f29a..0000000
+++ /dev/null
@@ -1,420 +0,0 @@
-/*\r
- ---------------------------------------------------------------------------\r
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.\r
-\r
- LICENSE TERMS\r
-\r
- The free distribution and use of this software in both source and binary\r
- form is allowed (with or without changes) provided that:\r
-\r
-   1. distributions of this source code include the above copyright\r
-      notice, this list of conditions and the following disclaimer;\r
-\r
-   2. distributions in binary form include the above copyright\r
-      notice, this list of conditions and the following disclaimer\r
-      in the documentation and/or other associated materials;\r
-\r
-   3. the copyright holder's name is not used to endorse products\r
-      built using this software without specific written permission.\r
-\r
- ALTERNATIVELY, provided that this notice is retained in full, this product\r
- may be distributed under the terms of the GNU General Public License (GPL),\r
- in which case the provisions of the GPL apply INSTEAD OF those given above.\r
-\r
- DISCLAIMER\r
-\r
- This software is provided 'as is' with no explicit or implied warranties\r
- in respect of its properties, including, but not limited to, correctness\r
- and/or fitness for purpose.\r
- ---------------------------------------------------------------------------\r
- Issue 31/01/2006\r
-\r
- These subroutines implement multiple block AES modes for ECB, CBC, CFB,\r
- OFB and CTR encryption.  The code provides support for the VIA Advanced \r
- Cryptography Engine (ACE).\r
-\r
- NOTE: In the following subroutines, the AES contexts (ctx) must be\r
- 16 byte aligned if VIA ACE is being used\r
-*/\r
-\r
-/* modified 3/5/10 cclee */\r
-/* Clean up those related to VIA ACE and hand optimize aes_cbc_encrypt and aes_cbc_decrypt */\r
-/* move the xmm registers save/restore originally inside the callee functions into these 2 caller functions */\r
-\r
-/* add code comments/description and HW AES detection and execution branch cclee 3-13-10 */\r
-\r
-#ifdef KERNEL\r
-#include <i386/cpu_capabilities.h>     // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW\r
-#else\r
-#include <System/i386/cpu_capabilities.h>      // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW\r
-#endif\r
-\r
-#if 0\r
-\r
-// TODO:\r
-// aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c\r
-// the implementation would be added here if needed\r
-// they are currently compiled from aes_modes.c\r
-\r
-aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,\r
-                    int len, const aes_encrypt_ctx ctx[1])\r
-{   int nb = len >> 4;\r
-\r
-    if(len & (AES_BLOCK_SIZE - 1)) return 1;\r
-    while(nb--) {\r
-        aes_encrypt(ibuf, obuf, ctx);\r
-        ibuf += AES_BLOCK_SIZE;\r
-        obuf += AES_BLOCK_SIZE;\r
-    }\r
-    return 0;\r
-}\r
-\r
-aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,\r
-                    int len, const aes_decrypt_ctx ctx[1])\r
-{   int nb = len >> 4;\r
-\r
-    if(len & (AES_BLOCK_SIZE - 1)) return 1;\r
-    while(nb--) {\r
-        aes_decrypt(ibuf, obuf, ctx);\r
-        ibuf += AES_BLOCK_SIZE;\r
-        obuf += AES_BLOCK_SIZE;\r
-    }\r
-    return 0;\r
-}\r
-#endif\r
-\r
-#if 0\r
-aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,\r
-                                        unsigned char *obuf, const aes_encrypt_ctx ctx[1])\r
-{\r
-               unsigned char iv[16];\r
-               int i;\r
-               \r
-               for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);\r
-\r
-               while (num_blk--) {\r
-                       for (i = 0; i < 16; i++) iv[i] ^= ibuf[i];      // *iv ^= *ibuf (128-bit)\r
-            aes_encrypt(iv, iv, ctx);\r
-            memcpy(obuf, iv, AES_BLOCK_SIZE);\r
-            ibuf += AES_BLOCK_SIZE;\r
-            obuf += AES_BLOCK_SIZE;\r
-                       \r
-               }               \r
-\r
-               return 0;\r
-}\r
-#endif\r
-\r
-       .text\r
-       .align  4,0x90\r
-       .globl  _aes_encrypt_cbc\r
-_aes_encrypt_cbc:\r
-\r
-       // detect AES HW\r
-       // if AES HW detected, branch to AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)\r
-       // o.w., fall through to the original AES-SW function\r
-\r
-#if defined    __x86_64__\r
-       movq    __cpu_capabilities@GOTPCREL(%rip), %rax                 // %rax -> __cpu_capability\r
-       mov             (%rax), %eax                                                                    // %eax = __cpu_capabilities\r
-#else\r
-#ifdef KERNEL\r
-       leal    __cpu_capabilities, %eax                                                // %eax -> __cpu_capabilities\r
-       mov             (%eax), %eax                                                                    // %eax = __cpu_capabilities\r
-#else\r
-       mov    _COMM_PAGE_CPU_CAPABILITIES, %eax\r
-#endif\r
-#endif\r
-       test    $(kHasAES), %eax                                                                // kHasAES & __cpu_capabilities\r
-       jne             _aes_encrypt_cbc_hw                                                             // if AES HW detected, branch to HW-specific code\r
-\r
-       // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)\r
-#if    defined __i386__\r
-       push    %ebp\r
-       mov             %esp, %ebp\r
-       push    %ebx                                    // to be used as ibuf\r
-       push    %edi                                    // to be used as obuf\r
-       sub             $(16+16+7*16), %esp             // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)\r
-       mov             %esi, 12(%esp)                  // save %esi in the unused 4 bytes; %esi will be reused as num_blk\r
-\r
-       #define sp      %esp\r
-#else  // __x86_64__\r
-       push    %rbp\r
-       mov             %rsp, %rbp\r
-       push    %rbx\r
-       push    %r12\r
-       push    %r13\r
-       push    %r14\r
-       push    %r15\r
-       sub             $(8+16+5*16+16), %rsp   // 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)    \r
-\r
-       #define sp      %rsp\r
-#endif\r
-\r
-       // save xmm registers for kernel use\r
-       // xmm6-xmm7 will be used locally\r
-       // xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (not restored)\r
-       // there is a hole not used for xmm, which is 48(sp). \r
-       // it has been used to store iv (16-bytes) in i386 code\r
-       // for consistency between i386 and x86_64, this hole is dummied in x86_64 code\r
-       // also the 1st 16 bytes (sp) are dummied in x86_64 code\r
-\r
-#ifdef KERNEL\r
-       movaps  %xmm7, 16(sp)\r
-       movaps  %xmm6, 32(sp)\r
-       movaps  %xmm0, 64(sp)\r
-       movaps  %xmm1, 80(sp)\r
-       movaps  %xmm2, 96(sp)\r
-#if defined    __i386__\r
-       movaps  %xmm3, 112(sp)\r
-       movaps  %xmm4, 128(sp)\r
-#endif\r
-#endif\r
-\r
-       // set up registers from calling arguments\r
-\r
-#if defined    __i386__\r
-\r
-       mov             12(%ebp), %eax                  // in_iv\r
-       mov             24(%ebp), %edx                  // ctx\r
-       movups  (%eax), %xmm7                   // in_iv        \r
-       lea             48(%esp), %eax                  // &iv[0]\r
-       mov             %eax, (%esp)                    // 1st iv for aes_encrypt\r
-       mov             %eax, 4(%esp)                   // 2nd iv for aes_encrypt\r
-       mov             %edx, 8(%esp)                   // ctx for aes_encrypt\r
-       mov             8(%ebp), %ebx                   // ibuf\r
-       mov             16(%ebp), %esi                  // num_blk\r
-       mov             20(%ebp), %edi                  // obuf\r
-\r
-       #define ibuf    %ebx\r
-       #define obuf    %edi\r
-       #define num_blk %esi    \r
-\r
-#else  //      __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8\r
-\r
-       mov             %rdi, %rbx                              // ibuf\r
-       lea             48(sp), %r12                    // &iv\r
-       movups  (%rsi), %xmm7                   // in_iv\r
-       mov             %rdx, %r13                              // num_blk\r
-       mov             %rcx, %r14                              // obuf\r
-       mov             %r8, %r15                               // ctx  \r
-\r
-       #define ibuf    %rbx\r
-       #define iv              %r12\r
-       #define num_blk %r13d\r
-       #define obuf    %r14    \r
-       #define ctx             %r15\r
-\r
-#endif\r
-\r
-       cmp             $1, num_blk                             // num_blk vs 1\r
-       jl              9f                                              // if num_blk < 1, branch to bypass the main loop\r
-0:\r
-       movups  (ibuf), %xmm6                   // ibuf\r
-#if defined    __i386__\r
-       lea             48(sp), %eax                    // &iv[0]\r
-       pxor    %xmm6, %xmm7                    // iv ^= ibuf\r
-       movups  %xmm7, (%eax)                   // save iv\r
-#else\r
-       pxor    %xmm6, %xmm7                    // iv ^= ibuf\r
-       movups  %xmm7, (iv)                             // save iv\r
-       mov             iv, %rdi                                // 1st calling argument for aes_encrypt\r
-       mov             iv, %rsi                                // 2nd calling argument for aes_encrypt\r
-       mov             ctx, %rdx                               // 3rd calling argument for aes_encrypt\r
-#endif\r
-       call    _aes_encrypt_xmm_no_save        // aes_encrypt(iv, iv, ctx)\r
-#if defined __i386__\r
-       leal    48(%esp), %eax                  // &iv[0]\r
-       movups  (%eax), %xmm7                   // read iv\r
-#else\r
-       movups  (iv), %xmm7                             // read iv\r
-#endif\r
-       movups  %xmm7, (obuf)                   // memcpy(obuf, iv, AES_BLOCK_SIZE);\r
-       add             $16, ibuf                               // ibuf += AES_BLOCK_SIZE; \r
-       add             $16, obuf                               // obuf += AES_BLOCK_SIZE;      \r
-       sub             $1, num_blk                             // num_blk --\r
-       jg              0b                                              // if num_blk > 0, repeat the loop\r
-9:     \r
-\r
-L_crypt_cbc_done:\r
-\r
-       // restore xmm registers due to kernel use\r
-#ifdef KERNEL\r
-       movaps  16(sp), %xmm7\r
-       movaps  32(sp), %xmm6\r
-       movaps  64(sp), %xmm0\r
-       movaps  80(sp), %xmm1\r
-       movaps  96(sp), %xmm2\r
-#if defined    __i386__\r
-       movaps  112(sp), %xmm3\r
-       movaps  128(sp), %xmm4\r
-#endif\r
-#endif\r
-\r
-       xor             %eax, %eax                              // to return 0 for SUCCESS\r
-\r
-#if    defined __i386__\r
-       mov             12(%esp), %esi                  // restore %esi\r
-       add             $(16+16+7*16), %esp             // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)\r
-       pop             %edi\r
-       pop             %ebx\r
-#else\r
-       add             $(8+16+5*16+16), %rsp   // 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)    \r
-       pop             %r15\r
-       pop             %r14\r
-       pop             %r13\r
-       pop             %r12\r
-       pop             %rbx\r
-#endif\r
-       leave\r
-       ret\r
-\r
-#if 0\r
-aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,\r
-                                        unsigned char *obuf, const aes_decrypt_ctx ctx[1])\r
-{\r
-               unsigned char iv[16], tmp[16];\r
-               int i;\r
-               \r
-               for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);\r
-\r
-               while (num_blk--) {\r
-\r
-            memcpy(tmp, ibuf, AES_BLOCK_SIZE);\r
-            aes_decrypt(ibuf, obuf, ctx);\r
-                       for (i = 0; i < 16; i++) obuf[i] ^= iv[i];      // *obuf ^= *iv (128-bit)\r
-            memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-            ibuf += AES_BLOCK_SIZE;\r
-            obuf += AES_BLOCK_SIZE;\r
-               }\r
-\r
-               return 0;\r
-}\r
-#endif\r
-\r
-       .text\r
-       .align  4,0x90\r
-       .globl  _aes_decrypt_cbc\r
-_aes_decrypt_cbc:\r
-\r
-       // detect AES HW\r
-       // if AES HW detected, branch to AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)\r
-       // o.w., fall through to the original AES-SW function\r
-\r
-#if defined    __x86_64__\r
-       movq    __cpu_capabilities@GOTPCREL(%rip), %rax                 // %rax -> __cpu_capability\r
-       mov             (%rax), %eax                                                                    // %eax = __cpu_capabilities\r
-#else\r
-#ifdef KERNEL\r
-       leal    __cpu_capabilities, %eax                                                // %eax -> __cpu_capabilities\r
-       mov             (%eax), %eax                                                                    // %eax = __cpu_capabilities\r
-#else\r
-       mov    _COMM_PAGE_CPU_CAPABILITIES, %eax\r
-#endif\r
-#endif\r
-       test    $(kHasAES), %eax                                                                // kHasAES & __cpu_capabilities\r
-       jne             _aes_decrypt_cbc_hw\r
-\r
-       // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)\r
-#if    defined __i386__\r
-       push    %ebp\r
-       mov             %esp, %ebp\r
-       push    %ebx                                    // to be used as ibuf\r
-       push    %edi                                    // to be used as obuf\r
-       sub             $(16+16+7*16), %esp             // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)\r
-       mov             %esi, 12(%esp)                  // save %esi in the unused 4 bytes; %esi will be reused as num_blk\r
-\r
-       #define sp      %esp\r
-#else  // __x86_64__\r
-       push    %rbp\r
-       mov             %rsp, %rbp\r
-       push    %rbx\r
-       push    %r12\r
-       push    %r13\r
-       push    %r14\r
-       push    %r15\r
-       sub             $(8+16+5*16+16), %rsp   // 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)    \r
-\r
-       #define sp      %rsp\r
-#endif\r
-\r
-       // save xmm registers for kernel use\r
-       // xmm6-xmm7 will be used locally\r
-       // xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_decrypt_xmm_no_save (not restored)\r
-       // there is a hole not used for xmm, which is 48(sp). \r
-       // it has been used to store iv (16-bytes) in i386 code\r
-       // for consistency between i386 and x86_64, this hole is dummied in x86_64 code\r
-       // also the 1st 16 bytes (sp) are dummied in x86_64 code\r
-\r
-#ifdef KERNEL\r
-       movaps  %xmm7, 16(sp)\r
-       movaps  %xmm6, 32(sp)\r
-       movaps  %xmm0, 64(sp)\r
-       movaps  %xmm1, 80(sp)\r
-       movaps  %xmm2, 96(sp)\r
-#if defined    __i386__\r
-       movaps  %xmm3, 112(sp)\r
-       movaps  %xmm4, 128(sp)\r
-#endif\r
-#endif\r
-\r
-       // set up registers from calling arguments\r
-\r
-#if defined    __i386__\r
-       mov             12(%ebp), %eax                  // in_iv\r
-       mov             24(%ebp), %edx                  // ctx\r
-       movups  (%eax), %xmm7                   // in_iv        \r
-       mov             %edx, 8(%esp)                   // ctx for aes_encrypt\r
-       mov             8(%ebp), %ebx                   // ibuf\r
-       mov             16(%ebp), %esi                  // num_blk\r
-       mov             20(%ebp), %edi                  // obuf\r
-\r
-       #define ibuf    %ebx\r
-       #define obuf    %edi\r
-       #define num_blk %esi    \r
-#else  //      __x86_64__, rdi/rsi/rdx/rcx/r8\r
-       mov             %rdi, %rbx                              // ibuf\r
-       movups  (%rsi), %xmm7                   // in_iv\r
-       mov             %rdx, %r13                              // num_blk\r
-       mov             %rcx, %r14                              // obuf \r
-       mov             %r8, %r15                               // ctx  \r
-\r
-       #define ibuf    %rbx\r
-       #define num_blk %r13d\r
-       #define obuf    %r14    \r
-       #define ctx             %r15\r
-\r
-#endif\r
-           // memcpy(tmp, ibuf, AES_BLOCK_SIZE);\r
-           // aes_decrypt(ibuf, obuf, ctx);\r
-                       //      obuf ^= iv;\r
-           // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-           // ibuf += AES_BLOCK_SIZE;\r
-           // obuf += AES_BLOCK_SIZE;\r
-\r
-       cmp             $1, num_blk                                     // num_blk vs 1\r
-       jl              L_crypt_cbc_done                        // if num_blk < 1, bypass the main loop, jump to finishing code\r
-0:\r
-       movups  (ibuf), %xmm6                           // tmp\r
-#if defined    __i386__\r
-       mov             ibuf, (sp)                                      // ibuf\r
-       mov             obuf, 4(sp)                                     // obuf\r
-#else\r
-       mov             ibuf, %rdi                                      // ibuf \r
-       mov             obuf, %rsi                                      // obuf\r
-       mov             ctx, %rdx                                       // ctx\r
-#endif\r
-       call    _aes_decrypt_xmm_no_save        // aes_decrypt(ibuf, obuf, ctx)\r
-       movups  (obuf), %xmm0                           // obuf\r
-       pxor    %xmm7, %xmm0                            // obuf ^= iv;\r
-       movaps  %xmm6, %xmm7                            // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       movups  %xmm0, (obuf)                           // update obuf\r
-       add             $16, ibuf                                       // ibuf += AES_BLOCK_SIZE; \r
-       add             $16, obuf                                       // obuf += AES_BLOCK_SIZE;      \r
-       sub             $1, num_blk                                     // num_blk --\r
-       jg              0b                                                      // if num_blk > 0, repeat the loop\r
-9:     \r
-\r
-       // we are done here; the finishing code is identical to that in aes_encrypt_cbc, so just jump there\r
-       jmp             L_crypt_cbc_done\r
-\r
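(A hedged C rendering of the dispatch pattern both entry points deleted above implement: test the kHasAES bit in the capability word once, then hand off to the AES-NI routine in aes_modes_hw.s, otherwise fall through to the software path. _cpu_capabilities and kHasAES come from the cpu_capabilities.h header the assembly includes; aes_encrypt_cbc_sw is a hypothetical name for the inline software fall-through, and the aes_rval/context typedefs exist here only to keep the sketch self-contained.)

    #include <i386/cpu_capabilities.h>   /* _cpu_capabilities, kHasAES, as used by the .s above */

    typedef int aes_rval;                        /* assumed return type per aes.h */
    typedef struct aes_encrypt_ctx aes_encrypt_ctx;

    extern aes_rval aes_encrypt_cbc_hw(const unsigned char *, const unsigned char *,
                                       unsigned int, unsigned char *, const aes_encrypt_ctx *);
    extern aes_rval aes_encrypt_cbc_sw(const unsigned char *, const unsigned char *,
                                       unsigned int, unsigned char *, const aes_encrypt_ctx *);

    aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv,
                             unsigned int num_blk, unsigned char *obuf,
                             const aes_encrypt_ctx *ctx)
    {
        if (_cpu_capabilities & kHasAES)         /* test $(kHasAES), %eax ; jne _aes_encrypt_cbc_hw */
            return aes_encrypt_cbc_hw(ibuf, in_iv, num_blk, obuf, ctx);
        return aes_encrypt_cbc_sw(ibuf, in_iv, num_blk, obuf, ctx);
    }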
diff --git a/bsd/crypto/aes/i386/aes_modes_hw.s b/bsd/crypto/aes/i386/aes_modes_hw.s
deleted file mode 100644 (file)
index b9e3508..0000000
+++ /dev/null
@@ -1,1623 +0,0 @@
-/*\r
- ---------------------------------------------------------------------------\r
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.\r
-\r
- LICENSE TERMS\r
-\r
- The free distribution and use of this software in both source and binary\r
- form is allowed (with or without changes) provided that:\r
-\r
-   1. distributions of this source code include the above copyright\r
-      notice, this list of conditions and the following disclaimer;\r
-\r
-   2. distributions in binary form include the above copyright\r
-      notice, this list of conditions and the following disclaimer\r
-      in the documentation and/or other associated materials;\r
-\r
-   3. the copyright holder's name is not used to endorse products\r
-      built using this software without specific written permission.\r
-\r
- ALTERNATIVELY, provided that this notice is retained in full, this product\r
- may be distributed under the terms of the GNU General Public License (GPL),\r
- in which case the provisions of the GPL apply INSTEAD OF those given above.\r
-\r
- DISCLAIMER\r
-\r
- This software is provided 'as is' with no explicit or implied warranties\r
- in respect of its properties, including, but not limited to, correctness\r
- and/or fitness for purpose.\r
- ---------------------------------------------------------------------------\r
- Issue 31/01/2006\r
-\r
- These subroutines implement multiple block AES modes for ECB, CBC, CFB,\r
- OFB and CTR encryption.  The code provides support for the VIA Advanced \r
- Cryptography Engine (ACE).\r
-\r
- NOTE: In the following subroutines, the AES contexts (ctx) must be\r
- 16 byte aligned if VIA ACE is being used\r
-*/\r
-\r
-\r
-/* ---------------------------------------------------------------------------------------------------------------- \r
-\r
-       aes_encrypt_cbc function (see aes_modes.c or aes_modes_asm.s) :\r
-\r
-       For simplicity, I am assuming all variables are of a 128-bit data type.\r
-\r
-       aes_rval aes_encrypt_cbc(const __m128 *ibuf, __m128 *iv, int num_blk, __m128 *obuf, const aes_encrypt_ctx *ctx)\r
-       {\r
-               while(num_blk--) {\r
-                       *iv ^= *ibuf++;\r
-                       aes_encrypt(iv, iv, ctx);\r
-                       *obuf++ = *iv;\r
-               }\r
-               return 0;\r
-       }\r
-\r
-       The following is an implementation of this function using Intel AESNI.\r
-       This function _aes_encrypt_cbc_hw SHOULD NOT be called directly. \r
-       Developers should still call _aes_encrypt_cbc (in aes_modes_asm.s), which will poll cpu_capabilities and branch\r
-       to this aesni-based function should it detect that aesni is available.\r
-       Blindly calling this function will SURELY cause a CRASH on systems with no aesni support. \r
-\r
-       Note that each block starts with *iv, which is the output of the previous block. Therefore, the cbc blocks\r
-       are serially chained. This prevents us from arranging several blocks for encryption in parallel.\r
-\r
-   ----------------------------------------------------------------------------------------------------------------*/\r
-\r
-       .text\r
-       .align  4,0x90\r
-       .globl  _aes_encrypt_cbc_hw\r
-_aes_encrypt_cbc_hw:\r
-\r
-       // push/save registers for local use\r
-#if    defined __i386__\r
-\r
-       push    %ebp\r
-       movl    %esp, %ebp\r
-       push    %ebx\r
-       push    %edi\r
-\r
-       #define sp      %esp\r
-\r
-#else  // __x86_64__\r
-\r
-       push    %rbp\r
-       mov             %rsp, %rbp\r
-       push    %rbx\r
-       push    %r13\r
-       push    %r14\r
-       push    %r15\r
-\r
-       #define sp      %rsp\r
-\r
-#endif\r
-\r
-       // if this is kernel code, need to save used xmm registers\r
-#ifdef KERNEL\r
-\r
-#if defined __i386__\r
-       sub             $(8*16), %esp                   // for possible xmm0-xmm7 save/restore\r
-#else\r
-       sub             $(16*16), %rsp          // xmm0-xmm15 save/restore      \r
-#endif\r
-\r
-       movaps  %xmm0, (sp)\r
-       movaps  %xmm1, 16(sp)\r
-       movaps  %xmm2, 32(sp)\r
-       movaps  %xmm3, 48(sp)\r
-       movaps  %xmm4, 64(sp)\r
-       movaps  %xmm5, 80(sp)\r
-       movaps  %xmm6, 96(sp)\r
-       movaps  %xmm7, 112(sp)\r
-#if defined    __x86_64__\r
-       movaps  %xmm8, 16*8(sp)\r
-       movaps  %xmm9, 16*9(sp)\r
-       movaps  %xmm10, 16*10(sp)\r
-       movaps  %xmm11, 16*11(sp)\r
-       movaps  %xmm12, 16*12(sp)\r
-       movaps  %xmm13, 16*13(sp)\r
-       movaps  %xmm14, 16*14(sp)\r
-       movaps  %xmm15, 16*15(sp)\r
-#endif // __x86_64__\r
-\r
-#endif // KERNEL\r
-\r
-       #define iv      %xmm0\r
-\r
-#ifdef __i386__\r
-\r
-       mov             12(%ebp), %eax                  // in_iv\r
-       mov             24(%ebp), %edx                  // ctx\r
-       movups  (%eax), iv                              // iv = in_iv   \r
-       mov             8(%ebp), %ebx                   // ibuf\r
-       mov             16(%ebp), %ecx                  // num_blk\r
-       mov             20(%ebp), %edi                  // obuf\r
-\r
-       #define ibuf    %ebx\r
-       #define obuf    %edi\r
-       #define num_blk %ecx    \r
-       #define ctx             %edx\r
-\r
-#else\r
-\r
-       mov             %rdi, %rbx                              // ibuf\r
-       movups  (%rsi), iv                              // iv = in_iv\r
-       mov             %rdx, %r13                              // num_blk\r
-       mov             %rcx, %r14                              // obuf\r
-       mov             %r8, %r15                               // ctx  \r
-\r
-       #define ibuf    %rbx\r
-       #define num_blk %r13d\r
-       #define obuf    %r14    \r
-       #define ctx             %r15\r
-\r
-#endif\r
-\r
-       mov             240(ctx), %eax                  // aes length\r
-       cmp             $160, %eax                              // aes-128 encrypt ?\r
-       je              L_encrypt_128\r
-       cmp             $192, %eax                              // aes-192 encrypt ?\r
-       je              L_encrypt_192\r
-       cmp             $224, %eax                              // aes-256 encrypt ?\r
-       je              L_encrypt_256\r
-       mov             $-1, %eax                               // return error\r
-       jmp             L_error \r
-\r
-       //\r
-       // aes-128 encrypt_cbc operation, up to L_HW_cbc_done\r
-       //\r
-\r
-L_encrypt_128:\r
-\r
-       cmp             $1, num_blk                             // check number of block\r
-       jl              L_HW_cbc_done                   // should it be less than 1, nothing to do\r
-\r
-       movups  (ctx), %xmm2                    // key0\r
-       movups  16(ctx), %xmm3                  // key1\r
-       movups  32(ctx), %xmm4                  // key2\r
-       movups  48(ctx), %xmm5                  // key3\r
-       movups  64(ctx), %xmm6                  // key4\r
-       movups  80(ctx), %xmm7                  // key5\r
-#if defined    __x86_64__\r
-       movups  96(ctx), %xmm8                  // key6\r
-       movups  112(ctx), %xmm9                 // key7\r
-       movups  128(ctx), %xmm10                // key8\r
-       movups  144(ctx), %xmm11                // key9\r
-       movups  160(ctx), %xmm12                // keyA\r
-#endif\r
-\r
-       // while (num_blk--) {\r
-       //                      *iv ^= *ibuf++;\r
-       //                      aes_encrypt(iv, iv, ctx);\r
-       //                      *obuf++ = *iv;\r
-       // }\r
-0:\r
-       movups  (ibuf), %xmm1                           // *ibuf\r
-       pxor    %xmm2, iv                                       // 1st instruction inside aes_encrypt\r
-       pxor    %xmm1, iv                                       // *iv ^= *ibuf\r
-\r
-       // finishing up the rest of aes_encrypt\r
-    aesenc  %xmm3, iv\r
-    aesenc  %xmm4, iv\r
-    aesenc  %xmm5, iv\r
-    aesenc  %xmm6, iv\r
-    aesenc  %xmm7, iv\r
-#if defined    __x86_64__\r
-    aesenc  %xmm8, iv\r
-    aesenc  %xmm9, iv\r
-    aesenc  %xmm10, iv\r
-    aesenc  %xmm11, iv\r
-    aesenclast  %xmm12, iv\r
-#else\r
-       movups  96(ctx), %xmm1                          // key6\r
-    aesenc  %xmm1, iv\r
-       movups  112(ctx), %xmm1                         // key7\r
-    aesenc  %xmm1, iv\r
-       movups  128(ctx), %xmm1                         // key8\r
-    aesenc  %xmm1, iv\r
-       movups  144(ctx), %xmm1                         // key9\r
-    aesenc  %xmm1, iv\r
-       movups  160(ctx), %xmm1                         // keyA\r
-    aesenclast  %xmm1, iv\r
-#endif\r
-\r
-       movups  iv, (obuf)                                      // *obuf = *iv;\r
-       add             $16, obuf                                       // obuf++;\r
-       add             $16, ibuf                                       // ibuf++;\r
-       sub             $1, num_blk                                     // num_blk --\r
-       jg              0b                                                      // if num_blk > 0, repeat the loop\r
-\r
-       // the following will be branched to from all other cases (encrypt/decrypt 128/192/256)\r
-\r
-L_HW_cbc_done:\r
-\r
-       xor             %eax, %eax                              // to return CRYPT_OK\r
-\r
-L_error:\r
-\r
-       // if kernel, restore xmm registers\r
-#ifdef KERNEL \r
-       movaps  0(sp), %xmm0\r
-       movaps  16(sp), %xmm1\r
-       movaps  32(sp), %xmm2\r
-       movaps  48(sp), %xmm3\r
-       movaps  64(sp), %xmm4\r
-       movaps  80(sp), %xmm5\r
-       movaps  96(sp), %xmm6\r
-       movaps  112(sp), %xmm7\r
-#if defined    __x86_64__\r
-       movaps  16*8(sp), %xmm8\r
-       movaps  16*9(sp), %xmm9\r
-       movaps  16*10(sp), %xmm10\r
-       movaps  16*11(sp), %xmm11\r
-       movaps  16*12(sp), %xmm12\r
-       movaps  16*13(sp), %xmm13\r
-       movaps  16*14(sp), %xmm14\r
-       movaps  16*15(sp), %xmm15\r
-#endif // __x86_64__\r
-#endif // KERNEL\r
-\r
-       // release used stack memory, restore used callee-saved registers, and return \r
-#if    defined __i386__\r
-#ifdef KERNEL\r
-       add             $(8*16), %esp\r
-#endif\r
-       pop             %edi\r
-       pop             %ebx\r
-#else\r
-#ifdef KERNEL\r
-       add             $(16*16), %rsp  \r
-#endif\r
-       pop             %r15\r
-       pop             %r14\r
-       pop             %r13\r
-       pop             %rbx\r
-#endif\r
-       leave\r
-       ret\r
-\r
-       //\r
-       // aes-192 encrypt_cbc operation, after completion, branch to L_HW_cbc_done\r
-       //\r
-\r
-L_encrypt_192:\r
-\r
-       cmp             $1, num_blk                             // check number of block\r
-       jl              L_HW_cbc_done                   // should it be less than 1, nothing to do\r
-\r
-       movups  (ctx), %xmm2                    // key0\r
-       movups  16(ctx), %xmm3                  // key1\r
-       movups  32(ctx), %xmm4                  // key2\r
-       movups  48(ctx), %xmm5                  // key3\r
-       movups  64(ctx), %xmm6                  // key4\r
-       movups  80(ctx), %xmm7                  // key5\r
-#if defined    __x86_64__\r
-       movups  96(ctx), %xmm8                  // key6\r
-       movups  112(ctx), %xmm9                 // key7\r
-       movups  128(ctx), %xmm10                // key8\r
-       movups  144(ctx), %xmm11                // key9\r
-       movups  160(ctx), %xmm12                // keyA\r
-       movups  176(ctx), %xmm13                // keyB\r
-       movups  192(ctx), %xmm14                // keyC\r
-#endif\r
-       \r
-       // while (num_blk--) {\r
-       //                      *iv ^= *ibuf++;\r
-       //                      aes_encrypt(iv, iv, ctx);\r
-       //                      *obuf++ = *iv;\r
-       // }\r
-0:\r
-       movups  (ibuf), %xmm1                   // *ibuf\r
-       pxor    %xmm1, iv                               // *iv ^= ibuf\r
-\r
-       // aes_encrypt(iv, iv, ctx);\r
-\r
-       pxor    %xmm2, iv\r
-    aesenc  %xmm3, iv\r
-    aesenc  %xmm4, iv\r
-    aesenc  %xmm5, iv\r
-    aesenc  %xmm6, iv\r
-    aesenc  %xmm7, iv\r
-#if defined    __x86_64__\r
-    aesenc  %xmm8, iv\r
-    aesenc  %xmm9, iv\r
-    aesenc  %xmm10, iv\r
-    aesenc  %xmm11, iv\r
-    aesenc  %xmm12, iv\r
-    aesenc  %xmm13, iv\r
-    aesenclast  %xmm14, iv\r
-#else\r
-       movups  96(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  112(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  128(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  144(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  160(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  176(ctx), %xmm1\r
-    aesenc  %xmm1, iv\r
-       movups  192(ctx), %xmm1\r
-    aesenclast  %xmm1, iv\r
-#endif\r
-\r
-       movups  iv, (obuf)                              // *obuf = *iv;\r
-       add             $16, ibuf                               // ibuf++\r
-       add             $16, obuf                               // obuf++\r
-\r
-       sub             $1, num_blk                             // num_blk --\r
-       jg              0b                                              // if num_blk > 0, repeat the loop\r
-\r
-       jmp             L_HW_cbc_done                   // share with the common exit code\r
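[Editorial note] The aes-192 loop above is strictly serial: each plaintext block is XORed into the running iv, pushed through one pxor, eleven aesenc rounds, and one aesenclast, and the result is both the output block and the next chaining value. A minimal C sketch of the same chain using the standard AESNI intrinsics follows; the rk[] array of 13 round keys is an assumed illustrative layout, not the kernel's aes_encrypt_ctx API.

#include <immintrin.h>  /* SSE2 + AESNI intrinsics; build with -maes */

/* Sketch only: aes-192 CBC encrypt, one block per iteration.
 * rk[0..12] are the 13 expanded round keys (assumed layout).
 * The dependency chain through iv is why this loop cannot be
 * parallelized across blocks. */
static void cbc_encrypt_192_sketch(const __m128i *ibuf, __m128i *obuf,
                                   int num_blk, __m128i *iv_io,
                                   const __m128i rk[13])
{
    __m128i iv = _mm_loadu_si128(iv_io);
    for (int i = 0; i < num_blk; i++) {
        iv = _mm_xor_si128(iv, _mm_loadu_si128(&ibuf[i])); /* *iv ^= *ibuf++ */
        iv = _mm_xor_si128(iv, rk[0]);                     /* round 0 */
        for (int r = 1; r < 12; r++)
            iv = _mm_aesenc_si128(iv, rk[r]);              /* rounds 1..11 */
        iv = _mm_aesenclast_si128(iv, rk[12]);             /* round 12 (last) */
        _mm_storeu_si128(&obuf[i], iv);                    /* *obuf++ = *iv */
    }
    _mm_storeu_si128(iv_io, iv);                           /* return chaining value */
}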
-\r
-       //\r
-       // aes-256 encrypt_cbc operation, after completion, branch to L_HW_cbc_done\r
-       //\r
-\r
-L_encrypt_256:\r
-\r
-	cmp		$1, num_blk				// check number of blocks\r
-	jl		L_HW_cbc_done			// if num_blk < 1, nothing to do\r
-\r
-       movups  (ctx), %xmm2                    // key0\r
-       movups  16(ctx), %xmm3                  // key1\r
-       movups  32(ctx), %xmm4                  // key2\r
-       movups  48(ctx), %xmm5                  // key3\r
-       movups  64(ctx), %xmm6                  // key4\r
-       movups  80(ctx), %xmm7                  // key5\r
-#if defined    __x86_64__\r
-       movups  96(ctx), %xmm8                  // key6\r
-       movups  112(ctx), %xmm9                 // key7\r
-       movups  128(ctx), %xmm10                // key8\r
-       movups  144(ctx), %xmm11                // key9\r
-       movups  160(ctx), %xmm12                // keyA\r
-       movups  176(ctx), %xmm13                // keyB\r
-       movups  192(ctx), %xmm14                // keyC\r
-       movups  208(ctx), %xmm15                // keyD\r
-       // movups       224(ctx), %xmm1         // keyE\r
-#endif\r
-\r
-       // while (num_blk--) {\r
-       //                      *iv ^= *ibuf++;\r
-       //                      aes_encrypt(iv, iv, ctx);\r
-       //                      *obuf++ = *iv;\r
-       // }\r
-0:\r
-       movups  (ibuf), %xmm1                   // *ibuf\r
-       pxor    %xmm1, iv                               // *iv ^= ibuf\r
-       \r
-       // aes_encrypt(iv, iv, ctx);\r
-       pxor    %xmm2, iv\r
-    aesenc  %xmm3, iv\r
-    aesenc  %xmm4, iv\r
-    aesenc  %xmm5, iv\r
-    aesenc  %xmm6, iv\r
-    aesenc  %xmm7, iv\r
-#if defined    __x86_64__\r
-       movups  224(ctx), %xmm1                 // keyE\r
-    aesenc  %xmm8, iv\r
-    aesenc  %xmm9, iv\r
-    aesenc  %xmm10, iv\r
-    aesenc  %xmm11, iv\r
-    aesenc  %xmm12, iv\r
-    aesenc  %xmm13, iv\r
-    aesenc  %xmm14, iv\r
-    aesenc  %xmm15, iv\r
-    aesenclast  %xmm1, iv\r
-#else\r
-       movups  96(ctx), %xmm1                  // key6\r
-    aesenc  %xmm1, iv\r
-       movups  112(ctx), %xmm1                 // key7\r
-    aesenc  %xmm1, iv\r
-       movups  128(ctx), %xmm1                 // key8\r
-    aesenc  %xmm1, iv\r
-       movups  144(ctx), %xmm1                 // key9\r
-    aesenc  %xmm1, iv\r
-       movups  160(ctx), %xmm1                 // keyA\r
-    aesenc  %xmm1, iv\r
-       movups  176(ctx), %xmm1                 // keyB\r
-    aesenc  %xmm1, iv\r
-       movups  192(ctx), %xmm1                 // keyC\r
-    aesenc  %xmm1, iv\r
-       movups  208(ctx), %xmm1                 // keyD\r
-    aesenc  %xmm1, iv\r
-       movups  224(ctx), %xmm1                 // keyE\r
-    aesenclast  %xmm1, iv\r
-#endif\r
-\r
-       movups  iv, (obuf)                              // *obuf = *iv;\r
-       add             $16, ibuf                               // ibuf++\r
-       add             $16, obuf                               // obuf++\r
-\r
-       sub             $1, num_blk                             // num_blk --\r
-       jg              0b                                              // if num_blk > 0, repeat the loop\r
-\r
-       jmp             L_HW_cbc_done                   // share with the common exit code\r
-\r
-\r
-\r
-       //\r
-       // --------- END of aes_encrypt_cbc_hw  -------------------\r
-       //\r
-\r
-\r
-/* ---------------------------------------------------------------------------------------------------------------- \r
-\r
-       aes_decrypt_cbc function (see aes_modes.c or aes_modes_asm.s) :\r
-\r
-	For simplicity, assume all variables are of a 128-bit data type.\r
-\r
-       aes_rval aes_decrypt_cbc(const __m128 *ibuf, __m128 *iv, int num_blk, __m128 *obuf, const aes_decrypt_ctx *ctx)\r
-       {\r
-               while(num_blk--) {\r
-                       aes_decrypt(ibuf, obuf, ctx);\r
-                       *obuf++ ^= *iv;\r
-                       *iv = *ibuf++;\r
-               }\r
-               return 0;\r
-       }\r
-\r
-	The following is an implementation of this function using Intel AESNI.\r
-	This function _aes_decrypt_cbc_hw SHOULD NOT be called directly.\r
-	Developers should instead call _aes_decrypt_cbc (in aes_modes_asm.s), which polls cpu_capabilities and\r
-	branches to this aesni-based function only when it detects that aesni is available.\r
-	Calling this function blindly will surely CRASH on systems without aesni support.\r
-\r
-	Note that the decryption operations are independent across blocks.\r
-	This gives the opportunity to arrange the aes_decrypt operations in parallel for speed.\r
-	This is equivalent to what is described in the Intel AES Instruction Set White Paper (Rev. 2.0, pages 53-55).\r
-	The following assembly code exploits this idea to achieve a ~1.4x speedup in aes_decrypt_cbc.\r
-\r
-       Example C code for packing 4 blocks in an iteration is shown as follows:\r
-\r
-               while ((num_blk-=4)>=0) {\r
-\r
-                       // the following 4 functions can be interleaved to exploit parallelism\r
-                       aes_decrypt(ibuf, obuf, ctx);\r
-                       aes_decrypt(ibuf+1, obuf+1, ctx);\r
-                       aes_decrypt(ibuf+2, obuf+2, ctx);\r
-                       aes_decrypt(ibuf+3, obuf+3, ctx);\r
-\r
-			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];\r
-                       *iv = ibuf[3];          ibuf += 4;      obuf += 4;\r
-               }\r
-               num_blk+=4;\r
-\r
-   ----------------------------------------------------------------------------------------------------------------*/\r
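[Editorial note] As a concrete companion to the description above, here is a hedged intrinsics sketch of the 4-blocks-per-iteration decrypt (aes-128 case, remainder handling omitted). Because the four aesdec chains are independent, their instruction latencies overlap, which is what the hand-scheduled assembly below achieves with xmm1/xmm2/xmm14/xmm15. The rk[] layout (rk[0] = last expanded key, i.e. 160(ctx) below; rk[10] = first) is assumed for illustration and is not the kernel's aes_decrypt_ctx API.

#include <immintrin.h>  /* SSE2 + AESNI intrinsics; build with -maes */

/* Sketch only: aes-128 CBC decrypt, 4 independent blocks per iteration. */
static void cbc_decrypt_128_by4_sketch(const __m128i *ibuf, __m128i *obuf,
                                       int num_blk, __m128i *iv_io,
                                       const __m128i rk[11])
{
    __m128i iv = _mm_loadu_si128(iv_io);
    while ((num_blk -= 4) >= 0) {
        __m128i b0 = _mm_xor_si128(_mm_loadu_si128(&ibuf[0]), rk[0]);
        __m128i b1 = _mm_xor_si128(_mm_loadu_si128(&ibuf[1]), rk[0]);
        __m128i b2 = _mm_xor_si128(_mm_loadu_si128(&ibuf[2]), rk[0]);
        __m128i b3 = _mm_xor_si128(_mm_loadu_si128(&ibuf[3]), rk[0]);
        for (int r = 1; r < 10; r++) {            /* rounds 1..9, interleaved */
            b0 = _mm_aesdec_si128(b0, rk[r]);
            b1 = _mm_aesdec_si128(b1, rk[r]);
            b2 = _mm_aesdec_si128(b2, rk[r]);
            b3 = _mm_aesdec_si128(b3, rk[r]);
        }
        b0 = _mm_aesdeclast_si128(b0, rk[10]);    /* round 10 (last) */
        b1 = _mm_aesdeclast_si128(b1, rk[10]);
        b2 = _mm_aesdeclast_si128(b2, rk[10]);
        b3 = _mm_aesdeclast_si128(b3, rk[10]);
        /* CBC unchaining: each output ^= previous ciphertext block */
        _mm_storeu_si128(&obuf[0], _mm_xor_si128(b0, iv));
        _mm_storeu_si128(&obuf[1], _mm_xor_si128(b1, _mm_loadu_si128(&ibuf[0])));
        _mm_storeu_si128(&obuf[2], _mm_xor_si128(b2, _mm_loadu_si128(&ibuf[1])));
        _mm_storeu_si128(&obuf[3], _mm_xor_si128(b3, _mm_loadu_si128(&ibuf[2])));
        iv = _mm_loadu_si128(&ibuf[3]);           /* *iv = ibuf[3] */
        ibuf += 4; obuf += 4;
    }
    _mm_storeu_si128(iv_io, iv);
    /* num_blk is now in -4..-1; the assembly adds 4 back and
     * handles the 0..3 leftover blocks separately */
}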
-\r
-       .text\r
-       .align  4,0x90\r
-       .globl  _aes_decrypt_cbc_hw\r
-_aes_decrypt_cbc_hw:\r
-\r
-       // push/save registers for local use\r
-#if    defined __i386__\r
-\r
-       push    %ebp\r
-       movl    %esp, %ebp\r
-       push    %ebx                                    // ibuf\r
-       push    %edi                                    // obuf\r
-\r
-       #define sp      %esp\r
-\r
-#else  // __x86_64__\r
-\r
-       push    %rbp\r
-       mov             %rsp, %rbp\r
-       push    %rbx\r
-       push    %r13\r
-       push    %r14\r
-       push    %r15\r
-\r
-       #define sp      %rsp\r
-\r
-#endif\r
-\r
-\r
-       // if kernel, allocate stack space to save xmm registers\r
-#ifdef KERNEL\r
-#if defined __i386__\r
-       sub             $(8*16), %esp\r
-#else\r
-       sub             $(16*16), %rsp\r
-#endif\r
-       movaps  %xmm0, (sp)\r
-       movaps  %xmm1, 16(sp)\r
-       movaps  %xmm2, 32(sp)\r
-       movaps  %xmm3, 48(sp)\r
-       movaps  %xmm4, 64(sp)\r
-       movaps  %xmm5, 80(sp)\r
-       movaps  %xmm6, 96(sp)\r
-       movaps  %xmm7, 112(sp)\r
-#if defined    __x86_64__\r
-       movaps  %xmm8, 16*8(sp)\r
-       movaps  %xmm9, 16*9(sp)\r
-       movaps  %xmm10, 16*10(sp)\r
-       movaps  %xmm11, 16*11(sp)\r
-       movaps  %xmm12, 16*12(sp)\r
-       movaps  %xmm13, 16*13(sp)\r
-       movaps  %xmm14, 16*14(sp)\r
-       movaps  %xmm15, 16*15(sp)\r
-#endif // __x86_64__\r
-#endif\r
-\r
-       #undef  iv\r
-       #define iv      %xmm0\r
-\r
-#if defined    __i386__\r
-       mov             12(%ebp), %eax                  // in_iv\r
-       mov             24(%ebp), %edx                  // ctx\r
-       movups  (%eax), iv                              // iv = in_iv   \r
-       mov             8(%ebp), %ebx                   // ibuf\r
-       mov             16(%ebp), %ecx                  // num_blk\r
-       mov             20(%ebp), %edi                  // obuf\r
-\r
-       #define ibuf    %ebx\r
-       #define obuf    %edi\r
-       #define num_blk %ecx    \r
-       #define ctx             %edx\r
-\r
-#else  //      __x86_64__, rdi/rsi/rdx/rcx/r8\r
-\r
-       mov             %rdi, %rbx                              // ibuf\r
-       movups  (%rsi), iv                              // iv = in_iv\r
-       mov             %rdx, %r13                              // num_blk\r
-       mov             %rcx, %r14                              // obuf\r
-       mov             %r8, %r15                               // ctx  \r
-\r
-       #define ibuf    %rbx\r
-       #define num_blk %r13d\r
-       #define obuf    %r14    \r
-       #define ctx             %r15\r
-\r
-#endif\r
-\r
-       mov             240(ctx), %eax                  // aes length\r
-       cmp             $160, %eax                              // aes-128 decrypt\r
-       je              L_decrypt_128\r
-       cmp             $192, %eax                              // aes-192 decrypt\r
-       je              L_decrypt_192\r
-       cmp             $224, %eax                              // aes-256 decrypt\r
-       je              L_decrypt_256\r
-\r
-       mov             $-1, %eax                               // wrong aes length, to return -1\r
-       jmp             L_error                                 // early exit due to wrong aes length\r
-\r
-\r
-       //\r
-       // aes-128 decrypt_cbc operation, after completion, branch to L_HW_cbc_done\r
-       //\r
-\r
-L_decrypt_128:\r
-\r
-       cmp             $1, num_blk\r
-       jl              L_HW_cbc_done                   // if num_blk < 1, early return\r
-\r
-       // aes-128 decrypt expanded keys\r
-       movups  160(ctx), %xmm3\r
-       movups  144(ctx), %xmm4\r
-       movups  128(ctx), %xmm5\r
-       movups  112(ctx), %xmm6\r
-       movups  96(ctx), %xmm7\r
-#if defined    __x86_64__\r
-       movups  80(ctx), %xmm8\r
-       movups  64(ctx), %xmm9\r
-       movups  48(ctx), %xmm10\r
-       movups  32(ctx), %xmm11\r
-       movups  16(ctx), %xmm12\r
-       movups  0(ctx), %xmm13\r
-#endif\r
-\r
-	// performs 4-block decryption per iteration to exploit parallelism across aes_decrypt calls\r
-\r
-       //              while ((num_blk-=4)>=0) {\r
-       //                      aes_decrypt(ibuf, obuf, ctx);\r
-       //                      aes_decrypt(ibuf+1, obuf+1, ctx);\r
-       //                      aes_decrypt(ibuf+2, obuf+2, ctx);\r
-       //                      aes_decrypt(ibuf+3, obuf+3, ctx);\r
-	//			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];\r
-       //                      *iv = ibuf[3]; ibuf += 4; obuf += 4;\r
-       //              }\r
-\r
-       sub             $4, num_blk                                     // pre decrement num_blk by 4\r
-       jl              9f                                                      // if num_blk < 4, skip the per-4-blocks processing code\r
-\r
-0:\r
-\r
-\r
-#if defined    __x86_64__\r
-\r
-       movups  (ibuf), %xmm1                           // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                         // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm14                        // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm15                        // tmp = 4th ibuf\r
-\r
-       // for x86_64, the expanded keys are already stored in xmm3-xmm13\r
-\r
-       // aes-128 decrypt round 0 per 4 blocks\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm14\r
-       pxor    %xmm3, %xmm15\r
-\r
-       // aes-128 decrypt round 1 per 4 blocks\r
-    aesdec  %xmm4, %xmm1\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm4, %xmm14\r
-    aesdec  %xmm4, %xmm15\r
-\r
-       // aes-128 decrypt round 2 per 4 blocks\r
-    aesdec  %xmm5, %xmm1\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm5, %xmm14\r
-    aesdec  %xmm5, %xmm15\r
-\r
-       // aes-128 decrypt round 3 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm14\r
-    aesdec  %xmm6, %xmm15\r
-\r
-       // aes-128 decrypt round 4 per 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm14\r
-    aesdec  %xmm7, %xmm15\r
-\r
-       // aes-128 decrypt round 5 per 4 blocks\r
-    aesdec  %xmm8, %xmm1\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm8, %xmm14\r
-    aesdec  %xmm8, %xmm15\r
-\r
-       // aes-128 decrypt round 6 per 4 blocks\r
-    aesdec  %xmm9, %xmm1\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm9, %xmm14\r
-    aesdec  %xmm9, %xmm15\r
-\r
-       // aes-128 decrypt round 7 per 4 blocks\r
-    aesdec  %xmm10, %xmm1\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm10, %xmm14\r
-    aesdec  %xmm10, %xmm15\r
-\r
-       // aes-128 decrypt round 8 per 4 blocks\r
-    aesdec  %xmm11, %xmm1\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm11, %xmm14\r
-    aesdec  %xmm11, %xmm15\r
-\r
-       // aes-128 decrypt round 9 per 4 blocks\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-\r
-       // aes-128 decrypt round 10 (last) per 4 blocks\r
-    aesdeclast  %xmm13, %xmm1\r
-    aesdeclast  %xmm13, %xmm2\r
-    aesdeclast  %xmm13, %xmm14\r
-    aesdeclast  %xmm13, %xmm15\r
-\r
-       pxor    iv, %xmm1                               // obuf[0] ^= *iv; \r
-       movups  (ibuf), iv                              // ibuf[0]\r
-       pxor    iv, %xmm2                               // obuf[1] ^= ibuf[0]; \r
-       movups  16(ibuf), iv                    // ibuf[1]\r
-       pxor    iv, %xmm14                              // obuf[2] ^= ibuf[1]; \r
-       movups  32(ibuf), iv                    // ibuf[2] \r
-	pxor	iv, %xmm15				// obuf[3] ^= ibuf[2];\r
-       movups  48(ibuf), iv                    // *iv = ibuf[3]\r
-\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm14, 32(obuf)                // write 3rd obuf\r
-       movups  %xmm15, 48(obuf)                // write 4th obuf\r
-\r
-\r
-#else\r
-\r
-       // aes_decrypt_cbc per 4 blocks using aes-128 for i386\r
-       // xmm1/xmm2/xmm4/xmm5 used for obuf per block\r
-       // xmm3 = key0\r
-       // xmm0 = iv\r
-       // xmm6/xmm7 dynamically load with other expanded keys\r
-\r
-       movups  (ibuf), %xmm1                   // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                 // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm4                 // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm5                 // tmp = 4th ibuf\r
-\r
-       // aes_decrypt\r
-       // for i386, sequentially load expanded keys into xmm6/xmm7\r
-\r
-       movups  144(ctx), %xmm6                 // key1\r
-\r
-       // aes-128 decrypt round 0 per 4 blocks\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm4\r
-       pxor    %xmm3, %xmm5\r
-\r
-       movups  128(ctx), %xmm7                 // key2\r
-\r
-       // aes-128 decrypt round 1 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  112(ctx), %xmm6                 // key3\r
-\r
-       // aes-128 decrypt round 2 per 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  96(ctx), %xmm7                  // key4\r
-\r
-       // aes-128 decrypt round 3 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  80(ctx), %xmm6                  // key5\r
-\r
-       // aes-128 decrypt round 4 per 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  64(ctx), %xmm7                  // key6\r
-\r
-       // aes-128 decrypt round 5 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  48(ctx), %xmm6                  // key7\r
-\r
-       // aes-128 decrypt round 6 per 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  32(ctx), %xmm7                  // key8\r
-\r
-       // aes-128 decrypt round 7 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  16(ctx), %xmm6                  // key9\r
-\r
-       // aes-128 decrypt round 8 per 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  0(ctx), %xmm7                   // keyA\r
-\r
-       // aes-128 decrypt round 9 per 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       // aes-128 decrypt round 10 (last) per 4 blocks\r
-    aesdeclast  %xmm7, %xmm1\r
-    aesdeclast  %xmm7, %xmm2\r
-    aesdeclast  %xmm7, %xmm4\r
-    aesdeclast  %xmm7, %xmm5\r
-\r
-       pxor    iv, %xmm1                               // 1st obuf ^= iv; \r
-       movups  (ibuf), iv                              // 1st memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm2                               // 2nd obuf ^= iv; \r
-       movups  16(ibuf), iv                    // 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm4                               // 3rd obuf ^= iv; \r
-       movups  32(ibuf), iv                    // 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm5                               // 4th obuf ^= iv; \r
-       movups  48(ibuf), iv                    // 4th memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm4, 32(obuf)                 // write 3rd obuf\r
-       movups  %xmm5, 48(obuf)                 // write 4th obuf\r
-#endif\r
-\r
-       add             $64, ibuf                               // ibuf += 4; \r
-       add             $64, obuf                               // obuf += 4;   \r
-\r
-       sub             $4, num_blk                             // num_blk -= 4\r
-	jge		0b								// if num_blk >= 0, repeat the loop\r
-\r
-9:	add		$4, num_blk				// post increment num_blk by 4\r
-	je		L_HW_cbc_done			// if num_blk == 0, no further processing needed\r
-\r
-#if defined    __i386__\r
-	// reload these registers, as they may be needed as expanded keys for the remaining blocks\r
-       movups  144(ctx), %xmm4\r
-       movups  128(ctx), %xmm5\r
-       movups  112(ctx), %xmm6\r
-       movups  96(ctx), %xmm7\r
-#endif\r
-\r
-       test    $2, num_blk                             // check whether num_blk has 2 blocks\r
-       je              9f                                              // if num_blk & 2 == 0, skip the per-pair processing code\r
-\r
-       // do the remaining 2 blocks together\r
-\r
-       movups  (ibuf), %xmm1                           // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                         // tmp = 2nd ibuf\r
-\r
-       // aes_decrypt\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-    aesdec  %xmm4, %xmm1\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm5, %xmm1\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-#if defined    __x86_64__\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm8, %xmm1\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm9, %xmm1\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm10, %xmm1\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm11, %xmm1\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdeclast  %xmm13, %xmm1\r
-    aesdeclast  %xmm13, %xmm2\r
-#else\r
-       movups  80(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-       movups  64(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-       movups  48(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-       movups  32(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-       movups  16(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-       movups  0(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdeclast  %xmm7, %xmm1\r
-    aesdeclast  %xmm7, %xmm2\r
-       movups  112(ctx), %xmm6\r
-       movups  96(ctx), %xmm7\r
-#endif\r
-\r
-       pxor    iv, %xmm1                               // obuf[0] ^= *iv; \r
-       movups  (ibuf), iv                              // ibuf[0]\r
-       pxor    iv, %xmm2                               // obuf[1] ^= ibuf[0]\r
-       movups  16(ibuf), iv                    // *iv = ibuf[1]\r
-\r
-       movups  %xmm1, (obuf)                   // write obuf[0]\r
-       movups  %xmm2, 16(obuf)                 // write obuf[1]\r
-\r
-       add             $32, ibuf                               // ibuf += 2\r
-       add             $32, obuf                               // obuf += 2\r
-\r
-9:\r
-       test    $1, num_blk                             // check whether num_blk has residual 1 block\r
-       je              L_HW_cbc_done                   // if num_blk == 0, no need for residual processing code\r
-       \r
-       movups  (ibuf), %xmm2                           // tmp = ibuf\r
-       // aes_decrypt\r
-       pxor    %xmm3, %xmm2\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm7, %xmm2\r
-#if defined    __x86_64__\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm12, %xmm2\r
-    aesdeclast  %xmm13, %xmm2\r
-#else\r
-       movups  80(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  64(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  48(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  32(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  16(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  (ctx), %xmm1\r
-    aesdeclast  %xmm1, %xmm2\r
-#endif\r
-\r
-       pxor    iv, %xmm2                       // *obuf ^= *iv; \r
-       movups  (ibuf), iv                      // *iv = *ibuf;\r
-       movups  %xmm2, (obuf)           // write *obuf\r
-\r
-       jmp             L_HW_cbc_done\r
-\r
-       //\r
-       // aes-192 decrypt_cbc operation, after completion, branch to L_HW_cbc_done\r
-       //\r
-\r
-L_decrypt_192:\r
-\r
-       cmp             $1, num_blk\r
-       jl              L_HW_cbc_done                   // if num_blk < 1, early return\r
-\r
-	// aes-192 decrypt expanded keys\r
-       movups  192(ctx), %xmm3\r
-       movups  176(ctx), %xmm4\r
-       movups  160(ctx), %xmm5\r
-       movups  144(ctx), %xmm6\r
-       movups  128(ctx), %xmm7\r
-#if defined    __x86_64__\r
-       movups  112(ctx), %xmm8\r
-       movups  96(ctx), %xmm9\r
-       movups  80(ctx), %xmm10\r
-       movups  64(ctx), %xmm11\r
-       movups  48(ctx), %xmm12\r
-       movups  32(ctx), %xmm13\r
-       movups  16(ctx), %xmm14\r
-       movups  (ctx), %xmm15\r
-#endif\r
-\r
-	// performs 4-block decryption per iteration to exploit parallelism across aes_decrypt calls\r
-\r
-       //              while ((num_blk-=4)>=0) {\r
-       //                      aes_decrypt(ibuf, obuf, ctx);\r
-       //                      aes_decrypt(ibuf+1, obuf+1, ctx);\r
-       //                      aes_decrypt(ibuf+2, obuf+2, ctx);\r
-       //                      aes_decrypt(ibuf+3, obuf+3, ctx);\r
-	//			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];\r
-       //                      *iv = ibuf[3]; ibuf += 4; obuf += 4;\r
-       //              }\r
-\r
-       sub             $4, num_blk                                     // pre decrement num_blk by 4\r
-       jl              9f                                                      // if num_blk < 4, skip the per-4-blocks processing code\r
-0:\r
-\r
-#if defined    __x86_64__\r
-\r
-       movups  (ibuf), %xmm1                           // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                         // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm14                        // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm15                        // tmp = 4th ibuf\r
-\r
-       // aes_decrypt, for x86_64, the expanded keys are already stored in xmm3-xmm13\r
-	// use %xmm12/%xmm13 as dynamic keys in the middle, restored afterwards\r
-\r
-       // round 0 for 4 blocks\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm14\r
-       pxor    %xmm3, %xmm15\r
-\r
-       // round 1 for 4 blocks\r
-    aesdec  %xmm4, %xmm1\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm4, %xmm14\r
-    aesdec  %xmm4, %xmm15\r
-\r
-       // round 2 for 4 blocks\r
-    aesdec  %xmm5, %xmm1\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm5, %xmm14\r
-    aesdec  %xmm5, %xmm15\r
-\r
-       // round 3 for 4 blocks\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm14\r
-    aesdec  %xmm6, %xmm15\r
-\r
-       // round 4 for 4 blocks\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm14\r
-    aesdec  %xmm7, %xmm15\r
-\r
-       // round 5 for 4 blocks\r
-    aesdec  %xmm8, %xmm1\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm8, %xmm14\r
-    aesdec  %xmm8, %xmm15\r
-\r
-       // round 6 for 4 blocks\r
-    aesdec  %xmm9, %xmm1\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm9, %xmm14\r
-    aesdec  %xmm9, %xmm15\r
-\r
-       // round 7 for 4 blocks\r
-    aesdec  %xmm10, %xmm1\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm10, %xmm14\r
-    aesdec  %xmm10, %xmm15\r
-\r
-       // round 8 for 4 blocks\r
-    aesdec  %xmm11, %xmm1\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm11, %xmm14\r
-    aesdec  %xmm11, %xmm15\r
-\r
-       // round 9 for 4 blocks\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-\r
-       movups  16(ctx), %xmm12\r
-\r
-       // round A for 4 blocks\r
-    aesdec  %xmm13, %xmm1\r
-    aesdec  %xmm13, %xmm2\r
-    aesdec  %xmm13, %xmm14\r
-    aesdec  %xmm13, %xmm15\r
-\r
-       movups  (ctx), %xmm13\r
-\r
-       // round B for 4 blocks\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-\r
-       movups  48(ctx), %xmm12         // restore %xmm12 to its original key\r
-\r
-       // round C (last) for 4 blocks\r
-    aesdeclast  %xmm13, %xmm1\r
-    aesdeclast  %xmm13, %xmm2\r
-    aesdeclast  %xmm13, %xmm14\r
-    aesdeclast  %xmm13, %xmm15\r
-\r
-       movups  32(ctx), %xmm13         // restore %xmm13 to its original key\r
-\r
-       pxor    iv, %xmm1                               // obuf[0] ^= *iv; \r
-       movups  (ibuf), iv                              // ibuf[0]\r
-       pxor    iv, %xmm2                               // obuf[1] ^= ibuf[0] \r
-       movups  16(ibuf), iv                    // ibuf[1]\r
-       pxor    iv, %xmm14                              // obuf[2] ^= ibuf[1] \r
-       movups  32(ibuf), iv                    // ibuf[2] \r
-       pxor    iv, %xmm15                              // obuf[3] ^= ibuf[2] \r
-       movups  48(ibuf), iv                    // *iv = ibuf[3] \r
-\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm14, 32(obuf)                // write 3rd obuf\r
-       movups  %xmm15, 48(obuf)                // write 4th obuf\r
-\r
-       add             $64, ibuf                               // ibuf += 4; \r
-       add             $64, obuf                               // obuf += 4;   \r
-\r
-       sub             $4, num_blk                             // num_blk -= 4\r
-	jge		0b								// if num_blk >= 0, repeat the loop\r
-\r
-9:	add		$4, num_blk				// post increment num_blk by 4\r
-       je              L_HW_cbc_done                   // if num_blk == 0, prepare to return \r
-\r
-       movups  16(ctx), %xmm14                 // restore %xmm14 to its key\r
-       movups  (ctx), %xmm15                   // restore %xmm15 to its key\r
-\r
-#else\r
-\r
-       movups  (ibuf), %xmm1                   // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                 // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm4                 // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm5                 // tmp = 4th ibuf\r
-\r
-       // aes_decrypt\r
-       // for i386, sequentially load expanded keys into xmm6/xmm7\r
-       movups  176(ctx), %xmm6\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm4\r
-       pxor    %xmm3, %xmm5\r
-\r
-       movups  160(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  144(ctx), %xmm6\r
-       aesdec    %xmm7, %xmm1\r
-       aesdec    %xmm7, %xmm2\r
-       aesdec    %xmm7, %xmm4\r
-       aesdec    %xmm7, %xmm5\r
-\r
-       movups  128(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  112(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  96(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  80(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  64(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  48(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  32(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  16(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  0(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-    aesdeclast  %xmm7, %xmm1\r
-    aesdeclast  %xmm7, %xmm2\r
-    aesdeclast  %xmm7, %xmm4\r
-    aesdeclast  %xmm7, %xmm5\r
-\r
-       pxor    iv, %xmm1                               // 1st obuf ^= iv; \r
-       movups  (ibuf), iv                              // 1st memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm2                               // 2nd obuf ^= iv; \r
-       movups  16(ibuf), iv                    // 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm4                               // 3rd obuf ^= iv; \r
-       movups  32(ibuf), iv                    // 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm5                               // 4th obuf ^= iv; \r
-       movups  48(ibuf), iv                    // 4th memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm4, 32(obuf)                 // write 3rd obuf\r
-       movups  %xmm5, 48(obuf)                 // write 4th obuf\r
-\r
-       add             $64, ibuf                               // ibuf += AES_BLOCK_SIZE * 4; \r
-       add             $64, obuf                               // obuf += AES_BLOCK_SIZE * 4;  \r
-\r
-       sub             $4, num_blk                             // num_blk -= 4\r
-	jge		0b								// if num_blk >= 0, repeat the loop\r
-\r
-\r
-9:	add		$4, num_blk				// post increment num_blk by 4\r
-	je		L_HW_cbc_done			// if num_blk == 0, no further processing needed\r
-\r
-       movups  176(ctx), %xmm4\r
-       movups  160(ctx), %xmm5\r
-       movups  144(ctx), %xmm6\r
-       movups  128(ctx), %xmm7\r
-\r
-#endif\r
-\r
-       // per-block aes_decrypt_cbc loop\r
-\r
-0:\r
-       movups  (ibuf), %xmm2                           // tmp = ibuf\r
-\r
-       // aes_decrypt\r
-       pxor    %xmm3, %xmm2\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm7, %xmm2\r
-#if defined    __x86_64__\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm13, %xmm2\r
-    aesdec  %xmm14, %xmm2\r
-    aesdeclast  %xmm15, %xmm2\r
-#else\r
-       movups  112(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  96(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  80(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  64(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  48(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  32(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  16(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  (ctx), %xmm1\r
-    aesdeclast  %xmm1, %xmm2\r
-#endif\r
-\r
-       pxor    iv, %xmm2                       // obuf ^= iv; \r
-       movups  (ibuf), iv                      // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-\r
-       movups  %xmm2, (obuf)           // write obuf\r
-\r
-       add             $16, ibuf                               // ibuf += AES_BLOCK_SIZE; \r
-       add             $16, obuf                               // obuf += AES_BLOCK_SIZE;      \r
-       sub             $1, num_blk                             // num_blk --\r
-       jg              0b                                              // if num_blk > 0, repeat the loop\r
-\r
-       jmp             L_HW_cbc_done\r
-\r
-       //\r
-       // aes-256 decrypt_cbc operation, after completion, branch to L_HW_cbc_done\r
-       //\r
-\r
-L_decrypt_256:\r
-\r
-       cmp             $1, num_blk\r
-       jl              L_HW_cbc_done   \r
-\r
-       movups  224(ctx), %xmm3\r
-       movups  208(ctx), %xmm4\r
-       movups  192(ctx), %xmm5\r
-       movups  176(ctx), %xmm6\r
-       movups  160(ctx), %xmm7\r
-#if defined    __x86_64__\r
-       movups  144(ctx), %xmm8\r
-       movups  128(ctx), %xmm9\r
-       movups  112(ctx), %xmm10\r
-       movups  96(ctx), %xmm11\r
-       movups  80(ctx), %xmm12\r
-       movups  64(ctx), %xmm13\r
-       movups  48(ctx), %xmm14\r
-       movups  32(ctx), %xmm15\r
-//     movups  16(ctx), %xmm14\r
-//     movups  (ctx), %xmm15\r
-#endif\r
-\r
-#if defined    __x86_64__\r
-\r
-       sub             $4, num_blk                                     // pre decrement num_blk by 4\r
-       jl              9f                                                      // if num_blk < 4, skip the per-4-blocks processing code\r
-0:\r
-       movups  (ibuf), %xmm1                           // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                         // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm14                        // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm15                        // tmp = 4th ibuf\r
-\r
-       // aes_decrypt, for x86_64, the expanded keys are already stored in xmm3-xmm13\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm14\r
-       pxor    %xmm3, %xmm15\r
-\r
-    aesdec  %xmm4, %xmm1\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm4, %xmm14\r
-    aesdec  %xmm4, %xmm15\r
-\r
-    aesdec  %xmm5, %xmm1\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm5, %xmm14\r
-    aesdec  %xmm5, %xmm15\r
-\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm14\r
-    aesdec  %xmm6, %xmm15\r
-\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm14\r
-    aesdec  %xmm7, %xmm15\r
-\r
-    aesdec  %xmm8, %xmm1\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm8, %xmm14\r
-    aesdec  %xmm8, %xmm15\r
-\r
-    aesdec  %xmm9, %xmm1\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm9, %xmm14\r
-    aesdec  %xmm9, %xmm15\r
-\r
-    aesdec  %xmm10, %xmm1\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm10, %xmm14\r
-    aesdec  %xmm10, %xmm15\r
-\r
-    aesdec  %xmm11, %xmm1\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm11, %xmm14\r
-    aesdec  %xmm11, %xmm15\r
-\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-       movups  48(ctx), %xmm12\r
-\r
-    aesdec  %xmm13, %xmm1\r
-    aesdec  %xmm13, %xmm2\r
-    aesdec  %xmm13, %xmm14\r
-    aesdec  %xmm13, %xmm15\r
-       movups  32(ctx), %xmm13\r
-\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-       movups  16(ctx), %xmm12\r
-\r
-    aesdec  %xmm13, %xmm1\r
-    aesdec  %xmm13, %xmm2\r
-    aesdec  %xmm13, %xmm14\r
-    aesdec  %xmm13, %xmm15\r
-       movups  (ctx), %xmm13\r
-\r
-    aesdec  %xmm12, %xmm1\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm12, %xmm14\r
-    aesdec  %xmm12, %xmm15\r
-       movups  80(ctx), %xmm12\r
-\r
-    aesdeclast  %xmm13, %xmm1\r
-    aesdeclast  %xmm13, %xmm2\r
-    aesdeclast  %xmm13, %xmm14\r
-    aesdeclast  %xmm13, %xmm15\r
-       movups  64(ctx), %xmm13\r
-\r
-       pxor    iv, %xmm1                               // obuf ^= iv; \r
-       movups  (ibuf), iv                              // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm2                               // obuf ^= iv; \r
-       movups  16(ibuf), iv                    // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm14                              // obuf ^= iv; \r
-       movups  32(ibuf), iv                    // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm15                              // obuf ^= iv; \r
-       movups  48(ibuf), iv                    // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm14, 32(obuf)                // write 3rd obuf\r
-       movups  %xmm15, 48(obuf)                // write 4th obuf\r
-\r
-       add             $64, ibuf                               // ibuf += AES_BLOCK_SIZE*4; \r
-       add             $64, obuf                               // obuf += AES_BLOCK_SIZE*4;    \r
-\r
-       sub             $4, num_blk                             // num_blk -= 4\r
-	jge		0b								// if num_blk >= 0, repeat the loop\r
-\r
-9:	add		$4, num_blk				// post increment num_blk by 4\r
-	je		L_HW_cbc_done			// if num_blk == 0, no further processing needed\r
-\r
-       movups  48(ctx), %xmm14\r
-       movups  32(ctx), %xmm15\r
-\r
-#else\r
-\r
-       sub             $4, num_blk                             // pre decrement num_blk by 4\r
-	jl		9f								// if num_blk < 4, skip the per-4-blocks processing code\r
-0:\r
-       movups  (ibuf), %xmm1                   // tmp = 1st ibuf\r
-       movups  16(ibuf), %xmm2                 // tmp = 2nd ibuf\r
-       movups  32(ibuf), %xmm4                 // tmp = 3rd ibuf\r
-       movups  48(ibuf), %xmm5                 // tmp = 4th ibuf\r
-\r
-       // aes_decrypt\r
-       // for i386, sequentially load expanded keys into xmm6/xmm7\r
-       movups  208(ctx), %xmm6\r
-       pxor    %xmm3, %xmm1\r
-       pxor    %xmm3, %xmm2\r
-       pxor    %xmm3, %xmm4\r
-       pxor    %xmm3, %xmm5\r
-\r
-       movups  192(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  176(ctx), %xmm6\r
-       aesdec  %xmm7, %xmm1\r
-       aesdec  %xmm7, %xmm2\r
-       aesdec  %xmm7, %xmm4\r
-       aesdec  %xmm7, %xmm5\r
-\r
-       movups  160(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  144(ctx), %xmm6\r
-       aesdec  %xmm7, %xmm1\r
-       aesdec  %xmm7, %xmm2\r
-       aesdec  %xmm7, %xmm4\r
-       aesdec  %xmm7, %xmm5\r
-\r
-       movups  128(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  112(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  96(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  80(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  64(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  48(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  32(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-       movups  16(ctx), %xmm6\r
-    aesdec  %xmm7, %xmm1\r
-    aesdec  %xmm7, %xmm2\r
-    aesdec  %xmm7, %xmm4\r
-    aesdec  %xmm7, %xmm5\r
-\r
-       movups  0(ctx), %xmm7\r
-    aesdec  %xmm6, %xmm1\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm6, %xmm4\r
-    aesdec  %xmm6, %xmm5\r
-\r
-    aesdeclast  %xmm7, %xmm1\r
-    aesdeclast  %xmm7, %xmm2\r
-    aesdeclast  %xmm7, %xmm4\r
-    aesdeclast  %xmm7, %xmm5\r
-\r
-       pxor    iv, %xmm1                               // 1st obuf ^= iv; \r
-       movups  (ibuf), iv                              // 1st memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm2                               // 2nd obuf ^= iv; \r
-       movups  16(ibuf), iv                    // 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm4                               // 3rd obuf ^= iv; \r
-       movups  32(ibuf), iv                    // 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       pxor    iv, %xmm5                               // 4th obuf ^= iv; \r
-       movups  48(ibuf), iv                    // 4th memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-       movups  %xmm1, (obuf)                   // write 1st obuf\r
-       movups  %xmm2, 16(obuf)                 // write 2nd obuf\r
-       movups  %xmm4, 32(obuf)                 // write 3rd obuf\r
-       movups  %xmm5, 48(obuf)                 // write 4th obuf\r
-\r
-       add             $64, ibuf                               // ibuf += AES_BLOCK_SIZE * 4; \r
-       add             $64, obuf                               // obuf += AES_BLOCK_SIZE * 4;  \r
-\r
-       sub             $4, num_blk                             // num_blk -= 4\r
-	jge		0b								// if num_blk >= 0, repeat the loop\r
-\r
-\r
-9:	add		$4, num_blk				// post increment num_blk by 4\r
-	je		L_HW_cbc_done			// if num_blk == 0, no further processing needed\r
-\r
-       movups  208(ctx), %xmm4\r
-       movups  192(ctx), %xmm5\r
-       movups  176(ctx), %xmm6\r
-       movups  160(ctx), %xmm7\r
-\r
-#endif\r
-\r
-0:\r
-       movups  (ibuf), %xmm2                           // tmp = ibuf\r
-\r
-       // aes_decrypt\r
-       pxor    %xmm3, %xmm2\r
-    aesdec  %xmm4, %xmm2\r
-    aesdec  %xmm5, %xmm2\r
-    aesdec  %xmm6, %xmm2\r
-    aesdec  %xmm7, %xmm2\r
-#if defined    __x86_64__\r
-    aesdec  %xmm8, %xmm2\r
-    aesdec  %xmm9, %xmm2\r
-    aesdec  %xmm10, %xmm2\r
-    aesdec  %xmm11, %xmm2\r
-    aesdec  %xmm12, %xmm2\r
-    aesdec  %xmm13, %xmm2\r
-    aesdec  %xmm14, %xmm2\r
-    aesdec  %xmm15, %xmm2\r
-#else\r
-       movups  144(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  128(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  112(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  96(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  80(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  64(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  48(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  32(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-#endif\r
-       movups  16(ctx), %xmm1\r
-    aesdec  %xmm1, %xmm2\r
-       movups  (ctx), %xmm1\r
-    aesdeclast  %xmm1, %xmm2\r
-\r
-       pxor    iv, %xmm2                       // obuf ^= iv; \r
-       movups  (ibuf), iv                      // memcpy(iv, tmp, AES_BLOCK_SIZE);\r
-\r
-       movups  %xmm2, (obuf)           // write obuf\r
-\r
-       add             $16, ibuf                               // ibuf += AES_BLOCK_SIZE; \r
-       add             $16, obuf                               // obuf += AES_BLOCK_SIZE;      \r
-       sub             $1, num_blk                             // num_blk --\r
-       jg              0b                                              // if num_blk > 0, repeat the loop\r
-\r
-       jmp             L_HW_cbc_done\r
-\r
-       //\r
-       // --------- END of aes_decrypt_cbc_hw  -------------------\r
-       //\r
diff --git a/bsd/crypto/aes/i386/aesxts.c b/bsd/crypto/aes/i386/aesxts.c
deleted file mode 100644 (file)
index c0eaaa6..0000000
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2010 Apple Inc. All Rights Reserved.
- * 
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#include "aesxts.h"
-#include <sys/types.h>
-#include <string.h>
-#include <libkern/libkern.h>
-
-int 
-aes_encrypt_key(const uint8_t *key, int key_len, aesedp_encrypt_ctx cx[1]);
-
-int 
-aes_decrypt_key(const uint8_t *key, int key_len, aesedp_decrypt_ctx cx[1]);
-
-int
-aes_encrypt(const uint8_t *Plaintext, uint8_t *Ciphertext, aesedp_encrypt_ctx *ctx);
-
-int
-aes_decrypt(const uint8_t *Ciphertext, uint8_t *Plaintext, aesedp_decrypt_ctx *ctx);
-
-
-/* error codes [will be expanded in future releases] */
-enum {
-   CRYPT_OK=0,             /* Result OK */
-   CRYPT_ERROR=1,            /* Generic Error */
-   CRYPT_INVALID_KEYSIZE=3,  /* Invalid key size given */
-   CRYPT_INVALID_ARG=16,      /* Generic invalid argument */
-};
-
-static int 
-aesedp_keysize(int *keysize)
-{
-       switch (*keysize) {
-               case 16:
-               case 24:
-               case 32:
-                       return CRYPT_OK;
-               default:
-                       return CRYPT_INVALID_KEYSIZE;
-       }
-}
-
-static int 
-aesedp_setup(const uint8_t *key, int keylen, int num_rounds __unused, aesedp_ctx *skey)
-{
-       aesedp_ctx *ctx = (aesedp_ctx *) skey;
-       int retval;
-
-       if((retval = aesedp_keysize(&keylen)) != CRYPT_OK) return retval;
-       if((retval = aes_encrypt_key(key, keylen, &ctx->encrypt)) != CRYPT_OK) return CRYPT_ERROR;
-       if((retval = aes_decrypt_key(key, keylen, &ctx->decrypt)) != CRYPT_OK) return CRYPT_ERROR;
-       return CRYPT_OK;
-}
-
-#ifdef ZZZNEVER
-static int 
-aesedp_ecb_encrypt(const uint8_t *pt, uint8_t *ct, aesedp_ctx *skey)
-{
-       aesedp_ctx *ctx = (aesedp_ctx *) skey;
-       return aes_encrypt(pt, ct, &ctx->encrypt);
-}
-
-
-
-static int 
-aesedp_ecb_decrypt(const uint8_t *ct, uint8_t *pt, aesedp_ctx *skey)
-{
-       return aes_decrypt(ct, pt, &skey->decrypt);
-}
-#endif
-
-
-static void 
-aesedp_done(aesedp_ctx *skey __unused)
-{
-}
-
-/** Start XTS mode
-   @param cipher      The index of the cipher to use
-   @param key1        The encrypt key
-   @param key2        The tweak encrypt key
-   @param keylen      The length of the keys (each) in octets
-   @param num_rounds  The number of rounds for the cipher (0 == default)
-   @param xts         [out] XTS structure
-   Returns CRYPT_OK upon success.
-*/
-
-uint32_t
-xts_start(uint32_t cipher, // ignored - we're doing this for xts-aes only
-                                               const uint8_t *IV __unused, // ignored
-                                               const uint8_t *key1, int keylen,
-                                               const uint8_t *key2, int tweaklen __unused, // both keys are the same size for xts
-                                               uint32_t num_rounds, // ignored
-                                               uint32_t options __unused,    // ignored
-                                               symmetric_xts *xts)
-{
-   uint32_t err;
-
-   /* check inputs */
-   if((key1 == NULL)|| (key2 == NULL) || (xts == NULL)) return CRYPT_INVALID_ARG;
-
-   /* schedule the two ciphers */
-   if ((err = aesedp_setup(key1, keylen, num_rounds, &xts->key1)) != 0) {
-      return err;
-   }
-   if ((err = aesedp_setup(key2, keylen, num_rounds, &xts->key2)) != 0) {
-      return err;
-   }
-   xts->cipher = cipher;
-
-   return err;
-}
-
-
-
-
-/** multiply by x 
-  @param I      The value to multiply by x (LFSR shift)
-*/
-#if defined __x86_64__ || defined __i386__
-extern void xts_mult_x(uint8_t *I);
-#else
-static void xts_mult_x(uint8_t *I)
-{
-  uint32_t x;
-  uint8_t t, tt;
-
-  for (x = t = 0; x < 16; x++) {
-     tt   = I[x] >> 7;
-     I[x] = ((I[x] << 1) | t) & 0xFF;
-     t    = tt;
-  }
-  if (tt) {
-     I[0] ^= 0x87;
-  } 
-}
-#endif
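[Editorial note] For reference: xts_mult_x above is doubling in GF(2^128) under the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1. Shifting the 128-bit tweak left by one bit multiplies it by x; when the bit shifted out of the top is 1, XORing 0x87 (the byte encoding of x^7 + x^2 + x + 1) into byte 0 performs the modular reduction. The loop treats I[0] as the least significant byte, matching the little-endian convention of XTS-AES.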
-
-#if defined __x86_64__ || defined __i386__
-extern int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
-extern int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim);
-#else
-static int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx)
-{
-   uint32_t x;
-   uint32_t err;
-
-   /* tweak encrypt block i */
-   for (x = 0; x < 16; x += sizeof(uint64_t)) {
-      *((uint64_t*)&C[x]) = *((uint64_t*)&P[x]) ^ *((uint64_t*)&T[x]);
-   }
-     
-   if ((err = aes_encrypt(C, C, ctx)) != CRYPT_OK) {
-      return CRYPT_INVALID_KEYSIZE;
-   }
-
-   for (x = 0; x < 16; x += sizeof(uint64_t)) {
-      *((uint64_t*)&C[x]) ^= *((uint64_t*)&T[x]);
-   }
-
-   /* LFSR the tweak */
-   xts_mult_x(T);
-
-   return CRYPT_OK;
-}   
-#endif
-
-/** XTS Encryption
-  @param pt     [in]  Plaintext
-  @param ptlen  Length of plaintext (and ciphertext)
-  @param ct     [out] Ciphertext
-  @param tweak  [in] The 128-bit encryption tweak (e.g. sector number)
-  @param xts    The XTS structure
-  Returns CRYPT_OK upon success
-*/
-int xts_encrypt(
-   const uint8_t *pt, unsigned long ptlen,
-         uint8_t *ct,
-   const uint8_t *tweak,
-         symmetric_xts *xts)
-{
-   aesedp_encrypt_ctx *encrypt_ctx = &xts->key1.encrypt;
-   uint8_t PP[16], CC[16], T[16];
-   uint32_t i, m, mo, lim;
-   uint32_t err;
-
-   /* check inputs */
-   if((pt == NULL) || (ct == NULL)|| (tweak == NULL) || (xts == NULL)) return 1;
-
-   /* get number of blocks */
-   m  = ptlen >> 4;
-   mo = ptlen & 15;
-
-       /* must have at least one full block */
-   if (m == 0) {
-      return CRYPT_INVALID_ARG;
-   }
-
-   /* encrypt the tweak */
-   if ((err = aes_encrypt(tweak, T, &xts->key2.encrypt)) != 0) {
-      return CRYPT_INVALID_KEYSIZE;
-   }
-
-   /* for i = 0 to m-2 do */
-   if (mo == 0) {
-      lim = m;
-   } else {
-      lim = m - 1;
-   }
-
-#if defined __x86_64__ || defined __i386__
-       if (lim>0) {
-               err = tweak_crypt_group(pt, ct, T, encrypt_ctx, lim);
-       ct += (lim<<4);
-       pt += (lim<<4);
-       }
-#else
-   for (i = 0; i < lim; i++) {
-      err = tweak_crypt(pt, ct, T, encrypt_ctx);
-      ct += 16;
-      pt += 16;
-   }
-#endif
-   
-   /* if ptlen is not a multiple of 16 */
-   if (mo > 0) {
-      /* CC = tweak encrypt block m-1 */
-      if ((err = tweak_crypt(pt, CC, T, encrypt_ctx)) != 0) {
-         return err;
-      }
-
-      /* Cm = first ptlen % 16 bytes of CC */
-      for (i = 0; i < mo; i++) {
-          PP[i] = pt[16+i];
-          ct[16+i] = CC[i];
-      }
-
-      for (; i < 16; i++) {
-          PP[i] = CC[i];
-      }
-
-      /* Cm-1 = Tweak encrypt PP */
-      if ((err = tweak_crypt(PP, ct, T, encrypt_ctx)) != 0) {
-         return err;
-      }
-   }
-
-   return err;
-}
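[Editorial note] The mo > 0 branch above is XTS ciphertext stealing: the last full plaintext block is tweak-encrypted into CC; the first ptlen % 16 bytes of CC become the final (partial) ciphertext block, while the tail of CC pads the leftover plaintext bytes out to a full block PP, which is tweak-encrypted again to produce the next-to-last ciphertext block. The output is therefore exactly ptlen bytes, with no padding.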
-
-#if defined __x86_64__ || defined __i386__
-extern int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
-extern int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim);
-#else
-static int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx)
-{
-   uint32_t x;
-   uint32_t err;
-
-   /* tweak encrypt block i */
-   for (x = 0; x < 16; x += sizeof(uint64_t)) {
-      *((uint64_t*)&P[x]) = *((uint64_t*)&C[x]) ^ *((uint64_t*)&T[x]);
-   }
-     
-   err = aes_decrypt(P, P, ctx);  
-
-   for (x = 0; x < 16; x += sizeof(uint64_t)) {
-      *((uint64_t*)&P[x]) ^=  *((uint64_t*)&T[x]);
-   }
-
-   /* LFSR the tweak */
-   xts_mult_x(T);
-
-   return err;
-}   
-#endif
-
-/** XTS Decryption
-  @param ct     [in] Ciphertext
-  @param ptlen  Length of plaintext (and ciphertext)
-  @param pt     [out]  Plaintext
-  @param tweak  [in] The 128-bit encryption tweak (e.g. sector number)
-  @param xts    The XTS structure
-  Returns CRYPT_OK upon success
-*/
-
-int xts_decrypt(
-   const uint8_t *ct, unsigned long ptlen,
-         uint8_t *pt,
-   const uint8_t *tweak,
-         symmetric_xts *xts)
-{
-   aesedp_decrypt_ctx *decrypt_ctx = &xts->key1.decrypt;
-   uint8_t PP[16], CC[16], T[16];
-   uint32_t i, m, mo, lim;
-   uint32_t err;
-
-   /* check inputs */
-   if((pt == NULL) || (ct == NULL)|| (tweak == NULL) || (xts == NULL)) return 1;
-
-   /* get number of blocks */
-   m  = ptlen >> 4;
-   mo = ptlen & 15;
-
-   /* must have at least one full block */
-   if (m == 0) {
-      return CRYPT_INVALID_ARG;
-   }
-
-   /* encrypt the tweak (yes, encrypt - XTS encrypts the tweak even when decrypting) */
-   if ((err = aes_encrypt(tweak, T, &xts->key2.encrypt)) != 0) {
-      return CRYPT_INVALID_KEYSIZE;
-   }
-
-   /* for i = 0 to m-2 do */
-   if (mo == 0) {
-      lim = m;
-   } else {
-      lim = m - 1;
-   }
-
-#if defined __x86_64__ || defined __i386__
-       if (lim>0) {
-               err = tweak_uncrypt_group(ct, pt, T, decrypt_ctx, lim);
-       ct += (lim<<4);
-       pt += (lim<<4);
-       }
-#else
-   for (i = 0; i < lim; i++) {
-      err = tweak_uncrypt(ct, pt, T, decrypt_ctx);
-      ct += 16;
-      pt += 16;
-   }
-#endif
-   
-   /* if ptlen is not a multiple of 16 */
-   if (mo > 0) {
-      memcpy(CC, T, 16);
-      xts_mult_x(CC);
-
-      /* PP = tweak decrypt block m-1 */
-      if ((err = tweak_uncrypt(ct, PP, CC, decrypt_ctx)) != CRYPT_OK) {
-        return err;
-      }
-
-      /* Pm = first ptlen % 16 bytes of PP */
-      for (i = 0; i < mo; i++) {
-          CC[i]    = ct[16+i];
-          pt[16+i] = PP[i];
-      }
-      for (; i < 16; i++) {
-          CC[i] = PP[i];
-      }
-
-      /* Pm-1 = Tweak uncrypt CC */
-      if ((err = tweak_uncrypt(CC, pt, T, decrypt_ctx)) != CRYPT_OK) {
-        return err;
-      }
-   }
-
-   return CRYPT_OK;
-}
-
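-/*
- * Worked example of the decrypt tail above (illustrative): with ptlen = 20,
- * lim = 0 and the loop is skipped.  ct[0..15] was produced with the later
- * of the two final tweaks, so it is decrypted first, using CC = T * alpha
- * (xts_mult_x), giving PP; pt[16..19] takes PP[0..3]; the block rebuilt
- * from ct[16..19] and PP[4..15] is then decrypted with the original T to
- * recover pt[0..15] - the reverse of the tweak order used by xts_encrypt.
- */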
-
-
-void xts_done(symmetric_xts *xts)
-{
-   if(xts == NULL) return;
-   aesedp_done(&xts->key1);
-   aesedp_done(&xts->key2);
-}
-
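For orientation, a minimal usage sketch of the XTS-AES interface removed in this commit (declared in aesxts.h below). Illustrative only: the key sizes, 512-byte sector, and zero tweak are assumptions, and error handling is elided; cipher/IV/num_rounds/options are ignored per the header comments.

    symmetric_xts xts;
    uint8_t key1[32], key2[32];   /* two independent AES keys, same size per the header */
    uint8_t tweak[16] = {0};      /* e.g. the little-endian sector number */
    uint8_t pt[512], ct[512];     /* one disk sector */

    xts_start(0, NULL, key1, sizeof(key1), key2, sizeof(key2), 0, 0, &xts);
    xts_encrypt(pt, sizeof(pt), ct, tweak, &xts);
    xts_decrypt(ct, sizeof(ct), pt, tweak, &xts);
    xts_done(&xts);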
diff --git a/bsd/crypto/aes/i386/aesxts.h b/bsd/crypto/aes/i386/aesxts.h
deleted file mode 100644 (file)
index fe76180..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2010 Apple Inc. All Rights Reserved.
- * 
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-/*
- *  aesxts.h
- *
- *
- */
-
-#include "stdint.h"
-
-
-#ifndef _AESXTS_H
-#define _AESXTS_H
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-/*
- * The context for XTS-AES
- */
-
-
-#define KS_LENGTH       60
-
-typedef struct {   
-       uint32_t ks[KS_LENGTH];
-       uint32_t rn;
-} aesedp_encrypt_ctx;
-
-typedef struct {   
-       uint32_t ks[KS_LENGTH];
-       uint32_t rn;
-} aesedp_decrypt_ctx;
-
-typedef struct {   
-       aesedp_decrypt_ctx decrypt;
-       aesedp_encrypt_ctx encrypt;
-} aesedp_ctx;
-
-// xts mode context
-
-typedef struct {
-   aesedp_ctx                          key1, key2;
-   uint32_t                                            cipher; // ignored - kept to match the library interface; only AES is used here
-} symmetric_xts;
-
-
-/*
- * These are the interfaces required for XTS-AES support
- */
-uint32_t
-xts_start(uint32_t cipher, // ignored - we're doing this for xts-aes only
-                                               const uint8_t *IV, // ignored
-                                               const uint8_t *key1, int keylen,
-                                               const uint8_t *key2, int tweaklen, // both keys are the same size for xts
-                                               uint32_t num_rounds, // ignored
-                                               uint32_t options,    // ignored
-                                               symmetric_xts *xts);
-
-int xts_encrypt(
-   const uint8_t *pt, unsigned long ptlen,
-         uint8_t *ct,
-   const uint8_t *tweak, // this can be considered the sector IV for this use
-         symmetric_xts *xts);
-                
-int xts_decrypt(
-   const uint8_t *ct, unsigned long ptlen,
-         uint8_t *pt,
-   const uint8_t *tweak, // this can be considered the sector IV for this use
-         symmetric_xts *xts);
-
-
-void xts_done(symmetric_xts *xts);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* _AESXTS_H */
\ No newline at end of file
diff --git a/bsd/crypto/aes/i386/aesxts_asm.s b/bsd/crypto/aes/i386/aesxts_asm.s
deleted file mode 100644 (file)
index ec6b924..0000000
+++ /dev/null
@@ -1,1305 +0,0 @@
-/*
-       This file "aesxts_asm.s" provides x86_64 / i386 optimizations of the following functions
-
-       0. xts_mult_x_on_xmm7 : a code macro that is used throughout all other functions
-       1. void xts_mult_x(uint8_t *I);
-       2. int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
-       3. int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim); 
-       4. int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
-       5. int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim); 
-
-       This file should be compiled together with xtsClearC.c
-
-       functions 1, 2, and 4 replace the corresponding C functions in xtsClearC.c on the x86_64/i386 architectures
-       functions 3 and 5 are only given here, with no C implementation; they are called from xts_encrypt/xts_decrypt (xtsClearC.c)
-         - C code for functions 3 and 5 could be added later to ease porting to other architectures
-
-       cclee 4-29-10
-
-*/
-
-#ifdef KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-#define        CRYPT_OK        0               // cannot include "crypt.h", in which CRYPT_OK is defined in an enum
-
-/*
-       The following macro is used throughout the functions in this file.
-       It is the core function within the function xts_mult_x defined in (xtsClearC.c)
-
-       upon entry, %xmm7 = the input tweak (128-bit), 
-       on return, %xmm7 = the updated tweak (128-bit)
-       the macro uses %xmm1/%xmm2/%ecx in the computation
-       the operation can be described as follows :
-       0. let x = %xmm7;                                       // 128-bit little-endian input 
-       1. x = rotate_left(x,1);                        // rotate left by 1 bit
-       2. if (x&1) x ^= 0x0000...0086;         // if least significant bit = 1, least significant byte ^= 0x86;
-       3. return x; 
-
-       It's a pity that SSE does not support shifting of a whole 128-bit xmm register.
-       The workaround is 
-               1. using two parallel quad-word (8-byte) shifts, one for the two low 63-bit fields, one for the two leading bits
-               2. manipulating the shifted quad words to form the 128-bit shifted result.
-
-       Input : %xmm7
-       Output : %xmm7
-       Used : %xmm1/%xmm2/%ecx
-
-       The macro is good for both x86_64 and i386. 
-
-*/     
-
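-/*
-       For reference, a portable C sketch of the same tweak update (illustrative
-       only). It is equivalent to the rotate-then-xor-0x86 description above:
-       the bit rotated into the bottom supplies the missing 0x01, so the whole
-       operation is a plain left shift followed by a conditional xor with 0x87.
-
-       static void xts_mult_x(uint8_t *I)      // I = 16-byte tweak, byte 0 least significant
-       {
-          uint32_t x;
-          uint8_t t, tt;
-
-          for (x = t = 0; x < 16; x++) {
-             tt   = I[x] >> 7;                  // bit carried out of this byte
-             I[x] = (uint8_t)((I[x] << 1) | t); // shift left, pull in the previous carry
-             t    = tt;
-          }
-          if (tt)
-             I[0] ^= 0x87;                      // reduce modulo x^128 + x^7 + x^2 + x + 1
-       }
-*/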
-       .macro          xts_mult_x_on_xmm7                      // input : x = %xmm7, MS = most significant, LS = least significant
-       movaps          %xmm7, %xmm1                            // %xmm1 = a copy of x 
-       movaps          %xmm7, %xmm2                            // %xmm2 = a copy of x
-       psllq           $$1, %xmm7                                      // 1-bit left shift of 2 quad words (x1<<1, x0<<1), zero-filled 
-       psrlq           $$63, %xmm1                                     // 2 leading bits, each in the least significant bit of a quad word 
-       psrad           $$31, %xmm2                                     // the MS 32-bit will be either 0 or -1, depending on the MS bit of x
-       pshufd          $$0xc6, %xmm1, %xmm1            // switch the positions of the 2 leading bits
-       pshufd          $$0x03, %xmm2, %xmm2            // the LS 32-bit will be either 0 or -1, depending on the MS bit of x
-       por                     %xmm1, %xmm7                            // we finally have %xmm7 = rotate_left(x,1);
-       movl            $$0x86, %ecx                            // a potential byte to xor the bottom byte
-       movd            %ecx, %xmm1                                     // copy it to %xmm1, the other is 0
-       pand            %xmm2, %xmm1                            // %xmm1 = 0 or 0x86, depending on the MS bit of x
-       pxor            %xmm1, %xmm7                            // rotate_left(x,1) ^= 0 or 0x86 depending on the MS bit of x
-       .endm
-
-
-/* 
-       function : void xts_mult_x(uint8_t *I);
-
-       1. load (__m128*) (I) into xmm7
-       2. macro xts_mult_x_on_xmm7 (i/o @ xmm7, used xmm1/xmm2/ecx) 
-       3. save output (%xmm7) to memory pointed by I
-
-       input : 16-byte memory pointed by I
-       output : same 16-byte memory pointed by I
-
-       if kernel code, xmm1/xmm2/xmm7 saved and restored
-       other used registers : eax/ecx
-
- */    
-       .text
-       .align  4,0x90
-       .globl  _xts_mult_x
-_xts_mult_x:
-
-#if defined __x86_64__
-       #define I       %rdi                                            // 1st argument at %rdi for x86_64
-       #define sp      %rsp
-#else
-       mov             4(%esp), %eax                                   // 1st argument at stack, offset 4 for ret_addr for i386
-       #define I       %eax
-       #define sp      %esp
-#endif 
-
-       // if KERNEL code, allocate memory and save xmm1/xmm2/xmm7
-#ifdef KERNEL
-#if defined __x86_64__
-       sub             $0x38, sp                                               // 8-bytes alignment + 3 * 16 bytes     
-#else
-       sub             $0x3c, sp                                               // 12-bytes alignment + 3 * 16 bytes 
-#endif
-       movaps  %xmm1, (sp)
-       movaps  %xmm2, 16(sp)
-       movaps  %xmm7, 32(sp)
-#endif
-
-       // load, compute, and save
-       movups  (I), %xmm7                                              // load input tweak 128-bit into %xmm7
-       xts_mult_x_on_xmm7                                              // the macro (also used else where) will update %xmm7 as the output
-       movups  %xmm7, (I)                                              // save the xts_mult_x output 
-
-       // if KERNEL code, restore xmm1/xmm2/xmm7 and deallocate stack memory
-#ifdef KERNEL
-       movaps  (sp), %xmm1
-       movaps  16(sp), %xmm2
-       movaps  32(sp), %xmm7
-#if defined __x86_64__
-       add             $0x38, sp                                               // 8-bytes alignment + 3 * 16 bytes     
-#else
-       add             $0x3c, sp                                               // 12-bytes alignment + 3 * 16 bytes    
-#endif
-#endif
-
-       ret                                                                             // return
-
-       #undef  I
-       #undef  sp
-
-/* 
-       The following is x86_64/i386 assembly implementation of 
-
-       int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
-
-       Its C code implementation is given in xtsClearC.c
-
-       all pointers P/C/T points to a block of 16 bytes. In the following description, P/C/T represent 128-bit data.
-
-       The operation of tweak_crypt
-
-       1. C = P ^ T
-       2. err = aes_encrypt(C, C, ctx); if (err != CRYPT_OK) return err;
-       3. C = C ^ T
-       4. xts_mult_x(T)
-       5. return CRYPT_OK;
-
-       The following is the assembly implementation flow
-
-       1. save used xmm registers (xmm1/xmm7) if kernel code 
-       2. load xmm1 = P, xmm7 = T
-       3. xmm1 = C = P ^ T
-       4. write xmm1 to C
-       5. call aes_encrypt(C,C,ctx); note that it will use aesni if available; the xmm registers return intact
-       6. load xmm1 = C
-       7. xmm1 = C = C^T = xmm1 ^ xmm7
-       8. write xmm1 to C
-       9. update T (in xmm7) via xts_mult_x macro
-       a. restore xmm registers (xmm1/xmm7) if kernel code
-       b. return CRYPT_OK (in eax) 
-
-       Note: used xmm registers : xmm1/xmm2/xmm7, xmm2 in xts_mult_x macro
-
-*/
-
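-/*
-       For porting reference, a C sketch of the tweak_crypt operation described
-       above (illustrative only; it mirrors the structure of the generic
-       tweak_uncrypt in xtsClearC.c, with aes_encrypt in place of aes_decrypt):
-
-       static int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx)
-       {
-          uint32_t x;
-          uint32_t err;
-
-          for (x = 0; x < 16; x += sizeof(uint64_t))           // C = P ^ T
-             *((uint64_t*)&C[x]) = *((uint64_t*)&P[x]) ^ *((uint64_t*)&T[x]);
-
-          err = aes_encrypt(C, C, ctx);
-
-          for (x = 0; x < 16; x += sizeof(uint64_t))           // C = C ^ T
-             *((uint64_t*)&C[x]) ^= *((uint64_t*)&T[x]);
-
-          xts_mult_x(T);                                       // LFSR the tweak
-          return err;
-       }
-*/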
-       .text
-       .align  4,0x90
-       .globl  _tweak_crypt
-_tweak_crypt:
-#if defined    __i386__
-
-       // push into stack for local use
-       push    %ebp
-       mov             %esp, %ebp
-       push    %ebx
-       push    %edi
-       push    %esi
-
-       // allocate stack memory for local use
-       sub             $12+16*4, %esp                          // 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
-
-       // load with called arguments
-       mov             8(%ebp), %eax                           // P, we need this only briefly, so eax is fine
-       mov             12(%ebp), %edi                          // C
-       mov             16(%ebp), %ebx                          // T
-       mov             20(%ebp), %esi                          // ctx
-
-       #define P       %eax
-       #define C       %edi
-       #define T       %ebx
-       #define ctx     %esi
-       #define sp      %esp
-
-#else
-       // x86_64 calling argument order : rdi/rsi/rdx/rcx/r8
-
-       // push into stack for local use
-       push    %rbp
-       mov             %rsp, %rbp
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-
-       // allocate stack memory for local use; if kernel code, need to save/restore xmm registers
-#ifdef KERNEL
-       sub             $4*16, %rsp                                     // only need 3*16, add 16 extra to make the xmm save/restore common with i386
-#endif
-
-       // load with called arguments, release rdi/rsi/rdx/rcx/r8, as need to call aes_encrypt
-       mov             %rsi, %r13
-       mov             %rdx, %r14
-       mov             %rcx, %r15
-
-       #define P       %rdi
-       #define C       %r13
-       #define T       %r14
-       #define ctx     %r15
-       #define sp      %rsp
-
-#endif
-
-       // if kernel, save used xmm registers
-#ifdef KERNEL
-       movaps  %xmm1, 16(sp)
-       movaps  %xmm2, 32(sp)
-       movaps  %xmm7, 48(sp)
-#endif
-
-       movups  (P), %xmm1                                      // P
-       movups  (T), %xmm7                                      // T
-
-       // set up calling arguments for aes_encrypt
-#if defined    __i386__
-       mov             C, (%esp)                                       // C
-       mov             C, 4(%esp)                                      // C
-       mov             ctx, 8(%esp)                            // ctx
-#else
-       mov             C, %rdi                                         // C
-       mov             C, %rsi                                         // C
-       mov             ctx, %rdx                                       // ctx
-#endif
-
-       pxor    %xmm7, %xmm1                            // C = P ^ T    
-       movups  %xmm1, (C)                                      // save C into memory
-
-       call    _aes_encrypt                            // err = aes_encrypt(C,C,ctx);
-
-       cmp             $CRYPT_OK, %eax                         // check err == CRYPT_OK
-       jne             9f                                                      // if err != CRYPT_OK, exit
-
-       movups  (C), %xmm1                                      // load xmm1 = C
-       pxor    %xmm7, %xmm1                            // C ^= T
-       movups  %xmm1, (C)                                      // write C with xmm1, xmm1 is freed now, will be changed in the following macro
-
-       xts_mult_x_on_xmm7                                      // update T (on xmm7)
-
-       movups  %xmm7, (T)                                      // write xmm7 to T
-9:
-
-       // restore used xmm registers if this is for kernel
-#ifdef KERNEL
-       movaps  16(sp), %xmm1
-       movaps  32(sp), %xmm2
-       movaps  48(sp), %xmm7
-#endif
-
-       // free stack memory and restore callee registers
-#if defined    __i386__
-       add             $12+16*4, %esp                          // 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
-       pop             %esi
-       pop             %edi
-       pop             %ebx
-#else
-#ifdef KERNEL
-       add             $4*16, %rsp                                     // only need 3*16, add 16 extra to make the xmm save/restore common with i386
-#endif
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-#endif
-
-       // return, eax/rax already has the return val
-       leave
-       ret
-
-       #undef  P
-       #undef  C
-       #undef  T
-       #undef  ctx
-       #undef  sp
-
-/* 
-       The following is x86_64/i386 assembly implementation of 
-
-       int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim);
-
-       TODO : Its C code implementation is YET to be provided in xtsClearC.c (for the benefit of porting to other ISAs)
-       This function is a grouped version of the above tweak_crypt(), so the xmm register save/restore only needs
-       to happen once for all grouped blocks.
-
-       The implementation here probes __cpu_capabilities to detect whether aesni (hw-aes instructions) is available.
-       If aesni is available, the code branches to an optimized path that uses aesni.
-
-       The optimized aesni code operates as follows:
-
-       while (more than 4 consecutive blocks available) {
-
-               do xts_mult_x macro 4 times and write the 4 tweaks on stack (16-byte aligned)
-       
-               perform 4 C = P ^ T;    // T is on 16-byte aligned stack
-
-               perform 4 aes_encrypt (all aes_encrypt instructions interleaved to achieve better throughput)
-
-               perform 4 C = C ^ T             // T is on 16-byte aligned stack
-
-       }
-
-       The code then falls through to the scalar code, which sequentially performs what tweak_crypt does
-
-       1. C = P ^ T
-       2. err = aes_encryp(C, C, ctx); if (err != CRYPT_OK) return err;
-       3. C = C ^ T
-       4. xts_mult_x(T)
-
-       Note: used xmm registers : 
-                       xmm0-xmm5, xmm7 if aesni is available
-                       xmm0-xmm4, xmm7 if aesni is not available.
-
-*/
-
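-/*
-       A possible portable C fallback for this grouped routine (the TODO above;
-       illustrative only - it simply loops the per-block tweak_crypt, as the
-       generic path of xts_encrypt does):
-
-       static int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T,
-                                    aesedp_encrypt_ctx *ctx, uint32_t lim)
-       {
-          uint32_t i, err = CRYPT_OK;
-
-          for (i = 0; i < lim; i++) {
-             if ((err = tweak_crypt(P, C, T, ctx)) != CRYPT_OK)
-                return err;
-             P += 16;                  // advance to the next block
-             C += 16;
-          }
-          return err;
-       }
-*/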
-    .text
-       .align  4,0x90
-       .globl  _tweak_crypt_group
-_tweak_crypt_group:
-
-#if defined    __i386__
-
-       // push callee-saved registers for local use
-       push    %ebp
-       mov             %esp, %ebp
-       push    %ebx
-       push    %edi
-       push    %esi
-
-       // allocate stack memory for local use and/or xmm register save for kernel code
-       sub             $(12+8*16+16*4), %esp           // 12 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) aesni
-                                                                               // 12 (alignment) + 8*16 (xmm) + 4*16 (only 12 used for aes_encrypt) no aesni 
-       // transfer calling arguments
-       mov             20(%ebp), %eax                          // ctx
-       mov             12(%ebp), %edi                          // C
-       mov             16(%ebp), %ebx                          // T
-       mov             8(%ebp), %esi                           // P
-       mov             %eax, 8(%esp)                           // ctx as the 3rd parameter to aes_encrypt
-
-       #define P       %esi
-       #define C       %edi
-       #define T       %ebx
-       #define lim     24(%ebp)
-       #define sp      %esp
-
-#else
-
-       // push callee-saved registers for local use
-       push    %rbp
-       mov             %rsp, %rbp
-       push    %rbx
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-
-       // allocate stack memory for local use and/or xmm register save for kernel code
-       sub             $(8+8*16+16*5), %rsp            // 8 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) + 16 (common to i386)
-
-       // rdi/rsi/rdx/rcx/r8
-       // transfer calling arguments
-       mov             %rdi, %r12      
-       mov             %rsi, %r13
-       mov             %rdx, %r14
-       mov             %rcx, %r15
-       mov             %r8,  %rbx
-
-       #define P       %r12
-       #define C       %r13
-       #define T       %r14
-       #define ctx     %r15
-       #define lim     %ebx
-       #define sp      %rsp
-#endif
-
-#ifdef KERNEL
-       movaps  %xmm0, 0x50(sp)
-       movaps  %xmm1, 0x60(sp)
-       movaps  %xmm2, 0x70(sp)
-       movaps  %xmm3, 0x80(sp)
-       movaps  %xmm4, 0x90(sp)
-       movaps  %xmm7, 0xa0(sp)
-#endif
-
-       // probe __cpu_capabilities to detect aesni
-#if defined __x86_64__
-    movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
-    mov     (%rax), %eax                                    // %eax = __cpu_capabilities
-#else          // i386
-#if defined KERNEL
-    leal    __cpu_capabilities, %eax                        // %eax -> __cpu_capabilities
-    mov     (%eax), %eax                                    // %eax = __cpu_capabilities
-#else
-    movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
-#endif
-#endif
-       test    $(kHasAES), %eax
-       je              L_crypt_group_sw                                                                // if aesni not available, jump to sw-based implementation
-
-       // aesni-based implementation
-
-       sub             $4, lim                                                                                 // pre-decrement lim by 4
-       jl              9f                                                                                              // if lim < 4, skip the following code
-
-       movups  (T), %xmm7                                                                              // xmm7 is the tweak before encrypting every 4 blocks   
-#ifdef KERNEL
-       movaps  %xmm5, 0xb0(sp)                                                                 // hw-aes-based uses extra xmm5
-#endif
-
-0:
-       // derive 4 tweaks using xts_mult_x macro, and save on aligned stack space
-       // xmm7 will be the tweak for next 4-blocks iteration
-
-       #define tweak1  16(sp)
-       #define tweak2  32(sp)
-       #define tweak3  48(sp)
-       #define tweak4  64(sp)
-
-       movaps  %xmm7, tweak1                                                                   // save 1st tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 2nd tweak
-       movaps  %xmm7, tweak2                                                                   // save 2nd tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 3rd tweak
-       movaps  %xmm7, tweak3                                                                   // save 3rd tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 4th tweak
-       movaps  %xmm7, tweak4                                                                   // save 4th tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 1st tweak for next iteration
-
-       // read 4 Ps
-       movups  (P), %xmm0
-       movups  16(P), %xmm1
-       movups  32(P), %xmm2
-       movups  48(P), %xmm3
-
-       // 4 C = P ^ T
-       pxor    tweak1, %xmm0
-       pxor    tweak2, %xmm1
-       pxor    tweak3, %xmm2
-       pxor    tweak4, %xmm3
-
-       // 4 interleaved aes_encrypt
-
-#if defined    __i386__
-       mov             8(sp), %ecx     // ctx
-       #undef  ctx
-       #define ctx     %ecx
-#endif
-
-       mov             240(ctx), %eax                                  // aes length = 16 * number of rounds (160/192/224 for AES-128/192/256)
-
-       cmp             $160, %eax                                              // AES-128 ?
-       je              160f
-       cmp             $192, %eax                                              // AES-192 ?
-       je              192f
-       cmp             $224, %eax                                              // AES-256 ?
-       je              224f
-       mov             $-1, %eax                                               // error : non-supported aes length
-#ifdef KERNEL
-       movaps  0xb0(sp), %xmm5                                 // hw-aes-based uses extra xmm5
-#endif
-       jmp             L_error_crypt
-
-       // definitions, macros, and constructs for 4 blocks hw-aes-encrypt
-
-       // the following key definitions will also be used in tweak_uncrypt_group 
-       #define key0                    0(ctx)
-       #define key1                    16(ctx)
-       #define key2                    32(ctx)
-       #define key3                    48(ctx)
-       #define key4                    64(ctx)
-       #define key5                    80(ctx)
-       #define key6                    96(ctx)
-       #define key7                    112(ctx)
-       #define key8                    128(ctx)
-       #define key9                    144(ctx)
-       #define keyA                    160(ctx)
-       #define keyB                    176(ctx)
-       #define keyC                    192(ctx)
-       #define keyD                    208(ctx)
-       #define keyE                    224(ctx)
-
-       #define aes             aesenc
-       #define aeslast aesenclast
-
-       // all aes encrypt operations start with the following sequence
-       .macro  aes_common_part
-       movups  key0, %xmm4
-       movups  key1, %xmm5
-       pxor    %xmm4, %xmm0
-       pxor    %xmm4, %xmm1
-       pxor    %xmm4, %xmm2
-       pxor    %xmm4, %xmm3
-       movups  key2, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key3, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key4, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key5, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key6, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key7, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key8, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key9, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  keyA, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       .endm
-
-       // all aes encrypt operations end with the following 4 instructions
-       .macro  aes_last
-       aeslast %xmm4, %xmm0
-       aeslast %xmm4, %xmm1
-       aeslast %xmm4, %xmm2
-       aeslast %xmm4, %xmm3
-       .endm
-
-       .macro  aes_128
-       aes_common_part                 // encrypt common part
-       aes_last                                // encrypt ending part
-       .endm
-       
-       .macro  aes_192
-       aes_common_part                 // encrypt common part
-
-       // 10 extra instructions in between common and ending
-       movups  keyB, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  keyC, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-
-       aes_last                                // encrypt ending part
-       .endm
-
-       .macro  aes_256
-       aes_common_part                 // encrypt common part
-
-       // 20 extra instructions in between common and ending
-       movups  keyB, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  keyC, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  keyD, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  keyE, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-
-       aes_last                                // encrypt ending part
-       .endm
-
-160:   // AES-128 encrypt
-       aes_128
-       jmp             8f
-
-192:   // AES-192 encrypt
-       aes_192
-       jmp             8f
-
-224:   // AES-256 encrypt      
-       aes_256
-
-8:     
-
-       // 4 C = C ^ T
-       pxor    tweak1, %xmm0
-       pxor    tweak2, %xmm1
-       pxor    tweak3, %xmm2
-       pxor    tweak4, %xmm3
-
-       // write 4 Cs
-       movups  %xmm0, (C)
-       movups  %xmm1, 16(C)
-       movups  %xmm2, 32(C)
-       movups  %xmm3, 48(C)
-
-       add     $64, P
-       add     $64, C
-
-       sub             $4, lim
-       jge             0b
-
-#ifdef KERNEL
-       movaps  0xb0(sp), %xmm5                         // hw-aes-based uses extra xmm5
-#endif
-       movups  %xmm7, (T)
-
-9:
-       xor             %eax, %eax                                      // to return CRYPT_OK
-       add             $4, lim                                         // restore lim = number of remaining blocks (0-3)
-       je              9f                                                      // if lim==0, branch to prepare to return       
-
-L_crypt_group_sw:
-
-       movups  (T), %xmm7                                      // T, xmm7 will be used as T (128-bit) throughout the loop
-
-       sub             $1, lim                                         // pre-decrement lim by 1       
-       jl              1f                                                      // if lim < 1, branch to prepare to return
-0:
-       movups  (P), %xmm0                                      // P
-
-       // prepare for calling aes_encrypt
-#if defined    __i386__
-       mov             C, (%esp)                                       // C
-       mov             C, 4(%esp)                                      // C
-                                                                               // ctx was prepared previously in preamble
-#else
-       mov             C, %rdi                                         // C
-       mov             C, %rsi                                         // C
-       mov             ctx, %rdx                                       // ctx
-#endif
-
-       pxor    %xmm7, %xmm0                            // C = P ^ T    
-       movups  %xmm0, (C)                                      // save C into memory
-
-       call    _aes_encrypt_xmm_no_save        // err = aes_encrypt(C,C,ctx);
-
-       cmp             $CRYPT_OK, %eax                         // err == CRYPT_OK ? 
-       jne             9f                                                      // if err != CRYPT_OK, branch to exit with error
-
-       movups  (C), %xmm0                                      // load xmm0 with C
-       pxor    %xmm7, %xmm0                            // C ^= T
-       movups  %xmm0, (C)                                      // save output C
-
-       xts_mult_x_on_xmm7
-
-       add             $16, C                                          // next C
-       add             $16, P                                          // next P
-       sub             $1, lim                                         // lim--
-       jge             0b                                                      // if (lim>0) repeat the scalar loop
-
-1:     movups  %xmm7, (T)                                      // save final tweak 
-L_error_crypt:
-9:
-       // if kernel, restore used xmm registers
-#ifdef KERNEL
-       movaps  0x50(sp), %xmm0
-       movaps  0x60(sp), %xmm1
-       movaps  0x70(sp), %xmm2
-       movaps  0x80(sp), %xmm3
-       movaps  0x90(sp), %xmm4
-       movaps  0xa0(sp), %xmm7
-#endif
-
-#if defined    __i386__
-       add             $(12+16*8+16*4), %esp
-       pop             %esi
-       pop             %edi
-       pop             %ebx
-#else
-       add             $(8+16*8+16*5), %rsp
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-       pop             %rbx
-#endif
-       leave
-       ret
-
-       #undef  P
-       #undef  C
-       #undef  T
-       #undef  ctx
-       #undef  sp
-
-/* 
-       The following is x86_64/i386 assembly implementation of 
-
-       int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
-
-       Its C code implementation is given in xtsClearC.c
-
-       all pointers C/P/T points to a block of 16 bytes. In the following description, C/P/T represent 128-bit data.
-
-       The operation of tweak_uncrypt
-
-       1. P = C ^ T
-       2. err = aes_decrypt(P, P, ctx); if (err != CRYPT_OK) return err;
-       3. P = P ^ T
-       4. xts_mult_x(T)
-       5. return CRYPT_OK;
-
-       The following is the assembly implementation flow
-
-       1. save used xmm registers (xmm1/xmm7) if kernel code 
-       2. load xmm1 = C, xmm7 = T
-       3. xmm1 = P = C ^ T
-       4. write xmm1 to P
-       5. call aes_decrypt(P,P,ctx); note that it will use aesni if available; the xmm registers return intact
-       6. load xmm1 = P
-       7. xmm1 = P = P^T = xmm1 ^ xmm7
-       8. write xmm1 to P
-       9. update T (in xmm7) via xts_mult_x macro
-       a. restore xmm registers (xmm1/xmm7) if kernel code
-       b. return CRYPT_OK (in eax) 
-
-       Note: used xmm registers : xmm1/xmm2/xmm7, xmm2 in xts_mult_x macro
-
-*/
-
-       .text
-       .align  4,0x90
-       .globl  _tweak_uncrypt
-_tweak_uncrypt:
-#if defined    __i386__
-
-       // push into stack for local use
-       push    %ebp
-       mov             %esp, %ebp
-       push    %ebx
-       push    %edi
-       push    %esi
-
-       // allocate stack memory for local use
-       sub             $12+16*4, %esp                          // 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
-
-       // load with called arguments
-       mov             8(%ebp), %eax                           // C, we need this only briefly, so eax is fine
-       mov             12(%ebp), %edi                          // P
-       mov             16(%ebp), %ebx                          // T
-       mov             20(%ebp), %esi                          // ctx
-
-       #define C       %eax
-       #define P       %edi
-       #define T       %ebx
-       #define ctx     %esi
-       #define sp      %esp
-
-#else
-       // x86_64 calling argument order : rdi/rsi/rdx/rcx/r8
-
-       // push into stack for local use
-       push    %rbp
-       mov             %rsp, %rbp
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-
-       // allocate stack memory for local use; if kernel code, need to save/restore xmm registers
-#ifdef KERNEL
-       sub             $4*16, %rsp                                     // only need 3*16, add 16 extra to make the xmm save/restore common with i386
-#endif
-
-       // load with called arguments, release rdi/rsi/rdx/rcx/r8, as need to call aes_decrypt
-       mov             %rsi, %r13
-       mov             %rdx, %r14
-       mov             %rcx, %r15
-
-       #define C       %rdi
-       #define P       %r13
-       #define T       %r14
-       #define ctx     %r15
-       #define sp      %rsp
-
-#endif
-
-       // if kernel, save used xmm registers
-#ifdef KERNEL
-       movaps  %xmm1, 16(sp)
-       movaps  %xmm2, 32(sp)
-       movaps  %xmm7, 48(sp)
-#endif
-
-       movups  (C), %xmm1                                      // C
-       movups  (T), %xmm7                                      // T
-
-       // set up calling arguments for aes_decrypt
-#if defined    __i386__
-       mov             P, (%esp)                                       // P
-       mov             P, 4(%esp)                                      // P
-       mov             ctx, 8(%esp)                            // ctx
-#else
-       mov             P, %rdi                                         // P
-       mov             P, %rsi                                         // P
-       mov             ctx, %rdx                                       // ctx
-#endif
-
-       pxor    %xmm7, %xmm1                            // P = C ^ T    
-       movups  %xmm1, (P)                                      // save P into memory
-
-       call    _aes_decrypt                            // err = aes_decrypt(P,P,ctx);
-
-       cmp             $CRYPT_OK, %eax                         // check err == CRYPT_OK
-       jne             9f                                                      // if err != CRYPT_OK, exit
-
-       movups  (P), %xmm1                                      // load xmm1 = P
-       pxor    %xmm7, %xmm1                            // P ^= T
-       movups  %xmm1, (P)                                      // write P with xmm1, xmm1 is freed now, will be changed in the following macro
-
-       xts_mult_x_on_xmm7                                      // update T (on xmm7)
-
-       movups  %xmm7, (T)                                      // write xmm7 to T
-9:
-
-       // restore used xmm registers if this is for kernel
-#ifdef KERNEL
-       movaps  16(sp), %xmm1
-       movaps  32(sp), %xmm2
-       movaps  48(sp), %xmm7
-#endif
-
-       // free stack memory and restore callee registers
-#if defined    __i386__
-       add             $12+16*4, %esp                          // 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
-       pop             %esi
-       pop             %edi
-       pop             %ebx
-#else
-#ifdef KERNEL
-       add             $4*16, %rsp                                     // only need 3*16, add 16 extra to make the xmm save/restore common with i386
-#endif
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-#endif
-
-       // return, eax/rax already has the return val
-       leave
-       ret
-
-       #undef  P
-       #undef  C
-       #undef  T
-       #undef  ctx
-       #undef  sp
-
-/* 
-       The following is x86_64/i386 assembly implementation of 
-
-       int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim);
-
-       TODO : Its C code implementation is YET to be provided in xtsClearC.c (for the benefit of porting to other ISAs)
-       This function is a grouped version of the above tweak_uncrypt(), so the xmm register save/restore only needs
-       to happen once for all grouped blocks.
-
-       The implementation here probes __cpu_capabilities to detect whether aesni (hw-aes instructions) is available.
-       If aesni is available, the code branches to an optimized path that uses aesni.
-
-       The optimized aesni code operates as follows:
-
-       while (more than 4 consecutive blocks available) {
-
-               do xts_mult_x macro 4 times and write the 4 tweaks on stack (16-byte aligned)
-       
-               perform 4 P = C ^ T;    // T is on 16-byte aligned stack
-
-               perform 4 aes_decrypt (all aes_decrypt instructions interleaved to achieve better throughput)
-
-               perform 4 P = P ^ T             // T is on 16-byte aligned stack
-
-       }
-
-       The code then falls through to the scalar code, which sequentially performs what tweak_uncrypt does
-
-       1. P = C ^ T
-       2. err = aes_decrypt(P, P, ctx); if (err != CRYPT_OK) return err;
-       3. P = P ^ T
-       4. xts_mult_x(T)
-
-       Note: used xmm registers : 
-                       xmm0-xmm5, xmm7 if aesni is available
-                       xmm0-xmm4, xmm7 if aesni is not available.
-
-*/
-
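-/*
-       As with tweak_crypt_group, a possible portable C fallback for this
-       grouped routine (the TODO above; illustrative only - it loops the
-       per-block tweak_uncrypt from xtsClearC.c):
-
-       static int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T,
-                                      aesedp_decrypt_ctx *ctx, uint32_t lim)
-       {
-          uint32_t i, err = CRYPT_OK;
-
-          for (i = 0; i < lim; i++) {
-             if ((err = tweak_uncrypt(C, P, T, ctx)) != CRYPT_OK)
-                return err;
-             C += 16;                  // advance to the next block
-             P += 16;
-          }
-          return err;
-       }
-*/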
-    .text
-       .align  4,0x90
-       .globl  _tweak_uncrypt_group
-_tweak_uncrypt_group:
-
-#if defined    __i386__
-
-       // push callee-saved registers for local use
-       push    %ebp
-       mov             %esp, %ebp
-       push    %ebx
-       push    %edi
-       push    %esi
-
-       // allocate stack memory for local use and/or xmm register save for kernel code
-       sub             $(12+8*16+16*4), %esp           // 12 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) aesni
-                                                                               // 12 (alignment) + 8*16 (xmm) + 4*16 (only 12 used for aes_decrypt) no aesni 
-       // transfer calling arguments
-       mov             20(%ebp), %eax                          // ctx
-       mov             12(%ebp), %edi                          // P
-       mov             16(%ebp), %ebx                          // T
-       mov             8(%ebp), %esi                           // C
-       mov             %eax, 8(%esp)                           // ctx as the 3rd parameter to aes_decrypt
-
-       #define C       %esi
-       #define P       %edi
-       #define T       %ebx
-       #define lim     24(%ebp)
-       #define sp      %esp
-
-#else
-
-       // push callee-saved registers for local use
-       push    %rbp
-       mov             %rsp, %rbp
-       push    %rbx
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-
-       // allocate stack memory for local use and/or xmm register save for kernel code
-       sub             $(8+8*16+16*5), %rsp            // 8 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) + 16 (common to i386)
-
-       // rdi/rsi/rdx/rcx/r8
-       // transfer calling arguments
-       mov             %rdi, %r12      
-       mov             %rsi, %r13
-       mov             %rdx, %r14
-       mov             %rcx, %r15
-       mov             %r8,  %rbx
-
-       #define C       %r12
-       #define P       %r13
-       #define T       %r14
-       #define ctx     %r15
-       #define lim     %ebx
-       #define sp      %rsp
-#endif
-
-#ifdef KERNEL
-       movaps  %xmm0, 0x50(sp)
-       movaps  %xmm1, 0x60(sp)
-       movaps  %xmm2, 0x70(sp)
-       movaps  %xmm3, 0x80(sp)
-       movaps  %xmm4, 0x90(sp)
-       movaps  %xmm7, 0xa0(sp)
-#endif
-
-       // probe __cpu_capabilities to detect aesni
-#if defined __x86_64__
-    movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
-    mov     (%rax), %eax                                    // %eax = __cpu_capabilities
-#else          // i386
-#if defined KERNEL
-    leal    __cpu_capabilities, %eax                        // %eax -> __cpu_capabilities
-    mov     (%eax), %eax                                    // %eax = __cpu_capabilities
-#else
-    movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
-#endif
-#endif
-       test    $(kHasAES), %eax
-       je              L_uncrypt_group_sw                                                              // if aesni not available, jump to sw-based implementation
-
-       // aesni-based implementation
-
-       sub             $4, lim                                                                                 // pre-decrement lim by 4
-       jl              9f                                                                                              // if lim < 4, skip the following code
-
-       movups  (T), %xmm7                                                                              // xmm7 is the tweak before decrypting every 4 blocks   
-#ifdef KERNEL
-       movaps  %xmm5, 0xb0(sp)                                                                 // hw-aes-based uses extra xmm5
-#endif
-
-0:
-       // derive 4 tweaks using xts_mult_x macro, and save on aligned stack space
-       // xmm7 will be the tweak for next 4-blocks iteration
-
-       #define tweak1  16(sp)
-       #define tweak2  32(sp)
-       #define tweak3  48(sp)
-       #define tweak4  64(sp)
-
-       movaps  %xmm7, tweak1                                                                   // save 1st tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 2nd tweak
-       movaps  %xmm7, tweak2                                                                   // save 2nd tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 3rd tweak
-       movaps  %xmm7, tweak3                                                                   // save 3rd tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 4th tweak
-       movaps  %xmm7, tweak4                                                                   // save 4th tweak on stack
-       xts_mult_x_on_xmm7                                                                              // compute 1st tweak for next iteration
-
-       // read 4 Cs
-       movups  (C), %xmm0
-       movups  16(C), %xmm1
-       movups  32(C), %xmm2
-       movups  48(C), %xmm3
-
-       // 4 P = C ^ T
-       pxor    tweak1, %xmm0
-       pxor    tweak2, %xmm1
-       pxor    tweak3, %xmm2
-       pxor    tweak4, %xmm3
-
-       // 4 interleaved aes_decrypt
-
-#if defined    __i386__
-       mov             8(sp), %ecx     // ctx
-       #undef  ctx
-       #define ctx     %ecx
-#endif
-
-       mov             240(ctx), %eax                                  // aes length = 16 * number of rounds (160/192/224 for AES-128/192/256)
-
-       cmp             $160, %eax                                              // AES-128 ?
-       je              160f
-       cmp             $192, %eax                                              // AES-192 ?
-       je              192f
-       cmp             $224, %eax                                              // AES-256 ?
-       je              224f
-       mov             $-1, %eax                                               // error : non-supported aes length
-#ifdef KERNEL
-       movaps  0xb0(sp), %xmm5                                 // hw-aes-based uses extra xmm5
-#endif
-       jmp             L_error_uncrypt
-
-       // definitions and macros to construct the hw-aes-decrypt
-       // will reuse previously defined key0 = (ctx), key1 = 16(ctx), ....
-       #undef  aes
-       #undef  aeslast
-       #define aes     aesdec
-       #define aeslast aesdeclast
-
-       .macro  aes_decrypt_common
-       movups  key8, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key7, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key6, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key5, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key4, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key3, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key2, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key1, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  key0, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       aeslast %xmm4, %xmm0
-       aeslast %xmm4, %xmm1
-       aeslast %xmm4, %xmm2
-       aeslast %xmm4, %xmm3
-       .endm
-
-       .macro  aes_dec_128
-       movups  keyA, %xmm4
-       movups  key9, %xmm5
-       pxor    %xmm4, %xmm0
-       pxor    %xmm4, %xmm1
-       pxor    %xmm4, %xmm2
-       pxor    %xmm4, %xmm3
-       aes_decrypt_common
-       .endm
-
-       .macro  aes_dec_192
-       movups  keyC, %xmm4
-       movups  keyB, %xmm5
-       pxor    %xmm4, %xmm0
-       pxor    %xmm4, %xmm1
-       pxor    %xmm4, %xmm2
-       pxor    %xmm4, %xmm3
-       movups  keyA, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key9, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       aes_decrypt_common
-       .endm
-
-       .macro  aes_dec_256
-       movups  keyE, %xmm4
-       movups  keyD, %xmm5
-       pxor    %xmm4, %xmm0
-       pxor    %xmm4, %xmm1
-       pxor    %xmm4, %xmm2
-       pxor    %xmm4, %xmm3
-       movups  keyC, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  keyB, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       movups  keyA, %xmm4
-       aes             %xmm5, %xmm0
-       aes             %xmm5, %xmm1
-       aes             %xmm5, %xmm2
-       aes             %xmm5, %xmm3
-       movups  key9, %xmm5
-       aes             %xmm4, %xmm0
-       aes             %xmm4, %xmm1
-       aes             %xmm4, %xmm2
-       aes             %xmm4, %xmm3
-       aes_decrypt_common
-       .endm
-
-160:   // AES-128 decrypt
-       aes_dec_128
-       jmp             8f
-
-192:   // AES-192 decrypt
-       aes_dec_192
-       jmp             8f
-
-224:   // AES-256 decrypt      
-       aes_dec_256
-
-8:     
-
-       // 4 P = P ^ T
-       pxor    tweak1, %xmm0
-       pxor    tweak2, %xmm1
-       pxor    tweak3, %xmm2
-       pxor    tweak4, %xmm3
-
-       // write 4 Ps
-       movups  %xmm0, (P)
-       movups  %xmm1, 16(P)
-       movups  %xmm2, 32(P)
-       movups  %xmm3, 48(P)
-
-       add     $64, C
-       add     $64, P
-
-       sub             $4, lim
-       jge             0b
-
-#ifdef KERNEL
-       movaps  0xb0(sp), %xmm5                         // hw-aes-based uses extra xmm5
-#endif
-       movups  %xmm7, (T)
-
-9:
-       xor             %eax, %eax                                      // to return CRYPT_OK
-       add             $4, lim                                         // restore lim = number of remaining blocks (0-3)
-       je              9f                                                      // if lim==0, branch to prepare to return       
-
-L_uncrypt_group_sw:
-
-       movups  (T), %xmm7                                      // T, xmm7 will be used as T (128-bit) throughout the loop
-
-       sub             $1, lim                                         // pre-decrement lim by 1       
-       jl              1f                                                      // if lim < 1, branch to prepare to return
-0:
-       movups  (C), %xmm0                                      // C
-
-       // prepare for calling aes_decrypt
-#if defined    __i386__
-       mov             P, (%esp)                                       // P
-       mov             P, 4(%esp)                                      // P
-                                                                               // ctx was prepared previously in preamble
-#else
-       mov             P, %rdi                                         // P
-       mov             P, %rsi                                         // P
-       mov             ctx, %rdx                                       // ctx
-#endif
-
-       pxor    %xmm7, %xmm0                            // P = C ^ T    
-       movups  %xmm0, (P)                                      // save P into memory
-
-       call    _aes_decrypt_xmm_no_save        // err = aes_decrypt(P,P,ctx);
-
-       cmp             $CRYPT_OK, %eax                         // err == CRYPT_OK ? 
-       jne             9f                                                      // if err != CRYPT_OK, branch to exit with error
-
-       movups  (P), %xmm0                                      // load xmm0 with P
-       pxor    %xmm7, %xmm0                            // P ^= T
-       movups  %xmm0, (P)                                      // save output P
-
-       xts_mult_x_on_xmm7
-
-       add             $16, C                                          // next C
-       add             $16, P                                          // next P
-       sub             $1, lim                                         // lim--
-       jge             0b                                                      // if (lim>0) repeat the scalar loop
-
-1:     movups  %xmm7, (T)                                      // save final tweak 
-L_error_uncrypt:
-9:
-       // if kernel, restore used xmm registers
-#ifdef KERNEL
-       movaps  0x50(sp), %xmm0
-       movaps  0x60(sp), %xmm1
-       movaps  0x70(sp), %xmm2
-       movaps  0x80(sp), %xmm3
-       movaps  0x90(sp), %xmm4
-       movaps  0xa0(sp), %xmm7
-#endif
-
-#if defined    __i386__
-       add             $(12+16*8+16*4), %esp
-       pop             %esi
-       pop             %edi
-       pop             %ebx
-#else
-       add             $(8+16*8+16*5), %rsp
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-       pop             %rbx
-#endif
-       leave
-       ret
diff --git a/bsd/crypto/aesxts.h b/bsd/crypto/aesxts.h
new file mode 100644 (file)
index 0000000..574ed19
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ * This header file is kept for legacy reasons and may be removed in
+ * future; the interface resides in <libkern/crypto/aesxts.h>.
+ */
+#include <libkern/crypto/aesxts.h>
diff --git a/bsd/crypto/des.h b/bsd/crypto/des.h
new file mode 100644 (file)
index 0000000..5347f7b
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * This header file is kept for legacy reasons and may be removed in
+ * future; the supported interface resides in <libkern/crypto/des.h>.
+ */
+#include <libkern/crypto/des.h>
diff --git a/bsd/crypto/des/Makefile b/bsd/crypto/des/Makefile
deleted file mode 100644 (file)
index 2eee630..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_I386 = \
-
-INSTINC_SUBDIRS_X86_64 = \
-
-INSTINC_SUBDIRS_ARM = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS_X86_64 = \
-
-EXPINC_SUBDIRS_ARM = \
-
-PRIVATE_DATAFILES = \
-       des.h
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/crypto/des/des.h b/bsd/crypto/des/des.h
deleted file mode 100644 (file)
index 9f232b1..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/des.h,v 1.1.2.3 2002/03/26 10:12:24 ume Exp $      */
-/*     $KAME: des.h,v 1.8 2001/09/10 04:03:57 itojun Exp $     */
-
-/* lib/des/des.h */
-/* Copyright (C) 1995-1996 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#ifndef HEADER_DES_H
-#define HEADER_DES_H
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-/* must be a 32-bit quantity */
-#define DES_LONG u_int32_t
-
-typedef unsigned char des_cblock[8];
-typedef struct des_ks_struct
-       {
-       union   {
-       des_cblock cblock;
-       /* make sure things are correct size on machines with
-        * 8 byte longs */
-       DES_LONG deslong[2];
-       } ks;
-       int weak_key;
-} des_key_schedule[16];
-
-#define DES_KEY_SZ     (sizeof(des_cblock))
-#define DES_SCHEDULE_SZ (sizeof(des_key_schedule))
-
-#define DES_ENCRYPT    1
-#define DES_DECRYPT    0
-
-#define DES_CBC_MODE   0
-#define DES_PCBC_MODE  1
-
-extern int des_check_key;      /* defaults to false */
-
-char *des_options(void);
-void des_ecb_encrypt(des_cblock *, des_cblock *, des_key_schedule, int);
-
-void des_encrypt1(DES_LONG *, des_key_schedule, int);
-void des_encrypt2(DES_LONG *, des_key_schedule, int);
-void des_encrypt3(DES_LONG *, des_key_schedule, des_key_schedule,
-                     des_key_schedule);
-void des_decrypt3(DES_LONG *, des_key_schedule, des_key_schedule,
-                     des_key_schedule);
-
-void des_ecb3_encrypt(des_cblock *, des_cblock *, des_key_schedule, 
-                         des_key_schedule, des_key_schedule, int);
-
-void des_ncbc_encrypt(const unsigned char *, unsigned char *, long,
-                         des_key_schedule, des_cblock *, int);
-
-void des_ede3_cbc_encrypt(const unsigned char *, unsigned char *, long,
-                         des_key_schedule, des_key_schedule, 
-                         des_key_schedule, des_cblock *, int);
-
-void des_set_odd_parity(des_cblock *);
-void des_fixup_key_parity(des_cblock *); 
-int des_is_weak_key(des_cblock *);
-int des_set_key(des_cblock *, des_key_schedule);
-int des_key_sched(des_cblock *, des_key_schedule);
-int des_set_key_checked(des_cblock *, des_key_schedule);
-void des_set_key_unchecked(des_cblock *, des_key_schedule);
-int des_check_key_parity(des_cblock *);
-
-#ifdef  __cplusplus
-}
-#endif
-
-#endif
diff --git a/bsd/crypto/des/des_ecb.c b/bsd/crypto/des/des_ecb.c
deleted file mode 100644 (file)
index 4a3ea99..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/des_ecb.c,v 1.1.2.3 2002/03/26 10:12:24 ume Exp $  */
-/*     $KAME: des_ecb.c,v 1.6 2001/09/10 04:03:58 itojun Exp $ */
-
-/* crypto/des/ecb_enc.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <crypto/des/des_locl.h>
-#include <crypto/des/spr.h>
-
-/* char *libdes_version="libdes v 3.24 - 20-Apr-1996 - eay"; */ /* wrong */
-/* char *DES_version="DES part of SSLeay 0.6.4 30-Aug-1996"; */
-
-char *des_options(void)
-        {
-        static int init=1;
-        static char buf[32];
-
-        if (init)
-                {
-                const char *ptr,*unroll,*risc,*size;
-
-#ifdef DES_PTR
-                ptr="ptr";
-#else
-                ptr="idx";
-#endif
-#if defined(DES_RISC1) || defined(DES_RISC2)
-#ifdef DES_RISC1
-                risc="risc1";
-#endif
-#ifdef DES_RISC2
-                risc="risc2";
-#endif
-#else
-                risc="cisc";
-#endif
-#ifdef DES_UNROLL
-                unroll="16";
-#else
-                unroll="4";
-#endif
-                if (sizeof(DES_LONG) != sizeof(long))
-                        size="int";
-                else
-                        size="long";
-                snprintf(buf, sizeof(buf), "des(%s,%s,%s,%s)",
-                               ptr, risc, unroll, size);
-                init=0;
-                }
-        return(buf);
-}
-void des_ecb_encrypt(des_cblock *input, des_cblock *output, 
-                    des_key_schedule ks, int enc)
-{
-       register DES_LONG l;
-       DES_LONG ll[2];
-       const unsigned char *in=&(*input)[0];
-       unsigned char *out = &(*output)[0];
-
-       c2l(in,l); ll[0]=l;
-       c2l(in,l); ll[1]=l;
-       des_encrypt1(ll,ks,enc);
-       l=ll[0]; l2c(l,out);
-       l=ll[1]; l2c(l,out);
-       l=ll[0]=ll[1]=0;
-}
-
-void des_ecb3_encrypt(des_cblock *input, des_cblock *output,
-             des_key_schedule ks1, des_key_schedule ks2, des_key_schedule ks3,
-             int enc)
-{
-       register DES_LONG l0,l1;
-       DES_LONG ll[2];
-       const unsigned char *in = &(*input)[0];
-       unsigned char *out = &(*output)[0];
-       c2l(in,l0); 
-       c2l(in,l1);
-       ll[0]=l0; 
-       ll[1]=l1;
-
-       if (enc)
-               des_encrypt3(ll,ks1,ks2,ks3);
-       else
-               des_decrypt3(ll,ks1,ks2,ks3);
-
-       l0=ll[0];
-       l1=ll[1];
-       l2c(l0,out);
-       l2c(l1,out);
-}
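For reference, a minimal sketch of how callers drove the legacy single-DES
interface being deleted here, using the declarations from the removed
bsd/crypto/des/des.h above (the function body is illustrative, not code from
this tree; in-kernel users now go through <libkern/crypto/des.h>):

    #include <string.h>
    #include <crypto/des/des.h>   /* the header removed in this commit */

    static void encrypt_one_block(void)
    {
            des_cblock key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
            des_cblock in, out;
            des_key_schedule ks;

            memset(in, 0, sizeof(in));
            des_set_odd_parity(&key);        /* DES keys carry odd parity */
            if (des_set_key_checked(&key, ks) != 0)
                    return;                  /* parity error or weak key */
            des_ecb_encrypt(&in, &out, ks, DES_ENCRYPT);
    }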
diff --git a/bsd/crypto/des/des_enc.c b/bsd/crypto/des/des_enc.c
deleted file mode 100644 (file)
index f5e269e..0000000
+++ /dev/null
@@ -1,294 +0,0 @@
-/*     $KAME: kame/kame/sys/crypto/des/des_enc.c,v 1.1 2001/09/10 04:03:58 itojun Exp $        */
-/*     $FreeBSD: src/sys/crypto/des/des_enc.c,v 1.1.2.1 2002/03/26 10:12:24 ume Exp $  */
-
-/* crypto/des/des_enc.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- * 
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to.  The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- * 
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    "This product includes cryptographic software written by
- *     Eric Young (eay@cryptsoft.com)"
- *    The word 'cryptographic' can be left out if the rouines from the library
- *    being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from 
- *    the apps directory (application code) you must include an acknowledgement:
- *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- * 
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <sys/types.h>
-#include <crypto/des/des_locl.h>
-
-extern const DES_LONG des_SPtrans[8][64];
-
-void des_encrypt1(DES_LONG *data, des_key_schedule ks, int enc)
-{
-       register DES_LONG l,r,t,u;
-#ifdef DES_PTR
-       register const unsigned char *des_SP=(const unsigned char *)des_SPtrans;
-#endif
-#ifndef DES_UNROLL
-       register int i;
-#endif
-       register DES_LONG *s;
-
-       r=data[0];
-       l=data[1];
-
-       IP(r,l);
-       /* Things have been modified so that the initial rotate is
-        * done outside the loop.  This required the
- * des_SPtrans values in spr.h to be rotated 1 bit to the right.
-        * One perl script later and things have a 5% speed up on a sparc2.
-        * Thanks to Richard Outerbridge <71755.204@CompuServe.COM>
-        * for pointing this out. */
-       /* clear the top bits on machines with 8byte longs */
-       /* shift left by 2 */
-       r=ROTATE(r,29)&0xffffffffL;
-       l=ROTATE(l,29)&0xffffffffL;
-
-       s=ks->ks.deslong;
-       /* I don't know if it is worth the effort of loop unrolling the
-        * inner loop */
-       if (enc)
-               {
-#ifdef DES_UNROLL
-               D_ENCRYPT(l,r, 0); /*  1 */
-               D_ENCRYPT(r,l, 2); /*  2 */
-               D_ENCRYPT(l,r, 4); /*  3 */
-               D_ENCRYPT(r,l, 6); /*  4 */
-               D_ENCRYPT(l,r, 8); /*  5 */
-               D_ENCRYPT(r,l,10); /*  6 */
-               D_ENCRYPT(l,r,12); /*  7 */
-               D_ENCRYPT(r,l,14); /*  8 */
-               D_ENCRYPT(l,r,16); /*  9 */
-               D_ENCRYPT(r,l,18); /*  10 */
-               D_ENCRYPT(l,r,20); /*  11 */
-               D_ENCRYPT(r,l,22); /*  12 */
-               D_ENCRYPT(l,r,24); /*  13 */
-               D_ENCRYPT(r,l,26); /*  14 */
-               D_ENCRYPT(l,r,28); /*  15 */
-               D_ENCRYPT(r,l,30); /*  16 */
-#else
-               for (i=0; i<32; i+=8)
-                       {
-                       D_ENCRYPT(l,r,i+0); /*  1 */
-                       D_ENCRYPT(r,l,i+2); /*  2 */
-                       D_ENCRYPT(l,r,i+4); /*  3 */
-                       D_ENCRYPT(r,l,i+6); /*  4 */
-                       }
-#endif
-               }
-       else
-               {
-#ifdef DES_UNROLL
-               D_ENCRYPT(l,r,30); /* 16 */
-               D_ENCRYPT(r,l,28); /* 15 */
-               D_ENCRYPT(l,r,26); /* 14 */
-               D_ENCRYPT(r,l,24); /* 13 */
-               D_ENCRYPT(l,r,22); /* 12 */
-               D_ENCRYPT(r,l,20); /* 11 */
-               D_ENCRYPT(l,r,18); /* 10 */
-               D_ENCRYPT(r,l,16); /*  9 */
-               D_ENCRYPT(l,r,14); /*  8 */
-               D_ENCRYPT(r,l,12); /*  7 */
-               D_ENCRYPT(l,r,10); /*  6 */
-               D_ENCRYPT(r,l, 8); /*  5 */
-               D_ENCRYPT(l,r, 6); /*  4 */
-               D_ENCRYPT(r,l, 4); /*  3 */
-               D_ENCRYPT(l,r, 2); /*  2 */
-               D_ENCRYPT(r,l, 0); /*  1 */
-#else
-               for (i=30; i>0; i-=8)
-                       {
-                       D_ENCRYPT(l,r,i-0); /* 16 */
-                       D_ENCRYPT(r,l,i-2); /* 15 */
-                       D_ENCRYPT(l,r,i-4); /* 14 */
-                       D_ENCRYPT(r,l,i-6); /* 13 */
-                       }
-#endif
-               }
-
-       /* rotate and clear the top bits on machines with 8byte longs */
-       l=ROTATE(l,3)&0xffffffffL;
-       r=ROTATE(r,3)&0xffffffffL;
-
-       FP(r,l);
-       data[0]=l;
-       data[1]=r;
-       l=r=t=u=0;
-}
-
-void des_encrypt2(DES_LONG *data, des_key_schedule ks, int enc)
-{
-       register DES_LONG l,r,t,u;
-#ifdef DES_PTR
-       register const unsigned char *des_SP=(const unsigned char *)des_SPtrans;
-#endif
-#ifndef DES_UNROLL
-       register int i;
-#endif
-       register DES_LONG *s;
-
-       r=data[0];
-       l=data[1];
-
-       /* Things have been modified so that the initial rotate is
-        * done outside the loop.  This required the
- * des_SPtrans values in spr.h to be rotated 1 bit to the right.
-        * One perl script later and things have a 5% speed up on a sparc2.
-        * Thanks to Richard Outerbridge <71755.204@CompuServe.COM>
-        * for pointing this out. */
-       /* clear the top bits on machines with 8byte longs */
-       r=ROTATE(r,29)&0xffffffffL;
-       l=ROTATE(l,29)&0xffffffffL;
-
-       s=ks->ks.deslong;
-       /* I don't know if it is worth the effort of loop unrolling the
-        * inner loop */
-       if (enc)
-               {
-#ifdef DES_UNROLL
-               D_ENCRYPT(l,r, 0); /*  1 */
-               D_ENCRYPT(r,l, 2); /*  2 */
-               D_ENCRYPT(l,r, 4); /*  3 */
-               D_ENCRYPT(r,l, 6); /*  4 */
-               D_ENCRYPT(l,r, 8); /*  5 */
-               D_ENCRYPT(r,l,10); /*  6 */
-               D_ENCRYPT(l,r,12); /*  7 */
-               D_ENCRYPT(r,l,14); /*  8 */
-               D_ENCRYPT(l,r,16); /*  9 */
-               D_ENCRYPT(r,l,18); /*  10 */
-               D_ENCRYPT(l,r,20); /*  11 */
-               D_ENCRYPT(r,l,22); /*  12 */
-               D_ENCRYPT(l,r,24); /*  13 */
-               D_ENCRYPT(r,l,26); /*  14 */
-               D_ENCRYPT(l,r,28); /*  15 */
-               D_ENCRYPT(r,l,30); /*  16 */
-#else
-               for (i=0; i<32; i+=8)
-                       {
-                       D_ENCRYPT(l,r,i+0); /*  1 */
-                       D_ENCRYPT(r,l,i+2); /*  2 */
-                       D_ENCRYPT(l,r,i+4); /*  3 */
-                       D_ENCRYPT(r,l,i+6); /*  4 */
-                       }
-#endif
-               }
-       else
-               {
-#ifdef DES_UNROLL
-               D_ENCRYPT(l,r,30); /* 16 */
-               D_ENCRYPT(r,l,28); /* 15 */
-               D_ENCRYPT(l,r,26); /* 14 */
-               D_ENCRYPT(r,l,24); /* 13 */
-               D_ENCRYPT(l,r,22); /* 12 */
-               D_ENCRYPT(r,l,20); /* 11 */
-               D_ENCRYPT(l,r,18); /* 10 */
-               D_ENCRYPT(r,l,16); /*  9 */
-               D_ENCRYPT(l,r,14); /*  8 */
-               D_ENCRYPT(r,l,12); /*  7 */
-               D_ENCRYPT(l,r,10); /*  6 */
-               D_ENCRYPT(r,l, 8); /*  5 */
-               D_ENCRYPT(l,r, 6); /*  4 */
-               D_ENCRYPT(r,l, 4); /*  3 */
-               D_ENCRYPT(l,r, 2); /*  2 */
-               D_ENCRYPT(r,l, 0); /*  1 */
-#else
-               for (i=30; i>0; i-=8)
-                       {
-                       D_ENCRYPT(l,r,i-0); /* 16 */
-                       D_ENCRYPT(r,l,i-2); /* 15 */
-                       D_ENCRYPT(l,r,i-4); /* 14 */
-                       D_ENCRYPT(r,l,i-6); /* 13 */
-                       }
-#endif
-               }
-       /* rotate and clear the top bits on machines with 8byte longs */
-       data[0]=ROTATE(l,3)&0xffffffffL;
-       data[1]=ROTATE(r,3)&0xffffffffL;
-       l=r=t=u=0;
-}
-
-void des_encrypt3(DES_LONG *data, des_key_schedule ks1, des_key_schedule ks2,
-            des_key_schedule ks3)
-{
-       register DES_LONG l,r;
-
-       l=data[0];
-       r=data[1];
-       IP(l,r);
-       data[0]=l;
-       data[1]=r;
-       des_encrypt2((DES_LONG *)data,ks1,DES_ENCRYPT);
-       des_encrypt2((DES_LONG *)data,ks2,DES_DECRYPT);
-       des_encrypt2((DES_LONG *)data,ks3,DES_ENCRYPT);
-       l=data[0];
-       r=data[1];
-       FP(r,l);
-       data[0]=l;
-       data[1]=r;
-}
-
-void des_decrypt3(DES_LONG *data, des_key_schedule ks1, des_key_schedule ks2,
-            des_key_schedule ks3)
-{
-       register DES_LONG l,r;
-
-       l=data[0];
-       r=data[1];
-       IP(l,r);
-       data[0]=l;
-       data[1]=r;
-       des_encrypt2((DES_LONG *)data,ks3,DES_DECRYPT);
-       des_encrypt2((DES_LONG *)data,ks2,DES_ENCRYPT);
-       des_encrypt2((DES_LONG *)data,ks1,DES_DECRYPT);
-       l=data[0];
-       r=data[1];
-       FP(r,l);
-       data[0]=l;
-       data[1]=r;
-}
diff --git a/bsd/crypto/des/des_locl.h b/bsd/crypto/des/des_locl.h
deleted file mode 100644 (file)
index e894cb2..0000000
+++ /dev/null
@@ -1,364 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/des_locl.h,v 1.2.2.3 2002/03/26 10:12:25 ume Exp $ */
-/*     $KAME: des_locl.h,v 1.7 2001/09/10 04:03:58 itojun Exp $        */
-
-/* crypto/des/des_locl.h */
-/* Copyright (C) 1995-1997 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#ifndef HEADER_DES_LOCL_H
-#define HEADER_DES_LOCL_H
-
-#include <crypto/des/des.h>
-
-#undef DES_PTR
-
-#ifdef __STDC__
-#undef NOPROTO
-#endif
-
-#define ITERATIONS 16
-#define HALF_ITERATIONS 8
-
-/* used in des_read and des_write */
-#define MAXWRITE       (1024*16)
-#define BSIZE          (MAXWRITE+4)
-
-#define c2l(c,l)       (l =((DES_LONG)(*((c)++)))    , \
-                        l|=((DES_LONG)(*((c)++)))<< 8L, \
-                        l|=((DES_LONG)(*((c)++)))<<16L, \
-                        l|=((DES_LONG)(*((c)++)))<<24L)
-
-/* NOTE - c is not incremented as per c2l */
-#define c2ln(c,l1,l2,n)        { \
-                       c+=n; \
-                       l1=l2=0; \
-                       switch (n) { \
-                       case 8: l2 =((DES_LONG)(*(--(c))))<<24L; \
-                       case 7: l2|=((DES_LONG)(*(--(c))))<<16L; \
-                       case 6: l2|=((DES_LONG)(*(--(c))))<< 8L; \
-                       case 5: l2|=((DES_LONG)(*(--(c))));     \
-                       case 4: l1 =((DES_LONG)(*(--(c))))<<24L; \
-                       case 3: l1|=((DES_LONG)(*(--(c))))<<16L; \
-                       case 2: l1|=((DES_LONG)(*(--(c))))<< 8L; \
-                       case 1: l1|=((DES_LONG)(*(--(c))));     \
-                               } \
-                       }
-
-#define l2c(l,c)       (*((c)++)=(unsigned char)(((l)     )&0xff), \
-                        *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>24L)&0xff))
-
-/* replacements for htonl and ntohl since I have no idea what to do
- * when faced with machines with 8 byte longs. */
-#define HDRSIZE 4
-
-#define n2l(c,l)       (l =((DES_LONG)(*((c)++)))<<24L, \
-                        l|=((DES_LONG)(*((c)++)))<<16L, \
-                        l|=((DES_LONG)(*((c)++)))<< 8L, \
-                        l|=((DES_LONG)(*((c)++))))
-
-#define l2n(l,c)       (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
-                        *((c)++)=(unsigned char)(((l)     )&0xff))
-
-/* NOTE - c is not incremented as per l2c */
-#define l2cn(l1,l2,c,n)        { \
-                       c+=n; \
-                       switch (n) { \
-                       case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \
-                       case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \
-                       case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \
-                       case 5: *(--(c))=(unsigned char)(((l2)     )&0xff); \
-                       case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \
-                       case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \
-                       case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \
-                       case 1: *(--(c))=(unsigned char)(((l1)     )&0xff); \
-                               } \
-                       }
-
-#define        ROTATE(a,n)     (((a)>>(n))+((a)<<(32-(n))))
-
-#define LOAD_DATA_tmp(a,b,c,d,e,f) LOAD_DATA(a,b,c,d,e,f,g)
-#define LOAD_DATA(R,S,u,t,E0,E1,tmp) \
-       u=R^s[S  ]; \
-       t=R^s[S+1]
-
-/* The changes to this macro may help or hinder, depending on the
- * compiler and the architecture.  gcc2 always seems to do well :-).
- * Inspired by Dana How <how@isl.stanford.edu>
- * DO NOT use the alternative version on machines with 8 byte longs.
- * It does not seem to work on the Alpha, even when DES_LONG is 4
- * bytes, probably an issue of accessing non-word aligned objects :-( */
-#ifdef DES_PTR
-
-/* It recently occurred to me that 0^0^0^0^0^0^0 == 0, so there
- * is no reason not to xor all the sub items together.  This potentially
- * saves a register since things can be xored directly into L */
-
-#if defined(DES_RISC1) || defined(DES_RISC2)
-#ifdef DES_RISC1
-#define D_ENCRYPT(LL,R,S) { \
-        unsigned int u1,u2,u3; \
-        LOAD_DATA(R,S,u,t,E0,E1,u1); \
-        u2=(int)u>>8L; \
-        u1=(int)u&0xfc; \
-        u2&=0xfc; \
-        t=ROTATE(t,4); \
-        u>>=16L; \
-        LL^= *(const DES_LONG *)(des_SP      +u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x200+u2); \
-        u3=(int)(u>>8L); \
-        u1=(int)u&0xfc; \
-        u3&=0xfc; \
-        LL^= *(const DES_LONG *)(des_SP+0x400+u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x600+u3); \
-        u2=(int)t>>8L; \
-        u1=(int)t&0xfc; \
-        u2&=0xfc; \
-        t>>=16L; \
-        LL^= *(const DES_LONG *)(des_SP+0x100+u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x300+u2); \
-        u3=(int)t>>8L; \
-        u1=(int)t&0xfc; \
-        u3&=0xfc; \
-        LL^= *(const DES_LONG *)(des_SP+0x500+u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x700+u3); }
-#endif /* DES_RISC1 */
-#ifdef DES_RISC2
-#define D_ENCRYPT(LL,R,S) { \
-        unsigned int u1,u2,s1,s2; \
-        LOAD_DATA(R,S,u,t,E0,E1,u1); \
-        u2=(int)u>>8L; \
-        u1=(int)u&0xfc; \
-        u2&=0xfc; \
-        t=ROTATE(t,4); \
-        LL^= *(const DES_LONG *)(des_SP      +u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x200+u2); \
-        s1=(int)(u>>16L); \
-        s2=(int)(u>>24L); \
-        s1&=0xfc; \
-        s2&=0xfc; \
-        LL^= *(const DES_LONG *)(des_SP+0x400+s1); \
-        LL^= *(const DES_LONG *)(des_SP+0x600+s2); \
-        u2=(int)t>>8L; \
-        u1=(int)t&0xfc; \
-        u2&=0xfc; \
-        LL^= *(const DES_LONG *)(des_SP+0x100+u1); \
-        LL^= *(const DES_LONG *)(des_SP+0x300+u2); \
-        s1=(int)(t>>16L); \
-        s2=(int)(t>>24L); \
-        s1&=0xfc; \
-        s2&=0xfc; \
-        LL^= *(const DES_LONG *)(des_SP+0x500+s1); \
-        LL^= *(const DES_LONG *)(des_SP+0x700+s2); }
-#endif /* DES_RISC2 */
-#else  /* DES_RISC1 || DES_RISC2 */
-#define D_ENCRYPT(LL,R,S) { \
-       LOAD_DATA_tmp(R,S,u,t,E0,E1); \
-       t=ROTATE(t,4); \
-       LL^= \
-       *(const DES_LONG *)(des_SP      +((u     )&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x200+((u>> 8L)&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x400+((u>>16L)&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x600+((u>>24L)&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x100+((t     )&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x300+((t>> 8L)&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x500+((t>>16L)&0xfc))^ \
-       *(const DES_LONG *)(des_SP+0x700+((t>>24L)&0xfc)); }
-#endif /* DES_RISC1 || DES_RISC2 */
-#else /* original version */
-
-#if defined(DES_RISC1) || defined(DES_RISC2)
-#ifdef DES_RISC1
-#define D_ENCRYPT(LL,R,S) {\
-       unsigned int u1,u2,u3; \
-       LOAD_DATA(R,S,u,t,E0,E1,u1); \
-       u>>=2L; \
-       t=ROTATE(t,6); \
-       u2=(int)u>>8L; \
-       u1=(int)u&0x3f; \
-       u2&=0x3f; \
-       u>>=16L; \
-       LL^=des_SPtrans[0][u1]; \
-       LL^=des_SPtrans[2][u2]; \
-       u3=(int)u>>8L; \
-       u1=(int)u&0x3f; \
-       u3&=0x3f; \
-       LL^=des_SPtrans[4][u1]; \
-       LL^=des_SPtrans[6][u3]; \
-       u2=(int)t>>8L; \
-       u1=(int)t&0x3f; \
-       u2&=0x3f; \
-       t>>=16L; \
-       LL^=des_SPtrans[1][u1]; \
-       LL^=des_SPtrans[3][u2]; \
-       u3=(int)t>>8L; \
-       u1=(int)t&0x3f; \
-       u3&=0x3f; \
-       LL^=des_SPtrans[5][u1]; \
-       LL^=des_SPtrans[7][u3]; }
-#endif /* DES_RISC1 */
-#ifdef DES_RISC2
-#define D_ENCRYPT(LL,R,S) {\
-       unsigned int u1,u2,s1,s2; \
-       LOAD_DATA(R,S,u,t,E0,E1,u1); \
-       u>>=2L; \
-       t=ROTATE(t,6); \
-       u2=(int)u>>8L; \
-       u1=(int)u&0x3f; \
-       u2&=0x3f; \
-       LL^=des_SPtrans[0][u1]; \
-       LL^=des_SPtrans[2][u2]; \
-       s1=(int)u>>16L; \
-       s2=(int)u>>24L; \
-       s1&=0x3f; \
-       s2&=0x3f; \
-       LL^=des_SPtrans[4][s1]; \
-       LL^=des_SPtrans[6][s2]; \
-       u2=(int)t>>8L; \
-       u1=(int)t&0x3f; \
-       u2&=0x3f; \
-       LL^=des_SPtrans[1][u1]; \
-       LL^=des_SPtrans[3][u2]; \
-       s1=(int)t>>16; \
-       s2=(int)t>>24L; \
-       s1&=0x3f; \
-       s2&=0x3f; \
-       LL^=des_SPtrans[5][s1]; \
-       LL^=des_SPtrans[7][s2]; }
-#endif /* DES_RISC2 */
-
-#else /* DES_RISC1 || DES_RISC2 */
-
-#define D_ENCRYPT(LL,R,S) {\
-       LOAD_DATA_tmp(R,S,u,t,E0,E1); \
-       t=ROTATE(t,4); \
-       LL^=\
-               des_SPtrans[0][(u>> 2L)&0x3f]^ \
-               des_SPtrans[2][(u>>10L)&0x3f]^ \
-               des_SPtrans[4][(u>>18L)&0x3f]^ \
-               des_SPtrans[6][(u>>26L)&0x3f]^ \
-               des_SPtrans[1][(t>> 2L)&0x3f]^ \
-               des_SPtrans[3][(t>>10L)&0x3f]^ \
-               des_SPtrans[5][(t>>18L)&0x3f]^ \
-               des_SPtrans[7][(t>>26L)&0x3f]; }
-#endif /* DES_RISC1 || DES_RISC2 */
-#endif /* DES_PTR */
-
-       /* IP and FP
-        * The problem is more of a geometric problem than random bit fiddling.
-        0  1  2  3  4  5  6  7      62 54 46 38 30 22 14  6
-        8  9 10 11 12 13 14 15      60 52 44 36 28 20 12  4
-       16 17 18 19 20 21 22 23      58 50 42 34 26 18 10  2
-       24 25 26 27 28 29 30 31  to  56 48 40 32 24 16  8  0
-
-       32 33 34 35 36 37 38 39      63 55 47 39 31 23 15  7
-       40 41 42 43 44 45 46 47      61 53 45 37 29 21 13  5
-       48 49 50 51 52 53 54 55      59 51 43 35 27 19 11  3
-       56 57 58 59 60 61 62 63      57 49 41 33 25 17  9  1
-
-       The output has been subject to swaps of the form
-       0 1 -> 3 1 but the odd and even bits have been put into
-       2 3    2 0
-       different words.  The main trick is to remember that
-       t=((l>>size)^r)&(mask);
-       r^=t;
-       l^=(t<<size);
-       can be used to swap and move bits between words.
-
-       So l =  0  1  2  3  r = 16 17 18 19
-               4  5  6  7      20 21 22 23
-               8  9 10 11      24 25 26 27
-              12 13 14 15      28 29 30 31
-       becomes (for size == 2 and mask == 0x3333)
-          t =   2^16  3^17 -- --   l =  0  1 16 17  r =  2  3 18 19
-                6^20  7^21 -- --        4  5 20 21       6  7 22 23
-               10^24 11^25 -- --        8  9 24 25      10 11 24 25
-               14^28 15^29 -- --       12 13 28 29      14 15 28 29
-
-       Thanks for hints from Richard Outerbridge - he told me IP&FP
-       could be done in 15 xor, 10 shifts and 5 ands.
-       When I finally started to think of the problem in 2D
-       I first got ~42 operations without xors.  When I remembered
-       how to use xors :-) I got it to its final state.
-       */
-#define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\
-       (b)^=(t),\
-       (a)^=((t)<<(n)))
-
-#define IP(l,r) \
-       { \
-       register DES_LONG tt; \
-       PERM_OP(r,l,tt, 4,0x0f0f0f0fL); \
-       PERM_OP(l,r,tt,16,0x0000ffffL); \
-       PERM_OP(r,l,tt, 2,0x33333333L); \
-       PERM_OP(l,r,tt, 8,0x00ff00ffL); \
-       PERM_OP(r,l,tt, 1,0x55555555L); \
-       }
-
-#define FP(l,r) \
-       { \
-       register DES_LONG tt; \
-       PERM_OP(l,r,tt, 1,0x55555555L); \
-       PERM_OP(r,l,tt, 8,0x00ff00ffL); \
-       PERM_OP(l,r,tt, 2,0x33333333L); \
-       PERM_OP(r,l,tt,16,0x0000ffffL); \
-       PERM_OP(l,r,tt, 4,0x0f0f0f0fL); \
-       }
-#endif
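The PERM_OP macro above is the whole IP/FP story: the idiom from the comment,
t = ((l >> n) ^ r) & mask; r ^= t; l ^= t << n, exchanges the masked bit
groups between two words in a handful of operations. A stand-alone
demonstration (the values in main are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    /* Same exchange PERM_OP performs: swap the bits selected by m
     * between (a >> n) and b. */
    static void perm_op(uint32_t *a, uint32_t *b, int n, uint32_t m)
    {
            uint32_t t = ((*a >> n) ^ *b) & m;

            *b ^= t;
            *a ^= t << n;
    }

    int main(void)
    {
            uint32_t l = 0x01234567, r = 0x89abcdef;

            perm_op(&r, &l, 4, 0x0f0f0f0f);  /* first step of IP above */
            printf("l=%08x r=%08x\n", l, r);
            return 0;
    }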
diff --git a/bsd/crypto/des/des_setkey.c b/bsd/crypto/des/des_setkey.c
deleted file mode 100644 (file)
index 5b7f5de..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/des_setkey.c,v 1.1.2.4 2002/03/26 10:12:25 ume Exp $       */
-/*     $KAME: des_setkey.c,v 1.7 2001/09/10 04:03:58 itojun Exp $      */
-
-/* crypto/des/set_key.c */
-/* Copyright (C) 1995-1996 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-/* set_key.c v 1.4 eay 24/9/91
- * 1.4 Speed up by 400% :-)
- * 1.3 added register declarations.
- * 1.2 unrolled make_key_sched a bit more
- * 1.1 added norm_expand_bits
- * 1.0 First working version
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <crypto/des/des_locl.h>
-#include <crypto/des/podd.h>
-#include <crypto/des/sk.h>
-
-int des_check_key=0;
-
-void des_set_odd_parity(des_cblock *key)
-{
-       int i;
-
-       for (i=0; i<DES_KEY_SZ; i++)
-               (*key)[i]=odd_parity[(*key)[i]];
-}
-
-int des_check_key_parity(des_cblock *key)
-{
-       int i;
-
-       for (i=0; i<DES_KEY_SZ; i++)
-               {
-               if ((*key)[i] != odd_parity[(*key)[i]])
-                       return(0);
-               }
-       return(1);
-}
-
-/* Weak and semi-weak keys as taken from
- * %A D.W. Davies
- * %A W.L. Price
- * %T Security for Computer Networks
- * %I John Wiley & Sons
- * %D 1984
- * Many thanks to smb@ulysses.att.com (Steven Bellovin) for the reference
- * (and actual cblock values).
- */
-#define NUM_WEAK_KEY   16
-static des_cblock weak_keys[NUM_WEAK_KEY]={
-       /* weak keys */
-       {0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01},
-       {0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE},
-       {0x1F,0x1F,0x1F,0x1F,0x0E,0x0E,0x0E,0x0E},
-       {0xE0,0xE0,0xE0,0xE0,0xF1,0xF1,0xF1,0xF1},
-       /* semi-weak keys */
-       {0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE},
-       {0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01},
-       {0x1F,0xE0,0x1F,0xE0,0x0E,0xF1,0x0E,0xF1},
-       {0xE0,0x1F,0xE0,0x1F,0xF1,0x0E,0xF1,0x0E},
-       {0x01,0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1},
-       {0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1,0x01},
-       {0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E,0xFE},
-       {0xFE,0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E},
-       {0x01,0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E},
-       {0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E,0x01},
-       {0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1,0xFE},
-       {0xFE,0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1}};
-
-int des_is_weak_key(des_cblock *key)
-{
-       int i;
-
-       for (i=0; i<NUM_WEAK_KEY; i++)
-               /* Added == 0 to comparison, I obviously don't run
-                * this section very often :-(, thanks to
-                * engineering@MorningStar.Com for the fix
-                * eay 93/06/29
-                * Another problem, I was comparing only the first 4
-                * bytes, 97/03/18 */
-               if (memcmp(weak_keys[i],key,sizeof(des_cblock)) == 0) return(1);
-       return(0);
-}
-
-/* NOW DEFINED IN des_locl.h
- * See ecb_encrypt.c for a pseudo description of these macros. 
- * #define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\
- *     (b)^=(t),\
- *     (a)=((a)^((t)<<(n))))
- */
-
-#define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\
-       (a)=(a)^(t)^(t>>(16-(n))))
-
-int des_set_key(des_cblock *key, des_key_schedule schedule)
-{
-       if (des_check_key)
-       {
-               return des_set_key_checked(key, schedule);
-       }
-       else
-       {
-               des_set_key_unchecked(key, schedule);
-               return 0;
-       }
-}
-
-/* return 0 if key parity is odd (correct),
- * return -1 if key parity error,
- * return -2 if illegal weak key.
- */
-int des_set_key_checked(des_cblock *key, des_key_schedule schedule)
-{
-       if (!des_check_key_parity(key))
-               return(-1);
-       if (des_is_weak_key(key))
-               return(-2);
-       des_set_key_unchecked(key, schedule);
-       return 0;
-}
-
-void des_set_key_unchecked(des_cblock *key, des_key_schedule schedule)
-{
-       static int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0};
-       register DES_LONG c,d,t,s,t2;
-       register const unsigned char *in;
-       register DES_LONG *k;
-       register int i;
-
-       k = &schedule->ks.deslong[0];
-       in = &(*key)[0];
-
-       c2l(in,c);
-       c2l(in,d);
-
-       /* do PC1 in 47 simple operations :-)
-        * Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov)
-        * for the inspiration. :-) */
-       PERM_OP (d,c,t,4,0x0f0f0f0fL);
-       HPERM_OP(c,t,-2,0xcccc0000L);
-       HPERM_OP(d,t,-2,0xcccc0000L);
-       PERM_OP (d,c,t,1,0x55555555L);
-       PERM_OP (c,d,t,8,0x00ff00ffL);
-       PERM_OP (d,c,t,1,0x55555555L);
-       d=      (((d&0x000000ffL)<<16L)| (d&0x0000ff00L)     |
-                ((d&0x00ff0000L)>>16L)|((c&0xf0000000L)>>4L));
-       c&=0x0fffffffL;
-
-       for (i=0; i<ITERATIONS; i++)
-       {
-               if (shifts2[i])
-                       { c=((c>>2L)|(c<<26L)); d=((d>>2L)|(d<<26L)); }
-               else
-                       { c=((c>>1L)|(c<<27L)); d=((d>>1L)|(d<<27L)); }
-               c&=0x0fffffffL;
-               d&=0x0fffffffL;
-               /* could be a few fewer shifts but I am too lazy at this
-               * point in time to investigate */
-               s=      des_skb[0][ (c    )&0x3f                ]|
-                       des_skb[1][((c>> 6L)&0x03)|((c>> 7L)&0x3c)]|
-                       des_skb[2][((c>>13L)&0x0f)|((c>>14L)&0x30)]|
-                       des_skb[3][((c>>20L)&0x01)|((c>>21L)&0x06) |
-                                                       ((c>>22L)&0x38)];
-               t=      des_skb[4][ (d    )&0x3f                ]|
-                       des_skb[5][((d>> 7L)&0x03)|((d>> 8L)&0x3c)]|
-                       des_skb[6][ (d>>15L)&0x3f                ]|
-                       des_skb[7][((d>>21L)&0x0f)|((d>>22L)&0x30)];
-
-               /* table contained 0213 4657 */
-               t2=((t<<16L)|(s&0x0000ffffL))&0xffffffffL;
-               *(k++)=ROTATE(t2,30)&0xffffffffL;
-
-               t2=((s>>16L)|(t&0xffff0000L));
-               *(k++)=ROTATE(t2,26)&0xffffffffL;
-       }
-}
-
-int des_key_sched(des_cblock *key, des_key_schedule schedule)
-{
-       return(des_set_key(key,schedule));
-}
-
-void des_fixup_key_parity(des_cblock *key)
-{
-       des_set_odd_parity(key);
-}
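The c and d updates in des_set_key_unchecked() above are 28-bit rotates
packed into 32-bit words: shift, OR in the wrapped bits, then mask back down
to 28 bits. As a stand-alone helper (n is 1 or 2, per the shifts2[] table):

    #include <stdint.h>

    /* Rotate a 28-bit C/D register held in the low bits of a 32-bit
     * word, matching c=((c>>n)|(c<<(28-n))); c&=0x0fffffff above. */
    static uint32_t rot28(uint32_t h, int n)
    {
            return ((h >> n) | (h << (28 - n))) & 0x0fffffffUL;
    }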
diff --git a/bsd/crypto/des/podd.h b/bsd/crypto/des/podd.h
deleted file mode 100644 (file)
index 61646cc..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/podd.h,v 1.1.2.1 2000/07/15 07:14:21 kris Exp $    */
-/*     $KAME: podd.h,v 1.3 2000/03/27 04:36:34 sumikawa Exp $  */
-
-/* crypto/des/podd.h */
-/* Copyright (C) 1995-1996 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-static const unsigned char odd_parity[256]={
-  1,  1,  2,  2,  4,  4,  7,  7,  8,  8, 11, 11, 13, 13, 14, 14,
- 16, 16, 19, 19, 21, 21, 22, 22, 25, 25, 26, 26, 28, 28, 31, 31,
- 32, 32, 35, 35, 37, 37, 38, 38, 41, 41, 42, 42, 44, 44, 47, 47,
- 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 59, 59, 61, 61, 62, 62,
- 64, 64, 67, 67, 69, 69, 70, 70, 73, 73, 74, 74, 76, 76, 79, 79,
- 81, 81, 82, 82, 84, 84, 87, 87, 88, 88, 91, 91, 93, 93, 94, 94,
- 97, 97, 98, 98,100,100,103,103,104,104,107,107,109,109,110,110,
-112,112,115,115,117,117,118,118,121,121,122,122,124,124,127,127,
-128,128,131,131,133,133,134,134,137,137,138,138,140,140,143,143,
-145,145,146,146,148,148,151,151,152,152,155,155,157,157,158,158,
-161,161,162,162,164,164,167,167,168,168,171,171,173,173,174,174,
-176,176,179,179,181,181,182,182,185,185,186,186,188,188,191,191,
-193,193,194,194,196,196,199,199,200,200,203,203,205,205,206,206,
-208,208,211,211,213,213,214,214,217,217,218,218,220,220,223,223,
-224,224,227,227,229,229,230,230,233,233,234,234,236,236,239,239,
-241,241,242,242,244,244,247,247,248,248,251,251,253,253,254,254};
diff --git a/bsd/crypto/des/sk.h b/bsd/crypto/des/sk.h
deleted file mode 100644 (file)
index 6009c11..0000000
+++ /dev/null
@@ -1,196 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/sk.h,v 1.1.2.1 2000/07/15 07:14:21 kris Exp $      */
-/*     $KAME: sk.h,v 1.3 2000/03/27 04:36:34 sumikawa Exp $    */
-
-/* crypto/des/sk.h */
-/* Copyright (C) 1995-1996 Eric Young (eay@mincom.oz.au)
- * All rights reserved.
- *
- * This file is part of an SSL implementation written
- * by Eric Young (eay@mincom.oz.au).
- * The implementation was written so as to conform with Netscapes SSL
- * specification.  This library and applications are
- * FREE FOR COMMERCIAL AND NON-COMMERCIAL USE
- * as long as the following conditions are aheared to.
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.  If this code is used in a product,
- * Eric Young should be given attribution as the author of the parts used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Eric Young (eay@mincom.oz.au)
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-static const DES_LONG des_skb[8][64]={
-{
-/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
-0x00000000L,0x00000010L,0x20000000L,0x20000010L,
-0x00010000L,0x00010010L,0x20010000L,0x20010010L,
-0x00000800L,0x00000810L,0x20000800L,0x20000810L,
-0x00010800L,0x00010810L,0x20010800L,0x20010810L,
-0x00000020L,0x00000030L,0x20000020L,0x20000030L,
-0x00010020L,0x00010030L,0x20010020L,0x20010030L,
-0x00000820L,0x00000830L,0x20000820L,0x20000830L,
-0x00010820L,0x00010830L,0x20010820L,0x20010830L,
-0x00080000L,0x00080010L,0x20080000L,0x20080010L,
-0x00090000L,0x00090010L,0x20090000L,0x20090010L,
-0x00080800L,0x00080810L,0x20080800L,0x20080810L,
-0x00090800L,0x00090810L,0x20090800L,0x20090810L,
-0x00080020L,0x00080030L,0x20080020L,0x20080030L,
-0x00090020L,0x00090030L,0x20090020L,0x20090030L,
-0x00080820L,0x00080830L,0x20080820L,0x20080830L,
-0x00090820L,0x00090830L,0x20090820L,0x20090830L,
-},{
-/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
-0x00000000L,0x02000000L,0x00002000L,0x02002000L,
-0x00200000L,0x02200000L,0x00202000L,0x02202000L,
-0x00000004L,0x02000004L,0x00002004L,0x02002004L,
-0x00200004L,0x02200004L,0x00202004L,0x02202004L,
-0x00000400L,0x02000400L,0x00002400L,0x02002400L,
-0x00200400L,0x02200400L,0x00202400L,0x02202400L,
-0x00000404L,0x02000404L,0x00002404L,0x02002404L,
-0x00200404L,0x02200404L,0x00202404L,0x02202404L,
-0x10000000L,0x12000000L,0x10002000L,0x12002000L,
-0x10200000L,0x12200000L,0x10202000L,0x12202000L,
-0x10000004L,0x12000004L,0x10002004L,0x12002004L,
-0x10200004L,0x12200004L,0x10202004L,0x12202004L,
-0x10000400L,0x12000400L,0x10002400L,0x12002400L,
-0x10200400L,0x12200400L,0x10202400L,0x12202400L,
-0x10000404L,0x12000404L,0x10002404L,0x12002404L,
-0x10200404L,0x12200404L,0x10202404L,0x12202404L,
-},{
-/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
-0x00000000L,0x00000001L,0x00040000L,0x00040001L,
-0x01000000L,0x01000001L,0x01040000L,0x01040001L,
-0x00000002L,0x00000003L,0x00040002L,0x00040003L,
-0x01000002L,0x01000003L,0x01040002L,0x01040003L,
-0x00000200L,0x00000201L,0x00040200L,0x00040201L,
-0x01000200L,0x01000201L,0x01040200L,0x01040201L,
-0x00000202L,0x00000203L,0x00040202L,0x00040203L,
-0x01000202L,0x01000203L,0x01040202L,0x01040203L,
-0x08000000L,0x08000001L,0x08040000L,0x08040001L,
-0x09000000L,0x09000001L,0x09040000L,0x09040001L,
-0x08000002L,0x08000003L,0x08040002L,0x08040003L,
-0x09000002L,0x09000003L,0x09040002L,0x09040003L,
-0x08000200L,0x08000201L,0x08040200L,0x08040201L,
-0x09000200L,0x09000201L,0x09040200L,0x09040201L,
-0x08000202L,0x08000203L,0x08040202L,0x08040203L,
-0x09000202L,0x09000203L,0x09040202L,0x09040203L,
-},{
-/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
-0x00000000L,0x00100000L,0x00000100L,0x00100100L,
-0x00000008L,0x00100008L,0x00000108L,0x00100108L,
-0x00001000L,0x00101000L,0x00001100L,0x00101100L,
-0x00001008L,0x00101008L,0x00001108L,0x00101108L,
-0x04000000L,0x04100000L,0x04000100L,0x04100100L,
-0x04000008L,0x04100008L,0x04000108L,0x04100108L,
-0x04001000L,0x04101000L,0x04001100L,0x04101100L,
-0x04001008L,0x04101008L,0x04001108L,0x04101108L,
-0x00020000L,0x00120000L,0x00020100L,0x00120100L,
-0x00020008L,0x00120008L,0x00020108L,0x00120108L,
-0x00021000L,0x00121000L,0x00021100L,0x00121100L,
-0x00021008L,0x00121008L,0x00021108L,0x00121108L,
-0x04020000L,0x04120000L,0x04020100L,0x04120100L,
-0x04020008L,0x04120008L,0x04020108L,0x04120108L,
-0x04021000L,0x04121000L,0x04021100L,0x04121100L,
-0x04021008L,0x04121008L,0x04021108L,0x04121108L,
-},{
-/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
-0x00000000L,0x10000000L,0x00010000L,0x10010000L,
-0x00000004L,0x10000004L,0x00010004L,0x10010004L,
-0x20000000L,0x30000000L,0x20010000L,0x30010000L,
-0x20000004L,0x30000004L,0x20010004L,0x30010004L,
-0x00100000L,0x10100000L,0x00110000L,0x10110000L,
-0x00100004L,0x10100004L,0x00110004L,0x10110004L,
-0x20100000L,0x30100000L,0x20110000L,0x30110000L,
-0x20100004L,0x30100004L,0x20110004L,0x30110004L,
-0x00001000L,0x10001000L,0x00011000L,0x10011000L,
-0x00001004L,0x10001004L,0x00011004L,0x10011004L,
-0x20001000L,0x30001000L,0x20011000L,0x30011000L,
-0x20001004L,0x30001004L,0x20011004L,0x30011004L,
-0x00101000L,0x10101000L,0x00111000L,0x10111000L,
-0x00101004L,0x10101004L,0x00111004L,0x10111004L,
-0x20101000L,0x30101000L,0x20111000L,0x30111000L,
-0x20101004L,0x30101004L,0x20111004L,0x30111004L,
-},{
-/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
-0x00000000L,0x08000000L,0x00000008L,0x08000008L,
-0x00000400L,0x08000400L,0x00000408L,0x08000408L,
-0x00020000L,0x08020000L,0x00020008L,0x08020008L,
-0x00020400L,0x08020400L,0x00020408L,0x08020408L,
-0x00000001L,0x08000001L,0x00000009L,0x08000009L,
-0x00000401L,0x08000401L,0x00000409L,0x08000409L,
-0x00020001L,0x08020001L,0x00020009L,0x08020009L,
-0x00020401L,0x08020401L,0x00020409L,0x08020409L,
-0x02000000L,0x0A000000L,0x02000008L,0x0A000008L,
-0x02000400L,0x0A000400L,0x02000408L,0x0A000408L,
-0x02020000L,0x0A020000L,0x02020008L,0x0A020008L,
-0x02020400L,0x0A020400L,0x02020408L,0x0A020408L,
-0x02000001L,0x0A000001L,0x02000009L,0x0A000009L,
-0x02000401L,0x0A000401L,0x02000409L,0x0A000409L,
-0x02020001L,0x0A020001L,0x02020009L,0x0A020009L,
-0x02020401L,0x0A020401L,0x02020409L,0x0A020409L,
-},{
-/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
-0x00000000L,0x00000100L,0x00080000L,0x00080100L,
-0x01000000L,0x01000100L,0x01080000L,0x01080100L,
-0x00000010L,0x00000110L,0x00080010L,0x00080110L,
-0x01000010L,0x01000110L,0x01080010L,0x01080110L,
-0x00200000L,0x00200100L,0x00280000L,0x00280100L,
-0x01200000L,0x01200100L,0x01280000L,0x01280100L,
-0x00200010L,0x00200110L,0x00280010L,0x00280110L,
-0x01200010L,0x01200110L,0x01280010L,0x01280110L,
-0x00000200L,0x00000300L,0x00080200L,0x00080300L,
-0x01000200L,0x01000300L,0x01080200L,0x01080300L,
-0x00000210L,0x00000310L,0x00080210L,0x00080310L,
-0x01000210L,0x01000310L,0x01080210L,0x01080310L,
-0x00200200L,0x00200300L,0x00280200L,0x00280300L,
-0x01200200L,0x01200300L,0x01280200L,0x01280300L,
-0x00200210L,0x00200310L,0x00280210L,0x00280310L,
-0x01200210L,0x01200310L,0x01280210L,0x01280310L,
-},{
-/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
-0x00000000L,0x04000000L,0x00040000L,0x04040000L,
-0x00000002L,0x04000002L,0x00040002L,0x04040002L,
-0x00002000L,0x04002000L,0x00042000L,0x04042000L,
-0x00002002L,0x04002002L,0x00042002L,0x04042002L,
-0x00000020L,0x04000020L,0x00040020L,0x04040020L,
-0x00000022L,0x04000022L,0x00040022L,0x04040022L,
-0x00002020L,0x04002020L,0x00042020L,0x04042020L,
-0x00002022L,0x04002022L,0x00042022L,0x04042022L,
-0x00000800L,0x04000800L,0x00040800L,0x04040800L,
-0x00000802L,0x04000802L,0x00040802L,0x04040802L,
-0x00002800L,0x04002800L,0x00042800L,0x04042800L,
-0x00002802L,0x04002802L,0x00042802L,0x04042802L,
-0x00000820L,0x04000820L,0x00040820L,0x04040820L,
-0x00000822L,0x04000822L,0x00040822L,0x04040822L,
-0x00002820L,0x04002820L,0x00042820L,0x04042820L,
-0x00002822L,0x04002822L,0x00042822L,0x04042822L,
-}};
diff --git a/bsd/crypto/des/spr.h b/bsd/crypto/des/spr.h
deleted file mode 100644 (file)
index e7d8626..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/des/spr.h,v 1.1.2.2 2002/03/26 10:12:25 ume Exp $      */
-/*     $KAME: spr.h,v 1.4 2001/09/10 04:03:58 itojun Exp $     */
-
-/* crypto/des/spr.h */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- * 
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to.  The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- * 
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    "This product includes cryptographic software written by
- *     Eric Young (eay@cryptsoft.com)"
- *    The word 'cryptographic' can be left out if the rouines from the library
- *    being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from 
- *    the apps directory (application code) you must include an acknowledgement:
- *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- * 
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-const DES_LONG des_SPtrans[8][64]={
-{
-/* nibble 0 */
-0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
-0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
-0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
-0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
-0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
-0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
-0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
-0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
-0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
-0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
-0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
-0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
-0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
-0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
-0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
-0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L,
-},{
-/* nibble 1 */
-0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
-0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
-0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
-0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
-0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
-0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
-0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
-0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
-0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
-0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
-0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
-0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
-0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
-0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
-0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
-0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L,
-},{
-/* nibble 2 */
-0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
-0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
-0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
-0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
-0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
-0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
-0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
-0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
-0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
-0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
-0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
-0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
-0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
-0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
-0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
-0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L,
-},{
-/* nibble 3 */
-0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
-0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
-0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
-0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
-0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
-0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
-0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
-0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
-0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
-0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
-0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
-0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
-0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
-0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
-0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
-0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L,
-},{
-/* nibble 4 */
-0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
-0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
-0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
-0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
-0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
-0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
-0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
-0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
-0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
-0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
-0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
-0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
-0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
-0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
-0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
-0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L,
-},{
-/* nibble 5 */
-0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
-0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
-0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
-0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
-0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
-0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
-0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
-0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
-0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
-0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
-0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
-0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
-0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
-0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
-0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
-0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
-},{
-/* nibble 6 */
-0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
-0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
-0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
-0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
-0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
-0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
-0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
-0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
-0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
-0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
-0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
-0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
-0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
-0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
-0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
-0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
-},{
-/* nibble 7 */
-0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
-0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
-0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
-0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
-0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
-0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
-0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
-0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
-0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
-0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
-0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
-0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
-0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
-0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
-0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
-0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
-}};
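
For reference, the des_SPtrans table deleted above folds DES's eight S-boxes and the P permutation into eight 64-entry word tables, so one Feistel round reduces to eight table lookups XORed together. A minimal sketch of how such a table is indexed, modeled on the D_ENCRYPT macro of Eric Young's libdes that this header served (a sketch, not the kernel's verbatim code; the per-round subkeys ks[0]/ks[1] are assumed pre-arranged by the key schedule to match this table layout):

    typedef unsigned int DES_LONG;
    extern const DES_LONG des_SPtrans[8][64];

    /* One DES round with combined SP tables: u feeds the even boxes, the
     * 4-bit-rotated t feeds the odd boxes; each lookup applies S-box + P
     * in a single step. */
    static DES_LONG
    des_round(DES_LONG l, DES_LONG r, const DES_LONG ks[2])
    {
            DES_LONG u = r ^ ks[0];
            DES_LONG t = r ^ ks[1];
            t = (t >> 4) | (t << 28);
            l ^= des_SPtrans[0][(u >>  2) & 0x3f] ^
                 des_SPtrans[2][(u >> 10) & 0x3f] ^
                 des_SPtrans[4][(u >> 18) & 0x3f] ^
                 des_SPtrans[6][(u >> 26) & 0x3f] ^
                 des_SPtrans[1][(t >>  2) & 0x3f] ^
                 des_SPtrans[3][(t >> 10) & 0x3f] ^
                 des_SPtrans[5][(t >> 18) & 0x3f] ^
                 des_SPtrans[7][(t >> 26) & 0x3f];
            return l;
    }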
diff --git a/bsd/crypto/sha2.h b/bsd/crypto/sha2.h
new file mode 100644 (file)
index 0000000..7e1dea8
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * This header file is kept for legacy reasons and may be removed in the
+ * future; the interface resides in <libkern/crypto/sha2.h>.
+ */
+#include <libkern/crypto/sha2.h>
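
With bsd/crypto/sha2.h reduced to a shim, kernel code can include the libkern header directly. A minimal usage sketch, assuming the conventional SHA256_Init/SHA256_Update/SHA256_Final entry points and SHA256_DIGEST_LENGTH constant exposed by <libkern/crypto/sha2.h>:

    #include <stddef.h>
    #include <stdint.h>
    #include <libkern/crypto/sha2.h>

    /* Hash a buffer in one shot (sketch; names per the assumption above). */
    static void
    sha256_of(const void *buf, size_t len, uint8_t digest[SHA256_DIGEST_LENGTH])
    {
            SHA256_CTX ctx;

            SHA256_Init(&ctx);
            SHA256_Update(&ctx, buf, len);
            SHA256_Final(digest, &ctx);
    }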
diff --git a/bsd/crypto/sha2/Makefile b/bsd/crypto/sha2/Makefile
deleted file mode 100644 (file)
index 4cc93fb..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_I386 = \
-
-INSTINC_SUBDIRS_X86_64 = \
-
-INSTINC_SUBDIRS_ARM = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS_X86_64 = \
-
-EXPINC_SUBDIRS_ARM = \
-
-PRIVATE_DATAFILES = \
-       sha2.h
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/bsd/crypto/sha2/intel/sha256.s b/bsd/crypto/sha2/intel/sha256.s
deleted file mode 100644 (file)
index 59353ff..0000000
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
-	This file provides a hand-written x86_64/i386 implementation of the following function
-
-       void SHA256_Transform(SHA256_ctx *ctx, char *data, unsigned int num_blocks);
-
-       which is a C function in sha2.c (from xnu).
-
-	The code first probes cpu_capabilities to detect whether SSSE3 is supported. If not, it branches to
-	SHA256_Transform_nossse3 (in a separate source file, sha256nossse3.s), which was cloned from this file
-	with all SSSE3 instructions replaced with SSE3-or-below instructions.
-
-       sha256 algorithm per block description:
-
-		1. W(0:15) = big-endian (per 4 bytes) loading of input data (64 bytes)
-               2. load 8 digests a-h from ctx->state
-               3. for r = 0:15
-                               T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
-                               d += T1;
-                               h = T1 + Sigma0(a) + Maj(a,b,c)
-                               permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
-               4. for r = 16:63
-                               W[r] = W[r-16] + sigma1(W[r-2]) + W[r-7] + sigma0(W[r-15]);
-                               T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
-                               d += T1;
-                               h = T1 + Sigma0(a) + Maj(a,b,c)
-                               permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
-                               
-       In the assembly implementation: 
-               - a circular window of message schedule W(r:r+15) is updated and stored in xmm0-xmm3
-               - its corresponding W+K(r:r+15) is updated and stored in a stack space circular buffer
-               - the 8 digests (a-h) will be stored in GPR or m32 (all in GPR for x86_64, and some in m32 for i386)
-
-       the implementation per block looks like
-
-       ----------------------------------------------------------------------------
-
-       load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
-       pre_calculate and store W+K(0:15) in stack
-
-       load digests a-h from ctx->state;
-
-       for (r=0;r<48;r+=4) {
-               digests a-h update and permute round r:r+3
-               update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
-       }
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-       }
-
-       ctx->states += digests a-h;
-
-       ----------------------------------------------------------------------------
-
-	our implementation (which allows multiple blocks per call) pipelines the loading of W/WK of a future block
-       into the last 16 rounds of its previous block:
-
-       ----------------------------------------------------------------------------
-
-       load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
-       pre_calculate and store W+K(0:15) in stack
-
-L_loop:
-
-       load digests a-h from ctx->state;
-
-       for (r=0;r<48;r+=4) {
-               digests a-h update and permute round r:r+3
-               update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
-       }
-
-       num_block--;
-       if (num_block==0)       jmp L_last_block;
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-               load W([r:r+3]%16) (big-endian per 4 bytes) into xmm0:xmm3 
-               pre_calculate and store W+K([r:r+3]%16) in stack
-       }
-
-       ctx->states += digests a-h;
-
-       jmp     L_loop;
-
-L_last_block:
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-       }
-
-       ctx->states += digests a-h;
-
-       ------------------------------------------------------------------------
-
-       Apple CoreOS vector & numerics
-       cclee 8-3-10
-*/
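
As a cross-check against the per-block description above, the scalar algorithm the assembly implements, written as a short C sketch (the Sigma/Ch/Maj definitions match the commented macros later in this file; K256[64] is the round-constant table and W[64] the expanded message schedule):

    #include <stdint.h>

    #define ROTR32(b, x) (((x) >> (b)) | ((x) << (32 - (b))))
    #define Ch(x, y, z)  (((x) & (y)) ^ (~(x) & (z)))
    #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
    #define Sigma0(x)    (ROTR32(2, (x)) ^ ROTR32(13, (x)) ^ ROTR32(22, (x)))
    #define Sigma1(x)    (ROTR32(6, (x)) ^ ROTR32(11, (x)) ^ ROTR32(25, (x)))

    /* 64 rounds over one block; the "permute" step becomes plain renaming. */
    static void
    sha256_rounds(uint32_t st[8], const uint32_t W[64], const uint32_t K256[64])
    {
            uint32_t a = st[0], b = st[1], c = st[2], d = st[3];
            uint32_t e = st[4], f = st[5], g = st[6], h = st[7];
            int r;

            for (r = 0; r < 64; r++) {
                    uint32_t T1 = h + Sigma1(e) + Ch(e, f, g) + K256[r] + W[r];
                    uint32_t T2 = Sigma0(a) + Maj(a, b, c);
                    h = g; g = f; f = e; e = d + T1;  /* "d += T1"             */
                    d = c; c = b; b = a; a = T1 + T2; /* "h = T1+Sigma0+Maj"   */
            }
            st[0] += a; st[1] += b; st[2] += c; st[3] += d;
            st[4] += e; st[5] += f; st[6] += g; st[7] += h;
    }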
-
-#if defined    KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-
-       // associate variables with registers or memory
-
-#if defined    (__x86_64__)
-       #define sp                      %rsp
-       #define ctx                     %rdi
-       #define data            %rsi
-       #define num_blocks      %rdx
-
-       #define a                       %r8d
-       #define b                       %r9d
-       #define c                       %r10d
-       #define d                       %r11d
-       #define e                       %r12d
-       #define f                       %r13d
-       #define g                       %r14d
-       #define h                       %r15d
-
-       #define K                       %rbx
-       #define stack_size      (8+16*8+16+64)  // 8 (align) + xmm0:xmm7 + L_aligned_bswap + WK(0:15)
-
-       #define L_aligned_bswap 64(sp)          // bswap : big-endian loading of 4-byte words
-       #define xmm_save        80(sp)                  // starting address for xmm save/restore
-#else
-       #define sp      %esp
-       #define stack_size      (12+16*8+16+16+64)      // 12 (align) + xmm0:xmm7 + 16 (c,f,h,K) + L_aligned_bswap + WK(0:15)
-       #define ctx_addr        20+stack_size(sp)       // ret_addr + 4 registers = 20, 1st caller argument
-       #define data_addr       24+stack_size(sp)       // 2nd caller argument
-       #define num_blocks      28+stack_size(sp)       // 3rd caller argument
-
-       #define a       %ebx
-       #define b       %edx
-       #define c       64(sp)
-       #define d       %ebp
-       #define e       %esi
-       #define f       68(sp)
-       #define g       %edi
-       #define h       72(sp)
-
-       #define K       76(sp)                                  // pointer to K256[] table
-       #define L_aligned_bswap 80(sp)          // bswap : big-endian loading of 4-byte words
-       #define xmm_save        96(sp)                  // starting address for xmm save/restore
-#endif
-
-       // 2 local variables
-       #define t       %eax
-       #define s       %ecx
-
-	// a window (16 words) of message schedule
-       #define W0      %xmm0
-       #define W1      %xmm1
-       #define W2      %xmm2
-       #define W3      %xmm3
-
-       // circular buffer for WK[(r:r+15)%16]
-       #define WK(x)   (x&15)*4(sp)
-
-// #define Ch(x,y,z)   (((x) & (y)) ^ ((~(x)) & (z)))
-
-       .macro Ch
-       mov             $0, t           // x
-       mov             $0, s           // x
-       not             t                       // ~x
-       and             $1, s           // x & y
-       and             $2, t           // ~x & z
-       xor             s, t            // t = ((x) & (y)) ^ ((~(x)) & (z));
-       .endm
-
-// #define Maj(x,y,z)  (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
-
-       .macro  Maj
-       mov             $0, t           // x
-       mov             $1, s           // y
-       and             s, t            // x&y
-       and             $2, s           // y&z
-       xor             s, t            // (x&y) ^ (y&z)
-       mov             $2, s           // z
-       and             $0, s           // (x&z)
-       xor             s, t            // t = (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 
-       .endm
-
-/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
-// #define R(b,x)      ((x) >> (b))
-/* 32-bit Rotate-right (used in SHA-256): */
-// #define S32(b,x)    (((x) >> (b)) | ((x) << (32 - (b))))
-
-// #define sigma0_256(x)   (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
-
-	// performs sigma0_256 on 4 words in an xmm register
-       // use xmm6/xmm7 as intermediate registers
-       .macro  sigma0
-       movdqa  $0, %xmm6
-       movdqa  $0, %xmm7
-       psrld   $$3, $0                 // SHR3(x)
-       psrld   $$7, %xmm6              // part of ROTR7
-       pslld   $$14, %xmm7             // part of ROTR18
-       pxor    %xmm6, $0
-       pxor    %xmm7, $0
-       psrld   $$11, %xmm6             // part of ROTR18
-       pslld   $$11, %xmm7             // part of ROTR7
-       pxor    %xmm6, $0
-       pxor    %xmm7, $0
-       .endm
-
-// #define sigma1_256(x)   (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
-
-	// performs sigma1_256 on 4 words in an xmm register
-       // use xmm6/xmm7 as intermediate registers
-       .macro  sigma1
-       movdqa  $0, %xmm6
-       movdqa  $0, %xmm7
-       psrld   $$10, $0                // SHR10(x)
-       psrld   $$17, %xmm6             // part of ROTR17
-       pxor    %xmm6, $0
-       pslld   $$13, %xmm7             // part of ROTR19
-       pxor    %xmm7, $0
-       psrld   $$2, %xmm6              // part of ROTR19
-       pxor    %xmm6, $0
-       pslld   $$2, %xmm7              // part of ROTR17
-       pxor    %xmm7, $0
-       .endm
-
-// #define Sigma0_256(x)   (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-
-       .macro  Sigma0
-       mov             $0, t                   // x
-       mov             $0, s                   // x
-       ror             $$2, t                  // S32(2,  (x))
-       ror             $$13, s                 // S32(13,  (x))
-       xor             s, t                    // S32(2,  (x)) ^ S32(13, (x))
-       ror             $$9, s                  // S32(22,  (x))
-       xor             s, t                    // t = (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-       .endm
-
-// #define Sigma1_256(x)   (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-
-       .macro  Sigma1
-       mov             $0, s                   // x
-       ror             $$6, s                  // S32(6,  (x))
-       mov             s, t                    // S32(6,  (x))
-       ror             $$5, s                  // S32(11, (x))
-       xor             s, t                    // S32(6,  (x)) ^ S32(11, (x))
-       ror             $$14, s                 // S32(25, (x))
-       xor             s, t                    // t = (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-       .endm
-
-       // per round digests update
-       .macro  round
-	Sigma1	$4				// t = Sigma1(e)
-	add		t, $7			// use h to store h+Sigma1(e)
-	Ch		$4, $5, $6		// t = Ch (e, f, g);
-	add		$7, t			// t = h+Sigma1(e)+Ch(e,f,g);
-	add		WK($8), t		// t = h+Sigma1(e)+Ch(e,f,g)+WK[r] = T1
-	add		t, $3			// d += T1;
-	mov		t, $7			// h = T1
-       Sigma0  $0                              // t = Sigma0(a);
-       add             t, $7                   // h = T1 + Sigma0(a);
-       Maj             $0, $1, $2              // t = Maj(a,b,c)
-       add             t, $7                   // h = T1 + Sigma0(a) + Maj(a,b,c);                     
-       .endm
-
-       // per 4 rounds digests update and permutation
-       // permutation is absorbed by rotating the roles of digests a-h
-       .macro  rounds
-       round   $0, $1, $2, $3, $4, $5, $6, $7, 0+$8
-       round   $7, $0, $1, $2, $3, $4, $5, $6, 1+$8
-       round   $6, $7, $0, $1, $2, $3, $4, $5, 2+$8
-       round   $5, $6, $7, $0, $1, $2, $3, $4, 3+$8
-       .endm
-
-	// update 4 words of the message schedule W and W+K, 16 rounds ahead
-       .macro  message_schedule
-
-       // 4 32-bit K256 words in xmm5
-#if defined    (__x86_64__)
-       movdqu  (K), %xmm5
-#else
-       mov             K, t
-       movdqu  (t), %xmm5 
-#endif 
-       add             $$16, K                         // K points to next K256 word for next iteration
-       movdqa  $1, %xmm4                       // W7:W4
-       palignr $$4, $0, %xmm4          // W4:W1
-       sigma0  %xmm4                           // sigma0(W4:W1)
-       movdqa  $3, %xmm6                       // W15:W12
-       paddd   %xmm4, $0                       // $0 = W3:W0 + sigma0(W4:W1) 
-       palignr $$4, $2, %xmm6          // W12:W9
-       paddd   %xmm6, $0                       // $0 = W12:W9 + sigma0(W4:W1) + W3:W0  
-       movdqa  $3, %xmm4                       // W15:W12
-       psrldq  $$8, %xmm4                      // 0,0,W15,W14  
-       sigma1  %xmm4                           // sigma1(0,0,W15,W14)
-       paddd   %xmm4, $0                       // sigma1(0,0,W15,W14) + W12:W9 + sigma0(W4:W1) + W3:W0
-       movdqa  $0, %xmm4                       // W19-sigma1(W17), W18-sigma1(W16), W17, W16
-       pslldq  $$8, %xmm4                      // W17, W16, 0, 0
-       sigma1  %xmm4                           // sigma1(W17,W16,0,0)
-       paddd   %xmm4, $0                       // W19:W16
-       paddd   $0, %xmm5                       // WK
-       movdqa  %xmm5, WK($4)
-       .endm
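
The macro above is the 4-wide vector form of the scalar schedule recurrence; sigma1 is applied twice because W[r+2] and W[r+3] depend on W[r] and W[r+1] produced within the same 4-lane step, so the upper two lanes must be patched in a second pass. The scalar equivalent (a sketch, using the commented sigma definitions):

    #define ROTR32(b, x) (((x) >> (b)) | ((x) << (32 - (b))))
    #define s0(x)        (ROTR32(7, (x)) ^ ROTR32(18, (x)) ^ ((x) >> 3))
    #define s1(x)        (ROTR32(17, (x)) ^ ROTR32(19, (x)) ^ ((x) >> 10))

    /* W[r] = sigma1(W[r-2]) + W[r-7] + sigma0(W[r-15]) + W[r-16] */
    static void
    sha256_schedule(uint32_t W[64])   /* W[0..15] already loaded big-endian */
    {
            int r;

            for (r = 16; r < 64; r++)
                    W[r] = s1(W[r - 2]) + W[r - 7] + s0(W[r - 15]) + W[r - 16];
    }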
-
-	// this macro is used in the last 16 rounds of the current block
-	// it reads the next message block (16 4-byte words), loads it into the 4 words W[r:r+3], computes WK[r:r+3],
-	// and saves them into the stack to prepare for the next block
-
-       .macro  update_W_WK
-#if defined (__x86_64__)
-       movdqu  $0*16(data), $1         // read 4 4-byte words
-       pshufb  L_aligned_bswap, $1     // big-endian of each 4-byte word, W[r:r+3]
-       movdqu  $0*16(K), %xmm4         // K[r:r+3]
-#else
-       mov             data_addr, t
-       movdqu  $0*16(t), $1            // read 4 4-byte words
-       pshufb  L_aligned_bswap, $1     // big-endian of each 4-byte word, W[r:r+3]
-       mov             K, t
-       movdqu  $0*16(t), %xmm4         // K[r:r+3]
-#endif
-       paddd   $1, %xmm4                       // WK[r:r+3]
-       movdqa  %xmm4, WK($0*4)         // save WK[r:r+3] into stack circular buffer
-       .endm
-
-       .text
-
-#if defined (__x86_64__) || defined (__i386__)
-
-       .globl  _SHA256_Transform
-
-_SHA256_Transform:
-
-
-       // detect SSSE3 and dispatch appropriate code branch
-       #if defined __x86_64__
-        movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
-        mov     (%rax), %eax                                    // %eax = __cpu_capabilities
-    #else       // i386
-        #if defined KERNEL
-            leal    __cpu_capabilities, %eax                    // %eax -> __cpu_capabilities
-            mov     (%eax), %eax                                // %eax = __cpu_capabilities
-        #else
-            mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
-        #endif
-    #endif
-    test    $(kHasSupplementalSSE3), %eax
-    je      _SHA256_Transform_nossse3                              // branch to no-ssse3 code
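
In C terms the dispatch stanza above amounts to the following (a sketch; _get_cpu_capabilities() is assumed to be the accessor declared in <i386/cpu_capabilities.h>, with user-space code reading the comm-page word instead):

    /* Hypothetical C rendering of the runtime CPU-feature dispatch. */
    void
    SHA256_Transform(SHA256_ctx *ctx, char *data, unsigned int num_blocks)
    {
            if (!(_get_cpu_capabilities() & kHasSupplementalSSE3)) {
                    SHA256_Transform_nossse3(ctx, data, num_blocks);
                    return;
            }
            /* ...otherwise fall through to the SSSE3 body below. */
    }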
-
-       // push callee-saved registers
-#if defined    (__x86_64__)
-       push    %rbp
-       push    %rbx
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-#else
-    push    %ebp
-       push    %ebx
-    push    %esi
-    push    %edi
-#endif
-
-       // allocate stack space
-       sub             $stack_size, sp
-
-       // if kernel code, save used xmm registers
-#if    KERNEL
-       movdqa  %xmm0, 0*16+xmm_save
-       movdqa  %xmm1, 1*16+xmm_save
-       movdqa  %xmm2, 2*16+xmm_save
-       movdqa  %xmm3, 3*16+xmm_save
-       movdqa  %xmm4, 4*16+xmm_save
-       movdqa  %xmm5, 5*16+xmm_save
-       movdqa  %xmm6, 6*16+xmm_save
-       movdqa  %xmm7, 7*16+xmm_save
-#endif
-
-       // set up bswap parameters in the aligned stack space and pointer to table K256[]
-#if defined (__x86_64__)
-       lea             _K256(%rip), K
-       lea             L_bswap(%rip), %rax
-       movdqa  (%rax), %xmm0
-#else
-       lea             _K256, t
-       mov             t, K
-       lea             L_bswap, %eax
-       movdqa  (%eax), %xmm0
-#endif
-       movdqa  %xmm0, L_aligned_bswap  
-
-       // load W[0:15] into xmm0-xmm3
-#if defined (__x86_64__)
-       movdqu  0*16(data), W0
-       movdqu  1*16(data), W1
-       movdqu  2*16(data), W2
-       movdqu  3*16(data), W3
-       add             $64, data
-#else
-       mov             data_addr, t
-       movdqu  0*16(t), W0
-       movdqu  1*16(t), W1
-       movdqu  2*16(t), W2
-       movdqu  3*16(t), W3
-       add             $64, data_addr
-#endif
-       pshufb  L_aligned_bswap, W0
-       pshufb  L_aligned_bswap, W1
-       pshufb  L_aligned_bswap, W2
-       pshufb  L_aligned_bswap, W3
-
-       // compute WK[0:15] and save in stack
-#if defined (__x86_64__)
-       movdqu  0*16(K), %xmm4  
-       movdqu  1*16(K), %xmm5
-       movdqu  2*16(K), %xmm6  
-       movdqu  3*16(K), %xmm7
-#else
-       mov             K, t
-       movdqu  0*16(t), %xmm4  
-       movdqu  1*16(t), %xmm5
-       movdqu  2*16(t), %xmm6  
-       movdqu  3*16(t), %xmm7
-#endif
-       add             $64, K
-       paddd   %xmm0, %xmm4
-       paddd   %xmm1, %xmm5
-       paddd   %xmm2, %xmm6
-       paddd   %xmm3, %xmm7
-       movdqa  %xmm4, WK(0)
-       movdqa  %xmm5, WK(4)
-       movdqa  %xmm6, WK(8)
-       movdqa  %xmm7, WK(12)
-
-L_loop:
-
-       // digests a-h = ctx->states;
-#if defined (__x86_64__)
-       mov             0*4(ctx), a
-       mov             1*4(ctx), b
-       mov             2*4(ctx), c
-       mov             3*4(ctx), d
-       mov             4*4(ctx), e
-       mov             5*4(ctx), f
-       mov             6*4(ctx), g
-       mov             7*4(ctx), h
-#else
-       mov             ctx_addr, t
-       mov     0*4(t), a
-       mov     1*4(t), b
-       mov     2*4(t), s
-       mov             s, c
-       mov     3*4(t), d
-       mov     4*4(t), e
-       mov     5*4(t), s
-       mov             s, f
-       mov     6*4(t), g
-       mov     7*4(t), s
-       mov             s, h
-#endif
-
-       // rounds 0:47 interleaved with W/WK update for rounds 16:63
-       rounds  a, b, c, d, e, f, g, h, 0
-       message_schedule W0,W1,W2,W3,16
-       rounds  e, f, g, h, a, b, c, d, 4 
-       message_schedule W1,W2,W3,W0,20
-       rounds  a, b, c, d, e, f, g, h, 8
-       message_schedule W2,W3,W0,W1,24
-       rounds  e, f, g, h, a, b, c, d, 12 
-       message_schedule W3,W0,W1,W2,28
-       rounds  a, b, c, d, e, f, g, h, 16
-       message_schedule W0,W1,W2,W3,32
-       rounds  e, f, g, h, a, b, c, d, 20 
-       message_schedule W1,W2,W3,W0,36
-       rounds  a, b, c, d, e, f, g, h, 24
-       message_schedule W2,W3,W0,W1,40
-       rounds  e, f, g, h, a, b, c, d, 28 
-       message_schedule W3,W0,W1,W2,44
-       rounds  a, b, c, d, e, f, g, h, 32
-       message_schedule W0,W1,W2,W3,48
-       rounds  e, f, g, h, a, b, c, d, 36 
-       message_schedule W1,W2,W3,W0,52
-       rounds  a, b, c, d, e, f, g, h, 40
-       message_schedule W2,W3,W0,W1,56
-       rounds  e, f, g, h, a, b, c, d, 44 
-       message_schedule W3,W0,W1,W2,60
-
-       // revert K to the beginning of K256[]
-#if defined __x86_64__
-       sub             $256, K
-#else
-       subl    $256, K
-#endif
-
-       sub             $1, num_blocks                          // num_blocks--
-       je              L_final_block                           // if final block, wrap up final rounds
-
-       // rounds 48:63 interleaved with W/WK initialization for next block rounds 0:15 
-       rounds  a, b, c, d, e, f, g, h, 48
-       update_W_WK     0, W0
-       rounds  e, f, g, h, a, b, c, d, 52 
-       update_W_WK     1, W1
-       rounds  a, b, c, d, e, f, g, h, 56
-       update_W_WK     2, W2
-       rounds  e, f, g, h, a, b, c, d, 60 
-       update_W_WK     3, W3
-
-       add             $64, K
-#if defined (__x86_64__)
-       add             $64, data
-#else
-       add             $64, data_addr
-#endif
-
-       // ctx->states += digests a-h
-#if    defined (__x86_64__)
-       add             a, 0*4(ctx)
-       add             b, 1*4(ctx)
-       add             c, 2*4(ctx)
-       add             d, 3*4(ctx)
-       add             e, 4*4(ctx)
-       add             f, 5*4(ctx)
-       add             g, 6*4(ctx)
-       add             h, 7*4(ctx)
-#else
-       mov             ctx_addr, t
-       add             a, 0*4(t)
-       add             b, 1*4(t)
-       mov             c, s
-       add             s, 2*4(t)
-       add             d, 3*4(t)
-       add             e, 4*4(t)
-       mov             f, s
-       add             s, 5*4(t)
-       add             g, 6*4(t)
-       mov             h, s
-       add             s, 7*4(t)
-#endif
-
-       jmp             L_loop                          // branch for next block
-
-       // wrap up digest update round 48:63 for final block
-L_final_block:
-       rounds  a, b, c, d, e, f, g, h, 48
-       rounds  e, f, g, h, a, b, c, d, 52 
-       rounds  a, b, c, d, e, f, g, h, 56
-       rounds  e, f, g, h, a, b, c, d, 60 
-
-       // ctx->states += digests a-h
-#if    defined (__x86_64__)
-       add             a, 0*4(ctx)
-       add             b, 1*4(ctx)
-       add             c, 2*4(ctx)
-       add             d, 3*4(ctx)
-       add             e, 4*4(ctx)
-       add             f, 5*4(ctx)
-       add             g, 6*4(ctx)
-       add             h, 7*4(ctx)
-#else
-       mov             ctx_addr, t
-       add             a, 0*4(t)
-       add             b, 1*4(t)
-       mov             c, s
-       add             s, 2*4(t)
-       add             d, 3*4(t)
-       add             e, 4*4(t)
-       mov             f, s
-       add             s, 5*4(t)
-       add             g, 6*4(t)
-       mov             h, s
-       add             s, 7*4(t)
-#endif
-
-       // if kernel, restore xmm0-xmm7
-#if    KERNEL
-       movdqa  0*16+xmm_save, %xmm0
-       movdqa  1*16+xmm_save, %xmm1
-       movdqa  2*16+xmm_save, %xmm2
-       movdqa  3*16+xmm_save, %xmm3
-       movdqa  4*16+xmm_save, %xmm4
-       movdqa  5*16+xmm_save, %xmm5
-       movdqa  6*16+xmm_save, %xmm6
-       movdqa  7*16+xmm_save, %xmm7
-#endif
-
-       // free allocated stack memory
-       add             $stack_size, sp
-
-       // restore callee-saved registers
-#if defined (__x86_64__)
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-       pop             %rbx
-       pop             %rbp
-#else
-    pop                %edi
-    pop                %esi
-       pop             %ebx
-    pop                %ebp
-#endif
-
-       // return
-       ret
-
-
-       .const
-       .align  4, 0x90
-
-L_bswap:
-    .long   0x00010203
-    .long   0x04050607
-    .long   0x08090a0b
-    .long   0x0c0d0e0f
-
-#endif         // x86_64/i386
-
diff --git a/bsd/crypto/sha2/intel/sha256nossse3.s b/bsd/crypto/sha2/intel/sha256nossse3.s
deleted file mode 100644 (file)
index b4dd0a0..0000000
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
-	This file provides a hand-written x86_64/i386 implementation of the following function
-
-       void SHA256_Transform(SHA256_ctx *ctx, char *data, unsigned int num_blocks);
-
-       which is a C function in sha2.c (from xnu).
-
-	The code SHA256_Transform_nossse3 is a clone of SHA256_Transform
-	with all SSSE3 instructions replaced with SSE3-or-below instructions.
-
-	For performance reasons, this function should not be called directly. This file works
-	together with the one that implements SHA256_Transform: there, cpu_capabilities is probed to detect
-	SSSE3, and if SSSE3 is not supported, execution branches to this no-SSSE3 variant.
-
-       sha256 algorithm per block description:
-
-		1. W(0:15) = big-endian (per 4 bytes) loading of input data (64 bytes)
-               2. load 8 digests a-h from ctx->state
-               3. for r = 0:15
-                               T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
-                               d += T1;
-                               h = T1 + Sigma0(a) + Maj(a,b,c)
-                               permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
-               4. for r = 16:63
-                               W[r] = W[r-16] + sigma1(W[r-2]) + W[r-7] + sigma0(W[r-15]);
-                               T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
-                               d += T1;
-                               h = T1 + Sigma0(a) + Maj(a,b,c)
-                               permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
-                               
-       In the assembly implementation: 
-               - a circular window of message schedule W(r:r+15) is updated and stored in xmm0-xmm3
-               - its corresponding W+K(r:r+15) is updated and stored in a stack space circular buffer
-               - the 8 digests (a-h) will be stored in GPR or m32 (all in GPR for x86_64, and some in m32 for i386)
-
-       the implementation per block looks like
-
-       ----------------------------------------------------------------------------
-
-       load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
-       pre_calculate and store W+K(0:15) in stack
-
-       load digests a-h from ctx->state;
-
-       for (r=0;r<48;r+=4) {
-               digests a-h update and permute round r:r+3
-               update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
-       }
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-       }
-
-       ctx->states += digests a-h;
-
-       ----------------------------------------------------------------------------
-
-	our implementation (which allows multiple blocks per call) pipelines the loading of W/WK of a future block
-       into the last 16 rounds of its previous block:
-
-       ----------------------------------------------------------------------------
-
-       load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
-       pre_calculate and store W+K(0:15) in stack
-
-L_loop:
-
-       load digests a-h from ctx->state;
-
-       for (r=0;r<48;r+=4) {
-               digests a-h update and permute round r:r+3
-               update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
-       }
-
-       num_block--;
-       if (num_block==0)       jmp L_last_block;
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-               load W([r:r+3]%16) (big-endian per 4 bytes) into xmm0:xmm3 
-               pre_calculate and store W+K([r:r+3]%16) in stack
-       }
-
-       ctx->states += digests a-h;
-
-       jmp     L_loop;
-
-L_last_block:
-
-       for (r=48;r<64;r+=4) {
-               digests a-h update and permute round r:r+3
-       }
-
-       ctx->states += digests a-h;
-
-       ------------------------------------------------------------------------
-
-       Apple CoreOS vector & numerics
-       cclee 8-3-10
-*/
-
-#if defined    KERNEL
-#include <i386/cpu_capabilities.h>
-#else
-#include <System/i386/cpu_capabilities.h>
-#endif
-
-       // associate variables with registers or memory
-
-#if defined    (__x86_64__)
-       #define sp                      %rsp
-       #define ctx                     %rdi
-       #define data            %rsi
-       #define num_blocks      %rdx
-
-       #define a                       %r8d
-       #define b                       %r9d
-       #define c                       %r10d
-       #define d                       %r11d
-       #define e                       %r12d
-       #define f                       %r13d
-       #define g                       %r14d
-       #define h                       %r15d
-
-       #define K                       %rbx
-       #define stack_size      (8+16*8+16+64)  // 8 (align) + xmm0:xmm7 + L_aligned_bswap + WK(0:15)
-
-       #define xmm_save        80(sp)                  // starting address for xmm save/restore
-#else
-       #define sp      %esp
-       #define stack_size      (12+16*8+16+16+64)      // 12 (align) + xmm0:xmm7 + 16 (c,f,h,K) + L_aligned_bswap + WK(0:15)
-       #define ctx_addr        20+stack_size(sp)       // ret_addr + 4 registers = 20, 1st caller argument
-       #define data_addr       24+stack_size(sp)       // 2nd caller argument
-       #define num_blocks      28+stack_size(sp)       // 3rd caller argument
-
-       #define a       %ebx
-       #define b       %edx
-       #define c       64(sp)
-       #define d       %ebp
-       #define e       %esi
-       #define f       68(sp)
-       #define g       %edi
-       #define h       72(sp)
-
-       #define K       76(sp)                                  // pointer to K256[] table
-       #define xmm_save        96(sp)                  // starting address for xmm save/restore
-#endif
-
-       // 2 local variables
-       #define t       %eax
-       #define s       %ecx
-
-	// a window (16 words) of message schedule
-       #define W0      %xmm0
-       #define W1      %xmm1
-       #define W2      %xmm2
-       #define W3      %xmm3
-
-       // circular buffer for WK[(r:r+15)%16]
-       #define WK(x)   (x&15)*4(sp)
-
-// #define Ch(x,y,z)   (((x) & (y)) ^ ((~(x)) & (z)))
-
-       .macro Ch
-       mov             $0, t           // x
-       mov             $0, s           // x
-       not             t                       // ~x
-       and             $1, s           // x & y
-       and             $2, t           // ~x & z
-       xor             s, t            // t = ((x) & (y)) ^ ((~(x)) & (z));
-       .endm
-
-// #define Maj(x,y,z)  (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
-
-       .macro  Maj
-       mov             $0, t           // x
-       mov             $1, s           // y
-       and             s, t            // x&y
-       and             $2, s           // y&z
-       xor             s, t            // (x&y) ^ (y&z)
-       mov             $2, s           // z
-       and             $0, s           // (x&z)
-       xor             s, t            // t = (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 
-       .endm
-
-/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
-// #define R(b,x)      ((x) >> (b))
-/* 32-bit Rotate-right (used in SHA-256): */
-// #define S32(b,x)    (((x) >> (b)) | ((x) << (32 - (b))))
-
-// #define sigma0_256(x)   (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
-
-	// performs sigma0_256 on 4 words in an xmm register
-       // use xmm6/xmm7 as intermediate registers
-       .macro  sigma0
-       movdqa  $0, %xmm6
-       movdqa  $0, %xmm7
-       psrld   $$3, $0                 // SHR3(x)
-       psrld   $$7, %xmm6              // part of ROTR7
-       pslld   $$14, %xmm7             // part of ROTR18
-       pxor    %xmm6, $0
-       pxor    %xmm7, $0
-       psrld   $$11, %xmm6             // part of ROTR18
-       pslld   $$11, %xmm7             // part of ROTR7
-       pxor    %xmm6, $0
-       pxor    %xmm7, $0
-       .endm
-
-// #define sigma1_256(x)   (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
-
-	// performs sigma1_256 on 4 words in an xmm register
-       // use xmm6/xmm7 as intermediate registers
-       .macro  sigma1
-       movdqa  $0, %xmm6
-       movdqa  $0, %xmm7
-       psrld   $$10, $0                // SHR10(x)
-       psrld   $$17, %xmm6             // part of ROTR17
-       pxor    %xmm6, $0
-       pslld   $$13, %xmm7             // part of ROTR19
-       pxor    %xmm7, $0
-       psrld   $$2, %xmm6              // part of ROTR19
-       pxor    %xmm6, $0
-       pslld   $$2, %xmm7              // part of ROTR17
-       pxor    %xmm7, $0
-       .endm
-
-// #define Sigma0_256(x)   (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-
-       .macro  Sigma0
-       mov             $0, t                   // x
-       mov             $0, s                   // x
-       ror             $$2, t                  // S32(2,  (x))
-       ror             $$13, s                 // S32(13,  (x))
-       xor             s, t                    // S32(2,  (x)) ^ S32(13, (x))
-       ror             $$9, s                  // S32(22,  (x))
-       xor             s, t                    // t = (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-       .endm
-
-// #define Sigma1_256(x)   (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-
-       .macro  Sigma1
-       mov             $0, s                   // x
-       ror             $$6, s                  // S32(6,  (x))
-       mov             s, t                    // S32(6,  (x))
-       ror             $$5, s                  // S32(11, (x))
-       xor             s, t                    // S32(6,  (x)) ^ S32(11, (x))
-       ror             $$14, s                 // S32(25, (x))
-       xor             s, t                    // t = (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-       .endm
-
-       // per round digests update
-       .macro  round
-	Sigma1	$4				// t = Sigma1(e)
-	add		t, $7			// use h to store h+Sigma1(e)
-	Ch		$4, $5, $6		// t = Ch (e, f, g);
-	add		$7, t			// t = h+Sigma1(e)+Ch(e,f,g);
-	add		WK($8), t		// t = h+Sigma1(e)+Ch(e,f,g)+WK[r] = T1
-	add		t, $3			// d += T1;
-	mov		t, $7			// h = T1
-       Sigma0  $0                              // t = Sigma0(a);
-       add             t, $7                   // h = T1 + Sigma0(a);
-       Maj             $0, $1, $2              // t = Maj(a,b,c)
-       add             t, $7                   // h = T1 + Sigma0(a) + Maj(a,b,c);                     
-       .endm
-
-       // per 4 rounds digests update and permutation
-       // permutation is absorbed by rotating the roles of digests a-h
-       .macro  rounds
-       round   $0, $1, $2, $3, $4, $5, $6, $7, 0+$8
-       round   $7, $0, $1, $2, $3, $4, $5, $6, 1+$8
-       round   $6, $7, $0, $1, $2, $3, $4, $5, 2+$8
-       round   $5, $6, $7, $0, $1, $2, $3, $4, 3+$8
-       .endm
-
-	// update 4 words of the message schedule W and W+K, 16 rounds ahead
-       .macro  message_schedule
-
-       // 4 32-bit K256 words in xmm5
-#if defined    (__x86_64__)
-       movdqu  (K), %xmm5
-#else
-       mov             K, t
-       movdqu  (t), %xmm5 
-#endif 
-       add             $$16, K                         // K points to next K256 word for next iteration
-       movdqa  $1, %xmm4                       // W7:W4
-#if 0
-       palignr $$4, $0, %xmm4          // W4:W1
-#else  // no-ssse3 implementation of palignr
-       movdqa  $0, %xmm7
-    pslldq  $$12, %xmm4
-    psrldq  $$4, %xmm7
-    por     %xmm7, %xmm4
-#endif
-       sigma0  %xmm4                           // sigma0(W4:W1)
-       movdqa  $3, %xmm6                       // W15:W12
-       paddd   %xmm4, $0                       // $0 = W3:W0 + sigma0(W4:W1) 
-#if 0
-       palignr $$4, $2, %xmm6          // W12:W9
-#else  // no-ssse3 implementation of palignr
-       movdqa  $2, %xmm7
-    pslldq  $$12, %xmm6
-    psrldq  $$4, %xmm7
-    por     %xmm7, %xmm6
-#endif
-       paddd   %xmm6, $0                       // $0 = W12:W9 + sigma0(W4:W1) + W3:W0  
-       movdqa  $3, %xmm4                       // W15:W12
-       psrldq  $$8, %xmm4                      // 0,0,W15,W14  
-       sigma1  %xmm4                           // sigma1(0,0,W15,W14)
-       paddd   %xmm4, $0                       // sigma1(0,0,W15,W14) + W12:W9 + sigma0(W4:W1) + W3:W0
-       movdqa  $0, %xmm4                       // W19-sigma1(W17), W18-sigma1(W16), W17, W16
-       pslldq  $$8, %xmm4                      // W17, W16, 0, 0
-       sigma1  %xmm4                           // sigma1(W17,W16,0,0)
-       paddd   %xmm4, $0                       // W19:W16
-       paddd   $0, %xmm5                       // WK
-       movdqa  %xmm5, WK($4)
-       .endm
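
The #else branches inside the macro above emulate the SSSE3 palignr with two SSE2 byte shifts and an OR; in intrinsics terms the identity being used is (a sketch):

    #include <emmintrin.h>   /* SSE2: pslldq/psrldq/por */
    #include <tmmintrin.h>   /* SSSE3: the palignr being emulated */

    /* palignr $4, lo, hi  ==  (hi << 12 bytes) | (lo >> 4 bytes) */
    static inline __m128i
    alignr4_sse2(__m128i hi, __m128i lo)
    {
            return _mm_or_si128(_mm_slli_si128(hi, 12), _mm_srli_si128(lo, 4));
            /* SSSE3 equivalent: _mm_alignr_epi8(hi, lo, 4) */
    }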
-
-	// this macro is used in the last 16 rounds of the current block
-	// it reads the next message block (16 4-byte words), loads it into the 4 words W[r:r+3], computes WK[r:r+3],
-	// and saves them into the stack to prepare for the next block
-
-       .macro  update_W_WK
-#if defined (__x86_64__)
-#if 0
-       movdqu  $0*16(data), $1         // read 4 4-byte words
-       pshufb  L_aligned_bswap, $1     // big-endian of each 4-byte word, W[r:r+3]
-#else  // no-ssse3 implementation
-       mov     0+$0*16(data), s
-    bswap   s
-    mov     s, 0+WK($0*4)
-    mov     4+$0*16(data), s
-    bswap   s
-    mov     s, 4+WK($0*4)
-    mov     8+$0*16(data), s
-    bswap   s
-    mov     s, 8+WK($0*4)
-    mov     12+$0*16(data), s
-    bswap   s
-    mov     s, 12+WK($0*4)
-    movdqa  WK($0*4), $1
-#endif
-       movdqu  $0*16(K), %xmm4         // K[r:r+3]
-#else
-       mov             data_addr, t
-#if 0
-       movdqu  $0*16(t), $1            // read 4 4-byte words
-       pshufb  L_aligned_bswap, $1     // big-endian of each 4-byte word, W[r:r+3]
-#else  // no-ssse3 implementation
-       mov     0+$0*16(t), s
-    bswap   s
-    mov     s, 0+WK($0*4)
-    mov     4+$0*16(t), s
-    bswap   s
-    mov     s, 4+WK($0*4)
-    mov     8+$0*16(t), s
-    bswap   s
-    mov     s, 8+WK($0*4)
-    mov     12+$0*16(t), s
-    bswap   s
-    mov     s, 12+WK($0*4)
-    movdqa  WK($0*4), $1
-#endif
-       mov             K, t
-       movdqu  $0*16(t), %xmm4         // K[r:r+3]
-#endif
-       paddd   $1, %xmm4                       // WK[r:r+3]
-       movdqa  %xmm4, WK($0*4)         // save WK[r:r+3] into stack circular buffer
-       .endm
-
-       .text
-
-#if defined (__x86_64__) || defined (__i386__)
-
-       .globl  _SHA256_Transform_nossse3
-
-_SHA256_Transform_nossse3:
-
-       // push callee-saved registers
-#if defined    (__x86_64__)
-       push    %rbp
-       push    %rbx
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-#else
-    push    %ebp
-       push    %ebx
-    push    %esi
-    push    %edi
-#endif
-
-       // allocate stack space
-       sub             $stack_size, sp
-
-       // if kernel code, save used xmm registers
-#if    KERNEL
-       movdqa  %xmm0, 0*16+xmm_save
-       movdqa  %xmm1, 1*16+xmm_save
-       movdqa  %xmm2, 2*16+xmm_save
-       movdqa  %xmm3, 3*16+xmm_save
-       movdqa  %xmm4, 4*16+xmm_save
-       movdqa  %xmm5, 5*16+xmm_save
-       movdqa  %xmm6, 6*16+xmm_save
-       movdqa  %xmm7, 7*16+xmm_save
-#endif
-
-       // set up pointer to table K256[]
-#if defined (__x86_64__)
-       lea             _K256(%rip), K
-#else
-       lea             _K256, t
-       mov             t, K
-#endif
-
-       // load W[0:15] into xmm0-xmm3
-    .macro  mybswap
-    movl    0+$0*16($1), a
-    movl    4+$0*16($1), b
-    movl    8+$0*16($1), e
-    movl    12+$0*16($1), d
-    bswap   a
-    bswap   b
-    bswap   e
-    bswap   d
-    movl    a, $0*16(sp)
-    movl    b, 4+$0*16(sp)
-    movl    e, 8+$0*16(sp)
-    movl    d, 12+$0*16(sp)
-    .endm
-
-#if defined (__x86_64__)
-    mybswap 0, data
-    mybswap 1, data
-    mybswap 2, data
-    mybswap 3, data
-    add     $64, data
-#else
-    mov     data_addr, t
-    mybswap 0, t
-    mybswap 1, t
-    mybswap 2, t
-    mybswap 3, t
-    add     $64, data_addr
-#endif
-    movdqa  0*16(sp), W0
-    movdqa  1*16(sp), W1
-    movdqa  2*16(sp), W2
-    movdqa  3*16(sp), W3
-
-       // compute WK[0:15] and save in stack
-#if defined (__x86_64__)
-       movdqu  0*16(K), %xmm4  
-       movdqu  1*16(K), %xmm5
-       movdqu  2*16(K), %xmm6  
-       movdqu  3*16(K), %xmm7
-#else
-       mov             K, t
-       movdqu  0*16(t), %xmm4  
-       movdqu  1*16(t), %xmm5
-       movdqu  2*16(t), %xmm6  
-       movdqu  3*16(t), %xmm7
-#endif
-       add             $64, K
-       paddd   %xmm0, %xmm4
-       paddd   %xmm1, %xmm5
-       paddd   %xmm2, %xmm6
-       paddd   %xmm3, %xmm7
-       movdqa  %xmm4, WK(0)
-       movdqa  %xmm5, WK(4)
-       movdqa  %xmm6, WK(8)
-       movdqa  %xmm7, WK(12)
-
-L_loop:
-
-       // digests a-h = ctx->states;
-#if defined (__x86_64__)
-       mov             0*4(ctx), a
-       mov             1*4(ctx), b
-       mov             2*4(ctx), c
-       mov             3*4(ctx), d
-       mov             4*4(ctx), e
-       mov             5*4(ctx), f
-       mov             6*4(ctx), g
-       mov             7*4(ctx), h
-#else
-       mov             ctx_addr, t
-       mov     0*4(t), a
-       mov     1*4(t), b
-       mov     2*4(t), s
-       mov             s, c
-       mov     3*4(t), d
-       mov     4*4(t), e
-       mov     5*4(t), s
-       mov             s, f
-       mov     6*4(t), g
-       mov     7*4(t), s
-       mov             s, h
-#endif
-
-       // rounds 0:47 interleaved with W/WK update for rounds 16:63
-       rounds  a, b, c, d, e, f, g, h, 0
-       message_schedule W0,W1,W2,W3,16
-       rounds  e, f, g, h, a, b, c, d, 4 
-       message_schedule W1,W2,W3,W0,20
-       rounds  a, b, c, d, e, f, g, h, 8
-       message_schedule W2,W3,W0,W1,24
-       rounds  e, f, g, h, a, b, c, d, 12 
-       message_schedule W3,W0,W1,W2,28
-       rounds  a, b, c, d, e, f, g, h, 16
-       message_schedule W0,W1,W2,W3,32
-       rounds  e, f, g, h, a, b, c, d, 20 
-       message_schedule W1,W2,W3,W0,36
-       rounds  a, b, c, d, e, f, g, h, 24
-       message_schedule W2,W3,W0,W1,40
-       rounds  e, f, g, h, a, b, c, d, 28 
-       message_schedule W3,W0,W1,W2,44
-       rounds  a, b, c, d, e, f, g, h, 32
-       message_schedule W0,W1,W2,W3,48
-       rounds  e, f, g, h, a, b, c, d, 36 
-       message_schedule W1,W2,W3,W0,52
-       rounds  a, b, c, d, e, f, g, h, 40
-       message_schedule W2,W3,W0,W1,56
-       rounds  e, f, g, h, a, b, c, d, 44 
-       message_schedule W3,W0,W1,W2,60
-
-       // revert K to the beginning of K256[]
-#if defined __x86_64__
-       sub             $256, K
-#else
-       subl    $256, K
-#endif
-
-       sub             $1, num_blocks                          // num_blocks--
-       je              L_final_block                           // if final block, wrap up final rounds
-
-       // rounds 48:63 interleaved with W/WK initialization for next block rounds 0:15 
-       rounds  a, b, c, d, e, f, g, h, 48
-       update_W_WK     0, W0
-       rounds  e, f, g, h, a, b, c, d, 52 
-       update_W_WK     1, W1
-       rounds  a, b, c, d, e, f, g, h, 56
-       update_W_WK     2, W2
-       rounds  e, f, g, h, a, b, c, d, 60 
-       update_W_WK     3, W3
-
-       add             $64, K
-#if defined (__x86_64__)
-       add             $64, data
-#else
-       add             $64, data_addr
-#endif
-
-       // ctx->states += digests a-h
-#if    defined (__x86_64__)
-       add             a, 0*4(ctx)
-       add             b, 1*4(ctx)
-       add             c, 2*4(ctx)
-       add             d, 3*4(ctx)
-       add             e, 4*4(ctx)
-       add             f, 5*4(ctx)
-       add             g, 6*4(ctx)
-       add             h, 7*4(ctx)
-#else
-       mov             ctx_addr, t
-       add             a, 0*4(t)
-       add             b, 1*4(t)
-       mov             c, s
-       add             s, 2*4(t)
-       add             d, 3*4(t)
-       add             e, 4*4(t)
-       mov             f, s
-       add             s, 5*4(t)
-       add             g, 6*4(t)
-       mov             h, s
-       add             s, 7*4(t)
-#endif
-
-       jmp             L_loop                          // branch for next block
-
-	// wrap up digest update rounds 48:63 for the final block
-L_final_block:
-       rounds  a, b, c, d, e, f, g, h, 48
-       rounds  e, f, g, h, a, b, c, d, 52 
-       rounds  a, b, c, d, e, f, g, h, 56
-       rounds  e, f, g, h, a, b, c, d, 60 
-
-       // ctx->states += digests a-h
-#if    defined (__x86_64__)
-       add             a, 0*4(ctx)
-       add             b, 1*4(ctx)
-       add             c, 2*4(ctx)
-       add             d, 3*4(ctx)
-       add             e, 4*4(ctx)
-       add             f, 5*4(ctx)
-       add             g, 6*4(ctx)
-       add             h, 7*4(ctx)
-#else
-       mov             ctx_addr, t
-       add             a, 0*4(t)
-       add             b, 1*4(t)
-       mov             c, s
-       add             s, 2*4(t)
-       add             d, 3*4(t)
-       add             e, 4*4(t)
-       mov             f, s
-       add             s, 5*4(t)
-       add             g, 6*4(t)
-       mov             h, s
-       add             s, 7*4(t)
-#endif
-
-       // if kernel, restore xmm0-xmm7
-#if    KERNEL
-       movdqa  0*16+xmm_save, %xmm0
-       movdqa  1*16+xmm_save, %xmm1
-       movdqa  2*16+xmm_save, %xmm2
-       movdqa  3*16+xmm_save, %xmm3
-       movdqa  4*16+xmm_save, %xmm4
-       movdqa  5*16+xmm_save, %xmm5
-       movdqa  6*16+xmm_save, %xmm6
-       movdqa  7*16+xmm_save, %xmm7
-#endif
-
-       // free allocated stack memory
-       add             $stack_size, sp
-
-       // restore callee-saved registers
-#if defined (__x86_64__)
-       pop             %r15
-       pop             %r14
-       pop             %r13
-       pop             %r12
-       pop             %rbx
-       pop             %rbp
-#else
-    pop                %edi
-    pop                %esi
-       pop             %ebx
-    pop                %ebp
-#endif
-
-       // return
-       ret
-
-
-#endif         // x86_64/i386
-
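
The assembly deleted above is an x86 implementation of the SHA-256 compression function: the `rounds` macro digests four rounds at a time while `message_schedule` extends W[16..63], so round computation and schedule updates overlap. As a reading aid, here is a minimal C sketch of the two primitive steps per FIPS 180-4 (illustrative only; the helper names are not from this tree):

    #include <stdint.h>

    #define ROTR32(b, x)  (((x) >> (b)) | ((x) << (32 - (b))))

    /* Message schedule: W[t] for t in 16..63, from four earlier words. */
    static inline uint32_t sha256_schedule(uint32_t W[64], int t)
    {
        uint32_t s0 = ROTR32(7,  W[t-15]) ^ ROTR32(18, W[t-15]) ^ (W[t-15] >> 3);
        uint32_t s1 = ROTR32(17, W[t-2])  ^ ROTR32(19, W[t-2])  ^ (W[t-2] >> 10);
        return W[t] = W[t-16] + s0 + W[t-7] + s1;
    }

    /* One round: v[0..7] holds the working variables a..h,
     * wk is the precomputed W[t] + K256[t] (the "WK" the assembly stacks). */
    static inline void sha256_round(uint32_t v[8], uint32_t wk)
    {
        uint32_t e  = v[4], a = v[0];
        uint32_t T1 = v[7] + (ROTR32(6, e) ^ ROTR32(11, e) ^ ROTR32(25, e))
                    + ((e & v[5]) ^ (~e & v[6])) + wk;
        uint32_t T2 = (ROTR32(2, a) ^ ROTR32(13, a) ^ ROTR32(22, a))
                    + ((a & v[1]) ^ (a & v[2]) ^ (v[1] & v[2]));

        /* rotate h..a */
        v[7] = v[6]; v[6] = v[5]; v[5] = v[4]; v[4] = v[3] + T1;
        v[3] = v[2]; v[2] = v[1]; v[1] = v[0]; v[0] = T1 + T2;
    }
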
diff --git a/bsd/crypto/sha2/sha2.c b/bsd/crypto/sha2/sha2.c
deleted file mode 100644 (file)
index 603d328..0000000
+++ /dev/null
@@ -1,1083 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/sha2/sha2.c,v 1.2.2.2 2002/03/05 08:36:47 ume Exp $    */
-/*     $KAME: sha2.c,v 1.8 2001/11/08 01:07:52 itojun Exp $    */
-
-/*
- * sha2.c
- *
- * Version 1.0.0beta1
- *
- * Written by Aaron D. Gifford <me@aarongifford.com>
- *
- * Copyright 2000 Aaron D. Gifford.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the copyright holder nor the names of contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <machine/endian.h>
-#include <crypto/sha2/sha2.h>
-
-/*
- * ASSERT NOTE:
- * Some sanity checking code is included using assert().  On my FreeBSD
- * system, this additional code can be removed by compiling with NDEBUG
- * defined.  Check your own system's manpage on assert() to see how to
- * compile WITHOUT the sanity checking code on your system.
- *
- * UNROLLED TRANSFORM LOOP NOTE:
- * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform
- * loop version for the hash transform rounds (defined using macros
- * later in this file).  Either define on the command line, for example:
- *
- *   cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c
- *
- * or define below:
- *
- *   #define SHA2_UNROLL_TRANSFORM
- *
- */
-
-#ifndef assert
-#define assert(x) do {} while(0)
-#endif
-
-/*** SHA-256/384/512 Machine Architecture Definitions *****************/
-/*
- * BYTE_ORDER NOTE:
- *
- * Please make sure that your system defines BYTE_ORDER.  If your
- * architecture is little-endian, make sure it also defines
- * LITTLE_ENDIAN and that the two (BYTE_ORDER and LITTLE_ENDIAN) are
- * equivalent.
- *
- * If your system does not define the above, then you can do so by
- * hand like this:
- *
- *   #define LITTLE_ENDIAN 1234
- *   #define BIG_ENDIAN    4321
- *
- * And for little-endian machines, add:
- *
- *   #define BYTE_ORDER LITTLE_ENDIAN 
- *
- * Or for big-endian machines:
- *
- *   #define BYTE_ORDER BIG_ENDIAN
- *
- * The FreeBSD machine this was written on defines BYTE_ORDER
- * appropriately by including <sys/types.h> (which in turn includes
- * <machine/endian.h> where the appropriate definitions are actually
- * made).
- */
-#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN)
-#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN
-#endif
-
-/*
- * Define the following sha2_* types to types of the correct length on
- * the native architecture.  Most BSD systems and Linux define u_intXX_t
- * types.  Machines with very recent ANSI C headers can use the
- * uintXX_t definitions from inttypes.h by defining SHA2_USE_INTTYPES_H
- * during compile or in the sha.h header file.
- *
- * Machines that support neither u_intXX_t nor inttypes.h's uintXX_t
- * will need to define these three typedefs below (and the appropriate
- * ones in sha.h too) by hand according to their system architecture.
- *
- * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t
- * types and pointing out recent ANSI C support for uintXX_t in inttypes.h.
- */
-#if 0 /*def SHA2_USE_INTTYPES_H*/
-
-typedef uint8_t  sha2_byte;    /* Exactly 1 byte */
-typedef uint32_t sha2_word32;  /* Exactly 4 bytes */
-typedef uint64_t sha2_word64;  /* Exactly 8 bytes */
-
-#else /* SHA2_USE_INTTYPES_H */
-
-typedef u_int8_t  sha2_byte;   /* Exactly 1 byte */
-typedef u_int32_t sha2_word32; /* Exactly 4 bytes */
-typedef u_int64_t sha2_word64; /* Exactly 8 bytes */
-
-#endif /* SHA2_USE_INTTYPES_H */
-
-
-/*** SHA-256/384/512 Various Length Definitions ***********************/
-/* NOTE: Most of these are in sha2.h */
-#define SHA256_SHORT_BLOCK_LENGTH      (SHA256_BLOCK_LENGTH - 8)
-#define SHA384_SHORT_BLOCK_LENGTH      (SHA384_BLOCK_LENGTH - 16)
-#define SHA512_SHORT_BLOCK_LENGTH      (SHA512_BLOCK_LENGTH - 16)
-
-
-/*** ENDIAN REVERSAL MACROS *******************************************/
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define REVERSE32(w,x) { \
-       sha2_word32 tmp = (w); \
-       tmp = (tmp >> 16) | (tmp << 16); \
-       (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \
-}
-#define REVERSE64(w,x) { \
-       sha2_word64 tmp = (w); \
-       tmp = (tmp >> 32) | (tmp << 32); \
-       tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \
-             ((tmp & 0x00ff00ff00ff00ffULL) << 8); \
-       (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \
-             ((tmp & 0x0000ffff0000ffffULL) << 16); \
-}
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-
-/*
- * Macro for incrementally adding the unsigned 64-bit integer n to the
- * unsigned 128-bit integer (represented using a two-element array of
- * 64-bit words):
- */
-#define ADDINC128(w,n) { \
-       (w)[0] += (sha2_word64)(n); \
-       if ((w)[0] < (n)) { \
-               (w)[1]++; \
-       } \
-}
-
-/*** THE SIX LOGICAL FUNCTIONS ****************************************/
-/*
- * Bit shifting and rotation (used by the six SHA-XYZ logical functions):
- *
- *   NOTE:  The naming of R and S appears backwards here (R is a SHIFT and
- *   S is a ROTATION) because the SHA-256/384/512 description document
- *   (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this
- *   same "backwards" definition.
- */
-/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
-#define R(b,x)                 ((x) >> (b))
-/* 32-bit Rotate-right (used in SHA-256): */
-#define S32(b,x)       (((x) >> (b)) | ((x) << (32 - (b))))
-/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */
-#define S64(b,x)       (((x) >> (b)) | ((x) << (64 - (b))))
-
-/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */
-#define Ch(x,y,z)      (((x) & (y)) ^ ((~(x)) & (z)))
-#define Maj(x,y,z)     (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
-
-/* Four of six logical functions used in SHA-256: */
-#define Sigma0_256(x)  (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
-#define Sigma1_256(x)  (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
-#define sigma0_256(x)  (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
-#define sigma1_256(x)  (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
-
-/* Four of six logical functions used in SHA-384 and SHA-512: */
-#define Sigma0_512(x)  (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x)))
-#define Sigma1_512(x)  (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x)))
-#define sigma0_512(x)  (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7,   (x)))
-#define sigma1_512(x)  (S64(19, (x)) ^ S64(61, (x)) ^ R( 6,   (x)))
-
-/*** INTERNAL FUNCTION PROTOTYPES *************************************/
-/* NOTE: These should not be accessed directly from outside this
- * library -- they are intended for private internal visibility/use
- * only.
- */
-void SHA512_Last(SHA512_CTX*);
-#if defined    (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-void SHA256_Transform(SHA256_CTX*, const sha2_word32*, unsigned int num_blocks);
-#else
-void SHA256_Transform(SHA256_CTX*, const sha2_word32*);
-#endif
-void SHA512_Transform(SHA512_CTX*, const sha2_word64*);
-
-
-/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
-/* Hash constant words K for SHA-256: */
-#if defined    (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-const sha2_word32 K256[64] = {         // assembly code will need to read this table
-#else
-static const sha2_word32 K256[64] = {
-#endif
-       0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
-       0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
-       0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
-       0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
-       0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
-       0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
-       0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
-       0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
-       0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
-       0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
-       0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
-       0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
-       0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
-       0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
-       0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
-       0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
-};
-
-/* Initial hash value H for SHA-256: */
-static const sha2_word32 sha256_initial_hash_value[8] = {
-       0x6a09e667UL,
-       0xbb67ae85UL,
-       0x3c6ef372UL,
-       0xa54ff53aUL,
-       0x510e527fUL,
-       0x9b05688cUL,
-       0x1f83d9abUL,
-       0x5be0cd19UL
-};
-
-/* Hash constant words K for SHA-384 and SHA-512: */
-static const sha2_word64 K512[80] = {
-       0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
-       0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
-       0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
-       0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
-       0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
-       0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
-       0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
-       0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
-       0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
-       0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
-       0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
-       0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
-       0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
-       0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
-       0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
-       0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
-       0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
-       0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
-       0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
-       0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
-       0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
-       0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
-       0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
-       0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
-       0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
-       0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
-       0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
-       0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
-       0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
-       0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
-       0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
-       0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
-       0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
-       0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
-       0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
-       0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
-       0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
-       0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
-       0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
-       0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
-};
-
-/* Initial hash value H for SHA-384 */
-static const sha2_word64 sha384_initial_hash_value[8] = {
-       0xcbbb9d5dc1059ed8ULL,
-       0x629a292a367cd507ULL,
-       0x9159015a3070dd17ULL,
-       0x152fecd8f70e5939ULL,
-       0x67332667ffc00b31ULL,
-       0x8eb44a8768581511ULL,
-       0xdb0c2e0d64f98fa7ULL,
-       0x47b5481dbefa4fa4ULL
-};
-
-/* Initial hash value H for SHA-512 */
-static const sha2_word64 sha512_initial_hash_value[8] = {
-       0x6a09e667f3bcc908ULL,
-       0xbb67ae8584caa73bULL,
-       0x3c6ef372fe94f82bULL,
-       0xa54ff53a5f1d36f1ULL,
-       0x510e527fade682d1ULL,
-       0x9b05688c2b3e6c1fULL,
-       0x1f83d9abfb41bd6bULL,
-       0x5be0cd19137e2179ULL
-};
-
-/*
- * Constant used by SHA256/384/512_End() functions for converting the
- * digest to a readable hexadecimal character string:
- */
-static const char *sha2_hex_digits = "0123456789abcdef";
-
-
-/*** SHA-256: *********************************************************/
-void SHA256_Init(SHA256_CTX* context) {
-       if (context == (SHA256_CTX*)0) {
-               return;
-       }
-       bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH);
-       bzero(context->buffer, SHA256_BLOCK_LENGTH);
-       context->bitcount = 0;
-}
-
-#if !(defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__)))
-
-#ifdef SHA2_UNROLL_TRANSFORM
-
-/* Unrolled SHA-256 round macros: */
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-
-#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h)      \
-       REVERSE32(*data++, W256[j]); \
-       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
-             K256[j] + W256[j]; \
-       (d) += T1; \
-       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
-       j++
-
-
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h)      \
-       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \
-            K256[j] + (W256[j] = *data++); \
-       (d) += T1; \
-       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
-       j++
-
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND256(a,b,c,d,e,f,g,h)      \
-       s0 = W256[(j+1)&0x0f]; \
-       s0 = sigma0_256(s0); \
-       s1 = W256[(j+14)&0x0f]; \
-       s1 = sigma1_256(s1); \
-       T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \
-            (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \
-       (d) += T1; \
-       (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \
-       j++
-
-void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
-       sha2_word32     a, b, c, d, e, f, g, h, s0, s1;
-       sha2_word32     T1, *W256;
-       int             j;
-
-       W256 = (sha2_word32*)context->buffer;
-
-       /* Initialize registers with the prev. intermediate value */
-       a = context->state[0];
-       b = context->state[1];
-       c = context->state[2];
-       d = context->state[3];
-       e = context->state[4];
-       f = context->state[5];
-       g = context->state[6];
-       h = context->state[7];
-
-       j = 0;
-       do {
-               /* Rounds 0 to 15 (unrolled): */
-               ROUND256_0_TO_15(a,b,c,d,e,f,g,h);
-               ROUND256_0_TO_15(h,a,b,c,d,e,f,g);
-               ROUND256_0_TO_15(g,h,a,b,c,d,e,f);
-               ROUND256_0_TO_15(f,g,h,a,b,c,d,e);
-               ROUND256_0_TO_15(e,f,g,h,a,b,c,d);
-               ROUND256_0_TO_15(d,e,f,g,h,a,b,c);
-               ROUND256_0_TO_15(c,d,e,f,g,h,a,b);
-               ROUND256_0_TO_15(b,c,d,e,f,g,h,a);
-       } while (j < 16);
-
-       /* Now for the remaining rounds to 64: */
-       do {
-               ROUND256(a,b,c,d,e,f,g,h);
-               ROUND256(h,a,b,c,d,e,f,g);
-               ROUND256(g,h,a,b,c,d,e,f);
-               ROUND256(f,g,h,a,b,c,d,e);
-               ROUND256(e,f,g,h,a,b,c,d);
-               ROUND256(d,e,f,g,h,a,b,c);
-               ROUND256(c,d,e,f,g,h,a,b);
-               ROUND256(b,c,d,e,f,g,h,a);
-       } while (j < 64);
-
-       /* Compute the current intermediate hash value */
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-       context->state[4] += e;
-       context->state[5] += f;
-       context->state[6] += g;
-       context->state[7] += h;
-
-       /* Clean up */
-       a = b = c = d = e = f = g = h = T1 = 0;
-}
-
-#else /* SHA2_UNROLL_TRANSFORM */
-
-void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
-       sha2_word32     a, b, c, d, e, f, g, h, s0, s1;
-       sha2_word32     T1, T2, *W256;
-       int             j;
-
-       W256 = (sha2_word32*)context->buffer;
-
-       /* Initialize registers with the prev. intermediate value */
-       a = context->state[0];
-       b = context->state[1];
-       c = context->state[2];
-       d = context->state[3];
-       e = context->state[4];
-       f = context->state[5];
-       g = context->state[6];
-       h = context->state[7];
-
-       j = 0;
-       do {
-#if BYTE_ORDER == LITTLE_ENDIAN
-               /* Copy data while converting to host byte order */
-               REVERSE32(*data++,W256[j]);
-               /* Apply the SHA-256 compression function to update a..h */
-               T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j];
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
-               /* Apply the SHA-256 compression function to update a..h with copy */
-               T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++);
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-               T2 = Sigma0_256(a) + Maj(a, b, c);
-               h = g;
-               g = f;
-               f = e;
-               e = d + T1;
-               d = c;
-               c = b;
-               b = a;
-               a = T1 + T2;
-
-               j++;
-       } while (j < 16);
-
-       do {
-               /* Part of the message block expansion: */
-               s0 = W256[(j+1)&0x0f];
-               s0 = sigma0_256(s0);
-               s1 = W256[(j+14)&0x0f]; 
-               s1 = sigma1_256(s1);
-
-               /* Apply the SHA-256 compression function to update a..h */
-               T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + 
-                    (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0);
-               T2 = Sigma0_256(a) + Maj(a, b, c);
-               h = g;
-               g = f;
-               f = e;
-               e = d + T1;
-               d = c;
-               c = b;
-               b = a;
-               a = T1 + T2;
-
-               j++;
-       } while (j < 64);
-
-       /* Compute the current intermediate hash value */
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-       context->state[4] += e;
-       context->state[5] += f;
-       context->state[6] += g;
-       context->state[7] += h;
-
-       /* Clean up */
-       a = b = c = d = e = f = g = h = T1 = T2 = 0;
-}
-
-#endif /* SHA2_UNROLL_TRANSFORM */
-
-#endif // defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-
-void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
-       unsigned int    freespace, usedspace;
-
-       if (len == 0) {
-               /* Calling with no data is valid - we do nothing */
-               return;
-       }
-
-       /* Sanity check: */
-       assert(context != (SHA256_CTX*)0 && data != (sha2_byte*)0);
-
-       usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
-       if (usedspace > 0) {
-               /* Calculate how much free space is available in the buffer */
-               freespace = SHA256_BLOCK_LENGTH - usedspace;
-
-               if (len >= freespace) {
-                       /* Fill the buffer completely and process it */
-                       bcopy(data, &context->buffer[usedspace], freespace);
-                       context->bitcount += freespace << 3;
-                       len -= freespace;
-                       data += freespace;
-#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-                       SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
-#else
-                       SHA256_Transform(context, (sha2_word32*)context->buffer);
-#endif
-               } else {
-                       /* The buffer is not yet full */
-                       bcopy(data, &context->buffer[usedspace], len);
-                       context->bitcount += len << 3;
-                       /* Clean up: */
-                       usedspace = freespace = 0;
-                       return;
-               }
-       }
-#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-       {
-               unsigned int    kk = len/SHA256_BLOCK_LENGTH;
-               if (kk>0) {
-                       SHA256_Transform(context, (const sha2_word32*)data, kk);
-                       context->bitcount += (SHA256_BLOCK_LENGTH << 3)*kk;
-                       len -= SHA256_BLOCK_LENGTH*kk;
-                       data += SHA256_BLOCK_LENGTH*kk;
-               }
-       }       
-#else
-       while (len >= SHA256_BLOCK_LENGTH) {
-               /* Process as many complete blocks as we can */
-               SHA256_Transform(context, (const sha2_word32*)data);
-               context->bitcount += SHA256_BLOCK_LENGTH << 3;
-               len -= SHA256_BLOCK_LENGTH;
-               data += SHA256_BLOCK_LENGTH;
-       }
-#endif
-       if (len > 0) {
-               /* There's left-overs, so save 'em */
-               bcopy(data, context->buffer, len);
-               context->bitcount += len << 3;
-       }
-       /* Clean up: */
-       usedspace = freespace = 0;
-}
-
-void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) {
-       sha2_word32     *d = (sha2_word32*)digest;
-       unsigned int    usedspace;
-
-       /* Sanity check: */
-       assert(context != (SHA256_CTX*)0);
-
-       /* If no digest buffer is passed, we don't bother doing this: */
-       if (digest != (sha2_byte*)0) {
-               usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH;
-#if BYTE_ORDER == LITTLE_ENDIAN
-               /* Convert FROM host byte order */
-               REVERSE64(context->bitcount,context->bitcount);
-#endif
-               if (usedspace > 0) {
-                       /* Begin padding with a 1 bit: */
-                       context->buffer[usedspace++] = 0x80;
-
-                       if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) {
-                               /* Set-up for the last transform: */
-                               bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace);
-                       } else {
-                               if (usedspace < SHA256_BLOCK_LENGTH) {
-                                       bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace);
-                               }
-                               /* Do second-to-last transform: */
-#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-                               SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
-#else
-                               SHA256_Transform(context, (sha2_word32*)context->buffer);
-#endif
-
-                               /* And set-up for the last transform: */
-                               bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
-                       }
-               } else {
-                       /* Set-up for the last transform: */
-                       bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
-
-                       /* Begin padding with a 1 bit: */
-                       *context->buffer = 0x80;
-               }
-               /* Set the bit count: */
-               *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount;
-
-               /* Final transform: */
-#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
-               SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
-#else
-               SHA256_Transform(context, (sha2_word32*)context->buffer);
-#endif
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-               {
-                       /* Convert TO host byte order */
-                       int     j;
-                       for (j = 0; j < 8; j++) {
-                               REVERSE32(context->state[j],context->state[j]);
-                               *d++ = context->state[j];
-                       }
-               }
-#else
-               bcopy(context->state, d, SHA256_DIGEST_LENGTH);
-#endif
-       }
-
-       /* Clean up state data: */
-	bzero(context, sizeof(*context));
-       usedspace = 0;
-}
-
-char *SHA256_End(SHA256_CTX* context, char buffer[]) {
-       sha2_byte       digest[SHA256_DIGEST_LENGTH], *d = digest;
-       int             i;
-
-       /* Sanity check: */
-       assert(context != (SHA256_CTX*)0);
-
-       if (buffer != (char*)0) {
-               SHA256_Final(digest, context);
-
-		for (i = 0; i < SHA256_DIGEST_LENGTH; i++) {
-			buffer[i * 2]     = sha2_hex_digits[(*d & 0xf0) >> 4];
-			buffer[i * 2 + 1] = sha2_hex_digits[*d & 0x0f];
-			d++;
-		}
-		buffer[i * 2] = (char)0;	/* NUL-terminate; buffer still points at the start */
-       } else {
-		bzero(context, sizeof(*context));
-       }
-       bzero(digest, SHA256_DIGEST_LENGTH);
-       return buffer;
-}
-
-char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) {
-       SHA256_CTX      context;
-
-       SHA256_Init(&context);
-       SHA256_Update(&context, data, len);
-       return SHA256_End(&context, digest);
-}
-
-
-/*** SHA-512: *********************************************************/
-void SHA512_Init(SHA512_CTX* context) {
-       if (context == (SHA512_CTX*)0) {
-               return;
-       }
-       bcopy(sha512_initial_hash_value, context->state, SHA512_DIGEST_LENGTH);
-       bzero(context->buffer, SHA512_BLOCK_LENGTH);
-       context->bitcount[0] = context->bitcount[1] =  0;
-}
-
-#ifdef SHA2_UNROLL_TRANSFORM
-
-/* Unrolled SHA-512 round macros: */
-#if BYTE_ORDER == LITTLE_ENDIAN
-
-#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h)      \
-       REVERSE64(*data++, W512[j]); \
-       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \
-             K512[j] + W512[j]; \
-	(d) += T1; \
-	(h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \
-       j++
-
-
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h)      \
-       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \
-             K512[j] + (W512[j] = *data++); \
-       (d) += T1; \
-       (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \
-       j++
-
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-
-#define ROUND512(a,b,c,d,e,f,g,h)      \
-       s0 = W512[(j+1)&0x0f]; \
-       s0 = sigma0_512(s0); \
-       s1 = W512[(j+14)&0x0f]; \
-       s1 = sigma1_512(s1); \
-       T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + \
-             (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \
-       (d) += T1; \
-       (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \
-       j++
-
-void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) {
-       sha2_word64     a, b, c, d, e, f, g, h, s0, s1;
-       sha2_word64     T1, *W512 = (sha2_word64*)context->buffer;
-       int             j;
-
-       /* Initialize registers with the prev. intermediate value */
-       a = context->state[0];
-       b = context->state[1];
-       c = context->state[2];
-       d = context->state[3];
-       e = context->state[4];
-       f = context->state[5];
-       g = context->state[6];
-       h = context->state[7];
-
-       j = 0;
-       do {
-               ROUND512_0_TO_15(a,b,c,d,e,f,g,h);
-               ROUND512_0_TO_15(h,a,b,c,d,e,f,g);
-               ROUND512_0_TO_15(g,h,a,b,c,d,e,f);
-               ROUND512_0_TO_15(f,g,h,a,b,c,d,e);
-               ROUND512_0_TO_15(e,f,g,h,a,b,c,d);
-               ROUND512_0_TO_15(d,e,f,g,h,a,b,c);
-               ROUND512_0_TO_15(c,d,e,f,g,h,a,b);
-               ROUND512_0_TO_15(b,c,d,e,f,g,h,a);
-       } while (j < 16);
-
-       /* Now for the remaining rounds up to 79: */
-       do {
-               ROUND512(a,b,c,d,e,f,g,h);
-               ROUND512(h,a,b,c,d,e,f,g);
-               ROUND512(g,h,a,b,c,d,e,f);
-               ROUND512(f,g,h,a,b,c,d,e);
-               ROUND512(e,f,g,h,a,b,c,d);
-               ROUND512(d,e,f,g,h,a,b,c);
-               ROUND512(c,d,e,f,g,h,a,b);
-               ROUND512(b,c,d,e,f,g,h,a);
-       } while (j < 80);
-
-       /* Compute the current intermediate hash value */
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-       context->state[4] += e;
-       context->state[5] += f;
-       context->state[6] += g;
-       context->state[7] += h;
-
-       /* Clean up */
-       a = b = c = d = e = f = g = h = T1 = 0;
-}
-
-#else /* SHA2_UNROLL_TRANSFORM */
-
-void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) {
-       sha2_word64     a, b, c, d, e, f, g, h, s0, s1;
-       sha2_word64     T1, T2, *W512 = (sha2_word64*)context->buffer;
-       int             j;
-
-       /* Initialize registers with the prev. intermediate value */
-       a = context->state[0];
-       b = context->state[1];
-       c = context->state[2];
-       d = context->state[3];
-       e = context->state[4];
-       f = context->state[5];
-       g = context->state[6];
-       h = context->state[7];
-
-       j = 0;
-       do {
-#if BYTE_ORDER == LITTLE_ENDIAN
-               /* Convert TO host byte order */
-               REVERSE64(*data++, W512[j]);
-               /* Apply the SHA-512 compression function to update a..h */
-               T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j];
-#else /* BYTE_ORDER == LITTLE_ENDIAN */
-               /* Apply the SHA-512 compression function to update a..h with copy */
-               T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + (W512[j] = *data++);
-#endif /* BYTE_ORDER == LITTLE_ENDIAN */
-               T2 = Sigma0_512(a) + Maj(a, b, c);
-               h = g;
-               g = f;
-               f = e;
-               e = d + T1;
-               d = c;
-               c = b;
-               b = a;
-               a = T1 + T2;
-
-               j++;
-       } while (j < 16);
-
-       do {
-               /* Part of the message block expansion: */
-               s0 = W512[(j+1)&0x0f];
-               s0 = sigma0_512(s0);
-               s1 = W512[(j+14)&0x0f];
-               s1 =  sigma1_512(s1);
-
-               /* Apply the SHA-512 compression function to update a..h */
-               T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] +
-                    (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0);
-               T2 = Sigma0_512(a) + Maj(a, b, c);
-               h = g;
-               g = f;
-               f = e;
-               e = d + T1;
-               d = c;
-               c = b;
-               b = a;
-               a = T1 + T2;
-
-               j++;
-       } while (j < 80);
-
-       /* Compute the current intermediate hash value */
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-       context->state[4] += e;
-       context->state[5] += f;
-       context->state[6] += g;
-       context->state[7] += h;
-
-       /* Clean up */
-       a = b = c = d = e = f = g = h = T1 = T2 = 0;
-}
-
-#endif /* SHA2_UNROLL_TRANSFORM */
-
-void SHA512_Update(SHA512_CTX* context, const sha2_byte *data, size_t len) {
-       unsigned int    freespace, usedspace;
-
-       if (len == 0) {
-               /* Calling with no data is valid - we do nothing */
-               return;
-       }
-
-       /* Sanity check: */
-       assert(context != (SHA512_CTX*)0 && data != (sha2_byte*)0);
-
-       usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH;
-       if (usedspace > 0) {
-               /* Calculate how much free space is available in the buffer */
-               freespace = SHA512_BLOCK_LENGTH - usedspace;
-
-               if (len >= freespace) {
-                       /* Fill the buffer completely and process it */
-                       bcopy(data, &context->buffer[usedspace], freespace);
-                       ADDINC128(context->bitcount, freespace << 3);
-                       len -= freespace;
-                       data += freespace;
-                       SHA512_Transform(context, (sha2_word64*)context->buffer);
-               } else {
-                       /* The buffer is not yet full */
-                       bcopy(data, &context->buffer[usedspace], len);
-                       ADDINC128(context->bitcount, len << 3);
-                       /* Clean up: */
-                       usedspace = freespace = 0;
-                       return;
-               }
-       }
-       while (len >= SHA512_BLOCK_LENGTH) {
-               /* Process as many complete blocks as we can */
-               SHA512_Transform(context, (const sha2_word64*)data);
-               ADDINC128(context->bitcount, SHA512_BLOCK_LENGTH << 3);
-               len -= SHA512_BLOCK_LENGTH;
-               data += SHA512_BLOCK_LENGTH;
-       }
-       if (len > 0) {
-               /* There's left-overs, so save 'em */
-               bcopy(data, context->buffer, len);
-               ADDINC128(context->bitcount, len << 3);
-       }
-       /* Clean up: */
-       usedspace = freespace = 0;
-}
-
-void SHA512_Last(SHA512_CTX* context) {
-       unsigned int    usedspace;
-
-       usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH;
-#if BYTE_ORDER == LITTLE_ENDIAN
-       /* Convert FROM host byte order */
-       REVERSE64(context->bitcount[0],context->bitcount[0]);
-       REVERSE64(context->bitcount[1],context->bitcount[1]);
-#endif
-       if (usedspace > 0) {
-               /* Begin padding with a 1 bit: */
-               context->buffer[usedspace++] = 0x80;
-
-               if (usedspace <= SHA512_SHORT_BLOCK_LENGTH) {
-                       /* Set-up for the last transform: */
-                       bzero(&context->buffer[usedspace], SHA512_SHORT_BLOCK_LENGTH - usedspace);
-               } else {
-                       if (usedspace < SHA512_BLOCK_LENGTH) {
-                               bzero(&context->buffer[usedspace], SHA512_BLOCK_LENGTH - usedspace);
-                       }
-                       /* Do second-to-last transform: */
-                       SHA512_Transform(context, (sha2_word64*)context->buffer);
-
-                       /* And set-up for the last transform: */
-                       bzero(context->buffer, SHA512_BLOCK_LENGTH - 2);
-               }
-       } else {
-               /* Prepare for final transform: */
-               bzero(context->buffer, SHA512_SHORT_BLOCK_LENGTH);
-
-               /* Begin padding with a 1 bit: */
-               *context->buffer = 0x80;
-       }
-       /* Store the length of input data (in bits): */
-       *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1];
-       *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH+8] = context->bitcount[0];
-
-       /* Final transform: */
-       SHA512_Transform(context, (sha2_word64*)context->buffer);
-}
-
-void SHA512_Final(sha2_byte digest[], SHA512_CTX* context) {
-       sha2_word64     *d = (sha2_word64*)digest;
-
-       /* Sanity check: */
-       assert(context != (SHA512_CTX*)0);
-
-       /* If no digest buffer is passed, we don't bother doing this: */
-       if (digest != (sha2_byte*)0) {
-               SHA512_Last(context);
-
-               /* Save the hash data for output: */
-#if BYTE_ORDER == LITTLE_ENDIAN
-               {
-                       /* Convert TO host byte order */
-                       int     j;
-                       for (j = 0; j < 8; j++) {
-                               REVERSE64(context->state[j],context->state[j]);
-                               *d++ = context->state[j];
-                       }
-               }
-#else
-               bcopy(context->state, d, SHA512_DIGEST_LENGTH);
-#endif
-       }
-
-       /* Zero out state data */
-	bzero(context, sizeof(*context));
-}
-
-char *SHA512_End(SHA512_CTX* context, char buffer[]) {
-       sha2_byte       digest[SHA512_DIGEST_LENGTH], *d = digest;
-       int             i;
-
-       /* Sanity check: */
-       assert(context != (SHA512_CTX*)0);
-
-       if (buffer != (char*)0) {
-               SHA512_Final(digest, context);
-
-		for (i = 0; i < SHA512_DIGEST_LENGTH; i++) {
-			buffer[i * 2]     = sha2_hex_digits[(*d & 0xf0) >> 4];
-			buffer[i * 2 + 1] = sha2_hex_digits[*d & 0x0f];
-			d++;
-		}
-		buffer[i * 2] = (char)0;	/* NUL-terminate; buffer still points at the start */
-       } else {
-		bzero(context, sizeof(*context));
-       }
-       bzero(digest, SHA512_DIGEST_LENGTH);
-       return buffer;
-}
-
-char* SHA512_Data(const sha2_byte* data, size_t len, char digest[SHA512_DIGEST_STRING_LENGTH]) {
-       SHA512_CTX      context;
-
-       SHA512_Init(&context);
-       SHA512_Update(&context, data, len);
-       return SHA512_End(&context, digest);
-}
-
-
-/*** SHA-384: *********************************************************/
-void SHA384_Init(SHA384_CTX* context) {
-       if (context == (SHA384_CTX*)0) {
-               return;
-       }
-       bcopy(sha384_initial_hash_value, context->state, SHA512_DIGEST_LENGTH);
-       bzero(context->buffer, SHA384_BLOCK_LENGTH);
-       context->bitcount[0] = context->bitcount[1] = 0;
-}
-
-void SHA384_Update(SHA384_CTX* context, const sha2_byte* data, size_t len) {
-       SHA512_Update((SHA512_CTX*)context, data, len);
-}
-
-void SHA384_Final(sha2_byte digest[], SHA384_CTX* context) {
-       sha2_word64     *d = (sha2_word64*)digest;
-
-       /* Sanity check: */
-       assert(context != (SHA384_CTX*)0);
-
-       /* If no digest buffer is passed, we don't bother doing this: */
-       if (digest != (sha2_byte*)0) {
-               SHA512_Last((SHA512_CTX*)context);
-
-               /* Save the hash data for output: */
-#if BYTE_ORDER == LITTLE_ENDIAN
-               {
-                       /* Convert TO host byte order */
-                       int     j;
-                       for (j = 0; j < 6; j++) {
-                               REVERSE64(context->state[j],context->state[j]);
-                               *d++ = context->state[j];
-                       }
-               }
-#else
-               bcopy(context->state, d, SHA384_DIGEST_LENGTH);
-#endif
-       }
-
-       /* Zero out state data */
-	bzero(context, sizeof(*context));
-}
-
-char *SHA384_End(SHA384_CTX* context, char buffer[]) {
-       sha2_byte       digest[SHA384_DIGEST_LENGTH], *d = digest;
-       int             i;
-
-       /* Sanity check: */
-       assert(context != (SHA384_CTX*)0);
-
-       if (buffer != (char*)0) {
-               SHA384_Final(digest, context);
-
-		for (i = 0; i < SHA384_DIGEST_LENGTH; i++) {
-			buffer[i * 2]     = sha2_hex_digits[(*d & 0xf0) >> 4];
-			buffer[i * 2 + 1] = sha2_hex_digits[*d & 0x0f];
-			d++;
-		}
-		buffer[i * 2] = (char)0;	/* NUL-terminate; buffer still points at the start */
-       } else {
-		bzero(context, sizeof(*context));
-       }
-       bzero(digest, SHA384_DIGEST_LENGTH);
-       return buffer;
-}
-
-char* SHA384_Data(const sha2_byte* data, size_t len, char digest[SHA384_DIGEST_STRING_LENGTH]) {
-       SHA384_CTX      context;
-
-       SHA384_Init(&context);
-       SHA384_Update(&context, data, len);
-       return SHA384_End(&context, digest);
-}
-
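
The ADDINC128 macro in the file above maintains SHA-384/512's 128-bit message bit count as two 64-bit words, detecting carry out of the low word by checking for wraparound after the add. The same idea as a standalone, self-contained C program (a sketch, not the deleted source):

    #include <stdint.h>
    #include <assert.h>

    /* w (w[0] low, w[1] high) += n; if the low word wrapped around,
     * it now compares less than n, so propagate a carry into w[1]. */
    static void addinc128(uint64_t w[2], uint64_t n)
    {
        w[0] += n;
        if (w[0] < n)
            w[1]++;
    }

    int main(void)
    {
        uint64_t len[2] = { UINT64_MAX - 7, 0 };
        addinc128(len, 16);
        assert(len[0] == 8 && len[1] == 1);   /* low word wrapped, carry set */
        return 0;
    }
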
diff --git a/bsd/crypto/sha2/sha2.h b/bsd/crypto/sha2/sha2.h
deleted file mode 100644 (file)
index 3997e63..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*     $FreeBSD: src/sys/crypto/sha2/sha2.h,v 1.1.2.1 2001/07/03 11:01:36 ume Exp $    */
-/*     $KAME: sha2.h,v 1.3 2001/03/12 08:27:48 itojun Exp $    */
-
-/*
- * sha2.h
- *
- * Version 1.0.0beta1
- *
- * Written by Aaron D. Gifford <me@aarongifford.com>
- *
- * Copyright 2000 Aaron D. Gifford.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the copyright holder nor the names of contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#ifndef __SHA2_H__
-#define __SHA2_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*** SHA-256/384/512 Various Length Definitions ***********************/
-#define SHA256_BLOCK_LENGTH            64
-#define SHA256_DIGEST_LENGTH           32
-#define SHA256_DIGEST_STRING_LENGTH    (SHA256_DIGEST_LENGTH * 2 + 1)
-#define SHA384_BLOCK_LENGTH            128
-#define SHA384_DIGEST_LENGTH           48
-#define SHA384_DIGEST_STRING_LENGTH    (SHA384_DIGEST_LENGTH * 2 + 1)
-#define SHA512_BLOCK_LENGTH            128
-#define SHA512_DIGEST_LENGTH           64
-#define SHA512_DIGEST_STRING_LENGTH    (SHA512_DIGEST_LENGTH * 2 + 1)
-
-
-/*** SHA-256/384/512 Context Structures *******************************/
-/* NOTE: If your architecture does not define either u_intXX_t types or
- * uintXX_t (from inttypes.h), you may need to define things by hand
- * for your system:
- */
-#if 0
-typedef unsigned char u_int8_t;                /* 1-byte  (8-bits)  */
-typedef unsigned int u_int32_t;                /* 4-bytes (32-bits) */
-typedef unsigned long long u_int64_t;  /* 8-bytes (64-bits) */
-#endif
-/*
- * Most BSD systems already define u_intXX_t types, as does Linux.
- * Some systems, however, such as Compaq's Tru64 Unix, instead use the
- * uintXX_t types defined by recent ANSI C standards and included
- * in the header:
- *
- *   #include <inttypes.h>
- *
- * If you choose to use <inttypes.h> then please define: 
- *
- *   #define SHA2_USE_INTTYPES_H
- *
- * Or on the command line during compile:
- *
- *   cc -DSHA2_USE_INTTYPES_H ...
- */
-#if 0 /*def SHA2_USE_INTTYPES_H*/
-
-typedef struct _SHA256_CTX {
-       uint32_t        state[8];
-       uint64_t        bitcount;
-       uint8_t buffer[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
-typedef struct _SHA512_CTX {
-       uint64_t        state[8];
-       uint64_t        bitcount[2];
-       uint8_t buffer[SHA512_BLOCK_LENGTH];
-} SHA512_CTX;
-
-#else /* SHA2_USE_INTTYPES_H */
-
-typedef struct _SHA256_CTX {
-       u_int32_t       state[8];
-       u_int64_t       bitcount;
-       u_int8_t        buffer[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
-typedef struct _SHA512_CTX {
-       u_int64_t       state[8];
-       u_int64_t       bitcount[2];
-       u_int8_t        buffer[SHA512_BLOCK_LENGTH];
-} SHA512_CTX;
-
-#endif /* SHA2_USE_INTTYPES_H */
-
-typedef SHA512_CTX SHA384_CTX;
-
-
-/*** SHA-256/384/512 Function Prototypes ******************************/
-
-void SHA256_Init(SHA256_CTX *);
-void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t);
-void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*);
-char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]);
-char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]);
-
-void SHA384_Init(SHA384_CTX*);
-void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t);
-void SHA384_Final(u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*);
-char* SHA384_End(SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]);
-char* SHA384_Data(const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]);
-
-void SHA512_Init(SHA512_CTX*);
-void SHA512_Update(SHA512_CTX*, const u_int8_t*, size_t);
-void SHA512_Final(u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*);
-char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]);
-char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* __SHA2_H__ */
-
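
The API above follows the usual Init/Update/Final shape, with the *_End and *_Data helpers emitting a NUL-terminated lowercase hex string of *_DIGEST_STRING_LENGTH bytes. A hypothetical userland usage sketch, assuming a standalone copy of these sha2.c/sha2.h sources is available (not kernel code):

    #include <stdio.h>
    #include "sha2.h"

    int main(void)
    {
        char hex[SHA256_DIGEST_STRING_LENGTH];

        /* FIPS 180 test vector: SHA-256("abc") =
         * ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad */
        SHA256_Data((const u_int8_t *)"abc", 3, hex);
        printf("%s\n", hex);
        return 0;
    }
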
index 745a0fa0173abe875df94c72cc5abc8a4adad564..1cf6fff333c6ba102a58736c574f74e99abe3d53 100644 (file)
@@ -737,20 +737,11 @@ static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
  * for these functions, there will be a comment above the function reading
  * "Note:  not called from probe context."
  */
-void
-dtrace_panic(const char *format, ...)
-{
-       va_list alist;
-
-       va_start(alist, format);
-       dtrace_vpanic(format, alist);
-       va_end(alist);
-}
 
 int
 dtrace_assfail(const char *a, const char *f, int l)
 {
-       dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
+       panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);
 
        /*
         * We just need something here that even the most clever compiler
@@ -6168,7 +6159,7 @@ dtrace_action_panic(dtrace_ecb_t *ecb)
         * thread calls panic() from dtrace_probe(), and that panic() is
         * called exactly once.)
         */
-       dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
+       panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
            probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
            probe->dtpr_func, probe->dtpr_name, (void *)ecb);
 
@@ -6231,7 +6222,6 @@ dtrace_action_stop(void)
                uthread->t_dtrace_stop = 1;
                act_set_astbsd(current_thread());
        }
-
 #endif /* __APPLE__ */
 }
 
@@ -6246,7 +6236,6 @@ dtrace_action_pidresume(uint64_t pid)
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);           
                return;
        }
-
         uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
 
        /*
@@ -16289,7 +16278,7 @@ static void
 dtrace_module_loaded(struct modctl *ctl)
 #else
 static int
-dtrace_module_loaded(struct kmod_info *kmod)
+dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
 #endif /* __APPLE__ */
 {
        dtrace_provider_t *prv;
@@ -16378,16 +16367,36 @@ dtrace_module_loaded(struct kmod_info *kmod)
        lck_mtx_lock(&dtrace_lock);
        
        /*
-        * If the module does not have a valid UUID, we will not be able to find symbols for it from
-        * userspace. Go ahead and instrument it now.
+	 * DTrace must decide if it will instrument modules lazily via
+	 * userspace symbols (default mode), or instrument immediately via
+	 * kernel symbols (non-default mode).
+        *
+        * When in default/lazy mode, DTrace will only support modules
+        * built with a valid UUID.
+        *
+        * Overriding the default can be done explicitly in one of
+        * the following two ways.
+        *
+        * A module can force symbols from kernel space using the plist key,
+	 * OSBundleForceDTraceInit (see kmod.h).  If this per-kext state is set,
+        * we fall through and instrument this module now.
+        *
+	 * Or, the boot-arg dtrace_kernel_symbol_mode can be set to force symbols
+        * from kernel space (see dtrace_impl.h).  If this system state is set
+        * to a non-userspace mode, we fall through and instrument the module now.
         */
-       if (MOD_HAS_UUID(ctl) && (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE)) {
+
+       if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
+           (!(flag & KMOD_DTRACE_FORCE_INIT)))
+       {
+               /* We will instrument the module lazily -- this is the default */
                lck_mtx_unlock(&dtrace_lock);
                lck_mtx_unlock(&mod_lock);
                lck_mtx_unlock(&dtrace_provider_lock);
                return 0;
        }
        
+       /* We will instrument the module immediately using kernel symbols */
        ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
        
        lck_mtx_unlock(&dtrace_lock);
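
The condition above reduces to a single predicate: instrument at load time only if the global symbol mode is not the userspace default, or the kext forced it via the plist key. A sketch of that predicate with stand-in constant values (illustrative name and values; the real definitions live in dtrace_impl.h and kmod.h):

    #include <stdint.h>

    /* Stand-in values; use the kernel's definitions in real code. */
    #define DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE  1
    #define KMOD_DTRACE_FORCE_INIT                0x01

    static int
    dtrace_should_instrument_now(int symbol_mode, uint32_t flag)
    {
        return (symbol_mode != DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) ||
               ((flag & KMOD_DTRACE_FORCE_INIT) != 0);
    }
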
@@ -19713,6 +19722,8 @@ dtrace_init( void )
 
                (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
 
+               dtrace_isa_init();
+               
                /*
                 * See dtrace_impl.h for a description of dof modes.
                 * The default is lazy dof.
@@ -19781,7 +19792,7 @@ dtrace_postinit(void)
        fake_kernel_kmod.address = g_kernel_kmod_info.address;
        fake_kernel_kmod.size = g_kernel_kmod_info.size;
 
-       if (dtrace_module_loaded(&fake_kernel_kmod) != 0) {
+       if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
                printf("dtrace_postinit: Could not register mach_kernel modctl\n");
        }
        
index a046e3eace3e065f6fb5da2c31d80f189642e813..db3c5766c800c16fb6c647e4ed0d48dbe016ce4b 100644 (file)
@@ -1480,13 +1480,6 @@ dtrace_tally_fault(user_addr_t uaddr)
        return( DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT) ? TRUE : FALSE );
 }
 
-void
-dtrace_vpanic(const char *format, va_list alist)
-{
-       vuprintf( format, alist );
-       panic("dtrace_vpanic");
-}
-
 #define TOTTY   0x02
 extern int prf(const char *, va_list, int, struct tty *); /* bsd/kern/subr_prf.h */
 
index 9c75c3f1fc644d7e1156099c72ff652d2e0e3a9a..1027f0d0174eaf167476b4d88fcf9f9130508487 100644 (file)
@@ -127,10 +127,14 @@ dtrace_ptss_claim_entry(struct proc* p) {
 
 /*
  * This function does not require any locks to be held on entry.
+ *
+ * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can
+ * no longer be referenced safely. When found in this state, releasing
+ * the entry to the free list is skipped.
  */
 void
 dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e) {
-       if (p && e) {
+       if (p && p->p_dtrace_ptss_pages && e) {
                do {
                        e->next = p->p_dtrace_ptss_free_list;
                } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list));
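
The release path guarded above is a lock-free stack push: point the entry at the current free-list head, then publish it with a compare-and-swap, retrying if another thread moved the head first; the new p_dtrace_ptss_pages check simply skips the push once the proc's pages are gone. The same push pattern sketched in portable C11 atomics (the kernel uses OSCompareAndSwapPtr instead):

    #include <stdatomic.h>

    struct entry { struct entry *next; };

    static void
    freelist_push(struct entry * _Atomic *head, struct entry *e)
    {
        e->next = atomic_load(head);
        /* on failure, compare_exchange refreshes e->next to the new head */
        while (!atomic_compare_exchange_weak(head, &e->next, e))
            ;
    }
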
@@ -164,7 +168,7 @@ dtrace_ptss_allocate_page(struct proc* p)
 #if CONFIG_EMBEDDED
        /* The embedded OS has extra permissions for writable and executable pages. We can't pass in the flags
         * we need for the correct permissions from mach_vm_allocate, so need to call mach_vm_map directly. */
-       vm_map_offset_t map_addr = 0;
+       mach_vm_offset_t map_addr = 0;
        kern_return_t kr = mach_vm_map(map, &map_addr, size, 0, VM_FLAGS_ANYWHERE, IPC_PORT_NULL, 0, FALSE, VM_PROT_READ|VM_PROT_EXECUTE, VM_PROT_READ|VM_PROT_EXECUTE, VM_INHERIT_DEFAULT);
        if (kr != KERN_SUCCESS) {
                goto err;
index c3a69c48f04b165cfc0ceb855c8ae6058d86f9a9..c609fddec268ac652ff4ee54a5ea95329ce40136 100644 (file)
@@ -53,7 +53,7 @@ void (*dtrace_cpu_init)(processorid_t);
 void (*dtrace_modload)(struct modctl *);
 void (*dtrace_modunload)(struct modctl *);
 #else
-int (*dtrace_modload)(struct kmod_info *);
+int (*dtrace_modload)(struct kmod_info *, uint32_t);
 int (*dtrace_modunload)(struct kmod_info *);
 void (*dtrace_helpers_cleanup)(proc_t *);
 #endif  /*__APPLE__*/
index a9f003e655ffe3fe9b154e7b0bbbb7a3142aa1c8..68a3a91d0743d050b002ddab39972504e4fff1a7 100644 (file)
@@ -63,7 +63,6 @@
 #error "not ported to this architecture"
 #endif
 
-
 typedef struct lockstat_probe {
        const char      *lsp_func;
        const char      *lsp_name;
@@ -74,21 +73,19 @@ typedef struct lockstat_probe {
 lockstat_probe_t lockstat_probes[] =
 {
 #if defined(__i386__) || defined(__x86_64__)
-       /* Not implemented yet on PPC... */
+       /* Only provide probes that are implemented on each architecture */
        { LS_LCK_MTX_LOCK,      LSA_ACQUIRE,    LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_LOCK,      LSA_SPIN,       LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE },
+       { LS_LCK_MTX_LOCK,      LSA_BLOCK,      LS_LCK_MTX_LOCK_BLOCK, DTRACE_IDNONE }, 
        { LS_LCK_MTX_TRY_LOCK,  LSA_ACQUIRE,    LS_LCK_MTX_TRY_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_TRY_SPIN_LOCK, LSA_ACQUIRE, LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_UNLOCK,    LSA_RELEASE,    LS_LCK_MTX_UNLOCK_RELEASE, DTRACE_IDNONE },
        { LS_LCK_MTX_EXT_LOCK,  LSA_ACQUIRE,    LS_LCK_MTX_EXT_LOCK_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_MTX_EXT_LOCK,  LSA_SPIN,       LS_LCK_MTX_EXT_LOCK_SPIN, DTRACE_IDNONE },
-       { LS_LCK_MTX_EXT_TRY_LOCK, LSA_ACQUIRE, LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE, DTRACE_IDNONE },
-       { LS_LCK_MTX_UNLOCK,    LSA_RELEASE,    LS_LCK_MTX_EXT_UNLOCK_RELEASE, DTRACE_IDNONE },
-       { LS_LCK_MTX_LOCK_SPIN_LOCK,    LSA_ACQUIRE,    LS_LCK_MTX_LOCK_SPIN_ACQUIRE, DTRACE_IDNONE },
-#endif
-       { LS_LCK_MTX_LOCK,      LSA_BLOCK,      LS_LCK_MTX_LOCK_BLOCK, DTRACE_IDNONE },
        { LS_LCK_MTX_EXT_LOCK,  LSA_BLOCK,      LS_LCK_MTX_EXT_LOCK_BLOCK, DTRACE_IDNONE },
-
+//     { LS_LCK_MTX_EXT_TRY_LOCK, LSA_ACQUIRE, LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE, DTRACE_IDNONE },       
+       { LS_LCK_MTX_EXT_UNLOCK,   LSA_RELEASE, LS_LCK_MTX_EXT_UNLOCK_RELEASE, DTRACE_IDNONE },
+       { LS_LCK_MTX_LOCK_SPIN_LOCK,    LSA_ACQUIRE,    LS_LCK_MTX_LOCK_SPIN_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_RW_LOCK_SHARED,        LSR_ACQUIRE,    LS_LCK_RW_LOCK_SHARED_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_RW_LOCK_SHARED,        LSR_BLOCK,      LS_LCK_RW_LOCK_SHARED_BLOCK, DTRACE_IDNONE },
        { LS_LCK_RW_LOCK_SHARED,        LSR_SPIN,       LS_LCK_RW_LOCK_SHARED_SPIN, DTRACE_IDNONE },
@@ -99,11 +96,10 @@ lockstat_probe_t lockstat_probes[] =
        { LS_LCK_RW_TRY_LOCK_SHARED,    LSR_ACQUIRE,    LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_RW_TRY_LOCK_EXCL,      LSR_ACQUIRE,    LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, DTRACE_IDNONE },
        { LS_LCK_RW_LOCK_SHARED_TO_EXCL, LSR_UPGRADE,   LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, DTRACE_IDNONE },
-       { LS_LCK_RW_LOCK_SHARED_TO_EXCL,        LSR_BLOCK,      LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, DTRACE_IDNONE },
        { LS_LCK_RW_LOCK_SHARED_TO_EXCL,        LSR_SPIN,       LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, DTRACE_IDNONE },
+       { LS_LCK_RW_LOCK_SHARED_TO_EXCL,        LSR_BLOCK,      LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, DTRACE_IDNONE },   
        { LS_LCK_RW_LOCK_EXCL_TO_SHARED,        LSR_DOWNGRADE,  LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, DTRACE_IDNONE },
-
-
+#endif
 #ifdef LATER
        /* Interlock and spinlock measurements would be nice, but later */
        { LS_LCK_SPIN_LOCK,     LSS_ACQUIRE,    LS_LCK_SPIN_LOCK_ACQUIRE, DTRACE_IDNONE },
@@ -130,6 +126,8 @@ extern void lck_mtx_unlock_lockstat_patch_point(void);
 extern void lck_mtx_lock_ext_lockstat_patch_point(void);
 extern void lck_mtx_ext_unlock_lockstat_patch_point(void);
 
+extern void lck_rw_done_release1_lockstat_patch_point(void);
+extern void lck_rw_done_release2_lockstat_patch_point(void);
 extern void lck_rw_lock_shared_lockstat_patch_point(void);
 extern void lck_rw_lock_exclusive_lockstat_patch_point(void);
 extern void lck_rw_lock_shared_to_exclusive_lockstat_patch_point(void);
@@ -138,61 +136,89 @@ extern void lck_rw_try_lock_exclusive_lockstat_patch_point(void);
 extern void lck_mtx_lock_spin_lockstat_patch_point(void);
 #endif /* CONFIG_DTRACE */
 
-vm_offset_t *assembly_probes[] = {
+typedef struct lockstat_assembly_probe {
+       int lsap_probe;
+       vm_offset_t * lsap_patch_point;
+} lockstat_assembly_probe_t;
+
+lockstat_assembly_probe_t assembly_probes[] =
+{
 #if CONFIG_DTRACE
 #if defined(__i386__) || defined(__x86_64__)
-       /*
-        * On x86 these points are better done via hot patches, which ensure
-        * there is zero overhead when not in use.  On x86 these patch points
-        * are swapped between the return instruction and a no-op, with the
-        * Dtrace call following the return.
-        */ 
-       (vm_offset_t *) lck_mtx_lock_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_try_lock_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_try_lock_spin_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_unlock_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_lock_ext_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_ext_unlock_lockstat_patch_point,
-       (vm_offset_t *) lck_rw_lock_shared_lockstat_patch_point,
-       (vm_offset_t *) lck_rw_lock_exclusive_lockstat_patch_point,
-       (vm_offset_t *) lck_rw_lock_shared_to_exclusive_lockstat_patch_point,
-       (vm_offset_t *) lck_rw_try_lock_shared_lockstat_patch_point,
-       (vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point,
-       (vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point,
-#else
-       (vm_offset_t *) lck_mtx_unlock_lockstat_patch_point,
+               /*
+                * On x86 these points are better done via hot patches, which ensure
+                * there is zero overhead when not in use.  On x86 these patch points
+                * are swapped between the return instruction and a no-op, with the
+                * DTrace call following the return.
+                */ 
+               { LS_LCK_MTX_LOCK_ACQUIRE,              (vm_offset_t *) lck_mtx_lock_lockstat_patch_point },
+               { LS_LCK_MTX_TRY_LOCK_ACQUIRE,          (vm_offset_t *) lck_mtx_try_lock_lockstat_patch_point },
+               { LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE,     (vm_offset_t *) lck_mtx_try_lock_spin_lockstat_patch_point },
+               { LS_LCK_MTX_UNLOCK_RELEASE,            (vm_offset_t *) lck_mtx_unlock_lockstat_patch_point },
+               { LS_LCK_MTX_EXT_LOCK_ACQUIRE,          (vm_offset_t *) lck_mtx_lock_ext_lockstat_patch_point },
+               { LS_LCK_MTX_EXT_UNLOCK_RELEASE,        (vm_offset_t *) lck_mtx_ext_unlock_lockstat_patch_point },
+               { LS_LCK_RW_LOCK_SHARED_ACQUIRE,        (vm_offset_t *) lck_rw_lock_shared_lockstat_patch_point },
+               { LS_LCK_RW_LOCK_EXCL_ACQUIRE,          (vm_offset_t *) lck_rw_lock_exclusive_lockstat_patch_point },
+               { LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE,(vm_offset_t *) lck_rw_lock_shared_to_exclusive_lockstat_patch_point },
+               { LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE,    (vm_offset_t *) lck_rw_try_lock_shared_lockstat_patch_point },
+               { LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE,      (vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point },
+               { LS_LCK_MTX_LOCK_SPIN_ACQUIRE,         (vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point },
 #endif
 #endif /* CONFIG_DTRACE */
-       NULL
+               { LS_LCK_INVALID, NULL }
 };
 /*
  * Hot patch switches back and forth the probe points between NOP and RET.
- * The argument indicates whether the probe point is on or off.
+ * The active argument indicates whether the probe point is being turned on or off.
+ *     on  == plant a NOP and thus fall through to the probe call
+ *     off == plant a RET and thus avoid the probe call completely
+ * The ls_probe argument selects which probe's patch points to modify.
  */
 #if defined(__APPLE__)
 static
-#endif /* __APPLE__ */
-void lockstat_hot_patch(boolean_t active)
+void lockstat_hot_patch(boolean_t active, int ls_probe)
 {
 #pragma unused(active)
        int i;
 
-
-       for (i = 0; assembly_probes[i]; i++) {
+       /*
+        * Loop through the entire table, since a single probe
+        * may have multiple patch points.
+        */
+       for (i = 0; assembly_probes[i].lsap_patch_point; i++) {
+               if (ls_probe == assembly_probes[i].lsap_probe)
 #if defined(__i386__) || defined(__x86_64__)
-               uint8_t instr;
-               instr = (active ? NOP : RET );
-               (void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i]), 
+               {                       
+                       uint8_t instr;
+                       instr = (active ? NOP : RET );
+                       (void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i].lsap_patch_point), 
                                                                sizeof(instr));
+               }
 #endif
-       }
+       } /* for */
 }
-
+#endif /* __APPLE__*/
 
 
 void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t,
                                    uint64_t, uint64_t, uint64_t);
 
+#if defined(__APPLE__)
+/* This wrapper is used by ARM assembly hot-patched probes */
+void
+lockstat_probe_wrapper(int probe, uintptr_t lp, int rwflag)
+{
+       dtrace_id_t id;
+       id = lockstat_probemap[probe];
+       if (id != 0)
+       {
+               (*lockstat_probe)(id, (uintptr_t)lp, (uint64_t)rwflag, 0,0,0);
+       }
+}
+#endif /* __APPLE__ */
+    
+
 static dev_info_t      *lockstat_devi; /* saved in xxattach() for xxinfo() */
 static dtrace_provider_id_t lockstat_id;
 
@@ -209,7 +235,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg)
        lockstat_probemap[probe->lsp_probe] = id;
        membar_producer();
 
-       lockstat_hot_patch(TRUE);
+       lockstat_hot_patch(TRUE, probe->lsp_probe);
        membar_producer();
        return(0);
 
@@ -227,7 +253,7 @@ lockstat_disable(void *arg, dtrace_id_t id, void *parg)
        ASSERT(lockstat_probemap[probe->lsp_probe]);
 
        lockstat_probemap[probe->lsp_probe] = 0;
-       lockstat_hot_patch(FALSE);
+       lockstat_hot_patch(FALSE, probe->lsp_probe);
        membar_producer();
 
        /*
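
The reshaped table and patch routine implement per-probe hot patching: each entry now records which lockstat probe a patch point belongs to, so enabling or disabling one probe touches only its own instruction bytes instead of flipping every patch point at once. On x86 each patch point is a single byte, RET while the probe is off (the lock routine returns before reaching the DTrace call that follows) and NOP while it is on (execution falls through into the call). A sketch of one toggle, assuming NOP and RET are the usual single-byte x86 opcodes (0x90 and 0xC3):

    uint8_t instr = active ? NOP : RET;  /* 0x90 falls through, 0xc3 returns early */
    /* ml_nofault_copy() performs the store with fault handling, so a
     * bad patch-point address cannot panic the machine */
    (void) ml_nofault_copy((vm_offset_t)&instr,
        *(assembly_probes[i].lsap_patch_point), sizeof(instr));

lockstat_probe_wrapper() exists for architectures whose patch points live in assembly stubs: the stub branches to the wrapper, which looks the probe up in lockstat_probemap and fires it only if enabled.
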
index 69f3aadd53b2e467b068f9af651ca36414828495..36e213ce1040e333ce0d5a1b1f4faf4d3a3ea302 100644 (file)
@@ -227,7 +227,7 @@ profile_fire(void *arg)
                        x86_saved_state32_t *regs = saved_state32(tagged_regs);
 
                        dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0);
-               }       
+               }
        }
 #else
 #error Unknown architecture
@@ -273,7 +273,7 @@ profile_tick(void *arg)
                        x86_saved_state32_t *regs = saved_state32(tagged_regs);
 
                        dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0);
-               }       
+               }
        }
 #else
 #error Unknown architecture
index bca167f015f66cadf5b7ab20d05feaa4b0ae7685..89ac8ef2bcf1c5a42af761b1a06edf4de990c576 100644 (file)
@@ -63,11 +63,7 @@ extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
 
 #define        SDT_PROBETAB_SIZE       0x1000          /* 4k entries -- 16K total */
 
-#if defined(__x86_64__)
-#define DTRACE_PROBE_PREFIX "_dtrace_probeDOLLAR"
-#else
 #define DTRACE_PROBE_PREFIX "_dtrace_probe$"
-#endif
 
 static dev_info_t              *sdt_devi;
 static int                     sdt_verbose = 0;
@@ -508,14 +504,6 @@ static struct module g_sdt_mach_module;
 #include <mach-o/nlist.h>
 #include <libkern/kernel_mach_header.h>
 
-#if defined(__LP64__)
-#define KERNEL_MAGIC MH_MAGIC_64
-typedef struct nlist_64 kernel_nlist_t;
-#else
-#define KERNEL_MAGIC MH_MAGIC
-typedef struct nlist kernel_nlist_t;
-#endif
-
 void sdt_init( void )
 {
        if (0 == gSDTInited)
@@ -528,7 +516,7 @@ void sdt_init( void )
                        return;
                }
 
-               if (KERNEL_MAGIC != _mh_execute_header.magic) {
+               if (MH_MAGIC_KERNEL != _mh_execute_header.magic) {
                        g_sdt_kernctl.mod_address = (vm_address_t)NULL;
                        g_sdt_kernctl.mod_size = 0;
                } else {
index 271b2a0e1482f6acbbb3a3d03aebb69c95b264ed..6761beec99c3d04066d44b58f0e784c32c8f0e80 100644 (file)
@@ -95,15 +95,20 @@ extern const char *syscallnames[];
 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
 
+extern lck_attr_t* dtrace_lck_attr;
+extern lck_grp_t* dtrace_lck_grp;
+static lck_mtx_t       dtrace_systrace_lock;           /* probe state lock */
+
 systrace_sysent_t *systrace_sysent = NULL;
-void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
-    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+
+static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);        
 
 void
 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
-    uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7)
+    uint64_t arg2, uint64_t arg3, uint64_t arg4)
 {
-#pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
+#pragma unused(id,arg0,arg1,arg2,arg3,arg4)
 }
 
 int32_t
@@ -153,10 +158,17 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
        sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
 
        if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
+               uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());            
+               if (uthread)
+                       uthread->t_dtrace_syscall_args = (void *)ip;
+               
                if (ip)
-                       (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7));
+                       (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
                else
-                       (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
+                       (*systrace_probe)(id, 0, 0, 0, 0, 0);
+               
+               if (uthread)
+                       uthread->t_dtrace_syscall_args = (void *)0;
        }
 
 #if 0 /* XXX */
@@ -244,7 +256,7 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
                 * 
                 * This change was made 4/23/2003 according to the DTrace project's putback log."
                 */ 
-               (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
+               (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
        }
 
        return (rval);
@@ -312,7 +324,7 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
                        munged_rv1 = 0LL;
                }
 
-               (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
+               (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
        }
 }
 #endif /* __APPLE__ */
@@ -393,6 +405,7 @@ systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
                s->stsy_underlying = a->sy_callc;
                s->stsy_return_type = a->sy_return_type;
        }
+       lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
 }
 
 #endif /* __APPLE__ */
@@ -491,15 +504,18 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
                ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
                return(0);
        }
-
-       (void) casptr(&sysent[sysnum].sy_callc,
-           (void *)systrace_sysent[sysnum].stsy_underlying,
-           (void *)dtrace_systrace_syscall);
 #ifdef _SYSCALL32_IMPL
        (void) casptr(&sysent32[sysnum].sy_callc,
            (void *)systrace_sysent32[sysnum].stsy_underlying,
            (void *)dtrace_systrace_syscall32);
 #endif
+
+       lck_mtx_lock(&dtrace_systrace_lock);
+       if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
+               vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
+               ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
+       }
+       lck_mtx_unlock(&dtrace_systrace_lock);
        return (0);
 }
 
@@ -514,9 +530,10 @@ systrace_disable(void *arg, dtrace_id_t id, void *parg)
            systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
 
        if (disable) {
-               (void) casptr(&sysent[sysnum].sy_callc,
-                   (void *)dtrace_systrace_syscall,
-                   (void *)systrace_sysent[sysnum].stsy_underlying);
+               lck_mtx_lock(&dtrace_systrace_lock);
+               if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
+                       ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
+               lck_mtx_unlock(&dtrace_systrace_lock);
 
 #ifdef _SYSCALL32_IMPL
                (void) casptr(&sysent32[sysnum].sy_callc,
@@ -554,7 +571,7 @@ static dtrace_pops_t systrace_pops = {
        NULL,
        NULL,
        NULL,
-       NULL,
+       systrace_getarg,
        NULL,
        systrace_destroy
 };
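
systrace_enable() and systrace_disable() drop the lock-free casptr() swap in favor of an explicit check-then-patch under dtrace_systrace_lock: take the mutex, confirm the sysent slot still holds the expected handler, then overwrite it with ml_nofault_copy(), which also works when the syscall table sits in write-protected memory. A condensed sketch of the pattern, where expected stands for systrace_sysent[sysnum].stsy_underlying on enable (the disable path is the mirror image):

    lck_mtx_lock(&dtrace_systrace_lock);
    if (sysent[sysnum].sy_callc == expected) {      /* still in the prior state? */
            vm_offset_t repl = (vm_offset_t)&dtrace_systrace_syscall;
            ml_nofault_copy((vm_offset_t)&repl,
                (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
    }
    lck_mtx_unlock(&dtrace_systrace_lock);
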
@@ -723,14 +740,14 @@ typedef kern_return_t (*mach_call_t)(void *);
 typedef void    mach_munge_t(const void *, void *);
 
 typedef struct {
-        int                     mach_trap_arg_count;
-        int                     (*mach_trap_function)(void);
+       int                     mach_trap_arg_count;
+       kern_return_t           (*mach_trap_function)(void *);
 #if 0 /* no active architectures use mungers for mach traps */
-        mach_munge_t            *mach_trap_arg_munge32; /* system call arguments for 32-bit */
-        mach_munge_t            *mach_trap_arg_munge64; /* system call arguments for 64-bit */
+       mach_munge_t            *mach_trap_arg_munge32; /* system call arguments for 32-bit */
+       mach_munge_t            *mach_trap_arg_munge64; /* system call arguments for 64-bit */
 #endif
-#if     MACH_ASSERT
-        const char*             mach_trap_name;
+#if    MACH_ASSERT
+       const char*             mach_trap_name;
 #endif /* MACH_ASSERT */
 } mach_trap_t;
 
@@ -759,13 +776,20 @@ struct mach_call_args {
 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
 #endif
 
-typedef systrace_sysent_t machtrace_sysent_t;
+typedef struct machtrace_sysent {
+       dtrace_id_t     stsy_entry;
+       dtrace_id_t     stsy_return;
+       kern_return_t   (*stsy_underlying)(void *);
+       int32_t         stsy_return_type;
+} machtrace_sysent_t;
 
 static machtrace_sysent_t *machtrace_sysent = NULL;
 
 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
     uint64_t, uint64_t, uint64_t);
 
+static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);       
+
 static dev_info_t *machtrace_devi;
 static dtrace_provider_id_t machtrace_id;
 
@@ -802,8 +826,17 @@ dtrace_machtrace_syscall(struct mach_call_args *args)
 
        sy = &machtrace_sysent[code];
 
-       if ((id = sy->stsy_entry) != DTRACE_IDNONE)
+       if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
+               uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());    
+
+               if (uthread)
+                       uthread->t_dtrace_syscall_args = (void *)ip;
+               
                (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
+               
+               if (uthread)
+                       uthread->t_dtrace_syscall_args = (void *)0;             
+       }
 
 #if 0 /* XXX */
        /*
@@ -846,10 +879,10 @@ machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
                if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
                        continue;
 
-               if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
+               if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
                        continue;
 
-               s->stsy_underlying = (sy_call_t *)a->mach_trap_function;
+               s->stsy_underlying = a->mach_trap_function;
        }
 }
 
@@ -924,13 +957,19 @@ machtrace_enable(void *arg, dtrace_id_t id, void *parg)
        }
 
        if (enabled) {
-           ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
+           ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
            return(0);
        }
 
-       (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
-                     (void *)machtrace_sysent[sysnum].stsy_underlying,
-                     (void *)dtrace_machtrace_syscall);
+       lck_mtx_lock(&dtrace_systrace_lock);
+
+       if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
+               vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
+               ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
+       }
+
+       lck_mtx_unlock(&dtrace_systrace_lock);
+
        return(0);
 }
 
@@ -945,10 +984,13 @@ machtrace_disable(void *arg, dtrace_id_t id, void *parg)
                        machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
 
        if (disable) {
-               (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
-                             (void *)dtrace_machtrace_syscall,
-                             (void *)machtrace_sysent[sysnum].stsy_underlying);
 
+               lck_mtx_lock(&dtrace_systrace_lock);
+
+               if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
+                       ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
+               }
+               lck_mtx_unlock(&dtrace_systrace_lock);
        }
 
        if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
@@ -974,7 +1016,7 @@ static dtrace_pops_t machtrace_pops = {
        NULL,
        NULL,
        NULL,
-       NULL,
+       machtrace_getarg,
        NULL,
        machtrace_destroy
 };
@@ -1076,3 +1118,49 @@ void systrace_init( void )
 }
 #undef SYSTRACE_MAJOR
 #endif /* __APPLE__ */
+
+static uint64_t
+systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
+{
+#pragma unused(arg,id,parg,aframes)     /* __APPLE__ */
+       uint64_t val = 0;
+       syscall_arg_t *stack = (syscall_arg_t *)NULL;
+
+       uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());    
+
+       if (uthread)
+               stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
+
+       if (!stack)
+               return(0);
+
+       DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+       /* dtrace_probe arguments arg0 .. arg4 are 64 bits wide */
+       val = (uint64_t)*(stack+argno);
+       DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+       return (val);
+}
+
+
+static uint64_t
+machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
+{
+#pragma unused(arg,id,parg,aframes)     /* __APPLE__ */
+       uint64_t val = 0;
+       syscall_arg_t *stack = (syscall_arg_t *)NULL;
+
+       uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
+       
+       if (uthread)
+               stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
+       
+       if (!stack)
+               return(0);
+
+       DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+       /* dtrace_probe arguments arg0 .. arg4 are 64 bits wide */
+       val = (uint64_t)*(stack+argno); 
+       DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+       return (val);
+}
+
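
The probe vectors shrank from eight trailing arguments to five because dtrace_probe() carries only arg0..arg4 inline; any higher argument is fetched on demand through the provider's getargval operation, which is what the new systrace_getarg()/machtrace_getarg() entries in the pops tables provide. That works because the syscall wrappers now bracket the entry probe by publishing the raw argument block in the uthread, roughly:

    uthread->t_dtrace_syscall_args = (void *)ip;    /* publish the arg block */
    (*systrace_probe)(id, ip[0], ip[1], ip[2], ip[3], ip[4]);
    /* argN for N >= 5 resolves via systrace_getarg() while published */
    uthread->t_dtrace_syscall_args = (void *)0;     /* retract */

Both getarg helpers read the saved block under CPU_DTRACE_NOFAULT, so a stale or bad pointer is absorbed by the DTrace fault machinery instead of faulting the kernel.
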
index 915ed25618d5b66e60c42c118e5c737d7721b801..aeab1de4e5503f5e0ca24543195749ee4367fdf6 100644 (file)
@@ -71,9 +71,9 @@ extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1,
     uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
 #else
 extern void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
-    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+    uint64_t, uint64_t, uint64_t);
 extern void systrace_stub(dtrace_id_t, uint64_t, uint64_t,
-    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+    uint64_t, uint64_t, uint64_t);
 
 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
 
index b7de69df7ad0b63c71a8e60c07a6f5c1febd12ec..bff6f72322b7a8dc0bfe655fecaf217a08467346 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1997-2012 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -121,6 +121,7 @@ extern d_ioctl_t    volioctl;
 #endif
 
 extern d_open_t                cttyopen;
+extern d_close_t       cttyclose;
 extern d_read_t                cttyread;
 extern d_write_t       cttywrite;
 extern d_ioctl_t       cttyioctl;
@@ -201,9 +202,9 @@ struct cdevsw       cdevsw[] =
     },
     NO_CDEVICE,                                                                /* 1*/
     {
-       cttyopen,       nullclose,      cttyread,       cttywrite,      /* 2*/
+       cttyopen,       cttyclose,      cttyread,       cttywrite,      /* 2*/
        cttyioctl,      nullstop,       nullreset,      0,              cttyselect,
-       eno_mmap,       eno_strat,      eno_getc,       eno_putc,       D_TTY
+       eno_mmap,       eno_strat,      eno_getc,       eno_putc,       D_TTY | D_TRACKCLOSE
     },
     {
        nullopen,       nullclose,      mmread,         mmwrite,        /* 3*/
@@ -307,7 +308,7 @@ isdisk(dev_t dev, int type)
                }
                /* FALL THROUGH */
        case VBLK:
-               if (bdevsw[maj].d_type == D_DISK) {
+               if ((D_TYPEMASK & bdevsw[maj].d_type) == D_DISK) {
                        return (1);
                }
                break;
@@ -324,7 +325,7 @@ static int chrtoblktab[] = {
        /*  8 */        NODEV,          /*  9 */        NODEV,
        /* 10 */        NODEV,          /* 11 */        NODEV,
        /* 12 */        NODEV,          /* 13 */        NODEV,
-       /* 14 */        6,              /* 15 */        NODEV,
+       /* 14 */        NODEV,          /* 15 */        NODEV,
        /* 16 */        NODEV,          /* 17 */        NODEV,
        /* 18 */        NODEV,          /* 19 */        NODEV,
        /* 20 */        NODEV,          /* 21 */        NODEV,
@@ -337,7 +338,7 @@ static int chrtoblktab[] = {
        /* 34 */        NODEV,          /* 35 */        NODEV,
        /* 36 */        NODEV,          /* 37 */        NODEV,
        /* 38 */        NODEV,          /* 39 */        NODEV,
-       /* 40 */        NODEV,          /* 41 */        1,
+       /* 40 */        NODEV,          /* 41 */        NODEV,
        /* 42 */        NODEV,          /* 43 */        NODEV,
        /* 44 */        NODEV,
 };
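
Three fixes in this device-switch table: the controlling-terminal entry gains a real close routine (cttyclose) plus the D_TRACKCLOSE flag, which, as that flag is conventionally used, makes the kernel deliver every close() to the driver rather than only the last one; isdisk() now masks d_type before comparing; and two stale chrtoblktab entries are cleared to NODEV. The masking pattern:

    /* d_type carries flags alongside the type code (D_TTY | D_TRACKCLOSE
     * above), so a type test must mask first: */
    if ((bdevsw[maj].d_type & D_TYPEMASK) == D_DISK)
            return (1);
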
index 88e789fce5c1d8d70197676c253d341e12c4ba95..bdb17702849f0272ea9ee6a40b175a84a2115b31 100644 (file)
@@ -165,6 +165,15 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
        }
 }
 
+/*
+ * Initialization
+ */
+void
+dtrace_isa_init(void)
+{
+       return;
+}
+
 /*
  * Runtime and ABI
  */
index baec24f8353f6d2c5488638c93489a47e5157c79..ef45ffacd67bd7c6322493a49bd6ffc673bac3b9 100644 (file)
@@ -1533,7 +1533,13 @@ __user_syms_provide_module(void *arg, struct modctl *ctl)
        dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
        if (module_symbols) {
                for (i=0; i<module_symbols->dtmodsyms_count; i++) {
+
+                       /*
+                        * symbol->dtsym_addr (the symbol address), passed in from
+                        * user space, is already slid for both kexts and the kernel.
+                        */
                        dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
+
                        char* name = symbol->dtsym_name;
                        
                        /* Lop off omnipresent leading underscore. */                   
@@ -1543,8 +1549,8 @@ __user_syms_provide_module(void *arg, struct modctl *ctl)
                        /*
                         * We're only blacklisting functions in the kernel for now.
                         */
-                       if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
-                               continue;
+                        if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
+                               continue;
                        
                        __provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
                }
index c77af1d8be5f5878f556b6ea92a67c649d68d7a2..79bd890e2fedeb10db81ecdf1945778855adaa24 100644 (file)
@@ -40,8 +40,6 @@
 #include       <machine/exec.h>
 #include       <machine/machine_routines.h>
 
-extern int bootarg_no64exec;   /* bsd_init.c */
-
 /**********************************************************************
  * Routine:    grade_binary()
  *
@@ -58,7 +56,7 @@ grade_binary(cpu_type_t exectype, __unused cpu_subtype_t execsubtype)
        case CPU_TYPE_POWERPC:          /* via translator */
                return 1;
        case CPU_TYPE_X86_64:           /* native 64-bit */
-               return ((ml_is64bit() && !bootarg_no64exec) ? 2 : 0);
+               return (ml_is64bit() ? 2 : 0);
        default:                        /* all other binary types */
                return 0;
        }
index 4b45892954c3bd168d1244c9478f84c7d48c6a52..61f0d692994acbd0211779d6455621056fdd862f 100644 (file)
@@ -119,11 +119,15 @@ mmioctl(dev_t dev, u_long cmd, __unused caddr_t data,
 {
        int minnum = minor(dev);
 
-       if ((minnum == 0) || (minnum == 1))
-#if !defined(SECURE_KERNEL)
-               if (setup_kmem == 0) 
-                       return(EINVAL);
+       if (0 == minnum || 1 == minnum) {
+               /* /dev/mem and /dev/kmem */
+#if defined(SECURE_KERNEL)
+               return (ENODEV);
+#else
+               if (0 == setup_kmem) 
+                       return (EINVAL);
 #endif
+       }
 
        switch (cmd) {
        case FIONBIO:
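
The rewritten guard makes the memory-device policy explicit: ioctls on minor 0 (/dev/mem) and minor 1 (/dev/kmem) fail outright with ENODEV on SECURE_KERNEL builds, and otherwise still require that kmem access was enabled at boot (setup_kmem != 0, conventionally via the kmem=1 boot-arg). A user-space sketch of the observable behavior, assuming the device node exists at all on the build in question:

    int fd = open("/dev/kmem", O_RDONLY);
    int on = 1;
    if (fd >= 0 && ioctl(fd, FIONBIO, &on) == -1) {
            /* errno == ENODEV: SECURE_KERNEL build
             * errno == EINVAL: kmem access not enabled at boot */
    }
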
index 3e001de2b7137237869615592418caa7f77b62ce..c2d7de7ff03b8273ac42a45975fec04361d4b8db 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,6 +36,7 @@
 #include <i386/ucode.h>
 #include <kern/clock.h>
 #include <libkern/libkern.h>
+#include <i386/lapic.h>
 
 static int
 _i386_cpu_info SYSCTL_HANDLER_ARGS
@@ -119,6 +120,7 @@ cpu_xsave SYSCTL_HANDLER_ARGS
     return _i386_cpu_info(oidp, ptr, arg2, req);
 }
 
+
 static int
 cpu_features SYSCTL_HANDLER_ARGS
 {
@@ -280,6 +282,26 @@ misc_interrupt_latency_max(__unused struct sysctl_oid *oidp, __unused void *arg1
        return error;
 }
 
+/*
+ * Triggers a machine-check exception - for a suitably configured kernel only.
+ */
+extern void mca_exception_panic(void);
+static int
+misc_machine_check_panic(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       int changed = 0, error;
+       char buf[128];
+       buf[0] = '\0';
+
+       error = sysctl_io_string(req, buf, sizeof(buf), 0, &changed);
+
+       if (error == 0 && changed) {
+               mca_exception_panic();
+       }
+       return error;
+}
+
+
 SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "CPU info");
 
@@ -660,6 +682,15 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, ucupdate,
                        CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0,
             cpu_ucode_update, "S", "Microcode update interface");
 
+static const uint32_t apic_timer_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT);
+static const uint32_t apic_IPI_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_INTERPROCESSOR_INTERRUPT);
+
+SYSCTL_NODE(_machdep, OID_AUTO, vectors, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
+       "Interrupt vector assignments");
+
+SYSCTL_UINT     (_machdep_vectors, OID_AUTO, timer, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_timer_vector, 0, "");
+SYSCTL_UINT     (_machdep_vectors, OID_AUTO, IPI, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_IPI_vector, 0, "");
+
 uint64_t pmap_pv_hashlist_walks;
 uint64_t pmap_pv_hashlist_cnts;
 uint32_t pmap_pv_hashlist_max;
@@ -709,6 +740,13 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, panic_restart_timeout,
            0, 0,
            panic_set_restart_timeout, "I", "Panic restart timeout in seconds");
 
-SYSCTL_PROC(_machdep_misc, OID_AUTO, interrupt_latency_max, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 
+SYSCTL_PROC(_machdep_misc, OID_AUTO, interrupt_latency_max,
+           CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 
            0, 0,
            misc_interrupt_latency_max, "A", "Maximum Interrupt latency");
+
+SYSCTL_PROC(_machdep_misc, OID_AUTO, machine_check_panic,
+           CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 
+           0, 0,
+           misc_machine_check_panic, "A", "Machine-check exception test");
+
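
Two additions surface here: a read-only machdep.vectors node exporting the fixed LAPIC vector assignments, and machdep.misc.machine_check_panic, a string sysctl whose handler deliberately calls mca_exception_panic() when anything is written to it, as a test hook for suitably configured kernels. A user-space sketch of reading one of the new vectors, using the node names exactly as registered above:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    uint32_t vec;
    size_t len = sizeof(vec);
    if (sysctlbyname("machdep.vectors.timer", &vec, &len, NULL, 0) == 0)
            printf("LAPIC timer vector: 0x%x\n", vec);
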
index 7a849ca31074be2b592fc8a81d460321b18db873..e8494ca4eb4b5c0742a8c4eab7d494b197bac71d 100644 (file)
@@ -167,8 +167,9 @@ unix_syscall(x86_saved_state_t *state)
                if (__probable(code != 180)) {
                        int *ip = (int *)vt;
 
-                       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-                                     *ip, *(ip+1), *(ip+2), *(ip+3), 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+                               *ip, *(ip+1), *(ip+2), *(ip+3), 0);
                }
                mungerp = callp->sy_arg_munge32;
 
@@ -182,7 +183,8 @@ unix_syscall(x86_saved_state_t *state)
                if (mungerp != NULL)
                        (*mungerp)(NULL, vt);
        } else
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                        0, 0, 0, 0, 0);
 
        /*
@@ -256,8 +258,9 @@ unix_syscall(x86_saved_state_t *state)
                throttle_lowpri_io(TRUE);
        }
        if (__probable(code != 180))
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-                                     error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+                       error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
        if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
                pal_execve_return(thread);
@@ -328,8 +331,9 @@ unix_syscall64(x86_saved_state_t *state)
                if (code != 180) {
                        uint64_t *ip = (uint64_t *)uargp;
 
-                       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-                                       (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                               BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+                               (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
                }
                assert(callp->sy_narg <= 8);
 
@@ -354,8 +358,9 @@ unix_syscall64(x86_saved_state_t *state)
                        goto unsafe;
                }
        } else
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-                                     0, 0, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+                       0, 0, 0, 0, 0);
 unsafe:
 
        /*
@@ -453,8 +458,9 @@ unsafe:
                throttle_lowpri_io(TRUE);
        }
        if (__probable(code != 180))
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-                                     error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+                       error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
        thread_exception_return();
        /* NOTREACHED */
@@ -599,8 +605,9 @@ unix_syscall_return(int error)
                throttle_lowpri_io(TRUE);
        }
        if (code != 180)
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-                                     error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+                       error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
        thread_exception_return();
        /* NOTREACHED */
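
All of the BSD syscall trace points switch from KERNEL_DEBUG_CONSTANT to KERNEL_DEBUG_CONSTANT_IST, whose extra leading argument (KDEBUG_TRACE here) is a class mask that lets these entry/exit events be gated separately from general kdebug tracing. The recurring code != 180 guard skips syscall 180, kdebug_trace(2), so the tracing syscall itself is not traced recursively. The macro shape, with the class mask first and the usual kdebug tuple after:

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
        error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
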
index 4292d6515f660fb138d39f7bdb0130b1f337fe2d..4f31f83e5a7914d19b21bbd014282b6f8691ca95 100644 (file)
@@ -102,9 +102,10 @@ struct sigframe32 {
 
 /*
  * NOTE: Source and target may *NOT* overlap!
+ * XXX: Unify with bsd/kern/kern_exit.c
  */
 static void
-siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out)
+siginfo_user_to_user32_x86(user_siginfo_t *in, user32_siginfo_t *out)
 {
        out->si_signo   = in->si_signo;
        out->si_errno   = in->si_errno;
@@ -120,7 +121,7 @@ siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out)
 }
 
 static void
-siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out)
+siginfo_user_to_user64_x86(user_siginfo_t *in, user64_siginfo_t *out)
 {
        out->si_signo   = in->si_signo;
        out->si_errno   = in->si_errno;
@@ -521,7 +522,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                
                bzero((caddr_t)&sinfo64_user64, sizeof(sinfo64_user64));
                          
-               siginfo_user_to_user64(&sinfo64,&sinfo64_user64);
+               siginfo_user_to_user64_x86(&sinfo64,&sinfo64_user64);
 
 #if CONFIG_DTRACE
         bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo));
@@ -560,7 +561,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 
                bzero((caddr_t)&sinfo32, sizeof(sinfo32));
 
-               siginfo_user_to_user32(&sinfo64,&sinfo32);
+               siginfo_user_to_user32_x86(&sinfo64,&sinfo32);
 
 #if CONFIG_DTRACE
         bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo));
index c425c7e08abbbd8553657b08b6ea934c3d9b88e2..58fecce01b4ae9b9dc857cf9e932c3ad4f9a7e86 100644 (file)
@@ -113,7 +113,7 @@ static int                          mdevrw(dev_t dev, struct uio *uio, int ioflag);
 
 static char *                  nonspace(char *pos, char *end);
 static char *                  getspace(char *pos, char *end);
-static char *                  cvtnum(char *pos, char *end, unsigned int *num);
+static char *                  cvtnum(char *pos, char *end, uint64_t *num);
 
 #endif /* CONFIG_MEMDEV_INSECURE */
 
@@ -436,8 +436,8 @@ void mdevinit(__unused int the_cnt) {
 #ifdef CONFIG_MEMDEV_INSECURE
 
        int devid, phys;
-       ppnum_t base;
-       unsigned int size;
+       uint64_t base;
+       uint64_t size;
        char *ba, *lp;
        dev_t dev;
        
@@ -476,7 +476,7 @@ void mdevinit(__unused int the_cnt) {
                        if((ba[0] != ' ') && (ba[0] != 0)) continue;    /* End must be null or space */
                }
                
-               dev = mdevadd(devid, base >> 12, size >> 12, phys);     /* Go add the device */ 
+               dev = mdevadd(devid, base >> 12, (unsigned)size >> 12, phys);   /* Go add the device */ 
        }
 
 #endif /* CONFIG_MEMDEV_INSECURE */
@@ -509,7 +509,7 @@ char *getspace(char *pos, char *end) {                                      /* Find next non-space in string */
        }
 }
 
-char *cvtnum(char *pos, char *end, unsigned int *num) {                /* Convert to a number */
+char *cvtnum(char *pos, char *end, uint64_t *num) {            /* Convert to a number */
 
        int rad, dig;
        
index c29e9f877fec0e9672e4ef05787cbc08ffff21b6..c081bd20904ca64b7dc49b091f1cb734eac98ad6 100644 (file)
@@ -56,7 +56,6 @@
 #include <dev/random/YarrowCoreLib/include/yarrow.h>
 
 #include <libkern/OSByteOrder.h>
-#include <libkern/OSAtomic.h>
 
 #include <mach/mach_time.h>
 #include <machine/machine_routines.h>
@@ -102,14 +101,13 @@ static struct cdevsw random_cdevsw =
 
 
 /* Used to detect whether we've already been initialized */
-static UInt8 gRandomInstalled = 0;
+static int gRandomInstalled = 0;
 static PrngRef gPrngRef;
 static int gRandomError = 1;
 static lck_grp_t *gYarrowGrp;
 static lck_attr_t *gYarrowAttr;
 static lck_grp_attr_t *gYarrowGrpAttr;
 static lck_mtx_t *gYarrowMutex = 0;
-static UInt8 gYarrowInitializationLock = 0;
 
 #define RESEED_TICKS 50 /* how long a reseed operation can take */
 
@@ -309,27 +307,6 @@ PreliminarySetup(void)
 {
     prng_error_status perr;
 
-       /* Multiple threads can enter this as a result of an earlier
-        * check of gYarrowMutex.  We make sure that only one of them
-        * can enter at a time.  If one of them enters and discovers
-        * that gYarrowMutex is no longer NULL, we know that another
-        * thread has initialized the Yarrow state and we can exit.
-        */
-       
-       /* The first thread that enters this function will find
-        * gYarrowInitializationLock set to 0.  It will atomically
-        * set the value to 1 and, seeing that it was zero, drop
-        * out of the loop.  Other threads will see that the value is
-        * 1 and continue to loop until we are initialized.
-     */
-
-       while (OSTestAndSet(0, &gYarrowInitializationLock)); /* serialize access to this function */
-       
-       if (gYarrowMutex) {
-               /*  we've already been initialized, clear and get out */
-               goto function_exit;
-       }
-
     /* create a Yarrow object */
     perr = prngInitialize(&gPrngRef);
     if (perr != 0) {
@@ -344,8 +321,6 @@ PreliminarySetup(void)
     char buffer [16];
 
     /* get a little non-deterministic data as an initial seed. */
-       /* On OSX, securityd will add much more entropy as soon as it */
-       /* comes up.  On iOS, entropy is added with each system interrupt. */
     microtime(&tt);
 
     /*
@@ -359,7 +334,7 @@ PreliminarySetup(void)
     if (perr != 0) {
         /* an error, complain */
         printf ("Couldn't seed Yarrow.\n");
-        goto function_exit;
+        return;
     }
     
     /* turn the data around */
@@ -375,10 +350,6 @@ PreliminarySetup(void)
     gYarrowMutex   = lck_mtx_alloc_init(gYarrowGrp, gYarrowAttr);
        
        fips_initialize ();
-
-function_exit:
-       /* allow other threads to figure out whether or not we have been initialized. */
-       gYarrowInitializationLock = 0;
 }
 
 const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93};
@@ -413,11 +384,14 @@ random_init(void)
 {
        int ret;
 
-       if (OSTestAndSet(0, &gRandomInstalled)) {
-               /* do this atomically so that it works correctly with
-                multiple threads */
+       if (gRandomInstalled)
                return;
-       }
+
+       /* install us in the file system */
+       gRandomInstalled = 1;
+
+       /* setup yarrow and the mutex */
+       PreliminarySetup();
 
        ret = cdevsw_add(RANDOM_MAJOR, &random_cdevsw);
        if (ret < 0) {
@@ -435,9 +409,6 @@ random_init(void)
         */
        devfs_make_node(makedev (ret, 1), DEVFS_CHAR,
                UID_ROOT, GID_WHEEL, 0666, "urandom", 0);
-
-       /* setup yarrow and the mutex if needed*/
-       PreliminarySetup();
 }
 
 int
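
This file loses its OSTestAndSet()-based double-checked initialization: previously any thread could race into PreliminarySetup(), so a byte spinlock plus a re-check of gYarrowMutex serialized the Yarrow setup. The new code runs PreliminarySetup() directly from random_init(), before the device nodes are published, presumably relying on that path being effectively single-threaded, so a plain flag suffices. The resulting ordering, condensed from the hunks above:

    if (gRandomInstalled)
            return;
    gRandomInstalled = 1;           /* no concurrent callers expected here */
    PreliminarySetup();             /* Yarrow PRNG, initial seed, gYarrowMutex */
    /* ... then cdevsw_add() and devfs_make_node() publish /dev/random
     * and /dev/urandom, so consumers can only arrive after setup ... */
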
index f167a175236f20923b1a0c304364c26f0a31cb01..a1d8f5200890c20cf367346e9e0cda4bf2c12035 100644 (file)
@@ -157,11 +157,7 @@ bsd_startupearly(void)
 
 #if SOCKETS
        {
-#if CONFIG_USESOCKTHRESHOLD
-               static const unsigned int       maxspace = 64 * 1024;
-#else
                static const unsigned int       maxspace = 128 * 1024;
-#endif
                int             scale;
 
                nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
@@ -303,7 +299,6 @@ done:
 #if defined(__LP64__)
 extern int tcp_tcbhashsize;
 extern int max_cached_sock_count;
-void IOSleep(int);
 #endif 
 
 
index 114fcecc71159a77083557256610ad51c2e050f9..ec8db386457223d931d245cbfb661908c0b9c2c2 100644 (file)
 #include <hfs/hfscommon/headers/HybridAllocator.h>
 #endif
 
+#if CONFIG_PROTECT
+/* Forward declare the cprotect struct */
+struct cprotect;
+#endif
+
 /*
  *     Just reported via MIG interface.
  */
@@ -144,6 +149,7 @@ typedef struct hfsmount {
        /* Physical Description */
        u_int32_t     hfs_logical_block_size;   /* Logical block size of the disk as reported by ioctl(DKIOCGETBLOCKSIZE), always a multiple of 512 */
        daddr64_t     hfs_logical_block_count;  /* Number of logical blocks on the disk */
+       u_int64_t     hfs_logical_bytes;        /* Number of bytes on the disk device this HFS is mounted on (blockcount * blocksize) */
        daddr64_t     hfs_alt_id_sector;        /* location of alternate VH/MDB */
        u_int32_t     hfs_physical_block_size;  /* Physical block size of the disk as reported by ioctl(DKIOCGETPHYSICALBLOCKSIZE) */ 
        u_int32_t     hfs_log_per_phys;         /* Number of logical blocks per physical block size */
@@ -320,6 +326,11 @@ typedef struct hfsmount {
        u_int32_t               hfs_resize_blocksmoved;
        u_int32_t               hfs_resize_totalblocks;
        u_int32_t               hfs_resize_progress;
+#if CONFIG_PROTECT
+       struct cprotect *hfs_resize_cpentry;
+       u_int16_t               hfs_running_cp_major_vers;
+#endif
+
 
        /* Per mount cnode hash variables: */
        lck_mtx_t      hfs_chash_mutex; /* protects access to cnode hash table */
@@ -438,9 +449,11 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
  */
 #define HFS_RDONLY_DOWNGRADE      0x80000
 #define HFS_DID_CONTIG_SCAN      0x100000
+#define HFS_UNMAP                0x200000
 #define HFS_SSD                                         0x400000
 
 
+
 /* Macro to update next allocation block in the HFS mount structure.  If 
  * the HFS_SKIP_UPDATE_NEXT_ALLOCATION is set, do not update 
  * nextAllocation block.
@@ -586,10 +599,10 @@ int hfs_vnop_readdirattr(struct vnop_readdirattr_args *);  /* in hfs_attrlist.c
 
 int hfs_vnop_inactive(struct vnop_inactive_args *);        /* in hfs_cnode.c */
 int hfs_vnop_reclaim(struct vnop_reclaim_args *);          /* in hfs_cnode.c */
+
 int hfs_set_backingstore (struct vnode *vp, int val);                          /* in hfs_cnode.c */
 int hfs_is_backingstore (struct vnode *vp, int *val);          /* in hfs_cnode.c */
 
-
 int hfs_vnop_link(struct vnop_link_args *);                /* in hfs_link.c */
 
 int hfs_vnop_lookup(struct vnop_lookup_args *);            /* in hfs_lookup.c */
index 3f1e6da64856a922b22f7c40b9124c0698814aea..2cb73b6b5f0fd56d3ce77c0998577dcc50dd70bf 100644 (file)
@@ -139,7 +139,7 @@ hfs_vnop_readdirattr(ap)
                return (EINVAL);
        }
 
-       if (VTOC(dvp)->c_flags & UF_COMPRESSED) {
+       if (VTOC(dvp)->c_bsdflags & UF_COMPRESSED) {
                int compressed = hfs_file_is_compressed(VTOC(dvp), 0);  /* 0 == take the cnode lock */
 
                if (!compressed) {
index c40ba1e5688af35eca631b8e0b36078c790563f7..cb72bce1e8f1072aec4aa4587c22eb152f78abdc 100644 (file)
@@ -65,9 +65,23 @@ struct attrblock {
         ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS |         \
         ATTR_CMN_FILEID | ATTR_CMN_PARENTID )
 
+#define HFS_ATTR_CMN_SEARCH_VALID      \
+       (ATTR_CMN_NAME | ATTR_CMN_OBJID |       \
+        ATTR_CMN_PAROBJID | ATTR_CMN_CRTIME |  \
+        ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME |  \
+        ATTR_CMN_ACCTIME | ATTR_CMN_BKUPTIME | \
+        ATTR_CMN_FNDRINFO | ATTR_CMN_OWNERID | \
+        ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | \
+        ATTR_CMN_FILEID | ATTR_CMN_PARENTID ) 
+
+
+
 #define HFS_ATTR_DIR_VALID                             \
        (ATTR_DIR_LINKCOUNT | ATTR_DIR_ENTRYCOUNT | ATTR_DIR_MOUNTSTATUS)
 
+#define HFS_ATTR_DIR_SEARCH_VALID      \
+       (ATTR_DIR_ENTRYCOUNT)
+
 #define HFS_ATTR_FILE_VALID                              \
        (ATTR_FILE_LINKCOUNT |ATTR_FILE_TOTALSIZE |       \
         ATTR_FILE_ALLOCSIZE | ATTR_FILE_IOBLOCKSIZE |    \
@@ -75,6 +89,9 @@ struct attrblock {
         ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE | \
         ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)
 
+#define HFS_ATTR_FILE_SEARCH_VALID             \
+       (ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE |       \
+        ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE )
 
 extern int hfs_attrblksize(struct attrlist *attrlist);
 
index f69c780c08ac76292e0ec23c8f2624dacfafcd3a..7e5182cd9996c82bd7f3fae46d712db108660021 100644 (file)
@@ -865,6 +865,18 @@ again:
        lck_mtx_unlock(&hfsmp->hfs_mutex);
 
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+
+       if (intrans) {
+               hfs_end_transaction(hfsmp);
+               intrans = 0;
+       }
+
+       /* Initialize the vnode for virtual attribute data file */
+       result = init_attrdata_vnode(hfsmp);
+       if (result) {
+               printf("hfs_create_attr_btree: init_attrdata_vnode() error=%d\n", result); 
+       }
+
 exit:
        if (vp) {
                hfs_unlock(VTOC(vp));
index 673adbb9181cf3c18c2eddeb1747932c5533c25d..bc2e5959e6e3e4aa471fe5359e17fc1dd6825c2b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -575,7 +575,10 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
        cnid_t fileID;
        u_int32_t prefixlen;
        int result;
-       int extlen1, extlen2;
+       u_int8_t utf8[NAME_MAX + 1];
+       u_int32_t utf8len;
+       u_int16_t unicode[kHFSPlusMaxFileNameChars + 1];
+       size_t unicodelen;
        
        if (wantrsrc)
                return (ENOENT);
@@ -598,19 +601,26 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
        if (descp->cd_parentcnid != outdescp->cd_parentcnid)
                goto falsematch;
 
-       if (((u_int16_t)outdescp->cd_namelen < prefixlen) ||
-               bcmp(outdescp->cd_nameptr, descp->cd_nameptr, prefixlen-6) != 0)
-               goto falsematch;
-
-       extlen1 = CountFilenameExtensionChars(descp->cd_nameptr, descp->cd_namelen);
-       extlen2 = CountFilenameExtensionChars(outdescp->cd_nameptr, outdescp->cd_namelen);
-       if (extlen1 != extlen2)
+       /*
+        * Compare the mangled version of file name looked up from the 
+        * disk with the mangled name provided by the user.  Note that 
+        * this comparison is case-sensitive, which should be fine
+        * since we're trying to prevent user space from constructing
+        * a mangled name that differs from the one they'd get from the
+        * file system.
+        */
+       result = utf8_decodestr(outdescp->cd_nameptr, outdescp->cd_namelen,
+                       unicode, &unicodelen, sizeof(unicode), ':', 0);
+       if (result) {
                goto falsematch;
-
-       if (bcmp(outdescp->cd_nameptr + (outdescp->cd_namelen - extlen2),
-                       descp->cd_nameptr + (descp->cd_namelen - extlen1),
-                       extlen1) != 0)
+       }
+       result = ConvertUnicodeToUTF8Mangled(unicodelen, unicode, 
+                       sizeof(utf8), &utf8len, utf8, fileID);
+       if ((result != 0) || 
+           ((u_int16_t)descp->cd_namelen != utf8len) ||
+           (bcmp(descp->cd_nameptr, utf8, utf8len) != 0)) { 
                goto falsematch;
+       }
 
        return (0);
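
The rewritten check replaces the old prefix-plus-extension comparison with a full round trip: decode the on-disk name to UTF-16, re-derive its mangled form for this specific file ID with ConvertUnicodeToUTF8Mangled(), and demand an exact byte-for-byte (hence case-sensitive) match against the name the caller presented. Since a mangled HFS name embeds the catalog node ID, only one string round-trips for a given fileID, which closes off lookups via artificially constructed mangled names. Condensed:

    if (utf8_decodestr(outdescp->cd_nameptr, outdescp->cd_namelen,
            unicode, &unicodelen, sizeof(unicode), ':', 0) != 0)
            goto falsematch;            /* on-disk name failed to decode */
    if (ConvertUnicodeToUTF8Mangled(unicodelen, unicode,
            sizeof(utf8), &utf8len, utf8, fileID) != 0 ||
        (u_int16_t)descp->cd_namelen != utf8len ||
        bcmp(descp->cd_nameptr, utf8, utf8len) != 0)
            goto falsematch;            /* not the canonical mangled form */
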
 
@@ -657,6 +667,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h
        if (!std_hfs) {
                parentid = keyp->hfsPlus.parentID;
        }
+       
        encoding = getencoding(recp);
        hint = iterator->hint.nodeNum;
 
@@ -707,7 +718,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h
                                /* Update the inode number for this hard link */
                                attrp->ca_linkref = ilink;
                        }
-
+                       
                        /* 
                         * Set kHFSHasLinkChainBit for hard links, and reset it for all 
                         * other items.  Also set linkCount to 1 for regular files.
@@ -728,12 +739,10 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h
                                }
                        } else { 
                                /* Make sure that this non-hard link (regular) record is not 
-                                * an inode record or a valid hard link being that is not 
-                                * resolved for volume resize purposes.  We do not want to 
-                                * reset the hard link bit or reset link count on these records.
+                                * an inode record that was looked up, so that we do not
+                                * end up resetting the hard link bit on it.
                                 */
-                               if (!(flags & HFS_LOOKUP_HARDLINK) && 
-                                   (parentid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && 
+                               if ((parentid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && 
                                    (parentid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid)) {
                                        /* This is not a hard link or inode and the link count bit was set */
                                        if (attrp->ca_recflags & kHFSHasLinkChainMask) {
@@ -1111,10 +1120,11 @@ cat_rename (
         * When moving a directory, make sure its a valid move.
         */
        if (directory && (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid)) {
-               struct BTreeIterator iterator;
+               struct BTreeIterator *dir_iterator = NULL;
+
                cnid_t cnid = from_cdp->cd_cnid;
                cnid_t pathcnid = todir_cdp->cd_parentcnid;
-
+       
                /* First check the obvious ones */
                if (cnid == fsRtDirID  ||
                    cnid == to_cdp->cd_parentcnid  ||
@@ -1122,25 +1132,33 @@ cat_rename (
                        result = EINVAL;
                        goto exit;
                }
-               bzero(&iterator, sizeof(iterator));
+               /* now allocate the dir_iterator */
+               MALLOC (dir_iterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+               if (dir_iterator == NULL) {
+                       return ENOMEM; 
+               }
+               bzero(dir_iterator, sizeof(*dir_iterator));
+                       
                /*
                 * Traverse destination path all the way back to the root
                 * making sure that source directory is not encountered.
                 *
                 */
                while (pathcnid > fsRtDirID) {
-                       buildthreadkey(pathcnid, std_hfs,
-                                       (CatalogKey *)&iterator.key);
-                       result = BTSearchRecord(fcb, &iterator, &btdata,
-                                       &datasize, NULL);
-                       if (result) goto exit;
-                       
+                       buildthreadkey(pathcnid, std_hfs, (CatalogKey *)&dir_iterator->key);
+                       result = BTSearchRecord(fcb, dir_iterator, &btdata, &datasize, NULL);
+                       if (result) {
+                               FREE(dir_iterator, M_TEMP);
+                               goto exit;
+                       }
                        pathcnid = getparentcnid(recp);
                        if (pathcnid == cnid || pathcnid == 0) {
                                result = EINVAL;
+                               FREE(dir_iterator, M_TEMP);
                                goto exit;
                        }
                }
+               FREE(dir_iterator, M_TEMP);
        }
 
        /*
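
Moving the BTreeIterator off the stack is a stack-depth fix: struct BTreeIterator is a fairly large structure (it embeds a full B-tree key), and cat_rename() can already be deep in a VFS call chain when the ancestry walk runs, so the iterator is heap-allocated for the duration of the traversal. The shape used above:

    struct BTreeIterator *it = NULL;
    MALLOC(it, struct BTreeIterator *, sizeof(*it), M_TEMP, M_WAITOK);
    if (it == NULL)
            return ENOMEM;      /* defensive; M_WAITOK normally blocks instead */
    bzero(it, sizeof(*it));
    /* ... BTSearchRecord(fcb, it, &btdata, &datasize, NULL) per ancestor ... */
    FREE(it, M_TEMP);
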
@@ -1783,7 +1801,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st
                         * a hardlink.  In this case, update the linkcount from the cat_attr passed in.
                         */
                        if ((descp->cd_cnid != attrp->ca_fileid) || (attrp->ca_linkcount > 1 ) ||
-                                       (file->hl_linkCount > 1)) {
+                               (file->hl_linkCount > 1)) {
                                file->hl_linkCount = attrp->ca_linkcount;
                        }
                }
@@ -1966,7 +1984,7 @@ cat_update_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevli
        /* Create an iterator for use by us temporarily */
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
        bzero(iterator, sizeof(*iterator));
-
+       
        result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key);
        if (result == 0) {
                result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)update_siblinglinks_callback, &state);
@@ -1974,7 +1992,7 @@ cat_update_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevli
        } else {
                printf("hfs: cat_update_siblinglinks: couldn't resolve cnid %d\n", linkfileid);
        }
-
+       
        FREE (iterator, M_TEMP);
        return MacToVFSError(result);
 }
@@ -2041,16 +2059,13 @@ cat_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevl
        /* Create an iterator for use by us temporarily */
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
        bzero(iterator, sizeof(*iterator));
-
-
+       
        if ((result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key))) {
-               printf("hfs: cat_lookup_siblinglinks: getkey for %d failed %d\n", linkfileid, result);
                goto exit;
        }
        BDINIT(btdata, &file);
 
        if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) {
-               printf("hfs: cat_lookup_siblinglinks: cannot find %d\n", linkfileid);
                goto exit;
        }
        /* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */
@@ -2737,7 +2752,7 @@ typedef struct linkinfo linkinfo_t;
 
 /* State information for the getdirentries_callback function. */
 struct packdirentry_state {
-       int            cbs_extended;
+       int            cbs_flags;               /* VNODE_READDIR_* flags */
        u_int32_t      cbs_parentID;
        u_int32_t      cbs_index;
        uio_t          cbs_uio;
@@ -2814,7 +2829,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp,
                 * especially since it's closer to the return of this function.
                 */              
                        
-               if (state->cbs_extended) {
+               if (state->cbs_flags & VNODE_READDIR_EXTENDED) {
                        /* The last record has not been returned yet, so we 
                         * want to stop after packing the last item 
                         */
@@ -2832,16 +2847,26 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp,
                }
        }
 
-       if (state->cbs_extended) {
+       if (state->cbs_flags & VNODE_READDIR_EXTENDED) {
                entry = state->cbs_direntry;
                nameptr = (u_int8_t *)&entry->d_name[0];
-               maxnamelen = NAME_MAX;
+               if (state->cbs_flags & VNODE_READDIR_NAMEMAX) {
+                       /*
+                        * The NFS server sometimes needs to make filenames fit in
+                        * NAME_MAX bytes (since its client may not be able to
+                        * handle a longer name).  In that case, NFS will ask us
+                        * to mangle the name to keep it short enough.
+                        */
+                       maxnamelen = NAME_MAX;
+               } else {
+                       maxnamelen = sizeof(entry->d_name);
+               }
        } else {
                nameptr = (u_int8_t *)&catent.d_name[0];
-               maxnamelen = NAME_MAX;
+               maxnamelen = sizeof(catent.d_name);
        }
 
-       if (state->cbs_extended && stop_after_pack) {
+       if ((state->cbs_flags & VNODE_READDIR_EXTENDED) && stop_after_pack) {
                /* The last item returns a non-zero invalid cookie */
                cnid = INT_MAX;         
        } else {
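
The maxnamelen selection above distinguishes three cases: classic entries are capped by their own d_name field, extended entries may use the full extended d_name, and VNODE_READDIR_NAMEMAX drops back to NAME_MAX so NFS can hand mangled names to clients that cannot take longer ones. Condensed into a helper, with illustrative flag values and stand-in structures:

    #include <limits.h>     /* NAME_MAX */
    #include <stddef.h>

    #define RD_EXTENDED 0x01    /* stand-ins for the VNODE_READDIR_* flags */
    #define RD_NAMEMAX  0x02

    struct std_dirent { char d_name[256]; };    /* stand-in for struct dirent   */
    struct ext_dirent { char d_name[1024]; };   /* stand-in for struct direntry */

    static size_t readdir_maxnamelen(int flags)
    {
        if (!(flags & RD_EXTENDED))
            return sizeof(((struct std_dirent *)0)->d_name);
        if (flags & RD_NAMEMAX)
            return NAME_MAX;    /* mangle long names for the NFS client */
        return sizeof(((struct ext_dirent *)0)->d_name);
    }
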
@@ -2951,7 +2976,7 @@ encodestr:
                }
        }
 
-       if (state->cbs_extended) {
+       if (state->cbs_flags & VNODE_READDIR_EXTENDED) {
                /*
                 * The index is 1 relative and includes "." and ".."
                 *
@@ -2983,7 +3008,7 @@ encodestr:
                return (0);     /* stop */
        }
 
-       if (!state->cbs_extended || state->cbs_hasprevdirentry) {
+       if (!(state->cbs_flags & VNODE_READDIR_EXTENDED) || state->cbs_hasprevdirentry) {
                state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio);
                if (state->cbs_result == 0) {
                        ++state->cbs_index;
@@ -3047,7 +3072,7 @@ encodestr:
        }
 
        /* Fill the direntry to be used the next time */
-       if (state->cbs_extended) {      
+       if (state->cbs_flags & VNODE_READDIR_EXTENDED) {        
                if (stop_after_pack) {
                        state->cbs_eof = true;
                        return (0);     /* stop */
@@ -3167,8 +3192,8 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp,
  * Pack a uio buffer with directory entries from the catalog
  */
 int
-cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint,
-                                 uio_t uio, int extended, int * items, int * eofflag)
+cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *dirhint,
+                                 uio_t uio, int flags, int * items, int * eofflag)
 {
        FCB* fcb;
        BTreeIterator * iterator;
@@ -3180,7 +3205,10 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        int result;
        int index;
        int have_key;
-
+       int extended;
+       
+       extended = flags & VNODE_READDIR_EXTENDED;
+       
        if (extended && (hfsmp->hfs_flags & HFS_STANDARD)) {
                return (ENOTSUP);
        }
@@ -3189,7 +3217,7 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        /*
         * Get a buffer for link info array, btree iterator and a direntry:
         */
-       maxlinks = MIN(entrycnt, uio_resid(uio) / SMALL_DIRENTRY_SIZE);
+       maxlinks = MIN(entrycnt, (u_int32_t)(uio_resid(uio) / SMALL_DIRENTRY_SIZE));
        bufsize = MAXPATHLEN + (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator);
        if (extended) {
                bufsize += 2*sizeof(struct direntry);
@@ -3197,7 +3225,7 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        MALLOC(buffer, void *, bufsize, M_TEMP, M_WAITOK);
        bzero(buffer, bufsize);
 
-       state.cbs_extended = extended;
+       state.cbs_flags = flags;
        state.cbs_hasprevdirentry = false;
        state.cbs_previlinkref = 0;
        state.cbs_nlinks = 0;
@@ -3323,7 +3351,7 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
                 * dummy values to copy the last directory entry stored in 
                 * packdirentry_state 
                 */
-               if (state.cbs_extended && (result == fsBTRecordNotFoundErr)) {
+               if (extended && (result == fsBTRecordNotFoundErr)) {
                        CatalogKey ckp;
                        CatalogRecord crp;
 
index e8574e17d1863164a9783a14c602e935b3e4d25d..13aa865133c4c73664e8d2dc1ff997ea307ef18d 100644 (file)
@@ -323,7 +323,7 @@ extern int cat_update (     struct hfsmount *hfsmp,
 
 extern int cat_getdirentries(
                        struct hfsmount *hfsmp,
-                       int entrycnt,
+                       u_int32_t entrycnt,
                        directoryhint_t *dirhint,
                        uio_t uio,
                        int extended,
index 13c58bf5b93dfae8a7d01dbd0bcece2ca6212f18..2910c56126bf5a99cd0cdf2ff070516abd58cc40 100644 (file)
@@ -202,12 +202,12 @@ loop:
                 */
                if (!allow_deleted) {
                        if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
-                               if (!skiplock)
-                                               hfs_unlock(cp);
+                               if (!skiplock) {
+                                       hfs_unlock(cp);
+                               }
                                vnode_put(vp);
-       
                                return (NULL);
-                       }
+                       }                       
                }
                return (vp);
        }
@@ -342,12 +342,12 @@ loop_with_lock:
                                goto loop;
                }
                if (ncp) {
-                       /*
+                       /*
                         * someone else won the race to create
                         * this cnode and add it to the hash
                         * just dump our allocation
                         */
-                   FREE_ZONE(ncp, sizeof(struct cnode), M_HFSNODE);
+                       FREE_ZONE(ncp, sizeof(struct cnode), M_HFSNODE);
                        ncp = NULL;
                }
 
@@ -376,9 +376,8 @@ loop_with_lock:
                                vnode_put(vp);
                        } else {
                                hfs_chash_lock_spin(hfsmp);
-                       CLR(cp->c_hflag, H_ATTACH);
+                               CLR(cp->c_hflag, H_ATTACH);
                                *hflags &= ~H_ATTACH;
-
                                if (ISSET(cp->c_hflag, H_WAITING)) {
                                        CLR(cp->c_hflag, H_WAITING);
                                        wakeup((caddr_t)cp);
@@ -403,7 +402,8 @@ loop_with_lock:
 
        if (ncp == NULL) {
                hfs_chash_unlock(hfsmp);
-           MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK);
+
+               MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK);
                /*
                 * since we dropped the chash lock, 
                 * we need to go back and re-verify
index 016df24e0f6ee39cfeee98fe22151838680d49c5..970f2648eafc53586bee35e7893f68ddb8ae4078 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -37,6 +37,7 @@
 #include <sys/quota.h>
 #include <sys/kdebug.h>
 #include <libkern/OSByteOrder.h>
+#include <sys/buf_internal.h>
 
 #include <kern/locks.h>
 
@@ -59,11 +60,12 @@ static void  hfs_reclaim_cnode(struct cnode *);
 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
 static int hfs_isordered(struct cnode *, struct cnode *);
 
+extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
+
 __inline__ int hfs_checkdeleted (struct cnode *cp) {
        return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);  
 }
 
-
 /*
  * Function used by a special fcntl() that decorates a cnode/vnode that
  * indicates it is backing another filesystem, like a disk image.
@@ -240,8 +242,35 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                (!ISSET(cp->c_flag, C_NOEXISTS)) &&
                (VTOF(vp)->ff_blocks) &&
                (reclaim == 0)) {
+               /* 
+                * Note that if content protection is enabled, then this is where we will
+                * attempt to issue IOs for all dirty regions of this file.  
+                *
+                * If we're called from hfs_vnop_inactive, all this means is that at the
+                * time we decided to call this function, there were no lingering
+                * mmap/fd references for this file.  However, there is nothing preventing the system
+                * from creating a new reference in between the time that logic was checked
+                * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
+                * that there aren't any references is during vnop_reclaim.
+                */
                hfs_filedone(vp, ctx);
        }
+
+       /* 
+        * We're holding the cnode lock now.  Stall behind any shadow BPs that may
+        * be involved with this vnode if it is a symlink.  We don't want to allow 
+        * the blocks that we're about to release to be put back into the pool if there
+        * is pending I/O to them.
+        */
+       if (v_type == VLNK) {   
+               /* 
+                * This will block if the asynchronous journal flush is in progress.
+                * If this symlink is not being renamed over and doesn't have any open FDs,
+                * then we'll remove it from the journal's bufs below in kill_block.
+                */
+               buf_wait_for_shadow_io (vp, 0);
+       }
+
        /* 
         * Remove any directory hints or cached origins
         */
@@ -282,9 +311,44 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
        if ((v_type == VREG || v_type == VLNK) && 
                (cp->c_flag & C_DELETED) &&
                ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
-               
+
+               /* Start a transaction here.  We're about to change file sizes */
+               if (started_tr == 0) {
+                       if (hfs_start_transaction(hfsmp) != 0) {
+                               error = EINVAL;
+                               goto out;
+                       }
+                       else {
+                               started_tr = 1;
+                       }
+               }
+       
                /* Truncate away our own fork data. (Case A, B, C above) */
                if (VTOF(vp)->ff_blocks != 0) {
+                       
+                       /*
+                        * At this point, we have decided that this cnode is
+                        * suitable for full removal.  We are about to deallocate
+                        * its blocks and remove its entry from the catalog. 
+                        * If it was a symlink, then it's possible that the operation
+                        * which created it is still in the current transaction group
+                        * due to coalescing.  Take action here to kill the data blocks
+                        * of the symlink out of the journal before moving to 
+                        * deallocate the blocks.  We need to be in the middle of
+                        * a transaction before calling buf_iterate like this.
+                        * 
+                        * Note: we have to kill any potential symlink buffers out of 
+                        * the journal prior to deallocating their blocks.  This is so 
+                        * that we don't race with another thread that may be doing
+                        * an allocation concurrently and pick up these blocks. It could
+                        * generate I/O against them which could go out ahead of our journal
+                        * transaction.
+                        */
+
+                       if (hfsmp->jnl && vnode_islnk(vp)) {
+                               buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
+                       }
+
                        /*
                         * Since we're already inside a transaction,
                         * tell hfs_truncate to skip the ubc_setsize.
@@ -303,46 +367,85 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                /* 
                 * Truncate away the resource fork, if we represent the data fork and
                 * it is the last fork.  That means, by definition, the rsrc fork is not in 
-                * core.  So we bring it into core, and then truncate it away. 
+                * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
+                * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
+                * to get rid of the resource fork's data. 
                 * 
                 * This is invoked via case A above only.
                 */
                if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
-                       struct vnode *rvp = NULLVP;
+                       struct cat_lookup_buffer *lookup_rsrc = NULL;
+                       struct cat_desc *desc_ptr = NULL;
+                       lockflags = 0;
+
+                       MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
+                       if (lookup_rsrc == NULL) {
+                               printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
+                               error = ENOMEM;
+                               goto out;
+                       }
+                       else {
+                               bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
+                       }
+
+                       if (cp->c_desc.cd_namelen == 0) {
+                               /* Initialize the rsrc descriptor for lookup if necessary*/
+                               MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
+                               
+                               lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
+                               lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
+                               lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+                               lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;  
+                               
+                               desc_ptr = &lookup_rsrc->lookup_desc;
+                       }
+                       else {
+                               desc_ptr = &cp->c_desc; 
+                       }
+
+                       lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+                       error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL, 
+                                       (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
+
+                       hfs_systemfile_unlock (hfsmp, lockflags);
                        
-                       /* 
-                        * It is safe for us to pass FALSE to the argument can_drop_lock
-                        * on this call to hfs_vgetrsrc.  We know that the resource fork does not 
-                        * exist in core, so we'll have to go to the catalog to retrieve its 
-                        * information.  That will attach the resource fork vnode to our cnode. 
-                        */
-                       error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE);
                        if (error) {
+                               FREE (lookup_rsrc, M_TEMP);
                                goto out;
                        }
+
                        /*
-                        * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock().
-                        * 
-                        * By bringing the vnode into core above, we may force hfs_vnop_reclaim 
-                        * to only partially finish if that's what called us.  Bringing the 
-                        * resource fork into core results in a new rsrc vnode that will get 
-                        * immediately marked for termination below.  It will get recycled/reclaimed 
-                        * as soon as possible, but that could cause another round of inactive and reclaim. 
+                        * Make the filefork in our temporary struct look like a real 
+                        * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
+                        */
+                       rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
+                       lookup_rsrc->lookup_fork.ff_cp = cp;
+
+                       /* 
+                        * If there were no errors, then we have the catalog's fork information 
+                        * for the resource fork in question.  Go ahead and delete the data in it now.
                         */
-                       cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE;
-                       error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+
+                       error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
+                       FREE(lookup_rsrc, M_TEMP);
+
                        if (error) {
                                goto out;
                        }
-                       
-                       /* 
-                        * Note that the following call to vnode_recycle is safe from within the
-                        * context of hfs_vnop_inactive or hfs_vnop_reclaim.  It is being invoked
-                        * on the RSRC fork vp (which is not our current vnode) As such, we hold 
-                        * an iocount on it and vnode_recycle will just add the MARKTERM bit at this
-                        * point.
+
+                       /*
+                        * This fileid's resource fork extents have now been fully deleted on-disk
+                        * and this CNID is no longer valid. At this point, we should be able to
+                        * zero out cp->c_blocks to indicate there is no data left in this file.
                         */
-                       vnode_recycle(rvp);  /* all done with this vnode */
+                       cp->c_blocks = 0;
+               }
+
+               /* End the transaction from the start of the file truncation segment */
+               if (started_tr) {
+                       hfs_end_transaction(hfsmp);
+                       started_tr = 0;
                }
        }
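
The truncation segment above is now bracketed by a transaction, with started_tr guaranteeing the transaction is ended exactly once no matter which path reaches the cleanup label. The shape, as a compilable sketch in which the two transaction calls are stubs standing in for hfs_start_transaction and hfs_end_transaction:

    #include <errno.h>

    static int  begin_tr(void) { return 0; }   /* stand-in for hfs_start_transaction */
    static void end_tr(void)   { }             /* stand-in for hfs_end_transaction   */

    static int truncate_forks(void)
    {
        int started_tr = 0, error = 0;

        if (begin_tr() != 0) {
            error = EINVAL;         /* same errno mapping the hunk uses */
            goto out;
        }
        started_tr = 1;

        /* ... deallocate blocks, update the catalog ... */

    out:
        if (started_tr) {           /* end exactly once, on every path */
            end_tr();
            started_tr = 0;
        }
        return error;
    }
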
        
@@ -485,13 +588,44 @@ out:
         hfs_end_transaction(hfsmp);
         started_tr = 0;
     }
-       
+
+#if 0
+#if CONFIG_PROTECT
+       /* 
+        * cnode truncate lock and cnode lock are both held exclusive here. 
+        *
+        * Go ahead and flush the keys out if this cnode is the last fork
+        * and it is not class F.  Class F keys should not be purged because they only
+        * and it is not class F.  Class F keys should not be purged because they
+        * exist only in memory and have no persistent counterpart.  Only do this 
+        * and went straight to reclaim).  This function gets called from both reclaim and
+        * inactive, so it will happen first in inactive if possible.
+        * 
+        * We need to be mindful that all pending IO for this file has already been
+        * issued and completed before we bzero out the key.  This is because
+        * if it isn't, tossing the key here could result in garbage IO being
+        * written (by using the bzero'd key) if the writes are happening asynchronously.
+        * 
+        * In addition, class A files may have already been purged due to the 
+        * lock event occurring.
+        */
+       if (forkcount == 1) {
+               struct cprotect *entry = cp->c_cpentry;
+               if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
+                       if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
+                               cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
+                               bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
+                               bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
+                       }
+               }
+       }
+#endif
+#endif
        
        return error;   
 }
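
The #if 0 block above is compiled out, but the pattern it sketches (mark the key flushed, then wipe the cached key bytes, and only after all pending IO against them has completed) is worth noting. A userspace analogue, using memset_s since a plain memset of memory about to be discarded can be elided by the optimizer; the structure and flag are stand-ins for the cprotect fields:

    #define __STDC_WANT_LIB_EXT1__ 1
    #include <string.h>

    #define KEY_FLUSHED 0x1                /* stand-in for CP_KEY_FLUSHED */

    struct keycache {
        unsigned      flags;
        unsigned char key[32];             /* cached, unwrapped key bytes */
    };

    static void flush_key(struct keycache *kc)
    {
        if ((kc->flags & KEY_FLUSHED) == 0) {
            kc->flags |= KEY_FLUSHED;
            memset_s(kc->key, sizeof(kc->key), 0, sizeof(kc->key));
        }
    }
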
 
 
-
 /*
  * hfs_vnop_inactive
  *
@@ -600,6 +734,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
        struct rl_entry *invalid_range;
        off_t leof;
        u_int32_t blks, blocksize;
+       /* flags for zero-filling sparse ranges */
        int cluster_flags = IO_CLOSE;
        int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 
@@ -611,6 +746,25 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
                return (0);
 
+#if CONFIG_PROTECT
+       /* 
+        * Figure out if we need to do synchronous IO. 
+        * 
+        * If the file represents a content-protected file, we may need
+        * to issue synchronous IO when we dispatch to the cluster layer.
+        * If we didn't, then the IO would go out to the disk asynchronously.
+        * If the vnode hits the end of inactive before getting reclaimed, the
+        * content protection keys would be wiped/bzeroed out, and we'd end up
+        * trying to issue the IO with an invalid key.  This will lead to file 
+        * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
+        * have completed (though they may be in the track cache).
+        */
+       if (cp_fs_protected(VTOVFS(vp))) {
+               cluster_flags |= IO_SYNC;
+               cluster_zero_flags |= IO_SYNC;
+       }
+#endif
+
        /* 
         * If we are being invoked from F_SWAPDATAEXTENTS, then we 
         * need to issue synchronous IO; Unless we are sure that all 
@@ -654,8 +808,10 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
        /*
         * Shrink the peof to the smallest size necessary to contain the leof.
         */
-       if (blks < fp->ff_blocks)
+       if (blks < fp->ff_blocks) {
                (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
+       }
+
        hfs_unlock(cp);
        (void) cluster_push(vp, cluster_flags);
        hfs_lock(cp, HFS_FORCE_LOCK);
@@ -871,7 +1027,7 @@ hfs_getnewvnode(
        if (cp == NULL) {
                return (ENOENT);
        }
-       
+
        /* 
         * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the 
         * descriptor in the cnode as needed if the cnode represents a hardlink.  
@@ -913,7 +1069,7 @@ hfs_getnewvnode(
                                        vnode_put (*vpp);
                                        *vpp = NULL;
                                }
-
+                               
                                /*
                                 * If we raced with VNOP_RECLAIM for this vnode, the hash code could
                                 * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
@@ -925,18 +1081,23 @@ hfs_getnewvnode(
                                if (hflags) {
                                        hfs_chashwakeup(hfsmp, cp, hflags);
                                }
-
+                               
                                *out_flags = GNV_CAT_ATTRCHANGED;
                                return ERECYCLE;        
                        }
                        else {
-                               /* Otherwise, CNID != fileid. Go ahead and copy in the new descriptor */
+                               /* 
+                                * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor. 
+                                *
+                                * Replacing the descriptor here is fine because we looked up the item without
+                                * a vnode in hand before.  If a vnode existed, its identity must be attached to this
+                                * item.  We are not susceptible to the lookup fastpath issue at this point.
+                                */
                                replace_desc(cp, descp);
                        }
                }
        }
        
-       
        /* Check if we found a matching vnode */
        if (*vpp != NULL) {
                return (0);
@@ -1210,8 +1371,10 @@ hfs_getnewvnode(
        }
        
 #if CONFIG_PROTECT
-       if (!issystemfile && (*out_flags & GNV_NEW_CNODE))
+       /* Initialize the cp data structures. The key should be in place now. */
+       if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
                cp_entry_init(cp, mp);
+       }
 #endif
 
        *vpp = vp;
@@ -1264,7 +1427,7 @@ hfs_reclaim_cnode(struct cnode *cp)
        }
 #endif
 #if CONFIG_PROTECT
-       cp_entry_destroy(cp);
+       cp_entry_destroy(&cp->c_cpentry);
 #endif
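
cp_entry_destroy now takes &cp->c_cpentry rather than the cnode itself, so the destroy routine can clear the caller's reference before freeing and nothing can dereference a stale pointer afterward. The idiom in isolation, with a placeholder type:

    #include <stdlib.h>

    struct blob { int payload; };

    static void blob_destroy(struct blob **bp)
    {
        struct blob *b = *bp;
        if (b == NULL)
            return;         /* nothing to clean up */
        *bp = NULL;         /* caller's field is cleared first */
        free(b);
    }
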
        
        
@@ -1303,7 +1466,6 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
        if (dvp && cnp) {
                int lookup = 0;
                struct cat_fork fork;
-
                bzero(&cndesc, sizeof(cndesc));
                cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
                cndesc.cd_namelen = cnp->cn_namelen;
@@ -1327,6 +1489,7 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
                 */
                
                lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
+
                if ((lookup == 0) && (cnid == attr.ca_fileid)) {
                        stillvalid = 1;
                        *error = 0;
@@ -1394,6 +1557,7 @@ notvalid:
        return (stillvalid);
 }
 
+
 /*
  * Per HI and Finder requirements, HFS should add in the
  * date/time that a particular directory entry was added 
@@ -1408,11 +1572,11 @@ notvalid:
  */
 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
        u_int8_t *finfo = NULL;
-       
+
        /* overlay the FinderInfo to the correct pointer, and advance */
        finfo = (u_int8_t*)attrp->ca_finderinfo;
        finfo = finfo + 16;
-       
+
        /* 
         * Make sure to write it out as big endian, since that's how
         * finder info is defined.  
@@ -1427,27 +1591,27 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
        else if (S_ISDIR(attrp->ca_mode)) {
                struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
                extinfo->date_added = OSSwapHostToBigInt32(dateadded);          
-               attrp->ca_recflags |= kHFSHasDateAddedMask; 
+               attrp->ca_recflags |= kHFSHasDateAddedMask; 
        }
-    
        /* If it were neither directory/file, then we'd bail out */
        return;
 }
 
+
 u_int32_t hfs_get_dateadded (struct cnode *cp) {
        u_int8_t *finfo = NULL;
        u_int32_t dateadded = 0;
-       
+
        if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
                /* Date added was never set.  Return 0. */
                return dateadded;
        }
-       
-    
+
+
        /* overlay the FinderInfo to the correct pointer, and advance */
        finfo = (u_int8_t*)cp->c_finderinfo;
        finfo = finfo + 16;
-       
+
        /* 
         * FinderInfo is written out in big endian... make sure to convert it to host
         * native before we use it.
@@ -1460,12 +1624,10 @@ u_int32_t hfs_get_dateadded (struct cnode *cp) {
                struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
                dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
        }
-    
+
        return dateadded;
 }
 
-
-
 /*
  * Touch cnode times based on c_touch_xxx flags
  *
@@ -1546,7 +1708,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
                        cp->c_flag |= C_MODIFIED;
                        touchvol = 1;
                }
-               
+
                if (cp->c_flag & C_NEEDS_DATEADDED) {
                        hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
                        cp->c_flag |= C_MODIFIED;
index 03878ef7f5a651f5fe0dcd50c2103d991cb78dd8..082fbb858a2675a0206ab9ffe2b8e31d5644225a 100644 (file)
@@ -65,6 +65,30 @@ struct filefork {
 };
 typedef struct filefork filefork_t;
 
+
+#define HFS_TEMPLOOKUP_NAMELEN 32
+
+/*
+ * Catalog Lookup struct (runtime)
+ *
+ * This is used so that when we need to malloc a container for a catalog
+ * lookup operation, we can acquire memory for everything in one fell swoop
+ * as opposed to putting many of these objects on the stack.  The cat_fork
+ * data structure can take up 100+ bytes easily, and that can add to stack
+ * overhead.  
+ *
+ * As a result, we use this to easily pass around the memory needed for a
+ * lookup operation.
+ */
+struct cat_lookup_buffer {
+       struct cat_desc lookup_desc;
+       struct cat_attr lookup_attr;
+       struct filefork lookup_fork;
+       struct componentname lookup_cn;
+       char lookup_name[HFS_TEMPLOOKUP_NAMELEN]; /* for open-unlinked paths only */
+};
+
+
 /* Aliases for common fields */
 #define ff_size          ff_data.cf_size
 #define ff_new_size      ff_data.cf_new_size
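
As the comment above explains, the point of cat_lookup_buffer is to turn several large, short-lived stack objects into one heap object acquired with a single allocation. A userspace sketch of the same consolidation, with invented member names and sizes:

    #include <stdlib.h>

    struct lookup_buffer {      /* members and sizes are illustrative */
        char desc[64];
        char attr[128];
        char fork[160];         /* the 100+ byte cat_fork analogue */
        char name[32];
    };

    static int do_lookup(void)
    {
        struct lookup_buffer *lb = calloc(1, sizeof(*lb));  /* one MALLOC + bzero */
        if (lb == NULL)
            return -1;
        /* ... fill lb->desc, run the lookup, consume lb->fork ... */
        free(lb);
        return 0;
    }
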
@@ -161,7 +185,7 @@ typedef struct cnode cnode_t;
 #define c_ctime                c_attr.ca_ctime
 #define c_itime                c_attr.ca_itime
 #define c_btime                c_attr.ca_btime
-#define c_flags                c_attr.ca_flags
+#define c_bsdflags             c_attr.ca_flags
 #define c_finderinfo   c_attr.ca_finderinfo
 #define c_blocks       c_attr.ca_union2.cau_blocks
 #define c_entries      c_attr.ca_union2.cau_entries
@@ -192,7 +216,12 @@ typedef struct cnode cnode_t;
 #define C_FORCEUPDATE      0x00100  /* force the catalog entry update */
 #define C_HASXATTRS        0x00200  /* cnode has extended attributes */
 #define C_NEG_ENTRIES      0x00400  /* directory has negative name entries */
-#define C_SWAPINPROGRESS   0x00800     /* cnode's data is about to be swapped.  Issue synchronous cluster io */
+/* 
+ * For C_SSD_STATIC: SSDs may want to deal with the file payload data in a 
+ * different manner knowing that the content is not likely to be modified. This is
+ * purely advisory at the HFS level, and is not maintained after the cnode goes out of core.
+ */
+#define C_SSD_STATIC       0x00800  /* Assume future writes contain static content */
 
 #define C_NEED_DATA_SETSIZE  0x01000  /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */
 #define C_NEED_RSRC_SETSIZE  0x02000  /* Do a ubc_setsize(0) on c_vp after the unlock */
@@ -202,6 +231,9 @@ typedef struct cnode cnode_t;
 #define C_RENAMED                      0x10000 /* cnode was deleted as part of rename; C_DELETED should also be set */
 #define C_NEEDS_DATEADDED      0x20000 /* cnode needs date-added written to the finderinfo bit */
 #define C_BACKINGSTORE         0x40000 /* cnode is a backing store for an existing or currently-mounting filesystem */
+#define C_SWAPINPROGRESS       0x80000 /* cnode's data is about to be swapped.  Issue synchronous cluster io */
+
+
 #define ZFTIMELIMIT    (5 * 60)
 
 /*
@@ -318,8 +350,8 @@ extern void  hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct
 extern void  hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int flags);
 extern void  hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp);
 
-extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc,
-                                                                                int skiplock, int allow_deleted);
+extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, 
+                                                                               int skiplock, int allow_deleted);
 extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, 
                                                                                 int wantrsrc, int skiplock, int *out_flags, int *hflags);
 extern int hfs_chash_snoop(struct hfsmount *, ino_t, int, int (*)(const struct cat_desc *,
index 0345e4d9e99ed32a2ba5ef2566e46ef6dfdbf684..4a88d0c527630b81e453aa89d7f98e554495f7ab 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/ubc_internal.h>
 #include <sys/vnode_if.h>
 #include <sys/vnode_internal.h>
+#include <sys/fcntl.h>
 #include <libkern/OSByteOrder.h>
 
 #include "hfs.h"
 #include "hfs_cnode.h"
 
-#ifdef CONFIG_PROTECT
+#if CONFIG_PROTECT
 static struct cp_wrap_func             g_cp_wrap_func = {NULL, NULL};
-static struct cp_global_state  g_cp_state = {0, 0};
+static struct cp_global_state  g_cp_state = {0, 0, 0};
 
 extern int (**hfs_vnodeop_p) (void *);
 
@@ -49,24 +50,26 @@ extern int (**hfs_vnodeop_p) (void *);
  * CP private functions
  */
 static int cp_is_valid_class(int);
-static int cp_getxattr(cnode_t *, struct cp_xattr *);
-static int cp_setxattr(cnode_t *, struct cp_xattr *, int);
-static struct cprotect *cp_entry_alloc(void);
-static int cp_make_keys (struct cprotect *);
-static int cp_restore_keys(struct cprotect *);
+static int cp_root_major_vers(mount_t mp);
+static int cp_getxattr(cnode_t *, struct hfsmount *hfsmp, struct cprotect **);
+static struct cprotect *cp_entry_alloc(size_t);
+static void cp_entry_dealloc(struct cprotect *entry);
+static int cp_setup_aes_ctx(struct cprotect *);
+static int cp_make_keys (struct cprotect **, struct hfsmount *hfsmp, cnid_t,  int);
+static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp);
 static int cp_lock_vfs_callback(mount_t, void *);
 static int cp_lock_vnode_callback(vnode_t, void *);
 static int cp_vnode_is_eligible (vnode_t);
 static int cp_check_access (cnode_t *, int);
-static int cp_wrap(int, void *, void *);
-static int cp_unwrap(int, void *, void *);
+static int cp_wrap(int, struct hfsmount *hfsmp, cnid_t, struct cprotect**);
+static int cp_unwrap(int, struct cprotect *);
 
 
 
 #if DEVELOPMENT || DEBUG
 #define CP_ASSERT(x)           \
        if ((x) == 0) {                 \
-               panic("CP: failed assertion in %s", __FUNCTION__);      \
+               panic("Content Protection: failed assertion in %s", __FUNCTION__);      \
        }
 #else
 #define CP_ASSERT(x)
@@ -76,10 +79,17 @@ int
 cp_key_store_action(int action)
 {
        g_cp_state.lock_state = action;
-       if (action == CP_LOCKED_STATE)
-               return vfs_iterate(0, cp_lock_vfs_callback, (void *)action);
-       else
-               return 0;
+       if (action == CP_LOCKED_STATE) {
+               /*
+                * Note that because we are using the void* arg to pass the key store
+                * value into the vfs cp iteration, we need to pass the int around as a pointer.
+                * Casting through uintptr_t avoids 32/64-bit truncation warnings.
+                */
+               return vfs_iterate(0, cp_lock_vfs_callback, (void*)((uintptr_t)action));
+       }
+
+       return 0;
+
 }
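
The double cast above, (void *)((uintptr_t)action), is the portable way to pass an integer through a void * callback argument without truncation warnings on LP64; the receiver reverses it symmetrically. A runnable example (the action value here is illustrative, not a real CP constant):

    #include <stdint.h>
    #include <stdio.h>

    static int cb(void *arg)
    {
        int action = (int)(uintptr_t)arg;   /* narrow back symmetrically */
        printf("action=%d\n", action);
        return 0;
    }

    int main(void)
    {
        int action = 1;                     /* e.g. a lock-state constant */
        return cb((void *)(uintptr_t)action);
    }
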
 
 
@@ -94,87 +104,248 @@ cp_register_wraps(cp_wrap_func_t key_store_func)
        return 0;
 }
 
+#if 0
+/* 
+ * If necessary, this function can be used to 
+ * query the device's lock state.
+ */
+int 
+cp_isdevice_locked (void) {    
+       if (g_cp_state.lock_state == CP_UNLOCKED_STATE) {
+               return 0;
+       }
+       return 1;
+}
+#endif
+
 /*
  * Allocate and initialize a cprotect blob for a new cnode.
- * Called from hfs_getnewcnode: cnode is locked exclusive.
+ * Called from hfs_getnewvnode: cnode is locked exclusive.
  * Read xattr data off the cnode. Then, if conditions permit,
  * unwrap the file key and cache it in the cprotect blob.
  */
 int 
-cp_entry_init(cnode_t *cnode, struct mount *mp)
+cp_entry_init(struct cnode *cp, struct mount *mp)
 {
-       struct cprotect *entry;
-       struct cp_xattr xattr;
+       struct cprotect *entry = NULL;
        int error = 0;
-       
+       struct hfsmount *hfsmp = VFSTOHFS(mp);
+
        if (!cp_fs_protected (mp)) {
-               cnode->c_cpentry = NULL;
+               cp->c_cpentry = NULL;
                return 0;
        }
        
-       if (!S_ISREG(cnode->c_mode)) {
-               cnode->c_cpentry = NULL;
+       if (!S_ISREG(cp->c_mode) && !S_ISDIR(cp->c_mode)) {
+               cp->c_cpentry = NULL;
                return 0;
        }
-
+       
        if (!g_cp_state.wrap_functions_set) {
                printf("hfs: cp_update_entry: wrap functions not yet set\n");
                return ENXIO;
        }
        
-       CP_ASSERT (cnode->c_cpentry == NULL);
-       
-       entry = cp_entry_alloc();
-       if (!entry)
-               return ENOMEM;
-       
-       entry->cp_flags |= CP_KEY_FLUSHED;
-       cnode->c_cpentry = entry;
+       if (hfsmp->hfs_running_cp_major_vers == 0) {
+               cp_root_major_vers(mp);
+       }
        
-       error = cp_getxattr(cnode, &xattr);
+       CP_ASSERT (cp->c_cpentry == NULL);
+
+       error = cp_getxattr(cp, hfsmp, &entry);
+
+       /*
+        * Normally, we should always have a CP EA for a file or directory that
+        * we are initializing here. However, there are some extenuating circumstances,
+        * such as the root directory immediately following a newfs_hfs.
+        *
+        * As a result, we leave code here to deal with an ENOATTR which will always 
+        * As a result, we leave code here to deal with an ENOATTR, which will always 
+        */
        if (error == ENOATTR) {
+               int sub_error;
+               
+               sub_error = cp_entry_create_keys (&entry, NULL, hfsmp, PROTECTION_CLASS_D, cp->c_fileid, cp->c_mode);
+
+               /* Now we have keys.  Write them out. */
+               if (sub_error == 0) {
+                       sub_error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE);
+               }
+               error = sub_error;
+       }
+       else if (error == 0) {
+               if (S_ISREG(cp->c_mode)) {
+                       entry->cp_flags |= CP_KEY_FLUSHED;
+               }
+       }       
+       /* 
+        * For errors other than ENOATTR, we don't do anything. 
+        * cp_entry_destroy can deal with a NULL argument if cp_getxattr
+        * failed malloc or there was a B-Tree error.
+        */
+
+       cp->c_cpentry = entry;
+
+       if (error)  {
+               cp_entry_destroy(&cp->c_cpentry);
+       }
+       
+       return error;
+}
+
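
cp_entry_init above treats ENOATTR as "no EA yet: create default class-D keys and write them out". The same read-else-create-default shape, using the userspace xattr calls from <sys/xattr.h> on OS X; the attribute name and payload here are invented for illustration:

    #include <errno.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    static int load_or_default(const char *path)
    {
        char buf[64];
        ssize_t len = getxattr(path, "com.example.cp", buf, sizeof(buf), 0, 0);

        if (len >= 0)
            return 0;               /* EA present: use its contents */
        if (errno != ENOATTR)
            return errno;           /* real errors propagate */

        /* Missing EA: fall back to a default and persist it, as the hunk does */
        static const char dflt[] = "class-D";
        if (setxattr(path, "com.example.cp", dflt, sizeof(dflt), 0, XATTR_CREATE) != 0)
            return errno;
        return 0;
    }
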
+/*
+ * Set up initial key/class pair on cnode. The cnode does not yet exist,
+ * so we must take a pointer to the cprotect struct.  
+ * 
+ * NOTE:
+ * We call this function in two places:
+ * 1) hfs_makenode *prior* to taking the journal/b-tree locks.
+ * A successful return value from this function is a pre-requisite for continuing on
+ * with file creation, as a wrap failure should immediately preclude the creation of
+ * the file.
+ *
+ * 2) cp_entry_init if we are trying to establish keys for a file/directory that did not
+ * have them already.  (newfs_hfs may create entries in the namespace).
+ *
+ * At this point, we hold the directory cnode lock exclusive if it is available.
+ */ 
+int
+cp_entry_create_keys(struct cprotect **entry_ptr, struct cnode *dcp, struct hfsmount *hfsmp,
+               uint32_t input_class, cnid_t fileid, mode_t cmode)
+{
+       int error = 0;
+       struct cprotect *entry = NULL;
+       size_t keylen;
+
+       /* Default to class D */
+       uint32_t target_class = PROTECTION_CLASS_D;
+
+       /* Decide the target class.  Input argument takes priority. */
+       if (cp_is_valid_class (input_class)) {
+               target_class = input_class;
                /* 
-                * Can't tell if the file is new, or was previously created but never
-                * written to or set-classed. In either case, it'll need a fresh 
-                * per-file key.
+                * One exception: F is never valid for a directory
+                * because its children may inherit and userland will be
+                * unable to read/write to the files.
                 */
-               entry->cp_flags |= CP_NEEDS_KEYS;
-               error = 0;
-       } else {
-               if (xattr.xattr_major_version != CP_CURRENT_MAJOR_VERS) {
-                       printf("hfs: cp_entry_init: bad xattr version\n");
-                       error = EINVAL;
-                       goto out;
+               if (S_ISDIR(cmode)) {
+                       if (target_class == PROTECTION_CLASS_F) {
+                               return EINVAL;
+                       }
+               }
+       }
+       else {
+               /* If no valid class was supplied, then inherit from parent if possible */
+               if ((dcp) && (dcp->c_cpentry)) {
+                       uint32_t parentclass = dcp->c_cpentry->cp_pclass;
+                       /* If the parent class is not valid, default back to D */
+                       if (cp_is_valid_class(parentclass)) {
+                               /* Parent class was good. use it. */
+                               target_class = parentclass;
+                       }
+                       /* Otherwise, we already defaulted to 'D' */
                }
+       }
 
-               /* set up entry with information from xattr */
-               entry->cp_pclass = xattr.persistent_class;
-               bcopy(&xattr.persistent_key, &entry->cp_persistent_key, CP_WRAPPEDKEYSIZE);
+       keylen = S_ISDIR(cmode) ? 0 : CP_INITIAL_WRAPPEDKEYSIZE;
+       entry = cp_entry_alloc (keylen);
+       if (!entry) {
+               *entry_ptr = NULL;
+               return ENOMEM;
        }
 
-out:
+       if (S_ISREG(cmode)) {
+               entry->cp_pclass = target_class;
+               entry->cp_flags |= CP_NEEDS_KEYS;
+               /* 
+                * The 'fileid' argument to this function will either be 
+                * a valid fileid for an existing file/dir, or it will be 0.
+                * If it is 0, then that is an indicator to the layer below
+                * that the file does not yet exist and we need to bypass the
+                * cp_wrap round-trip to the keybag.
+                *
+                * If we are being invoked on behalf of a file/dir that does
+                * not yet have a key, then it will be a valid fileid and we
+                * need to behave like a setclass.
+                */
+               error = cp_make_keys(&entry, hfsmp, fileid, entry->cp_pclass);
+       }
+       else if (S_ISDIR(cmode)) {
+               /* Directories just get their cp_pclass set */
+               entry->cp_pclass = target_class;
+       }
+       else {
+               /* Unsupported for non-dir and non-file. */
+               error = EINVAL;
+       }
+
+       /* 
+        * We only initialize and create the keys here; we cannot 
+        * write out the EA until the journal lock and EA b-tree locks
+        * are acquired.
+        */
+
        if (error) {
-               cp_entry_destroy (cnode);
+               /* destroy the CP blob */
+               cp_entry_destroy (&entry);
+               *entry_ptr = NULL;
+       }
+       else {
+               /* otherwise, emit the cprotect entry */
+               *entry_ptr = entry;
        }
+
        return error;
 }
 
 /*
- * Set up initial key/class pair on cnode. The cnode is locked exclusive.
+ * Set up an initial key/class pair for a disassociated cprotect entry.
+ * This function is used to generate transient keys that will never be 
+ * written to disk.  We use class F for this since it provides the exact
+ * semantics that are needed here.  Because we never attach this blob to
+ * a cnode directly, we take a pointer to the cprotect struct.
+ *
+ * This function is primarily used in the HFS FS truncation codepath
+ * where we may rely on AES symmetry to relocate encrypted data from
+ * one spot on the disk to another.
  */
-int 
-cp_entry_create_keys(cnode_t *cnode)
-{
-       struct cprotect *entry = cnode->c_cpentry;
+int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) {
+       int error = 0;
+       struct cprotect *entry = NULL;
+       size_t keylen;
+
+       /* Default to class F */
+       uint32_t target_class = PROTECTION_CLASS_F;
 
+       /* 
+        * This should only be used for files, so we default to the
+        * initial wrapped key size
+        */
+       keylen = CP_INITIAL_WRAPPEDKEYSIZE;
+       entry = cp_entry_alloc (keylen);
        if (!entry) {
-               //unprotected file: continue
-               return 0;
+               *entry_ptr = NULL;
+               return ENOMEM;
        }
 
-       CP_ASSERT((entry->cp_flags & CP_NEEDS_KEYS));
+       error = cp_make_keys (&entry, hfsmp, 0, target_class);
+
+       /* 
+        * We only initialize the keys here; we don't write anything out
+        */
+
+       if (error) {
+               /* destroy the CP blob */
+               cp_entry_destroy (&entry);
+               *entry_ptr = NULL;
+       }
+       else {
+               /* otherwise, emit the cprotect entry */
+               *entry_ptr = entry;
+       }
+
+       return error;
 
-       return cp_make_keys(entry);
 }
 
 /*
@@ -182,18 +353,17 @@ cp_entry_create_keys(cnode_t *cnode)
  * Called at hfs_reclaim_cnode: cnode is locked exclusive. 
  */
 void
-cp_entry_destroy(cnode_t *cnode)
-{
-       struct cprotect *entry = cnode->c_cpentry;
+cp_entry_destroy(struct cprotect **entry_ptr) {
+       struct cprotect *entry = *entry_ptr;
        if (!entry) {
                /* nothing to clean up */
                return;
        }
-       cnode->c_cpentry = NULL;
-       bzero(entry, sizeof(*entry));
-       FREE(entry, M_TEMP);
+       *entry_ptr = NULL;
+       cp_entry_dealloc(entry);
 }
 
+
 int 
 cp_fs_protected (mount_t mnt) {
        return (vfs_flags(mnt) & MNT_CPROTECT);
@@ -204,8 +374,8 @@ cp_fs_protected (mount_t mnt) {
  * Return a pointer to underlying cnode if there is one for this vnode.
  * Done without taking cnode lock, inspecting only vnode state.
  */
-cnode_t *
-cp_get_protected_cnode(vnode_t vp)
+struct cnode *
+cp_get_protected_cnode(struct vnode *vp)
 {
        if (!cp_vnode_is_eligible(vp)) {
                return NULL;
@@ -216,7 +386,7 @@ cp_get_protected_cnode(vnode_t vp)
                return NULL;
        }
        
-       return (cnode_t *) vp->v_data;
+       return (struct cnode*) vp->v_data;
 }
 
 
@@ -225,206 +395,467 @@ cp_get_protected_cnode(vnode_t vp)
  * or returns error.
  */
 int 
-cp_vnode_getclass(vnode_t vp, int *class)
+cp_vnode_getclass(struct vnode *vp, int *class)
 {
-       struct cp_xattr xattr;
+       struct cprotect *entry;
        int error = 0;
-       struct cnode *cnode;
-       
+       struct cnode *cp;
+       int took_truncate_lock = 0;
+       struct hfsmount *hfsmp = NULL;
+
+       /* Is this an interesting vp? */
        if (!cp_vnode_is_eligible (vp)) {
                return EBADF;
        }
-       
-       cnode = VTOC(vp);
 
-       hfs_lock(cnode, HFS_SHARED_LOCK);
+       /* Is the mount point formatted for content protection? */
+       if (!cp_fs_protected(VTOVFS(vp))) {
+               return EPERM;
+       }
+       
+       cp = VTOC(vp);
+       hfsmp = VTOHFS(vp);
+       
+       /*
+        * Take the truncate lock up-front in shared mode because we may need 
+        * to manipulate the CP blob. Pend lock events until we're done here. 
+        */
+       hfs_lock_truncate (cp, HFS_SHARED_LOCK);
+       took_truncate_lock = 1;
 
-       if (cp_fs_protected(VTOVFS(vp))) {
-               /* pull the class from the live entry */
-               struct cprotect *entry = cnode->c_cpentry;
-               if (!entry) {
-                       panic("Content Protection: uninitialized cnode %p", cnode);
-               }
+       /*
+        * We take only the shared cnode lock up-front.  If it turns out that
+        * we need to manipulate the CP blob to write a key out, drop the 
+        * shared cnode lock and acquire an exclusive lock. 
+        */
+       error = hfs_lock(cp, HFS_SHARED_LOCK);
+       if (error) {
+               hfs_unlock_truncate(cp, 0);
+               return error;
+       }
+       
+       /* pull the class from the live entry */
+       entry = cp->c_cpentry;
+       
+       if (!entry) {
+               panic("Content Protection: uninitialized cnode %p", cp);
+       }
+       
+       /*
+        * Any vnode on a content protected filesystem must have keys
+        * created by the time the vnode is vended out.  If we generate
+        * a vnode that does not have keys, something bad happened.
+        */
+       if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+               panic ("cp_vnode_getclass: cp %p has no keys!", cp);
+       }
 
-               if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-                       error = cp_make_keys(entry);
-               }
+       if (error == 0) {
                *class = entry->cp_pclass;
-
-       } else {
-               /* 
-                * Mount point is not formatted for content protection. If a class
-                * has been specified anyway, report it. Otherwise, report D.
-                */
-               error = cp_getxattr(cnode, &xattr);
-               if (error == ENOATTR) {
-                       *class = PROTECTION_CLASS_D;
-                       error = 0;
-               } else if (error == 0) {
-                       *class = xattr.persistent_class;
-               }
        }
        
-       hfs_unlock(cnode);
+       if (took_truncate_lock) {
+               hfs_unlock_truncate(cp, 0);
+       }
+       
+       hfs_unlock(cp);
        return error;
 }
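
Note the locking discipline in cp_vnode_getclass above: the truncate lock is taken (shared) before the cnode lock, and if the inner lock cannot be acquired the already-held outer lock is dropped before returning. A userspace shape of the same rule, sketched with two rwlocks:

    #include <pthread.h>

    static pthread_rwlock_t truncate_lk = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_rwlock_t cnode_lk    = PTHREAD_RWLOCK_INITIALIZER;

    static int read_class(int *out_class)
    {
        pthread_rwlock_rdlock(&truncate_lk);        /* outer lock first */
        if (pthread_rwlock_rdlock(&cnode_lk) != 0) {
            pthread_rwlock_unlock(&truncate_lk);    /* error path drops the outer lock */
            return -1;
        }
        *out_class = 42;    /* stand-in for entry->cp_pclass */
        pthread_rwlock_unlock(&cnode_lk);
        pthread_rwlock_unlock(&truncate_lk);
        return 0;
    }
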
 
 
 /*
- * Sets persistent class for this file.
+ * Sets persistent class for this file or directory.
  * If vnode cannot be protected (system file, non-regular file, non-hfs), EBADF.
  * If the new class can't be accessed now, EPERM.
  * Otherwise, record class and re-wrap key if the mount point is content-protected.
  */
 int 
-cp_vnode_setclass(vnode_t vp, uint32_t newclass)
+cp_vnode_setclass(struct vnode *vp, uint32_t newclass)
 {
-       struct cnode *cnode;
-       struct cp_xattr xattr;
+       struct cnode *cp;
        struct cprotect *entry = 0;
        int error = 0;
+       int took_truncate_lock = 0;
+       u_int32_t keylen = 0;
+       struct hfsmount *hfsmp = NULL;
        
        if (!cp_is_valid_class(newclass)) {
                printf("hfs: CP: cp_setclass called with invalid class %d\n", newclass);
                return EINVAL;
        }
 
-       /* is this an interesting file? */
+       if (vnode_isdir(vp)) {
+               if (newclass == PROTECTION_CLASS_F) {
+                       /* 
+                        * Directories are not allowed to be set to class F, since the
+                        * children may inherit it and then userland will not be able
+                        * to read/write to the file.
+                        */
+                       return EINVAL;
+               }
+       }
+
+       /* Is this an interesting vp? */
        if (!cp_vnode_is_eligible(vp)) {
                return EBADF;
        }
 
-       cnode = VTOC(vp);
+       /* Is the mount point formatted for content protection? */
+       if (!cp_fs_protected(VTOVFS(vp))) {
+               return EPERM;
+       }
+
+       cp = VTOC(vp);
+       hfsmp = VTOHFS(vp);
 
-       if (hfs_lock(cnode, HFS_EXCLUSIVE_LOCK)) {
+       /* 
+        * Take the cnode truncate lock exclusive because we want to manipulate the 
+        * CP blob. The lock-event handling code is doing the same.  This also forces
+        * all pending IOs to drain before we can re-write the persistent and cache keys.
+        */
+       hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK);
+       took_truncate_lock = 1;
+       
+       if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) {
                return EINVAL;
        }
        
-       /* is the volume formatted for content protection? */
-       if (cp_fs_protected(VTOVFS(vp))) {
-               entry = cnode->c_cpentry;
-               if (entry == NULL) { 
-                       error = EINVAL;
-                       goto out;
-               }
+       entry = cp->c_cpentry;
+       if (entry == NULL) {
+               error = EINVAL;
+               goto out;
+       }
 
-               if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-                       if ((error = cp_make_keys(entry)) != 0) {
-                               goto out;
-                       }
-               }
+       if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+               /* 
+                * We should have created this vnode and its keys atomically during
+                * file/directory creation.  If we get here and it doesn't have keys yet,
+                * something bad happened.
+                */
+               panic ("cp_vnode_setclass: cp %p has no keys!\n", cp);
+       }
 
-               if (entry->cp_flags & CP_KEY_FLUSHED) {
-                       error = cp_restore_keys(entry);
-                       if (error)
-                               goto out;
-               }
+       if (entry->cp_flags & CP_KEY_FLUSHED) {
+               error = cp_restore_keys(entry, hfsmp);
+               if (error)
+                       goto out;
+       }
 
-               /* re-wrap per-file key with new class */
-               error = cp_wrap(newclass,
-                                               &entry->cp_cache_key[0], 
-                                               &entry->cp_persistent_key[0]);
+       /* re-wrap per-file key with new class */
+       if (vnode_isreg(vp)) {
+               error = cp_wrap(newclass, hfsmp, cp->c_fileid, &cp->c_cpentry);
                if (error) {
                        /* we didn't have perms to set this class. leave file as-is and error out */
                        goto out;
                }
+       }
 
-               entry->cp_pclass = newclass;
+       /* cp_wrap() potentially updates c_cpentry because we passed in its ptr */
+       entry = cp->c_cpentry;
+       
+       entry->cp_pclass = newclass;
 
-               /* prepare to write the xattr out */
-               bcopy(&entry->cp_persistent_key, &xattr.persistent_key, CP_WRAPPEDKEYSIZE);
-       } else {
-               /* no live keys for this file. just remember intended class */
-               bzero(&xattr.persistent_key, CP_WRAPPEDKEYSIZE);
+       /* prepare to write the xattr out */
+       keylen = entry->cp_persistent_key_len;
+       
+       error = cp_setxattr(cp, entry, VTOHFS(vp), 0, XATTR_REPLACE);
+       if (error == ENOATTR)
+               error = cp_setxattr(cp, entry, VTOHFS(vp), 0, XATTR_CREATE);
+       
+out:
+       
+       if (took_truncate_lock) {
+               hfs_unlock_truncate (cp, 0);
+       }
+       hfs_unlock(cp);
+       return error;
+}
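
Editorial aside, not part of the patch: a minimal userspace sketch of the path that ends in cp_vnode_setclass(). It assumes the F_SETPROTECTIONCLASS fcntl and the usual numeric class values (1 = class A through 4 = class D); neither appears in this hunk, so treat both as assumptions.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("protected.dat", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* 3 is PROTECTION_CLASS_C in the assumed numbering.  The kernel
	 * validates the class, re-wraps the per-file key, and returns
	 * EPERM if the target class key is unavailable. */
	if (fcntl(fd, F_SETPROTECTIONCLASS, 3) == -1) {
		perror("fcntl(F_SETPROTECTIONCLASS)");
	}
	close(fd);
	return 0;
}
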
+
+
+int cp_vnode_transcode(vnode_t vp)
+{
+       struct cnode *cp;
+       struct cprotect *entry = 0;
+       int error = 0;
+       int took_truncate_lock = 0;
+       struct hfsmount *hfsmp = NULL;
+
+       /* Is this an interesting vp? */
+       if (!cp_vnode_is_eligible(vp)) {
+               return EBADF;
+       }
+
+       /* Is the mount point formatted for content protection? */
+       if (!cp_fs_protected(VTOVFS(vp))) {
+               return EPERM;
        }
 
-       xattr.xattr_major_version = CP_CURRENT_MAJOR_VERS;
-       xattr.xattr_minor_version = CP_CURRENT_MINOR_VERS;
-       xattr.key_size = CP_WRAPPEDKEYSIZE;
-       xattr.flags = 0;
-       xattr.persistent_class = newclass;
-       error = cp_setxattr(cnode, &xattr, XATTR_REPLACE);
+       cp = VTOC(vp);
+       hfsmp = VTOHFS(vp);
+
+       /* 
+        * Take the cnode truncate lock exclusive because we want to manipulate the 
+        * CP blob. The lock-event handling code is doing the same.  This also forces
+        * all pending IOs to drain before we can re-write the persistent and cache keys.
+        */
+       hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK);
+       took_truncate_lock = 1;
        
-       if (error == ENOATTR) {
-               error = cp_setxattr (cnode, &xattr, XATTR_CREATE);
+       if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) {
+               return EINVAL;
+       }
+       
+       entry = cp->c_cpentry;
+       if (entry == NULL) {
+               error = EINVAL;
+               goto out;
+       }
+
+       if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+               /*
+                * If we are transcoding keys for AKB, then we should have already established
+                * a set of keys for this vnode.  If we don't have keys yet, something bad
+                * happened.
+                */
+               panic ("cp_vnode_transcode: cp %p has no keys!", cp);
+       }
+
+       if (entry->cp_flags & CP_KEY_FLUSHED) {
+               error = cp_restore_keys(entry, hfsmp);
+
+               if (error) {
+                       goto out;
+               }
+       }
+
+       /* Send the per-file key for re-wrap with the current class information.
+        * Pass NULLs in the output parameters of the wrapper() and AKS will do the rest.
+        * There are no outputs to process, so just drop the locks and pass along the error. */
+       if (vnode_isreg(vp)) {
+
+               /* The following check mirrors cp_wrap();
+                * see there for more detailed comments. */
+
+               if (entry->cp_pclass == PROTECTION_CLASS_F) {
+                       error = EINVAL;
+                       goto out;
+               }
+
+               error = g_cp_wrap_func.wrapper(entry->cp_pclass,
+                                                                          cp->c_fileid,
+                                                                          entry->cp_cache_key,
+                                                                          entry->cp_cache_key_len,
+                                                                          NULL,
+                                                                          NULL);
+
+               if (error)
+                       error = EPERM;
        }
 
 out:
-       hfs_unlock(cnode);
+       if (took_truncate_lock) {
+               hfs_unlock_truncate (cp, 0);
+       }
+       hfs_unlock(cp);
        return error;
 }
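
Editorial sketch, not part of the patch: cp_vnode_transcode() above relies on the wrap callback tolerating NULL output parameters. The real AKS wrapper and the exact cp_wrap_func_t prototype are not shown in this diff, so the signature below is an assumption for illustration only.

#include <errno.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical wrap callback honoring the NULL-output convention. */
static int
example_wrapper(uint32_t pclass, uint64_t fileid,
                const uint8_t *key, size_t key_len,
                uint8_t *wrapped_out, size_t *wrapped_len)
{
	(void)pclass;
	(void)fileid;
	if (key == NULL || key_len == 0)
		return EINVAL;

	/* Transcode-style call: NULL outputs mean the caller wants only the
	 * side effects (e.g. refreshing the backup keybag entry). */
	if (wrapped_out == NULL || wrapped_len == NULL)
		return 0;

	/* Otherwise emit the wrapped key (placeholder copy, not a real wrap)
	 * and report its length. */
	memcpy(wrapped_out, key, key_len);
	*wrapped_len = key_len;
	return 0;
}
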
 
+
 /*
- * Check permission for the given operation (read, write, page in) on this node.
+ * Check permission for the given operation (read, write) on this node.
  * Additionally, if the node needs work, do it:
  * - create a new key for the file if one hasn't been set before
  * - write out the xattr if it hasn't already been saved
  * - unwrap the key if needed
  *
  * Takes cnode lock, and upgrades to exclusive if modifying cprotect.
+ *
+ * Note that this function does *NOT* take the cnode truncate lock.  This is because 
+ * the thread calling us may already hold the truncate lock.  It is also not
+ * necessary: either we finish this function before the keys are tossed (and
+ * any subsequent IO fails once they are), or the keys are tossed first and
+ * this function fails.
+ * Either way, the cnode lock still ultimately guards the keys.  We only rely on the
+ * truncate lock to protect us against tossing the keys as a cluster call is in-flight. 
  */
-       int
-cp_handle_vnop(cnode_t *cnode, int vnop)
+int
+cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
 {
        struct cprotect *entry;
        int error = 0;
-       struct cp_xattr xattr;
+       struct hfsmount *hfsmp = NULL;
+       struct cnode *cp = NULL;
 
-       if ((error = hfs_lock(cnode, HFS_SHARED_LOCK)) != KERN_SUCCESS) {
+       /* 
+        * First, do validation against the vnode before proceeding any further:
+        * Is this vnode originating from a valid content-protected filesystem ?
+        */
+       if (cp_vnode_is_eligible(vp) == 0) {
+               /* 
+                * It is either not HFS or not a file/dir.  Just return success.  This is a
+                * valid case when servicing I/O against another filesystem type from VFS.
+                */
+               return 0;
+       }
+
+       if (cp_fs_protected (VTOVFS(vp)) == 0) {
+               /*
+                * The underlying filesystem does not support content protection.  This is also 
+                * a valid case.  Simply return success.
+                */
+               return 0;
+       }
+       
+       /* 
+        * At this point, we know we have a HFS vnode that backs a file or directory on a
+        * filesystem that supports content protection
+        */
+       cp = VTOC(vp);
+
+       if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) {
                return error;
        }
 
-       entry = cnode->c_cpentry;
-       if (!entry)
-               goto out;
+       entry = cp->c_cpentry;
+       
+       if (!entry) {
+               /*
+                * If this cnode is not content protected, simply return success.
+                * Note that this function is called by all I/O-based call sites
+                * whenever XNU is built with CONFIG_PROTECT enabled.
+                */
 
-       if ((error = cp_check_access(cnode, vnop)) != KERN_SUCCESS) {
                goto out;
        }
 
+       vp = CTOV(cp, 0);
+       if (vp == NULL) {
+               /* try the resource fork */
+               vp = CTOV(cp,1);
+               if (vp == NULL) {
+                       error = EINVAL;
+                       goto out;
+               }
+       }
+       hfsmp = VTOHFS(vp);
+
+       if ((error = cp_check_access(cp, vnop))) {
+               /* check for raw encrypted access before bailing out */
+               if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) {
+                       /* 
+                        * read access only + asking for the raw encrypted bytes 
+                        * is legitimate, so reset the error value to 0
+                        */
+                       error = 0;
+               }
+               else {
+                       goto out;
+               }
+       }
+
        if (entry->cp_flags == 0) {
                /* no more work to do */
                goto out;
        }
 
        /* upgrade to exclusive lock */
-       if (lck_rw_lock_shared_to_exclusive(&cnode->c_rwlock) == FALSE) {
-               if ((error = hfs_lock(cnode, HFS_EXCLUSIVE_LOCK)) != KERN_SUCCESS) {
+       if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) {
+               if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { 
                        return error;
                }
        } else {
-               cnode->c_lockowner = current_thread();
+               cp->c_lockowner = current_thread();
        }
-
+       
        /* generate new keys if none have ever been saved */
        if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-               if ((error = cp_make_keys(entry)) != 0) {
-                       goto out;
+               /*
+                * By the time we're trying to initiate I/O against a content
+                * protected vnode, we should have already created keys for this
+                * file/dir. If we don't have keys, something bad happened.
+                */
+               panic ("cp_handle_vnop: cp %p has no keys!", cp);
+       }
+
+       /* unwrap keys if needed */
+       if (entry->cp_flags & CP_KEY_FLUSHED) {
+               if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) {
+                       /* no need to try to restore keys; they are not going to be used */
+                       error = 0;
+               }
+               else {
+                       error = cp_restore_keys(entry, hfsmp);
+
+                       if (error) {
+                               goto out;
+                       }
                }
        }
 
-       /* unwrap keys if needed */
-       if (entry->cp_flags & CP_KEY_FLUSHED) {
-               error = cp_restore_keys(entry);
-               if (error)
-                       goto out;
+       /* write out the xattr if it's new */
+       if (entry->cp_flags & CP_NO_XATTR)
+               error = cp_setxattr(cp, entry, VTOHFS(cp->c_vp), 0, XATTR_CREATE);
+
+out:
+
+       hfs_unlock(cp);
+       return error;
+}
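
Editorial sketch, not part of the patch: how an I/O path might consult cp_handle_vnop() before touching the cluster layer. Only the cp_handle_vnop() contract comes from this diff; the caller below is a simplified assumption and presumes the HFS kernel build environment.

/* Hypothetical, heavily simplified read path: keys must be usable (or the
 * caller must have asked for raw encrypted bytes via IO_ENCRYPTED) before
 * any cluster IO is issued. */
static int
hfs_read_sketch(struct vnode *vp, uio_t uio, off_t filesize, int ioflag)
{
	int error = cp_handle_vnop(vp, CP_READ_ACCESS, ioflag);
	if (error) {
		return error;
	}
	return cluster_read(vp, uio, filesize, ioflag);
}
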
+
+
+int
+cp_handle_open(struct vnode *vp, int mode)
+{
+       struct cnode *cp = NULL ;
+       struct cprotect *entry = NULL;
+       int error = 0;
+       
+       /* If vnode not eligible, just return success */
+       if (!cp_vnode_is_eligible(vp)) {
+               return 0;
+       }
+       
+       /* If mount point not properly set up, then also return success */
+       if (!cp_fs_protected(VTOVFS(vp))) {
+               return 0;
+       }
+
+       /* We know the vnode is in a valid state. acquire cnode and validate */
+       cp = VTOC(vp);
+
+       if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) {
+               return error;
        }
 
-       /* write out the xattr if it's new */
-       if (entry->cp_flags & CP_NO_XATTR) {
-               bcopy(&entry->cp_persistent_key[0], &xattr.persistent_key, CP_WRAPPEDKEYSIZE);
-               xattr.xattr_major_version = CP_CURRENT_MAJOR_VERS;
-               xattr.xattr_minor_version = CP_CURRENT_MINOR_VERS;
-               xattr.key_size = CP_WRAPPEDKEYSIZE;
-               xattr.persistent_class = entry->cp_pclass;
-               error = cp_setxattr(cnode, &xattr, XATTR_CREATE);
+       entry = cp->c_cpentry;
+       if (!entry)
+               goto out;
+
+       if (!S_ISREG(cp->c_mode))
+               goto out;
+
+       switch (entry->cp_pclass) {
+               case PROTECTION_CLASS_B:
+                       /* Class B always allows creation */
+                       if (mode & O_CREAT)
+                               goto out;
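+                       /* FALLTHROUGH: otherwise require the class key, as for class A below */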
+               case PROTECTION_CLASS_A:
+                       error = g_cp_wrap_func.unwrapper(entry->cp_pclass,
+                                                                                       entry->cp_persistent_key,
+                                                                                       entry->cp_persistent_key_len,
+                                                                                       NULL, NULL);
+                       if (error)
+                               error = EPERM;
+                       break;
+               default:
+                       break;
        }
 
 out:
-       hfs_unlock(cnode);
+       hfs_unlock(cp);
        return error;
 }
 
+
 /*  
  * During hfs resize operations, we have slightly different constraints than during
  * normal VNOPS that read/write data to files.  Specifically, we already have the cnode
@@ -433,7 +864,8 @@ out:
  * vs. lock), and don't worry about non-existing keys.  If the file exists on-disk with valid
  * payload, then it must have keys set up already by definition.
  */
-int cp_handle_relocate (cnode_t *cp) {
+int 
+cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) {
        struct cprotect *entry;
        int error = -1;
 
@@ -446,7 +878,7 @@ int cp_handle_relocate (cnode_t *cp) {
         * Still need to validate whether to permit access to the file or not 
         * based on lock status 
         */
-       if ((error = cp_check_access(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != KERN_SUCCESS) {
+       if ((error = cp_check_access(cp, CP_READ_ACCESS | CP_WRITE_ACCESS))) {
                goto out;
        }       
 
@@ -460,18 +892,19 @@ int cp_handle_relocate (cnode_t *cp) {
 
        /* unwrap keys if needed */
        if (entry->cp_flags & CP_KEY_FLUSHED) {
-               error = cp_restore_keys(entry);
+               error = cp_restore_keys(entry, hfsmp);
        }
 
-       /* don't need to write out the EA since the file is extant */
+       /* 
+        * Don't need to write out the EA since if the file has actual extents,
+        * it must have an EA
+        */
 out:   
 
        /* return the cp still locked */
        return error;
 }
 
-
-
 /*
  * cp_getrootxattr:
  * Gets the EA we set on the root folder (fileid 1) to get information about the
@@ -479,8 +912,8 @@ out:
  * Note that all multi-byte fields are written to disk little endian so they must be
  * converted to native endian-ness as needed.
  */
-
-int cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) {
+int 
+cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) {
        uio_t   auio;
        char    uio_buf[UIO_SIZEOF(1)];
        size_t attrsize = sizeof(struct cp_root_xattr);
@@ -488,7 +921,7 @@ int cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) {
        struct vnop_getxattr_args args;
 
        if (!outxattr) {
-               panic("cp_xattr called with xattr == NULL");
+               panic("Content Protection: cp_xattr called with xattr == NULL");
        }
 
        auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
@@ -509,7 +942,7 @@ int cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) {
        outxattr->minor_version = OSSwapLittleToHostInt16(outxattr->minor_version);
        outxattr->flags = OSSwapLittleToHostInt64(outxattr->flags);
 
-       if (error != KERN_SUCCESS) {
+       if (error != 0) { 
                goto out;
        }
 
@@ -528,7 +961,7 @@ out:
  * This will be written to the disk when it detects the EA is not there, or when we need
  * to make a modification to the on-disk version that can be done in-place.
  */
-       int
+int
 cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr)
 {
        int error = 0;
@@ -552,18 +985,193 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr)
 }
 
 
+/*
+ * Stores new xattr data on the cnode.
+ * cnode lock held exclusive (if available).
+ *
+ * This function is also invoked during file creation.
+ */
+int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options)
+{
+       int error = 0;
+       size_t attrsize; 
+       struct vnop_setxattr_args args;
+       uint32_t target_fileid;
+       struct cnode *arg_cp = NULL;
+       uint32_t tempflags = 0;
+
+       args.a_desc = NULL;
+       if (cp) {
+               args.a_vp = cp->c_vp;
+               target_fileid = 0;
+               arg_cp = cp;
+       }
+       else {
+               /* 
+                * When we set the EA in the same txn as the file creation,
+                * we do not have a vnode/cnode yet. Use the specified fileid.
+                */
+               args.a_vp = NULL;
+               target_fileid = fileid;
+       }
+       args.a_name = CONTENT_PROTECTION_XATTR_NAME;
+       args.a_uio = NULL; //pass data ptr instead
+       args.a_options = options; 
+       args.a_context = vfs_context_current();
+       
+       /* Add asserts for the CP flags in the CP blob. */
+       if (entry->cp_flags & CP_NEEDS_KEYS) {
+               panic ("cp_setxattr: cp %p , cpentry %p still needs keys!", cp, entry);
+       }
+
+       /* Disable flags that will be invalid as we're writing the EA out at this point. */
+       tempflags = entry->cp_flags;
+       tempflags &= ~CP_NO_XATTR;
+
+       switch(hfsmp->hfs_running_cp_major_vers) {
+               case CP_NEW_MAJOR_VERS: {
+                       struct cp_xattr_v4 *newxattr = NULL; // 70+ bytes; don't alloc on stack.
+                       MALLOC (newxattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK);
+                       if (newxattr == NULL) {
+                               error = ENOMEM;
+                               break;
+                       }
+                       bzero (newxattr, sizeof(struct cp_xattr_v4));
+
+                       attrsize = sizeof(*newxattr) - CP_MAX_WRAPPEDKEYSIZE + entry->cp_persistent_key_len;
+                       
+                       /* Endian swap the multi-byte fields into L.E from host. */
+                       newxattr->xattr_major_version = OSSwapHostToLittleInt16 (hfsmp->hfs_running_cp_major_vers);
+                       newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS);
+                       newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len);
+                       newxattr->flags = OSSwapHostToLittleInt32(tempflags);
+                       newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); 
+                       bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len);
+                       
+                       error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid);                        
+
+                       FREE(newxattr, M_TEMP);
+                       break;
+               }
+               case CP_PREV_MAJOR_VERS: {
+                       struct cp_xattr_v2 *newxattr = NULL;
+                       MALLOC (newxattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK);
+                       if (newxattr == NULL) {
+                               error = ENOMEM;
+                               break;
+                       }
+                       bzero (newxattr, sizeof(struct cp_xattr_v2));
+                       
+                       attrsize = sizeof(*newxattr);
+                       
+                       /* Endian swap the multi-byte fields into L.E from host. */
+                       newxattr->xattr_major_version = OSSwapHostToLittleInt16(hfsmp->hfs_running_cp_major_vers);
+                       newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS);
+                       newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len);
+                       newxattr->flags = OSSwapHostToLittleInt32(tempflags);
+                       newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); 
+                       bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len);
+                                               
+                       error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid);
+
+                       FREE (newxattr, M_TEMP);
+                       break;
+               }
+       }
+       
+       if (error == 0) {
+               entry->cp_flags &= ~CP_NO_XATTR;
+       }
+
+       return error;
+}
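
Editorial sketch, not part of the patch: the little-endian serialization rule used above, in isolation. The struct is illustrative only; the real cp_xattr_v2/v4 layouts live in the content-protection headers, which are outside this hunk.

#include <libkern/OSByteOrder.h>
#include <stdint.h>

struct example_xattr {
	uint16_t major_version;
	uint32_t persistent_class;
};

/* Host to little-endian before the EA is written out... */
static void
example_to_disk(struct example_xattr *x)
{
	x->major_version = OSSwapHostToLittleInt16(x->major_version);
	x->persistent_class = OSSwapHostToLittleInt32(x->persistent_class);
}

/* ...and little-endian back to host right after it is read in. */
static void
example_from_disk(struct example_xattr *x)
{
	x->major_version = OSSwapLittleToHostInt16(x->major_version);
	x->persistent_class = OSSwapLittleToHostInt32(x->persistent_class);
}
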
+
+/*
+ * This function takes a cprotect struct with the cache keys and re-wraps them for 
+ * MKB's sake so that it can update its own data structures.  It is useful when
+ * there may not be a cnode in existence yet (for example, after creating
+ * a file).
+ */
+int 
+cp_update_mkb (struct cprotect *entry, uint32_t fileid) {
+
+       int error = 0;
+
+       /* We already validated this pclass earlier */
+       if (entry->cp_pclass != PROTECTION_CLASS_F) {
+               error = g_cp_wrap_func.wrapper (entry->cp_pclass, fileid, entry->cp_cache_key, 
+                               entry->cp_cache_key_len, NULL, NULL);
+       }               
+
+       if (error) {
+               error = EPERM;
+       }
+
+       return error;
+}
+
+/*
+ * Used by an fcntl to query the underlying FS for its content protection version #
+ */
+
+int 
+cp_get_root_major_vers(vnode_t vp, uint32_t *level) {
+       int err = 0;
+       struct hfsmount *hfsmp = NULL;
+       struct mount *mp = NULL;
+
+       mp = VTOVFS(vp);
+
+       /* check if it supports content protection */
+       if (cp_fs_protected(mp) == 0) {
+               return EINVAL;
+       }
+
+       hfsmp = VFSTOHFS(mp);
+       /* figure out the level */
+
+       err = cp_root_major_vers(mp);
+
+       if (err == 0) {
+               *level = hfsmp->hfs_running_cp_major_vers;
+       }
+       /* in error case, cp_root_major_vers will just return EINVAL. Use that */
 
+       return err;
+}
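
Editorial sketch, not part of the patch: querying that version number from userspace. The F_GETPROTECTIONLEVEL command name and its return-by-value convention are assumptions here; the fcntl plumbing that reaches cp_get_root_major_vers() is not shown in this hunk.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/", O_RDONLY);
	if (fd < 0)
		return 1;
	/* EINVAL from the kernel means the volume is not content-protected,
	 * matching cp_get_root_major_vers() above. */
	int level = fcntl(fd, F_GETPROTECTIONLEVEL);
	if (level >= 0)
		printf("CP major version: %d\n", level);
	close(fd);
	return 0;
}
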
 
 /********************
  * Private Functions
  *******************/
 
 static int
-cp_vnode_is_eligible(vnode_t vp)
+cp_root_major_vers(mount_t mp)
+{
+       int err = 0;
+       struct cp_root_xattr xattr;
+       struct hfsmount *hfsmp = NULL;
+
+       hfsmp = vfs_fsprivate(mp);
+       err = cp_getrootxattr (hfsmp, &xattr);
+
+       if (err == 0) {
+               hfsmp->hfs_running_cp_major_vers = xattr.major_version; 
+       }
+       else {
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+static int
+cp_vnode_is_eligible(struct vnode *vp)
 {
        return ((vp->v_op == hfs_vnodeop_p) &&
                        (!vnode_issystem(vp)) &&
-                       (vnode_isreg(vp)));
+                       (vnode_isreg(vp) || vnode_isdir(vp)));
 }
 
 
@@ -577,101 +1185,214 @@ cp_is_valid_class(int class)
 
 
 static struct cprotect *
-cp_entry_alloc(void)
+cp_entry_alloc(size_t keylen)
 {
        struct cprotect *cp_entry;
+
+       if (keylen > CP_MAX_WRAPPEDKEYSIZE)
+               return (NULL);
        
-       MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect), 
+       MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect) + keylen,
                   M_TEMP, M_WAITOK);
        if (cp_entry == NULL)
                return (NULL);
-       
-       bzero(cp_entry, sizeof(*cp_entry));
+
+       bzero(cp_entry, sizeof(*cp_entry) + keylen);
+       cp_entry->cp_persistent_key_len = keylen;
        return (cp_entry);
 }
 
+static void
+cp_entry_dealloc(struct cprotect *entry)
+{
+       uint32_t keylen = entry->cp_persistent_key_len;
+       bzero(entry, (sizeof(*entry) + keylen));
+       FREE(entry, M_TEMP);    
+}
+
 
 /*
- * Reads xattr data off the cnode and into provided xattr.
+ * Initializes a new cprotect entry with xattr data from the cnode.
  * cnode lock held shared
  */
 static int 
-cp_getxattr(cnode_t *cnode, struct cp_xattr *outxattr)
+cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry)
 {
-       uio_t   auio;
-       char    uio_buf[UIO_SIZEOF(1)];
-       size_t attrsize = sizeof(struct cp_xattr);
        int error = 0;
+       uio_t auio;
+       size_t attrsize;
+       char uio_buf[UIO_SIZEOF(1)];
        struct vnop_getxattr_args args;
-               
+       struct cprotect *entry = NULL;
+
        auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
-       uio_addiov(auio, CAST_USER_ADDR_T(outxattr), attrsize);
-       
        args.a_desc = NULL; // unused
-       args.a_vp = cnode->c_vp;
+       args.a_vp = cp->c_vp;
        args.a_name = CONTENT_PROTECTION_XATTR_NAME;
        args.a_uio = auio;
-       args.a_size = &attrsize;
        args.a_options = XATTR_REPLACE;
        args.a_context = vfs_context_current(); // unused
-       error = hfs_getxattr_internal(cnode, &args, VTOHFS(cnode->c_vp), 0);
-       if (error != KERN_SUCCESS) {
-               goto out;
-       }
 
-       /* Endian swap the multi-byte fields into host endianness from L.E. */
-       outxattr->xattr_major_version = OSSwapLittleToHostInt16(outxattr->xattr_major_version);
-       outxattr->xattr_minor_version = OSSwapLittleToHostInt16(outxattr->xattr_minor_version);
-       outxattr->key_size = OSSwapLittleToHostInt32(outxattr->key_size);
-       outxattr->flags = OSSwapLittleToHostInt32(outxattr->flags);
-       outxattr->persistent_class = OSSwapLittleToHostInt32(outxattr->persistent_class);
+       switch (hfsmp->hfs_running_cp_major_vers) {
+               case CP_NEW_MAJOR_VERS: {
+                       struct cp_xattr_v4 *xattr = NULL;
+                       MALLOC (xattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK);
+                       if (xattr == NULL) {
+                               error = ENOMEM;
+                               break;
+                       }
+                       bzero(xattr, sizeof (struct cp_xattr_v4));
+                       attrsize = sizeof(*xattr);
+
+                       uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize);
+                       args.a_size = &attrsize;
+
+                       error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0);
+                       if (error != 0) {
+                               FREE (xattr, M_TEMP);
+                               goto out;
+                       }
+                       
+                       /* Endian swap the multi-byte fields into host endianness from L.E. */
+                       xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
+                       xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
+                       xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
+                       xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
+                       xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+                       
+                       if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers) {
+                               printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", 
+                                       xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers);
+                               error = EINVAL;
+                               FREE (xattr, M_TEMP);
+
+                               goto out;
+                       }
+                       /*
+                        * Prevent a buffer overflow, and validate the key length obtained from the
+                        * EA. If it's too big, then bail out, because the EA can't be trusted at this
+                        * point.
+                        */
+                       if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE) {
+                               error = EINVAL;
+                               FREE (xattr, M_TEMP);
+
+                               goto out;       
+                       }
+
+                       /* set up entry with information from xattr */
+                       entry = cp_entry_alloc(xattr->key_size);
+                       if (!entry) {
+                               FREE (xattr, M_TEMP);
+
+                               return ENOMEM;
+                       }
+                       
+                       entry->cp_pclass = xattr->persistent_class;     
+                       if (xattr->xattr_major_version >= CP_NEW_MAJOR_VERS) {
+                               entry->cp_flags |= CP_OFF_IV_ENABLED;
+                       }
+                       bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size);                        
+
+                       FREE (xattr, M_TEMP);
+
+                       break;
+               }
+               case CP_PREV_MAJOR_VERS: {
+                       struct cp_xattr_v2 *xattr = NULL;
+                       MALLOC (xattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK);
+                       if (xattr == NULL) {
+                               error = ENOMEM;
+                               break;
+                       }
+                       bzero (xattr, sizeof (struct cp_xattr_v2));
+                       attrsize = sizeof(*xattr);
+
+                       uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize);
+                       args.a_size = &attrsize;
+                       
+                       error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0);
+                       if (error != 0) {
+                               FREE (xattr, M_TEMP);
+                               goto out;
+                       }
+                       
+                       /* Endian swap the multi-byte fields into host endianness from L.E. */
+                       xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
+                       xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
+                       xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
+                       xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
+                       xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+                       
+                       if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers) {
+                               printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", 
+                                       xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers);
+                               error = EINVAL;
+                               FREE (xattr, M_TEMP);
+                               goto out;
+                       }       
+
+                       /*
+                        * Prevent a buffer overflow, and validate the key length obtained from the
+                        * EA. If it's too big, then bail out, because the EA can't be trusted at this
+                        * point.
+                        */
+                       if (xattr->key_size > CP_V2_WRAPPEDKEYSIZE) {
+                               error = EINVAL;
+                               FREE (xattr, M_TEMP);
+                               goto out;       
+                       }
+                       /* set up entry with information from xattr */
+                       entry = cp_entry_alloc(xattr->key_size);
+                       if (!entry) {
+                               FREE (xattr, M_TEMP);
+                               return ENOMEM;
+                       }
+                       
+                       entry->cp_pclass = xattr->persistent_class;
+                       bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size);
+                       FREE (xattr, M_TEMP);
+                       break;
+               }
+       }
 
 out:
        uio_free(auio);
+       
+       *outentry = entry;      
        return error;
 }
 
-/*
- * Stores new xattr data on the cnode.
- * cnode lock held exclusive
- */
+
+/* Setup AES context */
 static int
-cp_setxattr(cnode_t *cnode, struct cp_xattr *newxattr, int options)
+cp_setup_aes_ctx(struct cprotect *entry)
 {
-       int error = 0;
-       struct vnop_setxattr_args args;
+       SHA1_CTX sha1ctxt;
+       uint8_t cp_cache_iv_key[CP_IV_KEYSIZE]; /* Kiv */
        
-       args.a_desc = NULL;
-       args.a_vp = cnode->c_vp;
-       args.a_name = CONTENT_PROTECTION_XATTR_NAME;
-       args.a_uio = NULL; //pass data ptr instead
-       args.a_options = options; 
-       args.a_context = vfs_context_current();
-
-       /* Endian swap the multi-byte fields into L.E from host. */
-       newxattr->xattr_major_version = OSSwapHostToLittleInt16(newxattr->xattr_major_version);
-       newxattr->xattr_minor_version = OSSwapHostToLittleInt16(newxattr->xattr_minor_version);
-       newxattr->key_size = OSSwapHostToLittleInt32(newxattr->key_size);
-       newxattr->flags = OSSwapHostToLittleInt32(newxattr->flags);
-       newxattr->persistent_class = OSSwapHostToLittleInt32(newxattr->persistent_class);
-
-       error = hfs_setxattr_internal(cnode, (caddr_t)newxattr, 
-                                                                 sizeof(struct cp_xattr), &args, VTOHFS(cnode->c_vp), 0);
-
-       if ((error == KERN_SUCCESS) && (cnode->c_cpentry)) {
-               cnode->c_cpentry->cp_flags &= ~CP_NO_XATTR;
-       }
+       /* First init the cp_cache_iv_key[] */
+       SHA1Init(&sha1ctxt);
+       SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE);
+       SHA1Final(&cp_cache_iv_key[0], &sha1ctxt);
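+       /* Note: SHA-1 emits a 20-byte digest; presumably CP_IV_KEYSIZE leaves
+        * room for all of it, while aes_encrypt_key128() below consumes only
+        * its first 16 bytes. */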
+       
+       aes_encrypt_key128(&cp_cache_iv_key[0], &entry->cp_cache_iv_ctx);
 
-       return error;
+       return 0;
 }
 
 
 /*
  * Make a new random per-file key and wrap it.
+ * Normally default_pclass is PROTECTION_CLASS_D, but when the containing
+ * directory's class is set, we use that as the default instead.
  */
 static int
-cp_make_keys(struct cprotect *entry)
+cp_make_keys(struct cprotect **entry_arg, struct hfsmount *hfsmp, cnid_t fileid, int default_pclass)
 {
+       struct cprotect *entry = *entry_arg;
+       int target_pclass = 0;
        int error = 0;
 
        if (g_cp_state.wrap_functions_set != 1) {
@@ -680,94 +1401,122 @@ cp_make_keys(struct cprotect *entry)
        }
 
        /* create new cp data: key and class */
-       read_random(&entry->cp_cache_key[0], CP_KEYSIZE);
-       entry->cp_pclass = PROTECTION_CLASS_D;
+       entry->cp_cache_key_len = CP_MAX_KEYSIZE;
+       read_random(&entry->cp_cache_key[0], entry->cp_cache_key_len);
+
+       if (cp_is_valid_class(default_pclass) == 0) {
+               target_pclass = PROTECTION_CLASS_D;
+       } else {
+               target_pclass = default_pclass;
+       }
+
+       /*
+        * Attempt to wrap the new key in the class key specified by target_pclass
+        * Note that because we may be inheriting a protection level specified
+        * by the containing directory, this can fail;  we could be trying to
+        * wrap this cache key in the class 'A' key while the device is locked.  
+        * As such, emit an error if we fail to wrap the key here, instead of
+        * panicking.
+        */
+
+       error = cp_wrap(target_pclass, hfsmp, fileid, entry_arg);
 
-       /* wrap the new key in the class key */
-       error = cp_wrap(PROTECTION_CLASS_D,
-                                       &entry->cp_cache_key[0], 
-                                       &entry->cp_persistent_key[0]);
-       
        if (error) {
-               panic("could not wrap new key in class D\n");
+               goto out;
+       }
+       /* cp_wrap() potentially updates c_cpentry */
+       entry = *entry_arg;
+
+       /* set the pclass to the target since the wrap was successful */
+       entry->cp_pclass = target_pclass;
+
+       /* Not needed for older EA versions */
+       if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) {
+               cp_setup_aes_ctx(entry);
+               entry->cp_flags |= CP_OFF_IV_ENABLED;
        }
 
        /* ready for business */
        entry->cp_flags &= ~CP_NEEDS_KEYS;
        entry->cp_flags |= CP_NO_XATTR;
 
+out:
        return error;
 }
 
 /*
  * If permitted, restore entry's unwrapped key from the persistent key.
- * If not, clear key and set CP_ENTRY_FLUSHED.
+ * If not, clear key and set CP_KEY_FLUSHED.
  * cnode lock held exclusive
  */
 static int
-cp_restore_keys(struct cprotect *entry)
+cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp)
 {
        int error = 0;
 
-       error = cp_unwrap(entry->cp_pclass,
-                                         &entry->cp_persistent_key[0],
-                                         &entry->cp_cache_key[0]);
-       
+       error = cp_unwrap(entry->cp_pclass, entry);
        if (error) {
                entry->cp_flags |= CP_KEY_FLUSHED;
-               bzero(entry->cp_cache_key, CP_KEYSIZE);
+               bzero(entry->cp_cache_key, entry->cp_cache_key_len);
                error = EPERM;
        }
        else {
+               /* Not needed for older EA versions */
+               if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) {            
+                       cp_setup_aes_ctx(entry);
+                       entry->cp_flags |= CP_OFF_IV_ENABLED;
+               }
+               
+               /* ready for business */
                entry->cp_flags &= ~CP_KEY_FLUSHED;
+               
        }
        return error;
 }
 
 static int
-cp_lock_vfs_callback(mount_t mp, void *arg)
-{
-       if (!cp_fs_protected(mp)) {
-               /* not interested in this mount point */
-               return 0;
-       }
-       
-       return vnode_iterate(mp, 0, cp_lock_vnode_callback, arg);
+cp_lock_vfs_callback(mount_t mp, void *arg)
+{
+       /*
+        * When iterating the various mount points that may
+        * be present on a content-protected device, we need to skip
+        * those that do not have it enabled.
+        */
+       if (!cp_fs_protected(mp)) {
+               return 0;
+       }
+
+       return vnode_iterate(mp, 0, cp_lock_vnode_callback, arg);
 }
 
 
 /*
  * Deny access to protected files if keys have been locked.
- *
- * cnode lock is taken shared.
  */
-       static int
-cp_check_access(cnode_t *cnode, int vnop)
+static int
+cp_check_access(struct cnode *cp, int vnop __unused)
 {
        int error = 0;
 
        if (g_cp_state.lock_state == CP_UNLOCKED_STATE) {
-               return KERN_SUCCESS;
+               return 0;
        }
 
-       if (!cnode->c_cpentry) {
+       if (!cp->c_cpentry) {
                /* unprotected node */
-               return KERN_SUCCESS;
+               return 0;
+       }
+
+       if (!S_ISREG(cp->c_mode)) {
+               return 0;
        }
 
-       /* Deny all access for class A files, and read access for class B */
-       switch (cnode->c_cpentry->cp_pclass) {
+       /* Deny all access for class A files */
+       switch (cp->c_cpentry->cp_pclass) {
                case PROTECTION_CLASS_A: {
                        error = EPERM;
                        break;
                }
-               case PROTECTION_CLASS_B: {
-                       if (vnop & CP_READ_ACCESS)
-                               error = EPERM;
-                       else
-                               error = 0;
-                       break;
-               }
                default:
                        error = 0;
                        break;
@@ -776,21 +1525,20 @@ cp_check_access(cnode_t *cnode, int vnop)
        return error;
 }
 
-
-
 /*
  * Respond to a lock or unlock event.
  * On lock: clear out keys from memory, then flush file contents.
  * On unlock: nothing (function not called).
  */
 static int
-cp_lock_vnode_callback(vnode_t vp, void *arg)
+cp_lock_vnode_callback(struct vnode *vp, void *arg)
 {
        cnode_t *cp = NULL;
        struct cprotect *entry = NULL;
        int error = 0;
        int locked = 1;
        int action = 0;
+       int took_truncate_lock = 0;
 
        error = vnode_getwithref (vp);
        if (error) {
@@ -798,6 +1546,18 @@ cp_lock_vnode_callback(vnode_t vp, void *arg)
        }
 
        cp = VTOC(vp);
+       
+       /*
+        * When cleaning cnodes due to a lock event, we must
+        * take the truncate lock AND the cnode lock.  Taking the truncate
+        * lock here forces (nearly) all pending IOs to drain before we can
+        * proceed.  All HFS cluster IO calls except for swapfile IO must
+        * acquire the truncate lock prior to calling into the cluster layer.
+        */
+       hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK);
+       took_truncate_lock = 1;
+       
        hfs_lock(cp, HFS_FORCE_LOCK);
        
        entry = cp->c_cpentry;
@@ -810,12 +1570,26 @@ cp_lock_vnode_callback(vnode_t vp, void *arg)
        switch (action) {
                case CP_LOCKED_STATE: {
                        vfs_context_t ctx;
-                       if (entry->cp_pclass != PROTECTION_CLASS_A) {
-                               /* no change at lock for other classes */
+                       if (entry->cp_pclass != PROTECTION_CLASS_A ||
+                               vnode_isdir(vp)) {
+                               /* 
+                                * There is no change at lock for other classes than A.
+                                * B is kept in memory for writing, and class F (for VM) does
+                                * not have a wrapped key, so there is no work needed for 
+                                * wrapping/unwrapping.  
+                                * 
+                                * Note that 'class F' is relevant here because if 
+                                * hfs_vnop_strategy does not take the cnode lock
+                                * to protect the cp blob across IO operations, we rely 
+                                * implicitly on the truncate lock to be held when doing IO.  
+                                * The only case where the truncate lock is not held is during 
+                                * swapfile IO because HFS just funnels the VNOP_PAGEOUT 
+                                * directly to cluster_pageout.  
+                                */
                                goto out;
                        }
                        
-                       /* Before doing anything else, zero-fille sparse ranges as needed */
+                       /* Before doing anything else, zero-fill sparse ranges as needed */
                        ctx = vfs_context_current();
                        (void) hfs_filedone (vp, ctx);
 
@@ -823,10 +1597,20 @@ cp_lock_vnode_callback(vnode_t vp, void *arg)
                        hfs_unlock (cp);
                        ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC);
                        hfs_lock (cp, HFS_FORCE_LOCK);
-                       
-                       /* flush keys */
+
+                       /* flush keys:
+                        * There was a concern here (9206856) about flushing keys before the NAND
+                        * layer is done using them.  But since we use ubc_msync with UBC_SYNC, it
+                        * blocks until all IO is completed.  Once IOFS caches or is done with these
+                        * keys, it calls the completion routine in IOSF, which in turn calls
+                        * buf_biodone() and eventually unblocks ubc_msync().  We also verified that
+                        * the cached data in IOFS is overwritten by other data, and there is no key
+                        * leakage in that layer.
+                        */
+
                        entry->cp_flags |= CP_KEY_FLUSHED;
-                       bzero(&entry->cp_cache_key, CP_KEYSIZE);
+                       bzero(&entry->cp_cache_key, entry->cp_cache_key_len);
+                       bzero(&entry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
+                       
                        /* some write may have arrived in the mean time. dump those pages */
                        hfs_unlock(cp);
                        locked = 0;
@@ -839,56 +1623,120 @@ cp_lock_vnode_callback(vnode_t vp, void *arg)
                        break;
                }
                default:
-                       panic("unknown lock action %d\n", action);
+                       panic("Content Protection: unknown lock action %d\n", action);
        }
        
 out:
-       if (locked)
+       if (locked) {
                hfs_unlock(cp);
+       }
+       
+       if (took_truncate_lock) {
+               hfs_unlock_truncate (cp, 0);
+       }
+       
        vnode_put (vp);
        return error;
 }
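
Editorial sketch, not part of the patch: how a key-store lock event would fan out through the two callbacks above. The dispatcher itself (cp_key_store_action() in this codebase) is outside this hunk, so the wiring below is an assumption and presumes the HFS kernel build environment.

/* Hypothetical dispatch: walk every mount; cp_lock_vfs_callback() skips
 * unprotected volumes and iterates the vnodes of the rest, invoking
 * cp_lock_vnode_callback() on each. */
static int
example_key_store_locked(void)
{
	int action = CP_LOCKED_STATE;
	return vfs_iterate(0, cp_lock_vfs_callback, (void *)&action);
}
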
 
 static int
-cp_wrap(int class, void *inkey, void *outkey)
+cp_wrap(int class, struct hfsmount *hfsmp, cnid_t fileid, struct cprotect **entry_ptr)
 {
-       int error = 0;
-       size_t keyln = CP_WRAPPEDKEYSIZE;
        
+       struct cprotect *entry = *entry_ptr;
+       uint8_t newkey[CP_MAX_WRAPPEDKEYSIZE];
+       size_t keylen = CP_MAX_WRAPPEDKEYSIZE;
+       int error = 0;
+
+       /*
+        * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient 
+        * key that is only good as long as the file is open.  There is no
+        * wrapped key, so there isn't anything to wrap. 
+        */
        if (class == PROTECTION_CLASS_F) {
-               bzero(outkey, CP_WRAPPEDKEYSIZE);
+               bzero(entry->cp_persistent_key, entry->cp_persistent_key_len);
+               entry->cp_persistent_key_len = 0;
                return 0;
        }
-       
+
+       /*
+        * The file ID is passed here so the backup-bag wrapped blob can be
+        * looked up from userspace.  This lookup will occur shortly after
+        * creation and only if the file still exists.  Beyond this lookup
+        * the ID is not used.  Technically there is a race, but in practice
+        * we don't lose.
+        */
        error = g_cp_wrap_func.wrapper(class,
-                                                                  inkey,
-                                                                  CP_KEYSIZE,
-                                                                  outkey,
-                                                                  &keyln);
-       
+                                                                  fileid,
+                                                                  entry->cp_cache_key,
+                                                                  entry->cp_cache_key_len,
+                                                                  newkey,
+                                                                  &keylen);
+
+       if (!error) {
+               /*
+                * v2 EAs don't support the larger class B keys
+                */
+               if ((keylen != CP_V2_WRAPPEDKEYSIZE) &&
+                       (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) {
+                       return EINVAL;
+               }
+
+               /*
+                * Reallocate the entry if the new persistent key changed length
+                */
+               if (entry->cp_persistent_key_len != keylen) {
+                       struct cprotect *oldentry = entry;
+
+                       entry = cp_entry_alloc(keylen);
+                       if (entry == NULL)
+                               return ENOMEM;
+
+                       bcopy(oldentry, entry, sizeof(struct cprotect));
+                       entry->cp_persistent_key_len = keylen;
+
+                       cp_entry_destroy (&oldentry);
+
+                       *entry_ptr = entry;
+               }
+
+               bcopy(newkey, entry->cp_persistent_key, keylen);                
+       } 
+       else {
+               error = EPERM;
+       }
+
        return error;
 }
 
 
 static int
-cp_unwrap(int class, void *inkey, void *outkey)
+cp_unwrap(int class, struct cprotect *entry)
 {
        int error = 0;
-       size_t keyln = CP_KEYSIZE;
-       
+       size_t keylen = CP_MAX_KEYSIZE;
+
+       /*
+        * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient 
+        * key that is only good as long as the file is open.  There is no
+        * wrapped key, so there isn't anything to unwrap. 
+        */
        if (class == PROTECTION_CLASS_F) {
-               /* we didn't save a wrapped key, so nothing to unwrap */
                return EPERM;
        }
-       
+
        error = g_cp_wrap_func.unwrapper(class,
-                                                                        inkey,
-                                                                        CP_WRAPPEDKEYSIZE,
-                                                                        outkey,
-                                                                        &keyln);
+                                                                        entry->cp_persistent_key,
+                                                                        entry->cp_persistent_key_len,
+                                                                        entry->cp_cache_key,
+                                                                        &keylen);
+       if (!error) {
+               entry->cp_cache_key_len = keylen;
+       } else {
+               error = EPERM;
+       }
        
        return error;
-       
 }
 
 
index ae1039a3e550873fe6f56727358e5675114c480a..2cf6a07562275560d268bd4d209f7420d846939b 100644 (file)
@@ -373,7 +373,6 @@ enum {
 
        kHFSHasDateAddedBit = 0x0007,   /* File/Folder has the date-added stored in the finder info. */
        kHFSHasDateAddedMask = 0x0080 
-
 };
 
 
index 7bebee3fbc7fc9d852bb2e3a2d9781459327d6bc..d19d8e7d41574ed1874b822667cd8e34128c7849 100644 (file)
@@ -125,6 +125,20 @@ struct hfs_journal_info {
 #define HFSIOC_DISABLE_METAZONE        _IO('h', 25)
 #define HFS_DISABLE_METAZONE   IOCBASECMD(HFSIOC_DISABLE_METAZONE)
 
+/* Change the next CNID value */
+#define HFSIOC_CHANGE_NEXTCNID _IOWR('h', 26, u_int32_t)
+#define HFS_CHANGE_NEXTCNID            IOCBASECMD(HFSIOC_CHANGE_NEXTCNID)
+       
+/* Get the low disk space values */
+#define        HFSIOC_GET_VERY_LOW_DISK        _IOR('h', 27, u_int32_t)
+#define        HFS_FSCTL_GET_VERY_LOW_DISK     IOCBASECMD(HFSIOC_GET_VERY_LOW_DISK)
+
+#define        HFSIOC_GET_LOW_DISK     _IOR('h', 28, u_int32_t)
+#define        HFS_FSCTL_GET_LOW_DISK  IOCBASECMD(HFSIOC_GET_LOW_DISK)
+
+#define        HFSIOC_GET_DESIRED_DISK _IOR('h', 29, u_int32_t)
+#define        HFS_FSCTL_GET_DESIRED_DISK      IOCBASECMD(HFSIOC_GET_DESIRED_DISK)
+
 #endif /* __APPLE_API_UNSTABLE */
 
 #endif /* ! _HFS_FSCTL_H_ */
index 66e273b5de4aea3c62c4205bea7829d122fe1854..7ebb82bc6a569f88cc22f98a585eb97d3a970796 100644 (file)
@@ -810,7 +810,7 @@ hfs_addhotfile_internal(struct vnode *vp)
            (ffp->ff_size == 0) ||
            (ffp->ff_blocks > hotdata->maxblocks) ||
            (cp->c_flag & (C_DELETED | C_NOEXISTS)) ||
-           (cp->c_flags & UF_NODUMP) ||
+           (cp->c_bsdflags & UF_NODUMP) ||
            (cp->c_atime < hfsmp->hfc_timebase)) {
                return (0);
        }
index 5dd5d6a9c0be220b9a4d3e820c69bb92603f7212..d3202bca4deb45d422d38129e622acf44b656084 100644 (file)
@@ -37,8 +37,8 @@ enum {
        HFSDBG_UNMAP_CALLBACK           0, extentCount, 0, 0 ... 0, 0, 0, 0
        HFSDBG_UNMAP_FREE                       startBlock, blockCount, 0, 0 ... err, 0, 0, 0
        HFSDBG_UNMAP_ALLOC                      startBlock, blockCount, 0, 0 ... err, 0, 0, 0
-       HFSDBG_REMOVE_EXTENT_CACHE      startBlock, blockCount, 0, 0 ... 0,   0, 0, 0
-       HFSDBG_ADD_EXTENT_CACHE         startBlock, blockCount, 0, 0 ... err, 0, 0, 0
+       HFSDBG_REMOVE_EXTENT_CACHE      startBlock, blockCount, vcbFreeExtCnt, 0 ... 0, 0, vcbFreeExtCnt, extentsRemoved
+       HFSDBG_ADD_EXTENT_CACHE         startBlock, blockCount, vcbFreeExtCnt, 0 ... 0, 0, vcbFreeExtCnt, retval
        HFSDBG_MARK_ALLOC_BITMAP        startBlock, blockCount, 0, 0 ... err, 0, 0, 0
        HFSDBG_MARK_FREE_BITMAP         startBlock, blockCount, valid, 0 ... err, 0, 0, 0
        HFSDBG_BLOCK_DEALLOCATE         startBlock, blockCount, flags, 0 ... err, 0, 0, 0
index d24a92011662adb08346426c904c92721632118c..b1b9359afd873493264aa288c345c0a46a81249b 100644 (file)
@@ -503,7 +503,7 @@ hfs_vnop_link(struct vnop_link_args *ap)
                error = EMLINK;
                goto out;
        }
-       if (cp->c_flags & (IMMUTABLE | APPEND)) {
+       if (cp->c_bsdflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto out;
        }
index 13cb1aa48d899c02edd1c6dfce1a47abcf45454d..2200fe1de12b88b26ab469a68421b515a55495e1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,6 +80,7 @@
 #include <sys/kdebug.h>
 #include <sys/kauth.h>
 #include <sys/namei.h>
+#include <sys/user.h>
 
 #include "hfs.h"
 #include "hfs_catalog.h"
@@ -498,12 +499,14 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
         */
 
        if ((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) {
-               hfs_lock(cp, HFS_FORCE_LOCK);
+               int stale_link = 0;
+
+               hfs_lock(cp, HFS_FORCE_LOCK);   
                if ((cp->c_parentcnid != dcp->c_cnid) ||
                    (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) {
                        struct cat_desc desc;
+                       struct cat_attr lookup_attr;
                        int lockflags;
-
                        /*
                         * Get an updated descriptor
                         */
@@ -514,28 +517,84 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
                        desc.cd_encoding = 0;
                        desc.cd_cnid = 0;
                        desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0;
-       
+
+                       /*
+                        * Because lookups call replace_desc to put a new descriptor in
+                        * the cnode we are modifying it is possible that this cnode's 
+                        * descriptor is out of date for the parent ID / name that
+                        * we are trying to look up. (It may point to a different hardlink).
+                        *
+                        * We need to be cautious that when re-supplying the 
+                        * descriptor below that the results of the catalog lookup
+                        * still point to the same raw inode for the hardlink.  This would 
+                        * not be the case if we found something in the cache above but 
+                        * the vnode it returned no longer has a valid hardlink for the 
+                        * parent ID/filename combo we are requesting.  (This is because 
+                        * hfs_unlink does not directly trigger namecache removal). 
+                        *
+                        * As a result, before vending out the vnode (and replacing
+                        * its descriptor) verify that the fileID is the same by comparing
+                        * the in-cnode attributes vs. the one returned from the lookup call
+                        * below.  If they do not match, treat this lookup as if we never hit
+                        * in the cache at all.
+                        */
 
                        lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK);             
-                       if (cat_lookup(VTOHFS(vp), &desc, 0, &desc, NULL, NULL, NULL) == 0)
-                               replace_desc(cp, &desc);
+               
+                       error = cat_lookup(VTOHFS(vp), &desc, 0, &desc, &lookup_attr, NULL, NULL);      
+                       
                        hfs_systemfile_unlock(VTOHFS(dvp), lockflags);
 
                        /* 
-                        * Save the origin info for file and directory hardlinks.  Directory hardlinks 
-                        * need the origin for '..' lookups, and file hardlinks need it to ensure that 
-                        * competing lookups do not cause us to vend different hardlinks than the ones requested.
-                        * We want to restrict saving the cache entries to LOOKUP namei operations, since
-                        * we're really doing this to protect getattr.
+                        * Note that cat_lookup may fail to find something with the name provided in the
+                        * stack-based descriptor above. In that case, an ENOENT is a legitimate errno
+                        * to be placed in error, which will get returned in the fastpath below.
                         */
-                       if (cnp->cn_nameiop == LOOKUP) {
-                               hfs_savelinkorigin(cp, dcp->c_fileid);
+                       if (error == 0) {
+                               if (lookup_attr.ca_fileid == cp->c_attr.ca_fileid) {
+                                       /* It still points to the right raw inode.  Replacing the descriptor is fine */
+                                       replace_desc (cp, &desc);
+
+                                       /* 
+                                        * Save the origin info for file and directory hardlinks.  Directory hardlinks 
+                                        * need the origin for '..' lookups, and file hardlinks need it to ensure that 
+                                        * competing lookups do not cause us to vend different hardlinks than the ones requested.
+                                        * We want to restrict saving the cache entries to LOOKUP namei operations, since
+                                        * we're really doing this to protect getattr.
+                                        */
+                                       if (cnp->cn_nameiop == LOOKUP) {
+                                               hfs_savelinkorigin(cp, dcp->c_fileid);
+                                       }
+                               }
+                               else {
+                                       /* If the fileID does not match then do NOT replace the descriptor! */
+                                       stale_link = 1;
+                               }       
                        }
                }
-               hfs_unlock(cp);
-       }
+               hfs_unlock (cp);
+               
+               if (stale_link) {
+                       /* 
+                        * If we had a stale_link, then we need to pretend as though
+                        * we never found this vnode and force a lookup through the 
+                        * traditional path.  Drop the iocount acquired through 
+                        * cache_lookup above and force a cat lookup / getnewvnode
+                        */
+                       vnode_put(vp);
+                       goto lookup;
+               }
+               
+               if (error) {
+                       /* 
+                        * If the cat_lookup failed then the caller will not expect 
+                        * a vnode with an iocount on it.
+                        */
+                       vnode_put(vp);
+               }
 
-       return (error);
+       }       
+       goto exit;
        
 lookup:
        /*
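
The hunk above guards against vending a stale hardlink from the name cache: the catalog is re-queried, and the descriptor is replaced only when the returned fileid still matches the one cached in the cnode. A minimal stand-alone C sketch of that comparison, using hypothetical stand-in types (catalog_result, cached_node) rather than the real cat_attr/cnode structures:

/*
 * Sketch of the stale-hardlink check, in user-space C with stand-in
 * types; not the kernel implementation itself.
 */
#include <stdio.h>
#include <stdint.h>

struct catalog_result { uint32_t fileid; };   /* stands in for cat_attr  */
struct cached_node    { uint32_t fileid; };   /* stands in for the cnode */

/* Returns 1 if the cached vnode may be vended, 0 if lookup must be redone. */
static int cache_hit_is_valid(const struct cached_node *cp,
                              const struct catalog_result *lookup)
{
        /*
         * Only replace the descriptor when the catalog still maps this
         * parent/name pair to the same raw inode; otherwise the cache
         * hit is a stale hardlink and must be treated as a miss.
         */
        return lookup->fileid == cp->fileid;
}

int main(void)
{
        struct cached_node cp = { .fileid = 22 };
        struct catalog_result fresh = { .fileid = 23 };  /* link re-pointed */

        if (!cache_hit_is_valid(&cp, &fresh))
                printf("stale link: drop iocount, fall back to full lookup\n");
        return 0;
}
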
@@ -550,6 +609,24 @@ lookup:
        if (cnode_locked)
                hfs_unlock(VTOC(*vpp));
 exit:
+       {
+       uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
+
+       /*
+        * check to see if we issued any I/O while completing this lookup and
+        * this thread/task is throttleable... if so, throttle now
+        *
+        * this allows us to throttle in between multiple meta data reads that
+        * might result due to looking up a long pathname (since we'll have to
+        * re-enter hfs_vnop_lookup for each component of the pathname not in
+        * the VFS cache), instead of waiting until the entire path lookup has
+        * completed and throttling at the system call return
+        */
+       if (__improbable(ut->uu_lowpri_window)) {
+               throttle_lowpri_io(TRUE);
+       }
+       }
+
        return (error);
 }
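
The exit-label change above checks, once per path component, whether the lookup issued throttleable I/O, and throttles immediately rather than waiting for the system call boundary. A compilable user-space sketch of that pattern, with stubs standing in for get_bsdthread_info and throttle_lowpri_io:

/* Sketch only: stubs stand in for the kernel primitives named above. */
#include <stdbool.h>
#include <stdio.h>

struct uthread { int uu_lowpri_window; };      /* stand-in for the real uthread */

static struct uthread *current_uthread(void)   /* stands in for               */
{                                              /* get_bsdthread_info(...)     */
        static struct uthread ut = { .uu_lowpri_window = 1 };
        return &ut;
}

static void throttle_lowpri_io(bool sleep_now) /* stub */
{
        printf("throttling now (sleep=%d)\n", (int)sleep_now);
}

/* Called once per path component, so long lookups throttle incrementally. */
static void lookup_exit_throttle(void)
{
        struct uthread *ut = current_uthread();

        if (ut->uu_lowpri_window)  /* did this component issue throttleable I/O? */
                throttle_lowpri_io(true);
}

int main(void)
{
        lookup_exit_throttle();
        return 0;
}
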
 
index 227e744b2cb2fb028d9579eafb25ccf81463f1e4..2423db07a6f9484aa14017a2fd08fa27cad3e4cf 100644 (file)
@@ -58,7 +58,14 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp)
 {
        fsid_t fsid;
        u_int32_t freeblks, state=999;
-               
+       char *volname = NULL;
+
+       if (hfsmp->vcbVN) {
+               if (strlen((char*)hfsmp->vcbVN) < 256) {
+                       volname = (char*) hfsmp->vcbVN; 
+               }
+       }
+
        fsid.val[0] = (long)hfsmp->hfs_raw_dev;
        fsid.val[1] = (long)vfs_typenum(HFSTOVFS(hfsmp));
        
@@ -74,14 +81,25 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp)
 
        if (state == 2 && !(hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK)) {
                /* Dump some logging to track down intermittent issues */
-               printf("HFS: Very Low Disk: freeblks: %d, dangerlimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_dangerlimit);
+               if (volname) {
+                       printf("HFS: Vol: %s Very Low Disk: freeblks: %d, dangerlimit: %d\n", volname, freeblks, hfsmp->hfs_freespace_notify_dangerlimit);
+               }
+               else {
+                       printf("HFS: Very Low Disk: freeblks: %d, dangerlimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_dangerlimit);
+               }
+
 #if HFS_SPARSE_DEV
                if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
                        if (hfsmp->hfs_backingfs_rootvp) {
                                struct mount *mp = vnode_mount (hfsmp->hfs_backingfs_rootvp);
                                /* If we're a sparse device, dump some info about the backing store... */
-                               if (mp) {
-                                       printf("HFS: Very Low Disk: backingstore b_avail %lld, tag %d\n", mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag);
+                               if (mp) {                                       
+                                       if (volname) {
+                                               printf("HFS: Vol: %s Very Low Disk: backingstore b_avail %lld, tag %d\n", volname, mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag);
+                                       }
+                                       else {
+                                               printf("HFS: Very Low Disk: backingstore b_avail %lld, tag %d\n", mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag);
+                                       }
                                }
                        }
                }
@@ -90,7 +108,12 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp)
                vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL);
        } else if (state == 1) {
                if (!(hfsmp->hfs_notification_conditions & VQ_LOWDISK)) {
-                       printf("HFS: Low Disk: freeblks: %d, warninglimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_warninglimit);
+                       if (volname) {
+                               printf("HFS: Low Disk: Vol: %s freeblks: %d, warninglimit: %d\n", volname, freeblks, hfsmp->hfs_freespace_notify_warninglimit);
+                       }
+                       else {
+                               printf("HFS: Low Disk: freeblks: %d, warninglimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_warninglimit);
+                       }
                        hfsmp->hfs_notification_conditions |= VQ_LOWDISK;
                        vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL);
                } else if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) {
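
The notification changes above only prepend the volume name when it is present and shorter than 256 bytes, falling back to the old message otherwise. A small user-space sketch of that guard (log_low_disk is a hypothetical helper, not a kernel function):

/* Sketch of the guarded volume-name logging pattern. */
#include <stdio.h>
#include <string.h>

static void log_low_disk(const char *vcbVN, unsigned freeblks, unsigned limit)
{
        const char *volname = NULL;

        /* Only trust the name if present and plausibly NUL-terminated. */
        if (vcbVN && strlen(vcbVN) < 256)
                volname = vcbVN;

        if (volname)
                printf("HFS: Low Disk: Vol: %s freeblks: %u, warninglimit: %u\n",
                       volname, freeblks, limit);
        else
                printf("HFS: Low Disk: freeblks: %u, warninglimit: %u\n",
                       freeblks, limit);
}

int main(void)
{
        log_low_disk("Macintosh HD", 100, 4096);
        return 0;
}
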
index 63acbac05944fc5edbf0d3f27a2d6b9974393477..f0b91b94a76f464fa9e8a2f2a573686caa217bbb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -41,6 +41,7 @@
 #include <sys/filedesc.h>
 #include <sys/stat.h>
 #include <sys/buf.h>
+#include <sys/buf_internal.h>
 #include <sys/proc.h>
 #include <sys/kauth.h>
 #include <sys/vnode.h>
@@ -52,6 +53,7 @@
 #include <sys/disk.h>
 #include <sys/sysctl.h>
 #include <sys/fsctl.h>
+#include <sys/mount_internal.h>
 
 #include <miscfs/specfs/specdev.h>
 
@@ -98,6 +100,16 @@ SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED,
 int
 hfs_vnop_read(struct vnop_read_args *ap)
 {
+       /*
+          struct vnop_read_args {
+          struct vnodeop_desc *a_desc;
+          vnode_t a_vp;
+          struct uio *a_uio;
+          int a_ioflag;
+          vfs_context_t a_context;
+          };
+        */
+
        uio_t uio = ap->a_uio;
        struct vnode *vp = ap->a_vp;
        struct cnode *cp;
@@ -109,6 +121,7 @@ hfs_vnop_read(struct vnop_read_args *ap)
        off_t offset = uio_offset(uio);
        int retval = 0;
        int took_truncate_lock = 0;
+       int io_throttle = 0;
 
        /* Preflight checks */
        if (!vnode_isreg(vp)) {
@@ -147,7 +160,7 @@ hfs_vnop_read(struct vnop_read_args *ap)
                        }
                        /* otherwise the file was converted back to a regular file while we were reading it */
                        retval = 0;
-               } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+               } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
                        int error;
                        
                        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
@@ -164,11 +177,24 @@ hfs_vnop_read(struct vnop_read_args *ap)
        hfsmp = VTOHFS(vp);
 
 #if CONFIG_PROTECT
-       if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {
+       if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
                goto exit;
        }
 #endif
 
+       /* 
+        * If this read request originated from a syscall (as opposed to 
+        * an in-kernel page fault or something), then set it up for 
+        * throttle checks.  For example, large EAs may cause a VNOP_READ
+        * to occur, and we wouldn't want to throttle I/O while holding the
+        * EA B-Tree lock.
+        */
+       if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
+               io_throttle = IO_RETURN_ON_THROTTLE;
+       }
+
+read_again:
+
        /* Protect against a size change. */
        hfs_lock_truncate(cp, HFS_SHARED_LOCK);
        took_truncate_lock = 1;
@@ -186,7 +212,7 @@ hfs_vnop_read(struct vnop_read_args *ap)
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
                (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
 
-       retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
+       retval = cluster_read(vp, uio, filesize, ap->a_ioflag | (io_throttle));
 
        cp->c_touch_acctime = TRUE;
 
@@ -227,7 +253,12 @@ exit:
        if (took_truncate_lock) {
                hfs_unlock_truncate(cp, 0);
        }
+       if (retval == EAGAIN) {
+               throttle_lowpri_io(1);
 
+               retval = 0;
+               goto read_again;
+       }
        return (retval);
 }
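
Taken together, the read-path hunks above form a throttle-and-retry loop: reads dispatched from a system call ask cluster_read to return early on throttle, and an EAGAIN result sleeps via throttle_lowpri_io and restarts after the truncate lock has been dropped. A stand-alone sketch of that control flow, with stubbed I/O and illustrative flag values (not the kernel's):

/* Sketch only: flag values and cluster_read_stub are illustrative. */
#include <errno.h>
#include <stdio.h>

#define IO_SYSCALL_DISPATCH   0x1000   /* illustrative values only */
#define IO_RETURN_ON_THROTTLE 0x2000

static int cluster_read_stub(int flags)
{
        static int calls;
        /* First pass: pretend the throttle window opened mid-read. */
        if (++calls == 1 && (flags & IO_RETURN_ON_THROTTLE))
                return EAGAIN;
        return 0;
}

static void throttle_lowpri_io(int sleep_now) { (void)sleep_now; /* stub */ }

static int vnop_read_sketch(int ioflag)
{
        int io_throttle = 0;
        int retval;

        /* Only reads dispatched from a syscall are safe to throttle here. */
        if (ioflag & IO_SYSCALL_DISPATCH)
                io_throttle = IO_RETURN_ON_THROTTLE;

read_again:
        retval = cluster_read_stub(ioflag | io_throttle);
        /* (the real code drops the truncate lock before this point) */
        if (retval == EAGAIN) {
                throttle_lowpri_io(1);  /* sleep out the throttle window */
                retval = 0;
                goto read_again;
        }
        return retval;
}

int main(void)
{
        printf("retval=%d\n", vnop_read_sketch(IO_SYSCALL_DISPATCH));
        return 0;
}
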
 
@@ -259,6 +290,7 @@ hfs_vnop_write(struct vnop_write_args *ap)
        int do_snapshot = 1;
        time_t orig_ctime=VTOC(vp)->c_ctime;
        int took_truncate_lock = 0;
+       int io_return_on_throttle = 0;
        struct rl_entry *invalid_range;
 
 #if HFS_COMPRESSION
@@ -277,7 +309,7 @@ hfs_vnop_write(struct vnop_write_args *ap)
                                printf("invalid state %d for compressed file\n", state);
                                /* fall through */
                }
-       } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+       } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
                int error;
                
                error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
@@ -308,7 +340,7 @@ hfs_vnop_write(struct vnop_write_args *ap)
        hfsmp = VTOHFS(vp);
 
 #if CONFIG_PROTECT
-       if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {
+       if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
                goto exit;
        }
 #endif
@@ -327,6 +359,10 @@ hfs_vnop_write(struct vnop_write_args *ap)
        }
 #endif /* HFS_SPARSE_DEV */
 
+       if ((ioflag & (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) == 
+                       (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) { 
+               io_return_on_throttle = IO_RETURN_ON_THROTTLE;
+       }
 again:
        /* Protect against a size change. */
        /*
@@ -349,7 +385,7 @@ again:
                uio_setoffset(uio, fp->ff_size);
                offset = fp->ff_size;
        }
-       if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
+       if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
                retval = EPERM;
                goto exit;
        }
@@ -647,9 +683,39 @@ sizeok:
                        ubc_setsize(vp, filesize);
                }
                retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
-                               tail_off, lflag | IO_NOZERODIRTY);
+                               tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
                if (retval) {
                        fp->ff_new_size = 0;    /* no longer extending; use ff_size */
+                       
+                       if (retval == EAGAIN) {
+                               /*
+                                * EAGAIN indicates that we still have I/O to do, but
+                                * that we now need to be throttled
+                                */
+                               if (resid != uio_resid(uio)) {
+                                       /*
+                                        * did manage to do some I/O before returning EAGAIN
+                                        */
+                                       resid = uio_resid(uio);
+                                       offset = uio_offset(uio);
+
+                                       cp->c_touch_chgtime = TRUE;
+                                       cp->c_touch_modtime = TRUE;
+                               }
+                               if (filesize > fp->ff_size) {
+                                       /*
+                                        * we called ubc_setsize before the call to
+                                        * cluster_write... since we only partially
+                                        * completed the I/O, we need to 
+                                        * re-adjust our idea of the filesize based
+                                        * on our interim EOF
+                                        */
+                                       ubc_setsize(vp, offset);
+
+                                       fp->ff_size = offset;
+                               }
+                               goto exit;
+                       }
                        if (filesize > origFileSize) {
                                ubc_setsize(vp, origFileSize);
                        }
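
The EAGAIN branch above records any partial progress and pulls the optimistically extended EOF back to the interim offset before the retry. A minimal sketch of that bookkeeping, with a stand-in fork type (sketch_fork) in place of the real filefork:

/* Sketch only: sketch_fork stands in for the filefork structure. */
#include <stdio.h>

struct sketch_fork { long long ff_size; };

static void handle_write_eagain(struct sketch_fork *fp,
                                long long *resid, long long *offset,
                                long long uio_resid_now, long long uio_offset_now,
                                long long filesize)
{
        if (*resid != uio_resid_now) {
                /* Some I/O completed before the throttle fired. */
                *resid = uio_resid_now;
                *offset = uio_offset_now;
                /* (the real code also marks ctime/mtime for update here) */
        }
        if (filesize > fp->ff_size) {
                /*
                 * ubc_setsize() was called optimistically before the write;
                 * shrink the cached size back to the interim EOF.
                 */
                fp->ff_size = *offset;
        }
}

int main(void)
{
        struct sketch_fork fp = { .ff_size = 4096 };
        long long resid = 8192, offset = 4096;

        handle_write_eagain(&fp, &resid, &offset, 4096, 8192, 12288);
        printf("interim EOF: %lld\n", fp.ff_size);
        return 0;
}
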
@@ -732,6 +798,12 @@ exit:
        if (took_truncate_lock) {
                hfs_unlock_truncate(cp, 0);
        }
+       if (retval == EAGAIN) {
+               throttle_lowpri_io(1);
+
+               retval = 0;
+               goto again;
+       }
        return (retval);
 }
 
@@ -1000,8 +1072,11 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
        } else {
            int lockflags;
                        
+           if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
+                   throttle_lowpri_io(1);
+
            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
-                       
+
            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
                        
@@ -1177,7 +1252,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
     boolean_t is64bit;
 
     /*
-     * NOTE: on entry, the vnode is locked. Incase this vnode
+     * NOTE: on entry, the vnode has an io_ref. In case this vnode
      * happens to be in our list of file_ids, we'll note it
      * avoid calling hfs_chashget_nowait() on that id as that
      * will cause a "locking against myself" panic.
@@ -1422,7 +1497,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
            access[i] = 0;
            continue;
        }
-                       
+       
        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, 
            skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
                        
@@ -1532,7 +1607,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 #if CONFIG_PROTECT
        {
                int error = 0;
-               if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+               if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
                        return error;
                }
        }
@@ -1811,6 +1886,50 @@ fail_change_next_allocation:
        }
 #endif /* HFS_SPARSE_DEV */
 
+       /* Change the next CNID stored in the VH */
+       case HFS_CHANGE_NEXTCNID: {
+               int error = 0;          /* Assume success */
+               u_int32_t fileid;
+               int wraparound = 0;
+               int lockflags = 0;
+
+               if (vnode_vfsisrdonly(vp)) {
+                       return (EROFS);
+               }
+               vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+               if (suser(cred, NULL) &&
+                       kauth_cred_getuid(cred) != vfsp->f_owner) {
+                       return (EACCES); /* must be owner of file system */
+               }
+               
+               fileid = *(u_int32_t *)ap->a_data;
+
+               /* Must have catalog lock excl. to advance the CNID pointer */
+               lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
+
+               HFS_MOUNT_LOCK(hfsmp, TRUE);
+               
+               /* If it is less than the current next CNID, force the wraparound bit to be set */
+               if (fileid < hfsmp->vcbNxtCNID) {
+                       wraparound=1;
+               }
+
+               /* Return previous value. */
+               *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
+
+               hfsmp->vcbNxtCNID = fileid;
+
+               if (wraparound) {
+                       hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
+               }
+               
+               MarkVCBDirty(hfsmp);
+               HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+               hfs_systemfile_unlock (hfsmp, lockflags);
+
+               return (error);
+       }
+       
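
HFS_CHANGE_NEXTCNID above swaps in a caller-supplied next CNID, reports the previous value back through a_data, and sets the IDs-reused volume attribute whenever the pointer moves backwards. A compact sketch of that logic (kIDsReusedMask and sketch_vcb are illustrative stand-ins):

/* Sketch only: mask value and vcb type are stand-ins. */
#include <stdint.h>
#include <stdio.h>

#define kIDsReusedMask 0x1000u  /* stands in for kHFSCatalogNodeIDsReusedMask */

struct sketch_vcb { uint32_t nextCNID; uint32_t attributes; };

static uint32_t change_next_cnid(struct sketch_vcb *vcb, uint32_t fileid)
{
        uint32_t prev = vcb->nextCNID;

        /* Moving the pointer backwards means IDs may be handed out twice. */
        if (fileid < vcb->nextCNID)
                vcb->attributes |= kIDsReusedMask;

        vcb->nextCNID = fileid;
        return prev;    /* the ioctl reports the previous value to the caller */
}

int main(void)
{
        struct sketch_vcb vcb = { .nextCNID = 5000, .attributes = 0 };

        printf("prev=%u wrapped=%d\n", change_next_cnid(&vcb, 100),
               (vcb.attributes & kIDsReusedMask) != 0);
        return 0;
}
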
        case F_FREEZE_FS: {
                struct mount *mp;
  
@@ -1942,6 +2061,73 @@ fail_change_next_allocation:
                        return (EINVAL);        
        }
 
+       case F_SETSTATICCONTENT: {
+               int error;
+               int enable_static = 0;
+               struct cnode *cp = NULL;
+               /* 
+                * lock the cnode, decorate the cnode flag, and bail out.
+                * VFS should have already authenticated the caller for us.
+                */
+
+               if (ap->a_data) {
+                       /* 
+                        * Note that even though ap->a_data is of type caddr_t,
+                        * the fcntl layer at the syscall handler will pass in NULL
+                        * or 1 depending on what the argument supplied to the fcntl
+                        * was.  So it is in fact correct to check the ap->a_data 
+                        * argument for zero or non-zero value when deciding whether or not
+                        * to enable the static bit in the cnode.
+                        */
+                       enable_static = 1;
+               }
+               if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+                       return EROFS;
+               }
+               cp = VTOC(vp);
+
+               error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
+               if (error == 0) {
+                       if (enable_static) {
+                               cp->c_flag |= C_SSD_STATIC;
+                       }
+                       else {
+                               cp->c_flag &= ~C_SSD_STATIC;
+                       }
+                       hfs_unlock (cp);
+               }
+               return error;
+       }
+
+       case F_SETBACKINGSTORE: {
+
+               int error = 0;
+
+               /* 
+                * See comment in F_SETSTATICCONTENT re: using
+                * a null check for a_data
+                */
+               if (ap->a_data) {
+                       error = hfs_set_backingstore (vp, 1);
+               }
+               else {
+                       error = hfs_set_backingstore (vp, 0);
+               }               
+
+               return error;
+       }
+
+       case F_GETPATH_MTMINFO: {
+               int error = 0;
+
+               int *data = (int*) ap->a_data;  
+
+               /* Ask if this is a backingstore vnode */
+               error = hfs_is_backingstore (vp, data);
+
+               return error;
+       }
+
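
F_SETSTATICCONTENT above treats any non-NULL a_data as "enable" (the fcntl layer passes NULL or 1), takes the cnode lock, and toggles the static-content hint. A user-space sketch of the decision logic, with an illustrative flag value and a stand-in cnode type:

/* Sketch only: C_SSD_STATIC_SKETCH and sketch_cnode are illustrative. */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

#define C_SSD_STATIC_SKETCH 0x8000u

struct sketch_cnode { unsigned c_flag; };

static int set_static_content(struct sketch_cnode *cp, void *a_data, int readonly)
{
        if (readonly)
                return EROFS;

        /* fcntl hands the vnop NULL or (caddr_t)1, so nullness is the switch. */
        if (a_data != NULL)
                cp->c_flag |= C_SSD_STATIC_SKETCH;
        else
                cp->c_flag &= ~C_SSD_STATIC_SKETCH;
        return 0;
}

int main(void)
{
        struct sketch_cnode cp = { 0 };

        set_static_content(&cp, (void *)1, 0);
        printf("static=%d\n", (cp.c_flag & C_SSD_STATIC_SKETCH) != 0);
        return 0;
}
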
        case F_FULLFSYNC: {
                int error;
                
@@ -2013,10 +2199,6 @@ fail_change_next_allocation:
                return (error);
        }
 
-       case F_READBOOTSTRAP:
-       case F_WRITEBOOTSTRAP:
-               return 0;
-
        case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
        {
                if (is64bit) {
@@ -2036,6 +2218,10 @@ fail_change_next_allocation:
            *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
            break;
 
+       case HFS_FSCTL_GET_VERY_LOW_DISK:
+           *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
+           break;
+
        case HFS_FSCTL_SET_VERY_LOW_DISK:
            if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
                return EINVAL;
@@ -2044,6 +2230,10 @@ fail_change_next_allocation:
            hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
            break;
 
+       case HFS_FSCTL_GET_LOW_DISK:
+           *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
+           break;
+
        case HFS_FSCTL_SET_LOW_DISK:
            if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
                || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
@@ -2054,6 +2244,10 @@ fail_change_next_allocation:
            hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
            break;
 
+       case HFS_FSCTL_GET_DESIRED_DISK:
+           *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
+           break;
+
        case HFS_FSCTL_SET_DESIRED_DISK:
            if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
                return EINVAL;
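
The new getters and the existing setters above maintain a strict ordering of the free-space thresholds: dangerlimit < warninglimit < desiredlevel. A sketch of the HFS_FSCTL_SET_LOW_DISK validation under that invariant (struct limits is a stand-in):

/* Sketch only: struct limits stands in for the hfsmount fields. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct limits { uint32_t danger, warning, desired; };

static int set_low_disk(struct limits *l, uint32_t warning)
{
        /* Mirrors HFS_FSCTL_SET_LOW_DISK: warning stays between the others. */
        if (warning >= l->desired || warning <= l->danger)
                return EINVAL;
        l->warning = warning;
        return 0;
}

int main(void)
{
        struct limits l = { .danger = 100, .warning = 500, .desired = 1000 };

        printf("ok=%d bad=%d\n", set_low_disk(&l, 600), set_low_disk(&l, 50));
        return 0;
}
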
@@ -2464,44 +2658,43 @@ retry:
                } 
                
                /* Validate if the start offset is within logical file size */
-               if (ap->a_foffset > fp->ff_size) {
+               if (ap->a_foffset >= fp->ff_size) {
                        goto exit;
                }
 
-               /* Searching file extents has failed for read operation, therefore 
-                * search rangelist for any uncommitted holes in the file. 
+               /*
+                * At this point, we have encountered a failure during 
+                * MapFileBlockC that resulted in ERANGE, and we are not servicing
+                * a write, and there are borrowed blocks.
+                * 
+                * However, the cluster layer will not call blockmap for 
+                * blocks that are borrowed and in-cache.  We have to assume that 
+                * because we observed ERANGE being emitted from MapFileBlockC, this 
+                * extent range is not valid on-disk.  So we treat this as a 
+                * mapping that needs to be zero-filled prior to reading.  
+                *
+                * Note that under certain circumstances (such as non-contiguous 
+                * userland VM mappings in the calling process), cluster_io 
+                * may be forced to split a large I/O driven by hfs_vnop_write 
+                * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
+                * the case here, then we have already removed the invalid range list
+                * mapping prior to getting to this blockmap call, so we should not
+                * search the invalid rangelist for this byte range.
                 */
-               overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
-                                     ap->a_foffset + (off_t)(ap->a_size - 1),
-                                     &invalid_range);
-               switch(overlaptype) {
-               case RL_OVERLAPISCONTAINED:
-                       /* start_offset <= rl_start, end_offset >= rl_end */
-                       if (ap->a_foffset != invalid_range->rl_start) {
-                               break;
-                       }
-               case RL_MATCHINGOVERLAP:
-                       /* start_offset = rl_start, end_offset = rl_end */
-               case RL_OVERLAPCONTAINSRANGE:
-                       /* start_offset >= rl_start, end_offset <= rl_end */
-               case RL_OVERLAPSTARTSBEFORE:
-                       /* start_offset > rl_start, end_offset >= rl_start */
-                       if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
-                               bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
-                       } else {
-                               bytesContAvail = fp->ff_size - ap->a_foffset;
-                       }
-                       if (bytesContAvail > ap->a_size) {
-                               bytesContAvail = ap->a_size;
-                       }
-                       *ap->a_bpn = (daddr64_t)-1;
-                       retval = 0;
-                       break;
-               case RL_OVERLAPENDSAFTER:
-                       /* start_offset < rl_start, end_offset < rl_end */
-               case RL_NOOVERLAP:
-                       break;
+
+               bytesContAvail = fp->ff_size - ap->a_foffset;
+               /*
+                * Clip the contiguous available bytes to, at most, the allowable
+                * maximum or the amount requested.
+                */
+
+               if (bytesContAvail > ap->a_size) {
+                       bytesContAvail = ap->a_size;
                }
+
+               *ap->a_bpn = (daddr64_t) -1;
+               retval = 0;
+
                goto exit;
        }
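
The rewritten blockmap fallback above no longer scans the invalid-range list; it simply maps the remainder of the file as a zero-fill range, clipped to the requested size, and returns -1 as the block number. A sketch of that arithmetic:

/* Sketch of the zero-fill mapping math. */
#include <stdint.h>
#include <stdio.h>

static int64_t zero_fill_map(int64_t ff_size, int64_t foffset, int64_t a_size,
                             int64_t *bpn)
{
        int64_t avail = ff_size - foffset;  /* caller ensured foffset < ff_size */

        if (avail > a_size)
                avail = a_size;             /* clip to the amount requested */

        *bpn = -1;                          /* tells the cluster layer: zero-fill */
        return avail;
}

int main(void)
{
        int64_t bpn;

        printf("avail=%lld bpn=%lld\n",
               (long long)zero_fill_map(16384, 4096, 65536, &bpn),
               (long long)bpn);
        return 0;
}
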
 
@@ -2566,7 +2759,6 @@ exit:
        return (MacToVFSError(retval));
 }
 
-
 /*
  * prepare and issue the I/O
  * buf_strategy knows how to deal
@@ -2580,28 +2772,53 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
        vnode_t vp = buf_vnode(bp);
        int error = 0;
        
+       /* Mark buffer as containing static data if cnode flag set */
+       if (VTOC(vp)->c_flag & C_SSD_STATIC) {
+               buf_markstatic(bp);
+       }
+       
 #if CONFIG_PROTECT
        cnode_t *cp = NULL; 
        
        if ((cp = cp_get_protected_cnode(vp)) != NULL) {
-               /*
-                * Some paths to hfs_vnop_strategy will take the cnode lock, 
-                * and some won't. But since content protection is only enabled
-                * for files that (a) aren't system files and (b) are regular 
-                * files, any valid cnode here will be unlocked.
+               /* 
+                * We rely upon the truncate lock to protect the
+                * CP cache key from getting tossed prior to our IO finishing here.
+                * Nearly all cluster io calls to manipulate file payload from HFS
+                * take the truncate lock before calling into the cluster
+                * layer to ensure the file size does not change, or that they
+                * have exclusive right to change the EOF of the file.  
+                * That same guarantee protects us here since the code that
+                * deals with CP lock events must now take the truncate lock 
+                * before doing anything. 
+                *
+                * There is one exception here:
+                * 1) The VM swapfile IO, because HFS will
+                * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
+                * swapfile code only without holding the truncate lock.  This is because
+                * individual swapfiles are maintained at fixed-length sizes by the VM code.
+                * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to 
+                * create our own UPL and thus take the truncate lock before calling 
+                * into the cluster layer.  In that case, however, we are not concerned 
+                * with the CP blob being wiped out in the middle of the IO 
+                * because there isn't anything to toss; the VM swapfile key stays
+                * in-core as long as the file is open. 
+                * 
+                * NB:
+                * For filesystem resize, we may not have access to the underlying
+                * file's cache key for whatever reason (device may be locked).  However,
+                * we do not need it since we are going to use the temporary HFS-wide resize key
+                * which is generated once we start relocating file content.  If this file's I/O 
+                * should be done using the resize key, it will have been supplied already, so
+                * do not attach the file's cp blob to the buffer. 
                 */
-               hfs_lock(cp, HFS_SHARED_LOCK);
-               buf_setcpaddr(bp, cp->c_cpentry);
+               if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
+                       buf_setcpaddr(bp, cp->c_cpentry);
+               }
        }
 #endif /* CONFIG_PROTECT */
        
        error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
-
-#if CONFIG_PROTECT
-       if (cp) {
-               hfs_unlock(cp);
-       }
-#endif
        
        return error;
 }
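
The reworked strategy path above drops the shared cnode lock and instead relies on the truncate lock for CP key lifetime, attaching the file's protection blob to the buffer only when no resize (relocation) key is in flight. A sketch of that decision with stand-in types:

/* Sketch only: types and flag value are stand-ins for the CP structures. */
#include <stddef.h>
#include <stdio.h>

#define CP_RELOCATION_INFLIGHT_SKETCH 0x1u

struct cp_entry     { unsigned cp_flags; };
struct sketch_buf   { struct cp_entry *cpaddr; };
struct sketch_cnode { struct cp_entry *c_cpentry; };

/* Attach the file's protection blob unless the resize key is in use. */
static void strategy_prepare(struct sketch_buf *bp, struct sketch_cnode *cp)
{
        if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT_SKETCH) == 0)
                bp->cpaddr = cp->c_cpentry;   /* buf_setcpaddr() equivalent */
}

int main(void)
{
        struct cp_entry ent = { .cp_flags = CP_RELOCATION_INFLIGHT_SKETCH };
        struct sketch_cnode cp = { .c_cpentry = &ent };
        struct sketch_buf bp = { 0 };

        strategy_prepare(&bp, &cp);
        printf("blob attached: %d\n", bp.cpaddr != NULL);
        return 0;
}
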
@@ -2938,7 +3155,9 @@ hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
        
        struct filefork *fp = VTOF(vp);
        struct cnode *cp = VTOC(vp);
+#if QUOTA
        int retval = 0;
+#endif /* QUOTA */
        
        /* Cannot truncate an HFS directory! */
        if (vnode_isdir(vp)) {
@@ -3001,6 +3220,12 @@ hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
  * for use when deleting a file.  The simplification here is that we know 
  * that we are releasing all blocks.
  *
+ * Note that this function may be called when there is no vnode backing
+ * the file fork in question.  We may call this from hfs_vnop_inactive
+ * to clear out resource fork data (and may not want to clear out the data 
+ * fork yet).  As a result, we pointer-check both sets of inputs before 
+ * doing anything with them.
+ *
  * The caller is responsible for saving off a copy of the filefork(s)
  * embedded within the cnode prior to calling this function.  The pointers
  * supplied as arguments must be valid even if the cnode is no longer valid.
@@ -3019,7 +3244,7 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
        blksize = hfsmp->blockSize;
        
        /* Data Fork */
-       if (datafork->ff_blocks > 0) {
+       if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
                fileblocks = datafork->ff_blocks;
                filebytes = (off_t)fileblocks * (off_t)blksize;         
                
@@ -3477,7 +3702,7 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap)
        fp = VTOF(vp);
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
+       if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
                return error;
        }
 #endif /* CONFIG_PROTECT */
@@ -3554,6 +3779,8 @@ retry_pagein:
                error = EINVAL;
                goto pagein_done;
        }
+       ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
+
        isize = ap->a_size;
 
        /* 
@@ -3813,10 +4040,19 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                a_pl_offset = 0;
 
                /*
-                * take truncate lock (shared) to guard against 
-                * zero-fill thru fsync interfering, but only for v2 
+                * For V2 semantics, we want to take the cnode truncate lock
+                * shared to guard against the file size changing via zero-filling.
+                * 
+                * However, we have to be careful because we may be invoked 
+                * via the ubc_msync path to write out dirty mmap'd pages
+                * in response to a lock event on a content-protected
+                * filesystem (e.g. to write out class A files).
+                * As a result, we want to take the truncate lock 'SHARED' with 
+                * the mini-recursion locktype so that we don't deadlock/panic 
+                * because we may already be holding the truncate lock exclusive to force any other
+                * IOs to have blocked behind us. 
                 */
-               hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+               hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
 
                if (a_flags & UPL_MSYNC) {
                        request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
@@ -4035,8 +4271,13 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
 
 pageout_done:
        if (is_pageoutv2) {
-               /* release truncate lock (shared) */
-               hfs_unlock_truncate(cp, 0);
+               /* 
+                * Release the truncate lock.  Note that because 
+                * we may have taken the lock recursively by 
+                * being invoked via ubc_msync due to lockdown,
+                * we should release it recursively, too.
+                */
+               hfs_unlock_truncate(cp, 1);
        }
        return (retval);
 }
@@ -4173,12 +4414,12 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
                return EINVAL;
        }
 #endif
-
        /* If it's an SSD, also disable HFS relocation */
        if (hfsmp->hfs_flags & HFS_SSD) {
                return EINVAL;
        }
 
+
        blksize = hfsmp->blockSize;
        if (blockHint == 0)
                blockHint = hfsmp->nextAllocation;
@@ -4452,7 +4693,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
        hfs_unlock(VTOC(vp));
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
                hfs_lock(VTOC(vp), HFS_FORCE_LOCK);     
                return (error);
        }
@@ -4477,7 +4718,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
                        break;
                }
                if (uio_resid(auio) != 0) {
-                       printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
+                       printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
                        error = EIO;            
                        break;
                }
index 878c70dc5b2a02d6a1c01a8984f04fc800203faa..b73ec833904d5ef642dd759d013f1dca295c4508 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1997-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,6 +66,8 @@
 #include "hfscommon/headers/BTreeScanner.h"
 #include "hfscommon/headers/CatalogPrivate.h"
 
+#if CONFIG_SEARCHFS
+
 /* Search criterea. */
 struct directoryInfoSpec
 {
@@ -206,7 +208,9 @@ hfs_vnop_search(ap)
                return (EINVAL);
 
        /*
-        * Reject requests for unsupported attributes.
+        * Fail requests for attributes that HFS does not support for the
+        * items that match the search criteria.  Note that these checks
+        * are for the OUTBOUND attributes to be returned (not search criteria).
         */
        if ((ap->a_returnattrs->commonattr & ~HFS_ATTR_CMN_VALID) ||
            (ap->a_returnattrs->volattr != 0) ||
@@ -280,6 +284,7 @@ hfs_vnop_search(ap)
                err = ENOMEM;
                goto ExitThisRoutine;
        }
+       bzero(attributesBuffer, eachReturnBufferSize);
        variableBuffer = (void*)((char*) attributesBuffer + fixedBlockSize);
 
        // XXXdbg - have to lock the user's buffer so we don't fault
@@ -422,7 +427,7 @@ ExitThisRoutine:
        if (attributesBuffer)
                FREE(attributesBuffer, M_TEMP);
 
-       if (hfsmp->jnl && user_start) {
+       if (user_start) {
                vsunlock(user_start, user_len, TRUE);
        }
 
@@ -1075,7 +1080,7 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec,
        
        *((u_int32_t *)attributesBuffer) = packedBufferSize;    /* Store length of fixed + var block */
        
-       err = uiomove( (caddr_t)attributesBuffer, packedBufferSize, a_uio );    /* XXX should be packedBufferSize */
+       err = uiomove( (caddr_t)attributesBuffer, packedBufferSize, a_uio );
 exit:
        cat_releasedesc(&c_desc);
        
@@ -1291,4 +1296,4 @@ UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist       *alist,
 
        return (0);
 }
-
+#endif /* CONFIG_SEARCHFS */
index 4e5b76b1457d9b84ebf44a1c14dc11869f6af7ce..26bafe77087d817a928756edafd933fbff19894a 100644 (file)
@@ -88,6 +88,7 @@
 #include <sys/kdebug.h>
 #include <sys/fslog.h>
 #include <sys/ubc.h>
+#include <sys/buf_internal.h>
 
 #include <kern/locks.h>
 
@@ -154,9 +155,11 @@ static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_con
 static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
+static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);
 
 void hfs_initialize_allocator (struct hfsmount *hfsmp);
 int hfs_teardown_allocator (struct hfsmount *hfsmp);
+void hfs_unmap_blocks (struct hfsmount *hfsmp);
 
 int hfs_mount(struct mount *mp, vnode_t  devvp, user_addr_t data, vfs_context_t context);
 int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
@@ -292,7 +295,39 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                        retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
                        hfsmp->hfs_flags |= HFS_READ_ONLY;
 
-                       /* also get the volume bitmap blocks */
+                       /*
+                        * Close down the journal. 
+                        *
+                        * NOTE: It is critically important to close down the journal
+                        * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
+                        * In a journaled environment it is expected that the journal be
+                        * the only actor permitted to issue I/O for metadata blocks in HFS.
+                        * If we were to call VNOP_FSYNC prior to closing down the journal,
+                        * we would inadvertently issue (and wait for) the I/O we just 
+                        * initiated above as part of the flushvolumeheader call.
+                        * 
+                        * To avoid this, we follow the same order of operations as in
+                        * unmount and issue the journal_close prior to calling VNOP_FSYNC.
+                        */
+       
+                       if (hfsmp->jnl) {
+                               hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
+
+                           journal_close(hfsmp->jnl);
+                           hfsmp->jnl = NULL;
+
+                           // Note: we explicitly don't want to shutdown
+                           //       access to the jvp because we may need
+                           //       it later if we go back to being read-write.
+
+                               hfs_unlock_global (hfsmp);
+                       }
+
+
+                       /*
+                        * Write out any pending I/O still outstanding against the device node
+                        * now that the journal has been closed.
+                        */
                        if (!retval) {
                                if (vnode_mount(hfsmp->hfs_devvp) == mp) {
                                        retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
@@ -302,6 +337,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                        vnode_put(hfsmp->hfs_devvp);
                                }
                        }
+
                        if (retval) {
                                if (HFS_MOUNT_DEBUG) {
                                        printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
@@ -311,19 +347,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                hfsmp->hfs_flags &= ~HFS_READ_ONLY;
                                goto out;
                        }
-                       if (hfsmp->jnl) {
-                               hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
-
-                           journal_close(hfsmp->jnl);
-                           hfsmp->jnl = NULL;
-
-                           // Note: we explicitly don't want to shutdown
-                           //       access to the jvp because we may need
-                           //       it later if we go back to being read-write.
-
-                               hfs_unlock_global (hfsmp);
-                       }
-
+       
 #if CONFIG_HFS_ALLOC_RBTREE
                        (void) hfs_teardown_allocator(hfsmp);
 #endif                                         
@@ -439,7 +463,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                 * Allow hot file clustering if conditions allow.
                                 */
                                if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && 
-                                               ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
+                                          ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0))    {
                                        (void) hfs_recording_init(hfsmp);
                                }
                                /* Force ACLs on HFS+ file systems. */
@@ -465,6 +489,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                         * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
                         * which has not previously encountered a bad error on the red-black tree code.  Also, don't
                         * try to re-build a tree that already exists. 
+                        *
+                        * When this is enabled, we must re-integrate the above function into our bitmap iteration
+                        * so that we accurately send TRIMs down to the underlying disk device as needed.
                         */
                        
                        if (hfsmp->extent_tree_flags == 0) {
@@ -504,32 +531,49 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                 */
                if ((retval == 0) && (cp_fs_protected (mp))) {
                        int err = 0;
-                       struct cp_root_xattr xattr;
-                       bzero (&xattr, sizeof(struct cp_root_xattr));
+                       
+                       struct cp_root_xattr *xattr = NULL;
+                       MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
+                       if (xattr == NULL) {
+                               err = ENOMEM;
+                               goto badalloc;
+                       }
+                       bzero (xattr, sizeof(struct cp_root_xattr));
                        hfsmp = vfs_fsprivate(mp);
 
                        /* go get the EA to get the version information */
-                       err = cp_getrootxattr (hfsmp, &xattr);
-                       /* If there was no EA there, then write one out. */
+                       err = cp_getrootxattr (hfsmp, xattr);
+                       /* 
+                        * If there was no EA there, then write one out. 
+                        * If the EA is not present on the root, we assume this 
+                        * is an erase install or a very old FS
+                        */
                        if (err == ENOATTR) {
-                               bzero(&xattr, sizeof(struct cp_root_xattr));
-                               xattr.major_version = CP_CURRENT_MAJOR_VERS;
-                               xattr.minor_version = CP_CURRENT_MINOR_VERS;
-                               xattr.flags = 0;
+                               printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
+                               bzero(xattr, sizeof(struct cp_root_xattr));
+                               xattr->major_version = CP_NEW_MAJOR_VERS;
+                               xattr->minor_version = CP_MINOR_VERS;
+                               xattr->flags = 0;
+
+                               err = cp_setrootxattr (hfsmp, xattr);
+                       }
 
-                               err = cp_setrootxattr (hfsmp, &xattr);
-                       }       
                        /* 
                         * For any other error, including having an out of date CP version in the
                         * EA, or for an error out of cp_setrootxattr, deny the mount 
                         * and do not proceed further.
                         */
-                       if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS)  {
+                       if (err || (xattr->major_version != CP_NEW_MAJOR_VERS && xattr->major_version != CP_PREV_MAJOR_VERS))  {
                                /* Deny the mount and tear down. */
                                retval = EPERM;
                                (void) hfs_unmount (mp, MNT_FORCE, context);
-                       }       
-               }                                 
+                       }
+                       printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
+badalloc:
+                       if(xattr) {
+                               FREE(xattr, M_TEMP);
+                       }
+               }
 #endif
        }
 out:
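
The content-protection changes above create a current-version root EA when none exists, then accept only the current or the immediately previous major version, denying the mount otherwise. A sketch of that policy with illustrative version numbers (the real values of CP_NEW_MAJOR_VERS / CP_PREV_MAJOR_VERS are not shown in the diff):

/* Sketch only: version numbers are illustrative, not the shipped values. */
#include <errno.h>
#include <stdio.h>

#ifndef ENOATTR
#define ENOATTR ENODATA        /* Linux spelling; macOS defines ENOATTR */
#endif

#define CP_NEW_MAJOR_VERS_SKETCH  4
#define CP_PREV_MAJOR_VERS_SKETCH 2

struct root_xattr { int major_version, minor_version; };

static int check_cp_root(struct root_xattr *xattr, int getxattr_err)
{
        if (getxattr_err == ENOATTR) {
                /* Erase-install or very old FS: stamp the current version. */
                xattr->major_version = CP_NEW_MAJOR_VERS_SKETCH;
                xattr->minor_version = 0;
                getxattr_err = 0;      /* as if cp_setrootxattr succeeded */
        }
        if (getxattr_err ||
            (xattr->major_version != CP_NEW_MAJOR_VERS_SKETCH &&
             xattr->major_version != CP_PREV_MAJOR_VERS_SKETCH))
                return EPERM;          /* deny the mount */
        return 0;
}

int main(void)
{
        struct root_xattr x = { 0, 0 };

        printf("fresh volume -> %d\n", check_cp_root(&x, ENOATTR));
        printf("unknown version -> %d\n",
               check_cp_root(&(struct root_xattr){ 9, 0 }, 0));
        return 0;
}
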
@@ -1155,6 +1199,22 @@ bailout:
 #endif
 }
 
+void hfs_unmap_blocks (struct hfsmount *hfsmp) {
+       /*
+        * Take the allocation file lock.  Journal transactions will block until
+        * we're done here. 
+        */
+       int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+       /*
+        * UnmapBlocks assumes that the bitmap lock is held when you call the function.
+        * We don't care if there were any error issuing unmaps yet.
+        */
+       (void) UnmapBlocks(hfsmp);
+
+       hfs_systemfile_unlock(hfsmp, flags);
+}
+
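
hfs_unmap_blocks above takes the allocation-file (bitmap) lock exclusively, so journal transactions block while UnmapBlocks walks the bitmap, and it deliberately ignores unmap errors. A sketch of the wrapper's shape with stubbed lock and unmap calls:

/* Sketch only: stubs replace the kernel lock and unmap primitives. */
#include <stdio.h>

static int  lock_bitmap_excl(void)   { puts("bitmap locked");   return 1; }
static void unlock_bitmap(int flags) { (void)flags; puts("bitmap unlocked"); }
static void unmap_free_blocks(void)  { puts("TRIMs issued for free extents"); }

static void unmap_blocks_sketch(void)
{
        int flags = lock_bitmap_excl();   /* journal transactions now wait */
        unmap_free_blocks();              /* errors intentionally ignored  */
        unlock_bitmap(flags);
}

int main(void)
{
        unmap_blocks_sketch();
        return 0;
}
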
 
 /* 
  * Teardown code for the Red-Black Tree allocator. 
@@ -1205,7 +1265,6 @@ hfs_teardown_allocator (struct hfsmount *hfsmp) {
        
 }
 
-
 static int hfs_root_unmounted_cleanly = 0;
 
 SYSCTL_DECL(_vfs_generic);
@@ -1239,6 +1298,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        daddr64_t mdb_offset;
        int isvirtual = 0;
        int isroot = 0;
+       u_int32_t device_features = 0;
        int isssd;
 #if CONFIG_HFS_ALLOC_RBTREE
        thread_t allocator_thread;
@@ -1405,9 +1465,21 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        bzero(hfsmp, sizeof(struct hfsmount));
        
        hfs_chashinit_finish(hfsmp);
-
+       
        /*
-        * See if the disk is a solid state device.  We need this to decide what to do about 
+        * See if the disk supports unmap (trim).
+        *
+        * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
+        * returned by vfs_ioattr.  We need to call VNOP_IOCTL ourselves.
+        */
+       if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
+               if (device_features & DK_FEATURE_UNMAP) {
+                       hfsmp->hfs_flags |= HFS_UNMAP;
+               }
+       }       
+
+       /* 
+        * See if the disk is a solid state device, too.  We need this to decide what to do about 
         * hotfiles.
         */
        if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
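
The mount-time probe above asks the device for its feature bits before vfs_init_io_attributes has run, setting HFS_UNMAP when DK_FEATURE_UNMAP is advertised. The same ioctl can be issued from user space on macOS against a raw disk node; a sketch (the /dev/rdisk0 path is only an example, and the call needs sufficient privileges):

/* Sketch only: user-space probe using macOS <sys/disk.h>. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/disk.h>    /* DKIOCGETFEATURES, DK_FEATURE_UNMAP (macOS) */

int main(void)
{
        uint32_t features = 0;
        int fd = open("/dev/rdisk0", O_RDONLY);   /* path is an example */

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, DKIOCGETFEATURES, &features) == 0)
                printf("unmap/TRIM %ssupported\n",
                       (features & DK_FEATURE_UNMAP) ? "" : "not ");
        else
                perror("DKIOCGETFEATURES");
        return 0;
}
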
@@ -1434,6 +1506,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        vnode_ref(devvp);  /* Hold a ref on the device, dropped when hfsmp is freed. */
        hfsmp->hfs_logical_block_size = log_blksize;
        hfsmp->hfs_logical_block_count = log_blkcnt;
+       hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
        hfsmp->hfs_physical_block_size = phys_blksize;
        hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
        hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
@@ -1493,7 +1566,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        /* Mount a standard HFS disk */
        if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
            (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
-
+#if CONFIG_HFS_STD 
                /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
                if (vfs_isrdwr(mp)) {
                        retval = EROFS;
@@ -1529,6 +1602,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        }
                        hfsmp->hfs_logical_block_size = log_blksize;
                        hfsmp->hfs_logical_block_count = log_blkcnt;
+                       hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
                        hfsmp->hfs_physical_block_size = log_blksize;
                        hfsmp->hfs_log_per_phys = 1;
                }
@@ -1548,6 +1622,11 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
                if (retval)
                        (void) hfs_relconverter(hfsmp->hfs_encoding);
+#else
+               /* On platforms where HFS Standard is not supported, deny the mount altogether */
+               retval = EINVAL;
+               goto error_exit;
+#endif
 
        } else /* Mount an HFS Plus disk */ {
                HFSPlusVolumeHeader *vhp;
@@ -1595,6 +1674,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                                /* Update logical /physical block size */
                                hfsmp->hfs_logical_block_size = log_blksize;
                                hfsmp->hfs_physical_block_size = log_blksize;
+                               
                                phys_blksize = log_blksize;
                                hfsmp->hfs_log_per_phys = 1;
                        }
@@ -1604,6 +1684,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 
                        hfsmp->hfs_logical_block_count = disksize / log_blksize;
        
+                       hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
+                       
                        mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
                        retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                                        phys_blksize, cred, &bp);
@@ -1624,7 +1706,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                }
 
                if (isroot) {
-                       hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
+                       hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
                }
 
                /*
@@ -1758,7 +1840,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                 */
                if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
                        printf("hfs_mountfs: could not use physical block size "
-                               "(%d) switching to 512\n", log_blksize);
+                                       "(%d) switching to 512\n", log_blksize);
                        log_blksize = 512;
                        if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
                                if (HFS_MOUNT_DEBUG) { 
@@ -1776,10 +1858,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        }
                        devvp->v_specsize = log_blksize;
                        /* Note: relative block count adjustment (in case this is an embedded volume). */
-                       hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
-                       hfsmp->hfs_logical_block_size = log_blksize;
-                       hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
+                       hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
+                       hfsmp->hfs_logical_block_size = log_blksize;
+                       hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
+       
+                       hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
+
                        if (hfsmp->jnl && hfsmp->jvp == devvp) {
                            // close and re-open this with the new block size
                            journal_close(hfsmp->jnl);
@@ -1931,7 +2015,20 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        }
                }
        }
-       
+
+#if CONFIG_HFS_MOUNT_UNMAP
+       /* Enable UNMAPs for embedded SSDs only for now */
+       /*
+        * TODO: Should we enable this for CoreStorage volumes, too?
+        */
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
+               if (hfsmp->hfs_flags & HFS_UNMAP) {
+                       hfs_unmap_blocks(hfsmp);
+               }
+       }
+#endif
+
+
 #if CONFIG_HFS_ALLOC_RBTREE
        /* 
         * We spawn a thread to create the pair of red-black trees for this volume.
@@ -3456,11 +3553,11 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
        HFSMasterDirectoryBlock *mdb;
        struct buf *bp = NULL;
        int retval;
-       int sectorsize;
+       int sector_size;
        ByteCount namelen;
 
-       sectorsize = hfsmp->hfs_logical_block_size;
-       retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
+       sector_size = hfsmp->hfs_logical_block_size;
+       retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
        if (retval) {
                if (bp)
                        buf_brelse(bp);
@@ -3469,7 +3566,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
 
        lck_mtx_lock(&hfsmp->hfs_mutex);
 
-       mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
+       mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));
     
        mdb->drCrDate   = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
        mdb->drLsMod    = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
@@ -3524,8 +3621,8 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
        if (altflush) {
                struct buf *alt_bp = NULL;
 
-               if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
-                       bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);
+               if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) {
+                       bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);
 
                        (void) VNOP_BWRITE(alt_bp);
                } else if (alt_bp)
@@ -3870,9 +3967,10 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        u_int64_t  newblkcnt;
        u_int64_t  prev_phys_block_count;
        u_int32_t  addblks;
-       u_int64_t  sectorcnt;
-       u_int32_t  sectorsize;
-       u_int32_t  phys_sectorsize;
+       u_int64_t  sector_count;
+       u_int32_t  sector_size;
+       u_int32_t  phys_sector_size;
+       u_int32_t  overage_blocks;      
        daddr64_t  prev_alt_sector;
        daddr_t    bitmapblks;
        int  lockflags = 0;
@@ -3916,33 +4014,33 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                if (error)
                        return (error);
        }
-       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
+       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
                return (ENXIO);
        }
-       if (sectorsize != hfsmp->hfs_logical_block_size) {
+       if (sector_size != hfsmp->hfs_logical_block_size) {
                return (ENXIO);
        }
-       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
+       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
                return (ENXIO);
        }
-       if ((sectorsize * sectorcnt) < newsize) {
+       if ((sector_size * sector_count) < newsize) {
                printf("hfs_extendfs: not enough space on device\n");
                return (ENOSPC);
        }
-       error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
+       error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
        if (error) {
                if ((error != ENOTSUP) && (error != ENOTTY)) {
                        return (ENXIO);
                }
                /* If ioctl is not supported, force physical and logical sector size to be same */
-               phys_sectorsize = sectorsize;
+               phys_sector_size = sector_size;
        }
        oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 
        /*
         * Validate new size.
         */
-       if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
+       if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
                printf("hfs_extendfs: invalid size\n");
                return (EINVAL);
        }
@@ -3966,7 +4064,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
-
+       
        /* Start with a clean journal. */
        hfs_journal_flush(hfsmp, TRUE);
 
@@ -3979,6 +4077,21 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        transaction_begun = 1;
 
+
+       /* Update the hfsmp fields with the physical information about the device */
+       prev_phys_block_count = hfsmp->hfs_logical_block_count;
+       prev_alt_sector = hfsmp->hfs_alt_id_sector;
+
+       hfsmp->hfs_logical_block_count = sector_count;
+       /* 
+        * Note that the new AltVH location must be based on the device's EOF rather than the new
+        * filesystem's EOF, so we use logical_block_count here rather than newsize.
+        */
+       hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
+                                 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
+       hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
+
+
        /*
         * Note: we take the attributes lock in case we have an attribute data vnode
         * which needs to change size.
@@ -4005,9 +4118,19 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * After extending the file system, those bits can represent valid 
         * allocation blocks, so we mark all the bits from the end of current 
         * volume to end of allocation bitmap as "free".
+        *
+        * Figure out the number of overage blocks before proceeding though,
+        * so we don't add more bytes to our I/O than necessary.  
+        * First figure out the total number of blocks representable by the 
+        * end of the bitmap file vs. the total number of blocks in the new FS.
+        * Then subtract away the number of blocks in the current FS.  This is how much
+        * we can mark as free right now without having to grow the bitmap file.
         */
-       BlockMarkFreeUnused(vcb, vcb->totalBlocks, 
-                       (fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);
+       overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
+       overage_blocks = MIN (overage_blocks, newblkcnt);
+       overage_blocks -= vcb->totalBlocks;
+
+       BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
 
        if (bitmapblks > 0) {
                daddr64_t blkno;
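A worked example of the overage computation above, with made-up numbers (MIN() is the kernel's, from <sys/param.h>): each 4 KB block of the bitmap file covers 4096 * 8 = 32768 allocation blocks, so the bitmap's tail typically addresses more blocks than the volume has.

#include <stdint.h>
#include <sys/param.h>	/* MIN() */

/* Mirrors the overage_blocks math with concrete values. */
static uint32_t example_overage_blocks(void)
{
	uint32_t blockSize   = 4096;		/* allocation block size */
	uint32_t ff_blocks   = 100;		/* bitmap file length, in allocation blocks */
	uint32_t totalBlocks = 3000000;		/* blocks in the current FS */
	uint64_t newblkcnt   = 3200000;		/* blocks in the extended FS */

	uint32_t overage = ff_blocks * blockSize * 8;	/* 3,276,800 blocks representable */
	overage = MIN(overage, newblkcnt);		/* clamped to 3,200,000 */
	return overage - totalBlocks;			/* 200,000 blocks marked free now */
}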
@@ -4125,14 +4248,8 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        /*
         * Adjust file system variables for new space.
         */
-       prev_phys_block_count = hfsmp->hfs_logical_block_count;
-       prev_alt_sector = hfsmp->hfs_alt_id_sector;
-
        vcb->totalBlocks += addblks;
        vcb->freeBlocks += addblks;
-       hfsmp->hfs_logical_block_count = newsize / sectorsize;
-       hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
-                                 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
        MarkVCBDirty(vcb);
        error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        if (error) {
@@ -4223,11 +4340,30 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         *
         * We only update hfsmp->allocLimit if totalBlocks actually increased. 
         */
-       
        if (error == 0) {
                UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
        }
-       
+
+       /* Release all locks and sync the journal content before 
+        * checking whether the journal needs to be extended. 
+        */
+       if (lockflags) {
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               lockflags = 0;
+       }
+       if (transaction_begun) {
+               hfs_end_transaction(hfsmp);
+               hfs_journal_flush(hfsmp, TRUE);
+               transaction_begun = 0;
+       }
+
+       /* Increase the journal size, if required. */
+       error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
+       if (error) {
+               printf ("hfs_extendfs: Could not extend journal size\n");
+               goto out_noalloc;
+       }
+
        /* Log successful extending */
        printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
               hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
@@ -4239,7 +4375,8 @@ out:
                VTOC(vp)->c_blocks = fp->ff_blocks;
                
        }
-       
+
+out_noalloc:
        HFS_MOUNT_LOCK(hfsmp, TRUE);    
        hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);  
@@ -4530,6 +4667,14 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         */
        hfsmp->totalBlocks = newblkcnt;
        hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
+       hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
+
+       /*
+        * Note that although the logical block count is updated here, it is only done 
+        * for the benefit of the partition management software.  The change has not 
+        * actually been propagated to the disk device yet. 
+        */
+
        hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
        MarkVCBDirty(hfsmp);
        error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
@@ -4641,6 +4786,9 @@ hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sector
  * physical block number of any buffer cache block in the copied extent
  * (so that if the block is written, it will go through VNOP_BLOCKMAP to
  * determine the new physical block number).
+ *
+ * At this point, for regular files, we hold the truncate lock exclusive
+ * and the cnode lock exclusive.
  */
 static int
 hfs_copy_extent(
@@ -4677,24 +4825,45 @@ hfs_copy_extent(
                panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
 
 #if CONFIG_PROTECT
-       /* Prepare the CP blob and get it ready for use */
-       if (!vnode_issystem (vp) && vnode_isreg(vp) &&
-                       cp_fs_protected (hfsmp->hfs_mp)) {
+       /* 
+        * Prepare the CP blob and get it ready for use, if necessary.
+        *
+        * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
+        * because they are implicitly protected via the media key on iOS.  As such, they
+        * must not be relocated except with the media key.  So it is OK to not pass down
+        * a special cpentry to the IOMedia/LwVM code for handling. 
+        */
+       if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
                int cp_err = 0;
-               cp_err = cp_handle_relocate (cp);
+               /* 
+                * Ideally, the file whose extents we are about to manipulate is using the
+                * newer offset-based IVs so that we can manipulate it regardless of the 
+                * current lock state.  However, we must maintain support for older-style 
+                * EAs.  
+                * 
+                * For the older EA case, the IV was tied to the device LBA for file content.
+                * This means that encrypted data cannot be moved from one location to another
+                * in the filesystem without garbling the IV data.  As a result, we need to 
+                * access the file's plaintext because we cannot do our AES-symmetry trick 
+                * here.  This requires that we attempt a key-unwrap here (via cp_handle_relocate) 
+                * to make forward progress.  If the keys are unavailable then we will 
+                * simply stop the resize in its tracks here since we cannot move 
+                * this extent at this time.
+                */
+               if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
+                       cp_err = cp_handle_relocate(cp, hfsmp);
+               }
+
                if (cp_err) {
-                       /* 
-                        * can't copy the file because we couldn't set up keys.
-                        * bail out 
-                        */
+                       printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
                        return cp_err;
                }
-               else {
-                       cpenabled = 1;
-               }
+
+               cpenabled = 1;
        }
 #endif
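To make the IV discussion above concrete: the tweak that seeds per-block encryption is derived from the device LBA in the old EA style, but from the file offset in the new style. Moving an extent changes the LBA and not the offset. A schematic, not the real CP code:

/* Schematic only: what seeds the per-block cipher state. */
static uint64_t tweak_lba_style(uint64_t device_lba)     { return device_lba; }
static uint64_t tweak_offset_style(uint64_t file_offset) { return file_offset; }

/*
 * With the LBA-style tweak, relocation requires unwrapping the file's own
 * keys and re-encrypting at the new LBA (hence cp_handle_relocate).  With
 * the offset-style tweak, the decrypt-then-re-encrypt through the temporary
 * resize key cancels out, so the original ciphertext lands unchanged at the
 * new LBA and still decrypts correctly under the file's own key.
 */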
 
+
        /*
         * Determine the I/O size to use
         *
@@ -4725,10 +4894,31 @@ hfs_copy_extent(
                buf_setblkno(bp, srcSector);
                buf_setlblkno(bp, srcSector);
 
-               /* Attach the CP to the buffer */
+               /*
+                * Note that because this is an I/O to the device vp
+                * it is correct to have lblkno and blkno both point to the 
+                * start sector being read from.  If it were being issued against the
+                * underlying file then that would be different.
+                */
+
+               /* Attach the new CP blob to the buffer if needed */
 #if CONFIG_PROTECT
                if (cpenabled) {
-                       buf_setcpaddr (bp, cp->c_cpentry);
+                       if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
+                               /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
+                               cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
+                               buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
+                       }
+                       else {
+                               /* 
+                                * Use the cnode's cp key.  This file is tied to the 
+                                * LBAs of the physical blocks that it occupies.
+                                */
+                               buf_setcpaddr (bp, cp->c_cpentry);
+                       }
+               
+                       /* Initialize the content protection file offset to start at 0 */
+                       buf_setcpoff (bp, 0);
                }
 #endif
 
@@ -4737,6 +4927,12 @@ hfs_copy_extent(
                if (!err)
                        err = buf_biowait(bp);
                if (err) {
+#if CONFIG_PROTECT
+                       /* Turn the flag off in error cases. */
+                       if (cpenabled) {
+                               cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
+                       }
+#endif
                        printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
                        break;
                }
@@ -4751,17 +4947,39 @@ hfs_copy_extent(
                        buf_markfua(bp);
 
 #if CONFIG_PROTECT
-               /* Attach the CP to the buffer */
+               /* Attach the CP to the buffer if needed */
                if (cpenabled) {
-                       buf_setcpaddr (bp, cp->c_cpentry);
-               }
+                       if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
+                               buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
+                       }
+                       else {
+                               /* 
+                                * Use the cnode's CP key.  This file is still tied
+                                * to the LBAs of the physical blocks that it occupies.
+                                */
+                               buf_setcpaddr (bp, cp->c_cpentry);
+                       }
+                       /* 
+                        * The last STRATEGY call may have updated the cp file offset behind our
+                        * back, so we cannot trust it.  Re-initialize the content protection
+                        * file offset back to 0 before initiating the write portion of this I/O.
+                        */
+                       buf_setcpoff (bp, 0);
+               }                       
 #endif
                        
                /* Do the write */
                vnode_startwrite(hfsmp->hfs_devvp);
                err = VNOP_STRATEGY(bp);
-               if (!err)
+               if (!err) {
                        err = buf_biowait(bp);
+               }
+#if CONFIG_PROTECT
+               /* Turn the flag off regardless once the strategy call finishes. */
+               if (cpenabled) {
+                       cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
+               }
+#endif
                if (err) {
                        printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
                        break;
@@ -5564,7 +5782,7 @@ out:
 static void 
 hfs_truncatefs_progress(struct hfsmount *hfsmp)
 {
-       u_int32_t cur_progress;
+       u_int32_t cur_progress = 0;
 
        hfs_resize_progress(hfsmp, &cur_progress);
        if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
@@ -5630,6 +5848,17 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
 
        cp = VTOC(vp);
 
+       if (hfs_resize_debug) {
+               const char *filename = (const char *) cp->c_desc.cd_nameptr;
+               int namelen = cp->c_desc.cd_namelen;
+
+               if (filename == NULL) {
+                       filename = "";
+                       namelen = 0;
+               }
+               printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
+       }
+
        MALLOC(extent_info, struct hfs_reclaim_extent_info *, 
               sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
        if (extent_info == NULL) {
@@ -5895,6 +6124,7 @@ struct hfs_journal_relocate_args {
        struct hfsmount *hfsmp;
        vfs_context_t context;
        u_int32_t newStartBlock;
+       u_int32_t newBlockCount;
 };
 
 static errno_t
@@ -5910,7 +6140,7 @@ hfs_journal_relocate_callback(void *_args)
                hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
        if (error) {
-               printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
+               printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
                if (bp) {
                        buf_brelse(bp);
                }
@@ -5918,18 +6148,18 @@ hfs_journal_relocate_callback(void *_args)
        }
        jibp = (JournalInfoBlock*) buf_dataptr(bp);
        jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
-       jibp->size = SWAP_BE64(hfsmp->jnl_size);
+       jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
        if (journal_uses_fua(hfsmp->jnl))
                buf_markfua(bp);
        error = buf_bwrite(bp);
        if (error) {
-               printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
+               printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
                return error;
        }
        if (!journal_uses_fua(hfsmp->jnl)) {
                error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
                if (error) {
-                       printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
+                       printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
                        error = 0;              /* Don't fail the operation. */
                }
        }
@@ -5938,8 +6168,21 @@ hfs_journal_relocate_callback(void *_args)
 }
 
 
+/* Type of resize operation in progress */
+#define HFS_RESIZE_TRUNCATE    1
+#define HFS_RESIZE_EXTEND      2
+
+/* 
+ * Core function to relocate the journal file.  It takes the desired 
+ * size of the relocated journal, so the caller can change the journal 
+ * size as part of the move.  The function takes care of updating the 
+ * journal info block and all other data structures correctly.
+ *
+ * Note: This function starts a transaction and grabs the btree locks. 
+ */
 static int
-hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
+hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
 {
        int error;
        int journal_err;
@@ -5948,51 +6191,70 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_conte
        u_int32_t newStartBlock;
        u_int32_t oldBlockCount;
        u_int32_t newBlockCount;
+       u_int32_t jnlBlockCount;
+       u_int32_t alloc_skipfreeblks;
        struct cat_desc journal_desc;
        struct cat_attr journal_attr;
        struct cat_fork journal_fork;
        struct hfs_journal_relocate_args callback_args;
 
-       if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
-               /* The journal does not require relocation */
-               return 0;
+       /* Calculate the number of allocation blocks required for the journal */ 
+       jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
+
+       /* 
+        * During truncatefs(), the volume free block count is updated
+        * before relocating data and reflects the total number of free
+        * blocks that will exist on volume after the resize is successful.
+        * This means that the allocation blocks required for relocation 
+        * have already been reserved and accounted for in the free block 
+        * count.  Therefore, block allocation and deallocation routines 
+        * can skip the free block check by passing the 
+        * HFS_ALLOC_SKIPFREEBLKS flag. 
+        *
+        * This special handling is not required when the file system 
+        * is being extended as we want all the allocated and deallocated
+        * blocks to be accounted for correctly. 
+        */
+       if (resize_type == HFS_RESIZE_TRUNCATE) {
+               alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS;
+       } else {
+               alloc_skipfreeblks = 0;
        }
 
        error = hfs_start_transaction(hfsmp);
        if (error) {
-               printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
+               printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
                return error;
        }
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
        
-       oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
-       
-       /* TODO: Allow the journal to change size based on the new volume size. */
-       error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, 
-                       HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, 
+       error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, 
+                       HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | alloc_skipfreeblks,
                         &newStartBlock, &newBlockCount);
        if (error) {
-               printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
+               printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
                goto fail;
        }
-       if (newBlockCount != oldBlockCount) {
-               printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
+       if (newBlockCount != jnlBlockCount) {
+               printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
                goto free_fail;
        }
        
-       error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
+       error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
        if (error) {
-               printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
+               printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
                goto free_fail;
        }
 
-       /* Update the catalog record for .journal */
-       error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
+       oldStartBlock = journal_fork.cf_extents[0].startBlock;
+       oldBlockCount = journal_fork.cf_extents[0].blockCount;
+       error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
        if (error) {
-               printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
+               printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
                goto free_fail;
        }
-       oldStartBlock = journal_fork.cf_extents[0].startBlock;
+
+       /* Update the catalog record for .journal */
        journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
        journal_fork.cf_extents[0].startBlock = newStartBlock;
        journal_fork.cf_extents[0].blockCount = newBlockCount;
@@ -6000,54 +6262,117 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_conte
        error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
        cat_releasedesc(&journal_desc);  /* all done with cat descriptor */
        if (error) {
-               printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
+               printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
                goto free_fail;
        }
-       callback_args.hfsmp = hfsmp;
-       callback_args.context = context;
-       callback_args.newStartBlock = newStartBlock;
        
-       error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
-               (off_t)newBlockCount*hfsmp->blockSize, 0,
-               hfs_journal_relocate_callback, &callback_args);
-       if (error) {
-               /* NOTE: journal_relocate will mark the journal invalid. */
-               printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
-               goto fail;
+       /*
+        * If the journal is part of the file system, then tell the journal
+        * code about the new location.  If the journal is on an external
+        * device, then just keep using it as-is.
+        */
+       if (hfsmp->jvp == hfsmp->hfs_devvp) {
+               callback_args.hfsmp = hfsmp;
+               callback_args.context = context;
+               callback_args.newStartBlock = newStartBlock;
+               callback_args.newBlockCount = newBlockCount;
+
+               error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
+                       (off_t)newBlockCount*hfsmp->blockSize, 0,
+                       hfs_journal_relocate_callback, &callback_args);
+               if (error) {
+                       /* NOTE: journal_relocate will mark the journal invalid. */
+                       printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
+                       goto fail;
+               }
+               if (hfs_resize_debug) {
+                       printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+               }
+               hfsmp->jnl_start = newStartBlock;
+               hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
        }
-       hfsmp->jnl_start = newStartBlock;
-       hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
 
        hfs_systemfile_unlock(hfsmp, lockflags);
        error = hfs_end_transaction(hfsmp);
        if (error) {
-               printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
-       }
-       
-       /* Account for the blocks relocated and print progress */
-       hfsmp->hfs_resize_blocksmoved += oldBlockCount;
-       hfs_truncatefs_progress(hfsmp);
-       if (!error) {
-               printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", 
-                               oldBlockCount, hfsmp->vcbVN);
-               if (hfs_resize_debug) {
-                       printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
-               }
+               printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
        }
+
        return error;
 
 free_fail:
        journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); 
        if (journal_err) {
-               printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
+               printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
                hfs_mark_volume_inconsistent(hfsmp);
        }
 fail:
        hfs_systemfile_unlock(hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
        if (hfs_resize_debug) {
-               printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
+               printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
+       }
+       return error;
+}
+
+
+/* 
+ * Relocate the journal file when the file system is being truncated.  
+ * We do not down-size the journal when the file system size is 
+ * reduced, so we always provide the current journal size to the 
+ * relocate code. 
+ */
+static int 
+hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
+{
+       int error = 0;
+       u_int32_t startBlock;
+       u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
+
+       /*
+        * Figure out the location of the .journal file.  When the journal
+        * is on an external device, we have to look the file up in the catalog.
+        */
+       if (hfsmp->jvp == hfsmp->hfs_devvp) {
+               startBlock = hfsmp->jnl_start;
+               blockCount = hfsmp->jnl_size / hfsmp->blockSize;
+       } else {
+               u_int32_t fileid;
+               u_int32_t old_jnlfileid;
+               struct cat_attr attr;
+               struct cat_fork fork;
+
+               /*
+                * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
+                * is set, and it is trying to hide the .journal file.  So temporarily
+                * unset the field while calling GetFileInfo.
+                */
+               old_jnlfileid = hfsmp->hfs_jnlfileid;
+               hfsmp->hfs_jnlfileid = 0;
+               fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
+               hfsmp->hfs_jnlfileid = old_jnlfileid;
+               if (fileid != old_jnlfileid) {
+                       printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
+                       return EIO;
+               }
+
+               startBlock = fork.cf_extents[0].startBlock;
+               blockCount = fork.cf_extents[0].blockCount;
        }
+
+       if (startBlock + blockCount <= allocLimit) {
+               /* The journal file does not require relocation */
+               return 0;
+       }
+
+       error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
+       if (error == 0) {
+               hfsmp->hfs_resize_blocksmoved += blockCount;
+               hfs_truncatefs_progress(hfsmp);
+               printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", 
+                               blockCount, hfsmp->vcbVN);
+       }
+
        return error;
 }
 
@@ -6134,7 +6459,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
        /* Update the catalog record for .journal_info_block */
        error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
        if (error) {
-               printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
+               printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
                goto fail;
        }
        oldBlock = jib_fork.cf_extents[0].startBlock;
@@ -6195,6 +6520,72 @@ fail:
 }
 
 
+static u_int64_t
+calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count) 
+{
+       u_int64_t journal_size;
+       u_int32_t journal_scale;
+
+#define DEFAULT_JOURNAL_SIZE (8*1024*1024)
+#define MAX_JOURNAL_SIZE     (512*1024*1024)
+
+       /* Calculate the journal size for this volume.  We want 
+        * at least 8 MB of journal for each 100 GB of disk space,
+        * capped at 512 MB.  If the result is smaller than one 
+        * allocation block, we use one allocation block instead.
+        */
+       journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
+       journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
+       if (journal_size > MAX_JOURNAL_SIZE) {
+               journal_size = MAX_JOURNAL_SIZE;
+       }
+       if (journal_size < hfsmp->blockSize) {
+               journal_size = hfsmp->blockSize;
+       }
+       return journal_size;
+}
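Checking the sizing rule with concrete numbers: a 250 GiB volume yields journal_scale = 2 and a 24 MB journal, and the 512 MB cap first applies at 64 * 100 GiB, where the uncapped value would be 520 MB. A small worked sketch:

#include <stdint.h>

/* Worked example of calculate_journal_size() with a made-up volume size. */
static uint64_t example_journal_size(void)
{
	uint64_t bytes   = 250ULL * 1024 * 1024 * 1024;			/* 250 GiB volume */
	uint32_t scale   = bytes / (100ULL * 1024 * 1024 * 1024);	/* = 2 */
	uint64_t journal = 8ULL * 1024 * 1024 * (scale + 1);		/* = 24 MB */

	return journal;
}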
+               
+
+/* 
+ * Calculate the expected journal size based on current partition size.  
+ * If the size of the current journal is less than the calculated size, 
+ * force journal relocation with the new journal size. 
+ */
+static int 
+hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
+{
+       int error = 0;
+       u_int64_t calc_journal_size;
+
+       if (hfsmp->jvp != hfsmp->hfs_devvp) {
+               if (hfs_resize_debug) {
+                       printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
+               }
+               return 0;
+       }
+
+       calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
+       if (calc_journal_size <= hfsmp->jnl_size) {
+               /* The journal size requires no modification */
+               goto out;
+       }
+
+       if (hfs_resize_debug) {
+               printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
+       }
+
+       /* Extend the journal to the new calculated size */
+       error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
+       if (error == 0) {
+               printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n", 
+                               hfsmp->jnl_size, hfsmp->vcbVN);
+       }
+out:
+       return error;
+}
+
+
 /*
  * This function traverses through all extended attribute records for a given 
  * fileID, and calls function that reclaims data blocks that exist in the 
@@ -6524,8 +6915,28 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
        prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
 
        if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
-               return ENOMEM;
+               error = ENOMEM; 
+               goto reclaim_filespace_done;
+       }
+
+#if CONFIG_PROTECT
+       int keys_generated = 0;
+       /*
+        * For content-protected filesystems, we may need to relocate files that
+        * are encrypted.  If they use the new-style offset-based IVs, then
+        * we can move them regardless of the lock state.  We create a temporary
+        * key here that we use to read/write the data, then we discard it at the
+        * end of the function.
+        */
+       if (cp_fs_protected (hfsmp->hfs_mp)) {
+               error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
+               if (error) {
+                       printf("hfs_reclaim_filespace: Error generating temporary keys for resize (%d)\n", error);
+                       goto reclaim_filespace_done;
+               }
+               keys_generated = 1;     /* remember to destroy the temporary key on exit */
        }
+#endif
+
        bzero(iterator, sizeof(*iterator));
 
        btdata.bufferAddress = &filerec;
@@ -6556,6 +6967,9 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
 
                /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
                if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
+                       if (hfs_resize_debug) {
+                               printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
+                       }
                        continue;
                }
 
@@ -6615,7 +7029,16 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
                                files_moved, hfsmp->vcbVN);
        }
 
-       kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+reclaim_filespace_done:
+       if (iterator) {
+               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+       }
+
+#if CONFIG_PROTECT
+       if (keys_generated) {
+               cp_entry_destroy(&hfsmp->hfs_resize_cpentry);
+       }
+#endif
        return error;
 }
 
@@ -7153,7 +7576,7 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
                                if (!error) {
                                        strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
                                        volname_length = strlen ((const char*)vcb->vcbVN);
-#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
+#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
                                        /* Send the volume name down to CoreStorage if necessary */     
                                        error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
                                        if (error == 0) {
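On the DKIOCCSSETLVNAME change above: the size argument baked into an _IOW() code declares how many bytes sit behind the caller's pointer, and converted_volname here is a 256-byte buffer, so a char[1024] encoding risks a 768-byte overread past its end; shrinking the encoding to char[256] matches the buffer. The encoded size can be inspected with IOCPARM_LEN() from <sys/ioccom.h> (the LVNAME_* macro names below are illustrative):

#include <sys/ioccom.h>

#define LVNAME_OLD _IOW('d', 198, char[1024])	/* declares 1024 bytes of payload */
#define LVNAME_NEW _IOW('d', 198, char[256])	/* declares 256 bytes of payload  */

/* IOCPARM_LEN() recovers the payload size encoded in the request code. */
int old_len = IOCPARM_LEN(LVNAME_OLD);	/* 1024 */
int new_len = IOCPARM_LEN(LVNAME_NEW);	/* 256  */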
@@ -7256,6 +7679,7 @@ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
 {
        int retval = 0;
+       int error = 0;
        struct mount *mp = NULL;
        struct hfs_mount_args *args = NULL;
 
@@ -7285,7 +7709,10 @@ static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
        buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
        
        /* FSYNC the devnode to be sure all data has been flushed */
-       retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
+       error = VNOP_FSYNC(devvp, MNT_WAIT, context);
+       if (error) {
+               retval = error;
+       }
 
 out:
        if (mp) {
index 1032324317b75165d25011e572e18ecc282272ef..84a81a948c34ae4dd815ac6674df7ec66c93651b 100644 (file)
@@ -676,7 +676,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        volname_length = strlen ((const char*)vcb->vcbVN);
        cat_releasedesc(&cndesc);
        
-#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
+#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
 
 
        /* Send the volume name down to CoreStorage if necessary */     
@@ -3092,7 +3092,7 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a
                return 0;
        }
        
-       if (VTOC(vp)->c_flags & UF_TRACKED) {
+       if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
                // the file has the tracked bit set, so send an event to the tracked-file handler
                int error;
                
@@ -3137,7 +3137,7 @@ check_for_dataless_file(struct vnode *vp, uint64_t op_type)
 {
        int error;
 
-       if (vp == NULL || (VTOC(vp)->c_flags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+       if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
                // there's nothing to do, it's not dataless
                return 0;
        }
@@ -3155,7 +3155,7 @@ check_for_dataless_file(struct vnode *vp, uint64_t op_type)
                        // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
                        return EINTR;
                }
-       } else if (VTOC(vp)->c_flags & UF_COMPRESSED) {
+       } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
                //
                // if we're here, the dataless bit is still set on the file 
                // which means it didn't get handled.  we return an error
index 623f61d188b117cf20936ed14dae90854ed7b811..e48966c3cc28640e3dc375a8844e719a54fc3718 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -33,6 +33,7 @@
 #include <sys/dirent.h>
 #include <sys/stat.h>
 #include <sys/buf.h>
+#include <sys/buf_internal.h>
 #include <sys/mount.h>
 #include <sys/vnode_if.h>
 #include <sys/vnode_internal.h>
@@ -47,7 +48,7 @@
 #include <sys/uio_internal.h>
 #include <sys/fsctl.h>
 #include <sys/cprotect.h>
-
+#include <sys/xattr.h>
 #include <string.h>
 
 #include <miscfs/specfs/specdev.h>
@@ -89,11 +90,13 @@ int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
 int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
                           int, int, int, struct vnode *, int);
 
+/* Used here and in cnode teardown -- for symlinks */
+int hfs_removefile_callback(struct buf *bp, void *hfsmp);
+
 int hfs_movedata (struct vnode *, struct vnode*);
 static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, 
                                                  struct filefork *dstfork, struct cnode *dst);
 
-
 #if FIFO
 static int hfsfifo_read(struct vnop_read_args *);
 static int hfsfifo_write(struct vnop_write_args *);
@@ -136,6 +139,27 @@ int hfsspec_close(struct vnop_close_args *);
 *
 *****************************************************************************/
 
+/*
+ * Is the given cnode either the .journal or .journal_info_block file on
+ * a volume with an active journal?  Many VNOPs use this to deny access
+ * to those files.
+ *
+ * Note: the .journal file on a volume with an external journal still
+ * returns true here, even though it does not actually hold the contents
+ * of the volume's journal.
+ */
+static _Bool
+hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp)
+{
+       if (hfsmp->jnl != NULL &&
+           (cp->c_fileid == hfsmp->hfs_jnlinfoblkid ||
+            cp->c_fileid == hfsmp->hfs_jnlfileid)) {
+               return true;
+       } else {
+               return false;
+       }
+}
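The new helper replaces several open-coded checks that compared a file's first extent start block against vcbJinfoBlock and jnl_start (see the hfs_chmod and hfs_vnop_exchange hunks below). Callers reduce to a single test, for example:

	if (hfs_is_journal_file(hfsmp, VTOC(vp))) {
		return (EPERM);
	}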
+
 /*
  * Create a regular file.
  */
@@ -307,7 +331,7 @@ hfs_file_is_compressed(struct cnode *cp, int skiplock)
        int ret = 0;
        
        /* fast check to see if file is compressed. If flag is clear, just answer no */
-       if (!(cp->c_flags & UF_COMPRESSED)) {
+       if (!(cp->c_bsdflags & UF_COMPRESSED)) {
                return 0;
        }
 
@@ -471,15 +495,15 @@ hfs_vnop_open(struct vnop_open_args *ap)
        /*
         * Files marked append-only must be opened for appending.
         */
-       if ((cp->c_flags & APPEND) && !vnode_isdir(vp) &&
+       if ((cp->c_bsdflags & APPEND) && !vnode_isdir(vp) &&
            (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
                return (EPERM);
 
        if (vnode_isreg(vp) && !UBCINFOEXISTS(vp))
                return (EBUSY);  /* file is in use by the kernel */
 
-       /* Don't allow journal file to be opened externally. */
-       if (cp->c_fileid == hfsmp->hfs_jnlfileid)
+       /* Don't allow the journal to be opened externally. */
+       if (hfs_is_journal_file(hfsmp, cp))
                return (EPERM);
 
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
@@ -706,7 +730,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                vap->va_uid = cp->c_uid;
                vap->va_gid = cp->c_gid;
                vap->va_mode = cp->c_mode;
-               vap->va_flags = cp->c_flags;
+               vap->va_flags = cp->c_bsdflags;
                vap->va_supported |= VNODE_ATTR_AUTH & ~VNODE_ATTR_va_acl;
 
                if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) {
@@ -926,13 +950,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                }
        }
 
-
        /* XXX is this really a good 'optimal I/O size'? */
        vap->va_iosize = hfsmp->hfs_logBlockSize;
        vap->va_uid = cp->c_uid;
        vap->va_gid = cp->c_gid;
        vap->va_mode = cp->c_mode;
-       vap->va_flags = cp->c_flags;
+       vap->va_flags = cp->c_bsdflags;
 
        /*
         * Exporting file IDs from HFS Plus:
@@ -1122,15 +1145,15 @@ hfs_vnop_setattr(ap)
 
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
                return (error);
        }
 #endif /* CONFIG_PROTECT */
 
        hfsmp = VTOHFS(vp);
 
-       /* Don't allow modification of the journal file. */
-       if (hfsmp->hfs_jnlfileid == VTOC(vp)->c_fileid) {
+       /* Don't allow modification of the journal. */
+       if (hfs_is_journal_file(hfsmp, VTOC(vp))) {
                return (EPERM);
        }
 
@@ -1248,7 +1271,7 @@ hfs_vnop_setattr(ap)
                u_int16_t *fdFlags;
 
 #if HFS_COMPRESSION
-               if ((cp->c_flags ^ vap->va_flags) & UF_COMPRESSED) {
+               if ((cp->c_bsdflags ^ vap->va_flags) & UF_COMPRESSED) {
                        /*
                         * the UF_COMPRESSED was toggled, so reset our cached compressed state
                         * but we don't want to actually do the update until we've released the cnode lock down below
@@ -1259,7 +1282,7 @@ hfs_vnop_setattr(ap)
                }
 #endif
 
-               cp->c_flags = vap->va_flags;
+               cp->c_bsdflags = vap->va_flags;
                cp->c_touch_chgtime = TRUE;
                
                /*
@@ -1376,14 +1399,9 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc
        if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord)
                return (0);
 
-       // XXXdbg - don't allow modification of the journal or journal_info_block
-       if (VTOHFS(vp)->jnl && cp && cp->c_datafork) {
-               struct HFSPlusExtentDescriptor *extd;
-
-               extd = &cp->c_datafork->ff_extents[0];
-               if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
-                       return EPERM;
-               }
+       // Don't allow modification of the journal or journal_info_block
+       if (hfs_is_journal_file(VTOHFS(vp), cp)) {
+               return EPERM;
        }
 
 #if OVERRIDE_UNKNOWN_PERMISSIONS
@@ -1422,7 +1440,7 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co
        }
  
        /* If immutable bit set, nobody gets to write it. */
-       if (considerFlags && (cp->c_flags & IMMUTABLE))
+       if (considerFlags && (cp->c_bsdflags & IMMUTABLE))
                return (EPERM);
 
        /* Otherwise, user id 0 always gets access. */
@@ -1618,6 +1636,18 @@ hfs_vnop_exchange(ap)
        orig_from_ctime = VTOC(from_vp)->c_ctime;
        orig_to_ctime = VTOC(to_vp)->c_ctime;
 
+
+#if CONFIG_PROTECT
+       /* 
+        * Do not allow exchangedata/F_MOVEDATAEXTENTS on data-protected filesystems 
+        * because the EAs will not be swapped.  As a result, the persistent keys would not
+        * match and the files will be garbage.
+        */
+       if (cp_fs_protected (vnode_mount(from_vp))) {
+               return EINVAL;
+       }
+#endif
+
 #if HFS_COMPRESSION
        if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) {
                if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) {
@@ -1639,8 +1669,7 @@ hfs_vnop_exchange(ap)
        if ((ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) == 0) {
                check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
                check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
-       }
-       else {
+       } else {
                /* 
                 * We're doing a data-swap.
                 * Take the truncate lock/cnode lock, then verify there are no mmap references.
@@ -1675,7 +1704,6 @@ hfs_vnop_exchange(ap)
                }
        }
 
-       
        if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK)))
                return (error);
 
@@ -1690,27 +1718,13 @@ hfs_vnop_exchange(ap)
                goto exit;
        }
 
-       // XXXdbg - don't allow modification of the journal or journal_info_block
-       if (hfsmp->jnl) {
-               struct HFSPlusExtentDescriptor *extd;
-
-               if (from_cp->c_datafork) {
-                       extd = &from_cp->c_datafork->ff_extents[0];
-                       if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
-                               error = EPERM;
-                               goto exit;
-                       }
-               }
-
-               if (to_cp->c_datafork) {
-                       extd = &to_cp->c_datafork->ff_extents[0];
-                       if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) {
-                               error = EPERM;
-                               goto exit;
-                       }
-               }
+       // Don't allow modification of the journal or journal_info_block
+       if (hfs_is_journal_file(hfsmp, from_cp) ||
+           hfs_is_journal_file(hfsmp, to_cp)) {
+               error = EPERM;
+               goto exit;
        }
-
+       
        /* 
         * Ok, now that all of the pre-flighting is done, call the underlying
         * function if needed.
@@ -1720,7 +1734,7 @@ hfs_vnop_exchange(ap)
                goto exit;
        }
        
-       
+
        if ((error = hfs_start_transaction(hfsmp)) != 0) {
            goto exit;
        }
@@ -1802,7 +1816,7 @@ hfs_vnop_exchange(ap)
        from_cp->c_ctime = to_cp->c_ctime;
        from_cp->c_gid = to_cp->c_gid;
        from_cp->c_uid = to_cp->c_uid;
-       from_cp->c_flags = to_cp->c_flags;
+       from_cp->c_bsdflags = to_cp->c_bsdflags;
        from_cp->c_mode = to_cp->c_mode;
        from_cp->c_linkcount = to_cp->c_linkcount;
        from_cp->c_flag = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS);
@@ -1818,7 +1832,7 @@ hfs_vnop_exchange(ap)
        to_cp->c_ctime = tempattr.ca_ctime;
        to_cp->c_gid = tempattr.ca_gid;
        to_cp->c_uid = tempattr.ca_uid;
-       to_cp->c_flags = tempattr.ca_flags;
+       to_cp->c_bsdflags = tempattr.ca_flags;
        to_cp->c_mode = tempattr.ca_mode;
        to_cp->c_linkcount = tempattr.ca_linkcount;
        to_cp->c_flag = tempflag;
@@ -1832,14 +1846,14 @@ hfs_vnop_exchange(ap)
         * When a file moves out of "Cleanup At Startup"
         * we can drop its NODUMP status.
         */
-       if ((from_cp->c_flags & UF_NODUMP) &&
+       if ((from_cp->c_bsdflags & UF_NODUMP) &&
            (from_cp->c_parentcnid != to_cp->c_parentcnid)) {
-               from_cp->c_flags &= ~UF_NODUMP;
+               from_cp->c_bsdflags &= ~UF_NODUMP;
                from_cp->c_touch_chgtime = TRUE;
        }
-       if ((to_cp->c_flags & UF_NODUMP) &&
+       if ((to_cp->c_bsdflags & UF_NODUMP) &&
            (to_cp->c_parentcnid != from_cp->c_parentcnid)) {
-               to_cp->c_flags &= ~UF_NODUMP;
+               to_cp->c_bsdflags &= ~UF_NODUMP;
                to_cp->c_touch_chgtime = TRUE;
        }
 
@@ -1867,7 +1881,7 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap)
                int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
                time_t orig_ctime = VTOC(vp)->c_ctime;
                
-               if (!compressed && (VTOC(vp)->c_flags & UF_COMPRESSED)) {
+               if (!compressed && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
                        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
                        if (error != 0) {
                                return error;
@@ -1919,7 +1933,7 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap)
  * 
  */
 int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
-
+       
        struct cnode *from_cp;
        struct cnode *to_cp;
        struct hfsmount *hfsmp = NULL;
@@ -1928,13 +1942,13 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
        int lockflags = 0;
        int overflow_blocks;
        int rsrc = 0;
-
-
+       
+       
        /* Get the HFS pointers */
        from_cp = VTOC(from_vp);
        to_cp = VTOC(to_vp);
        hfsmp = VTOHFS(from_vp);
-
+       
        /* Verify that neither source/dest file is open-unlinked */
        if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) {
                error = EBUSY;
@@ -1966,7 +1980,7 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
        if (from_cp->c_rsrc_vp == from_vp) {
                rsrc = 1;
        }
-
+       
        /* 
         * We assume that the destination file is already empty. 
         * Verify that it is.
@@ -1983,7 +1997,7 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
                        goto movedata_exit;
                }
        }
-
+       
        /* If the source has the rsrc open, make sure the destination is also the rsrc */
        if (rsrc) {
                if (to_vp != to_cp->c_rsrc_vp) {
@@ -1996,9 +2010,9 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
                if (to_vp != to_cp->c_vp) {
                        error = EINVAL;
                        goto movedata_exit;
-               }          
+               }          
        }
-
+       
        /* 
         * See if the source file has overflow extents.  If it doesn't, we don't
         * need to call into MoveData, and the catalog will be enough.
@@ -2009,15 +2023,15 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
        else {
                overflow_blocks = overflow_extents(from_cp->c_datafork);
        }       
-
+       
        if ((error = hfs_start_transaction (hfsmp)) != 0) {
                goto movedata_exit;
        }
        started_tr = 1;
-
+       
        /* Lock the system files: catalog, extents, attributes */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-
+       
        /* Copy over any catalog allocation data into the new spot. */
        if (rsrc) {
                if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){
@@ -2031,7 +2045,7 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
                        goto movedata_exit;
                }
        }
-
+       
        /* 
         * Note that because all we're doing is moving the extents around, we can 
         * probably do this in a single transaction:  Each extent record (group of 8) 
@@ -2052,7 +2066,7 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
                        error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0);
                }
        }
-
+       
        if (error) {
                /* Reverse the operation. Copy the fork data back into the source */
                if (rsrc) {
@@ -2067,52 +2081,52 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
                struct cat_fork *src_rsrc = NULL;
                struct cat_fork *dst_data = NULL;
                struct cat_fork *dst_rsrc = NULL;
-
+               
                /* Touch the times*/
                to_cp->c_touch_acctime = TRUE;
                to_cp->c_touch_chgtime = TRUE;
                to_cp->c_touch_modtime = TRUE;
-
+               
                from_cp->c_touch_acctime = TRUE;
                from_cp->c_touch_chgtime = TRUE;
                from_cp->c_touch_modtime = TRUE;
-
+               
                hfs_touchtimes(hfsmp, to_cp);
                hfs_touchtimes(hfsmp, from_cp);
-
+               
                if (from_cp->c_datafork) {
                        src_data = &from_cp->c_datafork->ff_data;
                }
                if (from_cp->c_rsrcfork) {
                        src_rsrc = &from_cp->c_rsrcfork->ff_data;
                }
-
+               
                if (to_cp->c_datafork) {
                        dst_data = &to_cp->c_datafork->ff_data;
                }
                if (to_cp->c_rsrcfork) {
                        dst_rsrc = &to_cp->c_rsrcfork->ff_data;
                }
-
+               
                /* Update the catalog nodes */
                (void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, 
-                               src_data, src_rsrc);
-
+                                                 src_data, src_rsrc);
+               
                (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, 
-                               dst_data, dst_rsrc);
-
+                                                 dst_data, dst_rsrc);
+               
        }
        /* unlock the system files */
        hfs_systemfile_unlock(hfsmp, lockflags);
-
-
+       
+       
 movedata_exit:
        if (started_tr) {
                hfs_end_transaction(hfsmp);
        }
-
+       
        return error;
-
+       
 }              
 
 /* 
@@ -2123,31 +2137,31 @@ movedata_exit:
  * non overflow-extent extents into the destination here.
  */
 static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp,
-               struct filefork *dstfork, struct cnode *dst_cp) {
+                                                 struct filefork *dstfork, struct cnode *dst_cp) {
        struct rl_entry *invalid_range;
        int size = sizeof(struct HFSPlusExtentDescriptor);
        size = size * kHFSPlusExtentDensity;
-
+       
        /* If the dstfork has any invalid ranges, bail out */
        invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges);
        if (invalid_range != NULL) {
                return EFBIG;
        }
-
+       
        if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) {
                return EFBIG;
        }
-
+       
        /* First copy the invalid ranges */
        while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) {
                off_t start = invalid_range->rl_start;
                off_t end = invalid_range->rl_end;
-
+               
                /* Remove it from the srcfork and add it to dstfork */
                rl_remove(start, end, &srcfork->ff_invalidranges);
                rl_add(start, end, &dstfork->ff_invalidranges);
        }
-
+       
        /* 
         * Ignore the ff_union.  We don't move symlinks or system files.  
         * Now copy the in-catalog extent information
@@ -2156,20 +2170,20 @@ static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp,
        dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size;
        dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks;
        dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks;
-
+       
        /* just memcpy the whole array of extents to the new location. */
        memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size);
-
+       
        /* 
         * Copy the cnode attribute data.
         *
         */
        src_cp->c_blocks -= srcfork->ff_data.cf_vblocks;
        src_cp->c_blocks -= srcfork->ff_data.cf_blocks;
-
+       
        dst_cp->c_blocks += srcfork->ff_data.cf_vblocks;
        dst_cp->c_blocks += srcfork->ff_data.cf_blocks;
-
+       
        /* Now delete the entries in the source fork */
        srcfork->ff_data.cf_size = 0;
        srcfork->ff_data.cf_new_size = 0;
@@ -2181,8 +2195,7 @@ static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp,
        bzero (srcfork->ff_data.cf_extents, size);
        return 0;
 }
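The loop above that moves the invalid ranges is simply a list hand-off: pop each rl_entry off the source fork's tail queue and push it onto the destination's. A minimal userspace analogy, using the standard <sys/queue.h> TAILQ macros and a hypothetical struct range standing in for struct rl_entry:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct rl_entry: one invalid [start, end] byte range. */
struct range {
        long long start, end;
        TAILQ_ENTRY(range) link;
};
TAILQ_HEAD(rangelist, range);

int main(void)
{
        struct rangelist src = TAILQ_HEAD_INITIALIZER(src);
        struct rangelist dst = TAILQ_HEAD_INITIALIZER(dst);
        struct range *r = malloc(sizeof(*r));

        if (r == NULL)
                return 1;
        r->start = 4096; r->end = 8191;
        TAILQ_INSERT_TAIL(&src, r, link);

        /* The hand-off: drain the source list into the destination,
         * which is what the rl_remove/rl_add pair does per entry. */
        while ((r = TAILQ_FIRST(&src)) != NULL) {
                TAILQ_REMOVE(&src, r, link);
                TAILQ_INSERT_TAIL(&dst, r, link);
        }
        printf("dst now holds [%lld, %lld]\n",
               TAILQ_FIRST(&dst)->start, TAILQ_FIRST(&dst)->end);
        return 0;
}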
-
-
+       
 
 /*
  *  cnode must be locked
@@ -2200,6 +2213,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
        int wait;               /* all other attributes (e.g. atime, etc.) */
        int lockflag;
        int took_trunc_lock = 0;
+       int locked_buffers = 0;
 
        /*
         * Applications which only care about data integrity rather than full
@@ -2251,7 +2265,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
         */
        if (fp && (((cp->c_flag & C_ALWAYS_ZEROFILL) && !TAILQ_EMPTY(&fp->ff_invalidranges)) ||
            ((wait || (cp->c_flag & C_ZFWANTSYNC)) &&
-               ((cp->c_flags & UF_NODUMP) == 0) &&
+               ((cp->c_bsdflags & UF_NODUMP) == 0) &&
                UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) &&
                cp->c_zftimeout != 0))) {
 
@@ -2318,8 +2332,32 @@ datasync:
 
        /*
         * Flush all dirty buffers associated with a vnode.
+        * Record how many of them were dirty AND locked (if necessary).
         */
-       buf_flushdirtyblks(vp, waitdata, lockflag, "hfs_fsync");
+       locked_buffers = buf_flushdirtyblks_skipinfo(vp, waitdata, lockflag, "hfs_fsync");
+       if ((lockflag & BUF_SKIP_LOCKED) && (locked_buffers) && (vnode_vtype(vp) == VLNK)) {
+               /* 
+                * If there are dirty symlink buffers, then we may need to take action
+                * to prevent issues later on if we are journaled. If we're fsyncing a 
+                * symlink vnode then we are in one of three cases:
+                * 
+                * 1) Automatic sync has fired.  In this case, we don't want the behavior to change.
+                * 
+                * 2) Someone has opened the FD for the symlink (not what it points to)
+                * and has issued an fsync against it.  This should be rare, and we don't
+                * want the behavior to change.
+                * 
+                * 3) We are being called by a vclean which is trying to reclaim this
+                * symlink vnode.  If this is the case, then allowing this fsync to 
+                * proceed WITHOUT flushing the journal could result in the vclean 
+                * invalidating the buffer's blocks before the journal transaction is
+                * written to disk.  To prevent this, we force a journal flush 
+                * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set).
+                */
+               if (vnode_isrecycled(vp)) {
+                       fullsync = 1;
+               }
+       }
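The three cases in the comment above reduce to one predicate: only a reclaim-driven fsync of a symlink with locked dirty buffers must escalate to a journal flush. A compilable restatement with illustrative names (the real decision reads lockflag & BUF_SKIP_LOCKED, the buf_flushdirtyblks_skipinfo return value, vnode_vtype(vp) == VLNK, and vnode_isrecycled(vp)):

/* Sketch only: returns 1 when the journal must be flushed before the
 * reclaim can invalidate the symlink's buffers (case 3); cases 1 and 2
 * return 0 and keep the old behavior. */
static int symlink_fsync_needs_journal_flush(int skipped_locked,
                                             int locked_buffers,
                                             int is_symlink,
                                             int being_recycled)
{
        return skipped_locked && locked_buffers && is_symlink && being_recycled;
}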
 
 metasync:
        if (vnode_isreg(vp) && vnode_issystem(vp)) {
@@ -2653,7 +2691,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
         *  the current directory and thus be
         *  non-empty.)
         */
-       if ((dcp->c_flags & APPEND) || (cp->c_flags & (IMMUTABLE | APPEND))) {
+       if ((dcp->c_bsdflags & APPEND) || (cp->c_bsdflags & (IMMUTABLE | APPEND))) {
                error = EPERM;
                goto out;
        }
@@ -2750,17 +2788,16 @@ hfs_vnop_remove(ap)
        struct cnode *dcp = VTOC(dvp);
        struct cnode *cp;
        struct vnode *rvp = NULL;
-       struct hfsmount *hfsmp = VTOHFS(vp);    
        int error=0, recycle_rsrc=0;
-       int drop_rsrc_vnode = 0;
        time_t orig_ctime;
+       uint32_t rsrc_vid = 0;
 
        if (dvp == vp) {
                return (EINVAL);
        }
 
        orig_ctime = VTOC(vp)->c_ctime;
-       if (!vnode_isnamedstream(vp)) {
+       if ( (!vnode_isnamedstream(vp)) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) {
                error = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL);
                if (error) {
                        // XXXdbg - decide on a policy for handling namespace handler failures!
@@ -2771,51 +2808,56 @@ hfs_vnop_remove(ap)
 
        cp = VTOC(vp);
 
-       /* 
-        * We need to grab the cnode lock on 'cp' before the lockpair() 
-        * to get an iocount on the rsrc fork BEFORE we enter hfs_removefile.
-        * To prevent other deadlocks, it's best to call hfs_vgetrsrc in a way that
-        * allows it to drop the cnode lock that it expects to be held coming in.  
-        * If we don't, we could commit a lock order violation, causing a deadlock.  
-        * In order to safely get the rsrc vnode with an iocount, we need to only hold the 
-        * lock on the file temporarily.  Unlike hfs_vnop_rename, we don't have to worry 
-        * about one rsrc fork getting recycled for another, but we do want to ensure
-        * that there are no deadlocks due to lock ordering issues.
-        * 
+relock:
+
+       hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+
+       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
+               hfs_unlock_truncate(cp, 0);
+               if (rvp) {
+                       vnode_put (rvp);
+               }       
+               return (error);
+       }
+       
+       /*
+        * Lazily determine whether there is a valid resource fork
+        * vnode attached to 'cp' if it is a regular file or symlink.  
+        * If the vnode does not exist, then we may proceed without having to
+        * create it.
+        *
+        * If, however, it does exist, then we need to acquire an iocount on the
+        * vnode after acquiring its vid.  This ensures that if we have to do I/O
+        * against it, it can't get recycled from underneath us in the middle
+        * of this call.
+        *
         * Note: this function may be invoked for directory hardlinks, so just skip these
         * steps if 'vp' is a directory.
         */
 
 
        if ((vp->v_type == VLNK) || (vp->v_type == VREG)) {
+               if ((cp->c_rsrc_vp) && (rvp == NULL)) {
+                       /* We need to acquire the rsrc vnode */
+                       rvp = cp->c_rsrc_vp;
+                       rsrc_vid = vnode_vid (rvp);
+               
+                       /* Unlock everything to acquire iocount on the rsrc vnode */    
+                       hfs_unlock_truncate (cp, 0);
+                       hfs_unlockpair (dcp, cp);
 
-               if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) {
-                       return (error);
-               }
-
-               error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
-               hfs_unlock(cp);
-               if (error) {
-                       /* we may have gotten an rsrc vp even though we got an error */
-                       if (rvp) {
-                               vnode_put(rvp);
+                       /* Use the vid to maintain identity on rvp */
+                       if (vnode_getwithvid(rvp, rsrc_vid)) {
+                               /*
+                                * If this fails, then it was recycled or 
+                                * reclaimed in the interim.  Reset fields and
+                                * start over.
+                                */
                                rvp = NULL;
+                               rsrc_vid = 0;
                        }
-                       return (error); 
-               }
-               drop_rsrc_vnode = 1;
-       }
-       /* Now that we may have an iocount on rvp, do the lock pair */
-
-       hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
-
-       if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
-               hfs_unlock_truncate(cp, 0);
-               /* drop the iocount on rvp if necessary */
-               if (drop_rsrc_vnode) {
-                       vnode_put (rvp);
+                       goto relock;
                }
-               return (error);
        }
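The relock: loop above is the standard XNU identity-versioning pattern: read the vid while a cnode lock pins the vnode's identity, drop every lock (taking an iocount may block), then call vnode_getwithvid(), which fails if the vnode was recycled in the window, and start over. A self-contained userspace analogy with hypothetical types:

#include <stdio.h>

struct obj { int vid; int iocount; };   /* stand-in for a vnode */

/* Fails iff the generation changed, i.e. the object was "recycled"
 * between recording the vid and asking for the reference. */
static int get_with_vid(struct obj *o, int expected_vid)
{
        if (o->vid != expected_vid)
                return -1;
        o->iocount++;
        return 0;
}

int main(void)
{
        struct obj rsrc = { 7, 0 };
        int held = 0;

        while (!held) {
                /* ...locks held: identity is stable, record it... */
                int vid = rsrc.vid;
                /* ...all locks dropped here in the real code... */
                if (get_with_vid(&rsrc, vid) == 0)
                        held = 1;
                /* on failure: re-lock, re-read c_rsrc_vp, and retry */
        }
        printf("pinned with iocount=%d\n", rsrc.iocount);
        return 0;
}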
 
        /* 
@@ -2827,7 +2869,7 @@ hfs_vnop_remove(ap)
                goto rm_done;   
        }
 
-       error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, rvp, 0);
+       error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, NULL, 0);
        
        /*
         * If the remove succeeded in deleting the file, then we may need to mark
@@ -2866,7 +2908,7 @@ rm_done:
                vnode_recycle(rvp);
        } 
        
-       if (drop_rsrc_vnode) {
+       if (rvp) {
                /* drop iocount on rsrc fork, was obtained at beginning of fxn */
                vnode_put(rvp);
        }
@@ -2875,7 +2917,7 @@ rm_done:
 }
 
 
-static int
+int
 hfs_removefile_callback(struct buf *bp, void *hfsmp) {
 
         if ( !(buf_flags(bp) & B_META))
@@ -2895,22 +2937,27 @@ hfs_removefile_callback(struct buf *bp, void *hfsmp) {
  * This function may be used to remove directories if they have
  * lots of EA's -- note the 'allow_dirs' argument.
  *
- * The 'rvp' argument is used to pass in a resource fork vnode with
- * an iocount to prevent it from getting recycled during usage.  If it
- * is NULL, then it is assumed the caller is a VNOP that cannot operate
- * on resource forks, like hfs_vnop_symlink or hfs_removedir. Otherwise in 
- * a VNOP that takes multiple vnodes, we could violate lock order and 
- * cause a deadlock.  
+ * This function is able to delete blocks & fork data for the resource
+ * fork even if it does not exist in core (and have a backing vnode).  
+ * It should infer the correct behavior based on the number of blocks
+ * in the cnode and whether or not the resource fork pointer exists.
+ * As a result, one need only pass in the 'vp' corresponding to the
+ * data fork of this file (or main vnode in the case of a directory).  
+ * Passing in a resource fork will result in an error.
+ *
+ * Because we do not create any vnodes in this function, we are not at 
+ * risk of deadlocking against ourselves by double-locking.
  *
  * Requires cnode and truncate locks to be held.
  */
 int
 hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                int flags, int skip_reserve, int allow_dirs, 
-                          struct vnode *rvp, int only_unlink)
+                          __unused struct vnode *rvp, int only_unlink)
 {
        struct cnode *cp;
        struct cnode *dcp;
+       struct vnode *rsrc_vp = NULL;
        struct hfsmount *hfsmp;
        struct cat_desc desc;
        struct timeval tv;
@@ -2921,7 +2968,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
        int started_tr = 0;
        int isbigfile = 0, defer_remove=0, isdir=0;
        int update_vh = 0;
-       
+
        cp = VTOC(vp);
        dcp = VTOC(dvp);
        hfsmp = VTOHFS(vp);
@@ -2939,11 +2986,37 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
        if (VNODE_IS_RSRC(vp)) {
                return (EPERM);
        }
+       else {
+               /* 
+                * We know it's a data fork.
+                * Probe the cnode to see if we have a valid resource fork
+                * in hand or not.
+                */
+               rsrc_vp = cp->c_rsrc_vp;
+       }
+
        /* Don't allow deleting the journal or journal_info_block. */
-       if (hfsmp->jnl &&
-           (cp->c_fileid == hfsmp->hfs_jnlfileid || cp->c_fileid == hfsmp->hfs_jnlinfoblkid)) {
+       if (hfs_is_journal_file(hfsmp, cp)) {
                return (EPERM);
        }
+
+       /*
+        * If removing a symlink, then we need to ensure that the
+        * data blocks for the symlink are not still in-flight or pending.  
+        * If so, we will unlink the symlink here, making its blocks 
+        * available for re-allocation by a subsequent transaction.  That is OK, but
+        * then the I/O for the data blocks could then go out before the journal 
+        * transaction that created it was flushed, leading to I/O ordering issues.
+        */
+       if (vp->v_type == VLNK) {       
+               /* 
+                * This will block if the asynchronous journal flush is in progress.
+                * If this symlink is not being renamed over and doesn't have any open FDs,
+                * then we'll remove it from the journal's bufs below in kill_block.
+                */
+               buf_wait_for_shadow_io (vp, 0);
+       }
+
        /*
         * Hard links require special handling.
         */
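hfs_is_journal_file() is new in this commit and its definition is not part of this hunk, but the '-' lines it replaces show the test it must encapsulate. A sketch of the presumed equivalent (not the shipped definition):

static int hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp)
{
        /* Same check the removed lines performed inline. */
        return (hfsmp->jnl != NULL &&
                (cp->c_fileid == hfsmp->hfs_jnlfileid ||
                 cp->c_fileid == hfsmp->hfs_jnlinfoblkid));
}

Factoring it out lets hfs_vnop_rename reuse the check further down without duplicating the fileid comparisons.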
@@ -2962,6 +3035,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve);
                }
        }
+
        /* Directories should call hfs_rmdir! (unless they have a lot of attributes) */
        if (vnode_isdir(vp)) {
                if (allow_dirs == 0)
@@ -2982,23 +3056,30 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
        /* Remove our entry from the namei cache. */
        cache_purge(vp);
-
+       
        /*
-        * We expect the caller, if operating on files,
-        * will have passed in a resource fork vnode with
-        * an iocount, even if there was no content.
-        * We only do the hfs_truncate on the rsrc fork
-        * if we know that it DID have content, however.
-        * This has the bonus of not requiring us to defer
-        * its removal, unless it is in use.
+        * If the caller was operating on a file (as opposed to a 
+        * directory with EAs), then we need to figure out
+        * whether or not it has a valid resource fork vnode.
+        * 
+        * If there was a valid resource fork vnode, then we need
+        * to use hfs_truncate to eliminate its data.  If there is
+        * no vnode, then we hold the cnode lock which would
+        * prevent it from being created.  As a result, 
+        * we can use the data deletion functions which do not
+        * require that a cnode/vnode pair exist.
         */
 
        /* Check if this file is being used. */
        if (isdir == 0) {
                dataforkbusy = vnode_isinuse(vp, 0);
-               /* Only need to defer resource fork removal if in use and has content */
-               if (rvp && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
-                       rsrcforkbusy = vnode_isinuse(rvp, 0);
+               /*  
+                * At this point, we know that 'vp' points to
+                * a data fork because we checked it up front. And if 
+                * there is no rsrc fork, rsrc_vp will be NULL.
+                */
+               if (rsrc_vp && (cp->c_blocks - VTOF(vp)->ff_blocks)) {
+                       rsrcforkbusy = vnode_isinuse(rsrc_vp, 0);
                }
        }
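The busy-check above leans on HFS block accounting: cp->c_blocks counts the allocation blocks of every fork on the cnode, while a filefork's ff_blocks counts only its own, so subtracting the data fork's ff_blocks leaves the resource fork's share. With illustrative numbers:

#include <stdio.h>

int main(void)
{
        int c_blocks = 30;          /* whole cnode: data fork + rsrc fork */
        int data_ff_blocks = 22;    /* data fork only (VTOF(vp)->ff_blocks) */

        int rsrc_blocks = c_blocks - data_ff_blocks;    /* 8 */
        printf("rsrc fork holds %d blocks -> %s\n", rsrc_blocks,
               rsrc_blocks ? "check whether it is busy" : "nothing to defer");
        return 0;
}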
        
@@ -3054,7 +3135,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) {
                        cp->c_flag |= C_NEED_DATA_SETSIZE;
                }
-               if (!rsrcforkbusy && rvp) {
+               if (!rsrcforkbusy && rsrc_vp) {
                        cp->c_flag |= C_NEED_RSRC_SETSIZE;
                }
        }
@@ -3096,8 +3177,12 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        }
                        update_vh = 1;
                }
-               if (!rsrcforkbusy && rvp) {
-                       error = hfs_prepare_release_storage (hfsmp, rvp);
+               
+               /*
+                * If the resource fork vnode does not exist, we can skip this step.
+                */
+               if (!rsrcforkbusy && rsrc_vp) {
+                       error = hfs_prepare_release_storage (hfsmp, rsrc_vp);
                        if (error) {
                                goto out;
                        }
@@ -3205,17 +3290,55 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        goto out;
 
        } 
-       else /* Not busy */ {
-               
+       else {
+               /*
+                * Nobody is using this item; we can safely remove everything.
+                */
+               struct filefork *temp_rsrc_fork = NULL;
 #if QUOTA
                off_t savedbytes;
                int blksize = hfsmp->blockSize;
 #endif
                u_int32_t fileid = cp->c_fileid;
-               
+       
+               /* 
+                * Figure out if we need to read the resource fork data into 
+                * core before wiping out the catalog record.  
+                *
+                * 1) Must not be a directory
+                * 2) cnode's c_rsrcfork ptr must be NULL.
+                * 3) rsrc fork must have actual blocks 
+                */
+               if ((isdir == 0) && (cp->c_rsrcfork == NULL) && 
+                               (cp->c_blocks - VTOF(vp)->ff_blocks)) {
+                       /*
+                        * The resource fork vnode & filefork did not exist.
+                        * Create a temporary one for use in this function only. 
+                        */
+                       MALLOC_ZONE (temp_rsrc_fork, struct filefork *, sizeof (struct filefork), M_HFSFORK, M_WAITOK);
+                       bzero(temp_rsrc_fork, sizeof(struct filefork));
+                       temp_rsrc_fork->ff_cp = cp;
+                       rl_init(&temp_rsrc_fork->ff_invalidranges);
+               }       
+
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+               /* Look up the resource fork first, if necessary */
+               if (temp_rsrc_fork) {
+                       error = cat_lookup (hfsmp, &desc, 1, (struct cat_desc*) NULL, 
+                                       (struct cat_attr*) NULL, &temp_rsrc_fork->ff_data, NULL);
+                       if (error) {
+                               FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK);
+                               hfs_systemfile_unlock (hfsmp, lockflags);
+                               goto out;
+                       }
+               }
+
                if (!skip_reserve) {
                        if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) {
+                               if (temp_rsrc_fork) {
+                                       FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK);
+                               }
                                hfs_systemfile_unlock(hfsmp, lockflags);
                                goto out;
                        }
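To restate the temporary-fork protocol being set up here: the scratch filefork is allocated only when the three numbered conditions hold, it is filled in by cat_lookup under the system-file locks, and every error path between the MALLOC_ZONE and the final release must FREE_ZONE it. The allocation predicate, written as a single expression over the diff's own fields (a sketch, not a drop-in):

/* True when resource-fork data exists on disk but no in-core fork
 * accounts for it, so a temporary fork is needed to describe it. */
int need_temp_rsrc_fork = (isdir == 0) &&
                          (cp->c_rsrcfork == NULL) &&
                          ((cp->c_blocks - VTOF(vp)->ff_blocks) != 0);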
@@ -3238,7 +3361,11 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
                }
                hfs_systemfile_unlock(hfsmp, lockflags);
+
                if (error) {
+                       if (temp_rsrc_fork) {
+                               FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK);
+                       }
                        goto out;
                }
                
@@ -3253,9 +3380,23 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
                }
                
-               if (cp->c_rsrcfork && (cp->c_rsrcfork->ff_blocks > 0)) {
-                       savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize);
-                       (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
+               /*
+                * We may have just deleted the catalog record for a resource fork even 
+                * though it did not exist in core as a vnode. However, just because there 
+                * was a resource fork pointer in the cnode does not mean that it had any blocks.
+                */
+               if (temp_rsrc_fork || cp->c_rsrcfork) {
+                       if (cp->c_rsrcfork) {
+                               if (cp->c_rsrcfork->ff_blocks > 0) {
+                                       savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize);
+                                       (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
+                               }
+                       }       
+                       else {
+                               /* we must have used a temporary fork */
+                               savedbytes = ((off_t)temp_rsrc_fork->ff_blocks * (off_t)blksize);       
+                               (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
+                       }
                }
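The quota credit in either branch is plain block arithmetic: blocks owned by the fork times the volume's allocation block size, negated and passed to hfs_chkdq. For example (numbers illustrative):

#include <stdio.h>

int main(void)
{
        long long ff_blocks = 25;       /* blocks owned by the rsrc fork */
        long long blksize   = 4096;     /* hfsmp->blockSize */

        long long savedbytes = ff_blocks * blksize;     /* 102400 */
        printf("credit %lld bytes back: hfs_chkdq(cp, -%lld, ...)\n",
               savedbytes, savedbytes);
        return 0;
}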
                
                if (hfsmp->hfs_flags & HFS_QUOTAS) {
@@ -3263,13 +3404,17 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                }
 #endif
                
-               
                /* 
                 * If we didn't get any errors deleting the catalog entry, then go ahead
                 * and release the backing store now.  The filefork pointers are still valid.
-                */             
-               error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid);
-               
+                */
+               if (temp_rsrc_fork) {   
+                       error = hfs_release_storage (hfsmp, cp->c_datafork, temp_rsrc_fork, fileid);
+               }
+               else {
+                       /* if cp->c_rsrcfork == NULL, hfs_release_storage will skip over it. */
+                       error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid);
+               }
                if (error) {
                        /* 
                         * If we encountered an error updating the extents and bitmap,
@@ -3284,7 +3429,12 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                        /* reset update_vh to 0, since hfs_release_storage should have done it for us */
                        update_vh = 0;
                }
-               
+
+               /* Get rid of the temporary rsrc fork */
+               if (temp_rsrc_fork) {
+                       FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK);
+               }
+
                cp->c_flag |= C_NOEXISTS;
                cp->c_flag &= ~C_DELETED;
                
@@ -3358,6 +3508,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
        cdp->cd_flags &= ~CD_HASBUF;
 }
 
+
 /*
  * Rename a cnode.
  *
@@ -3397,8 +3548,13 @@ hfs_vnop_rename(ap)
        struct vnode *tdvp = ap->a_tdvp;
        struct vnode *fvp = ap->a_fvp;
        struct vnode *fdvp = ap->a_fdvp;
-       struct vnode *fvp_rsrc = NULLVP;
+       /*
+        * Note that we only need locals for the target/destination's
+        * resource fork vnode (and only if necessary).  We don't care if the
+        * source has a resource fork vnode or not.
+        */
        struct vnode *tvp_rsrc = NULLVP;
+       uint32_t tvp_rsrc_vid = 0;
        struct componentname *tcnp = ap->a_tcnp;
        struct componentname *fcnp = ap->a_fcnp;
        struct proc *p = vfs_context_proc(ap->a_context);
@@ -3479,77 +3635,13 @@ hfs_vnop_rename(ap)
                }
        }
        
-       /* 
-        * Before grabbing the four locks, we may need to get an iocount on the resource fork
-        * vnodes in question, just like hfs_vnop_remove.  If fvp and tvp are not
-        * directories, then go ahead and grab the resource fork vnodes now
-        * one at a time.  We don't actively need the fvp_rsrc to do the rename operation,
-        * but we need the iocount to prevent the vnode from getting recycled/reclaimed
-        * during the middle of the VNOP.
-        */
-
-
-       if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) {
-
-               if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) {
-                       return (error);
-               }
-               /*
-                * We care if we race against rename/delete with this cp, so we'll error out
-                * if the file becomes open-unlinked during this call.
-                */
-               error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE, TRUE);
-               hfs_unlock (VTOC(fvp));
-               if (error) {
-                       if (fvp_rsrc) {
-                               vnode_put(fvp_rsrc);
-                       }
-                       return error;
-               }
-       }
-               
-       if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) {
-               /* 
-                * Lock failure is OK on tvp, since we may race with a remove on the dst.
-                * But this shouldn't stop rename from proceeding, so only try to
-                * grab the resource fork if the lock succeeded.
-                */
-               if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) {
-                       tcp = VTOC(tvp);
-                       /* 
-                        * We only care if we get an open-unlinked file on the dst so we 
-                        * know to null out tvp/tcp to make the rename operation act 
-                        * as if they never existed.  Because they're effectively out of the
-                        * namespace already it's fine to do this.  If this is true, then
-                        * make sure to unlock the cnode and drop the iocount only after the unlock.
-                        */
-
-                       error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE, TRUE);
-                       hfs_unlock (tcp);
-                       if (error) {
-                               /*
-                                * Since we specify TRUE for error_on_unlinked in hfs_vgetrsrc,
-                                * we can get a rsrc fork vnode even if it returns an error.
-                                */
-                               tcp = NULL;
-                               tvp = NULL;
-                               if (tvp_rsrc) {
-                                       vnode_put (tvp_rsrc);
-                                       tvp_rsrc = NULL;
-                               }
-                               /* just bypass truncate lock and act as if we never got tcp/tvp */
-                               goto retry;
-                       }
-               }
-       }
-
+retry:
        /* When tvp exists, take the truncate lock for hfs_removefile(). */
        if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) {
                hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK);
                took_trunc_lock = 1;
        }
 
-  retry:
        error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL,
                             HFS_EXCLUSIVE_LOCK, &error_cnode);
        if (error) {
@@ -3557,6 +3649,20 @@ hfs_vnop_rename(ap)
                        hfs_unlock_truncate(VTOC(tvp), 0);
                        took_trunc_lock = 0;
                }
+
+               /* 
+                * We hit an error path.  If we were trying to re-acquire the locks
+                * after coming through here once, we might have already obtained
+                * an iocount on tvp's resource fork vnode.  Drop that before dealing
+                * with the failure.  Note this is safe -- since we are in an
+                * error handling path, we can't be holding the cnode locks.
+                */
+               if (tvp_rsrc) {
+                       vnode_put (tvp_rsrc);
+                       tvp_rsrc_vid = 0;
+                       tvp_rsrc = NULL;
+               }
+
                /* 
                 * tvp might no longer exist.  If the cause of the lock failure 
                 * was tvp, then we can try again with tvp/tcp set to NULL.  
@@ -3568,13 +3674,7 @@ hfs_vnop_rename(ap)
                        tvp = NULL;
                        goto retry;
                }
-               /* otherwise, drop iocounts on the rsrc forks and bail out */
-               if (fvp_rsrc) {
-                       vnode_put (fvp_rsrc);
-               }
-               if (tvp_rsrc) {
-                       vnode_put (tvp_rsrc);
-               }
+
                return (error);
        }
 
@@ -3583,6 +3683,37 @@ hfs_vnop_rename(ap)
        tdcp = VTOC(tdvp);
        tcp = tvp ? VTOC(tvp) : NULL;
 
+       /* 
+        * Acquire iocounts on the destination's resource fork vnode 
+        * if necessary. If dst/src are files and the dst has a resource 
+        * fork vnode, then we need to try to acquire an iocount on the rsrc vnode. 
+        * If it does not exist, then we don't care and can skip it.
+        */
+       if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) {
+               if ((tvp) && (tcp->c_rsrc_vp) && (tvp_rsrc == NULL)) {
+                       tvp_rsrc = tcp->c_rsrc_vp;
+                       /*
+                        * We can look at the vid here because we're holding the 
+                        * cnode lock on the underlying cnode for this rsrc vnode. 
+                        */
+                       tvp_rsrc_vid = vnode_vid (tvp_rsrc);
+
+                       /* Unlock everything to acquire iocount on this rsrc vnode */
+                       if (took_trunc_lock) {
+                               hfs_unlock_truncate (VTOC(tvp), 0);
+                               took_trunc_lock = 0;    
+                       }       
+                       hfs_unlockfour(fdcp, fcp, tdcp, tcp);
+
+                       if (vnode_getwithvid (tvp_rsrc, tvp_rsrc_vid)) {
+                               /* iocount acquisition failed.  Reset fields and start over. */
+                               tvp_rsrc_vid = 0;
+                               tvp_rsrc = NULL;
+                       }
+                       goto retry;
+               }
+       }
+
        /* Ensure we didn't race src or dst parent directories with rmdir. */
        if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) {
                error = ENOENT;
@@ -3709,7 +3840,7 @@ hfs_vnop_rename(ap)
        /*
         * Make sure "from" vnode and its parent are changeable.
         */
-       if ((fcp->c_flags & (IMMUTABLE | APPEND)) || (fdcp->c_flags & APPEND)) {
+       if ((fcp->c_bsdflags & (IMMUTABLE | APPEND)) || (fdcp->c_bsdflags & APPEND)) {
                error = EPERM;
                goto out;
        }
@@ -3731,6 +3862,13 @@ hfs_vnop_rename(ap)
                goto out;
        }
 
+       /* Don't allow modification of the journal or journal_info_block */
+       if (hfs_is_journal_file(hfsmp, fcp) ||
+           (tcp && hfs_is_journal_file(hfsmp, tcp))) {
+               error = EPERM;
+               goto out;
+       }
+
 #if QUOTA
        if (tvp)
                (void)hfs_getinoquota(tcp);
@@ -3908,7 +4046,7 @@ hfs_vnop_rename(ap)
                        error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1);
                }
                else {
-                       error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, tvp_rsrc, 1);
+                       error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, NULL, 1);
                        
                        /*
                         * If the destination file had a resource fork vnode, then we need to get rid of
@@ -3980,7 +4118,7 @@ skip_rm:
        replace_desc(fcp, &out_desc);
        fcp->c_parentcnid = tdcp->c_fileid;
        fcp->c_hint = 0;
-       
+
        /* Now indicate this cnode needs to have date-added written to the finderinfo */
        fcp->c_flag |= C_NEEDS_DATEADDED;
        (void) hfs_update (fvp, 0);
@@ -4029,42 +4167,6 @@ skip_rm:
 
        tdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
        (void) hfs_update(tdvp, 0);
-
-
-       /* Update the vnode's name now that the rename has completed. */
-       vnode_update_identity(fvp, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, 
-                       tcnp->cn_hash, (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
-       /* 
-        * At this point, we may have a resource fork vnode attached to the 
-        * 'from' vnode.  If it exists, we will want to update its name, because
-        * it contains the old name + _PATH_RSRCFORKSPEC. ("/..namedfork/rsrc").
-        *
-        * Note that the only thing we need to update here is the name attached to
-        * the vnode, since a resource fork vnode does not have a separate resource
-        * cnode -- it's still 'fcp'.
-        */
-       if (fcp->c_rsrc_vp) {
-               char* rsrc_path = NULL;
-               int len;
-
-               /* Create a new temporary buffer that's going to hold the new name */
-               MALLOC_ZONE (rsrc_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
-               len = snprintf (rsrc_path, MAXPATHLEN, "%s%s", tcnp->cn_nameptr, _PATH_RSRCFORKSPEC);
-               len = MIN(len, MAXPATHLEN);
-
-               /* 
-                * vnode_update_identity will do the following for us:
-                * 1) release reference on the existing rsrc vnode's name.
-                * 2) copy/insert new name into the name cache
-                * 3) attach the new name to the resource vnode
-                * 4) update the vnode's vid
-                */
-               vnode_update_identity (fcp->c_rsrc_vp, fvp, rsrc_path, len, 0, (VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE));
-
-               /* Free the memory associated with the resource fork's name */
-               FREE_ZONE (rsrc_path, MAXPATHLEN, M_NAMEI);     
-       }
-
 out:
        if (got_cookie) {
                cat_postflight(hfsmp, &cookie, p);
@@ -4086,12 +4188,10 @@ out:
 
        hfs_unlockfour(fdcp, fcp, tdcp, tcp);
        
-       /* Now vnode_put the resource forks vnodes if necessary */
+       /* Now vnode_put the resource fork vnode if necessary */
        if (tvp_rsrc) {
                vnode_put(tvp_rsrc);
-       }
-       if (fvp_rsrc) {
-               vnode_put(fvp_rsrc);
+               tvp_rsrc = NULL;
        }
 
        /* After tvp is removed the only acceptable error is EIO */
@@ -4339,7 +4439,7 @@ hfs_vnop_readdir(ap)
        if (uio_iovcnt(uio) > 1)
                return (EINVAL);
 
-       if (VTOC(vp)->c_flags & UF_COMPRESSED) {
+       if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
                int compressed = hfs_file_is_compressed(VTOC(vp), 0);  /* 0 == take the cnode lock */
                if (VTOCMP(vp) != NULL && !compressed) {
                        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
@@ -4512,7 +4612,7 @@ hfs_vnop_readdir(ap)
        }
 
        /* Pack the buffer with dirent entries. */
-       error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag);
+       error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, ap->a_flags, &items, &eofflag);
 
        if (index == 0 && error == 0) {
                cp->c_dirthreadhint = dirhint->dh_threadhint;
@@ -4794,52 +4894,52 @@ hfs_update(struct vnode *vp, __unused int waitfor)
            return error;
        }
 
-       /* 
-        * Modify the values passed to cat_update based on whether or not
-        * the file has invalid ranges or borrowed blocks.
-        */
-       if (dataforkp) {
-               off_t numbytes = 0;
-
-               /* copy the datafork into a temporary copy so we don't pollute the cnode's */
-               bcopy(dataforkp, &datafork, sizeof(datafork));
-               dataforkp = &datafork;
-
-               /*
-                * If there are borrowed blocks, ensure that they are subtracted
-                * from the total block count before writing the cnode entry to disk.
-                * Only extents that have actually been marked allocated in the bitmap
-                * should be reflected in the total block count for this fork.
-                */
-               if (cp->c_datafork->ff_unallocblocks != 0) {
-                       // make sure that we don't assign a negative block count
-                       if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
-                               panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
-                                               cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
-                       }
-
-                       /* Also cap the LEOF to the total number of bytes that are allocated. */
-                       datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
-                       datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
-               }
-               
-               /*
-                * For files with invalid ranges (holes) the on-disk
-                * field representing the size of the file (cf_size)
-                * must be no larger than the start of the first hole.
-                * However, note that if the first invalid range exists
-                * solely within borrowed blocks, then our LEOF and block
-                * count should both be zero.  As a result, set it to the 
-                * min of the current cf_size and the start of the first 
-                * invalid range, because it may have already been reduced
-                * to zero by the borrowed blocks check above.
-                */
-               if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges))  {
-                       numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
-                       datafork.cf_size = MIN((numbytes), (datafork.cf_size));
-               }
-       }
-
+    /* 
+     * Modify the values passed to cat_update based on whether or not
+     * the file has invalid ranges or borrowed blocks.
+     */
+    if (dataforkp) {
+        off_t numbytes = 0;
+
+        /* copy the datafork into a temporary copy so we don't pollute the cnode's */
+        bcopy(dataforkp, &datafork, sizeof(datafork));
+        dataforkp = &datafork;
+
+        /*
+         * If there are borrowed blocks, ensure that they are subtracted
+         * from the total block count before writing the cnode entry to disk.
+         * Only extents that have actually been marked allocated in the bitmap
+         * should be reflected in the total block count for this fork.
+         */
+        if (cp->c_datafork->ff_unallocblocks != 0) {
+            // make sure that we don't assign a negative block count
+            if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
+                panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+                        cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+            }
+
+            /* Also cap the LEOF to the total number of bytes that are allocated. */
+            datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
+            datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+        }
+
+        /*
+         * For files with invalid ranges (holes) the on-disk
+         * field representing the size of the file (cf_size)
+         * must be no larger than the start of the first hole.
+         * However, note that if the first invalid range exists
+         * solely within borrowed blocks, then our LEOF and block
+         * count should both be zero.  As a result, set it to the 
+         * min of the current cf_size and the start of the first 
+         * invalid range, because it may have already been reduced
+         * to zero by the borrowed blocks check above.
+         */
+        if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges))  {
+            numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
+            datafork.cf_size = MIN((numbytes), (datafork.cf_size));
+        }
+    }
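The two adjustments above compose: borrowed blocks first shrink both the block count and the logical EOF, then the first invalid range can cap the EOF further. A worked example with illustrative numbers (ff_blocks = 10, ff_unallocblocks = 4, 4096-byte blocks, first hole at byte 8192):

#include <stdio.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
        long long ff_blocks = 10, ff_unallocblocks = 4, blockSize = 4096;
        long long first_hole = 8192;    /* rl_start of first invalid range */

        long long cf_blocks = ff_blocks - ff_unallocblocks;  /* 6 */
        long long cf_size   = cf_blocks * blockSize;         /* 24576 */
        cf_size = MIN(first_hole, cf_size);                  /* 8192 */
        printf("on-disk record: %lld blocks, %lld bytes\n", cf_blocks, cf_size);
        return 0;
}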
+       
        /*
         * For resource forks with delayed allocations, make sure
         * the block count and file size match the number of blocks
@@ -4890,8 +4990,18 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        enum vtype vnodetype;
        int mode;
        int newvnode_flags = 0;
-       int nocache = 0;
        u_int32_t gnv_flags = 0;
+       int protectable_target = 0;
+
+#if CONFIG_PROTECT
+       struct cprotect *entry = NULL;
+       uint32_t cp_class = 0;
+       if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
+               cp_class = vap->va_dataprotect_class;
+       }
+       int protected_mount = 0;        
+#endif
+
 
        if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK)))
                return (error);
@@ -4906,8 +5016,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        }
 
        dcp->c_flag |= C_DIR_MODIFICATION;
-       
+
        hfsmp = VTOHFS(dvp);
+
        *vpp = NULL;
        tvp = NULL;
        out_desc.cd_flags = 0;
@@ -4918,13 +5029,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                vnodetype = VREG;
        mode = MAKEIMODE(vnodetype, vap->va_mode);
 
-#if CONFIG_PROTECT
-       /* If we're creating a regular file on a CP filesystem, then delay caching */
-       if ((vnodetype == VREG ) && (cp_fs_protected (VTOVFS(dvp)))) {
-               nocache = 1;
+       if (S_ISDIR (mode) || S_ISREG (mode)) {
+               protectable_target = 1;
        }
-#endif
        
+
        /* Check if were out of usable disk space. */
        if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) {
                error = ENOSPC;
@@ -4955,7 +5064,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                VATTR_SET_SUPPORTED(vap, va_flags);
                attr.ca_flags = vap->va_flags;
        }
-
+       
        /* 
         * HFS+ only: all files get ThreadExists
         * HFSX only: dirs get HasFolderCount
@@ -4969,7 +5078,29 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                }
        }
 
-       /* Add the date added to the item */
+#if CONFIG_PROTECT     
+       if (cp_fs_protected(hfsmp->hfs_mp)) {
+               protected_mount = 1;
+       }
+       /*
+        * On a content-protected HFS+/HFSX filesystem, files and directories
+        * cannot be created without atomically setting/creating the EA that 
+        * contains the protection class metadata and keys at the same time, in
+        * the same transaction.  As a result, pre-set the "EAs exist" flag
+        * on the cat_attr for protectable catalog record creations.  This will
+        * cause the cnode creation routine in hfs_getnewvnode to mark the cnode
+        * as having EAs.
+        */
+       if ((protected_mount) && (protectable_target)) {
+               attr.ca_recflags |= kHFSHasAttributesMask;
+       }
+#endif
+
+
+       /* 
+        * Add the date added to the item. See above, as
+        * all of the dates are set to the itime.
+        */
        hfs_write_dateadded (&attr, attr.ca_atime);
 
        attr.ca_uid = vap->va_uid;
@@ -5010,6 +5141,22 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        in_desc.cd_hint = dcp->c_childhint;
        in_desc.cd_encoding = 0;
 
+#if CONFIG_PROTECT
+       /*
+        * To preserve file creation atomicity with regards to the content protection EA,
+        * we must create the file in the catalog and then write out the EA in the same
+        * transaction.  Pre-flight any operations that we can (such as allocating/preparing
+        * the buffer, wrapping the keys) before we start the txn and take the requisite 
+        * b-tree locks.   We pass '0' as the fileid because we do not know it yet. 
+        */
+       if ((protected_mount) && (protectable_target)) {
+               error = cp_entry_create_keys (&entry, dcp, hfsmp, cp_class, 0, attr.ca_mode);
+               if (error) {
+                       goto exit;
+               }
+       }
+#endif
+
        if ((error = hfs_start_transaction(hfsmp)) != 0) {
            goto exit;
        }
@@ -5037,6 +5184,40 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                dcp->c_ctime = tv.tv_sec;
                dcp->c_mtime = tv.tv_sec;
                (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+
+#if CONFIG_PROTECT
+               /*
+                * If we are creating a content protected file, now is when
+                * we create the EA. We must create it in the same transaction
+                * that creates the file.  We can also guarantee that the file 
+                * MUST exist because we are still holding the catalog lock
+                * at this point.
+                */
+               if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) {
+                       error = cp_setxattr (NULL, entry, hfsmp, attr.ca_fileid, XATTR_CREATE);
+                       
+                       if (error) {
+                               int delete_err;
+                               /* 
+                                * If we fail the EA creation, then we need to delete the file. 
+                                * Luckily, we are still holding all of the right locks.
+                                */
+                               delete_err = cat_delete (hfsmp, &out_desc, &attr);
+                               if (delete_err == 0) {
+                                       /* Update the parent directory */
+                                       if (dcp->c_entries > 0)
+                                               dcp->c_entries--;
+                                       dcp->c_dirchangecnt++;
+                                       dcp->c_ctime = tv.tv_sec;
+                                       dcp->c_mtime = tv.tv_sec;
+                                       (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+                               }
+
+                               /* Emit EINVAL if we fail to create the EA */
+                               error = EINVAL;
+                       }
+               }               
+#endif
        }
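Taken together, the CONFIG_PROTECT hunks in this function implement one pattern: wrap the keys before the transaction, create the catalog record and the protection EA inside the same transaction, and undo the create while the b-tree locks are still held if the EA write fails. A compilable skeleton with hypothetical stubs standing in for the kernel calls (the success path also updates the MKB, elided here):

#include <stdio.h>

static int  prepare_keys(void)        { return 0; } /* cp_entry_create_keys */
static int  txn_start(void)           { return 0; } /* hfs_start_transaction */
static int  create_record(void)       { return 0; } /* cat_create */
static int  write_protection_ea(void) { return 0; } /* cp_setxattr, XATTR_CREATE */
static void delete_record(void)       { }           /* cat_delete */
static void txn_end(void)             { }           /* hfs_end_transaction */
static void destroy_keys(void)        { }           /* cp_entry_destroy */

static int create_protected_file(void)
{
        int error = prepare_keys();     /* pre-flight, outside the txn */
        if (error)
                goto out;
        if ((error = txn_start()) != 0)
                goto out;
        if ((error = create_record()) == 0) {
                if ((error = write_protection_ea()) != 0)
                        delete_record();  /* same txn, locks still held */
        }
        txn_end();
out:
        destroy_keys();                 /* never leak the cached key */
        return error;
}

int main(void) { return create_protected_file(); }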
        hfs_systemfile_unlock(hfsmp, lockflags);
        if (error)
@@ -5068,15 +5249,26 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
            started_tr = 0;
        }
 
+#if CONFIG_PROTECT
+       /* 
+        * At this point, we must have encountered success with writing the EA.
+        * At this point, we must have succeeded in writing the EA.
+        * Update the MKB with the data for the cached key, then destroy it.  This
+        * helps prevent information leakage by ensuring the cached key is only
+        * unwrapped to perform file I/O when it is allowed.
+
+       if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target))  {
+               cp_update_mkb (entry, attr.ca_fileid);
+               cp_entry_destroy (&entry);
+       }
+#endif
+
        /* Do not create vnode for whiteouts */
        if (S_ISWHT(mode)) {
                goto exit;
        }       
 
        gnv_flags |= GNV_CREATE;
-       if (nocache) {
-               gnv_flags |= GNV_NOCACHE;
-       }
 
        /*
         * Create a vnode for the object just created.
@@ -5102,49 +5294,6 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        cp = VTOC(tvp);
        *vpp = tvp;
 
-#if CONFIG_PROTECT
-       error = cp_entry_create_keys(cp);
-       /* 
-        * If we fail to create keys, then do NOT allow this vnode to percolate out into the
-        * namespace.  Delete it and return the errno that cp_entry_create_keys generated.
-        * Luckily, we can do this without issues because the entry was newly created
-        * and we're still holding the directory cnode lock.  Because we prevented it from
-        * getting inserted into the namecache upon vnode creation, all accesss to this file
-        * would have to go through the directory, whose lock we are still holding.
-        */
-       if (error) {
-               /*
-                * If we fail to remove/recycle the item here, we can't do much about it.  Log 
-                * a message to the console and then we can backtrack it.  The ultimate error
-                * that will get emitted to userland will be from the failure to create the EA blob.
-                */
-               int err = hfs_removefile (dvp, tvp, cnp, 0, 0, 0, NULL, 0);
-               if (err) {
-                       printf("hfs_makenode: removefile failed (%d) for CP file %p\n", err, tvp);
-               }
-               hfs_unlock (cp);
-               err = vnode_recycle (tvp);
-               if (err) {
-                       printf("hfs_makenode: vnode_recycle failed (%d) for CP file %p\n", err, tvp);
-               }
-               /* Drop the iocount on the new vnode to force reclamation/recycling */
-               vnode_put (tvp);
-               cp = NULL;
-               *vpp = NULL;
-       }
-       else {
-               /* insert item into name cache if it wasn't already inserted.*/
-               if (nocache) {
-                       cache_enter (dvp, tvp, cnp);
-               }
-       }               
-
-#endif
-/* 
- * If CONFIG_PROTECT is not enabled, then all items will get automatically added into 
- * the namecache, as nocache will be set to 0.
- */
-
 #if QUOTA
        /* 
         * Once we create this vnode, we need to initialize its quota data 
@@ -5160,6 +5309,18 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 exit:
        cat_releasedesc(&out_desc);
        
+#if CONFIG_PROTECT
+       /*  
+        * We may have jumped here in error-handling various situations above.
+        * If we haven't already dumped the temporary CP used to initialize
+        * the file atomically, then free it now. cp_entry_destroy should null
+        * out the pointer if it was called already.
+        */
+       if (entry) {
+               cp_entry_destroy (&entry);
+       }       
+#endif
+
        /*
         * Make sure we release cnode lock on dcp.
         */
@@ -5554,7 +5715,7 @@ hfs_vnop_fsync(ap)
        }
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
                return (error);
        }
 #endif /* CONFIG_PROTECT */
@@ -5685,7 +5846,11 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = {
     { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf },               /* pathconf */
     { &vnop_advlock_desc, (VOPFUNC)err_advlock },              /* advlock */
     { &vnop_allocate_desc, (VOPFUNC)hfs_readonly_op },         /* allocate (READONLY) */
+#if CONFIG_SEARCHFS
     { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search },         /* search fs */
+#else
+    { &vnop_searchfs_desc, (VOPFUNC)err_searchfs },            /* search fs */
+#endif
     { &vnop_bwrite_desc, (VOPFUNC)hfs_readonly_op },           /* bwrite (READONLY) */
     { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein },           /* pagein */
     { &vnop_pageout_desc,(VOPFUNC) hfs_readonly_op },          /* pageout (READONLY)  */
@@ -5743,7 +5908,11 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf },               /* pathconf */
     { &vnop_advlock_desc, (VOPFUNC)err_advlock },              /* advlock */
     { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate },               /* allocate */
+#if CONFIG_SEARCHFS
     { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search },         /* search fs */
+#else
+    { &vnop_searchfs_desc, (VOPFUNC)err_searchfs },            /* search fs */
+#endif
     { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite },           /* bwrite */
     { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein },           /* pagein */
     { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout },         /* pageout */
index 8091dfaa2b9f148d41e3f3c7eb0d76cd6bfb898d..e7a91adddf500de41e88bec397d4384f56d6dfc2 100644 (file)
@@ -232,6 +232,7 @@ out:
 }
 #endif
 
+
 /* Zero out the date added field for the specified cnode */
 static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo) {
        u_int8_t *finfo = finderinfo;
@@ -255,7 +256,6 @@ static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo) {
     
 }
 
-
 /*
  * Retrieve the data of an extended attribute.
  */
@@ -785,9 +785,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
                 */
                fdFlags = *((u_int16_t *) &cp->c_finderinfo[8]);
                if (fdFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask))
-                       cp->c_flags |= UF_HIDDEN;
+                       cp->c_bsdflags |= UF_HIDDEN;
                else
-                       cp->c_flags &= ~UF_HIDDEN;
+                       cp->c_bsdflags &= ~UF_HIDDEN;
 
                result = hfs_update(vp, FALSE);
 
@@ -953,12 +953,22 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize,
        int exists = 0;
        int allocatedblks = 0;
        u_int32_t target_id;
+       int takelock = 1;
 
        if (cp) {
                target_id = cp->c_fileid;
        }
        else {
                target_id = fileid;
+               if (target_id != 1) {
+                       /* 
+                        * If we are manipulating something other than 
+                        * the root folder (id 1), and do not have a cnode-in-hand, 
+                        * then we must already hold the requisite b-tree locks from 
+                        * earlier up the call stack. (See hfs_makenode)
+                        */
+                       takelock = 0;
+               }
        }
        
        /* Start a transaction for our changes. */
@@ -990,10 +1000,12 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize,
        if (hfsmp->hfs_max_inline_attrsize == 0) {
                hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp);
        }
-       
-       /* Take exclusive access to the attributes b-tree. */
-       lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-       
+
+       if (takelock) {
+               /* Take exclusive access to the attributes b-tree. */
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+       }
+
        /* Build the b-tree key. */
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
        if (iterator == NULL) {
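The takelock flag added above keeps locking symmetric: hfs_setxattr_internal takes the attributes b-tree lock only when the caller has not already taken it higher up the call stack (as hfs_makenode does). A sketch of the idiom with a pthread mutex standing in for hfs_systemfile_lock; do_update and caller_holds_lock are hypothetical:

#include <pthread.h>

static pthread_mutex_t attr_lock = PTHREAD_MUTEX_INITIALIZER;

static int do_update(int caller_holds_lock)
{
    int takelock = !caller_holds_lock;

    if (takelock)
        pthread_mutex_lock(&attr_lock);

    /* ... mutate the shared structure here ... */

    if (takelock)                  /* release only what we acquired */
        pthread_mutex_unlock(&attr_lock);
    return 0;
}

int main(void) { return do_update(0); }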
@@ -1349,7 +1361,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                
                /* Do the byte compare against the local copy */
                if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) {
-                       hfs_unlock (cp);
+            hfs_unlock(cp);
                        return (ENOATTR);
                }
                
@@ -1640,6 +1652,7 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
        int result;
     u_int8_t finderinfo[32];
 
+
        if (VNODE_IS_RSRC(vp)) {
                return (EPERM);
        }
@@ -1671,9 +1684,9 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
                fip->fdType = 0;
                fip->fdCreator = 0;
        }       
+
        
-    
-       /* If Finder Info is non-empty then export its name. */
+    /* If Finder Info is non-empty then export its name. */
        if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) != 0) {
                if (uio == NULL) {
                        *ap->a_size += sizeof(XATTR_FINDERINFO_NAME);
index 219cd538f2adcaf60ef0b4d5375a53d1855c0c5c..114104c6f107812000d58a193964476d2839ebc5 100644 (file)
@@ -28,6 +28,9 @@
 #include <sys/param.h>
 #include <sys/utfconv.h>
 #include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <libkern/libkern.h>
 
 #include       "../headers/FileMgrInternal.h"
 #include       "../headers/BTreesInternal.h"
@@ -52,10 +55,15 @@ LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *ke
        HFSCatalogNodeID        threadParentID;
        u_int16_t tempSize;
        FSBufferDescriptor       btRecord;
-       BTreeIterator            searchIterator; 
+       struct BTreeIterator *searchIterator;
        FCB                     *fcb;
 
-       bzero(&searchIterator, sizeof(searchIterator));
+       MALLOC (searchIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (searchIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+
+       bzero(searchIterator, sizeof(*searchIterator));
 
        fcb = GetFileControlBlock(volume->catalogRefNum);
 
@@ -63,22 +71,27 @@ LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *ke
        btRecord.itemCount = 1;
        btRecord.itemSize = sizeof(CatalogRecord);
 
-       searchIterator.hint.nodeNum = hint;
+       searchIterator->hint.nodeNum = hint;
 
-       bcopy(keyPtr, &searchIterator.key, sizeof(CatalogKey));
+       bcopy(keyPtr, &searchIterator->key, sizeof(CatalogKey));
        
-       result = BTSearchRecord( fcb, &searchIterator, &btRecord, &tempSize, &searchIterator );
+       result = BTSearchRecord( fcb, searchIterator, &btRecord, &tempSize, searchIterator );
 
        if (result == noErr)
        {
-               *newHint = searchIterator.hint.nodeNum;
+               *newHint = searchIterator->hint.nodeNum;
 
-               BlockMoveData(&searchIterator.key, keyPtr, sizeof(CatalogKey));
+               BlockMoveData(&searchIterator->key, keyPtr, sizeof(CatalogKey));
        }
 
-       if (result == btNotFound)
-               result = cmNotFound;    
-       ReturnIfError(result);
+       if (result == btNotFound) {
+               result = cmNotFound;
+       }       
+
+       if (result) {
+               FREE(searchIterator, M_TEMP);
+               return result;
+       }
        
        // if we got a thread record, then go look up the real record
        switch ( dataPtr->recordType )
@@ -103,6 +116,7 @@ LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *ke
        if ( threadParentID )           // found a thread
                result = LocateCatalogRecord(volume, threadParentID, nodeName, kNoHint, keyPtr, dataPtr, newHint);
        
+       FREE (searchIterator, M_TEMP);
        return result;
 }
 
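The function above shows the template repeated throughout this commit: the large BTreeIterator moves off the limited kernel stack onto the heap, zeroed after allocation and freed on every exit path, errors included. A userspace sketch of the discipline, with malloc/free standing in for the kernel MALLOC/FREE macros; struct iterator and lookup are hypothetical:

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for the large BTreeIterator that used to live on the stack. */
struct iterator { unsigned char key[512]; unsigned hint; };

int lookup(int want_fail)
{
    struct iterator *it;

    it = malloc(sizeof(*it));        /* MALLOC(..., M_TEMP, M_WAITOK) */
    if (it == NULL)
        return ENOMEM;               /* memFullErr in the diff */
    memset(it, 0, sizeof(*it));      /* bzero() */

    if (want_fail) {                 /* every early return frees first */
        free(it);
        return EIO;
    }

    /* ... use it->key and it->hint here ... */

    free(it);                        /* FREE(..., M_TEMP) */
    return 0;
}

int main(void) { return lookup(0); }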
@@ -122,11 +136,17 @@ LocateCatalogRecord(const ExtendedVCB *volume, HFSCatalogNodeID folderID, const
        OSErr result;
        uint16_t tempSize;
        FSBufferDescriptor btRecord;
-       BTreeIterator searchIterator;
+       struct BTreeIterator *searchIterator = NULL;
        FCB *fcb;
        BTreeControlBlock *btcb;
 
-       bzero(&searchIterator, sizeof(searchIterator));
+       MALLOC (searchIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (searchIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+
+       bzero(searchIterator, sizeof(*searchIterator));
+
 
        fcb = GetFileControlBlock(volume->catalogRefNum);
        btcb = (BTreeControlBlock *)fcb->fcbBTCBPtr;
@@ -135,14 +155,15 @@ LocateCatalogRecord(const ExtendedVCB *volume, HFSCatalogNodeID folderID, const
        btRecord.itemCount = 1;
        btRecord.itemSize = sizeof(CatalogRecord);
 
-       BuildCatalogKey(folderID, name, (volume->vcbSigWord == kHFSPlusSigWord), (CatalogKey *)&searchIterator.key);
+       BuildCatalogKey(folderID, name, (volume->vcbSigWord == kHFSPlusSigWord), (CatalogKey *)&searchIterator->key);
 
-       result = BTSearchRecord(fcb, &searchIterator, &btRecord, &tempSize, &searchIterator);
+       result = BTSearchRecord(fcb, searchIterator, &btRecord, &tempSize, searchIterator);
        if (result == noErr) {
-               *newHint = searchIterator.hint.nodeNum;
-               BlockMoveData(&searchIterator.key, keyPtr, CalcKeySize(btcb, &searchIterator.key));
+               *newHint = searchIterator->hint.nodeNum;
+               BlockMoveData(&searchIterator->key, keyPtr, CalcKeySize(btcb, &searchIterator->key));
        }
 
+       FREE (searchIterator, M_TEMP);
        return (result == btNotFound ? cmNotFound : result);
 }
 
index fee50fe6de2001cb795e415081fe6c6d4d7e02ae..29242d367f91ef9b226cb3fe1edae93683636f7f 100644 (file)
@@ -32,6 +32,9 @@
 #include       "../headers/FileMgrInternal.h"
 #include       "../headers/HFSUnicodeWrappers.h"
 #include       "../headers/CatalogPrivate.h"
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <libkern/libkern.h>
 
 
 struct ExtentsRecBuffer {
@@ -112,13 +115,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
        int16_t         numDestExtentBlocks;
        OSErr           err;
        Boolean         isHFSPlus = ( vcb->vcbSigWord == kHFSPlusSigWord );
-       
+
        err = BuildCatalogKeyUTF8(vcb, srcID, srcName, kUndefinedStrLen, &srcKey, NULL);
        ReturnIfError(err);
-       
+
        err = BuildCatalogKeyUTF8(vcb, destID, destName, kUndefinedStrLen, &destKey, NULL);
        ReturnIfError(err);
-       
+
        if ( isHFSPlus )
        {
                //--    Step 1: Check the catalog nodes for extents
@@ -126,27 +129,27 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                //--    locate the source file, test for extents in extent file, and copy the cat record for later
                err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint );
                ReturnIfError( err );
-               
+       
                if ( srcData.recordType != kHFSPlusFileRecord )
                        return( cmFThdDirErr );                                 //      Error "cmFThdDirErr = it is a directory"
-               
+                       
                //--    Check if there are any extents in the source file
                //      I am only checking the extents in the low 32 bits; the routine will fail if a file's extents past 2 GB are in overflow
                numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.dataFork.extents, srcData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus );
                if ( numSrcExtentBlocks == 0 )                                  //      then check the resource fork extents
                        numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.resourceFork.extents, srcData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus );
-               
+
                //--    Check if there are any extents in the destination file
                err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
                ReturnIfError( err );
-               
+       
                if ( destData.recordType != kHFSPlusFileRecord )
                        return( cmFThdDirErr );                                 //      Error "cmFThdDirErr = it is a directory"
-               
+
                numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.dataFork.extents, destData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus );
                if ( numDestExtentBlocks == 0 )                                 //      then check the resource fork extents
                        numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.resourceFork.extents, destData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus );
-               
+
                //--    Step 2: Exchange the Extent key in the extent file
                
                //--    Exchange the extents key in the extent file
@@ -156,7 +159,7 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                if ( numSrcExtentBlocks && numDestExtentBlocks )        //      if both files have extents
                {
                        //--    Change the source extents file ids to our known bogus value
-                       err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
+                       err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, 0,0, isHFSPlus );
                        if ( err != noErr )
                        {
                                if ( err != dskFulErr )
@@ -171,13 +174,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
-                       ExUndo2aPlus:   err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
+
+ExUndo2aPlus:  err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                 err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );  //      Move the extents back
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+                                       
                                goto ExUndo1a;
                        }
                        
@@ -187,13 +190,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );     //      Move the extents back
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+                                       
                                goto ExUndo2aPlus;
                        }
                        
@@ -205,10 +208,10 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                goto FlushAndReturn;
                        }
                }
@@ -219,14 +222,14 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                goto FlushAndReturn;
                        }
                }
-               
+
                //--    Step 3: Change the data in the catalog nodes
                
                //--    find the source cnode and put dest info in it
@@ -239,12 +242,12 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                
                err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSPlusCatalogFile), &srcHint );
                ReturnIfError( err );
-               
+
                //      find the destination cnode and put source info in it            
                err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
                if ( err != noErr )
                        return( cmBadNews );
-               
+                       
                CopyBigCatalogNodeInfo( &swapData, &destData );
                err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSPlusCatalogFile), &destHint );
                ReturnIfError( err );
@@ -256,10 +259,10 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                //--    locate the source file, test for extents in extent file, and copy the cat record for later
                err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint );
                ReturnIfError( err );
-               
+       
                if ( srcData.recordType != kHFSFileRecord )
                        return( cmFThdDirErr );                                 //      Error "cmFThdDirErr = it is a directory"
-               
+                       
                //--    Check if there are any extents in the source file
                numSrcExtentBlocks = CheckExtents( srcData.hfsFile.dataExtents, srcData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus );
                if ( numSrcExtentBlocks == 0 )                                  //      then check the resource fork extents
@@ -268,21 +271,21 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                
                //      Do we save the found source node for later use?
                
-               
+                               
                //--    Check if there are any extents in the destination file
                err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
                ReturnIfError( err );
-               
+       
                if ( destData.recordType != kHFSFileRecord )
                        return( cmFThdDirErr );                                 //      Error "cmFThdDirErr = it is a directory"
-               
+
                numDestExtentBlocks = CheckExtents( destData.hfsFile.dataExtents, destData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus );
                if ( numDestExtentBlocks == 0 )                                 //      then check the resource fork extents
                        numDestExtentBlocks = CheckExtents( destData.hfsFile.rsrcExtents, destData.hfsFile.rsrcPhysicalSize / vcb->blockSize, isHFSPlus );
-               
+                       
                //      Do we save the found destination node for later use?
-               
-               
+
+
                //--    Step 2: Exchange the Extent key in the extent file
                
                //--    Exchange the extents key in the extent file
@@ -292,15 +295,15 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                if ( numSrcExtentBlocks && numDestExtentBlocks )        //      if both files have extents
                {
                        //--    Change the source extents file ids to our known bogus value
-                       err = MoveExtents( vcb, srcData.hfsFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
+        err = MoveExtents( vcb, srcData.hfsFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
                        if ( err != noErr )
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
-                       ExUndo1a:               err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
+
+ExUndo1a:              err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                err = FlushCatalog( vcb );                      //      flush the catalog
                                err = FlushExtentFile( vcb );                   //      flush the extent file (unneeded for common case, but it's cheap)                        
                                return( dskFulErr );
@@ -312,13 +315,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
-                       ExUndo2a:               err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
+
+ExUndo2a:              err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                 err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus );      //      Move the extents back
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+                                       
                                goto ExUndo1a;
                        }
                        
@@ -328,13 +331,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, 0, 0, isHFSPlus );     //      Move the extents back
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+                                       
                                goto ExUndo2a;
                        }
                        
@@ -346,10 +349,10 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                goto FlushAndReturn;
                        }
                }
@@ -360,14 +363,14 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                        {
                                if ( err != dskFulErr )
                                        return( err );
-                               
+
                                err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus );
                                ReturnIfError( err );                                   //      we are doomed. Just QUIT!
-                               
+
                                goto FlushAndReturn;
                        }
                }
-               
+
                //--    Step 3: Change the data in the catalog nodes
                
                //--    find the source cnode and put dest info in it
@@ -381,23 +384,23 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
                
                err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSCatalogFile), &srcHint );
                ReturnIfError( err );
-               
+
                
                //      find the destination cnode and put source info in it            
                err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
                if ( err != noErr )
                        return( cmBadNews );
-               
+                       
                CopyCatalogNodeInfo( &swapData, &destData );
                err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSCatalogFile), &destHint );
                ReturnIfError( err );
        }
        
        err = noErr;
-       
+
        //--    Step 4: Error Handling section
-       
-       
+
+
 FlushAndReturn:
        err = FlushCatalog( vcb );                      //      flush the catalog
        err = FlushExtentFile( vcb );                   //      flush the extent file (unneeded for common case, but it's cheap)                        
@@ -430,23 +433,39 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
        ExtentsRecBuffer        extentsBuffer[kNumExtentsToCache];
        ExtentKey *                     extentKeyPtr;
        ExtentRecord            extentData;
-       BTreeIterator           btIterator;
+       struct BTreeIterator *btIterator = NULL;
+       struct BTreeIterator *tmpIterator = NULL;
        FSBufferDescriptor      btRecord;
        u_int16_t                       btKeySize;
        u_int16_t                       btRecordSize;
        int16_t                         i, j;
        OSErr                           err;
        
-       
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (btIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+
+
+       MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (tmpIterator == NULL) {      
+               FREE (btIterator, M_TEMP);      
+               return memFullErr;  // translates to ENOMEM
+       }
+
+       bzero(btIterator, sizeof(*btIterator));
+       bzero (tmpIterator, sizeof(*tmpIterator));
+
+
        fcb = GetFileControlBlock(vcb->extentsRefNum);
        
-       (void) BTInvalidateHint(&btIterator);
-       extentKeyPtr = (ExtentKey*) &btIterator.key;
+       (void) BTInvalidateHint(btIterator);
+       extentKeyPtr = (ExtentKey*) &btIterator->key;
        btRecord.bufferAddress = &extentData;
        btRecord.itemCount = 1;
-       
+
        //--    Collect the extent records
-       
+
        //
        //      A search on the following key will cause the BTree to be positioned immediately
        //      before the first extent record for file #srcFileID, but not actually positioned
@@ -459,7 +478,7 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
        if (isHFSPlus) {
                btRecord.itemSize = sizeof(HFSPlusExtentRecord);
                btKeySize = sizeof(HFSPlusExtentKey);
-               
+
                extentKeyPtr->hfsPlus.keyLength  = kHFSPlusExtentKeyMaximumLength;
                extentKeyPtr->hfsPlus.forkType   = forkType;
                extentKeyPtr->hfsPlus.pad                = 0;
@@ -469,7 +488,7 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
        else {
                btRecord.itemSize = sizeof(HFSExtentRecord);
                btKeySize = sizeof(HFSExtentKey);
-               
+
                extentKeyPtr->hfs.keyLength      = kHFSExtentKeyMaximumLength;
                extentKeyPtr->hfs.forkType       = 0;
                extentKeyPtr->hfs.fileID         = srcFileID;
@@ -491,8 +510,8 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
        //      of BTIterateRecord.  We'd need to set up the key for BTSearchRecord to find the last record
        //      we found, so that BTIterateRecord would get the next one (the first we haven't processed).
        //
-       
-       err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
+
+       err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator);
        
        //      We expect a btNotFound here, since there shouldn't be an extent record with FABN = 0.
        if (err != btNotFound)
@@ -503,24 +522,28 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
                if (err == noErr)                       //      If we found such a bogus extent record, then the tree is really messed up
                        err = cmBadNews;                //      so return an error that conveys the disk is hosed.
                
+               FREE (tmpIterator, M_TEMP);     
+               FREE (btIterator, M_TEMP);
                return err;
        }
-       
+
        do
        {
                btRecord.bufferAddress = &extentData;
                btRecord.itemCount = 1;
-               
+
                for ( i=0 ; i<kNumExtentsToCache ; i++ )
                {
                        HFSCatalogNodeID        foundFileID;
                        
-                       err = BTIterateRecord(fcb, kBTreeNextRecord, &btIterator, &btRecord, &btRecordSize);
+                       err = BTIterateRecord(fcb, kBTreeNextRecord, btIterator, &btRecord, &btRecordSize);
                        if ( err == btNotFound )                //      Did we run out of extent records in the extents tree?
                                break;                                          //      if xkrFNum(A0) is cleared on this error, then this test is bogus!
-                       else if ( err != noErr )
+                       else if ( err != noErr ) {
+                               FREE (btIterator, M_TEMP);
+                               FREE (tmpIterator, M_TEMP);
                                return( err );                          //      must be ioError
-                       
+                       }
                        foundFileID = isHFSPlus ? extentKeyPtr->hfsPlus.fileID : extentKeyPtr->hfs.fileID;
                        if ( foundFileID == srcFileID ) {
                                /* Check if we need to quit early. */
@@ -537,39 +560,45 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
                        }
                }
                
+               
+               
                //--    edit each extent key, and reinsert each extent record in the extent file
                if (isHFSPlus)
                        btRecordSize = sizeof(HFSPlusExtentRecord);
                else
                        btRecordSize = sizeof(HFSExtentRecord);
-               
-               for ( j=0 ; j<i ; j++ ) {
-                       BTreeIterator tmpIterator;
-                       
+
+               for ( j=0 ; j<i ; j++ )
+               {
+
                        if (isHFSPlus)
                                extentsBuffer[j].extentKey.hfsPlus.fileID = destFileID; //      change only the id in the key to dest ID
                        else
                                extentsBuffer[j].extentKey.hfs.fileID = destFileID;     //      change only the id in the key to dest ID
-                       
+
                        // get iterator and buffer descriptor ready...
-                       (void) BTInvalidateHint(&tmpIterator);
-                       BlockMoveData(&(extentsBuffer[j].extentKey), &tmpIterator.key, btKeySize);
+                       (void) BTInvalidateHint(tmpIterator);
+                       BlockMoveData(&(extentsBuffer[j].extentKey), &tmpIterator->key, btKeySize);
                        btRecord.bufferAddress = &(extentsBuffer[j].extentData);
-                       
-                       err = BTInsertRecord(fcb, &tmpIterator, &btRecord, btRecordSize);
-                       if ( err != noErr )
-                       {                                                                       //      parse the error
+
+                       err = BTInsertRecord(fcb, tmpIterator, &btRecord, btRecordSize);
+                       if ( err != noErr ) {                                                           
+                               /* Parse the error and free iterators */
+                               FREE (btIterator, M_TEMP);
+                               FREE (tmpIterator, M_TEMP);
                                if ( err == btExists )
                                {
-                                       if ( DEBUG_BUILD )
-                                               DebugStr("Can't insert record -- already exists");
+                                       if ( DEBUG_BUILD ) {
+                                               DebugStr("Can't insert record -- already exists"); 
+                                       }
                                        return( cmBadNews );
                                }
-                               else
+                               else {
                                        return( err );
+                               }                       
                        }
                }
-               
+
                //--    okay, done with this buffered batch, go get the next set of extent records
                //      If our buffer is not full, we must be done or have received an error
                
@@ -582,6 +611,9 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
                }
        } while ( true );
        
+       FREE (tmpIterator, M_TEMP);
+       FREE (btIterator, M_TEMP);
+
        return( err );
 }
 
@@ -593,33 +625,47 @@ static void  CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffe
 }
 
 
-
-
 //--   Delete all extents in extent file that have the ID given.
 static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,  u_int8_t forkType, Boolean isHFSPlus )
 {
        FCB *                           fcb;
        ExtentKey *                     extentKeyPtr;
        ExtentRecord            extentData;
-       BTreeIterator           btIterator;
+       struct BTreeIterator *btIterator = NULL;
+       struct BTreeIterator *tmpIterator = NULL;
        FSBufferDescriptor      btRecord;
        u_int16_t                       btRecordSize;
        OSErr                           err;
-       
+
+
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (btIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+
+       MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (tmpIterator == NULL) {      
+               FREE (btIterator, M_TEMP);      
+               return memFullErr;  // translates to ENOMEM
+       }
+
+       bzero(btIterator, sizeof(*btIterator));
+       bzero (tmpIterator, sizeof(*tmpIterator));
+
        fcb = GetFileControlBlock(vcb->extentsRefNum);
-       
-       (void) BTInvalidateHint(&btIterator);
-       extentKeyPtr = (ExtentKey*) &btIterator.key;
+
+       (void) BTInvalidateHint(btIterator);
+       extentKeyPtr = (ExtentKey*) &btIterator->key;
        btRecord.bufferAddress = &extentData;
        btRecord.itemCount = 1;
-       
+
        //      The algorithm is to position the BTree just before any extent records for fileID.
        //      Then just keep getting successive records.  If the record is still for fileID,
        //      then delete it.
        
        if (isHFSPlus) {
                btRecord.itemSize = sizeof(HFSPlusExtentRecord);
-               
+
                extentKeyPtr->hfsPlus.keyLength  = kHFSPlusExtentKeyMaximumLength;
                extentKeyPtr->hfsPlus.forkType   = forkType;
                extentKeyPtr->hfsPlus.pad                = 0;
@@ -628,14 +674,14 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
        }
        else {
                btRecord.itemSize = sizeof(HFSExtentRecord);
-               
+
                extentKeyPtr->hfs.keyLength      = kHFSExtentKeyMaximumLength;
                extentKeyPtr->hfs.forkType       = forkType;
                extentKeyPtr->hfs.fileID         = fileID;
                extentKeyPtr->hfs.startBlock = 0;
        }
-       
-       err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
+
+       err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator);
        if ( err != btNotFound )
        {
                if (err == noErr) {             //      Did we find a bogus extent record?
@@ -644,18 +690,17 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
                
                return err;                             //      Got some unexpected error, so return it
        }
-       
+
        do
        {
-               BTreeIterator           tmpIterator;
                HFSCatalogNodeID        foundFileID;
-               
-               err = BTIterateRecord(fcb, kBTreeNextRecord, &btIterator, &btRecord, &btRecordSize);
+
+               err = BTIterateRecord(fcb, kBTreeNextRecord, btIterator, &btRecord, &btRecordSize);
                if ( err != noErr )
                {
                        if (err == btNotFound)  //      If we hit the end of the BTree
                                err = noErr;            //              then it's OK
-                       
+                               
                        break;                                  //      We're done now.
                }
                
@@ -670,12 +715,15 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
                        }
                }
                
-               tmpIterator = btIterator;
-               err = BTDeleteRecord( fcb, &tmpIterator );
+               *tmpIterator = *btIterator;
+               err = BTDeleteRecord( fcb, tmpIterator );
                if (err != noErr)
                        break;
        }       while ( true );
        
+       FREE (tmpIterator, M_TEMP);
+       FREE (btIterator, M_TEMP);
+
        return( err );
 }
 
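DeleteExtents above copies the live cursor into tmpIterator before each BTDeleteRecord, so the deletion can invalidate its own iterator without derailing the loop. The same snapshot-before-delete idiom over a singly linked list (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct node { int fileID; struct node *next; };

/* Delete every node for fileID while walking the list: snapshot
 * the current node, step past it, then destroy the snapshot. */
static struct node *delete_extents(struct node *head, int fileID)
{
    struct node **link = &head;

    while (*link) {
        if ((*link)->fileID == fileID) {
            struct node *victim = *link;   /* snapshot the cursor */
            *link = victim->next;          /* advance past it first */
            free(victim);                  /* now safe to destroy */
        } else {
            link = &(*link)->next;
        }
    }
    return head;
}

int main(void)
{
    struct node *head = NULL;
    int ids[] = {7, 9, 7, 4};

    for (int i = 3; i >= 0; i--) {         /* build 7 -> 9 -> 7 -> 4 */
        struct node *n = malloc(sizeof(*n));
        if (n == NULL)
            return 1;
        n->fileID = ids[i];
        n->next = head;
        head = n;
    }
    head = delete_extents(head, 7);
    for (struct node *n = head; n; n = n->next)
        printf("%d\n", n->fileID);         /* prints 9 then 4 */
    return 0;
}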
index 5ce31fd3c572e6ba303be27b2386d81de6e90528..590fd9397fe00d69b3cbac48f00051a9610f82e4 100644 (file)
@@ -27,6 +27,9 @@
  */
 
 #include "../headers/BTreesPrivate.h"
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <libkern/libkern.h>
 
 
 // local routines
@@ -37,11 +40,16 @@ static Boolean      ValidHFSRecord(const void *record, const BTreeControlBlock *btcb,
 OSErr ReplaceBTreeRecord(FileReference refNum, const void* key, u_int32_t hint, void *newData, u_int16_t dataSize, u_int32_t *newHint)
 {
        FSBufferDescriptor      btRecord;
-       BTreeIterator           iterator;
+       struct BTreeIterator *iterator = NULL;
        FCB                                     *fcb;
        BTreeControlBlock       *btcb;
        OSStatus                        result;
 
+       MALLOC (iterator, struct BTreeIterator *, sizeof (struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (iterator == NULL) {
+               return memFullErr;  //translates to ENOMEM              
+       }
+       bzero (iterator, sizeof (*iterator));
 
        fcb = GetFileControlBlock(refNum);
        btcb = (BTreeControlBlock*) fcb->fcbBTCBPtr;
@@ -50,24 +58,25 @@ OSErr ReplaceBTreeRecord(FileReference refNum, const void* key, u_int32_t hint,
        btRecord.itemSize = dataSize;
        btRecord.itemCount = 1;
 
-       iterator.hint.nodeNum = hint;
+       iterator->hint.nodeNum = hint;
 
        result = CheckBTreeKey((const BTreeKey *) key, btcb);
-       ExitOnError(result);
+       if (result) {
+               goto ErrorExit;
+       }
 
-       BlockMoveData(key, &iterator.key, CalcKeySize(btcb, (const BTreeKey *) key));           //      should we range check against maxkeylen?
+       BlockMoveData(key, &iterator->key, CalcKeySize(btcb, (const BTreeKey *) key));          //      should we range check against maxkeylen?
 
        if ( DEBUG_BUILD && !ValidHFSRecord(newData, btcb, dataSize) )
                DebugStr("ReplaceBTreeRecord: bad record?");
 
-       result = BTReplaceRecord( fcb, &iterator, &btRecord, dataSize );
-
-       *newHint = iterator.hint.nodeNum;
+       result = BTReplaceRecord( fcb, iterator, &btRecord, dataSize );
 
-       //      do we need to invalidate the iterator?
+       *newHint = iterator->hint.nodeNum;
 
 ErrorExit:
 
+       FREE (iterator, M_TEMP);
        return result;
 }
 
index 998f97fa930b04bf808cd011b091c16ec0b80988..cae3db8e2a602f39cbd295fbf9ff581447982a8d 100644 (file)
@@ -225,7 +225,7 @@ static OSErr FindExtentRecord(
        u_int32_t                       *foundHint)
 {
        FCB *                           fcb;
-       BTreeIterator           btIterator;
+       struct BTreeIterator *btIterator = NULL;
        FSBufferDescriptor      btRecord;
        OSErr                           err;
        u_int16_t                       btRecordSize;
@@ -234,14 +234,18 @@ static OSErr FindExtentRecord(
        if (foundHint)
                *foundHint = 0;
        fcb = GetFileControlBlock(vcb->extentsRefNum);
-       
-       bzero(&btIterator, sizeof(btIterator));
+
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (btIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+       bzero(btIterator, sizeof(*btIterator));
 
        if (vcb->vcbSigWord == kHFSSigWord) {
                HFSExtentKey *          extentKeyPtr;
                HFSExtentRecord         extentData;
 
-               extentKeyPtr = (HFSExtentKey*) &btIterator.key;
+               extentKeyPtr = (HFSExtentKey*) &btIterator->key;
                extentKeyPtr->keyLength = kHFSExtentKeyMaximumLength;
                extentKeyPtr->forkType = forkType;
                extentKeyPtr->fileID = fileID;
@@ -251,10 +255,10 @@ static OSErr FindExtentRecord(
                btRecord.itemSize = sizeof(HFSExtentRecord);
                btRecord.itemCount = 1;
 
-               err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
+               err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator);
 
                if (err == btNotFound && allowPrevious) {
-                       err = BTIterateRecord(fcb, kBTreePrevRecord, &btIterator, &btRecord, &btRecordSize);
+                       err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize);
 
                        //      A previous record may not exist, so just return btNotFound (like we would if
                        //      it was for the wrong file/fork).
@@ -298,7 +302,7 @@ static OSErr FindExtentRecord(
                HFSPlusExtentKey *      extentKeyPtr;
                HFSPlusExtentRecord     extentData;
 
-               extentKeyPtr = (HFSPlusExtentKey*) &btIterator.key;
+               extentKeyPtr = (HFSPlusExtentKey*) &btIterator->key;
                extentKeyPtr->keyLength  = kHFSPlusExtentKeyMaximumLength;
                extentKeyPtr->forkType   = forkType;
                extentKeyPtr->pad                = 0;
@@ -309,10 +313,10 @@ static OSErr FindExtentRecord(
                btRecord.itemSize = sizeof(HFSPlusExtentRecord);
                btRecord.itemCount = 1;
 
-               err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
+               err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator);
 
                if (err == btNotFound && allowPrevious) {
-                       err = BTIterateRecord(fcb, kBTreePrevRecord, &btIterator, &btRecord, &btRecordSize);
+                       err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize);
 
                        //      A previous record may not exist, so just return btNotFound (like we would if
                        //      it was for the wrong file/fork).
@@ -336,7 +340,9 @@ static OSErr FindExtentRecord(
        }
 
        if (foundHint)
-               *foundHint = btIterator.hint.nodeNum;
+               *foundHint = btIterator->hint.nodeNum;
+
+       FREE(btIterator, M_TEMP);
        return err;
 }
 
@@ -348,7 +354,7 @@ static OSErr CreateExtentRecord(
        HFSPlusExtentRecord     extents,
        u_int32_t                       *hint)
 {
-       BTreeIterator btIterator;
+       struct BTreeIterator *btIterator = NULL;
        FSBufferDescriptor      btRecord;
        u_int16_t  btRecordSize;
        int  lockflags;
@@ -357,7 +363,11 @@ static OSErr CreateExtentRecord(
        err = noErr;
        *hint = 0;
 
-       bzero(&btIterator, sizeof(btIterator));
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (btIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+       bzero(btIterator, sizeof(*btIterator));
 
        /*
         * The lock taken by callers of ExtendFileC is speculative and
@@ -377,7 +387,7 @@ static OSErr CreateExtentRecord(
                btRecord.itemSize = btRecordSize;
                btRecord.itemCount = 1;
 
-               keyPtr = (HFSExtentKey*) &btIterator.key;
+               keyPtr = (HFSExtentKey*) &btIterator->key;
                keyPtr->keyLength       = kHFSExtentKeyMaximumLength;
                keyPtr->forkType        = key->forkType;
                keyPtr->fileID          = key->fileID;
@@ -391,19 +401,20 @@ static OSErr CreateExtentRecord(
                btRecord.itemSize = btRecordSize;
                btRecord.itemCount = 1;
 
-               BlockMoveData(key, &btIterator.key, sizeof(HFSPlusExtentKey));
+               BlockMoveData(key, &btIterator->key, sizeof(HFSPlusExtentKey));
        }
 
        if (err == noErr)
-               err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), &btIterator, &btRecord, btRecordSize);
+               err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator, &btRecord, btRecordSize);
 
        if (err == noErr)
-               *hint = btIterator.hint.nodeNum;
+               *hint = btIterator->hint.nodeNum;
 
        (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum));
        
        hfs_systemfile_unlock(vcb, lockflags);
-       
+
+       FREE (btIterator, M_TEMP);      
        return err;
 }
 
@@ -414,17 +425,21 @@ static OSErr DeleteExtentRecord(
        u_int32_t                       fileID,
        u_int32_t                       startBlock)
 {
-       BTreeIterator btIterator;
+       struct BTreeIterator *btIterator = NULL;
        OSErr                           err;
        
        err = noErr;
 
-       bzero(&btIterator, sizeof(btIterator));
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+       if (btIterator == NULL) {
+               return memFullErr;  // translates to ENOMEM
+       }
+       bzero(btIterator, sizeof(*btIterator));
        
        if (vcb->vcbSigWord == kHFSSigWord) {
                HFSExtentKey *  keyPtr;
 
-               keyPtr = (HFSExtentKey*) &btIterator.key;
+               keyPtr = (HFSExtentKey*) &btIterator->key;
                keyPtr->keyLength       = kHFSExtentKeyMaximumLength;
                keyPtr->forkType        = forkType;
                keyPtr->fileID          = fileID;
@@ -433,7 +448,7 @@ static OSErr DeleteExtentRecord(
        else {          //      HFS Plus volume
                HFSPlusExtentKey *      keyPtr;
 
-               keyPtr = (HFSPlusExtentKey*) &btIterator.key;
+               keyPtr = (HFSPlusExtentKey*) &btIterator->key;
                keyPtr->keyLength       = kHFSPlusExtentKeyMaximumLength;
                keyPtr->forkType        = forkType;
                keyPtr->pad                     = 0;
@@ -441,9 +456,11 @@ static OSErr DeleteExtentRecord(
                keyPtr->startBlock      = startBlock;
        }
 
-       err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), &btIterator);
+       err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator);
        (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum));
        
+
+       FREE(btIterator, M_TEMP);
        return err;
 }
 
@@ -497,7 +514,6 @@ OSErr MapFileBlockC (
        //
        //      Determine the end of the available space.  It will either be the end of the extent,
        //      or the file's PEOF, whichever is smaller.
-       
        //
        dataEnd = (off_t)((off_t)(nextFABN) * (off_t)(allocBlockSize));   // Assume valid data through end of this extent
        if (((off_t)fcb->ff_blocks * (off_t)allocBlockSize) < dataEnd)    // Is PEOF shorter?
@@ -536,10 +552,13 @@ OSErr MapFileBlockC (
                if (tmpOff <= 0) {
                        return EINVAL;
                }
-               if (tmpOff > (off_t)(numberOfBytes))
+
+               if (tmpOff > (off_t)(numberOfBytes)) {
                        *availableBytes = numberOfBytes;  // more there than they asked for, so pin the output
-               else
+               }
+               else {
                        *availableBytes = tmpOff;
+               }
        }
 
        return noErr;
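MapFileBlockC's clamp above reads: valid data ends at the extent boundary or the file's physical EOF, whichever comes first, and the byte count returned is pinned to what the caller asked for. The same arithmetic as a self-contained sketch (names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Data is valid up to min(extent end, PEOF); never report more
 * than the caller requested. */
static int64_t available_bytes(int64_t offset, int64_t requested,
                               int64_t extent_end, int64_t peof)
{
    int64_t data_end = extent_end < peof ? extent_end : peof;
    int64_t avail = data_end - offset;

    if (avail <= 0)
        return -1;                    /* EINVAL in the kernel code */
    if (avail > requested)
        avail = requested;            /* pin to the request size */
    return avail;
}

int main(void)
{
    /* Extent runs to byte 8192, PEOF is 6000, caller wants 4096
     * bytes starting at offset 3000: only 3000 are really there. */
    printf("%lld\n", (long long)available_bytes(3000, 4096, 8192, 6000));
    return 0;
}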
@@ -1890,7 +1909,7 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB  *fcb, int deleted,
                }
        }
        else {
-               BTreeIterator btIterator;
+               struct BTreeIterator *btIterator = NULL;
                FSBufferDescriptor btRecord;
                u_int16_t btRecordSize;
                FCB * btFCB;
@@ -1900,8 +1919,12 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB  *fcb, int deleted,
                //      Need to find and change a record in Extents BTree
                //
                btFCB = GetFileControlBlock(vcb->extentsRefNum);
-
-               bzero(&btIterator, sizeof(btIterator));
+               
+               MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+               if (btIterator == NULL) {
+                       return memFullErr;  // translates to ENOMEM
+               }
+               bzero(btIterator, sizeof(*btIterator));
 
                /*
                 * The lock taken by callers of ExtendFileC/TruncateFileC is
@@ -1916,49 +1939,51 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB  *fcb, int deleted,
                        HFSExtentKey *  key;                            // Actual extent key used on disk in HFS
                        HFSExtentRecord foundData;                      // The extent data actually found
 
-                       key = (HFSExtentKey*) &btIterator.key;
+                       key = (HFSExtentKey*) &btIterator->key;
                        key->keyLength  = kHFSExtentKeyMaximumLength;
                        key->forkType   = extentFileKey->forkType;
                        key->fileID             = extentFileKey->fileID;
                        key->startBlock = extentFileKey->startBlock;
 
-                       btIterator.hint.index = 0;
-                       btIterator.hint.nodeNum = extentBTreeHint;
+                       btIterator->hint.index = 0;
+                       btIterator->hint.nodeNum = extentBTreeHint;
 
                        btRecord.bufferAddress = &foundData;
                        btRecord.itemSize = sizeof(HFSExtentRecord);
                        btRecord.itemCount = 1;
 
-                       err = BTSearchRecord(btFCB, &btIterator, &btRecord, &btRecordSize, &btIterator);
+                       err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator);
                        
                        if (err == noErr)
                                err = HFSPlusToHFSExtents(extentData, (HFSExtentDescriptor *)&foundData);
 
                        if (err == noErr)
-                               err = BTReplaceRecord(btFCB, &btIterator, &btRecord, btRecordSize);
+                               err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize);
                        (void) BTFlushPath(btFCB);
                }
                else {          //      HFS Plus volume
                        HFSPlusExtentRecord     foundData;              // The extent data actually found
 
-                       BlockMoveData(extentFileKey, &btIterator.key, sizeof(HFSPlusExtentKey));
+                       BlockMoveData(extentFileKey, &btIterator->key, sizeof(HFSPlusExtentKey));
 
-                       btIterator.hint.index = 0;
-                       btIterator.hint.nodeNum = extentBTreeHint;
+                       btIterator->hint.index = 0;
+                       btIterator->hint.nodeNum = extentBTreeHint;
 
                        btRecord.bufferAddress = &foundData;
                        btRecord.itemSize = sizeof(HFSPlusExtentRecord);
                        btRecord.itemCount = 1;
 
-                       err = BTSearchRecord(btFCB, &btIterator, &btRecord, &btRecordSize, &btIterator);
+                       err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator);
        
                        if (err == noErr) {
                                BlockMoveData(extentData, &foundData, sizeof(HFSPlusExtentRecord));
-                               err = BTReplaceRecord(btFCB, &btIterator, &btRecord, btRecordSize);
+                               err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize);
                        }
                        (void) BTFlushPath(btFCB);
                }
                hfs_systemfile_unlock(vcb, lockflags);
+
+               FREE(btIterator, M_TEMP);
        }
        
        return err;
index de2858418868f0ffd69a8ed4d1e0e3eedf98f3c6..a8a874c90c44cbf6fe4908e2ca48520c8c158e1c 100644 (file)
@@ -32,7 +32,7 @@
 
        Version:        HFS Plus 1.0
 
-       Copyright:      Ã”øΩ 1996-2009 by Apple Computer, Inc., all rights reserved.
+       Copyright:      © 1996-2009 by Apple Computer, Inc., all rights reserved.
 
 */
 
@@ -70,6 +70,10 @@ Public routines:
                                        filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
                                        know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
                                        number of items in the tree that we can allocate from.
+
+       UnmapBlocks     
+                                       Iterates the volume bitmap and issues DKIOCUNMAPs to the device as the
+                                       internal volume buffer fills.
  
 Internal routines:
        Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
@@ -141,6 +145,16 @@ Internal routines:
 
        ReleaseBitmapBlock
                                        Release a bitmap block back into the buffer cache.
+       
+       remove_free_extent_cache
+                                       Remove an extent from the free extent cache.  Handles overlaps
+                                       with multiple extents in the cache, and handles splitting an
+                                       extent in the cache if the extent to be removed is in the middle
+                                       of a cached extent.
+       
+       add_free_extent_cache
+                                       Add an extent to the free extent cache.  It will merge the
+                                       input extent with extents already in the cache.
  
  
 Debug/Test Routines
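add_free_extent_cache is described above as merging the incoming extent with anything it overlaps or abuts in the cache. A minimal sketch of that merge over a sorted array of (start, count) extents; hypothetical names, not the kernel's cache structure:

#include <stdio.h>

struct extent { unsigned start, count; };

/* Merge a freed extent into a sorted, non-overlapping cache,
 * coalescing with every extent it overlaps or directly abuts.
 * Capacity is fixed at 16 entries for the sketch. */
static size_t add_extent(struct extent *cache, size_t n, struct extent add)
{
    struct extent out[16];
    size_t m = 0, i;

    for (i = 0; i < n; i++) {
        unsigned a_end = add.start + add.count;
        unsigned c_end = cache[i].start + cache[i].count;

        if (c_end < add.start || a_end < cache[i].start) {
            out[m++] = cache[i];           /* disjoint: keep as-is */
        } else {                           /* overlap or adjacency */
            unsigned s = add.start < cache[i].start ? add.start : cache[i].start;
            unsigned e = a_end > c_end ? a_end : c_end;
            add.start = s;
            add.count = e - s;             /* grow the merged extent */
        }
    }
    for (i = 0; i < m && out[i].start < add.start; i++)
        ;                                  /* find the sorted slot */
    for (size_t j = m; j > i; j--)
        out[j] = out[j - 1];
    out[i] = add;
    m++;
    for (size_t j = 0; j < m; j++)
        cache[j] = out[j];
    return m;
}

int main(void)
{
    struct extent cache[16] = { {10, 5}, {30, 4} };
    size_t n = add_extent(cache, 2, (struct extent){15, 10});

    for (size_t i = 0; i < n; i++)
        printf("[%u, +%u]\n", cache[i].start, cache[i].count);
    return 0;                              /* [10, +15] then [30, +4] */
}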
@@ -204,6 +218,8 @@ Red Black Tree Specific Routines
 #include <sys/ubc.h>
 #include <sys/uio.h>
 #include <kern/kalloc.h>
+/* For VM Page size */
+#include <libkern/libkern.h>
 
 #include "../../hfs.h"
 #include "../../hfs_dbg.h"
@@ -214,6 +230,10 @@ Red Black Tree Specific Routines
 #include "../headers/HybridAllocator.h"
 #include "../../hfs_kdebug.h"
 
+/* Headers for unmap-on-mount support */
+#include <vfs/vfs_journal.h>
+#include <sys/disk.h>
+
 #ifndef CONFIG_HFS_TRIM
 #define CONFIG_HFS_TRIM 0
 #endif
@@ -339,10 +359,25 @@ static OSErr BlockMarkFreeInternal(
        u_int32_t       numBlocks, 
        Boolean         do_validate);
 
-#if CONFIG_HFS_ALLOC_RBTREE
 
-static OSErr ReleaseRBScanBitmapBlock( struct buf *bp );
+static OSErr ReleaseScanBitmapBlock( struct buf *bp );
+
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset, 
+                             u_int32_t numBlocks, struct jnl_trim_list *list);
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
+
+static int hfs_alloc_scan_block(struct hfsmount *hfsmp, 
+                                                               u_int32_t startbit, 
+                                                               u_int32_t endBit, 
+                                                               u_int32_t *bitToScan,
+                                struct jnl_trim_list *list);
+
+int hfs_isallocated_scan (struct hfsmount *hfsmp,
+                                                                u_int32_t startingBlock,
+                                                                u_int32_t *bp_buf);
 
+#if CONFIG_HFS_ALLOC_RBTREE
 static OSErr BlockAllocateAnyRBTree(
        ExtendedVCB             *vcb,
        u_int32_t               startingBlock,
@@ -390,15 +425,6 @@ void check_rbtree_extents (struct hfsmount *hfsmp,
        u_int32_t numBlocks,
        int shouldBeFree);
 
-int hfs_isallocated_scan (struct hfsmount *hfsmp,
-                                                                u_int32_t startingBlock,
-                                                                u_int32_t *bp_buf);
-
-static int hfs_alloc_scan_block(struct hfsmount *hfsmp, 
-                                                               u_int32_t startbit, 
-                                                               u_int32_t endBit, 
-                                                               u_int32_t *bitToScan);
-
 #define ASSERT_FREE 1
 #define ASSERT_ALLOC 0
                                                                
@@ -410,19 +436,13 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc
 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
 
 #if ALLOC_DEBUG
-/* 
- * Extra #includes for the debug function below.  These are not normally #included because
- * they would constitute a layering violation
- */
-#include <vfs/vfs_journal.h>
-#include <sys/disk.h>
-
 /*
  * Validation Routine to verify that the TRIM list maintained by the journal
  * is in good shape relative to what we think the bitmap should have.  We should
  * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
  * we panic.  
  */
+int trim_validate_bitmap (struct hfsmount *hfsmp);
 int trim_validate_bitmap (struct hfsmount *hfsmp) {
        u_int64_t blockno_offset;
        u_int64_t numblocks;
@@ -459,43 +479,60 @@ int trim_validate_bitmap (struct hfsmount *hfsmp) {
 
 #endif
 
+
 /*
-;________________________________________________________________________________
-;
-; Routine:             hfs_unmap_free_extent
-;
-; Function:            Make note of a range of allocation blocks that should be
-                             unmapped (trimmed).  That is, the given range of blocks no
-                             longer have useful content, and the device can unmap the
-                             previous contents.  For example, a solid state disk may reuse
-                             the underlying storage for other blocks.
-;
-                             This routine is only supported for journaled volumes.  The extent
-                             being freed is passed to the journal code, and the extent will
-                             be unmapped after the current transaction is written to disk.
-;
-; Input Arguments:
-     hfsmp                   - The volume containing the allocation blocks.
-     startingBlock   - The first allocation block of the extent being freed.
-     numBlocks               - The number of allocation blocks of the extent being freed.
-;________________________________________________________________________________
-*/
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_unmap_free_extent
+ ;
+ ; Function:           Make note of a range of allocation blocks that should be
+ ;                             unmapped (trimmed).  That is, the given range of blocks no
+ ;                             longer have useful content, and the device can unmap the
+ ;                             previous contents.  For example, a solid state disk may reuse
+ ;                             the underlying storage for other blocks.
+ ;
+ ;                             This routine is only supported for journaled volumes.  The extent
+ ;                             being freed is passed to the journal code, and the extent will
+ ;                             be unmapped after the current transaction is written to disk.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     startingBlock   - The first allocation block of the extent being freed.
+ ;     numBlocks               - The number of allocation blocks of the extent being freed.
+ ;________________________________________________________________________________
+ */
 static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
 {
        u_int64_t offset;
        u_int64_t length;
-       int err;
-       
+       u_int64_t device_sz;
+       int err = 0;
+                       
        if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
        
+       if (ALLOC_DEBUG) {
+               if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+                       panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
+               }
+       }
+       
        if (hfsmp->jnl != NULL) {
+               device_sz = hfsmp->hfs_logical_bytes;
                offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
                length = (u_int64_t) numBlocks * hfsmp->blockSize;
 
-               err = journal_trim_add_extent(hfsmp->jnl, offset, length);
-               if (err) {
-                       printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
+               /* Validate that the trim is in a valid range of bytes */
+               if ((offset >= device_sz) || ((offset + length) > device_sz)) {
+			printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld\n", offset, length);
+                       err = EINVAL;
+               }
+
+               if (err == 0) {
+                       err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+                       if (err) {
+				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent\n", err);
+                       }
                }
        }
        
@@ -504,22 +541,107 @@ static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBloc
 }
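
The new guard above simply drops any trim whose byte range would fall outside
the device before handing it to the journal.  A minimal host-side sketch of
the same bounds arithmetic; trim_in_range and the device size are illustrative
stand-ins, not kernel names:

    #include <stdint.h>
    #include <stdio.h>

    /* Reject a trim that starts at or past the end of the device, or whose
     * end would run past it (mirrors the offset/length check above). */
    static int trim_in_range(uint64_t offset, uint64_t length, uint64_t device_sz)
    {
        if (offset >= device_sz || (offset + length) > device_sz)
            return 0;
        return 1;
    }

    int main(void)
    {
        uint64_t device_sz = 1024 * 1024;   /* assume a 1 MiB device */
        printf("%d\n", trim_in_range(0, 4096, device_sz));                /* 1 */
        printf("%d\n", trim_in_range(device_sz - 4096, 8192, device_sz)); /* 0 */
        return 0;
    }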
 
 
+
 /*
-;________________________________________________________________________________
-;
-; Routine:             hfs_unmap_alloc_extent
-;
-; Function:            Make note of a range of allocation blocks, some of
-;                              which may have previously been passed to hfs_unmap_free_extent,
-;                              is now in use on the volume.  The given blocks will be removed
-;                              from any pending DKIOCUNMAP.
-;
-; Input Arguments:
-;      hfsmp                   - The volume containing the allocation blocks.
-;      startingBlock   - The first allocation block of the extent being allocated.
-;      numBlocks               - The number of allocation blocks being allocated.
-;________________________________________________________________________________
-*/
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_track_unmap_blocks
+ ;
+ ; Function:   Make note of a range of allocation blocks that should be
+ ;                             unmapped (trimmed).  That is, the given range of blocks no
+ ;                             longer have useful content, and the device can unmap the
+ ;                             previous contents.  For example, a solid state disk may reuse
+ ;                             the underlying storage for other blocks.
+ ;
+ ;                             This routine is only supported for journaled volumes.  
+ ; 
+ ;              *****NOTE*****: 
+ ;		This function should *NOT* be used when the volume is fully 
+ ;		mounted.  It is intended to support a bitmap iteration at mount 
+ ;		time that fully informs the SSD driver of the state of all blocks,
+ ;		and it assumes that there is no allocation/deallocation
+ ;		interference during its iteration.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     start           - The first allocation block of the extent being freed.
+ ;     numBlocks               - The number of allocation blocks of the extent being freed.
+ ;  list            - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
+				   u_int32_t numBlocks, struct jnl_trim_list *list) {
+
+	u_int64_t offset;
+	u_int64_t length;
+	int error = 0;
+
+	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+		int extent_no = list->extent_count;
+		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+		length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+		list->extents[extent_no].offset = offset;
+		list->extents[extent_no].length = length;
+		list->extent_count++;
+		if (list->extent_count == list->allocated_count) {
+			error = hfs_issue_unmap (hfsmp, list);
+		}
+	}
+
+	return error;
+}
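
hfs_track_unmap_blocks batches extents into the caller-owned jnl_trim_list and
only flushes, via hfs_issue_unmap, once the array fills; the number of ioctls
therefore scales with pages of extents rather than with individual free runs.
A self-contained user-space sketch of that batching pattern, where struct
trim_list, track_extent, and the print-based flush_extents are illustrative
stand-ins for the kernel types and the DKIOCUNMAP ioctl:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct extent { unsigned long long offset, length; };

    struct trim_list {
        unsigned allocated_count;          /* capacity of extents[] */
        unsigned extent_count;             /* extents currently queued */
        struct extent *extents;
    };

    /* Stand-in for the DKIOCUNMAP ioctl: report and reset the batch. */
    static int flush_extents(struct trim_list *list)
    {
        for (unsigned i = 0; i < list->extent_count; i++)
            printf("unmap: off=%llu len=%llu\n",
                   list->extents[i].offset, list->extents[i].length);
        memset(list->extents, 0, list->allocated_count * sizeof(struct extent));
        list->extent_count = 0;
        return 0;
    }

    /* Queue one extent; flush automatically once the batch is full. */
    static int track_extent(struct trim_list *list,
                            unsigned long long offset, unsigned long long length)
    {
        list->extents[list->extent_count].offset = offset;
        list->extents[list->extent_count].length = length;
        if (++list->extent_count == list->allocated_count)
            return flush_extents(list);
        return 0;
    }

    int main(void)
    {
        struct trim_list list = { 4, 0, calloc(4, sizeof(struct extent)) };
        if (list.extents == NULL)
            return 1;
        for (int i = 0; i < 10; i++)
            track_extent(&list, i * 4096ULL, 4096ULL);
        flush_extents(&list);              /* drain the partial final batch */
        free(list.extents);
        return 0;
    }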
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_issue_unmap
+ ;
+ ; Function:   Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;  list            - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
+	dk_unmap_t unmap;
+	int error = 0;
+
+	if (list->extent_count > 0) {
+		bzero(&unmap, sizeof(unmap));
+		unmap.extents = list->extents;
+		unmap.extentsCount = list->extent_count;
+
+		/* Issue a TRIM and flush them out */
+		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
+
+		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
+		list->extent_count = 0;
+	}
+	return error;
+}
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_unmap_alloc_extent
+ ;
+ ; Function:           Make note of a range of allocation blocks, some of
+ ;                             which may have previously been passed to hfs_unmap_free_extent,
+ ;                             is now in use on the volume.  The given blocks will be removed
+ ;                             from any pending DKIOCUNMAP.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     startingBlock   - The first allocation block of the extent being allocated.
+ ;     numBlocks               - The number of allocation blocks being allocated.
+ ;________________________________________________________________________________
+ */
 static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
 {
        u_int64_t offset;
@@ -594,6 +716,64 @@ hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
 }
 
 
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            UnmapBlocks
+ ;
+ ; Function:   Traverse the bitmap, and issue DKIOCUNMAPs to the underlying
+ ;                             device as needed so that the underlying disk device is as
+ ;                             up-to-date as possible with which blocks are unmapped.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;________________________________________________________________________________
+ */
+
+__private_extern__
+u_int32_t UnmapBlocks (struct hfsmount *hfsmp) {
+	u_int32_t blocks_scanned = 0;
+	int error = 0;
+	struct jnl_trim_list trimlist;
+
+	/*
+	 * struct jnl_trim_list {
+	 *	uint32_t    allocated_count;
+	 *	uint32_t    extent_count;
+	 *	dk_extent_t *extents;
+	 * };
+	 */
+	bzero (&trimlist, sizeof(trimlist));
+	if (CONFIG_HFS_TRIM) {
+		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
+		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
+		if (extents == NULL) {
+			return ENOMEM;
+		}
+		trimlist.extents = (dk_extent_t*)extents;
+		trimlist.allocated_count = alloc_count;
+		trimlist.extent_count = 0;
+
+		while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)) {
+			error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks,
+						      &blocks_scanned, &trimlist);
+			if (error) {
+				printf("HFS: bitmap unmap scan error: %d\n", error);
+				break;
+			}
+		}
+		if (error == 0) {
+			hfs_issue_unmap(hfsmp, &trimlist);
+		}
+		if (trimlist.extents) {
+			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
+		}
+	}
+	return error;
+}
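
UnmapBlocks drives the whole-bitmap scan through a resume cursor: each call to
hfs_alloc_scan_block covers one bitmap block and reports through its
out-parameter where the next call should pick up.  A hedged sketch of that
loop shape; scan_one_block and the toy 8-block "bitmap block" are assumptions,
not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    /* Pretend each bitmap block covers 8 allocation blocks. */
    static int scan_one_block(uint32_t start, uint32_t total, uint32_t *next)
    {
        uint32_t end = start - (start % 8) + 8;   /* end of this bitmap block */
        if (end > total)
            end = total;
        printf("scanned blocks [%u, %u)\n", start, end);
        *next = end;                              /* resume point for caller */
        return 0;
    }

    int main(void)
    {
        uint32_t total = 20, scanned = 0;
        int error = 0;
        while (scanned < total && error == 0)
            error = scan_one_block(scanned, total, &scanned);
        return error;
    }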
+
 /*
  ;________________________________________________________________________________
  ;
@@ -1256,16 +1436,15 @@ static OSErr ReleaseBitmapBlock(
        return (0);
 }
 
-#if CONFIG_HFS_ALLOC_RBTREE
 /*
- * ReleaseRBScanBitmapBlock is used to release struct bufs that were 
- * created for use by the Red-Black tree generation code.  We want to force 
+ * ReleaseScanBitmapBlock is used to release struct bufs that were 
+ * created for use by bitmap scanning code.  We want to force 
  * them to be purged out of the buffer cache ASAP, so we'll release them differently
  * than in the ReleaseBitmapBlock case.  Alternately, we know that we're only reading 
  * the blocks, so we will never dirty them as part of the tree building scan.
  */
 
-static OSErr ReleaseRBScanBitmapBlock(struct buf *bp ) {
+static OSErr ReleaseScanBitmapBlock(struct buf *bp ) {
        
        if (bp == NULL) {
                return (0);
@@ -1284,9 +1463,6 @@ static OSErr ReleaseRBScanBitmapBlock(struct buf *bp ) {
        
 }
 
-#endif
-
-
 /*
 _______________________________________________________________________
 
@@ -1906,9 +2082,9 @@ Exit:
                *actualStartBlock = 0;
                *actualNumBlocks = 0;
        }
-       
-    if (currCache)
-       (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
+
+       if (currCache)
+               (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
@@ -1945,9 +2121,7 @@ static OSErr BlockAllocateKnown(
        u_int32_t               *actualNumBlocks)
 {
        OSErr                   err;    
-       u_int32_t               i;
        u_int32_t               foundBlocks;
-       u_int32_t               newStartBlock, newBlockCount;
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
@@ -1975,59 +2149,10 @@ static OSErr BlockAllocateKnown(
                foundBlocks = maxBlocks;
        *actualNumBlocks = foundBlocks;
        
-       if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-               // since sparse volumes keep the free extent list sorted by starting
-               // block number, the list won't get re-ordered, it may only shrink
-               //
-               vcb->vcbFreeExt[0].startBlock += foundBlocks;
-               vcb->vcbFreeExt[0].blockCount -= foundBlocks;
-               if (vcb->vcbFreeExt[0].blockCount == 0) {
-                       for(i=1; i < vcb->vcbFreeExtCnt; i++) {
-                               vcb->vcbFreeExt[i-1] = vcb->vcbFreeExt[i];
-                       }
-                       vcb->vcbFreeExtCnt--;
-               }
-
-               goto done;
-       }
+       lck_spin_unlock(&vcb->vcbFreeExtLock);
 
-       //      Adjust the start and length of that extent.
-       newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks;
-       newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks;
-               
+       remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);
        
-       //      The first extent might not be the largest anymore.  Bubble up any
-       //      (now larger) extents to the top of the list.
-       for (i=1; i<vcb->vcbFreeExtCnt; ++i)
-       {
-               if (vcb->vcbFreeExt[i].blockCount > newBlockCount)
-               {
-                       vcb->vcbFreeExt[i-1].startBlock = vcb->vcbFreeExt[i].startBlock;
-                       vcb->vcbFreeExt[i-1].blockCount = vcb->vcbFreeExt[i].blockCount;
-               }
-               else
-               {
-                       break;
-               }
-       }
-       
-       //      If this is now the smallest known free extent, then it might be smaller than
-       //      other extents we didn't keep track of.  So, just forget about this extent.
-       //      After the previous loop, (i-1) is the index of the extent we just allocated from.
-       if (newBlockCount == 0)
-       {
-               // then just reduce the number of free extents since this guy got deleted
-               --vcb->vcbFreeExtCnt;
-       }
-       else
-       {
-               //      It's not the smallest, so store it in its proper place
-               vcb->vcbFreeExt[i-1].startBlock = newStartBlock;
-               vcb->vcbFreeExt[i-1].blockCount = newBlockCount;
-       }
-
-done:
-       lck_spin_unlock(&vcb->vcbFreeExtLock);
        // sanity check
        if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 
        {
@@ -2546,21 +2671,24 @@ OSErr BlockMarkFreeInternal(
        register u_int32_t      numBlocks_in,
        Boolean                 do_validate)
 {
-       OSErr                   err;
+       OSErr           err;
        u_int32_t       startingBlock = startingBlock_in;
        u_int32_t       numBlocks = numBlocks_in;
-       register u_int32_t      *currentWord;   //      Pointer to current word within bitmap block
-       register u_int32_t      wordsLeft;              //      Number of words left in this bitmap block
-       register u_int32_t      bitMask;                //      Word with given bits already set (ready to OR in)
-       u_int32_t                       firstBit;               //      Bit index within word of first bit to allocate
-       u_int32_t                       numBits;                //      Number of bits in word to allocate
-       u_int32_t                       *buffer = NULL;
-       uintptr_t  blockRef;
-       u_int32_t  bitsPerBlock;
-       u_int32_t  wordsPerBlock;
+       uint32_t        unmapStart = startingBlock_in;
+       uint32_t        unmapCount = numBlocks_in;
+       uint32_t        wordIndexInBlock;
+       u_int32_t       *currentWord;   //      Pointer to current word within bitmap block
+       u_int32_t       wordsLeft;              //      Number of words left in this bitmap block
+       u_int32_t       bitMask;                //      Word with given bits already set (ready to OR in)
+       u_int32_t       currentBit;             //      Bit index within word of current bit to allocate
+       u_int32_t       numBits;                //      Number of bits in word to allocate
+       u_int32_t       *buffer = NULL;
+       uintptr_t       blockRef;
+       u_int32_t       bitsPerBlock;
+       u_int32_t       wordsPerBlock;
     // XXXdbg
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
-
+       
        if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
 
@@ -2579,30 +2707,46 @@ OSErr BlockMarkFreeInternal(
                err = EIO;
                goto Exit;
        }
-
+       
        //
        //      Pre-read the bitmap block containing the first word of allocation
        //
-
+       
        err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
        if (err != noErr) goto Exit;
        // XXXdbg
        if (hfsmp->jnl) {
                journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
        }
-
+       
        //
-       //      Initialize currentWord, and wordsLeft.
+       //      Figure out how many bits and words per bitmap block.
        //
-       {
-               u_int32_t wordIndexInBlock;
+       bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
+       wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+       wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+       
+       //
+       // Look for a range of free blocks immediately before startingBlock
+       // (up to the start of the current bitmap block).  Set unmapStart to
+       // the first free block.
+       //
+       currentWord = buffer + wordIndexInBlock;
+       currentBit = startingBlock % kBitsPerWord;
+       bitMask = kHighBitInWordMask >> currentBit;
+       while (true) {
+               // Move currentWord/bitMask back by one bit
+               bitMask <<= 1;
+               if (bitMask == 0) {
+                       if (--currentWord < buffer)
+                               break;
+                       bitMask = kLowBitInWordMask;
+               }
                
-               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
-               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
-
-               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-               currentWord = buffer + wordIndexInBlock;
-               wordsLeft = wordsPerBlock - wordIndexInBlock;
+               if (*currentWord & SWAP_BE32(bitMask))
+                       break;  // Found an allocated block.  Stop searching.
+               --unmapStart;
+               ++unmapCount;
        }
        
        //
@@ -2610,14 +2754,16 @@ OSErr BlockMarkFreeInternal(
        //      boundary in the bitmap, then treat that first word
        //      specially.
        //
-
-       firstBit = startingBlock % kBitsPerWord;
-       if (firstBit != 0) {
-               bitMask = kAllBitsSetInWord >> firstBit;        //      turn off all bits before firstBit
-               numBits = kBitsPerWord - firstBit;                      //      number of remaining bits in this word
+       
+       currentWord = buffer + wordIndexInBlock;
+       wordsLeft = wordsPerBlock - wordIndexInBlock;
+       currentBit = startingBlock % kBitsPerWord;
+       if (currentBit != 0) {
+               bitMask = kAllBitsSetInWord >> currentBit;      //      turn off all bits before currentBit
+               numBits = kBitsPerWord - currentBit;            //      number of remaining bits in this word
                if (numBits > numBlocks) {
                        numBits = numBlocks;                                    //      entire allocation is inside this one word
-                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));        //      turn off bits after last
+                       bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));      //      turn off bits after last
                }
                if ((do_validate == true) && 
                    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
@@ -2625,15 +2771,15 @@ OSErr BlockMarkFreeInternal(
                }
                *currentWord &= SWAP_BE32 (~bitMask);           //      clear the bits in the bitmap
                numBlocks -= numBits;                                           //      adjust number of blocks left to free
-
+               
                ++currentWord;                                                          //      move to next word
                --wordsLeft;                                                            //      one less word left in this block
        }
-
+       
        //
        //      Free whole words (32 blocks) at a time.
        //
-
+       
        while (numBlocks >= kBitsPerWord) {
                if (wordsLeft == 0) {
                        //      Read in the next bitmap block
@@ -2642,15 +2788,15 @@ OSErr BlockMarkFreeInternal(
                        buffer = NULL;
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
-
+                       
                        err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
                        if (err != noErr) goto Exit;
-
+                       
                        // XXXdbg
                        if (hfsmp->jnl) {
                                journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
                        }
-
+                       
                        //      Readjust currentWord and wordsLeft
                        currentWord = buffer;
                        wordsLeft = wordsPerBlock;
@@ -2663,7 +2809,7 @@ OSErr BlockMarkFreeInternal(
                numBlocks -= kBitsPerWord;
                
                ++currentWord;                                                          //      move to next word
-               --wordsLeft;                                                            //      one less word left in this block
+               --wordsLeft;                                                                    //      one less word left in this block
        }
        
        //
@@ -2679,10 +2825,10 @@ OSErr BlockMarkFreeInternal(
                        buffer = NULL;
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
-
+                       
                        err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
                        if (err != noErr) goto Exit;
-
+                       
                        // XXXdbg
                        if (hfsmp->jnl) {
                                journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
@@ -2697,35 +2843,60 @@ OSErr BlockMarkFreeInternal(
                        goto Corruption;
                }
                *currentWord &= SWAP_BE32 (~bitMask);                   //      clear the bits in the bitmap
-
+               
                //      No need to update currentWord or wordsLeft
        }
-
+       
+       //
+       // Look for a range of free blocks immediately after the range we just freed
+       // (up to the end of the current bitmap block).
+       //
+       wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
+       wordsLeft = wordsPerBlock - wordIndexInBlock;
+       currentWord = buffer + wordIndexInBlock;
+       currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
+       bitMask = kHighBitInWordMask >> currentBit;
+       while (true) {
+               // Move currentWord/bitMask/wordsLeft forward one bit
+               bitMask >>= 1;
+               if (bitMask == 0) {
+                       if (--wordsLeft == 0)
+                               break;
+                       ++currentWord;
+                       bitMask = kHighBitInWordMask;
+               }
+               
+               if (*currentWord & SWAP_BE32(bitMask))
+                       break;  // Found an allocated block.  Stop searching.
+               ++unmapCount;
+       }
+       
 Exit:
-
+       
        if (buffer)
                (void)ReleaseBitmapBlock(vcb, blockRef, true);
-
+       
        if (err == noErr) {
-               hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in);
+               hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
        }
 
        if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
 
        return err;
-
+       
 Corruption:
 #if DEBUG_BUILD
        panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
 #else
-       printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks (%u,%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
+       printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
        hfs_mark_volume_inconsistent(vcb);
        err = EIO;
        goto Exit;
 #endif
 }
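
The backward and forward walks added above widen the freed range to the whole
surrounding run of clear bits (within the current bitmap block), so the trim
passed to hfs_unmap_free_extent covers the entire free run rather than only
the blocks freed by this call.  A host-endian sketch over a single-word
bitmap; the kernel additionally byte-swaps each word with SWAP_BE32, which
this sketch omits:

    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_WORD 32u

    /* MSB-first bit numbering, matching the on-disk bitmap layout. */
    static int bit_is_set(const uint32_t *bm, uint32_t bit)
    {
        return (bm[bit / BITS_PER_WORD] >> (31 - bit % BITS_PER_WORD)) & 1u;
    }

    /* Widen [start, start+count) to the surrounding run of clear bits. */
    static void widen_free_run(const uint32_t *bm, uint32_t nbits,
                               uint32_t *start, uint32_t *count)
    {
        while (*start > 0 && !bit_is_set(bm, *start - 1)) {
            (*start)--;                    /* extend backwards */
            (*count)++;
        }
        while (*start + *count < nbits && !bit_is_set(bm, *start + *count))
            (*count)++;                    /* extend forwards */
    }

    int main(void)
    {
        /* blocks 0..2 allocated, 3..9 free, 10 allocated */
        uint32_t bm[1] = { 0xE0200000u };
        uint32_t start = 5, count = 2;     /* we just freed blocks 5..6 */
        widen_free_run(bm, 32, &start, &count);
        printf("unmap [%u, %u)\n", start, start + count);  /* [3, 10) */
        return 0;
    }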
 
+
 #if CONFIG_HFS_ALLOC_RBTREE
 /*
  * This is a wrapper function around BlockMarkFree.  This function is
@@ -3628,104 +3799,31 @@ hfs_isrbtree_active(struct hfsmount *hfsmp){
        return 0;
 }
 
-#if CONFIG_HFS_ALLOC_RBTREE
-/*
- * This function is basically the same as hfs_isallocated, except it's designed for 
- * use with the red-black tree validation code.  It assumes we're only checking whether
- * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
- * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
+
+/* 
+ * This function scans the specified bitmap block and acts on it as necessary.
+ * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator
+ * data structures.  This function is not #if'd to the CONFIG_RB case because
+ * we want to use it unconditionally at mount time when on an UNMAP-capable device.
+ * 
+ * Additionally, we may want an allocating thread to invoke this if the tree 
+ * does not have enough extents to satisfy an allocation request.
+ * 
+ * startbit            - the allocation block represented by a bit in 'allocblock' where we need to
+ *                             start our scan.  For instance, we may need to start the normal allocation scan
+ *                             in the middle of an existing allocation block.
+ * endBit              - the allocation block where we should end this search (inclusive).
+ * bitToScan   - output argument for this function to specify the next bit to scan.
  *
- * This should not be called in general purpose scanning code.
- */
-int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
-       
-       u_int32_t  *currentWord;   // Pointer to current word within bitmap block
-       u_int32_t  bitMask;        // Word with given bits already set (ready to test)
-       u_int32_t  firstBit;       // Bit index within word of first bit to allocate
-       u_int32_t  numBits;        // Number of bits in word to allocate
-       u_int32_t  bitsPerBlock;
-       uintptr_t  blockRef;
-       u_int32_t  wordsPerBlock;
-       u_int32_t  numBlocks = 1;
-       u_int32_t  *buffer = NULL;
-
-       int  inuse = 0;
-       int error;
-       
-       
-       if (bp_buf) {
-               /* just use passed-in buffer if avail. */
-               buffer = bp_buf;
-       }
-       else {
-               /*
-                * Pre-read the bitmap block containing the first word of allocation
-                */
-               error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
-               if (error)
-                       return (error);
-       }
-       
-       /*
-        * Initialize currentWord, and wordsLeft.
-        */
-       u_int32_t wordIndexInBlock;
-       
-       bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
-       wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
-       
-       wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-       currentWord = buffer + wordIndexInBlock;
-               
-       /*
-        * First test any non word aligned bits.
-        */
-       firstBit = startingBlock % kBitsPerWord;
-       bitMask = kAllBitsSetInWord >> firstBit;
-       numBits = kBitsPerWord - firstBit;
-       if (numBits > numBlocks) {
-               numBits = numBlocks;
-               bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
-       }
-       if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-               inuse = 1;
-               goto Exit;
-       }
-       numBlocks -= numBits;
-       ++currentWord;
-       
-Exit:
-       if(bp_buf == NULL) {
-               if (buffer) {
-                       (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
-               }
-       }
-       return (inuse);
-       
-       
-       
-}
-
-/* 
- * This function scans the specified block and adds it to the pair of trees specified 
- * in its arguments.  We break this behavior out of GenerateTree so that an allocating
- * thread can invoke this if the tree does not have enough extents to satisfy 
- * an allocation request.
- * 
- * startbit            - the allocation block represented by a bit in 'allocblock' where we need to
- *                             start our scan.  For instance, we may need to start the normal allocation scan
- *                             in the middle of an existing allocation block.
- * endBit              - the allocation block where we should end this search (inclusive).
- * bitToScan   - output argument for this function to specify the next bit to scan.
- *
- * Returns:
- *             0 on success
- *             nonzero on failure. 
+ * Returns:
+ *             0 on success
+ *             nonzero on failure. 
  */
 
 static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit, 
-                                                u_int32_t endBit, u_int32_t *bitToScan) {
-
+                                u_int32_t endBit, u_int32_t *bitToScan, 
+                                struct jnl_trim_list *list) {
+    
        int error;
        u_int32_t curAllocBlock;
        struct buf *blockRef = NULL;
@@ -3735,7 +3833,7 @@ static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
        u_int32_t wordsPerBlock = blockSize / kBytesPerWord; 
        u_int32_t offset = 0;
        u_int32_t size = 0;
-
+    
        /* 
         * Read the appropriate block from the bitmap file.  ReadBitmapBlock
         * figures out which actual on-disk block corresponds to the bit we're 
@@ -3748,7 +3846,7 @@ static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
        
        /* curAllocBlock represents the logical block we're analyzing. */
        curAllocBlock = startbit;       
-
+    
        /*  Figure out which word curAllocBlock corresponds to in the block we read  */
        wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;
        
@@ -3784,9 +3882,12 @@ static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
                                 * we saw, and reset our tally counter.
                                 */
                                if (size != 0) {
+#if CONFIG_HFS_ALLOC_RBTREE
                                        extent_tree_free_space(&hfsmp->offset_tree, size, offset);      
-                                       size = 0;
-                                       offset = 0;
+#endif
+					hfs_track_unmap_blocks (hfsmp, offset, size, list);
+					size = 0;
+					offset = 0;
                                }
                        }
                        curAllocBlock++;
@@ -3804,7 +3905,10 @@ DoneScanning:
        
        /* We may have been tracking a range of free blocks that hasn't been inserted yet. */
        if (size != 0) {
-               extent_tree_free_space(&hfsmp->offset_tree, size, offset);      
+#if CONFIG_HFS_ALLOC_RBTREE
+               extent_tree_free_space(&hfsmp->offset_tree, size, offset);
+#endif
+		hfs_track_unmap_blocks (hfsmp, offset, size, list);
        }
        /* 
         * curAllocBlock represents the next block we need to scan while we're in this 
@@ -3812,11 +3916,91 @@ DoneScanning:
         */
        *bitToScan = curAllocBlock;
        
-       ReleaseRBScanBitmapBlock(blockRef);
-
+       ReleaseScanBitmapBlock(blockRef);
+    
        return 0;
 }
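
The scan loop above tallies consecutive free bits into an (offset, size) run
and flushes the run whenever an allocated bit or the end of the block is
reached.  A minimal sketch of that run-length accumulation, with emit()
standing in for extent_tree_free_space/hfs_track_unmap_blocks:

    #include <stdint.h>
    #include <stdio.h>

    static void emit(uint32_t offset, uint32_t size)
    {
        printf("free run: start=%u count=%u\n", offset, size);
    }

    int main(void)
    {
        /* 1 = allocated, 0 = free, one entry per allocation block */
        const int bits[] = { 1, 0, 0, 0, 1, 1, 0, 0 };
        uint32_t offset = 0, size = 0;

        for (uint32_t blk = 0; blk < 8; blk++) {
            if (!bits[blk]) {
                if (size == 0)
                    offset = blk;        /* start of a new free run */
                size++;
            } else if (size != 0) {
                emit(offset, size);      /* allocated bit ends the run */
                size = 0;
            }
        }
        if (size != 0)
            emit(offset, size);          /* flush the trailing run */
        return 0;
    }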
 
+
+/*
+ * This function is basically the same as hfs_isallocated, except it's designed for 
+ * use with the red-black tree validation code.  It assumes we're only checking whether
+ * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
+ * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
+ *
+ * This should not be called in general purpose scanning code.
+ */
+int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
+       
+       u_int32_t  *currentWord;   // Pointer to current word within bitmap block
+       u_int32_t  bitMask;        // Word with given bits already set (ready to test)
+       u_int32_t  firstBit;       // Bit index within word of first bit to allocate
+       u_int32_t  numBits;        // Number of bits in word to allocate
+       u_int32_t  bitsPerBlock;
+       uintptr_t  blockRef;
+       u_int32_t  wordsPerBlock;
+       u_int32_t  numBlocks = 1;
+       u_int32_t  *buffer = NULL;
+
+       int  inuse = 0;
+       int error;
+       
+       
+       if (bp_buf) {
+               /* just use passed-in buffer if avail. */
+               buffer = bp_buf;
+       }
+       else {
+               /*
+                * Pre-read the bitmap block containing the first word of allocation
+                */
+               error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+               if (error)
+                       return (error);
+       }
+       
+       /*
+        * Initialize currentWord, and wordsLeft.
+        */
+       u_int32_t wordIndexInBlock;
+       
+       bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+       wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+       
+       wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+       currentWord = buffer + wordIndexInBlock;
+               
+       /*
+        * First test any non word aligned bits.
+        */
+       firstBit = startingBlock % kBitsPerWord;
+       bitMask = kAllBitsSetInWord >> firstBit;
+       numBits = kBitsPerWord - firstBit;
+       if (numBits > numBlocks) {
+               numBits = numBlocks;
+               bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+       }
+       if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+               inuse = 1;
+               goto Exit;
+       }
+       numBlocks -= numBits;
+       ++currentWord;
+       
+Exit:
+       if(bp_buf == NULL) {
+               if (buffer) {
+                       (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+               }
+       }
+       return (inuse);
+}
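
hfs_isallocated_scan reduces to a single-bit probe: locate the word that holds
the bit, build a mask, and test it.  A host-endian sketch (the kernel tests
against SWAP_BE32(bitMask) because the on-disk bitmap is big-endian;
block_is_allocated is an illustrative name):

    #include <stdint.h>
    #include <stdio.h>

    static int block_is_allocated(const uint32_t *bitmap, uint32_t block)
    {
        uint32_t word = block / 32;            /* which 32-bit word */
        uint32_t bit  = block % 32;            /* bit index, MSB first */
        uint32_t mask = 0x80000000u >> bit;
        return (bitmap[word] & mask) != 0;
    }

    int main(void)
    {
        uint32_t bitmap[2] = { 0xF0000000u, 0x00000001u };
        printf("%d %d %d\n",
               block_is_allocated(bitmap, 0),    /* 1 */
               block_is_allocated(bitmap, 4),    /* 0 */
               block_is_allocated(bitmap, 63));  /* 1 */
        return 0;
    }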
+
+#if CONFIG_HFS_ALLOC_RBTREE
+
 /*
  * Extern function that is called from mount and upgrade mount routines
  * that enable us to initialize the tree.
@@ -4166,103 +4350,276 @@ u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
 }
 
 
+/*
+ * Remove an extent from the list of free extents.
+ *
+ * This is a low-level routine.         It does not handle overlaps or splitting;
+ * that is the responsibility of the caller.  The extent at the given
+ * index is removed, and any extents that follow it in the list are
+ * shifted down into its place.
+ *
+ * Inputs:
+ *	index - Index (into hfsmp->vcbFreeExt) of the extent to remove;
+ *	        out-of-range indices are logged and ignored.
+ */
+static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
+{
+       if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
+               if (ALLOC_DEBUG)
+                       panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
+               else
+			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)\n", hfsmp, index, hfsmp->vcbFreeExtCnt);
+               return;
+       }
+       int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
+       if (shift_count > 0) {
+               memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+       }
+       hfsmp->vcbFreeExtCnt--;
+}
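
The removal above is a classic shift-down array delete: one memmove closes the
hole and the count drops by one.  A standalone sketch of the same operation;
remove_at and struct ext are illustrative:

    #include <stdio.h>
    #include <string.h>

    struct ext { unsigned start, count; };

    static void remove_at(struct ext *list, unsigned *n, unsigned index)
    {
        if (index >= *n)
            return;                         /* out of range: ignore */
        unsigned shift = *n - index - 1;    /* entries after the hole */
        if (shift > 0)
            memmove(&list[index], &list[index + 1], shift * sizeof(list[0]));
        (*n)--;
    }

    int main(void)
    {
        struct ext list[] = { {0, 4}, {10, 2}, {20, 8} };
        unsigned n = 3;
        remove_at(list, &n, 1);             /* drop {10,2} */
        for (unsigned i = 0; i < n; i++)
            printf("{%u,%u}\n", list[i].start, list[i].count);
        return 0;
    }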
+
+
+/*
+ * Add an extent to the list of free extents.
+ *
+ * This is a low-level routine.         It does not handle overlaps or coalescing;
+ * that is the responsibility of the caller.  This routine *does* make
+ * sure that the extent it is adding is inserted in the correct location.
+ * If the list is full, this routine will handle either removing the last
+ * extent in the list to make room for the new extent, or ignoring the
+ * new extent if it is "worse" than the last extent in the list.
+ *
+ * Inputs:
+ *     startBlock - Start of extent to add
+ *     blockCount - Number of blocks in extent to add
+ *
+ * Result:
+ *	The index at which the extent was inserted, or kMaxFreeExtents
+ *     if the extent was not inserted (the list was full, and the extent
+ *     being added was "worse" than everything in the list).
+ */
+static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+       uint32_t i;
+       
+       /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
+       if (ALLOC_DEBUG) {
+               uint32_t endBlock = startBlock + blockCount;
+               for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+                       if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
+                               startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
+                                       continue;
+                       }
+                       panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
+                               hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
+               }
+       }        
+
+       /* Figure out what index the new extent should be inserted at. */
+       for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       /* The list is sorted by increasing offset. */
+                       if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
+                               break;
+                       }
+               } else {
+                       /* The list is sorted by decreasing size. */
+                       if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
+                               break;
+                       }
+               }
+       }
+       
+       /* When we get here, i is the index where the extent should be inserted. */
+       if (i == kMaxFreeExtents) {
+               /*
+                * The new extent is worse than anything already in the list,
+                * and the list is full, so just ignore the extent to be added.
+                */
+               return i;
+       }
+       
+       /*
+        * Grow the list (if possible) to make room for an insert.
+        */
+       if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
+               hfsmp->vcbFreeExtCnt++;
+       
+       /*
+        * If we'll be keeping any extents after the insert position, then shift them.
+        */
+       int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
+       if (shift_count > 0) {
+               memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+       }
+       
+       /* Finally, store the new extent at its correct position. */
+       hfsmp->vcbFreeExt[i].startBlock = startBlock;
+       hfsmp->vcbFreeExt[i].blockCount = blockCount;
+       return i;
+}
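
add_free_extent_list is a bounded sorted insert: find the slot, grow the list
if there is room (otherwise the last, worst entry falls off the end), shift,
and store.  A sketch of the non-sparse ordering, sorted by decreasing size,
with MAX_EXT playing the role of kMaxFreeExtents:

    #include <stdio.h>
    #include <string.h>

    #define MAX_EXT 4

    struct ext { unsigned start, count; };

    static unsigned insert_sorted(struct ext *list, unsigned *n,
                                  unsigned start, unsigned count)
    {
        unsigned i;
        for (i = 0; i < *n; i++)
            if (count > list[i].count)      /* first entry we beat */
                break;
        if (i == MAX_EXT)
            return MAX_EXT;                 /* worse than everything: drop */
        if (*n < MAX_EXT)
            (*n)++;                         /* grow; else the last entry falls off */
        unsigned shift = *n - i - 1;
        if (shift > 0)
            memmove(&list[i + 1], &list[i], shift * sizeof(list[0]));
        list[i].start = start;
        list[i].count = count;
        return i;
    }

    int main(void)
    {
        struct ext list[MAX_EXT];
        unsigned n = 0;
        insert_sorted(list, &n, 100, 8);
        insert_sorted(list, &n, 200, 16);
        insert_sorted(list, &n, 300, 4);
        for (unsigned i = 0; i < n; i++)
            printf("{%u,%u}\n", list[i].start, list[i].count);
        return 0;
    }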
+
+
 /*
  * Remove an entry from free extent cache after it has been allocated.
  *
- * This function does not split extents to remove them from the allocated list.  
+ * This is a high-level routine.  It handles removing a portion of a
+ * cached extent, potentially splitting it into two (if the cache was
+ * already full, throwing away the extent that would sort last).  It
+ * also handles removing an extent that overlaps multiple extents in
+ * the cache.
  *
  * Inputs: 
- *     hfsmp           - mount point structure 
- *     startBlock      - starting block of the extent to be removed. 
- *     blockCount      - number of blocks of the extent to be removed.
+ *     hfsmp           - mount point structure 
+ *     startBlock      - starting block of the extent to be removed. 
+ *     blockCount      - number of blocks of the extent to be removed.
  */
 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
 {
-       int i, j;
+       u_int32_t i, insertedIndex;
+       u_int32_t currentStart, currentEnd, endBlock;
        int extentsRemoved = 0;
-       u_int32_t start, end;
-
+       
 #if CONFIG_HFS_ALLOC_RBTREE
        /* If red-black tree is enabled, no free extent cache is necessary */
        if (hfs_isrbtree_active(hfsmp) == true) {
                return;
        }
 #endif
-
+       
        if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
-
+       
+       endBlock = startBlock + blockCount;
+       
        lck_spin_lock(&hfsmp->vcbFreeExtLock);
-
-       for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
-               start = hfsmp->vcbFreeExt[i].startBlock;
-               end = start + hfsmp->vcbFreeExt[i].blockCount;
-
-               /* If the extent to remove from free extent list starts within 
-                * this free extent, or, if it starts before this free extent 
-                * but ends in this free extent, remove it by shifting all other
-                * extents.
+       
+       /*
+        * Iterate over all of the extents in the free extent cache, removing or
+        * updating any entries that overlap with the input extent.
+        */
+       for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               currentStart = hfsmp->vcbFreeExt[i].startBlock;
+               currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
+               
+               /*
+                * If the current extent is entirely before or entirely after the
+                * the extent to be removed, then we keep it as-is.
                 */
-               if (((startBlock >= start) && (startBlock < end)) ||
-                   ((startBlock < start) && (startBlock + blockCount) > start)) {
-                       for (j = i; j < (int)hfsmp->vcbFreeExtCnt - 1; j++) {
-                               hfsmp->vcbFreeExt[j] = hfsmp->vcbFreeExt[j+1];
-                       }
-                       hfsmp->vcbFreeExtCnt--;
-                       /* Decrement the index so that we check the extent 
-                        * that just got shifted to the current index.
+               if (currentEnd <= startBlock || currentStart >= endBlock) {
+                       continue;
+               }
+               
+               /*
+                * If the extent being removed entirely contains the current extent,
+                * then remove the current extent.
+                */
+               if (startBlock <= currentStart && endBlock >= currentEnd) {
+                       remove_free_extent_list(hfsmp, i);
+                       
+                       /*
+                        * We just removed the extent at index i.  The extent at
+                        * index i+1 just got shifted to index i.  So decrement i
+                        * to undo the loop's "++i", and the next iteration will
+                        * examine index i again, which contains the next extent
+                        * in the list.
                         */
-                       i--;
-                       extentsRemoved++;
+                       --i;
+                       ++extentsRemoved;
+                       continue;
                }
-               /* Continue looping as we might have to invalidate multiple extents, 
-                * probably not possible in normal case, but does not hurt.
+               
+               /*
+                * If the extent being removed is strictly "in the middle" of the
+                * current extent, then we need to split the current extent into
+                * two discontiguous extents (the "head" and "tail").  The good
+                * news is that we don't need to examine any other extents in
+                * the list.
                 */
+               if (startBlock > currentStart && endBlock < currentEnd) {
+                       remove_free_extent_list(hfsmp, i);
+                       add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+                       add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+                       break;
+               }
+               
+               /*
+                * The only remaining possibility is that the extent to be removed
+                * overlaps the start or end (but not both!) of the current extent.
+                * So we need to replace the current extent with a shorter one.
+                *
+                * The only tricky part is that the updated extent might be at a
+                * different index than the original extent.  If the updated extent
+                * was inserted after the current extent, then we need to re-examine
+                * the entry at index i, since it now contains the extent that was
+                * previously at index i+1.      If the updated extent was inserted
+                * before or at the same index as the removed extent, then the
+                * following extents haven't changed position.
+                */
+               remove_free_extent_list(hfsmp, i);
+               if (startBlock > currentStart) {
+                       /* Remove the tail of the current extent. */
+                       insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+               } else {
+                       /* Remove the head of the current extent. */
+                       insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+               }
+               if (insertedIndex > i) {
+                       --i;    /* Undo the "++i" in the loop, so we examine the entry at index i again. */
+               }
        }
        
        lck_spin_unlock(&hfsmp->vcbFreeExtLock);
-
+       
        sanity_check_free_ext(hfsmp, 0);
-
+       
        if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
-
+       
        return;
 }
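
The loop above distinguishes exactly four relationships between a cached
extent and the range being removed: disjoint, fully covered, interior (forcing
a split into head and tail), and partial overlap at one end.  A tiny
classifier makes that case analysis explicit; classify and the half-open
intervals are illustrative:

    #include <stdio.h>

    /* Cached extent [cs, ce) versus removal range [rs, re). */
    static const char *classify(unsigned cs, unsigned ce, unsigned rs, unsigned re)
    {
        if (ce <= rs || cs >= re)      return "disjoint: keep as-is";
        if (rs <= cs && re >= ce)      return "covered: remove whole extent";
        if (rs > cs && re < ce)        return "interior: split into head+tail";
        return "partial: trim head or tail";
    }

    int main(void)
    {
        /* cached extent [10, 30) */
        printf("%s\n", classify(10, 30, 40, 50));  /* disjoint */
        printf("%s\n", classify(10, 30,  5, 35));  /* covered */
        printf("%s\n", classify(10, 30, 15, 20));  /* interior */
        printf("%s\n", classify(10, 30, 25, 35));  /* partial */
        return 0;
    }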
 
+
 /*
- * Add an entry to free extent cache after it has been deallocated.  
+ * Add an entry to free extent cache after it has been deallocated.     
  *
- * If the extent provided has blocks beyond current allocLimit, it 
- * is clipped to allocLimit.  This function does not merge contiguous 
- * extents, if they already exist in the list.
+ * This is a high-level routine.  It will merge overlapping or contiguous
+ * extents into a single, larger extent.
+ *
+ * If the extent provided has blocks beyond current allocLimit, it is
+ * clipped to allocLimit (so that we won't accidentally find and allocate
+ * space beyond allocLimit).
  *
  * Inputs: 
- *     hfsmp           - mount point structure 
- *     startBlock      - starting block of the extent to be removed. 
- *     blockCount      - number of blocks of the extent to be removed.
+ *     hfsmp           - mount point structure 
+ *     startBlock      - starting block of the extent to be removed. 
+ *     blockCount      - number of blocks of the extent to be removed.
  *
  * Returns:
- *     true            - if the extent was added successfully to the list
- *     false           - if the extent was no added to the list, maybe because 
- *                       the extent was beyond allocLimit, or is not best 
- *                       candidate to be put in the cache.
+ *     true            - if the extent was added successfully to the list
+ *	false		- if the extent was not added to the list, either because
+ *			  it was beyond allocLimit or because it was not a good
+ *			  enough candidate to keep in the cache.
  */
-static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) 
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
 {
        Boolean retval = false;
-       u_int32_t start, end;
-       int i; 
+       uint32_t endBlock;
+       uint32_t currentEnd;
+       uint32_t i; 
        
        if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
-
+       
        /*
         * If using the red-black tree allocator, then there's no need to special case 
-        * for the sparse device case.  We'll simply add the region we've recently freed
+        * for the sparse device case.  We'll simply add the region we've recently freed
         * to the red-black tree, where it will get sorted by offset and length.  The only special 
         * casing will need to be done on the allocation side, where we may favor free extents
-        * based on offset even if it will cause fragmentation.  This may be true, for example, if
+        * based on offset even if it will cause fragmentation.  This may be true, for example, if
         * we are trying to reduce the number of bandfiles created in a sparse bundle disk image. 
         */
 #if CONFIG_HFS_ALLOC_RBTREE
@@ -4270,93 +4627,58 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc
                goto out_not_locked;
        }
 #endif
-
+       
        /* No need to add extent that is beyond current allocLimit */
        if (startBlock >= hfsmp->allocLimit) {
                goto out_not_locked;
        }
-
+       
        /* If end of the free extent is beyond current allocLimit, clip the extent */
        if ((startBlock + blockCount) > hfsmp->allocLimit) {
                blockCount = hfsmp->allocLimit - startBlock;
        }
-
+       
        lck_spin_lock(&hfsmp->vcbFreeExtLock);
-
-       /* If the free extent cache is full and the new extent fails to 
-        * compare with the last extent, skip adding it to the list.
-        */
-       if (hfsmp->vcbFreeExtCnt == kMaxFreeExtents) {
-               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-                       /* For sparse disks, free extent cache list is sorted by start block, lowest first */
-                       if (startBlock > hfsmp->vcbFreeExt[kMaxFreeExtents-1].startBlock) {
-                               goto out;
-                       } 
-               } else {
-                       /* For normal mounts, free extent cache list is sorted by total blocks, highest first */
-                       if (blockCount <= hfsmp->vcbFreeExt[kMaxFreeExtents-1].blockCount) {
-                               goto out;
-                       } 
-               }
-       }
-
-       /* Check if the current extent overlaps with any of the existing 
-        * extents.  If yes, just skip adding it to the list.  We have 
-        * to do this check before shifting the extent records.
-        */
-       for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
-
-               start = hfsmp->vcbFreeExt[i].startBlock;
-               end = start + hfsmp->vcbFreeExt[i].blockCount;
-
-               if (((startBlock >= start) && (startBlock < end)) ||
-                   ((startBlock < start) && (startBlock + blockCount) > start)) {
-                       goto out;
-               }
-       }
-
-       /* Scan the free extent cache array from tail to head till 
-        * we find the entry after which our new entry should be 
-        * inserted.  After we break out of this loop, the new entry 
-        * will be inserted at 'i+1'.
+       
+       /*
+        * Make a pass through the free extent cache, looking for known extents that
+        * overlap or are contiguous with the extent to be added.  We'll remove those
+        * extents from the cache, and incorporate them into the new extent to be added.
         */
-       for (i = (int)hfsmp->vcbFreeExtCnt-1; i >= 0; i--) {
-               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-                       /* For sparse devices, find entry with smaller start block than ours */
-                       if (hfsmp->vcbFreeExt[i].startBlock < startBlock) {
-                               break;
-                       }
+       endBlock = startBlock + blockCount;
+       for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
+               if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
+                       /* Extent i does not overlap and is not contiguous, so keep it. */
+                       continue;
                } else {
-                       /* For normal devices, find entry with greater block count than ours */
-                       if (hfsmp->vcbFreeExt[i].blockCount >= blockCount) {
-                               break;
-                       }
-               }
-
-               /* If this is not the right spot to insert, and this is 
-                * not the last entry in the array, just shift it and 
-                * continue check another one. 
-                */
-               if ((i+1) < kMaxFreeExtents) {
-                       hfsmp->vcbFreeExt[i+1] = hfsmp->vcbFreeExt[i];
+                       /* We need to remove extent i and combine it with the input extent. */
+                       if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
+                               startBlock = hfsmp->vcbFreeExt[i].startBlock;
+                       if (currentEnd > endBlock)
+                               endBlock = currentEnd;
+                       
+                       remove_free_extent_list(hfsmp, i);
+                       /*
+                        * We just removed the extent at index i.  The extent at
+                        * index i+1 just got shifted to index i.  So decrement i
+                        * to undo the loop's "++i", and the next iteration will
+                        * examine index i again, which contains the next extent
+                        * in the list.
+                        */
+                       --i;
                }
        }
-       /* 'i' points to one index offset before which the new extent should be inserted */
-       hfsmp->vcbFreeExt[i+1].startBlock = startBlock;
-       hfsmp->vcbFreeExt[i+1].blockCount = blockCount;
-       if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) {
-               hfsmp->vcbFreeExtCnt++;
-       }
-       retval = true;
-
-out:
+       add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
+       
        lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+       
 out_not_locked:
        sanity_check_free_ext(hfsmp, 0);
-
+       
        if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
-
+       
        return retval;
 }
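
The rewritten add_free_extent_cache no longer inserts into a sorted, possibly
full array; it first absorbs every cached extent that overlaps or abuts the
new one, then hands the coalesced range to add_free_extent_list. A minimal
userspace sketch of that coalescing pass, assuming a simple array-backed
cache and a hypothetical remove_at() helper standing in for
remove_free_extent_list:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_EXTENTS 8

    struct extent { uint32_t start, count; };

    static struct extent cache[MAX_EXTENTS];
    static int cache_cnt;

    /* Remove entry i, shifting the tail down one slot. */
    static void remove_at(int i) {
        for (; i + 1 < cache_cnt; i++)
            cache[i] = cache[i + 1];
        cache_cnt--;
    }

    /* Merge [start, start+count) with every overlapping or contiguous
     * cached extent, then store the combined result. */
    static void add_extent(uint32_t start, uint32_t count) {
        uint32_t end = start + count;
        for (int i = 0; i < cache_cnt; i++) {
            uint32_t cur_end = cache[i].start + cache[i].count;
            if (cache[i].start > end || cur_end < start)
                continue;             /* disjoint and not adjacent: keep */
            if (cache[i].start < start) start = cache[i].start;
            if (cur_end > end)          end   = cur_end;
            remove_at(i);
            i--;                      /* re-examine the slot just shifted in */
        }
        if (cache_cnt < MAX_EXTENTS) {    /* real code would evict instead */
            cache[cache_cnt].start = start;
            cache[cache_cnt].count = end - start;
            cache_cnt++;
        }
    }

    int main(void) {
        add_extent(10, 5);    /* [10,15)                       */
        add_extent(15, 5);    /* contiguous: merges to [10,20) */
        add_extent(12, 2);    /* inside [10,20): absorbed      */
        for (int i = 0; i < cache_cnt; i++)
            printf("[%u, %u)\n", cache[i].start,
                   cache[i].start + cache[i].count);
        return 0;
    }

Because merging happens on insert, the cache never holds two entries that
could be combined into one.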
 
@@ -4372,6 +4694,9 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
 
        lck_spin_lock(&hfsmp->vcbFreeExtLock);
        
+       if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
+               panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
+       
        /* 
         * Iterate the Free extent cache and ensure no entries are bogus or refer to
         * allocated blocks.
index 7276daa2691ea49da0ab7ab68535612657839775..e8ddcac8641d9af4ea3a9f0d08beaf60920622a9 100644 (file)
@@ -250,7 +250,10 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb);
        
 EXTERN_API_C( u_int32_t )
 UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block);
-       
+
+EXTERN_API_C( u_int32_t )
+UnmapBlocks(struct hfsmount *hfsmp);
+
 #if CONFIG_HFS_ALLOC_RBTREE
 EXTERN_API_C( u_int32_t )
 GenerateTree( struct hfsmount *hfsmp, u_int32_t end_block, int *flags, int initialscan);
index e60df12e0a66edc0c03248192dd35e438a3dc21b..2a04fff666c295224998cc861ac1062a30a5828e 100644 (file)
 #include <sys/mcache.h>                        /* for mcache_init() */
 #include <sys/mbuf.h>                  /* for mbinit() */
 #include <sys/event.h>                 /* for knote_init() */
-#include <sys/kern_memorystatus.h>     /* for kern_memorystatus_init() */
+#include <sys/kern_memorystatus.h>     /* for memorystatus_init() */
 #include <sys/aio_kern.h>              /* for aio_init() */
 #include <sys/semaphore.h>             /* for psem_cache_init() */
 #include <net/dlil.h>                  /* for dlil_init() */
 #include <net/kpi_protocol.h>          /* for proto_kpi_init() */
+#include <net/iptap.h>                 /* for iptap_init() */
 #include <sys/pipe.h>                  /* for pipeinit() */
 #include <sys/socketvar.h>             /* for socketinit() */
 #include <sys/protosw.h>               /* for domaininit() */
 #include <net/if_utun.h>               /* for utun_register_control() */
 #include <net/net_str_id.h>            /* for net_str_id_init() */
 #include <net/netsrc.h>                        /* for netsrc_init() */
+#include <net/ntstat.h>                        /* for nstat_init() */
 #include <kern/assert.h>               /* for assert() */
 
 #include <net/init.h>
@@ -223,13 +225,6 @@ char       hostname[MAXHOSTNAMELEN];
 int            hostnamelen;
 char   domainname[MAXDOMNAMELEN];
 int            domainnamelen;
-#if defined(__i386__) || defined(__x86_64__)
-struct exec_archhandler exec_archhandler_ppc = {
-       .path = "/usr/libexec/oah/RosettaNonGrata",
-};
-#else /* __i386__ */
-struct exec_archhandler exec_archhandler_ppc;
-#endif /* __i386__ */
 
 char rootdevice[16];   /* hfs device names have at least 9 chars */
 
@@ -250,6 +245,7 @@ extern void klogwakeup(void);
 extern void file_lock_init(void);
 extern void kmeminit(void);
 extern void bsd_bufferinit(void);
+extern void throttle_init(void);
 
 extern int serverperfmode;
 extern int ncl;
@@ -263,13 +259,8 @@ __private_extern__ int execargs_cache_size = 0;
 __private_extern__ int execargs_free_count = 0;
 __private_extern__ vm_offset_t * execargs_cache = NULL;
 
-void bsd_exec_setup(int);
+void bsd_exec_setup(int) __attribute__((aligned(4096)));
 
-/*
- * Set to disable grading 64 bit Mach-o binaries as executable, for testing;
- * Intel only.
- */
-__private_extern__ int bootarg_no64exec = 0;
 __private_extern__ int bootarg_vnode_cache_defeat = 0;
 
 /*
@@ -330,7 +321,6 @@ extern void stackshot_lock_init(void);
        extern void dtrace_postinit(void);
 #endif
 
-
 /*
  * Initialization code.
  * Called from cold start routine as
@@ -394,14 +384,15 @@ void (*unmountroot_pre_hook)(void);
  * of the uu_context.vc_ucred field so that the uthread structure can be
  * used like any other.
  */
+extern void run_bringup_tests(void);
+
+extern void IOServicePublishResource(const char *, boolean_t);
+
 void
 bsd_init(void)
 {
        struct uthread *ut;
        unsigned int i;
-#if __i386__ || __x86_64__
-       int error;
-#endif 
        struct vfs_context context;
        kern_return_t   ret;
        struct ucred temp_cred;
@@ -412,6 +403,8 @@ bsd_init(void)
 
 #define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */
 
+       throttle_init();
+
        kernel_flock = funnel_alloc(KERNEL_FUNNEL);
        if (kernel_flock == (funnel_t *)0 ) {
                panic("bsd_init: Failed to allocate kernel funnel");
@@ -775,22 +768,26 @@ bsd_init(void)
        socketinit();
        bsd_init_kprintf("calling domaininit\n");
        domaininit();
+       iptap_init();
 #endif /* SOCKETS */
 
        kernproc->p_fd->fd_cdir = NULL;
        kernproc->p_fd->fd_rdir = NULL;
 
 #if CONFIG_FREEZE
-       /* Initialise background hibernation */
-       bsd_init_kprintf("calling kern_hibernation_init\n");
-       kern_hibernation_init();
+#ifndef CONFIG_MEMORYSTATUS
+    #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
+#endif
+       /* Initialise background freezing */
+       bsd_init_kprintf("calling memorystatus_freeze_init\n");
+       memorystatus_freeze_init();
 #endif
 
-#if CONFIG_EMBEDDED
+#if CONFIG_MEMORYSTATUS
        /* Initialize kernel memory status notifications */
-       bsd_init_kprintf("calling kern_memorystatus_init\n");
-       kern_memorystatus_init();
-#endif
+       bsd_init_kprintf("calling memorystatus_init\n");
+       memorystatus_init();
+#endif /* CONFIG_MEMORYSTATUS */
 
 #ifdef GPROF
        /* Initialize kernel profiling. */
@@ -837,10 +834,8 @@ bsd_init(void)
        
        /* register user tunnel kernel control handler */
        utun_register_control();
-    netsrc_init();
-       
-       /* wait for network domain to finish */
-       domainfin();
+       netsrc_init();
+       nstat_init();
 #endif /* NETWORKING */
 
        bsd_init_kprintf("calling vnode_pager_bootstrap\n");
@@ -963,13 +958,6 @@ bsd_init(void)
 
        pal_kernel_announce();
 
-#if __i386__ || __x86_64__
-       /* this should be done after the root filesystem is mounted */
-       error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
-       if (error) /* XXX make more generic */
-               exec_archhandler_ppc.path[0] = 0;
-#endif 
-
        bsd_init_kprintf("calling mountroot_post_hook\n");
 
        /* invoke post-root-mount hook */
@@ -1131,10 +1119,6 @@ parse_bsd_args(void)
        if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */
                boothowto |= RB_SAFEBOOT;
 
-       /* disable 64 bit grading */
-       if (PE_parse_boot_argn("-no64exec", namep, sizeof (namep)))
-               bootarg_no64exec = 1;
-
        /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
        if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof (namep)))
                bootarg_vnode_cache_defeat = 1;
index 19da61270873a5667b80af9c15d485aea2fff598..bc4537d355142402001eef7b920a24fea6a813a4 100644 (file)
@@ -120,24 +120,29 @@ struct cdevsw nocdev = NO_CDEVICE;
  *       else see whether the index is free
  *     return the major number that is free else -1
  *
+ *     if index is negative, we start
+ *     looking for a free slot at the absolute value of index,
+ *     instead of starting at 0
  */
 int
 bdevsw_isfree(int index)
 {
        struct bdevsw *devsw;
-       if (index == -1) {
-           devsw = bdevsw;
-           for(index=0; index < nblkdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nobdev, 
-                           sizeof(struct bdevsw)) == 0)
-                   break;
+
+       if (index < 0) {
+           if (index == -1)
+               index = 1;      /* start at 1 to avoid collision with volfs (Radar 2842228) */
+           else
+               index = -index; /* start at least this far up in the table */
+           devsw = &bdevsw[index];
+           for(; index < nblkdev; index++, devsw++) {
+               if(memcmp((char *)devsw, 
+                           (char *)&nobdev, 
+                           sizeof(struct bdevsw)) == 0)
+                   break;
            }
-       } else {
-               /* NB: Not used below unless index is in range */
-               devsw = &bdevsw[index];
        }
-
+       devsw = &bdevsw[index];
        if ((index < 0) || (index >= nblkdev) ||
            (memcmp((char *)devsw, 
                          (char *)&nobdev, 
@@ -151,33 +156,22 @@ bdevsw_isfree(int index)
  *     if index is -1, find a free slot to add
  *       else see whether the slot is free
  *     return the major number that is used else -1
+ *
+ *     if index is negative, we start
+ *     looking for a free slot at the absolute value of index,
+ *     instead of starting at 0
  */
 int
 bdevsw_add(int index, struct bdevsw * bsw) 
 {
-       struct bdevsw *devsw;
-
-       if (index == -1) {
-           devsw = &bdevsw[1];         /* Start at slot 1 - this is a hack to fix the index=1 hack */
-           /* yes, start at 1 to avoid collision with volfs (Radar 2842228) */
-           for(index=1; index < nblkdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nobdev, 
-                           sizeof(struct bdevsw)) == 0)
-                   break;
-           }
-       }
-       devsw = &bdevsw[index];
-       if ((index < 0) || (index >= nblkdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)&nobdev, 
-                         sizeof(struct bdevsw)) != 0)) {
+       index = bdevsw_isfree(index);
+       if (index < 0) {
                return(-1);
        }
        bdevsw[index] = *bsw;
        return(index);
 }
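
After this refactor bdevsw_add simply delegates the slot search to
bdevsw_isfree, and both accept a negative index meaning "first free slot at
or above -index". A sketch of a kernel caller registering a block driver
(my_bdevsw is a hypothetical, fully-populated switch entry):

    #include <sys/conf.h>     /* struct bdevsw, bdevsw_add() (kernel context) */

    extern struct bdevsw my_bdevsw;   /* hypothetical driver entry points */

    int my_driver_attach(void)
    {
        /*
         * -24 requests the first free major number at or above 24,
         * staying clear of the low slots that kernel-internal devices
         * claim by absolute index (see the NOTE above cdevsw_add()).
         */
        int major = bdevsw_add(-24, &my_bdevsw);
        if (major < 0)
            return -1;    /* table full, or an absolute slot was taken */
        return major;
    }
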
-/* 
+/*
  *     if the slot has the same bsw, then remove
  *     else -1
  */
@@ -201,19 +195,27 @@ bdevsw_remove(int index, struct bdevsw * bsw)
 *     if index is -1, return a free slot if available
  *       else see whether the index is free
  *     return the major number that is free else -1
+ *
+ *     if index is negative, we start
+ *     looking for a free slot at the absolute value of index,
+ *     instead of starting at 0
  */
 int
 cdevsw_isfree(int index)
 {
        struct cdevsw *devsw;
 
-       if (index == -1) {
-           devsw = cdevsw;
-           for(index=0; index < nchrdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nocdev, 
-                           sizeof(struct cdevsw)) == 0)
-                   break;
+       if (index < 0) {
+           if (index == -1)
+               index = 0;
+           else
+               index = -index; /* start at least this far up in the table */
+           devsw = &cdevsw[index];
+           for(; index < nchrdev; index++, devsw++) {
+               if(memcmp((char *)devsw, 
+                           (char *)&nocdev, 
+                           sizeof(struct cdevsw)) == 0)
+                   break;
            }
        }
        devsw = &cdevsw[index];
@@ -231,45 +233,27 @@ cdevsw_isfree(int index)
  *       else see whether the slot is free
  *     return the major number that is used else -1
  *
+ *     if index is negative, we start
+ *     looking for a free slot at the absolute value of index,
+ *     instead of starting at 0
+ *
  * NOTE:       In practice, -1 is unusable, since there are kernel internal
  *             devices that call this function with absolute index values,
  *             which will stomp on free-slot based assignments that happen
- *             before them.  Therefore, if index is negative, we start
- *             looking for a free slot at the absolute value of index,
- *             instead of starting at 0 (lets out slot 1, but that's one
- *             of the problem slots down low - the vndevice).  -12 is
- *             currently a safe starting point.
+ *             before them.  -24 is currently a safe starting point.
  */
 int
 cdevsw_add(int index, struct cdevsw * csw) 
 {
-       struct cdevsw *devsw;
-
+       index = cdevsw_isfree(index);
        if (index < 0) {
-           if (index == -1)
-               index = 0;      /* historical behaviour; XXX broken */
-           else
-               index = -index; /* start at least this far up in the table */
-           devsw = &cdevsw[index];
-           for(; index < nchrdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nocdev, 
-                           sizeof(struct cdevsw)) == 0)
-                   break;
-           }
-       }
-       devsw = &cdevsw[index];
-       if ((index < 0) || (index >= nchrdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)&nocdev, 
-                         sizeof(struct cdevsw)) != 0)) {
                return(-1);
        }
        cdevsw[index] = *csw;
        return(index);
 }
 /*
- *     if the index has the same bsw, then remove
+ *     if the slot has the same csw, then remove
  *     else -1
  */
 int
index 33e3b3040356bca85ecea01382c2201dc09fe0ac..ef8057a4e238330d1fb2896e75602d6e4e2fc46a 100644 (file)
@@ -168,13 +168,27 @@ vfs_context_t decmpfs_ctx;
 #define offsetof_func(func) ((uintptr_t)(&(((decmpfs_registration*)NULL)->func)))
 
 static void *
-_func_from_offset(uint32_t type, int offset)
+_func_from_offset(uint32_t type, uintptr_t offset)
 {
     /* get the function at the given offset in the registration for the given type */
     decmpfs_registration *reg = decompressors[type];
     char *regChar = (char*)reg;
     char *func = &regChar[offset];
     void **funcPtr = (void**)func;
+
+    switch (reg->decmpfs_registration) {
+        case DECMPFS_REGISTRATION_VERSION_V1:
+            if (offset > offsetof_func(free_data))
+                return NULL;
+            break;
+        case DECMPFS_REGISTRATION_VERSION_V3:
+            if (offset > offsetof_func(get_flags))
+                return NULL;
+            break;
+        default:
+            return NULL;
+    }
+
     return funcPtr[0];
 }
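
_func_from_offset now bounds the requested byte offset by the registration's
declared version, so a V1 compressor is never asked for the V3-only
get_flags callback. A minimal sketch of the same versioned-struct pattern,
with hypothetical field names standing in for decmpfs_registration:

    #include <stddef.h>
    #include <stdint.h>

    struct reg {                      /* stand-in for decmpfs_registration */
        uint32_t version;             /* 1 or 3 */
        void   (*validate)(void);
        void   (*free_data)(void);    /* last callback present in V1 */
        void   (*get_flags)(void);    /* added in V3 */
    };

    static void *func_from_offset(struct reg *r, size_t offset)
    {
        switch (r->version) {
        case 1:
            if (offset > offsetof(struct reg, free_data))
                return NULL;          /* field absent in this version */
            break;
        case 3:
            if (offset > offsetof(struct reg, get_flags))
                return NULL;
            break;
        default:
            return NULL;              /* unknown layout: refuse */
        }
        return *(void **)((char *)r + offset);
    }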
 
@@ -183,7 +197,7 @@ extern boolean_t IOServiceWaitForMatchingResource( const char * property, uint64
 extern boolean_t IOCatalogueMatchingDriversPresent( const char * property );
 
 static void *
-_decmp_get_func(uint32_t type, int offset)
+_decmp_get_func(uint32_t type, uintptr_t offset)
 {
        /*
         this function should be called while holding a shared lock to decompressorsLock,
@@ -208,13 +222,15 @@ _decmp_get_func(uint32_t type, int offset)
         snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
         printf("waiting for %s\n", resourceName);
         while(decompressors[type] == NULL) {
-            lck_rw_done(decompressorsLock); // we have to unlock to allow the kext to register
+            lck_rw_unlock_shared(decompressorsLock); // we have to unlock to allow the kext to register
             if (IOServiceWaitForMatchingResource(resourceName, delay)) {
+                lck_rw_lock_shared(decompressorsLock);
                 break;
             }
             if (!IOCatalogueMatchingDriversPresent(providesName)) {
                 // 
                 printf("the kext with %s is no longer present\n", providesName);
+                lck_rw_lock_shared(decompressorsLock);
                 break;
             }
             printf("still waiting for %s\n", resourceName);
@@ -273,18 +289,12 @@ decmpfs_cnode_init(decmpfs_cnode *cp)
 {
     memset(cp, 0, sizeof(*cp));
        lck_rw_init(&cp->compressed_data_lock, decmpfs_lockgrp, NULL);
-#if !DECMPFS_SUPPORTS_SWAP64
-    lck_mtx_init(&cp->uncompressed_size_mtx, decmpfs_lockgrp, NULL);
-#endif
 }
 
 void
 decmpfs_cnode_destroy(decmpfs_cnode *cp)
 {
        lck_rw_destroy(&cp->compressed_data_lock, decmpfs_lockgrp);
-#if !DECMPFS_SUPPORTS_SWAP64
-    lck_mtx_destroy(&cp->uncompressed_size_mtx, decmpfs_lockgrp);
-#endif
 }
 
 boolean_t
@@ -382,25 +392,12 @@ decmpfs_cnode_set_vnode_minimal_xattr(decmpfs_cnode *cp, int minimal_xattr, int
 uint64_t
 decmpfs_cnode_get_vnode_cached_size(decmpfs_cnode *cp)
 {
-#if DECMPFS_SUPPORTS_SWAP64
     return cp->uncompressed_size;
-#else
-    /*
-     since this is a 64-bit field, we may not be able to access it atomically
-     so lock access
-     */
-    
-    lck_mtx_lock(&(cp->uncompressed_size_mtx));
-    uint64_t ret = cp->uncompressed_size;
-    lck_mtx_unlock(&(cp->uncompressed_size_mtx));
-    return ret;
-#endif
 }
 
 static void
 decmpfs_cnode_set_vnode_cached_size(decmpfs_cnode *cp, uint64_t size)
 {
-#if DECMPFS_SUPPORTS_SWAP64
     while(1) {
         uint64_t old = cp->uncompressed_size;
         if (OSCompareAndSwap64(old, size, (UInt64*)&cp->uncompressed_size)) {
@@ -409,16 +406,25 @@ decmpfs_cnode_set_vnode_cached_size(decmpfs_cnode *cp, uint64_t size)
             /* failed to write our value, so loop */
         }
     }
-#else
-    /*
-     since this is a 64-bit field, we may not be able to access it atomically
-     so lock access
-     */
-    
-    lck_mtx_lock(&(cp->uncompressed_size_mtx));
-    cp->uncompressed_size = size;
-    lck_mtx_unlock(&(cp->uncompressed_size_mtx));
-#endif
+}
+
+static uint64_t
+decmpfs_cnode_get_decompression_flags(decmpfs_cnode *cp)
+{
+    return cp->decompression_flags;
+}
+
+static void
+decmpfs_cnode_set_decompression_flags(decmpfs_cnode *cp, uint64_t flags)
+{
+    while(1) {
+        uint64_t old = cp->decompression_flags;
+        if (OSCompareAndSwap64(old, flags, (UInt64*)&cp->decompression_flags)) {
+            return;
+        } else {
+            /* failed to write our value, so loop */
+        }
+    }
 }
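
With 64-bit swap support assumed everywhere, the 64-bit cnode fields are
now updated with a compare-and-swap retry loop instead of a mutex. The
shape of that loop, sketched with the GCC/Clang __sync builtin standing in
for the kernel's OSCompareAndSwap64:

    #include <stdint.h>

    /* Atomically publish a new 64-bit value; retry whenever another
     * thread stores between our read and our swap. */
    static void set_u64_atomic(volatile uint64_t *field, uint64_t value)
    {
        for (;;) {
            uint64_t old = *field;
            if (__sync_bool_compare_and_swap(field, old, value))
                return;    /* our write won the race */
            /* lost the race: re-read and try again */
        }
    }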
 
 #pragma mark --- decmpfs state routines ---
@@ -602,7 +608,7 @@ decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp)
         /* no validate registered, so nothing to do */
         err = 0;
     }
-    lck_rw_done(decompressorsLock);
+    lck_rw_unlock_shared(decompressorsLock);
 out:
     if (hdr) FREE(hdr, M_TEMP);
 #if COMPRESSION_DEBUG
@@ -632,6 +638,7 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp)
     mount_t mp = NULL;
        int cnode_locked = 0;
     int saveInvalid = 0; // save the header data even though the type was out of range
+    uint64_t decompression_flags = 0;
        
     if (vnode_isnamedstream(vp)) {
         /*
@@ -738,6 +745,15 @@ done:
         if (ret == FILE_IS_COMPRESSED) {
             /* update the ubc's size for this file */
             ubc_setsize(vp, hdr->uncompressed_size);
+            
+            /* update the decompression flags in the decmpfs cnode */
+            lck_rw_lock_shared(decompressorsLock);
+            decmpfs_get_decompression_flags_func get_flags = decmp_get_func(hdr->compression_type, get_flags);
+            if (get_flags) {
+                decompression_flags = get_flags(vp, decmpfs_ctx, hdr);
+            }
+            lck_rw_unlock_shared(decompressorsLock);
+            decmpfs_cnode_set_decompression_flags(cp, decompression_flags);
         }
        } else {
                /* we might have already taken the lock above; if so, skip taking it again by passing cnode_locked as the skiplock parameter */
@@ -885,6 +901,11 @@ decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr)
 
 #pragma mark --- registration/validation routines ---
 
+static inline int registration_valid(decmpfs_registration *registration)
+{
+    return registration && ((registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V1) || (registration->decmpfs_registration == DECMPFS_REGISTRATION_VERSION_V3));
+}
+
 errno_t
 register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration)
 {
@@ -894,9 +915,7 @@ register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *r
     int locked = 0;
     char resourceName[80];
     
-    if ((compression_type >= CMP_MAX) ||
-        (!registration) ||
-        (registration->decmpfs_registration != DECMPFS_REGISTRATION_VERSION)) {
+    if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
         ret = EINVAL;
         goto out;
     }
@@ -911,10 +930,9 @@ register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *r
     decompressors[compression_type] = registration;
     snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
     IOServicePublishResource(resourceName, TRUE);
-    wakeup((caddr_t)&decompressors);
     
 out:
-    if (locked) lck_rw_done(decompressorsLock);
+    if (locked) lck_rw_unlock_exclusive(decompressorsLock);
     return ret;
 }
 
@@ -927,9 +945,7 @@ unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration
     int locked = 0;
     char resourceName[80];
 
-    if ((compression_type >= CMP_MAX) ||
-        (!registration) ||
-        (registration->decmpfs_registration != DECMPFS_REGISTRATION_VERSION)) {
+    if ((compression_type >= CMP_MAX) || !registration_valid(registration)) {
         ret = EINVAL;
         goto out;
     }
@@ -942,10 +958,9 @@ unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration
     decompressors[compression_type] = NULL;
     snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", compression_type);
     IOServicePublishResource(resourceName, FALSE);
-    wakeup((caddr_t)&decompressors);
     
 out:
-    if (locked) lck_rw_done(decompressorsLock);
+    if (locked) lck_rw_unlock_exclusive(decompressorsLock);
     return ret;
 }
 
@@ -960,7 +975,7 @@ compression_type_valid(decmpfs_header *hdr)
     if (decmp_get_func(hdr->compression_type, fetch) != NULL) {
         ret = 1;
     }
-    lck_rw_done(decompressorsLock);
+    lck_rw_unlock_shared(decompressorsLock);
        
     return ret;
 }
@@ -968,7 +983,7 @@ compression_type_valid(decmpfs_header *hdr)
 #pragma mark --- compression/decompression routines ---
 
 static int
-decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
+decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_cnode *cp, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read)
 {
     /* get the uncompressed bytes for the specified region of vp by calling out to the registered compressor */
     
@@ -1000,10 +1015,22 @@ decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_header *hdr, off_t offset, u
     decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(hdr->compression_type, fetch);
     if (fetch) {
                err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read);
+               lck_rw_unlock_shared(decompressorsLock);
+        if (err == 0) {
+            uint64_t decompression_flags = decmpfs_cnode_get_decompression_flags(cp);
+            if (decompression_flags & DECMPFS_FLAGS_FORCE_FLUSH_ON_DECOMPRESS) {
+#if    !defined(__i386__) && !defined(__x86_64__)
+                int i;
+                for (i = 0; i < nvec; i++) {
+                    flush_dcache64((addr64_t)(uintptr_t)vec[i].buf, vec[i].size, FALSE);
+                }
+#endif
+            }
+        }
     } else {
         err = ENOTSUP;
+        lck_rw_unlock_shared(decompressorsLock);
     }
-    lck_rw_done(decompressorsLock);
     
 out:
     return err;
@@ -1105,7 +1132,7 @@ decompress:
                err = 0;
                did_read = 0;
        } else {
-        err = decmpfs_fetch_uncompressed_data(vp, hdr, uplPos, uplSize, 1, &vec, &did_read);
+        err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, uplPos, uplSize, 1, &vec, &did_read);
        }
     if (err) {
         DebugLog("decmpfs_fetch_uncompressed_data err %d\n", err);
@@ -1234,7 +1261,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
                adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize);
         VerboseLog("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
     }
-    lck_rw_done(decompressorsLock);
+    lck_rw_unlock_shared(decompressorsLock);
     
     /* clip the adjusted size to the size of the file */
     if ((uint64_t)uplPos + uplSize > cachedSize) {
@@ -1304,7 +1331,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
         decmpfs_vector vec;
     decompress:
         vec = (decmpfs_vector){ .buf = data, .size = curUplSize };
-        err = decmpfs_fetch_uncompressed_data(vp, hdr, curUplPos, curUplSize, 1, &vec, &did_read);
+        err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, curUplPos, curUplSize, 1, &vec, &did_read);
         if (err) {
             ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
             
@@ -1409,7 +1436,7 @@ decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp)
             /* nothing to do, so no error */
             err = 0;
         }
-        lck_rw_done(decompressorsLock);
+        lck_rw_unlock_shared(decompressorsLock);
         
         if (err != 0) {
             ErrorLog("decompressor err %d\n", err);
@@ -1559,7 +1586,7 @@ decompress:
                
                uint64_t bytes_read = 0;
                decmpfs_vector vec = { .buf = data, .size = MIN(allocSize, remaining) };
-               err = decmpfs_fetch_uncompressed_data(vp, hdr, offset, vec.size, 1, &vec, &bytes_read);
+               err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, offset, vec.size, 1, &vec, &bytes_read);
                if (err != 0) {
                        ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
                        goto out;
@@ -1733,7 +1760,8 @@ static decmpfs_registration Type1Reg =
     .validate          = decmpfs_validate_compressed_file_Type1,
     .adjust_fetch      = NULL, /* no adjust necessary */
     .fetch             = decmpfs_fetch_uncompressed_data_Type1,
-    .free_data         = NULL  /* no free necessary */
+    .free_data         = NULL, /* no free necessary */
+    .get_flags         = NULL  /* no flags */
 };
 
 #pragma mark --- decmpfs initialization ---
index 8bc4ede3654aa93788d796b1c19e4c5517d01d11..9fa89e44315a16552add539388182f767683de18 100644 (file)
@@ -195,6 +195,7 @@ imageboot_mount_image(const char *root_path, int height)
                vnode_get_and_drop_always(old_rootvnode);
            }
 #else 
+           height = 0; /* keep the compiler from complaining */
            vnode_get_and_drop_always(old_rootvnode);
 #endif /* CONFIG_IMGSRC_ACCESS */
        }
index f7c7fa73a92a407abe2bc87b9d0e2d26a1b0ff51..b25b3f9d477570d3e7abd3fd4b8972be0eba2480 100644 (file)
@@ -65,6 +65,7 @@
 #include <sys/fcntl.h>
 #include <sys/file_internal.h>
 #include <sys/ubc.h>
+#include <sys/param.h>                 /* for isset() */
 
 #include <mach/mach_host.h>            /* for host_info() */
 #include <libkern/OSAtomic.h>
@@ -90,6 +91,9 @@ void kdbg_mapinit(void);
 int kdbg_reinit(boolean_t);
 int kdbg_bootstrap(boolean_t);
 
+static int kdbg_enable_typefilter(void);
+static int kdbg_disable_typefilter(void);
+
 static int create_buffers(boolean_t);
 static void delete_buffers(void);
 
@@ -189,7 +193,13 @@ struct kd_bufinfo *kdbip = NULL;
 kd_buf *kdcopybuf = NULL;
 
 
-unsigned int nkdbufs = 8192;
+int kdlog_sched_events = 0;
+
+boolean_t kdlog_bg_trace = FALSE;
+boolean_t kdlog_bg_trace_running = FALSE;
+unsigned int bg_nkdbufs = 0;
+
+unsigned int nkdbufs = 0;
 unsigned int kdlog_beg=0;
 unsigned int kdlog_end=0;
 unsigned int kdlog_value1=0;
@@ -237,6 +247,18 @@ pid_t global_state_pid = -1;       /* Used to control exclusive use of kd_buffer
 
 #define DBG_FUNC_MASK  0xfffffffc
 
+/*  TODO: move to kdebug.h */
+#define CLASS_MASK      0xff000000
+#define CLASS_OFFSET    24
+#define SUBCLASS_MASK   0x00ff0000
+#define SUBCLASS_OFFSET 16
+#define CSC_MASK        0xffff0000     /*  class and subclass mask */
+#define CSC_OFFSET      SUBCLASS_OFFSET
+
+#define EXTRACT_CLASS(debugid)          ( (uint8_t) ( ((debugid) & CLASS_MASK   ) >> CLASS_OFFSET    ) )
+#define EXTRACT_SUBCLASS(debugid)       ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) )
+#define EXTRACT_CSC(debugid)            ( (uint16_t)( ((debugid) & CSC_MASK     ) >> CSC_OFFSET      ) )
+
 #define INTERRUPT      0x01050000
 #define MACH_vmfault   0x01300008
 #define BSC_SysCall    0x040c0000
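
A debugid packs class, subclass, and code into one 32-bit word; the new
macros pull out the class byte and the combined class/subclass ("CSC")
halfword that the typefilter indexes by. A worked example using the
MACH_vmfault code defined just above:

    #include <stdint.h>
    #include <stdio.h>

    #define CLASS_MASK      0xff000000
    #define CLASS_OFFSET    24
    #define CSC_MASK        0xffff0000
    #define CSC_OFFSET      16

    #define EXTRACT_CLASS(d) ((uint8_t)(((d) & CLASS_MASK) >> CLASS_OFFSET))
    #define EXTRACT_CSC(d)   ((uint16_t)(((d) & CSC_MASK) >> CSC_OFFSET))

    int main(void) {
        uint32_t debugid = 0x01300008;     /* MACH_vmfault */
        printf("class 0x%02x csc 0x%04x\n",
               EXTRACT_CLASS(debugid),     /* 0x01: class 1 (DBG_MACH) */
               EXTRACT_CSC(debugid));      /* 0x0130: class 1, subclass 0x30 */
        return 0;
    }
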
@@ -273,18 +295,20 @@ volatile kd_chudhook_fn kdebug_chudhook = 0;   /* pointer to CHUD toolkit functi
 
 __private_extern__ void stackshot_lock_init( void ) __attribute__((section("__TEXT, initcode")));
 
+static uint8_t *type_filter_bitmap;
+
 static void
-kdbg_set_tracing_enabled(boolean_t enabled)
+kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
 {
        int s = ml_set_interrupts_enabled(FALSE);
        lck_spin_lock(kds_spin_lock);
 
        if (enabled) {
-               kdebug_enable |= KDEBUG_ENABLE_TRACE;
+               kdebug_enable |= trace_type;
                kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
                kd_ctrl_page.enabled = 1;
        } else {
-               kdebug_enable &= ~KDEBUG_ENABLE_TRACE;
+               kdebug_enable &= ~(KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT);
                kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
                kd_ctrl_page.enabled = 0;
        }
@@ -578,7 +602,7 @@ boolean_t
 allocate_storage_unit(int cpu)
 {
        union   kds_ptr kdsp;
-       struct  kd_storage *kdsp_actual;
+       struct  kd_storage *kdsp_actual, *kdsp_next_actual;
        struct  kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
        uint64_t        oldest_ts, ts;
        boolean_t       retval = TRUE;
@@ -652,9 +676,14 @@ allocate_storage_unit(int cpu)
                }
                kdsp = kdbp_vict->kd_list_head;
                kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
-
                kdbp_vict->kd_list_head = kdsp_actual->kds_next;
 
+               if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
+                       kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
+                       kdsp_next_actual->kds_lostevents = TRUE;
+               } else
+                       kdbp_vict->kd_lostevents = TRUE;
+
                kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
        }
        kdsp_actual->kds_timestamp = mach_absolute_time();
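
When allocate_storage_unit steals the oldest storage unit from a victim
CPU, the events in it are gone; the fix marks the successor unit (or the
victim's bufinfo when its list empties) so the reader can later emit a
lost-events record at the right position. The bookkeeping, reduced to a
linked-list sketch:

    #include <stdbool.h>
    #include <stddef.h>

    struct unit     { struct unit *next; bool lostevents; };
    struct cpu_info { struct unit *head; bool lostevents; };

    /* Recycle the oldest unit from victim, remembering that events
     * vanished at the point the reader will reach next. */
    static struct unit *steal_oldest(struct cpu_info *victim)
    {
        struct unit *u = victim->head;
        victim->head = u->next;
        if (victim->head != NULL)
            victim->head->lostevents = true;  /* reader sees the gap here */
        else
            victim->lostevents = true;        /* list drained: flag the cpu */
        return u;
    }
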
@@ -707,7 +736,9 @@ kernel_debug_internal(
        int             cpu;
        struct kd_bufinfo *kdbp;
        struct kd_storage *kdsp_actual;
+       union  kds_ptr kds_raw;
 
+       
 
        if (kd_ctrl_page.kdebug_slowcheck) {
 
@@ -748,7 +779,7 @@ kernel_debug_internal(
                        lck_spin_unlock(kds_spin_lock);
                        ml_set_interrupts_enabled(s);
                }
-               if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & KDEBUG_ENABLE_TRACE))
+               if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
                        goto out1;
        
                if ( !ml_at_interrupt_context()) {
@@ -759,7 +790,8 @@ kernel_debug_internal(
                                curproc = current_proc();
 
                                if ((curproc && !(curproc->p_kdebug)) &&
-                                   ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
+                                   ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
+                                     (debugid >> 24 != DBG_TRACE))
                                        goto out1;
                        }
                        else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
@@ -769,30 +801,46 @@ kernel_debug_internal(
                                curproc = current_proc();
 
                                if ((curproc && curproc->p_kdebug) &&
-                                   ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
+                                   ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
+                                     (debugid >> 24 != DBG_TRACE))
                                        goto out1;
                        }
                }
-               if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
-                       if ((debugid < kdlog_beg)
-                                       || ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE)))
-                               goto out1;
+
+               if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+                       /* Always record trace system info */
+                       if (EXTRACT_CLASS(debugid) == DBG_TRACE)
+                               goto record_event;
+
+                       if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) 
+                               goto record_event;
+                       goto out1;
+               }
+               else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
+                       if ((debugid >= kdlog_beg && debugid <= kdlog_end) || (debugid >> 24) == DBG_TRACE)
+                               goto record_event;
+                       if (kdlog_sched_events && (debugid & 0xffff0000) == (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))
+                               goto record_event;
+                       goto out1;
                }
                else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
                        if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
-                                       (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
-                                       (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
-                                       (debugid & DBG_FUNC_MASK) != kdlog_value4 &&
-                                       (debugid >> 24 != DBG_TRACE))
+                           (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
+                           (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
+                           (debugid & DBG_FUNC_MASK) != kdlog_value4 &&
+                           (debugid >> 24 != DBG_TRACE))
                                goto out1;
                }
        }
+record_event:
        disable_preemption();
        cpu = cpu_number();
        kdbp = &kdbip[cpu];
 retry_q:
-       if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
-               kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
+       kds_raw = kdbp->kd_list_tail;
+
+       if (kds_raw.raw != KDS_PTR_NULL) {
+               kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
                bindx = kdsp_actual->kds_bufindx;
        } else
                kdsp_actual = NULL;
@@ -963,7 +1011,7 @@ kdbg_reinit(boolean_t early_trace)
         * First make sure we're not in
         * the middle of cutting a trace
         */
-       kdbg_set_tracing_enabled(FALSE);
+       kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE);
 
        /*
         * make sure the SLOW_NOLOG is seen
@@ -1167,7 +1215,7 @@ kdbg_clear(void)
         * First make sure we're not in
         * the middle of cutting a trace
         */
-       kdbg_set_tracing_enabled(FALSE);
+       kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE);
 
        /*
         * make sure the SLOW_NOLOG is seen
@@ -1176,12 +1224,16 @@ kdbg_clear(void)
         */
        IOSleep(100);
 
+       kdlog_sched_events = 0;
         global_state_pid = -1;
        kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
        kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
        kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
+       
+       kdbg_disable_typefilter();
 
        delete_buffers();
+       nkdbufs = 0;
 
        /* Clean up the thread map buffer */
        kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
@@ -1299,16 +1351,68 @@ kdbg_setrtcdec(kd_regtype *kdr)
        return(ret);
 }
 
+int
+kdbg_enable_typefilter(void)
+{
+       if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+               /* free the old filter */
+               kdbg_disable_typefilter();
+       }
+       
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE) != KERN_SUCCESS) {
+               return ENOSPC;
+       }
+       
+       bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+
+       /* Turn off range and value checks */
+       kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
+       
+       /* Enable filter checking */
+       kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
+       kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
+       return 0;
+}
+
+int
+kdbg_disable_typefilter(void)
+{
+       /*  Disable filter checking */  
+       kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
+       
+       /*  Turn off slow checks unless pid checks are using them */
+       if ( (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
+               kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
+       else
+               kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
+       
+       if(type_filter_bitmap == NULL)
+               return 0;
+
+       vm_offset_t old_bitmap = (vm_offset_t)type_filter_bitmap;
+       type_filter_bitmap = NULL;
+
+       kmem_free(kernel_map, old_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+       return 0;
+}
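
The typefilter is a bitmap with one bit per 16-bit class/subclass value,
so KDBG_TYPEFILTER_BITMAP_SIZE is presumably 65536/8 = 8KB; the kernel
tests it with isset() in kernel_debug_internal above. A userspace sketch
of building such a bitmap before handing it to KERN_KDSET_TYPEFILTER,
using the setbit()/isset() macros from <sys/param.h> (the size constant
here is an assumption):

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/param.h>    /* setbit(), isset() */

    #define TYPEFILTER_SIZE ((64 * 1024) / 8)  /* one bit per CSC value */

    int main(void)
    {
        uint8_t *bitmap = calloc(1, TYPEFILTER_SIZE);
        if (bitmap == NULL)
            return 1;

        setbit(bitmap, 0x0130);   /* pass class 0x01, subclass 0x30 */
        setbit(bitmap, 0x040c);   /* pass class 0x04, subclass 0x0c */

        /* the kernel-side check is the same test in reverse */
        if (isset(bitmap, 0x0130))
            ;                     /* this event would be recorded */

        free(bitmap);
        return 0;
    }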
+
 int
 kdbg_setreg(kd_regtype * kdr)
 {
        int ret=0;
        unsigned int val_1, val_2, val;
+
+       kdlog_sched_events = 0;
+
        switch (kdr->type) {
        
        case KDBG_CLASSTYPE :
                val_1 = (kdr->value1 & 0xff);
                val_2 = (kdr->value2 & 0xff);
+
+               if (val_1 == DBG_FSYSTEM && val_2 == (DBG_FSYSTEM + 1))
+                       kdlog_sched_events = 1;
+
                kdlog_beg = (val_1<<24);
                kdlog_end = (val_2<<24);
                kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
@@ -1348,7 +1452,9 @@ kdbg_setreg(kd_regtype * kdr)
        case KDBG_TYPENONE :
                kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
 
-               if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK | KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
+               if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK   | 
+                                                  KDBG_PIDCHECK   | KDBG_PIDEXCLUDE | 
+                                                  KDBG_TYPEFILTER_CHECK)) )
                        kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
                else
                        kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
@@ -1612,7 +1718,7 @@ kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout)
 }
 
 
-static void
+static int
 kdbg_set_nkdbufs(unsigned int value)
 {
         /*
@@ -1622,12 +1728,34 @@ kdbg_set_nkdbufs(unsigned int value)
         unsigned int max_entries = (sane_size/2) / sizeof(kd_buf);
 
        if (value <= max_entries)
-               nkdbufs = value;
+               return (value);
        else
-               nkdbufs = max_entries;
+               return (max_entries);
 }
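
kdbg_set_nkdbufs now returns the clamped buffer count instead of writing
the global directly, capping trace storage at half of sane_size. Worked
numbers, assuming 8GB of usable memory and a hypothetical 64-byte kd_buf:

    max_entries = (8GB / 2) / 64 bytes = 67,108,864 entries

Any larger request is silently reduced to that ceiling, and the caller
decides whether the result lands in nkdbufs or bg_nkdbufs.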
 
 
+static void
+kdbg_enable_bg_trace(void)
+{
+       if (kdlog_bg_trace == TRUE && kdlog_bg_trace_running == FALSE && n_storage_buffers == 0) {
+               nkdbufs = bg_nkdbufs;
+               kdbg_reinit(FALSE);
+               kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
+               kdlog_bg_trace_running = TRUE;
+       }
+}
+
+static void
+kdbg_disable_bg_trace(void)
+{
+       if (kdlog_bg_trace_running == TRUE) {
+               kdlog_bg_trace_running = FALSE;
+               kdbg_clear();           
+       }
+}

+
+
+
 /*
  * This function is provided for the CHUD toolkit only.
  *    int val:
@@ -1672,6 +1800,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                name[0] == KERN_KDEFLAGS ||
                name[0] == KERN_KDDFLAGS ||
                name[0] == KERN_KDENABLE ||
+               name[0] == KERN_KDENABLE_BG_TRACE ||
                name[0] == KERN_KDSETBUF) {
                
                if ( namelen < 2 )
@@ -1686,7 +1815,9 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 
        lck_mtx_lock(kd_trace_mtx_sysctl);
 
-       if (name[0] == KERN_KDGETBUF) {
+       switch(name[0]) {
+
+       case KERN_KDGETBUF:
                /* 
                 * Does not alter the global_state_pid
                 * This is a passive request.
@@ -1701,7 +1832,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                }
                kd_bufinfo.nkdbufs = nkdbufs;
                kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap);
-
+               
                if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
                        kd_bufinfo.nolog = 1;
                else
@@ -1728,13 +1859,28 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                ret = EINVAL;
                }
                goto out;
+               break;
 
-       } else if (name[0] == KERN_KDGETENTROPY) {              
+       case KERN_KDGETENTROPY:
                if (kd_entropy_buffer)
                        ret = EBUSY;
                else
                        ret = kdbg_getentropy(where, sizep, value);
                goto out;
+               break;
+
+       case KERN_KDENABLE_BG_TRACE:
+               bg_nkdbufs = kdbg_set_nkdbufs(value); 
+               kdlog_bg_trace = TRUE;
+               kdbg_enable_bg_trace();
+               goto out;
+               break;
+
+       case KERN_KDDISABLE_BG_TRACE:
+               kdlog_bg_trace = FALSE;
+               kdbg_disable_bg_trace();
+               goto out;
+               break;
        }
        
        if ((curproc = current_proc()) != NULL)
@@ -1764,40 +1910,55 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 
        switch(name[0]) {
                case KERN_KDEFLAGS:
+                       kdbg_disable_bg_trace();
+
                        value &= KDBG_USERFLAGS;
                        kd_ctrl_page.kdebug_flags |= value;
                        break;
                case KERN_KDDFLAGS:
+                       kdbg_disable_bg_trace();
+
                        value &= KDBG_USERFLAGS;
                        kd_ctrl_page.kdebug_flags &= ~value;
                        break;
                case KERN_KDENABLE:
                        /*
-                        * used to enable or disable
+                        * Enable the tracing mechanism.  Two types:
+                        * KDEBUG_ENABLE_TRACE is the standard one, and
+                        * KDEBUG_ENABLE_PPT is a carefully chosen subset
+                        * that avoids performance impact.
                         */
                        if (value) {
                                /*
                                 * enable only if buffer is initialized
                                 */
-                               if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
+                               if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || 
+                                   !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
                                        ret = EINVAL;
                                        break;
                                }
                                kdbg_mapinit();
 
-                               kdbg_set_tracing_enabled(TRUE);
+                               kdbg_set_tracing_enabled(TRUE, value);
                        }
                        else
-                               kdbg_set_tracing_enabled(FALSE);
+                       {
+                               kdbg_set_tracing_enabled(FALSE, 0);
+                       }
                        break;
                case KERN_KDSETBUF:
-                       kdbg_set_nkdbufs(value);
+                       kdbg_disable_bg_trace();
+
+                       nkdbufs = kdbg_set_nkdbufs(value);
                        break;
                case KERN_KDSETUP:
+                       kdbg_disable_bg_trace();
+
                        ret = kdbg_reinit(FALSE);
                        break;
                case KERN_KDREMOVE:
                        kdbg_clear();
+                       kdbg_enable_bg_trace();
                        break;
                case KERN_KDSETREG:
                        if(size < sizeof(kd_regtype)) {
@@ -1808,6 +1969,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                ret = EINVAL;
                                break;
                        }
+                       kdbg_disable_bg_trace();
+
                        ret = kdbg_setreg(&kd_Reg);
                        break;
                case KERN_KDGETREG:
@@ -1819,6 +1982,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                        if (copyout(&kd_Reg, where, sizeof(kd_regtype))) {
                                ret = EINVAL;
                        }
+                       kdbg_disable_bg_trace();
+
                        break;
                case KERN_KDREADTR:
                        ret = kdbg_read(where, sizep, NULL, NULL);
@@ -1832,6 +1997,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                        vnode_t vp;
                        int     fd;
 
+                       kdbg_disable_bg_trace();
+
                        if (name[0] == KERN_KDWRITETR) {
                                int s;
                                int wait_result = THREAD_AWAKENED;
@@ -1912,6 +2079,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                ret = EINVAL;
                                break;
                        }
+                       kdbg_disable_bg_trace();
+
                        ret = kdbg_setpid(&kd_Reg);
                        break;
                case KERN_KDPIDEX:
@@ -1923,6 +2092,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                ret = EINVAL;
                                break;
                        }
+                       kdbg_disable_bg_trace();
+
                        ret = kdbg_setpidex(&kd_Reg);
                        break;
                case KERN_KDTHRMAP:
@@ -1937,9 +2108,28 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                ret = EINVAL;
                                break;
                        }
+                       kdbg_disable_bg_trace();
+
                        ret = kdbg_setrtcdec(&kd_Reg);
                        break;
-                      
+               case KERN_KDSET_TYPEFILTER:
+                       kdbg_disable_bg_trace();
+
+                       if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){
+                               if ((ret = kdbg_enable_typefilter()))
+                                       break;
+                       }
+
+                       if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
+                               ret = EINVAL;
+                               break;
+                       }
+
+                       if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) {
+                               ret = EINVAL;
+                               break;
+                       }
+                       break;
                default:
                        ret = EINVAL;
        }
@@ -2006,17 +2196,23 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
                tempbuf = kdcopybuf;
                tempbuf_number = 0;
 
+               // While space remains in the copy buffer
                while (tempbuf_count) {
                        mintime = 0xffffffffffffffffULL;
                        min_kdbp = NULL;
                        min_cpu = 0;
 
+                       // Check all CPUs
                        for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_cpus; cpu++, kdbp++) {
 
+                               // Find one with raw data
                                if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL)
                                        continue;
+
+                               // Get from cpu data to buffer header to buffer
                                kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
 
+                               // See if there is any unread data left in this buffer
                                rcursor = kdsp_actual->kds_readlast;
 
                                if (rcursor == kdsp_actual->kds_bufindx)
@@ -2052,11 +2248,13 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
                                out_of_events = TRUE;
                                break;
                        }
+
+                       // Get data
                        kdsp = min_kdbp->kd_list_head;
                        kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
 
                        if (kdsp_actual->kds_lostevents == TRUE) {
-                               lostevent.timestamp = kdsp_actual->kds_records[kdsp_actual->kds_readlast].timestamp;
+                               kdbg_set_timestamp_and_cpu(&lostevent, kdsp_actual->kds_records[kdsp_actual->kds_readlast].timestamp, min_cpu);
                                *tempbuf = lostevent;
                                
                                kdsp_actual->kds_lostevents = FALSE;
@@ -2064,6 +2262,8 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 
                                goto nextevent;
                        }
+
+                       // Copy into buffer
                        *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
 
                        if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
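
kdbg_read is an N-way merge: each pass scans every CPU's oldest storage
unit, picks the record with the smallest timestamp, and copies that one
out, so events come back in global time order. The selection step,
sketched over plain arrays:

    #include <stdint.h>
    #include <stdio.h>

    struct stream { const uint64_t *ts; int len, cursor; };

    /* Return the index of the stream holding the globally oldest event,
     * or -1 once every stream is drained. */
    static int pick_min(struct stream s[], int n)
    {
        uint64_t mintime = UINT64_MAX;
        int min_i = -1;
        for (int i = 0; i < n; i++) {
            if (s[i].cursor == s[i].len)
                continue;                 /* this CPU has no data left */
            if (s[i].ts[s[i].cursor] < mintime) {
                mintime = s[i].ts[s[i].cursor];
                min_i = i;
            }
        }
        return min_i;
    }

    int main(void)
    {
        uint64_t a[] = {1, 5, 9}, b[] = {2, 3, 8};
        struct stream s[2] = { {a, 3, 0}, {b, 3, 0} };
        int i;
        while ((i = pick_min(s, 2)) >= 0)
            printf("%llu ", (unsigned long long)s[i].ts[s[i].cursor++]);
        printf("\n");    /* prints: 1 2 3 5 8 9 */
        return 0;
    }
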
@@ -2263,10 +2463,10 @@ start_kern_tracing(unsigned int new_nkdbufs) {
 
        if (!new_nkdbufs)
                return;
-       kdbg_set_nkdbufs(new_nkdbufs);
+       nkdbufs = kdbg_set_nkdbufs(new_nkdbufs);
        kdbg_lock_init();
        kdbg_reinit(TRUE);
-       kdbg_set_tracing_enabled(TRUE);
+       kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
 
 #if defined(__i386__) || defined(__x86_64__)
        uint64_t now = mach_absolute_time();
index f30df6d2d5b66c6394b06a890c410e032380be3f..1cf74dc41a33ecdc738cb1bbb3db31f277141c21 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -154,11 +154,14 @@ kauth_init(void)
 
        /* bring up kauth subsystem components */
        kauth_cred_init();
+#if CONFIG_EXT_RESOLVER
        kauth_identity_init();
        kauth_groups_init();
+#endif
        kauth_scope_init();
+#if CONFIG_EXT_RESOLVER
        kauth_resolver_init();
-
+#endif
        /* can't alloc locks after this */
        lck_grp_free(kauth_lck_grp);
        kauth_lck_grp = NULL;
diff --git a/bsd/kern/kern_callout.c b/bsd/kern/kern_callout.c
deleted file mode 100644 (file)
index 58df65f..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * Kernel callout related functions, including moving average calculation
- * to permit the kernel to know about insufficiently responsive user space
- * processes.
- */
-
-#include <string.h>            /* memove, memset */
-#include <stdint.h>            /* uint64_t */
-#include <sys/kern_callout.h>
-
-/*
- * kco_ma_init
- *
- * Initialize a moving average structure for use
- *
- * Parameters: map                     Pointer to the moving average state
- *             threshold               Threshold % at which to trigger (>100)
- *             kind                    Kind of trigger(s) to set
- *
- * Returns:    (void)
- *
- * Notes:      The number of samples in a simple moving average is not
- *             controllable; this might be a future direction.
- *
- *             The simple and weighted thresholds are not separately
- *             controllable; this might be a future direction, but
- *             will likely be unnecessary due to one type being in use
- *             at a time in the most likely scenarios.
- */
-void
-kco_ma_init(struct kco_moving_average *map, int32_t threshold, int kind)
-{
-       memset(map, 0, sizeof(*map));
-
-       /* per algorithm init required */
-       map->ma_flags |= KCO_MA_F_NEEDS_INIT;
-
-       /* set algorithm selector flags */
-       map->ma_flags |= kind;
-
-       /* set thresholds */
-       map->ma_sma_threshold = threshold;
-       map->ma_wma_threshold = threshold;
-}
-
-
-/*
- * kco_ma_info
- *
- * Report on the current moving average information; this is typically only
- * called after a trigger event.
- *
- * Parameters: map                     Pointer to the moving average state
- *             kind                    Kind of trigger to report on
- *             averagep                Pointer to area to receive current
- *             old_averagep            Pointer to area to receive previous
- *             thresholdp              Pointer to area to receive threshold
- *
- * Returns:    0                       Information not available
- *             1                       Information retrieved
- *
- * Notes:      You can only retrieve one kind of average information at a
- *             time; if you are collecting multiple types, then you must
- *             call this function one time for each type you are interested
- *             in obtaining.
- */
-int
-kco_ma_info(struct kco_moving_average *map, int kind, uint64_t *averagep, uint64_t *old_averagep, int32_t *thresholdp, int *countp)
-{
-       uint64_t        average;
-       uint64_t        old_average;
-       int32_t         threshold;
-       int             count;
-
-       /* Not collecting this type of data or no data yet */
-       if (!(map->ma_flags & kind) || (map->ma_flags & KCO_MA_F_NEEDS_INIT))
-               return(0);
-
-       switch(kind) {
-       case KCO_MA_F_SMA:
-               average = map->ma_sma;
-               old_average = map->ma_old_sma;
-               threshold = map->ma_sma_threshold;
-               count = map->ma_sma_trigger_count;
-               break;
-
-       case KCO_MA_F_WMA:
-               average = map->ma_wma;
-               old_average = map->ma_old_wma;
-               threshold = map->ma_wma_threshold;
-               count = map->ma_wma_trigger_count;
-               break;
-
-       default:
-               /*
-                * Asking for data we don't have or more than one kind of
-                * data at the same time.
-                */
-               return(0);
-       }
-
-       if (averagep != NULL)
-               *averagep = average;
-       if (old_averagep != NULL)
-               *old_averagep = old_average;
-       if (thresholdp != NULL)
-               *thresholdp = threshold;
-       if (countp != NULL)
-               *countp = count;
-
-       return(1);
-}
-
-
-/*
- * kco_ma_addsample
- *
- * Accumulate a sample into a moving average
- *
- * Parameters: map                     Pointer to the moving average state
- *             sample_time             Latency delta time
- *
- * Returns:    0                       Nothing triggered
- *             !0                      Bitmap of KCO_MA_F_* flags for the
- *                                             algorithms which triggered
- *
- * Notes:      Add a delta time sample to the moving average; this function
- *             will return bits for each algorithm which went over its
- *             trigger threshold as a result of receiving the sample.
- *             Callers can then log/complain/panic over the unresponsive
- *             process to which they are calling out.
- */
-int
-kco_ma_addsample(struct kco_moving_average *map, uint64_t sample_time)
-{
-       int     triggered = 0;
-       int     do_init = (map->ma_flags & KCO_MA_F_NEEDS_INIT);
-
-       /*
-        * SIMPLE MOVING AVERAGE
-        *
-        * Compute simple moving average over MA_SMA_SAMPLES; incremental is
-        * cheaper than re-sum.
-        */
-       if (map->ma_flags & KCO_MA_F_SMA) {
-               map->ma_old_sma = map->ma_sma;
-
-               map->ma_sma = ((map->ma_sma * MA_SMA_SAMPLES) - map->ma_sma_samples[0] + sample_time) / MA_SMA_SAMPLES;
-               memmove(&map->ma_sma_samples[1], &map->ma_sma_samples[0], sizeof(map->ma_sma_samples[0]) *(MA_SMA_SAMPLES - 1));
-               map->ma_sma_samples[0] = sample_time;
-               /*
-                * Check if percentage change exceeds the allowed trigger
-                * threshold; this will only happen if the sample time
-                * increases more than an acceptable amount; decreases will
-                * not cause a trigger (but will decrease the overall average,
-                * which can cause a trigger the next time).
-                *
-                * Note:        We don't start triggering on the simple moving
-                *              average until after we have enough samples for
-                *              the delta to be statistically valid; this is
-                *              defined to be MA_SMA_SAMPLES.
-                */
-               if (map->ma_sma_samples[MA_SMA_SAMPLES-1] && ((int)((map->ma_sma * 100) / map->ma_old_sma)) > map->ma_sma_threshold) {
-                       triggered |= KCO_MA_F_SMA;
-                       map->ma_sma_trigger_count++;
-               }
-       }
-
-       /*
-        * WEIGHTED MOVING AVERAGE
-        *
-        * Compute the weighted moving average.  Do this by averaging over
-        * two values, one with a lesser weighting than the other; the lesser
-        * weighted value is the persistent historical value, whose sample
-        * weight decreases over time, the older the samples get.  Be careful
- * here to stay within strict integer arithmetic.
-        */
-       if (map->ma_flags & KCO_MA_F_WMA) {
-               map->ma_old_wma = map->ma_wma;
-
-               /* Prime the pump, if necessary */
-               if (do_init)
-                       map->ma_old_wma = sample_time;
-
-               map->ma_wma = ((((map->ma_wma * 90) + sample_time * ((100*2) - 90))/100) / 2);
-
-               /*
-                * Check if percentage change exceeds the allowed trigger
-                * threshold; this will only happen if the sample time
-                * increases more than an acceptable amount; decreases will
-                * not cause a trigger (but will decrease the overall average,
-                * which can cause a trigger the next time).
-                */
-               if (((int)(((map->ma_wma * 100) / map->ma_old_wma))) > map->ma_wma_threshold) {
-                       triggered |= KCO_MA_F_WMA;
-                       map->ma_wma_trigger_count++;
-               }
-       }
-
-       if (do_init)
-               map->ma_flags &= ~KCO_MA_F_NEEDS_INIT;
-
-       return (triggered);
-}
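
For reference, the weighted update in kco_ma_addsample() above reduces to a fixed 45/55 split: ma_wma = (((old * 90) + sample * (200 - 90)) / 100) / 2, i.e. 45% history and 55% new sample, kept in strict integer arithmetic. (Note in passing that the incremental SMA subtracts ma_sma_samples[0], which at that point holds the most recent sample, where a textbook incremental update drops the oldest, ma_sma_samples[MA_SMA_SAMPLES - 1]; the file is deleted wholesale by this commit, so the wart goes with it.) A minimal standalone sketch of the WMA arithmetic and its trigger check, with illustrative values of ours:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t wma = 100;	/* steady state: every sample took 100 units */
	uint64_t sample = 200;	/* one slow response arrives */
	uint64_t old_wma = wma;

	/* same integer arithmetic as the deleted kco_ma_addsample() */
	wma = (((wma * 90) + sample * ((100 * 2) - 90)) / 100) / 2;

	/* trigger check: new average as a percentage of the old */
	printf("old %llu new %llu pct %llu\n",
	    (unsigned long long)old_wma, (unsigned long long)wma,
	    (unsigned long long)((wma * 100) / old_wma));
	/*
	 * Prints "old 100 new 155 pct 155"; 155 exceeds the default
	 * threshold of 110, so this sample would trigger KCO_MA_F_WMA.
	 */
	return 0;
}
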
diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c
index 92b15bc408655398d24ba2f7378e772ffc918418..ae00ee73b56ccace725ddeb26df4167f673776de 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -654,34 +654,38 @@ ctl_ioctl(__unused struct socket *so, u_long cmd, caddr_t data,
                /* get the number of controllers */
                case CTLIOCGCOUNT: {
                        struct kctl     *kctl;
-                       int n = 0;
+                       u_int32_t n = 0;
 
                        lck_mtx_lock(ctl_mtx);
                        TAILQ_FOREACH(kctl, &ctl_head, next)
                                n++;
                        lck_mtx_unlock(ctl_mtx);
-
-                       *(u_int32_t *)data = n;
+                       
+                       bcopy(&n, data, sizeof (n));
                        error = 0;
                        break;
                }
                case CTLIOCGINFO: {
-                       struct ctl_info *ctl_info = (struct ctl_info *)data;
+                       struct ctl_info ctl_info;
                        struct kctl     *kctl = 0;
-                       size_t name_len = strlen(ctl_info->ctl_name);
-                       
+                       size_t name_len;
+
+                       bcopy(data, &ctl_info, sizeof (ctl_info));
+                       name_len = strnlen(ctl_info.ctl_name, MAX_KCTL_NAME);
+
                        if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME) {
                                error = EINVAL;
                                break;
                        }
                        lck_mtx_lock(ctl_mtx);
-                       kctl = ctl_find_by_name(ctl_info->ctl_name);
+                       kctl = ctl_find_by_name(ctl_info.ctl_name);
                        lck_mtx_unlock(ctl_mtx);
                        if (kctl == 0) {
                                error = ENOENT;
                                break;
                        }
-                       ctl_info->ctl_id = kctl->id;
+                       ctl_info.ctl_id = kctl->id;
+                       bcopy(&ctl_info, data, sizeof (ctl_info));
                        error = 0;
                        break;
                }
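
The hunk above replaces direct casts of the ioctl argument (*(u_int32_t *)data, (struct ctl_info *)data) with bcopy() through a stack copy, and bounds the name scan with strnlen(); the handler no longer assumes the user-supplied buffer is aligned or NUL-terminated. A compilable sketch of the same pattern, with an illustrative struct and handler of ours:

#include <sys/types.h>
#include <errno.h>
#include <string.h>	/* strnlen */
#include <strings.h>	/* bcopy */

struct example_info {
	char		ei_name[96];	/* may arrive unterminated */
	u_int32_t	ei_id;
};

int
example_ioctl(caddr_t data)
{
	struct example_info info;
	size_t name_len;

	bcopy(data, &info, sizeof (info));	/* no alignment assumption */
	name_len = strnlen(info.ei_name, sizeof (info.ei_name));
	if (name_len == 0 || name_len + 1 > sizeof (info.ei_name))
		return (EINVAL);
	info.ei_id = 1;				/* fill in the reply */
	bcopy(&info, data, sizeof (info));	/* copy the result back out */
	return (0);
}
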
diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c
index cf63621d9cca01cd94307eb37afb9619c6bc0c86..1c3093e4df072a51e3f70272ee8c7861fb0c2701 100644 (file)
@@ -211,9 +211,9 @@ coredump(proc_t core_proc)
        int             command_size, header_size, tstate_size;
        int             hoffset;
        off_t           foffset;
-       vm_map_offset_t vmoffset;
+       mach_vm_offset_t vmoffset;
        vm_offset_t     header;
-       vm_map_size_t   vmsize;
+       mach_vm_size_t  vmsize;
        vm_prot_t       prot;
        vm_prot_t       maxprot;
        vm_inherit_t    inherit;
diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c
index 4a6ee0386ebcacc02a5a790cb07e6b3e16676f30..7569cf5ebcb4f38574bf0006e46840578f157103 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/malloc.h>
 #include <sys/kauth.h>
 #include <sys/kernel.h>
+#include <sys/sdt.h>
 
 #include <security/audit/audit.h>
 
 #include <sys/mount.h>
 #include <sys/stat.h>  /* For manifest constants in posix_cred_access */
 #include <sys/sysproto.h>
-#include <sys/kern_callout.h>
 #include <mach/message.h>
 #include <mach/host_security.h>
 
-/* mach_absolute_time() */
-#include <mach/clock_types.h>
-#include <mach/mach_types.h>
-#include <mach/mach_time.h>
-
 #include <libkern/OSAtomic.h>
 
 #include <kern/task.h>
@@ -76,6 +71,7 @@
 #if CONFIG_MACF
 #include <security/mac.h>
 #include <security/mac_framework.h>
+#include <security/_label.h>
 #endif
 
 void mach_kauth_cred_uthread_update( void );
@@ -130,6 +126,7 @@ cred_debug_buffer * cred_debug_buf_p = NULL;
 
 #endif /* !DEBUG_CRED */
 
+#if CONFIG_EXT_RESOLVER
 /*
  * Interface to external identity resolver.
  *
@@ -153,7 +150,6 @@ struct kauth_resolver_work {
        struct kauth_identity_extlookup kr_work;
        uint64_t        kr_extend;
        uint32_t        kr_seqno;
-       uint64_t        kr_subtime;     /* submission time */
        int             kr_refs;
        int             kr_flags;
 #define KAUTH_REQUEST_UNSUBMITTED      (1<<0)
@@ -166,11 +162,82 @@ TAILQ_HEAD(kauth_resolver_unsubmitted_head, kauth_resolver_work) kauth_resolver_
 TAILQ_HEAD(kauth_resolver_submitted_head, kauth_resolver_work) kauth_resolver_submitted;
 TAILQ_HEAD(kauth_resolver_done_head, kauth_resolver_work)      kauth_resolver_done;
 
+/* Number of resolver timeouts between logged complaints */
+#define KAUTH_COMPLAINT_INTERVAL 1000
+int kauth_resolver_timeout_cnt = 0;
+
 static int     kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data);
 static int     kauth_resolver_complete(user_addr_t message);
 static int     kauth_resolver_getwork(user_addr_t message);
 static int     kauth_resolver_getwork2(user_addr_t message);
 
+#define        KAUTH_CACHES_MAX_SIZE 10000 /* Max # entries for both groups and id caches */
+
+struct kauth_identity {
+       TAILQ_ENTRY(kauth_identity) ki_link;
+       int     ki_valid;
+       uid_t   ki_uid;
+       gid_t   ki_gid;
+       guid_t  ki_guid;
+       ntsid_t ki_ntsid;
+       const char      *ki_name;       /* string name from string cache */
+       /*
+        * Expiry times are the earliest time at which we will disregard the
+        * cached state and go to userland.  Before then if the valid bit is
+        * set, we will return the cached value.  If it's not set, we will
+        * not go to userland to resolve, just assume that there is no answer
+        * available.
+        */
+       time_t  ki_guid_expiry;
+       time_t  ki_ntsid_expiry;
+};
+
+static TAILQ_HEAD(kauth_identity_head, kauth_identity) kauth_identities;
+static lck_mtx_t *kauth_identity_mtx;
+#define KAUTH_IDENTITY_LOCK()  lck_mtx_lock(kauth_identity_mtx);
+#define KAUTH_IDENTITY_UNLOCK()        lck_mtx_unlock(kauth_identity_mtx);
+#define KAUTH_IDENTITY_CACHEMAX_DEFAULT 100    /* XXX default sizing? */
+static int kauth_identity_cachemax = KAUTH_IDENTITY_CACHEMAX_DEFAULT;
+static int kauth_identity_count;
+
+static struct kauth_identity *kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry,
+    ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype);
+static void    kauth_identity_register_and_free(struct kauth_identity *kip);
+static void    kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip, uint64_t extend_data);
+static void    kauth_identity_trimcache(int newsize);
+static void    kauth_identity_lru(struct kauth_identity *kip);
+static int     kauth_identity_guid_expired(struct kauth_identity *kip);
+static int     kauth_identity_ntsid_expired(struct kauth_identity *kip);
+static int     kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir, char *getname);
+static int     kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir, char *getname);
+static int     kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir, char *getname);
+static int     kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir, char *getname);
+static int     kauth_identity_find_nam(char *name, int valid, struct kauth_identity *kir);
+
+struct kauth_group_membership {
+       TAILQ_ENTRY(kauth_group_membership) gm_link;
+       uid_t   gm_uid;         /* the identity whose membership we're recording */
+       gid_t   gm_gid;         /* group of which they are a member */
+       time_t  gm_expiry;      /* TTL for the membership, or 0 for persistent entries */
+       int     gm_flags;
+#define KAUTH_GROUP_ISMEMBER   (1<<0)
+};
+
+TAILQ_HEAD(kauth_groups_head, kauth_group_membership) kauth_groups;
+static lck_mtx_t *kauth_groups_mtx;
+#define KAUTH_GROUPS_LOCK()    lck_mtx_lock(kauth_groups_mtx);
+#define KAUTH_GROUPS_UNLOCK()  lck_mtx_unlock(kauth_groups_mtx);
+#define KAUTH_GROUPS_CACHEMAX_DEFAULT 100      /* XXX default sizing? */
+static int kauth_groups_cachemax = KAUTH_GROUPS_CACHEMAX_DEFAULT;
+static int kauth_groups_count;
+
+static int     kauth_groups_expired(struct kauth_group_membership *gm);
+static void    kauth_groups_lru(struct kauth_group_membership *gm);
+static void    kauth_groups_updatecache(struct kauth_identity_extlookup *el);
+static void    kauth_groups_trimcache(int newsize);
+
+#endif /* CONFIG_EXT_RESOLVER */
+
 static const int kauth_cred_primes[KAUTH_CRED_PRIMES_COUNT] = KAUTH_CRED_PRIMES;
 static int     kauth_cred_primes_index = 0;
 static int     kauth_cred_table_size = 0;
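
The expiry comment in the hunk above pairs with a semantic change threaded through later hunks of this file: an expiry of 0 now marks a persistent entry (see kauth_identity_guid_expired(), kauth_identity_ntsid_expired(), and kauth_groups_expired() below). A minimal userland sketch of the resulting check, with gettimeofday() standing in for the kernel's microuptime():

#include <sys/types.h>
#include <stddef.h>
#include <sys/time.h>

/*
 * TTL check as used by the kauth caches after this commit: 0 means
 * the entry never expires; otherwise the entry lapses once the
 * clock passes the stored time.
 */
int
entry_expired(time_t expiry)
{
	struct timeval tv;

	if (expiry == 0)	/* persistent entry */
		return (0);
	gettimeofday(&tv, NULL);
	return ((expiry <= tv.tv_sec) ? 1 : 0);
}
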
@@ -178,9 +245,6 @@ static int  kauth_cred_table_size = 0;
 TAILQ_HEAD(kauth_cred_entry_head, ucred);
 static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL;
 
-/* Weighted moving average for resolver response time */
-static struct kco_moving_average resolver_ma;
-
 #define KAUTH_CRED_HASH_DEBUG  0
 
 static int kauth_cred_add(kauth_cred_t new_cred);
@@ -196,7 +260,7 @@ static void kauth_cred_hash_print(void);
 static void kauth_cred_print(kauth_cred_t cred);
 #endif
 
-
+#if CONFIG_EXT_RESOLVER
 /*
  * kauth_resolver_init
  *
@@ -206,7 +270,7 @@ static void kauth_cred_print(kauth_cred_t cred);
  *
  * Returns:    (void)
  *
- * Notes:      Intialize the credential identity resolver for use; the
+ * Notes:      Initialize the credential identity resolver for use; the
  *             credential identity resolver is the KPI used by the user
  *             space credential identity resolver daemon to communicate
 *             with the kernel via the identitysvc() system call.
@@ -232,11 +296,6 @@ kauth_resolver_init(void)
        TAILQ_INIT(&kauth_resolver_done);
        kauth_resolver_sequence = 31337;
        kauth_resolver_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/);
-
-       /*
-        * 110% of average response time is "too long" and should be reported
-        */
-       kco_ma_init(&resolver_ma, 110, KCO_MA_F_WMA);
 }
 
 
@@ -283,7 +342,6 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data
        struct kauth_resolver_work *workp, *killp;
        struct timespec ts;
        int     error, shouldfree;
-       uint64_t        duration;
        
        /* no point actually blocking if the resolver isn't up yet */
        if (kauth_resolver_identity == 0) {
@@ -326,7 +384,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data
        workp->kr_work.el_result = KAUTH_EXTLOOKUP_INPROG;
 
        /*
-        * XXX We *MUST NOT* attempt to coelesce identical work items due to
+        * XXX We *MUST NOT* attempt to coalesce identical work items due to
         * XXX the inability to ensure order of update of the request item
         * XXX extended data vs. the wakeup; instead, we let whoever is waiting
         * XXX for each item repeat the update when they wake up.
@@ -335,7 +393,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data
 
        /*
         * Wake up an external resolver thread to deal with the new work; one
-        * may not be available, and if not, then the request will be grabed
+        * may not be available, and if not, then the request will be grabbed
         * when a resolver thread comes back into the kernel to request new
         * work.
         */
@@ -358,57 +416,44 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data
                        break;
        }
 
-       /*
-        * Update the moving average of how long the request took; if it
-        * took longer than the time threshold, then we complain about it
-        * being slow.
-        */
-       duration = mach_absolute_time() - workp->kr_subtime;
-       if (kco_ma_addsample(&resolver_ma, duration)) {
-               uint64_t        average;
-               uint64_t        old_average;
-               int32_t         threshold;
-               int             count;
-
-               /* If we can't get information, don't log anything */
-               if (kco_ma_info(&resolver_ma, KCO_MA_F_WMA, &average, &old_average, &threshold, &count)) {
-                       char pname[MAXCOMLEN+1] = "(NULL)";
-                       proc_name(kauth_resolver_identity, pname, sizeof(pname));
-                       // <rdar://6276265>  printf("kauth_resolver_submit: External resolver pid %d (name %s) response time %lld, average %lld new %lld threshold %d%% actual %d%% count %d\n", kauth_resolver_identity, pname, duration, old_average, average, threshold, (int)((duration * 100) / old_average), count);
-               }
-       }
-
        /* if the request was processed, copy the result */
        if (error == 0)
                *lkp = workp->kr_work;
        
-       /*
-        * If the request timed out and was never collected, the resolver
-        * is dead and probably not coming back anytime soon.  In this
-        * case we revert to no-resolver behaviour, and punt all the other
-        * sleeping requests to clear the backlog.
-        */
-       if ((error == EWOULDBLOCK) && (workp->kr_flags & KAUTH_REQUEST_UNSUBMITTED)) {
-               KAUTH_DEBUG("RESOLVER - request timed out without being collected for processing, resolver dead");
+       if (error == EWOULDBLOCK) {
+               if ((kauth_resolver_timeout_cnt++ % KAUTH_COMPLAINT_INTERVAL) == 0) {
+                        printf("kauth external resolver timed out (%d timeout(s) of %d seconds).\n",
+                                kauth_resolver_timeout_cnt, kauth_resolver_timeout);
+                }
+                
+               if (workp->kr_flags & KAUTH_REQUEST_UNSUBMITTED) {
+                       /*
+                        * If the request timed out and was never collected, the resolver
+                        * is dead and probably not coming back anytime soon.  In this
+                        * case we revert to no-resolver behaviour, and punt all the other
+                        * sleeping requests to clear the backlog.
+                        */
+                        KAUTH_DEBUG("RESOLVER - request timed out without being collected for processing, resolver dead");
+
+                        /*
+                        * Make the current resolver non-authoritative, and mark it as
+                        * no longer registered to prevent kauth_cred_ismember_gid()
+                        * enqueueing more work until a new one is registered.  This
+                        * mitigates the damage a crashing resolver may inflict.
+                        */
+                        kauth_resolver_identity = 0;
+                        kauth_resolver_registered = 0;
+
+                        /* kill all the other requests that are waiting as well */
+                        TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link)
+                                wakeup(killp);
+                        TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link)
+                                wakeup(killp);
+                        /* Cause all waiting-for-work threads to return EIO */
+                        wakeup((caddr_t)&kauth_resolver_unsubmitted);
+                }
+        }
 
-               /*
-                * Make the current resolver non-authoritative, and mark it as
-                * no longer registered to prevent kauth_cred_ismember_gid()
-                * enqueueing more work until a new one is registered.  This
-                * mitigates the damage a crashing resolver may inflict.
-                */
-               kauth_resolver_identity = 0;
-               kauth_resolver_registered = 0;
-
-               /* kill all the other requestes that are waiting as well */
-               TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link)
-                   wakeup(killp);
-               TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link)
-                   wakeup(killp);
-               /* Cause all waiting-for-work threads to return EIO */
-               wakeup((caddr_t)&kauth_resolver_unsubmitted);
-       }
-       
        /*
         * drop our reference on the work item, and note whether we should
         * free it or not
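
Note that the timeout path above complains once per KAUTH_COMPLAINT_INTERVAL (1000) timeouts rather than on every one, so a wedged resolver cannot flood the console. The shape of that rate limit, as a standalone sketch:

#include <stdio.h>

#define COMPLAINT_INTERVAL	1000	/* mirrors KAUTH_COMPLAINT_INTERVAL */

static int timeout_cnt = 0;

/* Logs the 1st, 1001st, 2001st, ... occurrence and stays quiet between. */
void
complain_ratelimited(void)
{
	if ((timeout_cnt++ % COMPLAINT_INTERVAL) == 0)
		printf("resolver timed out (%d timeout(s) so far)\n",
		    timeout_cnt);
}
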
@@ -477,6 +522,7 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3
        int opcode = uap->opcode;
        user_addr_t message = uap->message;
        struct kauth_resolver_work *workp;
+       struct kauth_cache_sizes sz_arg;
        int error;
        pid_t new_id;
 
@@ -524,8 +570,51 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3
                KAUTH_DEBUG("RESOLVER - call from bogus resolver %d\n", current_proc()->p_pid);
                return(EPERM);
        }
+       
+       if (opcode == KAUTH_GET_CACHE_SIZES) {
+               KAUTH_IDENTITY_LOCK();
+               sz_arg.kcs_id_size = kauth_identity_cachemax;
+               KAUTH_IDENTITY_UNLOCK();
+               
+               KAUTH_GROUPS_LOCK();
+               sz_arg.kcs_group_size = kauth_groups_cachemax;
+               KAUTH_GROUPS_UNLOCK();
 
-       if (opcode == KAUTH_EXTLOOKUP_DEREGISTER) {
+               if ((error = copyout(&sz_arg, uap->message, sizeof (sz_arg))) != 0) {
+                       return (error);
+               }
+               
+               return (0);
+       } else if (opcode == KAUTH_SET_CACHE_SIZES) {
+               if ((error = copyin(uap->message, &sz_arg, sizeof (sz_arg))) != 0) {
+                       return (error);
+               }
+               
+               if ((sz_arg.kcs_group_size > KAUTH_CACHES_MAX_SIZE) ||
+                   (sz_arg.kcs_id_size > KAUTH_CACHES_MAX_SIZE)) {
+                       return (EINVAL);
+               }
+               
+               KAUTH_IDENTITY_LOCK();
+               kauth_identity_cachemax = sz_arg.kcs_id_size;
+               kauth_identity_trimcache(kauth_identity_cachemax);
+               KAUTH_IDENTITY_UNLOCK();
+               
+               KAUTH_GROUPS_LOCK();
+               kauth_groups_cachemax = sz_arg.kcs_group_size;
+               kauth_groups_trimcache(kauth_groups_cachemax);
+               KAUTH_GROUPS_UNLOCK();
+               
+               return (0);
+       } else if (opcode == KAUTH_CLEAR_CACHES) {
+               KAUTH_IDENTITY_LOCK();
+               kauth_identity_trimcache(0);
+               KAUTH_IDENTITY_UNLOCK();
+               
+               KAUTH_GROUPS_LOCK();
+               kauth_groups_trimcache(0);
+               KAUTH_GROUPS_UNLOCK();
+       } else if (opcode == KAUTH_EXTLOOKUP_DEREGISTER) {
                /*
                 * Terminate outstanding requests; without an authoritative
                 * resolver, we are now back on our own authority.
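
The three new opcodes above let the registered resolver daemon size, shrink, or flush the kauth caches; any other caller is turned away by the EPERM check earlier in identitysvc(). A hypothetical userland sketch of how such a daemon might call KAUTH_SET_CACHE_SIZES, assuming struct kauth_cache_sizes and the opcode constants are visible from <sys/kauth.h> and that identitysvc() is reached through syscall() (both are assumptions of ours, not shown in this diff):

#include <sys/kauth.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Hypothetical helper: resize both kauth caches.  Per the kernel code
 * above, sizes beyond KAUTH_CACHES_MAX_SIZE (10000) fail with EINVAL,
 * and trimming to 0 empties a cache.
 */
int
resize_kauth_caches(u_int32_t id_entries, u_int32_t group_entries)
{
	struct kauth_cache_sizes sz;

	sz.kcs_id_size = id_entries;
	sz.kcs_group_size = group_entries;
	return (syscall(SYS_identitysvc, KAUTH_SET_CACHE_SIZES, &sz));
}
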
@@ -638,7 +727,7 @@ kauth_resolver_getwork_continue(int result)
  *             EFAULT                          Bad user space message address
  *
  * Notes:      This common function exists to permit the use of continuations
- *             in the identity resoultion process.  This frees up the stack
+ *             in the identity resolution process.  This frees up the stack
  *             while we are waiting for the user space resolver to complete
  *             a request.  This is specifically used so that our per thread
  *             cost can be small, and we will therefore be willing to run a
@@ -708,7 +797,6 @@ kauth_resolver_getwork2(user_addr_t message)
        TAILQ_REMOVE(&kauth_resolver_unsubmitted, workp, kr_link);
        workp->kr_flags &= ~KAUTH_REQUEST_UNSUBMITTED;
        workp->kr_flags |= KAUTH_REQUEST_SUBMITTED;
-       workp->kr_subtime = mach_absolute_time();
        TAILQ_INSERT_TAIL(&kauth_resolver_submitted, workp, kr_link);
 
 out:
@@ -734,7 +822,7 @@ out:
  *             identity resolution daemon makes a request for work.  This
  *             permits a large number of threads to be used by the daemon,
  *             without using a lot of wired kernel memory when there are no
- *             acctual request outstanding.
+ *             actual requests outstanding.
  */
 static int
 kauth_resolver_getwork(user_addr_t message)
@@ -925,60 +1013,21 @@ kauth_resolver_complete(user_addr_t message)
        
        return(error);
 }
+#endif /* CONFIG_EXT_RESOLVER */
 
 
 /*
  * Identity cache.
  */
 
-struct kauth_identity {
-       TAILQ_ENTRY(kauth_identity) ki_link;
-       int     ki_valid;
 #define        KI_VALID_UID    (1<<0)          /* UID and GID are mutually exclusive */
 #define KI_VALID_GID   (1<<1)
 #define KI_VALID_GUID  (1<<2)
 #define KI_VALID_NTSID (1<<3)
 #define KI_VALID_PWNAM (1<<4)  /* Used for translation */
 #define KI_VALID_GRNAM (1<<5)  /* Used for translation */
-       uid_t   ki_uid;
-       gid_t   ki_gid;
-       guid_t  ki_guid;
-       ntsid_t ki_ntsid;
-       const char      *ki_name;       /* string name from string cache */
-       /*
-        * Expiry times are the earliest time at which we will disregard the
-        * cached state and go to userland.  Before then if the valid bit is
-        * set, we will return the cached value.  If it's not set, we will
-        * not go to userland to resolve, just assume that there is no answer
-        * available.
-        */
-       time_t  ki_guid_expiry;
-       time_t  ki_ntsid_expiry;
-};
-
-static TAILQ_HEAD(kauth_identity_head, kauth_identity) kauth_identities;
-#define KAUTH_IDENTITY_CACHEMAX                100     /* XXX sizing? */
-static int kauth_identity_count;
-
-static lck_mtx_t *kauth_identity_mtx;
-#define KAUTH_IDENTITY_LOCK()  lck_mtx_lock(kauth_identity_mtx);
-#define KAUTH_IDENTITY_UNLOCK()        lck_mtx_unlock(kauth_identity_mtx);
-
-
-static struct kauth_identity *kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry,
-    ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype);
-static void    kauth_identity_register_and_free(struct kauth_identity *kip);
-static void    kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip, uint64_t extend_data);
-static void    kauth_identity_lru(struct kauth_identity *kip);
-static int     kauth_identity_guid_expired(struct kauth_identity *kip);
-static int     kauth_identity_ntsid_expired(struct kauth_identity *kip);
-static int     kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir, char *getname);
-static int     kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir, char *getname);
-static int     kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir, char *getname);
-static int     kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir, char *getname);
-static int     kauth_identity_find_nam(char *name, int valid, struct kauth_identity *kir);
-
 
+#if CONFIG_EXT_RESOLVER
 /*
  * kauth_identity_init
  *
@@ -988,7 +1037,7 @@ static int kauth_identity_find_nam(char *name, int valid, struct kauth_identity
  *
  * Returns:    (void)
  *
- * Notes:      Intialize the credential identity resolver for use; the
+ * Notes:      Initialize the credential identity resolver for use; the
  *             credential identity resolver is the KPI used to communicate
  *             with a user space credential identity resolver daemon.
  *
@@ -1013,7 +1062,7 @@ kauth_identity_init(void)
  *
  * Returns:    NULL                            Insufficient memory to satisfy
  *                                             the request
- *             !NULL                           A pointer to the applocated
+ *             !NULL                           A pointer to the allocated
  *                                             structure, filled in
  *
  * Notes:      It is illegal to translate between UID and GID; any given UUID
@@ -1125,7 +1174,7 @@ kauth_identity_register_and_free(struct kauth_identity *kip)
                 * if it pushes us over our limit, discard the oldest one.
                 */
                TAILQ_INSERT_HEAD(&kauth_identities, kip, ki_link);
-               if (++kauth_identity_count > KAUTH_IDENTITY_CACHEMAX) {
+               if (++kauth_identity_count > kauth_identity_cachemax) {
                        ip = TAILQ_LAST(&kauth_identities, kauth_identity_head);
                        TAILQ_REMOVE(&kauth_identities, ip, ki_link);
                        kauth_identity_count--;
@@ -1203,12 +1252,12 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
                                        kip->ki_guid = elp->el_uguid;
                                        kip->ki_valid |= KI_VALID_GUID;
                                }
-                               kip->ki_guid_expiry = tv.tv_sec + elp->el_uguid_valid;
+                               kip->ki_guid_expiry = (elp->el_uguid_valid) ? tv.tv_sec + elp->el_uguid_valid : 0;
                                if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) {
                                        kip->ki_ntsid = elp->el_usid;
                                        kip->ki_valid |= KI_VALID_NTSID;
                                }
-                               kip->ki_ntsid_expiry = tv.tv_sec + elp->el_usid_valid;
+                               kip->ki_ntsid_expiry = (elp->el_usid_valid) ? tv.tv_sec + elp->el_usid_valid : 0;
                                if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM) {
                                        const char *oname = kip->ki_name;
                                        kip->ki_name = speculative_name;
@@ -1234,9 +1283,9 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
                if (kip == NULL) {
                        kip = kauth_identity_alloc(elp->el_uid, KAUTH_GID_NONE,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UGUID) ? &elp->el_uguid : NULL,
-                           tv.tv_sec + elp->el_uguid_valid,
+                           (elp->el_uguid_valid) ? tv.tv_sec + elp->el_uguid_valid : 0,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) ? &elp->el_usid : NULL,
-                           tv.tv_sec + elp->el_usid_valid,
+                           (elp->el_usid_valid) ? tv.tv_sec + elp->el_usid_valid : 0,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM) ? speculative_name : NULL,
                            KI_VALID_PWNAM);
                        if (kip != NULL) {
@@ -1260,12 +1309,12 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
                                        kip->ki_guid = elp->el_gguid;
                                        kip->ki_valid |= KI_VALID_GUID;
                                }
-                               kip->ki_guid_expiry = tv.tv_sec + elp->el_gguid_valid;
+                               kip->ki_guid_expiry = (elp->el_gguid_valid) ? tv.tv_sec + elp->el_gguid_valid : 0;
                                if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) {
                                        kip->ki_ntsid = elp->el_gsid;
                                        kip->ki_valid |= KI_VALID_NTSID;
                                }
-                               kip->ki_ntsid_expiry = tv.tv_sec + elp->el_gsid_valid;
+                               kip->ki_ntsid_expiry = (elp->el_gsid_valid) ? tv.tv_sec + elp->el_gsid_valid : 0;
                                if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM) {
                                        const char *oname = kip->ki_name;
                                        kip->ki_name = speculative_name;
@@ -1291,9 +1340,9 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
                if (kip == NULL) {
                        kip = kauth_identity_alloc(KAUTH_UID_NONE, elp->el_gid,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GGUID) ? &elp->el_gguid : NULL,
-                           tv.tv_sec + elp->el_gguid_valid,
+                           (elp->el_gguid_valid) ? tv.tv_sec + elp->el_gguid_valid : 0,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) ? &elp->el_gsid : NULL,
-                           tv.tv_sec + elp->el_gsid_valid,
+                           (elp->el_gsid_valid) ? tv.tv_sec + elp->el_gsid_valid : 0,
                            (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM) ? speculative_name : NULL,
                            KI_VALID_GRNAM);
                        if (kip != NULL) {
@@ -1314,6 +1363,25 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
 }
 
 
+/*
+ * Trim older entries from the identity cache.
+ *
+ * Must be called with the identity cache lock held.
+ */
+static void
+kauth_identity_trimcache(int newsize) {
+       struct kauth_identity           *kip;
+       
+       lck_mtx_assert(kauth_identity_mtx, LCK_MTX_ASSERT_OWNED);
+               
+       while (kauth_identity_count > newsize) {
+               kip = TAILQ_LAST(&kauth_identities, kauth_identity_head);
+               TAILQ_REMOVE(&kauth_identities, kip, ki_link);
+               kauth_identity_count--;
+               FREE(kip, M_KAUTH);
+       }
+}
+
 /*
  * kauth_identity_lru
  *
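
kauth_identity_trimcache() above (and kauth_groups_trimcache(), later in this diff) can simply pop TAILQ_LAST() because both caches are kept in LRU order: new entries go in at the head, and the *_lru() routines (kauth_identity_lru() is documented next) move an entry back to the head when it is used, leaving the least recently used entry at the tail. That discipline in isolation, as a sketch with illustrative types:

#include <stddef.h>
#include <sys/queue.h>

struct entry {
	TAILQ_ENTRY(entry) link;
};
TAILQ_HEAD(entry_head, entry);

/* On a cache hit: move the entry to the head (most recently used). */
void
lru_touch(struct entry_head *head, struct entry *e)
{
	TAILQ_REMOVE(head, e, link);
	TAILQ_INSERT_HEAD(head, e, link);
}

/* On trim: the tail is, by construction, the least recently used. */
struct entry *
lru_evict(struct entry_head *head)
{
	struct entry *victim = TAILQ_LAST(head, entry_head);

	if (victim != NULL)
		TAILQ_REMOVE(head, victim, link);
	return (victim);
}
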
@@ -1357,8 +1425,15 @@ kauth_identity_guid_expired(struct kauth_identity *kip)
 {
        struct timeval tv;
 
+       /*
+        * Expiration time of 0 means this entry is persistent.
+        */
+       if (kip->ki_guid_expiry == 0)
+               return (0);
+
        microuptime(&tv);
        KAUTH_DEBUG("CACHE - GUID expires @ %d now %d", kip->ki_guid_expiry, tv.tv_sec);
+
        return((kip->ki_guid_expiry <= tv.tv_sec) ? 1 : 0);
 }
 
@@ -1379,8 +1454,15 @@ kauth_identity_ntsid_expired(struct kauth_identity *kip)
 {
        struct timeval tv;
 
+       /*
+        * Expiration time of 0 means this entry is persistent.
+        */
+       if (kip->ki_ntsid_expiry == 0)
+               return (0);
+
        microuptime(&tv);
        KAUTH_DEBUG("CACHE - NTSID expires @ %d now %d", kip->ki_ntsid_expiry, tv.tv_sec);
+
        return((kip->ki_ntsid_expiry <= tv.tv_sec) ? 1 : 0);
 }
 
@@ -1505,7 +1587,7 @@ kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir, char *getnam
  *
  * Parameters: name                            Pointer to name to find
  *             valid                           KI_VALID_PWNAM or KI_VALID_GRNAM
- *             kir                             Pointer to return aread
+ *             kir                             Pointer to return area
  *
  * Returns:    0                               Found
  *             ENOENT                          Not found
@@ -1570,6 +1652,7 @@ kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir, char *getn
        KAUTH_IDENTITY_UNLOCK();
        return((kip == NULL) ? ENOENT : 0);
 }
+#endif /* CONFIG_EXT_RESOLVER */
 
 
 /*
@@ -1586,7 +1669,7 @@ guid_t kauth_null_guid;
  * Parameters: guid1                           Pointer to first GUID
  *             guid2                           Pointer to second GUID
  *
- * Returns:    0                               If GUIDs are inequal
+ * Returns:    0                               If GUIDs are unequal
  *             !0                              If GUIDs are equal
  */
 int
@@ -1603,7 +1686,7 @@ kauth_guid_equal(guid_t *guid1, guid_t *guid2)
  *
  * Parameters: guid                            Pointer to GUID to check
  *
- * Returns:    KAUTH_WKG_NOT                   Not a wel known GUID
+ * Returns:    KAUTH_WKG_NOT                   Not a well known GUID
  *             KAUTH_WKG_EVERYBODY             "Everybody"
  *             KAUTH_WKG_NOBODY                "Nobody"
 *             KAUTH_WKG_OWNER                 "Owner"
@@ -1642,10 +1725,10 @@ kauth_wellknown_guid(guid_t *guid)
  *
  * Description:        Determine the equality of two NTSIDs (NT Security Identifiers) 
  *
- * Paramters:  sid1                            Pointer to first NTSID
+ * Parameters: sid1                            Pointer to first NTSID
  *             sid2                            Pointer to second NTSID
  *
- * Returns:    0                               If GUIDs are inequal
+ * Returns:    0                               If NTSIDs are unequal
 *             !0                              If NTSIDs are equal
  */
 int
@@ -1673,7 +1756,6 @@ kauth_ntsid_equal(ntsid_t *sid1, ntsid_t *sid2)
  * be done using it.
  */
 
-static int     kauth_cred_cache_lookup(int from, int to, void *src, void *dst);
 
 
 /*
@@ -1860,6 +1942,21 @@ kauth_cred_getsvgid(kauth_cred_t cred)
 }
 
 
+static int     kauth_cred_cache_lookup(int from, int to, void *src, void *dst);
+
+#if CONFIG_EXT_RESOLVER == 0
+/*
+ * If there's no resolver, short-circuit the kauth_cred_x2y() lookups.
+ */
+static __inline int
+kauth_cred_cache_lookup(__unused int from, __unused int to,
+       __unused void *src, __unused void *dst)
+{
+       return (EWOULDBLOCK);
+
+}
+#endif
+
 /*
  * kauth_cred_guid2pwnam
  *
@@ -2225,6 +2322,7 @@ kauth_cred_guid2ntsid(guid_t *guidp, ntsid_t *sidp)
  * Returns:    0                               Success
  *             EINVAL                          Unknown source identity type
  */
+#if CONFIG_EXT_RESOLVER
 static int
 kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 {
@@ -2325,6 +2423,7 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
                        /* do we have a translation? */
                        if (ki.ki_valid & to) {
                                KAUTH_DEBUG("CACHE - found matching entry with valid 0x%08x", ki.ki_valid);
+                               DTRACE_PROC4(kauth__identity__cache__hit, int, from, int, to, void *, src, void *, dst);
                                goto found;
                        } else {
                                /*
@@ -2425,7 +2524,13 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 
        /* Call resolver */
        KAUTH_DEBUG("CACHE - calling resolver for %x", el.el_flags);
+
+       DTRACE_PROC3(kauth__id__resolver__submitted, int, from, int, to, uintptr_t, src);
+       
        error = kauth_resolver_submit(&el, extend_data);
+
+       DTRACE_PROC2(kauth__id__resolver__returned, int, error, struct kauth_identity_extlookup *, &el);
+       
        KAUTH_DEBUG("CACHE - resolver returned %d", error);
 
        /* was the external lookup successful? */
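
The DTRACE_PROC* probes added here and in the group-membership hunks below should appear under DTrace's proc provider, with the double underscores in the C names rendered as dashes (kauth__id__resolver__returned becomes kauth-id-resolver-returned). That naming, and the one-liner below, follow the usual DTRACE_PROC convention rather than anything stated in this diff:

	sudo dtrace -n 'proc:::kauth-id-resolver-returned { trace(arg0); }'

arg0 there would be the error code handed back by kauth_resolver_submit().
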
@@ -2490,28 +2595,6 @@ found:
  * XXX the linked-list implementation here needs to be optimized.
  */
 
-struct kauth_group_membership {
-       TAILQ_ENTRY(kauth_group_membership) gm_link;
-       uid_t   gm_uid;         /* the identity whose membership we're recording */
-       gid_t   gm_gid;         /* group of which they are a member */
-       time_t  gm_expiry;      /* TTL for the membership */
-       int     gm_flags;
-#define KAUTH_GROUP_ISMEMBER   (1<<0)
-};
-
-TAILQ_HEAD(kauth_groups_head, kauth_group_membership) kauth_groups;
-#define KAUTH_GROUPS_CACHEMAX          100     /* XXX sizing? */
-static int kauth_groups_count;
-
-static lck_mtx_t *kauth_groups_mtx;
-#define KAUTH_GROUPS_LOCK()    lck_mtx_lock(kauth_groups_mtx);
-#define KAUTH_GROUPS_UNLOCK()  lck_mtx_unlock(kauth_groups_mtx);
-
-static int     kauth_groups_expired(struct kauth_group_membership *gm);
-static void    kauth_groups_lru(struct kauth_group_membership *gm);
-static void    kauth_groups_updatecache(struct kauth_identity_extlookup *el);
-
-
 /*
  * kauth_groups_init
  *
@@ -2521,7 +2604,7 @@ static void       kauth_groups_updatecache(struct kauth_identity_extlookup *el);
  *
  * Returns:    (void)
  *
- * Notes:      Intialize the groups cache for use; the group cache is used
+ * Notes:      Initialize the groups cache for use; the group cache is used
  *             to avoid unnecessary calls out to user space.
  *
  *             This function is called from kauth_init() in the file
@@ -2551,7 +2634,14 @@ kauth_groups_expired(struct kauth_group_membership *gm)
 {
        struct timeval tv;
 
+       /*
+        * Expiration time of 0 means this entry is persistent.
+        */
+       if (gm->gm_expiry == 0)
+               return (0);
+               
        microuptime(&tv);
+       
        return((gm->gm_expiry <= tv.tv_sec) ? 1 : 0);
 }
 
@@ -2623,7 +2713,7 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
                        } else {
                                gm->gm_flags &= ~KAUTH_GROUP_ISMEMBER;
                        }
-                       gm->gm_expiry = el->el_member_valid + tv.tv_sec;
+                       gm->gm_expiry = (el->el_member_valid) ? el->el_member_valid + tv.tv_sec : 0;
                        kauth_groups_lru(gm);
                        break;
                }
@@ -2644,7 +2734,7 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
                } else {
                        gm->gm_flags &= ~KAUTH_GROUP_ISMEMBER;
                }
-               gm->gm_expiry = el->el_member_valid + tv.tv_sec;
+               gm->gm_expiry = (el->el_member_valid) ? el->el_member_valid + tv.tv_sec : 0;
        }               
 
        /*
@@ -2655,7 +2745,7 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
         */
        KAUTH_GROUPS_LOCK();
        TAILQ_INSERT_HEAD(&kauth_groups, gm, gm_link);
-       if (kauth_groups_count++ > KAUTH_GROUPS_CACHEMAX) {
+       if (++kauth_groups_count > kauth_groups_cachemax) {
                gm = TAILQ_LAST(&kauth_groups, kauth_groups_head);
                TAILQ_REMOVE(&kauth_groups, gm, gm_link);
                kauth_groups_count--;
@@ -2669,6 +2759,25 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
                FREE(gm, M_KAUTH);
 }
 
+/*
+ * Trim older entries from the group membership cache.
+ *
+ * Must be called with the group cache lock held.
+ */
+static void
+kauth_groups_trimcache(int new_size) {
+        struct kauth_group_membership *gm;
+
+       lck_mtx_assert(kauth_groups_mtx, LCK_MTX_ASSERT_OWNED);
+        
+       while (kauth_groups_count > new_size) {
+               gm = TAILQ_LAST(&kauth_groups, kauth_groups_head);
+               TAILQ_REMOVE(&kauth_groups, gm, gm_link);
+               kauth_groups_count--;
+               FREE(gm, M_KAUTH);
+       }
+}
+#endif /* CONFIG_EXT_RESOLVER */
 
 /*
  * Group membership KPI
@@ -2687,7 +2796,7 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
  *                                             result of the call
  *
  * Returns:    0                               Success
- *             ENOENT                          Could not proform lookup
+ *             ENOENT                          Could not perform lookup
  *     kauth_resolver_submit:EWOULDBLOCK
  *     kauth_resolver_submit:EINTR
  *     kauth_resolver_submit:ENOMEM
@@ -2702,16 +2811,14 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
  * Notes:      This function guarantees not to modify resultp when returning
  *             an error.
  *
- *             This function effectively checkes the EGID as well, since the
+ *             This function effectively checks the EGID as well, since the
  *             EGID is cr_groups[0] as an implementation detail.
  */
 int
 kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 {
        posix_cred_t pcred = posix_cred_get(cred);
-       struct kauth_group_membership *gm;
-       struct kauth_identity_extlookup el;
-       int i, error;
+       int i;
 
        /*
         * Check the per-credential list of override groups.
@@ -2735,7 +2842,11 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
                return(0);
        }
                
-       
+#if CONFIG_EXT_RESOLVER
+       struct kauth_group_membership *gm;
+       struct kauth_identity_extlookup el;
+       int error;
+
        /*
         * If the resolver hasn't checked in yet, we are early in the boot
         * phase and the local group list is complete and authoritative.
@@ -2744,7 +2855,7 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
                *resultp = 0;
                return(0);
        }
-       
+
        /* TODO: */
        /* XXX check supplementary groups */
        /* XXX check whiteout groups */
@@ -2767,9 +2878,11 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
        KAUTH_GROUPS_UNLOCK();
 
        /* if we did, we can return now */
-       if (gm != NULL)
+       if (gm != NULL) {
+               DTRACE_PROC2(kauth__group__cache__hit, int, pcred->cr_gmuid, int, gid);
                return(0);
-       
+       }
+
        /* nothing in the cache, need to go to userland */
        bzero(&el, sizeof(el));
        el.el_info_pid = current_proc()->p_pid;
@@ -2777,7 +2890,13 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
        el.el_uid = pcred->cr_gmuid;
        el.el_gid = gid;
        el.el_member_valid = 0;         /* XXX set by resolver? */
+
+       DTRACE_PROC2(kauth__group__resolver__submitted, int, el.el_uid, int, el.el_gid);
+       
        error = kauth_resolver_submit(&el, 0ULL);
+       
+       DTRACE_PROC2(kauth__group__resolver__returned, int, error, int, el.el_flags);
+       
        if (error != 0)
                return(error);
        /* save the results from the lookup */
@@ -2790,9 +2909,12 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
        }
 
        return(ENOENT);
+#else
+       *resultp = 0;
+       return(0);
+#endif
 }
 
-
 /*
  * kauth_cred_ismember_guid
  *
@@ -2820,15 +2942,11 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
  *                                     0       Is not member
  */
 int
-kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp)
+kauth_cred_ismember_guid(__unused kauth_cred_t cred, guid_t *guidp, int *resultp)
 {
-       struct kauth_identity ki;
-       gid_t gid;
-       int error, wkg;
+       int error = 0;
 
-       error = 0;
-       wkg = kauth_wellknown_guid(guidp);
-       switch(wkg) {
+       switch (kauth_wellknown_guid(guidp)) {
        case KAUTH_WKG_NOBODY:
                *resultp = 0;
                break;
@@ -2836,6 +2954,10 @@ kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp)
                *resultp = 1;
                break;
        default:
+#if CONFIG_EXT_RESOLVER
+       {
+               struct kauth_identity ki;
+               gid_t gid;
 #if 6603280
                /*
                 * Grovel the identity cache looking for this GUID.
@@ -2884,6 +3006,11 @@ kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp)
                        error = kauth_cred_ismember_gid(cred, gid, resultp);
                }
        }
+#else  /* CONFIG_EXT_RESOLVER */
+               error = ENOENT;
+#endif /* CONFIG_EXT_RESOLVER */
+               break;
+       }
        return(error);
 }
 
@@ -3670,7 +3797,7 @@ kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid)
  * Parameters: cred                            The original credential
  *             groups                          Pointer to gid_t array which
  *                                             contains the new group list
- *             groupcount                      The cound of valid groups which
+ *             groupcount                      The count of valid groups which
  *                                             are contained in 'groups'
  *             gmuid                           KAUTH_UID_NONE -or- the new
  *                                             group membership UID
@@ -3693,7 +3820,7 @@ kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid)
  *             that is returned to them, if it is not intended to be a
  *             persistent reference.
  *
- * XXX:                Changes are determined in ordinal order - if the caller pasess
+ * XXX:                Changes are determined in ordinal order - if the caller passes
  *             in the same groups list that is already present in the
  *             credential, but the members are in a different order, even if
  *             the EGID is not modified (i.e. cr_groups[0] is the same), it
@@ -3753,7 +3880,7 @@ kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmu
  * XXX temporary, for NFS support until we can come up with a better
  * XXX enumeration/comparison mechanism
  *
- * Notes:      The return value exists to account for the possbility of a
+ * Notes:      The return value exists to account for the possibility of a
  *             kauth_cred_t without a POSIX label.  This will be the case in
  *             the future (see posix_cred_get() below, for more details).
  */
@@ -4326,12 +4453,12 @@ kauth_cred_ref(kauth_cred_t cred)
  *             scoped to this compilation unit.
  *
  *             This function destroys the contents of the pointer passed by
- *             the caller to prevent the caller accidently attempting to
+ *             the caller to prevent the caller accidentally attempting to
  *             release a given reference twice in error.
  *
  *             The last reference is considered to be released when a release
  *             of a credential of a reference count of 2 occurs; this is an
- *             intended effect, to take into accout the reference held by
+ *             intended effect, to take into account the reference held by
  *             the credential hash, which is released at the same time.
  */
 static void
@@ -4447,11 +4574,11 @@ kauth_cred_rele(kauth_cred_t cred)
  *             referencing them, prior to making them visible in an externally
  *             visible pointer (e.g. by adding them to the credential hash
  *             cache) is the only legal time in which an existing credential
- *             can be safely iinitialized or modified directly.
+ *             can be safely initialized or modified directly.
  *
  *             After initialization, the caller is expected to call the
  *             function kauth_cred_add() to add the credential to the hash
- *             cache, after which time it's frozen and becomes publically
+ *             cache, after which time it's frozen and becomes publicly
  *             visible.
  *
  *             The release protocol depends on kauth_hash_add() being called
@@ -4502,7 +4629,7 @@ kauth_cred_dup(kauth_cred_t cred)
  *             result, the caller is responsible for dropping BOTH the
  *             additional reference on the passed cred (if any), and the
  *             credential returned by this function.  The drop should be
- *             via the satnadr kauth_cred_unref() KPI.
+ *             via the kauth_cred_unref() KPI.
  */
 kauth_cred_t
 kauth_cred_copy_real(kauth_cred_t cred)
@@ -4787,7 +4914,7 @@ kauth_cred_remove(kauth_cred_t cred)
  *                                             hash cache
  *
  * Returns:    NULL                            Not found
- *             !NULL                           Matching cedential already in
+ *             !NULL                           Matching credential already in
  *                                             cred hash cache
  *
  * Locks:      Caller is expected to hold KAUTH_CRED_HASH_LOCK
@@ -4822,21 +4949,15 @@ kauth_cred_find(kauth_cred_t cred)
                 * don't worry about the label unless the flags in
                 * either credential tell us to.
                 */
-               if ((found_pcred->cr_flags & CRF_MAC_ENFORCE) != 0 ||
-                   (pcred->cr_flags & CRF_MAC_ENFORCE) != 0) {
-                       /* include the label pointer in the compare */
-                       match = (bcmp(&found_pcred->cr_uid, &pcred->cr_uid,
-                                (sizeof(struct ucred) -
-                                 offsetof(struct ucred, cr_posix))) == 0);
-               } else {
-                       /* flags have to match, but skip the label in bcmp */
-                       match = (found_pcred->cr_flags == pcred->cr_flags &&
-                                bcmp(&found_pcred->cr_uid, &pcred->cr_uid,
-                                     sizeof(struct posix_cred)) == 0 &&
-                                bcmp(&found_cred->cr_audit, &cred->cr_audit,
-                                     sizeof(cred->cr_audit)) == 0);
-
+               match = (bcmp(found_pcred, pcred, sizeof (*pcred)) == 0) ? TRUE : FALSE;
+               match = match && ((bcmp(&found_cred->cr_audit, &cred->cr_audit,
+                       sizeof(cred->cr_audit)) == 0) ? TRUE : FALSE);
+               if (((found_pcred->cr_flags & CRF_MAC_ENFORCE) != 0) ||
+                   ((pcred->cr_flags & CRF_MAC_ENFORCE) != 0)) {
+                       match = match && mac_cred_label_compare(found_cred->cr_label,
+                               cred->cr_label);
                }
+
                if (match) {
                        /* found a match */
                        return(found_cred);
@@ -4901,17 +5022,16 @@ kauth_cred_get_hashkey(kauth_cred_t cred)
        posix_cred_t pcred = posix_cred_get(cred);
        u_long  hash_key = 0;
 
+       hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix, 
+                                                          sizeof (struct posix_cred),
+                                                          hash_key);
+       hash_key = kauth_cred_hash((uint8_t *)&cred->cr_audit, 
+                                                          sizeof(struct au_session),
+                                                          hash_key);
+
        if (pcred->cr_flags & CRF_MAC_ENFORCE) {
-               hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix, 
-                                                                  sizeof(struct ucred) - offsetof(struct ucred, cr_posix),
-                                                                  hash_key);
-       } else {
-               /* skip label */
-               hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix, 
-                                                                  sizeof(struct posix_cred),
-                                                                  hash_key);
-               hash_key = kauth_cred_hash((uint8_t *)&cred->cr_audit, 
-                                                                  sizeof(struct au_session),
+               hash_key = kauth_cred_hash((uint8_t *)cred->cr_label, 
+                                                                  sizeof (struct label),
                                                                   hash_key);
        }
        return(hash_key);
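
The rewritten hash above now folds the POSIX fields and the audit session into the key for every credential, and chains in the label contents only when CRF_MAC_ENFORCE is set, matching the new compare in kauth_cred_find(); the replaced code hashed raw ucred bytes starting at cr_posix, which swept in the cr_label pointer itself and could keep otherwise-identical credentials from uniquing. The chaining shape, with an illustrative stand-in for kauth_cred_hash():

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative stand-in for kauth_cred_hash(): fold one byte region
 * into a running key so regions can be chained conditionally, e.g.
 *	key = hash_region(posix_bytes, posix_len, 0);
 *	key = hash_region(audit_bytes, audit_len, key);
 *	if (mac_enforced)
 *		key = hash_region(label_bytes, label_len, key);
 */
unsigned long
hash_region(const uint8_t *datap, size_t len, unsigned long key)
{
	while (len-- > 0)
		key = (key * 33) + *datap++;	/* djb2-style fold */
	return (key);
}
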
@@ -5286,9 +5406,9 @@ sysctl_dump_cred_backtraces( __unused struct sysctl_oid *oidp, __unused void *ar
  *                                     attach a label to the new credential
  *
  * Notes:      This function currently wraps kauth_cred_create(), and is the
- *             only consume of tht ill-fated function, apart from bsd_init().
+ *             only consumer of that ill-fated function, apart from bsd_init().
  *             It exists solely to support the NFS server code creation of
- *             credentials based on the over-the-wire RPC cals containing
+ *             credentials based on the over-the-wire RPC calls containing
  *             traditional POSIX credential information being tunneled to
  *             the server host from the client machine.
  *
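
A hedged sketch of the NFS-server call path described in this note: unpack the over-the-wire identity into a posix_cred, then hand it to posix_cred_create(). The rpc_uid/rpc_gid inputs are assumptions standing in for the decoded RPC credential; field names follow xnu's struct posix_cred:

    /* Hypothetical NFS-style caller; rpc_uid/rpc_gid are assumed inputs. */
    struct posix_cred temp_pcred;
    kauth_cred_t cred;

    bzero(&temp_pcred, sizeof(temp_pcred));
    temp_pcred.cr_uid = temp_pcred.cr_ruid = temp_pcred.cr_svuid = rpc_uid;
    temp_pcred.cr_rgid = temp_pcred.cr_svgid = rpc_gid;
    temp_pcred.cr_ngroups = 1;
    temp_pcred.cr_groups[0] = rpc_gid;

    cred = posix_cred_create(&temp_pcred);
    if (cred == NULL)       /* defensive; treat allocation failure as ENOMEM */
            return (ENOMEM);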
@@ -5296,7 +5416,7 @@ sysctl_dump_cred_backtraces( __unused struct sysctl_oid *oidp, __unused void *ar
  *
  *             In the short term, it creates a temporary credential, puts
  *             the POSIX information from NFS into it, and then calls
- *             kauth_cred_create(), as an internal implementaiton detail.
+ *             kauth_cred_create(), as an internal implementation detail.
  *
  *             If we have to keep it around in the medium term, it will
  *             create a new kauth_cred_t, then label it with a POSIX label
@@ -5332,7 +5452,7 @@ posix_cred_create(posix_cred_t pcred)
  *             this function will return a pointer to a posix_cred_t which
  *             GRANTS all access (effectively, a "root" credential).  This is
  *             necessary to support legacy code which insists on tightly
- *             integrating POSIX credentails into its APIs, including, but
+ *             integrating POSIX credentials into its APIs, including, but
  *             not limited to, System V IPC mechanisms, POSIX IPC mechanisms,
  *             NFSv3, signals, dtrace, and a large number of kauth routines
  *             used to implement POSIX permissions related system calls.
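
Because posix_cred_get() always returns a usable POSIX view, including the all-access view described above for label-only credentials, the legacy callers it exists for stay short. A minimal sketch, assuming a valid kauth_cred_t in hand:

    posix_cred_t pcred = posix_cred_get(cred);

    /*
     * Classic POSIX-style check: effective uid 0 grants access. For a
     * label-only credential this sees the GRANTS-all view noted above,
     * so legacy code keeps working unmodified.
     */
    if (pcred->cr_uid == 0)
            return (0);             /* root: allow */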
@@ -5369,13 +5489,13 @@ posix_cred_get(kauth_cred_t cred)
  * Returns:    (void)
  *
  * Notes:      This function is currently void in order to permit it to fit
- *             in with the currrent MACF framework label methods which allow
- *             labelling to fail silently.  This is like acceptable for
+ *             in with the current MACF framework label methods which allow
+ *             labeling to fail silently.  This is likely acceptable for
  *             mandatory access controls, but not for POSIX, since those
  *             access controls are advisory.  We will need to consider a
  *             return value in a future version of the MACF API.
  *
- *             This operation currenty can not fail, as currently the POSIX
+ *             This operation currently cannot fail, as currently the POSIX
  *             credential is a subfield of the kauth_cred_t (ucred), which
  *             MUST be valid.  In the future, this will not be the case.
  */
index 3283ee3c0ebe919532d45fd9be6a820b6dbc6099..5913c5456baa24da2f25c55b347ad4558ebfe63c 100644 (file)
@@ -173,7 +173,6 @@ extern struct wait_queue select_conflict_queue;
 /*
  * Descriptor management.
  */
-struct filelist filehead;      /* head of list of open files */
 struct fmsglist fmsghead;      /* head of list of open files */
 struct fmsglist fmsg_ithead;   /* head of list of open files */
 int nfiles;                    /* actual number of open files */
@@ -184,7 +183,6 @@ lck_grp_t * file_lck_grp;
 lck_attr_t * file_lck_attr;
 
 lck_mtx_t * uipc_lock;
-lck_mtx_t * file_flist_lock;
 
 
 /*
@@ -210,7 +208,6 @@ file_lock_init(void)
        file_lck_attr = lck_attr_alloc_init();
 
        uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
-       file_flist_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
 }
 
 
@@ -866,7 +863,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto outdrop;
                }
 
-               if ((fl.l_whence == SEEK_CUR) && (fl.l_start + offset < fl.l_start)) {
+               volatile off_t affected_lock_area_set = 0;
+               affected_lock_area_set = fl.l_start + offset;
+               if ((fl.l_whence == SEEK_CUR) && (affected_lock_area_set < fl.l_start)) {
                    error = EOVERFLOW;
                    goto outdrop;
                }
@@ -941,11 +940,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                if (error)
                        goto outdrop;
 
+               volatile off_t affected_lock_area_end = 0;
+               affected_lock_area_end = fl.l_start + offset;
                /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
                /* and ending byte for EOVERFLOW in SEEK_SET */
                if (((fl.l_whence == SEEK_CUR) && 
-                    ((fl.l_start + offset < fl.l_start) ||
-                     ((fl.l_len > 0) && (fl.l_start+offset + fl.l_len - 1 < fl.l_start+offset)))) ||
+                    ((affected_lock_area_end < fl.l_start) ||
+                     ((fl.l_len > 0) && (affected_lock_area_end + fl.l_len - 1 < affected_lock_area_end)))) ||
                    ((fl.l_whence == SEEK_SET) && (fl.l_len > 0) && (fl.l_start + fl.l_len - 1 < fl.l_start)))
                {
                        /* lf_advlock doesn't check start/end for F_GETLK if file has no locks */
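
Both hunks above rely on the same idiom: a signed addition of a non-negative offset has overflowed exactly when the sum compares less than the original start. The new volatile temporaries force the sum to be materialized, because a compiler is otherwise entitled to assume signed overflow never happens and delete the comparison outright. A standalone sketch of the pattern (the overflow itself is still undefined behavior in C; the volatile merely defeats the optimization in practice):

    #include <sys/types.h>

    /*
     * Returns nonzero if start + offset wrapped past the top of off_t.
     * Only meaningful for offset >= 0, matching the SEEK_CUR checks above.
     */
    static int
    range_would_overflow(off_t start, off_t offset)
    {
            volatile off_t sum = start + offset;

            return (sum < start);
    }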
@@ -1161,6 +1162,18 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
                goto out;
 
+       case F_SINGLE_WRITER:
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF;
+                       goto out;
+               }
+               if (uap->arg)
+                       fp->f_fglob->fg_flag |= FSINGLE_WRITER;
+               else
+                       fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
+
+               goto out;
+
        case F_GLOBAL_NOCACHE:
                if (fp->f_type != DTYPE_VNODE) {
                        error = EBADF;
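
The new F_SINGLE_WRITER command above is a plain boolean toggle on the fileglob's fg_flag, so from user space it looks like any other flag-setting fcntl. A hypothetical usage sketch, on the assumption that the constant is visible to the caller (it is private SPI in this release):

    #include <fcntl.h>
    #include <stdio.h>

    /* Hypothetical: claim (or release) single-writer status for fd. */
    static int
    set_single_writer(int fd, int enable)
    {
            if (fcntl(fd, F_SINGLE_WRITER, enable) == -1) {
                    perror("fcntl(F_SINGLE_WRITER)"); /* EBADF for non-vnode fds */
                    return (-1);
            }
            return (0);
    }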
@@ -1239,58 +1252,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                 }
                 goto outdrop;
 
-
-       case F_READBOOTSTRAP:
-       case F_WRITEBOOTSTRAP: {
-               user32_fbootstraptransfer_t user32_fbt_struct;
-               user_fbootstraptransfer_t user_fbt_struct;
-               int     sizeof_struct;
-               caddr_t boot_structp;
-
-               if (fp->f_type != DTYPE_VNODE) {
-                       error = EBADF;
-                       goto out;
-               }
-               vp = (struct vnode *)fp->f_data;
-               proc_fdunlock(p);
-
-               if (IS_64BIT_PROCESS(p)) {
-                       sizeof_struct = sizeof(user_fbt_struct);
-                       boot_structp = (caddr_t) &user_fbt_struct;
-               }
-               else {
-                       sizeof_struct = sizeof(user32_fbt_struct);
-                       boot_structp = (caddr_t) &user32_fbt_struct;
-               }
-               error = copyin(argp, boot_structp, sizeof_struct);
-               if (error)
-                       goto outdrop;
-               if ( (error = vnode_getwithref(vp)) ) {
-                       goto outdrop;
-               }
-               if (uap->cmd == F_WRITEBOOTSTRAP) {
-                       /*
-                        * Make sure that we are root.  Updating the
-                        * bootstrap on a disk could be a security hole
-                        */
-                       if (!is_suser()) {
-                               (void)vnode_put(vp);
-                               error = EACCES;
-                               goto outdrop;
-                       }
-               }
-               if (strncmp(vnode_mount(vp)->mnt_vfsstat.f_fstypename, "hfs",
-                       sizeof(vnode_mount(vp)->mnt_vfsstat.f_fstypename)) != 0) {
-                       error = EINVAL;
-               } else {
-                       /*
-                        * call vnop_ioctl to handle the I/O
-                        */
-                       error = VNOP_IOCTL(vp, uap->cmd, boot_structp, 0, &context);
-               }
-               (void)vnode_put(vp);
-               goto outdrop;
-       }
        case F_LOG2PHYS:
        case F_LOG2PHYS_EXT: {
                struct log2phys l2p_struct;    /* structure for allocate command */
@@ -1577,7 +1538,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto outdrop;
                }
                                   
-#define CS_MAX_BLOB_SIZE (1ULL * 1024 * 1024) /* XXX ? */
+#define CS_MAX_BLOB_SIZE (1280ULL * 1024) /* max shared cache file XXX ? */
                if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
                        error = E2BIG;
                        vnode_put(vp);
@@ -1692,7 +1653,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                break;
        }
                        
-#ifdef CONFIG_PROTECT
+#if CONFIG_PROTECT
        case F_GETPROTECTIONCLASS: {
                int class = 0;
                
@@ -1746,28 +1707,80 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                vnode_put(vp);
                break;
        }       
+
+       case F_TRANSCODEKEY: {
+
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF;
+                       goto out;
+               }
+               
+               vp = (struct vnode *)fp->f_data;
+               proc_fdunlock(p);
+
+               if (vnode_getwithref(vp)) {
+                       error = ENOENT;
+                       goto outdrop;
+               }       
+               
+               error = cp_vnode_transcode (vp);
+               vnode_put(vp);
+               break;
+       }       
+
+       case F_GETPROTECTIONLEVEL:  {
+               uint32_t cp_version = 0;
+
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF; 
+                       goto out;
+               }
+
+               vp = (struct vnode*) fp->f_data;
+               proc_fdunlock (p);
+
+               if (vnode_getwithref(vp)) {
+                       error = ENOENT;
+                       goto outdrop;
+               }
+
+               /*
+                * if cp_get_root_major_vers fails, error will be set to proper errno
+                * and cp_version will still be 0.
+                */
+
+               error = cp_get_root_major_vers (vp, &cp_version);
+               *retval = cp_version;
+
+               vnode_put (vp);
+               break;
+       }
+       
 #endif /* CONFIG_PROTECT */
-                       
+
        case F_MOVEDATAEXTENTS: {
                struct fileproc *fp2 = NULL;
                struct vnode *src_vp = NULLVP;
                struct vnode *dst_vp = NULLVP;
                /* We need to grab the 2nd FD out of the arguments before moving on. */
                int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
-
+               
                if (fp->f_type != DTYPE_VNODE) {
                        error = EBADF;
                        goto out;
                }
-               vp = src_vp = (struct vnode *)fp->f_data;
 
                /* For now, special case HFS+ only, since this is SPI. */
+               src_vp = (struct vnode *)fp->f_data;
                if (src_vp->v_tag != VT_HFS) {
                        error = EINVAL;
                        goto out;
                }
 
-               /* We're still holding the proc FD lock */
+               /*
+                * Get the references before we start acquiring iocounts on the vnodes, 
+                * while we still hold the proc fd lock
+                */
                if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
                        error = EBADF;
                        goto out;
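
The F_TRANSCODEKEY and F_GETPROTECTIONLEVEL cases added above follow the established shape of the CONFIG_PROTECT block: check for a vnode, drop the fd lock, take an iocount, call into the content-protection layer, put the vnode. For F_GETPROTECTIONLEVEL the version comes back through *retval, which user-space fcntl() surfaces as its return value. A hypothetical caller, assuming the SPI constant is visible:

    #include <fcntl.h>

    /*
     * Hypothetical sketch: query the content-protection major version
     * for the volume backing fd; -1 with errno (e.g. ENOENT) on failure.
     */
    static int
    cp_major_version(int fd)
    {
            return (fcntl(fd, F_GETPROTECTIONLEVEL, 0));
    }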
@@ -1778,8 +1791,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto out;
                }
                dst_vp = (struct vnode *)fp2->f_data;
-
-               /* For now, special case HFS+ only, since this is SPI. */
                if (dst_vp->v_tag != VT_HFS) {
                        fp_drop(p, fd2, fp2, 1);
                        error = EINVAL;
@@ -1799,8 +1810,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
                proc_fdunlock(p);
 
-               /* Proc lock dropped; now we have a legit pair of FDs.  Go to work */
-
                if (vnode_getwithref(src_vp)) {
                        fp_drop(p, fd2, fp2, 0);
                        error = ENOENT;
@@ -1812,12 +1821,11 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        error = ENOENT;
                        goto outdrop;
                }       
-
+               
                /* 
                 * Basic asserts; validate they are not the same and that
                 * both live on the same filesystem.
                 */
-
                if (dst_vp == src_vp) {
                        vnode_put (src_vp);
                        vnode_put (dst_vp);
@@ -1825,7 +1833,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        error = EINVAL;
                        goto outdrop;
                }       
-       
+
                if (dst_vp->v_mount != src_vp->v_mount) {
                        vnode_put (src_vp);
                        vnode_put (dst_vp);
@@ -1834,31 +1842,33 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto outdrop;
                }
 
+               /* Now we have a legit pair of FDs.  Go to work */
+
                /* Now check for write access to the target files */
                if(vnode_authorize(src_vp, NULLVP, 
-                                       (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
+                                                  (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
                        vnode_put(src_vp);
                        vnode_put(dst_vp);
                        fp_drop(p, fd2, fp2, 0);
                        error = EBADF;
                        goto outdrop;
                }
-
+               
                if(vnode_authorize(dst_vp, NULLVP, 
-                                       (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
+                                                  (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
                        vnode_put(src_vp);
                        vnode_put(dst_vp);
                        fp_drop(p, fd2, fp2, 0);
                        error = EBADF;
                        goto outdrop;
                }
-
+                       
                /* Verify that both vps point to files and not directories */
-               if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
-                       vnode_put(src_vp);
-                       vnode_put(dst_vp);
-                       fp_drop(p, fd2, fp2, 0);
+               if ( !vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
                        error = EINVAL;
+                       vnode_put (src_vp);
+                       vnode_put (dst_vp);
+                       fp_drop (p, fd2, fp2, 0);
                        goto outdrop;
                }
 
@@ -1866,15 +1876,54 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
                 * We'll pass in our special bit indicating that the new behavior is expected
                 */
-
+               
                error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
-
+               
                vnode_put (src_vp);
                vnode_put (dst_vp);
                fp_drop(p, fd2, fp2, 0);
                break;
        }
+                       
+                       
+       /*
+        * SPI (private) for indicating to a filesystem that subsequent writes to
+        * the open FD will represent static content.
+        */
+       case F_SETSTATICCONTENT: {
+               caddr_t ioctl_arg = NULL;
 
+               if (uap->arg) {
+                       ioctl_arg = (caddr_t) 1;
+               }
+
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF;
+                       goto out;
+               }
+               vp = (struct vnode *)fp->f_data;
+               proc_fdunlock(p);
+
+               error = vnode_getwithref(vp);
+               if (error) {
+                       error = ENOENT;
+                       goto outdrop;
+               }
+
+               /* Only go forward if you have write access */
+               vfs_context_t ctx = vfs_context_current();
+               if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
+                       vnode_put(vp);
+                       error = EBADF;
+                       goto outdrop;
+               }
+
+               error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
+               (void)vnode_put(vp);
+               
+               break;
+       }
+       
        /* 
         * Set the vnode pointed to by 'fd'
         * and tag it as the (potentially future) backing store
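
The F_SETSTATICCONTENT case above is another boolean toggle, but unlike F_SINGLE_WRITER it demands write access and is forwarded to the filesystem as a VNOP_IOCTL rather than flipping a flag locally. A hypothetical caller (the constant is private SPI):

    #include <fcntl.h>

    /*
     * Hypothetical: hint that subsequent writes to fd carry static
     * content. Fails with EBADF without write permission on the vnode.
     */
    static int
    mark_static_content(int fd, int is_static)
    {
            return (fcntl(fd, F_SETSTATICCONTENT, is_static ? 1 : 0));
    }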
@@ -1885,12 +1934,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        error = EBADF;
                        goto out;
                }
-               vp = (struct vnode *)fp->f_data;
                
+               vp = (struct vnode *)fp->f_data;
+
                if (vp->v_tag != VT_HFS) {
                        error = EINVAL;
                        goto out;
-
                }
                proc_fdunlock(p);
 
@@ -1910,14 +1959,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                
                /* If arg != 0, set, otherwise unset */
                if (uap->arg) {
-                       error = hfs_set_backingstore (vp, 1);
+                       error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)1, 0, &context);
                }
                else {
-                       error = hfs_set_backingstore (vp, 0);
+                       error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, 0, &context);
                }
-               /* Success. explicitly set error to 0. */
-               error = 0;
-
+               
                vnode_put(vp);
                break;
        }
@@ -1949,7 +1996,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        /* Check for error from vn_getpath before moving on */
                        if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
                                if (vp->v_tag == VT_HFS) {
-                                       error = hfs_is_backingstore (vp, &backingstore);
+                                       error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
                                }
                                (void)vnode_put(vp);
 
@@ -1973,7 +2020,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                goto outdrop;
        }
 
-
        default:
                /*
                 * This is an fcntl() that we do not recognize at this level;
@@ -3920,7 +3966,7 @@ int
 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
              vfs_context_t ctx, int locked)
 {
-       struct fileproc *fp, *fq;
+       struct fileproc *fp;
        struct fileglob *fg;
        int error, nfd;
 
@@ -3988,16 +4034,7 @@ falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
        mac_file_label_associate(fp->f_cred, fg);
 #endif
 
-       lck_mtx_lock_spin(file_flist_lock);
-
-       nfiles++;
-
-       if ( (fq = p->p_fd->fd_ofiles[0]) ) {
-               LIST_INSERT_AFTER(fq->f_fglob, fg, f_list);
-       } else {
-               LIST_INSERT_HEAD(&filehead, fg, f_list);
-       }
-       lck_mtx_unlock(file_flist_lock);
+       OSAddAtomic(1, &nfiles);
 
        p->p_fd->fd_ofiles[nfd] = fp;
 
@@ -4028,10 +4065,7 @@ falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
 void
 fg_free(struct fileglob *fg)
 {
-       lck_mtx_lock_spin(file_flist_lock);
-       LIST_REMOVE(fg, f_list);
-       nfiles--;
-       lck_mtx_unlock(file_flist_lock);
+       OSAddAtomic(-1, &nfiles);
 
        if (IS_VALID_CRED(fg->fg_cred)) {
                kauth_cred_unref(&fg->fg_cred);
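
With the global filehead list and file_flist_lock gone, nfiles is the only shared bookkeeping left in these paths, and OSAddAtomic() covers it without a lock. The pattern in isolation, using the same kernel primitive the diff uses:

    #include <libkern/OSAtomic.h>

    int nfiles;                             /* shared counter, no lock needed */

    static void
    file_opened(void)
    {
            OSAddAtomic(1, &nfiles);        /* atomic increment */
    }

    static void
    file_closed(void)
    {
            OSAddAtomic(-1, &nfiles);       /* atomic decrement */
    }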
@@ -4089,7 +4123,7 @@ fdexec(proc_t p, short flags)
                struct fileproc *fp = fdp->fd_ofiles[i];
                char *flagp = &fdp->fd_ofileflags[i];
 
-               if (cloexec_default) {
+               if (fp && cloexec_default) {
                        /*
                         * Reverse the usual semantics of file descriptor
                         * inheritance - all of them should be closed
index 632501473870d4e4edeb15f3f177c8fab69fcc51..ee3d66b09df04f6113ca0ba45aaf7e0dc9a1a7c1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include "net/net_str_id.h"
 
 #include <mach/task.h>
+
+#if VM_PRESSURE_EVENTS
 #include <kern/vm_pressure.h>
+#endif
 
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
@@ -188,6 +191,7 @@ static struct filterops proc_filtops = {
         .f_event = filt_proc,
 };
 
+#if VM_PRESSURE_EVENTS
 static int filt_vmattach(struct knote *kn);
 static void filt_vmdetach(struct knote *kn);
 static int filt_vm(struct knote *kn, long hint);
@@ -196,6 +200,7 @@ static struct filterops vm_filtops = {
        .f_detach = filt_vmdetach,
        .f_event = filt_vm,
 };
+#endif /* VM_PRESSURE_EVENTS */
 
 extern struct filterops fs_filtops;
 
@@ -271,7 +276,12 @@ static struct filterops *sysfilt_ops[] = {
        &fs_filtops,                    /* EVFILT_FS */
        &user_filtops,                  /* EVFILT_USER */
        &bad_filtops,                   /* unused */
+#if VM_PRESSURE_EVENTS
        &vm_filtops,                    /* EVFILT_VM */
+#else
+       &bad_filtops,                   /* EVFILT_VM */
+#endif
+       &file_filtops,                  /* EVFILT_SOCK */
 };
 
 /*
@@ -549,12 +559,21 @@ filt_proc(struct knote *kn, long hint)
                        kn->kn_fflags |= NOTE_RESOURCEEND;
                        kn->kn_data = (hint & NOTE_PDATAMASK);
                }
+#if CONFIG_EMBEDDED
+               /* If the event is one of the APPSTATE events,remove the rest */
+                       /* If the event is one of the APPSTATE events, remove the rest */
+                       /* only one state at a time */
+                       kn->kn_fflags &= ~NOTE_APPALLSTATES;
+                       kn->kn_fflags |= event;
+               }
+#endif /* CONFIG_EMBEDDED */
        }
 
        /* atomic check, no locking need when called from above */
        return (kn->kn_fflags != 0); 
 }
 
+#if VM_PRESSURE_EVENTS
 /*
  * Virtual memory kevents
  *
@@ -584,14 +603,15 @@ filt_vm(struct knote *kn, long hint)
 {
        /* hint == 0 means this is just an alive? check (always true) */
        if (hint != 0) { 
-               /* If this knote is interested in the event specified in hint... */
-               if ((kn->kn_sfflags & hint) != 0) { 
-                       kn->kn_fflags |= hint;
+               const pid_t pid = (pid_t)hint;
+               if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) {
+                       kn->kn_fflags |= NOTE_VM_PRESSURE;
                }
        }
        
        return (kn->kn_fflags != 0);
 }
+#endif /* VM_PRESSURE_EVENTS */
 
 /*
  * filt_timervalidate - process data from user
@@ -2405,19 +2425,21 @@ knote_detach(struct klist *list, struct knote *kn)
  * we permanently enqueue them here.
  *
  * kqueue and knote references are held by caller.
+ *
+ * caller provides the wait queue link structure.
  */
 int
-knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
+knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
 {
        struct kqueue *kq = kn->kn_kq;
        kern_return_t kr;
 
-       kr = wait_queue_link(wq, kq->kq_wqs);
+       kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
        if (kr == KERN_SUCCESS) {
                knote_markstayqueued(kn);
                return 0;
        } else {
-               return ENOMEM;
+               return EINVAL;
        }
 }
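
The reworked interface moves wait-queue link allocation out to the caller, so wait_queue_link_noalloc() can run in contexts where allocating is unsafe, and the link's lifetime becomes explicit. A hedged sketch of the intended calling pattern; the allocate/free routine names are assumptions inferred from the _noalloc/_nofree pairing shown here:

    static int
    stay_queued_attach(struct knote *kn, struct wait_queue *wq)
    {
            /* Caller provides the link up front (assumed allocator name). */
            wait_queue_link_t wql = wait_queue_link_allocate();
            int error = knote_link_wait_queue(kn, wq, wql);

            if (error)
                    wait_queue_link_free(wql);      /* link was not consumed */
            return (error);
    }

    static void
    stay_queued_detach(struct knote *kn, struct wait_queue *wq)
    {
            wait_queue_link_t wql = NULL;

            if (knote_unlink_wait_queue(kn, wq, &wql) == 0 && wql != NULL)
                    wait_queue_link_free(wql);      /* caller owns it again */
    }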
 
@@ -2427,17 +2449,21 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
  *
  * Note that the unlink may have already happened from the other side, so
  * ignore any failures to unlink and just remove it from the kqueue list.
+ *
+ * On success, caller is responsible for the link structure
  */
-void
-knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq)
+int
+knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
 {
        struct kqueue *kq = kn->kn_kq;
+       kern_return_t kr;
 
-       (void) wait_queue_unlink(wq, kq->kq_wqs);
+       kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
        kqlock(kq);
        kn->kn_status &= ~KN_STAYQUEUED;
        knote_dequeue(kn);
        kqunlock(kq);
+       return (kr != KERN_SUCCESS) ? EINVAL : 0;
 }
 
 /*
@@ -2487,7 +2513,7 @@ knote_fdclose(struct proc *p, int fd)
 
 /* proc_fdlock held on entry (and exit) */
 static int
-knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
+knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p)
 {
        struct klist *list = NULL;
 
@@ -2500,10 +2526,18 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p)
                if ((u_int)fdp->fd_knlistsize <= kn->kn_id) {
                        u_int size = 0;
 
+                       if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur 
+                           || kn->kn_id >= (uint64_t)maxfiles)
+                               return (EINVAL);
+               
                        /* have to grow the fd_knlist */
                        size = fdp->fd_knlistsize;
                        while (size <= kn->kn_id)
                                size += KQEXTENT;
+
+                       if (size >= (UINT_MAX/sizeof(struct klist *)))
+                               return (EINVAL);
+
                        MALLOC(list, struct klist *,
                               size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
                        if (list == NULL)
@@ -2630,7 +2664,11 @@ knote_init(void)
 
        /* Initialize the timer filter lock */
        lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
-       lck_mtx_init(&vm_pressure_klist_mutex, kq_lck_grp, kq_lck_attr);
+       
+#if VM_PRESSURE_EVENTS
+       /* Initialize the vm pressure list lock */
+       vm_pressure_init(kq_lck_grp, kq_lck_attr);
+#endif
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 
index 4c17cd232a031629b4bd6768fe606232138e7135..dde1b3c40f124e912847571272c89a3079c436ad 100644 (file)
@@ -81,6 +81,7 @@
  * Version 2.0.
  */
 #include <machine/reg.h>
+#include <machine/cpu_capabilities.h>
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <kern/sched_prim.h> /* thread_wakeup() */
 #include <kern/affinity.h>
 #include <kern/assert.h>
+#include <kern/task.h>
 
 #if CONFIG_MACF
 #include <security/mac.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_fault.h>
+
+#include <kdp/kdp_dyld.h>
 
 #include <machine/pal_routines.h>
 
+#include <pexpert/pexpert.h>
+
+#if CONFIG_MEMORYSTATUS
+#include <sys/kern_memorystatus.h>
+#endif
+
 #if CONFIG_DTRACE
 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
 extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
@@ -154,7 +165,8 @@ extern void dtrace_lazy_dofs_destroy(proc_t);
 thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
 void vfork_exit(proc_t p, int rv);
 int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
-extern void proc_apply_task_networkbg_internal(proc_t);
+extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
+int task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope);
 
 /*
  * Mach things for which prototypes are unavailable from Mach headers
@@ -218,11 +230,27 @@ static int exec_add_apple_strings(struct image_params *imgp);
 static int exec_handle_sugid(struct image_params *imgp);
 static int sugid_scripts = 0;
 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
-static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
-                                       int customstack, proc_t p);
+static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
 static void exec_resettextvp(proc_t, struct image_params *);
 static int check_for_signature(proc_t, struct image_params *);
+static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
+
+#if !CONFIG_EMBEDDED
+
+/* Identify process during exec and opt into legacy behaviors */
+
+struct legacy_behavior {
+    uuid_t    process_uuid;
+    uint32_t  legacy_mask;
+};
+
+static const struct legacy_behavior legacy_behaviors[] =
+{
+       {{ 0xF8, 0x7C, 0xC3, 0x67, 0xFB, 0x68, 0x37, 0x93, 0xBC, 0x34, 0xB2, 0xB6, 0x05, 0x2B, 0xCD, 0xE2 }, PROC_LEGACY_BEHAVIOR_IOTHROTTLE },
+       {{ 0x0B, 0x4E, 0xDF, 0xD8, 0x76, 0xD1, 0x3D, 0x4D, 0x9D, 0xD7, 0x37, 0x43, 0x1C, 0xA8, 0xFB, 0x26 }, PROC_LEGACY_BEHAVIOR_IOTHROTTLE },
+};
+#endif /* !CONFIG_EMBEDDED */
 
 /* We don't want this one exported */
 __private_extern__
@@ -374,96 +402,6 @@ exec_reset_save_path(struct image_params *imgp)
        return (0);
 }
 
-#ifdef IMGPF_POWERPC
-/*
- * exec_powerpc32_imgact
- *
- * Implicitly invoke the PowerPC handler for a byte-swapped image magic
- * number.  This may happen either as a result of an attempt to invoke a
- * PowerPC image directly, or indirectly as the interpreter used in an
- * interpreter script.
- *
- * Parameters; struct image_params *   image parameter block
- *
- * Returns:    -1              not an PowerPC image (keep looking)
- *             -3              Success: exec_archhandler_ppc: relookup
- *             >0              Failure: exec_archhandler_ppc: error number
- *
- * Note:       This image activator does not handle the case of a direct
- *             invocation of the exec_archhandler_ppc, since in that case, the
- *             exec_archhandler_ppc itself is not a PowerPC binary; instead,
- *             binary image activators must recognize the exec_archhandler_ppc;
- *             This is managed in exec_check_permissions().
- *
- * Note:       This image activator is limited to 32 bit powerpc images;
- *             if support for 64 bit powerpc images is desired, it would
- *             be more in line with this design to write a separate 64 bit
- *             image activator.
- */
-static int
-exec_powerpc32_imgact(struct image_params *imgp)
-{
-       struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
-       int error;
-       size_t len = 0;
-
-       /*
-        * Make sure it's a PowerPC binary.  If we've already redirected
-        * from an interpreted file once, don't do it again.
-        */
-       if (mach_header->magic != MH_CIGAM) {
-               /*
-                * If it's a cross-architecture 64 bit binary, then claim
-                * it, but refuse to run it.
-                */
-               if (mach_header->magic == MH_CIGAM_64)
-                       return (EBADARCH);
-               return (-1);
-       }
-
-       /* If there is no exec_archhandler_ppc, we can't run it */
-       if (exec_archhandler_ppc.path[0] == 0)
-               return (EBADARCH);
-
-       /* Remember the type of the original file for later grading */
-       if (!imgp->ip_origcputype) {
-               imgp->ip_origcputype = 
-                       OSSwapBigToHostInt32(mach_header->cputype);
-               imgp->ip_origcpusubtype = 
-                       OSSwapBigToHostInt32(mach_header->cpusubtype);
-       }
-
-       /*
-        * The PowerPC flag will be set by the exec_check_permissions()
-        * call anyway; however, we set this flag here so that the relookup
-        * in execve() does not follow symbolic links, as a side effect.
-        */
-       imgp->ip_flags |= IMGPF_POWERPC;
-
-       /* impute an interpreter */
-       error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_buffer,
-                       IMG_SHSIZE, &len);
-       if (error)
-               return (error);
-
-       exec_reset_save_path(imgp);
-       exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
-                                  UIO_SYSSPACE);
-       
-       /*
-        * provide a replacement string for p->p_comm; we have to use an
-        * alternate buffer for this, rather than replacing it directly,
-        * since the exec may fail and return to the parent.  In that case,
-        * we would have erroneously changed the parent p->p_comm instead.
-        */
-       strlcpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr, MAXCOMLEN+1);
-                                               /* +1 to allow MAXCOMLEN characters to be copied */
-
-       return (-3);
-}
-#endif /* IMGPF_POWERPC */
-
-
 /*
  * exec_shell_imgact
  *
@@ -511,11 +449,6 @@ exec_shell_imgact(struct image_params *imgp)
                return (-1);
        }
 
-#ifdef IMGPF_POWERPC
-       if ((imgp->ip_flags & IMGPF_POWERPC) != 0)
-                 return (EBADARCH);
-#endif /* IMGPF_POWERPC */
-
        imgp->ip_flags |= IMGPF_INTERPRET;
        imgp->ip_interp_sugid_fd = -1;
        imgp->ip_interp_buffer[0] = '\0';
@@ -792,8 +725,15 @@ exec_mach_imgact(struct image_params *imgp)
        /*
         * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
         * is a reserved field on the end, so for the most part, we can
-        * treat them as if they were identical.
-        */
+        * treat them as if they were identical. Reverse-endian Mach-O
+        * binaries are recognized but not compatible.
+        */
+       if ((mach_header->magic == MH_CIGAM) ||
+           (mach_header->magic == MH_CIGAM_64)) {
+               error = EBADARCH;
+               goto bad;
+       }
+
        if ((mach_header->magic != MH_MAGIC) &&
            (mach_header->magic != MH_MAGIC_64)) {
                error = -1;
@@ -874,21 +814,6 @@ grade:
        AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
            imgp->ip_endenvv - imgp->ip_endargv);
 
-#ifdef IMGPF_POWERPC
-       /*
-        * XXX
-        *
-        * Should be factored out; this is here because we might be getting
-        * invoked this way as the result of a shell script, and the check
-        * in exec_check_permissions() is not interior to the jump back up
-        * to the "encapsulated_binary:" label in exec_activate_image().
-        */
-       if (imgp->ip_vattr->va_fsid == exec_archhandler_ppc.fsid &&
-               imgp->ip_vattr->va_fileid == exec_archhandler_ppc.fileid) {
-               imgp->ip_flags |= IMGPF_POWERPC;
-       }
-#endif /* IMGPF_POWERPC */
-
        /*
         * We are being called to activate an image subsequent to a vfork()
         * operation; in this case, we know that our task, thread, and
@@ -971,10 +896,6 @@ grade:
        vm_map_exec(get_task_map(task),
                    task,
                    (void *) p->p_fd->fd_rdir,
-#ifdef IMGPF_POWERPC
-                   imgp->ip_flags & IMGPF_POWERPC ?
-                   CPU_TYPE_POWERPC :
-#endif
                    cpu_type());
        
        /*
@@ -997,8 +918,7 @@ grade:
        
        if (load_result.unixproc &&
                create_unix_stack(get_task_map(task),
-                                 load_result.user_stack,
-                                 load_result.customstack,
+                                 &load_result,
                                  p) != KERN_SUCCESS) {
                error = load_return_to_errno(LOAD_NOSPACE);
                goto badtoolate;
@@ -1043,6 +963,9 @@ grade:
                    load_result.all_image_info_size);
        }
 
+       /* Avoid immediate VM faults back into kernel */
+       exec_prefault_data(p, imgp, &load_result);
+
        if (vfexec || spawn) {
                vm_map_switch(old_map);
        }
@@ -1096,6 +1019,28 @@ grade:
 
        memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
 
+#if !CONFIG_EMBEDDED
+       unsigned int i;
+
+       if (!vfexec && !spawn) {
+               if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+                       throttle_legacy_process_decr();
+               }
+       }
+
+       p->p_legacy_behavior = 0;
+       for (i=0; i < sizeof(legacy_behaviors)/sizeof(legacy_behaviors[0]); i++) {
+               if (0 == uuid_compare(legacy_behaviors[i].process_uuid, p->p_uuid)) {
+                       p->p_legacy_behavior = legacy_behaviors[i].legacy_mask;
+                       break;
+               }
+       }
+
+       if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+               throttle_legacy_process_incr();
+       }
+#endif
+
 // <rdar://6598155> dtrace code cleanup needed
 #if CONFIG_DTRACE
        /*
@@ -1154,18 +1099,11 @@ grade:
                }
        }
 
-#ifdef IMGPF_POWERPC
        /*
-        * Mark the process as powerpc or not.  If powerpc, set the affinity
-        * flag, which will be used for grading binaries in future exec's
-        * from the process.
+        * Ensure the 'translated' and 'affinity' flags are cleared, since we
+        * no longer run PowerPC binaries.
         */
-       if (((imgp->ip_flags & IMGPF_POWERPC) != 0))
-               OSBitOrAtomic(P_TRANSLATED, &p->p_flag);
-       else
-#endif /* IMGPF_POWERPC */
-               OSBitAndAtomic(~((uint32_t)P_TRANSLATED), &p->p_flag);
-       OSBitAndAtomic(~((uint32_t)P_AFFINITY), &p->p_flag);
+       OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
 
        /*
         * If posix_spawned with the START_SUSPENDED flag, stop the
@@ -1179,22 +1117,54 @@ grade:
                        proc_unlock(p);
                        (void) task_suspend(p->task);
                }
-               if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START) || (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)) {
+#if CONFIG_EMBEDDED
+               if ((psa->psa_flags & POSIX_SPAWN_IOS_RESV1_APP_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APPLE_DAEMON_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)) {
+                       if ((psa->psa_flags & POSIX_SPAWN_IOS_RESV1_APP_START))
+                               apptype = PROC_POLICY_IOS_RESV1_APPTYPE;
+                       else if (psa->psa_flags & POSIX_SPAWN_IOS_APPLE_DAEMON_START)
+                               apptype = PROC_POLICY_IOS_APPLE_DAEMON;
+                       else if (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)
+                               apptype = PROC_POLICY_IOS_APPTYPE;
+                       else
+                               apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                       proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread);
+                       if (apptype == PROC_POLICY_IOS_RESV1_APPTYPE)
+                               proc_apply_task_networkbg_internal(p, NULL);
+               }
+
+               if (psa->psa_apptype & POSIX_SPAWN_APPTYPE_IOS_APPLEDAEMON) {
+                       apptype = PROC_POLICY_IOS_APPLE_DAEMON;
+                       proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread);
+               }
+#else /* CONFIG_EMBEDDED */
+               if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START) || (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START)) {
                        if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START))
                                apptype = PROC_POLICY_OSX_APPTYPE_TAL;
                        else if (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START)
                                apptype = PROC_POLICY_OSX_APPTYPE_DBCLIENT;
-                       else if (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)
-                               apptype = PROC_POLICY_IOS_APPTYPE;
                        else
-                               apptype = 0;
-                       proc_set_task_apptype(p->task, apptype);
+                               apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                       proc_set_task_apptype(p->task, apptype, NULL);
                        if ((apptype == PROC_POLICY_OSX_APPTYPE_TAL) || 
                                (apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) {
-
-                               proc_apply_task_networkbg_internal(p);
+                               proc_apply_task_networkbg_internal(p, NULL);
                        }
                }
+               if ((psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_TAL) ||
+                               (psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_WIDGET)) {
+                       if ((psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_TAL))
+                               apptype = PROC_POLICY_OSX_APPTYPE_TAL;
+                       else if (psa->psa_flags & POSIX_SPAWN_APPTYPE_OSX_WIDGET)
+                               apptype = PROC_POLICY_OSX_APPTYPE_DBCLIENT;
+                       else
+                               apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                       proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread);
+                       if ((apptype == PROC_POLICY_OSX_APPTYPE_TAL) || 
+                               (apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) {
+                               proc_apply_task_networkbg_internal(p, NULL);
+                       }
+               }
+#endif /* CONFIG_EMBEDDED */
        }
 
        /*
@@ -1249,9 +1219,6 @@ struct execsw {
 } execsw[] = {
        { exec_mach_imgact,             "Mach-o Binary" },
        { exec_fat_imgact,              "Fat Binary" },
-#ifdef IMGPF_POWERPC
-       { exec_powerpc32_imgact,        "PowerPC binary" },
-#endif /* IMGPF_POWERPC */
        { exec_shell_imgact,            "Interpreter Script" },
        { NULL, NULL}
 };
@@ -1393,16 +1360,6 @@ encapsulated_binary:
                        NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
                                   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
 
-#ifdef IMGPF_POWERPC
-                       /*
-                        * PowerPC does not follow symlinks because the
-                        * code which sets exec_archhandler_ppc.fsid and
-                        * exec_archhandler_ppc.fileid doesn't follow them.
-                        */
-                       if (imgp->ip_flags & IMGPF_POWERPC)
-                               nd.ni_cnd.cn_flags &= ~FOLLOW;
-#endif /* IMGPF_POWERPC */
-
                        proc_transend(p, 0);
                        goto again;
 
@@ -1455,57 +1412,55 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags)
        _ps_port_action_t *act = NULL;
        task_t task = p->task;
        ipc_port_t port = NULL;
-       errno_t ret = KERN_SUCCESS;
+       errno_t ret = 0;
        int i;
 
        for (i = 0; i < pacts->pspa_count; i++) {
                act = &pacts->pspa_actions[i];
 
                if (ipc_object_copyin(get_task_ipcspace(current_task()),
-                               CAST_MACH_PORT_TO_NAME(act->new_port),
-                               MACH_MSG_TYPE_COPY_SEND,
-                               (ipc_object_t *) &port) != KERN_SUCCESS)
-                       return EINVAL;
-
-               if (ret)                        
-                       return ret;
+                   act->new_port, MACH_MSG_TYPE_COPY_SEND,
+                   (ipc_object_t *) &port) != KERN_SUCCESS)
+                       return (EINVAL);
 
                switch (act->port_type) {
-                       case PSPA_SPECIAL:
-                               /* Only allowed when not under vfork */
-                               if (!(psa_flags & POSIX_SPAWN_SETEXEC))
-                                       return ENOTSUP;
-                               ret = (task_set_special_port(task, 
-                                               act->which, 
-                                               port) == KERN_SUCCESS) ? 0 : EINVAL;
-                               break;
-                       case PSPA_EXCEPTION:
-                               /* Only allowed when not under vfork */
-                               if (!(psa_flags & POSIX_SPAWN_SETEXEC))
-                                       return ENOTSUP;
-                               ret = (task_set_exception_ports(task, 
-                                               act->mask,
-                                               port
-                                               port,
-                                               act->flavor) == KERN_SUCCESS) ? 0 : EINVAL;
-                               break;
+               case PSPA_SPECIAL:
+                       /* Only allowed when not under vfork */
+                       if (!(psa_flags & POSIX_SPAWN_SETEXEC))
+                               ret = ENOTSUP;
+                       else if (task_set_special_port(task,
+                           act->which, port) != KERN_SUCCESS)
+                               ret = EINVAL;
+                       break;
+
+               case PSPA_EXCEPTION:
+                       /* Only allowed when not under vfork */
+                       if (!(psa_flags & POSIX_SPAWN_SETEXEC))
+                               ret = ENOTSUP;
+                       else if (task_set_exception_ports(task, 
+                           act->mask, port, act->behavior,
+                           act->flavor) != KERN_SUCCESS)
+                               ret = EINVAL;
+                       break;
 #if CONFIG_AUDIT
-                       case PSPA_AU_SESSION:
-                               ret = audit_session_spawnjoin(p, 
-                                               port);
-                               break;
+               case PSPA_AU_SESSION:
+                       ret = audit_session_spawnjoin(p, port);
+                       break;
 #endif
-                       default:
-                               ret = EINVAL;
+               default:
+                       ret = EINVAL;
+                       break;
                }
+
                /* action failed, so release port resources */
+
                if (ret) { 
                        ipc_port_release_send(port);
-                       return ret;
+                       break;
                }
        }
 
-       return ret;
+       return (ret);
 }
 
 /*
@@ -2059,10 +2014,30 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
                                        error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
                        }
                }
+
+               /*
+                * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
+                * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
+                * limit.
+                *
+                * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
+                */
+               if (px_sa.psa_cpumonitor_percent != 0) {
+                       error = proc_set_task_ruse_cpu(p->task,
+                                       TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
+                                       px_sa.psa_cpumonitor_percent,
+                                       px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
+                                       0);
+               }
        }
 
 bad:
        if (error == 0) {
+               /* reset delay idle sleep status if set */
+#if !CONFIG_EMBEDDED
+               if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
+                       OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
+#endif /* !CONFIG_EMBEDDED */
                /* upon  successful spawn, re/set the proc control state */
                if (imgp->ip_px_sa != NULL) {
                        switch (px_sa.psa_pcontrol) {
@@ -2080,8 +2055,20 @@ bad:
                                        p->p_pcaction = 0;
                                        break;
                        };
+#if !CONFIG_EMBEDDED
+                       if ((px_sa.psa_apptype & POSIX_SPAWN_APPTYPE_DELAYIDLESLEEP) != 0)
+                               OSBitOrAtomic(P_DELAYIDLESLEEP, &p->p_flag);
+#endif /* !CONFIG_EMBEDDED */
                }
                exec_resettextvp(p, imgp);
+               
+#if CONFIG_EMBEDDED
+               /* Has jetsam attributes? */
+               if (imgp->ip_px_sa != NULL) {
+                       memorystatus_list_change((px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
+                                       p->p_pid, px_sa.psa_priority, -1, px_sa.psa_high_water_mark);
+               }
+#endif
        }
 
        /*
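
The only subtle part of the CPU monitor hookup above is units: user space supplies psa_cpumonitor_percent plus an interval in seconds, while proc_set_task_ruse_cpu() takes nanoseconds. For example, a limit of 50% of a CPU over a three-minute window would reach the kernel as:

    /* Illustrative values; mirrors the call made in posix_spawn() above. */
    uint64_t percent  = 50;
    uint64_t interval = 180 * NSEC_PER_SEC;         /* 180 s in nanoseconds */
    int error;

    error = proc_set_task_ruse_cpu(p->task,
                    TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
                    percent, interval, 0);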
@@ -2969,6 +2956,9 @@ random_hex_str(char *str, int len)
 #define        ENTROPY_VALUES 2
 #define ENTROPY_KEY "malloc_entropy="
 
+#define PFZ_KEY "pfz="
+extern user32_addr_t commpage_text32_location;
+extern user64_addr_t commpage_text64_location;
 /*
  * Build up the contents of the apple[] string vector
  */
@@ -2976,16 +2966,31 @@ static int
 exec_add_apple_strings(struct image_params *imgp)
 {
        int i, error;
-       int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
+       int new_ptr_size = 4;
        char guard[19];
        char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
 
        char entropy[19];
        char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
 
+       char pfz_string[strlen(PFZ_KEY) + 16 + 4 + 1];
+
+       if (imgp->ip_flags & IMGPF_IS_64BIT) {
+               new_ptr_size = 8;
+               snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
+       } else {
+               snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
+       }
+
        /* exec_save_path stored the first string */
        imgp->ip_applec = 1;
 
+       /* adding the pfz string */
+       error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
+       if (error)
+               goto bad;
+       imgp->ip_applec++;
+
        /*
         * Supply libc with a collection of random values to use when
         * implementing -fstack-protector.
@@ -3116,18 +3121,6 @@ exec_check_permissions(struct image_params *imgp)
 #endif
 
 
-#ifdef IMGPF_POWERPC
-       /*
-        * If the file we are about to attempt to load is the exec_handler_ppc,
-        * which is determined by matching the vattr fields against previously
-        * cached values, then we set the PowerPC environment flag.
-        */
-       if (vap->va_fsid == exec_archhandler_ppc.fsid &&
-               vap->va_fileid == exec_archhandler_ppc.fileid) {
-               imgp->ip_flags |= IMGPF_POWERPC;
-       }
-#endif /* IMGPF_POWERPC */
-
        /* XXX May want to indicate to underlying FS that vnode is open */
 
        return (error);
@@ -3390,64 +3383,79 @@ handle_mac_transition:
  *             limits on stack growth, if they end up being needed.
  *
  * Parameters: p                       Process to set stack on
- *             user_stack              Address to set stack for process to
- *             customstack             FALSE if no custom stack in binary
- *             map                     Address map in which to allocate the
- *                                     new stack, if 'customstack' is FALSE
+ *             load_result             Information from mach-o load commands
+ *             map                     Address map in which to allocate the new stack
  *
  * Returns:    KERN_SUCCESS            Stack successfully created
  *             !KERN_SUCCESS           Mach failure code
  */
 static kern_return_t
-create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
+create_unix_stack(vm_map_t map, load_result_t* load_result, 
                        proc_t p)
 {
        mach_vm_size_t          size, prot_size;
        mach_vm_offset_t        addr, prot_addr;
        kern_return_t           kr;
 
+       mach_vm_address_t       user_stack = load_result->user_stack;
+       
        proc_lock(p);
        p->user_stack = user_stack;
        proc_unlock(p);
 
-       if (!customstack) {
+       if (!load_result->prog_allocated_stack) {
                /*
                 * Allocate enough space for the maximum stack size we
                 * will ever authorize and an extra page to act as
-                * a guard page for stack overflows.
+                * a guard page for stack overflows. For default stacks,
+                * vm_initial_limit_stack takes care of the extra guard page.
+                * Otherwise we must allocate it ourselves.
                 */
-               size = mach_vm_round_page(MAXSSIZ);
-#if STACK_GROWTH_UP
-               addr = mach_vm_trunc_page(user_stack);
-#else  /* STACK_GROWTH_UP */
-               addr = mach_vm_trunc_page(user_stack - size);
-#endif /* STACK_GROWTH_UP */
+
+               size = mach_vm_round_page(load_result->user_stack_size);
+               if (load_result->prog_stack_size)
+                       size += PAGE_SIZE;
+               addr = mach_vm_trunc_page(load_result->user_stack - size);
                kr = mach_vm_allocate(map, &addr, size,
                                        VM_MAKE_TAG(VM_MEMORY_STACK) |
-                                     VM_FLAGS_FIXED);
+                                       VM_FLAGS_FIXED);
                if (kr != KERN_SUCCESS) {
-                       return kr;
+                       /* If can't allocate at default location, try anywhere */
+                       addr = 0;
+                       kr = mach_vm_allocate(map, &addr, size,
+                                                                 VM_MAKE_TAG(VM_MEMORY_STACK) |
+                                                                 VM_FLAGS_ANYWHERE);
+                       if (kr != KERN_SUCCESS)
+                               return kr;
+
+                       user_stack = addr + size;
+                       load_result->user_stack = user_stack;
+
+                       proc_lock(p);
+                       p->user_stack = user_stack;
+                       proc_unlock(p);
                }
+
                /*
                 * And prevent access to what's above the current stack
                 * size limit for this process.
                 */
                prot_addr = addr;
-#if STACK_GROWTH_UP
-               prot_addr += unix_stack_size(p);
-#endif /* STACK_GROWTH_UP */
-               prot_addr = mach_vm_round_page(prot_addr);
-               prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
+               if (load_result->prog_stack_size)
+                       prot_size = PAGE_SIZE;
+               else
+                       prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
                kr = mach_vm_protect(map,
-                                    prot_addr,
-                                    prot_size,
-                                    FALSE,
-                                    VM_PROT_NONE);
+                                                        prot_addr,
+                                                        prot_size,
+                                                        FALSE,
+                                                        VM_PROT_NONE);
                if (kr != KERN_SUCCESS) {
                        (void) mach_vm_deallocate(map, addr, size);
                        return kr;
                }
        }
+
        return KERN_SUCCESS;
 }
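
Concretely, for a binary without its own stack segment the function above ends up with one contiguous reservation: usable stack at the top, an inaccessible region below it that doubles as guard page and growth headroom. With illustrative numbers (a 64 MB reservation, as with MAXSSIZ on OS X, against the default 8 MB stack rlimit):

    /* Illustrative sizes only; user_stack and the limits come from
     * load_result and the process rlimit in the real code above. */
    mach_vm_size_t   size      = 64ULL * 1024 * 1024;         /* reservation */
    mach_vm_offset_t addr      = user_stack - size;           /* base        */
    mach_vm_size_t   prot_size = size - (8ULL * 1024 * 1024); /* below limit */

    /*
     * [addr, addr + prot_size)        -> VM_PROT_NONE (guard + headroom)
     * [addr + prot_size, user_stack)  -> usable stack, grows downward
     */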
 
@@ -3891,3 +3899,131 @@ done:
        return error;
 }
 
+/*
+ * Typically as soon as we start executing this process, the
+ * first instruction will trigger a VM fault to bring the text
+ * pages (as executable) into the address space, followed soon
+ * thereafter by dyld data structures (for dynamic executable).
+ * To optimize this, as well as improve support for hardware
+ * debuggers that can only access resident pages present
+ * in the process' page tables, we prefault some pages if
+ * possible. Errors are non-fatal.
+ */
+static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
+{
+       int ret;
+       size_t expected_all_image_infos_size;
+
+       /*
+        * Prefault executable or dyld entry point.
+        */
+       vm_fault( current_map(),
+                         vm_map_trunc_page(load_result->entry_point),
+                         VM_PROT_READ | VM_PROT_EXECUTE,
+                         FALSE,
+                         THREAD_UNINT, NULL, 0);
+       
+       if (imgp->ip_flags & IMGPF_IS_64BIT) {
+               expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
+       } else {
+               expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
+       }
+
+       /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
+       if (load_result->dynlinker &&
+               load_result->all_image_info_addr &&
+               load_result->all_image_info_size >= expected_all_image_infos_size) {
+               union {
+                       struct user64_dyld_all_image_infos      infos64;
+                       struct user32_dyld_all_image_infos      infos32;
+               } all_image_infos;
+
+               /*
+                * Pre-fault to avoid copyin() going through the trap handler
+                * and recovery path.
+                */
+               vm_fault( current_map(),
+                                 vm_map_trunc_page(load_result->all_image_info_addr),
+                                 VM_PROT_READ | VM_PROT_WRITE,
+                                 FALSE,
+                                 THREAD_UNINT, NULL, 0);
+               if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
+                       /* all_image_infos straddles a page */
+                       vm_fault( current_map(),
+                                         vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1),
+                                         VM_PROT_READ | VM_PROT_WRITE,
+                                         FALSE,
+                                         THREAD_UNINT, NULL, 0);
+               }
+
+               ret = copyin(load_result->all_image_info_addr,
+                                        &all_image_infos,
+                                        expected_all_image_infos_size);
+               if (ret == 0 && all_image_infos.infos32.version >= 9) {
+
+                       user_addr_t notification_address;
+                       user_addr_t dyld_image_address;
+                       user_addr_t dyld_version_address;
+                       user_addr_t dyld_all_image_infos_address;
+                       user_addr_t dyld_slide_amount;
+
+                       if (imgp->ip_flags & IMGPF_IS_64BIT) {
+                               notification_address = all_image_infos.infos64.notification;
+                               dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
+                               dyld_version_address = all_image_infos.infos64.dyldVersion;
+                               dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
+                       } else {
+                               notification_address = all_image_infos.infos32.notification;
+                               dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
+                               dyld_version_address = all_image_infos.infos32.dyldVersion;
+                               dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
+                       }
+
+                       /*
+                        * dyld statically sets up the all_image_infos in its Mach-O
+                        * binary at static link time, with pointers relative to its default
+                        * load address. Since ASLR might slide dyld before its first
+                        * instruction is executed, "dyld_slide_amount" tells us how far
+                        * dyld was loaded compared to its default expected load address.
+                        * All other pointers into dyld's image should be adjusted by this
+                        * amount. At some point later, dyld will fix up pointers to take
+                        * into account the slide, at which point the all_image_infos_address
+                        * field in the structure will match the runtime load address, and
+                        * "dyld_slide_amount" will be 0, if we were to consult it again.
+                        */
+
+                       dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
+
+#if 0
+                       kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
+                                       (uint64_t)load_result->all_image_info_addr,
+                                       all_image_infos.infos32.version,
+                                       (uint64_t)notification_address,
+                                       (uint64_t)dyld_image_address,
+                                       (uint64_t)dyld_version_address,
+                                       (uint64_t)dyld_all_image_infos_address);
+#endif
+
+                       vm_fault( current_map(),
+                                         vm_map_trunc_page(notification_address + dyld_slide_amount),
+                                         VM_PROT_READ | VM_PROT_EXECUTE,
+                                         FALSE,
+                                         THREAD_UNINT, NULL, 0);
+                       vm_fault( current_map(),
+                                         vm_map_trunc_page(dyld_image_address + dyld_slide_amount),
+                                         VM_PROT_READ | VM_PROT_EXECUTE,
+                                         FALSE,
+                                         THREAD_UNINT, NULL, 0);
+                       vm_fault( current_map(),
+                                         vm_map_trunc_page(dyld_version_address + dyld_slide_amount),
+                                         VM_PROT_READ,
+                                         FALSE,
+                                         THREAD_UNINT, NULL, 0);
+                       vm_fault( current_map(),
+                                         vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount),
+                                         VM_PROT_READ | VM_PROT_WRITE,
+                                         FALSE,
+                                         THREAD_UNINT, NULL, 0);
+               }
+       }
+}
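
The slide adjustment deserves a concrete illustration. Suppose (purely hypothetical numbers) dyld was linked expecting its all_image_infos at 0x1fe0d020, but ASLR loaded it so the structure actually sits at 0x3a90d020; then dyld_slide_amount is 0x1ab00000, and a pointer such as notification read out of the not-yet-rebased structure must have that amount added before it can be prefaulted:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Illustrative values only -- not taken from a real binary. */
            uint64_t runtime_infos = 0x3a90d020;   /* where ASLR put it     */
            uint64_t linked_infos  = 0x1fe0d020;   /* what dyld recorded    */
            uint64_t linked_notify = 0x1fe01000;   /* pointer in the struct */

            uint64_t slide = runtime_infos - linked_infos;  /* 0x1ab00000 */
            printf("prefault notification at 0x%llx\n",
                (unsigned long long)(linked_notify + slide)); /* 0x3a901000 */
            return 0;
    }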
index 6d04dd6a9d576e2bcb685a7c26df64fdefbc8c60..4e9f418be25464c7b0ab1827328b99122b04c4b0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <kern/assert.h>
 #include <sys/codesign.h>
 
+#if VM_PRESSURE_EVENTS
+#include <kern/vm_pressure.h>
+#endif
+
+#if CONFIG_MEMORYSTATUS
+#include <sys/kern_memorystatus.h>
+#endif
+
 #if CONFIG_DTRACE
 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
 extern void (*dtrace_fasttrap_exit_ptr)(proc_t);
@@ -140,7 +148,7 @@ extern void dtrace_lazy_dofs_destroy(proc_t);
 #include <sys/sdt.h>
 
 extern char init_task_failure_data[];
-void proc_prepareexit(proc_t p, int rv);
+void proc_prepareexit(proc_t p, int rv, boolean_t perf_notify);
 void vfork_exit(proc_t p, int rv);
 void vproc_exit(proc_t p);
 __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
@@ -156,15 +164,15 @@ int       wait1continue(int result);
 int    waitidcontinue(int result);
 int    *get_bsduthreadrval(thread_t);
 kern_return_t sys_perf_notify(thread_t thread, int pid);
-kern_return_t abnormal_exit_notify(mach_exception_data_type_t code, 
-               mach_exception_data_type_t subcode);
+kern_return_t task_exception_notify(exception_type_t exception,
+       mach_exception_data_type_t code, mach_exception_data_type_t subcode);
 void   delay(int);
                        
 /*
  * NOTE: Source and target may *NOT* overlap!
  * XXX Should share code with bsd/dev/ppc/unix_signal.c
  */
-static void
+void
 siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out)
 {
        out->si_signo   = in->si_signo;
@@ -179,7 +187,7 @@ siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out)
        out->si_band    = in->si_band;                  /* range reduction */
 }
 
-static void
+void
 siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out)
 {
        out->si_signo   = in->si_signo;
@@ -194,6 +202,24 @@ siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out)
        out->si_band    = in->si_band;                  /* range reduction */
 }
 
+static int
+copyoutsiginfo(user_siginfo_t *native, boolean_t is64, user_addr_t uaddr)
+{
+       if (is64) {
+               user64_siginfo_t sinfo64;
+                               
+               bzero(&sinfo64, sizeof (sinfo64));
+               siginfo_user_to_user64(native, &sinfo64);
+               return (copyout(&sinfo64, uaddr, sizeof (sinfo64)));
+       } else {
+               user32_siginfo_t sinfo32;
+
+               bzero(&sinfo32, sizeof (sinfo32));
+               siginfo_user_to_user32(native, &sinfo32);
+               return (copyout(&sinfo32, uaddr, sizeof (sinfo32)));
+       }
+}
+
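
Note that the width passed to the new helper is decided once from the caller: in the rewritten waitid() further down, caller64 = IS_64BIT_PROCESS(q) replaces the old per-child IS_64BIT_PROCESS(p) test, which was the wrong discriminator since the siginfo is copied out into the waiter's address space, not the child's. The resulting call pattern, assembled from the SZOMB case later in this diff:

    user_siginfo_t siginfo;
    boolean_t caller64 = IS_64BIT_PROCESS(q);   /* width of the waiter */

    bzero(&siginfo, sizeof (siginfo));
    siginfo.si_signo = SIGCHLD;
    siginfo.si_pid = p->p_pid;
    siginfo.si_status = WEXITSTATUS(p->p_xstat);
    if ((error = copyoutsiginfo(&siginfo, caller64, uap->infop)) != 0)
            goto out;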
 /*
  * exit --
  *     Death of process.
@@ -218,10 +244,17 @@ exit(proc_t p, struct exit_args *uap, int *retval)
  */
 int
 exit1(proc_t p, int rv, int *retval)
+{
+       return exit1_internal(p, rv, retval, TRUE, TRUE);
+}
+
+int
+exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, boolean_t perf_notify)
 {
        thread_t self = current_thread();
        struct task *task = p->task;
        struct uthread *ut;
+       int error = 0;
 
        /*
         * If a thread in this task has already
@@ -231,10 +264,14 @@ exit1(proc_t p, int rv, int *retval)
 
         ut = get_bsdthread_info(self);
         if (ut->uu_flag & UT_VFORK) {
-                       vfork_exit(p, rv);
-                       vfork_return(p , retval, p->p_pid);
-                       unix_syscall_return(0);
-                       /* NOT REACHED */
+               if (!thread_can_terminate) {
+                       return EINVAL;
+               }
+
+               vfork_exit(p, rv);
+               vfork_return(p , retval, p->p_pid);
+               unix_syscall_return(0);
+               /* NOT REACHED */
         }
 
        /* 
@@ -254,8 +291,30 @@ exit1(proc_t p, int rv, int *retval)
 
        DTRACE_PROC1(exit, int, CLD_EXITED);
 
+       /* mark process is going to exit and pull out of DBG/disk throttle */
+       proc_removethrottle(p);
+
+#if CONFIG_MEMORYSTATUS
+       memorystatus_list_remove(p->p_pid);
+#endif
+
         proc_lock(p);
-       proc_transstart(p, 1);
+       error = proc_transstart(p, 1);
+       if (error == EDEADLK) {
+               /* Temp: If deadlock error, then it implies multithreaded exec is
+                * in progress. Instead of letting exit continue and
+                * corrupting the freed memory, let the exit thread
+                * return. This avoids corruption in the remote case.
+                */
+               proc_unlock(p);
+               if (current_proc() == p){
+                       thread_exception_return();
+               } else {
+                       /* external termination like jetsam */
+                       return(error);
+               }
+       }
+
        while (p->exit_thread != self) {
                if (sig_try_locked(p) <= 0) {
                        proc_transend(p, 1);
@@ -264,7 +323,12 @@ exit1(proc_t p, int rv, int *retval)
                                return(0);
                         }
                        proc_unlock(p);
+                       
                        thread_terminate(self);
+                       if (!thread_can_terminate) {
+                               return 0;
+                       }
+
                        thread_exception_return();
                        /* NOTREACHED */
                }
@@ -287,7 +351,7 @@ exit1(proc_t p, int rv, int *retval)
        proc_transend(p, 1);
        proc_unlock(p);
 
-       proc_prepareexit(p, rv);
+       proc_prepareexit(p, rv, perf_notify);
 
        /* Last thread to terminate will call proc_exit() */
        task_terminate_internal(task);
@@ -296,7 +360,7 @@ exit1(proc_t p, int rv, int *retval)
 }
 
 void
-proc_prepareexit(proc_t p, int rv) 
+proc_prepareexit(proc_t p, int rv, boolean_t perf_notify)
 {
        mach_exception_data_type_t code, subcode;
        struct uthread *ut;
@@ -323,12 +387,14 @@ proc_prepareexit(proc_t p, int rv)
                        ((ut->uu_exception & 0x0f) << 20) | 
                        ((int)ut->uu_code & 0xfffff);
                subcode = ut->uu_subcode;
-               (void) abnormal_exit_notify(code, subcode);
+               (void) task_exception_notify(EXC_CRASH, code, subcode);
        }
 
 skipcheck:
-       /* Notify the perf server */
-       (void)sys_perf_notify(self, p->p_pid);
+       /* Notify the perf server? */
+       if (perf_notify) {
+               (void)sys_perf_notify(self, p->p_pid);
+       }
 
        /*
         * Remove proc from allproc queue and from pidhash chain.
@@ -386,7 +452,7 @@ proc_exit(proc_t p)
                p->p_lflag |= P_LEXIT;
                proc_transend(p, 1);
                proc_unlock(p);
-               proc_prepareexit(p, 0); 
+               proc_prepareexit(p, 0, TRUE);   
                (void) task_terminate_internal(task);
                proc_lock(p);
        } else {
@@ -410,8 +476,9 @@ proc_exit(proc_t p)
        proc_unlock(p);
        pid = p->p_pid;
        exitval = p->p_xstat;
-       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_START,
-                                             pid, exitval, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, 
+               BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_START,
+               pid, exitval, 0, 0, 0);
 
 #if CONFIG_DTRACE
        /*
@@ -451,6 +518,10 @@ proc_exit(proc_t p)
 
        nspace_proc_exit(p);
 
+#if VM_PRESSURE_EVENTS
+       vm_pressure_proc_cleanup(p);
+#endif
+
        /*
         * need to cancel async IO requests that can be cancelled and wait for those
         * already active.  MAY BLOCK!
@@ -458,6 +529,9 @@ proc_exit(proc_t p)
        
        proc_refdrain(p);
 
+       /* if any pending cpu limits action, clear it */
+       task_clear_cpuusage(p->task);
+
        workqueue_mark_exiting(p);
        workqueue_exit(p);
 
@@ -480,6 +554,12 @@ proc_exit(proc_t p)
                throttle_lowpri_io(FALSE);
        }
 
+#if !CONFIG_EMBEDDED
+       if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+               throttle_legacy_process_decr();
+       }
+#endif
+
 #if SYSV_SHM
        /* Close ref SYSV Shared memory*/
        if (p->vm_shm)
@@ -501,8 +581,7 @@ proc_exit(proc_t p)
                        struct vnode *ttyvp;
                        int ttyvid;
                        struct vfs_context context;
-                       struct tty * tp;
-
+                       struct tty *tp;
 
                        /*
                         * Controlling process.
@@ -510,55 +589,43 @@ proc_exit(proc_t p)
                         * drain controlling terminal
                         * and revoke access to controlling terminal.
                         */
+                       session_lock(sessp);
                        tp = SESSION_TP(sessp);
-
                        if ((tp != TTY_NULL) && (tp->t_session == sessp)) {
-                               tty_pgsignal(tp, SIGHUP, 1);
-
-                               session_lock(sessp);
-                               /* reget potentially tp due to revocation */
-                               tp = SESSION_TP(sessp);
-                               ttyvp = sessp->s_ttyvp;
-                               ttyvid = sessp->s_ttyvid;
-                               sessp->s_ttyvp = NULLVP;
-                               sessp->s_ttyvid = 0;
-                               sessp->s_ttyp = TTY_NULL;
-                               sessp->s_ttypgrpid = NO_PID;
                                session_unlock(sessp);
 
-                               if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
+                               tty_pgsignal(tp, SIGHUP, 1);
 
-                                       if (tp != TTY_NULL) {
-                                               tty_lock(tp);
-                                               (void) ttywait(tp);
-                                               tty_unlock(tp);
-                                       }
-                                       context.vc_thread = proc_thread(p); /* XXX */
-                                       context.vc_ucred = kauth_cred_proc_ref(p);
-                                       VNOP_REVOKE(ttyvp, REVOKEALL, &context);
-                                       vnode_put(ttyvp);
-                                       kauth_cred_unref(&context.vc_ucred);
-                               }
-                       } else {
                                session_lock(sessp);
-                               /* reget potentially tp due to revocation */
                                tp = SESSION_TP(sessp);
-                               ttyvp = sessp->s_ttyvp;
-                               sessp->s_ttyvp = NULLVP;
-                               sessp->s_ttyvid = 0;
-                               sessp->s_ttyp = TTY_NULL;
-                               sessp->s_ttypgrpid = NO_PID;
-                               session_unlock(sessp);
+                       }
+                       ttyvp = sessp->s_ttyvp;
+                       ttyvid = sessp->s_ttyvid;
+                       sessp->s_ttyvp = NULLVP;
+                       sessp->s_ttyvid = 0;
+                       sessp->s_ttyp = TTY_NULL;
+                       sessp->s_ttypgrpid = NO_PID;
+                       session_unlock(sessp);
+
+                       if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
+                               if (tp != TTY_NULL) {
+                                       tty_lock(tp);
+                                       (void) ttywait(tp);
+                                       tty_unlock(tp);
+                               }
+                               context.vc_thread = proc_thread(p); /* XXX */
+                               context.vc_ucred = kauth_cred_proc_ref(p);
+                               vnode_rele(ttyvp);
+                               VNOP_REVOKE(ttyvp, REVOKEALL, &context);
+                               vnode_put(ttyvp);
+                               kauth_cred_unref(&context.vc_ucred);
+                               ttyvp = NULLVP;
                        }
                        if (ttyvp)
                                vnode_rele(ttyvp);
-                       /*
-                        * s_ttyp is not zero'd; we use this to indicate
-                        * that the session once had a controlling terminal.
-                        * (for logging and informational purposes)
-                        */
+                       if (tp)
+                               ttyfree(tp);
                }
-               
                session_lock(sessp);
                sessp->s_leader = NULL;
                session_unlock(sessp);
@@ -783,8 +850,9 @@ proc_exit(proc_t p)
                 * The write is to an int and is coherent. Also parent is
                 *  keyed off of list lock for reaping
                 */
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END,
-                                             pid, exitval, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON,
+                       BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END,
+                       pid, exitval, 0, 0, 0);
                p->p_stat = SZOMB;
                /* 
                 * The current process can be reaped so, no one
@@ -806,8 +874,9 @@ proc_exit(proc_t p)
                 *  keyed off of list lock for reaping
                 */
                proc_list_lock();
-               KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END,
-                                             pid, exitval, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON,
+                       BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END,
+                       pid, exitval, 0, 0, 0);
                /* check for sysctl zomb lookup */
                while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
                        msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
@@ -1209,6 +1278,12 @@ out:
        return (error);
 }
 
+#if DEBUG
+#define ASSERT_LCK_MTX_OWNED(lock)     \
+                               lck_mtx_assert(lock, LCK_MTX_ASSERT_OWNED)
+#else
+#define ASSERT_LCK_MTX_OWNED(lock)     /* nothing */
+#endif
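
ASSERT_LCK_MTX_OWNED() is the usual compile-away assertion idiom: DEBUG kernels get a real lck_mtx_assert(), release kernels get nothing, so the lock-ownership checks added to waitid() below cost nothing in production. The same pattern in portable C, with a hypothetical holds_mutex() checker standing in for lck_mtx_assert():

    #include <assert.h>

    #ifdef DEBUG
    #define ASSERT_MTX_OWNED(m)     assert(holds_mutex(m))  /* hypothetical */
    #else
    #define ASSERT_MTX_OWNED(m)     ((void)0)               /* compiles away */
    #endif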
 
 int
 waitidcontinue(int result)
@@ -1218,12 +1293,12 @@ waitidcontinue(int result)
        int *retval;
 
        if (result)
-               return(result);
+               return (result);
 
        thread = current_thread();
        vt = get_bsduthreadarg(thread);
        retval = get_bsduthreadrval(thread);
-       return(waitid(current_proc(), (struct waitid_args *)vt, retval));
+       return (waitid(current_proc(), (struct waitid_args *)vt, retval));
 }
 
 /*
@@ -1232,7 +1307,7 @@ waitidcontinue(int result)
  *
  * Parameters: uap->idtype             one of P_PID, P_PGID, P_ALL
  *             uap->id                 pid_t or gid_t or ignored
- *             uap->infop              Address of signinfo_t struct in
+ *             uap->infop              Address of siginfo_t struct in
  *                                     user space into which to return status
  *             uap->options            flag values
  *
@@ -1243,33 +1318,24 @@ int
 waitid(proc_t q, struct waitid_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(waitid_nocancel(q, (struct waitid_nocancel_args *)uap, retval));
+       return (waitid_nocancel(q, (struct waitid_nocancel_args *)uap, retval));
 }
 
 int
-waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused int32_t *retval)
+waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap,
+       __unused int32_t *retval)
 {
-       user_siginfo_t  collect64;      /* siginfo data to return to caller */
-
+       user_siginfo_t  siginfo;        /* siginfo data to return to caller */
+       boolean_t caller64 = IS_64BIT_PROCESS(q);
        int nfound;
        proc_t p;
        int error;
 
-       /*
-        * Forced validation of options for T.waitpid 21; should be a TSD!
-        * This will pass the test, but note that we have more bits than the
-        * standard specifies that we will allow in, in this case.  The test
-        * passes because they light all the bits, not just the ones we allow,
-        * and so the following check returns EINVAL like the test wants.
-        */
-       if (((uap->options & (WNOHANG|WNOWAIT|WCONTINUED|WUNTRACED|WSTOPPED|WEXITED)) != uap->options) ||
-           (uap->options == 0))
+       if (uap->options == 0 ||
+           (uap->options & ~(WNOHANG|WNOWAIT|WCONTINUED|WSTOPPED|WEXITED)))
                return (EINVAL);        /* bits set that aren't recognized */
 
-       /*
-        * Overly critical options checking, per POSIX
-        */
-       switch(uap->idtype) {
+       switch (uap->idtype) {
        case P_PID:     /* child with process ID equal to... */
        case P_PGID:    /* child with process group ID equal to... */
                if (((int)uap->id) < 0)
@@ -1284,7 +1350,8 @@ loop:
 loop1:
        nfound = 0;
        for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
-               switch(uap->idtype) {
+
+               switch (uap->idtype) {
                case P_PID:     /* child with process ID equal to... */
                        if (p->p_pid != (pid_t)uap->id)
                                continue;
@@ -1304,68 +1371,44 @@ loop1:
                 * the single return for waited process guarantee.
                 */
                if (p->p_listflag & P_LIST_WAITING) {
-                       (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitidcoll", 0);
+                       (void) msleep(&p->p_stat, proc_list_mlock,
+                               PWAIT, "waitidcoll", 0);
                        goto loop1;
                }
                p->p_listflag |= P_LIST_WAITING;                /* mark busy */
 
                nfound++;
 
-               /*
-                * Types of processes we are interested in
-                *
-                * XXX Don't know what to do for WCONTINUED?!?
-                */
-               switch(p->p_stat) {
+               bzero(&siginfo, sizeof (siginfo));
+
+               switch (p->p_stat) {
                case SZOMB:             /* Exited */
                        if (!(uap->options & WEXITED))
                                break;
-
-                       /* drop the lock and the thread is going to return */
                        proc_list_unlock();
+#if CONFIG_MACF
+                       if ((error = mac_proc_check_wait(q, p)) != 0)
+                               goto out;
+#endif
+                       siginfo.si_signo = SIGCHLD;
+                       siginfo.si_pid = p->p_pid;
+                       siginfo.si_status = WEXITSTATUS(p->p_xstat);
+                       if (WIFSIGNALED(p->p_xstat)) {
+                               siginfo.si_code = WCOREDUMP(p->p_xstat) ?
+                                       CLD_DUMPED : CLD_KILLED;
+                       } else
+                               siginfo.si_code = CLD_EXITED;
 
-                       /* Collect "siginfo" information for caller */
-                       collect64.si_signo = SIGCHLD;
-                       collect64.si_code = 0;
-                       collect64.si_errno = 0;
-                       collect64.si_pid = 0;
-                       collect64.si_uid = 0;
-                       collect64.si_addr  = 0;
-                       collect64.si_status = WEXITSTATUS(p->p_xstat);
-                       collect64.si_band = 0;
-
-                       if (IS_64BIT_PROCESS(p)) {
-                               user64_siginfo_t sinfo64;
-                               
-                               siginfo_user_to_user64(&collect64, &sinfo64);
-                               
-                               error = copyout((caddr_t)&sinfo64,
-                                       uap->infop,
-                                       sizeof(sinfo64));
-                       } else {
-                               user32_siginfo_t sinfo32;
-                               
-                               siginfo_user_to_user32(&collect64, &sinfo32);
-                               
-                               error = copyout((caddr_t)&sinfo32,
-                                                               uap->infop,
-                                                               sizeof(sinfo32));
-                       }
-                       /* information unavailable? */
-                       if (error) 
+                       if ((error = copyoutsiginfo(&siginfo,
+                           caller64, uap->infop)) != 0)
                                goto out;
 
                        /* Prevent other processes from waiting for this event? */
                        if (!(uap->options & WNOWAIT)) {
-                               /* Clean up */
-                               (void)reap_child_locked(q, p, 0, 0, 0);
-                       } else {
-                               proc_list_lock();
-                               p->p_listflag &= ~P_LIST_WAITING;
-                               proc_list_unlock();
+                               (void) reap_child_locked(q, p, 0, 0, 0);
+                               return (0);
                        }
-
-                       return (0);
+                       goto out;
 
                case SSTOP:             /* Stopped */
                        /*
@@ -1381,41 +1424,18 @@ loop1:
                         */
                        if ((p->p_lflag & P_LWAITED) != 0)
                                break;
-
-                       /* drop the lock and the thread is going to return */
                        proc_list_unlock();
+#if CONFIG_MACF
+                       if ((error = mac_proc_check_wait(q, p)) != 0)
+                               goto out;
+#endif
+                       siginfo.si_signo = SIGCHLD;
+                       siginfo.si_pid = p->p_pid;
+                       siginfo.si_status = p->p_xstat; /* signal number */
+                       siginfo.si_code = CLD_STOPPED;
 
-                       /* Collect "siginfo" information for caller */
-                       collect64.si_signo = SIGCHLD;
-                       collect64.si_code = 0;
-                       collect64.si_errno = 0;
-                       collect64.si_pid = 0;
-                       collect64.si_uid = 0;
-                       collect64.si_addr  = 0;
-                       proc_lock(p);
-                       collect64.si_status = p->p_xstat;
-                       proc_unlock(p);
-                       collect64.si_band = 0;
-
-                       if (IS_64BIT_PROCESS(p)) {
-                               user64_siginfo_t sinfo64;
-                               
-                               siginfo_user_to_user64(&collect64, &sinfo64);
-                               
-                               error = copyout((caddr_t)&sinfo64,
-                                                               uap->infop,
-                                                               sizeof(sinfo64));
-                       } else {
-                               user32_siginfo_t sinfo32;
-                               
-                               siginfo_user_to_user32(&collect64, &sinfo32);
-                               
-                               error = copyout((caddr_t)&sinfo32,
-                                                               uap->infop,
-                                                               sizeof(sinfo32));
-                       }
-                       /* information unavailable? */
-                       if (error)
+                       if ((error = copyoutsiginfo(&siginfo,
+                           caller64, uap->infop)) != 0)
                                goto out;
 
                        /* Prevent other processes from waiting for this event? */
@@ -1424,12 +1444,9 @@ loop1:
                                p->p_lflag |= P_LWAITED;
                                proc_unlock(p);
                        }
-
-                       error = 0;
                        goto out;
 
-               default:                /* All others */
-                                       /* ...meaning Continued */
+               default:                /* All other states => Continued */
                        if (!(uap->options & WCONTINUED))
                                break;
 
@@ -1440,60 +1457,40 @@ loop1:
                         */
                        if ((p->p_flag & P_CONTINUED) == 0)
                                break;
-
-                       /* drop the lock and the thread is going to return */
                        proc_list_unlock();
-
-                       /* Collect "siginfo" information for caller */
+#if CONFIG_MACF
+                       if ((error = mac_proc_check_wait(q, p)) != 0)
+                               goto out;
+#endif
+                       siginfo.si_signo = SIGCHLD;
+                       siginfo.si_code = CLD_CONTINUED;
                        proc_lock(p);
-                       collect64.si_signo = SIGCHLD;
-                       collect64.si_code = CLD_CONTINUED;
-                       collect64.si_errno = 0;
-                       collect64.si_pid = p->p_contproc;
-                       collect64.si_uid = 0;
-                       collect64.si_addr  = 0;
-                       collect64.si_status = p->p_xstat;
-                       collect64.si_band = 0;
+                       siginfo.si_pid = p->p_contproc;
+                       siginfo.si_status = p->p_xstat;
                        proc_unlock(p);
 
-                       if (IS_64BIT_PROCESS(p)) {
-                               user64_siginfo_t sinfo64;
-                               
-                               siginfo_user_to_user64(&collect64, &sinfo64);
-                               
-                               error = copyout((caddr_t)&sinfo64,
-                                                               uap->infop,
-                                                               sizeof(sinfo64));
-                       } else {
-                               user32_siginfo_t sinfo32;
-                               
-                               siginfo_user_to_user32(&collect64, &sinfo32);
-                               
-                               error = copyout((caddr_t)&sinfo32,
-                                                               uap->infop,
-                                                               sizeof(sinfo32));
-                       }
-                       /* information unavailable? */
-                       if (error)
+                       if ((error = copyoutsiginfo(&siginfo,
+                           caller64, uap->infop)) != 0)
                                goto out;
 
                        /* Prevent other processes from waiting for this event? */
                        if (!(uap->options & WNOWAIT)) {
-                               OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag);
+                               OSBitAndAtomic(~((uint32_t)P_CONTINUED),
+                                   &p->p_flag);
                        }
-
-                       error = 0;
                        goto out;
                }
-               /* LIST LOCK IS HELD HERE */
+               ASSERT_LCK_MTX_OWNED(proc_list_mlock);
+
                /* Not a process we are interested in; go on to next child */
-               
+
                p->p_listflag &= ~P_LIST_WAITING;
                wakeup(&p->p_stat);
        }
+       ASSERT_LCK_MTX_OWNED(proc_list_mlock);
 
-       /* list lock is always held */
        /* No child processes that could possibly satisfy the request? */
+
        if (nfound == 0) {
                proc_list_unlock();
                return (ECHILD);
@@ -1501,10 +1498,24 @@ loop1:
 
        if (uap->options & WNOHANG) {
                proc_list_unlock();
+#if CONFIG_MACF
+               if ((error = mac_proc_check_wait(q, p)) != 0)
+                       return (error);
+#endif
+               /*
+                * The state of the siginfo structure in this case
+                * is undefined.  Some implementations bzero it, some
+                * (like here) leave it untouched for efficiency.
+                *
+                * Thus the most portable check for "no matching pid with
+                * WNOHANG" is to store a zero into si_pid before
+                * invocation, then check for a non-zero value afterwards.
+                */
                return (0);
        }
 
-       if ((error = msleep0((caddr_t)q, proc_list_mlock, PWAIT | PCATCH | PDROP, "waitid", 0, waitidcontinue)))
+       if ((error = msleep0(q, proc_list_mlock,
+           PWAIT | PCATCH | PDROP, "waitid", 0, waitidcontinue)) != 0)
                return (error);
 
        goto loop;
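
The WNOHANG comment above is directly actionable from userspace. A sketch of the portable pattern it describes, using POSIX waitid(2) with error handling trimmed:

    #include <signal.h>
    #include <stdio.h>
    #include <sys/wait.h>

    /* Returns 1 if a child was reaped, 0 if none was ready, -1 on error. */
    static int try_reap(pid_t pid)
    {
            siginfo_t si;

            si.si_pid = 0;                  /* sentinel, per the comment */
            if (waitid(P_PID, pid, &si, WEXITED | WNOHANG) == -1)
                    return -1;
            if (si.si_pid == 0)
                    return 0;               /* no matching child yet */
            printf("pid %d exited with status %d\n",
                (int)si.si_pid, si.si_status);
            return 1;
    }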
@@ -1682,6 +1693,12 @@ vproc_exit(proc_t p)
         */
        fdfree(p);
 
+#if !CONFIG_EMBEDDED
+       if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+               throttle_legacy_process_decr();
+       }
+#endif
+
        sessp = proc_session(p);
        if (SESS_LEADER(p, sessp)) {
                
@@ -1689,7 +1706,7 @@ vproc_exit(proc_t p)
                        struct vnode *ttyvp;
                        int ttyvid;
                        struct vfs_context context;
-                       struct tty * tp;
+                       struct tty *tp;
 
                        /*
                         * Controlling process.
@@ -1697,54 +1714,43 @@ vproc_exit(proc_t p)
                         * drain controlling terminal
                         * and revoke access to controlling terminal.
                         */
+                       session_lock(sessp);
                        tp = SESSION_TP(sessp);
-
                        if ((tp != TTY_NULL) && (tp->t_session == sessp)) {
+                               session_unlock(sessp);
+
                                tty_pgsignal(tp, SIGHUP, 1);
-                               tty_lock(tp);
-                               (void) ttywait(tp);
-                               tty_unlock(tp);
-                               /*
-                                * The tty could have been revoked
-                                * if we blocked.
-                                */
 
                                session_lock(sessp);
-                               /* reget in case of race */
                                tp = SESSION_TP(sessp);
-                               ttyvp = sessp->s_ttyvp;
-                               ttyvid = sessp->s_ttyvid;
-                               sessp->s_ttyvp = NULL;
-                               sessp->s_ttyvid = 0;
-                               sessp->s_ttyp = TTY_NULL;
-                               sessp->s_ttypgrpid = NO_PID;
-                               session_unlock(sessp);
-
-                              if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
-                                       context.vc_thread = proc_thread(p); /* XXX */
-                                       context.vc_ucred = kauth_cred_proc_ref(p);
-                                       VNOP_REVOKE(ttyvp, REVOKEALL, &context);
-                                       vnode_put(ttyvp);
-                                       kauth_cred_unref(&context.vc_ucred);
+                       }
+                       ttyvp = sessp->s_ttyvp;
+                       ttyvid = sessp->s_ttyvid;
+                       sessp->s_ttyvp = NULL;
+                       sessp->s_ttyvid = 0;
+                       sessp->s_ttyp = TTY_NULL;
+                       sessp->s_ttypgrpid = NO_PID;
+                       session_unlock(sessp);
+
+                      if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
+                               if (tp != TTY_NULL) {
+                                       tty_lock(tp);
+                                       (void) ttywait(tp);
+                                       tty_unlock(tp);
                                }
-                       } else {
-                               session_lock(sessp);
-                               ttyvp = sessp->s_ttyvp;
-                               sessp->s_ttyvp = NULL;
-                               sessp->s_ttyvid = 0;
-                               sessp->s_ttyp = TTY_NULL;
-                               sessp->s_ttypgrpid = NO_PID;
-                               session_unlock(sessp);
+                               context.vc_thread = proc_thread(p); /* XXX */
+                               context.vc_ucred = kauth_cred_proc_ref(p);
+                               vnode_rele(ttyvp);
+                               VNOP_REVOKE(ttyvp, REVOKEALL, &context);
+                               vnode_put(ttyvp);
+                               kauth_cred_unref(&context.vc_ucred);
+                               ttyvp = NULLVP;
                        }
                        if (ttyvp) 
                                vnode_rele(ttyvp);
-                       /*
-                        * s_ttyp is not zero'd; we use this to indicate
-                        * that the session once had a controlling terminal.
-                        * (for logging and informational purposes)
-                        */
+                       if (tp)
+                               ttyfree(tp);
                }
-
                session_lock(sessp);
                sessp->s_leader = NULL;
                session_unlock(sessp);
@@ -1838,13 +1844,13 @@ vproc_exit(proc_t p)
 
 #ifdef  FIXME
            if (task) {
-               task_basic_info_data_t tinfo;
+               mach_task_basic_info_data_t tinfo;
                task_thread_times_info_data_t ttimesinfo;
                int task_info_stuff, task_ttimes_stuff;
                struct timeval ut,st;
 
-               task_info_stuff = TASK_BASIC_INFO_COUNT;
-               task_info(task, TASK_BASIC_INFO,
+               task_info_stuff = MACH_TASK_BASIC_INFO_COUNT;
+               task_info(task, MACH_TASK_BASIC_INFO,
                          &tinfo, &task_info_stuff);
                p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds;
                p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds;
index 76c1fbae69d1d970f223040c9bddb289959f4a33..37d02887f64fd3d094b83ba9c8a8a3d04f54b83d 100644 (file)
@@ -129,6 +129,10 @@ extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
 
 #include <sys/sdt.h>
 
+#if CONFIG_MEMORYSTATUS
+#include <sys/kern_memorystatus.h>
+#endif
+
 /* XXX routines which should have Mach prototypes, but don't */
 void thread_set_parent(thread_t parent, int pid);
 extern void act_thread_catt(void *ctx);
@@ -158,8 +162,8 @@ void proc_vfork_end(proc_t parent_proc);
  * Notes:      Although this function increments a count, a count in
  *             excess of 1 is not currently supported.  According to the
  *             POSIX standard, calling anything other than execve() or
- *             _exit() fillowing a vfork(), including calling vfork()
- *             itself again, will result in undefned behaviour
+ *             _exit() following a vfork(), including calling vfork()
+ *             itself again, will result in undefined behaviour
  */
 void
 proc_vfork_begin(proc_t parent_proc)
@@ -179,7 +183,7 @@ proc_vfork_begin(proc_t parent_proc)
  *
  * Returns:    (void)
  *
- * Notes:      Decerements the count; currently, reentrancy of vfork()
+ * Notes:      Decrements the count; currently, reentrancy of vfork()
  *             is unsupported on the current process
  */
 void
@@ -189,7 +193,6 @@ proc_vfork_end(proc_t parent_proc)
        parent_proc->p_vforkcnt--;
        if (parent_proc->p_vforkcnt < 0)
                panic("vfork cnt is -ve");
-       /* resude the vfork count; clear the flag when it goes to 0 */
        if (parent_proc->p_vforkcnt == 0)
                parent_proc->p_lflag  &= ~P_LVFORK;
        proc_unlock(parent_proc);
@@ -650,6 +653,12 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
        /* return the thread pointer to the caller */
        *child_threadp = child_thread;
 
+#if CONFIG_MEMORYSTATUS
+       if (!err) {
+               memorystatus_list_add(child_proc->p_pid, DEFAULT_JETSAM_PRIORITY, -1);
+       }
+#endif
+
 bad:
        /*
         * In the error case, we return a 0 value for the returned pid (but
@@ -671,48 +680,53 @@ bad:
  *             this is done by reassociating the parent process structure
  *             with the task, thread, and uthread.
  *
+ *             Refer to the ASCII art above vfork() to figure out the
+ *             state we're undoing.
+ *
  * Parameters: child_proc              Child process
  *             retval                  System call return value array
  *             rval                    Return value to present to parent
  *
  * Returns:    void
  *
- * Note      The caller resumes or exits the parent, as appropriate, after
- *             callling this function.
+ * Notes:      The caller resumes or exits the parent, as appropriate, after
+ *             calling this function.
  */
 void
 vfork_return(proc_t child_proc, int32_t *retval, int rval)
 {
-       proc_t parent_proc = child_proc->p_pptr;
-       thread_t parent_thread = (thread_t)current_thread();
-       uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
+       task_t parent_task = get_threadtask(child_proc->p_vforkact);
+       proc_t parent_proc = get_bsdtask_info(parent_task);
+       thread_t th = current_thread();
+       uthread_t uth = get_bsdthread_info(th);
        
-       act_thread_catt(parent_uthread->uu_userstate);
+       act_thread_catt(uth->uu_userstate);
 
-       /* end vfork in parent */
+       /* clear vfork state in parent proc structure */
        proc_vfork_end(parent_proc);
 
        /* REPATRIATE PARENT TASK, THREAD, UTHREAD */
-       parent_uthread->uu_userstate = 0;
-       parent_uthread->uu_flag &= ~UT_VFORK;
+       uth->uu_userstate = 0;
+       uth->uu_flag &= ~UT_VFORK;
        /* restore thread-set-id state */
-       if (parent_uthread->uu_flag & UT_WASSETUID) {
-               parent_uthread->uu_flag |= UT_SETUID;
-               parent_uthread->uu_flag &= UT_WASSETUID;
+       if (uth->uu_flag & UT_WASSETUID) {
+               uth->uu_flag |= UT_SETUID;
+               uth->uu_flag &= UT_WASSETUID;
        }
-       parent_uthread->uu_proc = 0;
-       parent_uthread->uu_sigmask = parent_uthread->uu_vforkmask;
-       child_proc->p_lflag  &= ~P_LINVFORK;
-       child_proc->p_vforkact = (void *)0;
+       uth->uu_proc = 0;
+       uth->uu_sigmask = uth->uu_vforkmask;
+
+       proc_lock(child_proc);
+       child_proc->p_lflag &= ~P_LINVFORK;
+       child_proc->p_vforkact = 0;
+       proc_unlock(child_proc);
 
-       thread_set_parent(parent_thread, rval);
+       thread_set_parent(th, rval);
 
        if (retval) {
                retval[0] = rval;
                retval[1] = 0;                  /* mark parent */
        }
-
-       return;
 }
 
 
@@ -1006,6 +1020,12 @@ forkproc_free(proc_t p)
        /* Need to undo the effects of the fdcopy(), if any */
        fdfree(p);
 
+#if !CONFIG_EMBEDDED
+       if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+               throttle_legacy_process_decr();
+       }
+#endif
+
        /*
         * Drop the reference on a text vnode pointer, if any
         * XXX This code is broken in forkproc(); see <rdar://4256419>;
@@ -1174,9 +1194,20 @@ retry:
         * Increase reference counts on shared objects.
         * The p_stats and p_sigacts substructs are set in vm_fork.
         */
+#if !CONFIG_EMBEDDED
+       child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP));
+#else /*  !CONFIG_EMBEDDED */
        child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR));
+#endif /* !CONFIG_EMBEDDED */
        if (parent_proc->p_flag & P_PROFIL)
                startprofclock(child_proc);
+
+#if !CONFIG_EMBEDDED
+       if (child_proc->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
+               throttle_legacy_process_incr();
+       }
+#endif
+
        /*
         * Note that if the current thread has an assumed identity, this
         * credential will be granted to the new process.
@@ -1319,6 +1350,9 @@ retry:
        }
 #endif
 
+       /* Default to no tracking of dirty state */
+       child_proc->p_dirty = 0;
+
 bad:
        return(child_proc);
 }
@@ -1393,6 +1427,7 @@ uthread_alloc(task_t task, thread_t thread, int noinherit)
        p = (proc_t) get_bsdtask_info(task);
        uth = (uthread_t)ut;
        uth->uu_kwe.kwe_uth = uth;
+       uth->uu_thread = thread;
 
        /*
         * Thread inherits credential from the creating thread, if both
@@ -1445,6 +1480,9 @@ uthread_alloc(task_t task, thread_t thread, int noinherit)
                if (p->p_dtrace_ptss_pages != NULL) {
                        uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
                }
+#endif
+#if CONFIG_MACF
+               mac_thread_label_init(uth);
 #endif
        }
 
@@ -1532,6 +1570,9 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info)
                if (tmpptr != NULL) {
                        dtrace_ptss_release_entry(p, tmpptr);
                }
+#endif
+#if CONFIG_MACF
+               mac_thread_label_destroy(uth);
 #endif
        }
 }
index b7775864c66048bd0ccc056bb2ff7412f4a27c51..13e4c97dbe02c6d8c58a58f3a8ac4f9a891a21c2 100644 (file)
@@ -127,11 +127,8 @@ typedef enum {
 static int      lf_clearlock(struct lockf *);
 static overlap_t lf_findoverlap(struct lockf *,
            struct lockf *, int, struct lockf ***, struct lockf **);
-static struct lockf *lf_getblock(struct lockf *);
-static int      lf_getlock(struct lockf *, struct flock *);
-#if CONFIG_EMBEDDED
-static int      lf_getlockpid(struct vnode *, struct flock *);
-#endif
+static struct lockf *lf_getblock(struct lockf *, pid_t);
+static int      lf_getlock(struct lockf *, struct flock *, pid_t);
 static int      lf_setlock(struct lockf *);
 static int      lf_split(struct lockf *, struct lockf *);
 static void     lf_wakelock(struct lockf *, boolean_t);
@@ -174,11 +171,6 @@ lf_advlock(struct vnop_advlock_args *ap)
 
        /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
 
-#if CONFIG_EMBEDDED
-       if (ap->a_op == F_GETLKPID)
-               return lf_getlockpid(vp, fl);
-#endif
-
        /*
         * Avoid the common case of unlocking when inode has no locks.
         */
@@ -287,9 +279,16 @@ lf_advlock(struct vnop_advlock_args *ap)
                break;
 
        case F_GETLK:
-               error = lf_getlock(lock, fl);
+               error = lf_getlock(lock, fl, -1);
+               FREE(lock, M_LOCKF);
+               break;
+
+#if CONFIG_EMBEDDED
+       case F_GETLKPID:
+               error = lf_getlock(lock, fl, fl->l_pid);
                FREE(lock, M_LOCKF);
                break;
+#endif
 
        default:
                FREE(lock, M_LOCKF);
@@ -302,6 +301,36 @@ lf_advlock(struct vnop_advlock_args *ap)
        return (error);
 }
 
+/*
+ * Empty the queue of msleeping requests for a lock on the given vnode.
+ * Called with the vnode already locked.  Used for forced unmount, where
+ * a flock(2) invoker sleeping on a blocked lock holds an iocount reference
+ * that prevents the vnode from ever being drained.  Forced unmounting wins.
+ */
+void
+lf_abort_advlocks(vnode_t vp)
+{
+       struct lockf *lock;
+
+       if ((lock = vp->v_lockf) == NULL)
+               return; 
+
+       lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
+
+       if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+               struct lockf *tlock;
+
+               TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
+                       /*
+                        * Setting this flag should cause all
+                        * currently blocked F_SETLK requests to
+                        * return to userland with an errno.
+                        */
+                       tlock->lf_flags |= F_ABORT;
+               }
+               lf_wakelock(lock, TRUE);
+       }
+}
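
lf_abort_advlocks() works together with the F_ABORT check added to lf_setlock() further down: the aborting thread tags every sleeper on the blocked-request queue, then issues a single wakeup, and each sleeper re-examines its own flags after msleep() returns, turning an abort into EBADF even when the wakeup looks like a grant. A generic sketch of that tag-then-broadcast idiom (pthreads; hypothetical waiter type, not the kernel's lockf structures):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct waiter { bool aborted; bool granted; struct waiter *next; };

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  qcond = PTHREAD_COND_INITIALIZER;

    /* Aborter: flag everyone first, then one broadcast (cf. lf_wakelock). */
    static void abort_all(struct waiter *head)
    {
            pthread_mutex_lock(&qlock);
            for (struct waiter *w = head; w != NULL; w = w->next)
                    w->aborted = true;
            pthread_cond_broadcast(&qcond);
            pthread_mutex_unlock(&qlock);
    }

    /* Sleeper: an abort wins over a grant (cf. the EBADF case). */
    static int wait_for_grant(struct waiter *self)
    {
            pthread_mutex_lock(&qlock);
            while (!self->granted && !self->aborted)
                    pthread_cond_wait(&qcond, &qlock);
            int err = self->aborted ? -1 : 0;
            pthread_mutex_unlock(&qlock);
            return err;
    }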
 
 /*
  * Take any lock attempts which are currently blocked by a given lock ("from")
@@ -351,8 +380,6 @@ lf_coalesce_adjacent(struct lockf *lock)
                 * NOTE: Assumes that if two locks are adjacent on the number line 
                 * and belong to the same owner, then they are adjacent on the list.
                 */
-
-               /* If the lock ends adjacent to us, we can coelesce it */
                if ((*lf)->lf_end != -1 &&
                    ((*lf)->lf_end + 1) == lock->lf_start) {
                        struct lockf *adjacent = *lf;
@@ -439,7 +466,7 @@ lf_setlock(struct lockf *lock)
        /*
         * Scan lock list for this file looking for locks that would block us.
         */
-       while ((block = lf_getblock(lock))) {
+       while ((block = lf_getblock(lock, -1))) {
                /*
                 * Free the structure and return if nonblocking.
                 */
@@ -553,10 +580,14 @@ lf_setlock(struct lockf *lock)
                error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
 
                if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
-                       if ((block = lf_getblock(lock))) {
+                       if ((block = lf_getblock(lock, -1))) {
                                lf_move_blocked(block, lock);
                        }
                }
+
+               if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
+                       error = EBADF;
+
                if (error) {    /* XXX */
                        /*
                         * We may have been awakened by a signal and/or by a
@@ -816,6 +847,7 @@ lf_clearlock(struct lockf *unlock)
  *             fl                      Pointer to flock structure to receive
  *                                     the blocking lock information, if a
  *                                     blocking lock is found.
+ *             matchpid                -1, or pid value to match in lookup.
  *
  * Returns:    0                       Success
  *
@@ -828,7 +860,7 @@ lf_clearlock(struct lockf *unlock)
  *             the blocking process ID for advisory record locks.
  */
 static int
-lf_getlock(struct lockf *lock, struct flock *fl)
+lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
 {
        struct lockf *block;
 
@@ -837,7 +869,7 @@ lf_getlock(struct lockf *lock, struct flock *fl)
                lf_print("lf_getlock", lock);
 #endif /* LOCKF_DEBUGGING */
 
-       if ((block = lf_getblock(lock))) {
+       if ((block = lf_getblock(lock, matchpid))) {
                fl->l_type = block->lf_type;
                fl->l_whence = SEEK_SET;
                fl->l_start = block->lf_start;
@@ -855,56 +887,6 @@ lf_getlock(struct lockf *lock, struct flock *fl)
        return (0);
 }
 
-#if CONFIG_EMBEDDED
-int lf_getlockpid(struct vnode *vp, struct flock *fl)
-{
-       struct lockf *lf, *blk;
-
-       if (vp == 0)
-               return EINVAL;
-
-       fl->l_type = F_UNLCK;
-       
-       lck_mtx_lock(&vp->v_lock);
-
-       for (lf = vp->v_lockf; lf; lf = lf->lf_next) {
-
-               if (lf->lf_flags & F_POSIX) {
-                       if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) {
-                               fl->l_type = lf->lf_type;
-                               fl->l_whence = SEEK_SET;
-                               fl->l_start = lf->lf_start;
-                               if (lf->lf_end == -1)
-                                       fl->l_len = 0;
-                               else
-                                       fl->l_len = lf->lf_end - lf->lf_start + 1;
-
-                               break;
-                       }
-               }
-
-               TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
-                       if (blk->lf_flags & F_POSIX) {
-                               if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) {
-                                       fl->l_type = blk->lf_type;
-                                       fl->l_whence = SEEK_SET;
-                                       fl->l_start = blk->lf_start;
-                                       if (blk->lf_end == -1)
-                                               fl->l_len = 0;
-                                       else
-                                               fl->l_len = blk->lf_end - blk->lf_start + 1;
-
-                                       break;
-                               }
-                       }
-               }
-       }
-
-       lck_mtx_unlock(&vp->v_lock);
-       return (0);
-}
-#endif
-
 /*
  * lf_getblock
  *
@@ -915,29 +897,35 @@ int lf_getlockpid(struct vnode *vp, struct flock *fl)
  *
  * Parameters: lock                    The lock for which we are interested
  *                                     in obtaining the blocking lock, if any
+ *             matchpid                -1, or pid value to match in lookup.
  *
  * Returns:    NOLOCKF                 No blocking lock exists
  *             !NOLOCKF                The address of the blocking lock's
  *                                     struct lockf.
  */
 static struct lockf *
-lf_getblock(struct lockf *lock)
+lf_getblock(struct lockf *lock, pid_t matchpid)
 {
        struct lockf **prev, *overlap, *lf = *(lock->lf_head);
-       int ovcase;
 
-       prev = lock->lf_head;
-       while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) != OVERLAP_NONE) {
+       for (prev = lock->lf_head;
+           lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE;
+           lf = overlap->lf_next) {
                /*
-                * We've found an overlap, see if it blocks us
+                * Found an overlap.
+                *
+                * If we're matching pids, and it's a record lock,
+                * but the pid doesn't match, then keep on looking ..
                 */
-               if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
-                       return (overlap);
+               if (matchpid != -1 &&
+                   (overlap->lf_flags & F_POSIX) != 0 &&
+                   proc_pid((struct proc *)(overlap->lf_id)) != matchpid)
+                       continue;
                /*
-                * Nope, point to the next one on the list and
-                * see if it blocks us
+                * Does it block us?
                 */
-               lf = overlap->lf_next;
+               if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
+                       return (overlap);
        }
        return (NOLOCKF);
 }
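
The matchpid parameter exists so that F_GETLK-style queries can report the owner of a blocking record lock; -1 preserves the old behaviour of matching any owner. For orientation, a minimal userspace sketch of the standard fcntl(2) F_GETLK interface this feeds (the path and byte range are illustrative only):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,    /* the lock we would like to take */
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 0,          /* zero length means "to EOF" */
            };
            int fd = open("/tmp/lockfile", O_RDWR | O_CREAT, 0600);
            if (fd >= 0 && fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
                    printf("blocked by pid %d\n", (int)fl.l_pid);
            return 0;
    }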
@@ -970,7 +958,7 @@ lf_getblock(struct lockf *lock)
  *                                     this is generally used to relink the
  *                                     lock list, avoiding a second iteration.
  *             *overlap                The pointer to the overlapping lock
- *                                     itself; this is ussed to return data in
+ *                                     itself; this is used to return data in
  *                                     the check == OTHERS case, and for the
  *                                     caller to modify the overlapping lock,
  *                                     in the check == SELF case
index c1700ee51a7e6c5f8dfbdf2db15021932b49fb55..ee021079ae82885837b24a4cbc7f0c739b5db068 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -273,11 +273,11 @@ const char *memname[] = {
        "fileglob",             /* 99 M_FILEGLOB */ 
        "kauth",                /* 100 M_KAUTH */ 
        "dummynet",             /* 101 M_DUMMYNET */ 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        "unsafe_fsnode",        /* 102 M_UNSAFEFS */ 
 #else
        "",                     /* 102 M_UNSAFEFS */ 
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        "macpipelabel", /* 103 M_MACPIPELABEL */
        "mactemp",      /* 104 M_MACTEMP */
        "sbuf",         /* 105 M_SBUF */
@@ -459,11 +459,11 @@ struct kmzones {
        { SOS(fileglob),        KMZ_CREATEZONE, TRUE }, /* 99 M_FILEGLOB */
        { 0,            KMZ_MALLOC, FALSE },            /* 100 M_KAUTH */
        { 0,            KMZ_MALLOC, FALSE },            /* 101 M_DUMMYNET */
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        { SOS(unsafe_fsnode),KMZ_CREATEZONE, TRUE },    /* 102 M_UNSAFEFS */
 #else 
        { 0,            KMZ_MALLOC, FALSE },            /* 102 M_UNSAFEFS */
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        { 0,            KMZ_MALLOC, FALSE },            /* 103 M_MACPIPELABEL */
        { 0,            KMZ_MALLOC, FALSE },            /* 104 M_MACTEMP */
        { 0,            KMZ_MALLOC, FALSE },            /* 105 M_SBUF */
@@ -562,11 +562,23 @@ _MALLOC(
                return (NULL);
 
        if (flags & M_NOWAIT) {
-               hdr = (void *)kalloc_noblock(memsize);
+               if (size > memsize)   /* overflow detected */
+                       return (NULL);
+               else
+                       hdr = (void *)kalloc_noblock(memsize); 
        } else {
-               hdr = (void *)kalloc(memsize);
-
-               if (hdr == NULL) {
+               if (size > memsize) {
+                       /*
+                        * We get here when the caller told us to block waiting for memory, but an
+                        * overflow has been detected.  The caller isn't expecting a NULL return code,
+                        * so we panic with a descriptive message.
+                        */
+                       panic("_MALLOC: overflow detected, size %llu ", (uint64_t) size);
+               }
+               else
+                       hdr = (void *)kalloc(memsize);
+
+              if (hdr == NULL) {
 
                        /*
                         * We get here when the caller told us to block waiting for memory, but
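
The new checks in _MALLOC() guard the header-plus-size addition against wrap-around: if sizeof(header) + size overflows, the sum comes out smaller than size itself, which is exactly what `size > memsize` detects. A standalone sketch of the same pattern (the alloc_hdr type is a hypothetical stand-in for the real allocation header):

    #include <stdlib.h>

    struct alloc_hdr { size_t mlen; };      /* hypothetical header prepended to each block */

    static void *checked_alloc(size_t size)
    {
            size_t memsize = sizeof(struct alloc_hdr) + size;
            if (size > memsize)             /* the addition wrapped: overflow detected */
                    return NULL;
            return malloc(memsize);
    }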
index 489ddd2bea9b38220a80c818ae641ec0c7fa0018..1bf8dd616c128713e27601a50046bec5ba7e5916 100644 (file)
  *
  */
 
-#include <sys/kern_event.h>
-#include <sys/kern_memorystatus.h>
-
 #include <kern/sched_prim.h>
 #include <kern/kalloc.h>
+#include <kern/assert.h>
 #include <kern/debug.h>
 #include <kern/lock.h>
 #include <kern/task.h>
 #include <kern/thread.h>
+#include <kern/host.h>
 #include <libkern/libkern.h>
+#include <mach/mach_time.h>
 #include <mach/task.h>
 #include <mach/task_info.h>
+#include <mach/host_priv.h>
+#include <sys/kern_event.h>
 #include <sys/proc.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/sysctl.h>
+#include <sys/sysproto.h>
 #include <sys/wait.h>
 #include <sys/tree.h>
+#include <sys/priv.h>
 #include <pexpert/pexpert.h>
 
 #if CONFIG_FREEZE
 #include <vm/vm_protos.h>
 #include <vm/vm_map.h>
+#endif
 
-enum {
-       kProcessSuspended =        (1 << 0), 
-       kProcessHibernated =       (1 << 1),
-       kProcessNoReclaimWorth =   (1 << 2),
-       kProcessIgnored =          (1 << 3),
-       kProcessBusy =             (1 << 4)
-};
+#include <sys/kern_memorystatus.h> 
 
-static lck_mtx_t * hibernation_mlock;
-static lck_attr_t * hibernation_lck_attr;
-static lck_grp_t * hibernation_lck_grp;
-static lck_grp_attr_t * hibernation_lck_grp_attr;
-
-typedef struct hibernation_node {
-       RB_ENTRY(hibernation_node) link;
-       pid_t pid;
-       uint32_t state;
-       mach_timespec_t hibernation_ts;
-} hibernation_node;
-
-static int hibernation_tree_compare(hibernation_node *n1, hibernation_node *n2) {
-       if (n1->pid < n2->pid)
-               return -1;
-       else if (n1->pid > n2->pid)
-               return 1;
-       else
-               return 0;
-}
+/* These are very verbose printf()s; enable them with
+ * MEMORYSTATUS_DEBUG_LOG.
+ */
+#if MEMORYSTATUS_DEBUG_LOG
+#define MEMORYSTATUS_DEBUG(cond, format, ...)      \
+do {                                              \
+       if (cond) { printf(format, ##__VA_ARGS__); } \
+} while(0)
+#else
+#define MEMORYSTATUS_DEBUG(cond, format, ...)
+#endif
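
The cond argument gates each call site individually, so a message can stay compiled in (under MEMORYSTATUS_DEBUG_LOG) yet be switched off where it fires. A representative call from later in this change:

    MEMORYSTATUS_DEBUG(1, "memorystatus_list_add: adding process %d with priority %d, high water mark %d.\n",
            pid, priority, high_water_mark);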
 
-static RB_HEAD(hibernation_tree, hibernation_node) hibernation_tree_head;
-RB_PROTOTYPE_SC(static, hibernation_tree, hibernation_node, link, hibernation_tree_compare);
+/* General memorystatus stuff */
 
-RB_GENERATE(hibernation_tree, hibernation_node, link, hibernation_tree_compare);
+static void memorystatus_add_node(memorystatus_node *node);
+static void memorystatus_remove_node(memorystatus_node *node);
+static memorystatus_node *memorystatus_get_node(pid_t pid);
+static void memorystatus_release_node(memorystatus_node *node);
 
-static inline boolean_t kern_hibernation_can_hibernate_processes(void);
-static boolean_t kern_hibernation_can_hibernate(void);
+int memorystatus_wakeup = 0;
 
-static void kern_hibernation_add_node(hibernation_node *node);
-static hibernation_node *kern_hibernation_get_node(pid_t pid);
-static void kern_hibernation_release_node(hibernation_node *node);
-static void kern_hibernation_free_node(hibernation_node *node, boolean_t unlock);
+static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);
 
-static void kern_hibernation_register_pid(pid_t pid);
-static void kern_hibernation_unregister_pid(pid_t pid);
+static memorystatus_node *next_memorystatus_node = NULL;
 
-static int kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts);
-static int kern_hibernation_set_process_state(pid_t pid, uint32_t state);
+static int memorystatus_list_count = 0;
 
-static void kern_hibernation_cull(void);
+static lck_mtx_t * memorystatus_list_mlock;
+static lck_attr_t * memorystatus_lck_attr;
+static lck_grp_t * memorystatus_lck_grp;
+static lck_grp_attr_t * memorystatus_lck_grp_attr;
 
-static void kern_hibernation_thread(void);
+static TAILQ_HEAD(memorystatus_list_head, memorystatus_node) memorystatus_list;
 
-extern boolean_t vm_freeze_enabled;
+static uint64_t memorystatus_idle_delay_time = 0;
 
-int kern_hibernation_wakeup = 0;
+static unsigned int memorystatus_dirty_count = 0;
 
-static int jetsam_priority_list_hibernation_index = 0;
+extern void proc_dirty_start(struct proc *p);
+extern void proc_dirty_end(struct proc *p);
 
-/* Thresholds */
-static int kern_memorystatus_level_hibernate = 50;
+/* Jetsam */
+
+#if CONFIG_JETSAM
+
+extern unsigned int    vm_page_free_count;
+extern unsigned int    vm_page_active_count;
+extern unsigned int    vm_page_inactive_count;
+extern unsigned int    vm_page_throttled_count;
+extern unsigned int    vm_page_purgeable_count;
+extern unsigned int    vm_page_wire_count;
+
+static lck_mtx_t * exit_list_mlock;
+
+static TAILQ_HEAD(exit_list_head, memorystatus_node) exit_list;
+
+static unsigned int memorystatus_kev_failure_count = 0;
+
+/* Counted in pages... */
+unsigned int memorystatus_delta = 0;
+
+unsigned int memorystatus_available_pages = (unsigned int)-1;
+unsigned int memorystatus_available_pages_critical = 0;
+unsigned int memorystatus_available_pages_highwater = 0;
+
+/* ...with the exception of the legacy level in percent. */
+unsigned int memorystatus_level = 0;
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD, &memorystatus_kev_failure_count, 0, "");
+SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD, &memorystatus_level, 0, "");
+
+unsigned int memorystatus_jetsam_policy = kPolicyDefault;
+
+unsigned int memorystatus_jetsam_policy_offset_pages_more_free = 0;
+#if DEVELOPMENT || DEBUG
+unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
+#endif
+
+static memorystatus_jetsam_snapshot_t memorystatus_jetsam_snapshot;
+#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot.entries
+
+static int memorystatus_jetsam_snapshot_list_count = 0;
+
+int memorystatus_jetsam_wakeup = 0;
+unsigned int memorystatus_jetsam_running = 1;
+
+static uint32_t memorystatus_task_page_count(task_t task);
+
+static void memorystatus_move_node_to_exit_list(memorystatus_node *node);
+
+static void memorystatus_update_levels_locked(void);
+
+static void memorystatus_jetsam_thread_block(void);
+static void memorystatus_jetsam_thread(void *param __unused, wait_result_t wr __unused);
+
+static int memorystatus_send_note(int event_code, void *data, size_t data_length);
+
+static uint32_t memorystatus_build_flags_from_state(uint32_t state);
 
-#define HIBERNATION_PAGES_MIN   ( 1 * 1024 * 1024 / PAGE_SIZE)
-#define HIBERNATION_PAGES_MAX   (16 * 1024 * 1024 / PAGE_SIZE)
+/* VM pressure */
 
-static unsigned int kern_memorystatus_hibernation_pages_min   = HIBERNATION_PAGES_MIN;
-static unsigned int kern_memorystatus_hibernation_pages_max   = HIBERNATION_PAGES_MAX;
+#if VM_PRESSURE_EVENTS
 
-static unsigned int kern_memorystatus_suspended_count = 0;
-static unsigned int kern_memorystatus_hibernated_count = 0;
+typedef enum vm_pressure_level {
+        kVMPressureNormal   = 0,
+        kVMPressureWarning  = 1,
+        kVMPressureUrgent   = 2,
+        kVMPressureCritical = 3,
+} vm_pressure_level_t;
 
-static unsigned int kern_memorystatus_hibernation_suspended_minimum = 4;
+static vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;
 
-static unsigned int kern_memorystatus_low_swap_pages = 0;
+unsigned int memorystatus_available_pages_pressure = 0;
+
+static inline boolean_t memorystatus_get_pressure_locked(void);
+static void memorystatus_check_pressure_reset(void);
+
+#endif /* VM_PRESSURE_EVENTS */
+
+#endif /* CONFIG_JETSAM */
+
+/* Freeze */
+
+#if CONFIG_FREEZE
+
+static unsigned int memorystatus_suspended_resident_count = 0;
+static unsigned int memorystatus_suspended_count = 0;
+
+boolean_t memorystatus_freeze_enabled = FALSE;
+int memorystatus_freeze_wakeup = 0;
+
+static inline boolean_t memorystatus_can_freeze_processes(void);
+static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
+
+static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
+
+/* Thresholds */
+static unsigned int memorystatus_freeze_threshold = 0;
+
+static unsigned int memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
+static unsigned int memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
+
+static unsigned int memorystatus_frozen_count = 0;
+
+static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
+
+/* Stats */
+static uint64_t memorystatus_freeze_count = 0;
+static uint64_t memorystatus_freeze_pageouts = 0;
 
 /* Throttling */
-#define HIBERNATION_DAILY_MB_MAX         1024
-#define HIBERNATION_DAILY_PAGEOUTS_MAX (HIBERNATION_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE))
-
-static struct throttle_interval_t {
-       uint32_t mins;
-       uint32_t burst_multiple;
-       uint32_t pageouts;
-       uint32_t max_pageouts;
-       mach_timespec_t ts;
-       boolean_t throttle;
-} throttle_intervals[] = {
-       {          60,  8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
+static throttle_interval_t throttle_intervals[] = {
+       {      60,  8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
        { 24 * 60,  1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
 };
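
Assuming the per-interval pageout budget is still derived from a daily maximum the way the removed HIBERNATION_DAILY_PAGEOUTS_MAX logic derived it, the burst multiple scales an interval's share of the daily budget. A sketch of that arithmetic (the function name, and the assumption that FREEZE_DAILY_PAGEOUTS_MAX plays the old constant's role, are both hypothetical):

    /* per-interval budget = (interval length / 24h) * daily budget * burst multiple;
     * for the 60-minute, 8x interval that is (60/1440) * 8 = 1/3 of a day's pageouts
     * usable in any one hour, while the 24-hour interval caps the overall total.   */
    static uint32_t interval_budget(uint32_t mins, uint32_t burst_multiple, uint32_t daily_max)
    {
            return (uint32_t)(((uint64_t)daily_max * mins * burst_multiple) / (24 * 60));
    }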
 
-/* Stats */
-static uint64_t kern_memorystatus_hibernation_count = 0;
-SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_count, CTLFLAG_RD, &kern_memorystatus_hibernation_count, "");
+static uint64_t memorystatus_freeze_throttle_count = 0;
 
-static uint64_t kern_memorystatus_hibernation_pageouts = 0;
-SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_pageouts, CTLFLAG_RD, &kern_memorystatus_hibernation_pageouts, "");
+#endif /* CONFIG_FREEZE */
 
-static uint64_t kern_memorystatus_hibernation_throttle_count = 0;
-SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_throttle_count, CTLFLAG_RD, &kern_memorystatus_hibernation_throttle_count, "");
+#if CONFIG_JETSAM
 
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_min_processes, CTLFLAG_RW, &kern_memorystatus_hibernation_suspended_minimum, 0, "");
+/* Debug */
 
 #if DEVELOPMENT || DEBUG
-/* Allow parameter tweaking in these builds */
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_level_hibernate, CTLFLAG_RW, &kern_memorystatus_level_hibernate, 0, "");
 
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_min, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_min, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_max, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_max, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD, &memorystatus_available_pages, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RW, &memorystatus_available_pages_critical, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_highwater, CTLFLAG_RW, &memorystatus_available_pages_highwater, 0, "");
+#if VM_PRESSURE_EVENTS
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW, &memorystatus_available_pages_pressure, 0, "");
+#endif /* VM_PRESSURE_EVENTS */
+
+/* Diagnostic code */
+enum {
+       kJetsamDiagnosticModeNone =              0, 
+       kJetsamDiagnosticModeAll  =              1,
+       kJetsamDiagnosticModeStopAtFirstActive = 2,
+       kJetsamDiagnosticModeCount
+} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;
+
+static int jetsam_diagnostic_suspended_one_active_proc = 0;
+
+static int
+sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       const char *diagnosticStrings[] = {
+               "jetsam: diagnostic mode: resetting critical level.",
+               "jetsam: diagnostic mode: will examine all processes",
+               "jetsam: diagnostic mode: will stop at first active process"                
+       };
+        
+       int error, val = jetsam_diagnostic_mode;
+       boolean_t changed = FALSE;
+
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error || !req->newptr)
+               return (error);
+       if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
+               printf("jetsam: diagnostic mode: invalid value - %d\n", val);
+               return EINVAL;
+       }
+       
+       lck_mtx_lock(memorystatus_list_mlock);
+       
+       if ((unsigned int) val != jetsam_diagnostic_mode) {
+               jetsam_diagnostic_mode = val;
+
+               memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;
+                
+               switch (jetsam_diagnostic_mode) {
+               case kJetsamDiagnosticModeNone:
+                       /* Already cleared */
+                       break;
+               case kJetsamDiagnosticModeAll:
+                       memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
+                       break;
+               case kJetsamDiagnosticModeStopAtFirstActive:
+                       memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
+                       break;
+               default:
+                       /* Already validated */
+                       break;
+               }
+               
+               memorystatus_update_levels_locked();
+               changed = TRUE;
+       }
+        
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       if (changed) {
+               printf("%s\n", diagnosticStrings[val]);
+       }
+       
+       return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+               &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");
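+
On DEVELOPMENT and DEBUG kernels the mode can be driven from userspace, e.g. `sysctl -w debug.jetsam_diagnostic_mode=2` to stop at the first active process; out-of-range values are rejected with EINVAL, as above.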
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_more_free, CTLFLAG_RW, &memorystatus_jetsam_policy_offset_pages_more_free, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");
+
+#if VM_PRESSURE_EVENTS
+
+#include "vm_pressure.h"
+
+static int
+sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+       int error = 0;
+
+       error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
+       if (error)
+               return (error);
+
+       return SYSCTL_OUT(req, &memorystatus_vm_pressure_level, sizeof(memorystatus_vm_pressure_level));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
+
+static int
+sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       int error, pid = 0;
+
+       error = sysctl_handle_int(oidp, &pid, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       if (vm_dispatch_pressure_note_to_pid(pid)) {
+               return 0;
+       }
+
+       return EINVAL;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");
+
+#endif /* VM_PRESSURE_EVENTS */
+
+#endif /* CONFIG_JETSAM */
+
+#if CONFIG_FREEZE
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW, &memorystatus_freeze_threshold, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW, &memorystatus_freeze_pages_min, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW, &memorystatus_freeze_pages_max, 0, "");
+
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD, &memorystatus_freeze_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD, &memorystatus_freeze_pageouts, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD, &memorystatus_freeze_throttle_count, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW, &memorystatus_freeze_suspended_threshold, 0, "");
+
+boolean_t memorystatus_freeze_throttle_enabled = TRUE;
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW, &memorystatus_freeze_throttle_enabled, 0, "");
+
+/* 
+ * Manual trigger of freeze and thaw for dev / debug kernels only.
+ */
+static int
+sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       int error, pid = 0;
+       proc_t p;
+
+       error = sysctl_handle_int(oidp, &pid, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       p = proc_find(pid);
+       if (p != NULL) {
+               uint32_t purgeable, wired, clean, dirty;
+               boolean_t shared;
+               uint32_t max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
+               task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
+               proc_rele(p);
+               return 0;
+       }
+
+       return EINVAL;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_freeze, "I", "");
+
+static int
+sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+       int error, pid = 0;
+       proc_t p;
+
+       error = sysctl_handle_int(oidp, &pid, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       p = proc_find(pid);
+       if (p != NULL) {
+               task_thaw(p->task);
+               proc_rele(p);
+               return 0;
+       }
+
+       return EINVAL;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
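+
Both dev/debug triggers are reachable from userspace, e.g. `sysctl -w kern.memorystatus_freeze=<pid>` to freeze a suspended process's task and `sysctl -w kern.memorystatus_thaw=<pid>` to thaw it again; an unknown pid returns EINVAL.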
 
-boolean_t kern_memorystatus_hibernation_throttle_enabled = TRUE;
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_throttle_enabled, CTLFLAG_RW, &kern_memorystatus_hibernation_throttle_enabled, 0, "");
-#endif /* DEVELOPMENT || DEBUG */
 #endif /* CONFIG_FREEZE */
 
-extern unsigned int    vm_page_free_count;
-extern unsigned int    vm_page_active_count;
-extern unsigned int    vm_page_inactive_count;
-extern unsigned int    vm_page_purgeable_count;
-extern unsigned int    vm_page_wire_count;
+#endif /* DEVELOPMENT || DEBUG */
+
+__private_extern__ void
+memorystatus_init(void)
+{
+       thread_t thread = THREAD_NULL;
+       kern_return_t result;
+       
+       memorystatus_lck_attr = lck_attr_alloc_init();
+       memorystatus_lck_grp_attr = lck_grp_attr_alloc_init();
+       memorystatus_lck_grp = lck_grp_alloc_init("memorystatus",  memorystatus_lck_grp_attr);
+       memorystatus_list_mlock = lck_mtx_alloc_init(memorystatus_lck_grp, memorystatus_lck_attr);
+       TAILQ_INIT(&memorystatus_list);
+
+#if CONFIG_JETSAM
+       exit_list_mlock = lck_mtx_alloc_init(memorystatus_lck_grp, memorystatus_lck_attr);
+       TAILQ_INIT(&exit_list);
+       
+       memorystatus_delta = DELTA_PERCENT * atop_64(max_mem) / 100;
+#endif
+
+#if CONFIG_FREEZE
+       memorystatus_freeze_threshold = (FREEZE_PERCENT / DELTA_PERCENT) * memorystatus_delta;
+#endif
+
+       nanoseconds_to_absolutetime((uint64_t)IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);
+
+       result = kernel_thread_start(memorystatus_thread, NULL, &thread);
+       if (result == KERN_SUCCESS) {
+               thread_deallocate(thread);
+       } else {
+               panic("Could not create memorystatus_thread");
+       }
+
+#if CONFIG_JETSAM
+       memorystatus_jetsam_policy_offset_pages_more_free = (POLICY_MORE_FREE_OFFSET_PERCENT / DELTA_PERCENT) * memorystatus_delta;
+#if DEVELOPMENT || DEBUG
+       memorystatus_jetsam_policy_offset_pages_diagnostic = (POLICY_DIAGNOSTIC_OFFSET_PERCENT / DELTA_PERCENT) * memorystatus_delta;
+#endif
+
+       /* No contention at this point */
+       memorystatus_update_levels_locked();
+       
+       result = kernel_thread_start(memorystatus_jetsam_thread, NULL, &thread);
+       if (result == KERN_SUCCESS) {
+               thread_deallocate(thread);
+       } else {
+               panic("Could not create memorystatus_jetsam_thread");
+       }
+#endif
+}
+
+/*
+ * Node manipulation
+ */
+
+static void
+memorystatus_add_node(memorystatus_node *new_node)
+{
+       memorystatus_node *node;
+
+       /* Make sure we're called with the list lock held */
+       lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED);
+
+       TAILQ_FOREACH(node, &memorystatus_list, link) {
+               if (node->priority <= new_node->priority) {
+                       break;
+               }
+       }
+
+       if (node) {
+               TAILQ_INSERT_BEFORE(node, new_node, link);
+       } else {
+               TAILQ_INSERT_TAIL(&memorystatus_list, new_node, link);
+       }
+
+       next_memorystatus_node = TAILQ_FIRST(&memorystatus_list);
+
+       memorystatus_list_count++;
+}
+
+static void
+memorystatus_remove_node(memorystatus_node *node) 
+{
+       /* Make sure we're called with the list lock held */
+       lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED);
+
+       TAILQ_REMOVE(&memorystatus_list, node, link);
+       next_memorystatus_node = TAILQ_FIRST(&memorystatus_list);
+
+#if CONFIG_FREEZE    
+       if (node->state & (kProcessFrozen)) {
+               memorystatus_frozen_count--;
+       }
+
+       if (node->state & kProcessSuspended) {
+               memorystatus_suspended_resident_count -= node->resident_pages;
+               memorystatus_suspended_count--;
+       }
+#endif
+
+       memorystatus_list_count--;
+}
+
+/* Returns with the lock taken if found */
+static memorystatus_node *
+memorystatus_get_node(pid_t pid) 
+{
+       memorystatus_node *node;
+
+       lck_mtx_lock(memorystatus_list_mlock);
+
+       TAILQ_FOREACH(node, &memorystatus_list, link) {
+               if (node->pid == pid) {
+                       break;
+               }
+       }
+
+       if (!node) {
+               lck_mtx_unlock(memorystatus_list_mlock);                
+       }
+
+       return node;
+}
+
+static void
+memorystatus_release_node(memorystatus_node *node) 
+{
+#pragma unused(node)
+       lck_mtx_unlock(memorystatus_list_mlock);        
+}
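
memorystatus_get_node() returns with the list lock held on success, so every successful lookup must be paired with memorystatus_release_node(), as the on_track_dirty/on_dirty/on_suspend entry points below do. The pattern, in sketch form:

    memorystatus_node *node = memorystatus_get_node(pid);
    if (node) {
            /* the list lock is held here; inspect or update node->state */
            memorystatus_release_node(node);        /* drops the list lock */
    }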
+
+/* 
+ * List manipulation
+ */
+kern_return_t 
+memorystatus_list_add(pid_t pid, int priority, int high_water_mark)
+{
+
+#if !CONFIG_JETSAM
+#pragma unused(high_water_mark)
+#endif
+
+       memorystatus_node *new_node;
+
+       new_node = (memorystatus_node*)kalloc(sizeof(memorystatus_node));
+       if (!new_node) {
+               assert(FALSE);
+       }
+       memset(new_node, 0, sizeof(memorystatus_node));
+    
+       MEMORYSTATUS_DEBUG(1, "memorystatus_list_add: adding process %d with priority %d, high water mark %d.\n", pid, priority, high_water_mark);
+    
+       new_node->pid = pid;
+       new_node->priority = priority;
+#if CONFIG_JETSAM
+       new_node->hiwat_pages = high_water_mark;
+#endif    
+
+       lck_mtx_lock(memorystatus_list_mlock);
+    
+       memorystatus_add_node(new_node);
+        
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       return KERN_SUCCESS;
+}
+
+kern_return_t
+memorystatus_list_change(boolean_t effective, pid_t pid, int priority, int state_flags, int high_water_mark)
+{
+
+#if !CONFIG_JETSAM
+#pragma unused(high_water_mark)
+#endif
+       
+       kern_return_t ret;
+       memorystatus_node *node, *search;
+
+       MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: changing process %d to priority %d with flags %d\n", pid, priority, state_flags);
+
+       lck_mtx_lock(memorystatus_list_mlock);
+
+       TAILQ_FOREACH(node, &memorystatus_list, link) {
+               if (node->pid == pid) {
+                       break;
+               }
+       }
+    
+       if (!node) {
+               ret = KERN_FAILURE;
+               goto out;             
+       }
+
+       if (effective && (node->state & kProcessPriorityUpdated)) {
+               MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: effective change specified for pid %d, but change already occurred.\n", pid);
+               ret = KERN_FAILURE;
+               goto out;             
+       }
+
+       node->state |= kProcessPriorityUpdated;
+       if (state_flags != -1) {
+               node->state &= ~(kProcessActive|kProcessForeground);
+               if (state_flags & kMemorystatusFlagsFrontmost) {
+                       node->state |= kProcessForeground;
+               }
+               if (state_flags & kMemorystatusFlagsActive) {
+                       node->state |= kProcessActive;
+               }
+       }
+
+#if CONFIG_JETSAM        
+       if (high_water_mark != -1) {
+               node->hiwat_pages = high_water_mark;
+       }
+#endif
+
+       if (node->priority == priority) {
+               /* Priority unchanged */
+               MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: same priority set for pid %d\n", pid);
+               ret = KERN_SUCCESS;
+               goto out;
+       }
+
+       if (node->priority < priority) {
+               /* Higher priority value (i.e., less important) - search backwards */
+               search = TAILQ_PREV(node, memorystatus_list_head, link);
+               TAILQ_REMOVE(&memorystatus_list, node, link);
+
+               node->priority = priority;
+               while (search && (search->priority <= node->priority)) {
+                       search = TAILQ_PREV(search, memorystatus_list_head, link);
+               }
+               if (search) {
+                       TAILQ_INSERT_AFTER(&memorystatus_list, search, node, link);
+               } else {
+                       TAILQ_INSERT_HEAD(&memorystatus_list, node, link);
+               }
+       } else {
+               /* Lower priority value (i.e., more important) - search forwards */
+               search = TAILQ_NEXT(node, link);
+               TAILQ_REMOVE(&memorystatus_list, node, link);
+
+               node->priority = priority;
+               while (search && (search->priority >= node->priority)) {
+                       search = TAILQ_NEXT(search, link);
+               }
+               if (search) {
+                       TAILQ_INSERT_BEFORE(search, node, link);
+               } else {
+                       TAILQ_INSERT_TAIL(&memorystatus_list, node, link);
+               }
+       }
+
+       next_memorystatus_node = TAILQ_FIRST(&memorystatus_list);
+       ret = KERN_SUCCESS;
+
+out:
+       lck_mtx_unlock(memorystatus_list_mlock);
+       return ret;
+}
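
The two search directions keep the list sorted by descending priority value (head = highest value = least important, and the first candidate for reclaim), without disturbing the relative order of untouched peers. A compact illustration:

    /* list (head -> tail):  A[10]  B[5]  C[5]  D[0]
     * raise C to 10: search backwards past B[5] and A[10] (both <= 10) and
     * fall off the head, so C is inserted at the head:
     *                       C[10]  A[10]  B[5]  D[0]                        */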
+
+kern_return_t memorystatus_list_remove(pid_t pid)
+{
+       kern_return_t ret;
+       memorystatus_node *node = NULL;
+
+       MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", pid);
+
+#if CONFIG_JETSAM
+       /* Did we mark this as an exited process? */
+       lck_mtx_lock(exit_list_mlock);
+
+       TAILQ_FOREACH(node, &exit_list, link) {
+               if (node->pid == pid) {
+                       /* We did, so remove it from the list. The stats were updated when the queues were shifted. */
+                       TAILQ_REMOVE(&exit_list, node, link);
+                       break;
+               }
+       }
+
+       lck_mtx_unlock(exit_list_mlock);
+#endif
+
+       /* If not, search the main list */
+       if (!node) {
+               lck_mtx_lock(memorystatus_list_mlock);
+
+               TAILQ_FOREACH(node, &memorystatus_list, link) {
+                       if (node->pid == pid) {
+                               /* Remove from the list, and update accounting accordingly */
+                               memorystatus_remove_node(node);
+                               break;
+                       }
+               }
+
+               lck_mtx_unlock(memorystatus_list_mlock);
+       }
+
+       if (node) {
+               kfree(node, sizeof(memorystatus_node));
+               ret = KERN_SUCCESS; 
+       } else {
+               ret = KERN_FAILURE;
+       }
+
+       return ret;
+}
+
+kern_return_t 
+memorystatus_on_track_dirty(int pid, boolean_t track)
+{
+       kern_return_t ret = KERN_FAILURE;
+       memorystatus_node *node;
+       
+       node = memorystatus_get_node((pid_t)pid);
+       if (!node) {
+               return KERN_FAILURE;
+       }
+       
+       if (track && !(node->state & kProcessSupportsIdleExit)) {
+               node->state |= kProcessSupportsIdleExit;
+               node->clean_time = mach_absolute_time() + memorystatus_idle_delay_time;
+               ret = KERN_SUCCESS;
+       } else if (!track && (node->state & kProcessSupportsIdleExit)) {
+               node->state &= ~kProcessSupportsIdleExit;
+               node->clean_time = 0;
+               ret = KERN_SUCCESS;             
+       }
+       
+       memorystatus_release_node(node);
+               
+       return ret;     
+}
 
-static void kern_memorystatus_thread(void);
+kern_return_t 
+memorystatus_on_dirty(int pid, boolean_t dirty)
+{
+       kern_return_t ret = KERN_FAILURE;
+       memorystatus_node *node;
+       
+       node = memorystatus_get_node((pid_t)pid);
+       if (!node) {
+               return KERN_FAILURE;
+       }
+       
+       if (dirty) {
+               if (!(node->state & kProcessDirty)) {
+                       node->state |= kProcessDirty;
+                       node->clean_time = 0;
+                       memorystatus_dirty_count++;
+                       ret = KERN_SUCCESS;
+               }
+       } else {
+               if (node->state & kProcessDirty) {
+                       node->state &= ~kProcessDirty;
+                       node->clean_time = mach_absolute_time() + memorystatus_idle_delay_time;
+                       memorystatus_dirty_count--;
+                       ret = KERN_SUCCESS;
+               }
+       }
+       
+       memorystatus_release_node(node);
+       
+       return ret;
+}
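
A clean transition arms the idle-exit deadline as "now" plus the delay converted once in memorystatus_init(); going dirty disarms it. The arithmetic in isolation (a sketch using the same calls):

    uint64_t idle_delay_abs;

    /* one-time setup, as in memorystatus_init() */
    nanoseconds_to_absolutetime((uint64_t)IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &idle_delay_abs);

    /* on the dirty -> clean transition */
    node->clean_time = mach_absolute_time() + idle_delay_abs;

    /* later, in memorystatus_thread() */
    if (mach_absolute_time() >= node->clean_time) {
            /* process is eligible for idle exit */
    }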
 
-int kern_memorystatus_wakeup = 0;
-int kern_memorystatus_level = 0;
-int kern_memorystatus_last_level = 0;
-unsigned int kern_memorystatus_delta;
+void 
+memorystatus_on_suspend(int pid)
+{      
+       memorystatus_node *node = memorystatus_get_node((pid_t)pid);
 
-unsigned int kern_memorystatus_kev_failure_count = 0;
-int kern_memorystatus_level_critical = 5;
-#define kern_memorystatus_level_highwater (kern_memorystatus_level_critical + 5)
+       if (node) {
+#if CONFIG_FREEZE
+               proc_t p;
 
-static struct {
-       jetsam_kernel_stats_t stats;
-       size_t entry_count;
-       jetsam_snapshot_entry_t entries[kMaxSnapshotEntries];
-} jetsam_snapshot;
+               p = proc_find(pid);
+               if (p != NULL) {
+                       uint32_t pages = memorystatus_task_page_count(p->task);
+                       proc_rele(p);
+                       node->resident_pages = pages;
+                       memorystatus_suspended_resident_count += pages;
+               }
+               memorystatus_suspended_count++;
+#endif
 
-static jetsam_priority_entry_t jetsam_priority_list[kMaxPriorityEntries];
-#define jetsam_snapshot_list jetsam_snapshot.entries
+               node->state |= kProcessSuspended;
 
-static int jetsam_priority_list_index = 0;
-static int jetsam_priority_list_count = 0;
-static int jetsam_snapshot_list_count = 0;
+               memorystatus_release_node(node);
+       }
+}
 
-static lck_mtx_t * jetsam_list_mlock;
-static lck_attr_t * jetsam_lck_attr;
-static lck_grp_t * jetsam_lck_grp;
-static lck_grp_attr_t * jetsam_lck_grp_attr;
+void
+memorystatus_on_resume(int pid)
+{      
+       memorystatus_node *node = memorystatus_get_node((pid_t)pid);
 
-SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_level, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_kev_failure_count, 0, "");
+       if (node) {
+#if CONFIG_FREEZE
+               boolean_t frozen = (node->state & kProcessFrozen);
+               if (node->state & (kProcessFrozen)) {
+                       memorystatus_frozen_count--;
+               }
+               memorystatus_suspended_resident_count -= node->resident_pages;
+               memorystatus_suspended_count--;
+#endif
 
-#if DEVELOPMENT || DEBUG
+               node->state &= ~(kProcessSuspended | kProcessFrozen | kProcessIgnored);
 
-enum {
-       kJetsamDiagnosticModeNone =              0, 
-       kJetsamDiagnosticModeAll  =              1,
-       kJetsamDiagnosticModeStopAtFirstActive = 2
-} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;
+               memorystatus_release_node(node);
 
-static int jetsam_diagnostic_suspended_one_active_proc = 0;
+#if CONFIG_FREEZE
+               if (frozen) {
+                       memorystatus_freeze_entry_t data = { pid, kMemorystatusFlagsThawed, 0 };
+                       memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
+               }
+#endif
+       }
+}
 
-static int
-sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
+void
+memorystatus_on_inactivity(int pid)
 {
-#pragma unused(arg1, arg2)
-       int error, val = jetsam_diagnostic_mode;
-       boolean_t disabled;
+#pragma unused(pid)
+#if CONFIG_FREEZE
+       /* Wake the freeze thread */
+       thread_wakeup((event_t)&memorystatus_freeze_wakeup);
+#endif 
+}
 
-       error = sysctl_handle_int(oidp, &val, 0, req);
-       if (error || !req->newptr)
-               return (error);
-       if ((val < 0) || (val > 2)) {
-               printf("jetsam: diagnostic mode: invalid value - %d\n", val);
-               return (0);
+static void
+memorystatus_thread(void *param __unused, wait_result_t wr __unused)
+{
+       static boolean_t initialized = FALSE;
+       memorystatus_node *node;
+       uint64_t current_time;
+       pid_t victim_pid = -1;
+
+       if (initialized == FALSE) {
+               initialized = TRUE;
+               assert_wait(&memorystatus_wakeup, THREAD_UNINT);
+               (void)thread_block((thread_continue_t)memorystatus_thread);
        }
+
+       /* Pick next idle exit victim. For now, just iterate through; ideally, this would be more intelligent. */
+       current_time = mach_absolute_time();
        
-       /* 
-        * If jetsam_diagnostic_mode is set, we need to lower memory threshold for jetsam
-        */
-       disabled = (val == 0) && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone);
+       /* Set a cutoff so that we don't idle exit processes that went recently clean */
        
-       jetsam_diagnostic_mode = val;
+       lck_mtx_lock(memorystatus_list_mlock);
        
-       if (disabled) {
-               kern_memorystatus_level_critical = 5;
-               printf("jetsam: diagnostic mode: resetting critical level to %d\n", kern_memorystatus_level_critical);
-       } else {
-               kern_memorystatus_level_critical = 10;
-               printf("jetsam: diagnostic mode: %d: increasing critical level to %d\n", (int) jetsam_diagnostic_mode, kern_memorystatus_level_critical);
-               if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive)
-                       printf("jetsam: diagnostic mode: will stop at first active app\n");
+       if (memorystatus_dirty_count) {
+               TAILQ_FOREACH(node, &memorystatus_list, link) {
+                       if ((node->state & kProcessSupportsIdleExit) && !(node->state & (kProcessDirty|kProcessIgnoreIdleExit))) {                              
+                               if (current_time >= node->clean_time) {
+                                       victim_pid = node->pid;
+                                       break;
+                               }
+                       }
+               }
        }
-       
-       return (0);
-}
-
-SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
-               &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");
-#endif /* DEVELOPMENT || DEBUG */
 
-__private_extern__ void
-kern_memorystatus_init(void)
-{
-       jetsam_lck_attr = lck_attr_alloc_init();
-       jetsam_lck_grp_attr= lck_grp_attr_alloc_init();
-       jetsam_lck_grp = lck_grp_alloc_init("jetsam",  jetsam_lck_grp_attr);
-       jetsam_list_mlock = lck_mtx_alloc_init(jetsam_lck_grp, jetsam_lck_attr);
-       kern_memorystatus_delta = 5 * atop_64(max_mem) / 100;
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       if (-1 != victim_pid) {         
+               proc_t p = proc_find(victim_pid);
+               if (p != NULL) {
+                       boolean_t kill = FALSE;
+                       proc_dirty_start(p);
+                       /* Ensure process is still marked for idle exit and is clean */
+                       if ((p->p_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
+                               /* Clean; issue SIGKILL */
+                               p->p_dirty |= P_DIRTY_TERMINATED;
+                               kill = TRUE;
+                       }
+                       proc_dirty_end(p);
+                       if (TRUE == kill) {
+                               printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_pid, (p->p_comm ? p->p_comm : "(unknown)"));
+                               psignal(p, SIGKILL);
+                       }
+                       proc_rele(p);
+               }
+       }
 
-       (void)kernel_thread(kernel_task, kern_memorystatus_thread);
+       assert_wait(&memorystatus_wakeup, THREAD_UNINT);
+       (void)thread_block((thread_continue_t)memorystatus_thread);
 }
 
+#if CONFIG_JETSAM
+
 static uint32_t
-jetsam_task_page_count(task_t task)
+memorystatus_task_page_count(task_t task)
 {
        kern_return_t ret;
        static task_info_data_t data;
@@ -279,704 +929,644 @@ jetsam_task_page_count(task_t task)
        return 0;
 }
 
+static int
+memorystatus_send_note(int event_code, void *data, size_t data_length) {
+       int ret;
+       struct kev_msg ev_msg;
+       
+       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+       ev_msg.kev_class      = KEV_SYSTEM_CLASS;
+       ev_msg.kev_subclass   = KEV_MEMORYSTATUS_SUBCLASS;
+
+       ev_msg.event_code     = event_code;
+
+       ev_msg.dv[0].data_length = data_length;
+       ev_msg.dv[0].data_ptr = data;
+       ev_msg.dv[1].data_length = 0;
+
+       ret = kev_post_msg(&ev_msg);
+       if (ret) {
+               memorystatus_kev_failure_count++;
+               printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
+       }
+       
+       return ret;
+}
+
 static uint32_t
-jetsam_flags_for_pid(pid_t pid)
+memorystatus_build_flags_from_state(uint32_t state)
+{
+       uint32_t flags = 0;
+
+       if (state & kProcessForeground) {
+               flags |= kMemorystatusFlagsFrontmost;
+       }
+       if (state & kProcessActive) {
+               flags |= kMemorystatusFlagsActive;
+       }
+       if (state & kProcessSupportsIdleExit) {
+               flags |= kMemorystatusFlagsSupportsIdleExit;
+       }
+       if (state & kProcessDirty) {
+               flags |= kMemorystatusFlagsDirty;
+       }
+
+       return flags;
+}
+
+static void 
+memorystatus_move_node_to_exit_list(memorystatus_node *node) 
 {
-       int i;
+       /* Make sure we're called with the list lock held */
+       lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED);
+    
+       /* Now, acquire the exit list lock... */
+       lck_mtx_lock(exit_list_mlock);
+       
+       /* Remove from list + update accounting... */
+       memorystatus_remove_node(node);
+       
+       /* ...then insert at the end of the exit queue */
+       TAILQ_INSERT_TAIL(&exit_list, node, link);
+       
+       /* And relax */
+       lck_mtx_unlock(exit_list_mlock);
+}
 
-       for (i = 0; i < jetsam_priority_list_count; i++) {
-               if (pid == jetsam_priority_list[i].pid) {
-                       return jetsam_priority_list[i].flags;
+void memorystatus_update(unsigned int pages_avail)
+{        
+       if (!memorystatus_delta) {
+           return;
+       }
+                     
+       if ((pages_avail < memorystatus_available_pages_critical) ||
+            (pages_avail >= (memorystatus_available_pages + memorystatus_delta)) ||
+            (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) {
+               memorystatus_available_pages = pages_avail;
+               memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem);
+               /* Only wake the thread if currently blocked */
+               if (OSCompareAndSwap(0, 1, &memorystatus_jetsam_running)) {
+                       thread_wakeup((event_t)&memorystatus_jetsam_wakeup);
                }
        }
-       return 0;
+}
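
The update is deliberately hysteretic: the published page count only moves (and the jetsam thread only wakes) when availability crosses the critical threshold or drifts by at least memorystatus_delta in either direction. With DELTA_PERCENT presumably 5, matching the old `5 * atop_64(max_mem) / 100` computation removed elsewhere in this change, a 512 MB device with 4 KB pages gets delta = 5 * 131072 / 100 = 6553 pages, i.e. roughly one republished level per 25 MB of drift.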
+
+static boolean_t
+memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
+{      
+       memorystatus_node *node;
+    
+       TAILQ_FOREACH(node, &memorystatus_list, link) {
+               if (node->pid == p->p_pid) {
+                       break;
+               }
+       }
+       
+       if (!node) {
+               return FALSE;
+       }
+       
+       entry->pid = p->p_pid;
+       strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1);
+       entry->priority = node->priority;
+       entry->pages = memorystatus_task_page_count(p->task);
+       entry->flags = memorystatus_build_flags_from_state(node->state);
+       memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
+
+       return TRUE;    
 }
 
 static void
-jetsam_snapshot_procs(void)
+memorystatus_jetsam_snapshot_procs_locked(void)
 {
        proc_t p;
        int i = 0;
 
-       jetsam_snapshot.stats.free_pages = vm_page_free_count;
-       jetsam_snapshot.stats.active_pages = vm_page_active_count;
-       jetsam_snapshot.stats.inactive_pages = vm_page_inactive_count;
-       jetsam_snapshot.stats.purgeable_pages = vm_page_purgeable_count;
-       jetsam_snapshot.stats.wired_pages = vm_page_wire_count;
+       memorystatus_jetsam_snapshot.stats.free_pages = vm_page_free_count;
+       memorystatus_jetsam_snapshot.stats.active_pages = vm_page_active_count;
+       memorystatus_jetsam_snapshot.stats.inactive_pages = vm_page_inactive_count;
+       memorystatus_jetsam_snapshot.stats.throttled_pages = vm_page_throttled_count;
+       memorystatus_jetsam_snapshot.stats.purgeable_pages = vm_page_purgeable_count;
+       memorystatus_jetsam_snapshot.stats.wired_pages = vm_page_wire_count;
        proc_list_lock();
        LIST_FOREACH(p, &allproc, p_list) {
-               task_t task = p->task;
-               jetsam_snapshot_list[i].pid = p->p_pid;
-               jetsam_snapshot_list[i].pages = jetsam_task_page_count(task);
-               jetsam_snapshot_list[i].flags = jetsam_flags_for_pid(p->p_pid);
-               strlcpy(&jetsam_snapshot_list[i].name[0], p->p_comm, MAXCOMLEN+1);
-#ifdef DEBUG
-               printf("jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+               if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
+                       continue;
+               }
+               
+               MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                        p->p_pid, 
                        p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
                        p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
-#endif
-               memcpy(&jetsam_snapshot_list[i].uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
-               i++;
-               if (i == kMaxSnapshotEntries) {
+
+               if (++i == kMaxSnapshotEntries) {
                        break;
                }       
        }
        proc_list_unlock();     
-       jetsam_snapshot.entry_count = jetsam_snapshot_list_count = i - 1;
+       memorystatus_jetsam_snapshot.snapshot_time = mach_absolute_time();
+       memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = i - 1;
 }
 
 static void
-jetsam_mark_pid_in_snapshot(pid_t pid, int flags)
+memorystatus_mark_pid_in_snapshot(pid_t pid, int flags)
 {
-
        int i = 0;
 
-       for (i = 0; i < jetsam_snapshot_list_count; i++) {
-               if (jetsam_snapshot_list[i].pid == pid) {
-                       jetsam_snapshot_list[i].flags |= flags;
+       for (i = 0; i < memorystatus_jetsam_snapshot_list_count; i++) {
+               if (memorystatus_jetsam_snapshot_list[i].pid == pid) {
+                       memorystatus_jetsam_snapshot_list[i].flags |= flags;
                        return;
                }
        }
 }
 
 int
-jetsam_kill_top_proc(boolean_t any, uint32_t cause)
+memorystatus_kill_top_proc(boolean_t any, uint32_t cause)
 {
        proc_t p;
+       int pending_snapshot = 0;
 
 #ifndef CONFIG_FREEZE
 #pragma unused(any)
 #endif
+       
+       lck_mtx_lock(memorystatus_list_mlock);
 
-       if (jetsam_snapshot_list_count == 0) {
-               jetsam_snapshot_procs();
+       if (memorystatus_jetsam_snapshot_list_count == 0) {
+               memorystatus_jetsam_snapshot_procs_locked();
+       } else {
+               pending_snapshot = 1;
        }
-       lck_mtx_lock(jetsam_list_mlock);
-       while (jetsam_priority_list_index < jetsam_priority_list_count) {
-               jetsam_priority_entry_t* jetsam_priority_entry = &jetsam_priority_list[jetsam_priority_list_index];
-               pid_t aPid = jetsam_priority_entry->pid;
+
+       while (next_memorystatus_node) {
+               memorystatus_node *node;
+               pid_t aPid;
+#if DEVELOPMENT || DEBUG
+               int activeProcess;
+               int procSuspendedForDiagnosis;
+#endif /* DEVELOPMENT || DEBUG */
+
+               node = next_memorystatus_node;
+               next_memorystatus_node = TAILQ_NEXT(next_memorystatus_node, link);
+
 #if DEVELOPMENT || DEBUG
-               int activeProcess = jetsam_priority_entry->flags & kJetsamFlagsFrontmost;
-               int procSuspendedForDiagnosis = jetsam_priority_entry->flags & kJetsamFlagsSuspForDiagnosis;
+               activeProcess = node->state & kProcessForeground;
+               procSuspendedForDiagnosis = node->state & kProcessSuspendedForDiag;
 #endif /* DEVELOPMENT || DEBUG */
-               jetsam_priority_list_index++;
+               
+               aPid = node->pid;
+
                /* skip empty slots in the list */
-               if (aPid == 0) {
+               if (aPid == 0  || (node->state & kProcessKilled)) {
                        continue; // with lock held
                }
-               lck_mtx_unlock(jetsam_list_mlock);
+
                p = proc_find(aPid);
                if (p != NULL) {
                        int flags = cause;
+                       
 #if DEVELOPMENT || DEBUG
-                       if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && procSuspendedForDiagnosis) {
+                       if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
                                printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
                                proc_rele(p);
-                               lck_mtx_lock(jetsam_list_mlock);
                                continue;
                        }
 #endif /* DEVELOPMENT || DEBUG */
+
 #if CONFIG_FREEZE
-                       hibernation_node *node;
                        boolean_t skip;
-                       if ((node = kern_hibernation_get_node(aPid))) {
-                               boolean_t reclaim_proc = !(node->state & (kProcessBusy | kProcessNoReclaimWorth));
-                               if (any || reclaim_proc) {
-                                       if (node->state & kProcessHibernated) {
-                                               flags |= kJetsamFlagsHibernated;
-                                       }
-                                       skip = FALSE;
-                               } else {
-                                       skip = TRUE;
+                       boolean_t reclaim_proc = !(node->state & (kProcessLocked | kProcessNoReclaimWorth));
+                       if (any || reclaim_proc) {
+                               if (node->state & kProcessFrozen) {
+                                       flags |= kMemorystatusFlagsFrozen;
                                }
-                               kern_hibernation_release_node(node);
-                       } else {
                                skip = FALSE;
+                       } else {
+                               skip = TRUE;
                        }
+                       
                        if (skip) {
                                proc_rele(p);                   
                        } else
 #endif
                        {
 #if DEVELOPMENT || DEBUG
-                               if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && activeProcess) {
-#if DEBUG
-                                       printf("jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
-                                               aPid, (p->p_comm ? p->p_comm: "(unknown)"), kern_memorystatus_level);
-#endif /* DEBUG */
-                                       jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
-                                       jetsam_priority_entry->flags |= kJetsamFlagsSuspForDiagnosis;
-                                       task_suspend(p->task);
-                                       proc_rele(p);
-                                       if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) {
+                               if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
+                                       MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
+                                               aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
+                                       memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsSuspForDiagnosis);
+                                       node->state |= kProcessSuspendedForDiag;
+                                       if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
                                                jetsam_diagnostic_suspended_one_active_proc = 1;
                                                printf("jetsam: returning after suspending first active proc - %d\n", aPid);
                                        }
+                                       lck_mtx_unlock(memorystatus_list_mlock);
+                                       task_suspend(p->task);
+                                       proc_rele(p);
                                        return 0;
                                } else
 #endif /* DEVELOPMENT || DEBUG */
                                {
-                                       printf("jetsam: killing pid %d [%s] - memory_status_level: %d\n", 
-                                               aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level);
-                                       jetsam_mark_pid_in_snapshot(aPid, flags);
-                                       exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
+                                       printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n", 
+                                               aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
+                                       /* Shift queue, update stats */
+                                       memorystatus_move_node_to_exit_list(node);
+                                       memorystatus_mark_pid_in_snapshot(aPid, flags);
+                                       lck_mtx_unlock(memorystatus_list_mlock);
+                                       exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE);
                                        proc_rele(p);
-#if DEBUG
-                                       printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
-#endif /* DEBUG */
                                        return 0;
                                }
                        }
                }
-           lck_mtx_lock(jetsam_list_mlock);
        }
-       lck_mtx_unlock(jetsam_list_mlock);
+       
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       // If we didn't kill anything, toss any newly-created snapshot
+       if (!pending_snapshot) {
+           memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0;
+       }
+       
        return -1;
 }
 
+int memorystatus_kill_top_proc_from_VM(void) {
+       return memorystatus_kill_top_proc(TRUE, kMemorystatusFlagsKilledVM);
+}
+
 static int
-jetsam_kill_hiwat_proc(void)
+memorystatus_kill_hiwat_proc(void)
 {
        proc_t p;
-       int i;
-       if (jetsam_snapshot_list_count == 0) {
-               jetsam_snapshot_procs();
+       int pending_snapshot = 0;
+       memorystatus_node *next_hiwat_node;
+       
+       lck_mtx_lock(memorystatus_list_mlock);
+       
+       if (memorystatus_jetsam_snapshot_list_count == 0) {
+               memorystatus_jetsam_snapshot_procs_locked();
+       } else {
+               pending_snapshot = 1;
        }
-       lck_mtx_lock(jetsam_list_mlock);
-       for (i = jetsam_priority_list_index; i < jetsam_priority_list_count; i++) {
+       
+       next_hiwat_node = next_memorystatus_node;
+       
+       while (next_hiwat_node) {
                pid_t aPid;
                int32_t hiwat;
-               aPid = jetsam_priority_list[i].pid;
-               hiwat = jetsam_priority_list[i].hiwat_pages;    
+               memorystatus_node *node;
+        
+               node = next_hiwat_node;
+               next_hiwat_node = TAILQ_NEXT(next_hiwat_node, link);
+               
+               aPid = node->pid;
+               hiwat = node->hiwat_pages;
+               
                /* skip empty or non-hiwat slots in the list */
-               if (aPid == 0 || (hiwat < 0)) {
+               if (aPid == 0 || (hiwat < 0) || (node->state & kProcessKilled)) {
                        continue; // with lock held
                }
+               
                p = proc_find(aPid);
                if (p != NULL) {
-                       int32_t pages = (int32_t)jetsam_task_page_count(p->task);
+                       int32_t pages = (int32_t)memorystatus_task_page_count(p->task);
                        boolean_t skip = (pages <= hiwat);
 #if DEVELOPMENT || DEBUG
-                       if (!skip && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone)) {
-                               if (jetsam_priority_list[i].flags & kJetsamFlagsSuspForDiagnosis) {
+                       if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
+                               if (node->state & kProcessSuspendedForDiag) {
                                        proc_rele(p);
                                        continue;
                                }
                        }
 #endif /* DEVELOPMENT || DEBUG */
+
 #if CONFIG_FREEZE
                        if (!skip) {
-                               hibernation_node *node;
-                               if ((node = kern_hibernation_get_node(aPid))) {
-                                       if (node->state & kProcessBusy) {
-                                               kern_hibernation_release_node(node);
-                                               skip = TRUE;
-                                       } else {
-                                               kern_hibernation_free_node(node, TRUE);
-                                               skip = FALSE;
-                                       }
+                               if (node->state & kProcessLocked) {
+                                       skip = TRUE;
+                               } else {
+                                       skip = FALSE;
                                }                               
                        }
 #endif
+
                        if (!skip) {
-#if DEBUG
-                               printf("jetsam: %s pid %d [%s] - %d pages > hiwat (%d)\n",
-                                       (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone)?"suspending": "killing", aPid, p->p_comm, pages, hiwat);
-#endif /* DEBUG */
+                               MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d pages > 1 (%d)\n",
+                                       (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, pages, hiwat);
 #if DEVELOPMENT || DEBUG
-                               if (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) {
-                                       lck_mtx_unlock(jetsam_list_mlock);
+                               if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
+                                   memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsSuspForDiagnosis);
+                                       node->state |= kProcessSuspendedForDiag;
+                                       lck_mtx_unlock(memorystatus_list_mlock);
                                        task_suspend(p->task);
                                        proc_rele(p);
-#if DEBUG
-                                       printf("jetsam: pid %d suspended for diagnosis - memory_status_level: %d\n", aPid, kern_memorystatus_level);
-#endif /* DEBUG */
-                                       jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
-                                       jetsam_priority_list[i].flags |= kJetsamFlagsSuspForDiagnosis;
+                                       MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
                                } else
 #endif /* DEVELOPMENT || DEBUG */
-                               {
-                                       jetsam_priority_list[i].pid = 0;
-                                       lck_mtx_unlock(jetsam_list_mlock);
+                               {       
+                                       printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n", 
+                                               aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
+                                       /* Shift queue, update stats */
+                                       memorystatus_move_node_to_exit_list(node);
+                                       memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsKilledHiwat);
+                                       lck_mtx_unlock(memorystatus_list_mlock);                    
                                        exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
                                        proc_rele(p);
-#if DEBUG
-                                       printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
-#endif /* DEBUG */
-                                       jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsKilledHiwat);
-                               }
-                               return 0;
-                       } else {
-                               proc_rele(p);
-                       }
-
-               }
-       }
-       lck_mtx_unlock(jetsam_list_mlock);
-       return -1;
-}
-
-#if CONFIG_FREEZE
-static void
-jetsam_send_hibernation_note(uint32_t flags, pid_t pid, uint32_t pages) {
-       int ret;
-       struct kev_msg ev_msg;
-       jetsam_hibernation_entry_t data;
-       
-       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-       ev_msg.kev_class      = KEV_SYSTEM_CLASS;
-       ev_msg.kev_subclass   = KEV_MEMORYSTATUS_SUBCLASS;
-
-       ev_msg.event_code     = kMemoryStatusHibernationNote;
-
-       ev_msg.dv[0].data_length = sizeof data;
-       ev_msg.dv[0].data_ptr = &data;
-       ev_msg.dv[1].data_length = 0;
-
-       data.pid = pid;
-       data.flags = flags;
-       data.pages = pages;
-
-       ret = kev_post_msg(&ev_msg);
-       if (ret) {
-               kern_memorystatus_kev_failure_count++;
-               printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
-       }
-}
-
-static int
-jetsam_hibernate_top_proc(void)
-{
-       int hibernate_index;
-       proc_t p;
-       uint32_t i;
-
-       lck_mtx_lock(jetsam_list_mlock);
-       
-       for (hibernate_index = jetsam_priority_list_index; hibernate_index < jetsam_priority_list_count; hibernate_index++) {
-               pid_t aPid;
-               uint32_t state = 0;
-
-               aPid = jetsam_priority_list[hibernate_index].pid;
-
-               /* skip empty slots in the list */
-               if (aPid == 0) {
-                       continue; // with lock held
-               }
-
-               if (kern_hibernation_get_process_state(aPid, &state, NULL) != 0) {
-                       continue; // with lock held
-               }
-
-               /* ensure the process isn't marked as busy and is suspended */
-               if ((state & kProcessBusy) || !(state & kProcessSuspended)) {
-                       continue; // with lock held
-               }
-
-               p = proc_find(aPid);
-               if (p != NULL) {
-                       hibernation_node *node;
-                       boolean_t skip;
-                       uint32_t purgeable, wired, clean, dirty;
-                       boolean_t shared;
-                       
-                       lck_mtx_unlock(jetsam_list_mlock);
-                       
-                       if ((node = kern_hibernation_get_node(aPid))) {
-                               if (node->state & kProcessBusy) {
-                                       skip = TRUE;
-                               } else {
-                                       node->state |= kProcessBusy;
-                                       /* Whether we hibernate or not, increase the count so we can maintain the gap between hibernated and suspended processes. */
-                                       kern_memorystatus_hibernated_count++;
-                                       skip = FALSE;
-                               }
-                               kern_hibernation_release_node(node);
-                       } else {
-                               skip = TRUE;
-                       }
-                       
-                       if (!skip) {
-                               /* Only hibernate processes meeting our size criteria. If not met, mark it as such and return. */
-                               task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, TRUE);
-                               skip = (dirty < kern_memorystatus_hibernation_pages_min) || (dirty > kern_memorystatus_hibernation_pages_max);          
-                       }
-                       
-                       if (!skip) {
-                               unsigned int swap_pages_free = default_pager_swap_pages_free();
-                               
-                               /* Ensure there's actually enough space free to hibernate this process. */
-                               if (dirty > swap_pages_free) {
-                                       kern_memorystatus_low_swap_pages = swap_pages_free;
-                                       skip = TRUE;
                                }
-                       }
-
-                       if (skip) {
-                               kern_hibernation_set_process_state(aPid, kProcessIgnored);
-                               proc_rele(p);
                                return 0;
+                       } else {
+                               proc_rele(p);
                        }
 
-#if DEBUG
-                       printf("jetsam: pid %d [%s] hibernating - memory_status_level: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", 
-                               aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
-#endif
-
-                       task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, FALSE);
-                       proc_rele(p);
-                       
-                       kern_hibernation_set_process_state(aPid, kProcessHibernated | (shared ? 0: kProcessNoReclaimWorth));
-                       
-                       /* Update stats */
-                       for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
-                               throttle_intervals[i].pageouts += dirty;
-                       }
-                       kern_memorystatus_hibernation_pageouts += dirty;
-                       kern_memorystatus_hibernation_count++;
-                       
-                       jetsam_send_hibernation_note(kJetsamFlagsHibernated, aPid, dirty);
-
-                       return dirty;
                }
        }
-       lck_mtx_unlock(jetsam_list_mlock);
+       
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       // If we didn't kill anything, toss any newly-created snapshot
+       if (!pending_snapshot) {
+               memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0;
+       }
+       
        return -1;
 }
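
The rewritten scan above replaces indexed array walks with a list traversal that captures TAILQ_NEXT before the current node is examined, so the node can be moved to the exit list (and the lock dropped) without invalidating the walk. A minimal standalone sketch of the same pattern, using the <sys/queue.h> macros in userspace; the node layout and victim test are illustrative stand-ins, not kernel code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct node {
        int pid;
        TAILQ_ENTRY(node) link;
    };

    static TAILQ_HEAD(, node) list = TAILQ_HEAD_INITIALIZER(list);

    int main(void) {
        for (int pid = 1; pid <= 5; pid++) {
            struct node *n = malloc(sizeof(*n));
            if (!n) return 1;
            n->pid = pid;
            TAILQ_INSERT_TAIL(&list, n, link);
        }
        struct node *n, *next = TAILQ_FIRST(&list);
        while (next) {
            n = next;
            next = TAILQ_NEXT(next, link);  /* capture before n is unlinked */
            if (n->pid % 2 == 0) {          /* stand-in for the victim test */
                TAILQ_REMOVE(&list, n, link);
                free(n);
            }
        }
        TAILQ_FOREACH(n, &list, link)
            printf("pid %d survives\n", n->pid);
        return 0;
    }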
-#endif /* CONFIG_FREEZE */
 
 static void
-kern_memorystatus_thread(void)
+memorystatus_jetsam_thread_block(void)
 {
-       struct kev_msg ev_msg;
-       jetsam_kernel_stats_t data;
-       boolean_t post_memorystatus_snapshot = FALSE; 
-       int ret;
+       assert_wait(&memorystatus_jetsam_wakeup, THREAD_UNINT);
+       assert(memorystatus_jetsam_running == 1);
+       OSDecrementAtomic(&memorystatus_jetsam_running);
+       (void)thread_block((thread_continue_t)memorystatus_jetsam_thread);   
+}
 
-       bzero(&data, sizeof(jetsam_kernel_stats_t));
-       bzero(&ev_msg, sizeof(struct kev_msg));
+static void
+memorystatus_jetsam_thread(void *param __unused, wait_result_t wr __unused)
+{
+       boolean_t post_snapshot = FALSE; 
+       static boolean_t is_vm_privileged = FALSE;
+
+       if (is_vm_privileged == FALSE) {
+               /* 
+                * It's the first time the thread has run, so just mark the thread as privileged and block.
+                * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
+                */
+               thread_wire(host_priv_self(), current_thread(), TRUE);
+               is_vm_privileged = TRUE;
+               memorystatus_jetsam_thread_block();
+       }
+       
+       assert(memorystatus_available_pages != (unsigned)-1);
+       
        while(1) {
+               unsigned int last_available_pages;
 
 #if DEVELOPMENT || DEBUG
                jetsam_diagnostic_suspended_one_active_proc = 0;
 #endif /* DEVELOPMENT || DEBUG */
-
-               while (kern_memorystatus_level <= kern_memorystatus_level_highwater) {
-                       if (jetsam_kill_hiwat_proc() < 0) {
+           
+               while (memorystatus_available_pages <= memorystatus_available_pages_highwater) {
+                       if (memorystatus_kill_hiwat_proc() < 0) {
                                break;
                        }
-                       post_memorystatus_snapshot = TRUE;
+                       post_snapshot = TRUE;
                }
 
-               while (kern_memorystatus_level <= kern_memorystatus_level_critical) {
-                       if (jetsam_kill_top_proc(FALSE, kJetsamFlagsKilled) < 0) {
-                               break;
+               while (memorystatus_available_pages <= memorystatus_available_pages_critical) {
+                       if (memorystatus_kill_top_proc(FALSE, kMemorystatusFlagsKilled) < 0) {
+                               /* No victim was found - panic */
+                               panic("memorystatus_jetsam_thread: no victim! available pages:%d, critical page level: %d\n",
+                                        memorystatus_available_pages, memorystatus_available_pages_critical);
                        }
-                       post_memorystatus_snapshot = TRUE;
+                       post_snapshot = TRUE;
 #if DEVELOPMENT || DEBUG
-                       if ((jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) && jetsam_diagnostic_suspended_one_active_proc) {
+                       if ((memorystatus_jetsam_policy & kPolicyDiagnoseFirst) && jetsam_diagnostic_suspended_one_active_proc) {
                                printf("jetsam: stopping killing since 1 active proc suspended already for diagnosis\n");
                                break; // we found first active proc, let's not kill any more
                        }
 #endif /* DEVELOPMENT || DEBUG */
                }
+               
+               last_available_pages = memorystatus_available_pages;
 
-               kern_memorystatus_last_level = kern_memorystatus_level;
-
-               ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-               ev_msg.kev_class      = KEV_SYSTEM_CLASS;
-               ev_msg.kev_subclass   = KEV_MEMORYSTATUS_SUBCLASS;
-
-               /* pass the memory status level (percent free) */
-               ev_msg.event_code     = kMemoryStatusLevelNote;
-
-               ev_msg.dv[0].data_length = sizeof kern_memorystatus_last_level;
-               ev_msg.dv[0].data_ptr = &kern_memorystatus_last_level;
-               ev_msg.dv[1].data_length = sizeof data;
-               ev_msg.dv[1].data_ptr = &data;
-               ev_msg.dv[2].data_length = 0;
-
-               data.free_pages = vm_page_free_count;
-               data.active_pages = vm_page_active_count;
-               data.inactive_pages = vm_page_inactive_count;
-               data.purgeable_pages = vm_page_purgeable_count;
-               data.wired_pages = vm_page_wire_count;
-
-               ret = kev_post_msg(&ev_msg);
-               if (ret) {
-                       kern_memorystatus_kev_failure_count++;
-                       printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
+               if (post_snapshot) {
+                       size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_list_count - 1);
+                       memorystatus_jetsam_snapshot.notification_time = mach_absolute_time();
+                       memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
                }
 
-               if (post_memorystatus_snapshot) {
-                       size_t snapshot_size =  sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count;
-                       ev_msg.event_code = kMemoryStatusSnapshotNote;
-                       ev_msg.dv[0].data_length = sizeof snapshot_size;
-                       ev_msg.dv[0].data_ptr = &snapshot_size;
-                       ev_msg.dv[1].data_length = 0;
-
-                       ret = kev_post_msg(&ev_msg);
-                       if (ret) {
-                               kern_memorystatus_kev_failure_count++;
-                               printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
-                       }
+               if (memorystatus_available_pages >= (last_available_pages + memorystatus_delta) ||
+                   last_available_pages >= (memorystatus_available_pages + memorystatus_delta)) {
+                       continue;
                }
 
-               if (kern_memorystatus_level >= kern_memorystatus_last_level + 5 ||
-                   kern_memorystatus_level <= kern_memorystatus_last_level - 5)
-                       continue;
+#if VM_PRESSURE_EVENTS
+               memorystatus_check_pressure_reset();
+#endif
 
-               assert_wait(&kern_memorystatus_wakeup, THREAD_UNINT);
-               (void)thread_block((thread_continue_t)kern_memorystatus_thread);
+               memorystatus_jetsam_thread_block();
        }
 }
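
The thread now re-blocks only once memory has stabilised: after posting any snapshot, it loops again while available pages have moved by at least memorystatus_delta in either direction since the last pass. A standalone model of that hysteresis test (userspace; the names mirror the kernel variables but nothing here is kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    /* Rescan if the page count moved by >= delta either way since the last pass. */
    static bool should_rescan(unsigned now, unsigned last, unsigned delta) {
        return now >= last + delta || last >= now + delta;
    }

    int main(void) {
        printf("%d\n", should_rescan(5000, 4000, 256)); /* 1: big move, loop again */
        printf("%d\n", should_rescan(4100, 4000, 256)); /* 0: inside band, block */
        return 0;
    }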
 
+#endif /* CONFIG_JETSAM */
+
 #if CONFIG_FREEZE
 
 __private_extern__ void
-kern_hibernation_init(void)
+memorystatus_freeze_init(void)
 {
-    hibernation_lck_attr = lck_attr_alloc_init();
-    hibernation_lck_grp_attr = lck_grp_attr_alloc_init();
-    hibernation_lck_grp = lck_grp_alloc_init("hibernation",  hibernation_lck_grp_attr);
-    hibernation_mlock = lck_mtx_alloc_init(hibernation_lck_grp, hibernation_lck_attr);
+       kern_return_t result;
+       thread_t thread;
        
-       RB_INIT(&hibernation_tree_head);
-
-       (void)kernel_thread(kernel_task, kern_hibernation_thread);
+       result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
+       if (result == KERN_SUCCESS) {
+               thread_deallocate(thread);
+       } else {
+               panic("Could not create memorystatus_freeze_thread");
+       }
 }
 
-static inline boolean_t 
-kern_hibernation_can_hibernate_processes(void) 
+static int
+memorystatus_freeze_top_proc(boolean_t *memorystatus_freeze_swap_low)
 {
-       boolean_t ret;
-       
-       lck_mtx_lock_spin(hibernation_mlock);
-       ret = (kern_memorystatus_suspended_count - kern_memorystatus_hibernated_count) > 
-                               kern_memorystatus_hibernation_suspended_minimum ? TRUE : FALSE;
-       lck_mtx_unlock(hibernation_mlock);
-       
-       return ret;
-}
+       proc_t p;
+       uint32_t i;
+       memorystatus_node *next_freeze_node;
 
-static boolean_t 
-kern_hibernation_can_hibernate(void)
-{
-       /* Only hibernate if we're sufficiently low on memory; this holds off hibernation right after boot, 
-          and is generally a no-op once we've reached steady state. */
-       if (kern_memorystatus_level > kern_memorystatus_level_hibernate) {
-               return FALSE;
-       }
+       lck_mtx_lock(memorystatus_list_mlock);
        
-       /* Check minimum suspended process threshold. */
-       if (!kern_hibernation_can_hibernate_processes()) {
-               return FALSE;
-       }
-
-       /* Is swap running low? */
-       if (kern_memorystatus_low_swap_pages) {
-               /* If there's been no movement in free swap pages since we last attempted hibernation, return. */
-               if (default_pager_swap_pages_free() <= kern_memorystatus_low_swap_pages) {
-                       return FALSE;
-               }
-               
-               /* Pages have been freed, so we can retry. */
-               kern_memorystatus_low_swap_pages = 0;
-       }
+       next_freeze_node = next_memorystatus_node;
        
-       /* OK */
-       return TRUE;
-}
-
-static void
-kern_hibernation_add_node(hibernation_node *node)
-{
-       lck_mtx_lock_spin(hibernation_mlock);
-
-       RB_INSERT(hibernation_tree, &hibernation_tree_head, node);
-       kern_memorystatus_suspended_count++;
-
-       lck_mtx_unlock(hibernation_mlock);      
-}
-
-/* Returns with the hibernation lock taken */
-static hibernation_node *
-kern_hibernation_get_node(pid_t pid) 
-{
-       hibernation_node sought, *found;
-       sought.pid = pid;
-       lck_mtx_lock_spin(hibernation_mlock);
-       found = RB_FIND(hibernation_tree, &hibernation_tree_head, &sought);
-       if (!found) {
-               lck_mtx_unlock(hibernation_mlock);              
-       }
-       return found;
-}
-
-static void
-kern_hibernation_release_node(hibernation_node *node) 
-{
-#pragma unused(node)
-       lck_mtx_unlock(hibernation_mlock);      
-}
-
-static void 
-kern_hibernation_free_node(hibernation_node *node, boolean_t unlock) 
-{
-       /* make sure we're called with the hibernation_mlock held */
-       lck_mtx_assert(hibernation_mlock, LCK_MTX_ASSERT_OWNED);
-
-       if (node->state & (kProcessHibernated | kProcessIgnored)) {
-               kern_memorystatus_hibernated_count--;
-       } 
+       while (next_freeze_node) {
+               memorystatus_node *node;
+               pid_t aPid;
+               uint32_t state;
+               
+               node = next_freeze_node;
+               next_freeze_node = TAILQ_NEXT(next_freeze_node, link);
 
-       kern_memorystatus_suspended_count--;
-       
-       RB_REMOVE(hibernation_tree, &hibernation_tree_head, node);
-       kfree(node, sizeof(hibernation_node));
+               aPid = node->pid;
+               state = node->state;
 
-       if (unlock) {
-               lck_mtx_unlock(hibernation_mlock);
-       }       
-}
+               /* skip empty slots in the list */
+               if (aPid == 0) {
+                       continue; // with lock held
+               }
 
-static void 
-kern_hibernation_register_pid(pid_t pid)
-{
-       hibernation_node *node;
+               /* Ensure the process is eligible for freezing */
+               if ((state & (kProcessKilled | kProcessLocked | kProcessFrozen)) || !(state & kProcessSuspended)) {
+                       continue; // with lock held
+               }
 
-#if DEVELOPMENT || DEBUG
-       node = kern_hibernation_get_node(pid);
-       if (node) {
-               printf("kern_hibernation_register_pid: pid %d already registered!\n", pid);
-               kern_hibernation_release_node(node);
-               return;
-       }
-#endif
+               p = proc_find(aPid);
+               if (p != NULL) {
+                       kern_return_t kr;
+                       uint32_t purgeable, wired, clean, dirty;
+                       boolean_t shared;
+                       uint32_t max_pages = 0;
+                                       
+                       /* Only freeze processes meeting our minimum resident page criteria */
+                       if (memorystatus_task_page_count(p->task) < memorystatus_freeze_pages_min) {
+                               proc_rele(p);
+                               continue;
+                       } 
 
-       /* Register as a candidate for hibernation */
-       node = (hibernation_node *)kalloc(sizeof(hibernation_node));
-       if (node) {     
-               clock_sec_t sec;
-               clock_nsec_t nsec;
-               mach_timespec_t ts;
-               
-               memset(node, 0, sizeof(hibernation_node));
+                       /* Ensure there's enough free space to freeze this process. */                  
+                       max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
+                       if (max_pages < memorystatus_freeze_pages_min) {
+                               *memorystatus_freeze_swap_low = TRUE;
+                               proc_rele(p);
+                               lck_mtx_unlock(memorystatus_list_mlock);
+                               return 0;
+                       }
+                       
+                       /* Mark as locked temporarily to avoid kill */
+                       node->state |= kProcessLocked;
+                       
+                       kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
+                       
+                       MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_proc: task_freeze %s for pid %d [%s] - "
+                       "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", 
+                       (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), 
+                       memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
+                       
+                       proc_rele(p);
+               
+                       node->state &= ~kProcessLocked;
+                       
+                       if (KERN_SUCCESS == kr) {
+                               memorystatus_freeze_entry_t data = { aPid, kMemorystatusFlagsFrozen, dirty };
+                               
+                               memorystatus_frozen_count++;
+                               
+                               node->state |= (kProcessFrozen | (shared ? 0: kProcessNoReclaimWorth));
+                       
+                               /* Update stats */
+                               for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
+                                       throttle_intervals[i].pageouts += dirty;
+                               }
+                       
+                               memorystatus_freeze_pageouts += dirty;
+                               memorystatus_freeze_count++;
 
-               node->pid = pid;
-               node->state = kProcessSuspended;
+                               lck_mtx_unlock(memorystatus_list_mlock);
 
-               clock_get_system_nanotime(&sec, &nsec);
-               ts.tv_sec = sec;
-               ts.tv_nsec = nsec;
-               
-               node->hibernation_ts = ts;
+                               memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
 
-               kern_hibernation_add_node(node);
+                               return dirty;
+                       }
+                       
+                       /* Failed; go round again */
+               }
        }
+       
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       return -1;
 }
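
Before attempting task_freeze(), the loop above sizes the attempt: the candidate must meet the minimum resident page count, and the per-freeze budget is the free swap clamped to the per-process maximum; if even the clamp falls below the minimum, the caller's swap-low flag is raised. A standalone restatement of that gate (names and values are illustrative stand-ins):

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Returns 0 with *max_pages set, -1 if too small to freeze, -2 if swap is low. */
    static int freeze_budget(unsigned resident, unsigned swap_free,
                             unsigned pages_min, unsigned pages_max,
                             unsigned *max_pages) {
        if (resident < pages_min)
            return -1;
        *max_pages = MIN(swap_free, pages_max);
        if (*max_pages < pages_min)
            return -2;
        return 0;
    }

    int main(void) {
        unsigned budget;
        if (freeze_budget(4000, 100000, 500, 8000, &budget) == 0)
            printf("freeze up to %u pages\n", budget);  /* clamped to 8000 */
        return 0;
    }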
 
-static void 
-kern_hibernation_unregister_pid(pid_t pid)
+static inline boolean_t 
+memorystatus_can_freeze_processes(void) 
 {
-       hibernation_node *node;
+       boolean_t ret;
        
-       node = kern_hibernation_get_node(pid);
-       if (node) {
-               kern_hibernation_free_node(node, TRUE);
-       }
-}
-
-void 
-kern_hibernation_on_pid_suspend(pid_t pid)
-{      
-       kern_hibernation_register_pid(pid);
-}
-
-/* If enabled, we bring all the hibernated pages back prior to resumption; otherwise, they're faulted back in on demand */
-#define THAW_ON_RESUME 1
-
-void
-kern_hibernation_on_pid_resume(pid_t pid, task_t task)
-{      
-#if THAW_ON_RESUME
-       hibernation_node *node;
-       if ((node = kern_hibernation_get_node(pid))) {
-               if (node->state & kProcessHibernated) {
-                       node->state |= kProcessBusy;
-                       kern_hibernation_release_node(node);
-                       task_thaw(task);
-                       jetsam_send_hibernation_note(kJetsamFlagsThawed, pid, 0);
+       lck_mtx_lock(memorystatus_list_mlock);
+       
+       if (memorystatus_suspended_count) {
+               uint32_t average_resident_pages, estimated_processes;
+        
+               /* Estimate the number of suspended processes we can fit */
+               average_resident_pages = memorystatus_suspended_resident_count / memorystatus_suspended_count;
+               estimated_processes = memorystatus_suspended_count +
+                       ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);
+
+               /* If it's predicted that no freeze will occur, lower the threshold temporarily */
+               if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
+                       memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW;
                } else {
-                       kern_hibernation_release_node(node);
+                       memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;        
                }
-       }
-#else
-#pragma unused(task)
-#endif
-       kern_hibernation_unregister_pid(pid);
-}
-
-void
-kern_hibernation_on_pid_hibernate(pid_t pid)
-{
-#pragma unused(pid)
-
-       /* Wake the hibernation thread */
-       thread_wakeup((event_t)&kern_hibernation_wakeup);       
-}
 
-static int 
-kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts) 
-{
-       hibernation_node *found;
-       int err = ESRCH;
+               MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n", 
+                       memorystatus_suspended_count, average_resident_pages, estimated_processes);
        
-       *state = 0;
-
-       found = kern_hibernation_get_node(pid);
-       if (found) {
-               *state = found->state;
-               if (ts) {
-                       *ts = found->hibernation_ts;
+               if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
+                       ret = TRUE;
+               } else {
+                       ret = FALSE;
                }
-               err = 0;
-               kern_hibernation_release_node(found);
+       } else {
+               ret = FALSE;
        }
+                               
+       lck_mtx_unlock(memorystatus_list_mlock);
        
-       return err;
+       return ret;
 }
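
The threshold adjustment above works off a simple projection: divide the resident pages held by the suspended set by the number of suspended processes to get an average footprint, then estimate how many more suspended processes would fit in the pages between the current level and the critical level. A worked standalone model; the inputs are made up, and the sketch assumes suspended > 0 and avail > critical:

    #include <stdio.h>

    static unsigned estimated_processes(unsigned suspended, unsigned suspended_resident,
                                        unsigned avail, unsigned critical) {
        unsigned avg = suspended_resident / suspended;
        return suspended + (avail - critical) / avg;
    }

    int main(void) {
        /* 4 suspended processes holding 20000 resident pages (avg 5000), with
         * 25000 pages of headroom above critical -> 4 + 5 = 9 estimated. */
        printf("%u\n", estimated_processes(4, 20000, 30000, 5000));
        return 0;
    }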
 
-static int
-kern_hibernation_set_process_state(pid_t pid, uint32_t state) 
+static boolean_t
+memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
 {
-       hibernation_node *found;
-       int err = ESRCH;
+       /* Only freeze if we're sufficiently low on memory; this holds off freeze right
+          after boot, and is generally a no-op once we've reached steady state. */
+       if (memorystatus_available_pages > memorystatus_freeze_threshold) {
+               return FALSE;
+       }
+       
+       /* Check minimum suspended process threshold. */
+       if (!memorystatus_can_freeze_processes()) {
+               return FALSE;
+       }
 
-       found = kern_hibernation_get_node(pid);
-       if (found) {
-               found->state = state;
-               err = 0;
-               kern_hibernation_release_node(found);
+       /* Is swap running low? */
+       if (*memorystatus_freeze_swap_low) {
+               /* If there's been no movement in free swap pages since we last attempted freeze, return. */
+               if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
+                       return FALSE;
+               }
+               
+               /* Pages have been freed - we can retry. */
+               *memorystatus_freeze_swap_low = FALSE;  
        }
        
-       return err;
+       /* OK */
+       return TRUE;
 }
 
 static void
-kern_hibernation_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
+memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
 {
        if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
                if (!interval->max_pageouts) {
-                       interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * HIBERNATION_DAILY_PAGEOUTS_MAX) / (24 * 60)));
+                       interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
                } else {
-                       printf("jetsam: %d minute throttle timeout, resetting\n", interval->mins);
+                       printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
                }
                interval->ts.tv_sec = interval->mins * 60;
                interval->ts.tv_nsec = 0;
                ADD_MACH_TIMESPEC(&interval->ts, ts);
-               /* Since we update the throttle stats pre-hibernation, adjust for overshoot here */
+               /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
                if (interval->pageouts > interval->max_pageouts) {
                        interval->pageouts -= interval->max_pageouts;
                } else {
@@ -984,18 +1574,17 @@ kern_hibernation_update_throttle_interval(mach_timespec_t *ts, struct throttle_i
                }
                interval->throttle = FALSE;
        } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
-               printf("jetsam: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
+               printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
                interval->throttle = TRUE;
        }       
-#ifdef DEBUG
-       printf("jetsam: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", 
+
+       MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", 
                interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60, 
                interval->throttle ? "on" : "off");
-#endif
 }
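
The budget established above pro-rates a daily pageout cap over the interval length and scales it by a burst multiple, so short intervals may burst higher while the long interval still bounds the daily total. A worked model of that formula (the daily cap is an arbitrary stand-in for FREEZE_DAILY_PAGEOUTS_MAX):

    #include <stdio.h>
    #include <stdint.h>

    #define DAILY_PAGEOUTS_MAX 128000u  /* stand-in value */

    static uint64_t interval_budget(uint32_t mins, uint32_t burst_multiple) {
        return (uint64_t)burst_multiple *
               (((uint64_t)mins * DAILY_PAGEOUTS_MAX) / (24 * 60));
    }

    int main(void) {
        /* A 60-minute interval with a 3x burst multiple: 3 * (60/1440) of the
         * daily cap = 15999 pageouts (integer division). */
        printf("%llu\n", (unsigned long long)interval_budget(60, 3));
        return 0;
    }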
 
 static boolean_t
-kern_hibernation_throttle_update(void) 
+memorystatus_freeze_update_throttle(void) 
 {
        clock_sec_t sec;
        clock_nsec_t nsec;
@@ -1004,7 +1593,7 @@ kern_hibernation_throttle_update(void)
        boolean_t throttled = FALSE;
 
 #if DEVELOPMENT || DEBUG
-       if (!kern_memorystatus_hibernation_throttle_enabled)
+       if (!memorystatus_freeze_throttle_enabled)
                return FALSE;
 #endif
 
@@ -1012,14 +1601,14 @@ kern_hibernation_throttle_update(void)
        ts.tv_sec = sec;
        ts.tv_nsec = nsec;
        
-       /* Check hibernation pageouts over multiple intervals and throttle if we've exceeded our budget.
+       /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
         *
-        * This ensures that periods of inactivity can't be used as 'credit' towards hibernation if the device has
+        * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
         * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
         * order to allow for bursts of activity.
         */
        for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
-               kern_hibernation_update_throttle_interval(&ts, &throttle_intervals[i]);
+               memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
                if (throttle_intervals[i].throttle == TRUE)
                        throttled = TRUE;
        }                                                               
@@ -1028,159 +1617,276 @@ kern_hibernation_throttle_update(void)
 }
 
 static void
-kern_hibernation_cull(void)
+memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
 {
-       hibernation_node *node, *next;
-       lck_mtx_lock(hibernation_mlock);
+       static boolean_t memorystatus_freeze_swap_low = FALSE;
+       
+       if (memorystatus_freeze_enabled) {
+               if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
+                       /* Only freeze if we've not exceeded our pageout budgets */
+                       if (!memorystatus_freeze_update_throttle()) {
+                               memorystatus_freeze_top_proc(&memorystatus_freeze_swap_low);
+                       } else {
+                               printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
+                               memorystatus_freeze_throttle_count++; /* Throttled, update stats */
+                       }
+               }
+       }
 
-       for (node = RB_MIN(hibernation_tree, &hibernation_tree_head); node != NULL; node = next) {
-               proc_t p;
+       assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
+       thread_block((thread_continue_t) memorystatus_freeze_thread);   
+}
+
+#endif /* CONFIG_FREEZE */
 
-               next = RB_NEXT(hibernation_tree, &hibernation_tree_head, node);
+#if CONFIG_JETSAM
 
-               /* TODO: probably suboptimal, so revisit should it cause a performance issue */
-               p = proc_find(node->pid);
-               if (p) {
-                       proc_rele(p);
-               } else {
-                       kern_hibernation_free_node(node, FALSE);                                
+#if VM_PRESSURE_EVENTS
+
+static inline boolean_t
+memorystatus_get_pressure_locked(void) {
+       if (memorystatus_available_pages > memorystatus_available_pages_pressure) {
+                /* Too many free pages */
+                return kVMPressureNormal;
+       }
+       
+#if CONFIG_FREEZE
+       if (memorystatus_frozen_count > 0) {
+                /* Frozen processes exist */
+                return kVMPressureNormal;              
+       }
+#endif
+
+       if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
+               /* Too many suspended processes */
+               return kVMPressureNormal;
+       }
+       
+       if (memorystatus_suspended_count > 0) {
+               /* Some suspended processes - warn */
+               return kVMPressureWarning;
+       }
+    
+       /* Otherwise, pressure level is urgent */
+       return kVMPressureUrgent;
+}
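
The ladder above maps the page level and process counts onto a pressure level. Restated as a pure function in userspace; the enum and threshold are stand-ins for the kVMPressure* values and MEMORYSTATUS_SUSPENDED_THRESHOLD, and the frozen check only exists under CONFIG_FREEZE:

    #include <stdio.h>

    enum pressure { NORMAL, WARNING, URGENT };
    #define SUSPENDED_THRESHOLD 4  /* stand-in value */

    static enum pressure pressure_level(unsigned avail, unsigned pressure_pages,
                                        unsigned frozen, unsigned suspended) {
        if (avail > pressure_pages)          return NORMAL;  /* plenty free */
        if (frozen > 0)                      return NORMAL;  /* frozen procs exist */
        if (suspended > SUSPENDED_THRESHOLD) return NORMAL;  /* too many suspended */
        if (suspended > 0)                   return WARNING;
        return URGENT;
    }

    int main(void) {
        printf("%d\n", pressure_level(100, 5000, 0, 2)); /* 1 == WARNING */
        return 0;
    }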
+
+pid_t
+memorystatus_request_vm_pressure_candidate(void) {
+       memorystatus_node *node;
+       pid_t pid = -1;
+
+       lck_mtx_lock(memorystatus_list_mlock);
+
+       /* Are we in a low memory state? */
+       memorystatus_vm_pressure_level = memorystatus_get_pressure_locked();
+       if (kVMPressureNormal != memorystatus_vm_pressure_level) {
+               TAILQ_FOREACH(node, &memorystatus_list, link) {
+                       /* Skip ineligible processes */
+                       if (node->state & (kProcessKilled | kProcessLocked | kProcessSuspended | kProcessFrozen | kProcessNotifiedForPressure)) {
+                               continue;
+                       }
+                       node->state |= kProcessNotifiedForPressure;
+                       pid = node->pid;
+                       break;
                }
        }
+    
+       lck_mtx_unlock(memorystatus_list_mlock);
 
-       lck_mtx_unlock(hibernation_mlock);      
+       return pid;
+}
+
+void
+memorystatus_send_pressure_note(pid_t pid) {
+    memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
 }
 
 static void
-kern_hibernation_thread(void)
-{
-       if (vm_freeze_enabled) {
-               if (kern_hibernation_can_hibernate()) {
-                       
-                       /* Cull dead processes */
-                       kern_hibernation_cull();
-                       
-                       /* Only hibernate if we've not exceeded our pageout budgets */
-                       if (!kern_hibernation_throttle_update()) {
-                               jetsam_hibernate_top_proc();
-                       } else {
-                               printf("kern_hibernation_thread: in throttle, ignoring hibernation\n");
-                               kern_memorystatus_hibernation_throttle_count++; /* Throttled, update stats */
+memorystatus_check_pressure_reset() {        
+       lck_mtx_lock(memorystatus_list_mlock);
+       
+       if (kVMPressureNormal != memorystatus_vm_pressure_level) {
+               memorystatus_vm_pressure_level = memorystatus_get_pressure_locked();
+               if (kVMPressureNormal == memorystatus_vm_pressure_level) {
+                       memorystatus_node *node;
+                       TAILQ_FOREACH(node, &memorystatus_list, link) {
+                               node->state &= ~kProcessNotifiedForPressure;
                        }
                }
        }
-
-       assert_wait((event_t) &kern_hibernation_wakeup, THREAD_UNINT);
-       thread_block((thread_continue_t) kern_hibernation_thread);      
+    
+       lck_mtx_unlock(memorystatus_list_mlock);
 }
 
-#endif /* CONFIG_FREEZE */
+#endif /* VM_PRESSURE_EVENTS */
+
+/* Sysctls... */
 
 static int
-sysctl_io_variable(struct sysctl_req *req, void *pValue, size_t currentsize, size_t maxsize, size_t *newsize)
+sysctl_memorystatus_list_change SYSCTL_HANDLER_ARGS
 {
-    int error;
-
-    /* Copy blob out */
-    error = SYSCTL_OUT(req, pValue, currentsize);
+       int ret;
+       memorystatus_priority_entry_t entry;
 
-    /* error or nothing to set */
-    if (error || !req->newptr)
-        return(error);
+#pragma unused(oidp, arg1, arg2)
 
-    if (req->newlen > maxsize) {
+       if (!req->newptr || req->newlen > sizeof(entry)) {
                return EINVAL;
        }
-       error = SYSCTL_IN(req, pValue, req->newlen);
 
-       if (!error) {
-               *newsize = req->newlen;
+       ret = SYSCTL_IN(req, &entry, req->newlen);
+       if (ret) {
+               return ret;
        }
 
-    return(error);
+       memorystatus_list_change(FALSE, entry.pid, entry.priority, entry.flags, -1);
+
+       return ret;
 }
 
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_jetsam_change, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+    0, 0, &sysctl_memorystatus_list_change, "I", "");
+    
 static int
-sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+sysctl_memorystatus_priority_list(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
-       int i, ret;
-       jetsam_priority_entry_t temp_list[kMaxPriorityEntries];
-       size_t newsize, currentsize;
-
-       if (req->oldptr) {
-               lck_mtx_lock(jetsam_list_mlock);
-               for (i = 0; i < jetsam_priority_list_count; i++) {
-                       temp_list[i] = jetsam_priority_list[i];
+       int ret;
+       size_t allocated_size, list_size = 0;
+       memorystatus_priority_entry_t *list;
+       uint32_t list_count, i = 0;
+       memorystatus_node *node;
+        
+       /* Races, but this is only for diagnostic purposes */
+       list_count = memorystatus_list_count;
+       allocated_size = sizeof(memorystatus_priority_entry_t) * list_count;
+       list = kalloc(allocated_size);
+       if (!list) {
+               return ENOMEM;
+       }
+
+       memset(list, 0, allocated_size);
+        
+       lck_mtx_lock(memorystatus_list_mlock);
+
+       TAILQ_FOREACH(node, &memorystatus_list, link) {
+               list[i].pid = node->pid;
+               list[i].priority = node->priority; 
+               list[i].flags = memorystatus_build_flags_from_state(node->state);
+               list[i].hiwat_pages = node->hiwat_pages;
+               list_size += sizeof(memorystatus_priority_entry_t);
+               if (++i >= list_count) {
+                       break;
+               }       
+       }
+       
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       if (!list_size) {
+               if (req->oldptr) {
+                       MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning EINVAL\n");
+                       return EINVAL;
+               }
+               else {
+                       MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning 0 for size\n");
                }
-               lck_mtx_unlock(jetsam_list_mlock);
+       } else {
+               MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning %ld for size\n", (long)list_size);
        }
+       
+       ret = SYSCTL_OUT(req, list, list_size);
 
-       currentsize = sizeof(jetsam_priority_list[0]) * jetsam_priority_list_count;
+       kfree(list, allocated_size);
+       
+       return ret;
+}
 
-       ret = sysctl_io_variable(req, &temp_list[0], currentsize, sizeof(temp_list), &newsize);
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_memorystatus_priority_list, "S,jetsam_priorities", "");
 
-       if (!ret && req->newptr) {
-               int temp_list_count = newsize / sizeof(jetsam_priority_list[0]);
-#if DEBUG 
-               printf("set jetsam priority pids = { ");
-               for (i = 0; i < temp_list_count; i++) {
-                       printf("(%d, 0x%08x, %d) ", temp_list[i].pid, temp_list[i].flags, temp_list[i].hiwat_pages);
-               }
-               printf("}\n");
-#endif /* DEBUG */
-               lck_mtx_lock(jetsam_list_mlock);
-#if CONFIG_FREEZE
-               jetsam_priority_list_hibernation_index = 0;
+static void
+memorystatus_update_levels_locked(void) {
+       /* Set the baseline levels in pages */
+       memorystatus_available_pages_critical = (CRITICAL_PERCENT / DELTA_PERCENT) * memorystatus_delta;
+       memorystatus_available_pages_highwater = (HIGHWATER_PERCENT / DELTA_PERCENT) * memorystatus_delta;
+#if VM_PRESSURE_EVENTS
+       memorystatus_available_pages_pressure = (PRESSURE_PERCENT / DELTA_PERCENT) * memorystatus_delta;
 #endif
-               jetsam_priority_list_index = 0;
-               jetsam_priority_list_count = temp_list_count;
-               for (i = 0; i < temp_list_count; i++) {
-                       jetsam_priority_list[i] = temp_list[i];
-               }
-               for (i = temp_list_count; i < kMaxPriorityEntries; i++) {
-                       jetsam_priority_list[i].pid = 0;
-                       jetsam_priority_list[i].flags = 0;
-                       jetsam_priority_list[i].hiwat_pages = -1;
-                       jetsam_priority_list[i].hiwat_reserved1 = -1;
-                       jetsam_priority_list[i].hiwat_reserved2 = -1;
-                       jetsam_priority_list[i].hiwat_reserved3 = -1;
-               }
-               lck_mtx_unlock(jetsam_list_mlock);
-       }       
-       return ret;
+       
+#if DEBUG || DEVELOPMENT
+       if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
+               memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;
+               memorystatus_available_pages_highwater += memorystatus_jetsam_policy_offset_pages_diagnostic;
+#if VM_PRESSURE_EVENTS
+               memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
+#endif
+       }
+#endif
+       
+       /* Only boost the critical level - it's more important to kill right away than issue warnings */
+       if (memorystatus_jetsam_policy & kPolicyMoreFree) {
+               memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_more_free;
+       }
+}
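
With the percent macros fixed at compile time, the levels are pure multiples of memorystatus_delta. A worked model; the percent values and delta are stand-ins, and the real constants are defined earlier in the file:

    #include <stdio.h>

    #define CRITICAL_PERCENT  5  /* stand-in values */
    #define HIGHWATER_PERCENT 4
    #define DELTA_PERCENT     1

    int main(void) {
        unsigned delta = 256;  /* stand-in for memorystatus_delta, in pages */
        unsigned critical  = (CRITICAL_PERCENT / DELTA_PERCENT) * delta;   /* 1280 */
        unsigned highwater = (HIGHWATER_PERCENT / DELTA_PERCENT) * delta;  /* 1024 */
        printf("critical: %u pages, highwater: %u pages\n", critical, highwater);
        return 0;
    }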
+
+static int
+sysctl_memorystatus_jetsam_policy_more_free SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+       int error, more_free = 0;
+
+       error = priv_check_cred(kauth_cred_get(), PRIV_VM_JETSAM, 0);
+       if (error)
+               return (error);
+
+       error = sysctl_handle_int(oidp, &more_free, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       lck_mtx_lock(memorystatus_list_mlock);
+       
+       if (more_free) {
+               memorystatus_jetsam_policy |= kPolicyMoreFree;
+       } else {
+               memorystatus_jetsam_policy &= ~kPolicyMoreFree;               
+       }
+        
+       memorystatus_update_levels_locked();
+               
+       lck_mtx_unlock(memorystatus_list_mlock);
+       
+       return 0;
 }
 
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_jetsam_policy_more_free, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED|CTLFLAG_ANYBODY,
+    0, 0, &sysctl_memorystatus_jetsam_policy_more_free, "I", "");
+
 static int
-sysctl_handle_kern_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+sysctl_handle_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
        int ret;
        size_t currentsize = 0;
 
-       if (jetsam_snapshot_list_count > 0) {
-               currentsize = sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count;
+       if (memorystatus_jetsam_snapshot_list_count > 0) {
+               currentsize = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_list_count - 1);
        }
        if (!currentsize) {
                if (req->oldptr) {
-#ifdef DEBUG
-                       printf("kern.memorystatus_snapshot returning EINVAL\n");
-#endif
+                       MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning EINVAL\n");
                        return EINVAL;
                }
                else {
-#ifdef DEBUG
-                       printf("kern.memorystatus_snapshot returning 0 for size\n");
-#endif
+                       MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning 0 for size\n");
                }
        } else {
-#ifdef DEBUG
-                       printf("kern.memorystatus_snapshot returning %ld for size\n", (long)currentsize);
-#endif
+               MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning %ld for size\n", (long)currentsize);
        }       
-       ret = sysctl_io_variable(req, &jetsam_snapshot, currentsize, 0, NULL);
+       ret = SYSCTL_OUT(req, &memorystatus_jetsam_snapshot, currentsize);
        if (!ret && req->oldptr) {
-               jetsam_snapshot.entry_count = jetsam_snapshot_list_count = 0;
+               memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0;
        }
        return ret;
 }
 
-SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_memorystatus_priority_list, "S,jetsam_priorities", "");
-SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_kern_memorystatus_snapshot, "S,jetsam_snapshot", "");
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_memorystatus_snapshot, "S,memorystatus_snapshot", "");
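+
Reading the snapshot from userspace follows the usual two-call sysctl pattern: probe with a NULL buffer for the size, then fetch. Note the handler zeroes the entry counts after a successful copy-out, so the snapshot is consume-once. A hedged sketch (the buffer is left opaque; memorystatus_jetsam_snapshot_t's layout is not reproduced here):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/sysctl.h>

    int main(void) {
        size_t size = 0;
        if (sysctlbyname("kern.memorystatus_snapshot", NULL, &size, NULL, 0) != 0
            || size == 0) {
            printf("no snapshot pending\n");
            return 0;
        }
        void *buf = malloc(size);
        if (buf && sysctlbyname("kern.memorystatus_snapshot", buf, &size, NULL, 0) == 0) {
            printf("read %zu bytes of jetsam snapshot\n", size);
        }
        free(buf);
        return 0;
    }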
+
+#endif /* CONFIG_JETSAM */
index 5cc239bd0c4c6a4c375f0dbdf7d539b8e7d80736..7c27eb16dd9fd4aa6fb117acd1e32dfed7ba9941 100644
@@ -405,7 +405,7 @@ SYSCTL_PROC(_hw, HW_L2SETTINGS,   l2settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG
 SYSCTL_PROC(_hw, HW_L3SETTINGS,   l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L3SETTINGS, sysctl_hw_generic, "I", "");
 SYSCTL_INT (_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, &cputhreadtype, 0, "");
 
-#if defined (__i386__) || defined (__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 int mmx_flag = -1;
 int sse_flag = -1;
 int sse2_flag = -1;
@@ -435,6 +435,8 @@ SYSCTL_INT(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_L
 SYSCTL_INT(_hw_optional, OID_AUTO, rdrand, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &rdrand_flag, 0, "");
 SYSCTL_INT(_hw_optional, OID_AUTO, f16c, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &f16c_flag, 0, "");
 SYSCTL_INT(_hw_optional, OID_AUTO, enfstrg, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &enfstrg_flag, 0, "");
+#else
+#error Unsupported arch
 #endif /* !__i386__ && !__x86_64 && !__arm__ */
 
 /*
@@ -479,6 +481,8 @@ sysctl_mib_init(void)
        cputhreadtype = cpu_threadtype();
 #if defined(__i386__) || defined (__x86_64__)
     cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit;
+#else
+#error Unsupported arch
 #endif
 
        /*
@@ -530,8 +534,8 @@ sysctl_mib_init(void)
        packages = roundup(ml_cpu_cache_sharing(0), cpuid_info()->thread_count)
                        / cpuid_info()->thread_count;
 
-#else /* end __arm__ */
-# error unknown architecture
+#else
+#error unknown architecture
 #endif /* !__i386__ && !__x86_64 && !__arm__ */
 
 }
index 979af3e5d2c527d7ac28e1b2f61154a0fc99eb48..13a64cb93a78aaae1f30b9ebb8d77f7e82c1a443 100644
 #include <mach/vm_map.h>
 #include <mach/host_priv.h>
 
+#include <machine/machine_routines.h>
+
 #include <kern/cpu_number.h>
 #include <kern/host.h>
+#include <kern/task.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -145,8 +148,8 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        int                     err=0;
        vm_map_t                user_map;
        kern_return_t           result;
-       mach_vm_offset_t        user_addr;
-       mach_vm_size_t          user_size;
+       vm_map_offset_t         user_addr;
+       vm_map_size_t           user_size;
        vm_object_offset_t      pageoff;
        vm_object_offset_t      file_pos;
        int                     alloc_flags=0;
@@ -161,8 +164,8 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        int fd = uap->fd;
        int num_retries = 0;
 
-       user_addr = (mach_vm_offset_t)uap->addr;
-       user_size = (mach_vm_size_t) uap->len;
+       user_addr = (vm_map_offset_t)uap->addr;
+       user_size = (vm_map_size_t) uap->len;
 
        AUDIT_ARG(addr, user_addr);
        AUDIT_ARG(len, user_size);
@@ -207,7 +210,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        user_size += pageoff;                   /* low end... */
        user_size = mach_vm_round_page(user_size);      /* hi end */
 
-       if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || (flags & MAP_FILE))){
+       if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){
                return EINVAL;
        }
        /*
@@ -247,12 +250,11 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 #if CONFIG_MACF
                /*
                 * Entitlement check.
-                * Re-enable once mac* is implemented.
                 */
-               /*error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
+               error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
                if (error) {
                        return EINVAL;
-               }*/             
+               }               
 #endif /* MAC */
 
                /*
@@ -279,6 +281,9 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
                struct vnode_attr va;
                vfs_context_t ctx = vfs_context_current();
 
+               if (flags & MAP_JIT)
+                       return EINVAL;
+
                /*
                 * Mapping file, get fp for validation. Obtain vnode and make
                 * sure it is of appropriate type.
@@ -403,13 +408,10 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 
 #if CONFIG_PROTECT
                        {
-                               void *cnode;
-                               if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
-                                       error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
-                                       if (error) {
-                                               (void) vnode_put(vp);
-                                               goto bad;
-                                       }
+                               error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
+                               if (error) {
+                                       (void) vnode_put(vp);
+                                       goto bad;
                                }
                        }
 #endif /* CONFIG_PROTECT */
@@ -616,7 +618,6 @@ bad:
        KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
        KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
                              (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
-
        return(error);
 }
 
@@ -639,9 +640,7 @@ msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int3
 
        addr = (mach_vm_offset_t) uap->addr;
        size = (mach_vm_size_t)uap->len;
-
        KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
-
        if (addr & PAGE_MASK_64) {
                /* UNIX SPEC: user address is not page-aligned, return EINVAL */
                return EINVAL;
@@ -1175,14 +1174,11 @@ map_fd_funneled(
 #if CONFIG_PROTECT
        /* check for content protection access */
        {
-       void *cnode;
-       if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
-               err = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
-               if (err != 0) { 
-                       (void)vnode_put(vp);
-                       goto bad;
-               }
-       }
+               err = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
+               if (err != 0) { 
+                       (void) vnode_put(vp);
+                       goto bad;
+               }
        }
 #endif /* CONFIG_PROTECT */
 
index f352a55bf34beaf2561d88e1b3e2c8239ff4daad..6d696b4244226b782877e0e4312106a724501921 100644 (file)
 #include <vm/vm_protos.h>
 #include <vm/vm_map.h>         /* vm_map_switch_protect() */
 #include <mach/task.h>
+#include <mach/message.h>
 
 #if CONFIG_MACF
 #include <security/mac_framework.h>
@@ -174,6 +175,7 @@ static void pgrp_remove(proc_t p);
 static void pgrp_replace(proc_t p, struct pgrp *pgrp);
 static void pgdelete_dropref(struct pgrp *pgrp);
 extern void pg_rele_dropref(struct pgrp * pgrp);
+static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken);
 
 struct fixjob_iterargs {
        struct pgrp * pg;
@@ -353,6 +355,23 @@ proc_findinternal(int pid, int locked)
        return(p);
 }
 
+proc_t
+proc_findthread(thread_t thread)
+{
+       proc_t p = PROC_NULL;
+       struct uthread *uth;
+
+       proc_list_lock();
+       uth = get_bsdthread_info(thread);
+       if (uth && (uth->uu_flag & UT_VFORK))
+               p = uth->uu_proc;
+       else
+               p = (proc_t)(get_bsdthreadtask_info(thread));
+       p = proc_ref_locked(p);
+       proc_list_unlock();
+       return(p);
+}
+
 int 
 proc_rele(proc_t p)
 {
@@ -733,6 +752,12 @@ proc_suser(proc_t p)
        return(error);
 }
 
+task_t
+proc_task(proc_t proc)
+{
+       return (task_t)proc->task;
+}
+
 /*      
  * Obtain the first thread in a process
  *
@@ -1686,10 +1711,31 @@ SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &maxlcid, 0,
 int 
 csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
 {
-       int ops = uap->ops;
-       pid_t pid = uap->pid;
-       user_addr_t uaddr = uap->useraddr;
-       size_t usize = (size_t)CAST_DOWN(size_t, uap->usersize);
+       return(csops_internal(uap->pid, uap->ops, uap->useraddr, 
+               uap->usersize, USER_ADDR_NULL));
+}
+
+int 
+csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused int32_t *retval)
+{
+       if (uap->uaudittoken == USER_ADDR_NULL)
+               return(EINVAL);
+       switch (uap->ops) {
+               case CS_OPS_PIDPATH:
+               case CS_OPS_ENTITLEMENTS_BLOB:
+                       break;
+               default:
+                       return(EINVAL);
+       };
+
+       return(csops_internal(uap->pid, uap->ops, uap->useraddr, 
+               uap->usersize, uap->uaudittoken));
+}
+
+static int
+csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken)
+{
+       size_t usize = (size_t)CAST_DOWN(size_t, usersize);
        proc_t pt;
        uint32_t retflags;
        int vid, forself;
@@ -1698,6 +1744,8 @@ csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
        off_t toff;
        char * buf;
        unsigned char cdhash[SHA1_RESULTLEN];
+       audit_token_t token;
+       unsigned int upid=0, uidversion = 0;
        
        forself = error = 0;
 
@@ -1714,15 +1762,37 @@ csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
                        return(EOVERFLOW);
                if (kauth_cred_issuser(kauth_cred_get()) != TRUE) 
                        return(EPERM);
-       } else if ((forself == 0) && ((ops != CS_OPS_STATUS) && (ops != CS_OPS_CDHASH) && (ops != CS_OPS_PIDOFFSET) && (kauth_cred_issuser(kauth_cred_get()) != TRUE))) {
-               return(EPERM);
+       } else {
+               switch (ops) {
+               case CS_OPS_STATUS:
+               case CS_OPS_CDHASH:
+               case CS_OPS_PIDOFFSET:
+               case CS_OPS_ENTITLEMENTS_BLOB:
+                       break;  /* unrestricted */
+               default:
+                       if (forself == 0 && kauth_cred_issuser(kauth_cred_get()) != TRUE)
+                               return(EPERM);
+                       break;
+               }
        }
 
        pt = proc_find(pid);
        if (pt == PROC_NULL)
                return(ESRCH);
 
-
+       upid = pt->p_pid;
+       uidversion = pt->p_idversion;
+       if (uaudittoken != USER_ADDR_NULL) {
+               
+               error = copyin(uaudittoken, &token, sizeof(audit_token_t));
+               if (error != 0)
+                       goto out;
+               /* verify the audit token pid/idversion matches with proc */
+               if ((token.val[5] != upid) || (token.val[7] != uidversion)) {
+                       error = ESRCH;
+                       goto out;
+               }
+       }
 
        switch (ops) {
 
@@ -1833,20 +1903,34 @@ csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
                        return error;
 
                case CS_OPS_ENTITLEMENTS_BLOB: {
-                       char zeros[8] = { 0 };
+                       char fakeheader[8] = { 0 };
                        void *start;
                        size_t length;
 
-                       if (0 != (error = cs_entitlements_blob_get(pt,
-                           &start, &length)))
+                       if ((pt->p_csflags & CS_VALID) == 0) {
+                               error = EINVAL;
                                break;
-                       if (usize < sizeof(zeros) || usize < length) {
+                       }
+                       if (usize < sizeof(fakeheader)) {
                                error = ERANGE;
                                break;
                        }
+                       if (0 != (error = cs_entitlements_blob_get(pt,
+                           &start, &length)))
+                               break;
+                       /* if no entitlement, fill in zero header */
                        if (NULL == start) {
-                               start = zeros;
-                               length = sizeof(zeros);
+                               start = fakeheader;
+                               length = sizeof(fakeheader);
+                       } else if (usize < length) {
+                               /* ... if input too short, copy out length of entitlement */
+                               uint32_t length32 = htonl((uint32_t)length);
+                               memcpy(&fakeheader[4], &length32, sizeof(length32));
+
+                               error = copyout(fakeheader, uaddr, sizeof(fakeheader));
+                               if (error == 0)
+                                       error = ERANGE; /* input buffer too short; ERANGE signals that */
+                               break;
                        }
                        error = copyout(start, uaddr, length);
                        break;
@@ -1867,7 +1951,6 @@ out:
        return(error);
 }
 
-
 int
 proc_iterate(flags, callout, arg, filterfn, filterarg)
        int flags;
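
The CS_OPS_ENTITLEMENTS_BLOB path above defines a sizing handshake: a caller whose buffer is at least 8 bytes but smaller than the blob gets back a fake 8-byte header whose second word holds the blob length in network byte order, along with ERANGE; a process without entitlements gets an all-zero header and success. A hedged userspace sketch of that handshake (csops(2) is a private interface; the prototype and CS_OPS_* constants are taken from <sys/codesign.h> on builds that ship them):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>
    #include <errno.h>
    #include <unistd.h>
    #include <arpa/inet.h>      /* ntohl */
    #include <sys/codesign.h>   /* csops, CS_OPS_ENTITLEMENTS_BLOB */

    int main(void)
    {
        uint8_t header[8] = { 0 };

        /* Probe with the 8-byte header. Success means the blob fit (zero
         * header, no entitlements); EINVAL means the process is not
         * CS_VALID; ERANGE means bytes 4..7 now hold the big-endian
         * blob length. */
        if (csops(getpid(), CS_OPS_ENTITLEMENTS_BLOB, header, sizeof(header)) == 0) {
            printf("no entitlements\n");
            return 0;
        }
        if (errno != ERANGE) {
            perror("csops (probe)");
            return 1;
        }

        uint32_t length;
        memcpy(&length, &header[4], sizeof(length));
        length = ntohl(length);

        void *blob = malloc(length);
        if (blob == NULL)
            return 1;
        if (csops(getpid(), CS_OPS_ENTITLEMENTS_BLOB, blob, length) != 0) {
            perror("csops (fetch)");
            free(blob);
            return 1;
        }
        printf("entitlements blob: %u bytes\n", length);
        free(blob);
        return 0;
    }
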
index d2473dbf0476c2d54e303c72d01cfc0a4ae6526f..ca41339ea902e04ab7d027c39d20e41d48a8e29e 100644 (file)
@@ -113,7 +113,8 @@ int uthread_get_background_state(uthread_t);
 static void do_background_socket(struct proc *p, thread_t thread, int priority);
 static int do_background_thread(struct proc *curp, thread_t thread, int priority);
 static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);
-void proc_apply_task_networkbg_internal(proc_t);
+void proc_apply_task_networkbg_internal(proc_t, thread_t);
+void proc_restore_task_networkbg_internal(proc_t, thread_t);
 
 rlim_t maxdmap = MAXDSIZ;      /* XXX */ 
 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;  /* XXX */ 
@@ -368,6 +369,9 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
        }
 
        case PRIO_DARWIN_THREAD: {
+               /* process marked for termination; no priority management */
+               if ((curp->p_lflag & P_LPTERMINATE) != 0)
+                       return(EINVAL);
                /* we currently only support the current thread */
                if (uap->who != 0) {
                        return (EINVAL);
@@ -390,11 +394,16 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
                        refheld = 1;
                }
 
-               error = do_background_proc(curp, p, uap->prio);
-               if (!error) {
-                       (void) do_background_socket(p, NULL, uap->prio);
-               }
+               /* process marked for termination; no priority management */
+               if ((p->p_lflag & P_LPTERMINATE) != 0) {
+                       error = EINVAL;
+               } else {
+                       error = do_background_proc(curp, p, uap->prio);
+                       if (!error) {
+                               (void) do_background_socket(p, NULL, uap->prio);
+                       }
                
+               }
                found++;
                if (refheld != 0)
                        proc_rele(p);
@@ -461,16 +470,13 @@ do_background_proc(struct proc *curp, struct proc *targetp, int priority)
        int error = 0;
        kauth_cred_t ucred;
        kauth_cred_t target_cred;
-#if CONFIG_EMBEDDED
-       task_category_policy_data_t info;
-#endif
 
        ucred = kauth_cred_get();
        target_cred = kauth_cred_proc_ref(targetp);
 
        if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
-           kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
-           kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
+               kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
+               kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
        {
                error = EPERM;
                goto out;
@@ -482,49 +488,12 @@ do_background_proc(struct proc *curp, struct proc *targetp, int priority)
                goto out;
 #endif
 
-#if !CONFIG_EMBEDDED
        if (priority == PRIO_DARWIN_NONUI)
                error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
        else
-               error = proc_set1_bgtaskpolicy(targetp->task, priority);
+               error = proc_set_and_apply_bgtaskpolicy(targetp->task, priority);
        if (error)
                goto out;
-#else /* !CONFIG_EMBEDDED */
-
-       /* set the max scheduling priority on the task */
-       if (priority == PRIO_DARWIN_BG) { 
-               info.role = TASK_THROTTLE_APPLICATION;
-       }
-       else if (priority == PRIO_DARWIN_NONUI) { 
-               info.role = TASK_NONUI_APPLICATION;
-       }
-       else {
-               info.role = TASK_DEFAULT_APPLICATION;
-       }
-
-       error = task_policy_set(targetp->task,
-                       TASK_CATEGORY_POLICY,
-                       (task_policy_t) &info,
-                       TASK_CATEGORY_POLICY_COUNT);
-
-       if (error)
-               goto out;
-
-       proc_lock(targetp);
-
-       /* mark proc structure as backgrounded */
-       if (priority == PRIO_DARWIN_BG) {
-               targetp->p_lflag |= P_LBACKGROUND;
-       } else {
-               targetp->p_lflag &= ~P_LBACKGROUND;
-       }
-
-       /* set or reset the disk I/O priority */
-       targetp->p_iopol_disk = (priority == PRIO_DARWIN_BG ? 
-                       IOPOL_THROTTLE : IOPOL_DEFAULT); 
-
-       proc_unlock(targetp);
-#endif /* !CONFIG_EMBEDDED */
 
 out:
        kauth_cred_unref(&target_cred);
@@ -610,11 +579,7 @@ static int
 do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
 {
        struct uthread                                          *ut;
-#if !CONFIG_EMBEDDED
        int error = 0;
-#else /* !CONFIG_EMBEDDED */
-       thread_precedence_policy_data_t         policy;
-#endif /* !CONFIG_EMBEDDED */
        
        ut = get_bsdthread_info(thread);
 
@@ -623,61 +588,9 @@ do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
                return(EPERM);
        }
 
-#if !CONFIG_EMBEDDED
-       error = proc_set1_bgthreadpolicy(curp->task, thread_tid(thread), priority);
+       error = proc_set_and_apply_bgthreadpolicy(curp->task, thread_tid(thread), priority);
        return(error);
-#else /* !CONFIG_EMBEDDED */
-       if ( (priority & PRIO_DARWIN_BG) == 0 ) {
-               /* turn off backgrounding of thread */
-               if ( (ut->uu_flag & UT_BACKGROUND) == 0 ) {
-                       /* already off */
-                       return(0);
-               }
-
-               /*
-                * Clear background bit in thread and disable disk IO
-                * throttle as well as network traffic management.
-                * The corresponding socket flags for sockets created by
-                * this thread will be cleared in do_background_socket().
-                */
-               ut->uu_flag &= ~(UT_BACKGROUND | UT_BACKGROUND_TRAFFIC_MGT);
-               ut->uu_iopol_disk = IOPOL_NORMAL;
-
-               /* reset thread priority (we did not save previous value) */
-               policy.importance = 0;
-               thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
-                                                  (thread_policy_t)&policy,
-                                                  THREAD_PRECEDENCE_POLICY_COUNT );
-               return(0);
-       }
-       
-       /* background this thread */
-       if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
-               /* already backgrounded */
-               return(0);
-       }
 
-       /*
-        * Tag thread as background and throttle disk IO, as well
-        * as regulate network traffics.  Future sockets created
-        * by this thread will have their corresponding socket
-        * flags set at socket create time.
-        */
-       ut->uu_flag |= (UT_BACKGROUND | UT_BACKGROUND_TRAFFIC_MGT);
-       ut->uu_iopol_disk = IOPOL_THROTTLE;
-
-       policy.importance = INT_MIN;
-       thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
-                                          (thread_policy_t)&policy,
-                                          THREAD_PRECEDENCE_POLICY_COUNT );
-
-       /* throttle networking IO happens in socket( ) syscall.
-        * If UT_{BACKGROUND,BACKGROUND_TRAFFIC_MGT} is set in the current
-        * thread then TRAFFIC_MGT_SO_{BACKGROUND,BG_REGULATE} is set.
-        * Existing sockets are taken care of by do_background_socket().
-        */
-#endif /* !CONFIG_EMBEDDED */
-       return(0);
 }
 
 #if CONFIG_EMBEDDED
@@ -726,29 +639,6 @@ out:
 }
 #endif /* CONFIG_EMBEDDED */
 
-#if CONFIG_EMBEDDED
-/*
- * If the thread or its proc has been put into the background
- * with setpriority(PRIO_DARWIN_{THREAD,PROCESS}, *, PRIO_DARWIN_BG),
- * report that status.
- *
- * Returns: PRIO_DARWIN_BG if background
- *                     0 if foreground
- */
-int
-uthread_get_background_state(uthread_t uth)
-{
-       proc_t p = uth->uu_proc;
-       if (p && (p->p_lflag & P_LBACKGROUND))
-               return PRIO_DARWIN_BG;
-       
-       if (uth->uu_flag & UT_BACKGROUND)
-               return PRIO_DARWIN_BG;
-
-       return 0;
-}
-#endif /* CONFIG_EMBEDDED */
-
 /*
  * Returns:    0                       Success
  *     copyin:EFAULT
@@ -891,12 +781,7 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
                                size = round_page_64(limp->rlim_cur);
                                size -= round_page_64(alimp->rlim_cur);
 
-#if STACK_GROWTH_UP
-                               /* go to top of current stack */
-                       addr = p->user_stack + round_page_64(alimp->rlim_cur);
-#else  /* STACK_GROWTH_UP */
                        addr = p->user_stack - round_page_64(limp->rlim_cur);
-#endif /* STACK_GROWTH_UP */
                        kr = mach_vm_protect(current_map(), 
                                             addr, size,
                                             FALSE, VM_PROT_DEFAULT);
@@ -918,28 +803,6 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
                         */
                        cur_sp = thread_adjuserstack(current_thread(),
                                                     0);
-#if STACK_GROWTH_UP
-                       if (cur_sp >= p->user_stack &&
-                           cur_sp < (p->user_stack +
-                                     round_page_64(alimp->rlim_cur))) {
-                               /* current stack pointer is in main stack */
-                               if (cur_sp >= (p->user_stack +
-                                              round_page_64(limp->rlim_cur))) {
-                                       /*
-                                        * New limit would cause
-                                        * current usage to be invalid:
-                                        * reject new limit.
-                                        */
-                                       error =  EINVAL;
-                                       goto out;
-                       }
-                       } else {
-                               /* not on the main stack: reject */
-                               error =  EINVAL;
-                               goto out;
-               }
-                                
-#else  /* STACK_GROWTH_UP */
                        if (cur_sp <= p->user_stack &&
                            cur_sp > (p->user_stack -
                                      round_page_64(alimp->rlim_cur))) {
@@ -959,16 +822,11 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
                                error =  EINVAL;
                                goto out;
                        }
-#endif /* STACK_GROWTH_UP */
                                
                        size = round_page_64(alimp->rlim_cur);
                        size -= round_page_64(limp->rlim_cur);
 
-#if STACK_GROWTH_UP
-                       addr = p->user_stack + round_page_64(limp->rlim_cur);
-#else  /* STACK_GROWTH_UP */
                        addr = p->user_stack - round_page_64(alimp->rlim_cur);
-#endif /* STACK_GROWTH_UP */
 
                        kr = mach_vm_protect(current_map(),
                                             addr, size,
@@ -1092,15 +950,15 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *i
 
        task = p->task;
        if (task) {
-               task_basic_info_32_data_t tinfo;
+               mach_task_basic_info_data_t tinfo;
                task_thread_times_info_data_t ttimesinfo;
                task_events_info_data_t teventsinfo;
                mach_msg_type_number_t task_info_count, task_ttimes_count;
                mach_msg_type_number_t task_events_count;
                struct timeval ut,st;
 
-               task_info_count = TASK_BASIC_INFO_32_COUNT;
-               task_info(task, TASK_BASIC2_INFO_32,
+               task_info_count = MACH_TASK_BASIC_INFO_COUNT;
+               task_info(task, MACH_TASK_BASIC_INFO,
                          (task_info_t)&tinfo, &task_info_count);
                ut.tv_sec = tinfo.user_time.seconds;
                ut.tv_usec = tinfo.user_time.microseconds;
@@ -1136,7 +994,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *i
                if (p->p_stats->p_ru.ru_nivcsw < 0)
                        p->p_stats->p_ru.ru_nivcsw = 0;
 
-               p->p_stats->p_ru.ru_maxrss = tinfo.resident_size;
+               p->p_stats->p_ru.ru_maxrss = tinfo.resident_size_max;
        }
 }
 
@@ -1330,13 +1188,7 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
 {
        int     error = 0;
        struct _iopol_param_t iop_param;
-#if !CONFIG_EMBEDDED
        int processwide = 0;
-#else /* !CONFIG_EMBEDDED */
-       thread_t thread = THREAD_NULL;
-       struct uthread  *ut = NULL;
-       int *policy;
-#endif /* !CONFIG_EMBEDDED */
 
        if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
                goto out;
@@ -1346,7 +1198,6 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
                goto out;
        }
 
-#if !CONFIG_EMBEDDED
        switch (iop_param.iop_scope) {
        case IOPOL_SCOPE_PROCESS:
                processwide = 1;
@@ -1366,6 +1217,7 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
                case IOPOL_NORMAL:
                case IOPOL_THROTTLE:
                case IOPOL_PASSIVE:
+               case IOPOL_UTILITY:
                        if(processwide != 0)
                                proc_apply_task_diskacc(current_task(), iop_param.iop_policy);
                        else
@@ -1392,61 +1244,6 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
                break;
        }
 
-#else /* !CONFIG_EMBEDDED */
-       switch (iop_param.iop_scope) {
-       case IOPOL_SCOPE_PROCESS:
-               policy = &p->p_iopol_disk;
-               break;
-       case IOPOL_SCOPE_THREAD:
-               thread = current_thread();
-               ut = get_bsdthread_info(thread);
-               policy = &ut->uu_iopol_disk;
-               break;
-       default:
-               error = EINVAL;
-               goto out;
-       }
-               
-       switch(uap->cmd) {
-       case IOPOL_CMD_SET:
-               switch (iop_param.iop_policy) {
-               case IOPOL_DEFAULT:
-               case IOPOL_NORMAL:
-               case IOPOL_THROTTLE:
-               case IOPOL_PASSIVE:
-                       proc_lock(p);
-                       *policy = iop_param.iop_policy;
-                       proc_unlock(p);
-                       break;
-               default:
-                       error = EINVAL;
-                       goto out;
-               }
-               break;
-       case IOPOL_CMD_GET:
-               switch (*policy) {
-               case IOPOL_DEFAULT:
-               case IOPOL_NORMAL:
-               case IOPOL_THROTTLE:
-               case IOPOL_PASSIVE:
-                       iop_param.iop_policy = *policy;
-                       break;
-               default: // in-kernel 
-                       // this should never happen
-                       printf("%s: unknown I/O policy %d\n", __func__, *policy);
-                       // restore to default value
-                       *policy = IOPOL_DEFAULT;
-                       iop_param.iop_policy = *policy;
-               }
-               
-               error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
-               break;
-       default:
-               error = EINVAL; // unknown command
-               break;
-       }
-
-#endif /* !CONFIG_EMBEDDED */
 out:
        *retval = error;
        return (error);
@@ -1458,28 +1255,7 @@ boolean_t thread_is_io_throttled(void);
 boolean_t
 thread_is_io_throttled(void) 
 {
-
-#if !CONFIG_EMBEDDED
-
        return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE);
-               
-#else /* !CONFIG_EMBEDDED */
-       int     policy;
-       struct uthread  *ut;
-
-       ut = get_bsdthread_info(current_thread());
-
-       if(ut){
-               policy = current_proc()->p_iopol_disk;
-
-               if (ut->uu_iopol_disk != IOPOL_DEFAULT)
-                       policy = ut->uu_iopol_disk;
-
-               if (policy == IOPOL_THROTTLE)
-                       return TRUE;
-       }
-       return FALSE;
-#endif /* !CONFIG_EMBEDDED */
 }
 
 void
@@ -1523,10 +1299,17 @@ proc_set_task_networkbg(void * bsdinfo, int setbg)
 }
 
 void
-proc_apply_task_networkbg_internal(proc_t p)
+proc_apply_task_networkbg_internal(proc_t p, thread_t thread)
 {
        if (p != PROC_NULL) {
-               do_background_socket(p, NULL, PRIO_DARWIN_BG);
+               do_background_socket(p, thread, PRIO_DARWIN_BG);
+       }
+}
+void
+proc_restore_task_networkbg_internal(proc_t p, thread_t thread)
+{
+       if (p != PROC_NULL) {
+               do_background_socket(p, thread, 0);
        }
 }
 
index de545581243a15a8a2510f4ea7bb439a6df28cab..d656dcaf367f79b77e5ad66dfe0481196cefbaff 100644 (file)
 extern int thread_enable_fpe(thread_t act, int onoff);
 extern thread_t        port_name_to_thread(mach_port_name_t port_name);
 extern kern_return_t get_signalact(task_t , thread_t *, int);
-extern boolean_t thread_should_abort(thread_t);
 extern unsigned int get_useraddr(void);
 
 /*
@@ -655,7 +654,7 @@ siginit(proc_t p)
 {
        int i;
 
-       for (i = 0; i < NSIG; i++)
+       for (i = 1; i < NSIG; i++)
                if (sigprop[i] & SA_IGNORE && i != SIGCONT)
                        p->p_sigignore |= sigmask(i);
 }
@@ -1637,7 +1636,7 @@ threadsignal(thread_t sig_actthread, int signum, mach_exception_code_t code)
        p = (proc_t)(get_bsdtask_info(sig_task));
 
        uth = get_bsdthread_info(sig_actthread);
-       if (uth && (uth->uu_flag & UT_VFORK))
+       if (uth->uu_flag & UT_VFORK)
                p = uth->uu_proc;
 
        proc_lock(p);
@@ -2069,7 +2068,6 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         *      Process will be running after 'run'
                         */
                        sig_proc->p_stat = SRUN;
-                       proc_unlock(sig_proc);
                        /*
                         * In scenarios where suspend/resume are racing
                         * the signal we are missing AST_BSD by the time
@@ -2079,6 +2077,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                         */
                        act_set_astbsd(sig_thread);
                        thread_abort(sig_thread);
+                       proc_unlock(sig_proc);
 
                        goto psigout;
 
@@ -2206,7 +2205,7 @@ psignal_uthread(thread_t thread, int signum)
  *             postsig(signum);
  */
 int
-issignal(proc_t p)
+issignal_locked(proc_t p)
 {
        int signum, mask, prop, sigbits;
        thread_t cur_act;
@@ -2223,13 +2222,11 @@ issignal(proc_t p)
                 ram_printf(3);
         }
 #endif /* SIGNAL_DEBUG */
-       proc_lock(p);
 
        /*
         * Try to grab the signal lock.
         */
        if (sig_try_locked(p) <= 0) {
-               proc_unlock(p);
                return(0);
        }
 
@@ -2362,6 +2359,7 @@ issignal(proc_t p)
                                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
                                              p->p_pid, W_EXITCODE(0, SIGKILL), 2, 0, 0);
                                exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
+                               proc_lock(p);
                                return(0);
                        }
 
@@ -2503,7 +2501,6 @@ issignal(proc_t p)
        /* NOTREACHED */
 out:
        proc_signalend(p, 1);
-       proc_unlock(p);
        return(retval);
 }
 
@@ -2653,7 +2650,7 @@ stop(proc_t p, proc_t parent)
  * from the current set of pending signals.
  */
 void
-postsig(int signum)
+postsig_locked(int signum)
 {
        proc_t p = current_proc();
        struct sigacts *ps = p->p_sigacts;
@@ -2672,12 +2669,10 @@ postsig(int signum)
                panic("psig not on master");
 #endif
 
-       proc_lock(p);
        /*
         * Try to grab the signal lock.
         */
        if (sig_try_locked(p) <= 0) {
-               proc_unlock(p);
                return;
        }
 
@@ -2713,6 +2708,16 @@ postsig(int signum)
                ut->t_dtrace_siginfo.si_uid = p->si_uid;
                ut->t_dtrace_siginfo.si_status = WEXITSTATUS(p->si_status);
 
+               /* Fire DTrace proc:::fault probe when signal is generated by hardware. */
+               switch (signum) {
+               case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP:
+                       DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo));
+                       break;
+               default:
+                       break;
+               }
+               
+
                DTRACE_PROC3(signal__handle, int, signum, siginfo_t *, &(ut->t_dtrace_siginfo),
                                        void (*)(void), SIG_DFL);
 #endif
@@ -2720,6 +2725,7 @@ postsig(int signum)
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
                                              p->p_pid, W_EXITCODE(0, signum), 3, 0, 0);
                exit1(p, W_EXITCODE(0, signum), (int *)NULL);
+               proc_lock(p);
                return;
        } else {
                /*
@@ -2767,7 +2773,6 @@ postsig(int signum)
                sendsig(p, catcher, signum, returnmask, code);
        }
        proc_signalend(p, 1);
-       proc_unlock(p);
 }
 
 /*
@@ -2963,10 +2968,12 @@ bsd_ast(thread_t thread)
                    
 #endif /* CONFIG_DTRACE */
 
+       proc_lock(p);
        if (CHECK_SIGNALS(p, current_thread(), ut)) {
-               while ( (signum = issignal(p)) )
-                       postsig(signum);
+               while ( (signum = issignal_locked(p)) )
+                       postsig_locked(signum);
        }
+       proc_unlock(p);
 
        if (!bsd_init_done) {
                bsd_init_done = 1;
@@ -3116,7 +3123,10 @@ sig_lock_to_exit(proc_t p)
 
        p->exit_thread = self;
        proc_unlock(p);
-       (void) task_suspend(p->task);
+
+       task_hold(p->task);
+       task_wait(p->task, FALSE);
+
        proc_lock(p);
 }
 
index cc05a7db7309e0b2d24063e9b2a187ca6dc6d7a9..9e9587bea692a7d879a08495441fca5b12c58657 100644 (file)
@@ -153,7 +153,7 @@ uiomove64(const addr64_t c_cp, int n, struct uio *uio)
                if (n > 0 && acnt > (uint64_t)n)
                        acnt = n;
 
-               switch (uio->uio_segflg) {
+               switch ((int) uio->uio_segflg) {
 
                case UIO_USERSPACE64:
                case UIO_USERISPACE64:
@@ -280,7 +280,7 @@ ureadc(int c, struct uio *uio)
        if (uio_curriovlen(uio) <= 0)
                panic("ureadc: non-positive iovlen");
 
-       switch (uio->uio_segflg) {
+       switch ((int) uio->uio_segflg) {
 
        case UIO_USERSPACE32:
        case UIO_USERSPACE:
index b1db73f0c270fefaed3a5699654f32dd11de048a..d0d4674948cea99910aef1f1d3cd6413e26b59f4 100644 (file)
@@ -78,7 +78,8 @@ struct kern_direct_file_io_ref_t
     struct vnode * vp;
     dev_t          device;
     uint32_t      blksize;
-    off_t                 filelength;
+    off_t          filelength;
+    char           pinned;
 };
 
 
@@ -95,8 +96,82 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t
     return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
 }
 
-void
-kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t f_offset, off_t end);
+static int
+kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end)
+{
+    int error;
+    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
+    void * p1;
+    void * p2;
+    uint64_t    fileblk;
+    size_t      filechunk;
+    dk_extent_t  extent;
+    dk_unmap_t   unmap;
+    _dk_cs_pin_t pin;
+
+    bzero(&extent, sizeof(dk_extent_t));
+    bzero(&unmap, sizeof(dk_unmap_t));
+    bzero(&pin, sizeof(pin));
+    if (ref->vp->v_type == VREG)
+    {
+        p1 = &ref->device;
+        p2 = kernproc;
+        do_ioctl = &file_ioctl;
+    }
+    else
+    {
+       /* Partition. */
+       p1 = ref->vp;
+       p2 = ref->ctx;
+       do_ioctl = &device_ioctl;
+    }
+    while (offset < end) 
+    {
+        if (ref->vp->v_type == VREG)
+        {
+            daddr64_t blkno;
+            filechunk = 1*1024*1024*1024;
+            if (filechunk > (size_t)(end - offset))
+                filechunk = (size_t)(end - offset);
+            error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
+            if (error) break;
+            fileblk = blkno * ref->blksize;
+        }
+        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+        {
+            fileblk = offset;
+            filechunk = ref->filelength;
+        }
+
+       if (DKIOCUNMAP == theIoctl)
+       {
+           extent.offset = fileblk;
+           extent.length = filechunk;
+           unmap.extents = &extent;
+           unmap.extentsCount = 1;
+           error = do_ioctl(p1, p2, theIoctl, (caddr_t)&unmap);
+//         printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
+       }
+       else if (_DKIOCCSPINEXTENT == theIoctl)
+       {
+           pin.cp_extent.offset = fileblk;
+           pin.cp_extent.length = filechunk;
+           pin.cp_flags = _DKIOCSPINDISCARDDATA;
+           error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin);
+           if (error && (ENOTTY != error))
+           {
+               printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", 
+                       error, pin.cp_extent.offset, pin.cp_extent.length);
+           }
+       }
+       else error = EINVAL;
+
+       if (error) break;
+        offset += filechunk;
+    }
+    return (error);
+}
+
 int
 kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
 
@@ -143,7 +218,7 @@ kern_open_file_for_direct_io(const char * name,
        goto out;
     }
 
-    ref->vp = NULL;
+    bzero(ref, sizeof(*ref));
     p = kernproc;
     ref->ctx = vfs_context_create(vfs_context_current());
 
@@ -197,13 +272,6 @@ kern_open_file_for_direct_io(const char * name,
     }
     ref->device = device;
 
-    // generate the block list
-
-    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
-    if (error)
-        goto out;
-    locked = TRUE;
-
     // get block size
 
     error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
@@ -220,6 +288,19 @@ kern_open_file_for_direct_io(const char * name,
        ref->filelength = fileblk * ref->blksize;    
     }
 
+    // pin logical extents
+
+    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
+    if (error && (ENOTTY != error)) goto out;
+    ref->pinned = (error == 0);
+
+    // generate the block list
+
+    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
+    if (error)
+        goto out;
+    locked = TRUE;
+
     f_offset = 0;
     while (f_offset < ref->filelength) 
     {
@@ -370,60 +451,6 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad
                        vfs_context_proc(ref->ctx)));
 }
 
-void
-kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t offset, off_t end)
-{
-    int error;
-       int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
-       void * p1;
-       void * p2;
-       dk_extent_t extent;
-       dk_unmap_t  unmap;
-    uint64_t    fileblk;
-    size_t      filechunk;
-
-       bzero(&extent, sizeof(dk_extent_t));
-       bzero(&unmap, sizeof(dk_unmap_t));
-       if (ref->vp->v_type == VREG)
-       {
-               p1 = &ref->device;
-               p2 = kernproc;
-               do_ioctl = &file_ioctl;
-       }
-       else
-       {
-               /* Partition. */
-               p1 = ref->vp;
-               p2 = ref->ctx;
-               do_ioctl = &device_ioctl;
-       }
-    while (offset < end) 
-    {
-        if (ref->vp->v_type == VREG)
-        {
-            daddr64_t blkno;
-                       filechunk = 1*1024*1024*1024;
-                       if (filechunk > (size_t)(end - offset))
-                               filechunk = (size_t)(end - offset);
-            error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
-                       if (error) break;
-            fileblk = blkno * ref->blksize;
-        }
-        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
-        {
-            fileblk = offset;
-            filechunk = ref->filelength;
-        }
-               extent.offset = fileblk;
-               extent.length = filechunk;
-               unmap.extents = &extent;
-               unmap.extentsCount = 1;
-        error = do_ioctl(p1, p2, DKIOCUNMAP, (caddr_t)&unmap);
-//             kprintf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
-               if (error) break;
-        offset += filechunk;
-    }
-}
 
 void
 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
@@ -460,9 +487,9 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
         {
             (void) kern_write_file(ref, write_offset, addr, write_length);
         }
-        if (discard_offset && discard_end)
+        if (discard_offset && discard_end && !ref->pinned)
         {
-            (void) kern_unmap_file(ref, discard_offset, discard_end);
+            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end);
         }
 
         error = vnode_close(ref->vp, FWRITE, ref->ctx);
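
kern_ioctl_file_extents() above folds the old kern_unmap_file() walk and the new CoreStorage pinning into one loop: resolve up to 1 GiB of the file at a time into a device-block run, then issue DKIOCUNMAP or _DKIOCCSPINEXTENT for that run. A standalone sketch of the chunked walk; the extent_op_t callback stands in for the ioctl, and in the kernel VNOP_BLOCKMAP() performs the logical-to-physical translation commented below:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical stand-in for one DKIOC* call against the device. */
    typedef int (*extent_op_t)(uint64_t byte_offset, uint64_t byte_len);

    static int print_extent(uint64_t off, uint64_t len)
    {
        printf("op on extent 0x%llx + 0x%llx\n",
               (unsigned long long)off, (unsigned long long)len);
        return 0;
    }

    /* Walk [offset, end) in bounded chunks, one operation per extent,
     * mirroring the shape of kern_ioctl_file_extents(). */
    static int walk_extents(uint64_t offset, uint64_t end, extent_op_t op)
    {
        const uint64_t max_chunk = 1ULL * 1024 * 1024 * 1024;   /* 1 GiB cap */

        while (offset < end) {
            uint64_t chunk = end - offset;
            if (chunk > max_chunk)
                chunk = max_chunk;
            /* Kernel version: VNOP_BLOCKMAP() maps (offset, chunk) to a
             * physical block run here, possibly shortening the chunk. */
            int error = op(offset, chunk);
            if (error)
                return error;
            offset += chunk;
        }
        return 0;
    }

    int main(void)
    {
        return walk_extents(0, 3ULL << 30, print_extent);   /* three 1 GiB runs */
    }
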
index c6b4888c35692b0fd16ae0cb6ebde657ea0ce1bc..34cb1520a21d096dd8716ec4fae8c9a99d1978f1 100644 (file)
@@ -58,7 +58,6 @@
 #include <sys/systm.h>                 /* for unix_syscall_return() */
 #include <libkern/OSAtomic.h>
 
-extern boolean_t thread_should_abort(thread_t);        /* XXX */
 extern void compute_averunnable(void *);       /* XXX */
 
 
index e1f693be2084cac4139ce2502a5324da78942eb7..56782c39c70eae2c58a17e3269d7ffe92ed9d049 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <mach/vm_param.h>
 #include <kern/mach_param.h>
 #include <kern/task.h>
+#include <kern/thread.h>
 #include <kern/lock.h>
 #include <kern/processor.h>
 #include <kern/debug.h>
 #include <i386/cpuid.h>
 #endif
 
+#if CONFIG_FREEZE
+#include <sys/kern_memorystatus.h>
+#endif
+
+/*
+ * deliberately setting max requests to a really high number
+ * so that runaway settings do not cause MALLOC overflows
+ */
+#define AIO_MAX_REQUESTS (128 * CONFIG_AIO_MAX)
+
 extern sysctlfn net_sysctl;
 extern sysctlfn cpu_sysctl;
 extern int aio_max_requests;                           
@@ -147,6 +158,7 @@ extern int nx_enabled;
 extern int speculative_reads_disabled;
 extern int ignore_is_ssd;
 extern unsigned int speculative_prefetch_max;
+extern unsigned int speculative_prefetch_max_iosize;
 extern unsigned int preheat_pages_max;
 extern unsigned int preheat_pages_min;
 extern long numvnodes;
@@ -175,15 +187,18 @@ extern unsigned int vm_page_speculative_q_age_ms;
 extern boolean_t    mach_timer_coalescing_enabled;
 
 STATIC void
-fill_user32_eproc(proc_t p, struct user32_eproc *ep);
+fill_user32_eproc(proc_t, struct user32_eproc *__restrict);
+STATIC void
+fill_user32_externproc(proc_t, struct user32_extern_proc *__restrict);
 STATIC void
-fill_user32_externproc(proc_t p, struct user32_extern_proc *exp);
+fill_user64_eproc(proc_t, struct user64_eproc *__restrict);
 STATIC void
-fill_user64_eproc(proc_t p, struct user64_eproc *ep);
+fill_user64_proc(proc_t, struct user64_kinfo_proc *__restrict);
 STATIC void
-fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp);
+fill_user64_externproc(proc_t, struct user64_extern_proc *__restrict);
 STATIC void
-fill_user64_externproc(proc_t p, struct user64_extern_proc *exp);
+fill_user32_proc(proc_t, struct user32_kinfo_proc *__restrict);
+
 extern int 
 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep);
 #if NFSCLIENT
@@ -195,8 +210,6 @@ pcsamples_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep,
               proc_t p);
 __private_extern__ kern_return_t
 reset_vmobjectcache(unsigned int val1, unsigned int val2);
-STATIC void
-fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp);
 int
 sysctl_procargs(int *name, u_int namelen, user_addr_t where, 
                                size_t *sizep, proc_t cur_proc);
@@ -221,11 +234,9 @@ int sysdoproc_callback(proc_t p, void *arg);
 /* forward declarations for non-static STATIC */
 STATIC void fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64);
 STATIC void fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32);
-STATIC int sysctl_handle_exec_archhandler_ppc(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_handle_kern_threadname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_sched_stats(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_sched_stats_enable(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
-STATIC int sysctl_file(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_kdebug_ops SYSCTL_HANDLER_ARGS;
 STATIC int sysctl_dotranslate SYSCTL_HANDLER_ARGS;
 STATIC int sysctl_doaffinity SYSCTL_HANDLER_ARGS;
@@ -278,6 +289,7 @@ STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, s
 STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_safeboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_singleuser(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_slide(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 
 
 extern void IORegistrySetOSBuildVersion(char * build_version); 
@@ -653,75 +665,6 @@ SYSCTL_PROC(_kern, KERN_TRANSLATE, translate, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_
        NULL,                   /* Data pointer */
        "");
 
-int
-set_archhandler(__unused proc_t p, int arch)
-{
-       int error;
-       struct nameidata nd;
-       struct vnode_attr va;
-       vfs_context_t ctx = vfs_context_current();
-       struct exec_archhandler *archhandler;
-
-       switch(arch) {
-       case CPU_TYPE_POWERPC:
-               archhandler = &exec_archhandler_ppc;
-               break;
-       default:
-               return (EBADARCH);
-       }
-
-       NDINIT(&nd, LOOKUP, OP_GETATTR, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
-                  CAST_USER_ADDR_T(archhandler->path), ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       nameidone(&nd);
-       
-       /* Check mount point */
-       if ((nd.ni_vp->v_mount->mnt_flag & MNT_NOEXEC) ||
-               (nd.ni_vp->v_type != VREG)) {
-               vnode_put(nd.ni_vp);
-               return (EACCES);
-       }
-       
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_fsid);
-       VATTR_WANTED(&va, va_fileid);
-       error = vnode_getattr(nd.ni_vp, &va, ctx);
-       if (error) {
-               vnode_put(nd.ni_vp);
-               return (error);
-       }
-       vnode_put(nd.ni_vp);
-       
-       archhandler->fsid = va.va_fsid;
-       archhandler->fileid = va.va_fileid;
-       return 0;
-}
-
-
-STATIC int
-sysctl_handle_exec_archhandler_ppc(struct sysctl_oid *oidp, void *arg1,
-               int arg2, struct sysctl_req *req)
-{
-       int error = 0;
-
-       if (req->newptr && !kauth_cred_issuser(kauth_cred_get()))
-               return (EPERM);
-
-       error = sysctl_handle_string(oidp, arg1, arg2, req);
-
-       if (error)
-               goto done;
-
-       if (req->newptr)
-               error = set_archhandler(req->p, CPU_TYPE_POWERPC);
-
-done:
-       return error;
-
-}
-
 STATIC int
 sysctl_handle_kern_threadname( __unused struct sysctl_oid *oidp, __unused void *arg1,
              __unused int arg2, struct sysctl_req *req)
@@ -781,16 +724,6 @@ sysctl_handle_kern_threadname(     __unused struct sysctl_oid *oidp, __unused void *
 
 SYSCTL_PROC(_kern, KERN_THREADNAME, threadname, CTLFLAG_ANYBODY | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_threadname,"A","");
 
-SYSCTL_NODE(_kern, KERN_EXEC, exec, CTLFLAG_RD|CTLFLAG_LOCKED, 0, "");
-
-SYSCTL_NODE(_kern_exec, OID_AUTO, archhandler, CTLFLAG_RD|CTLFLAG_LOCKED, 0, "");
-
-SYSCTL_PROC(_kern_exec_archhandler, OID_AUTO, powerpc,
-                       CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
-                       exec_archhandler_ppc.path,
-                       sizeof(exec_archhandler_ppc.path),
-                       sysctl_handle_exec_archhandler_ppc, "A", "");
-
 #define BSD_HOST 1
 STATIC int
 sysctl_sched_stats(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -1244,57 +1177,6 @@ sysctl_rdstruct(user_addr_t oldp, size_t *oldlenp,
        return (error);
 }
 
-/*
- * Get file structures.
- */
-STATIC int
-sysctl_file
-(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
-{
-       int error;
-       struct fileglob *fg;
-       struct extern_file nef;
-
-       if (req->oldptr == USER_ADDR_NULL) {
-               /*
-                * overestimate by 10 files
-                */
-               req->oldidx = sizeof(filehead) + (nfiles + 10) * sizeof(struct extern_file);
-               return (0);
-       }
-
-       /*
-        * first copyout filehead
-        */
-       error = SYSCTL_OUT(req, &filehead, sizeof(filehead));
-       if (error)
-               return (error);
-
-       /*
-        * followed by an array of file structures
-        */
-       for (fg = filehead.lh_first; fg != 0; fg = fg->f_list.le_next) {
-        nef.f_list.le_next =  (struct extern_file *)fg->f_list.le_next;
-        nef.f_list.le_prev =  (struct extern_file **)fg->f_list.le_prev;
-               nef.f_flag = (fg->fg_flag & FMASK);
-               nef.f_type = fg->fg_type;
-               nef.f_count = fg->fg_count;
-               nef.f_msgcount = fg->fg_msgcount;
-               nef.f_cred = fg->fg_cred;
-               nef.f_ops = fg->fg_ops;
-               nef.f_offset = fg->fg_offset;
-               nef.f_data = fg->fg_data;
-               error = SYSCTL_OUT(req, &nef, sizeof(nef));
-               if (error)
-                       return (error);
-       }
-       return (0);
-}
-
-SYSCTL_PROC(_kern, KERN_FILE, file,
-               CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
-               0, 0, sysctl_file, "S,filehead", "");
-
 STATIC int
 sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg)
 {
@@ -1391,12 +1273,12 @@ sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg)
 #define KERN_PROCSLOP  (5 * sizeof (struct kinfo_proc))
 struct sysdoproc_args {
        int     buflen;
-       caddr_t kprocp;
+       void    *kprocp;
        boolean_t is_64_bit;
        user_addr_t     dp;
        size_t needed;
        int sizeof_kproc;
-       int * errorp;
+       int *errorp;
        int uidcheck;
        int ruidcheck;
        int ttycheck;
@@ -1404,37 +1286,33 @@ struct sysdoproc_args {
 };
 
 int
-sysdoproc_callback(proc_t p, void * arg)
+sysdoproc_callback(proc_t p, void *arg)
 {
-       struct sysdoproc_args * args = (struct sysdoproc_args *)arg;
-       int error=0;
+       struct sysdoproc_args *args = arg;
 
        if (args->buflen >= args->sizeof_kproc) {
-               if ((args->ruidcheck != 0)  && (sysdoproc_filt_KERN_PROC_RUID(p, &args->uidval) == 0))
-                       return(PROC_RETURNED);
-               if ((args->uidcheck != 0)  && (sysdoproc_filt_KERN_PROC_UID(p, &args->uidval) == 0))
-                       return(PROC_RETURNED);
-               if ((args->ttycheck != 0)  && (sysdoproc_filt_KERN_PROC_TTY(p, &args->uidval) == 0))
-                       return(PROC_RETURNED);
+               if ((args->ruidcheck != 0) && (sysdoproc_filt_KERN_PROC_RUID(p, &args->uidval) == 0))
+                       return (PROC_RETURNED);
+               if ((args->uidcheck != 0) && (sysdoproc_filt_KERN_PROC_UID(p, &args->uidval) == 0))
+                       return (PROC_RETURNED);
+               if ((args->ttycheck != 0) && (sysdoproc_filt_KERN_PROC_TTY(p, &args->uidval) == 0))
+                       return (PROC_RETURNED);
 
                bzero(args->kprocp, args->sizeof_kproc);
-               if (args->is_64_bit) {
-                       fill_user64_proc(p, (struct user64_kinfo_proc *) args->kprocp);
-               }
-               else {
-                       fill_user32_proc(p, (struct user32_kinfo_proc *) args->kprocp);
-               }
-               error = copyout(args->kprocp, args->dp, args->sizeof_kproc);
+               if (args->is_64_bit)
+                       fill_user64_proc(p, args->kprocp);
+               else
+                       fill_user32_proc(p, args->kprocp);
+               int error = copyout(args->kprocp, args->dp, args->sizeof_kproc);
                if (error) {
                        *args->errorp = error;
-                       return(PROC_RETURNED_DONE);
-                       return (error);
+                       return (PROC_RETURNED_DONE);
                }
                args->dp += args->sizeof_kproc;
                args->buflen -= args->sizeof_kproc;
        }
        args->needed += args->sizeof_kproc;
-       return(PROC_RETURNED);
+       return (PROC_RETURNED);
 }
 
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "");
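
sysdoproc_callback() above is the producer for the kern.proc sysctls: for each process it filters on uid/ruid/tty, zeroes and fills a user32 or user64 kinfo_proc, and copies it out, padding the size estimate by KERN_PROCSLOP when the caller is only probing. The matching consumer side, as a short sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_ALL };
        size_t len = 0;

        /* Size probe; the kernel pads the estimate (KERN_PROCSLOP),
         * so the buffer tolerates a few processes spawning meanwhile. */
        if (sysctl(mib, 3, NULL, &len, NULL, 0) != 0)
            return 1;
        struct kinfo_proc *procs = malloc(len);
        if (procs == NULL)
            return 1;
        if (sysctl(mib, 3, procs, &len, NULL, 0) != 0) {
            free(procs);
            return 1;
        }
        size_t n = len / sizeof(struct kinfo_proc);
        printf("%zu processes\n", n);
        for (size_t i = 0; i < n && i < 5; i++)
            printf("  pid %d: %s\n",
                   procs[i].kp_proc.p_pid, procs[i].kp_proc.p_comm);
        free(procs);
        return 0;
    }
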
@@ -1450,11 +1328,11 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS
        size_t needed = 0;
        int buflen = where != USER_ADDR_NULL ? req->oldlen : 0;
        int error = 0;
-       boolean_t is_64_bit = FALSE;
+       boolean_t is_64_bit = proc_is64bit(current_proc());
        struct user32_kinfo_proc  user32_kproc;
        struct user64_kinfo_proc  user_kproc;
        int sizeof_kproc;
-       caddr_t kprocp;
+       void *kprocp;
        int (*filterfn)(proc_t, void *) = 0;
        struct sysdoproc_args args;
        int uidcheck = 0;
@@ -1464,17 +1342,14 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS
        if (namelen != 1 && !(namelen == 0 && cmd == KERN_PROC_ALL))
                return (EINVAL);
 
-       is_64_bit = proc_is64bit(current_proc()); 
        if (is_64_bit) {
                sizeof_kproc = sizeof(user_kproc);
-               kprocp = (caddr_t) &user_kproc;
-       }
-       else {
+               kprocp = &user_kproc;
+       } else {
                sizeof_kproc = sizeof(user32_kproc);
-               kprocp = (caddr_t) &user32_kproc;
+               kprocp = &user32_kproc;
        }
 
-
        switch (cmd) {
 
                case KERN_PROC_PID:
@@ -1522,12 +1397,13 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS
        args.ttycheck = ttycheck;
        args.sizeof_kproc = sizeof_kproc;
        if (namelen)
-       args.uidval = name[0];
+               args.uidval = name[0];
 
-       proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), sysdoproc_callback, &args, filterfn, name);
+       proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST),
+           sysdoproc_callback, &args, filterfn, name);
 
        if (error)
-               return(error);
+               return (error);
 
        dp = args.dp;
        needed = args.needed;
@@ -1544,6 +1420,7 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS
        req->oldidx += req->oldlen;
        return (0);
 }
+
 /*
  * We specify the subcommand code for multiple nodes as the 'req->arg2' value
  * in the sysctl declaration itself, which comes into the handler function
@@ -1611,42 +1488,30 @@ SYSCTL_PROC(_kern_proc, KERN_PROC_LCID, lcid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_
 
 
 /*
- * Fill in an eproc structure for the specified process.
+ * Fill in non-zero fields of an eproc structure for the specified process.
  */
 STATIC void
-fill_user32_eproc(proc_t p, struct user32_eproc *ep)
+fill_user32_eproc(proc_t p, struct user32_eproc *__restrict ep)
 {
        struct tty *tp;
+       struct pgrp *pg;
+       struct session *sessp;
        kauth_cred_t my_cred;
-       struct pgrp * pg;
-       struct session * sessp;
 
        pg = proc_pgrp(p);
        sessp = proc_session(p);
 
-       ep->e_paddr = CAST_DOWN_EXPLICIT(uint32_t,p);
-
        if (pg != PGRP_NULL) {
-               ep->e_sess = CAST_DOWN_EXPLICIT(uint32_t,sessp);
                ep->e_pgid = p->p_pgrpid;
                ep->e_jobc = pg->pg_jobc;
-               if ((sessp != SESSION_NULL) && sessp->s_ttyvp)
+               if (sessp != SESSION_NULL && sessp->s_ttyvp)
                        ep->e_flag = EPROC_CTTY;
-       } else {
-               ep->e_sess = 0;
-               ep->e_pgid = 0;
-               ep->e_jobc = 0;
        }
 #if CONFIG_LCTX
-       if (p->p_lctx) {
+       if (p->p_lctx)
                ep->e_lcid = p->p_lctx->lc_id;
-       } else {
-               ep->e_lcid = 0;
-       }
 #endif
        ep->e_ppid = p->p_ppid;
-       /* Pre-zero the fake historical pcred */
-       bzero(&ep->e_pcred, sizeof(ep->e_pcred));
        if (p->p_ucred) {
                my_cred = kauth_cred_proc_ref(p);
 
@@ -1655,78 +1520,58 @@ fill_user32_eproc(proc_t p, struct user32_eproc *ep)
                ep->e_pcred.p_svuid = kauth_cred_getsvuid(my_cred);
                ep->e_pcred.p_rgid = kauth_cred_getrgid(my_cred);
                ep->e_pcred.p_svgid = kauth_cred_getsvgid(my_cred);
+
                /* A fake historical *kauth_cred_t */
                ep->e_ucred.cr_ref = my_cred->cr_ref;
                ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred);
                ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups;
-               bcopy(posix_cred_get(my_cred)->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
+               bcopy(posix_cred_get(my_cred)->cr_groups,
+                       ep->e_ucred.cr_groups, NGROUPS * sizeof (gid_t));
 
                kauth_cred_unref(&my_cred);
        }
-       if (p->p_stat == SIDL || p->p_stat == SZOMB) {
-               ep->e_vm.vm_tsize = 0;
-               ep->e_vm.vm_dsize = 0;
-               ep->e_vm.vm_ssize = 0;
-       }
-       ep->e_vm.vm_rssize = 0;
 
        if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) &&
             (tp = SESSION_TP(sessp))) {
                ep->e_tdev = tp->t_dev;
                ep->e_tpgid = sessp->s_ttypgrpid;
-               ep->e_tsess = CAST_DOWN_EXPLICIT(uint32_t,tp->t_session);
        } else
                ep->e_tdev = NODEV;
 
-       if (SESS_LEADER(p, sessp))
-               ep->e_flag |= EPROC_SLEADER;
-       bzero(&ep->e_wmesg[0], WMESGLEN+1);
-       ep->e_xsize = ep->e_xrssize = 0;
-       ep->e_xccount = ep->e_xswrss = 0;
-       if (sessp != SESSION_NULL)
+       if (sessp != SESSION_NULL) {
+               if (SESS_LEADER(p, sessp))
+                       ep->e_flag |= EPROC_SLEADER;
                session_rele(sessp);
-       if(pg != PGRP_NULL)
+       }
+       if (pg != PGRP_NULL)
                pg_rele(pg);
 }
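
The dropped bzero()/zero-assignment lines rely on the output buffer now being cleared once, up front. A hedged sketch of the intended calling pattern, reusing names from sysctl_prochandle above:

        bzero(kprocp, sizeof_kproc);            /* zero once, centrally */
        if (is_64_bit)
                fill_user64_proc(p, kprocp);    /* set only non-zero fields */
        else
                fill_user32_proc(p, kprocp);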
 
 /*
- * Fill in an LP64 version of eproc structure for the specified process.
+ * Fill in non-zero fields of an LP64 eproc structure for the specified process.
  */
 STATIC void
-fill_user64_eproc(proc_t p, struct user64_eproc *ep)
+fill_user64_eproc(proc_t p, struct user64_eproc *__restrict ep)
 {
        struct tty *tp;
-       struct  session *sessp = NULL;
-       struct pgrp * pg;
+       struct pgrp *pg;
+       struct session *sessp;
        kauth_cred_t my_cred;
        
        pg = proc_pgrp(p);
        sessp = proc_session(p);
 
-       ep->e_paddr = CAST_USER_ADDR_T(p);
        if (pg != PGRP_NULL) {
-               ep->e_sess = CAST_USER_ADDR_T(sessp);
                ep->e_pgid = p->p_pgrpid;
                ep->e_jobc = pg->pg_jobc;
-               if (sessp != SESSION_NULL) {
-                       if (sessp->s_ttyvp)
-                           ep->e_flag = EPROC_CTTY;
-               }
-       } else {
-               ep->e_sess = USER_ADDR_NULL;
-               ep->e_pgid = 0;
-               ep->e_jobc = 0;
+               if (sessp != SESSION_NULL && sessp->s_ttyvp)
+                       ep->e_flag = EPROC_CTTY;
        }
 #if CONFIG_LCTX
-       if (p->p_lctx) {
+       if (p->p_lctx)
                ep->e_lcid = p->p_lctx->lc_id;
-       } else {
-               ep->e_lcid = 0;
-       }
 #endif
        ep->e_ppid = p->p_ppid;
-       /* Pre-zero the fake historical pcred */
-       bzero(&ep->e_pcred, sizeof(ep->e_pcred));
        if (p->p_ucred) {
                my_cred = kauth_cred_proc_ref(p);
 
@@ -1740,180 +1585,135 @@ fill_user64_eproc(proc_t p, struct user64_eproc *ep)
                ep->e_ucred.cr_ref = my_cred->cr_ref;
                ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred);
                ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups;
-               bcopy(posix_cred_get(my_cred)->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
+               bcopy(posix_cred_get(my_cred)->cr_groups,
+                       ep->e_ucred.cr_groups, NGROUPS * sizeof (gid_t));
 
                kauth_cred_unref(&my_cred);
        }
-       if (p->p_stat == SIDL || p->p_stat == SZOMB) {
-               ep->e_vm.vm_tsize = 0;
-               ep->e_vm.vm_dsize = 0;
-               ep->e_vm.vm_ssize = 0;
-       }
-       ep->e_vm.vm_rssize = 0;
 
        if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) &&
             (tp = SESSION_TP(sessp))) {
                ep->e_tdev = tp->t_dev;
                ep->e_tpgid = sessp->s_ttypgrpid;
-               ep->e_tsess = CAST_USER_ADDR_T(tp->t_session);
        } else
                ep->e_tdev = NODEV;
 
-       if (SESS_LEADER(p, sessp))
-               ep->e_flag |= EPROC_SLEADER;
-       bzero(&ep->e_wmesg[0], WMESGLEN+1);
-       ep->e_xsize = ep->e_xrssize = 0;
-       ep->e_xccount = ep->e_xswrss = 0;
-       if (sessp != SESSION_NULL)
+       if (sessp != SESSION_NULL) {
+               if (SESS_LEADER(p, sessp))
+                       ep->e_flag |= EPROC_SLEADER;
                session_rele(sessp);
+       }
        if (pg != PGRP_NULL)
                pg_rele(pg);
 }
 
 /*
  * Fill in an eproc structure for the specified process.
+ * bzeroed by our caller, so only set non-zero fields.
  */
 STATIC void
-fill_user32_externproc(proc_t p, struct user32_extern_proc *exp)
+fill_user32_externproc(proc_t p, struct user32_extern_proc *__restrict exp)
 {
-       exp->p_forw = exp->p_back = 0;
        exp->p_starttime.tv_sec = p->p_start.tv_sec;
        exp->p_starttime.tv_usec = p->p_start.tv_usec;
-       exp->p_vmspace = 0;
-       exp->p_sigacts = CAST_DOWN_EXPLICIT(uint32_t,p->p_sigacts);
-       exp->p_flag  = p->p_flag;
+       exp->p_flag = p->p_flag;
        if (p->p_lflag & P_LTRACED)
                exp->p_flag |= P_TRACED;
        if (p->p_lflag & P_LPPWAIT)
                exp->p_flag |= P_PPWAIT;
        if (p->p_lflag & P_LEXIT)
                exp->p_flag |= P_WEXIT;
-       exp->p_stat  = p->p_stat ;
-       exp->p_pid  = p->p_pid ;
-       exp->p_oppid  = p->p_oppid ;
+       exp->p_stat = p->p_stat;
+       exp->p_pid = p->p_pid;
+       exp->p_oppid = p->p_oppid;
        /* Mach related  */
-       exp->user_stack  = p->user_stack;
-       exp->exit_thread  = CAST_DOWN_EXPLICIT(uint32_t,p->exit_thread);
-       exp->p_debugger  = p->p_debugger ;
-       exp->sigwait  = p->sigwait ;
+       exp->user_stack = p->user_stack;
+       exp->p_debugger = p->p_debugger;
+       exp->sigwait = p->sigwait;
        /* scheduling */
 #ifdef _PROC_HAS_SCHEDINFO_
-       exp->p_estcpu  = p->p_estcpu ;
-       exp->p_pctcpu  = p->p_pctcpu ;
-       exp->p_slptime  = p->p_slptime ;
-#else
-       exp->p_estcpu  = 0 ;
-       exp->p_pctcpu  = 0 ;
-       exp->p_slptime = 0 ;
+       exp->p_estcpu = p->p_estcpu;
+       exp->p_pctcpu = p->p_pctcpu;
+       exp->p_slptime = p->p_slptime;
 #endif
-       exp->p_cpticks  = 0 ;
-       exp->p_wchan  = 0 ;
-       exp->p_wmesg  = 0 ;
-       exp->p_swtime  = 0 ;
-       bcopy(&p->p_realtimer, &exp->p_realtimer,sizeof(struct itimerval));
-       bcopy(&p->p_rtime, &exp->p_rtime,sizeof(struct timeval));
-       exp->p_uticks  = 0 ;
-       exp->p_sticks  = 0 ;
-       exp->p_iticks  = 0 ;
-       exp->p_traceflag  = 0;
-       exp->p_tracep  = 0 ;
-       exp->p_siglist  = 0 ;   /* No longer relevant */
-       exp->p_textvp  = CAST_DOWN_EXPLICIT(uint32_t,p->p_textvp) ;
-       exp->p_holdcnt = 0 ;
-       exp->p_sigmask  = 0 ;   /* no longer avaialable */
-       exp->p_sigignore  = p->p_sigignore ;
-       exp->p_sigcatch  = p->p_sigcatch ;
-       exp->p_priority  = p->p_priority ;
-       exp->p_usrpri  = 0 ;
-       exp->p_nice  = p->p_nice ;
-       bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN);
-       exp->p_comm[MAXCOMLEN] = '\0';
-       exp->p_pgrp  = CAST_DOWN_EXPLICIT(uint32_t,p->p_pgrp) ;
-       exp->p_addr  = 0;
-       exp->p_xstat  = p->p_xstat ;
-       exp->p_acflag  = p->p_acflag ;
-       exp->p_ru  = CAST_DOWN_EXPLICIT(uint32_t,p->p_ru) ;             /* XXX may be NULL */
+       exp->p_realtimer.it_interval.tv_sec =
+               (user32_time_t)p->p_realtimer.it_interval.tv_sec;
+       exp->p_realtimer.it_interval.tv_usec =
+               (__int32_t)p->p_realtimer.it_interval.tv_usec;
+
+       exp->p_realtimer.it_value.tv_sec =
+               (user32_time_t)p->p_realtimer.it_value.tv_sec;
+       exp->p_realtimer.it_value.tv_usec =
+               (__int32_t)p->p_realtimer.it_value.tv_usec;
+
+       exp->p_rtime.tv_sec = (user32_time_t)p->p_rtime.tv_sec;
+       exp->p_rtime.tv_usec = (__int32_t)p->p_rtime.tv_usec;
+
+       exp->p_sigignore = p->p_sigignore;
+       exp->p_sigcatch = p->p_sigcatch;
+       exp->p_priority = p->p_priority;
+       exp->p_nice = p->p_nice;
+       bcopy(&p->p_comm, &exp->p_comm, MAXCOMLEN);
+       exp->p_xstat = p->p_xstat;
+       exp->p_acflag = p->p_acflag;
 }
 
 /*
  * Fill in an LP64 version of extern_proc structure for the specified process.
  */
 STATIC void
-fill_user64_externproc(proc_t p, struct user64_extern_proc *exp)
+fill_user64_externproc(proc_t p, struct user64_extern_proc *__restrict exp)
 {
-       exp->p_forw = exp->p_back = USER_ADDR_NULL;
        exp->p_starttime.tv_sec = p->p_start.tv_sec;
        exp->p_starttime.tv_usec = p->p_start.tv_usec;
-       exp->p_vmspace = USER_ADDR_NULL;
-       exp->p_sigacts = CAST_USER_ADDR_T(p->p_sigacts);
-       exp->p_flag  = p->p_flag;
+       exp->p_flag = p->p_flag;
        if (p->p_lflag & P_LTRACED)
                exp->p_flag |= P_TRACED;
        if (p->p_lflag & P_LPPWAIT)
                exp->p_flag |= P_PPWAIT;
        if (p->p_lflag & P_LEXIT)
                exp->p_flag |= P_WEXIT;
-       exp->p_stat  = p->p_stat ;
-       exp->p_pid  = p->p_pid ;
-       exp->p_oppid  = p->p_oppid ;
+       exp->p_stat = p->p_stat;
+       exp->p_pid = p->p_pid;
+       exp->p_oppid = p->p_oppid;
        /* Mach related  */
-       exp->user_stack  = p->user_stack;
-       exp->exit_thread  = CAST_USER_ADDR_T(p->exit_thread);
-       exp->p_debugger  = p->p_debugger ;
-       exp->sigwait  = p->sigwait ;
+       exp->user_stack = p->user_stack;
+       exp->p_debugger = p->p_debugger;
+       exp->sigwait = p->sigwait;
        /* scheduling */
 #ifdef _PROC_HAS_SCHEDINFO_
-       exp->p_estcpu  = p->p_estcpu ;
-       exp->p_pctcpu  = p->p_pctcpu ;
-       exp->p_slptime  = p->p_slptime ;
-#else
-       exp->p_estcpu  = 0 ;
-       exp->p_pctcpu  = 0 ;
-       exp->p_slptime = 0 ;
+       exp->p_estcpu = p->p_estcpu;
+       exp->p_pctcpu = p->p_pctcpu;
+       exp->p_slptime = p->p_slptime;
 #endif
-       exp->p_cpticks  = 0 ;
-       exp->p_wchan  = 0;
-       exp->p_wmesg  = 0;
-       exp->p_swtime  = 0 ;
        exp->p_realtimer.it_interval.tv_sec = p->p_realtimer.it_interval.tv_sec;
        exp->p_realtimer.it_interval.tv_usec = p->p_realtimer.it_interval.tv_usec;
+
        exp->p_realtimer.it_value.tv_sec = p->p_realtimer.it_value.tv_sec;
        exp->p_realtimer.it_value.tv_usec = p->p_realtimer.it_value.tv_usec;
+
        exp->p_rtime.tv_sec = p->p_rtime.tv_sec;
        exp->p_rtime.tv_usec = p->p_rtime.tv_usec;
-       exp->p_uticks  = 0 ;
-       exp->p_sticks  = 0 ;
-       exp->p_iticks  = 0 ;
-       exp->p_traceflag  = 0 ;
-       exp->p_tracep  = 0;
-       exp->p_siglist  = 0 ;   /* No longer relevant */
-       exp->p_textvp  = CAST_USER_ADDR_T(p->p_textvp);
-       exp->p_holdcnt = 0 ;
-       exp->p_sigmask  = 0 ;   /* no longer avaialable */
-       exp->p_sigignore  = p->p_sigignore ;
-       exp->p_sigcatch  = p->p_sigcatch ;
-       exp->p_priority  = p->p_priority ;
-       exp->p_usrpri  = 0 ;
-       exp->p_nice  = p->p_nice ;
-       bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN);
-       exp->p_comm[MAXCOMLEN] = '\0';
-       exp->p_pgrp  = CAST_USER_ADDR_T(p->p_pgrp);
-       exp->p_addr  = USER_ADDR_NULL;
-       exp->p_xstat  = p->p_xstat ;
-       exp->p_acflag  = p->p_acflag ;
-       exp->p_ru  = CAST_USER_ADDR_T(p->p_ru);         /* XXX may be NULL */
+
+       exp->p_sigignore = p->p_sigignore;
+       exp->p_sigcatch = p->p_sigcatch;
+       exp->p_priority = p->p_priority;
+       exp->p_nice = p->p_nice;
+       bcopy(&p->p_comm, &exp->p_comm, MAXCOMLEN);
+       exp->p_xstat = p->p_xstat;
+       exp->p_acflag = p->p_acflag;
 }
 
 STATIC void
-fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp)
+fill_user32_proc(proc_t p, struct user32_kinfo_proc *__restrict kp)
 {
-       /* on a 64 bit kernel, 32 bit users will get some truncated information */
+       /* on a 64 bit kernel, 32 bit users get some truncated information */
        fill_user32_externproc(p, &kp->kp_proc);
        fill_user32_eproc(p, &kp->kp_eproc);
 }
 
 STATIC void
-fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp)
+fill_user64_proc(proc_t p, struct user64_kinfo_proc *__restrict kp)
 {
        fill_user64_externproc(p, &kp->kp_proc);
        fill_user64_eproc(p, &kp->kp_eproc);
@@ -1958,6 +1758,10 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS
        case KERN_KDSETRTCDEC:
        case KERN_KDSETBUF:
        case KERN_KDGETENTROPY:
+       case KERN_KDENABLE_BG_TRACE:
+       case KERN_KDDISABLE_BG_TRACE:
+       case KERN_KDSET_TYPEFILTER:
+
                ret = kdbg_control(name, namelen, oldp, oldlenp);
                break;
        default:
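
A hedged user-space sketch of reaching one of the newly routed subcommands; the op code rides as the third MIB element into kdbg_control() (some ops need extra MIB elements or buffers, so treat this as illustrative only):

        int mib[3] = { CTL_KERN, KERN_KDEBUG, KERN_KDENABLE_BG_TRACE };
        size_t needed = 0;

        if (sysctl(mib, 3, NULL, &needed, NULL, 0) == -1)
                perror("KERN_KDENABLE_BG_TRACE");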
@@ -2303,7 +2107,7 @@ sysctl_aiomax
        int error = sysctl_io_number(req, aio_max_requests, sizeof(int), &new_value, &changed);
        if (changed) {
                 /* make sure the system-wide limit is greater than the per process limit */
-               if (new_value >= aio_max_requests_per_process)
+               if (new_value >= aio_max_requests_per_process && new_value <= AIO_MAX_REQUESTS)
                        aio_max_requests = new_value;
                else
                        error = EINVAL;
@@ -2642,6 +2446,10 @@ SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max,
                CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
                &speculative_prefetch_max, 0, "");
 
+SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max_iosize, 
+               CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+               &speculative_prefetch_max_iosize, 0, "");
+
 SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_target,
                CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
                &vm_page_free_target, 0, "");
@@ -2976,6 +2784,40 @@ SYSCTL_PROC(_kern, KERN_RAGEVNODE, rage_vnode,
                CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
                0, 0, sysctl_rage_vnode, "I", "");
 
+/* XXX move this interface into libproc and remove this sysctl */
+STATIC int
+sysctl_setthread_cpupercent
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       int new_value, old_value;
+       int error = 0;
+       kern_return_t kret = KERN_SUCCESS;
+       uint8_t percent = 0;
+       int ms_refill = 0;
+
+       old_value = 0;
+
+       if ((error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL)) != 0)
+               return (error);
+
+       percent = new_value & 0xff;                     /* low 8 bits for percent */
+       ms_refill = (new_value >> 8) & 0xffffff;        /* upper 24 bits represent ms refill value */
+       if (percent > 100)
+               return (EINVAL);
+
+       /*
+        * If the caller is specifying a percentage of 0, this will unset the CPU limit, if present.
+        */
+       if ((kret = thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ms_refill * NSEC_PER_MSEC)) != 0)
+               return (EIO);
+       
+       return (0);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, setthread_cpupercent,
+               CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+               0, 0, sysctl_setthread_cpupercent, "I", "set thread cpu percentage limit");
+
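
A hedged user-space sketch of the encoding the handler expects, per the comments above (low 8 bits: percent; upper 24 bits: refill period in milliseconds):

        /* limit the calling thread to 50% CPU over a 100 ms refill window */
        int value = 50 | (100 << 8);

        if (sysctlbyname("kern.setthread_cpupercent",
            NULL, NULL, &value, sizeof(value)) == -1)
                perror("setthread_cpupercent");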
 
 STATIC int
 sysctl_kern_check_openevt
@@ -3119,12 +2961,6 @@ SYSCTL_PROC(_vm, VM_SWAPUSAGE, swapusage,
                CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
                0, 0, sysctl_swapusage, "S,xsw_usage", "");
 
-#if CONFIG_EMBEDDED
-/* <rdar://problem/7688080> */
-boolean_t vm_freeze_enabled = FALSE;
-#endif /* CONFIG_EMBEDDED */
-
-
 #if CONFIG_FREEZE
 extern void vm_page_reactivate_all_throttled(void);
 
@@ -3132,7 +2968,7 @@ static int
 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
 {
 #pragma unused(arg1, arg2)
-       int error, val = vm_freeze_enabled ? 1 : 0;
+       int error, val = memorystatus_freeze_enabled ? 1 : 0;
        boolean_t disabled;
 
        error = sysctl_handle_int(oidp, &val, 0, req);
@@ -3142,9 +2978,9 @@ sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
        /* 
         * If freeze is being disabled, we need to move dirty pages out from the throttle to the active queue. 
         */
-       disabled = (!val && vm_freeze_enabled);
+       disabled = (!val && memorystatus_freeze_enabled);
        
-       vm_freeze_enabled = val ? TRUE : FALSE;
+       memorystatus_freeze_enabled = val ? TRUE : FALSE;
        
        if (disabled) {
                vm_page_reactivate_all_throttled();
@@ -3153,7 +2989,7 @@ sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
        return (0);
 }
 
-SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT|CTLFLAG_RW, &vm_freeze_enabled, 0, sysctl_freeze_enabled, "I", "");
+SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT|CTLFLAG_RW, &memorystatus_freeze_enabled, 0, sysctl_freeze_enabled, "I", "");
 #endif /* CONFIG_FREEZE */
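
A hedged user-space sketch of flipping the renamed control on CONFIG_FREEZE kernels (writing 0 also reactivates throttled dirty pages, per the handler above):

        int enable = 0;
        if (sysctlbyname("vm.freeze_enabled", NULL, NULL,
            &enable, sizeof(enable)) == -1)
                perror("vm.freeze_enabled");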
 
 /* this kernel does NOT implement shared_region_make_private_np() */
@@ -3296,6 +3132,24 @@ SYSCTL_INT (_kern, OID_AUTO, affinity_sets_enabled,
 SYSCTL_INT (_kern, OID_AUTO, affinity_sets_mapping,
            CTLFLAG_RW | CTLFLAG_LOCKED, &affinity_sets_mapping, 0, "mapping policy");
 
+/*
+ * Boolean indicating if KASLR is active.
+ */
+STATIC int
+sysctl_slide
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       uint32_t        slide;
+
+       slide = vm_kernel_slide ? 1 : 0;
+
+       return sysctl_io_number( req, slide, sizeof(int), NULL, NULL);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, slide,
+               CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+               0, 0, sysctl_slide, "I", "");
+
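
Reading the new node from user space is a one-liner; a minimal sketch:

        int slide = 0;
        size_t len = sizeof(slide);

        /* 1 when the kernel was slid (KASLR active), 0 otherwise */
        if (sysctlbyname("kern.slide", &slide, &len, NULL, 0) == 0)
                printf("KASLR %s\n", slide ? "active" : "inactive");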
 /*
  * Limit on total memory users can wire.
  *
@@ -3316,7 +3170,6 @@ vm_map_size_t     vm_user_wire_limit;
 /*
  * There needs to be a more automatic/elegant way to do this
  */
-
 SYSCTL_QUAD(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_no_user_wire_amount, "");
 SYSCTL_QUAD(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_user_wire_limit, "");
 SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_user_wire_limit, "");
@@ -3370,3 +3223,16 @@ SYSCTL_STRING(_kern, OID_AUTO, sched,
                          CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
                          sched_string, sizeof(sched_string),
                          "Timeshare scheduler implementation");
+
+/*
+ * Only support runtime modification on embedded platforms
+ * with development config enabled
+ */
+#if CONFIG_EMBEDDED
+#if !SECURE_KERNEL
+extern int precise_user_kernel_time;
+SYSCTL_INT(_kern, OID_AUTO, precise_user_kernel_time, 
+               CTLFLAG_RW | CTLFLAG_LOCKED,
+               &precise_user_kernel_time, 0, "Precise accounting of kernel vs. user time");
+#endif
+#endif
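
A hedged sketch of toggling this from user space (only possible on embedded development kernels, per the #if guards above):

        int precise = 0;        /* trade accounting precision for speed */
        if (sysctlbyname("kern.precise_user_kernel_time",
            NULL, NULL, &precise, sizeof(precise)) == -1)
                perror("precise_user_kernel_time");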
index 70ab53b3123ef63686972a3f93acc3f1227eb27c..2598737281eb3f49992acc59b0740a2ca3b18f5c 100644 (file)
@@ -31,6 +31,7 @@
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/socket.h>
 #include <kern/debug.h>
 #include <libkern/OSAtomic.h>
@@ -44,6 +45,9 @@ static const mbuf_flags_t mbuf_flags_mask = (MBUF_EXT | MBUF_PKTHDR | MBUF_EOR |
     MBUF_LOOP | MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG |
     MBUF_LASTFRAG | MBUF_PROMISC | MBUF_HASFCS);
 
+#define        MBUF_PKTAUXF_MASK       \
+       (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR)
+
 void* mbuf_data(mbuf_t mbuf)
 {
        return mbuf->m_data;
@@ -1105,38 +1109,67 @@ mbuf_get_minclsize(void)
        return (MHLEN + MLEN);
 }
 
-mbuf_traffic_class_t 
+mbuf_traffic_class_t
 mbuf_get_traffic_class(mbuf_t m)
 {
-       mbuf_traffic_class_t prio = MBUF_TC_BE;
-
        if (m == NULL || !(m->m_flags & M_PKTHDR))
-               return (prio);
-
-       if (m->m_pkthdr.prio <= MBUF_TC_VO)
-               prio = m->m_pkthdr.prio;
+               return (MBUF_TC_BE);
 
-       return (prio);
+       return (m_get_traffic_class(m));
 }
 
-errno_t 
+errno_t
 mbuf_set_traffic_class(mbuf_t m, mbuf_traffic_class_t tc)
 {
-       errno_t error = 0;
-       
+       if (m == NULL || !(m->m_flags & M_PKTHDR) ||
+           ((u_int32_t)tc >= MBUF_TC_MAX))
+               return (EINVAL);
+
+       return (m_set_traffic_class(m, tc));
+}
+
+int
+mbuf_is_traffic_class_privileged(mbuf_t m)
+{
+       if (m == NULL || !(m->m_flags & M_PKTHDR) ||
+           !MBUF_VALID_SC(m->m_pkthdr.svc))
+               return (0);
+
+       return (m->m_pkthdr.aux_flags & MAUXF_PRIO_PRIVILEGED);
+}
+
+mbuf_svc_class_t
+mbuf_get_service_class(mbuf_t m)
+{
        if (m == NULL || !(m->m_flags & M_PKTHDR))
-               return EINVAL;
+               return (MBUF_SC_BE);
 
-       switch (tc) {
-               case MBUF_TC_BE:
-               case MBUF_TC_BK:
-               case MBUF_TC_VI:
-               case MBUF_TC_VO:
-                       m->m_pkthdr.prio = tc;
-                       break;
-               default:
-                       error = EINVAL;
-                       break;
-       }
-       return error;
+       return (m_get_service_class(m));
+}
+
+errno_t
+mbuf_set_service_class(mbuf_t m, mbuf_svc_class_t sc)
+{
+       if (m == NULL || !(m->m_flags & M_PKTHDR))
+               return (EINVAL);
+
+       return (m_set_service_class(m, sc));
+}
+
+errno_t
+mbuf_pkthdr_aux_flags(mbuf_t m, mbuf_pkthdr_aux_flags_t *flagsp)
+{
+       u_int32_t flags;
+       if (m == NULL || !(m->m_flags & M_PKTHDR) || flagsp == NULL)
+               return (EINVAL);
+
+       flags = m->m_pkthdr.aux_flags & MBUF_PKTAUXF_MASK;
+
+       /* These 2 flags are mutually exclusive */
+       VERIFY((flags &
+           (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR)) !=
+           (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR));
+
+       *flagsp = flags;
+       return (0);
 }
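
A hedged in-kernel sketch of a KEXT using the accessors added above (assumes m is a valid packet-header mbuf; the MBUF_SC_* and MBUF_PKTAUXF_* constants are those referenced in this file):

        mbuf_svc_class_t sc = mbuf_get_service_class(m);
        mbuf_pkthdr_aux_flags_t aux = 0;

        /* promote best-effort traffic to the video service class */
        if (sc == MBUF_SC_BE)
                (void) mbuf_set_service_class(m, MBUF_SC_VI);

        if (mbuf_pkthdr_aux_flags(m, &aux) == 0 &&
            (aux & MBUF_PKTAUXF_INET_RESOLVE_RTR)) {
                /* packet is part of IPv4 default-router resolution */
        }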
index 70507beff54a68b9b05244f7f5dd1edff8df854b..3de525cbe2bae4b4f7bff7eae809cd36dce8dd5f 100644 (file)
@@ -514,11 +514,11 @@ sock_setsockopt(
 /*
  * This follows the recommended mappings between DSCP code points and WMM access classes
  */
-static u_int8_t so_tc_from_dscp(u_int8_t dscp);
-static u_int8_t
+static u_int32_t so_tc_from_dscp(u_int8_t dscp);
+static u_int32_t
 so_tc_from_dscp(u_int8_t dscp)
 {
-       u_int8_t tc;
+       u_int32_t tc;
 
        if (dscp >= 0x30 && dscp <= 0x3f)
                tc = SO_TC_VO;
@@ -529,7 +529,7 @@ so_tc_from_dscp(u_int8_t dscp)
        else
                tc = SO_TC_BE;
 
-       return tc;
+       return (tc);
 }
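
The widened u_int32_t return presumably keeps the type consistent with the SO_TC_* values; for example:

        /* DSCP 0x38 (CS7) falls in the 0x30-0x3f band, so it maps to voice */
        u_int32_t tc = so_tc_from_dscp(0x38);   /* == SO_TC_VO */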
 
 errno_t
@@ -946,6 +946,8 @@ sock_socket(
 #endif
                (*new_so)->so_upcall = (so_upcall)callback;
                (*new_so)->so_upcallarg = context;
+               (*new_so)->last_pid = 0;
+               (*new_so)->last_upid = 0;
        }
        return error;
 }
@@ -978,7 +980,7 @@ sock_release(socket_t sock)
                return;
        socket_lock(sock, 1);
 
-       if (sock->so_flags & SOF_UPCALLINUSE)
+       if (sock->so_upcallusecount)
                soclose_wait_locked(sock);
 
        sock->so_retaincnt--;
index 67a944c2d47e32fb899adac0bac74eeef5659543..f456d62465351787213c5e5a46d2fe4c03327773 100644 (file)
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <sys/protosw.h>
+#include <sys/domain.h>
 #include <sys/proc.h>
 #include <kern/locks.h>
 #include <kern/thread.h>
 #include <kern/debug.h>
 #include <net/kext_net.h>
+#include <net/if.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 
 #include <libkern/libkern.h>
 #include <libkern/OSAtomic.h>
@@ -258,63 +267,64 @@ sflt_attach_locked(
        struct socket_filter_entry *entry = NULL;
        
        if (filter == NULL)
-               error = ENOENT;
-       
-       if (error == 0) {
-               /* allocate the socket filter entry */
-               MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR, M_WAITOK);
-               if (entry == NULL) {
-                       error = ENOMEM;
-               }
-       }
-       
-       if (error == 0) {
-               /* Initialize the socket filter entry */
-               entry->sfe_cookie = NULL;
-               entry->sfe_flags = SFEF_ATTACHED;
-               entry->sfe_refcount = 1; // corresponds to SFEF_ATTACHED flag set
+               return ENOENT;
+
+       for (entry = so->so_filt; entry; entry = entry->sfe_next_onfilter) 
+               if (entry->sfe_filter->sf_filter.sf_handle ==
+                   filter->sf_filter.sf_handle)
+                       return EEXIST;
+
+       /* allocate the socket filter entry */
+       MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR,
+           M_WAITOK);
+       if (entry == NULL)
+               return ENOMEM;
+       
+       /* Initialize the socket filter entry */
+       entry->sfe_cookie = NULL;
+       entry->sfe_flags = SFEF_ATTACHED;
+       entry->sfe_refcount = 1; // corresponds to SFEF_ATTACHED flag set
+       
+       /* Put the entry in the filter list */
+       sflt_retain_locked(filter);
+       entry->sfe_filter = filter;
+       entry->sfe_next_onfilter = filter->sf_entry_head;
+       filter->sf_entry_head = entry;
+       
+       /* Put the entry on the socket filter list */
+       entry->sfe_socket = so;
+       entry->sfe_next_onsocket = so->so_filt;
+       so->so_filt = entry;
+
+       if (entry->sfe_filter->sf_filter.sf_attach) {
+               // Retain the entry while we call attach
+               sflt_entry_retain(entry);
                
-               /* Put the entry in the filter list */
-               sflt_retain_locked(filter);
-               entry->sfe_filter = filter;
-               entry->sfe_next_onfilter = filter->sf_entry_head;
-               filter->sf_entry_head = entry;
+               // Release the filter lock -- callers must be aware we will do this
+               lck_rw_unlock_exclusive(sock_filter_lock);
                
-               /* Put the entry on the socket filter list */
-               entry->sfe_socket = so;
-               entry->sfe_next_onsocket = so->so_filt;
-               so->so_filt = entry;
+               // Unlock the socket
+               if (socklocked)
+                       socket_unlock(so, 0);
                
-               if (entry->sfe_filter->sf_filter.sf_attach) {
-                       // Retain the entry while we call attach
-                       sflt_entry_retain(entry);
-                       
-                       // Release the filter lock -- callers must be aware we will do this
-                       lck_rw_unlock_exclusive(sock_filter_lock);
-                       
-                       // Unlock the socket
-                       if (socklocked)
-                               socket_unlock(so, 0);
-                       
-                       // It's finally safe to call the filter function
-                       error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so);
-                       
-                       // Lock the socket again
-                       if (socklocked)
-                               socket_lock(so, 0);
-                       
-                       // Lock the filters again
-                       lck_rw_lock_exclusive(sock_filter_lock);
-                       
-                       // If the attach function returns an error, this filter must be detached
-                       if (error) {
-                               entry->sfe_flags |= SFEF_NODETACH; // don't call sf_detach
-                               sflt_detach_locked(entry);
-                       }
-                       
-                       // Release the retain we held through the attach call
-                       sflt_entry_release(entry);
+               // It's finally safe to call the filter function
+               error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so);
+               
+               // Lock the socket again
+               if (socklocked)
+                       socket_lock(so, 0);
+               
+               // Lock the filters again
+               lck_rw_lock_exclusive(sock_filter_lock);
+               
+               // If the attach function returns an error, this filter must be detached
+               if (error) {
+                       entry->sfe_flags |= SFEF_NODETACH; // don't call sf_detach
+                       sflt_detach_locked(entry);
                }
+               
+               // Release the retain we held through the attach call
+               sflt_entry_release(entry);
        }
        
        return error;
@@ -450,21 +460,25 @@ sflt_termsock(
        lck_rw_unlock_exclusive(sock_filter_lock);
 }
 
-__private_extern__ void
-sflt_notify(
+
+static void
+sflt_notify_internal(
        struct socket   *so,
        sflt_event_t    event,
-       void                    *param)
+       void            *param,
+       sflt_handle     handle)
 {
        if (so->so_filt == NULL) return;
        
        struct socket_filter_entry      *entry;
-       int                                                     unlocked = 0;
+       int                             unlocked = 0;
        
        lck_rw_lock_shared(sock_filter_lock);
        for (entry = so->so_filt; entry; entry = entry->sfe_next_onsocket) {
                if ((entry->sfe_flags & SFEF_ATTACHED)
-                       && entry->sfe_filter->sf_filter.sf_notify) {
+                   && entry->sfe_filter->sf_filter.sf_notify &&
+                   ((handle && entry->sfe_filter->sf_filter.sf_handle != handle) ||
+                    !handle)) {
                        // Retain the filter entry and release the socket filter lock
                        sflt_entry_retain(entry);
                        lck_rw_unlock_shared(sock_filter_lock);
@@ -491,6 +505,24 @@ sflt_notify(
        }
 }
 
+__private_extern__ void
+sflt_notify(
+       struct socket   *so,
+       sflt_event_t    event,
+       void            *param)
+{
+       sflt_notify_internal(so, event, param, 0);
+}
+
+static void
+sflt_notify_after_register(
+       struct socket   *so,
+       sflt_event_t    event,
+       sflt_handle     handle)
+{
+       sflt_notify_internal(so, event, NULL, handle);
+}
+
 __private_extern__ int
 sflt_ioctl(
        struct socket   *so,
@@ -1075,6 +1107,11 @@ sflt_detach(
        return result;
 }
 
+struct solist {
+       struct solist *next;
+       struct socket *so;
+};
+
 errno_t
 sflt_register(
        const struct sflt_filter        *filter,
@@ -1087,6 +1124,9 @@ sflt_register(
        int error = 0;
        struct protosw *pr = pffindproto(domain, protocol, type);
        unsigned int len;
+       struct socket *so;
+       struct inpcb *inp;
+       struct solist *solisthead = NULL, *solist = NULL;
 
        if (pr == NULL)
                return ENOENT;
@@ -1141,12 +1181,95 @@ sflt_register(
                sflt_retain_locked(sock_filt);
        }
        lck_rw_unlock_exclusive(sock_filter_lock);
-       
+
        if (match != NULL) {
                FREE(sock_filt, M_IFADDR);
                return EEXIST;
        }
 
+       if (!(filter->sf_flags & SFLT_EXTENDED_REGISTRY))
+               return error;
+
+       /*
+        * Setup the filter on the TCP and UDP sockets already created.
+        */
+#define SOLIST_ADD(_so)                do {                                    \
+       solist->next = solisthead;                                      \
+       sock_retain((_so));                                             \
+       solist->so = (_so);                                             \
+       solisthead = solist;                                            \
+} while (0)
+       if (protocol == IPPROTO_TCP) {
+               lck_rw_lock_shared(tcbinfo.mtx);
+               LIST_FOREACH(inp, tcbinfo.listhead, inp_list) {
+                       so = inp->inp_socket;
+                       if (so == NULL || so->so_state & SS_DEFUNCT ||
+                           so->so_state & SS_NOFDREF ||
+                           !INP_CHECK_SOCKAF(so, domain) ||
+                           !INP_CHECK_SOCKTYPE(so, type))
+                               continue;
+                       MALLOC(solist, struct solist *, sizeof(*solist),
+                           M_IFADDR, M_NOWAIT);
+                       if (!solist)
+                               continue;
+                       SOLIST_ADD(so);
+               }
+               lck_rw_done(tcbinfo.mtx);
+       } else if (protocol == IPPROTO_UDP) {
+               lck_rw_lock_shared(udbinfo.mtx);
+               LIST_FOREACH(inp, udbinfo.listhead, inp_list) {
+                       so = inp->inp_socket;
+                       if (so == NULL || so->so_state & SS_DEFUNCT ||
+                           so->so_state & SS_NOFDREF ||
+                           !INP_CHECK_SOCKAF(so, domain) ||
+                           !INP_CHECK_SOCKTYPE(so, type))
+                               continue;
+                       MALLOC(solist, struct solist *, sizeof(*solist),
+                           M_IFADDR, M_NOWAIT);
+                       if (!solist)
+                               continue;
+                       SOLIST_ADD(so);
+               }
+               lck_rw_done(udbinfo.mtx);
+       }
+       /* XXX it's possible to walk the raw socket list as well */
+#undef SOLIST_ADD
+
+       while (solisthead) {
+               sflt_handle handle = filter->sf_handle;
+
+               so = solisthead->so;
+               sflt_initsock(so);
+
+               if (so->so_state & SS_ISCONNECTING)
+                       sflt_notify_after_register(so, sock_evt_connecting,
+                           handle);
+               else if (so->so_state & SS_ISCONNECTED)
+                       sflt_notify_after_register(so, sock_evt_connected,
+                           handle);
+               else if ((so->so_state &
+                   (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
+                   (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE))
+                       sflt_notify_after_register(so, sock_evt_disconnecting,
+                           handle);
+               else if ((so->so_state &
+                   (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED)) ==
+                   (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED))
+                       sflt_notify_after_register(so, sock_evt_disconnected,
+                           handle);
+               else if (so->so_state & SS_CANTSENDMORE)
+                       sflt_notify_after_register(so, sock_evt_cantsendmore,
+                           handle);
+               else if (so->so_state & SS_CANTRCVMORE)
+                       sflt_notify_after_register(so, sock_evt_cantrecvmore,
+                           handle);
+               /* XXX no easy way to post the sock_evt_closing event */
+               sock_release(so);
+               solist = solisthead;
+               solisthead = solisthead->next;
+               FREE(solist, M_IFADDR);
+       }
+
        return error;
 }
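
A hedged sketch of a filter opting into the new behavior: with SFLT_EXTENDED_REGISTRY set, sflt_register() also walks the existing TCP (or UDP) sockets above and attaches the filter to each. The handle and callback names here are hypothetical:

        static struct sflt_filter my_filt = {
                .sf_handle = 0x45584d50,        /* hypothetical handle */
                .sf_flags  = SFLT_GLOBAL | SFLT_EXTENDED_REGISTRY,
                .sf_name   = "com.example.sflt",
                .sf_attach = my_attach,         /* hypothetical callbacks */
                .sf_detach = my_detach,
                .sf_notify = my_notify,
        };

        errno_t err = sflt_register(&my_filt, AF_INET, SOCK_STREAM,
            IPPROTO_TCP);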
 
index 9811047d7500d8b9c5edc6ef159e5ae8a72dee5f..e2fd1e3505b03cbe9a2a9134debc839f22cba9c0 100644 (file)
@@ -198,30 +198,14 @@ fatfile_getarch_affinity(
                struct vnode            *vp,
                vm_offset_t             data_ptr,
                struct fat_arch *archret,
-               int                             affinity)
+               int                             affinity __unused)
 {
-               load_return_t lret;
-               int handler = (exec_archhandler_ppc.path[0] != 0);
-               cpu_type_t primary_type, fallback_type;
-
-               if (handler && affinity) {
-                               primary_type = CPU_TYPE_POWERPC;
-                               fallback_type = cpu_type();
-               } else {
-                               primary_type = cpu_type();
-                               fallback_type = CPU_TYPE_POWERPC;
-               }
                /*
                 * Ignore all architectural bits when determining if an image
                 * in a fat file should be skipped or graded.
                 */
-               lret = fatfile_getarch2(vp, data_ptr, primary_type,
+               return fatfile_getarch2(vp, data_ptr, cpu_type(),
                                CPU_ARCH_MASK, archret);
-               if ((lret != 0) && handler) {
-                       lret = fatfile_getarch2(vp, data_ptr, fallback_type,
-                                               CPU_SUBTYPE_LIB64, archret);
-               }
-               return lret;
 }
 
 /**********************************************************************
index 47253a898b268ca575643d8055bdfa6442133395..02b660be150d37f2aefe08ce631aa24e968c0081 100644 (file)
  * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE
  * when KERNEL is defined.
  */
-extern pmap_t  pmap_create(vm_map_size_t size, boolean_t is_64bit);
-extern void    pmap_switch(pmap_t);
-
-/*
- * XXX kern/thread.h should not treat these prototypes as MACH_KERNEL_PRIVATE
- * when KERNEL is defined.
- */
-extern kern_return_t   thread_setstatus(thread_t thread, int flavor,
-                               thread_state_t tstate,
-                               mach_msg_type_number_t count);
-
-extern kern_return_t    thread_state_initialize(thread_t thread);
-
+extern pmap_t  pmap_create(ledger_t ledger, vm_map_size_t size,
+                               boolean_t is_64bit);
 
 /* XXX should have prototypes in a shared header file */
 extern int     get_map_nentries(vm_map_t);
@@ -109,12 +98,15 @@ static load_result_t load_result_null = {
        .mach_header = MACH_VM_MIN_ADDRESS,
        .entry_point = MACH_VM_MIN_ADDRESS,
        .user_stack = MACH_VM_MIN_ADDRESS,
+       .user_stack_size = 0,
        .all_image_info_addr = MACH_VM_MIN_ADDRESS,
        .all_image_info_size = 0,
        .thread_count = 0,
        .unixproc = 0,
        .dynlinker = 0,
-       .customstack = 0,
+       .needs_dynlinker = 0,
+       .prog_allocated_stack = 0,
+       .prog_stack_size = 0,
        .validentry = 0,
        .csflags = 0,
        .uuid = { 0 },
@@ -166,9 +158,19 @@ set_code_unprotect(
        struct encryption_info_command  *lcp,
        caddr_t                         addr,
        vm_map_t                        map,
+       int64_t                         slide,
        struct vnode                    *vp);
 #endif
 
+static
+load_return_t
+load_main(
+       struct entry_point_command      *epc,
+       thread_t                thread,
+       int64_t                         slide,
+       load_result_t           *result
+);
+
 static load_return_t
 load_unixthread(
        struct thread_command   *tcp,
@@ -282,6 +284,7 @@ load_machfile(
        struct vnode            *vp = imgp->ip_vp;
        off_t                   file_offset = imgp->ip_arch_offset;
        off_t                   macho_size = imgp->ip_arch_size;
+       off_t                   file_size = imgp->ip_vattr->va_data_size;
        
        pmap_t                  pmap = 0;       /* protected by create_map */
        vm_map_t                map;
@@ -296,6 +299,10 @@ load_machfile(
        mach_vm_offset_t        aslr_offset = 0;
        kern_return_t           kret;
 
+       if (macho_size > file_size) {
+               return(LOAD_BADMACHO);
+       }
+
        if (new_map == VM_MAP_NULL) {
                create_map = TRUE;
                old_task = current_task();
@@ -313,7 +320,8 @@ load_machfile(
        }
 
        if (create_map) {
-               pmap = pmap_create((vm_map_size_t) 0, (imgp->ip_flags & IMGPF_IS_64BIT));
+               pmap = pmap_create(get_task_ledger(task), (vm_map_size_t) 0,
+                               (imgp->ip_flags & IMGPF_IS_64BIT));
                pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT);
                map = vm_map_create(pmap,
                                0,
@@ -359,6 +367,19 @@ load_machfile(
                return(lret);
        }
 
+#if CONFIG_EMBEDDED
+       /*
+        * Check to see if the page zero is enforced by the map->min_offset.
+        */ 
+       if (vm_map_has_hard_pagezero(map, 0x1000) == FALSE) {
+               if (create_map) {
+                       vm_map_deallocate(map); /* will lose pmap reference too */
+               }
+               printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings);
+               psignal(vfs_context_proc(imgp->ip_vfs_context), SIGKILL);
+               return (LOAD_BADMACHO);
+       }
+#else
        /*
         * For 64-bit users, check for presence of a 4GB page zero
         * which will enable the kernel to share the user's address space
@@ -366,9 +387,10 @@ load_machfile(
         */ 
 
        if ((imgp->ip_flags & IMGPF_IS_64BIT) &&
-            vm_map_has_4GB_pagezero(map))
+            vm_map_has_4GB_pagezero(map)) {
                vm_map_set_4GB_pagezero(map);
-
+       }
+#endif
        /*
         *      Commit to new map.
         *
@@ -396,23 +418,23 @@ load_machfile(
                         *
                         * NOTE: task_start_halt() makes sure that no new
                         * threads are created in the task during the transition.
-                        * We need to mark the workqueue as exiting before we
-                        * wait for threads to terminate (at the end of which
-                        * we no longer have a prohibition on thread creation).
-                        * 
-                        * Finally, clean up any lingering workqueue data structures
-                        * that may have been left behind by the workqueue threads
-                        * as they exited (and then clean up the work queue itself).
-                        */
-                       kret = task_start_halt(task);
-                       if (kret != KERN_SUCCESS) {
-                               return(kret);           
-                       }
-                       proc_transcommit(p, 0);
-                       workqueue_mark_exiting(p);
-                       task_complete_halt(task);
-                       workqueue_exit(p);
-               }
+                        * We need to mark the workqueue as exiting before we
+                        * wait for threads to terminate (at the end of which
+                        * we no longer have a prohibition on thread creation).
+                        * 
+                        * Finally, clean up any lingering workqueue data structures
+                        * that may have been left behind by the workqueue threads
+                        * as they exited (and then clean up the work queue itself).
+                        */
+                       kret = task_start_halt(task);
+                       if (kret != KERN_SUCCESS) {
+                               return(kret);           
+                       }
+                       proc_transcommit(p, 0);
+                       workqueue_mark_exiting(p);
+                       task_complete_halt(task);
+                       workqueue_exit(p);
+               }
                old_map = swap_task_map(old_task, thread, map, !spawn);
                vm_map_clear_4GB_pagezero(old_map);
                vm_map_deallocate(old_map);
@@ -566,7 +588,6 @@ parse_machfile(
         */
        for (pass = 1; pass <= 3; pass++) {
 
-#if CONFIG_EMBEDDED
                /*
                 * Check that the entry point is contained in an executable segment
                 */ 
@@ -575,7 +596,6 @@ parse_machfile(
                        ret = LOAD_FAILURE;
                        break;
                }
-#endif
 
                /*
                 * Loop through each of the load_commands indicated by the
@@ -637,6 +657,17 @@ parse_machfile(
                                                 slide,
                                                 result);
                                break;
+                       case LC_MAIN:
+                               if (pass != 1)
+                                       break;
+                               if (depth != 1)
+                                       break;
+                               ret = load_main(
+                                                (struct entry_point_command *) lcp,
+                                                thread,
+                                                slide,
+                                                result);
+                               break;
                        case LC_LOAD_DYLINKER:
                                if (pass != 3)
                                        break;
@@ -683,7 +714,7 @@ parse_machfile(
                                        break;
                                ret = set_code_unprotect(
                                        (struct encryption_info_command *) lcp,
-                                       addr, map, vp);
+                                       addr, map, slide, vp);
                                if (ret != LOAD_SUCCESS) {
                                        printf("proc %d: set_code_unprotect() error %d "
                                               "for file \"%s\"\n",
@@ -717,16 +748,21 @@ parse_machfile(
                    }
            }
 
-           if (dlp != 0) {
+               /* Make sure if we need dyld, we got it */
+               if (result->needs_dynlinker && !dlp) {
+                       ret = LOAD_FAILURE;
+               }
+
+           if ((ret == LOAD_SUCCESS) && (dlp != 0)) {
                    /* load the dylinker, and always slide it by the ASLR
                     * offset regardless of PIE */
                    ret = load_dylinker(dlp, dlarchbits, map, thread, depth, aslr_offset, result);
            }
 
-           if(depth == 1) {
-               if (result->thread_count == 0) {
-                       ret = LOAD_FAILURE;
-               }
+           if((ret == LOAD_SUCCESS) && (depth == 1)) {
+                       if (result->thread_count == 0) {
+                               ret = LOAD_FAILURE;
+                       }
            }
        }
 
@@ -823,12 +859,13 @@ load_segment(
 {
        struct segment_command_64 segment_command, *scp;
        kern_return_t           ret;
-       mach_vm_offset_t        map_addr, map_offset;
-       mach_vm_size_t          map_size, seg_size, delta_size;
+       vm_map_offset_t         map_addr, map_offset;
+       vm_map_size_t           map_size, seg_size, delta_size;
        vm_prot_t               initprot;
        vm_prot_t               maxprot;
        size_t                  segment_command_size, total_section_size,
                                single_section_size;
+       boolean_t               prohibit_pagezero_mapping = FALSE;
        
        if (LC_SEGMENT_64 == lcp->cmd) {
                segment_command_size = sizeof(struct segment_command_64);
@@ -888,9 +925,15 @@ load_segment(
                 */
                seg_size += slide;
                slide = 0;
-
+#if CONFIG_EMBEDDED
+               prohibit_pagezero_mapping = TRUE;
+#endif
                /* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */
                if (scp->cmd == LC_SEGMENT_64) {
+                       prohibit_pagezero_mapping = TRUE;
+               }
+               
+               if (prohibit_pagezero_mapping) {
                        /*
                         * This is a "page zero" segment:  it starts at address 0,
                         * is not mapped from the binary file and is not accessible.
@@ -1001,6 +1044,65 @@ load_segment(
        return ret;
 }
 
+
+
+static
+load_return_t
+load_main(
+       struct entry_point_command      *epc,
+       thread_t                thread,
+       int64_t                         slide,
+       load_result_t           *result
+)
+{
+       mach_vm_offset_t addr;
+       kern_return_t   ret;
+       
+       if (epc->cmdsize < sizeof(*epc))
+               return (LOAD_BADMACHO);
+       if (result->thread_count != 0) {
+               printf("load_main: already have a thread!");
+               return (LOAD_FAILURE);
+       }
+
+       if (thread == THREAD_NULL)
+               return (LOAD_SUCCESS);
+       
+       /* LC_MAIN specifies stack size but not location */
+       if (epc->stacksize) {
+               result->prog_stack_size = 1;
+               result->user_stack_size = epc->stacksize;
+       } else {
+               result->prog_stack_size = 0;
+               result->user_stack_size = MAXSSIZ;
+       }
+       result->prog_allocated_stack = 0;
+
+       /* use default location for stack */
+       ret = thread_userstackdefault(thread, &addr);
+       if (ret != KERN_SUCCESS)
+               return(LOAD_FAILURE);
+
+       /* The stack slides down from the default location */
+       result->user_stack = addr;
+       result->user_stack -= slide;
+
+       /* kernel does *not* use entryoff from LC_MAIN.  Dyld uses it. */
+       result->needs_dynlinker = TRUE;
+       result->validentry = TRUE;
+
+       ret = thread_state_initialize( thread );
+       if (ret != KERN_SUCCESS) {
+               return(LOAD_FAILURE);
+       }
+
+       result->unixproc = TRUE;
+       result->thread_count++;
+
+       return(LOAD_SUCCESS);
+}
+
+
 static
 load_return_t
 load_unixthread(
@@ -1012,6 +1114,7 @@ load_unixthread(
 {
        load_return_t   ret;
        int customstack =0;
+       mach_vm_offset_t addr;
        
        if (tcp->cmdsize < sizeof(*tcp))
                return (LOAD_BADMACHO);
@@ -1027,26 +1130,35 @@ load_unixthread(
                       (uint32_t *)(((vm_offset_t)tcp) + 
                                sizeof(struct thread_command)),
                       tcp->cmdsize - sizeof(struct thread_command),
-                      &result->user_stack,
+                      &addr,
                           &customstack);
        if (ret != LOAD_SUCCESS)
                return(ret);
 
-       if (customstack)
-               result->customstack = 1;
-       else
-               result->customstack = 0;
+       /* LC_UNIXTHREAD optionally specifies stack size and location */
+    
+       if (customstack) {
+               result->prog_stack_size = 0;    /* unknown */
+               result->prog_allocated_stack = 1;
+       } else {
+               result->prog_allocated_stack = 0;
+               result->prog_stack_size = 0;
+               result->user_stack_size = MAXSSIZ;
+       }
 
-       result->user_stack += slide;
+       /* The stack slides down from the default location */
+       result->user_stack = addr;
+       result->user_stack -= slide;
 
        ret = load_threadentry(thread,
                       (uint32_t *)(((vm_offset_t)tcp) + 
                                sizeof(struct thread_command)),
                       tcp->cmdsize - sizeof(struct thread_command),
-                      &result->entry_point);
+                      &addr);
        if (ret != LOAD_SUCCESS)
                return(ret);
 
+       result->entry_point = addr;
        result->entry_point += slide;
 
        ret = load_threadstate(thread,
@@ -1325,6 +1437,7 @@ load_dylinker(
        if (ret == LOAD_SUCCESS) {              
                result->dynlinker = TRUE;
                result->entry_point = myresult->entry_point;
+               result->validentry = myresult->validentry;
                result->all_image_info_addr = myresult->all_image_info_addr;
                result->all_image_info_size = myresult->all_image_info_size;
        }
@@ -1439,6 +1552,7 @@ set_code_unprotect(
                   struct encryption_info_command *eip,
                   caddr_t addr,        
                   vm_map_t map,
+                  int64_t slide,
                   struct vnode *vp)
 {
        int result, len;
@@ -1517,7 +1631,7 @@ set_code_unprotect(
                                if ((seg64->fileoff <= eip->cryptoff) &&
                                    (seg64->fileoff+seg64->filesize >= 
                                     eip->cryptoff+eip->cryptsize)) {
-                                       map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff;
+                                       map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff + slide;
                                        map_size = eip->cryptsize;
                                        goto remap_now;
                                }
@@ -1526,7 +1640,7 @@ set_code_unprotect(
                                if ((seg32->fileoff <= eip->cryptoff) &&
                                    (seg32->fileoff+seg32->filesize >= 
                                     eip->cryptoff+eip->cryptsize)) {
-                                       map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff;
+                                       map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff + slide;
                                        map_size = eip->cryptsize;
                                        goto remap_now;
                                }
index fd8e585db405f06ce7a2e9c4d04177ab137098b0..ece41929e4a9b9a2f6147072aa76a5b2d7ff1099 100644 (file)
@@ -53,13 +53,16 @@ typedef struct _load_result {
        user_addr_t             mach_header;
        user_addr_t             entry_point;
        user_addr_t             user_stack;
+       mach_vm_size_t          user_stack_size;
        mach_vm_address_t       all_image_info_addr;
        mach_vm_size_t          all_image_info_size;
        int                     thread_count;
        unsigned int
                /* boolean_t */ unixproc        :1,
+                               needs_dynlinker : 1,
                                dynlinker       :1,
-                               customstack     :1,
+                               prog_allocated_stack    :1,
+                               prog_stack_size : 1,    
                                validentry      :1,
                                                :0;
        unsigned int            csflags;
index 7ec55c799ee854661105f3361552614e4c2866ca..1a5cac88f387b48a9f0f6d5d4d9ed2d350b3e501 100644 (file)
@@ -313,6 +313,11 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
                 *      is resumed by adding NSIG to p_cursig. [see issig]
                 */
                proc_unlock(t);
+#if NOTYET
+               error = mac_proc_check_signal(p, t, SIGKILL);
+               if (0 != error)
+                       goto resume;
+#endif
                psignal(t, SIGKILL);
                goto resume;
 
@@ -342,8 +347,15 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
 
                if (uap->req == PT_STEP) {
                        /*
-                        * set trace bit
+                        * set trace bit 
+                        * we use sending SIGSTOP as a comparable security check.
                         */
+#if NOTYET
+                       error = mac_proc_check_signal(p, t, SIGSTOP);
+                       if (0 != error) {
+                               goto out;
+                       }
+#endif
                        if (thread_setsinglestep(th_act, 1) != KERN_SUCCESS) {
                                error = ENOTSUP;
                                goto out;
@@ -351,7 +363,14 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
                } else {
                        /*
                         * clear trace bit if on
+                        * we use sending SIGCONT as a comparable security check.
                         */
+#if NOTYET
+                       error = mac_proc_check_signal(p, t, SIGCONT);
+                       if (0 != error) {
+                               goto out;
+                       }
+#endif
                        if (thread_setsinglestep(th_act, 0) != KERN_SUCCESS) {
                                error = ENOTSUP;
                                goto out;
index 3019058712b3ea52da54173579e35cf4cb603977..d585708d1e695909502dceac4f5ececc28ee2064 100755 (executable)
@@ -172,7 +172,7 @@ s/\$//g
        }
        NR == 1 {
                printf "\n/* The casts are bogus but will do for now. */\n" > sysent
-               printf "__private_extern__ struct sysent %s[] = {\n",switchname > sysent
+               printf "__private_extern__ const struct sysent %s[] = {\n",switchname > sysent
 
                printf "#ifndef %s\n", sysproto_h > sysarg
                printf "#define\t%s\n\n", sysproto_h > sysarg
@@ -210,7 +210,7 @@ s/\$//g
                printf "#define\tPADR_(t)\t0\n" > sysarg
                printf "#endif\n" > sysarg
                printf "\n__BEGIN_DECLS\n" > sysarg
-               printf "#ifndef __arm__\n" > sysarg
+               printf "#if !defined(__arm__)\n" > sysarg
                printf "void munge_w(const void *, void *);  \n" > sysarg
                printf "void munge_ww(const void *, void *);  \n" > sysarg
                printf "void munge_www(const void *, void *);  \n" > sysarg
@@ -243,7 +243,7 @@ s/\$//g
                printf "void munge_wwwsw(const void *, void *);  \n" > sysarg
                printf "void munge_llllll(const void *, void *); \n" > sysarg
                printf "#else \n" > sysarg
-               printf "/* ARM does not need mungers for BSD system calls */\n" > sysarg
+               printf "/* ARM does not need mungers for BSD system calls. */\n" > sysarg
                printf "#define munge_w  NULL \n" > sysarg
                printf "#define munge_ww  NULL \n" > sysarg
                printf "#define munge_www  NULL \n" > sysarg
index a0c6cfb69b4f5e198be83529f0c30d88527d74ea..f3570ae41aef6d7477bc7c1e449054cc6d2830ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -75,7 +75,7 @@
        (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE)
 
 #define        MCACHE_CPU(c) \
-       (mcache_cpu_t *)((char *)(c) + MCACHE_SIZE(cpu_number()))
+       (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
 
 /*
  * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
@@ -1416,7 +1416,7 @@ mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp)
 __private_extern__ void
 mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
 {
-       u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
+       u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf = (u_int64_t *)buf_arg;
 
        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
@@ -1429,7 +1429,7 @@ mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
 __private_extern__ void *
 mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
 {
-       u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
+       u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf;
 
        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
@@ -1446,7 +1446,7 @@ __private_extern__ void *
 mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
     size_t size)
 {
-       u_int64_t *buf_end = (u_int64_t *)((char *)buf_arg + size);
+       u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf;
 
        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
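The three mcache hunks insert an intermediate (void *) in the char*-to-u_int64_t* casts; this is the usual idiom for silencing -Wcast-align, while alignment itself is still guaranteed by the adjacent VERIFY(IS_P2ALIGNED(...)) checks. Distilled:

    #include <stdint.h>
    #include <stddef.h>

    /* Direct (uint64_t *)(char *) casts can trigger -Wcast-align; hopping
     * through void * does not. Alignment must still be ensured by the
     * caller (VERIFY/IS_P2ALIGNED in mcache). */
    static uint64_t *
    buf_end(void *buf_arg, size_t size)
    {
        return (uint64_t *)((void *)((char *)buf_arg + size));
    }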
index 1eb975ed27c003794cfa84bf603a0c8a0959d6be..dd238f066a42f0702438517b7f9367b5c483c834 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2001-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
index e5573a99f3f52182dac93c7b62ce3762cd45299d..09c7928335bd2c7119f207e1f70623b7151ab1e6 100644 (file)
@@ -229,6 +229,7 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_INT_HOOK(proc_check_getaudit)
        CHECK_SET_INT_HOOK(proc_check_getauid)
        CHECK_SET_INT_HOOK(proc_check_getlcid)
+       CHECK_SET_INT_HOOK(proc_check_ledger)
        CHECK_SET_INT_HOOK(proc_check_map_anon)
        CHECK_SET_INT_HOOK(proc_check_mprotect)
        CHECK_SET_INT_HOOK(proc_check_sched)
@@ -271,6 +272,7 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_INT_HOOK(system_check_swapoff)
        CHECK_SET_INT_HOOK(system_check_swapon)
        CHECK_SET_INT_HOOK(system_check_sysctl)
+       CHECK_SET_INT_HOOK(system_check_kas_info)
        CHECK_SET_INT_HOOK(sysvmsq_check_enqueue)
        CHECK_SET_INT_HOOK(sysvmsq_check_msgrcv)
        CHECK_SET_INT_HOOK(sysvmsq_check_msgrmid)
@@ -447,7 +449,7 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_VOID_HOOK(task_label_update)
        CHECK_SET_VOID_HOOK(vnode_label_associate_devfs)
        CHECK_SET_VOID_HOOK(vnode_label_associate_file)
-       CHECK_SET_VOID_HOOK(vnode_label_associate_pipe)
+       CHECK_SET_VOID_HOOK(thread_userret)
        CHECK_SET_VOID_HOOK(vnode_label_associate_posixsem)
        CHECK_SET_VOID_HOOK(vnode_label_associate_posixshm)
        CHECK_SET_VOID_HOOK(vnode_label_associate_singlelabel)
@@ -458,11 +460,8 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_VOID_HOOK(vnode_label_recycle)
        CHECK_SET_VOID_HOOK(vnode_label_update)
        CHECK_SET_VOID_HOOK(vnode_notify_rename)
-       .mpo_reserved12 = common_void_hook,
-       .mpo_reserved14 = common_void_hook,
-       .mpo_reserved15 = common_void_hook,
-       .mpo_reserved16 = common_void_hook,
-       .mpo_reserved17 = common_void_hook,
+       CHECK_SET_VOID_HOOK(thread_label_init)
+       CHECK_SET_VOID_HOOK(thread_label_destroy)
        .mpo_reserved18 = common_void_hook,
        .mpo_reserved19 = common_void_hook,
        .mpo_reserved20 = common_void_hook,
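The CHECK_SET_*_HOOK macros are defined earlier in this file, outside the hunk; their assumed expansion is shown below, which is why retiring a reservedNN slot for a real hook, as above, is a one-line edit:

    /* Assumed expansion of the table-population macros: */
    #define CHECK_SET_INT_HOOK(x)   .mpo_ ## x = common_int_hook,
    #define CHECK_SET_VOID_HOOK(x)  .mpo_ ## x = common_void_hook,

    /* so CHECK_SET_INT_HOOK(proc_check_ledger) becomes
     *      .mpo_proc_check_ledger = common_int_hook,
     * binding the new hook to the same counting stub as the others. */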
index 617d1dc9f7672ac134bcbfcac29ed6aeac10e6f9..d46eb0b3c1268ed385b2841d55824213b98190eb 100644 (file)
@@ -499,8 +499,8 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
                        pinfo->pshm_mode = cmode;
                        pinfo->pshm_uid = kauth_getuid();
                        pinfo->pshm_gid = kauth_getgid();
-                       bcopy(pnbuf, &pinfo->pshm_name[0], PSHMNAMLEN);
-                       pinfo->pshm_name[PSHMNAMLEN]=0;
+                       bcopy(pnbuf, &pinfo->pshm_name[0], pathlen);
+                       pinfo->pshm_name[pathlen]=0;
 #if CONFIG_MACF
                        error = mac_posixshm_check_create(kauth_cred_get(), nameptr);
                        if (error) {
@@ -530,7 +530,7 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
                        AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
                                        pinfo->pshm_gid, pinfo->pshm_mode);
 #if CONFIG_MACF        
-                       if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) {
+                       if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo, fmode))) {
                                goto bad;
                        }
 #endif
@@ -550,7 +550,7 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
                        goto bad;
                }       
 #if CONFIG_MACF        
-               if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) {
+               if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo, fmode))) {
                        goto bad;
                }
 #endif
@@ -829,10 +829,10 @@ pshm_access(struct pshminfo *pinfo, int mode, kauth_cred_t cred, __unused proc_t
 int
 pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct fileproc *fp, off_t pageoff) 
 {
-       mach_vm_offset_t        user_addr = (mach_vm_offset_t)uap->addr;
-       mach_vm_size_t          user_size = (mach_vm_size_t)uap->len ;
-       mach_vm_offset_t        user_start_addr;
-       mach_vm_size_t          map_size, mapped_size;
+       vm_map_offset_t user_addr = (vm_map_offset_t)uap->addr;
+       vm_map_size_t   user_size = (vm_map_size_t)uap->len ;
+       vm_map_offset_t user_start_addr;
+       vm_map_size_t   map_size, mapped_size;
        int prot = uap->prot;
        int flags = uap->flags;
        vm_object_offset_t file_pos = (vm_object_offset_t)uap->pos;
@@ -898,9 +898,9 @@ pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct
 
        if ((flags & MAP_FIXED) == 0) {
                alloc_flags = VM_FLAGS_ANYWHERE;
-               user_addr = mach_vm_round_page(user_addr); 
+               user_addr = vm_map_round_page(user_addr); 
        } else {
-               if (user_addr != mach_vm_trunc_page(user_addr))
+               if (user_addr != vm_map_round_page(user_addr))
                        return (EINVAL);
                /*
                 * We do not get rid of the existing mappings here because
@@ -1099,15 +1099,23 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap,
        AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, pinfo->pshm_gid,
                  pinfo->pshm_mode);
 
-       /*
-        * JMM - How should permissions be checked?
+       /* 
+        * Following file semantics, unlink should be allowed
+        * only for users with write permission.
         */
+       if ( (error = pshm_access(pinfo, FWRITE, kauth_cred_get(), p)) ) {
+               PSHM_SUBSYS_UNLOCK();
+               goto bad;
+       }
 
        pinfo->pshm_flags |= PSHM_INDELETE;
        pshm_cache_delete(pcache);
        pinfo->pshm_flags |= PSHM_REMOVED;
        /* release the existence reference */
        if (!--pinfo->pshm_usecount) {
+#if CONFIG_MACF
+               mac_posixshm_label_destroy(pinfo);
+#endif
                PSHM_SUBSYS_UNLOCK();
                /*
                 * If this is the last reference going away on the object,
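The shm_unlink hunk above means unlinking now requires write permission on the object, mirroring file semantics. A userspace illustration; the behavior under the new check is the expectation, not a guarantee:

    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void) {
        /* Create a segment the owner cannot write. */
        int fd = shm_open("/demo_shm", O_CREAT | O_RDWR, 0444);
        if (fd < 0) { perror("shm_open"); return 1; }
        close(fd);

        /* With the change above, this may now fail with a permission
         * error instead of always succeeding. */
        if (shm_unlink("/demo_shm") == -1)
            perror("shm_unlink");
        return 0;
    }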
index a907fad59ca37a3d0c34d3d8e42cb41033267e81..482f83e0e529f47becd3897cd8a9ad6f3ec91ce7 100644 (file)
@@ -48,6 +48,7 @@
 #include <sys/sysctl.h>
 #include <sys/user.h>
 #include <sys/aio_kern.h>
+#include <sys/kern_memorystatus.h>
 
 #include <security/audit/audit.h>
 
@@ -96,6 +97,8 @@ int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buff
 int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 int proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 int proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
+int proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t * retval);
+int proc_terminate(int pid, int32_t * retval);
 
 /* protos for procpidinfo calls */
 int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
@@ -103,7 +106,7 @@ int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie);
 int proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo *pbsd_shortp, int zombie);
 int proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo);
 int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
-int proc_pidthreadinfo(proc_t p, uint64_t arg,  struct proc_threadinfo *pthinfo);
+int proc_pidthreadinfo(proc_t p, uint64_t arg,  int thuniqueid, struct proc_threadinfo *pthinfo);
 int proc_pidthreadpathinfo(proc_t p, uint64_t arg,  struct proc_threadwithpathinfo *pinfo);
 int proc_pidlistthreads(proc_t p,  user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 int proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
@@ -127,11 +130,16 @@ int pid_atalkinfo(struct atalk  * at, struct fileproc * fp,  int closeonexec, us
 
 /* protos for misc */
 
+void proc_dirty_start(struct proc *p);
+void proc_dirty_end(struct proc *p);
+
 int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo);
 void  fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * finfo);
 static int proc_security_policy(proc_t p);
 static void munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp);
 
+extern int cansignal(struct proc *, kauth_cred_t, struct proc *, int, int);
+
 uint64_t get_dispatchqueue_offset_from_proc(void *p)
 {
        if(p != NULL) {
@@ -169,7 +177,10 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b
                        return(proc_setcontrol(pid, flavor, arg, buffer, buffersize, retval));
                case 6: /* proc_pidfileportinfo */
                        return(proc_pidfileportinfo(pid, flavor, (mach_port_name_t)arg, buffer, buffersize, retval));
-
+               case 7: /* proc_terminate */
+                       return(proc_terminate(pid, retval));
+               case 8: /* proc_dirtycontrol */
+                       return(proc_dirtycontrol(pid, flavor, arg, retval));
                default:
                                return(EINVAL);
        }
@@ -525,6 +536,10 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
                        pbsd->pbi_flags |= PROC_FLAG_CTTY;
        }
 
+#if !CONFIG_EMBEDDED
+       if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) 
+               pbsd->pbi_flags |= PROC_FLAG_DELAYIDLESLEEP;
+#endif /* !CONFIG_EMBEDDED */
 
        switch(PROC_CONTROL_STATE(p)) {
                case P_PCTHROTTLE:
@@ -553,6 +568,8 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
 
        if (zombie == 0)
                pbsd->pbi_nfiles = p->p_fd->fd_nfiles;
+       
+       pbsd->e_tdev = NODEV;
        if (pg != PGRP_NULL) {
                pbsd->pbi_pgid = p->p_pgrpid;
                pbsd->pbi_pjobc = pg->pg_jobc;
@@ -600,6 +617,10 @@ proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo * pbsd_shortp, int zombi
                pbsd_shortp->pbsi_flags |= PROC_FLAG_PSUGID;
        if ((p->p_flag & P_EXEC) == P_EXEC) 
                pbsd_shortp->pbsi_flags |= PROC_FLAG_EXEC;
+#if !CONFIG_EMBEDDED
+       if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) 
+               pbsd_shortp->pbsi_flags |= PROC_FLAG_DELAYIDLESLEEP;
+#endif /* !CONFIG_EMBEDDED */
 
        switch(PROC_CONTROL_STATE(p)) {
                case P_PCTHROTTLE:
@@ -652,14 +673,14 @@ proc_pidtaskinfo(proc_t p, struct proc_taskinfo * ptinfo)
 
 
 int 
-proc_pidthreadinfo(proc_t p, uint64_t arg,  struct proc_threadinfo *pthinfo)
+proc_pidthreadinfo(proc_t p, uint64_t arg,  int thuniqueid, struct proc_threadinfo *pthinfo)
 {
        int error = 0;
        uint64_t threadaddr = (uint64_t)arg;
 
        bzero(pthinfo, sizeof(struct proc_threadinfo));
 
-       error = fill_taskthreadinfo(p->task, threadaddr, (struct proc_threadinfo_internal *)pthinfo, NULL, NULL);
+       error = fill_taskthreadinfo(p->task, threadaddr, thuniqueid, (struct proc_threadinfo_internal *)pthinfo, NULL, NULL);
        if (error)
                return(ESRCH);
        else
@@ -704,7 +725,7 @@ proc_pidthreadpathinfo(proc_t p, uint64_t arg,  struct proc_threadwithpathinfo *
 
        bzero(pinfo, sizeof(struct proc_threadwithpathinfo));
 
-       error = fill_taskthreadinfo(p->task, threadaddr, (struct proc_threadinfo_internal *)&pinfo->pt, (void *)&vp, &vid);
+       error = fill_taskthreadinfo(p->task, threadaddr, 0, (struct proc_threadinfo_internal *)&pinfo->pt, (void *)&vp, &vid);
        if (error)
                return(ESRCH);
 
@@ -937,6 +958,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
        int refheld = 0, shortversion = 0;
        uint32_t size;
        int zombie = 0;
+       int thuniqueid = 0;
 
        switch (flavor) {
                case PROC_PIDLISTFDS:
@@ -989,6 +1011,9 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
                        if (buffer == (user_addr_t)0)
                                size = 0;
                        break;
+               case PROC_PIDTHREADID64INFO:
+                       size = PROC_PIDTHREADID64INFO_SIZE;
+                       break;
                default:
                        return(EINVAL);
        }
@@ -1099,10 +1124,12 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
                }
                break;
 
+               case PROC_PIDTHREADID64INFO:
+                       thuniqueid = 1;
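+                       /* FALLTHROUGH: handled by the PROC_PIDTHREADINFO case below */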
                case PROC_PIDTHREADINFO:{
                struct proc_threadinfo pthinfo;
 
-                       error  = proc_pidthreadinfo(p,  arg, &pthinfo);
+                       error  = proc_pidthreadinfo(p,  arg, thuniqueid, &pthinfo);
                        if (error == 0) {
                                error = copyout(&pthinfo, buffer, sizeof(struct proc_threadinfo));
                                if (error == 0)
@@ -1756,6 +1783,15 @@ proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t
                }
                break;
 
+               case PROC_SELFSET_DELAYIDLESLEEP: {
+                       /* mark or clear the process property to delay idle sleep disk IO */
+                       if (pcontrol != 0)
+                               OSBitOrAtomic(P_DELAYIDLESLEEP, &pself->p_flag);
+                       else
+                               OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &pself->p_flag);
+               }
+               break;
+
                default:
                        error = ENOTSUP;
        }
@@ -1764,6 +1800,262 @@ out:
        return(error);
 }
 
+void
+proc_dirty_start(struct proc *p)
+{
+       proc_lock(p);
+       while (p->p_dirty & P_DIRTY_BUSY) {
+               msleep(&p->p_dirty, &p->p_mlock, 0, "proc_dirty_start", NULL);
+       }
+       p->p_dirty |= P_DIRTY_BUSY;
+       proc_unlock(p);
+}
+
+void
+proc_dirty_end(struct proc *p)
+{
+       proc_lock(p);
+       if (p->p_dirty & P_DIRTY_BUSY) {
+               p->p_dirty &= ~P_DIRTY_BUSY;
+               wakeup(&p->p_dirty);
+       }
+       proc_unlock(p);
+}
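proc_dirty_start()/proc_dirty_end() implement a sleep-lock over the P_DIRTY_BUSY bit: contenders msleep() on &p->p_dirty, and proc_dirty_end() wakes them. A userspace analogue of the same protocol, with a condition variable in place of msleep/wakeup:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
    static bool busy;

    static void dirty_start(void) {
        pthread_mutex_lock(&m);
        while (busy)
            pthread_cond_wait(&c, &m);    /* like msleep(&p->p_dirty, ...) */
        busy = true;
        pthread_mutex_unlock(&m);
    }

    static void dirty_end(void) {
        pthread_mutex_lock(&m);
        busy = false;
        pthread_cond_broadcast(&c);       /* like wakeup(&p->p_dirty) */
        pthread_mutex_unlock(&m);
    }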
+
+static boolean_t
+proc_validate_track_flags(uint32_t pcontrol, struct proc *target_p) {
+       /* Check idle exit isn't specified independently */
+       if ((pcontrol & PROC_DIRTY_TRACK_MASK) == PROC_DIRTY_ALLOW_IDLE_EXIT) {
+               return false;           
+       }
+       
+       /* See that the process isn't marked for termination */
+       if (target_p->p_dirty & P_DIRTY_TERMINATED) {
+               return false;           
+       }
+       
+       return true;
+}
+
+int
+proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) {
+       struct proc *target_p;
+       int error = 0;
+       uint32_t pcontrol = (uint32_t)arg;
+       kauth_cred_t my_cred, target_cred;
+       boolean_t self = FALSE;
+       boolean_t child = FALSE;
+       pid_t selfpid;
+
+       target_p = proc_find(pid);
+       if (target_p == PROC_NULL) {
+               return(ESRCH);
+       }
+       
+       my_cred = kauth_cred_get();
+       target_cred = kauth_cred_proc_ref(target_p);
+       
+       selfpid = proc_selfpid();
+       if (pid == selfpid) {
+               self = TRUE;
+       } else if (target_p->p_ppid == selfpid) {
+               child = TRUE;
+       }
+       
+       switch (flavor) {
+               case PROC_DIRTYCONTROL_TRACK: {
+                       /* Only allow the process itself, its parent, or root */
+                       if ((self == FALSE) && (child == FALSE) && kauth_cred_issuser(kauth_cred_get()) != TRUE) {
+                               error = EPERM;
+                               goto out;
+                       }
+                       
+                       proc_dirty_start(target_p);     
+
+                       if (proc_validate_track_flags(pcontrol, target_p)) {
+                               /* Cumulative, as per <rdar://problem/11159924> */
+                               target_p->p_dirty |= 
+                                       ((pcontrol & PROC_DIRTY_TRACK) ? P_DIRTY_TRACK : 0) |
+                                       ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) ? P_DIRTY_ALLOW_IDLE_EXIT : 0);        
+#if CONFIG_MEMORYSTATUS
+                               if ((target_p->p_dirty & P_DIRTY_CAN_IDLE_EXIT) == P_DIRTY_CAN_IDLE_EXIT) {
+                                       memorystatus_on_track_dirty(pid, TRUE);
+                               }
+#endif
+                       } else {
+                               error = EINVAL;
+                       }
+                       
+                       proc_dirty_end(target_p);
+               }
+               break;
+
+               case PROC_DIRTYCONTROL_SET: {
+                       boolean_t kill = false;
+                       
+                       /* Check privileges; use cansignal() here since the process could be terminated */
+                       if (!cansignal(current_proc(), my_cred, target_p, SIGKILL, 0)) {
+                               error = EPERM;
+                               goto out;
+                       }
+                       
+                       proc_dirty_start(target_p);
+                       
+                       if (!(target_p->p_dirty & P_DIRTY_TRACK)) {
+                               /* Dirty tracking not enabled */
+                               error = EINVAL;                 
+                       } else if (pcontrol && (target_p->p_dirty & P_DIRTY_TERMINATED)) {
+                               /* 
+                                * Process is set to be terminated and we're attempting to mark it dirty.
+                                * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
+                                */
+                               error = EBUSY;          
+                       } else {
+                               int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
+                               if (pcontrol && !(target_p->p_dirty & flag)) {
+                                       target_p->p_dirty |= flag;
+                               } else if ((pcontrol == 0) && (target_p->p_dirty & flag)) {
+                                       if ((flag == P_DIRTY_SHUTDOWN) && !(target_p->p_dirty & P_DIRTY)) {
+                                               /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
+                                               target_p->p_dirty |= P_DIRTY_TERMINATED;
+                                               kill = true;
+                                       } else if ((flag == P_DIRTY) && (target_p->p_dirty & P_DIRTY_TERMINATED)) {
+                                               /* Kill previously terminated processes if set clean */
+                                               kill = true;                                            
+                                       }
+                                       target_p->p_dirty &= ~flag;
+                               } else {
+                                       /* Already set */
+                                       error = EALREADY;
+                               }
+                       }
+#if CONFIG_MEMORYSTATUS
+                       if ((error == 0) && ((target_p->p_dirty & P_DIRTY_CAN_IDLE_EXIT) == P_DIRTY_CAN_IDLE_EXIT)) {
+                               memorystatus_on_dirty(pid, pcontrol ? TRUE : FALSE);
+                       }
+#endif
+                       proc_dirty_end(target_p);
+
+                       if ((error == 0) && (kill == true)) {
+                               psignal(target_p, SIGKILL);
+                       }
+               }
+               break;
+               
+               case PROC_DIRTYCONTROL_GET: {
+                       /* No permissions check - dirty state is freely available */
+                       if (retval) {
+                               proc_dirty_start(target_p);
+                               
+                               *retval = 0;
+                               if (target_p->p_dirty & P_DIRTY_TRACK) {
+                                       *retval |= PROC_DIRTY_TRACKED;
+                                       if (target_p->p_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
+                                               *retval |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
+                                       }
+                                       if (target_p->p_dirty & P_DIRTY) {
+                                               *retval |= PROC_DIRTY_IS_DIRTY;
+                                       }
+                               }
+                               
+                               proc_dirty_end(target_p);
+                       } else {
+                               error = EINVAL;
+                       }
+               }
+               break;
+       }
+
+out:
+       proc_rele(target_p);
+       kauth_cred_unref(&target_cred);
+       
+       return(error);  
+}
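From userspace this path is reached through the proc_info trap with callnum 8, per the dispatch switch earlier in this diff. A hedged sketch: the __proc_info wrapper is private, and the constant values below are assumed to match sys/proc_info.h.

    #include <stdint.h>
    #include <stddef.h>

    /* Assumption: private wrapper exported by Libsystem. */
    extern int __proc_info(int callnum, int pid, int flavor, uint64_t arg,
                           void *buffer, int buffersize);

    #define PROC_INFO_CALL_DIRTYCONTROL 8   /* callnum per the dispatch above */
    #define PROC_DIRTYCONTROL_TRACK     1   /* assumed value */
    #define PROC_DIRTY_TRACK            0x1 /* assumed value */
    #define PROC_DIRTY_ALLOW_IDLE_EXIT  0x2 /* assumed value */

    static int
    opt_in_dirty_tracking(int pid)
    {
        /* Flags are cumulative, as the TRACK case above notes. */
        return __proc_info(PROC_INFO_CALL_DIRTYCONTROL, pid,
                           PROC_DIRTYCONTROL_TRACK,
                           PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT,
                           NULL, 0);
    }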
+
+/*
+ * proc_terminate() provides support for sudden termination.
+ * SIGKILL is issued to tracked, clean processes; otherwise,
+ * SIGTERM is sent.
+ */
+
+int
+proc_terminate(int pid, int32_t *retval)
+{
+       int error = 0;
+       proc_t p;
+       kauth_cred_t uc = kauth_cred_get();
+       int sig;
+
+#if 0
+       /* XXX: Check if these are necessary */
+       AUDIT_ARG(pid, pid);
+       AUDIT_ARG(signum, sig);
+#endif
+
+       if (pid <= 0 || retval == NULL) {
+               return (EINVAL);
+       }
+
+       if ((p = proc_find(pid)) == NULL) {
+               return (ESRCH);
+       }
+
+#if 0
+       /* XXX: Check if these are necessary */
+       AUDIT_ARG(process, p);
+#endif
+
+       /* Check privileges; if SIGKILL can be issued, then SIGTERM is also OK */
+       if (!cansignal(current_proc(), uc, p, SIGKILL, 0)) {
+               error = EPERM;
+               goto out;
+       }
+       
+       proc_dirty_start(p);
+       
+       p->p_dirty |= P_DIRTY_TERMINATED;
+       
+       if ((p->p_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
+               /* Clean; mark as terminated and issue SIGKILL */
+               sig = SIGKILL;
+       } else {
+               /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
+               sig = SIGTERM;
+       }
+
+       proc_dirty_end(p);
+
+       proc_removethrottle(p);
+
+       psignal(p, sig);
+       *retval = sig;
+
+out:
+       proc_rele(p);
+       
+       return error;
+}
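Callers reach proc_terminate() via callnum 7; libproc's wrapper of the same name (an assumption here) reports which signal was chosen, so a launcher can tell a clean kill from a polite SIGTERM:

    #include <stdio.h>
    #include <signal.h>
    #include <libproc.h>  /* assumed to declare: int proc_terminate(pid_t, int *); */

    static int
    request_termination(pid_t pid)
    {
        int sig = 0;
        int err = proc_terminate(pid, &sig);
        if (err == 0)
            printf("pid %d sent %s\n", (int)pid,
                   sig == SIGKILL ? "SIGKILL (tracked and clean)"
                                  : "SIGTERM (dirty or untracked)");
        return err;
    }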
+
+void
+proc_removethrottle(proc_t p)
+{
+       /* remove throttled states in all threads; process is going to terminate soon */
+       proc_lock(p);
+
+       /* if already marked for proc_termination... */
+       if ((p->p_lflag & P_LPTERMINATE) != 0) {
+               proc_unlock(p);
+               return;
+       }
+       p->p_lflag |= P_LPTERMINATE;
+       proc_unlock(p);
+
+       (void)proc_task_remove_throttle(p->task);
+
+}
+
 
 /*
  * copy stat64 structure into vinfo_stat structure.
index e6596dad440131e74e253219d1386d0cd5114a72..ff919538fb4c1918dfff316766606b5a36ab2c66 100644 (file)
 
 #include <kern/ipc_misc.h>
 #include <vm/vm_protos.h>
+#if CONFIG_EMBEDDED
+#include <security/mac.h>
+#include <sys/kern_memorystatus.h>
+#endif /* CONFIG_EMBEDDED */
 
 static int handle_background(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
 static int handle_hwaccess(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
@@ -79,6 +83,11 @@ static int handle_apptype(int scope, int action, int policy, int policy_subtype,
 extern kern_return_t task_suspend(task_t);
 extern kern_return_t task_resume(task_t);
 
+#if CONFIG_EMBEDDED
+static int handle_applifecycle(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+#endif /* CONFIG_EMBEDDED */
+
+
 /***************************** process_policy ********************/
 
 /*
@@ -91,7 +100,7 @@ extern kern_return_t task_resume(task_t);
 
 /* system call implementation */
 int
-process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_t *retval)
+process_policy(__unused struct proc *p, struct process_policy_args * uap, __unused int32_t *retval)
 {
        int error = 0;
        int scope = uap->scope;
@@ -101,7 +110,7 @@ process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_
        user_addr_t attrp = uap->attrp;
        pid_t target_pid = uap->target_pid;
        uint64_t target_threadid = uap->target_threadid;
-       proc_t proc = PROC_NULL;
+       proc_t target_proc = PROC_NULL;
        proc_t curp = current_proc();
        kauth_cred_t my_cred;
 #if CONFIG_EMBEDDED
@@ -111,17 +120,17 @@ process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_
        if ((scope != PROC_POLICY_SCOPE_PROCESS) && (scope != PROC_POLICY_SCOPE_THREAD)) {
                return(EINVAL);
        }
-       proc = proc_find(target_pid);
-       if (proc == PROC_NULL)  {
-               return(EINVAL);
+       target_proc = proc_find(target_pid);
+       if (target_proc == PROC_NULL)  {
+               return(ESRCH);
        }
 
-       my_cred = kauth_cred_proc_ref(curp);
+       my_cred = kauth_cred_get();
 
 #if CONFIG_EMBEDDED
-       target_cred = kauth_cred_proc_ref(proc);
+       target_cred = kauth_cred_proc_ref(target_proc);
 
-       if (suser(my_cred, NULL) && kauth_cred_getruid(my_cred) &&
+       if (!kauth_cred_issuser(my_cred) && kauth_cred_getruid(my_cred) &&
            kauth_cred_getuid(my_cred) != kauth_cred_getuid(target_cred) &&
            kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred))
 #else
@@ -131,7 +140,7 @@ process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_
         */
        if ((policy != PROC_POLICY_RESOURCE_STARVATION) && 
                (policy != PROC_POLICY_APPTYPE) && 
-               (suser(my_cred, NULL) && curp != p))
+               (!kauth_cred_issuser(my_cred) && curp != p))
 #endif
        {
                error = EPERM;
@@ -139,27 +148,39 @@ process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_
        }
 
 #if CONFIG_MACF
-       error = mac_proc_check_sched(curp, p);
-       if (error) 
-               goto out;
-#endif
+#if CONFIG_EMBEDDED
+       /* Lifecycle management will invoke the appropriate MACF checks */
+       if (policy != PROC_POLICY_APP_LIFECYCLE) {
+#endif /* CONFIG_EMBEDDED */
+               error = mac_proc_check_sched(curp, target_proc);
+               if (error) 
+                       goto out;
+#if CONFIG_EMBEDDED
+       }
+#endif /* CONFIG_EMBEDDED */
+#endif /* CONFIG_MACF */
 
 
        switch(policy) {
                case PROC_POLICY_BACKGROUND:
-                       error = handle_background(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+                       error = handle_background(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
                        break;
                case PROC_POLICY_HARDWARE_ACCESS:
-                       error = handle_hwaccess(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+                       error = handle_hwaccess(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
                        break;
                case PROC_POLICY_RESOURCE_STARVATION:
-                       error = handle_lowresrouce(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+                       error = handle_lowresrouce(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
                        break;
                case PROC_POLICY_RESOURCE_USAGE:
-                       error = handle_resourceuse(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+                       error = handle_resourceuse(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
+                       break;
+#if CONFIG_EMBEDDED
+               case PROC_POLICY_APP_LIFECYCLE:
+                       error = handle_applifecycle(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
                        break;
+#endif /* CONFIG_EMBEDDED */
                case PROC_POLICY_APPTYPE:
-                       error = handle_apptype(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+                       error = handle_apptype(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid);
                        break;
                default:
                        error = EINVAL;
@@ -167,8 +188,7 @@ process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_
        }
 
 out:
-       proc_rele(proc);
-        kauth_cred_unref(&my_cred);
+       proc_rele(target_proc);
 #if CONFIG_EMBEDDED
         kauth_cred_unref(&target_cred);
 #endif
@@ -355,6 +375,12 @@ handle_resourceuse(__unused int scope, __unused int action, __unused int policy,
                                        cpuattr.ppattr_cpu_attr_interval, 
                                        cpuattr.ppattr_cpu_attr_deadline); 
                        }
+                       break;
+
+               case PROC_POLICY_ACTION_RESTORE:
+                       error = proc_clear_task_ruse_cpu(proc->task);
+                       break;
+
                default:
                        error = EINVAL;
                        break;
@@ -364,13 +390,123 @@ handle_resourceuse(__unused int scope, __unused int action, __unused int policy,
        return(error);
 }
 
+#if CONFIG_EMBEDDED
+static int 
+handle_applifecycle(__unused int scope, int action, __unused int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid)
+{
+
+       int error = 0;
+       int state = 0, oldstate = 0;
+       int noteval = 0;
+
+       switch(policy_subtype) {
+               case PROC_POLICY_APPLIFE_NONE:
+                       error = 0;
+                       break;
+
+               case PROC_POLICY_APPLIFE_STATE:
+#if CONFIG_MACF
+                       error = mac_proc_check_sched(current_proc(), proc);
+                       if (error) 
+                               goto out;
+#endif
+                       switch (action) {
+                               case PROC_POLICY_ACTION_GET :
+                                       state = proc_lf_getappstate(proc->task);
+                                       error = copyout((int *)&state, (user_addr_t)attrp, sizeof(int));
+                                       break;
+                               case PROC_POLICY_ACTION_APPLY :
+                               case PROC_POLICY_ACTION_SET :
+                                       error = copyin((user_addr_t)attrp, (int  *)&state, sizeof(int));
+                                       if ((error == 0) && (state != TASK_APPSTATE_NONE)) {
+                                               oldstate = proc_lf_getappstate(proc->task);
+                                               error = proc_lf_setappstate(proc->task, state);
+                                               if (error == 0) {
+                                                       switch (state) {
+                                                               case TASK_APPSTATE_ACTIVE:
+                                                                       noteval = NOTE_APPACTIVE;
+                                                                       break;
+                                                               case TASK_APPSTATE_BACKGROUND:
+                                                                       noteval = NOTE_APPBACKGROUND;
+                                                                       break;
+                                                               case TASK_APPSTATE_NONUI:
+                                                                       noteval = NOTE_APPNONUI;
+                                                                       break;
+                                                               case TASK_APPSTATE_INACTIVE:
+                                                                       noteval = NOTE_APPINACTIVE;
+                                                                       break;
+                                                       }
+                                       
+                                                       proc_lock(proc);        
+                                                       proc_knote(proc, noteval);
+                                                       proc_unlock(proc);      
+                                               }
+                                       }
+                                       break;
+
+                               default:
+                                       error = EINVAL;
+                                       break;
+                       }
+                       break;
+
+               case PROC_POLICY_APPLIFE_DEVSTATUS:
+#if CONFIG_MACF
+                       /* ToDo - this should be a generic check, since we could potentially hang other behaviours here. */
+                       error = mac_proc_check_suspend_resume(current_proc(), MAC_PROC_CHECK_HIBERNATE);
+                       if (error) {
+                               error = EPERM;
+                               goto out;
+                       }
+#endif
+                       if (action == PROC_POLICY_ACTION_APPLY) {
+                               /* Used as a freeze hint */
+                               memorystatus_on_inactivity(-1);
+                               
+                               /* in future use devicestatus for pid_socketshutdown() */
+                               error = 0;
+                       } else {
+                               error = EINVAL;
+                       }
+                       break;
+
+               case PROC_POLICY_APPLIFE_PIDBIND:
+#if CONFIG_MACF
+                       error = mac_proc_check_suspend_resume(current_proc(), MAC_PROC_CHECK_PIDBIND);
+                       if (error) {
+                               error = EPERM;
+                               goto out;
+                       }
+#endif
+                       error = copyin((user_addr_t)attrp, (int  *)&state, sizeof(int));
+                       if (error != 0)
+                               goto out;
+                       if (action == PROC_POLICY_ACTION_APPLY) {
+                               /* bind the thread in target_thread in current process to target_proc */
+                               error = proc_lf_pidbind(current_task(), target_threadid, proc->task, state);
+                       } else
+                               error = EINVAL;
+                       break;
+               default:
+                       error = EINVAL;
+                       break;  
+       }
+
+out:
+       return(error);
+}
+#endif /* CONFIG_EMBEDDED */
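Driving the new PROC_POLICY_APP_LIFECYCLE path from userspace would look roughly like the sketch below; sys/process_policy.h is a private header, and the wrapper declaration is an assumption whose argument order mirrors process_policy_args:

    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/process_policy.h>  /* private header with the constants used */

    /* Assumed syscall wrapper. */
    extern int process_policy(int scope, int action, int policy,
                              int policy_subtype, void *attrp,
                              pid_t target_pid, uint64_t target_threadid);

    static int
    set_app_state(pid_t pid, int state /* TASK_APPSTATE_* */)
    {
        return process_policy(PROC_POLICY_SCOPE_PROCESS,
                              PROC_POLICY_ACTION_SET,
                              PROC_POLICY_APP_LIFECYCLE,
                              PROC_POLICY_APPLIFE_STATE,
                              &state, pid, 0);
    }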
+
 
 static int 
-handle_apptype(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid)
+handle_apptype(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t target_proc, __unused uint64_t target_threadid)
 {
        int error = 0;
 
        switch(policy_subtype) {
+#if !CONFIG_EMBEDDED
                case PROC_POLICY_OSX_APPTYPE_TAL:
                        /* need to be super user to do this */
                        if (kauth_cred_issuser(kauth_cred_get()) == 0) {
@@ -381,9 +517,14 @@ handle_apptype(__unused int scope, int action, __unused int policy, int policy_s
                case PROC_POLICY_OSX_APPTYPE_DASHCLIENT:
                        /* no special priv needed */
                        break;
+#endif /* !CONFIG_EMBEDDED */
                case PROC_POLICY_OSX_APPTYPE_NONE:
+#if CONFIG_EMBEDDED
+               case PROC_POLICY_IOS_RESV1_APPTYPE:
+               case PROC_POLICY_IOS_APPLE_DAEMON:
                case PROC_POLICY_IOS_APPTYPE:
                case PROC_POLICY_IOS_NONUITYPE:
+#endif /* CONFIG_EMBEDDED */
                        return(ENOTSUP);
                        break;
                default:
@@ -393,21 +534,24 @@ handle_apptype(__unused int scope, int action, __unused int policy, int policy_s
        switch (action) {
                case PROC_POLICY_ACTION_ENABLE:
                        /* reapply the app foreground/background policy */
-                       error = proc_enable_task_apptype(proc->task, policy_subtype);
+                       error = proc_enable_task_apptype(target_proc->task, policy_subtype);
                        break;
                case PROC_POLICY_ACTION_DISABLE: 
                        /* remove the app foreground/background policy */
-                       error = proc_disable_task_apptype(proc->task, policy_subtype);
+                       error = proc_disable_task_apptype(target_proc->task, policy_subtype);
                        break;
                default:
                        error = EINVAL;
                        break;
        }
                                
+#if !CONFIG_EMBEDDED
 out:
+#endif /* !CONFIG_EMBEDDED */
        return(error);
 }
 
+
 int
 proc_apply_resource_actions(void * bsdinfo, int type, int action)
 {
@@ -426,11 +570,15 @@ proc_apply_resource_actions(void * bsdinfo, int type, int action)
                        psignal(p, SIGKILL);
                        break;
 
-               case PROC_POLICY_RSRCACT_NOTIFY:
+               case PROC_POLICY_RSRCACT_NOTIFY_KQ:
                        proc_lock(p);
                        proc_knote(p, NOTE_RESOURCEEND | (type & 0xff));
                        proc_unlock(p);
                        break;
+               
+               case PROC_POLICY_RSRCACT_NOTIFY_EXC:
+                       panic("shouldn't be applying exception notification to process!");
+                       break;
        }
 
        return(0);
@@ -445,7 +593,8 @@ proc_restore_resource_actions(void * bsdinfo, __unused int type, int action)
        switch(action) {
                case PROC_POLICY_RSRCACT_THROTTLE:
                case PROC_POLICY_RSRCACT_TERMINATE:
-               case PROC_POLICY_RSRCACT_NOTIFY:
+               case PROC_POLICY_RSRCACT_NOTIFY_KQ:
+               case PROC_POLICY_RSRCACT_NOTIFY_EXC:
                        /* no need to do anything */
                        break;
 
index bcb0b0997cce9e43d42a541d0f6a911abdc0756a..6e96434e1cacc41bcfb71a10f1b930dc29baa15a 100644 (file)
@@ -87,7 +87,9 @@
 #include <pexpert/pexpert.h>
 
 #define __PSYNCH_DEBUG__ 0                     /* debug panic actions  */
+#if (KDEBUG && STANDARD_KDEBUG)
 #define _PSYNCH_TRACE_ 1               /* kdebug trace */
+#endif
 
 #define __TESTMODE__ 2         /* 0 - return error on user error conditions */
                                /* 1 - log error on user error conditions */
@@ -1739,8 +1741,13 @@ out:
  *  psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
  */
 int
+#ifdef NOTYET
+psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap,  __unused uint32_t * retval)
+#else /* NOTYET */
 psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_args * uap,  __unused uint32_t * retval)
+#endif /* NOTYET */
 {
+#ifdef NOTYET
        user_addr_t rwlock  = uap->rwlock;
        uint32_t lgen = uap->lgenval;
        uint32_t ugen = uap->ugenval;
@@ -1875,8 +1882,12 @@ out:
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
        return(error);
+#else /* NOTYET */
+       return(ESRCH);
+#endif /* NOTYET */
 }
 
+
 /*
  *  psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
  */
@@ -2029,8 +2040,13 @@ out1:
  *  psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
  */
 int
+#ifdef NOTYET
 psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
+#else /* NOTYET */
+psynch_rw_yieldwrlock(__unused proc_t p, __unused struct psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
+#endif /* NOTYET */
 {
+#ifdef NOTYET
        user_addr_t rwlock  = uap->rwlock;
        uint32_t lgen = uap->lgenval;
        uint32_t ugen = uap->ugenval;
@@ -2166,6 +2182,9 @@ out:
        __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
        return(error);
+#else /* NOTYET */
+       return(ESRCH);
+#endif /* NOTYET */
 }
 
 #if NOTYET
@@ -2657,12 +2676,13 @@ pth_proc_hashdelete(proc_t p)
                pthread_debug_proc = PROC_NULL;
 #endif /* _PSYNCH_TRACE_ */
        hashptr = p->p_pthhash;
+       p->p_pthhash = NULL;
        if (hashptr == NULL)
                return;
 
+       pthread_list_lock();
        for(i= 0; i < hashsize; i++) {
                while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
-                       pthread_list_lock();
                        if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
                                kwq->kw_pflags &= ~KSYN_WQ_INHASH;
                                LIST_REMOVE(kwq, kw_hash);
@@ -2679,10 +2699,11 @@ pth_proc_hashdelete(proc_t p)
                                ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
                        lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
                        zfree(kwq_zone, kwq);
+                       pthread_list_lock();
                }
        }
-       FREE(p->p_pthhash, M_PROC);
-       p->p_pthhash = NULL;
+       pthread_list_unlock();
+       FREE(hashptr, M_PROC);
 }
 
 /* no lock held for this as the waitqueue is getting freed */
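The pth_proc_hashdelete rework above has two parts: p->p_pthhash is cleared before the walk, so no new lookups can find the table, and the list lock is now dropped around zfree() and retaken, since freeing while holding the lock is unsafe. The drop-free-retake shape in isolation:

    #include <pthread.h>
    #include <stdlib.h>

    struct node { struct node *next; };
    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *head;

    static void
    drain(void)
    {
        struct node *n;

        pthread_mutex_lock(&list_lock);
        while ((n = head) != NULL) {
            head = n->next;                    /* unlink under the lock */
            pthread_mutex_unlock(&list_lock);  /* can't free while holding it */
            free(n);
            pthread_mutex_lock(&list_lock);    /* retake before the next pop */
        }
        pthread_mutex_unlock(&list_lock);
    }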
@@ -3066,8 +3087,8 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele
 #endif
 {
        kern_return_t kret;
-       int error = 0;
 #if _PSYNCH_TRACE_
+       int error = 0;
        uthread_t uth = NULL;
 #endif /* _PSYNCH_TRACE_ */
 
@@ -4161,7 +4182,7 @@ update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
 uint32_t 
 find_nextlowseq(ksyn_wait_queue_t kwq)
 {
-       uint32_t numbers[4];
+       uint32_t numbers[KSYN_QUEUE_MAX];
        int count = 0, i;
        uint32_t lowest;
 
@@ -4188,7 +4209,7 @@ find_nextlowseq(ksyn_wait_queue_t kwq)
 uint32_t
 find_nexthighseq(ksyn_wait_queue_t kwq)
 {
-       uint32_t numbers[4];
+       uint32_t numbers[KSYN_QUEUE_MAX];
        int count = 0, i;
        uint32_t highest;
 
index 80112d7c2050d46c54c51c40f11965fad9c72336..d037ee0a1402234839c5e9c7754c9776403d3d13 100644 (file)
@@ -114,28 +114,34 @@ lck_grp_attr_t   *pthread_lck_grp_attr;
 lck_grp_t    *pthread_lck_grp;
 lck_attr_t   *pthread_lck_attr;
 
-extern kern_return_t thread_getstatus(register thread_t act, int flavor,
-                       thread_state_t tstate, mach_msg_type_number_t *count);
-extern kern_return_t thread_setstatus(thread_t thread, int flavor,
-                       thread_state_t tstate, mach_msg_type_number_t count);
 extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
 extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
 extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
 
 extern void workqueue_thread_yielded(void);
 
-static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
-static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
-                                       user_addr_t oc_item, int oc_prio, int oc_affinity);
-static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
+#if defined(__i386__) || defined(__x86_64__)
+extern boolean_t is_useraddr64_canonical(uint64_t addr64);
+#endif
+
+static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
+                                       boolean_t  overcommit, int oc_prio, int oc_affinity);
+
+static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority);
+
+static void wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl,
                       int reuse_thread, int wake_thread, int return_directly);
+
+static int setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl);
+
 static void wq_unpark_continue(void);
 static void wq_unsuspend_continue(void);
-static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
+
 static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
 static void workqueue_removethread(struct threadlist *tl, int fromexit);
 static void workqueue_lock_spin(proc_t);
 static void workqueue_unlock(proc_t);
+
 int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
 int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
 
@@ -150,6 +156,12 @@ int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
 #define TRUNC_DOWN64(a,c)       ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
 
 
+/* flag values for reuse field in the libc side _pthread_wqthread */
+#define        WQ_FLAG_THREAD_PRIOMASK         0x0000ffff
+#define        WQ_FLAG_THREAD_OVERCOMMIT       0x00010000      /* thread is with overcommit prio */
+#define        WQ_FLAG_THREAD_REUSE            0x00020000      /* thread is being reused */
+#define        WQ_FLAG_THREAD_NEWSPI           0x00040000      /* the call is with new SPIs */
+
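The new flags are packed into the single reuse word handed to libc's _pthread_wqthread; a decode sketch follows, with hypothetical struct and function names and mask values taken from the defines above:

    #include <stdint.h>
    #include <stdbool.h>

    struct wq_start {
        uint32_t priority;
        bool overcommit, reused, newspi;
    };

    static struct wq_start
    decode_reuse_word(uint32_t w)
    {
        struct wq_start s = {
            .priority   = w & 0x0000ffff,         /* WQ_FLAG_THREAD_PRIOMASK */
            .overcommit = (w & 0x00010000) != 0,  /* WQ_FLAG_THREAD_OVERCOMMIT */
            .reused     = (w & 0x00020000) != 0,  /* WQ_FLAG_THREAD_REUSE */
            .newspi     = (w & 0x00040000) != 0,  /* WQ_FLAG_THREAD_NEWSPI */
        };
        return s;
    }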
 /*
  * Flags field passed to bsdthread_create and back in pthread_start
 31  <---------------------------------> 0
@@ -322,6 +334,13 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
                 */
                ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
 
+               /* Disallow setting non-canonical PC or stack */
+               if (!is_useraddr64_canonical(ts64->rsp) ||
+                   !is_useraddr64_canonical(ts64->rip)) {
+                       error = EINVAL;
+                       goto out;
+               }
+
                thread_set_wq_state64(th, (thread_state_t)ts64);
        }
        }
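The new check rejects thread states whose rip or rsp are not canonical x86_64 user addresses, closing off a way to have the kernel install a state that faults, or is mishandled, on return to user mode. A plausible stand-in for the helper, on the assumption that user addresses must sit in the low canonical half:

    #include <stdint.h>
    #include <stdbool.h>

    /* Assumption: a canonical 64-bit user address has bits 63:47 clear,
     * i.e. it lies below the 47-bit boundary. The kernel's actual bound
     * (MACH_VM_MAX_ADDRESS) is slightly tighter. */
    static bool
    useraddr64_canonical(uint64_t addr64)
    {
        return addr64 < (1ULL << 47);
    }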
@@ -332,8 +351,16 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
        if ((flags & PTHREAD_START_SETSCHED) != 0) {
                thread_extended_policy_data_t    extinfo;
                thread_precedence_policy_data_t   precedinfo;
+#if CONFIG_EMBEDDED
+               int ret = 0;
+#endif /* CONFIG_EMBEDDED */
 
                importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
+#if CONFIG_EMBEDDED
+               /* Sets the saved importance for Apple iOS daemons if backgrounded; otherwise returns 0. */
+               ret = proc_setthread_saved_importance(th, importance);
+               if (ret == 0) {
+#endif /* CONFIG_EMBEDDED */
                policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 
                if (policy == SCHED_OTHER)
@@ -345,6 +372,9 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 #define BASEPRI_DEFAULT 31
                precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
+#if CONFIG_EMBEDDED
+               }
+#endif /* CONFIG_EMBEDDED */
        }
 
        kret = thread_resume(th);
@@ -510,7 +540,7 @@ workqueue_interval_timer_start(struct workqueue *wq)
 
        thread_call_enter_delayed(wq->wq_atimer_call, deadline);
 
-       KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0);
+       KERNEL_DEBUG(0xefffd110, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
 }
 
 
@@ -630,14 +660,14 @@ again:
                 * new work within our acceptable time interval because
                 * there were no idle threads left to schedule
                 */
-               if (wq->wq_itemcount) {
+               if (wq->wq_reqcount) {
                        uint32_t        priority;
                        uint32_t        affinity_tag;
                        uint32_t        i;
                        uint64_t        curtime;
 
                        for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-                               if (wq->wq_list_bitmap & (1 << priority))
+                               if (wq->wq_requests[priority])
                                        break;
                        }
                        assert(priority < WORKQUEUE_NUMPRIOS);
@@ -675,23 +705,23 @@ again:
                                        break;
                                }
                        }
-                       if (wq->wq_itemcount) {
+                       if (wq->wq_reqcount) {
                                /*
                                 * as long as we have threads to schedule, and we successfully
                                 * scheduled new work, keep trying
                                 */
                                while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
                                        /*
-                                        * workqueue_run_nextitem is responsible for
+                                        * workqueue_run_nextreq is responsible for
                                         * dropping the workqueue lock in all cases
                                         */
-                                       retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0);
+                                       retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0, 0);
                                        workqueue_lock_spin(p);
 
                                        if (retval == FALSE)
                                                break;
                                }
-                               if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {
+                               if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
 
                                        if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
                                                goto again;
@@ -699,7 +729,7 @@ again:
                                        if (wq->wq_thidlecount == 0 || busycount)
                                                WQ_TIMER_NEEDED(wq, start_timer);
 
-                                       KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0);
+                                       KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
                                }
                        }
                }
@@ -734,12 +764,12 @@ workqueue_thread_yielded(void)
 
        p = current_proc();
 
-       if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0)
+       if ((wq = p->p_wqptr) == NULL || wq->wq_reqcount == 0)
                return;
        
        workqueue_lock_spin(p);
 
-       if (wq->wq_itemcount) {
+       if (wq->wq_reqcount) {
                uint64_t        curtime;
                uint64_t        elapsed;
                clock_sec_t     secs;
@@ -752,7 +782,7 @@ workqueue_thread_yielded(void)
                        workqueue_unlock(p);
                        return;
                }
-               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0);
+               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
 
                wq->wq_thread_yielded_count = 0;
 
@@ -768,11 +798,11 @@ workqueue_thread_yielded(void)
                                 * 'workqueue_addnewthread' drops the workqueue lock
                                 * when creating the new thread and then retakes it before
                                 * returning... this window allows other threads to process
-                                * work on the queue, so we need to recheck for available work
+                                * requests, so we need to recheck for available work
                                 * if none found, we just return...  the newly created thread
                                 * will eventually get used (if it hasn't already)...
                                 */
-                               if (wq->wq_itemcount == 0) {
+                               if (wq->wq_reqcount == 0) {
                                        workqueue_unlock(p);
                                        return;
                                }
@@ -780,9 +810,8 @@ workqueue_thread_yielded(void)
                        if (wq->wq_thidlecount) {
                                uint32_t        priority;
                                uint32_t        affinity = -1;
-                               user_addr_t     item;
-                               struct workitem *witem = NULL;
-                               struct workitemlist *wl = NULL;
+                               boolean_t       overcommit = FALSE;
+                               boolean_t       force_oc = FALSE;
                                struct uthread    *uth;
                                struct threadlist *tl;
 
@@ -791,38 +820,31 @@ workqueue_thread_yielded(void)
                                        affinity = tl->th_affinity_tag;
 
                                for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-                                       if (wq->wq_list_bitmap & (1 << priority)) {
-                                               wl = (struct workitemlist *)&wq->wq_list[priority];
+                                       if (wq->wq_requests[priority])
                                                break;
-                                       }
                                }
-                               assert(wl != NULL);
-                               assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));
+                               assert(priority < WORKQUEUE_NUMPRIOS);
 
-                               witem = TAILQ_FIRST(&wl->wl_itemlist);
-                               TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
+                               wq->wq_reqcount--;
+                               wq->wq_requests[priority]--;
 
-                               if (TAILQ_EMPTY(&wl->wl_itemlist))
-                                       wq->wq_list_bitmap &= ~(1 << priority);
-                               wq->wq_itemcount--;
+                               if (wq->wq_ocrequests[priority]) {
+                                       wq->wq_ocrequests[priority]--;
+                                       overcommit = TRUE;
+                               } else
+                                       force_oc = TRUE;
 
-                               item = witem->wi_item;
-                               witem->wi_item = (user_addr_t)0;
-                               witem->wi_affinity = 0;
-
-                               TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
-
-                               (void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity);
+                               (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, priority, affinity);
                                /*
-                                * workqueue_run_nextitem is responsible for
+                                * workqueue_run_nextreq is responsible for
                                 * dropping the workqueue lock in all cases
                                 */
-                               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0);
+                               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
 
                                return;
                        }
                }
-               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0);
+               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
        }
        workqueue_unlock(p);
 }
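
The scan above is the core of the new request model: pending work is now a set of per-priority counters, with delayed overcommit requests tracked in a parallel array, so taking work is a decrement instead of a list unlink. A minimal stand-alone sketch of that bookkeeping, assuming WORKQUEUE_NUMPRIOS is 4 and using toy counters that mirror (but are not) the kernel's wq_reqcount / wq_requests / wq_ocrequests fields:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUMPRIOS 4   /* assumed stand-in for WORKQUEUE_NUMPRIOS */

static uint32_t reqcount;              /* total pending requests */
static uint32_t requests[NUMPRIOS];    /* per-priority, includes overcommit */
static uint32_t ocrequests[NUMPRIOS];  /* the overcommit subset */

/* take the highest-priority (lowest-numbered) pending request,
 * preferring the overcommit variety at that level */
static bool take_one_request(int *out_prio, bool *out_overcommit)
{
        int priority;

        if (reqcount == 0)
                return false;

        for (priority = 0; priority < NUMPRIOS; priority++)
                if (requests[priority])
                        break;
        assert(priority < NUMPRIOS);

        reqcount--;
        requests[priority]--;

        *out_overcommit = false;
        if (ocrequests[priority]) {
                ocrequests[priority]--;
                *out_overcommit = true;
        }
        *out_prio = priority;
        return true;
}

int main(void)
{
        int prio;
        bool oc;

        reqcount = 2;
        requests[1] = 1;
        requests[3] = 1;
        ocrequests[3] = 1;

        while (take_one_request(&prio, &oc))
                printf("run request at prio %d%s\n", prio, oc ? " (overcommit)" : "");
        return 0;
}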
@@ -868,7 +890,7 @@ workqueue_callback(int type, thread_t thread)
 
                        OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
 
-                       if (wq->wq_itemcount)
+                       if (wq->wq_reqcount)
                                WQ_TIMER_NEEDED(wq, start_timer);
 
                        if (start_timer == TRUE)
@@ -1090,13 +1112,11 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
        int wq_size;
        char * ptr;
        char * nptr;
-       int j;
        uint32_t i;
        uint32_t num_cpus;
        int error = 0;
        boolean_t need_wakeup = FALSE;
-       struct workitem * witem;
-       struct workitemlist *wl;
+
 
        if ((p->p_lflag & P_LREGISTER) == 0)
                return(EINVAL);
@@ -1138,10 +1158,10 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
                workqueue_unlock(p);
 
                wq_size = sizeof(struct workqueue) +
-                       (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
+                       (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint16_t)) +
                        (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
                        (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) +
-                       sizeof(uint64_t);
+                       sizeof(uint32_t) + sizeof(uint64_t);
 
                ptr = (char *)kalloc(wq_size);
                bzero(ptr, wq_size);
@@ -1153,25 +1173,20 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
                wq->wq_task = current_task();
                wq->wq_map  = current_map();
 
-               for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wl = (struct workitemlist *)&wq->wq_list[i];
-                       TAILQ_INIT(&wl->wl_itemlist);
-                       TAILQ_INIT(&wl->wl_freelist);
-
-                       for (j = 0; j < WORKITEM_SIZE; j++) {
-                               witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
-                               TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
-                       }
+               for (i = 0; i < WORKQUEUE_NUMPRIOS; i++)
                        wq->wq_reqconc[i] = wq->wq_affinity_max;
-               }
+
                nptr = ptr + sizeof(struct workqueue);
 
                for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wq->wq_thactive_count[i] = (uint32_t *)nptr;
-                       nptr += (num_cpus * sizeof(uint32_t));
+                       wq->wq_thscheduled_count[i] = (uint16_t *)nptr;
+                       nptr += (num_cpus * sizeof(uint16_t));
                }
+               nptr += (sizeof(uint32_t) - 1);
+               nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint32_t) - 1));
+
                for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
+                       wq->wq_thactive_count[i] = (uint32_t *)nptr;
                        nptr += (num_cpus * sizeof(uint32_t));
                }
                /*
@@ -1208,59 +1223,86 @@ out:
        return(error);
 }
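
The two nptr adjustments above are needed because the uint16_t wq_thscheduled_count arrays can leave the allocation cursor misaligned for the uint32_t wq_thactive_count arrays that follow. The same round-up idiom in isolation (the buffer and offsets are invented for the demo):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round a cursor up to uint32_t alignment, as workq_open does between
 * the uint16_t and uint32_t counter arrays */
static char *
align_up32(char *p)
{
        p += sizeof(uint32_t) - 1;
        return (char *)((uintptr_t)p & ~(uintptr_t)(sizeof(uint32_t) - 1));
}

int main(void)
{
        static char buf[64];
        char *nptr = buf + 6;   /* as if three uint16_t entries were laid out */

        nptr = align_up32(nptr);
        assert(((uintptr_t)nptr & 3) == 0);
        printf("cursor offset after alignment: %ld\n", (long)(nptr - buf)); /* 8 */
        return 0;
}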
 
+
 int
 workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused int32_t *retval)
 {
-       user_addr_t item = uap->item;
-       int options     = uap->options;
-       int prio        = uap->prio;    /* should  be used to find the right workqueue */
-       int affinity    = uap->affinity;
-       int error       = 0;
-       thread_t th     = THREAD_NULL;
-       user_addr_t oc_item = 0;
         struct workqueue *wq;
+       int error       = 0;
 
        if ((p->p_lflag & P_LREGISTER) == 0)
                return(EINVAL);
 
-       /*
-        * affinity not yet hooked up on this path
-        */
-       affinity = -1;
+       switch (uap->options) {
 
-       switch (options) {
+               case WQOPS_QUEUE_NEWSPISUPP:
+                       break;
+
+               case WQOPS_QUEUE_REQTHREADS: {
+                       /*
+                        * for this operation, we re-purpose the affinity
+                        * argument as the number of threads to start
+                        */
+                       boolean_t overcommit = FALSE;
+                       int priority         = uap->prio;
+                       int reqcount         = uap->affinity;
 
-               case WQOPS_QUEUE_ADD: {
-                       
-                       if (prio & WORKQUEUE_OVERCOMMIT) {
-                               prio &= ~WORKQUEUE_OVERCOMMIT;
-                               oc_item = item;
+                       if (priority & WORKQUEUE_OVERCOMMIT) {
+                               priority &= ~WORKQUEUE_OVERCOMMIT;
+                               overcommit = TRUE;
                        }
-                       if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
-                               return (EINVAL);
+                       if ((reqcount <= 0) || (priority < 0) || (priority >= WORKQUEUE_NUMPRIOS)) {
+                               error = EINVAL;
+                               break;
+                       }
+                        workqueue_lock_spin(p);
 
-                       workqueue_lock_spin(p);
+                        if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
+                                workqueue_unlock(p);
 
-                       if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-                               workqueue_unlock(p);
-                               return (EINVAL);
-                       }
-                       if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max))) {
+                               error = EINVAL;
+                               break;
+                        }
+                       if (overcommit == FALSE) {
+                               wq->wq_reqcount += reqcount;
+                               wq->wq_requests[priority] += reqcount;
+                               
+                               KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
 
-                               workqueue_addnewthread(wq, oc_item ? TRUE : FALSE);
+                               while (wq->wq_reqcount) {
+                                       if (workqueue_run_one(p, wq, overcommit, priority) == FALSE)
+                                               break;
+                               }
+                       } else {
+                               KERNEL_DEBUG(0xefffd13c | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
 
-                               if (wq->wq_thidlecount == 0)
-                                       oc_item = 0;
+                               while (reqcount) {
+                                       if (workqueue_run_one(p, wq, overcommit, priority) == FALSE)
+                                               break;
+                                       reqcount--;
+                               }
+                               if (reqcount) {
+                                       /*
+                                        * we need to delay starting some of the overcommit requests...
+                                        * we should only fail to create the overcommit threads if
+                                        * we're at the max thread limit... as existing threads
+                                        * return to the kernel, we'll notice the ocrequests
+                                        * and spin them back to user space as the overcommit variety
+                                        */
+                                       wq->wq_reqcount += reqcount;
+                                       wq->wq_requests[priority] += reqcount;
+                                       wq->wq_ocrequests[priority] += reqcount;
+
+                                       KERNEL_DEBUG(0xefffd140 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
+                               }
                        }
-                       if (oc_item == 0)
-                               error = workqueue_additem(wq, prio, item, affinity);
+                       workqueue_unlock(p);
 
-                       KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);
                        }
                        break;
-               case WQOPS_THREAD_RETURN: {
 
-                       th = current_thread();
+               case WQOPS_THREAD_RETURN: {
+                       thread_t th = current_thread();
                        struct uthread *uth = get_bsdthread_info(th);
 
                        /* reset signal mask on the workqueue thread to default state */
@@ -1269,50 +1311,29 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
                                uth->uu_sigmask = ~workq_threadmask;
                                proc_unlock(p);
                        }
-
                        workqueue_lock_spin(p);
 
                        if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
                                workqueue_unlock(p);
-                               return (EINVAL);
+
+                               error = EINVAL;
+                               break;
                        }
                        KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);
-                       }
-                       break;
-               case WQOPS_THREAD_SETCONC: {
 
-                       if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS))
-                               return (EINVAL);
-
-                       workqueue_lock_spin(p);
-
-                       if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-                               workqueue_unlock(p);
-                               return (EINVAL);
-                       }
+                       (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0, -1);
                        /*
-                        * for this operation, we re-purpose the affinity
-                        * argument as the concurrency target
+                        * workqueue_run_nextreq is responsible for
+                        * dropping the workqueue lock in all cases
                         */
-                       if (prio < WORKQUEUE_NUMPRIOS)
-                               wq->wq_reqconc[prio] = affinity;
-                       else {
-                               for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++)
-                                       wq->wq_reqconc[prio] = affinity;
-
-                       }
                        }
                        break;
+               
                default:
-                       return (EINVAL);
+                       error = EINVAL;
+                       break;
        }
-       (void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity);
-       /*
-        * workqueue_run_nextitem is responsible for
-        * dropping the workqueue lock in all cases
-        */
        return (error);
-
 }
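
For WQOPS_QUEUE_REQTHREADS the syscall arguments are overloaded: the overcommit flag rides in the prio word and affinity carries the number of threads requested. A hedged sketch of the encode/decode, where the WORKQUEUE_OVERCOMMIT value is an assumption for illustration rather than a quote from the pthread headers:

#include <stdio.h>

#define WORKQUEUE_NUMPRIOS   4        /* assumed */
#define WORKQUEUE_OVERCOMMIT 0x10000  /* assumed illustrative value */

/* what a user-space caller would pass for uap->prio / uap->affinity */
static void encode_reqthreads(int priority, int overcommit, int nthreads,
                              int *prio_arg, int *affinity_arg)
{
        *prio_arg = overcommit ? (priority | WORKQUEUE_OVERCOMMIT) : priority;
        *affinity_arg = nthreads;   /* re-purposed as the request count */
}

/* mirror of the kernel-side decode in workq_kernreturn */
static void decode_reqthreads(int prio_arg, int *priority, int *overcommit)
{
        *overcommit = (prio_arg & WORKQUEUE_OVERCOMMIT) != 0;
        *priority = prio_arg & ~WORKQUEUE_OVERCOMMIT;
}

int main(void)
{
        int prio_arg, aff_arg, prio, oc;

        encode_reqthreads(2, 1, 8, &prio_arg, &aff_arg);
        decode_reqthreads(prio_arg, &prio, &oc);
        printf("priority %d overcommit %d reqcount %d\n", prio, oc, aff_arg);
        return 0;
}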
 
 /*
@@ -1426,30 +1447,6 @@ workqueue_exit(struct proc *p)
        }
 }
 
-static int 
-workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity)
-{
-       struct workitem *witem;
-       struct workitemlist *wl;
-
-       wl = (struct workitemlist *)&wq->wq_list[prio];
-
-       if (TAILQ_EMPTY(&wl->wl_freelist))
-               return (ENOMEM);
-
-       witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
-       TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);
-
-       witem->wi_item = item;
-       witem->wi_affinity = affinity;
-       TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);
-
-       wq->wq_list_bitmap |= (1 << prio);
-
-       wq->wq_itemcount++;
-
-       return (0);
-}
 
 static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
 {
@@ -1464,37 +1461,69 @@ static int workqueue_policy[WORKQUEUE_NUMPRIOS] =
 };
 
 
+
+static boolean_t
+workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority)
+{
+       boolean_t       ran_one;
+
+       if (wq->wq_thidlecount == 0) {
+               if (overcommit == FALSE) {
+                       if (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max)
+                               workqueue_addnewthread(wq, overcommit);
+               } else {
+                       workqueue_addnewthread(wq, overcommit);
+
+                       if (wq->wq_thidlecount == 0)
+                               return (FALSE);
+               }
+       }
+       ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority, -1);
+       /*
+        * workqueue_run_nextreq is responsible for
+        * dropping the workqueue lock in all cases
+        */
+       workqueue_lock_spin(p);
+
+       return (ran_one);
+}
+
+
+
 /*
- * workqueue_run_nextitem:
+ * workqueue_run_nextreq:
  *   called with the workqueue lock held...
  *   responsible for dropping it in all cases
  */
 static boolean_t
-workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity)
+workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
+                     boolean_t force_oc, boolean_t overcommit, int oc_prio, int oc_affinity)
 {
-       struct workitem *witem = NULL;
-       user_addr_t item = 0;
        thread_t th_to_run = THREAD_NULL;
        thread_t th_to_park = THREAD_NULL;
        int wake_thread = 0;
-       int reuse_thread = 1;
+       int reuse_thread = WQ_FLAG_THREAD_REUSE;
        uint32_t priority, orig_priority;
        uint32_t affinity_tag, orig_affinity_tag;
        uint32_t i, n;
-       uint32_t activecount;
        uint32_t busycount;
        uint32_t us_to_wait;
        struct threadlist *tl = NULL;
        struct threadlist *ttl = NULL;
        struct uthread *uth = NULL;
-       struct workitemlist *wl = NULL;
        boolean_t start_timer = FALSE;
        boolean_t adjust_counters = TRUE;
        uint64_t  curtime;
 
 
-       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_itemcount, 0);
+       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0);
+
+       if (thread != THREAD_NULL) {
+               uth = get_bsdthread_info(thread);
 
+               if ( (tl = uth->uu_threadlist) == NULL)
+                       panic("wq thread with no threadlist ");
+       }
        /*
         * from here until we drop the workq lock
         * we can't be pre-empted since we hold 
@@ -1504,14 +1533,15 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
         * and these values are used to index the multi-dimensional
         * counter arrays in 'workqueue_callback'
         */
-       if (oc_item) {
+dispatch_overcommit:
+
+       if (overcommit == TRUE || force_oc == TRUE) {
                uint32_t min_scheduled = 0;
                uint32_t scheduled_count;
                uint32_t active_count;
                uint32_t t_affinity = 0;
 
                priority = oc_prio;
-               item = oc_item;
 
                if ((affinity_tag = oc_affinity) == (uint32_t)-1) {
                        for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
@@ -1536,37 +1566,55 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                        }
                        affinity_tag = t_affinity;
                }
+               if (thread != THREAD_NULL) {
+                       th_to_run = thread;
+                       goto pick_up_work;
+               }
                goto grab_idle_thread;
        }
+       if (wq->wq_reqcount) {
+               for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
+                       if (wq->wq_requests[priority])
+                               break;
+               }
+               assert(priority < WORKQUEUE_NUMPRIOS);
+
+               if (wq->wq_ocrequests[priority] && (thread != THREAD_NULL || wq->wq_thidlecount)) {
+                       /*
+                        * handle delayed overcommit request...
+                        * they have priority over normal requests
+                        * within a given priority level
+                        */
+                       wq->wq_reqcount--;
+                       wq->wq_requests[priority]--;
+                       wq->wq_ocrequests[priority]--;
+
+                       oc_prio = priority;
+                       overcommit = TRUE;
+
+                       goto dispatch_overcommit;
+               }
+       }
        /*
         * if we get here, the work should be handled by a constrained thread
         */
-       if (wq->wq_itemcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
+       if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
                /*
                 * no work to do, or we're already at or over the scheduling limit for
                 * constrained threads...  just return or park the thread...
                 * do not start the timer for this condition... if we don't have any work,
                 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
-                * constrained threads to return to the kernel before we can dispatch work from our queue
+                * constrained threads to return to the kernel before we can dispatch additional work
                 */
                if ((th_to_park = thread) == THREAD_NULL)
                        goto out_of_work;
                goto parkit;
        }
-       for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-               if (wq->wq_list_bitmap & (1 << priority)) {
-                       wl = (struct workitemlist *)&wq->wq_list[priority];
-                       break;
-               }
-       }
-       assert(wl != NULL);
-       assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));
 
        curtime = mach_absolute_time();
 
        if (thread != THREAD_NULL) {
-               uth = get_bsdthread_info(thread);
-               tl = uth->uu_threadlist;
+
                affinity_tag = tl->th_affinity_tag;
 
                /*
@@ -1576,6 +1624,10 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                 * we're considering running work for
                 */
                if (affinity_tag < wq->wq_reqconc[priority]) {
+                       uint32_t  bcount = 0;
+                       uint32_t  acount = 0;
+                       uint32_t  tcount = 0;
+
                        /*
                         * we're a worker thread from the pool... currently we
                         * are considered 'active' which means we're counted
@@ -1583,56 +1635,84 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                         * add up the active counts of all the priority levels
                         * up to and including the one we want to schedule
                         */
-                       for (activecount = 0, i = 0; i <= priority; i++) {
-                               uint32_t  acount;
+                       for (i = 0; i <= priority; i++) {
 
-                               acount = wq->wq_thactive_count[i][affinity_tag];
+                               tcount = wq->wq_thactive_count[i][affinity_tag];
+                               acount += tcount;
 
-                               if (acount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
+                               if (tcount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
                                        if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag]))
-                                               acount = 1;
+                                               bcount++;
                                }
-                               activecount += acount;
                        }
-                       if (activecount == 1) {
+                       if ((acount + bcount) == 1) {
                                /*
                                 * we're the only active thread associated with our
                                 * affinity group at this priority level and higher,
+                                * and there are no threads considered 'busy',
                                 * so pick up some work and keep going
                                 */
                                th_to_run = thread;
                                goto pick_up_work;
                        }
+                       if (wq->wq_reqconc[priority] == 1) {
+                               /*
+                                * we have at least one other active or busy thread running at this
+                                * priority level or higher and since we only have 
+                                * 1 affinity group to schedule against, no need
+                                * to try and find another... we can't start up another thread to
+                                * service the request and we already have the info
+                                * needed to determine if we need to start a timer or not
+                                */
+                               if (acount == 1) {
+                                       /*
+                                        * we're the only active thread, but we must have found
+                                        * at least 1 busy thread, so indicate that we need
+                                        * to start a timer
+                                        */
+                                       busycount = 1;
+                               } else
+                                       busycount = 0;
+
+                               affinity_tag = 1;
+                               goto cant_schedule;
+                       }
                }
                /*
                 * there's more than 1 thread running in this affinity group
                 * or the concurrency level has been cut back for this priority...
-                * lets continue on and look for an 'empty' group to run this
-                * work item in
+                * let's continue on and look for an 'empty' group to run this
+                * work request in
                 */
        }
        busycount = 0;
 
        for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
+               boolean_t       can_schedule;
+
                /*
                 * look for first affinity group that is currently not active
                 * i.e. no active threads at this priority level or higher
                 * and no threads that have run recently
                 */
-               for (activecount = 0, i = 0; i <= priority; i++) {
-                       if ((activecount = wq->wq_thactive_count[i][affinity_tag]))
+               for (i = 0; i <= priority; i++) {
+                       can_schedule = FALSE;
+
+                       if (wq->wq_thactive_count[i][affinity_tag])
                                break;
 
-                       if (wq->wq_thscheduled_count[i][affinity_tag]) {
-                               if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
-                                       busycount++;
-                                       break;
-                               }
+                       if (wq->wq_thscheduled_count[i][affinity_tag] &&
+                           wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
+                               busycount++;
+                               break;
                        }
+                       can_schedule = TRUE;
                }
-               if (activecount == 0 && busycount == 0)
+               if (can_schedule == TRUE)
                        break;
        }
+cant_schedule:
+
        if (affinity_tag >= wq->wq_reqconc[priority]) {
                /*
                 * we've already got at least 1 thread per
@@ -1644,7 +1724,7 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                         * 'busy' state... make sure we start
                         * the timer because if they are the only
                         * threads keeping us from scheduling
-                        * this workitem, we won't get a callback
+                        * this work request, we won't get a callback
                         * to kick off the timer... we need to
                         * start it now...
                         */
@@ -1671,6 +1751,8 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                th_to_run = thread;
                goto pick_up_work;
        }
+
+grab_idle_thread:
        if (wq->wq_thidlecount == 0) {
                /*
                 * we don't have a thread to schedule, but we have
@@ -1683,14 +1765,12 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
 
                goto no_thread_to_run;
        }
-
-grab_idle_thread:
        /*
         * we've got a candidate (affinity group with no currently
         * active threads) to start a new thread on...
         * we already know there is both work available
         * and an idle thread, so activate a thread and then
-        * fall into the code that pulls a new workitem...
+        * fall into the code that pulls a new work request...
         */
        TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
                if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {
@@ -1727,18 +1807,9 @@ grab_idle_thread:
        th_to_run = tl->th_thread;
 
 pick_up_work:
-       if (item == 0) {
-               witem = TAILQ_FIRST(&wl->wl_itemlist);
-               TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
-
-               if (TAILQ_EMPTY(&wl->wl_itemlist))
-                       wq->wq_list_bitmap &= ~(1 << priority);
-               wq->wq_itemcount--;
-
-               item = witem->wi_item;
-               witem->wi_item = (user_addr_t)0;
-               witem->wi_affinity = 0;
-               TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+       if (overcommit == FALSE && force_oc == FALSE) {
+               wq->wq_reqcount--;
+               wq->wq_requests[priority]--;
 
                if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
                        wq->wq_constrained_threads_scheduled++;
@@ -1792,38 +1863,25 @@ pick_up_work:
                thread_precedence_policy_data_t precedinfo;
                thread_extended_policy_data_t   extinfo;
                uint32_t        policy;
+#if CONFIG_EMBEDDED
+               int retval = 0;
 
+               /* sets the saved importance for an Apple iOS daemon if backgrounded; otherwise returns 0 */
+               retval = proc_setthread_saved_importance(th_to_run, workqueue_importance[priority]);
+               if (retval == 0) {
+#endif /* CONFIG_EMBEDDED */
                policy = workqueue_policy[priority];
                
                KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);
 
                if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
-                       struct uthread *ut = NULL;
-
-                       ut = get_bsdthread_info(th_to_run);
-
                        if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
                                /* remove the disk throttle, importance will be reset in anycase */
-#if !CONFIG_EMBEDDED
                                proc_restore_workq_bgthreadpolicy(th_to_run);
-#else /* !CONFIG_EMBEDDED */
-                               if ((ut->uu_flag & UT_BACKGROUND) != 0) {
-                                       ut->uu_flag &= ~UT_BACKGROUND;
-                                       ut->uu_iopol_disk = IOPOL_NORMAL;
-                               }
-#endif /* !CONFIG_EMBEDDED */
                        } 
 
                        if (priority == WORKQUEUE_BG_PRIOQUEUE) {
-#if !CONFIG_EMBEDDED
-                       proc_apply_workq_bgthreadpolicy(th_to_run);
-#else /* !CONFIG_EMBEDDED */
-                               if ((ut->uu_flag & UT_BACKGROUND) == 0) {
-                                       /* set diskthrottling */
-                                       ut->uu_flag |= UT_BACKGROUND;
-                                       ut->uu_iopol_disk = IOPOL_THROTTLE;
-                               }
-#endif /* !CONFIG_EMBEDDED */
+                               proc_apply_workq_bgthreadpolicy(th_to_run);
                        }
                }
 
@@ -1839,6 +1897,9 @@ pick_up_work:
 
 
                KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq,  priority, policy, 0, 0);
+#if CONFIG_EMBEDDED
+               }
+#endif /* CONFIG_EMBEDDED */
        }
        if (kdebug_enable) {
                int     lpri = -1;
@@ -1866,11 +1927,11 @@ pick_up_work:
                }
        }
        /*
-        * if current thread is reused for workitem, does not return via unix_syscall
+        * if current thread is reused for work request, does not return via unix_syscall
         */
-       wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
+       wq_runreq(p, overcommit, priority, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
        
-       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0);
+       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
 
        return (TRUE);
 
@@ -1894,11 +1955,6 @@ parkit:
         * this is a workqueue thread with no more
         * work to do... park it for now
         */
-       uth = get_bsdthread_info(th_to_park);
-       tl = uth->uu_threadlist;
-       if (tl == 0) 
-               panic("wq thread with no threadlist ");
-       
        TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
        tl->th_flags &= ~TH_LIST_RUNNING;
 
@@ -2032,7 +2088,7 @@ wq_unpark_continue(void)
                        if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
                                /*
                                 * a normal wakeup of this thread occurred... no need 
-                                * for any synchronization with the timer and wq_runitem
+                                * for any synchronization with the timer and wq_runreq
                                 */
 normal_return_to_user:                 
                                thread_sched_call(th_to_unpark, workqueue_callback);
@@ -2088,7 +2144,7 @@ normal_return_to_user:
 
 
 static void 
-wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
+wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl,
           int reuse_thread, int wake_thread, int return_directly)
 {
        int ret = 0;
@@ -2096,7 +2152,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 
        KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));
 
-       ret = setup_wqthread(p, th, item, reuse_thread, tl);
+       ret = setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);
 
        if (ret != 0)
                panic("setup_wqthread failed  %x\n", ret);
@@ -2106,7 +2162,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 
                thread_exception_return();
 
-               panic("wq_runitem: thread_exception_return returned ...\n");
+               panic("wq_runreq: thread_exception_return returned ...\n");
        }
        if (wake_thread) {
                workqueue_lock_spin(p);
@@ -2141,8 +2197,15 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 
 
 int
-setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
+setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl)
 {
+       uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
+
+       if (overcommit == TRUE)
+               flags |= WQ_FLAG_THREAD_OVERCOMMIT;
+
+       flags |= priority;
+
 #if defined(__i386__) || defined(__x86_64__)
        int isLP64 = 0;
 
@@ -2158,16 +2221,14 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
                ts->ebx = (unsigned int)tl->th_thport;
                ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-               ts->edx = (unsigned int)item;
-               ts->edi = (unsigned int)reuse_thread;
+               ts->edx = (unsigned int)0;
+               ts->edi = (unsigned int)flags;
                ts->esi = (unsigned int)0;
                /*
                 * set stack pointer
                 */
                ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
 
-               if ((reuse_thread != 0) && (ts->eax == (unsigned int)0))
-                       panic("setup_wqthread: setting reuse thread with null pthread\n");
                thread_set_wq_state32(th, (thread_state_t)ts);
 
        } else {
@@ -2178,8 +2239,8 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
                ts64->rsi = (uint64_t)(tl->th_thport);
                ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-               ts64->rcx = (uint64_t)item;
-               ts64->r8 = (uint64_t)reuse_thread;
+               ts64->rcx = (uint64_t)0;
+               ts64->r8 = (uint64_t)flags;
                ts64->r9 = (uint64_t)0;
 
                /*
@@ -2187,8 +2248,6 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                 */
                ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
 
-               if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0))
-                       panic("setup_wqthread: setting reuse thread with null pthread\n");
                thread_set_wq_state64(th, (thread_state_t)ts64);
        }
 #else
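
With the new SPI, setup_wqthread hands user space a single flags word (edi on i386, r8 on x86_64) carrying the reuse bit, the new-SPI marker, the overcommit bit, and the priority, where the old ABI passed an item pointer and a reuse int. A sketch of the packing; the WQ_FLAG_* values below are illustrative assumptions, not the header's definitions:

#include <stdint.h>
#include <stdio.h>

/* illustrative stand-ins for the WQ_FLAG_* constants in the pthread
 * headers; the real values may differ */
#define WQ_FLAG_THREAD_PRIOMASK   0x0000ffff
#define WQ_FLAG_THREAD_OVERCOMMIT 0x00010000
#define WQ_FLAG_THREAD_REUSE      0x00020000
#define WQ_FLAG_THREAD_NEWSPI     0x00040000

static uint32_t pack_wqthread_flags(int reuse, int overcommit, uint32_t priority)
{
        uint32_t flags = WQ_FLAG_THREAD_NEWSPI;

        if (reuse)
                flags |= WQ_FLAG_THREAD_REUSE;
        if (overcommit)
                flags |= WQ_FLAG_THREAD_OVERCOMMIT;
        return flags | (priority & WQ_FLAG_THREAD_PRIOMASK);
}

int main(void)
{
        uint32_t f = pack_wqthread_flags(1, 1, 2);

        printf("flags 0x%x -> prio %u\n", f, f & WQ_FLAG_THREAD_PRIOMASK);
        return 0;
}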
index 9d489e1225e0f17e49bfbd4734899d261bc7249d..ffbaaf4569bb07378afa0e94adb2c928fb8f23f5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -42,6 +42,7 @@
 #include <sys/unpcb.h>
 #include <sys/sys_domain.h>
 #include <sys/kern_event.h>
+#include <mach/vm_param.h>
 #include <net/ndrv_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_var.h>
@@ -67,12 +68,12 @@ fill_sockbuf_info(struct sockbuf *sb, struct sockbuf_info *sbi)
 static void
 fill_common_sockinfo(struct socket *so, struct socket_info *si)
 {
-       si->soi_so = (u_int64_t)((uintptr_t)so);
+       si->soi_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
        si->soi_type = so->so_type;
-       si->soi_options = so->so_options;
+       si->soi_options = (short)(so->so_options & 0xffff);
        si->soi_linger = so->so_linger;
        si->soi_state = so->so_state;
-       si->soi_pcb = (u_int64_t)((uintptr_t)so->so_pcb);
+       si->soi_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
        if (so->so_proto) {
                si->soi_protocol = so->so_proto->pr_protocol;
                if (so->so_proto->pr_domain)
@@ -148,7 +149,8 @@ fill_socketinfo(struct socket *so, struct socket_info *si)
                                tcpsi->tcpsi_timer[TCPT_2MSL] = tp->t_timer[TCPT_2MSL];
                                tcpsi->tcpsi_mss = tp->t_maxseg;
                                tcpsi->tcpsi_flags = tp->t_flags;
-                               tcpsi->tcpsi_tp = (u_int64_t)((uintptr_t)tp);
+                               tcpsi->tcpsi_tp =
+                                   (u_int64_t)VM_KERNEL_ADDRPERM(tp);
                        }
                        break;
                }
@@ -158,10 +160,11 @@ fill_socketinfo(struct socket *so, struct socket_info *si)
                        
                        si->soi_kind = SOCKINFO_UN;
                        
-                       unsi->unsi_conn_pcb = (uint64_t)((uintptr_t)unp->unp_conn);
+                       unsi->unsi_conn_pcb =
+                           (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn);
                        if (unp->unp_conn)
-                               unsi->unsi_conn_so = (uint64_t)((uintptr_t)unp->unp_conn->unp_socket);
-                               
+                               unsi->unsi_conn_so = (uint64_t)
+                                   VM_KERNEL_ADDRPERM(unp->unp_conn->unp_socket);
                        
                        if (unp->unp_addr) {
                                size_t  addrlen = unp->unp_addr->sun_len;
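
The VM_KERNEL_ADDRPERM conversions above keep sockinfo from exporting raw kernel pointers: the macro offsets non-NULL addresses by a boot-time random cookie, so user space still gets stable, comparable handles without learning real kernel addresses. A toy model of the idea (the fixed cookie here stands in for the kernel's randomized one):

#include <stdint.h>
#include <stdio.h>

/* boot-time random cookie in the real kernel; fixed here for the demo */
static const uintptr_t addrperm_cookie = 0x5a5a1000;

/* NULL stays NULL so user space can still test "is there a pcb" */
static uint64_t kernel_addrperm(const void *addr)
{
        if (addr == NULL)
                return 0;
        return (uint64_t)((uintptr_t)addr + addrperm_cookie);
}

int main(void)
{
        int object;

        printf("exported handle: 0x%llx\n",
            (unsigned long long)kernel_addrperm(&object));
        printf("NULL exports as: 0x%llx\n",
            (unsigned long long)kernel_addrperm(NULL));
        return 0;
}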
index 45bddb4312660c187393b1377bf4c2e429bf1bbd..3d4e18b604513f85268118b7c06123d997a78f80 100644 (file)
@@ -446,6 +446,8 @@ vprintf(const char *fmt, va_list ap)
        return 0;
 }
 
+#if !CONFIG_EMBEDDED
+
 /*
  * Scaled down version of vsprintf(3).
  *
@@ -467,6 +469,7 @@ vsprintf(char *buf, const char *cfmt, va_list ap)
        }
        return 0;
 }
+#endif /* !CONFIG_EMBEDDED */
 
 /*
  * Scaled down version of snprintf(3).
index 4d07853d9e9b2e54083ca04ebaa001884075bac3..80b6edc2708dcd242fb2f1a5b0f7d47a00b020eb 100644 (file)
@@ -343,30 +343,6 @@ overflow:
 #define PROFILE_UNLOCK(x)
 
 
-int
-profil(struct proc *p, struct profil_args *uap, int32_t *retval)
-{
-       void *tmp;
-
-       tmp = p;
-       tmp = uap;
-       tmp = retval;
-
-       return EINVAL;
-}
-
-int
-add_profil(struct proc *p, struct add_profil_args *uap, int32_t *retval)
-{
-       void *tmp;
-
-       tmp = p;
-       tmp = uap;
-       tmp = retval;
-
-       return EINVAL;
-}
-
 /*
  * Scale is a fixed-point number with the binary point 16 bits
  * into the value, and is <= 1.0.  pc is at most 32 bits, so the
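
The scale comment above is cut off by the diff view, but the arithmetic it describes is the classic 16-bit fixed-point mapping from pc to profile-buffer index: scale has its binary point 16 bits in, so the 48-bit product of a 32-bit pc offset and the scale is shifted right by 16. A worked sketch of that computation, reconstructed from the traditional BSD profil code rather than from this file:

#include <stdint.h>
#include <stdio.h>

/* 16-bit fixed point: a scale of 0x10000 represents 1.0 */
static uint32_t pc_to_index(uint32_t pc, uint32_t lowpc, uint32_t scale)
{
        /* a 32-bit pc offset times a 16-bit scale is at most 48 bits,
         * so form the product in 64 bits and shift the point back out */
        return (uint32_t)(((uint64_t)(pc - lowpc) * scale) >> 16);
}

int main(void)
{
        uint32_t scale = 0x8000;  /* 0.5: profile buffer half the text size */

        printf("pc offset 0x1000 -> index 0x%x\n",
            pc_to_index(0x1000, 0, scale));   /* 0x800 */
        return 0;
}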
index bacd02b79d8b19d6820b89d742e764cbb871ca7b..cc950bd84d6879c519f85b4cf496567baa501279 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,6 +93,7 @@
 #include <sys/poll.h>
 #include <sys/event.h>
 #include <sys/eventvar.h>
+#include <sys/proc.h>
 
 #include <mach/mach_types.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
 #include <kern/thread.h>
 #include <kern/clock.h>
+#include <kern/ledger.h>
+#include <kern/task.h>
 
 #include <sys/mbuf.h>
 #include <sys/socket.h>
@@ -1567,6 +1570,7 @@ poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
 {
        struct poll_continue_args *cont = (struct poll_continue_args *)data;
        struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
+       short prev_revents = fds->revents;
        short mask;
 
        /* convert the results back into revents */
@@ -1606,7 +1610,7 @@ poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
                break;
        }
 
-       if (fds->revents)
+       if (fds->revents != 0 && prev_revents == 0)
                cont->pca_rfds++;
 
        return 0;
@@ -2044,14 +2048,14 @@ postpipeevent(struct pipe *pipep, int event)
                          evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
                  }
                  if ((evq->ee_eventmask & EV_WR) && 
-                     (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
+                     (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
 
                          if (pipep->pipe_state & PIPE_EOF) {
                                  mask |= EV_WR|EV_RESET;
                                  break;
                          }
                          mask |= EV_WR;
-                         evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
+                         evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
                  }
                  break;
 
@@ -2819,3 +2823,111 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retv
 
        return (error);
 }
+
+/*
+ * ledger
+ *
+ * Description:        Omnibus system call for ledger operations
+ */
+int
+ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
+{
+       int rval, pid, len, error;
+#ifdef LEDGER_DEBUG
+       struct ledger_limit_args lla;
+#endif
+       task_t task;
+       proc_t proc;
+
+       /* Finish copying in the necessary args before taking the proc lock */
+       error = 0;
+       len = 0;
+       if (args->cmd == LEDGER_ENTRY_INFO)
+               error = copyin(args->arg3, (char *)&len, sizeof (len));
+       else if (args->cmd == LEDGER_TEMPLATE_INFO)
+               error = copyin(args->arg2, (char *)&len, sizeof (len));
+#ifdef LEDGER_DEBUG
+       else if (args->cmd == LEDGER_LIMIT)
+               error = copyin(args->arg2, (char *)&lla, sizeof (lla));
+#endif
+       if (error)
+               return (error);
+       if (len < 0)
+               return (EINVAL);
+
+       rval = 0;
+       if (args->cmd != LEDGER_TEMPLATE_INFO) {
+               pid = args->arg1;
+               proc = proc_find(pid);
+               if (proc == NULL)
+                       return (ESRCH);
+
+#if CONFIG_MACF
+               error = mac_proc_check_ledger(p, proc, args->cmd);
+               if (error) {
+                       proc_rele(proc);
+                       return (error);
+               }
+#endif
+
+               task = proc->task;
+       }
+               
+       switch (args->cmd) {
+#ifdef LEDGER_DEBUG
+               case LEDGER_LIMIT: {
+                       if (!is_suser())
+                               rval = EPERM;
+                       rval = ledger_limit(task, &lla);
+                       proc_rele(proc);
+                       break;
+               }
+#endif
+               case LEDGER_INFO: {
+                       struct ledger_info info;
+
+                       rval = ledger_info(task, &info);
+                       proc_rele(proc);
+                       if (rval == 0)
+                               rval = copyout(&info, args->arg2,
+                                   sizeof (info));
+                       break;
+               }
+
+               case LEDGER_ENTRY_INFO: {
+                       void *buf;
+                       int sz;
+
+                       rval = ledger_entry_info(task, &buf, &len);
+                       proc_rele(proc);
+                       if ((rval == 0) && (len > 0)) {
+                               sz = len * sizeof (struct ledger_entry_info);
+                               rval = copyout(buf, args->arg2, sz);
+                               kfree(buf, sz);
+                       }
+                       if (rval == 0)
+                               rval = copyout(&len, args->arg3, sizeof (len));
+                       break;
+               }
+
+               case LEDGER_TEMPLATE_INFO: {
+                       void *buf;
+                       int sz;
+
+                       rval = ledger_template_info(&buf, &len);
+                       if ((rval == 0) && (len > 0)) {
+                               sz = len * sizeof (struct ledger_template_info);
+                               rval = copyout(buf, args->arg1, sz);
+                               kfree(buf, sz);
+                       }
+                       if (rval == 0)
+                               rval = copyout(&len, args->arg2, sizeof (len));
+                       break;
+               }
+
+               default:
+                       rval = EINVAL;
+       }
+
+       return (rval);
+}
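
The info subcommands above use a two-step sizing handshake: the caller copies in a capacity, the kernel copies out up to that many records and then writes back the count actually returned. A hedged user-space sketch of the LEDGER_TEMPLATE_INFO pattern; the ledger() prototype, the command value, and the struct layout are stand-in assumptions:

#include <stdio.h>
#include <stdlib.h>

struct ledger_template_info { char lti_name[32]; };  /* layout elided; stand-in */
extern int ledger(int cmd, void *arg1, void *arg2, void *arg3);  /* hypothetical wrapper */
#define LEDGER_TEMPLATE_INFO 3   /* assumed command value */

int main(void)
{
        int len = 64;   /* in: capacity of our buffer, in records */
        struct ledger_template_info *buf = calloc(len, sizeof(*buf));

        if (buf == NULL)
                return 1;
        /* the kernel reads len, fills up to that many records into buf,
         * then writes the count actually returned back through arg2 */
        if (ledger(LEDGER_TEMPLATE_INFO, buf, &len, NULL) == 0)
                printf("%d ledger templates\n", len);
        free(buf);
        return 0;
}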
index 27f2461b4854a284c3ba5352afe6a789d5311c84..9aa8ac04c8987ed5e81290fff0e614dc7b464c72 100644 (file)
  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
  * all features of sockets, but does do everything that pipes normally
  * do.
+ *
+ * Pipes are implemented as circular buffers. The following are the valid states in pipe operations:
+ *  
+ *      _________________________________
+ * 1.  |_________________________________| r=w, c=0
+ * 
+ *      _________________________________
+ * 2.  |__r:::::wc_______________________| r <= w , c > 0
+ *
+ *      _________________________________
+ * 3.  |::::wc_____r:::::::::::::::::::::| r>w , c > 0
+ *
+ *      _________________________________
+ * 4.  |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
+ *
+ *
+ *  Nomenclature:
+ *  a-z define the steps in a program flow
+ *  1-4 are the states as defined above
+ *  Action: the file operation performed on the pipe
+ *  
+ *  Current:None  Action: initialize with size M=200
+ *  a. State 1 ( r=0, w=0, c=0)
+ *  
+ *  Current: a    Action: write(100) (w < M)
+ *  b. State 2 (r=0, w=100, c=100)
+ *  
+ *  Current: b    Action: write(100) (w = M-w)
+ *  c. State 4 (r=0,w=0,c=200)
+ *  
+ *  Current: b    Action: read(70)  ( r < c )
+ *  d. State 2(r=70,w=100,c=30)
+ *  
+ *  Current: d   Action: write(75) ( w < (m-w))
+ *  e. State 2 (r=70,w=175,c=105)
+ *  
+ *  Current: d    Action: write(110) ( w > (m-w))
+ *  f. State 3 (r=70,w=10,c=140)
+ *  
+ *  Current: d   Action: read(30) (r >= c )
+ *  g. State 1 (r=100,w=100,c=0)
+ *  
  */
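
The state diagram above reduces to three variables: read offset r, write offset w, and byte count c over a buffer of size M, with state 1 being c == 0, state 4 being c == M, and state 3 arising whenever a write wraps past the end. A minimal model that reproduces the comment's own example transitions:

#include <stdio.h>

#define M 200   /* buffer size from the comment's example */

static unsigned r, w, c;   /* read offset, write offset, byte count */

static void buf_write(unsigned n)   /* assumes n <= M - c */
{
        w = (w + n) % M;
        c += n;
}

static void buf_read(unsigned n)    /* assumes n <= c */
{
        r = (r + n) % M;
        c -= n;
}

int main(void)
{
        buf_write(100); printf("b: r=%u w=%u c=%u\n", r, w, c); /* state 2 */
        buf_read(70);   printf("d: r=%u w=%u c=%u\n", r, w, c); /* state 2 */
        buf_write(110); printf("f: r=%u w=%u c=%u\n", r, w, c); /* state 3: w wrapped */
        return 0;
}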
 
 /*
- * This code has two modes of operation, a small write mode and a large
- * write mode.  The small write mode acts like conventional pipes with
- * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
- * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
- * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
- * the receiving process can copy it directly from the pages in the sending
- * process.
- *
- * If the sending process receives a signal, it is possible that it will
- * go away, and certainly its address space can change, because control
- * is returned back to the user-mode side.  In that case, the pipe code
- * arranges to copy the buffer supplied by the user process, to a pageable
- * kernel buffer, and the receiving process will grab the data from the
- * pageable kernel buffer.  Since signals don't happen all that often,
- * the copy operation is normally eliminated.
- *
- * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
- * happen for small transfers so that the system will not spend all of
- * its time context switching.
+ * This code creates half-duplex pipe buffers to facilitate file-like
+ * operations on pipes. The initial buffer is very small, but it can
+ * dynamically grow to larger sizes based on usage. The buffer size is never
+ * reduced. The total amount of kernel memory used is governed by maxpipekva.
+ * If the dynamic expansion limit is reached, the writing thread blocks
+ * until the pipe buffer empties enough to continue.
  *
  * In order to limit the resource use of pipes, two sysctls exist:
  *
  * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
- * address space available to us in pipe_map.  Whenever the amount in use
- * exceeds half of this value, all new pipes will be created with size
- * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
- * as well.  This value is loader tunable only.
- *
- * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
- * be wired in order to facilitate direct copies using page flipping.
- * Whenever this value is exceeded, pipes will fall back to using regular
- * copies.  This value is sysctl controllable at all times.
- *
- * These values are autotuned in subr_param.c.
+ * address space available to us in pipe_map. 
  *
  * Memory usage may be monitored through the sysctls
- * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
+ * kern.ipc.pipes, kern.ipc.pipekva.
  *
  */
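
When the PIPE_SYSCTLS build option exposes them, those counters can be read from user space with the stock sysctlbyname(3) call; a quick monitoring check might look like:

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int main(void)
{
        int pipes = 0, kva = 0;
        size_t len;

        len = sizeof(pipes);
        if (sysctlbyname("kern.ipc.pipes", &pipes, &len, NULL, 0) != 0)
                return 1;
        len = sizeof(kva);
        if (sysctlbyname("kern.ipc.pipekva", &kva, &len, NULL, 0) != 0)
                return 1;
        printf("%d pipes using %d bytes of kernel VA\n", pipes, kva);
        return 0;
}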
 
 #include <sys/kdebug.h>
 
 #include <kern/zalloc.h>
+#include <kern/kalloc.h>
 #include <vm/vm_kern.h>
 #include <libkern/OSAtomic.h>
 
 #define f_ops f_fglob->fg_ops
 #define f_offset f_fglob->fg_offset
 #define f_data f_fglob->fg_data
-/*
- * Use this define if you want to disable *fancy* VM things.  Expect an
- * approx 30% decrease in transfer rate.  This could be useful for
- * NetBSD or OpenBSD.
- *
- * this needs to be ported to X and the performance measured
- * before committing to supporting it
- */
-#define PIPE_NODIRECT  1
-
-#ifndef PIPE_NODIRECT
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <vm/uma.h>
-
-#endif
 
 /*
- * interfaces to the outside world
+ * interfaces to the outside world exported through file operations 
  */
 static int pipe_read(struct fileproc *fp, struct uio *uio,
                 int flags, vfs_context_t ctx);
-
 static int pipe_write(struct fileproc *fp, struct uio *uio,
                 int flags, vfs_context_t ctx);
-
 static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
-
 static int pipe_select(struct fileproc *fp, int which, void * wql,
                vfs_context_t ctx);
-
 static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
                vfs_context_t ctx);
-
 static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
                vfs_context_t ctx);
-
 static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);
 
-
 struct  fileops pipeops =
   { pipe_read,
     pipe_write,
@@ -190,7 +180,6 @@ struct  fileops pipeops =
     pipe_kqfilter,
     pipe_drain };
 
-
 static void    filt_pipedetach(struct knote *kn);
 static int     filt_piperead(struct knote *kn, long hint);
 static int     filt_pipewrite(struct knote *kn, long hint);
@@ -200,33 +189,18 @@ static struct filterops pipe_rfiltops = {
         .f_detach = filt_pipedetach,
         .f_event = filt_piperead,
 };
+
 static struct filterops pipe_wfiltops = {
         .f_isfd = 1,
         .f_detach = filt_pipedetach,
         .f_event = filt_pipewrite,
 };
 
-/*
- * Default pipe buffer size(s), this can be kind-of large now because pipe
- * space is pageable.  The pipe code will try to maintain locality of
- * reference for performance reasons, so small amounts of outstanding I/O
- * will not wipe the cache.
- */
-#define MINPIPESIZE (PIPE_SIZE/3)
+static int nbigpipe;      /* for compatibility sake. no longer used */
+static int amountpipes;   /* total number of pipes in system */
+static int amountpipekva; /* total memory used by pipes */
 
-/*
- * Limit the number of "big" pipes
- */
-#define LIMITBIGPIPES  32
-static int nbigpipe;
-
-static int amountpipes;
-static int amountpipekva;
-
-#ifndef PIPE_NODIRECT
-static int amountpipekvawired;
-#endif
-int maxpipekva = 1024 * 1024 * 16;
+int maxpipekva = PIPE_KVAMAX;  /* allowing 16MB max. */
 
 #if PIPE_SYSCTLS
 SYSCTL_DECL(_kern_ipc);
@@ -248,29 +222,24 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
 static void pipeclose(struct pipe *cpipe);
 static void pipe_free_kmem(struct pipe *cpipe);
 static int pipe_create(struct pipe **cpipep);
+static int pipespace(struct pipe *cpipe, int size);
+static int choose_pipespace(unsigned long current, unsigned long expected);
+static int expand_pipespace(struct pipe *p, int target_size);
 static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
-static __inline int pipelock(struct pipe *cpipe, int catch);
-static __inline void pipeunlock(struct pipe *cpipe);
-
-#ifndef PIPE_NODIRECT
-static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
-static void pipe_destroy_write_buffer(struct pipe *wpipe);
-static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
-static void pipe_clone_write_buffer(struct pipe *wpipe);
-#endif
+static __inline int pipeio_lock(struct pipe *cpipe, int catch);
+static __inline void pipeio_unlock(struct pipe *cpipe);
 
 extern int postpipeevent(struct pipe *, int);
 extern void evpipefree(struct pipe *cpipe);
 
-
-static int pipespace(struct pipe *cpipe, int size);
-
 static lck_grp_t       *pipe_mtx_grp;
 static lck_attr_t      *pipe_mtx_attr;
 static lck_grp_attr_t  *pipe_mtx_grp_attr;
 
 static zone_t pipe_zone;
 
+#define MAX_PIPESIZE(pipe)             ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )
+
 #define        PIPE_GARBAGE_AGE_LIMIT          5000    /* In milliseconds */
 #define PIPE_GARBAGE_QUEUE_LIMIT       32000
 
@@ -286,26 +255,26 @@ static struct pipe_garbage *pipe_garbage_tail = NULL;
 static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
 static int pipe_garbage_count = 0;
 static lck_mtx_t *pipe_garbage_lock;
+static void pipe_garbage_collect(struct pipe *cpipe);
 
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
 
+/* initial setup done at time of sysinit */
 void
 pipeinit(void)
 {
+       nbigpipe = 0;
        vm_size_t zone_size;
-
        zone_size = 8192 * sizeof(struct pipe);
         pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
 
-       /*
-        * allocate lock group attribute and group for pipe mutexes
-        */
+
+       /* allocate lock group attribute and group for pipe mutexes */
        pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
        pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
 
-       /*
-        * allocate the lock attribute for pipe mutexes
-        */
+       /* allocate the lock attribute for pipe mutexes */
        pipe_mtx_attr = lck_attr_alloc_init();
 
        /*
@@ -316,6 +285,7 @@ pipeinit(void)
         pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
            zone_size, 4096, "pipe garbage zone");
        pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
+       
 }
 
 /* Bitmap for things to touch in pipe_touch() */
@@ -346,10 +316,80 @@ pipe_touch(struct pipe *tpipe, int touch)
        }
 }
 
+static const unsigned int pipesize_blocks[] = {128, 256, 1024, 2048, PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE, PIPE_SIZE * 4};
+
+/*
+ * finds the right size from the possible sizes in pipesize_blocks:
+ * returns the entry just above max(current, expected), clamped to
+ * the largest block
+ */
+static int 
+choose_pipespace(unsigned long current, unsigned long expected)
+{
+       int i = sizeof(pipesize_blocks)/sizeof(unsigned int) - 1;
+       unsigned long target;
+
+       if (expected > current) 
+               target = expected;
+       else
+               target = current;
+
+       while (i > 0 && pipesize_blocks[i-1] > target) {
+               i = i - 1;
+       }
+
+       return pipesize_blocks[i];
+}
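
As an aside, the selection loop above is easy to exercise in isolation. The following stand-alone C sketch mirrors it with assumed values for PAGE_SIZE (4096) and PIPE_SIZE (16384); it is an illustration, not kernel code.

#include <stdio.h>

#define PAGE_SIZE 4096   /* assumed for illustration */
#define PIPE_SIZE 16384  /* assumed for illustration */

static const unsigned int blocks[] =
    {128, 256, 1024, 2048, PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE, PIPE_SIZE * 4};

static unsigned int
pick(unsigned long current, unsigned long expected)
{
	int i = sizeof(blocks) / sizeof(blocks[0]) - 1;
	unsigned long target = (expected > current) ? expected : current;

	/* walk down while the next-smaller block still exceeds the target */
	while (i > 0 && blocks[i - 1] > target)
		i--;
	return blocks[i];
}

int
main(void)
{
	printf("%u %u\n", pick(100, 0), pick(0, 5000)); /* prints: 128 8192 */
	return 0;
}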
 
 
+/*
+ * expand the size of the pipe while there is data to be read,
+ * and then free the old buffer once the current buffered
+ * data has been transferred to the new storage.
+ * Required: PIPE_LOCK and io lock to be held by the caller.
+ * returns 0 on success, or when no expansion was possible
+ */
+static int 
+expand_pipespace(struct pipe *p, int target_size)
+{
+       struct pipe tmp, oldpipe;
+       int error;
+       tmp.pipe_buffer.buffer = 0;
+       
+       if (p->pipe_buffer.size >= (unsigned) target_size) {
+               return 0; /* the existing buffer is max size possible */
+       }
+       
+       /* create enough space in the target */
+       error = pipespace(&tmp, target_size);
+       if (error != 0)
+               return (error);
+
+       oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
+       oldpipe.pipe_buffer.size = p->pipe_buffer.size;
+       
+       memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
+       if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out) {
+               /* we are in State 3 and need extra copying for reads to stay consistent */
+               memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
+               p->pipe_buffer.in += p->pipe_buffer.size;
+       }
+
+       p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
+       p->pipe_buffer.size = tmp.pipe_buffer.size;
+
+       pipe_free_kmem(&oldpipe);
+       return 0;
+}
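
The "State 3" branch above is subtle: when the unread bytes wrap past the end of the old buffer (in <= out with cnt > 0), copying the old buffer twice into the new storage and advancing `in` by the old size leaves the reader's view from `out` contiguous. A minimal user-space demonstration of that invariant (all names local to the sketch):

#include <assert.h>
#include <string.h>

int
main(void)
{
	char oldbuf[8] = {'g', 'h', 0, 0, 0, 0, 'e', 'f'}; /* wrapped "efgh" */
	char newbuf[16];
	unsigned in = 2, out = 6, cnt = 4, oldsize = 8;

	memcpy(newbuf, oldbuf, oldsize);
	if (cnt > 0 && in <= out) {            /* State 3: data wraps the end */
		memcpy(&newbuf[oldsize], oldbuf, oldsize);
		in += oldsize;                 /* now 10 == out + cnt */
	}
	assert(memcmp(&newbuf[out], "efgh", cnt) == 0);
	assert(in == out + cnt);
	return 0;
}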
+
 /*
  * The pipe system call for the DTYPE_PIPE type of pipes
+ * 
+ * returns:
+ *  FREAD  | fd0 | -->[struct rpipe] --> |~~buffer~~| \  
+ *                                                    (pipe_mutex)
+ *  FWRITE | fd1 | -->[struct wpipe] --X              / 
  */
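
Seen from user space, the diagram's half-duplex contract is just pipe(2): fd[0] is the FREAD end and fd[1] the FWRITE end. A minimal example (error handling elided):

#include <unistd.h>

int
main(void)
{
	int fd[2];
	char buf[8];

	if (pipe(fd) != 0)
		return 1;
	write(fd[1], "hi", 2);          /* producer side */
	read(fd[0], buf, sizeof(buf));  /* consumer side */
	close(fd[0]);
	close(fd[1]);
	return 0;
}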
 
 /* ARGSUSED */
@@ -372,22 +412,12 @@ pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
         /*
         * allocate the space for the normal I/O direction up
         * front... we'll delay the allocation for the other
-        * direction until a write actually occurs (most
-        * likely it won't)...
-        *
-         * Reduce to 1/4th pipe size if we're over our global max.
+        * direction until a write actually occurs (most likely it won't)...
          */
-        if (amountpipekva > maxpipekva / 2)
-               error = pipespace(rpipe, SMALL_PIPE_SIZE);
-        else
-               error = pipespace(rpipe, PIPE_SIZE);
+       error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
         if (error)
                goto freepipes;
 
-#ifndef PIPE_NODIRECT
-       rpipe->pipe_state |= PIPE_DIRECTOK;
-       wpipe->pipe_state |= PIPE_DIRECTOK;
-#endif
        TAILQ_INIT(&rpipe->pipe_evlist);
        TAILQ_INIT(&wpipe->pipe_evlist);
 
@@ -398,9 +428,8 @@ pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
        retval[0] = fd;
 
        /*
-        * for now we'll create half-duplex
-        * pipes... this is what we've always
-        * supported..
+        * for now we'll create half-duplex pipes (refer to the returns
+        * diagram above). this is what we've always supported.
         */
        rf->f_flag = FREAD;
        rf->f_type = DTYPE_PIPE;
@@ -419,7 +448,8 @@ pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
 
        rpipe->pipe_peer = wpipe;
        wpipe->pipe_peer = rpipe;
-       rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
+       /* both structures share the same mutex */
+       rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 
 
        retval[1] = fd;
 #if CONFIG_MACF
@@ -476,20 +506,16 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
        }
 #endif
        if (cpipe->pipe_buffer.buffer == 0) {
-               /*
-                * must be stat'ing the write fd
-                */
+               /* must be stat'ing the write fd */
                if (cpipe->pipe_peer) {
-                       /*
-                        * the peer still exists, use it's info
-                        */
-                       pipe_size  = cpipe->pipe_peer->pipe_buffer.size;
+                       /* the peer still exists, use its info */
+                       pipe_size  = MAX_PIPESIZE(cpipe->pipe_peer);
                        pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
                } else {
                        pipe_count = 0;
                }
        } else {
-               pipe_size  = cpipe->pipe_buffer.size;
+               pipe_size  = MAX_PIPESIZE(cpipe);
                pipe_count = cpipe->pipe_buffer.cnt;
        }
        /*
@@ -497,7 +523,7 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
         * we might catch it in transient state
         */
        if (pipe_size == 0)
-               pipe_size  = PIPE_SIZE;
+               pipe_size  = MAX(PIPE_SIZE, pipesize_blocks[0]);
 
        if (isstat64 != 0) {
                sb64 = (struct stat64 *)ub;     
@@ -525,7 +551,7 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
                * address of this pipe's struct pipe.  This number may be recycled
                * relatively quickly.
                */
-               sb64->st_ino = (ino64_t)((uintptr_t)cpipe);
+               sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
        } else {
                sb = (struct stat *)ub; 
 
@@ -552,7 +578,7 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
                * address of this pipe's struct pipe.  This number may be recycled
                * relatively quickly.
                */
-               sb->st_ino = (ino_t)(uintptr_t)cpipe;
+               sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
        }
        PIPE_UNLOCK(cpipe);
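
The VM_KERNEL_ADDRPERM change above stops st_ino from leaking a raw kernel pointer to user space. The exact permutation is internal to the kernel; one plausible scheme, shown only as a sketch, offsets non-zero addresses by a boot-time random secret:

#include <stdint.h>

static uintptr_t addrperm_secret; /* hypothetical: set once from an RNG at boot */

/* obfuscate a kernel pointer before exporting it (sketch, not xnu's code) */
static uintptr_t
addr_perm(uintptr_t addr)
{
	return (addr == 0) ? 0 : addr + addrperm_secret;
}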
 
@@ -579,10 +605,11 @@ pipespace(struct pipe *cpipe, int size)
 {
        vm_offset_t buffer;
 
-       size = round_page(size);
+       if (size <= 0)
+               return(EINVAL);
 
-       if (kmem_alloc(kernel_map, &buffer, size) != KERN_SUCCESS)
-               return(ENOMEM);
+       if ((buffer = (vm_offset_t)kalloc(size)) == 0)
+               return(ENOMEM);
 
        /* free old resources if we're resizing */
        pipe_free_kmem(cpipe);
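
Switching pipespace() from page-granular kmem_alloc to kalloc matters because pipe buffers can now be as small as 128 bytes; rounding every buffer up to a page would waste most of the allocation. A quick illustration, assuming a 4K page:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096 /* assumed for illustration */
#define round_page(x) (((x) + PAGE_SIZE - 1) & ~(uintptr_t)(PAGE_SIZE - 1))

int
main(void)
{
	/* page-granular allocation turns a 128-byte pipe into a 4K one */
	printf("%lu\n", (unsigned long)round_page(128)); /* prints 4096 */
	return 0;
}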
@@ -605,7 +632,6 @@ static int
 pipe_create(struct pipe **cpipep)
 {
        struct pipe *cpipe;
-
        cpipe = (struct pipe *)zalloc(pipe_zone);
 
        if ((*cpipep = cpipe) == NULL)
@@ -619,7 +645,6 @@ pipe_create(struct pipe **cpipep)
 
        /* Initial times are all the time of creation of the pipe */
        pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
-
        return (0);
 }
 
@@ -628,20 +653,17 @@ pipe_create(struct pipe **cpipep)
  * lock a pipe for I/O, blocking other access
  */
 static inline int
-pipelock(struct pipe *cpipe, int catch)
+pipeio_lock(struct pipe *cpipe, int catch)
 {
        int error;
-
        while (cpipe->pipe_state & PIPE_LOCKFL) {
                cpipe->pipe_state |= PIPE_LWANT;
-
                error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
                               "pipelk", 0);
                if (error != 0) 
                        return (error);
        }
        cpipe->pipe_state |= PIPE_LOCKFL;
-
        return (0);
 }
 
@@ -649,16 +671,18 @@ pipelock(struct pipe *cpipe, int catch)
  * unlock a pipe I/O lock
  */
 static inline void
-pipeunlock(struct pipe *cpipe)
+pipeio_unlock(struct pipe *cpipe)
 {
        cpipe->pipe_state &= ~PIPE_LOCKFL;
-
        if (cpipe->pipe_state & PIPE_LWANT) {
                cpipe->pipe_state &= ~PIPE_LWANT;
                wakeup(cpipe);
        }
 }
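
pipeio_lock()/pipeio_unlock() build a sleepable flag lock on top of the pipe mutex: PIPE_LOCKFL marks the holder and PIPE_LWANT records that someone is waiting. A rough user-space analogue, with a pthread mutex and condition variable standing in for msleep()/wakeup() (all names hypothetical):

#include <pthread.h>

#define PIPE_LOCKFL 0x01
#define PIPE_LWANT  0x02

struct fakepipe {
	pthread_mutex_t mtx;  /* stands in for PIPE_MTX */
	pthread_cond_t  cv;   /* stands in for msleep/wakeup */
	int state;
};

/* block until the io-lock flag is free, then take it (mtx held by caller) */
static void
io_lock(struct fakepipe *p)
{
	while (p->state & PIPE_LOCKFL) {
		p->state |= PIPE_LWANT;
		pthread_cond_wait(&p->cv, &p->mtx); /* drops mtx while asleep */
	}
	p->state |= PIPE_LOCKFL;
}

/* release the flag and wake any waiters (mtx held by caller) */
static void
io_unlock(struct fakepipe *p)
{
	p->state &= ~PIPE_LOCKFL;
	if (p->state & PIPE_LWANT) {
		p->state &= ~PIPE_LWANT;
		pthread_cond_broadcast(&p->cv);
	}
}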
 
+/*
+ * wake up anyone who's blocked in select
+ */
 static void
 pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
 {
@@ -679,6 +703,10 @@ pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
         }
 }
 
+/*
+ * Read n bytes from the buffer. Semantics are similar to file read.
+ * returns: number of bytes read from the buffer
+ */
 /* ARGSUSED */
 static int
 pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
@@ -692,7 +720,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
        PIPE_LOCK(rpipe);
        ++rpipe->pipe_busy;
 
-       error = pipelock(rpipe, 1);
+       error = pipeio_lock(rpipe, 1);
        if (error)
                goto unlocked_error;
 
@@ -702,11 +730,17 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                goto locked_error;
 #endif
 
+
        while (uio_resid(uio)) {
                /*
                 * normal pipe buffer receive
                 */
                if (rpipe->pipe_buffer.cnt > 0) {
+                       /*
+                        * # bytes to read is min( bytes from read pointer until end of buffer,
+                        *                         total unread bytes, 
+                        *                         user requested byte count)
+                        */
                        size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
                        if (size > rpipe->pipe_buffer.cnt)
                                size = rpipe->pipe_buffer.cnt;
@@ -714,7 +748,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                        if (size > (u_int) uio_resid(uio))
                                size = (u_int) uio_resid(uio);
 
-                       PIPE_UNLOCK(rpipe);
+                       PIPE_UNLOCK(rpipe); /* we still hold the io lock. */
                        error = uiomove(
                            &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
                            size, uio);
@@ -727,7 +761,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                                rpipe->pipe_buffer.out = 0;
 
                        rpipe->pipe_buffer.cnt -= size;
-
+                       
                        /*
                         * If there is no more to read in the pipe, reset
                         * its pointers to the beginning.  This improves
@@ -738,32 +772,6 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                                rpipe->pipe_buffer.out = 0;
                        }
                        nread += size;
-#ifndef PIPE_NODIRECT
-               /*
-                * Direct copy, bypassing a kernel buffer.
-                */
-               } else if ((size = rpipe->pipe_map.cnt) &&
-                          (rpipe->pipe_state & PIPE_DIRECTW)) {
-                       caddr_t va;
-                       // LP64todo - fix this!
-                       if (size > (u_int) uio_resid(uio))
-                               size = (u_int) uio_resid(uio);
-
-                       va = (caddr_t) rpipe->pipe_map.kva +
-                           rpipe->pipe_map.pos;
-                       PIPE_UNLOCK(rpipe);
-                       error = uiomove(va, size, uio);
-                       PIPE_LOCK(rpipe);
-                       if (error)
-                               break;
-                       nread += size;
-                       rpipe->pipe_map.pos += size;
-                       rpipe->pipe_map.cnt -= size;
-                       if (rpipe->pipe_map.cnt == 0) {
-                               rpipe->pipe_state &= ~PIPE_DIRECTW;
-                               wakeup(rpipe);
-                       }
-#endif
                } else {
                        /*
                         * detect EOF condition
@@ -782,7 +790,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                        }
 
                        /*
-                        * Break if some data was read.
+                        * Break if some data was read in a previous iteration.
                         */
                        if (nread > 0)
                                break;
@@ -792,7 +800,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                         * We will either break out with an error or we will
                         * sleep and relock to loop.
                         */
-                       pipeunlock(rpipe);
+                       pipeio_unlock(rpipe);
 
                        /*
                         * Handle non-blocking mode operation or
@@ -802,11 +810,9 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
                                error = EAGAIN;
                        } else {
                                rpipe->pipe_state |= PIPE_WANTR;
-
                                error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
-
                                if (error == 0)
-                                       error = pipelock(rpipe, 1);
+                                       error = pipeio_lock(rpipe, 1);
                        }
                        if (error)
                                goto unlocked_error;
@@ -815,7 +821,7 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
 #if CONFIG_MACF
 locked_error:
 #endif
-       pipeunlock(rpipe);
+       pipeio_unlock(rpipe);
 
 unlocked_error:
        --rpipe->pipe_busy;
@@ -826,7 +832,7 @@ unlocked_error:
        if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
                rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
                wakeup(rpipe);
-       } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
+       } else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
                /*
                 * Handle write blocking hysteresis.
                 */
@@ -836,7 +842,7 @@ unlocked_error:
                }
        }
 
-       if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
+       if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
                pipeselwakeup(rpipe, rpipe->pipe_peer);
 
        /* update last read time */
@@ -847,250 +853,10 @@ unlocked_error:
        return (error);
 }
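
Inside the loop above, each pass copies out the minimum of three quantities, exactly as the added comment says: the run from the read pointer to the end of the buffer, the unread count, and the caller's remaining request. Isolated as a sketch:

/* bytes to copy in one iteration of the pipe_read loop (sketch) */
static unsigned int
read_chunk(unsigned int bufsize, unsigned int out,
    unsigned int cnt, unsigned int resid)
{
	unsigned int size = bufsize - out; /* run to the end of the buffer */

	if (size > cnt)
		size = cnt;                /* at most what is unread */
	if (size > resid)
		size = resid;              /* at most what was asked for */
	return size;
}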
 
-
-
-#ifndef PIPE_NODIRECT
 /*
- * Map the sending processes' buffer into kernel space and wire it.
- * This is similar to a physical write operation.
+ * perform a write of n bytes into the pipe buffer. Since
+ * pipes are unidirectional, a write is meant to be read by the other side only.
  */
-static int
-pipe_build_write_buffer(wpipe, uio)
-       struct pipe *wpipe;
-       struct uio *uio;
-{
-       pmap_t pmap;
-       u_int size;
-       int i, j;
-       vm_offset_t addr, endaddr;
-
-
-       size = (u_int) uio->uio_iov->iov_len;
-       if (size > wpipe->pipe_buffer.size)
-               size = wpipe->pipe_buffer.size;
-
-       pmap = vmspace_pmap(curproc->p_vmspace);
-       endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
-       addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
-       for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
-               /*
-                * vm_fault_quick() can sleep.  Consequently,
-                * vm_page_lock_queue() and vm_page_unlock_queue()
-                * should not be performed outside of this loop.
-                */
-       race:
-               if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
-                       vm_page_lock_queues();
-                       for (j = 0; j < i; j++)
-                               vm_page_unhold(wpipe->pipe_map.ms[j]);
-                       vm_page_unlock_queues();
-                       return (EFAULT);
-               }
-               wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
-                   VM_PROT_READ);
-               if (wpipe->pipe_map.ms[i] == NULL)
-                       goto race;
-       }
-
-/*
- * set up the control block
- */
-       wpipe->pipe_map.npages = i;
-       wpipe->pipe_map.pos =
-           ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
-       wpipe->pipe_map.cnt = size;
-
-/*
- * and map the buffer
- */
-       if (wpipe->pipe_map.kva == 0) {
-               /*
-                * We need to allocate space for an extra page because the
-                * address range might (will) span pages at times.
-                */
-               wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
-                       wpipe->pipe_buffer.size + PAGE_SIZE);
-               atomic_add_int(&amountpipekvawired,
-                   wpipe->pipe_buffer.size + PAGE_SIZE);
-       }
-       pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
-               wpipe->pipe_map.npages);
-
-/*
- * and update the uio data
- */
-
-       uio->uio_iov->iov_len -= size;
-       uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
-       if (uio->uio_iov->iov_len == 0)
-               uio->uio_iov++;
-       uio_setresid(uio, (uio_resid(uio) - size));
-       uio->uio_offset += size;
-       return (0);
-}
-
-/*
- * unmap and unwire the process buffer
- */
-static void
-pipe_destroy_write_buffer(wpipe)
-       struct pipe *wpipe;
-{
-       int i;
-
-       if (wpipe->pipe_map.kva) {
-               pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
-
-               if (amountpipekvawired > maxpipekvawired / 2) {
-                       /* Conserve address space */
-                       vm_offset_t kva = wpipe->pipe_map.kva;
-                       wpipe->pipe_map.kva = 0;
-                       kmem_free(kernel_map, kva,
-                           wpipe->pipe_buffer.size + PAGE_SIZE);
-                       atomic_subtract_int(&amountpipekvawired,
-                           wpipe->pipe_buffer.size + PAGE_SIZE);
-               }
-       }
-       vm_page_lock_queues();
-       for (i = 0; i < wpipe->pipe_map.npages; i++) {
-               vm_page_unhold(wpipe->pipe_map.ms[i]);
-       }
-       vm_page_unlock_queues();
-       wpipe->pipe_map.npages = 0;
-}
-
-/*
- * In the case of a signal, the writing process might go away.  This
- * code copies the data into the circular buffer so that the source
- * pages can be freed without loss of data.
- */
-static void
-pipe_clone_write_buffer(wpipe)
-       struct pipe *wpipe;
-{
-       int size;
-       int pos;
-
-       size = wpipe->pipe_map.cnt;
-       pos = wpipe->pipe_map.pos;
-
-       wpipe->pipe_buffer.in = size;
-       wpipe->pipe_buffer.out = 0;
-       wpipe->pipe_buffer.cnt = size;
-       wpipe->pipe_state &= ~PIPE_DIRECTW;
-
-       PIPE_UNLOCK(wpipe);
-       bcopy((caddr_t) wpipe->pipe_map.kva + pos,
-           wpipe->pipe_buffer.buffer, size);
-       pipe_destroy_write_buffer(wpipe);
-       PIPE_LOCK(wpipe);
-}
-
-/*
- * This implements the pipe buffer write mechanism.  Note that only
- * a direct write OR a normal pipe write can be pending at any given time.
- * If there are any characters in the pipe buffer, the direct write will
- * be deferred until the receiving process grabs all of the bytes from
- * the pipe buffer.  Then the direct mapping write is set-up.
- */
-static int
-pipe_direct_write(wpipe, uio)
-       struct pipe *wpipe;
-       struct uio *uio;
-{
-       int error;
-
-retry:
-       while (wpipe->pipe_state & PIPE_DIRECTW) {
-               if (wpipe->pipe_state & PIPE_WANTR) {
-                       wpipe->pipe_state &= ~PIPE_WANTR;
-                       wakeup(wpipe);
-               }
-               wpipe->pipe_state |= PIPE_WANTW;
-               error = msleep(wpipe, PIPE_MTX(wpipe),
-                   PRIBIO | PCATCH, "pipdww", 0);
-               if (error)
-                       goto error1;
-               if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
-                       error = EPIPE;
-                       goto error1;
-               }
-       }
-       wpipe->pipe_map.cnt = 0;        /* transfer not ready yet */
-       if (wpipe->pipe_buffer.cnt > 0) {
-               if (wpipe->pipe_state & PIPE_WANTR) {
-                       wpipe->pipe_state &= ~PIPE_WANTR;
-                       wakeup(wpipe);
-               }
-                       
-               wpipe->pipe_state |= PIPE_WANTW;
-               error = msleep(wpipe, PIPE_MTX(wpipe),
-                   PRIBIO | PCATCH, "pipdwc", 0);
-               if (error)
-                       goto error1;
-               if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
-                       error = EPIPE;
-                       goto error1;
-               }
-               goto retry;
-       }
-
-       wpipe->pipe_state |= PIPE_DIRECTW;
-
-       pipelock(wpipe, 0);
-       PIPE_UNLOCK(wpipe);
-       error = pipe_build_write_buffer(wpipe, uio);
-       PIPE_LOCK(wpipe);
-       pipeunlock(wpipe);
-       if (error) {
-               wpipe->pipe_state &= ~PIPE_DIRECTW;
-               goto error1;
-       }
-
-       error = 0;
-       while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
-               if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
-                       pipelock(wpipe, 0);
-                       PIPE_UNLOCK(wpipe);
-                       pipe_destroy_write_buffer(wpipe);
-                       PIPE_LOCK(wpipe);
-                       pipeselwakeup(wpipe, wpipe);
-                       pipeunlock(wpipe);
-                       error = EPIPE;
-                       goto error1;
-               }
-               if (wpipe->pipe_state & PIPE_WANTR) {
-                       wpipe->pipe_state &= ~PIPE_WANTR;
-                       wakeup(wpipe);
-               }
-               pipeselwakeup(wpipe, wpipe);
-               error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
-                   "pipdwt", 0);
-       }
-
-       pipelock(wpipe,0);
-       if (wpipe->pipe_state & PIPE_DIRECTW) {
-               /*
-                * this bit of trickery substitutes a kernel buffer for
-                * the process that might be going away.
-                */
-               pipe_clone_write_buffer(wpipe);
-       } else {
-               PIPE_UNLOCK(wpipe);
-               pipe_destroy_write_buffer(wpipe);
-               PIPE_LOCK(wpipe);
-       }
-       pipeunlock(wpipe);
-       return (error);
-
-error1:
-       wakeup(wpipe);
-       return (error);
-}
-#endif
-       
-
-
 static int
 pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
        __unused vfs_context_t ctx)
@@ -1099,6 +865,9 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
        int orig_resid;
        int pipe_size;
        struct pipe *wpipe, *rpipe;
+       int space;
+       // LP64todo - fix this!
+       orig_resid = uio_resid(uio);
 
        rpipe = (struct pipe *)fp->f_data;
 
@@ -1123,54 +892,35 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
 
        pipe_size = 0;
 
-       if (wpipe->pipe_buffer.buffer == 0) {
-               /*
-                * need to allocate some storage... we delay the allocation
-                * until the first write on fd[0] to avoid allocating storage for both
-                * 'pipe ends'... most pipes are half-duplex with the writes targeting
-                * fd[1], so allocating space for both ends is a waste...
-                *
-                * Reduce to 1/4th pipe size if we're over our global max.
-                */
-               if (amountpipekva > maxpipekva / 2)
-                       pipe_size = SMALL_PIPE_SIZE;
-               else
-                       pipe_size = PIPE_SIZE;
-       }
-
        /*
-        * If it is advantageous to resize the pipe buffer, do
-        * so.
+        * need to allocate some storage... we delay the allocation
+        * until the first write on fd[0] to avoid allocating storage for both
+        * 'pipe ends'... most pipes are half-duplex with the writes targeting
+        * fd[1], so allocating space for both ends is a waste...
         */
-       if ((uio_resid(uio) > PIPE_SIZE) &&
-               (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
-               (amountpipekva < maxpipekva / 2) &&
-               (nbigpipe < LIMITBIGPIPES) &&
-#ifndef PIPE_NODIRECT
-               (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
-#endif
-               (wpipe->pipe_buffer.cnt == 0)) {
 
-               pipe_size = BIG_PIPE_SIZE;
+       if (wpipe->pipe_buffer.buffer == 0 ||
+               ((unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
+               amountpipekva < maxpipekva)) {
 
+               pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
        }
        if (pipe_size) {
                /*
                 * need to do initial allocation or resizing of pipe
+        * while holding both the structure and io locks.
                 */
-               if ((error = pipelock(wpipe, 1)) == 0) {
-                       PIPE_UNLOCK(wpipe);
-                       if (pipespace(wpipe, pipe_size) == 0)
-                               OSAddAtomic(1, &nbigpipe);
-                       PIPE_LOCK(wpipe);
-                       pipeunlock(wpipe);
-
-                       if (wpipe->pipe_buffer.buffer == 0) {
-                               /*
-                                * initial allocation failed
-                                */
+               if ((error = pipeio_lock(wpipe, 1)) == 0) {
+                       if (wpipe->pipe_buffer.cnt == 0)                        
+                               error = pipespace(wpipe, pipe_size);
+                       else 
+                               error = expand_pipespace(wpipe, pipe_size);
+               
+                       pipeio_unlock(wpipe);
+                       
+                       /* allocation failed */
+                       if (wpipe->pipe_buffer.buffer == 0)
                                error = ENOMEM;
-                       }
                }
                if (error) {
                        /*
@@ -1187,91 +937,35 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
                        return(error);
                }
        }
-       // LP64todo - fix this!
-       orig_resid = uio_resid(uio);
 
        while (uio_resid(uio)) {
-               int space;
-
-#ifndef PIPE_NODIRECT
-               /*
-                * If the transfer is large, we can gain performance if
-                * we do process-to-process copies directly.
-                * If the write is non-blocking, we don't use the
-                * direct write mechanism.
-                *
-                * The direct write mechanism will detect the reader going
-                * away on us.
-                */
-               if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
-                   (fp->f_flag & FNONBLOCK) == 0 &&
-                   amountpipekvawired + uio_resid(uio) < maxpipekvawired) { 
-                       error = pipe_direct_write(wpipe, uio);
-                       if (error)
-                               break;
-                       continue;
-               }
 
-               /*
-                * Pipe buffered writes cannot be coincidental with
-                * direct writes.  We wait until the currently executing
-                * direct write is completed before we start filling the
-                * pipe buffer.  We break out if a signal occurs or the
-                * reader goes away.
-                */
        retrywrite:
-               while (wpipe->pipe_state & PIPE_DIRECTW) {
-                       if (wpipe->pipe_state & PIPE_WANTR) {
-                               wpipe->pipe_state &= ~PIPE_WANTR;
-                               wakeup(wpipe);
-                       }
-                       error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0);
-
-                       if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))
-                               break;
-                       if (error)
-                               break;
-               }
-#else
-       retrywrite:
-#endif
                space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 
-               /*
-                * Writes of size <= PIPE_BUF must be atomic.
-                */
+               /* Writes of size <= PIPE_BUF must be atomic. */
                if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
                        space = 0;
 
                if (space > 0) {
 
-                       if ((error = pipelock(wpipe,1)) == 0) {
+                       if ((error = pipeio_lock(wpipe,1)) == 0) {
                                int size;       /* Transfer size */
                                int segsize;    /* first segment to transfer */
 
                                if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
-                                       pipeunlock(wpipe);
+                                       pipeio_unlock(wpipe);
                                        error = EPIPE;
                                        break;
                                }
-#ifndef PIPE_NODIRECT
-                               /*
-                                * It is possible for a direct write to
-                                * slip in on us... handle it here...
-                                */
-                               if (wpipe->pipe_state & PIPE_DIRECTW) {
-                                       pipeunlock(wpipe);
-                                       goto retrywrite;
-                               }
-#endif
                                /* 
-                                * If a process blocked in pipelock, our
+                                * If a process blocked in pipeio_lock, our
                                 * value for space might be bad... the mutex
                                 * is dropped while we're blocked
                                 */
                                if (space > (int)(wpipe->pipe_buffer.size - 
                                    wpipe->pipe_buffer.cnt)) {
-                                       pipeunlock(wpipe);
+                                       pipeio_unlock(wpipe);
                                        goto retrywrite;
                                }
 
@@ -1307,7 +1001,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
                                        /* 
                                         * Transfer remaining part now, to
                                         * support atomic writes.  Wraparound
-                                        * happened.
+                                        * happened. (State 3)
                                         */
                                        if (wpipe->pipe_buffer.in + segsize != 
                                            wpipe->pipe_buffer.size)
@@ -1320,9 +1014,12 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
                                            size - segsize, uio);
                                        PIPE_LOCK(rpipe);
                                }
+                               /*
+                                * readers don't see the new data until the count is updated.
+                                */
                                if (error == 0) {
                                        wpipe->pipe_buffer.in += size;
-                                       if (wpipe->pipe_buffer.in >=
+                                       if (wpipe->pipe_buffer.in >
                                            wpipe->pipe_buffer.size) {
                                                if (wpipe->pipe_buffer.in !=
                                                    size - segsize +
@@ -1339,7 +1036,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
                                                panic("Pipe buffer overflow");
                                
                                }
-                               pipeunlock(wpipe);
+                               pipeio_unlock(wpipe);
                        }
                        if (error)
                                break;
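
The atomicity rule enforced above is the POSIX one: a write of at most PIPE_BUF bytes must not be interleaved with other writers, so when the whole request fits in PIPE_BUF but the current free space cannot hold it, space is forced to 0 and the writer sleeps rather than emitting a partial chunk. The decision in isolation (PIPE_BUF assumed to be 512, its value on xnu):

#define PIPE_BUF 512 /* assumed; the POSIX-required minimum */

/* usable space for this pass of the write loop (sketch) */
static int
usable_space(int space, int resid, int orig_resid)
{
	if (space < resid && orig_resid <= PIPE_BUF)
		return 0; /* small writes are all-or-nothing */
	return space;
}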
@@ -1453,12 +1150,7 @@ pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
                return (0);
 
        case FIONREAD:
-#ifndef PIPE_NODIRECT
-               if (mpipe->pipe_state & PIPE_DIRECTW)
-                       *(int *)data = mpipe->pipe_map.cnt;
-               else
-#endif
-                       *(int *)data = mpipe->pipe_buffer.cnt;
+               *(int *)data = mpipe->pipe_buffer.cnt;
                PIPE_UNLOCK(mpipe);
                return (0);
 
@@ -1493,6 +1185,7 @@ pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
        PIPE_LOCK(rpipe);
 
        wpipe = rpipe->pipe_peer;
+       
 
 #if CONFIG_MACF
        /*
@@ -1524,7 +1217,7 @@ pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
                        wpipe->pipe_state |= PIPE_WSELECT;
                if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
                    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
-                    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
+                    (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) {
 
                        retnum = 1;
                } else {
@@ -1553,7 +1246,6 @@ pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
        cpipe = (struct pipe *)fg->fg_data;
        fg->fg_data = NULL;
        proc_fdunlock(vfs_context_proc(ctx));
-
        if (cpipe)
                pipeclose(cpipe);
 
@@ -1563,102 +1255,14 @@ pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
 static void
 pipe_free_kmem(struct pipe *cpipe)
 {
-
        if (cpipe->pipe_buffer.buffer != NULL) {
-               if (cpipe->pipe_buffer.size > PIPE_SIZE)
-                       OSAddAtomic(-1, &nbigpipe);
                OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
                OSAddAtomic(-1, &amountpipes);
-
-               kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer,
+               kfree((void *)cpipe->pipe_buffer.buffer,
                          cpipe->pipe_buffer.size);
                cpipe->pipe_buffer.buffer = NULL;
+               cpipe->pipe_buffer.size = 0;
        }
-#ifndef PIPE_NODIRECT
-       if (cpipe->pipe_map.kva != 0) {
-               atomic_subtract_int(&amountpipekvawired,
-                   cpipe->pipe_buffer.size + PAGE_SIZE);
-               kmem_free(kernel_map,
-                       cpipe->pipe_map.kva,
-                       cpipe->pipe_buffer.size + PAGE_SIZE);
-               cpipe->pipe_map.cnt = 0;
-               cpipe->pipe_map.kva = 0;
-               cpipe->pipe_map.pos = 0;
-               cpipe->pipe_map.npages = 0;
-       }
-#endif
-}
-
-/*
- * When a thread sets a write-select on a pipe, it creates an implicit,
- * untracked dependency between that thread and the peer of the pipe
- * on which the select is set.  If the peer pipe is closed and freed
- * before the select()ing thread wakes up, the system will panic as
- * it attempts to unwind the dangling select().  To avoid that panic,
- * we notice whenever a dangerous select() is set on a pipe, and
- * defer the final deletion of the pipe until that select()s are all
- * resolved.  Since we can't currently detect exactly when that
- * resolution happens, we use a simple garbage collection queue to 
- * reap the at-risk pipes 'later'.
- */
-static void
-pipe_garbage_collect(struct pipe *cpipe)
-{
-       uint64_t old, now;
-       struct pipe_garbage *pgp;
-
-       /* Convert msecs to nsecs and then to abstime */
-       old = pipe_garbage_age_limit * 1000000;
-       nanoseconds_to_absolutetime(old, &old);
-
-       lck_mtx_lock(pipe_garbage_lock);
-
-       /* Free anything that's been on the queue for <mumble> seconds */
-       now = mach_absolute_time();
-       old = now - old;
-       while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
-               pipe_garbage_head = pgp->pg_next;
-               if (pipe_garbage_head == NULL)
-                       pipe_garbage_tail = NULL;
-               pipe_garbage_count--;
-               zfree(pipe_zone, pgp->pg_pipe);
-               zfree(pipe_garbage_zone, pgp);
-       }
-
-       /* Add the new pipe (if any) to the tail of the garbage queue */
-       if (cpipe) {
-               cpipe->pipe_state = PIPE_DEAD;
-               pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
-               if (pgp == NULL) {
-                       /*
-                        * We're too low on memory to garbage collect the
-                        * pipe.  Freeing it runs the risk of panicing the
-                        * system.  All we can do is leak it and leave
-                        * a breadcrumb behind.  The good news, such as it
-                        * is, is that this will probably never happen.
-                        * We will probably hit the panic below first.
-                        */
-                       printf("Leaking pipe %p - no room left in the queue",
-                           cpipe);
-                       lck_mtx_unlock(pipe_garbage_lock);
-                       return;
-               }
-
-               pgp->pg_pipe = cpipe;
-               pgp->pg_timestamp = now;
-               pgp->pg_next = NULL;
-
-               if (pipe_garbage_tail)
-                       pipe_garbage_tail->pg_next = pgp;
-               pipe_garbage_tail = pgp;
-               if (pipe_garbage_head == NULL)
-                       pipe_garbage_head = pipe_garbage_tail;
-
-               if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
-                       panic("Length of pipe garbage queue exceeded %d",
-                           PIPE_GARBAGE_QUEUE_LIMIT);
-       }
-       lck_mtx_unlock(pipe_garbage_lock);
 }
 
 /*
@@ -1671,7 +1275,6 @@ pipeclose(struct pipe *cpipe)
 
        if (cpipe == NULL)
                return;
-
        /* partially created pipes won't have a valid mutex. */
        if (PIPE_MTX(cpipe) != NULL)
                PIPE_LOCK(cpipe);
@@ -1745,6 +1348,7 @@ pipeclose(struct pipe *cpipe)
                zfree(pipe_zone, cpipe);
                pipe_garbage_collect(NULL);
        }
+
 }
 
 /*ARGSUSED*/
@@ -1838,11 +1442,6 @@ filt_piperead(struct knote *kn, long hint)
 
        wpipe = rpipe->pipe_peer;
        kn->kn_data = rpipe->pipe_buffer.cnt;
-
-#ifndef PIPE_NODIRECT
-       if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
-               kn->kn_data = rpipe->pipe_map.cnt;
-#endif
        if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
            (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
                kn->kn_flags |= EV_EOF;
@@ -1850,8 +1449,8 @@ filt_piperead(struct knote *kn, long hint)
        } else {
                int64_t lowwat = 1;
                if (kn->kn_sfflags & NOTE_LOWAT) {
-                       if (rpipe->pipe_buffer.size && kn->kn_sdata > rpipe->pipe_buffer.size)
-                               lowwat = rpipe->pipe_buffer.size;
+                       if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
+                               lowwat = MAX_PIPESIZE(rpipe);
                        else if (kn->kn_sdata > lowwat)
                                lowwat = kn->kn_sdata;
                }
@@ -1890,18 +1489,12 @@ filt_pipewrite(struct knote *kn, long hint)
                        PIPE_UNLOCK(rpipe);
                return (1);
        }
-       kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
-       if (!kn->kn_data && wpipe->pipe_buffer.size == 0)
-               kn->kn_data = PIPE_BUF; /* unwritten pipe is ready for write */
+       kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;
 
-#ifndef PIPE_NODIRECT
-       if (wpipe->pipe_state & PIPE_DIRECTW)
-               kn->kn_data = 0;
-#endif
        int64_t lowwat = PIPE_BUF;
        if (kn->kn_sfflags & NOTE_LOWAT) {
-               if (wpipe->pipe_buffer.size && kn->kn_sdata > wpipe->pipe_buffer.size)
-                       lowwat = wpipe->pipe_buffer.size;
+               if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
+                       lowwat = MAX_PIPESIZE(wpipe);
                else if (kn->kn_sdata > lowwat)
                        lowwat = kn->kn_sdata;
        }
@@ -1942,13 +1535,13 @@ fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
                        /*
                         * the peer still exists, use its info
                         */
-                       pipe_size  = cpipe->pipe_peer->pipe_buffer.size;
+                       pipe_size  = MAX_PIPESIZE(cpipe->pipe_peer);
                        pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
                } else {
                        pipe_count = 0;
                }
        } else {
-               pipe_size  = cpipe->pipe_buffer.size;
+               pipe_size  = MAX_PIPESIZE(cpipe);
                pipe_count = cpipe->pipe_buffer.cnt;
        }
        /*
@@ -2024,6 +1617,75 @@ pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
 }
 
 
+/*
+ * When a thread sets a write-select on a pipe, it creates an implicit,
+ * untracked dependency between that thread and the peer of the pipe
+ * on which the select is set.  If the peer pipe is closed and freed
+ * before the select()ing thread wakes up, the system will panic as
+ * it attempts to unwind the dangling select().  To avoid that panic,
+ * we notice whenever a dangerous select() is set on a pipe, and
+ * defer the final deletion of the pipe until that select()s are all
+ * resolved.  Since we can't currently detect exactly when that
+ * resolution happens, we use a simple garbage collection queue to 
+ * reap the at-risk pipes 'later'.
+ */
+static void
+pipe_garbage_collect(struct pipe *cpipe)
+{
+       uint64_t old, now;
+       struct pipe_garbage *pgp;
+
+       /* Convert msecs to nsecs and then to abstime */
+       old = pipe_garbage_age_limit * 1000000;
+       nanoseconds_to_absolutetime(old, &old);
+
+       lck_mtx_lock(pipe_garbage_lock);
+
+       /* Free anything that's been on the queue for <mumble> seconds */
+       now = mach_absolute_time();
+       old = now - old;
+       while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
+               pipe_garbage_head = pgp->pg_next;
+               if (pipe_garbage_head == NULL)
+                       pipe_garbage_tail = NULL;
+               pipe_garbage_count--;
+               zfree(pipe_zone, pgp->pg_pipe);
+               zfree(pipe_garbage_zone, pgp);
+       }
+
+       /* Add the new pipe (if any) to the tail of the garbage queue */
+       if (cpipe) {
+               cpipe->pipe_state = PIPE_DEAD;
+               pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
+               if (pgp == NULL) {
+                       /*
+                        * We're too low on memory to garbage collect the
+                        * pipe.  Freeing it runs the risk of panicking the
+                        * system.  All we can do is leak it and leave
+                        * a breadcrumb behind.  The good news, such as it
+                        * is, is that this will probably never happen.
+                        * We will probably hit the panic below first.
+                        */
+                       printf("Leaking pipe %p - no room left in the queue",
+                           cpipe);
+                       lck_mtx_unlock(pipe_garbage_lock);
+                       return;
+               }
+
+               pgp->pg_pipe = cpipe;
+               pgp->pg_timestamp = now;
+               pgp->pg_next = NULL;
 
+               if (pipe_garbage_tail)
+                       pipe_garbage_tail->pg_next = pgp;
+               pipe_garbage_tail = pgp;
+               if (pipe_garbage_head == NULL)
+                       pipe_garbage_head = pipe_garbage_tail;
 
+               if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
+                       panic("Length of pipe garbage queue exceeded %d",
+                           PIPE_GARBAGE_QUEUE_LIMIT);
+       }
+       lck_mtx_unlock(pipe_garbage_lock);
+}
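
The reaping pass converts the millisecond age limit to absolute time once per call and frees every entry stamped before now minus that window. The window arithmetic, reduced to plain nanoseconds (nanoseconds_to_absolutetime() and mach_absolute_time() are kernel interfaces; a flat nanosecond clock stands in here):

#include <stdint.h>

/* has a garbage-queue entry aged past the limit? (sketch, nanosecond units) */
static int
expired(uint64_t now_ns, uint64_t stamp_ns, uint64_t limit_ms)
{
	uint64_t limit_ns = limit_ms * 1000000; /* msecs -> nsecs */

	return stamp_ns < now_ns - limit_ns;
}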
 
index 431e476587f13bc34e3e50dfd793e5bd8c7ac578..d06b9cb9c88590f5abfedb577419ad244a9fddc7 100644 (file)
@@ -189,6 +189,7 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 {
        int error = 0;
        int dropsockref = -1;
+       int int_arg;
 
        socket_lock(so, 1);
 
@@ -201,16 +202,18 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 
        switch (cmd) {
 
-       case FIONBIO:
-               if (*(int *)data)
+       case FIONBIO:                   /* int */
+               bcopy(data, &int_arg, sizeof (int_arg));
+               if (int_arg)
                        so->so_state |= SS_NBIO;
                else
                        so->so_state &= ~SS_NBIO;
 
                goto out;
 
-       case FIOASYNC:
-               if (*(int *)data) {
+       case FIOASYNC:                  /* int */
+               bcopy(data, &int_arg, sizeof (int_arg));
+               if (int_arg) {
                        so->so_state |= SS_ASYNC;
                        so->so_rcv.sb_flags |= SB_ASYNC;
                        so->so_snd.sb_flags |= SB_ASYNC;
@@ -221,29 +224,32 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                }
                goto out;
 
-       case FIONREAD:
-               *(int *)data = so->so_rcv.sb_cc;
+       case FIONREAD:                  /* int */
+               bcopy(&so->so_rcv.sb_cc, data, sizeof (u_int32_t));
                goto out;
 
-       case SIOCSPGRP:
-               so->so_pgid = *(int *)data;
+       case SIOCSPGRP:                 /* int */
+               bcopy(data, &so->so_pgid, sizeof (pid_t));
                goto out;
 
-       case SIOCGPGRP:
-               *(int *)data = so->so_pgid;
+       case SIOCGPGRP:                 /* int */
+               bcopy(&so->so_pgid, data, sizeof (pid_t));
                goto out;
 
-       case SIOCATMARK:
-               *(int *)data = (so->so_state&SS_RCVATMARK) != 0;
+       case SIOCATMARK:                /* int */
+               int_arg = (so->so_state & SS_RCVATMARK) != 0;
+               bcopy(&int_arg, data, sizeof (int_arg));
                goto out;
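
The switch from *(int *)data to bcopy() in these handlers guards against a data pointer that is not int-aligned: a plain dereference can fault on strict-alignment machines, while a byte copy into an aligned local is always safe. The pattern in isolation (memcpy used here in place of the kernel's bcopy):

#include <string.h>

/* fetch an int argument from a possibly unaligned ioctl buffer (sketch) */
static int
get_int_arg(const void *data)
{
	int v;

	memcpy(&v, data, sizeof(v)); /* byte copy: no alignment assumption */
	return v;
}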
 
-       case SIOCSETOT: {
+       case SIOCSETOT: {               /* int */
                /*
                 * Set socket level options here and then call protocol
                 * specific routine.
                 */
                struct socket *cloned_so = NULL;
-               int cloned_fd = *(int *)data;
+               int cloned_fd;
+
+               bcopy(data, &cloned_fd, sizeof (cloned_fd));
 
                /* let's make sure it's either -1 or a valid file descriptor */
                if (cloned_fd != -1) {
@@ -441,8 +447,8 @@ soo_stat(struct socket *so, void *ub, int isstat64)
                if ((so->so_state & SS_CANTSENDMORE) == 0)
                        sb64->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
                sb64->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
-               sb64->st_uid = so->so_uid;
-               sb64->st_gid = so->so_gid;
+               sb64->st_uid = kauth_cred_getuid(so->so_cred);
+               sb64->st_gid = kauth_cred_getgid(so->so_cred);
        } else {
                sb->st_mode = S_IFSOCK;
                if ((so->so_state & SS_CANTRCVMORE) == 0 ||
@@ -451,8 +457,8 @@ soo_stat(struct socket *so, void *ub, int isstat64)
                if ((so->so_state & SS_CANTSENDMORE) == 0)
                        sb->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
                sb->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
-               sb->st_uid = so->so_uid;
-               sb->st_gid = so->so_gid;
+               sb->st_uid = kauth_cred_getuid(so->so_cred);
+               sb->st_gid = kauth_cred_getgid(so->so_cred);
        }
 
        ret = (*so->so_proto->pr_usrreqs->pru_sense)(so, ub, isstat64);
@@ -489,6 +495,7 @@ soo_drain(struct fileproc *fp, __unused vfs_context_t ctx)
                wakeup((caddr_t)&so->so_timeo);
                sorwakeup(so);
                sowwakeup(so);
+               soevent(so, SO_FILT_HINT_LOCKED);
 
                socket_unlock(so, 1);
        }
index 009dd377b5e7fe3682e592170ee97a1ae4f952ce..0a258380495d40d57ace83bb8e1ca123891f0f19 100644 (file)
@@ -91,7 +91,7 @@
 41     AUE_DUP         ALL     { int dup(u_int fd); } 
 42     AUE_PIPE        ALL     { int pipe(void); } 
 43     AUE_GETEGID     ALL     { int getegid(void); } 
-44     AUE_PROFILE     ALL     { int profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } 
+44     AUE_NULL        ALL     { int nosys(void); } { old profil }
 45     AUE_NULL        ALL     { int nosys(void); } { old ktrace }
 46     AUE_SIGACTION   ALL     { int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa) NO_SYSCALL_STUB; } 
 47     AUE_GETGID      ALL     { int getgid(void); } 
 167    AUE_MOUNT       ALL     { int mount(char *type, char *path, int flags, caddr_t data); } 
 168    AUE_NULL        ALL     { int nosys(void); }   { old ustat }
 169    AUE_CSOPS       ALL     { int csops(pid_t pid, uint32_t ops, user_addr_t useraddr, user_size_t usersize); } 
-170    AUE_NULL        HN      { int nosys(void); }   { old table }
+170    AUE_CSOPS       ALL     { int csops_audittoken(pid_t pid, uint32_t ops, user_addr_t useraddr, user_size_t usersize, user_addr_t uaudittoken); } 
 171    AUE_NULL        ALL     { int nosys(void); }   { old wait3 }
 172    AUE_NULL        ALL     { int nosys(void); }   { old rpause     }       
 173    AUE_WAITID      ALL     { int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); } 
 174    AUE_NULL        ALL     { int nosys(void); }   { old getdents }
 175    AUE_NULL        ALL     { int nosys(void); }   { old gc_control }
-176    AUE_ADDPROFILE  ALL     { int add_profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } 
+176    AUE_NULL        ALL     { int nosys(void); }   { old add_profil }
 177    AUE_NULL        ALL     { int nosys(void); } 
 178    AUE_NULL        ALL     { int nosys(void); } 
 179    AUE_NULL        ALL     { int nosys(void); } 
 ; to HFS semantics, they are not specific to the HFS filesystem.
 ; We expect all filesystems to recognize the call and report that it is
 ; not supported or to actually implement it.
-216    AUE_MKCOMPLEX   UHN     { int mkcomplex(const char *path, mode_t mode, u_long type); }  { soon to be obsolete }
-217    AUE_STATV       UHN     { int statv(const char *path, struct vstat *vsb); }     { soon to be obsolete }
-218    AUE_LSTATV      UHN     { int lstatv(const char *path, struct vstat *vsb); }    { soon to be obsolete }
-219    AUE_FSTATV      UHN     { int fstatv(int fd, struct vstat *vsb); }      { soon to be obsolete }
+
+; 216->219 used to be mkcomplex and the {f,l}statv variants; they are gone now.
+216    AUE_NULL        ALL     { int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode) NO_SYSCALL_STUB; }
+217    AUE_NULL        ALL     { int nosys(void); }
+218    AUE_NULL        ALL     { int nosys(void); }
+219    AUE_NULL        ALL     { int nosys(void); }
 220    AUE_GETATTRLIST ALL     { int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options) NO_SYSCALL_STUB; } 
 221    AUE_SETATTRLIST ALL     { int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options) NO_SYSCALL_STUB; } 
 222    AUE_GETDIRENTRIESATTR   ALL     { int getdirentriesattr(int fd, struct attrlist *alist, void *buffer, size_t buffersize, u_long *count, u_long *basep, u_long *newstate, u_long options); } 
 #endif
 266    AUE_SHMOPEN     ALL     { int shm_open(const char *name, int oflag, int mode); } 
 267    AUE_SHMUNLINK   ALL     { int shm_unlink(const char *name); } 
-268    AUE_SEMOPEN     ALL     { user_addr_t sem_open(const char *name, int oflag, int mode, int value); } 
+268    AUE_SEMOPEN     ALL     { user_addr_t sem_open(const char *name, int oflag, int mode, int value) NO_SYSCALL_STUB; } 
 269    AUE_SEMCLOSE    ALL     { int sem_close(sem_t *sem); } 
 270    AUE_SEMUNLINK   ALL     { int sem_unlink(const char *name); } 
 271    AUE_SEMWAIT     ALL     { int sem_wait(sem_t *sem); } 
 290    AUE_GETWGROUPS  ALL     { int getwgroups(user_addr_t setlen, user_addr_t guidset) NO_SYSCALL_STUB; }
 291    AUE_MKFIFO_EXTENDED     ALL     { int mkfifo_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } 
 292    AUE_MKDIR_EXTENDED      ALL     { int mkdir_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } 
+#if CONFIG_EXT_RESOLVER
 293    AUE_IDENTITYSVC ALL     { int identitysvc(int opcode, user_addr_t message) NO_SYSCALL_STUB; } 
+#else
+293    AUE_NULL        ALL     { int nosys(void); } 
+#endif
 294    AUE_NULL        ALL     { int shared_region_check_np(uint64_t *start_address) NO_SYSCALL_STUB; }
 295    AUE_NULL        ALL     { int nosys(void); } { old shared_region_map_np }
 296    AUE_NULL        ALL     { int vm_pressure_monitor(int wait_for_pressure, int nsecs_monitored, uint32_t *pages_reclaimed); }
 352    AUE_NULL        ALL     { int nosys(void); } 
 353    AUE_GETAUID     ALL     { int getauid(au_id_t *auid); } 
 354    AUE_SETAUID     ALL     { int setauid(au_id_t *auid); } 
-355    AUE_GETAUDIT    ALL     { int getaudit(struct auditinfo *auditinfo); } 
-356    AUE_SETAUDIT    ALL     { int setaudit(struct auditinfo *auditinfo); } 
+355    AUE_NULL        ALL     { int nosys(void); }    { old getaudit }
+356    AUE_NULL        ALL     { int nosys(void); }    { old setaudit }
 357    AUE_GETAUDIT_ADDR       ALL     { int getaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } 
 358    AUE_SETAUDIT_ADDR       ALL     { int setaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } 
 359    AUE_AUDITCTL    ALL     { int auditctl(char *path); } 
 371     AUE_NULL        ALL     { int nosys(void); }   { old __semwait_signal }
 #endif
 372    AUE_NULL        ALL     { uint64_t thread_selfid (void) NO_SYSCALL_STUB; } 
-373    AUE_NULL        ALL     { int nosys(void); } 
+373    AUE_LEDGER      ALL     { int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); } 
 374    AUE_NULL        ALL     { int nosys(void); } 
 375    AUE_NULL        ALL     { int nosys(void); } 
 376    AUE_NULL        ALL     { int nosys(void); } 
 435    AUE_NULL        ALL     { int pid_hibernate(int pid); }
 436    AUE_NULL        ALL     { int pid_shutdown_sockets(int pid, int level); }
 #else
-435    AUE_NULL        ALL     { int nosys(void); } 
+435    AUE_NULL        ALL     { int nosys(void); }
 436    AUE_NULL        ALL     { int nosys(void); }
 #endif
 437    AUE_NULL        ALL     { int nosys(void); } { old shared_region_slide_np }
 438    AUE_NULL        ALL     { int shared_region_map_and_slide_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings, uint32_t slide, uint64_t* slide_start, uint32_t slide_size) NO_SYSCALL_STUB; }
-
+439    AUE_NULL        ALL     { int kas_info(int selector, void *value, size_t *size); }
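
Each syscall.master row above is "number  audit-event  scope  { kernel prototype }"; NO_SYSCALL_STUB suppresses the auto-generated Libsyscall stub, and a retired number (see 355/356 above) is parked on nosys() so the slot is never reused. A minimal user-space sketch of the dispatch table such a row generates (the sysent/sy_call_t names here are illustrative stand-ins, not the generated xnu code):

    #include <stdio.h>

    typedef int (*sy_call_t)(void *args, int *retval);

    /* Retired or unimplemented slots all point here. */
    static int nosys(void *args, int *retval)
    {
            (void)args; (void)retval;
            return 78;      /* ENOSYS on BSD-derived systems */
    }

    struct sysent { sy_call_t sy_call; };

    static struct sysent sysent[] = {
            [355] = { nosys },      /* { old getaudit } */
            [356] = { nosys },      /* { old setaudit } */
    };

    int main(void)
    {
            int ret = 0;
            printf("syscall 355 -> %d\n", sysent[355].sy_call(NULL, &ret));
            return 0;
    }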
index ed43ec893ab289f8bb21494ee1abec7bfbd5525b..f172333ef2c950eb935e55d4feaae82dffb87c31 100644 (file)
@@ -1044,7 +1044,7 @@ semop(struct proc *p, struct semop_args *uap, int32_t *retval)
 {
        int semid = uap->semid;
        int nsops = uap->nsops;
-       struct sembuf sops[MAX_SOPS];
+       struct sembuf sops[seminfo.semopm];
        register struct semid_kernel *semakptr;
        register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
        register struct sem *semptr = NULL;     /* protected by 'if' */
@@ -1084,14 +1084,15 @@ semop(struct proc *p, struct semop_args *uap, int32_t *retval)
                goto semopout;
        }
 
-       if (nsops < 0 || nsops > MAX_SOPS) {
+       if (nsops < 0 || nsops > seminfo.semopm) {
 #ifdef SEM_DEBUG
-               printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops);
+               printf("too many sops (max=%d, nsops=%d)\n",
+                   seminfo.semopm, nsops);
 #endif
                eval = E2BIG;
                goto semopout;
        }
-
+       
        /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
        if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
 #ifdef SEM_DEBUG
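
The semop() hunk above replaces the compile-time MAX_SOPS bound with the run-time seminfo.semopm limit, sizing the on-stack array as a C99 VLA and rejecting oversized counts before the copyin. A hedged user-space sketch of the same validate-then-copy ordering (seminfo_semopm and user_sops are stand-ins, not kernel symbols):

    #include <errno.h>
    #include <string.h>

    struct sembuf { unsigned short sem_num; short sem_op; short sem_flg; };

    int copy_sops(const struct sembuf *user_sops, int nsops, int seminfo_semopm)
    {
            if (nsops < 0 || nsops > seminfo_semopm)
                    return E2BIG;                   /* same check as the kernel */

            struct sembuf sops[seminfo_semopm];     /* VLA, bounded by the check */
            memcpy(sops, user_sops, nsops * sizeof(struct sembuf));
            /* ... perform the operations on sops[0..nsops-1] ... */
            return 0;
    }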
index fbc026fb235ee58c1ae60b2a589adf4dc3b8b97f..fd215a76aa8c12fba2aa48e555a99ae87971d502 100644 (file)
 0x10c001c      MSC_kern_invalid_#7
 0x10c0020      MSC_kern_invalid_#8
 0x10c0024      MSC_kern_invalid_#9
-0x10c0028      MSC_kern_invalid_#10
-0x10c002c      MSC_kern_invalid_#11
-0x10c0030      MSC_kern_invalid_#12
-0x10c0034      MSC_kern_invalid_#13
-0x10c0038      MSC_kern_invalid_#14
-0x10c003c      MSC_kern_invalid_#15
-0x10c0040      MSC_kern_invalid_#16
-0x10c0044      MSC_kern_invalid_#17
-0x10c0048      MSC_kern_invalid_#18
-0x10c004c      MSC_kern_invalid_#19
-0x10c0050      MSC_kern_invalid_#20
-0x10c0054      MSC_kern_invalid_#21
-0x10c0058      MSC_kern_invalid_#22
-0x10c005c      MSC_kern_invalid_#23
+0x10c0028      MSC_mach_vm_allocate_trap
+0x10c0030      MSC_mach_vm_deallocate_trap
+0x10c0038      MSC_mach_vm_protect_trap
+0x10c0040      MSC_mach_port_allocate_trap
+0x10c0044      MSC_mach_port_destroy_trap
+0x10c0048      MSC_mach_port_deallocate_trap
+0x10c004c      MSC_mach_port_mod_refs_trap
+0x10c0050      MSC_mach_port_move_member_trap
+0x10c0054      MSC_mach_port_insert_right_trap
+0x10c0058      MSC_mach_port_insert_member_trap
+0x10c005c      MSC_mach_port_extract_member_trap
 0x10c0060      MSC_kern_invalid_#24
 0x10c0064      MSC_kern_invalid_#25
 0x10c0068      MSC_mach_reply_port
 0x1300104      MACH_purgable_token_delete
 0x1300108      MACH_purgable_token_ripened
 0x130010c      MACH_purgable_token_purged
+0x1300120      MACH_purgable_object_add
+0x1300124      MACH_purgable_object_remove
+0x1300128      MACH_purgable_object_purge
+0x130012c      MACH_purgable_object_purge_all
 0x1300400      MACH_vm_check_zf_delay
 0x1300404      MACH_vm_cow_delay
 0x1300408      MACH_vm_zf_delay
 0x1400024      MACH_IDLE
 0x1400028      MACH_STACK_DEPTH
 0x140002c      MACH_MOVED
-0x1400030       MACH_FAIRSHARE_ENTER
-0x1400034       MACH_FAIRSHARE_EXIT
+0x1400030      MACH_FAIRSHARE_ENTER
+0x1400034      MACH_FAIRSHARE_EXIT
 0x1400038      MACH_FAILSAFE
-0x1400040      MACH_STKHANDOFF_BT
+0x140003C      MACH_BLOCK
+0x1400040      MACH_WAIT
 0x1400044      MACH_SCHED_BT
 0x1400048      MACH_IDLE_BT
 0x1400050      MACH_SCHED_GET_URGENCY
 0x3020154      P_PgOutAsyncPDone
 0x3020158      P_PgInAsyncP
 0x302015C      P_PgInAsyncPDone
+0x3020200       P_WrDataN
+0x3020208       P_RdDataN
+0x3020210       P_WrDataAsyncN
+0x3020218       P_RdDataAsyncN
+0x3020204       P_WrDataNDone
+0x302020C       P_RdDataNDone
+0x3020214       P_WrDataAsyncNDone
+0x302021C       P_RdDataAsyncNDone
+0x3020280       P_WrDataNT
+0x3020288       P_RdDataNT
+0x3020290       P_WrDataAsyncNT
+0x3020298       P_RdDataAsyncNT
+0x3020284       P_WrDataNTDone
+0x302028C       P_RdDataNTDone
+0x3020294       P_WrDataAsyncNTDone
+0x302029C       P_RdDataAsyncNTDone
+0x3020300       P_WrDataNP
+0x3020308       P_RdDataNP
+0x3020310       P_WrDataAsyncNP
+0x3020318       P_RdDataAsyncNP
+0x3020304       P_WrDataNPDone
+0x302030C       P_RdDataNPDone
+0x3020314       P_WrDataAsyncNPDone
+0x302031C       P_RdDataAsyncNPDone
 0x3050004      journal_flush
+0x3060000      SPEC_ioctl
+0x3060004      SPEC_trim_extent
 0x3070004      BootCache_tag
 0x3070008      BootCache_batch
 0x4010004      proc_exit
 0x40c0354      BSC_#213
 0x40c0358      BSC_#214
 0x40c035c      BSC_#215
-0x40c0360      BSC_mkcomplex
-0x40c0364      BSC_statv
-0x40c0368      BSC_lstatv
-0x40c036c      BSC_fstatv
+0x40c0360      BSC_obs_mkcomplex
+0x40c0364      BSC_obs_statv
+0x40c0368      BSC_obs_lstatv
+0x40c036c      BSC_obs_fstatv
 0x40c0370      BSC_getattrlist
 0x40c0374      BSC_setattrlist
 0x40c0378      BSC_getdirentriesattr
 0x40c05c8      BSC_obs_semwait_signal
 0x40c05cc      BSC_obs_semwait_signal_nocancel
 0x40c05d0      BSC_thread_selfid
-0x40c05d4      BSC_#373
+0x40c05d4      BSC_ledger
 0x40c05d8      BSC_#374
 0x40c05dc      BSC_#375
 0x40c05e0      BSC_#376
 0x40c06cc      BSC_pid_hibernate
 0x40c06d0      BSC_pid_shutdown_sockets
 0x40c06d4      BSC_shared_region_slide_np
-0x40c06fc      BSC_shared_region_map_and_slide_np
+0x40c06d8      BSC_shared_region_map_and_slide_np
+0x40c06dc      BSC_kas_info
 0x40e0104      BSC_msync_extended_info
 0x40e0264      BSC_pread_extended_info
 0x40e0268      BSC_pwrite_extended_info
 0x5230030      HID_DispatchKeyboard
 0x5230034      HID_EjectCallback
 0x5230038      HID_CapsCallback
-0x523003c      HID_#3c
-0x523004c      HID_#4c
+0x523003c      HID_HandleReport
+0x5230040      HID_DispatchTabletPointer
+0x5230044      HID_DispatchTabletProx
+0x5230048      HID_DispatchHIDEvent
+0x523004c      HID_CalculateCapsDelay
+0x5230050      HID_Invalid
 0x5310004      CPUPM_PSTATE
 0x5310008      CPUPM_IDLE_CSTATE
 0x531000c      CPUPM_IDLE_HALT
 0x2200002c     LAUNCHD_bsd_kevent
 0x22000030     LAUNCHD_vproc_trans_incr
 0x22000034     LAUNCHD_vproc_trans_decr
+0x25000000     PERF_Event
+0x25010000     PERF_THD_Sample
+0x25010004     PERF_THD_Data
+0x25010008     PERF_THD_XSample
+0x2501000c     PERF_THD_XPend
+0x25010010     PERF_THD_XData
+0x25020000     PERF_STK_KSample
+0x25020004     PERF_STK_USched
+0x25020008     PERF_STK_USample
+0x2502000c     PERF_STK_KData
+0x25020010     PERF_STK_UData
+0x25030000     PERF_TMR_AllSched
+0x25030004     PERF_TMR_Schedule
+0x25030008     PERF_TMR_Handler
+0x25040000     PERF_ATS_Thread
+0x25040004     PERF_ATS_Error
+0x25040008     PERF_ATS_Run
+0x2504000c     PERF_ATS_Pause
+0x25040010     PERF_ATS_Idle
+0x25040014     PERF_ATS_Sample
+0x25050000     PERF_AST_Handler
+0x25050004     PERF_AST_Error
 0xff000104     MSG_mach_notify_port_deleted
 0xff000114     MSG_mach_notify_port_destroyed
 0xff000118     MSG_mach_notify_no_senders
 0xff002c40     MSG_io_service_wait_quiet
 0xff002c44     MSG_io_registry_entry_create_iterator
 0xff002c48     MSG_io_iterator_is_valid
-0xff002c4c     MSG_io_make_matching
 0xff002c50     MSG_io_catalog_send_data
 0xff002c54     MSG_io_catalog_terminate
 0xff002c58     MSG_io_catalog_get_data
index b97eb780e36ea557e667a64c90a19e204a4ad06d..4d7c5b9fa7f8fa06435db603030a63f878bdf5ac 100644 (file)
@@ -142,6 +142,9 @@ static void ttyunblock(struct tty *tp);
 static int     ttywflush(struct tty *tp);
 static int     proc_compare(proc_t p1, proc_t p2);
 
+static void    ttyhold(struct tty *tp);
+static void    ttydeallocate(struct tty *tp);
+
 static int isctty(proc_t p, struct tty  *tp);
 static int isctty_sp(proc_t p, struct tty  *tp, struct session *sessp);
 
@@ -339,8 +342,9 @@ int
 ttyopen(dev_t device, struct tty *tp)
 {
        proc_t p = current_proc();
-       struct pgrp * pg, * oldpg;
+       struct pgrp *pg, *oldpg;
        struct session *sessp, *oldsess;
+       struct tty *oldtp;
 
        TTY_LOCK_OWNED(tp);     /* debug assert */
 
@@ -359,15 +363,15 @@ ttyopen(dev_t device, struct tty *tp)
        /*
         * First tty open after setsid() call makes this tty its controlling
         * tty, if the tty does not already have a session associated with it.
-        * Only do this if the process
         */
-       if (SESS_LEADER(p, sessp) &&                    /* process is session leader */
+       if (SESS_LEADER(p, sessp) &&    /* the process is the session leader */
            sessp->s_ttyvp == NULL &&   /* but has no controlling tty */
-           tp->t_session == NULL ) {           /* and tty not controlling */
+           tp->t_session == NULL ) {   /* and tty not controlling */
                session_lock(sessp);
                if ((sessp->s_flags & S_NOCTTY) == 0) { /* and no O_NOCTTY */
-                       /* Hold on to the reference */
-                       sessp->s_ttyp = tp;     /* XXX NOT A REFERENCE */
+                       oldtp = sessp->s_ttyp;
+                       ttyhold(tp);
+                       sessp->s_ttyp = tp;
                        OSBitOrAtomic(P_CONTROLT, &p->p_flag);
                        session_unlock(sessp);
                        proc_list_lock();
@@ -385,6 +389,8 @@ ttyopen(dev_t device, struct tty *tp)
                                pg_rele(oldpg);
                        if (oldsess != SESSION_NULL)
                                session_rele(oldsess);  
+                       if (NULL != oldtp)
+                               ttyfree(oldtp);
                        tty_lock(tp);
                        goto out;
                }
@@ -1047,8 +1053,9 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
 {
        int error = 0;
        struct uthread *ut;
-       struct pgrp * pg, *oldpg;
-       struct session *sessp, * oldsessp;
+       struct pgrp *pg, *oldpg;
+       struct session *sessp, *oldsessp;
+       struct tty *oldtp;
 
        TTY_LOCK_OWNED(tp);     /* debug assert */
 
@@ -1404,7 +1411,9 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
                tp->t_pgrp = pg;
                proc_list_unlock();
                session_lock(sessp);
-               sessp->s_ttyp = tp;     /* XXX NOT A REFERENCE */
+               oldtp = sessp->s_ttyp;
+               ttyhold(tp);
+               sessp->s_ttyp = tp;
                session_unlock(sessp);
                OSBitOrAtomic(P_CONTROLT, &p->p_flag);
                /* SAFE: All callers drop the lock on return */
@@ -1414,6 +1423,8 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
                        session_rele(oldsessp);
                if (oldpg != PGRP_NULL)
                        pg_rele(oldpg);
+               if (NULL != oldtp)
+                       ttyfree(oldtp);
                tty_lock(tp);
                break;
 
@@ -3038,19 +3049,48 @@ ttymalloc(void)
                lck_mtx_init(&tp->t_lock, tty_lck_grp, tty_lck_attr);
                klist_init(&tp->t_rsel.si_note);
                klist_init(&tp->t_wsel.si_note);
+               tp->t_refcnt = 1;
        }
-       return(tp);
+       return (tp);
 }
 
+/*
+ * Increment the reference count on a tty.
+ */
+static void
+ttyhold(struct tty *tp)
+{
+       TTY_LOCK_OWNED(tp);
+       tp->t_refcnt++;
+}
 
 /*
- * Free a tty structure and its buffers.
- *
- * Locks:      The tty_lock() is assumed to not be held at the time of
- *             the free; this functions destroys the mutex.
+ * Drops a reference count on a tty structure; if the reference count reaches
+ * zero, then also frees the structure and associated buffers.
  */
 void
 ttyfree(struct tty *tp)
+{
+       TTY_LOCK_NOTOWNED(tp);
+
+       tty_lock(tp);
+       if (--tp->t_refcnt == 0) {
+               tty_unlock(tp);
+               ttydeallocate(tp);
+       } else if (tp->t_refcnt < 0) {
+               panic("%s: freeing free tty %p", __func__, tp);
+       } else
+               tty_unlock(tp);
+}
+
+/*
+ * Deallocate a tty structure and its buffers.
+ *
+ * Locks:      The tty_lock() is assumed to not be held at the time of
+ *             the free; this function destroys the mutex.
+ */
+static void
+ttydeallocate(struct tty *tp)
 {
        TTY_LOCK_NOTOWNED(tp);  /* debug assert */
 
@@ -3097,4 +3137,3 @@ isctty_sp(proc_t p, struct tty  *tp, struct session *sessp)
        return(sessp == tp->t_session && p->p_flag & P_CONTROLT);
 
 }
-
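
The tty.c changes above convert sessp->s_ttyp from a bare pointer (the old "XXX NOT A REFERENCE" comment) into a counted reference: ttyhold() bumps t_refcnt under the tty lock, ttyfree() drops it, and only the last drop calls ttydeallocate(). A user-space sketch of that discipline, with pthread names standing in for the xnu lock primitives (the panic-on-underflow branch is omitted):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
            pthread_mutex_t lock;
            int             refcnt;         /* allocator returns it at 1 */
    };

    static void obj_hold(struct obj *o)     /* caller holds o->lock */
    {
            o->refcnt++;
    }

    static void obj_free(struct obj *o)     /* caller must NOT hold o->lock */
    {
            pthread_mutex_lock(&o->lock);
            int last = (--o->refcnt == 0);
            pthread_mutex_unlock(&o->lock);
            if (last) {
                    /* like ttydeallocate(): destroy the mutex, then free */
                    pthread_mutex_destroy(&o->lock);
                    free(o);
            }
    }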
index ec7bee44581c5c532a900866909200d716b5fe66..9cb8339bf59943b261f8ce4c8de547fc6589c21d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1997-2012 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,7 @@
 /* Forward declarations for cdevsw[] entry */
 /* XXX we should consider making these static */
 int cttyopen(dev_t dev, int flag, int mode, proc_t p);
+int cttyclose(dev_t dev, int flag, int mode, proc_t p);
 int cttyread(dev_t dev, struct uio *uio, int flag);
 int cttywrite(dev_t dev, struct uio *uio, int flag);
 int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, proc_t p);
@@ -85,31 +86,65 @@ static vnode_t cttyvp(proc_t p);
 int
 cttyopen(dev_t dev, int flag, __unused int mode, proc_t p)
 {
-       vnode_t ttyvp = cttyvp(p);
-       struct vfs_context context;
+       vnode_t ttyvp;
        int error;
 
-       if (ttyvp == NULL)
-               return (ENXIO);
-
-       context.vc_thread = current_thread();
-       context.vc_ucred = kauth_cred_proc_ref(p);
-
        /*
         * A little hack--this device, used by many processes,
-        * happens to do an open on another device, which can 
-        * cause unhappiness if the second-level open blocks indefinitely 
-        * (as could be the case if the master side has hung up).  Since
-        * we know that this driver doesn't care about the serializing
-        * opens and closes, we can drop the lock.
+        * does an open on another device, which can cause unhappiness
+        * if the second-level open blocks indefinitely (e.g. if the
+        * master side has hung up).  This driver doesn't care
+        * about serializing opens and closes, so drop the lock.
         */
        devsw_unlock(dev, S_IFCHR);
-       error = VNOP_OPEN(ttyvp, flag, &context);
+
+       if ((ttyvp = cttyvp(p)) == NULL) {
+               error = ENXIO;
+       } else {
+               struct vfs_context context;
+
+               context.vc_thread = current_thread();
+               context.vc_ucred = kauth_cred_proc_ref(p);
+
+               error = VNOP_OPEN(ttyvp, flag, &context);
+
+               kauth_cred_unref(&context.vc_ucred);
+               vnode_put(ttyvp);
+       }
+
        devsw_lock(dev, S_IFCHR);
+       return (error);
+}
 
-       vnode_put(ttyvp);
-       kauth_cred_unref(&context.vc_ucred);
+/*
+ * This driver is marked D_TRACKCLOSE and so gets a close
+ * for every open so that ttyvp->v_specinfo->si_count can be kept sane.
+ */
+int
+cttyclose(dev_t dev, int flag, __unused int mode, proc_t p)
+{
+       vnode_t ttyvp;
+       int error;
+
+       /* See locking commentary above. */
+
+       devsw_unlock(dev, S_IFCHR);
+
+       if ((ttyvp = cttyvp(p)) == NULL) {
+               error = ENXIO;
+       } else {
+               struct vfs_context context;
+
+               context.vc_thread = current_thread();
+               context.vc_ucred = kauth_cred_proc_ref(p);
+
+               error = VNOP_CLOSE(ttyvp, flag, &context);
 
+               kauth_cred_unref(&context.vc_ucred);
+               vnode_put(ttyvp);
+       }
+
+       devsw_lock(dev, S_IFCHR);
        return (error);
 }
 
index c7661e41b1df6048a9d28a42a4d85939c9fd531e..c89ea82abdf6f079e8f1eb6b6cee25d5104c61bd 100644 (file)
@@ -1984,9 +1984,20 @@ ubc_create_upl(
                        uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
                                     UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
                } else {
-                       uplflags |= UPL_RET_ONLY_ABSENT | UPL_NOBLOCK |
+                       uplflags |= UPL_RET_ONLY_ABSENT |
                                    UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
                                    UPL_SET_INTERNAL | UPL_SET_LITE;
+
+                       /*
+                        * if the requested size == PAGE_SIZE, we don't want to set
+                        * the UPL_NOBLOCK since we may be trying to recover from a
+                        * previous partial pagein I/O that occurred because we were low
+                        * on memory and bailed early in order to honor the UPL_NOBLOCK...
+                        * since we're only asking for a single page, we can block w/o fear
+                        * of tying up pages while waiting for more to become available
+                        */
+                       if (bufsize > PAGE_SIZE)
+                               uplflags |= UPL_NOBLOCK;
                }
        } else {
                uplflags &= ~UPL_FOR_PAGEOUT;
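
The ubc_create_upl() hunk stops requesting UPL_NOBLOCK for single-page pageins: a one-page request may be a retry of an earlier pagein that bailed under memory pressure, and blocking on a single page cannot tie up a large range. A sketch of just that flag decision, with stand-in constants (the real UPL_* bits live in the Mach headers):

    #include <stddef.h>

    #define SK_NOBLOCK      0x1     /* stand-in for UPL_NOBLOCK */
    #define SK_PAGE_SIZE    4096

    unsigned int pagein_flags(size_t bufsize, unsigned int flags)
    {
            if (bufsize > SK_PAGE_SIZE)
                    flags |= SK_NOBLOCK;    /* multi-page: don't wait for pages */
            /* single page: leave blocking allowed so the retry can succeed */
            return flags;
    }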
@@ -2344,6 +2355,16 @@ UBCINFOEXISTS(struct vnode * vp)
 }
 
 
+void
+ubc_upl_range_needed(
+       upl_t           upl,
+       int             index,
+       int             count)
+{
+       upl_range_needed(upl, index, count);
+}
+
+
 /*
  * CODE SIGNING
  */
@@ -2356,7 +2377,9 @@ static SInt32 cs_blob_count_peak = 0;
 
 int cs_validation = 1;
 
+#ifndef SECURE_KERNEL
 SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
+#endif
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
@@ -2760,6 +2783,7 @@ unsigned long cs_validate_page_bad_hash = 0;
 boolean_t
 cs_validate_page(
        void                    *_blobs,
+       memory_object_t         pager,
        memory_object_offset_t  page_offset,
        const void              *data,
        boolean_t               *tainted)
@@ -2868,8 +2892,8 @@ cs_validate_page(
                cs_validate_page_no_hash++;
                if (cs_debug > 1) {
                        printf("CODE SIGNING: cs_validate_page: "
-                              "off 0x%llx: no hash to validate !?\n",
-                              page_offset);
+                              "mobj %p off 0x%llx: no hash to validate !?\n",
+                              pager, page_offset);
                }
                validated = FALSE;
                *tainted = FALSE;
@@ -2893,10 +2917,10 @@ cs_validate_page(
                if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
                        if (cs_debug) {
                                printf("CODE SIGNING: cs_validate_page: "
-                                      "off 0x%llx size 0x%lx: "
+                                      "mobj %p off 0x%llx size 0x%lx: "
                                       "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
                                       "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
-                                      page_offset, size,
+                                      pager, page_offset, size,
                                       asha1[0], asha1[1], asha1[2],
                                       asha1[3], asha1[4],
                                       esha1[0], esha1[1], esha1[2],
@@ -2907,8 +2931,9 @@ cs_validate_page(
                } else {
                        if (cs_debug > 1) {
                                printf("CODE SIGNING: cs_validate_page: "
-                                      "off 0x%llx size 0x%lx: SHA1 OK\n",
-                                      page_offset, size);
+                                      "mobj %p off 0x%llx size 0x%lx: "
+                                      "SHA1 OK\n",
+                                      pager, page_offset, size);
                        }
                        *tainted = FALSE;
                }
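
cs_validate_page() now tags its diagnostics with the memory object (pager) so a failed page hash can be traced back to a file. The check itself is a SHA-1 of the page compared against the blob's expected hash; a user-space sketch using CommonCrypto (assuming SHA1_RESULTLEN matches CC_SHA1_DIGEST_LENGTH, i.e. 20 bytes):

    #include <CommonCrypto/CommonDigest.h>
    #include <string.h>

    int page_hash_ok(const void *page, size_t size,
        const unsigned char expected[CC_SHA1_DIGEST_LENGTH])
    {
            unsigned char actual[CC_SHA1_DIGEST_LENGTH];

            CC_SHA1(page, (CC_LONG)size, actual);
            return (memcmp(expected, actual, sizeof(actual)) == 0);
    }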
index 1065d3683102b0cb02a8bc6353af06dbd733cd3c..a015bddaee7b36b5f8217b49d6507f10c6e9be72 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,6 +65,7 @@
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
@@ -74,6 +75,8 @@
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
+#include <net/dlil.h>
+
 #include <pexpert/pexpert.h>
 
 void init_domain(struct domain *dp) __attribute__((section("__TEXT, initcode")));
@@ -95,16 +98,12 @@ static void net_update_uptime(void);
 lck_grp_t              *domain_proto_mtx_grp;
 lck_attr_t     *domain_proto_mtx_attr;
 static lck_grp_attr_t  *domain_proto_mtx_grp_attr;
-lck_mtx_t              *domain_proto_mtx;
+decl_lck_mtx_data(static, domain_proto_mtx);
 extern int             do_reclaim;
 
 extern sysctlfn net_sysctl;
 
-static u_int64_t uptime;
-
-#ifdef INET6
-extern  void ip6_fin(void);
-#endif
+static u_int64_t _net_uptime;
 
 static void
 init_proto(struct protosw *pr)
@@ -153,10 +152,14 @@ init_domain(struct domain *dp)
        }
 
        /* Recompute for new protocol */
-       if (max_linkhdr < 16)           /* XXX - Sheesh; everything's ether? */
-               max_linkhdr = 16;
-       if (dp->dom_protohdrlen > max_protohdr)
-               max_protohdr = dp->dom_protohdrlen;
+       if (_max_linkhdr < 16)          /* XXX - Sheesh; everything's ether? */
+               _max_linkhdr = 16;
+       _max_linkhdr = max_linkhdr;     /* round it up */
+
+       if (dp->dom_protohdrlen > _max_protohdr)
+               _max_protohdr = dp->dom_protohdrlen;
+       _max_protohdr = max_protohdr;   /* round it up */
+
        max_hdr = max_linkhdr + max_protohdr;
        max_datalen = MHLEN - max_hdr;
 }
@@ -164,7 +167,7 @@ init_domain(struct domain *dp)
 void
 prepend_domain(struct domain *dp) 
 {      
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+       lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
        dp->dom_next = domains; 
        domains = dp; 
 }
@@ -172,15 +175,17 @@ prepend_domain(struct domain *dp)
 void
 net_add_domain(struct domain *dp)
 {
+       int do_unlock;
+
        kprintf("Adding domain %s (family %d)\n", dp->dom_name,
                dp->dom_family);
        /* First, link in the domain */
 
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        prepend_domain(dp);
 
        init_domain(dp);
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
 
 }
 
@@ -188,11 +193,12 @@ int
 net_del_domain(struct domain *dp)
 {      register struct domain *dp1, *dp2;
        register int retval = 0;
+       int do_unlock;
 
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
  
        if (dp->dom_refs) {
-               lck_mtx_unlock(domain_proto_mtx);
+               domain_proto_mtx_unlock(do_unlock);
                return(EBUSY);
      }
 
@@ -207,7 +213,7 @@ net_del_domain(struct domain *dp)
                        domains = dp1->dom_next;
        } else
                retval = EPFNOSUPPORT;
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
 
        return(retval);
 }
@@ -294,6 +300,7 @@ void
 domaininit(void)
 {
        register struct domain *dp;
+       int do_unlock;
 
        /*
         * allocate lock group attribute and group for domain mutexes
@@ -307,15 +314,13 @@ domaininit(void)
         */
        domain_proto_mtx_attr = lck_attr_alloc_init();
 
-       if ((domain_proto_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) {
-               printf("domaininit: can't init domain mtx for domain list\n");
-               return; /* we have a problem... */
-       }
+       lck_mtx_init(&domain_proto_mtx, domain_proto_mtx_grp,
+                    domain_proto_mtx_attr);
        /*
         * Add all the static domains to the domains list
         */
 
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
 
        prepend_domain(&localdomain);
        prepend_domain(&inetdomain);
@@ -351,18 +356,10 @@ domaininit(void)
        for (dp = domains; dp; dp = dp->dom_next)
                init_domain(dp);
 
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        timeout(pfslowtimo, NULL, 1);
 }
 
-void
-domainfin(void)
-{
-#ifdef INET6
-       ip6_fin();
-#endif
-}
-
 static __inline__ struct domain *
 pffinddomain_locked(int pf)
 {
@@ -383,20 +380,20 @@ pffindtype(int family, int type)
 {
        register struct domain *dp;
        register struct protosw *pr;
+       int do_unlock;
 
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        dp = pffinddomain_locked(family);
        if (dp == NULL) {
-       lck_mtx_unlock(domain_proto_mtx);
+               domain_proto_mtx_unlock(do_unlock);
                return (NULL);
        }
        for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
                if (pr->pr_type && pr->pr_type == type) {
-                       lck_mtx_unlock(domain_proto_mtx);
+                       domain_proto_mtx_unlock(do_unlock);
                        return (pr);
                }
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        return (0);
 }
 
@@ -404,22 +401,22 @@ struct domain *
 pffinddomain(int pf)
 {
        struct domain *dp;
+       int do_unlock;
 
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        dp = pffinddomain_locked(pf);
-                       lck_mtx_unlock(domain_proto_mtx);
-                       return(dp);
-               }
+       domain_proto_mtx_unlock(do_unlock);
+       return(dp);
+}
 
 struct protosw *
 pffindproto(int family, int protocol, int type)
 {
        register struct protosw *pr;
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
+       int do_unlock;
+       do_unlock = domain_proto_mtx_lock();
        pr = pffindproto_locked(family, protocol, type);
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        return (pr);
 }
 
@@ -471,13 +468,13 @@ struct protosw *
 pffindprotonotype(int family, int protocol)
 {
        register struct protosw *pr;
+       int do_unlock;
        if (protocol == 0) {
                return (NULL);
        }
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        pr = pffindprotonotype_locked(family, protocol, 0);
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        return (pr);
 }
 
@@ -488,6 +485,7 @@ net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
        register struct domain *dp;
        register struct protosw *pr;
        int family, protocol, error;
+       int do_unlock;
 
        /*
         * All sysctl names at this level are nonterminal;
@@ -501,21 +499,21 @@ net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 
        if (family == 0)
                return (0);
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        for (dp = domains; dp; dp = dp->dom_next)
                if (dp->dom_family == family)
                        goto found;
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        return (ENOPROTOOPT);
 found:
        for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
                if (pr->pr_protocol == protocol && pr->pr_sysctl) {
                        error = (*pr->pr_sysctl)(name + 2, namelen - 2,
                            (void *)(uintptr_t)oldp, oldlenp, (void *)(uintptr_t)newp, newlen);
-                       lck_mtx_unlock(domain_proto_mtx);
+                       domain_proto_mtx_unlock(do_unlock);
                        return (error);
                }
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        return (ENOPROTOOPT);
 }
 
@@ -530,16 +528,17 @@ pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
 {
        struct domain *dp;
        struct protosw *pr;
+       int do_unlock;
 
        if (!sa)
                return;
 
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        for (dp = domains; dp; dp = dp->dom_next)
                for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
                        if (pr->pr_ctlinput)
                                (*pr->pr_ctlinput)(cmd, sa, ctlparam);
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
 }
 
 void
@@ -547,6 +546,7 @@ pfslowtimo(__unused void *arg)
 {
        register struct domain *dp;
        register struct protosw *pr;
+       int do_unlock;
 
        /*
         * Update coarse-grained networking timestamp (in sec.); the idea
@@ -555,7 +555,7 @@ pfslowtimo(__unused void *arg)
         */
        net_update_uptime();
 
-       lck_mtx_lock(domain_proto_mtx);
+       do_unlock = domain_proto_mtx_lock();
        for (dp = domains; dp; dp = dp->dom_next) 
                for (pr = dp->dom_protosw; pr; pr = pr->pr_next) {
                        if (pr->pr_slowtimo)
@@ -565,7 +565,7 @@ pfslowtimo(__unused void *arg)
                                (*pr->pr_drain)();
                }
        do_reclaim = 0;
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(do_unlock);
        timeout(pfslowtimo, NULL, hz/PR_SLOWHZ);
 }
 
@@ -575,7 +575,7 @@ net_update_uptime(void)
        struct timeval tv;
 
        microuptime(&tv);
-       uptime = tv.tv_sec;
+       _net_uptime = tv.tv_sec;
 }
 
 /*
@@ -587,8 +587,30 @@ u_int64_t
 net_uptime(void)
 {
        /* If we get here before pfslowtimo() fires for the first time */
-       if (uptime == 0)
+       if (_net_uptime == 0)
                net_update_uptime();
 
-       return (uptime);
+       return (_net_uptime);
+}
+
+int
+domain_proto_mtx_lock(void)
+{
+       int held = net_thread_check_lock(NET_THREAD_HELD_DOMAIN);
+       if (!held) {
+               lck_mtx_lock(&domain_proto_mtx);
+               net_thread_set_lock(NET_THREAD_HELD_DOMAIN);
+       }
+       lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+       return !held;
+}
+
+void
+domain_proto_mtx_unlock(int do_unlock)
+{
+       if (do_unlock) {
+               net_thread_unset_lock(NET_THREAD_HELD_DOMAIN);
+               lck_mtx_unlock(&domain_proto_mtx);
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       }
 }
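
domain_proto_mtx_lock() above returns whether this call actually took the mutex, so code that re-enters the domain layer while already holding it will not self-deadlock; only the frame that locked it passes a true do_unlock back. A user-space sketch of the pattern, with a thread-local flag standing in for net_thread_check_lock/net_thread_set_lock:

    #include <pthread.h>

    static pthread_mutex_t proto_mtx = PTHREAD_MUTEX_INITIALIZER;
    static __thread int proto_mtx_held;     /* per-thread ownership marker */

    int proto_mtx_lock(void)
    {
            if (proto_mtx_held)
                    return 0;       /* already ours: this caller won't unlock */
            pthread_mutex_lock(&proto_mtx);
            proto_mtx_held = 1;
            return 1;
    }

    void proto_mtx_unlock(int do_unlock)
    {
            if (do_unlock) {
                    proto_mtx_held = 0;
                    pthread_mutex_unlock(&proto_mtx);
            }
    }

Callers follow the converted pffind*() shape: int do_unlock = proto_mtx_lock(); ... proto_mtx_unlock(do_unlock);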
index d8d3ce8573365be298c8c19d20a0b4938e81e65d..0112f0c024352e56582dc085f35de3742177e8e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -303,7 +303,8 @@ extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
 extern vm_map_t mb_map;                /* special map */
 
 /* Global lock */
-static lck_mtx_t *mbuf_mlock;
+decl_lck_mtx_data(static, mbuf_mlock_data);
+static lck_mtx_t *mbuf_mlock = &mbuf_mlock_data;
 static lck_attr_t *mbuf_mlock_attr;
 static lck_grp_t *mbuf_mlock_grp;
 static lck_grp_attr_t *mbuf_mlock_grp_attr;
@@ -449,14 +450,15 @@ int njcl;                 /* # of clusters for jumbo sizes */
 int njclbytes;                 /* size of a jumbo cluster */
 union mbigcluster *mbutl;      /* first mapped cluster address */
 union mbigcluster *embutl;     /* ending virtual address of mclusters */
-int max_linkhdr;               /* largest link-level header */
-int max_protohdr;              /* largest protocol header */
+int _max_linkhdr;              /* largest link-level header */
+int _max_protohdr;             /* largest protocol header */
 int max_hdr;                   /* largest link+protocol header */
 int max_datalen;               /* MHLEN - max_hdr */
 
 static boolean_t mclverify;    /* debug: pattern-checking */
 static boolean_t mcltrace;     /* debug: stack tracing */
 static boolean_t mclfindleak;  /* debug: leak detection */
+static boolean_t mclexpleak;   /* debug: expose leak info to user space */
 
 /* mbuf leak detection variables */
 static struct mleak_table mleak_table;
@@ -495,6 +497,22 @@ struct mtrace {
  */
 #define        MLEAK_NUM_TRACES                5
 
+#define        MB_LEAK_SPACING_64 "                    "
+#define MB_LEAK_SPACING_32 "            "
+
+
+#define        MB_LEAK_HDR_32  "\n\
+    trace [1]   trace [2]   trace [3]   trace [4]   trace [5]  \n\
+    ----------  ----------  ----------  ----------  ---------- \n\
+"
+
+#define        MB_LEAK_HDR_64  "\n\
+    trace [1]           trace [2]           trace [3]       \
+        trace [4]           trace [5]      \n\
+    ------------------  ------------------  ------------------  \
+    ------------------  ------------------ \n\
+"
+
 static uint32_t mleak_alloc_buckets = MLEAK_ALLOCATION_MAP_NUM;
 static uint32_t mleak_trace_buckets = MLEAK_TRACE_MAP_NUM;
 
@@ -504,7 +522,8 @@ static struct mtrace *mleak_traces;
 static struct mtrace *mleak_top_trace[MLEAK_NUM_TRACES];
 
 /* Lock to protect mleak tables from concurrent modification */
-static lck_mtx_t *mleak_lock;
+decl_lck_mtx_data(static, mleak_lock_data);
+static lck_mtx_t *mleak_lock = &mleak_lock_data;
 static lck_attr_t *mleak_lock_attr;
 static lck_grp_t *mleak_lock_grp;
 static lck_grp_attr_t *mleak_lock_grp_attr;
@@ -588,7 +607,9 @@ static int mb_waiters;                      /* number of waiters */
 
 #define        MB_WDT_MAXTIME  10              /* # of secs before watchdog panic */
 static struct timeval mb_wdtstart;     /* watchdog start timestamp */
-static char mbuf_dump_buf[256];
+static char *mbuf_dump_buf;
+
+#define        MBUF_DUMP_BUF_SIZE      2048
 
 /*
  * mbuf watchdog is enabled by default on embedded platforms.  It is
@@ -656,6 +677,8 @@ static void mleak_activate(void);
 static void mleak_logger(u_int32_t, mcache_obj_t *, boolean_t);
 static boolean_t mleak_log(uintptr_t *, mcache_obj_t *, uint32_t, int);
 static void mleak_free(mcache_obj_t *);
+static void mleak_sort_traces(void);
+static void mleak_update_stats(void);
 
 static mcl_slab_t *slab_get(void *);
 static void slab_init(mcl_slab_t *, mbuf_class_t, u_int32_t,
@@ -769,8 +792,9 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int);
                (m)->m_pkthdr.vlan_tag = 0;                             \
                (m)->m_pkthdr.socket_id = 0;                            \
                (m)->m_pkthdr.vt_nrecs = 0;                             \
+               (m)->m_pkthdr.aux_flags = 0;                            \
                m_tag_init(m);                                          \
-               m_prio_init(m);                                         \
+               m_service_class_init(m);                                \
        }                                                               \
 }
 
@@ -856,7 +880,7 @@ static mbuf_mtypes_t *mbuf_mtypes;  /* per-CPU statistics */
        ((size_t)(&((mbuf_mtypes_t *)0)->mbs_cpu[n]))
 
 #define        MTYPES_CPU(p) \
-       ((mtypes_cpu_t *)((char *)(p) + MBUF_MTYPES_SIZE(cpu_number())))
+       ((mtypes_cpu_t *)(void *)((char *)(p) + MBUF_MTYPES_SIZE(cpu_number())))
 
 #define        mtype_stat_add(type, n) {                                       \
        if ((unsigned)(type) < MT_MAX) {                                \
@@ -1032,42 +1056,14 @@ static int
 mleak_top_trace_sysctl SYSCTL_HANDLER_ARGS
 {
 #pragma unused(oidp, arg1, arg2)
-       mleak_trace_stat_t *mltr;
        int i;
 
        /* Ensure leak tracing turned on */
-       if (!mclfindleak)
+       if (!mclfindleak || !mclexpleak)
                return (ENXIO);
 
-       VERIFY(mleak_stat != NULL);
-#ifdef __LP64__
-       VERIFY(mleak_stat->ml_isaddr64);
-#else
-       VERIFY(!mleak_stat->ml_isaddr64);
-#endif /* !__LP64__ */
-       VERIFY(mleak_stat->ml_cnt == MLEAK_NUM_TRACES);
-
        lck_mtx_lock(mleak_lock);
-       mltr = &mleak_stat->ml_trace[0];
-       bzero(mltr, sizeof (*mltr) * MLEAK_NUM_TRACES);
-       for (i = 0; i < MLEAK_NUM_TRACES; i++) {
-               int j;
-
-               if (mleak_top_trace[i] == NULL ||
-                   mleak_top_trace[i]->allocs == 0)
-                       continue;
-
-               mltr->mltr_collisions   = mleak_top_trace[i]->collisions;
-               mltr->mltr_hitcount     = mleak_top_trace[i]->hitcount;
-               mltr->mltr_allocs       = mleak_top_trace[i]->allocs;
-               mltr->mltr_depth        = mleak_top_trace[i]->depth;
-
-               VERIFY(mltr->mltr_depth <= MLEAK_STACK_DEPTH);
-               for (j = 0; j < mltr->mltr_depth; j++)
-                       mltr->mltr_addr[j] = mleak_top_trace[i]->addr[j];
-
-               mltr++;
-       }
+       mleak_update_stats();
        i = SYSCTL_OUT(req, mleak_stat, MLEAK_STAT_SIZE(MLEAK_NUM_TRACES));
        lck_mtx_unlock(mleak_lock);
 
@@ -1081,7 +1077,7 @@ mleak_table_sysctl SYSCTL_HANDLER_ARGS
        int i = 0;
 
        /* Ensure leak tracing turned on */
-       if (!mclfindleak)
+       if (!mclfindleak || !mclexpleak)
                return (ENXIO);
 
        lck_mtx_lock(mleak_lock);
@@ -1264,7 +1260,7 @@ typedef struct ncl_tbl {
 
 /* Non-server */
 static ncl_tbl_t ncl_table[] = {
-       { (1ULL << GBSHIFT)       /*  1 GB */,  (64 << MBSHIFT)  /*  64 MB */ },
+       { (1ULL << GBSHIFT)       /*  1 GB */,  (64 << MBSHIFT)  /*  64 MB */ },
        { (1ULL << (GBSHIFT + 3)) /*  8 GB */,  (96 << MBSHIFT)  /*  96 MB */ },
        { (1ULL << (GBSHIFT + 4)) /* 16 GB */,  (128 << MBSHIFT) /* 128 MB */ },
        { 0, 0 }
@@ -1272,7 +1268,7 @@ static ncl_tbl_t ncl_table[] = {
 
 /* Server */
 static ncl_tbl_t ncl_table_srv[] = {
-       { (1ULL << GBSHIFT)       /*  1 GB */,  (96 << MBSHIFT)  /*  96 MB */ },
+       { (1ULL << GBSHIFT)       /*  1 GB */,  (96 << MBSHIFT)  /*  96 MB */ },
        { (1ULL << (GBSHIFT + 2)) /*  4 GB */,  (128 << MBSHIFT) /* 128 MB */ },
        { (1ULL << (GBSHIFT + 3)) /*  8 GB */,  (160 << MBSHIFT) /* 160 MB */ },
        { (1ULL << (GBSHIFT + 4)) /* 16 GB */,  (192 << MBSHIFT) /* 192 MB */ },
@@ -1318,6 +1314,74 @@ mbinit(void)
        void *buf;
        thread_t thread = THREAD_NULL;
 
+       /*
+        * These MBUF_ values must be equal to their private counterparts.
+        */
+       _CASSERT(MBUF_EXT == M_EXT);
+       _CASSERT(MBUF_PKTHDR == M_PKTHDR);
+       _CASSERT(MBUF_EOR == M_EOR);
+       _CASSERT(MBUF_LOOP == M_LOOP);
+       _CASSERT(MBUF_BCAST == M_BCAST);
+       _CASSERT(MBUF_MCAST == M_MCAST);
+       _CASSERT(MBUF_FRAG == M_FRAG);
+       _CASSERT(MBUF_FIRSTFRAG == M_FIRSTFRAG);
+       _CASSERT(MBUF_LASTFRAG == M_LASTFRAG);
+       _CASSERT(MBUF_PROMISC == M_PROMISC);
+       _CASSERT(MBUF_HASFCS == M_HASFCS);
+
+       _CASSERT(MBUF_TYPE_FREE == MT_FREE);
+       _CASSERT(MBUF_TYPE_DATA == MT_DATA);
+       _CASSERT(MBUF_TYPE_HEADER == MT_HEADER);
+       _CASSERT(MBUF_TYPE_SOCKET == MT_SOCKET);
+       _CASSERT(MBUF_TYPE_PCB == MT_PCB);
+       _CASSERT(MBUF_TYPE_RTABLE == MT_RTABLE);
+       _CASSERT(MBUF_TYPE_HTABLE == MT_HTABLE);
+       _CASSERT(MBUF_TYPE_ATABLE == MT_ATABLE);
+       _CASSERT(MBUF_TYPE_SONAME == MT_SONAME);
+       _CASSERT(MBUF_TYPE_SOOPTS == MT_SOOPTS);
+       _CASSERT(MBUF_TYPE_FTABLE == MT_FTABLE);
+       _CASSERT(MBUF_TYPE_RIGHTS == MT_RIGHTS);
+       _CASSERT(MBUF_TYPE_IFADDR == MT_IFADDR);
+       _CASSERT(MBUF_TYPE_CONTROL == MT_CONTROL);
+       _CASSERT(MBUF_TYPE_OOBDATA == MT_OOBDATA);
+
+       _CASSERT(MBUF_TSO_IPV4 == CSUM_TSO_IPV4);
+       _CASSERT(MBUF_TSO_IPV6 == CSUM_TSO_IPV6);
+       _CASSERT(MBUF_CSUM_REQ_SUM16 == CSUM_TCP_SUM16);
+       _CASSERT(MBUF_CSUM_TCP_SUM16 == MBUF_CSUM_REQ_SUM16);
+       _CASSERT(MBUF_CSUM_REQ_IP == CSUM_IP);
+       _CASSERT(MBUF_CSUM_REQ_TCP == CSUM_TCP);
+       _CASSERT(MBUF_CSUM_REQ_UDP == CSUM_UDP);
+       _CASSERT(MBUF_CSUM_REQ_TCPIPV6 == CSUM_TCPIPV6);
+       _CASSERT(MBUF_CSUM_REQ_UDPIPV6 == CSUM_UDPIPV6);
+       _CASSERT(MBUF_CSUM_DID_IP == CSUM_IP_CHECKED);
+       _CASSERT(MBUF_CSUM_IP_GOOD == CSUM_IP_VALID);
+       _CASSERT(MBUF_CSUM_DID_DATA == CSUM_DATA_VALID);
+       _CASSERT(MBUF_CSUM_PSEUDO_HDR == CSUM_PSEUDO_HDR);
+
+       _CASSERT(MBUF_WAITOK == M_WAIT);
+       _CASSERT(MBUF_DONTWAIT == M_DONTWAIT);
+       _CASSERT(MBUF_COPYALL == M_COPYALL);
+
+       _CASSERT(MBUF_PKTAUXF_INET_RESOLVE_RTR == MAUXF_INET_RESOLVE_RTR);
+       _CASSERT(MBUF_PKTAUXF_INET6_RESOLVE_RTR == MAUXF_INET6_RESOLVE_RTR);
+
+       _CASSERT(MBUF_SC2TC(MBUF_SC_BK_SYS) == MBUF_TC_BK);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_BK) == MBUF_TC_BK);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_BE) == MBUF_TC_BE);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_RD) == MBUF_TC_BE);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_OAM) == MBUF_TC_BE);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_AV) == MBUF_TC_VI);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_RV) == MBUF_TC_VI);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_VI) == MBUF_TC_VI);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_VO) == MBUF_TC_VO);
+       _CASSERT(MBUF_SC2TC(MBUF_SC_CTL) == MBUF_TC_VO);
+
+       _CASSERT(MBUF_TC2SCVAL(MBUF_TC_BK) == SCVAL_BK);
+       _CASSERT(MBUF_TC2SCVAL(MBUF_TC_BE) == SCVAL_BE);
+       _CASSERT(MBUF_TC2SCVAL(MBUF_TC_VI) == SCVAL_VI);
+       _CASSERT(MBUF_TC2SCVAL(MBUF_TC_VO) == SCVAL_VO);
+
        if (nmbclusters == 0)
                nmbclusters = NMBCLUSTERS;
 
@@ -1331,7 +1395,7 @@ mbinit(void)
        mbuf_mlock_grp_attr = lck_grp_attr_alloc_init();
        mbuf_mlock_grp = lck_grp_alloc_init("mbuf", mbuf_mlock_grp_attr);
        mbuf_mlock_attr = lck_attr_alloc_init();
-       mbuf_mlock = lck_mtx_alloc_init(mbuf_mlock_grp, mbuf_mlock_attr);
+       lck_mtx_init(mbuf_mlock, mbuf_mlock_grp, mbuf_mlock_attr);
 
        /*
         * Allocate cluster slabs table:
@@ -1369,13 +1433,14 @@ mbinit(void)
        mclverify = (mbuf_debug & MCF_VERIFY);
        mcltrace = (mbuf_debug & MCF_TRACE);
        mclfindleak = !(mbuf_debug & MCF_NOLEAKLOG);
+       mclexpleak = mclfindleak && (mbuf_debug & MCF_EXPLEAKLOG);
 
        /* Enable mbuf leak logging, with a lock to protect the tables */
 
        mleak_lock_grp_attr = lck_grp_attr_alloc_init();
        mleak_lock_grp = lck_grp_alloc_init("mleak_lock", mleak_lock_grp_attr);
        mleak_lock_attr = lck_attr_alloc_init();
-       mleak_lock = lck_mtx_alloc_init(mleak_lock_grp, mleak_lock_attr);
+       lck_mtx_init(mleak_lock, mleak_lock_grp, mleak_lock_attr);
 
        mleak_activate();
 
@@ -1390,7 +1455,7 @@ mbinit(void)
        bzero((char *)mcl_paddr, mcl_pages * sizeof (ppnum_t));
 
        embutl = (union mbigcluster *)
-           ((unsigned char *)mbutl + (nmbclusters * MCLBYTES));
+           ((void *)((unsigned char *)mbutl + (nmbclusters * MCLBYTES)));
        VERIFY((((char *)embutl - (char *)mbutl) % MBIGCLBYTES) == 0);
 
        /* Prime up the freelist */
@@ -1501,6 +1566,10 @@ mbinit(void)
                }
        }
 
+       /* allocate space for mbuf_dump_buf */
+       MALLOC(mbuf_dump_buf, char *, MBUF_DUMP_BUF_SIZE, M_TEMP, M_WAITOK);
+       VERIFY(mbuf_dump_buf != NULL);
+
        printf("mbinit: done [%d MB total pool size, (%d/%d) split]\n",
            (nmbclusters << MCLSHIFT) >> MBSHIFT,
            (nclusters << MCLSHIFT) >> MBSHIFT,
@@ -2177,7 +2246,7 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged)
                        MEXT_REF(m) = 0;
                        MEXT_FLAGS(m) = 0;
 
-                       rfa = (mcache_obj_t *)MEXT_RFA(m);
+                       rfa = (mcache_obj_t *)(void *)MEXT_RFA(m);
                        rfa->obj_next = ref_list;
                        ref_list = rfa;
                        MEXT_RFA(m) = NULL;
@@ -2331,7 +2400,7 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
 
                rfa = (struct ext_ref *)ref_list;
                ref_list = ref_list->obj_next;
-               ((mcache_obj_t *)rfa)->obj_next = NULL;
+               ((mcache_obj_t *)(void *)rfa)->obj_next = NULL;
 
                /*
                 * If auditing is enabled, construct the shadow mbuf
@@ -3514,7 +3583,8 @@ m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
                m_tag_delete_chain(to, NULL);
        to->m_pkthdr = from->m_pkthdr;          /* especially tags */
        m_tag_init(from);                       /* purge tags from src */
-       m_prio_init(from);                      /* reset priority from src */
+       m_service_class_init(from);             /* reset svc class from src */
+       from->m_pkthdr.aux_flags = 0;           /* clear aux flags from src */
        to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
        if ((to->m_flags & M_EXT) == 0)
                to->m_data = to->m_pktdat;
@@ -3538,6 +3608,14 @@ m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
        return (m_tag_copy_chain(to, from, how));
 }
 
+void
+m_copy_pftag(struct mbuf *to, struct mbuf *from)
+{
+       to->m_pkthdr.pf_mtag = from->m_pkthdr.pf_mtag;
+       to->m_pkthdr.pf_mtag.pftag_hdr = NULL;
+       to->m_pkthdr.pf_mtag.pftag_flags &= ~(PF_TAG_HDR_INET|PF_TAG_HDR_INET6);
+}
+
 /*
  * Return a list of mbuf hdrs that point to clusters.  Try for num_needed;
  * if wantall is not set, return whatever number were available.  Set up the
@@ -3645,6 +3723,12 @@ m_getpackets_internal(unsigned int *num_needed, int num_with_pkthdrs,
                return (NULL);
        }
 
+       if (pnum > *num_needed) {
+               printf("%s: File a radar related to <rdar://10146739>. \
+                       needed = %u, pnum = %u, num_needed = %u \n",
+                       __func__, needed, pnum, *num_needed);
+       }               
+
        *num_needed = pnum;
        return (top);
 }
@@ -4086,7 +4170,7 @@ m_freem_list(struct mbuf *m)
                        if (!(m->m_flags & M_EXT))
                                goto simple_free;
 
-                       o = (mcache_obj_t *)m->m_ext.ext_buf;
+                       o = (mcache_obj_t *)(void *)m->m_ext.ext_buf;
                        refcnt = m_decref(m);
                        composite = (MEXT_FLAGS(m) & EXTF_COMPOSITE);
                        if (refcnt == 0 && !composite) {
@@ -4104,7 +4188,7 @@ m_freem_list(struct mbuf *m)
                                            m->m_ext.ext_size,
                                            m->m_ext.ext_arg);
                                }
-                               rfa = (mcache_obj_t *)MEXT_RFA(m);
+                               rfa = (mcache_obj_t *)(void *)MEXT_RFA(m);
                                rfa->obj_next = ref_list;
                                ref_list = rfa;
                                MEXT_RFA(m) = NULL;
@@ -5597,6 +5681,123 @@ m_last(struct mbuf *m)
        return (m);
 }
 
+unsigned int
+m_fixhdr(struct mbuf *m0)
+{
+       u_int len;
+
+       len = m_length2(m0, NULL);
+       m0->m_pkthdr.len = len;
+       return (len);
+}
+
+unsigned int
+m_length2(struct mbuf *m0, struct mbuf **last)
+{
+       struct mbuf *m;
+       u_int len;
+
+       len = 0;
+       for (m = m0; m != NULL; m = m->m_next) {
+               len += m->m_len;
+               if (m->m_next == NULL)
+                       break;
+       }
+       if (last != NULL)
+               *last = m;
+       return (len);
+}
+
+/*
+ * Defragment a mbuf chain, returning the shortest possible chain of mbufs
+ * and clusters.  If allocation fails and this cannot be completed, NULL will
+ * be returned, but the passed in chain will be unchanged.  Upon success,
+ * the original chain will be freed, and the new chain will be returned.
+ *
+ * If a non-packet header is passed in, the original mbuf (chain?) will
+ * be returned unharmed.
+ *
+ * If offset is specified, the first mbuf in the chain will have a leading
+ * space of the amount stated by the "off" parameter.
+ *
+ * This routine requires that the m_pkthdr.header field of the original
+ * mbuf chain is cleared by the caller.
+ */
+struct mbuf *
+m_defrag_offset(struct mbuf *m0, u_int32_t off, int how)
+{
+       struct mbuf *m_new = NULL, *m_final = NULL;
+       int progress = 0, length, pktlen;
+
+       if (!(m0->m_flags & M_PKTHDR))
+               return (m0);
+
+       VERIFY(off < MHLEN);
+       m_fixhdr(m0); /* Needed sanity check */
+
+       pktlen = m0->m_pkthdr.len + off;
+       if (pktlen > MHLEN)
+               m_final = m_getcl(how, MT_DATA, M_PKTHDR);
+       else
+               m_final = m_gethdr(how, MT_DATA);
+
+       if (m_final == NULL)
+               goto nospace;
+
+       if (off > 0) {
+               pktlen -= off;
+               m_final->m_len -= off;
+               m_final->m_data += off;
+       }
+
+       /*
+        * Caller must have handled the contents pointed to by this
+        * pointer before coming here, as otherwise it will point to
+        * the original mbuf which will get freed upon success.
+        */
+       VERIFY(m0->m_pkthdr.header == NULL);
+
+       if (m_dup_pkthdr(m_final, m0, how) == 0)
+               goto nospace;
+
+       m_new = m_final;
+
+       while (progress < pktlen) {
+               length = pktlen - progress;
+               if (length > MCLBYTES)
+                       length = MCLBYTES;
+
+               if (m_new == NULL) {
+                       if (length > MLEN)
+                               m_new = m_getcl(how, MT_DATA, 0);
+                       else
+                               m_new = m_get(how, MT_DATA);
+                       if (m_new == NULL)
+                               goto nospace;
+               }
+
+               m_copydata(m0, progress, length, mtod(m_new, caddr_t));
+               progress += length;
+               m_new->m_len = length;
+               if (m_new != m_final)
+                       m_cat(m_final, m_new);
+               m_new = NULL;
+       }
+       m_freem(m0);
+       m0 = m_final;
+       return (m0);
+nospace:
+       if (m_final)
+               m_freem(m_final);
+       return (NULL);
+}
+
+struct mbuf *
+m_defrag(struct mbuf *m0, int how)
+{
+       return (m_defrag_offset(m0, 0, how));
+}
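
m_defrag_offset() promises that on failure the caller's chain is untouched: the compacted copy is built first, and the original is freed only once everything is in place. A user-space sketch of that all-or-nothing copy loop (struct buf and chunk stand in for struct mbuf and MCLBYTES; chunk must not exceed sizeof(b->data)):

    #include <stdlib.h>
    #include <string.h>

    struct buf { struct buf *next; size_t len; char data[2048]; };

    struct buf *defrag(const char *src, size_t pktlen, size_t chunk)
    {
            struct buf *head = NULL, **tailp = &head;
            size_t progress = 0;

            while (progress < pktlen) {
                    size_t length = pktlen - progress;
                    if (length > chunk)
                            length = chunk;
                    struct buf *b = calloc(1, sizeof(*b));
                    if (b == NULL) {        /* the "nospace" path */
                            while (head != NULL) {
                                    struct buf *n = head->next;
                                    free(head);     /* drop the partial copy */
                                    head = n;
                            }
                            return NULL;    /* src is left untouched */
                    }
                    memcpy(b->data, src + progress, length);
                    b->len = length;
                    *tailp = b;
                    tailp = &b->next;
                    progress += length;
            }
            return head;
    }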
+
 void
 m_mchtype(struct mbuf *m, int t)
 {
@@ -6315,7 +6516,6 @@ mleak_log(uintptr_t *bt, mcache_obj_t *addr, uint32_t depth, int num)
        struct mallocation *allocation;
        struct mtrace *trace;
        uint32_t trace_index;
-       int i;
 
        /* Quit if someone else modifying the tables */
        if (!lck_mtx_try_lock_spin(mleak_lock)) {
@@ -6389,22 +6589,6 @@ mleak_log(uintptr_t *bt, mcache_obj_t *addr, uint32_t depth, int num)
        mleak_table.alloc_recorded++;
        mleak_table.outstanding_allocs++;
 
-       /* keep a log of the last 5 traces to be top trace, in order */
-       for (i = 0; i < MLEAK_NUM_TRACES; i++) {
-               if (mleak_top_trace[i] == NULL ||
-                   mleak_top_trace[i]->allocs <= trace->allocs) {
-                       if (mleak_top_trace[i] != trace) {
-                               int j = MLEAK_NUM_TRACES;
-                               while (--j > i) {
-                                       mleak_top_trace[j] =
-                                           mleak_top_trace[j - 1];
-                               }
-                               mleak_top_trace[i] = trace;
-                       }
-                       break;
-               }
-       }
-
        lck_mtx_unlock(mleak_lock);
        return (TRUE);
 }
@@ -6438,6 +6622,90 @@ mleak_free(mcache_obj_t *addr)
        }
 }
 
+static void
+mleak_sort_traces()
+{
+       int i, j, k;
+       struct mtrace *swap;
+
+       for(i = 0; i < MLEAK_NUM_TRACES; i++)
+               mleak_top_trace[i] = NULL;
+
+       for(i = 0, j = 0; j < MLEAK_NUM_TRACES && i < mleak_trace_buckets; i++)
+       {
+               if (mleak_traces[i].allocs <= 0)
+                       continue;
+
+               mleak_top_trace[j] = &mleak_traces[i];
+               for (k = j; k > 0; k--) {
+                       if (mleak_top_trace[k]->allocs <=
+                           mleak_top_trace[k-1]->allocs)
+                               break;
+
+                       swap = mleak_top_trace[k-1];
+                       mleak_top_trace[k-1] = mleak_top_trace[k];
+                       mleak_top_trace[k] = swap;
+               }
+               j++;
+       }
+
+       j--;
+       for(; i < mleak_trace_buckets; i++) {
+               if (mleak_traces[i].allocs <= mleak_top_trace[j]->allocs)
+                       continue;
+
+               mleak_top_trace[j] = &mleak_traces[i];
+
+               for (k = j; k > 0; k--) {
+                       if (mleak_top_trace[k]->allocs <=
+                           mleak_top_trace[k-1]->allocs)
+                               break;
+
+                       swap = mleak_top_trace[k-1];
+                       mleak_top_trace[k-1] = mleak_top_trace[k];
+                       mleak_top_trace[k] = swap;
+               }
+       }
+}
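
mleak_sort_traces() keeps only the MLEAK_NUM_TRACES busiest allocation traces, seeding the array and then insertion-bubbling each later candidate toward the front while it beats its neighbor. The core move, as a self-contained sketch over plain counters (N plays the role of MLEAK_NUM_TRACES; top[] is kept in descending order and starts zeroed):

    #define N 5

    void top_n_insert(unsigned long top[N], unsigned long allocs)
    {
            if (allocs <= top[N - 1])
                    return;                 /* not among the N largest */
            top[N - 1] = allocs;            /* displace the smallest */
            for (int k = N - 1; k > 0 && top[k] > top[k - 1]; k--) {
                    unsigned long swap = top[k - 1];
                    top[k - 1] = top[k];
                    top[k] = swap;
            }
    }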
+
+static void
+mleak_update_stats()
+{
+       mleak_trace_stat_t *mltr;
+       int i;
+
+       VERIFY(mleak_stat != NULL);
+#ifdef __LP64__
+       VERIFY(mleak_stat->ml_isaddr64);
+#else
+       VERIFY(!mleak_stat->ml_isaddr64);
+#endif /* !__LP64__ */
+       VERIFY(mleak_stat->ml_cnt == MLEAK_NUM_TRACES);
+
+       mleak_sort_traces();
+
+       mltr = &mleak_stat->ml_trace[0];
+       bzero(mltr, sizeof (*mltr) * MLEAK_NUM_TRACES);
+       for (i = 0; i < MLEAK_NUM_TRACES; i++) {
+               int j;
+
+               if (mleak_top_trace[i] == NULL ||
+                   mleak_top_trace[i]->allocs == 0)
+                       continue;
+
+               mltr->mltr_collisions   = mleak_top_trace[i]->collisions;
+               mltr->mltr_hitcount     = mleak_top_trace[i]->hitcount;
+               mltr->mltr_allocs       = mleak_top_trace[i]->allocs;
+               mltr->mltr_depth        = mleak_top_trace[i]->depth;
+
+               VERIFY(mltr->mltr_depth <= MLEAK_STACK_DEPTH);
+               for (j = 0; j < mltr->mltr_depth; j++)
+                       mltr->mltr_addr[j] = mleak_top_trace[i]->addr[j];
+
+               mltr++;
+       }
+}
+
 static struct mbtypes {
        int             mt_type;
        const char      *mt_name;
@@ -6478,8 +6746,9 @@ mbuf_dump(void)
        uint8_t seen[256];
        struct mbtypes *mp;
        mb_class_stat_t *sp;
+       mleak_trace_stat_t *mltr;
        char *c = mbuf_dump_buf;
-       int i, k, clen = sizeof (mbuf_dump_buf);
+       int i, k, clen = MBUF_DUMP_BUF_SIZE;
 
        mbuf_dump_buf[0] = '\0';
 
@@ -6577,6 +6846,77 @@ mbuf_dump(void)
            "in use)\n", totmem / 1024, totpct);
        MBUF_DUMP_BUF_CHK();
 
+       /* mbuf leak detection statistics */
+       mleak_update_stats();
+
+       k = snprintf(c, clen, "\nmbuf leak detection table:\n");
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\ttotal captured: %u (one per %u)\n",
+           mleak_table.mleak_capture / mleak_table.mleak_sample_factor,
+           mleak_table.mleak_sample_factor);
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\ttotal allocs outstanding: %llu\n",
+           mleak_table.outstanding_allocs);
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\tnew hash recorded: %llu allocs, %llu traces\n",
+           mleak_table.alloc_recorded, mleak_table.trace_recorded);
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\thash collisions: %llu allocs, %llu traces\n",
+           mleak_table.alloc_collisions, mleak_table.trace_collisions);
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\toverwrites: %llu allocs, %llu traces\n",
+           mleak_table.alloc_overwrites, mleak_table.trace_overwrites);
+       MBUF_DUMP_BUF_CHK();
+       k = snprintf(c, clen, "\tlock conflicts: %llu\n\n",
+           mleak_table.total_conflicts);
+       MBUF_DUMP_BUF_CHK();
+
+       k = snprintf(c, clen, "top %d outstanding traces:\n",
+           mleak_stat->ml_cnt);
+       MBUF_DUMP_BUF_CHK();
+       for (i = 0; i < mleak_stat->ml_cnt; i++) {
+               mltr = &mleak_stat->ml_trace[i];
+               k = snprintf(c, clen, "[%d] %llu outstanding alloc(s), "
+                   "%llu hit(s), %llu collision(s)\n", (i + 1),
+                   mltr->mltr_allocs, mltr->mltr_hitcount,
+                   mltr->mltr_collisions);
+               MBUF_DUMP_BUF_CHK();
+       }
+
+       if (mleak_stat->ml_isaddr64)
+               k = snprintf(c, clen, MB_LEAK_HDR_64);
+       else
+               k = snprintf(c, clen, MB_LEAK_HDR_32);
+       MBUF_DUMP_BUF_CHK();
+
+       for (i = 0; i < MLEAK_STACK_DEPTH; i++) {
+               int j;
+               k = snprintf(c, clen, "%2d: ", (i + 1));
+               MBUF_DUMP_BUF_CHK();
+               for (j = 0; j < mleak_stat->ml_cnt; j++) {
+                       mltr = &mleak_stat->ml_trace[j];
+                       if (i < mltr->mltr_depth) {
+                               if (mleak_stat->ml_isaddr64) {
+                                       k = snprintf(c, clen, "0x%0llx  ",
+                                           mltr->mltr_addr[i]);
+                               } else {
+                                       k = snprintf(c, clen,
+                                           "0x%08x  ",
+                                           (u_int32_t)mltr->mltr_addr[i]);
+                               }
+                       } else {
+                               if (mleak_stat->ml_isaddr64)
+                                       k = snprintf(c, clen,
+                                           MB_LEAK_SPACING_64);
+                               else
+                                       k = snprintf(c, clen,
+                                           MB_LEAK_SPACING_32);
+                       }
+                       MBUF_DUMP_BUF_CHK();
+               }
+               k = snprintf(c, clen, "\n");
+               MBUF_DUMP_BUF_CHK();
+       }
 done:
        return (mbuf_dump_buf);
 }
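mbuf_dump() builds its report by appending with snprintf() and letting MBUF_DUMP_BUF_CHK() either advance the cursor or bail to done: once the buffer is exhausted. A minimal sketch of that append-or-bail idiom, assuming a simplified BUF_CHK() stand-in rather than the kernel macro:

/*
 * Hedged sketch: k holds the would-be length from snprintf(); the macro
 * shrinks the remaining length and jumps to done: on truncation.
 */
#include <stdio.h>

#define BUF_CHK() do {                          \
        clen -= k;                              \
        if (clen < 1)                           \
                goto done;                      \
        c += k;                                 \
} while (0)

int
main(void)
{
        char buf[32];
        char *c = buf;
        int k, clen = (int)sizeof (buf);

        buf[0] = '\0';
        k = snprintf(c, clen, "header:\n");
        BUF_CHK();
        k = snprintf(c, clen, "\tvalue: %d\n", 42);
        BUF_CHK();
done:
        fputs(buf, stdout);
        return (0);
}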
index 38623846055a626e7da955f9bcf07a45692a05a9..565d005f9e2c25401466c92b768c267a00dfaa0a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -399,13 +399,15 @@ m_tag_create(u_int32_t id, u_int16_t type, int len, int wait, struct mbuf *buf)
                VERIFY(p->m_tag_cookie == M_TAG_VALID_PATTERN);
 
                struct mbuf *m = m_dtom(p);
-               struct m_taghdr *hdr = (struct m_taghdr *)m->m_data;
+               struct m_taghdr *hdr = (struct m_taghdr *)(void *)m->m_data;
 
+               VERIFY(IS_P2ALIGNED(hdr + 1, sizeof (u_int64_t)));
                VERIFY(m->m_flags & M_TAGHDR && !(m->m_flags & M_EXT));
 
                /* The mbuf can store this m_tag */
                if (M_TAG_ALIGN(len) <= MLEN - m->m_len) {
-                       t = (struct m_tag *)(m->m_data + m->m_len);
+                       t = (struct m_tag *)(void *)(m->m_data + m->m_len);
+                       VERIFY(IS_P2ALIGNED(t, sizeof (u_int64_t)));
                        hdr->refcnt++;
                        m->m_len += M_TAG_ALIGN(len);
                        VERIFY(m->m_len <= MLEN);
@@ -445,14 +447,16 @@ m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait)
 
                m->m_flags |= M_TAGHDR;
 
-               hdr = (struct m_taghdr *)m->m_data;
+               hdr = (struct m_taghdr *)(void *)m->m_data;
+               VERIFY(IS_P2ALIGNED(hdr + 1, sizeof (u_int64_t)));
                hdr->refcnt = 1;
                m->m_len += sizeof (struct m_taghdr);
-               t = (struct m_tag *)(m->m_data + m->m_len);
+               t = (struct m_tag *)(void *)(m->m_data + m->m_len);
+               VERIFY(IS_P2ALIGNED(t, sizeof (u_int64_t)));
                m->m_len += M_TAG_ALIGN(len);
                VERIFY(m->m_len <= MLEN);
         } else if (len + sizeof (struct m_tag) <= MCLBYTES) {
-               t = (struct m_tag *)m_mclalloc(wait);
+               t = (struct m_tag *)(void *)m_mclalloc(wait);
         } else {
                 t = NULL;
        }
@@ -460,6 +464,7 @@ m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait)
        if (t == NULL)
                return (NULL);
 
+       VERIFY(IS_P2ALIGNED(t, sizeof (u_int64_t)));
        t->m_tag_cookie = M_TAG_VALID_PATTERN;
        t->m_tag_type = type;
        t->m_tag_len = len;
@@ -489,10 +494,15 @@ m_tag_free(struct m_tag *t)
 #endif /* INET6 */
        if (t == NULL)
                return;
+
+       VERIFY(t->m_tag_cookie == M_TAG_VALID_PATTERN);
+
        if (M_TAG_ALIGN(t->m_tag_len) + sizeof (struct m_taghdr) <= MLEN) {
                struct mbuf * m = m_dtom(t);
                VERIFY(m->m_flags & M_TAGHDR);
-               struct m_taghdr *hdr = (struct m_taghdr *)m->m_data;
+               struct m_taghdr *hdr = (struct m_taghdr *)(void *)m->m_data;
+
+               VERIFY(IS_P2ALIGNED(hdr + 1, sizeof (u_int64_t)));
 
                /* No other tags in this mbuf */
                if (--hdr->refcnt == 0) {
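The hunks above replace direct pointer casts with casts through (void *) and add IS_P2ALIGNED() assertions: the intermediate cast silences alignment warnings while VERIFY() enforces 8-byte alignment at run time. A hedged userland illustration of the idiom; IS_P2ALIGNED is re-derived here, and every other name is invented for the example:

/*
 * A direct cast from a byte pointer to a more strictly aligned type can
 * trip -Wcast-align, so the code casts via (void *) and then asserts
 * the alignment it expects.
 */
#include <assert.h>
#include <stdint.h>

#define IS_P2ALIGNED(v, a) \
        ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)

struct tag_like {
        uint64_t cookie;
};

static struct tag_like *
tag_at(char *base, int off)
{
        struct tag_like *t = (struct tag_like *)(void *)(base + off);

        assert(IS_P2ALIGNED(t, sizeof (uint64_t)));
        return (t);
}

int
main(void)
{
        static char buf[64] __attribute__((aligned(8)));

        tag_at(buf, 8)->cookie = 0xdeadbeefULL;
        return (0);
}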
@@ -665,9 +675,8 @@ m_tag_init(struct mbuf *m)
        VERIFY(m != NULL);
 
        SLIST_INIT(&m->m_pkthdr.tags);
-#if PF_PKTHDR
        bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag));
-#endif
+       bzero(&m->m_pkthdr.tcp_mtag, sizeof (m->m_pkthdr.tcp_mtag));
 }
 
 /* Get first tag in chain. */
@@ -690,9 +699,143 @@ m_tag_next(struct mbuf *m, struct m_tag *t)
        return (SLIST_NEXT(t, m_tag_link));
 }
 
+int
+m_set_traffic_class(struct mbuf *m, mbuf_traffic_class_t tc)
+{
+       u_int32_t val = MBUF_TC2SCVAL(tc);      /* just the val portion */
+
+       return (m_set_service_class(m, m_service_class_from_val(val)));
+}
+
+mbuf_traffic_class_t
+m_get_traffic_class(struct mbuf *m)
+{
+       return (MBUF_SC2TC(m_get_service_class(m)));
+}
+
 void
-m_prio_init(struct mbuf *m)
+m_service_class_init(struct mbuf *m)
 {
        if (m->m_flags & M_PKTHDR)
-               m->m_pkthdr.prio = MBUF_TC_BE;
+               (void) m_set_service_class(m, MBUF_SC_BE);
+}
+
+int
+m_set_service_class(struct mbuf *m, mbuf_svc_class_t sc)
+{
+       int error = 0;
+
+       VERIFY(m->m_flags & M_PKTHDR);
+
+       if (MBUF_VALID_SC(sc))
+               m->m_pkthdr.svc = sc;
+       else
+               error = EINVAL;
+
+       return (error);
+}
+
+mbuf_svc_class_t
+m_get_service_class(struct mbuf *m)
+{
+       mbuf_svc_class_t sc;
+
+       VERIFY(m->m_flags & M_PKTHDR);
+
+       if (MBUF_VALID_SC(m->m_pkthdr.svc))
+               sc = m->m_pkthdr.svc;
+       else
+               sc = MBUF_SC_BE;
+
+       return (sc);
+}
+
+mbuf_svc_class_t
+m_service_class_from_idx(u_int32_t i)
+{
+       mbuf_svc_class_t sc = MBUF_SC_BE;
+
+       switch (i) {
+       case SCIDX_BK_SYS:
+               return (MBUF_SC_BK_SYS);
+
+       case SCIDX_BK:
+               return (MBUF_SC_BK);
+
+       case SCIDX_BE:
+               return (MBUF_SC_BE);
+
+       case SCIDX_RD:
+               return (MBUF_SC_RD);
+
+       case SCIDX_OAM:
+               return (MBUF_SC_OAM);
+
+       case SCIDX_AV:
+               return (MBUF_SC_AV);
+
+       case SCIDX_RV:
+               return (MBUF_SC_RV);
+
+       case SCIDX_VI:
+               return (MBUF_SC_VI);
+
+       case SCIDX_VO:
+               return (MBUF_SC_VO);
+
+       case SCIDX_CTL:
+               return (MBUF_SC_CTL);
+
+       default:
+               break;
+       }
+
+       VERIFY(0);
+       /* NOTREACHED */
+       return (sc);
+}
+
+mbuf_svc_class_t
+m_service_class_from_val(u_int32_t v)
+{
+       mbuf_svc_class_t sc = MBUF_SC_BE;
+
+       switch (v) {
+       case SCVAL_BK_SYS:
+               return (MBUF_SC_BK_SYS);
+
+       case SCVAL_BK:
+               return (MBUF_SC_BK);
+
+       case SCVAL_BE:
+               return (MBUF_SC_BE);
+
+       case SCVAL_RD:
+               return (MBUF_SC_RD);
+
+       case SCVAL_OAM:
+               return (MBUF_SC_OAM);
+
+       case SCVAL_AV:
+               return (MBUF_SC_AV);
+
+       case SCVAL_RV:
+               return (MBUF_SC_RV);
+
+       case SCVAL_VI:
+               return (MBUF_SC_VI);
+
+       case SCVAL_VO:
+               return (MBUF_SC_VO);
+
+       case SCVAL_CTL:
+               return (MBUF_SC_CTL);
+
+       default:
+               break;
+       }
+
+       VERIFY(0);
+       /* NOTREACHED */
+       return (sc);
 }
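m_service_class_from_idx() and m_service_class_from_val() are total mappings that VERIFY(0) on out-of-range input. A hedged sketch of the same contract in table-driven form; the enum and table below are illustrative stand-ins, not the kernel's MBUF_SC_* or SCIDX_* definitions:

/*
 * Illustrative only: a lookup table makes the totality of the mapping
 * explicit, with a bounds assertion where the kernel uses VERIFY(0).
 */
#include <assert.h>

typedef enum {
        SC_BK_SYS, SC_BK, SC_BE, SC_RD, SC_OAM,
        SC_AV, SC_RV, SC_VI, SC_VO, SC_CTL, SC_MAX
} svc_class_t;

static const svc_class_t sc_by_idx[SC_MAX] = {
        SC_BK_SYS, SC_BK, SC_BE, SC_RD, SC_OAM,
        SC_AV, SC_RV, SC_VI, SC_VO, SC_CTL
};

static svc_class_t
class_from_idx(unsigned int i)
{
        assert(i < SC_MAX);     /* the kernel VERIFY(0)s here instead */
        return (sc_by_idx[i]);
}

int
main(void)
{
        return (class_from_idx(2) == SC_BE ? 0 : 1);
}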
index b496895f67c75d4f6a68f0d78b27a606c5b36f64..af4b4fbe13f304cc1c7b78ec439a4f3a76e5ae4c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,7 +93,9 @@
 #include <sys/kdebug.h>
 #include <sys/un.h>
 #include <sys/user.h>
+#include <sys/priv.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip6.h>
 #include <pexpert/pexpert.h>
 #include <kern/assert.h>
 #include <kern/task.h>
-
+#include <sys/kpi_mbuf.h>
 #include <sys/mcache.h>
 
 #if CONFIG_MACF
 #include <security/mac_framework.h>
 #endif /* MAC */
 
-extern int in6_init_done;
 
 int                    so_cache_hw = 0;
 int                    so_cache_timeouts = 0;
@@ -137,6 +138,8 @@ static void filt_sordetach(struct knote *kn);
 static int     filt_soread(struct knote *kn, long hint);
 static void    filt_sowdetach(struct knote *kn);
 static int     filt_sowrite(struct knote *kn, long hint);
+static void    filt_sockdetach(struct knote *kn);
+static int     filt_sockev(struct knote *kn, long hint);
 
 static int
 sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p);
@@ -154,6 +157,11 @@ static struct filterops sowrite_filtops = {
         .f_detach = filt_sowdetach,
         .f_event = filt_sowrite,
 };
+static struct filterops sock_filtops = {
+       .f_isfd = 1,
+       .f_detach = filt_sockdetach,
+       .f_event = filt_sockev,
+};
 
 #define        EVEN_MORE_LOCKING_DEBUG 0
 int socket_debug = 0;
@@ -214,6 +222,10 @@ int sodefunctlog = 0;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sodefunctlog, 0, "");
 
+int sothrottlelog = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sothrottlelog, 0, "");
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -225,6 +237,7 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
 /* sys_generic.c */
 extern void postevent(struct socket *, struct sockbuf *, int);
 extern void evsofree(struct socket *);
+extern int tcp_notsent_lowat_check(struct socket *so);
 
 /* TODO: these should be in header file */
 extern int get_inpcb_str_size(void);
@@ -234,10 +247,6 @@ extern struct protosw *pffindprotonotype(int, int);
 extern int soclose_locked(struct socket *);
 extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *);
 
-#if CONFIG_EMBEDDED
-extern int uthread_get_background_state(uthread_t);
-#endif /*CONFIG_EMBEDDED */
-
 #ifdef __APPLE__
 
 vm_size_t      so_cache_zone_element_size;
@@ -250,7 +259,12 @@ static void so_cache_timer(void *);
 void soclose_wait_locked(struct socket *so);
 int so_isdstlocal(struct socket *so);
 
-__private_extern__ u_int32_t sotcdb = 0;
+/*
+ * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
+ * setting the DSCP code on the packet based on the service class; see
+ * <rdar://problem/11277343> for details.
+ */
+__private_extern__ u_int32_t sotcdb = SOTCDB_NO_DSCP;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sotcdb, 0, "");
 
@@ -302,9 +316,11 @@ socketinit(void)
 
        sflt_init();
 
-       VERIFY(SO_TC_MAX == SO_TC_STATS_MAX);
-       
+       _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
+
        socket_tclass_init();
+
+       socket_flowadv_init();
 }
 
 static void
@@ -369,7 +385,7 @@ cached_sock_alloc(struct socket **so, int waitok)
 
                offset = ALIGN(offset);
 
-               ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb =
+               ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
                    (caddr_t)offset;
 #if TEMPDEBUG
                kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
@@ -424,13 +440,16 @@ so_update_last_owner_locked(
        struct socket   *so,
        proc_t                  self)
 {
-       if (self == NULL)
-               self = current_proc();
-       
-       if (self)
+       if (so->last_pid != 0)
        {
-               so->last_upid = proc_uniqueid(self);
-               so->last_pid = proc_pid(self);
+               if (self == NULL)
+                       self = current_proc();
+               
+               if (self)
+               {
+                       so->last_upid = proc_uniqueid(self);
+                       so->last_pid = proc_pid(self);
+               }
        }
 }
 
@@ -500,7 +519,6 @@ soalloc(int waitok, int dom, int type)
                        return (NULL);
                }
 #endif /* MAC_SOCKET */
-               so_update_last_owner_locked(so, NULL);
        }
 
        return (so);
@@ -525,10 +543,6 @@ socreate(int dom, struct socket **aso, int type, int proto)
        register struct protosw *prp;
        register struct socket *so;
        register int error = 0;
-#if CONFIG_EMBEDDED
-       thread_t thread;
-       struct uthread *ut;
-#endif /* CONFIG_EMBEDDED */
 
 #if TCPDEBUG
        extern int tcpconsdebug;
@@ -558,9 +572,10 @@ socreate(int dom, struct socket **aso, int type, int proto)
        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);
        so->so_type = type;
+       so->last_upid = proc_uniqueid(p);
+       so->last_pid = proc_pid(p);
 
-       so->so_uid = kauth_cred_getuid(kauth_cred_get());
-       so->so_gid = kauth_cred_getgid(kauth_cred_get());
+       so->so_cred = kauth_cred_proc_ref(p);
        if (!suser(kauth_cred_get(), NULL))
                so->so_state = SS_PRIV;
 
@@ -610,38 +625,21 @@ socreate(int dom, struct socket **aso, int type, int proto)
        /*
         * If this is a background thread/task, mark the socket as such.
         */
-#if !CONFIG_EMBEDDED
-       if (proc_get_self_isbackground() != 0) 
-#else /* !CONFIG_EMBEDDED */
-       thread = current_thread();
-       ut = get_bsdthread_info(thread);
-       if (uthread_get_background_state(ut)) 
-#endif /* !CONFIG_EMBEDDED */
-       {
+       if (proc_get_self_isbackground() != 0) {
                socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
                so->so_background_thread = current_thread();
        }
 
        switch (dom) {
-    /*
-     * Don't mark Unix domain sockets as eligible for defunct by default.
-     */
+       /*
+        * Don't mark Unix domain or system sockets as eligible for
+        * defunct by default.
+        */
        case PF_LOCAL:
+       case PF_SYSTEM:
                so->so_flags |= SOF_NODEFUNCT;
                break;
-    /*
-     * Radar 9119053
-     * Since v6 initialization is asynchronous and we can't hold
-     * up the main boot path, we need to at least hold off any
-     * sockets attempting to be created until the v6 stack is
-     * up and ready. 
-     */
-       case PF_INET6:
-               if (in6_init_done == 0)
-                       ip6_fin();
-        break;
-    default:
-        break;
+       default:
+               break;
        }
 
        *aso = so;
@@ -677,7 +675,7 @@ sobind(struct socket *so, struct sockaddr *nam)
        int error = 0;
 
        socket_lock(so, 1);
-       
+       VERIFY(so->so_usecount > 1);    
        so_update_last_owner_locked(so, p);
 
        /*
@@ -709,6 +707,8 @@ out:
 void
 sodealloc(struct socket *so)
 {
+       kauth_cred_unref(&so->so_cred);
+
        /* Remove any filters */
        sflt_termsock(so);
 
@@ -754,8 +754,6 @@ solisten(struct socket *so, int backlog)
 
        socket_lock(so, 1);
        
-       so_update_last_owner_locked(so, p);
-       
        if (so->so_proto == NULL) {
                error = EINVAL;
                goto out;
@@ -898,10 +896,10 @@ soclose_wait_locked(struct socket *so)
         * Double check here and return if there's no outstanding upcall;
         * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
         */
-       if (!(so->so_flags & SOF_UPCALLINUSE) ||
-           !(so->so_flags & SOF_UPCALLCLOSEWAIT))
+       if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT))
                return;
-
+       so->so_rcv.sb_flags &= ~SB_UPCALL;
+       so->so_snd.sb_flags &= ~SB_UPCALL;
        so->so_flags |= SOF_CLOSEWAIT;
        (void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
            "soclose_wait_locked", NULL);
@@ -1037,6 +1035,15 @@ drop:
        if (so->so_usecount == 0)
                panic("soclose: usecount is zero so=%p\n", so);
        if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
+               /*
+                * Let NetworkStatistics know this PCB is going away
+                * before we detach it.
+                */
+               if (nstat_collect &&
+                   (so->so_proto->pr_domain->dom_family == AF_INET ||
+                   so->so_proto->pr_domain->dom_family == AF_INET6))
+                       nstat_pcb_detach(so->so_pcb);
+
                int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
                if (error == 0)
                        error = error2;
@@ -1047,6 +1054,9 @@ discard:
        if (so->so_pcb && so->so_state & SS_NOFDREF)
                panic("soclose: NOFDREF");
        so->so_state |= SS_NOFDREF;
+       
+       if ((so->so_flags & SOF_KNOTE) != 0)
+               KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
 #ifdef __APPLE__
        so->so_proto->pr_domain->dom_refs--;
        evsofree(so);
@@ -1062,7 +1072,7 @@ soclose(struct socket *so)
        int error = 0;
        socket_lock(so, 1);
 
-       if (so->so_flags & SOF_UPCALLINUSE)
+       if (so->so_upcallusecount)
                soclose_wait_locked(so);
 
        if (so->so_retaincnt == 0) {
@@ -1216,8 +1226,6 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
 
        if (dolock)
                socket_lock(so, 1);
-
-       so_update_last_owner_locked(so, p);
        
        /*
         * If this is a listening socket or if this is a previously-accepted
@@ -1419,8 +1427,9 @@ defunct:
        if ((atomic && resid > so->so_snd.sb_hiwat) ||
            clen > so->so_snd.sb_hiwat)
                return (EMSGSIZE);
-       if (space < resid + clen &&
-           (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) {
+       if ((space < resid + clen &&
+           (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) ||
+           (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
                if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
                    assumelock) {
                        return (EWOULDBLOCK);
@@ -2052,9 +2061,7 @@ restart:
                 * end up with false positives during select() or poll()
                 * which could put the application in a bad state.
                 */
-               if (m == NULL && so->so_rcv.sb_cc != 0)
-                       panic("soreceive corrupted so_rcv: m %p cc %u",
-                           m, so->so_rcv.sb_cc);
+               SB_MB_CHECK(&so->so_rcv);
 
                if (so->so_error) {
                        if (m)
@@ -2327,11 +2334,16 @@ dontblock:
                        }
                        cm = cmn;
                }
-               orig_resid = 0;
-               if (sb_rcv->sb_mb != NULL)
+               /* 
+                * Update the value of nextrecord in case we received new
+                * records when the socket was unlocked above for 
+                * externalizing SCM_RIGHTS.
+                */
+               if (m != NULL)
                        nextrecord = sb_rcv->sb_mb->m_nextpkt;
                else
-                       nextrecord = NULL;
+                       nextrecord = sb_rcv->sb_mb;
+               orig_resid = 0;
        }
 
        if (m != NULL) {
@@ -2356,7 +2368,6 @@ dontblock:
                        flags |= MSG_OOB;
        } else {
                if (!(flags & MSG_PEEK)) {
-                       so->so_rcv.sb_mb = nextrecord;
                        SB_EMPTY_FIXUP(&so->so_rcv);
                }
        }
@@ -2603,6 +2614,7 @@ dontblock:
                        if (m) {
                                nextrecord = m->m_nextpkt;
                        }
+                       SB_MB_CHECK(&so->so_rcv);
                }
        }
 #ifdef MORE_LOCKING_DEBUG
@@ -2650,6 +2662,7 @@ dontblock:
                        } else if (nextrecord->m_nextpkt == NULL) {
                                so->so_rcv.sb_lastrecord = nextrecord;
                        }
+                       SB_MB_CHECK(&so->so_rcv);
                }
                SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
@@ -2975,7 +2988,6 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 #endif /* MAC_SOCKET */
 
        socket_lock(so, 1);
-       so_update_last_owner_locked(so, NULL);
        
        if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
            == (SS_CANTRCVMORE | SS_CANTSENDMORE) && 
@@ -3066,17 +3078,18 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                        switch (sopt->sopt_name) {
                        case SO_SNDBUF:
                        case SO_RCVBUF:
-                               if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
-                                   &so->so_snd : &so->so_rcv,
-                                   (u_int32_t) optval) == 0) {
+                       {
+                               struct sockbuf *sb = (sopt->sopt_name == SO_SNDBUF) ?
+                                       &so->so_snd : &so->so_rcv;
+                               if (sbreserve(sb, (u_int32_t) optval) == 0) {
                                        error = ENOBUFS;
                                        goto bad;
                                }
-                               if (sopt->sopt_name == SO_SNDBUF)
-                                       so->so_snd.sb_flags |= SB_USRSIZE;
-                               else
-                                       so->so_rcv.sb_flags |= SB_USRSIZE;
+                               sb->sb_flags |= SB_USRSIZE;
+                               sb->sb_flags &= ~SB_AUTOSIZE;
+                               sb->sb_idealsize = (u_int32_t)optval;
                                break;
+                       }
 
                        /*
                         * Make sure the low-water is never greater than
@@ -3268,12 +3281,12 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                                so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
                        break;
                }
-               
+
                case SO_TRAFFIC_CLASS_DBG: {
                        struct so_tcdbg so_tcdbg;
-                       
-                       error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
-                               sizeof (struct so_tcdbg));
+
+                       error = sooptcopyin(sopt, &so_tcdbg,
+                           sizeof (struct so_tcdbg), sizeof (struct so_tcdbg));
                        if (error)
                                goto bad;
                        error = so_set_tcdbg(so, &so_tcdbg);
@@ -3281,7 +3294,22 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                                goto bad;
                        break;
                }
-               
+
+               case SO_PRIVILEGED_TRAFFIC_CLASS:
+                       error = priv_check_cred(kauth_cred_get(),
+                           PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
+                       if (error)
+                               goto bad;
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval == 0)
+                               so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
+                       else
+                               so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
+                       break;
+
                case SO_DEFUNCTOK:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval));
@@ -3317,6 +3345,25 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                        error = EINVAL;
                        break;
 
+               case SO_OPPORTUNISTIC:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error == 0)
+                               error = so_set_opportunistic(so, optval);
+                       break;
+
+               case SO_FLUSH:
+                       /* This option is handled by lower layer(s) */
+                       error = 0;
+                       break;
+
+               case SO_RECV_ANYIF:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error == 0)
+                               error = so_set_recv_anyif(so, optval);
+                       break;
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -3414,7 +3461,6 @@ sogetopt(struct socket *so, struct sockopt *sopt)
        }
 
        socket_lock(so, 1);
-       so_update_last_owner_locked(so, NULL);
 
        error = sflt_getsockopt(so, sopt);
        if (error) {
@@ -3593,18 +3639,23 @@ integer:
                case SO_TRAFFIC_CLASS:
                        optval = so->so_traffic_class;
                        goto integer;
-               
+
                case SO_RECV_TRAFFIC_CLASS:
                        optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
                        goto integer;
 
                case SO_TRAFFIC_CLASS_STATS:
                        error = sooptcopyout(sopt, &so->so_tc_stats, sizeof(so->so_tc_stats));
+                       break;
 
                case SO_TRAFFIC_CLASS_DBG: 
                        error = sogetopt_tcdbg(so, sopt);
                        break;
-               
+
+               case SO_PRIVILEGED_TRAFFIC_CLASS:
+                       optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
+                       goto integer;
+
                case SO_DEFUNCTOK:
                        optval = !(so->so_flags & SOF_NODEFUNCT);
                        goto integer;
@@ -3613,6 +3664,19 @@ integer:
                        optval = (so->so_flags & SOF_DEFUNCT);
                        goto integer;
 
+               case SO_OPPORTUNISTIC:
+                       optval = so_get_opportunistic(so);
+                       goto integer;
+
+               case SO_FLUSH:
+                       /* This option is not gettable */
+                       error = EINVAL;
+                       break;
+
+               case SO_RECV_ANYIF:
+                       optval = so_get_recv_anyif(so);
+                       goto integer;
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -3763,7 +3827,6 @@ sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
        int revents = 0;
 
        socket_lock(so, 1);
-       so_update_last_owner_locked(so, p);
 
        if (events & (POLLIN | POLLRDNORM))
                if (soreadable(so))
@@ -3806,7 +3869,7 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
     __unused struct proc *p)
 {
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
-       struct sockbuf *sb;
+       struct klist *skl;
 
        socket_lock(so, 1);
 
@@ -3820,19 +3883,37 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
        switch (kn->kn_filter) {
        case EVFILT_READ:
                kn->kn_fop = &soread_filtops;
-               sb = &so->so_rcv;
+               skl = &so->so_rcv.sb_sel.si_note;
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &sowrite_filtops;
-               sb = &so->so_snd;
+               skl = &so->so_snd.sb_sel.si_note;
+               break;
+       case EVFILT_SOCK:
+               kn->kn_fop = &sock_filtops;
+               skl = &so->so_klist;
                break;
        default:
                socket_unlock(so, 1);
                return (1);
        }
 
-       if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
-               sb->sb_flags |= SB_KNOTE;
+       if (KNOTE_ATTACH(skl, kn)) {
+               switch (kn->kn_filter) {
+               case EVFILT_READ:
+                       so->so_rcv.sb_flags |= SB_KNOTE;
+                       break;
+               case EVFILT_WRITE:
+                       so->so_snd.sb_flags |= SB_KNOTE;
+                       break;
+               case EVFILT_SOCK:
+                       so->so_flags |= SOF_KNOTE;
+                       break;
+               default:
+                       socket_unlock(so, 1);
+                       return (1);
+               }
+       }
        socket_unlock(so, 1);
        return (0);
 }
@@ -3945,11 +4026,25 @@ filt_sowdetach(struct knote *kn)
        socket_unlock(so, 1);
 }
 
+int
+so_wait_for_if_feedback(struct socket *so)
+{
+       if ((so->so_proto->pr_domain->dom_family == AF_INET ||
+           so->so_proto->pr_domain->dom_family == AF_INET6) &&
+           (so->so_state & SS_ISCONNECTED)) {
+               struct inpcb *inp = sotoinpcb(so);
+               if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+                       return (1);
+       }
+       return (0);
+}
+
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+       int ret = 0;
 
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_lock(so, 1);
@@ -3958,20 +4053,17 @@ filt_sowrite(struct knote *kn, long hint)
        if (so->so_state & SS_CANTSENDMORE) {
                kn->kn_flags |= EV_EOF;
                kn->kn_fflags = so->so_error;
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (1);
+               ret = 1;
+               goto out;
        }
        if (so->so_error) {     /* temporary udp error */
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (1);
+               ret = 1;
+               goto out;
        }
        if (((so->so_state & SS_ISCONNECTED) == 0) &&
            (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (0);
+               ret = 0;
+               goto out;
        }
        int64_t lowwat = so->so_snd.sb_lowat;
        if (kn->kn_sfflags & NOTE_LOWAT)
@@ -3981,9 +4073,119 @@ filt_sowrite(struct knote *kn, long hint)
                else if (kn->kn_sdata > lowwat)
                        lowwat = kn->kn_sdata;
        }
+       if (kn->kn_data >= lowwat) {
+               if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
+                       ret = tcp_notsent_lowat_check(so);
+               } else {
+                       ret = 1;
+               }
+       }
+       if (so_wait_for_if_feedback(so))
+               ret = 0;
+out:
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
-       return (kn->kn_data >= lowwat);
+       return (ret);
+}
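filt_sowrite() now funnels every exit through a single unlock path and honors a per-knote low-water mark from kn_sfflags/kn_sdata, plus the new SOF_NOTSENT_LOWAT check. A minimal userspace sketch registering an EVFILT_WRITE knote with NOTE_LOWAT; the 16 KB threshold is arbitrary:

/*
 * Hedged sketch: NOTE_LOWAT in fflags with the threshold in data is
 * what filt_sowrite() reads back from kn_sfflags and kn_sdata.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int kq = kqueue();
        int s = socket(AF_INET, SOCK_STREAM, 0);
        struct kevent ev;

        EV_SET(&ev, s, EVFILT_WRITE, EV_ADD, NOTE_LOWAT, 16 * 1024, NULL);
        if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
                perror("kevent");
        close(s);
        close(kq);
        return (0);
}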
+
+static void
+filt_sockdetach(struct knote *kn)
+{
+       struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+       socket_lock(so, 1);
+       
+       if ((so->so_flags & SOF_KNOTE) != 0)
+               if (KNOTE_DETACH(&so->so_klist, kn))
+                       so->so_flags &= ~SOF_KNOTE;
+       socket_unlock(so, 1);
+}
+
+static int
+filt_sockev(struct knote *kn, long hint)
+{
+       int ret = 0, locked = 0;
+       struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+       if ((hint & SO_FILT_HINT_LOCKED) == 0) {
+               socket_lock(so, 1);
+               locked = 1;
+       }
+
+       switch (hint & SO_FILT_HINT_EV) {
+       case SO_FILT_HINT_CONNRESET:
+               if (kn->kn_sfflags & NOTE_CONNRESET)
+                       kn->kn_fflags |= NOTE_CONNRESET;
+               break;
+       case SO_FILT_HINT_TIMEOUT:
+               if (kn->kn_sfflags & NOTE_TIMEOUT)
+                       kn->kn_fflags |= NOTE_TIMEOUT;
+               break;
+       case SO_FILT_HINT_NOSRCADDR:
+               if (kn->kn_sfflags & NOTE_NOSRCADDR)
+                       kn->kn_fflags |= NOTE_NOSRCADDR;
+               break;
+       case SO_FILT_HINT_IFDENIED:
+               if ((kn->kn_sfflags & NOTE_IFDENIED))
+                       kn->kn_fflags |= NOTE_IFDENIED;
+               break;
+       case SO_FILT_HINT_KEEPALIVE:
+               if (kn->kn_sfflags & NOTE_KEEPALIVE)
+                       kn->kn_fflags |= NOTE_KEEPALIVE;
+                       kn->kn_fflags |= NOTE_KEEPALIVE;
+               break;
+
+       if ((kn->kn_sfflags & NOTE_READCLOSED) &&
+               (so->so_state & SS_CANTRCVMORE))
+               kn->kn_fflags |= NOTE_READCLOSED;
+
+       if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
+               (so->so_state & SS_CANTSENDMORE))
+               kn->kn_fflags |= NOTE_WRITECLOSED;
+
+       if ((kn->kn_sfflags & NOTE_SUSPEND) &&
+           ((hint & SO_FILT_HINT_SUSPEND) ||
+           (so->so_flags & SOF_SUSPENDED))) {
+               kn->kn_fflags &=
+                       ~(NOTE_SUSPEND | NOTE_RESUME);
+               kn->kn_fflags |= NOTE_SUSPEND;
+       }
+
+       if ((kn->kn_sfflags & NOTE_RESUME) &&
+           ((hint & SO_FILT_HINT_RESUME) ||
+           (so->so_flags & SOF_SUSPENDED) == 0)) {
+               kn->kn_fflags &=
+                       ~(NOTE_SUSPEND | NOTE_RESUME);
+               kn->kn_fflags |= NOTE_RESUME;
+       }
+
+       if (so->so_error != 0) {
+               ret = 1;
+               kn->kn_data = so->so_error;
+               kn->kn_flags |= EV_EOF;
+       } else {
+               get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
+       }
+
+       if (kn->kn_fflags != 0)
+               ret = 1;
+
+       if (locked)
+               socket_unlock(so, 1);
+
+       return (ret);
+}
+
+void
+get_sockev_state(struct socket *so, u_int32_t *statep)
+{
+       u_int32_t state = *(statep);
+
+       if (so->so_state & SS_ISCONNECTED)      
+               state |= SOCKEV_CONNECTED;
+       else 
+               state &= ~(SOCKEV_CONNECTED);
+       state |= ((so->so_state & SS_ISDISCONNECTED) ?
+               SOCKEV_DISCONNECTED : 0);
+       *(statep) = state;
+       return;
 }
 
 #define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
@@ -4227,3 +4429,40 @@ sodefunct(struct proc *p, struct socket *so, int level)
 done:
        return (0);
 }
+
+__private_extern__ int
+so_set_recv_anyif(struct socket *so, int optval)
+{
+       int ret = 0;
+
+#if INET6
+       if (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) {
+#else
+       if (INP_SOCKAF(so) == AF_INET) {
+#endif /* !INET6 */
+               if (optval)
+                       sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
+               else
+                       sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
+       } else {
+               ret = EPROTONOSUPPORT;
+       }
+
+       return (ret);
+}
+
+__private_extern__ int
+so_get_recv_anyif(struct socket *so)
+{
+       int ret = 0;
+
+#if INET6
+       if (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) {
+#else
+       if (INP_SOCKAF(so) == AF_INET) {
+#endif /* !INET6 */
+               ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
+       }
+
+       return (ret);
+}
index 4b71dd80c5ce816f569827b30f9bfe62492ea06d..32b896ee819713702cb61956ceb67f4a514627a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,7 @@
 #include <sys/kauth.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/protosw.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -94,6 +95,8 @@
 #include <security/mac_framework.h>
 #endif
 
+#include <mach/vm_param.h>
+
 /* TODO: this should be in a header file somewhere */
 extern void postevent(struct socket *, struct sockbuf *, int);
 
@@ -122,9 +125,11 @@ u_int32_t  sb_max = SB_MAX;                /* XXX should be static */
 u_int32_t      high_sb_max = SB_MAX;
 
 static u_int32_t sb_efficiency = 8;    /* parameter for sbreserve() */
-__private_extern__ unsigned int total_mb_cnt = 0;
-__private_extern__ unsigned int total_cl_cnt = 0;
-__private_extern__ int sbspace_factor = 8;
+__private_extern__ int32_t total_sbmb_cnt = 0;
+
+/* Control whether to throttle sockets eligible to be throttled */
+__private_extern__ u_int32_t net_io_policy_throttled = 0;
+static int sysctl_io_policy_throttled SYSCTL_HANDLER_ARGS;
 
 /*
  * Procedures to manipulate state flags of socket
@@ -197,6 +202,7 @@ soisconnected(struct socket *so)
                wakeup((caddr_t)&so->so_timeo);
                sorwakeup(so);
                sowwakeup(so);
+               soevent(so, SO_FILT_HINT_LOCKED);
        }
 }
 
@@ -205,6 +211,7 @@ soisdisconnecting(struct socket *so)
 {
        so->so_state &= ~SS_ISCONNECTING;
        so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+       soevent(so, SO_FILT_HINT_LOCKED);
        sflt_notify(so, sock_evt_disconnecting, NULL);
        wakeup((caddr_t)&so->so_timeo);
        sowwakeup(so);
@@ -216,6 +223,7 @@ soisdisconnected(struct socket *so)
 {
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
+       soevent(so, SO_FILT_HINT_LOCKED);
        sflt_notify(so, sock_evt_disconnected, NULL);
        wakeup((caddr_t)&so->so_timeo);
        sowwakeup(so);
@@ -231,6 +239,7 @@ sodisconnectwakeup(struct socket *so)
 {
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
+       soevent(so, SO_FILT_HINT_LOCKED);
        wakeup((caddr_t)&so->so_timeo);
        sowwakeup(so);
        sorwakeup(so);
@@ -297,8 +306,10 @@ sonewconn_internal(struct socket *head, int connstatus)
        so->so_proto = head->so_proto;
        so->so_timeo = head->so_timeo;
        so->so_pgid  = head->so_pgid;
-       so->so_uid = head->so_uid;
-       so->so_gid = head->so_gid;
+       kauth_cred_ref(head->so_cred);
+       so->so_cred = head->so_cred;
+       so->last_pid = head->last_pid;
+       so->last_upid = head->last_upid;
        /* inherit socket options stored in so_flags */
        so->so_flags = head->so_flags & (SOF_NOSIGPIPE |
                                         SOF_NOADDRAVAIL |
@@ -306,7 +317,10 @@ sonewconn_internal(struct socket *head, int connstatus)
                                         SOF_NOTIFYCONFLICT | 
                                         SOF_BINDRANDOMPORT | 
                                         SOF_NPX_SETOPTSHUT |
-                                        SOF_NODEFUNCT);
+                                        SOF_NODEFUNCT |
+                                        SOF_PRIVILEGED_TRAFFIC_CLASS|
+                                        SOF_NOTSENT_LOWAT |
+                                        SOF_USELRO);
        so->so_usecount = 1;
        so->next_lock_lr = 0;
        so->next_unlock_lr = 0;
@@ -330,6 +344,8 @@ sonewconn_internal(struct socket *head, int connstatus)
                sodealloc(so);
                return ((struct socket *)0);
        }
+       so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
+       so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
 
        /*
         * Must be done with head unlocked to avoid deadlock
@@ -419,6 +435,7 @@ void
 socantsendmore(struct socket *so)
 {
        so->so_state |= SS_CANTSENDMORE;
+       soevent(so, SO_FILT_HINT_LOCKED);
        sflt_notify(so, sock_evt_cantsendmore, NULL);
        sowwakeup(so);
 }
@@ -427,6 +444,7 @@ void
 socantrcvmore(struct socket *so)
 {
        so->so_state |= SS_CANTRCVMORE;
+       soevent(so, SO_FILT_HINT_LOCKED);
        sflt_notify(so, sock_evt_cantrecvmore, NULL);
        sorwakeup(so);
 }
@@ -576,15 +594,15 @@ sowakeup(struct socket *so, struct sockbuf *sb)
                so_upcall = so->so_upcall;
                so_upcallarg = so->so_upcallarg;
                /* Let close know that we're about to do an upcall */
-               so->so_flags |= SOF_UPCALLINUSE;
+               so->so_upcallusecount++;
 
                socket_unlock(so, 0);
                (*so_upcall)(so, so_upcallarg, M_DONTWAIT);
                socket_lock(so, 0);
 
-               so->so_flags &= ~SOF_UPCALLINUSE;
+               so->so_upcallusecount--;
                /* Tell close that it's safe to proceed */
-               if (so->so_flags & SOF_CLOSEWAIT)
+               if (so->so_flags & SOF_CLOSEWAIT && so->so_upcallusecount == 0)
                        wakeup((caddr_t)&so->so_upcall);
        }
 }
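The upcall bookkeeping above moves from a single SOF_UPCALLINUSE flag to a so_upcallusecount counter, so soclose_wait_locked() proceeds only after the last concurrent upcall drains. A hedged userland analogy of the pattern using pthreads in place of msleep()/wakeup(); all names below are invented:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int upcall_use = 0;

static void
upcall_enter(void)
{
        pthread_mutex_lock(&lock);
        upcall_use++;
        pthread_mutex_unlock(&lock);
}

static void
upcall_exit(void)
{
        pthread_mutex_lock(&lock);
        if (--upcall_use == 0)
                pthread_cond_broadcast(&drained);       /* cf. wakeup() */
        pthread_mutex_unlock(&lock);
}

static void
close_wait(void)
{
        pthread_mutex_lock(&lock);
        while (upcall_use != 0)
                pthread_cond_wait(&drained, &lock);     /* cf. msleep() */
        pthread_mutex_unlock(&lock);
}

int
main(void)
{
        upcall_enter();
        upcall_exit();
        close_wait();
        return (0);
}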
@@ -631,8 +649,14 @@ soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc)
 
        if (sbreserve(&so->so_snd, sndcc) == 0)
                goto bad;
+       else
+               so->so_snd.sb_idealsize = sndcc;
+
        if (sbreserve(&so->so_rcv, rcvcc) == 0)
                goto bad2;
+       else
+               so->so_rcv.sb_idealsize = rcvcc;
+
        if (so->so_rcv.sb_lowat == 0)
                so->so_rcv.sb_lowat = 1;
        if (so->so_snd.sb_lowat == 0)
@@ -1445,6 +1469,7 @@ sbcreatecontrol(caddr_t p, int size, int type, int level)
        if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
                return ((struct mbuf *)NULL);
        cp = mtod(m, struct cmsghdr *);
+       VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t)));
        /* XXX check size? */
        (void) memcpy(CMSG_DATA(cp), p, size);
        m->m_len = CMSG_SPACE(size);
@@ -1464,24 +1489,26 @@ sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** mp)
                *mp = sbcreatecontrol(p, size, type, level);
                return mp;
        }
-       
+
        if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN){
                mp = &(*mp)->m_next;
                *mp = sbcreatecontrol(p, size, type, level);
                return mp;
        }
-       
+
        m = *mp;
-       
-       cp = (struct cmsghdr *) (mtod(m, char *) + m->m_len);
+
+       cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
+       /* CMSG_SPACE ensures 32-bit alignment */
+       VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t)));
        m->m_len += CMSG_SPACE(size);
-       
+
        /* XXX check size? */
        (void) memcpy(CMSG_DATA(cp), p, size);
        cp->cmsg_len = CMSG_LEN(size);
        cp->cmsg_level = level;
        cp->cmsg_type = type;
-       
+
        return mp;
 }
 
@@ -1699,9 +1726,10 @@ soreadable(struct socket *so)
 int
 sowriteable(struct socket *so)
 {
-       return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
-           ((so->so_state&SS_ISCONNECTED) ||
-           (so->so_proto->pr_flags&PR_CONNREQUIRED) == 0)) ||
+       return ((!so_wait_for_if_feedback(so) &&
+           sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
+           ((so->so_state & SS_ISCONNECTED) ||
+           (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
            (so->so_state & SS_CANTSENDMORE) ||
            so->so_error);
 }
@@ -1711,7 +1739,7 @@ sowriteable(struct socket *so)
 void
 sballoc(struct sockbuf *sb, struct mbuf *m)
 {
-       int cnt = 1;
+       u_int32_t cnt = 1;
        sb->sb_cc += m->m_len; 
        if (m->m_type != MT_DATA && m->m_type != MT_HEADER && 
                m->m_type != MT_OOBDATA)
@@ -1720,9 +1748,10 @@ sballoc(struct sockbuf *sb, struct mbuf *m)
        
        if (m->m_flags & M_EXT) {
                sb->sb_mbcnt += m->m_ext.ext_size; 
-               cnt += m->m_ext.ext_size / MSIZE ;
+               cnt += (m->m_ext.ext_size >> MSIZESHIFT) ;
        }
-       OSAddAtomic(cnt, &total_mb_cnt);
+       OSAddAtomic(cnt, &total_sbmb_cnt);
+       VERIFY(total_sbmb_cnt > 0);
 }
 
 /* adjust counters in sb reflecting freeing of m */
@@ -1730,6 +1759,7 @@ void
 sbfree(struct sockbuf *sb, struct mbuf *m)
 {
        int cnt = -1;
+
        sb->sb_cc -= m->m_len;
        if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&     
                m->m_type != MT_OOBDATA)
@@ -1737,9 +1767,10 @@ sbfree(struct sockbuf *sb, struct mbuf *m)
        sb->sb_mbcnt -= MSIZE; 
        if (m->m_flags & M_EXT) {
                sb->sb_mbcnt -= m->m_ext.ext_size; 
-               cnt -= m->m_ext.ext_size / MSIZE ;
+               cnt -= (m->m_ext.ext_size >> MSIZESHIFT) ;
        }
-       OSAddAtomic(cnt, &total_mb_cnt);
+       OSAddAtomic(cnt, &total_sbmb_cnt);
+       VERIFY(total_sbmb_cnt >= 0);
 }
 
 /*
@@ -1818,6 +1849,14 @@ sowwakeup(struct socket *so)
        if (sb_notify(&so->so_snd))
                sowakeup(so, &so->so_snd);
 }
+
+void
+soevent(struct socket *so, long hint)
+{
+       if (so->so_flags & SOF_KNOTE)
+               KNOTE(&so->so_klist, hint);
+}
+
 #endif /* __APPLE__ */
 
 /*
@@ -1847,12 +1886,12 @@ void
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
        xso->xso_len = sizeof (*xso);
-       xso->xso_so = (_XSOCKET_PTR(struct socket *))(uintptr_t)so;
+       xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
        xso->so_type = so->so_type;
-       xso->so_options = so->so_options;
+       xso->so_options = (short)(so->so_options & 0xffff);
        xso->so_linger = so->so_linger;
        xso->so_state = so->so_state;
-       xso->so_pcb = (_XSOCKET_PTR(caddr_t))(uintptr_t)so->so_pcb;
+       xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
        if (so->so_proto) {
                xso->xso_protocol = so->so_proto->pr_protocol;
                xso->xso_family = so->so_proto->pr_domain->dom_family;
@@ -1868,7 +1907,7 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
        xso->so_oobmark = so->so_oobmark;
        sbtoxsockbuf(&so->so_snd, &xso->so_snd);
        sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
-       xso->so_uid = so->so_uid;
+       xso->so_uid = kauth_cred_getuid(so->so_cred);
 }
 
 
@@ -1878,12 +1917,12 @@ void
 sotoxsocket64(struct socket *so, struct xsocket64 *xso)
 {
         xso->xso_len = sizeof (*xso);
-        xso->xso_so = (u_int64_t)(uintptr_t)so;
+        xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
         xso->so_type = so->so_type;
-        xso->so_options = so->so_options;
+        xso->so_options = (short)(so->so_options & 0xffff);
         xso->so_linger = so->so_linger;
         xso->so_state = so->so_state;
-        xso->so_pcb = (u_int64_t)(uintptr_t)so->so_pcb;
+        xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
         if (so->so_proto) {
                 xso->xso_protocol = so->so_proto->pr_protocol;
                 xso->xso_family = so->so_proto->pr_domain->dom_family;
@@ -1899,7 +1938,7 @@ sotoxsocket64(struct socket *so, struct xsocket64 *xso)
         xso->so_oobmark = so->so_oobmark;
         sbtoxsockbuf(&so->so_snd, &xso->so_snd);
         sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
-        xso->so_uid = so->so_uid;
+        xso->so_uid = kauth_cred_getuid(so->so_cred);
 }
 
 #endif /* !CONFIG_EMBEDDED */
@@ -1925,12 +1964,29 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
                xsb->sb_timeo = 1;
 }
 
+/*
+ * Based on the policy set by an all-knowing decision maker, throttle
+ * sockets that have been marked as belonging to a "background" process.
+ */
 int
-soisbackground(struct socket *so)
+soisthrottled(struct socket *so)
 {
-       return (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND);
+       /*
+        * On non-embedded, we rely on implicit throttling by the application,
+        * as we're missing the system-wide "decision maker".
+        */
+       return (
+#if CONFIG_EMBEDDED
+           net_io_policy_throttled &&
+#endif /* CONFIG_EMBEDDED */
+           (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND));
 }
 
+int
+soisprivilegedtraffic(struct socket *so)
+{
+       return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
+}
 
 /*
  * Here is the definition of some of the basic objects in the kern.ipc
@@ -1959,6 +2015,27 @@ sysctl_sb_max(__unused struct sysctl_oid *oidp, __unused void *arg1,
        return error;
 }
 
+static int
+sysctl_io_policy_throttled SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int i, err;
+
+       i = net_io_policy_throttled;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (i != net_io_policy_throttled)
+               SOTHROTTLELOG(("throttle: network IO policy throttling is "
+                   "now %s\n", i ? "ON" : "OFF"));
+
+       net_io_policy_throttled = i;
+
+       return (err);
+}
+
 SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
 
@@ -1966,8 +2043,6 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD | CTLFLAG_LOCKED,
     &maxsockets, 0, "Maximum number of sockets available");
 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sb_efficiency, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &sbspace_factor, 0, "Ratio of mbuf/cluster use for socket layers");
 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD | CTLFLAG_LOCKED,
     &nmbclusters, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
@@ -1976,3 +2051,9 @@ SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW | CTLFLAG_
     &soqlimitcompat, 1, "Enable socket queue limit compatibility");
 SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW | CTLFLAG_LOCKED,
     &soqlencomp, 0, "Listen backlog represents only complete queue");
+
+SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
+
+SYSCTL_PROC(_kern_ipc_io_policy, OID_AUTO, throttled,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttled, 0,
+    sysctl_io_policy_throttled, "I", "");
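The new OID is reachable as kern.ipc.io_policy.throttled; its handler logs transitions through SOTHROTTLELOG before updating net_io_policy_throttled. A small userspace sketch of toggling it via sysctlbyname(3), which assumes sufficient privilege to write the knob:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int newval = 1, oldval = 0;
        size_t oldlen = sizeof (oldval);

        /* kern.ipc.io_policy.throttled is the OID registered above */
        if (sysctlbyname("kern.ipc.io_policy.throttled", &oldval, &oldlen,
            &newval, sizeof (newval)) == -1)
                perror("sysctlbyname");
        else
                printf("was %d, now %d\n", oldval, newval);
        return (0);
}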
index 8a6356d5a2435982d4732963a3ed30c2fd1ca3bf..bd2bd5dd35fedfeda615f3d4b8cea94ef33e962b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -199,12 +199,7 @@ socket(struct proc *p, struct socket_args *uap, int32_t *retval)
                ut = get_bsdthread_info(thread);
                        
                /* if this is a backgrounded thread then throttle all new sockets */
-#if !CONFIG_EMBEDDED
-               if (proc_get_selfthread_isbackground() != 0)
-#else /* !CONFIG_EMBEDDED */
-               if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) 
-#endif /* !CONFIG_EMBEDDED */
-               {
+               if (proc_get_selfthread_isbackground() != 0) {
                        so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
                        so->so_background_thread = thread;
                }
@@ -475,17 +470,17 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
        fflag = fp->f_flag;
        error = falloc(p, &fp, &newfd, vfs_context_current());
        if (error) {
-               /*
-                * Probably ran out of file descriptors. Put the
-                * unaccepted connection back onto the queue and
-                * do another wakeup so some other process might
-                * have a chance at it.
+               /* 
+                * Probably ran out of file descriptors.
+                *
+                * <rdar://problem/8554930>
+                * Don't put this back on the socket like we used to, that
+                * just causes the client to spin. Drop the socket.
                 */
-               socket_lock(head, 0);
-               TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
-               head->so_qlen++;
-               wakeup_one((caddr_t)&head->so_timeo);
-               socket_unlock(head, 1);
+               so->so_state &= ~(SS_NOFDREF | SS_COMP);
+               so->so_head = NULL;
+               soclose(so);
+               sodereference(head);
                goto out;
        }
        *retval = newfd;
@@ -864,9 +859,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
        /*
         * We check the state without holding the socket lock;
         * if a race condition occurs, it would simply result
-        * in an extra call to the MAC check function.
+        * in an extra call to the MAC check function. 
         */
-       if (!(so->so_state & SS_ISCONNECTED) &&
+       if ( to != NULL &&
+           !(so->so_state & SS_DEFUNCT) &&
            (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
                goto bad;
 #endif /* MAC_SOCKET_SUBSET */
@@ -1149,7 +1145,8 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
         * if a race condition occurs, it would simply result
         * in an extra call to the MAC check function.
         */
-       if (!(so->so_state & SS_ISCONNECTED) &&
+       if (!(so->so_state & SS_DEFUNCT) &&
+           !(so->so_state & SS_ISCONNECTED) &&
            (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
                goto out1;
 #endif /* MAC_SOCKET_SUBSET */
@@ -1220,15 +1217,15 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
                                 */
                                if (cp->cmsg_level == SOL_SOCKET &&  cp->cmsg_type == SCM_TIMESTAMP) {
                                        unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
-                                       struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
+                                       struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
                                        int tmp_space;
-                                       struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
+                                       struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
                                        
                                        tmp_cp->cmsg_level = SOL_SOCKET;
                                        tmp_cp->cmsg_type = SCM_TIMESTAMP;
                                        
                                        if (proc_is64bit(p)) {
-                                               struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
+                                               struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
                                                
                                                tv64->tv_sec = tv->tv_sec;
                                                tv64->tv_usec = tv->tv_usec;
@@ -1236,7 +1233,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
                                                tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
                                                tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
                                        } else {
-                                               struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
+                                               struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
                                                
                                                tv32->tv_sec = tv->tv_sec;
                                                tv32->tv_usec = tv->tv_usec;
@@ -1278,7 +1275,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
                                len -= tocopy;
                                
                                buflen -= cp_size;
-                               cp = (struct cmsghdr *) ((unsigned char *) cp + cp_size);
+                               cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size);
                                cp_size = CMSG_ALIGN(cp->cmsg_len);
                        }
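
The 32/64-bit conversion above mirrors what a userspace consumer sees: with SO_TIMESTAMP enabled, recvmsg() delivers the receive time as an SCM_TIMESTAMP control message sized for the caller's ABI, and the `(struct foo *)(void *)` casts in the hunks above are the usual idiom for quieting cast-alignment warnings. A minimal sketch of the standard consumer side (buffer sizes and names are illustrative, not from this source):

    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <sys/time.h>
    #include <string.h>

    /* Assumes s is a datagram socket with SO_TIMESTAMP already enabled. */
    static void
    read_with_timestamp(int s)
    {
            char data[2048];
            char ctl[CMSG_SPACE(sizeof (struct timeval))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof (data) };
            struct msghdr msg = { 0 };
            struct cmsghdr *c;

            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = ctl;
            msg.msg_controllen = sizeof (ctl);

            if (recvmsg(s, &msg, 0) < 0)
                    return;
            for (c = CMSG_FIRSTHDR(&msg); c != NULL; c = CMSG_NXTHDR(&msg, c)) {
                    if (c->cmsg_level == SOL_SOCKET &&
                        c->cmsg_type == SCM_TIMESTAMP) {
                            struct timeval tv;
                            /* memcpy sidesteps the alignment issue the
                             * kernel-side casts work around */
                            memcpy(&tv, CMSG_DATA(c), sizeof (tv));
                    }
            }
    }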
                        
@@ -2073,7 +2070,13 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
        size_t sizeof_hdtr;
        off_t file_size;
        struct vfs_context context = *vfs_context_current();
-
+#define ENXIO_10146739_DBG(err_str) {  \
+       if (error == ENXIO) {           \
+               printf(err_str,         \
+               __func__,               \
+               "File a radar related to rdar://10146739 \n");  \
+       }                               \
+}
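
To make the macro's calling convention concrete: each err_str carries two %s conversions, filled by __func__ and the fixed radar string. A call such as the one a few lines below therefore expands, when error == ENXIO, to approximately this (sketch derived directly from the macro body above):

    if (error == ENXIO) {
            printf("%s: fp_getfvp error. %s",
                __func__,
                "File a radar related to rdar://10146739 \n");
    }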
        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
            0, 0, 0, 0);
 
@@ -2085,6 +2088,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
         * type and connected socket out, positive offset.
         */
        if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
+               ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); 
                goto done;
        }
        if ((fp->f_flag & FREAD) == 0) {
@@ -2097,6 +2101,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
        }
        error = file_socket(uap->s, &so);
        if (error) {
+               ENXIO_10146739_DBG("%s: file_socket error. %s");
                goto done1;
        }
        if (so == NULL) {
@@ -2179,8 +2184,10 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                        nuap.iovp = user_hdtr.headers;
                        nuap.iovcnt = user_hdtr.hdr_cnt;
                        error = writev_nocancel(p, &nuap, &writev_retval);
-                       if (error)
+                       if (error) {
+                               ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
                                goto done2;
+                       }
                        sbytes += writev_retval;
                }
        }
@@ -2190,8 +2197,10 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
         *  1. We don't want to allocate more mbufs than necessary
         *  2. We don't want to read past the end of file
         */
-       if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
+       if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
+               ENXIO_10146739_DBG("%s: vnode_size error. %s");
                goto done2;
+       }
 
        /*
         * Simply read file data into a chain of mbufs that used with scatter
@@ -2264,11 +2273,12 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                pktlen = mbuf_pkt_maxlen(m0);
                if (pktlen < (size_t)xfsize)
                        xfsize = pktlen;
-
+               
                auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
                    UIO_READ, &uio_buf[0], sizeof (uio_buf));
                if (auio == NULL) {
-                       //printf("sendfile: uio_createwithbuffer failed\n");
+                       printf("sendfile failed. nbufs = %d. %s", nbufs,
+                               "File a radar related to rdar://10146739.\n");
                        mbuf_freem(m0);
                        error = ENXIO;
                        socket_lock(so, 0);
@@ -2302,6 +2312,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                            error == EINTR || error == EWOULDBLOCK)) {
                                error = 0;
                        } else {
+                               ENXIO_10146739_DBG("%s: fo_read error. %s");
                                mbuf_freem(m0);
                                goto done3;
                        }
@@ -2351,6 +2362,7 @@ retry_space:
                                so->so_error = 0;
                        }
                        m_freem(m0);
+                       ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
                        goto done3;
                }
                /*
@@ -2393,6 +2405,7 @@ retry_space:
                                        error = 0;
                                        continue;
                                }
+                               ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
                                goto done3;
                        }
                        /*
@@ -2406,6 +2419,7 @@ retry_space:
                KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
                    uap->s, 0, 0, 0, 0);
                if (error) {
+                       ENXIO_10146739_DBG("%s: pru_send error. %s");
                        goto done3;
                }
        }
@@ -2420,8 +2434,10 @@ retry_space:
                nuap.iovp = user_hdtr.trailers;
                nuap.iovcnt = user_hdtr.trl_cnt;
                error = writev_nocancel(p, &nuap, &writev_retval);
-               if (error)
+               if (error) {
+                       ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
                        goto done2;
+               }
                sbytes += writev_retval;
        }
 done2:
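
For reference, the userspace entry point exercised by this function is Darwin's sendfile(2), whose header/trailer iovecs drive the writev_nocancel paths above. A minimal caller, assuming the standard <sys/socket.h> declarations (a sketch, not from this source):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* Sketch: send open file fd over connected stream socket s,
     * prefixed with a small header; len returns bytes sent. */
    static int
    send_file_with_header(int fd, int s)
    {
            char hdr[] = "HDR\n";
            struct iovec hvec = { .iov_base = hdr, .iov_len = sizeof (hdr) - 1 };
            struct sf_hdtr hdtr = { .headers = &hvec, .hdr_cnt = 1,
                                    .trailers = NULL, .trl_cnt = 0 };
            off_t len = 0;          /* 0: send to end of file */

            return (sendfile(fd, s, 0, &len, &hdtr, 0));
    }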
index c64053a2c55e24a77da818d956335d54db12c8e6..2368c19bd788fbaf781805fa67f77a213ef7d7ee 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -96,6 +96,8 @@
 #include <security/mac_framework.h>
 #endif /* CONFIG_MACF */
 
+#include <mach/vm_param.h>
+
 #define        f_msgcount f_fglob->fg_msgcount
 #define        f_cred f_fglob->fg_cred
 #define        f_ops f_fglob->fg_ops
@@ -699,6 +701,18 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt)
                                        error = EINVAL;
                        }
                        break;
+               case LOCAL_PEERPID:
+                       if (unp->unp_conn != NULL) {
+                               if (unp->unp_conn->unp_socket != NULL) {
+                                       pid_t peerpid = unp->unp_conn->unp_socket->last_pid;
+                                       error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
+                               } else {
+                                       panic("peer is connected but has no socket?");
+                               }
+                       } else {
+                               error = ENOTCONN;
+                       }
+                       break;
                default:
                        error = EOPNOTSUPP;
                        break;
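
The new LOCAL_PEERPID option reports the last-known pid of the peer on a connected AF_UNIX socket, or ENOTCONN otherwise. A userspace sketch, assuming the constant is exported through <sys/un.h> at level SOL_LOCAL:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <errno.h>
    #include <stdio.h>

    /* Sketch: query the pid of the process on the other end of fd. */
    static void
    print_peer_pid(int fd)
    {
            pid_t pid;
            socklen_t len = sizeof (pid);

            if (getsockopt(fd, SOL_LOCAL, LOCAL_PEERPID, &pid, &len) == 0)
                    printf("peer pid: %d\n", (int)pid);
            else if (errno == ENOTCONN)
                    printf("socket not connected\n");
    }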
@@ -821,6 +835,8 @@ unp_detach(struct unpcb *unp)
 
        lck_rw_lock_exclusive(unp_list_mtx);
        LIST_REMOVE(unp, unp_link);
+       --unp_count; 
+       ++unp_gencnt;
        lck_rw_done(unp_list_mtx);
        if (unp->unp_vnode) {
                struct vnode *tvp = NULL;
@@ -1122,8 +1138,16 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
                    (so3 = sonewconn(so2, 0, nam)) == 0) {
                        error = ECONNREFUSED;
-                       socket_unlock(so2, 1);
-                       socket_lock(so, 0);
+                       if (so != so2) {
+                               socket_unlock(so2, 1);
+                               socket_lock(so, 0);
+                       } else {
+                               socket_lock(so, 0);
+                               /* Release the reference held for
+                                * listen socket.
+                                */
+                               so2->so_usecount--;
+                       }
                        goto out;
                }
                unp2 = sotounpcb(so2);
@@ -1455,31 +1479,37 @@ static void
 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
 {
 #if defined(__LP64__)
-       cp->unp_link.le_next = (u_int32_t)(uintptr_t)up->unp_link.le_next;
-       cp->unp_link.le_prev = (u_int32_t)(uintptr_t)up->unp_link.le_prev;
+       cp->unp_link.le_next = (u_int32_t)
+           VM_KERNEL_ADDRPERM(up->unp_link.le_next);
+       cp->unp_link.le_prev = (u_int32_t)
+           VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
 #else
-       cp->unp_link.le_next = (struct unpcb_compat *)up->unp_link.le_next;
-       cp->unp_link.le_prev = (struct unpcb_compat **)up->unp_link.le_prev;
+       cp->unp_link.le_next = (struct unpcb_compat *)
+           VM_KERNEL_ADDRPERM(up->unp_link.le_next);
+       cp->unp_link.le_prev = (struct unpcb_compat **)
+           VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
 #endif
-       cp->unp_socket = (_UNPCB_PTR(struct socket *))(uintptr_t)up->unp_socket;
-       cp->unp_vnode = (_UNPCB_PTR(struct vnode *))(uintptr_t)up->unp_vnode;
+       cp->unp_socket = (_UNPCB_PTR(struct socket *))
+           VM_KERNEL_ADDRPERM(up->unp_socket);
+       cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
+           VM_KERNEL_ADDRPERM(up->unp_vnode);
        cp->unp_ino = up->unp_ino;
        cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
-           (uintptr_t)up->unp_conn;
-       cp->unp_refs = (u_int32_t)(uintptr_t)up->unp_refs.lh_first;
+           VM_KERNEL_ADDRPERM(up->unp_conn);
+       cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
 #if defined(__LP64__)
        cp->unp_reflink.le_next =
-           (u_int32_t)(uintptr_t)up->unp_reflink.le_next;
+           (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
        cp->unp_reflink.le_prev =
-           (u_int32_t)(uintptr_t)up->unp_reflink.le_prev;
+           (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
 #else
        cp->unp_reflink.le_next =
-           (struct unpcb_compat *)up->unp_reflink.le_next;
+           (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
        cp->unp_reflink.le_prev =
-           (struct unpcb_compat **)up->unp_reflink.le_prev;
+           (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
 #endif
        cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
-           (uintptr_t)up->unp_addr;
+           VM_KERNEL_ADDRPERM(up->unp_addr);
        cp->unp_cc = up->unp_cc;
        cp->unp_mbcnt = up->unp_mbcnt;
        cp->unp_gencnt = up->unp_gencnt;
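
The switch from bare (uintptr_t) casts to VM_KERNEL_ADDRPERM() stops these sysctl exports from leaking raw kernel addresses to userspace. Conceptually, non-NULL pointers are slid by a boot-time random cookie so they remain distinct tokens without revealing the kernel layout; a paraphrased sketch of the idea (the authoritative definition is in the <mach/vm_param.h> added above, not this snippet):

    /* Sketch: vm_kernel_addrperm is randomized once at boot. */
    extern vm_offset_t vm_kernel_addrperm;

    #define VM_KERNEL_ADDRPERM(_v)                          \
            (((vm_offset_t)(_v) == 0) ?                     \
                (vm_offset_t)0 :                            \
                (vm_offset_t)(_v) + vm_kernel_addrperm)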
@@ -1563,7 +1593,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS
                        bzero(&xu, sizeof (xu));
                        xu.xu_len = sizeof (xu);
                        xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
-                           (uintptr_t)unp;
+                           VM_KERNEL_ADDRPERM(unp);
                        /*
                         * XXX - need more locking here to protect against
                         * connect/disconnect races for SMP.
@@ -1687,20 +1717,24 @@ unp_pcblist64 SYSCTL_HANDLER_ARGS
 
                        bzero(&xu, xu_len);
                        xu.xu_len = xu_len;
-                       xu.xu_unpp = (u_int64_t)(uintptr_t)unp;
-                        xu.xunp_link.le_next =
-                                (u_int64_t)(uintptr_t)unp->unp_link.le_next;
-                        xu.xunp_link.le_prev =
-                                (u_int64_t)(uintptr_t)unp->unp_link.le_prev;
-                       xu.xunp_socket = (u_int64_t)(uintptr_t)unp->unp_socket;
-                       xu.xunp_vnode = (u_int64_t)(uintptr_t)unp->unp_vnode;
+                       xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
+                       xu.xunp_link.le_next = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
+                       xu.xunp_link.le_prev = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
+                       xu.xunp_socket = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_socket);
+                       xu.xunp_vnode = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_vnode);
                        xu.xunp_ino = unp->unp_ino;
-                       xu.xunp_conn = (u_int64_t)(uintptr_t)unp->unp_conn;
-                       xu.xunp_refs = (u_int64_t)(uintptr_t)unp->unp_refs.lh_first;
-                       xu.xunp_reflink.le_next = 
-                               (u_int64_t)(uintptr_t)unp->unp_reflink.le_next;
-                        xu.xunp_reflink.le_prev = 
-                                (u_int64_t)(uintptr_t)unp->unp_reflink.le_prev;
+                       xu.xunp_conn = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_conn);
+                       xu.xunp_refs = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
+                       xu.xunp_reflink.le_next = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
+                       xu.xunp_reflink.le_prev = (u_int64_t)
+                           VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
                        xu.xunp_cc = unp->unp_cc;
                        xu.xunp_mbcnt = unp->unp_mbcnt;
                        xu.xunp_gencnt = unp->unp_gencnt;
@@ -2327,9 +2361,8 @@ unp_unlock(struct socket *so, int refcount, void * lr)
                
                lck_mtx_unlock(mutex_held);
 
-               unp->unp_gencnt = ++unp_gencnt;
+               lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
                zfree(unp_zone, unp);
-               --unp_count;
 
                unp_gc();
        } else {
index b5fc2f07214274325cdf90614d546057cdf4ccd0..f2ae46185840778ba23909388ed93c2728175cd2 100644 (file)
 #include <sys/systm.h>
 #include <sys/types.h>
 #include <sys/sysctl.h>
+#include <kern/assert.h>
+#include <vm/vm_pageout.h>
 
-void vm_pressure_klist_lock(void);
-void vm_pressure_klist_unlock(void);
+#if CONFIG_MEMORYSTATUS
+#include <sys/kern_memorystatus.h>
+#endif
+
+/* 
+ * This value is the threshold that a process must meet to be considered for scavenging.
+ */
+#define VM_PRESSURE_MINIMUM_RSIZE              10      /* MB */
+#define VM_PRESSURE_NOTIFY_WAIT_PERIOD         10000   /* milliseconds */
+
+static void vm_pressure_klist_lock(void);
+static void vm_pressure_klist_unlock(void);
 
-void vm_dispatch_memory_pressure(void);
-int vm_try_terminate_candidates(void);
-int vm_try_pressure_candidates(void);
-void vm_recharge_active_list(void);
+static void vm_dispatch_memory_pressure(void);
+static kern_return_t vm_try_pressure_candidates(void);
+static void vm_reset_active_list(void);
+
+static lck_mtx_t vm_pressure_klist_mutex;
 
 struct klist vm_pressure_klist;
 struct klist vm_pressure_klist_dormant;
 
-void vm_pressure_klist_lock(void) {
+#if DEBUG
+#define VM_PRESSURE_DEBUG(cond, format, ...)      \
+do {                                              \
+       if (cond) { printf(format, ##__VA_ARGS__); } \
+} while(0)
+#else
+#define VM_PRESSURE_DEBUG(cond, format, ...)
+#endif
+
+void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
+       lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
+}
+
+static void vm_pressure_klist_lock(void) {
        lck_mtx_lock(&vm_pressure_klist_mutex);
 }
 
-void vm_pressure_klist_unlock(void) {
+static void vm_pressure_klist_unlock(void) {
        lck_mtx_unlock(&vm_pressure_klist_mutex);
 }
 
@@ -65,13 +91,11 @@ int vm_knote_register(struct knote *kn) {
        
        vm_pressure_klist_lock();
        
-       if ((kn->kn_sfflags & (NOTE_VM_PRESSURE))) {
-#if DEBUG
-               printf("[vm_pressure] process %d registering pressure notification\n", kn->kn_kq->kq_p->p_pid);
-#endif
+       if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
                KNOTE_ATTACH(&vm_pressure_klist, kn);
-       } else
+       } else {          
                rv = ENOTSUP;
+       }
        
        vm_pressure_klist_unlock();
        
@@ -83,9 +107,7 @@ void vm_knote_unregister(struct knote *kn) {
        
        vm_pressure_klist_lock();
        
-#if DEBUG
-       printf("[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
-#endif
+       VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
        
        SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
                if (kn_temp == kn) {
@@ -94,139 +116,249 @@ void vm_knote_unregister(struct knote *kn) {
                        return;
                }
        }
-       KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+
+       SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
+               if (kn_temp == kn) {
+                       KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+                       vm_pressure_klist_unlock();
+                       return;
+               }
+       }
+       
+       vm_pressure_klist_unlock();
+}
+
+void vm_pressure_proc_cleanup(proc_t p)
+{
+       struct knote *kn = NULL;
+
+       vm_pressure_klist_lock();
+       
+       VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);
+       
+       SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
+               if (kn->kn_kq->kq_p == p) {
+                       KNOTE_DETACH(&vm_pressure_klist, kn);
+                       vm_pressure_klist_unlock();
+                       return;
+               }
+       }
+       
+       SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
+               if (kn->kn_kq->kq_p == p) {
+                       KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+                       vm_pressure_klist_unlock();
+                       return;
+               }
+       }
        
        vm_pressure_klist_unlock();
 }
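
The knotes attached, detached, and cleaned up in these lists originate from userspace via the private EVFILT_VM kqueue filter. A registration sketch, assuming EVFILT_VM and NOTE_VM_PRESSURE as declared in XNU's <sys/event.h> (private interface; names are from the kernel code above):

    #include <sys/types.h>
    #include <sys/event.h>
    #include <unistd.h>

    /* Sketch: block until the kernel delivers a memory-pressure note. */
    static void
    wait_for_vm_pressure(void)
    {
            int kq = kqueue();
            struct kevent ke;

            /* ident 0: the filter is system-wide, not per-object */
            EV_SET(&ke, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
            kevent(kq, &ke, 1, NULL, 0, NULL);  /* -> vm_knote_register() */

            kevent(kq, NULL, 0, &ke, 1, NULL);  /* wait for dispatch */
            close(kq);
    }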
 
-/* Interface for event dispatch from vm_pageout_garbage_collect thread */
-void consider_pressure_events(void) {
+void consider_vm_pressure_events(void)
+{
        vm_dispatch_memory_pressure();
 }
 
-void vm_dispatch_memory_pressure(void) {       
+static void vm_dispatch_memory_pressure(void)
+{
        vm_pressure_klist_lock();
        
        if (!SLIST_EMPTY(&vm_pressure_klist)) {
                
-#if DEBUG
-               printf("[vm_pressure] vm_dispatch_memory_pressure\n");
-#endif
+               VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");
                
-               if (vm_try_pressure_candidates()) {
+               if (vm_try_pressure_candidates() == KERN_SUCCESS) {
                        vm_pressure_klist_unlock();
                        return;
                }
                
        }
        
-       /* Else... */
+       VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");
        
-#if DEBUG
-       printf("[vm_pressure] could not find suitable event candidate\n");
-#endif
-       
-       vm_recharge_active_list();
+       vm_reset_active_list();
        
        vm_pressure_klist_unlock();
 }
 
-/*
- * Try standard pressure event candidates.  Called with klist lock held.
- */
-int vm_try_pressure_candidates(void) {
-       /* 
-        * This value is the threshold that a process must meet to be considered for scavenging.
-        * If a process has sufficiently little resident memory, there is probably no use scavenging it.
-        * At best, we'll scavenge very little memory.  At worst, we'll page in code pages or malloc metadata.
-        */
-       
-#define VM_PRESSURE_MINIMUM_RSIZE      (10 * 1024 * 1024)
-       
-       struct proc *p_max = NULL;
-       unsigned int resident_max = 0;
-       struct knote *kn_max = NULL;
-       struct knote *kn;
-       
+#if CONFIG_JETSAM
+
+/* Jetsam aware version. Called with lock held */
+
+static struct knote * vm_find_knote_from_pid(pid_t pid) {
+       struct knote *kn = NULL;
+    
        SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
-               if ( (kn != NULL ) && ( kn->kn_kq != NULL ) && ( kn->kn_kq->kq_p != NULL ) ) {
-                       if (kn->kn_sfflags & NOTE_VM_PRESSURE) {
-                               struct proc *p = kn->kn_kq->kq_p;
-                               if (!(kn->kn_status & KN_DISABLED)) {
-                                       kern_return_t kr = KERN_SUCCESS;
-                                       struct task *t = (struct task *)(p->task);
-                                       struct task_basic_info basic_info;
-                                       mach_msg_type_number_t size = TASK_BASIC_INFO_COUNT;
-                                       if( ( kr = task_info(t, TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) == KERN_SUCCESS ) {
-                                               unsigned int resident_size = basic_info.resident_size;
-                                               /* 
-                                                * We don't want a small process to block large processes from 
-                                                * being notified again.  <rdar://problem/7955532>
-                                                */                                             
-                                               if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
-                                                       if (resident_size > resident_max) {
-                                                               p_max = p;
-                                                               resident_max = resident_size;
-                                                               kn_max = kn;
-                                                       }
-                                               } else {
-#if DEBUG
-                                                       /* There was no candidate with enough resident memory to scavenge */
-                                                       /* This debug print makes too much noise now */
-                                                       //printf("[vm_pressure] threshold failed for pid %d with %u resident, skipping...\n", p->p_pid, resident_size);
-#endif
-                                               }
-                                       } else {
-#if DEBUG
-                                               printf("[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
-#endif
-                                       }
-                               } else {
-#if DEBUG
-                                       printf("[vm_pressure] pid %d currently disabled, skipping...\n", p->p_pid);
-#endif
-                               }
-                       }
-               } else {
-#if DEBUG
-                       if (kn == NULL) {
-                               printf("[vm_pressure] kn is NULL\n");
-                       } else if (kn->kn_kq == NULL) {
-                               printf("[vm_pressure] kn->kn_kq is NULL\n");
-                       } else if (kn->kn_kq->kq_p == NULL) {
-                               printf("[vm_pressure] kn->kn_kq->kq_p is NULL\n");
-                       }
-#endif
+               struct proc *p;
+               pid_t current_pid;
+
+               p = kn->kn_kq->kq_p;
+               current_pid = p->p_pid;
+
+               if (current_pid == pid) {
+                       break;
                }
        }
-       
-       if (kn_max == NULL) return 0;
+    
+       return kn;
+}
 
-#if DEBUG
-       printf("[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
-#endif
+static kern_return_t vm_try_pressure_candidates(void)
+{
+        struct knote *kn = NULL;
+        pid_t target_pid = (pid_t)-1;
 
-       KNOTE_DETACH(&vm_pressure_klist, kn_max);
-       struct klist dispatch_klist = { NULL };
-       KNOTE_ATTACH(&dispatch_klist, kn_max);
-       KNOTE(&dispatch_klist, NOTE_VM_PRESSURE);
-       KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
-       
-       return 1;
+        /* If memory is low, and there's a pid to target... */
+        target_pid = memorystatus_request_vm_pressure_candidate();
+        while (target_pid != -1) {
+                /* ...look it up in the list, and break if found... */
+                if ((kn = vm_find_knote_from_pid(target_pid))) {
+                        break;
+                }
+
+                /* ...otherwise, go round again. */
+                target_pid = memorystatus_request_vm_pressure_candidate();
+        }
+
+        if (NULL == kn) {
+                VM_PRESSURE_DEBUG(0, "[vm_pressure] can't find candidate pid\n");
+                return KERN_FAILURE;
+        }
+
+        /* ...and dispatch the note */
+        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d, free pages %d\n", kn->kn_kq->kq_p->p_pid, memorystatus_available_pages);
+
+        KNOTE(&vm_pressure_klist, target_pid);
+        
+        memorystatus_send_pressure_note(target_pid);
+
+        return KERN_SUCCESS;
 }
 
+static void vm_reset_active_list(void) {
+        /* No-op */
+}
+
+#if DEVELOPMENT || DEBUG
+
+/* Test purposes only */
+boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid) {
+       struct knote *kn;
+    
+       vm_pressure_klist_lock();
+    
+       kn = vm_find_knote_from_pid(pid);
+       if (kn) {
+               KNOTE(&vm_pressure_klist, pid);
+       }
+    
+       vm_pressure_klist_unlock();
+    
+       return kn ? TRUE : FALSE;
+}
+
+#endif /* DEVELOPMENT || DEBUG */
+
+#else /* CONFIG_MEMORYSTATUS */
+
+static kern_return_t vm_try_pressure_candidates(void)
+{
+       struct knote *kn = NULL, *kn_max = NULL;
+        unsigned int resident_max = 0;
+        pid_t target_pid = -1;
+        struct klist dispatch_klist = { NULL };
+       kern_return_t kr = KERN_SUCCESS;
+       struct timeval curr_tstamp = {0, 0};
+       int elapsed_msecs = 0;
+       proc_t  target_proc = PROC_NULL;
+
+       microuptime(&curr_tstamp);
+       
+        SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
+                struct mach_task_basic_info basic_info;
+                mach_msg_type_number_t  size = MACH_TASK_BASIC_INFO_COUNT;
+                unsigned int           resident_size = 0;
+               proc_t                  p = PROC_NULL;
+               struct task*            t = TASK_NULL;
+
+               p = kn->kn_kq->kq_p;
+               proc_list_lock();
+               if (p != proc_ref_locked(p)) {
+                       p = PROC_NULL;
+                       proc_list_unlock();
+                       continue;
+               }
+               proc_list_unlock();
+
+               t = (struct task *)(p->task);
+               
+               timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
+               elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
+                                                       
+               if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
+                       proc_rele(p);
+                       continue;
+               }
+
+                if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) {
+                        VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
+                       proc_rele(p);
+                        continue;
+                }
+
+                /* 
+                * We don't want a small process to block large processes from 
+                * being notified again. <rdar://problem/7955532>
+                */
+                resident_size = (basic_info.resident_size)/(MB);
+                if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
+                        if (resident_size > resident_max) {
+                                resident_max = resident_size;
+                                kn_max = kn;
+                                target_pid = p->p_pid;
+                               target_proc = p;
+                        }
+                } else {
+                        /* There was no candidate with enough resident memory to scavenge */
+                        VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
+                }
+               proc_rele(p);
+        }
+
+        if (kn_max == NULL || target_pid == -1) {
+               return KERN_FAILURE;
+       }
+
+       VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
+        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
+
+        KNOTE_DETACH(&vm_pressure_klist, kn_max);
+
+       target_proc = proc_find(target_pid);
+       if (target_proc != PROC_NULL) {
+               KNOTE_ATTACH(&dispatch_klist, kn_max);
+               KNOTE(&dispatch_klist, target_pid);
+               KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
+
+               microuptime(&target_proc->vm_pressure_last_notify_tstamp);
+               proc_rele(target_proc);
+       }
+
+        return KERN_SUCCESS;
+}
 
 /*
  * Remove all elements from the dormant list and place them on the active list.
  * Called with klist lock held.
  */
-void vm_recharge_active_list(void) {
+static void vm_reset_active_list(void) {
        /* Re-charge the main list from the dormant list if possible */
        if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
-#if DEBUG
-               printf("[vm_pressure] recharging main list from dormant list\n");
-#endif 
                struct knote *kn;
+
+               VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n");
+        
                while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
                        kn = SLIST_FIRST(&vm_pressure_klist_dormant);
                        SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
@@ -234,3 +366,5 @@ void vm_recharge_active_list(void) {
                }
        }
 }
+
+#endif /* CONFIG_MEMORYSTATUS */
index 8063c820a17ff53fd51ca99c29ed4b2dbcb40e73..059e9c23cb6822a423e45664f7cd86e9e63d9e8c 100644 (file)
 
 #include <sys/queue.h>
 
-static lck_mtx_t vm_pressure_klist_mutex;
+void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr);
 
 int vm_knote_register(struct knote *);
 void vm_knote_unregister(struct knote *);
 
-void consider_pressure_events(void);
+void consider_vm_pressure_events(void);
+void vm_pressure_proc_cleanup(proc_t);
+
+#if CONFIG_MEMORYSTATUS && (DEVELOPMENT || DEBUG)
+boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid);
+#endif
 
 #endif /* VM_PRESSURE_H */
index 8259186d03ae9e8a1684f5f63925465da9ca58d1..223f3a526a763e27da902d2941a97f600e99d7aa 100644 (file)
@@ -77,6 +77,7 @@
 #include <sys/types.h>
 #include <mach/vm_param.h>
 
+
 #ifdef __APPLE_API_OBSOLETE
 /* BCD conversions. */
 extern u_char const    bcd2bin_data[];
index 1a6417179422909cec8a83434dfc5915472b6498..a38f0dd74c9e0fe79d8770046a46f085dbc6db26 100644 (file)
@@ -41,14 +41,6 @@ struct exec_info {
        char    **ev;
 };
 
-struct exec_archhandler {
-       char path[MAXPATHLEN];
-       uint32_t fsid;
-       uint64_t fileid;
-};
-
-extern struct exec_archhandler exec_archhandler_ppc;
-int set_archhandler(struct proc *, int);
 int grade_binary(cpu_type_t, cpu_subtype_t);
 
 #if defined (__i386__) || defined(__x86_64__)
index 262acfbc8a22d570d55120ae8355d502a28f2682..b349878d049a17acb1b9ea8a0b6aceafb9421339 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifndef        _MACHINE_SETJMP_H_
 #define        _MACHINE_SETJMP_H_
 
-#if defined (__i386__) || defined(__x86_64__)
-#include "i386/setjmp.h"
-#else
-#error architecture not supported
-#endif
+#include <setjmp.h>
 
 #endif /* _MACHINE_SETJMP_H_ */
index e2af8fc60fb79605e3b1c09e5031f3b8a20adc5e..df7e6ac1b69999dbabfe35fa7fc2420ee4fed830 100644 (file)
@@ -678,7 +678,7 @@ natively. Callers should be aware of this when requesting the full path of a har
 A
 .Vt timespec
 that contains the time that the file system object was created or renamed into
-its containing directory.  Note that inconsistent behavior may obe observed
+its containing directory.  Note that inconsistent behavior may be observed
 when this attribute is requested on hard-linked items. 
 .Pp
 .
index d2895cd337e63082e07bd56a84af6f40f135e8c4..8966090a66fc656a44949d36e4ad84020057dda6 100644 (file)
@@ -1,193 +1 @@
-.\"
-.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved.
-.\"
-.\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
-.\" 
-.\" This file contains Original Code and/or Modifications of Original Code
-.\" as defined in and that are subject to the Apple Public Source License
-.\" Version 2.0 (the 'License'). You may not use this file except in
-.\" compliance with the License. The rights granted to you under the License
-.\" may not be used to create, or enable the creation or redistribution of,
-.\" unlawful or unlicensed copies of an Apple operating system, or to
-.\" circumvent, violate, or enable the circumvention or violation of, any
-.\" terms of an Apple operating system software license agreement.
-.\" 
-.\" Please obtain a copy of the License at
-.\" http://www.opensource.apple.com/apsl/ and read it before using this file.
-.\" 
-.\" The Original Code and all software distributed under the License are
-.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
-.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
-.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
-.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
-.\" Please see the License for the specific language governing rights and
-.\" limitations under the License.
-.\" 
-.\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@
-.\"
-.Dd March 6, 2009
-.Dt GETAUDIT 2
-.Os
-.Sh NAME
-.Nm getaudit ,
-.Nm getaudit_addr
-.Nd "retrieve audit session state"
-.Sh SYNOPSIS
-.In bsm/audit.h
-.Ft int
-.Fn getaudit "auditinfo_t *auditinfo"
-.Ft int
-.Fn getaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length"
-.Sh DESCRIPTION
-The
-.Fn getaudit
-system call
-retrieves the active audit session state for the current process via the
-.Vt auditinfo_t
-pointed to by
-.Fa auditinfo .
-The
-.Fn getaudit_addr
-system call
-retrieves extended state via
-.Fa auditinfo_addr
-and
-.Fa length .
-.Pp
-The
-.Fa auditinfo_t
-data structure is defined as follows:
-.nf
-.in +4n
-struct auditinfo {
-       au_id_t        ai_auid;         /* Audit user ID */
-       au_mask_t      ai_mask;         /* Audit masks */
-       au_tid_t       ai_termid;       /* Terminal ID */
-       au_asid_t      ai_asid;         /* Audit session ID */
-};
-typedef struct auditinfo        auditinfo_t;
-.in
-.fi
-.Pp
-The
-.Fa ai_auid
-variable contains the audit identifier which is recorded in the audit log for
-each event the process caused.
-.Pp
-The
-.Fa au_mask_t
-data structure defines the bit mask for auditing successful and failed events
-out of the predefined list of event classes. It is defined as follows:
-.nf
-.in +4n
-struct au_mask {
-       unsigned int    am_success;     /* success bits */
-       unsigned int    am_failure;     /* failure bits */
-};
-typedef struct au_mask  au_mask_t;
-.in
-.fi
-.Pp
-The
-.Fa au_termid_t
-data structure defines the Terminal ID recorded with every event caused by the
-process. It is defined as follows:
-.nf
-.in +4n
-struct au_tid {
-       dev_t           port;
-       u_int32_t       machine;
-};
-typedef struct au_tid   au_tid_t;
-.in
-.fi
-.Pp
-The
-.Fa ai_asid
-variable contains the audit session ID which is recorded with every event
-caused by the process.
-.Pp
-The
-.Fn getaudit_addr
-system call
-uses the expanded
-.Fa auditinfo_addr_t
-data structure supports Terminal IDs with larger addresses such as those used
-in IP version 6.  It is defined as follows:
-.nf
-.in +4n
-struct auditinfo_addr {
-       au_id_t         ai_auid;        /* Audit user ID. */
-       au_mask_t       ai_mask;        /* Audit masks. */
-       au_tid_addr_t   ai_termid;      /* Terminal ID. */
-       au_asid_t       ai_asid;        /* Audit session ID. */
-       u_int64_t       ai_flags;       /* Audit session flags. */
-};
-typedef struct auditinfo_addr   auditinfo_addr_t;
-.in
-.fi
-.Pp
-The
-.Fa au_tid_addr_t
-data structure which includes a larger address storage field and an additional
-field with the type of address stored:
-.nf
-.in +4n
-struct au_tid_addr {
-       dev_t           at_port;
-       u_int32_t       at_type;
-       u_int32_t       at_addr[4];
-};
-typedef struct au_tid_addr      au_tid_addr_t;
-.in
-.fi
-.Pp
-Without appropriate privilege the audit mask fields will be set to all
-ones. 
-.Sh RETURN VALUES
-.Rv -std getaudit getaudit_addr
-.Sh ERRORS
-The
-.Fn getaudit
-function will fail if:
-.Bl -tag -width Er
-.It Bq Er EFAULT
-A failure occurred while data transferred to or from
-the kernel failed.
-.It Bq Er EINVAL
-Illegal argument was passed by a system call.
-.It Bq Er EOVERFLOW
-The
-.Fa length
-argument indicates an overflow condition will occur.
-.It Bq Er ERANGE
-The address is too big and, therefore, 
-.Fn getaudit_addr
-should be used instead.
-.El
-.Sh SEE ALSO
-.Xr audit 2 ,
-.Xr auditon 2 ,
-.Xr getauid 2 ,
-.Xr setaudit 2 ,
-.Xr setauid 2 ,
-.Xr libbsm 3
-.Sh HISTORY
-The OpenBSM implementation was created by McAfee Research, the security
-division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004.
-It was subsequently adopted by the TrustedBSD Project as the foundation for
-the OpenBSM distribution.
-.Sh AUTHORS
-.An -nosplit
-This software was created by McAfee Research, the security research division
-of McAfee, Inc., under contract to Apple Computer Inc.
-Additional authors include
-.An Wayne Salamon ,
-.An Robert Watson ,
-and SPARTA Inc.
-.Pp
-The Basic Security Module (BSM) interface to audit records and audit event
-stream format were defined by Sun Microsystems.
-.Pp
-This manual page was written by
-.An Robert Watson Aq rwatson@FreeBSD.org .
+.so man2/getaudit_addr.2
index 25e765cd575e8e16dbd8fc66523372075218980f..26a349b25e829d73fecb83277a84754d85ac6432 100644 (file)
@@ -1 +1,214 @@
-.so man2/getaudit.2
+.\"
+.\" Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+.\"
+.\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. The rights granted to you under the License
+.\" may not be used to create, or enable the creation or redistribution of,
+.\" unlawful or unlicensed copies of an Apple operating system, or to
+.\" circumvent, violate, or enable the circumvention or violation of, any
+.\" terms of an Apple operating system software license agreement.
+.\"
+.\" Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\"
+.\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+.\"
+.Dd March 6, 2011
+.Dt GETAUDIT_ADDR 2
+.Os
+.Sh NAME
+.Nm getaudit_addr ,
+.Nm getaudit(NOW DEPRECATED)
+.Nd "retrieve audit session state"
+.Sh SYNOPSIS
+.In bsm/audit.h
+.In bsm/audit_session.h
+.Ft int
+.Fn getaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length"
+.Sh SYNOPSIS (NOW DEPRECATED)
+.In bsm/audit.h
+.Ft int
+.Fn getaudit "auditinfo_t *auditinfo"
+.Sh DESCRIPTION
+The
+.Fn getaudit_addr
+system call
+retrieves extended state via
+.Fa auditinfo_addr
+and
+.Fa length .
+It
+uses the
+.Fa auditinfo_addr_t
+data structure, which supports Terminal IDs including those with larger
+addresses such as those used in IP version 6.  It is defined as follows:
+.nf
+.in +4n
+struct auditinfo_addr {
+       au_id_t         ai_auid;        /* Audit user ID. */
+       au_mask_t       ai_mask;        /* Audit masks. */
+       au_tid_addr_t   ai_termid;      /* Terminal ID. */
+       au_asid_t       ai_asid;        /* Audit session ID. */
+       u_int64_t       ai_flags;       /* Audit session flags. */
+};
+typedef struct auditinfo_addr   auditinfo_addr_t;
+.in
+.fi
+.Pp
+The
+.Fa ai_auid
+variable contains the audit identifier which is recorded in the audit log for
+each event the process caused.
+.Pp
+The
+.Fa au_mask_t
+data structure defines the bit mask for auditing successful and failed events
+out of the predefined list of event classes. It is defined as follows:
+.nf
+.in +4n
+struct au_mask {
+       unsigned int    am_success;     /* success bits */
+       unsigned int    am_failure;     /* failure bits */
+};
+typedef struct au_mask  au_mask_t;
+.in
+.fi
+.Pp
+The
+.Fa au_tid_addr_t
+data structure includes a larger address storage field and an additional
+field with the type of address stored:
+.nf
+.in +4n
+struct au_tid_addr {
+       dev_t           at_port;
+       u_int32_t       at_type;
+       u_int32_t       at_addr[4];
+};
+typedef struct au_tid_addr      au_tid_addr_t;
+.in
+.fi
+.Pp
+The
+.Fa ai_asid
+variable contains the audit session ID which is recorded with every event
+caused by the process.
+.Pp
+The
+.Fa ai_flags
+variable contains flags that are opaque to the kernel and used by various
+consumers of the
+.Fa auditinfo_addr
+data.  Please see the
+.Ao Pa bsm/audit_session.h Ac
+header file for more information
+and flag definitions for this platform.
+.Pp
+Without appropriate privilege the audit mask fields will be set to all
+ones.
+.Pp
+The
+.Fn getaudit
+system call (NOW DEPRECATED)
+retrieves the active audit session state for the current process via the
+.Vt auditinfo_t
+pointed to by
+.Fa auditinfo .
+.Pp
+The
+.Fa auditinfo_t
+data structure (NOW DEPRECATED) is defined as follows:
+.nf
+.in +4n
+struct auditinfo {
+       au_id_t        ai_auid;         /* Audit user ID */
+       au_mask_t      ai_mask;         /* Audit masks */
+       au_tid_t       ai_termid;       /* Terminal ID */
+       au_asid_t      ai_asid;         /* Audit session ID */
+};
+typedef struct auditinfo        auditinfo_t;
+.in
+.fi
+.Pp
+The
+.Fa au_termid_t
+data structure (NOW DEPRECATED) defines the Terminal ID recorded with
+every event caused by the process. It is defined as follows:
+.nf
+.in +4n
+struct au_tid {
+       dev_t           port;
+       u_int32_t       machine;
+};
+typedef struct au_tid   au_tid_t;
+.in
+.fi
+.Sh RETURN VALUES
+.Rv -std getaudit_addr
+.Sh ERRORS
+The
+.Fn getaudit_addr
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EFAULT
+A failure occurred while transferring data to or from
+the kernel.
+.It Bq Er EINVAL
+An illegal argument was passed to the system call.
+.It Bq Er EOVERFLOW
+The
+.Fa length
+argument indicates an overflow condition will occur.
+.It Bq Er ERANGE
+The address is too big.
+.El
+.Sh SEE ALSO
+.Xr audit 2 ,
+.Xr auditon 2 ,
+.Xr getauid 2 ,
+.Xr setaudit 2 ,
+.Xr setauid 2 ,
+.Xr libbsm 3
+.Sh HISTORY
+The OpenBSM implementation was created by McAfee Research, the security
+division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004.
+It was subsequently adopted by the TrustedBSD Project as the foundation for
+the OpenBSM distribution.
+.Pp
+.Fn getaudit_addr
+replaced
+.Fn getaudit
+in Mac OS X 10.7 to support longer terminal addresses such as those used
+by IP version 6.
+.Fn getaudit
+is now deprecated and
+.Fn getaudit_addr
+should be used instead.
+.Sh AUTHORS
+.An -nosplit
+This software was created by McAfee Research, the security research division
+of McAfee, Inc., under contract to Apple Computer Inc.
+Additional authors include
+.An Wayne Salamon ,
+.An Robert Watson ,
+and SPARTA Inc.
+.Pp
+The Basic Security Module (BSM) interface to audit records and audit event
+stream format were defined by Sun Microsystems.
+.Pp
+This manual page was written by
+.An Robert Watson Aq rwatson@FreeBSD.org
+and
+.An Stacey Son Aq sson@FreeBSD.org .
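
A minimal caller of the non-deprecated interface, per the SYNOPSIS above (a sketch; error handling abbreviated):

    #include <bsm/audit.h>
    #include <bsm/audit_session.h>
    #include <stdio.h>

    int
    main(void)
    {
            auditinfo_addr_t aia;

            if (getaudit_addr(&aia, sizeof (aia)) == -1) {
                    perror("getaudit_addr");
                    return (1);
            }
            /* Without privilege the mask fields read as all ones. */
            printf("auid %d asid %d\n", (int)aia.ai_auid, (int)aia.ai_asid);
            return (0);
    }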
index a941bc389228fa921ff391261a9c2887fe043a1d..2ae5297fcfefc31203f447324dd4a215ff0bc9b1 100644 (file)
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2008, 2010 Apple Inc.  All rights reserved.
+.\" Copyright (c) 2008-2011 Apple Inc.  All rights reserved.
 .\"
 .\" @APPLE_LICENSE_HEADER_START@
 .\" 
@@ -56,7 +56,7 @@
 .\"
 .\"     @(#)getgroups.2        8.2 (Berkeley) 4/16/94
 .\"
-.Dd September 17, 2010
+.Dd October 28, 2011
 .Dt GETGROUPS 2
 .Os BSD 4.2
 .Sh NAME
@@ -91,6 +91,13 @@ returns the number of groups without modifying the
 .Fa grouplist[]
 array.
 .Pp
+Calling
+.Xr initgroups 3
+to opt in for supplementary groups will cause
+.Fn getgroups
+to return a single entry, the GID that was passed to 
+.Xr initgroups 3 .
+.Pp
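
The behavior documented above can be observed directly; a sketch:

    #include <unistd.h>
    #include <limits.h>
    #include <stdio.h>

    int
    main(void)
    {
            gid_t groups[NGROUPS_MAX];
            int n = getgroups(NGROUPS_MAX, groups);

            /* After initgroups(3) has run for this credential, n is 1 and
             * groups[0] is the GID that was passed to initgroups(3). */
            for (int i = 0; i < n; i++)
                    printf("%d\n", (int)groups[i]);
            return (0);
    }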
 To provide compatibility with applications that use
 .Fn getgroups
 in environments where users may be in more than
index d0657201764251a1bf74df11be46dbf12684dc16..c243f97e14e0af8a9f445b487992396f621f0ffb 100644 (file)
@@ -67,7 +67,7 @@ the following structure:
 struct rusage {
         struct timeval ru_utime; /* user time used */
         struct timeval ru_stime; /* system time used */
-        long ru_maxrss;          /* integral max resident set size */
+        long ru_maxrss;          /* max resident set size */
         long ru_ixrss;           /* integral shared text memory size */
         long ru_idrss;           /* integral unshared data size */
         long ru_isrss;           /* integral unshared stack size */
@@ -92,7 +92,7 @@ the total amount of time spent executing in user mode.
 the total amount of time spent in the system executing on behalf
 of the process(es).
 .It Fa ru_maxrss
-the maximum resident set size utilized (in kilobytes).
+the maximum resident set size utilized (in bytes).
 .It Fa ru_ixrss
 an \*(lqintegral\*(rq value indicating the amount of memory used
 by the text segment
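
Since ru_maxrss is documented in bytes on this platform (Linux, by comparison, reports kilobytes), callers converting to larger units divide accordingly; a sketch:

    #include <sys/resource.h>
    #include <stdio.h>

    int
    main(void)
    {
            struct rusage ru;

            if (getrusage(RUSAGE_SELF, &ru) == 0)
                    /* ru_maxrss is in bytes here, per the text above */
                    printf("max RSS: %ld MB\n",
                        ru.ru_maxrss / (1024L * 1024L));
            return (0);
    }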
index e0408c27233ee14b68ee428f10dd8ffbc86f03d8..0ec25a00086c76f65633fdb5d44d306b3cbc32c3 100644 (file)
@@ -275,7 +275,7 @@ operation testing the ability to write to a socket will return true
 only if the low-water mark amount could be processed.
 The default value for
 .Dv SO_SNDLOWAT
-is set to a convenient size for network efficiency, often 1024.
+is set to a convenient size for network efficiency, often 2048.
 .Pp
 .Dv SO_RCVLOWAT
 is an option to set the minimum count for input operations.
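
The default can be inspected (and, within limits, changed) with the usual option calls; a sketch:

    #include <sys/socket.h>
    #include <stdio.h>

    int
    main(void)
    {
            int s = socket(AF_INET, SOCK_STREAM, 0);
            int lowat = 0;
            socklen_t len = sizeof (lowat);

            if (s >= 0 &&
                getsockopt(s, SOL_SOCKET, SO_SNDLOWAT, &lowat, &len) == 0)
                    printf("SO_SNDLOWAT default: %d\n", lowat); /* often 2048 */
            return (0);
    }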
index 663f38ac05e54e57f813e5fc525c18640eaa772e..5117766350bae536a7c63ef14ba1922835501916 100644 (file)
@@ -638,6 +638,10 @@ ATTR_CMN_OWNERID
 ATTR_CMN_GRPID
 .It
 ATTR_CMN_ACCESSMASK
+.It
+ATTR_CMN_FILEID
+.It
+ATTR_CMN_PARENTID
 .Pp
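
With ATTR_CMN_FILEID now in the common-attribute list, a caller can fetch the 64-bit file id directly; a sketch (the packed reply layout is the caller's responsibility):

    #include <sys/attr.h>
    #include <sys/types.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>

    int
    main(void)
    {
            struct attrlist al;
            struct {
                    u_int32_t       length;   /* returned length, always first */
                    u_int64_t       fileid;   /* ATTR_CMN_FILEID payload */
            } __attribute__((packed)) reply;

            memset(&al, 0, sizeof (al));
            al.bitmapcount = ATTR_BIT_MAP_COUNT;
            al.commonattr = ATTR_CMN_FILEID;

            if (getattrlist("/tmp", &al, &reply, sizeof (reply), 0) == 0)
                    printf("fileid: %llu\n",
                        (unsigned long long)reply.fileid);
            return (0);
    }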
 .
 .It
index b626e0cf8c3d304fdf84c6b0e6512219b24e2f82..1fa5dda1f77f6ae2edef736b91790164df69044f 100644 (file)
@@ -1,236 +1 @@
-.\"
-.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved.
-.\"
-.\" @APPLE_LICENSE_HEADER_START@
-.\" 
-.\" This file contains Original Code and/or Modifications of Original Code
-.\" as defined in and that are subject to the Apple Public Source License
-.\" Version 2.0 (the 'License'). You may not use this file except in
-.\" compliance with the License. Please obtain a copy of the License at
-.\" http://www.opensource.apple.com/apsl/ and read it before using this
-.\" file.
-.\" 
-.\" The Original Code and all software distributed under the License are
-.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
-.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
-.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
-.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
-.\" Please see the License for the specific language governing rights and
-.\" limitations under the License.
-.\" 
-.\" @APPLE_LICENSE_HEADER_END@
-.\"
-.Dd March 23, 2009
-.Dt SETAUDIT 2
-.Os
-.Sh NAME
-.Nm setaudit ,
-.Nm setaudit_addr
-.Nd "set audit session state"
-.Sh SYNOPSIS
-.In bsm/audit.h
-.Ft int
-.Fn setaudit "auditinfo_t *auditinfo"
-.Ft int
-.Fn setaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length"
-.Sh DESCRIPTION
-The
-.Fn setaudit
-system call
-sets the active audit session state for the current process via the
-.Vt auditinfo_t
-pointed to by
-.Fa auditinfo .
-The
-.Fn setaudit_addr
-system call
-sets extended state via
-.Fa auditinfo_addr
-and
-.Fa length .
-.Pp
-The
-.Fa auditinfo_t
-data structure is defined as follows:
-.nf
-.in +4n
-struct auditinfo {
-       au_id_t        ai_auid;         /* Audit user ID */
-       au_mask_t      ai_mask;         /* Audit masks */
-       au_tid_t       ai_termid;       /* Terminal ID */
-       au_asid_t      ai_asid;         /* Audit session ID */
-};
-typedef struct auditinfo        auditinfo_t;
-.in
-.fi
-.Pp
-The
-.Fa ai_auid
-variable contains the audit identifier which is recorded in the audit log for 
-each event the process caused.
-The value of AU_DEFAUDITID (-1) should not be used.
-The exception is if the value of audit identifier is known at the
-start of the session but will be determined and set later.
-Until 
-.Fa ai_auid
-is set to something other than AU_DEFAUDITID any audit events
-generated by the system with be filtered by the non-attributed audit
-mask.
-.Pp
-The
-.Fa au_mask_t
-data structure defines the bit mask for auditing successful and failed events 
-out of the predefined list of event classes. It is defined as follows:
-.nf
-.in +4n
-struct au_mask {
-       unsigned int    am_success;     /* success bits */
-       unsigned int    am_failure;     /* failure bits */
-};
-typedef struct au_mask  au_mask_t;
-.in
-.fi
-.Pp
-The
-.Fa au_termid_t
-data structure defines the Terminal ID recorded with every event caused by the 
-process. It is defined as follows:
-.nf
-.in +4n
-struct au_tid {
-       dev_t           port;
-       u_int32_t       machine;
-};
-typedef struct au_tid   au_tid_t;
-.in
-.fi
-.Pp
-The
-.Fa ai_asid
-variable contains the audit session ID which is recorded with every event 
-caused by the process.  It can be any value in the range 1 to PID_MAX (99999).
-If the value of AU_ASSIGN_ASID is used for 
-.Fa ai_asid
-a unique session ID will be generated by the kernel.
-The audit session ID will be returned in the
-.Fa ai_asid
-field on success.
-.Pp
-The
-.Fn setaudit_addr
-system call
-uses the expanded
-.Fa auditinfo_addr_t 
-data structure which supports Terminal IDs with larger addresses
-such as those used in IP version 6.  It is defined as follows:
-.nf
-.in +4n
-struct auditinfo_addr {
-       au_id_t         ai_auid;        /* Audit user ID. */
-       au_mask_t       ai_mask;        /* Audit masks. */
-       au_tid_addr_t   ai_termid;      /* Terminal ID. */
-       au_asid_t       ai_asid;        /* Audit session ID. */
-       u_int64_t       ai_flags;       /* Audit session flags */
-};
-typedef struct auditinfo_addr   auditinfo_addr_t;
-.in
-.fi
-.Pp
-The 
-.Fa au_tid_addr_t
-data structure includes a larger address storage field and an additional
-field with the type of address stored:
-.nf
-.in +4n
-struct au_tid_addr {
-       dev_t           at_port;
-       u_int32_t       at_type;
-       u_int32_t       at_addr[4];
-};
-typedef struct au_tid_addr      au_tid_addr_t;
-.in
-.fi
-.Pp
-The
-.Fa ai_flags
-field is opaque to the kernel and can be used to store user
-defined session flags.
-.Pp
-These system calls require an appropriate privilege to complete.
-.Pp
-These system calls should only be called once at the start of a new
-session and not again during the same session to update the session 
-information.
-There are some exceptions, however.
-The 
-.Fa ai_auid
-field may be updated later if initially set to the value of
-AU_DEFAUDITID (-1).
-Likewise, the 
-.Fa ai_termid
-fields may be updated later if the 
-.Fa at_type
-field in
-.Fa au_tid_addr
-is set to AU_IPv4 and the other
-.Fa ai_tid_addr
-fields are all set to zero.
-Creating a new session is done by setting the 
-.Fa ai_asid
-field to an unique session value or AU_ASSIGN_ASID.
-These system calls will fail when attempting to change the
-.Fa ai_auid
-or
-.Fa ai_termid
-fields once set to something other than the default values.
-The
-.Fa ai_flags
-field may be updated only according to local access control
-policy but this is usually accomplished with
-.Xr auditon 2
-using the A_SETSFLAGS command.
-The audit preselection masks may be changed at any time
-but are usually updated with
-.Xr auditon 2
-using the A_SETPMASK command.
-.Sh RETURN VALUES
-.Rv -std setaudit setaudit_addr
-.Sh ERRORS
-.Bl -tag -width Er
-.It Bq Er EFAULT
-A failure occurred while data transferred to or from
-the kernel failed.
-.It Bq Er EINVAL
-Illegal argument was passed by a system call.
-.It Bq Er EPERM
-The process does not have sufficient permission to complete
-the operation.
-.El
-.Sh SEE ALSO
-.Xr audit 2 ,
-.Xr auditon 2 ,
-.Xr getaudit 2 ,
-.Xr getauid 2 ,
-.Xr setauid 2 ,
-.Xr libbsm 3
-.Sh HISTORY
-The OpenBSM implementation was created by McAfee Research, the security
-division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004.
-It was subsequently adopted by the TrustedBSD Project as the foundation for
-the OpenBSM distribution.
-.Sh AUTHORS
-.An -nosplit
-This software was created by McAfee Research, the security research division
-of McAfee, Inc., under contract to Apple Computer Inc.
-Additional authors include
-.An Wayne Salamon ,
-.An Robert Watson ,
-and SPARTA Inc.
-.Pp
-The Basic Security Module (BSM) interface to audit records and audit event
-stream format were defined by Sun Microsystems.
-.Pp
-This manual page was written by
-.An Robert Watson Aq rwatson@FreeBSD.org
-and
-.An Stacey Son Aq sson@FreeBSD.org .
+.so man2/setaudit_addr.2
index f11b4169f5846b081baf179969702e7a9b15ea67..d6b48c4d57b7383995f70d65f9ef5571751e10fb 100644 (file)
@@ -1 +1,253 @@
-.so man2/setaudit.2
+.\"
+.\" Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\"
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd March 4, 2011
+.Dt SETAUDIT_ADDR 2
+.Os
+.Sh NAME
+.Nm setaudit_addr ,
+.Nm setaudit(NOW DEPRECATED)
+.Nd "set audit session state"
+.Sh SYNOPSIS
+.In bsm/audit.h
+.In bsm/audit_session.h
+.Ft int
+.Fn setaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length"
+.Sh SYNOPSIS (NOW DEPRECATED)
+.In bsm/audit.h
+.Ft int
+.Fn setaudit "auditinfo_t *auditinfo"
+.Sh DESCRIPTION
+The
+.Fn setaudit_addr
+system call
+uses the
+.Fa auditinfo_addr_t
+data structure for the
+.Fa auditinfo_addr
+argument which supports Terminal IDs with large addresses
+such as those used in IP version 6.  It is defined as follows:
+.nf
+.in +4n
+struct auditinfo_addr {
+       au_id_t         ai_auid;        /* Audit user ID. */
+       au_mask_t       ai_mask;        /* Audit masks. */
+       au_tid_addr_t   ai_termid;      /* Terminal ID. */
+       au_asid_t       ai_asid;        /* Audit session ID. */
+       u_int64_t       ai_flags;       /* Audit session flags */
+};
+typedef struct auditinfo_addr   auditinfo_addr_t;
+.in
+.fi
+.Pp
+The
+.Fa ai_auid
+variable contains the audit identifier, which is recorded in the audit log for
+each event the process causes.  The value of AU_DEFAUDITID (-1) should not be
+used.  The exception is if the value of the audit identifier is not known at
+the start of the session but will be determined and set later.  Until
+.Fa ai_auid
+is set to something other than AU_DEFAUDITID, any audit events
+generated by the system will be filtered by the non-attributed audit
+mask.
+.Pp
+The
+.Fa au_mask_t
+data structure defines the bit mask for auditing successful and failed events
+out of the predefined list of event classes. It is defined as follows:
+.nf
+.in +4n
+struct au_mask {
+       unsigned int    am_success;     /* success bits */
+       unsigned int    am_failure;     /* failure bits */
+};
+typedef struct au_mask  au_mask_t;
+.in
+.fi
+.Pp
+The
+.Fa au_tid_addr_t
+data structure includes a larger address storage field and an additional
+field with the type of address stored:
+.nf
+.in +4n
+struct au_tid_addr {
+       dev_t           at_port;
+       u_int32_t       at_type;
+       u_int32_t       at_addr[4];
+};
+typedef struct au_tid_addr      au_tid_addr_t;
+.in
+.fi
+.Pp
+The
+.Fa ai_asid
+variable contains the audit session ID which is recorded with every event
+caused by the process.  It can be any value in the range 1 to PID_MAX (99999).
+If the value of AU_ASSIGN_ASID is used for
+.Fa ai_asid
+a unique session ID will be generated by the kernel.
+The audit session ID will be returned in the
+.Fa ai_asid
+field on success.
+.Pp
+The
+.Fa ai_flags
+field is opaque to the kernel and can be used to store flags associated
+with the audit session.  Please see the
+.Ao Pa bsm/audit_session.h Ac
+header file
+for more information and flag definitions for this platform.
+.Pp
+The
+.Fa setaudit_addr
+system call requires an appropriate privilege to complete.
+.Pp
+This system call should only be called once at the start of a new
+session and not again during the same session to update the session
+information.
+There are some exceptions, however.
+The
+.Fa ai_auid
+field may be updated later if initially set to the value of
+AU_DEFAUDITID (-1).
+Likewise, the
+.Fa ai_termid
+fields may be updated later if the
+.Fa at_type
+field in
+.Fa au_tid_addr
+is set to AU_IPv4 and the other
+.Fa au_tid_addr
+fields are all set to zero.
+Creating a new session is done by setting the
+.Fa ai_asid
+field to a unique session value or AU_ASSIGN_ASID.
+These system calls will fail when attempting to change the
+.Fa ai_auid
+or
+.Fa ai_termid
+fields once set to something other than the default values.
+The
+.Fa ai_flags
+field may be updated only according to local access control
+policy, but this is usually accomplished with
+.Xr auditon 2
+using the A_SETSFLAGS command.
+The audit preselection masks may be changed at any time
+but are usually updated with
+.Xr auditon 2
+using the A_SETPMASK command.
+.Pp
+The
+.Fn setaudit
+system call (NOW DEPRECATED)
+sets the active audit session state for the current process via the
+.Vt auditinfo_t
+pointed to by
+.Fa auditinfo .
+The
+.Fn setaudit_addr
+system call
+sets extended state via
+.Fa auditinfo_addr
+and
+.Fa length .
+.Pp
+The
+.Fa auditinfo_t
+data structure (NOW DEPRECATED) is defined as follows:
+.nf
+.in +4n
+struct auditinfo {
+       au_id_t        ai_auid;         /* Audit user ID */
+       au_mask_t      ai_mask;         /* Audit masks */
+       au_tid_t       ai_termid;       /* Terminal ID */
+       au_asid_t      ai_asid;         /* Audit session ID */
+};
+typedef struct auditinfo        auditinfo_t;
+.in
+.fi
+.Pp
+The
+.Fa au_tid_t
+data structure (NOW DEPRECATED) defines the Terminal ID recorded with every
+event caused by the process. It is defined as follows:
+.nf
+.in +4n
+struct au_tid {
+       dev_t           port;
+       u_int32_t       machine;
+};
+typedef struct au_tid   au_tid_t;
+.in
+.fi
+.Sh RETURN VALUES
+.Rv -std setaudit_addr
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EFAULT
+A failure occurred while transferring data to or from
+the kernel.
+.It Bq Er EINVAL
+An invalid argument was passed to the system call.
+.It Bq Er EPERM
+The process does not have sufficient permission to complete
+the operation.
+.El
+.Sh SEE ALSO
+.Xr audit 2 ,
+.Xr auditon 2 ,
+.Xr getaudit 2 ,
+.Xr getauid 2 ,
+.Xr setauid 2 ,
+.Xr libbsm 3
+.Sh HISTORY
+The OpenBSM implementation was created by McAfee Research, the security
+division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004.
+It was subsequently adopted by the TrustedBSD Project as the foundation for
+the OpenBSM distribution.
+.Pp
+.Fn setaudit_addr
+replaced
+.Fn setaudit
+in Mac OS X 10.7 to support longer terminal addresses such as those used
+by IP version 6.
+.Fn setaudit
+is now deprecated and
+.Fn setaudit_addr
+should be used instead.
+.Sh AUTHORS
+.An -nosplit
+This software was created by McAfee Research, the security research division
+of McAfee, Inc., under contract to Apple Computer Inc.
+Additional authors include
+.An Wayne Salamon ,
+.An Robert Watson ,
+and SPARTA Inc.
+.Pp
+The Basic Security Module (BSM) interface to audit records and audit event
+stream format were defined by Sun Microsystems.
+.Pp
+This manual page was written by
+.An Robert Watson Aq rwatson@FreeBSD.org
+and
+.An Stacey Son Aq sson@FreeBSD.org .
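For reference, a minimal user-space sketch of the interface described above: it creates a fresh audit session by letting the kernel assign the session ID, leaving the audit identifier to be set later. Constants and the structure layout are taken from the manual page text; privilege checking and error handling are abbreviated.

    #include <bsm/audit.h>
    #include <bsm/audit_session.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            auditinfo_addr_t aia;

            memset(&aia, 0, sizeof(aia));
            aia.ai_auid = AU_DEFAUDITID;          /* audit ID not yet known; set later */
            aia.ai_asid = AU_ASSIGN_ASID;         /* let the kernel pick a unique session ID */
            aia.ai_termid.at_type = AU_IPv4;      /* terminal ID fields may be updated later */

            /* requires appropriate privilege; the assigned session ID comes back in ai_asid */
            if (setaudit_addr(&aia, sizeof(aia)) == -1) {
                    perror("setaudit_addr");
                    return 1;
            }
            printf("audit session ID: %d\n", aia.ai_asid);
            return 0;
    }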
index 957c5bd77073588f1937d027d7accf7226ad38fa..240e8298d841c135b268d0e5ac71b7e2fbf9f920 100644 (file)
@@ -175,6 +175,15 @@ Not enough space left on the file system.
 .Xr getxattr 2 ,
 .Xr listxattr 2 ,
 .Xr removexattr 2
+.Sh NOTES
+For historical reasons, the
+.Dv XATTR_FINDERINFO_NAME
+(defined to be 
+.Dq com.apple.FinderInfo )
+extended attribute must be 32 bytes; see the
+.Dv ATTR_CMN_FNDRINFO
+section in
+.Xr getattrlist 2 .
 .Sh HISTORY
 .Fn setxattr
 and
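To make the 32-byte constraint above concrete, a small sketch (the path handling is illustrative; on failure setxattr returns -1 with errno set):

    #include <sys/xattr.h>
    #include <string.h>

    /* XATTR_FINDERINFO_NAME ("com.apple.FinderInfo") must be exactly 32 bytes */
    int
    set_finder_info(const char *path)
    {
            char finder_info[32];

            memset(finder_info, 0, sizeof(finder_info));
            return setxattr(path, XATTR_FINDERINFO_NAME, finder_info,
                sizeof(finder_info), 0, 0);
    }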
index 85dce6a8010ecf9183c31a2f4169cbb083015e40..7e2b9ad27e672d91faec6143032de11fe8bbf857 100644 (file)
@@ -49,7 +49,7 @@
 .Fn statfs "const char *path" "struct statfs *buf"
 .Ft int
 .Fn fstatfs "int fd" "struct statfs *buf"
-.Sh TRANSITIIONAL SYNOPSIS (NOW DEPRECATED)
+.Sh TRANSITIONAL SYNOPSIS (NOW DEPRECATED)
 .Ft int
 .br
 .Fn statfs64 "const char *path" "struct statfs64 *buf" ;
@@ -149,7 +149,7 @@ The
 routine returns the same information about an open file referenced by descriptor
 .Fa fd .
 .Sh FLAGS
-.Bl -tag -width MNT_UNKOWNPERMISSIONS
+.Bl -tag -width MNT_UNKNOWNPERMISSIONS
 These are some of the flags that may be present in the f_flags field. 
 .It Dv MNT_RDONLY
 A read-only filesystem
@@ -187,6 +187,8 @@ File system is journaled
 File system should defer writes
 .It Dv MNT_MULTILABEL
 MAC support for individual labels
+.It Dv MNT_CPROTECT
+File system supports per-file encrypted data protection
 .El
 .Sh CAVEATS
 In Mac OS X versions before 10.4, f_iosize is 4096. On these older
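A short sketch of testing the f_flags bits documented above, including the newly added MNT_CPROTECT (guarded with #ifdef, since it exists only on systems with this change):

    #include <sys/param.h>
    #include <sys/mount.h>
    #include <stdio.h>

    int
    main(void)
    {
            struct statfs sfs;

            if (statfs("/", &sfs) == -1)
                    return 1;
            if (sfs.f_flags & MNT_RDONLY)
                    printf("read-only filesystem\n");
            if (sfs.f_flags & MNT_JOURNALED)
                    printf("filesystem is journaled\n");
    #ifdef MNT_CPROTECT
            if (sfs.f_flags & MNT_CPROTECT)
                    printf("per-file encrypted data protection supported\n");
    #endif
            return 0;
    }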
index 8110125e24967d80b2fd3fa3ca3033bf173ccd34..485f9db3ce382b09df1f51eae87ce5cc42ebdcd6 100644 (file)
@@ -33,9 +33,7 @@
 .Sh NAME
 .Nm posix_spawnattr_setspecialport_np
 .Nm posix_spawnattr_setexceptionports_np
-.Nd set or get the
-.Em spawn-binpref
-attribute on a
+.Nd set special ports on a
 .Em posix_spawnattr_t
 .Sh SYNOPSIS
 .Fd #include <spawn.h>
index a0f33694513e1455d45ec46861bc77814a106613..f385d9990c07921ad43bae553f25bb0a13d71b07 100644 (file)
@@ -14,7 +14,6 @@ DATAFILES = \
        bpf.4   \
        divert.4        \
         dummynet.4 \
-       faith.4 \
        fd.4    \
        gif.4   \
        icmp.4  \
index be9d1e818289ebc21b751262cf52d02a4a3ffdcb..74fe8baf8c13e5db8df0fef25218b172b96df8aa 100644 (file)
@@ -314,17 +314,17 @@ The node must be a host
 (not a router)
 for the option to be meaningful.
 Defaults to off.
-.It Dv IPV6CTL_KEEPFAITH
-.Pq ip6.keepfaith
-Boolean: enable/disable
-.Dq FAITH
-TCP relay IPv6-to-IPv4 translator code in the kernel.
-Refer
-.Xr faith 4
-and
-.Xr faithd 8
-for detail.
-Defaults to off.
+.\".It Dv IPV6CTL_KEEPFAITH
+.\".Pq ip6.keepfaith
+.\"Boolean: enable/disable
+.\".Dq FAITH
+.\"TCP relay IPv6-to-IPv4 translator code in the kernel.
+.\"Refer
+.\".Xr faith 4
+.\"and
+.\".Xr faithd 8
+.\"for detail.
+.\"Defaults to off.
 .It Dv IPV6CTL_LOG_INTERVAL
 .Pq ip6.log_interval
 Integer: default interval between
index 25df62c8eac89053dab19d6af372aeb0a0a164ee..473ed190e951ea5bb34d7ad41afb79cf87caa0e7 100644 (file)
@@ -379,10 +379,10 @@ For wildcard sockets, this can restrict connections to IPv6 only.
 .\".Ox
 .\"IPv6 sockets are always IPv6-only, so the socket option is read-only
 .\"(not modifiable).
-.It Dv IPV6_FAITH Fa "int *"
-Get or set the status of whether
-.Xr faith 4
-connections can be made to this socket.
+.\".It Dv IPV6_FAITH Fa "int *"
+.\"Get or set the status of whether
+.\".Xr faith 4
+.\"connections can be made to this socket.
 .It Dv IPV6_USE_MIN_MTU Fa "int *"
 Get or set whether the minimal IPv6 maximum transmission unit (MTU) size
 will be used to avoid fragmentation from occurring for subsequent
index 52cf1c806285bbf346e18db20a8fe4f3561dc9c1..11a0369a32c45c61312132584035c84f262777e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -94,6 +94,8 @@ extern int    iskmemdev(dev_t dev);
 extern int     bpfkqfilter(dev_t dev, struct knote *kn);
 extern int     ptsd_kqfilter(dev_t dev, struct knote *kn);
 
+extern int ignore_is_ssd;
+
 struct vnode *speclisth[SPECHSZ];
 
 /* symbolic sleep message strings for devices */
@@ -154,19 +156,41 @@ struct vnodeopv_desc spec_vnodeop_opv_desc =
 static void set_blocksize(vnode_t, dev_t);
 
 
+#define THROTTLE_LEVEL_NONE    -1
+#define        THROTTLE_LEVEL_TIER0     0
+
+#define THROTTLE_LEVEL_THROTTLED 1
+#define THROTTLE_LEVEL_TIER1    1
+#define THROTTLE_LEVEL_TIER2    2
+
+#define THROTTLE_LEVEL_START    0
+#define THROTTLE_LEVEL_END      2
+
+
 struct _throttle_io_info_t {
-       struct timeval  last_normal_IO_timestamp;
-       struct timeval  last_IO_timestamp;
-       SInt32 numthreads_throttling;
-       SInt32 refcnt;
-       SInt32 alloc;
+       struct timeval  throttle_last_IO_timestamp[THROTTLE_LEVEL_END + 1];
+       struct timeval  throttle_last_write_timestamp;
+       struct timeval  throttle_start_IO_period_timestamp;
+
+       TAILQ_HEAD( , uthread) throttle_uthlist;        /* List of throttled uthreads */
+
+        lck_mtx_t       throttle_lock;
+        thread_call_t   throttle_timer_call;
+        int32_t throttle_timer_running;
+        int32_t throttle_io_count;
+        int32_t throttle_io_count_begin;
+        int32_t throttle_io_period;
+       uint32_t throttle_io_period_num;
+       int32_t throttle_refcnt;
+       int32_t throttle_alloc;
 };
 
 struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
 
-static void throttle_info_update_internal(void *throttle_info, int flags, boolean_t isssd);
-
+static void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int policy, int flags, boolean_t isssd);
+static int throttle_get_thread_throttle_level(uthread_t ut, int policy);
 
+__private_extern__ int32_t throttle_legacy_process_count = 0;
 
 /*
  * Trivial lookup routine that always fails.
@@ -259,12 +283,7 @@ spec_open(struct vnop_open_args *ap)
                                        return (EPERM);
                        }
                }
-               if (cdevsw[maj].d_type == D_TTY) {
-                       vnode_lock(vp);
-                       vp->v_flag |= VISTTY;
-                       vnode_unlock(vp);
-               }
-               
+
                devsw_lock(dev, S_IFCHR);
                error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
 
@@ -274,14 +293,15 @@ spec_open(struct vnop_open_args *ap)
 
                devsw_unlock(dev, S_IFCHR);
 
-               if (error == 0 && cdevsw[maj].d_type == D_DISK && !vp->v_un.vu_specinfo->si_initted) {
+               if (error == 0 && (D_TYPEMASK & cdevsw[maj].d_type) == D_DISK && !vp->v_un.vu_specinfo->si_initted) {
                        int     isssd = 0;
                        uint64_t throttle_mask = 0;
                        uint32_t devbsdunit = 0;
 
                        if (VNOP_IOCTL(vp, DKIOCGETTHROTTLEMASK, (caddr_t)&throttle_mask, 0, NULL) == 0) {
-                       
-                               if (VNOP_IOCTL(vp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ap->a_context) == 0) {
+                               
+                               if (throttle_mask != 0 &&
+                                   VNOP_IOCTL(vp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ap->a_context) == 0) {
                                        /*
                                         * as a reasonable approximation, only use the lowest bit of the mask
                                         * to generate a disk unit number
@@ -315,7 +335,7 @@ spec_open(struct vnop_open_args *ap)
                 * opens for writing of any disk block devices.
                 */
                if (securelevel >= 2 && cred != FSCRED &&
-                   (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
+                   (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
                        return (EPERM);
                /*
                 * Do not allow opens of block devices that are
@@ -403,14 +423,13 @@ spec_read(struct vnop_read_args *ap)
        switch (vp->v_type) {
 
        case VCHR:
-                if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
+                if ((D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type) == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
                        struct _throttle_io_info_t *throttle_info;
 
                        throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit];
 
-                       throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd);
+                       throttle_info_update_internal(throttle_info, NULL, -1, 0, vp->v_un.vu_specinfo->si_isssd);
                 }
-
                error = (*cdevsw[major(vp->v_rdev)].d_read)
                        (vp->v_rdev, uio, ap->a_ioflag);
 
@@ -497,16 +516,15 @@ spec_write(struct vnop_write_args *ap)
        switch (vp->v_type) {
 
        case VCHR:
-                if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
+                if ((D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type) == D_DISK && vp->v_un.vu_specinfo->si_throttleable) {
                        struct _throttle_io_info_t *throttle_info;
 
                        throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit];
 
-                       throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd);
+                       throttle_info_update_internal(throttle_info, NULL, -1, 0, vp->v_un.vu_specinfo->si_isssd);
 
-                       microuptime(&throttle_info->last_IO_timestamp);
+                       microuptime(&throttle_info->throttle_last_write_timestamp);
                 }
-
                error = (*cdevsw[major(vp->v_rdev)].d_write)
                        (vp->v_rdev, uio, ap->a_ioflag);
 
@@ -615,8 +633,21 @@ spec_ioctl(struct vnop_ioctl_args *ap)
                break;
 
        case VBLK:
-               retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
-                                                      ap->a_fflag, p);
+               if (kdebug_enable) {
+                       if (ap->a_command == DKIOCUNMAP) {
+                               dk_unmap_t      *unmap;
+                               dk_extent_t     *extent;
+                               uint32_t        i;
+
+                               unmap = (dk_unmap_t *)ap->a_data;
+                               extent = unmap->extents;
+
+                               for (i = 0; i < unmap->extentsCount; i++, extent++) {
+                                       KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, extent->offset/ap->a_vp->v_specsize, extent->length, 0, 0);
+                               }
+                       }
+               }
+               retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, p);
                break;
 
        default:
@@ -693,29 +724,38 @@ spec_fsync(struct vnop_fsync_args *ap)
        return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
 }
 
+
 /*
  * Just call the device strategy routine
  */
 extern int hard_throttle_on_root;
-void IOSleep(int);
 
-// the low priority process may wait for at most LOWPRI_MAX_DELAY millisecond
-#define LOWPRI_INITIAL_WINDOW_MSECS 100
-#define LOWPRI_WINDOW_MSECS_INC        50
-#define LOWPRI_MAX_WINDOW_MSECS 200
-#define LOWPRI_MAX_WAITING_MSECS 200
+void throttle_init(void);
+
 
+#define LOWPRI_THROTTLE_WINDOW_MSECS 500
+#define LOWPRI_LEGACY_THROTTLE_WINDOW_MSECS 200
+#define LOWPRI_IO_PERIOD_MSECS 200
+#define LOWPRI_IO_PERIOD_SSD_MSECS 20
+#define LOWPRI_TIMER_PERIOD_MSECS 10
+
+
+int    lowpri_throttle_window_msecs = LOWPRI_THROTTLE_WINDOW_MSECS;
+int    lowpri_legacy_throttle_window_msecs = LOWPRI_LEGACY_THROTTLE_WINDOW_MSECS;
+int    lowpri_io_period_msecs = LOWPRI_IO_PERIOD_MSECS;
+int    lowpri_io_period_ssd_msecs = LOWPRI_IO_PERIOD_SSD_MSECS;
+int    lowpri_timer_period_msecs = LOWPRI_TIMER_PERIOD_MSECS;
+
+/*
+ * If a process requiring legacy iothrottle behavior is running on the
+ * system, use legacy limits for throttle window and max IO size.
+ */
 #if CONFIG_EMBEDDED
-#define LOWPRI_SLEEP_INTERVAL 5
+#define THROTTLE_WINDOW (lowpri_throttle_window_msecs)
 #else
-#define LOWPRI_SLEEP_INTERVAL 2
+#define THROTTLE_WINDOW (throttle_legacy_process_count == 0 ? lowpri_throttle_window_msecs : lowpri_legacy_throttle_window_msecs)
 #endif
 
-int    lowpri_IO_initial_window_msecs  = LOWPRI_INITIAL_WINDOW_MSECS;
-int    lowpri_IO_window_msecs_inc  = LOWPRI_WINDOW_MSECS_INC;
-int    lowpri_max_window_msecs  = LOWPRI_MAX_WINDOW_MSECS;
-int     lowpri_max_waiting_msecs = LOWPRI_MAX_WAITING_MSECS;
-
 #if 0 
 #define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...) \
         do {                                                    \
@@ -727,10 +767,17 @@ int     lowpri_max_waiting_msecs = LOWPRI_MAX_WAITING_MSECS;
 #define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...)
 #endif
 
-SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_initial_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_IO_initial_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_IO_window_msecs_inc, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_throttle_window_msecs, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_throttle_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_legacy_throttle_window_msecs, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_io_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_io_period_msecs, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_io_period_ssd_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_io_period_ssd_msecs, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_timer_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_timer_period_msecs, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_process_count, CTLFLAG_RD | CTLFLAG_LOCKED, &throttle_legacy_process_count, 0, "");
+
+static lck_grp_t        *throttle_mtx_grp;
+static lck_attr_t       *throttle_mtx_attr;
+static lck_grp_attr_t   *throttle_mtx_grp_attr;
+
 
 /*
  * throttled I/O helper function
@@ -741,7 +788,7 @@ num_trailing_0(uint64_t n)
 {
        /*
         * since in most cases the number of trailing 0s is very small,
-     * we simply counting sequentially from the lowest bit
+        * we simply count sequentially from the lowest bit
         */
        if (n == 0)
                return sizeof(n) * 8;
@@ -753,6 +800,7 @@ num_trailing_0(uint64_t n)
        return count;
 }
 
+
 /*
  * Release the reference and if the item was allocated and this is the last
  * reference then free it.
@@ -762,7 +810,7 @@ num_trailing_0(uint64_t n)
 static int
 throttle_info_rel(struct _throttle_io_info_t *info)
 {
-       SInt32 oldValue = OSDecrementAtomic(&info->refcnt);
+       SInt32 oldValue = OSDecrementAtomic(&info->throttle_refcnt);
 
        DEBUG_ALLOC_THROTTLE_INFO("refcnt = %d info = %p\n", 
                info, (int)(oldValue -1), info );
@@ -775,13 +823,16 @@ throttle_info_rel(struct _throttle_io_info_t *info)
         * Once reference count is zero, no one else should be able to take a 
         * reference 
         */
-       if ((info->refcnt == 0) && (info->alloc)) {
-               DEBUG_ALLOC_THROTTLE_INFO("Freeing info = %p\n", info, info );
+       if ((info->throttle_refcnt == 0) && (info->throttle_alloc)) {
+               DEBUG_ALLOC_THROTTLE_INFO("Freeing info = %p\n", info);
+               
+               lck_mtx_destroy(&info->throttle_lock, throttle_mtx_grp);
                FREE(info, M_TEMP); 
        }
        return oldValue;
 }
 
+
 /*
  * Just take a reference on the throttle info structure.
  *
@@ -790,17 +841,211 @@ throttle_info_rel(struct _throttle_io_info_t *info)
 static SInt32
 throttle_info_ref(struct _throttle_io_info_t *info)
 {
-       SInt32 oldValue = OSIncrementAtomic(&info->refcnt);
+       SInt32 oldValue = OSIncrementAtomic(&info->throttle_refcnt);
 
        DEBUG_ALLOC_THROTTLE_INFO("refcnt = %d info = %p\n", 
                info, (int)(oldValue -1), info );
        /* Allocated items should never have a reference of zero */
-       if (info->alloc && (oldValue == 0))
+       if (info->throttle_alloc && (oldValue == 0))
                panic("Taking a reference without calling create throttle info!\n");
 
        return oldValue;
 }
 
+
+/*
+ * on entry the throttle_lock is held...
+ * this function is responsible for taking
+ * and dropping the reference on the info
+ * structure which will keep it from going
+ * away while the timer is running if it
+ * happens to have been dynamically allocated by
+ * a network filesystem kext which is now trying
+ * to free it
+ */
+static uint32_t
+throttle_timer_start(struct _throttle_io_info_t *info, boolean_t update_io_count)
+{      
+       struct timeval  elapsed;
+       int             elapsed_msecs;
+       int             throttle_level;
+       uint64_t        deadline;
+
+       if (update_io_count == TRUE) {
+               info->throttle_io_count_begin = info->throttle_io_count;
+               info->throttle_io_period_num++;
+
+               microuptime(&info->throttle_start_IO_period_timestamp);
+       }
+       for (throttle_level = THROTTLE_LEVEL_START; throttle_level < THROTTLE_LEVEL_END; throttle_level++) {
+
+               microuptime(&elapsed);
+               timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]);
+               elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
+
+               if (elapsed_msecs < THROTTLE_WINDOW) {
+                       /*
+                        * we had an I/O occur in this level within
+                        * our throttle window, so we need to
+                        * make sure the timer continues to run
+                        */
+                       break;
+               }
+       }
+       if (throttle_level >= THROTTLE_LEVEL_END) {
+               /*
+                * we're outside all of the throttle windows...
+                * don't start a new timer
+                */
+               info->throttle_timer_running = 0;
+
+               return (THROTTLE_LEVEL_END);
+       }
+       if (info->throttle_timer_running == 0) {
+               /*
+                * take a reference for the timer
+                */
+               throttle_info_ref(info);
+
+               info->throttle_timer_running = 1;
+       }
+       clock_interval_to_deadline(lowpri_timer_period_msecs, 1000000, &deadline);
+
+       thread_call_enter_delayed(info->throttle_timer_call, deadline);
+
+       return (throttle_level);
+}
+
+
+static void
+throttle_timer(struct _throttle_io_info_t *info)
+{
+       uthread_t       ut, utlist;
+       struct timeval  elapsed;
+       int             elapsed_msecs;
+       int             throttle_level;
+        boolean_t      update_io_count = FALSE;
+       boolean_t       need_wakeup = FALSE;
+       boolean_t       need_release = FALSE;
+
+        lck_mtx_lock(&info->throttle_lock);
+       
+       microuptime(&elapsed);
+       timevalsub(&elapsed, &info->throttle_start_IO_period_timestamp);
+       elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
+
+       if (elapsed_msecs >= info->throttle_io_period) {
+               /*
+                * we're closing out the current IO period...
+                * if we have a waiting thread, wake it up
+                * after we have reset the I/O window info
+                */
+               need_wakeup = TRUE;
+               update_io_count = TRUE;
+       }
+        if ((throttle_level = throttle_timer_start(info, update_io_count)) == THROTTLE_LEVEL_END) {
+               /*
+                * we are now outside of the throttle window
+                * for all throttle levels...
+                *
+                * the timer is not restarted in this case, so
+                * we need to get rid of the reference we took when
+                * we started up the timer... we can't do this
+                * until we are entirely done playing with 'info'
+                */
+               need_release = TRUE;
+       }
+
+       TAILQ_FOREACH_SAFE(ut, &info->throttle_uthlist, uu_throttlelist, utlist) {
+               /*
+                * if we are now outside of the throttle window release
+                * all of the currently blocked threads, otherwise
+                * look for threads that have had their IO policy changed
+                * by someone else and are no longer throttleable, or are
+                * not at the current throttle level and unblock them
+                */
+               if (throttle_level == THROTTLE_LEVEL_END || throttle_get_thread_throttle_level(ut, -1) <= throttle_level) {
+
+                       TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist);
+                       ut->uu_on_throttlelist = 0;
+
+                       wakeup(&ut->uu_on_throttlelist);
+               }
+       }
+       if (need_wakeup && !TAILQ_EMPTY(&info->throttle_uthlist)) {
+               /*
+                * we've entered a new I/O period and we're still
+                * in the throttle window, so wakeup the next guy in line
+                */
+               ut = (uthread_t)TAILQ_FIRST(&info->throttle_uthlist);
+               TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist);
+               ut->uu_on_throttlelist = 0;
+
+               wakeup(&ut->uu_on_throttlelist);
+       }
+        lck_mtx_unlock(&info->throttle_lock);
+
+       if (need_release == TRUE)
+               throttle_info_rel(info);
+}
+
+
+void
+throttle_init(void)
+{
+        struct _throttle_io_info_t *info;
+        int    i;
+
+       /*                                                                                                                                    
+         * allocate lock group attribute and group                                                                                            
+         */
+        throttle_mtx_grp_attr = lck_grp_attr_alloc_init();
+        throttle_mtx_grp = lck_grp_alloc_init("throttle I/O", throttle_mtx_grp_attr);
+
+        /*                                                                                                                                    
+         * allocate the lock attribute                                                                                                        
+         */
+        throttle_mtx_attr = lck_attr_alloc_init();
+
+       for (i = 0; i < LOWPRI_MAX_NUM_DEV; i++) {
+               info = &_throttle_io_info[i];
+         
+               lck_mtx_init(&info->throttle_lock, throttle_mtx_grp, throttle_mtx_attr);
+               info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info);
+
+               TAILQ_INIT(&info->throttle_uthlist);
+       }
+}
+
+
+/*
+ * KPI routine
+ * 
+ * wakeup and remove the specified thread from the throttle queue
+ * if it's no longer in a throttleable state...
+ * takes a valid uthread (which may or may not be on the
+ * throttle queue) as input
+ */
+void
+unthrottle_thread(uthread_t ut)
+{
+       struct _throttle_io_info_t *info;
+
+       if ((info = ut->uu_throttle_info) == NULL)
+               return;
+
+        lck_mtx_lock(&info->throttle_lock);
+
+       if (ut->uu_on_throttlelist && throttle_get_thread_throttle_level(ut, -1) <= THROTTLE_LEVEL_THROTTLED) { 
+               TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist);
+               ut->uu_on_throttlelist = 0;
+
+               wakeup(&ut->uu_on_throttlelist);
+       }
+        lck_mtx_unlock(&info->throttle_lock);
+}
+
+
 /*
  * KPI routine
  *
@@ -819,9 +1064,15 @@ throttle_info_create(void)
                return NULL;
        /* Mark that this one was allocated and needs to be freed */
        DEBUG_ALLOC_THROTTLE_INFO("Creating info = %p\n", info, info );
-       info->alloc = TRUE;
+       info->throttle_alloc = TRUE;
+
+       lck_mtx_init(&info->throttle_lock, throttle_mtx_grp, throttle_mtx_attr);
+       info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info);
+
+       TAILQ_INIT(&info->throttle_uthlist);
+
        /* Take a reference */
-       OSIncrementAtomic(&info->refcnt);
+       OSIncrementAtomic(&info->throttle_refcnt);
        return info;
 }
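The comment earlier in this file mentions throttle info structures dynamically allocated by a network filesystem kext. A hedged sketch of how such a kext might use the KPI shown in this diff — throttle_info_create() at mount time, throttle_info_mount_ref() to attach it, throttle_info_update() per I/O. The myfs_* names are purely illustrative, and throttle_info_release() is assumed to be the public counterpart of the static throttle_info_rel() above; headers and locking are omitted for brevity.

    /* hypothetical network-filesystem kext glue; not part of this commit */
    static void *myfs_throttle_info;

    static int
    myfs_mount_setup(mount_t mp)
    {
            /* allocate a throttle info struct (returns NULL on failure) */
            myfs_throttle_info = throttle_info_create();
            if (myfs_throttle_info == NULL)
                    return ENOMEM;

            /* attach to the mount; this takes its own reference */
            throttle_info_mount_ref(mp, myfs_throttle_info);
            return 0;
    }

    static void
    myfs_io_issued(int bflags)
    {
            /* stamp the throttle window so lower-tier threads back off */
            throttle_info_update(myfs_throttle_info, bflags);
    }

    static void
    myfs_mount_teardown(void)
    {
            /* drop the creation reference (assumed public release KPI) */
            throttle_info_release(myfs_throttle_info);
            myfs_throttle_info = NULL;
    }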
 
@@ -855,7 +1106,10 @@ throttle_info_mount_ref(mount_t mp, void *throttle_info)
        if ((throttle_info == NULL) || (mp == NULL))
                return;
        throttle_info_ref(throttle_info);
-       /* We already have a reference release it before adding the new one */
+
+       /*
+        * We already have a reference release it before adding the new one
+        */
        if (mp->mnt_throttle_info)
                throttle_info_rel(mp->mnt_throttle_info);
        mp->mnt_throttle_info = throttle_info;
@@ -868,10 +1122,9 @@ throttle_info_mount_ref(mount_t mp, void *throttle_info)
  * handle must be released by throttle_info_rel_by_mask
  */
 int
-throttle_info_ref_by_mask(uint64_t throttle_mask,
-                                                 throttle_info_handle_t *throttle_info_handle)
+throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle)
 {
-       int dev_index;
+       int     dev_index;
        struct _throttle_io_info_t *info;
 
        if (throttle_info_handle == NULL)
@@ -881,6 +1134,7 @@ throttle_info_ref_by_mask(uint64_t throttle_mask,
        info = &_throttle_io_info[dev_index];
        throttle_info_ref(info);
        *(struct _throttle_io_info_t**)throttle_info_handle = info;
+
        return 0;
 }
 
@@ -892,7 +1146,9 @@ throttle_info_ref_by_mask(uint64_t throttle_mask,
 void
 throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle)
 {
-       /* for now the handle is just a pointer to _throttle_io_info_t */
+       /*
+        * for now the handle is just a pointer to _throttle_io_info_t
+        */
        throttle_info_rel((struct _throttle_io_info_t*)throttle_info_handle);
 }
 
@@ -916,13 +1172,13 @@ throttle_info_get_last_io_time(mount_t mp, struct timeval *tv)
        struct _throttle_io_info_t *info;
 
        if (mp == NULL)
-           info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
+               info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
        else if (mp->mnt_throttle_info == NULL)
-           info = &_throttle_io_info[mp->mnt_devbsdunit];
+               info = &_throttle_io_info[mp->mnt_devbsdunit];
        else
-           info = mp->mnt_throttle_info;
+               info = mp->mnt_throttle_info;
 
-       *tv = info->last_IO_timestamp;
+       *tv = info->throttle_last_write_timestamp;
 }
 
 void
@@ -931,69 +1187,101 @@ update_last_io_time(mount_t mp)
        struct _throttle_io_info_t *info;
                
        if (mp == NULL)
-           info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
+               info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
        else if (mp->mnt_throttle_info == NULL)
-           info = &_throttle_io_info[mp->mnt_devbsdunit];
+               info = &_throttle_io_info[mp->mnt_devbsdunit];
        else
-           info = mp->mnt_throttle_info;
+               info = mp->mnt_throttle_info;
 
-       microuptime(&info->last_IO_timestamp);
+       microuptime(&info->throttle_last_write_timestamp);
 }
 
 
-#if CONFIG_EMBEDDED
-
-int throttle_get_io_policy(struct uthread **ut)
+int
+throttle_get_io_policy(uthread_t *ut)
 {
-       int policy = IOPOL_DEFAULT;
-       proc_t p = current_proc();
-
        *ut = get_bsdthread_info(current_thread());
-               
-       if (p != NULL)
-               policy = p->p_iopol_disk;
-
-       if (*ut != NULL) {
-               // the I/O policy of the thread overrides that of the process
-               // unless the I/O policy of the thread is default
-               if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
-                       policy = (*ut)->uu_iopol_disk;
-       }
-       return policy;
+
+       return (proc_get_task_selfdiskacc());
 }
-#else
 
-int throttle_get_io_policy(__unused struct uthread **ut)
-{
-       *ut = get_bsdthread_info(current_thread());
 
-       return (proc_get_task_selfdiskacc());
+
+static int
+throttle_get_thread_throttle_level(uthread_t ut, int policy)
+{      
+       int     thread_throttle_level = THROTTLE_LEVEL_NONE;
+
+       if (ut == NULL)
+               ut = get_bsdthread_info(current_thread());
+
+       if (policy == -1)
+               policy = proc_get_diskacc(ut->uu_thread);
+
+       switch (policy) {
+
+       case IOPOL_DEFAULT:
+       case IOPOL_NORMAL:
+               thread_throttle_level = THROTTLE_LEVEL_TIER0;
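+               /* FALLTHROUGH: a normal-policy thread flagged by the boot cache is bumped to TIER2 below */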
+       case IOPOL_PASSIVE:
+               if (ut->uu_throttle_bc == TRUE)
+                       thread_throttle_level = THROTTLE_LEVEL_TIER2;
+               break;
+       case IOPOL_THROTTLE:
+               thread_throttle_level = THROTTLE_LEVEL_TIER2;
+               break;
+       case IOPOL_UTILITY:
+               thread_throttle_level = THROTTLE_LEVEL_TIER1;
+               break;
+       default:
+               printf("unknown I/O policy %d", policy);
+               break;
+       }
+       return (thread_throttle_level);
 }
-#endif
 
 
 static int
-throttle_io_will_be_throttled_internal(int lowpri_window_msecs, void * throttle_info)
+throttle_io_will_be_throttled_internal(void * throttle_info)
 {
        struct _throttle_io_info_t *info = throttle_info;
        struct timeval elapsed;
-       int elapsed_msecs;
-       int policy;
-       struct uthread  *ut;
-
-       policy = throttle_get_io_policy(&ut);
+       int     elapsed_msecs;
+       int     thread_throttle_level;
+       int     throttle_level;
 
-       if (ut->uu_throttle_bc == FALSE && policy != IOPOL_THROTTLE)
+       if ((thread_throttle_level = throttle_get_thread_throttle_level(NULL, -1)) < THROTTLE_LEVEL_THROTTLED)
                return (0);
 
-       microuptime(&elapsed);
-       timevalsub(&elapsed, &info->last_normal_IO_timestamp);
-       elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
+       for (throttle_level = THROTTLE_LEVEL_START; throttle_level < thread_throttle_level; throttle_level++) {
 
-       if (lowpri_window_msecs == -1) // use the max waiting time
-               lowpri_window_msecs = lowpri_max_waiting_msecs;
+               microuptime(&elapsed);
+               timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]);
+               elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
 
-       return elapsed_msecs < lowpri_window_msecs;
+               if (elapsed_msecs < THROTTLE_WINDOW)
+                       break;
+       }
+       if (throttle_level >= thread_throttle_level) {
+               /*
+                * we're beyond all of the throttle windows
+                * that affect the throttle level of this thread,
+                * so go ahead and treat as normal I/O
+                */
+               return (0);
+       }
+       if (info->throttle_io_count != info->throttle_io_count_begin) {
+               /*
+                * we've already issued at least one throttleable I/O
+                * in the current I/O window, so avoid issuing another one
+                */
+               return (2);
+       }
+       /*
+        * we're in the throttle window, so
+        * cut the I/O size back
+        */
+       return (1);
 }
 
 /* 
@@ -1002,83 +1290,123 @@ throttle_io_will_be_throttled_internal(int lowpri_window_msecs, void * throttle_
  * the correct throttle info array element.
  */
 int
-throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp)
+throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp)
 {
-       void *info;
+       void    *info;
 
-       /* Should we just return zero if no mount point */
+       /*
+        * Should we just return zero if no mount point
+        */
        if (mp == NULL)
-           info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
+               info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
        else if (mp->mnt_throttle_info == NULL)
-           info = &_throttle_io_info[mp->mnt_devbsdunit];
+               info = &_throttle_io_info[mp->mnt_devbsdunit];
        else
-           info = mp->mnt_throttle_info;
-       return throttle_io_will_be_throttled_internal(lowpri_window_msecs, info);
+               info = mp->mnt_throttle_info;
+
+       return throttle_io_will_be_throttled_internal(info);
 }
 
+
 uint32_t
 throttle_lowpri_io(int sleep_amount)
 {
-       int sleep_cnt = 0;
-       int numthreads_throttling;
-       int max_try_num;
-       struct uthread *ut;
+       uthread_t ut;
        struct _throttle_io_info_t *info;
-       int max_waiting_msecs;
+       int     throttle_type = 0;
+       int     sleep_cnt = 0;
+       int     locked = 0;
+       uint32_t  throttle_io_period_num = 0;
+       boolean_t insert_tail = TRUE;
 
        ut = get_bsdthread_info(current_thread());
 
-       if ((ut->uu_lowpri_window == 0) || (ut->uu_throttle_info == NULL))
-               goto done;
+       if (ut->uu_lowpri_window == 0)
+               return (0);
 
        info = ut->uu_throttle_info;
 
-       if (sleep_amount != 0) {
-#if CONFIG_EMBEDDED
-               max_waiting_msecs = lowpri_max_waiting_msecs;
-#else 
-               if (ut->uu_throttle_isssd == TRUE)
-                       max_waiting_msecs = lowpri_max_waiting_msecs / 100;
-               else
-                       max_waiting_msecs = lowpri_max_waiting_msecs;
-#endif
-               if (max_waiting_msecs < LOWPRI_SLEEP_INTERVAL)
-                       max_waiting_msecs = LOWPRI_SLEEP_INTERVAL;
+       if ((sleep_amount == 0) || (info == NULL))
+               goto done;
 
-               numthreads_throttling = info->numthreads_throttling + MIN(10, MAX(1, sleep_amount)) - 1;
-               max_try_num = max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, numthreads_throttling);
+       if (sleep_amount == 1 && ut->uu_throttle_bc == FALSE)
+               sleep_amount = 0;
 
-               for (sleep_cnt = 0; sleep_cnt < max_try_num; sleep_cnt++) {
-                       if (throttle_io_will_be_throttled_internal(ut->uu_lowpri_window, info)) {
-                               if (sleep_cnt == 0) {
-                                       KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
-                                                             ut->uu_lowpri_window, max_try_num, numthreads_throttling, 0, 0);
-                               }
-                               IOSleep(LOWPRI_SLEEP_INTERVAL);
-                               DEBUG_ALLOC_THROTTLE_INFO("sleeping because of info = %p\n", info, info );
-                       } else {
+       throttle_io_period_num = info->throttle_io_period_num;
+
+       while ( (throttle_type = throttle_io_will_be_throttled_internal(info)) ) {
+
+               if (throttle_type == 1) {
+                       if (sleep_amount == 0)
+                               break;                  
+                       if (info->throttle_io_period_num < throttle_io_period_num)
+                               break;
+                       if ((info->throttle_io_period_num - throttle_io_period_num) >= (uint32_t)sleep_amount)
                                break;
-                       }
                }
-               if (sleep_cnt) {
-                       KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
-                                             ut->uu_lowpri_window, sleep_cnt, 0, 0, 0);
+               if (!locked) {
+                       lck_mtx_lock(&info->throttle_lock);
+                       locked = 1;
                }
-       }
-       SInt32 oldValue;
-       oldValue = OSDecrementAtomic(&info->numthreads_throttling);
+               if (info->throttle_timer_running == 0) {
+                       /*
+                        * try to start the timer since it's
+                        * currently not running.  on failure, no
+                        * timer reference to drop since it wasn't started
+                        */
+                       if (throttle_timer_start(info, TRUE) == THROTTLE_LEVEL_END)
+                               goto done;
+               }
+               if (sleep_cnt == 0) {
+                       KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
+                                                     ut->uu_lowpri_window, info->throttle_io_period, info->throttle_io_count, 0, 0);
+               }
+               if (ut->uu_on_throttlelist == 0) {
+                       if (insert_tail == TRUE)
+                               TAILQ_INSERT_TAIL(&info->throttle_uthlist, ut, uu_throttlelist);
+                       else
+                               TAILQ_INSERT_HEAD(&info->throttle_uthlist, ut, uu_throttlelist);
+
+                       ut->uu_on_throttlelist = 1;
+               }
+               msleep((caddr_t)&ut->uu_on_throttlelist, &info->throttle_lock, PRIBIO + 1, "throttle_lowpri_io", NULL);
 
-       if (oldValue <= 0) {
-               panic("%s: numthreads negative", __func__);
+               sleep_cnt++;
+               
+               if (sleep_amount == 0)
+                       insert_tail = FALSE;
+               else if (info->throttle_io_period_num < throttle_io_period_num ||
+                        (info->throttle_io_period_num - throttle_io_period_num) >= (uint32_t)sleep_amount) {
+                       insert_tail = FALSE;
+                       sleep_amount = 0;
+               }
        }
 done:
-       ut->uu_lowpri_window = 0;
-       if (ut->uu_throttle_info)
-               throttle_info_rel(ut->uu_throttle_info);
+       if (ut->uu_on_throttlelist) {
+               if (!locked) {
+                       lck_mtx_lock(&info->throttle_lock);
+                       locked = 1;
+               }
+               if (ut->uu_on_throttlelist) {
+                       TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist);
+
+                       ut->uu_on_throttlelist = 0;
+               }
+       }
+       if (locked)
+               lck_mtx_unlock(&info->throttle_lock);
+               
+       if (sleep_cnt)
+               KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
+                                     ut->uu_lowpri_window, info->throttle_io_period, info->throttle_io_count, 0, 0);
+       if (info)
+               throttle_info_rel(info);
+
        ut->uu_throttle_info = NULL;
        ut->uu_throttle_bc = FALSE;
+       ut->uu_lowpri_window = 0;
 
-       return (sleep_cnt * LOWPRI_SLEEP_INTERVAL);
+       return (sleep_cnt);
 }
 
 /*
@@ -1091,85 +1419,59 @@ done:
  */
 void throttle_set_thread_io_policy(int policy)
 {
-#if !CONFIG_EMBEDDED
        proc_apply_thread_selfdiskacc(policy);
-#else /* !CONFIG_EMBEDDED */
-       struct uthread *ut;
-       ut = get_bsdthread_info(current_thread());
-       ut->uu_iopol_disk = policy;
-#endif /* !CONFIG_EMBEDDED */
 }
 
 
 static
-void throttle_info_reset_window(struct uthread *ut)
+void throttle_info_reset_window(uthread_t ut)
 {
        struct _throttle_io_info_t *info;
 
-       info = ut->uu_throttle_info;
+       if ( (info = ut->uu_throttle_info) ) {
+               throttle_info_rel(info);
 
-       OSDecrementAtomic(&info->numthreads_throttling);
-       throttle_info_rel(info);
-       ut->uu_throttle_info = NULL;
-       ut->uu_lowpri_window = 0;
+               ut->uu_throttle_info = NULL;
+               ut->uu_lowpri_window = 0;
+               ut->uu_throttle_bc = FALSE;
+       }
 }
 
 static
-void throttle_info_set_initial_window(struct uthread *ut, struct _throttle_io_info_t *info, boolean_t isssd, boolean_t BC_throttle)
+void throttle_info_set_initial_window(uthread_t ut, struct _throttle_io_info_t *info, boolean_t BC_throttle)
 {
-       SInt32 oldValue;
+       if (ut->uu_throttle_info == NULL) {
 
-       ut->uu_throttle_info = info;
-       throttle_info_ref(info);
-       DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info );
+               ut->uu_throttle_info = info;
+               throttle_info_ref(info);
+               DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info );
 
-       oldValue = OSIncrementAtomic(&info->numthreads_throttling);
-       if (oldValue < 0) {
-               panic("%s: numthreads negative", __func__);
+               ut->uu_lowpri_window = THROTTLE_WINDOW;
+               ut->uu_throttle_bc = BC_throttle;
        }
-       ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
-       ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
-       ut->uu_throttle_isssd = isssd;
-       ut->uu_throttle_bc = BC_throttle;
 }
 
 
 static
-void throttle_info_update_internal(void *throttle_info, int flags, boolean_t isssd)
+void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int policy, int flags, boolean_t isssd)
 {
-       struct _throttle_io_info_t *info = throttle_info;
-       struct uthread  *ut;
-       int policy;
-       int is_throttleable_io = 0;
-       int is_passive_io = 0;
+       int     thread_throttle_level;
 
-       if (!lowpri_IO_initial_window_msecs || (info == NULL))
+       if (THROTTLE_WINDOW == 0)
                return;
-       policy = throttle_get_io_policy(&ut);
 
-       switch (policy) {
-       case IOPOL_DEFAULT:
-       case IOPOL_NORMAL:
-               break;
-       case IOPOL_THROTTLE:
-               is_throttleable_io = 1;
-               break;
-       case IOPOL_PASSIVE:
-               is_passive_io = 1;
-               break;
-       default:
-               printf("unknown I/O policy %d", policy);
-               break;
-       }
+       if (ut == NULL)
+               ut = get_bsdthread_info(current_thread());
 
-       if (!is_throttleable_io && ISSET(flags, B_PASSIVE))
-               is_passive_io |= 1;
+       thread_throttle_level = throttle_get_thread_throttle_level(ut, policy);
 
-       if (!is_throttleable_io) {
-               if (!is_passive_io){
-                       microuptime(&info->last_normal_IO_timestamp);
-               }
-       } else if (ut) {
+       if (thread_throttle_level == THROTTLE_LEVEL_TIER0 && ISSET(flags, B_PASSIVE))
+               thread_throttle_level = THROTTLE_LEVEL_NONE;
+
+       if (thread_throttle_level != THROTTLE_LEVEL_NONE)
+               microuptime(&info->throttle_last_IO_timestamp[thread_throttle_level]);
+
+       if (thread_throttle_level >= THROTTLE_LEVEL_THROTTLED) {
                /*
                 * I'd really like to do the IOSleep here, but
                 * we may be holding all kinds of filesystem related locks
@@ -1180,42 +1482,54 @@ void throttle_info_update_internal(void *throttle_info, int flags, boolean_t iss
                 * do the delay just before we return from the system
                 * call that triggered this I/O or from vnode_pagein
                 */
-               if (ut->uu_lowpri_window == 0)
-                       throttle_info_set_initial_window(ut, info, isssd, FALSE);
-               else {
-                       /* The thread sends I/Os to different devices within the same system call */
-                       if (ut->uu_throttle_info != info) {
-                               struct _throttle_io_info_t *old_info = ut->uu_throttle_info;
-
-                               // keep track of the numthreads in the right device
-                               OSDecrementAtomic(&old_info->numthreads_throttling);
-                               OSIncrementAtomic(&info->numthreads_throttling);
-
-                               DEBUG_ALLOC_THROTTLE_INFO("switching from info = %p\n", old_info, old_info );
-                               DEBUG_ALLOC_THROTTLE_INFO("switching to info = %p\n", info, info );
-                               /* This thread no longer needs a reference on that throttle info */
-                               throttle_info_rel(ut->uu_throttle_info);
-                               ut->uu_throttle_info = info;
-                               /* Need to take a reference on this throttle info */
-                               throttle_info_ref(ut->uu_throttle_info);
-                       }
-                       int numthreads = MAX(1, info->numthreads_throttling);
-                       ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
-                       if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
-                               ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
-
-                       if (isssd == FALSE) {
-                               /*
-                                * we're here because we've actually issued I/Os to different devices...
-                                * if at least one of them was a non SSD, then thottle the thread
-                                * using the policy for non SSDs
-                                */
-                               ut->uu_throttle_isssd = FALSE;
-                       }
+               if (info->throttle_io_period == 0) {
+
+                       if (isssd == TRUE)
+                               info->throttle_io_period = lowpri_io_period_ssd_msecs;
+                       else
+                               info->throttle_io_period = lowpri_io_period_msecs;
+
+                       if (info->throttle_io_period < lowpri_timer_period_msecs)
+                               info->throttle_io_period = lowpri_timer_period_msecs;
                }
+               OSAddAtomic(1, &info->throttle_io_count);
+
+               throttle_info_set_initial_window(ut, info, FALSE);
+       }
+}
+
+void throttle_info_update_by_mount(mount_t mp)
+{
+       struct _throttle_io_info_t *info;
+       uthread_t ut;
+       boolean_t isssd = FALSE;
+
+       ut = get_bsdthread_info(current_thread());
+
+       if (ut->uu_lowpri_window)
+               return;
+
+       if (mp != NULL) {
+               if ((mp->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd)
+                       isssd = TRUE;
+               info = &_throttle_io_info[mp->mnt_devbsdunit];
+       } else
+               info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
+
+       if (info->throttle_io_period == 0) {
+
+               if (isssd == TRUE)
+                       info->throttle_io_period = lowpri_io_period_ssd_msecs;
+               else
+                       info->throttle_io_period = lowpri_io_period_msecs;
+
+               if (info->throttle_io_period < lowpri_timer_period_msecs)
+                       info->throttle_io_period = lowpri_timer_period_msecs;
        }
+       throttle_info_set_initial_window(ut, info, FALSE);
 }
 
+
 /*
  * KPI routine
  *
@@ -1224,7 +1538,8 @@ void throttle_info_update_internal(void *throttle_info, int flags, boolean_t iss
  */
 void throttle_info_update(void *throttle_info, int flags)
 {
-       throttle_info_update_internal(throttle_info, flags, FALSE);
+        if (throttle_info)
+               throttle_info_update_internal(throttle_info, NULL, -1, flags, FALSE);
 }
 
 /*
@@ -1236,7 +1551,9 @@ void throttle_info_update(void *throttle_info, int flags)
 void throttle_info_update_by_mask(void *throttle_info_handle, int flags)
 {
        void *throttle_info = throttle_info_handle;
-       /* for now we only use the lowest bit of the throttle mask, so the
+
+       /*
+        * for now we only use the lowest bit of the throttle mask, so the
         * handle is the same as the throttle_info.  Later if we store a
         * set of throttle infos in the handle, we will want to loop through
         * them and call throttle_info_update in a loop
@@ -1244,20 +1561,77 @@ void throttle_info_update_by_mask(void *throttle_info_handle, int flags)
        throttle_info_update(throttle_info, flags);
 }
 
-extern int ignore_is_ssd;
+
+int throttle_info_io_will_be_throttled(void * throttle_info, int policy)
+{
+       struct _throttle_io_info_t *info = throttle_info;
+       struct timeval elapsed;
+       int     elapsed_msecs;
+       int     throttle_level;
+       int     thread_throttle_level;
+
+        switch (policy) {
+
+        case IOPOL_THROTTLE:
+                thread_throttle_level = THROTTLE_LEVEL_TIER2;
+                break;
+        case IOPOL_UTILITY:
+                thread_throttle_level = THROTTLE_LEVEL_TIER1;
+                break;
+        default:
+                thread_throttle_level = THROTTLE_LEVEL_TIER0;
+               break;
+       }
+       for (throttle_level = THROTTLE_LEVEL_START; throttle_level < thread_throttle_level; throttle_level++) {
+
+               microuptime(&elapsed);
+               timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]);
+               elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
+
+               if (elapsed_msecs < THROTTLE_WINDOW)
+                       break;
+       }
+       if (throttle_level >= thread_throttle_level) {
+               /*
+                * we're beyond all of the throttle windows
+                * so go ahead and treat as normal I/O
+                */
+               return (0);
+       }
+       /*
+        * we're in the throttle window
+        */
+       return (1);
+}
+
+void
+throttle_legacy_process_incr(void)
+{
+       OSIncrementAtomic(&throttle_legacy_process_count);
+}
+
+void
+throttle_legacy_process_decr(void)
+{
+       OSDecrementAtomic(&throttle_legacy_process_count);
+}
+
 
 int
 spec_strategy(struct vnop_strategy_args *ap)
 {
-        buf_t  bp;
+       buf_t   bp;
        int     bflags;
        int     policy;
        dev_t   bdev;
        uthread_t ut;
        mount_t mp;
-       int strategy_ret;
+       int     strategy_ret;
        struct _throttle_io_info_t *throttle_info;
        boolean_t isssd = FALSE;
+#if !CONFIG_EMBEDDED
+       proc_t curproc = current_proc();
+#endif /* !CONFIG_EMBEDDED */
 
         bp = ap->a_bp;
        bdev = buf_device(bp);
@@ -1265,13 +1639,21 @@ spec_strategy(struct vnop_strategy_args *ap)
 
        policy = throttle_get_io_policy(&ut);
 
-       if (policy == IOPOL_THROTTLE) {
+       if (bp->b_flags & B_META) 
+               bp->b_attr.ba_flags |= BA_META;
+
+       if (policy == IOPOL_THROTTLE || policy == IOPOL_UTILITY) {
                bp->b_flags |= B_THROTTLED_IO;
                bp->b_attr.ba_flags |= BA_THROTTLED_IO;
                bp->b_flags &= ~B_PASSIVE;
        } else if (policy == IOPOL_PASSIVE)
                bp->b_flags |= B_PASSIVE;
 
+#if !CONFIG_EMBEDDED
+       if ((curproc != NULL) && ((curproc->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP))
+               bp->b_attr.ba_flags |= BA_DELAYIDLESLEEP;
+#endif /* !CONFIG_EMBEDDED */
+               
        bflags = bp->b_flags;
 
         if (kdebug_enable) {
@@ -1292,10 +1674,13 @@ spec_strategy(struct vnop_strategy_args *ap)
                else if (bflags & B_PASSIVE)
                        code |= DKIO_PASSIVE;
 
-               KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
-                                     bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
+               if (bp->b_attr.ba_flags & BA_NOCACHE)
+                       code |= DKIO_NOCACHE;
+
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
+                                         bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
         }
-       if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
+       if (((bflags & (B_THROTTLED_IO | B_PASSIVE | B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
            mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
                hard_throttle_on_root = 1;
 
@@ -1306,10 +1691,11 @@ spec_strategy(struct vnop_strategy_args *ap)
        } else
                throttle_info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
 
-       throttle_info_update_internal(throttle_info, bflags, isssd);
+       throttle_info_update_internal(throttle_info, ut, policy, bflags, isssd);
 
        if ((bflags & B_READ) == 0) {
-               microuptime(&throttle_info->last_IO_timestamp);
+               microuptime(&throttle_info->throttle_last_write_timestamp);
+
                if (mp) {
                        INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
                }
@@ -1342,19 +1728,19 @@ spec_strategy(struct vnop_strategy_args *ap)
        
        strategy_ret = (*(strategy_fcn_ret_t*)bdevsw[major(bdev)].d_strategy)(bp);
        
-       if ((IO_SATISFIED_BY_CACHE == strategy_ret) && (ut->uu_lowpri_window != 0) && (ut->uu_throttle_info != NULL)) {
+       if (IO_SATISFIED_BY_CACHE == strategy_ret) {
                /*
                 * If this was a throttled IO satisfied by the boot cache,
                 * don't delay the thread.
                 */
                throttle_info_reset_window(ut);
 
-       } else if ((IO_SHOULD_BE_THROTTLED == strategy_ret) && (ut->uu_lowpri_window == 0) && (ut->uu_throttle_info == NULL)) {
+       } else if (IO_SHOULD_BE_THROTTLED == strategy_ret) {
                /*
                 * If the boot cache indicates this IO should be throttled,
                 * delay the thread.
                 */
-               throttle_info_set_initial_window(ut, throttle_info, isssd, TRUE);
+               throttle_info_set_initial_window(ut, throttle_info, TRUE);
        }
        return (0);
 }
@@ -1394,15 +1780,16 @@ spec_close(struct vnop_close_args *ap)
                 * a controlling terminal, unless it is the closing
                 * process' controlling terminal.  In that case,
                 * if the reference count is 1 (this is the very
-            * last close)
+                * last close)
                 */
                sessp = proc_session(p);
                if (sessp != SESSION_NULL) {
-                       if ((vcount(vp) == 1) && 
-                               (vp == sessp->s_ttyvp)) {
+                       if (vp == sessp->s_ttyvp && vcount(vp) == 1) {
+                               struct tty *tp;
 
                                session_lock(sessp);
                                if (vp == sessp->s_ttyvp) {
+                                       tp = SESSION_TP(sessp);
                                        sessp->s_ttyvp = NULL;
                                        sessp->s_ttyvid = 0;
                                        sessp->s_ttyp = TTY_NULL;
@@ -1413,6 +1800,8 @@ spec_close(struct vnop_close_args *ap)
 
                                if (do_rele) {
                                        vnode_rele(vp);
+                                       if (NULL != tp)
+                                               ttyfree(tp);
                                }
                        }
                        session_rele(sessp);
@@ -1420,20 +1809,15 @@ spec_close(struct vnop_close_args *ap)
 
                devsw_lock(dev, S_IFCHR);
 
-               vp->v_specinfo->si_opencount--;
+               if (--vp->v_specinfo->si_opencount < 0)
+                       panic("negative open count (c, %u, %u)", major(dev), minor(dev));
 
-               if (vp->v_specinfo->si_opencount < 0) {
-                       panic("Negative open count?");
-               }
                /*
-                * close on last reference or on vnode revoke call
+                * close always, or close on last reference, or close on revoke
                 */
-               if ((vcount(vp) > 0) && ((flags & IO_REVOKE) == 0)) {
-                       devsw_unlock(dev, S_IFCHR);
-                       return (0);
-               }       
-               
-               error = cdevsw[major(dev)].d_close(dev, flags, S_IFCHR, p);
+               if ((D_TRACKCLOSE & cdevsw[major(dev)].d_type) != 0 ||
+                   vcount(vp) == 0 || (flags & IO_REVOKE) != 0)
+                       error = cdevsw[major(dev)].d_close(dev, flags, S_IFCHR, p);
 
                devsw_unlock(dev, S_IFCHR);
                break;
@@ -1465,18 +1849,11 @@ spec_close(struct vnop_close_args *ap)
 
                devsw_lock(dev, S_IFBLK);
 
-               vp->v_specinfo->si_opencount--;
-               
-               if (vp->v_specinfo->si_opencount < 0) {
-                       panic("Negative open count?");
-               }
-
-               if (vcount(vp) > 0) {
-                       devsw_unlock(dev, S_IFBLK);
-                       return (0);
-               }
+               if (--vp->v_specinfo->si_opencount < 0)
+                       panic("negative open count (b, %u, %u)", major(dev), minor(dev));
 
-               error = bdevsw[major(dev)].d_close(dev, flags, S_IFBLK, p);
+               if (vcount(vp) == 0)
+                       error = bdevsw[major(dev)].d_close(dev, flags, S_IFBLK, p);
 
                devsw_unlock(dev, S_IFBLK);
                break;
index 7b44d40e37ba5bb10cc8ac4fc30f3cae6106b517..2b14d796b1e5c51272a9e16df26f256f854aab82 100644
@@ -112,9 +112,9 @@ struct specinfo {
  */
 #define        SPECHSZ 64
 #if    ((SPECHSZ&(SPECHSZ-1)) == 0)
-#define        SPECHASH(rdev)  (((rdev>>5)+(rdev))&(SPECHSZ-1))
+#define        SPECHASH(rdev)  (((rdev>>21)+(rdev))&(SPECHSZ-1))
 #else
-#define        SPECHASH(rdev)  (((unsigned)((rdev>>5)+(rdev)))%SPECHSZ)
+#define        SPECHASH(rdev)  (((unsigned)((rdev>>21)+(rdev)))%SPECHSZ)
 #endif
 
 extern struct vnode *speclisth[SPECHSZ];
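
The deeper shift matters because a dev_t carries the major number in its high
bits: rdev>>5 mixed only minor-number bits into the bucket index, while
rdev>>21 folds the major bits in as well.  A small sketch of the power-of-two
branch (device numbers hypothetical, assuming the usual 8-bit-major packing):

    #include <stdio.h>

    #define SPECHSZ         64
    #define SPECHASH(rdev)  (((rdev >> 21) + (rdev)) & (SPECHSZ - 1))

    int
    main(void)
    {
            /* same minor, different majors */
            unsigned dev_a = (1u << 24) | 5;    /* major 1, minor 5 */
            unsigned dev_b = (2u << 24) | 5;    /* major 2, minor 5 */

            /* prints 13 and 21; the old rdev>>5 form put both in bucket 5 */
            printf("%u %u\n", SPECHASH(dev_a), SPECHASH(dev_b));
            return (0);
    }
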
index 79c622bf88949bf5d40a9dd8935594e90770e040..5a186e2b67f293f3983e9d7f4ef3e1b8c7aae800 100644
@@ -8,10 +8,12 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
+       altq classq pktsched
 
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
+       altq classq pktsched
 
 EXPINC_SUBDIRS_I386 = \
 
@@ -19,7 +21,8 @@ DATAFILES= \
        bpf.h dlil.h \
        ethernet.h if.h if_arp.h \
        if_dl.h if_llc.h if_media.h if_mib.h \
-       if_types.h if_utun.h if_var.h \
+       if_types.h if_var.h \
+       if_utun.h if_utun_crypto.h if_utun_crypto_ipsec.h \
        kext_net.h ndrv.h pfkeyv2.h \
        route.h
 
@@ -29,14 +32,14 @@ KERNELFILES= \
 
 PRIVATE_DATAFILES = \
        if_vlan_var.h if_ppp.h firewire.h \
-       ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \
+       ppp_defs.h radix.h if_bond_var.h if_bond_internal.h lacp.h ndrv_var.h \
        netsrc.h raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \
-       if_bridgevar.h ntstat.h if_llreach.h
+       if_bridgevar.h ntstat.h iptap.h if_llreach.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
-       bpfdesc.h dlil_pvt.h ppp_comp.h \
+       bpfdesc.h ppp_comp.h \
        zlib.h bpf_compat.h net_osdep.h \
-       ntstat.h if_llreach.h
+       ntstat.h iptap.h if_llreach.h flowadv.h
 
 INSTALL_MI_LIST        = ${DATAFILES}
 
diff --git a/bsd/net/altq/Makefile b/bsd/net/altq/Makefile
new file mode 100644
index 0000000..b8ad371
--- /dev/null
@@ -0,0 +1,43 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+INSTINC_SUBDIRS = \
+
+INSTINC_SUBDIRS_PPC = \
+
+INSTINC_SUBDIRS_I386 = \
+
+EXPINC_SUBDIRS = \
+
+EXPINC_SUBDIRS_PPC = \
+
+EXPINC_SUBDIRS_I386 = \
+
+DATAFILES= \
+
+KERNELFILES= \
+
+PRIVATE_DATAFILES = \
+       altq.h altq_cbq.h altq_fairq.h altq_hfsc.h altq_priq.h altq_qfq.h
+
+PRIVATE_KERNELFILES = ${KERNELFILES}
+
+INSTALL_MI_LIST        = ${DATAFILES}
+
+INSTALL_MI_DIR = net/altq
+
+EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES}
+
+EXPORT_MI_DIR = ${INSTALL_MI_DIR}
+
+INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+
+INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/bsd/net/altq/altq.h b/bsd/net/altq/altq.h
new file mode 100644
index 0000000..590c681
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq.h,v 1.4 2006/10/12 19:59:08 peter Exp $ */
+/*     $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $      */
+
+/*
+ * Copyright (C) 1998-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_H_
+#define        _ALTQ_ALTQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+
+/* altq discipline type */
+#define        ALTQT_NONE      PKTSCHEDT_NONE  /* reserved */
+#define        ALTQT_CBQ       PKTSCHEDT_CBQ   /* cbq */
+#define        ALTQT_HFSC      PKTSCHEDT_HFSC  /* hfsc */
+#define        ALTQT_PRIQ      PKTSCHEDT_PRIQ  /* priority queue */
+#define        ALTQT_FAIRQ     PKTSCHEDT_FAIRQ /* fairq */
+#define        ALTQT_QFQ       PKTSCHEDT_QFQ   /* quick fair queueing */
+#define        ALTQT_MAX       PKTSCHEDT_MAX   /* should be max disc type + 1 */
+#endif /* PRIVATE */
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/altq/altq_var.h>
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _ALTQ_ALTQ_H_ */
diff --git a/bsd/net/altq/altq_cbq.c b/bsd/net/altq/altq_cbq.c
new file mode 100644
index 0000000..31b3573
--- /dev/null
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_cbq.c,v 1.23 2007/09/13 20:40:02 chl Exp $       */
+/*     $KAME: altq_cbq.c,v 1.9 2000/12/14 08:12:45 thorpej Exp $       */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the SMCC Technology
+ *      Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ *      promote products derived from this software without specific prior
+ *      written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if PF_ALTQ && PKTSCHED_CBQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/pfvar.h>
+#include <net/net_osdep.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <netinet/in.h>
+
+/*
+ * Forward Declarations.
+ */
+static int altq_cbq_request(struct ifaltq *, enum altrq, void *);
+static int altq_cbq_enqueue(struct ifaltq *, struct mbuf *);
+static struct mbuf *altq_cbq_dequeue(struct ifaltq *, enum altdq_op);
+
+int
+altq_cbq_pfattach(struct pf_altq *a)
+{
+       struct ifnet    *ifp;
+       int              error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(&ifp->if_snd);
+       error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_CBQ, a->altq_disc,
+           altq_cbq_enqueue, altq_cbq_dequeue, NULL, altq_cbq_request);
+       IFCQ_UNLOCK(&ifp->if_snd);
+
+       return (error);
+}
+
+int
+altq_cbq_add(struct pf_altq *a)
+{
+       cbq_state_t     *cbqp;
+       struct ifnet    *ifp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+       if (!ALTQ_IS_READY(IFCQ_ALTQ(&ifp->if_snd)))
+               return (ENODEV);
+
+       cbqp = cbq_alloc(ifp, M_WAITOK, TRUE);
+       if (cbqp == NULL)
+               return (ENOMEM);
+
+       /* keep the state in pf_altq */
+       a->altq_disc = cbqp;
+
+       return (0);
+}
+
+int
+altq_cbq_remove(struct pf_altq *a)
+{
+       cbq_state_t     *cbqp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((cbqp = a->altq_disc) == NULL)
+               return (EINVAL);
+       a->altq_disc = NULL;
+
+       return (cbq_destroy(cbqp));
+}
+
+int
+altq_cbq_add_queue(struct pf_altq *a)
+{
+       struct cbq_opts *opts = &a->pq_u.cbq_opts;
+       cbq_state_t *cbqp;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((cbqp = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(cbqp->ifnp.ifq_);
+       err = cbq_add_queue(cbqp, a->qlimit, a->priority,
+           opts->minburst, opts->maxburst, opts->pktsize, opts->maxpktsize,
+           opts->ns_per_byte, opts->maxidle, opts->minidle, opts->offtime,
+           opts->flags, a->parent_qid, a->qid, NULL);
+       IFCQ_UNLOCK(cbqp->ifnp.ifq_);
+
+       return (err);
+}
+
+int
+altq_cbq_remove_queue(struct pf_altq *a)
+{
+       cbq_state_t *cbqp;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((cbqp = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(cbqp->ifnp.ifq_);
+       err = cbq_remove_queue(cbqp, a->qid);
+       IFCQ_UNLOCK(cbqp->ifnp.ifq_);
+
+       return (err);
+}
+
+int
+altq_cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       struct ifclassq *ifq = NULL;
+       cbq_state_t *cbqp;
+       class_stats_t stats;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((unsigned)*nbytes < sizeof (stats))
+               return (EINVAL);
+
+       if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
+               return (EBADF);
+
+       ifq = cbqp->ifnp.ifq_;
+       IFCQ_LOCK_ASSERT_HELD(ifq);     /* lock held by altq_lookup */
+       error = cbq_get_class_stats(cbqp, a->qid, &stats);
+       IFCQ_UNLOCK(ifq);
+       if (error != 0)
+               return (error);
+
+       if ((error = copyout((caddr_t)&stats, (user_addr_t)(uintptr_t)ubuf,
+           sizeof (stats))) != 0)
+               return (error);
+
+       *nbytes = sizeof (stats);
+
+       return (0);
+}
+
+static int
+altq_cbq_request(struct ifaltq *altq, enum altrq req, void *arg)
+{
+       cbq_state_t     *cbqp = (cbq_state_t *)altq->altq_disc;
+
+       switch (req) {
+       case ALTRQ_PURGE:
+               cbq_purge(cbqp);
+               break;
+
+       case ALTRQ_PURGE_SC:
+               /* not supported for ALTQ instance */
+               break;
+
+       case ALTRQ_EVENT:
+               cbq_event(cbqp, (cqev_t)arg);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * altq_cbq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+altq_cbq_enqueue(struct ifaltq *altq, struct mbuf *m)
+{
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               printf("%s: packet for %s does not have pkthdr\n", __func__,
+                   if_name(altq->altq_ifcq->ifcq_ifp));
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       return (cbq_enqueue(altq->altq_disc, NULL, m, m_pftag(m)));
+}
+
+/*
+ * altq_cbq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *     ALTDQ_REMOVE must return the same packet if called immediately
+ *     after ALTDQ_POLL.
+ */
+static struct mbuf *
+altq_cbq_dequeue(struct ifaltq *altq, enum altdq_op op)
+{
+       return (cbq_dequeue(altq->altq_disc, (cqdq_op_t)op));
+}
+#endif /* PF_ALTQ && PKTSCHED_CBQ */
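
A note on the ALTDQ_POLL/ALTDQ_REMOVE contract documented above: a caller may
peek at the head packet, decide whether it can be sent, and then dequeue it
for real, relying on both calls returning the same mbuf.  A caller-side sketch
(can_transmit() and transmit() are hypothetical driver hooks, not part of this
commit):

    static void
    drain_one(struct ifaltq *altq)
    {
            struct mbuf *m;

            /* peek: returns the head packet without removing it */
            m = (*altq->altq_dequeue)(altq, ALTDQ_POLL);
            if (m == NULL || !can_transmit(m))
                    return;

            /* per the contract, this returns the same packet just polled */
            m = (*altq->altq_dequeue)(altq, ALTDQ_REMOVE);
            transmit(m);
    }
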
diff --git a/bsd/net/altq/altq_cbq.h b/bsd/net/altq/altq_cbq.h
new file mode 100644
index 0000000..fba7310
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_cbq.h,v 1.8 2006/10/12 19:59:08 peter Exp $       */
+/*     $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $  */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the SMCC Technology
+ *      Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ *      promote products derived from this software without specific prior
+ *      written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _NET_ALTQ_ALTQ_CBQ_H_
+#define        _NET_ALTQ_ALTQ_CBQ_H_
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched_cbq.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int     altq_cbq_pfattach(struct pf_altq *);
+extern int     altq_cbq_add(struct pf_altq *);
+extern int     altq_cbq_remove(struct pf_altq *);
+extern int     altq_cbq_add_queue(struct pf_altq *);
+extern int     altq_cbq_remove_queue(struct pf_altq *);
+extern int     altq_cbq_getqstats(struct pf_altq *, void *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* !_NET_ALTQ_ALTQ_CBQ_H_ */
diff --git a/bsd/net/altq/altq_fairq.c b/bsd/net/altq/altq_fairq.c
new file mode 100644
index 0000000..2846547
--- /dev/null
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
+ */
+/*
+ * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
+ * fairq.  The fairq algorithm is completely different from priq, of course,
+ * but because I used priq's skeleton I believe I should include priq's
+ * copyright.
+ *
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if PF_ALTQ && PKTSCHED_FAIRQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/pfvar.h>
+#include <net/net_osdep.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_fairq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int altq_fairq_enqueue(struct ifaltq *, struct mbuf *);
+static struct mbuf *altq_fairq_dequeue(struct ifaltq *, enum altdq_op);
+static int altq_fairq_request(struct ifaltq *, enum altrq, void *);
+
+int
+altq_fairq_pfattach(struct pf_altq *a)
+{
+       struct ifnet *ifp;
+       int error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(&ifp->if_snd);
+       error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_FAIRQ, a->altq_disc,
+           altq_fairq_enqueue, altq_fairq_dequeue, NULL, altq_fairq_request);
+       IFCQ_UNLOCK(&ifp->if_snd);
+
+       return (error);
+}
+
+int
+altq_fairq_add(struct pf_altq *a)
+{
+       struct fairq_if *fif;
+       struct ifnet *ifp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+       if (!ALTQ_IS_READY(IFCQ_ALTQ(&ifp->if_snd)))
+               return (ENODEV);
+
+       fif = fairq_alloc(ifp, M_WAITOK, TRUE);
+       if (fif == NULL)
+               return (ENOMEM);
+
+       /* keep the state in pf_altq */
+       a->altq_disc = fif;
+
+       return (0);
+}
+
+int
+altq_fairq_remove(struct pf_altq *a)
+{
+       struct fairq_if *fif;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((fif = a->altq_disc) == NULL)
+               return (EINVAL);
+       a->altq_disc = NULL;
+
+       return (fairq_destroy(fif));
+}
+
+int
+altq_fairq_add_queue(struct pf_altq *a)
+{
+       struct fairq_if *fif;
+       struct fairq_opts *opts = &a->pq_u.fairq_opts;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((fif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(fif->fif_ifq);
+       err = fairq_add_queue(fif, a->priority, a->qlimit, a->bandwidth,
+           opts->nbuckets, opts->flags, opts->hogs_m1, opts->lssc_m1,
+           opts->lssc_d, opts->lssc_m2, a->qid, NULL);
+       IFCQ_UNLOCK(fif->fif_ifq);
+
+       return (err);
+}
+
+int
+altq_fairq_remove_queue(struct pf_altq *a)
+{
+       struct fairq_if *fif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((fif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(fif->fif_ifq);
+       err = fairq_remove_queue(fif, a->qid);
+       IFCQ_UNLOCK(fif->fif_ifq);
+
+       return (err);
+}
+
+int
+altq_fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       struct ifclassq *ifq = NULL;
+       struct fairq_if *fif;
+       struct fairq_classstats stats;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((unsigned)*nbytes < sizeof (stats))
+               return (EINVAL);
+
+       if ((fif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
+               return (EBADF);
+
+       ifq = fif->fif_ifq;
+       IFCQ_LOCK_ASSERT_HELD(ifq);     /* lock held by altq_lookup */
+       error = fairq_get_class_stats(fif, a->qid, &stats);
+       IFCQ_UNLOCK(ifq);
+       if (error != 0)
+               return (error);
+
+       if ((error = copyout((caddr_t)&stats, (user_addr_t)(uintptr_t)ubuf,
+           sizeof (stats))) != 0)
+               return (error);
+
+       *nbytes = sizeof (stats);
+
+       return (0);
+}
+
+static int
+altq_fairq_request(struct ifaltq *altq, enum altrq req, void *arg)
+{
+       struct fairq_if *fif = (struct fairq_if *)altq->altq_disc;
+
+       switch (req) {
+       case ALTRQ_PURGE:
+               fairq_purge(fif);
+               break;
+
+       case ALTRQ_PURGE_SC:
+               /* not supported for ALTQ instance */
+               break;
+
+       case ALTRQ_EVENT:
+               fairq_event(fif, (cqev_t)arg);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * altq_fairq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+altq_fairq_enqueue(struct ifaltq *altq, struct mbuf *m)
+{
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               printf("%s: packet for %s does not have pkthdr\n", __func__,
+                   if_name(altq->altq_ifcq->ifcq_ifp));
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       return (fairq_enqueue(altq->altq_disc, NULL, m, m_pftag(m)));
+}
+
+/*
+ * altq_fairq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *     ALTDQ_REMOVE must return the same packet if called immediately
+ *     after ALTDQ_POLL.
+ */
+static struct mbuf *
+altq_fairq_dequeue(struct ifaltq *altq, enum altdq_op op)
+{
+       return (fairq_dequeue(altq->altq_disc, (cqdq_op_t)op));
+}
+#endif /* PF_ALTQ && PKTSCHED_FAIRQ */
diff --git a/bsd/net/altq/altq_fairq.h b/bsd/net/altq/altq_fairq.h
new file mode 100644
index 0000000..d9d536c
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ */
+
+#ifndef _NET_ALTQ_ALTQ_FAIRQ_H_
+#define        _NET_ALTQ_ALTQ_FAIRQ_H_
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched_fairq.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int     altq_fairq_pfattach(struct pf_altq *);
+extern int     altq_fairq_add(struct pf_altq *);
+extern int     altq_fairq_remove(struct pf_altq *);
+extern int     altq_fairq_add_queue(struct pf_altq *);
+extern int     altq_fairq_remove_queue(struct pf_altq *);
+extern int     altq_fairq_getqstats(struct pf_altq *, void *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_ALTQ_FAIRQ_H_ */
diff --git a/bsd/net/altq/altq_hfsc.c b/bsd/net/altq/altq_hfsc.c
new file mode 100644
index 0000000..1e58df4
--- /dev/null
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_hfsc.c,v 1.25 2007/09/13 20:40:02 chl Exp $      */
+/*     $KAME: altq_hfsc.c,v 1.17 2002/11/29 07:48:33 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+
+#if PF_ALTQ && PKTSCHED_HFSC
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/pfvar.h>
+#include <net/net_osdep.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_hfsc.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int altq_hfsc_request(struct ifaltq *, enum altrq, void *);
+static int altq_hfsc_enqueue(struct ifaltq *, struct mbuf *);
+static struct mbuf *altq_hfsc_dequeue(struct ifaltq *, enum altdq_op);
+
+int
+altq_hfsc_pfattach(struct pf_altq *a)
+{
+       struct ifnet *ifp;
+       int error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(&ifp->if_snd);
+       error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_HFSC, a->altq_disc,
+           altq_hfsc_enqueue, altq_hfsc_dequeue, NULL, altq_hfsc_request);
+       IFCQ_UNLOCK(&ifp->if_snd);
+
+       return (error);
+}
+
+int
+altq_hfsc_add(struct pf_altq *a)
+{
+       struct hfsc_if *hif;
+       struct ifnet *ifp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+       if (!ALTQ_IS_READY(IFCQ_ALTQ(&ifp->if_snd)))
+               return (ENODEV);
+
+       hif = hfsc_alloc(ifp, M_WAITOK, TRUE);
+       if (hif == NULL)
+               return (ENOMEM);
+
+       /* keep the state in pf_altq */
+       a->altq_disc = hif;
+
+       return (0);
+}
+
+int
+altq_hfsc_remove(struct pf_altq *a)
+{
+       struct hfsc_if *hif;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((hif = a->altq_disc) == NULL)
+               return (EINVAL);
+       a->altq_disc = NULL;
+
+       return (hfsc_destroy(hif));
+}
+
+int
+altq_hfsc_add_queue(struct pf_altq *a)
+{
+       struct hfsc_if *hif;
+       struct hfsc_opts *opts = &a->pq_u.hfsc_opts;
+       struct service_curve rtsc, lssc, ulsc;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((hif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       bzero(&rtsc, sizeof (rtsc));
+       bzero(&lssc, sizeof (lssc));
+       bzero(&ulsc, sizeof (ulsc));
+
+       rtsc.m1 = opts->rtsc_m1;
+       rtsc.d  = opts->rtsc_d;
+       rtsc.m2 = opts->rtsc_m2;
+       rtsc.fl = opts->rtsc_fl;
+       lssc.m1 = opts->lssc_m1;
+       lssc.d  = opts->lssc_d;
+       lssc.m2 = opts->lssc_m2;
+       lssc.fl = opts->lssc_fl;
+       ulsc.m1 = opts->ulsc_m1;
+       ulsc.d  = opts->ulsc_d;
+       ulsc.m2 = opts->ulsc_m2;
+       ulsc.fl = opts->ulsc_fl;
+
+       IFCQ_LOCK(hif->hif_ifq);
+       err = hfsc_add_queue(hif, &rtsc, &lssc, &ulsc, a->qlimit,
+           opts->flags, a->parent_qid, a->qid, NULL);
+       IFCQ_UNLOCK(hif->hif_ifq);
+
+       return (err);
+}
+
+int
+altq_hfsc_remove_queue(struct pf_altq *a)
+{
+       struct hfsc_if *hif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((hif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(hif->hif_ifq);
+       err = hfsc_remove_queue(hif, a->qid);
+       IFCQ_UNLOCK(hif->hif_ifq);
+
+       return (err);
+}
+
+int
+altq_hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       struct ifclassq *ifq = NULL;
+       struct hfsc_if *hif;
+       struct hfsc_classstats stats;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((unsigned)*nbytes < sizeof (stats))
+               return (EINVAL);
+
+       if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
+               return (EBADF);
+
+       ifq = hif->hif_ifq;
+       IFCQ_LOCK_ASSERT_HELD(ifq);     /* lock held by altq_lookup */
+       error = hfsc_get_class_stats(hif, a->qid, &stats);
+       IFCQ_UNLOCK(ifq);
+       if (error != 0)
+               return (error);
+
+       if ((error = copyout((caddr_t)&stats, (user_addr_t)(uintptr_t)ubuf,
+           sizeof (stats))) != 0)
+               return (error);
+
+       *nbytes = sizeof (stats);
+
+       return (0);
+}
+
+static int
+altq_hfsc_request(struct ifaltq *altq, enum altrq req, void *arg)
+{
+       struct hfsc_if  *hif = (struct hfsc_if *)altq->altq_disc;
+
+       switch (req) {
+       case ALTRQ_PURGE:
+               hfsc_purge(hif);
+               break;
+
+       case ALTRQ_PURGE_SC:
+               /* not supported for ALTQ instance */
+               break;
+
+       case ALTRQ_EVENT:
+               hfsc_event(hif, (cqev_t)arg);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * altq_hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+altq_hfsc_enqueue(struct ifaltq *altq, struct mbuf *m)
+{
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               printf("%s: packet for %s does not have pkthdr\n", __func__,
+                   if_name(altq->altq_ifcq->ifcq_ifp));
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       return (hfsc_enqueue(altq->altq_disc, NULL, m, m_pftag(m)));
+}
+
+/*
+ * altq_hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *     ALTDQ_REMOVE must return the same packet if called immediately
+ *     after ALTDQ_POLL.
+ */
+static struct mbuf *
+altq_hfsc_dequeue(struct ifaltq *altq, enum altdq_op op)
+{
+       return (hfsc_dequeue(altq->altq_disc, (cqdq_op_t)op));
+}
+#endif /* PF_ALTQ && PKTSCHED_HFSC */
diff --git a/bsd/net/altq/altq_hfsc.h b/bsd/net/altq/altq_hfsc.h
new file mode 100644
index 0000000..6b46293
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_hfsc.h,v 1.8 2006/10/12 19:59:08 peter Exp $      */
+/*     $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+#ifndef _NET_ALTQ_ALTQ_HFSC_H_
+#define        _NET_ALTQ_ALTQ_HFSC_H_
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched_hfsc.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int     altq_hfsc_pfattach(struct pf_altq *);
+extern int     altq_hfsc_add(struct pf_altq *);
+extern int     altq_hfsc_remove(struct pf_altq *);
+extern int     altq_hfsc_add_queue(struct pf_altq *);
+extern int     altq_hfsc_remove_queue(struct pf_altq *);
+extern int     altq_hfsc_getqstats(struct pf_altq *, void *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_ALTQ_HFSC_H_ */
diff --git a/bsd/net/altq/altq_priq.c b/bsd/net/altq/altq_priq.c
new file mode 100644
index 0000000..a86a483
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $      */
+/*     $KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $  */
+
+/*
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * priority queue
+ */
+
+#if PF_ALTQ && PKTSCHED_PRIQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/pfvar.h>
+#include <net/net_osdep.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_priq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int altq_priq_enqueue(struct ifaltq *, struct mbuf *);
+static struct mbuf *altq_priq_dequeue(struct ifaltq *, enum altdq_op);
+static int altq_priq_request(struct ifaltq *, enum altrq, void *);
+
+int
+altq_priq_pfattach(struct pf_altq *a)
+{
+       struct ifnet *ifp;
+       int error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(&ifp->if_snd);
+       error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_PRIQ, a->altq_disc,
+           altq_priq_enqueue, altq_priq_dequeue, NULL, altq_priq_request);
+       IFCQ_UNLOCK(&ifp->if_snd);
+
+       return (error);
+}
+
+int
+altq_priq_add(struct pf_altq *a)
+{
+       struct priq_if  *pif;
+       struct ifnet    *ifp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+       if (!ALTQ_IS_READY(IFCQ_ALTQ(&ifp->if_snd)))
+               return (ENODEV);
+
+       pif = priq_alloc(ifp, M_WAITOK, TRUE);
+       if (pif == NULL)
+               return (ENOMEM);
+
+       /* keep the state in pf_altq */
+       a->altq_disc = pif;
+
+       return (0);
+}
+
+int
+altq_priq_remove(struct pf_altq *a)
+{
+       struct priq_if *pif;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((pif = a->altq_disc) == NULL)
+               return (EINVAL);
+       a->altq_disc = NULL;
+
+       return (priq_destroy(pif));
+}
+
+int
+altq_priq_add_queue(struct pf_altq *a)
+{
+       struct priq_if *pif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((pif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(pif->pif_ifq);
+       err = priq_add_queue(pif, a->priority, a->qlimit,
+           a->pq_u.priq_opts.flags, a->qid, NULL);
+       IFCQ_UNLOCK(pif->pif_ifq);
+
+       return (err);
+}
+
+int
+altq_priq_remove_queue(struct pf_altq *a)
+{
+       struct priq_if *pif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((pif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(pif->pif_ifq);
+       err = priq_remove_queue(pif, a->qid);
+       IFCQ_UNLOCK(pif->pif_ifq);
+
+       return (err);
+}
+
+int
+altq_priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       struct ifclassq *ifq = NULL;
+       struct priq_if *pif;
+       struct priq_classstats stats;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((unsigned)*nbytes < sizeof (stats))
+               return (EINVAL);
+
+       if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
+               return (EBADF);
+
+       ifq = pif->pif_ifq;
+       IFCQ_LOCK_ASSERT_HELD(ifq);     /* lock held by altq_lookup */
+       error = priq_get_class_stats(pif, a->qid, &stats);
+       IFCQ_UNLOCK(ifq);
+       if (error != 0)
+               return (error);
+
+       if ((error = copyout((caddr_t)&stats, (user_addr_t)(uintptr_t)ubuf,
+           sizeof (stats))) != 0)
+               return (error);
+
+       *nbytes = sizeof (stats);
+
+       return (0);
+}
+
+static int
+altq_priq_request(struct ifaltq *altq, enum altrq req, void *arg)
+{
+       struct priq_if  *pif = (struct priq_if *)altq->altq_disc;
+
+       switch (req) {
+       case ALTRQ_PURGE:
+               priq_purge(pif);
+               break;
+
+       case ALTRQ_PURGE_SC:
+       case ALTRQ_THROTTLE:
+               /* not supported for ALTQ instance */
+               break;
+
+       case ALTRQ_EVENT:
+               priq_event(pif, (cqev_t)arg);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * altq_priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+altq_priq_enqueue(struct ifaltq *altq, struct mbuf *m)
+{
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               printf("%s: packet for %s does not have pkthdr\n", __func__,
+                   if_name(altq->altq_ifcq->ifcq_ifp));
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       return (priq_enqueue(altq->altq_disc, NULL, m, m_pftag(m)));
+}
+
+/*
+ * altq_priq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *     ALTDQ_REMOVE must return the same packet if called immediately
+ *     after ALTDQ_POLL.
+ */
+static struct mbuf *
+altq_priq_dequeue(struct ifaltq *altq, enum altdq_op op)
+{
+       return (priq_dequeue(altq->altq_disc, (cqdq_op_t)op));
+}
+#endif /* PF_ALTQ && PKTSCHED_PRIQ */
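
Editor's sketch, not part of the commit: a consumer exercising the ALTDQ_POLL / ALTDQ_REMOVE contract described in the comment above, via the ALTQ_POLL / ALTQ_DEQUEUE macros this commit adds in bsd/net/altq/if_altq.h. The ifaltq is assumed attached and enabled with a plain (non-SC) dequeue handler, and the ifclassq lock held.

    static void
    altq_poll_then_remove(struct ifaltq *altq)
    {
            struct mbuf *peeked, *m;

            IFCQ_LOCK_ASSERT_HELD(altq->altq_ifcq);

            ALTQ_POLL(altq, peeked);        /* peek at head, nothing dequeued */
            if (peeked != NULL) {
                    ALTQ_DEQUEUE(altq, m);  /* must yield the very same mbuf */
                    VERIFY(m == peeked);
                    m_freem(m);             /* a real caller would transmit it */
            }
    }
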
diff --git a/bsd/net/altq/altq_priq.h b/bsd/net/altq/altq_priq.h
new file mode 100644
index 0000000..f6b6372
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_priq.h,v 1.7 2006/10/12 19:59:08 peter Exp $      */
+/*     $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $  */
+/*
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_ALTQ_ALTQ_PRIQ_H_
+#define        _NET_ALTQ_ALTQ_PRIQ_H_
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched_priq.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int     altq_priq_pfattach(struct pf_altq *);
+extern int     altq_priq_add(struct pf_altq *);
+extern int     altq_priq_remove(struct pf_altq *);
+extern int     altq_priq_add_queue(struct pf_altq *);
+extern int     altq_priq_remove_queue(struct pf_altq *);
+extern int     altq_priq_getqstats(struct pf_altq *, void *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_ALTQ_PRIQ_H_ */
diff --git a/bsd/net/altq/altq_qfq.c b/bsd/net/altq/altq_qfq.c
new file mode 100644
index 0000000..d45437e
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * quick fair queueing
+ */
+
+#if PF_ALTQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/pfvar.h>
+#include <net/net_osdep.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_qfq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int altq_qfq_enqueue(struct ifaltq *, struct mbuf *);
+static struct mbuf *altq_qfq_dequeue(struct ifaltq *, enum altdq_op);
+static int altq_qfq_request(struct ifaltq *, enum altrq, void *);
+
+int
+altq_qfq_pfattach(struct pf_altq *a)
+{
+       struct ifnet *ifp;
+       int error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(&ifp->if_snd);
+       error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_QFQ, a->altq_disc,
+           altq_qfq_enqueue, altq_qfq_dequeue, NULL, altq_qfq_request);
+       IFCQ_UNLOCK(&ifp->if_snd);
+
+       return (error);
+}
+
+int
+altq_qfq_add(struct pf_altq *a)
+{
+       struct qfq_if   *qif;
+       struct ifnet    *ifp;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+       if (!ALTQ_IS_READY(IFCQ_ALTQ(&ifp->if_snd)))
+               return (ENODEV);
+
+       qif = qfq_alloc(ifp, M_WAITOK, TRUE);
+       if (qif == NULL)
+               return (ENOMEM);
+
+       /* keep the state in pf_altq */
+       a->altq_disc = qif;
+
+       return (0);
+}
+
+int
+altq_qfq_remove(struct pf_altq *a)
+{
+       struct qfq_if *qif;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((qif = a->altq_disc) == NULL)
+               return (EINVAL);
+       a->altq_disc = NULL;
+
+       return (qfq_destroy(qif));
+}
+
+int
+altq_qfq_add_queue(struct pf_altq *a)
+{
+       struct qfq_if *qif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((qif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(qif->qif_ifq);
+       err = qfq_add_queue(qif, a->qlimit, a->weight, a->pq_u.qfq_opts.lmax,
+           a->pq_u.qfq_opts.flags, a->qid, NULL);
+       IFCQ_UNLOCK(qif->qif_ifq);
+
+       return (err);
+}
+
+int
+altq_qfq_remove_queue(struct pf_altq *a)
+{
+       struct qfq_if *qif;
+       int err;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((qif = a->altq_disc) == NULL)
+               return (EINVAL);
+
+       IFCQ_LOCK(qif->qif_ifq);
+       err = qfq_remove_queue(qif, a->qid);
+       IFCQ_UNLOCK(qif->qif_ifq);
+
+       return (err);
+}
+
+int
+altq_qfq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       struct ifclassq *ifq = NULL;
+       struct qfq_if *qif;
+       struct qfq_classstats stats;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((unsigned)*nbytes < sizeof (stats))
+               return (EINVAL);
+
+       if ((qif = altq_lookup(a->ifname, ALTQT_QFQ)) == NULL)
+               return (EBADF);
+
+       ifq = qif->qif_ifq;
+       IFCQ_LOCK_ASSERT_HELD(ifq);     /* lock held by altq_lookup */
+       error = qfq_get_class_stats(qif, a->qid, &stats);
+       IFCQ_UNLOCK(ifq);
+       if (error != 0)
+               return (error);
+
+       if ((error = copyout((caddr_t)&stats, (user_addr_t)(uintptr_t)ubuf,
+           sizeof (stats))) != 0)
+               return (error);
+
+       *nbytes = sizeof (stats);
+
+       return (0);
+}
+
+static int
+altq_qfq_request(struct ifaltq *altq, enum altrq req, void *arg)
+{
+       struct qfq_if   *qif = (struct qfq_if *)altq->altq_disc;
+
+       switch (req) {
+       case ALTRQ_PURGE:
+               qfq_purge(qif);
+               break;
+
+       case ALTRQ_PURGE_SC:
+               /* not supported for ALTQ instance */
+               break;
+
+       case ALTRQ_EVENT:
+               qfq_event(qif, (cqev_t)arg);
+               break;
+       }
+       return (0);
+}
+
+/*
+ * altq_qfq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+altq_qfq_enqueue(struct ifaltq *altq, struct mbuf *m)
+{
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               printf("%s: packet for %s does not have pkthdr\n", __func__,
+                   if_name(altq->altq_ifcq->ifcq_ifp));
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       return (qfq_enqueue(altq->altq_disc, NULL, m, m_pftag(m)));
+}
+
+/*
+ * altq_qfq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *     ALTDQ_REMOVE must return the same packet if called immediately
+ *     after ALTDQ_POLL.
+ */
+static struct mbuf *
+altq_qfq_dequeue(struct ifaltq *altq, enum altdq_op op)
+{
+       return (qfq_dequeue(altq->altq_disc, (cqdq_op_t)op));
+}
+#endif /* PF_ALTQ */
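
Editor's sketch, not part of the commit: taken together, the entry points above mirror the priq shim — allocate on add, keep state in a->altq_disc, attach last. A hedged guess at the call order the pf(4) ioctl path would follow, for a hypothetical, fully-populated struct pf_altq describing a QFQ discipline and one class queue; pf_lock is held throughout, as each entry point asserts.

    static int
    altq_qfq_setup_sketch(struct pf_altq *a)
    {
            int error;

            if ((error = altq_qfq_add(a)) != 0)     /* allocate qfq_if; state */
                    return (error);                 /* kept in a->altq_disc */

            if ((error = altq_qfq_add_queue(a)) != 0 ||
                (error = altq_qfq_pfattach(a)) != 0)
                    (void) altq_qfq_remove(a);      /* destroys the discipline */

            return (error);
    }
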
diff --git a/bsd/net/altq/altq_qfq.h b/bsd/net/altq/altq_qfq.h
new file mode 100644
index 0000000..7907422
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_ALTQ_ALTQ_QFQ_H_
+#define        _NET_ALTQ_ALTQ_QFQ_H_
+
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched_qfq.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int     altq_qfq_pfattach(struct pf_altq *);
+extern int     altq_qfq_add(struct pf_altq *);
+extern int     altq_qfq_remove(struct pf_altq *);
+extern int     altq_qfq_add_queue(struct pf_altq *);
+extern int     altq_qfq_remove_queue(struct pf_altq *);
+extern int     altq_qfq_getqstats(struct pf_altq *, void *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_ALTQ_QFQ_H_ */
diff --git a/bsd/net/altq/altq_subr.c b/bsd/net/altq/altq_subr.c
new file mode 100644
index 0000000..5b00e6f
--- /dev/null
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_subr.c,v 1.24 2007/12/11 00:30:14 mikeb Exp $    */
+/*     $KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <sys/mcache.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/pfvar.h>
+#include <net/altq/altq.h>
+#include <net/pktsched/pktsched.h>
+
+#include <pexpert/pexpert.h>
+
+SYSCTL_NODE(_net, OID_AUTO, altq, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "ALTQ");
+
+static u_int32_t altq_debug;
+SYSCTL_UINT(_net_altq, OID_AUTO, debug, CTLFLAG_RW, &altq_debug, 0,
+    "Enable ALTQ debugging");
+
+/*
+ * look up the queue state by the interface name and the queueing type;
+ * upon success, returns with the interface send queue lock held, and
+ * the caller is responsible for releasing it.
+ */
+void *
+altq_lookup(char *name, u_int32_t type)
+{
+       struct ifnet *ifp;
+       void *state = NULL;
+
+       if ((ifp = ifunit(name)) != NULL) {
+               IFCQ_LOCK(&ifp->if_snd);
+               if (type != ALTQT_NONE &&
+                   IFCQ_ALTQ(&ifp->if_snd)->altq_type == type)
+                       state = IFCQ_ALTQ(&ifp->if_snd)->altq_disc;
+               if (state == NULL)
+                       IFCQ_UNLOCK(&ifp->if_snd);
+       }
+
+       if (state != NULL)
+               IFCQ_LOCK_ASSERT_HELD(&ifp->if_snd);
+
+       return (state);
+}
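
Editor's sketch, not part of the commit: since a successful altq_lookup() returns with the interface send queue lock still held, lock ownership transfers to the caller, which must drop it explicitly — exactly as the *_getqstats shims do. "ifname" here is hypothetical.

    struct priq_if *pif;

    if ((pif = altq_lookup(ifname, ALTQT_PRIQ)) != NULL) {
            /* ... inspect discipline state under the lock ... */
            IFCQ_UNLOCK(pif->pif_ifq);      /* release what altq_lookup took */
    }
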
+
+int
+altq_attach(struct ifaltq *altq, u_int32_t type, void *discipline,
+    altq_enq_func enqueue, altq_deq_func dequeue,
+    altq_deq_sc_func dequeue_sc, altq_req_func request)
+{
+       IFCQ_LOCK_ASSERT_HELD(altq->altq_ifcq);
+
+       if (!ALTQ_IS_READY(altq))
+               return (ENXIO);
+
+       VERIFY(enqueue != NULL);
+       VERIFY(!(dequeue != NULL && dequeue_sc != NULL));
+       VERIFY(request != NULL);
+
+       altq->altq_type = type;
+       altq->altq_disc = discipline;
+       altq->altq_enqueue = enqueue;
+       altq->altq_dequeue = dequeue;
+       altq->altq_dequeue_sc = dequeue_sc;
+       altq->altq_request = request;
+       altq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
+
+       return (0);
+}
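
Editor's sketch, not part of the commit: the second VERIFY above forbids registering both dequeue flavors at once — a discipline supplies either the plain altq_deq_func or the service-class-aware altq_deq_sc_func, never both. A hedged example of an SC-aware registration; the my_* handlers and "disc" are hypothetical.

    error = altq_attach(IFCQ_ALTQ(&ifp->if_snd), ALTQT_QFQ, disc,
        my_enqueue, NULL /* plain dequeue */, my_dequeue_sc, my_request);
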
+
+int
+altq_detach(struct ifaltq *altq)
+{
+       IFCQ_LOCK_ASSERT_HELD(altq->altq_ifcq);
+
+       if (!ALTQ_IS_READY(altq))
+               return (ENXIO);
+       if (ALTQ_IS_ENABLED(altq))
+               return (EBUSY);
+       if (!ALTQ_IS_ATTACHED(altq))
+               return (0);
+
+       altq->altq_type = ALTQT_NONE;
+       altq->altq_disc = NULL;
+       altq->altq_enqueue = NULL;
+       altq->altq_dequeue = NULL;
+       altq->altq_dequeue_sc = NULL;
+       altq->altq_request = NULL;
+       altq->altq_flags &= ALTQF_CANTCHANGE;
+
+       return (0);
+}
+
+int
+altq_enable(struct ifaltq *altq)
+{
+       struct ifclassq *ifq = altq->altq_ifcq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!ALTQ_IS_READY(altq))
+               return (ENXIO);
+       if (ALTQ_IS_ENABLED(altq))
+               return (0);
+
+       altq->altq_flags |= ALTQF_ENABLED;
+
+       return (0);
+}
+
+int
+altq_disable(struct ifaltq *altq)
+{
+       struct ifclassq *ifq = altq->altq_ifcq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!ALTQ_IS_ENABLED(altq))
+               return (0);
+
+       if_qflush(ifq->ifcq_ifp, 1);
+
+       altq->altq_flags &= ~ALTQF_ENABLED;
+
+       return (0);
+}
+
+/*
+ * add a discipline or a queue
+ */
+int
+altq_add(struct pf_altq *a)
+{
+       int error = 0;
+
+       VERIFY(machclk_freq != 0);
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if (a->qname[0] != 0)
+               return (altq_add_queue(a));
+
+       switch (a->scheduler) {
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_add(a);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_add(a);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_add(a);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+        case ALTQT_FAIRQ:
+                error = altq_fairq_add(a);
+                break;
+#endif /* PKTSCHED_FAIRQ */
+        case ALTQT_QFQ:
+                error = altq_qfq_add(a);
+                break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
+
+/*
+ * remove a discipline or a queue
+ */
+int
+altq_remove(struct pf_altq *a)
+{
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if (a->qname[0] != 0)
+               return (altq_remove_queue(a));
+
+       switch (a->scheduler) {
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_remove(a);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_remove(a);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_remove(a);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+        case ALTQT_FAIRQ:
+                error = altq_fairq_remove(a);
+                break;
+#endif /* PKTSCHED_FAIRQ */
+        case ALTQT_QFQ:
+                error = altq_qfq_remove(a);
+                break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
+
+/*
+ * add a queue to the discipline
+ */
+int
+altq_add_queue(struct pf_altq *a)
+{
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       switch (a->scheduler) {
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_add_queue(a);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_add_queue(a);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_add_queue(a);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+        case ALTQT_FAIRQ:
+                error = altq_fairq_add_queue(a);
+                break;
+#endif /* PKTSCHED_FAIRQ */
+        case ALTQT_QFQ:
+                error = altq_qfq_add_queue(a);
+                break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
+
+/*
+ * remove a queue from the discipline
+ */
+int
+altq_remove_queue(struct pf_altq *a)
+{
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       switch (a->scheduler) {
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_remove_queue(a);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_remove_queue(a);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_remove_queue(a);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+        case ALTQT_FAIRQ:
+                error = altq_fairq_remove_queue(a);
+                break;
+#endif /* PKTSCHED_FAIRQ */
+        case ALTQT_QFQ:
+                error = altq_qfq_remove_queue(a);
+                break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
+
+/*
+ * get queue statistics
+ */
+int
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       switch (a->scheduler) {
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_getqstats(a, ubuf, nbytes);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_getqstats(a, ubuf, nbytes);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_getqstats(a, ubuf, nbytes);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+        case ALTQT_FAIRQ:
+                error = altq_fairq_getqstats(a, ubuf, nbytes);
+                break;
+#endif /* PKTSCHED_FAIRQ */
+        case ALTQT_QFQ:
+                error = altq_qfq_getqstats(a, ubuf, nbytes);
+                break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
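
Editor's sketch, not part of the commit: *nbytes is an in/out parameter — the caller passes the capacity of the user buffer and, on success, gets back the number of bytes copied out. A minimal example for the PRIQ case; "a" and "ubuf" (a user-space buffer address) are hypothetical.

    int nbytes = sizeof (struct priq_classstats);   /* in: buffer capacity */
    int error = altq_getqstats(a, ubuf, &nbytes);
    /* out: on success, nbytes holds the byte count actually copied out */
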
+
+/*
+ * attach a discipline to the interface.  if one already exists, it is
+ * overridden.
+ */
+int
+altq_pfattach(struct pf_altq *a)
+{
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       switch (a->scheduler) {
+       case ALTQT_NONE:
+               break;
+#if PKTSCHED_CBQ
+       case ALTQT_CBQ:
+               error = altq_cbq_pfattach(a);
+               break;
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+       case ALTQT_PRIQ:
+               error = altq_priq_pfattach(a);
+               break;
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_HFSC
+       case ALTQT_HFSC:
+               error = altq_hfsc_pfattach(a);
+               break;
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+       case ALTQT_FAIRQ:
+               error = altq_fairq_pfattach(a);
+               break;
+#endif /* PKTSCHED_FAIRQ */
+       case ALTQT_QFQ:
+               error = altq_qfq_pfattach(a);
+               break;
+       default:
+               error = ENXIO;
+       }
+
+       return (error);
+}
+
+/*
+ * detach a discipline from the interface.
+ * it is possible that the discipline was already overridden by another
+ * discipline.
+ */
+int
+altq_pfdetach(struct pf_altq *a)
+{
+       struct ifnet *ifp;
+       int error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if ((ifp = ifunit(a->ifname)) == NULL)
+               return (EINVAL);
+
+       /* if this discipline is no longer referenced, just return */
+       IFCQ_LOCK(&ifp->if_snd);
+       if (a->altq_disc == NULL ||
+           a->altq_disc != IFCQ_ALTQ(&ifp->if_snd)->altq_disc) {
+               IFCQ_UNLOCK(&ifp->if_snd);
+               return (0);
+       }
+
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(&ifp->if_snd)))
+               error = altq_disable(IFCQ_ALTQ(&ifp->if_snd));
+       if (error == 0)
+               error = altq_detach(IFCQ_ALTQ(&ifp->if_snd));
+       IFCQ_UNLOCK(&ifp->if_snd);
+       return (error);
+}
+
+
diff --git a/bsd/net/altq/altq_var.h b/bsd/net/altq/altq_var.h
new file mode 100644
index 0000000..e866a4d
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_var.h,v 1.10 2006/10/15 13:17:13 peter Exp $      */
+/*     $KAME: altq_var.h,v 1.18 2005/04/13 03:44:25 suz Exp $  */
+
+/*
+ * Copyright (C) 1998-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _NET_ALTQ_ALTQ_VAR_H_
+#define        _NET_ALTQ_ALTQ_VAR_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+#if PF_ALTQ
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <net/pktsched/pktsched.h>
+#include <net/classq/classq.h>
+#include <net/altq/if_altq.h>
+#if PKTSCHED_HFSC
+#include <net/altq/altq_hfsc.h>
+#endif /* PKTSCHED_HFSC */
+#if PKTSCHED_FAIRQ
+#include <net/altq/altq_fairq.h>
+#endif /* PKTSCHED_FAIRQ */
+#if PKTSCHED_CBQ
+#include <net/altq/altq_cbq.h>
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_PRIQ
+#include <net/altq/altq_priq.h>
+#endif /* PKTSCHED_PRIQ */
+#include <net/altq/altq_qfq.h>
+
+struct pf_altq;
+
+extern void    *altq_lookup(char *, u_int32_t);
+extern int     altq_pfattach(struct pf_altq *);
+extern int     altq_pfdetach(struct pf_altq *);
+extern int     altq_add(struct pf_altq *);
+extern int     altq_remove(struct pf_altq *);
+extern int     altq_add_queue(struct pf_altq *);
+extern int     altq_remove_queue(struct pf_altq *);
+extern int     altq_getqstats(struct pf_altq *, void *, int *);
+
+#endif /* PF_ALTQ */
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_ALTQ_VAR_H_ */
diff --git a/bsd/net/altq/if_altq.h b/bsd/net/altq/if_altq.h
new file mode 100644
index 0000000..6d634cf
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*     $OpenBSD: if_altq.h,v 1.11 2007/11/18 12:51:48 mpf Exp $        */
+/*     $KAME: if_altq.h,v 1.6 2001/01/29 19:59:09 itojun Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _NET_ALTQ_IF_ALTQ_H_
+#define        _NET_ALTQ_IF_ALTQ_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+#if PF_ALTQ
+#include <net/classq/if_classq.h>
+
+/* altq request types */
+typedef enum altrq {
+       ALTRQ_PURGE =           CLASSQRQ_PURGE,         /* purge all packets */
+       ALTRQ_PURGE_SC =        CLASSQRQ_PURGE_SC,      /* purge SC flow */
+       ALTRQ_EVENT =           CLASSQRQ_EVENT,         /* interface events */
+       ALTRQ_THROTTLE =        CLASSQRQ_THROTTLE,      /* throttle packets */
+} altrq_t;
+
+struct ifaltq;
+enum altdq_op;
+
+typedef        int (*altq_enq_func)(struct ifaltq *, struct mbuf *);
+typedef        struct mbuf *(*altq_deq_func)(struct ifaltq *, enum altdq_op);
+typedef        struct mbuf *(*altq_deq_sc_func)(struct ifaltq *,
+    mbuf_svc_class_t, enum altdq_op);
+typedef        int (*altq_req_func)(struct ifaltq *, enum altrq, void *);
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifaltq {
+       struct ifclassq *altq_ifcq;     /* back pointer to interface queue */
+
+       /* alternate queueing related fields */
+       u_int32_t       altq_type;      /* discipline type */
+       u_int32_t       altq_flags;     /* flags (e.g. ready, in-use) */
+       void            *altq_disc;     /* for discipline-specific use */
+
+       altq_enq_func   altq_enqueue;
+       altq_deq_func   altq_dequeue;
+       altq_deq_sc_func altq_dequeue_sc;
+       altq_req_func   altq_request;
+};
+
+/* altq_flags */
+#define        ALTQF_READY      0x01   /* driver supports alternate queueing */
+#define        ALTQF_ENABLED    0x02   /* altq is in use */
+#define        ALTQF_DRIVER1    0x40   /* driver specific */
+
+/* altq_flags set internally only: */
+#define        ALTQF_CANTCHANGE        (ALTQF_READY)
+
+/* altq_dequeue op arg */
+typedef enum altdq_op {
+       ALTDQ_REMOVE = CLASSQDQ_REMOVE, /* dequeue mbuf from the queue */
+       ALTDQ_POLL = CLASSQDQ_POLL,     /* don't dequeue mbuf from the queue */
+} altdq_op_t;
+
+#define        ALTQ_IS_READY(_altq)            ((_altq)->altq_flags & ALTQF_READY)
+#define        ALTQ_IS_ENABLED(_altq)          ((_altq)->altq_flags & ALTQF_ENABLED)
+#define        ALTQ_IS_ATTACHED(_altq)         ((_altq)->altq_disc != NULL)
+
+#define        ALTQ_ENQUEUE(_altq, _m, _err) do {                              \
+       (_err) = (*(_altq)->altq_enqueue)(_altq, _m);                   \
+} while (0)
+
+#define        ALTQ_DEQUEUE(_altq, _m) do {                                    \
+       (_m) = (*(_altq)->altq_dequeue)(_altq, ALTDQ_REMOVE);           \
+} while (0)
+
+#define        ALTQ_DEQUEUE_SC(_altq, _sc, _m) do {                            \
+       (_m) = (*(_altq)->altq_dequeue_sc)(_altq, _sc, ALTDQ_REMOVE);   \
+} while (0)
+
+#define        ALTQ_POLL(_altq, _m) do {                                       \
+       (_m) = (*(_altq)->altq_dequeue)(_altq, ALTDQ_POLL);             \
+} while (0)
+
+#define        ALTQ_POLL_SC(_altq, _sc, _m) do {                               \
+       (_m) = (*(_altq)->altq_dequeue_sc)(_altq, _sc, ALTDQ_POLL);     \
+} while (0)
+
+#define        ALTQ_PURGE(_altq) do {                                          \
+       (void) (*(_altq)->altq_request)(_altq, ALTRQ_PURGE, NULL);      \
+} while (0)
+
+#define        ALTQ_PURGE_SC(_altq, _sc, _flow, _packets, _bytes) do {         \
+       cqrq_purge_sc_t _req = { _sc, _flow, 0, 0 };                    \
+       (void) (*(_altq)->altq_request)(_altq, ALTRQ_PURGE_SC, &_req);  \
+       (_packets) = _req.packets;                                      \
+       (_bytes) = _req.bytes;                                          \
+} while (0)
+
+#define        ALTQ_UPDATE(_altq, _ev) do {                                    \
+       (void) (*(_altq)->altq_request)(_altq, ALTRQ_EVENT,             \
+           (void *)(_ev));                                             \
+} while (0)
+
+#define        ALTQ_SET_READY(_altq) do {                                      \
+       IFCQ_LOCK_ASSERT_HELD((_altq)->altq_ifcq);                      \
+       (_altq)->altq_flags |= ALTQF_READY;                             \
+} while (0)
+
+#define        ALTQ_CLEAR_READY(_altq) do {                                    \
+       IFCQ_LOCK_ASSERT_HELD((_altq)->altq_ifcq);                      \
+       (_altq)->altq_flags &= ~ALTQF_READY;                            \
+} while (0)
+
+extern int altq_attach(struct ifaltq *, u_int32_t, void *,
+    altq_enq_func, altq_deq_func, altq_deq_sc_func, altq_req_func);
+extern int altq_detach(struct ifaltq *);
+extern int altq_enable(struct ifaltq *);
+extern int altq_disable(struct ifaltq *);
+#endif /* PF_ALTQ */
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NET_ALTQ_IF_ALTQ_H_ */
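
Editor's sketch, not part of the commit: the ALTQF flags above encode a small state machine — a driver first marks the queue READY, a discipline then attaches (making altq_disc non-NULL), and only an attached queue may be ENABLED; teardown runs in reverse. The my_* handlers and "disc" are hypothetical, and the ifclassq lock is held as every step asserts.

    IFCQ_LOCK(altq->altq_ifcq);
    ALTQ_SET_READY(altq);                           /* driver opts in */
    (void) altq_attach(altq, ALTQT_PRIQ, disc,
        my_enqueue, my_dequeue, NULL, my_request);  /* discipline hooks in */
    (void) altq_enable(altq);                       /* packets may now flow */
    /* ... steady state: ALTQ_ENQUEUE / ALTQ_DEQUEUE ... */
    (void) altq_disable(altq);                      /* flushes via if_qflush */
    (void) altq_detach(altq);                       /* clears type, handlers */
    ALTQ_CLEAR_READY(altq);
    IFCQ_UNLOCK(altq->altq_ifcq);
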
diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c
index e370dfc5ed2d12bbb4c1aad57c0ebc14cc8f150f..b1ac5f1e84801feeb75adecaa7be22a895a21f10 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/file_internal.h>
 #include <sys/event.h>
 
-#if defined(sparc) && BSD < 199103
-#include <sys/stream.h>
-#endif
 #include <sys/poll.h>
 
 #include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/vnode.h>
 
 #include <net/if.h>
 #include <net/bpfdesc.h>
 
 #include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #include <netinet/if_ether.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 extern int tvtohz(struct timeval *);
 
-/*
- * Older BSDs don't have kernel malloc.
- */
-#if BSD < 199103
-extern bcopy();
-static caddr_t bpf_alloc();
-#include <net/bpf_compat.h>
-#define BPF_BUFSIZE (MCLBYTES-8)
-#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
-#else
 #define BPF_BUFSIZE 4096
 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
-#endif
 
 
 #define PRINET  26                     /* interruptible */
@@ -177,7 +171,8 @@ static struct bpf_d **bpf_dtab = NULL;
 static unsigned int bpf_dtab_size = 0;
 static unsigned int    nbpfilter = 0;
 
-static lck_mtx_t               *bpf_mlock;
+decl_lck_mtx_data(static, bpf_mlock_data);
+static lck_mtx_t               *bpf_mlock = &bpf_mlock_data;
 static lck_grp_t               *bpf_mlock_grp;
 static lck_grp_attr_t  *bpf_mlock_grp_attr;
 static lck_attr_t              *bpf_mlock_attr;
@@ -199,13 +194,14 @@ static int        bpf_movein(struct uio *, int,
 static int     bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
 static void bpf_timed_out(void *, void *);
 static void bpf_wakeup(struct bpf_d *);
-static void    catchpacket(struct bpf_d *, u_char *, u_int,
-                   u_int, void (*)(const void *, void *, size_t));
+static void    catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
+                   u_int, int, void (*)(const void *, void *, size_t));
 static void    reset_d(struct bpf_d *);
 static int bpf_setf(struct bpf_d *, u_int bf_len, user_addr_t bf_insns);
-static int     bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *,
-    struct proc *);
+static int     bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
 static int     bpf_setdlt(struct bpf_d *, u_int);
+static int     bpf_set_traffic_class(struct bpf_d *, int);
+static void    bpf_set_packet_service_class(struct mbuf *, int);
 
 /*static  void *bpf_devfs_token[MAXBPFILTER];*/
 
@@ -222,8 +218,8 @@ static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
        d_close_t           bpfclose;
        d_read_t            bpfread;
        d_write_t           bpfwrite;
-    ioctl_fcn_t            bpfioctl;
-    select_fcn_t       bpfselect;
+       ioctl_fcn_t         bpfioctl;
+       select_fcn_t        bpfselect;
 
 
 /* Darwin's cdevsw struct differs slightly from BSDs */
@@ -234,15 +230,15 @@ static struct cdevsw bpf_cdevsw = {
        /* read */          bpfread,
        /* write */         bpfwrite,
        /* ioctl */         bpfioctl,
-       /* stop */              eno_stop,
-       /* reset */             eno_reset,
-       /* tty */               NULL,
-       /* select */    bpfselect,
-       /* mmap */              eno_mmap,
-       /* strategy*/   eno_strat,
-       /* getc */              eno_getc,
-       /* putc */              eno_putc,
-       /* type */              0
+       /* stop */          eno_stop,
+       /* reset */         eno_reset,
+       /* tty */           NULL,
+       /* select */        bpfselect,
+       /* mmap */          eno_mmap,
+       /* strategy*/       eno_strat,
+       /* getc */          eno_getc,
+       /* putc */          eno_putc,
+       /* type */          0
 };
 
 #define SOCKADDR_HDR_LEN          offsetof(struct sockaddr, sa_data)
@@ -316,7 +312,7 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
                sa_family = AF_IEEE80211;
                hlen = 0;
                break;
-       
+
        case DLT_IEEE802_11_RADIO:
                sa_family = AF_IEEE80211;
                hlen = 0;
@@ -360,13 +356,8 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
        if (m == 0)
                return (ENOBUFS);
        if ((unsigned)len > MHLEN) {
-#if BSD >= 199103
                MCLGET(m, M_WAIT);
                if ((m->m_flags & M_EXT) == 0) {
-#else
-               MCLGET(m);
-               if (m->m_len != MCLBYTES) {
-#endif
                        error = ENOBUFS;
                        goto bad;
                }
@@ -381,11 +372,7 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
        if (hlen != 0) {
                m->m_pkthdr.len -= hlen;
                m->m_len -= hlen;
-#if BSD >= 199103
                m->m_data += hlen; /* XXX */
-#else
-               m->m_off += hlen;
-#endif
                error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
                if (error)
                        goto bad;
@@ -691,7 +678,8 @@ bpfopen(dev_t dev, int flags, __unused int fmt,
        d->bd_seesent = 1;
        d->bd_oflags = flags;
        d->bd_state = BPF_IDLE;
-    d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
+       d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
+       d->bd_traffic_class = SO_TC_BE;
 
        if (d->bd_thread_call == NULL) {
                printf("bpfopen: malloc thread call failed\n");
@@ -817,6 +805,26 @@ bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
        return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
 }
 
+static struct inpcb *
+bpf_findinpcb(struct inpcbinfo *pcbinfo, uint32_t flowhash)
+{
+       struct inpcb *inp = NULL;
+
+       if (!flowhash) return (NULL);
+
+       lck_rw_lock_shared(pcbinfo->mtx);
+       LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
+               if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+                       if (inp->inp_flowhash == flowhash)
+                               break;
+                       in_pcb_checkstate(inp, WNT_RELEASE, 0);
+               }
+       }
+       lck_rw_done(pcbinfo->mtx);
+
+       return (inp);
+}
+
 /*
  * Rotate the packet buffers in descriptor d.  Move the store buffer
  * into the hold slot, and the free buffer into the store slot.
@@ -936,6 +944,39 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
         * At this point, we know we have something in the hold slot.
         */
 
+       /*
+        * Before we move data to userland, we fill out the extended
+        * header fields.
+        */
+       if (d->bd_extendedhdr) {
+               char *p;
+
+               p = d->bd_hbuf;
+               while (p < d->bd_hbuf + d->bd_hlen) {
+                       struct bpf_hdr_ext *ehp;
+                       struct inpcb *inp;
+                       uint32_t flowhash;
+                       pid_t pid;
+
+                       ehp = (struct bpf_hdr_ext *)(void *)p;
+                       if ((flowhash = ehp->bh_flowhash)) {
+                               if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP)
+                                       inp = bpf_findinpcb(&tcbinfo, flowhash);
+                               else
+                                       inp = bpf_findinpcb(&udbinfo, flowhash);
+                               if (inp) {
+                                       socket_lock(inp->inp_socket, 0);
+                                       pid = inp->inp_socket->last_pid;
+                                       in_pcb_checkstate(inp, WNT_RELEASE, 1);
+                                       socket_unlock(inp->inp_socket, 0);
+                                       ehp->bh_pid = pid;
+                                       proc_name(pid, ehp->bh_comm, MAXCOMLEN);
+                               }
+                               ehp->bh_flowhash = 0;
+                       }
+                       p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
+               }
+       }
        /*
         * Move data from hold buffer into user space.
         * We know the entire buffer is transferred since
@@ -965,20 +1006,12 @@ bpf_wakeup(struct bpf_d *d)
        if (d->bd_async && d->bd_sig && d->bd_sigio)
                pgsigio(d->bd_sigio, d->bd_sig);
 
-#if BSD >= 199103
        selwakeup(&d->bd_sel);
        KNOTE(&d->bd_sel.si_note, 1);
 #ifndef __APPLE__
        /* XXX */
        d->bd_sel.si_pid = 0;
 #endif
-#else
-       if (d->bd_selproc) {
-               selwakeup(d->bd_selproc, (int)d->bd_selcoll);
-               d->bd_selcoll = 0;
-               d->bd_selproc = 0;
-       }
-#endif
 }
 
 
@@ -1050,26 +1083,26 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
        }
        ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
 
-   /*
-    * fix for PR-6849527
-    * geting variables onto stack before dropping lock for bpf_movein()
-    */
-    bif_dlt = (int)d->bd_bif->bif_dlt;
-    bd_hdrcmplt  = d->bd_hdrcmplt;
+       /*
+        * fix for PR-6849527
+        * getting variables onto stack before dropping lock for bpf_movein()
+        */
+       bif_dlt = (int)d->bd_bif->bif_dlt;
+       bd_hdrcmplt  = d->bd_hdrcmplt;
+
        /* bpf_movein allocating mbufs; drop lock */
-    lck_mtx_unlock(bpf_mlock);
+       lck_mtx_unlock(bpf_mlock);
 
        error = bpf_movein(uio, bif_dlt, &m, 
-    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
-    &datlen);
-       
-    if (error) {
+       bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
+       &datlen);
+
+       if (error) {
                return (error);
        }
 
        /* taking the lock again and verifying whether device is open */
-    lck_mtx_lock(bpf_mlock);
+       lck_mtx_lock(bpf_mlock);
        d = bpf_dtab[minor(dev)];
        if (d == 0 || d == (void *)1) {
                lck_mtx_unlock(bpf_mlock);
@@ -1093,16 +1126,19 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
 #if CONFIG_MACF_NET
        mac_mbuf_label_associate_bpfdesc(d, m);
 #endif
+
+       bpf_set_packet_service_class(m, d->bd_traffic_class);
+
        lck_mtx_unlock(bpf_mlock);
 
        if (d->bd_hdrcmplt) {
                if (d->bd_bif->bif_send)
                        error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
                else
-                       error = dlil_output(ifp, 0, m, NULL, NULL, 1);
-       }
-       else {
-               error = dlil_output(ifp, PF_INET, m, NULL, (struct sockaddr *)dst_buf, 0);
+                       error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
+       } else {
+               error = dlil_output(ifp, PF_INET, m, NULL,
+                   (struct sockaddr *)dst_buf, 0, NULL);
        }
 
        /*
@@ -1148,6 +1184,9 @@ reset_d(struct bpf_d *d)
  *  BIOCSHDRCMPLT      Set "header already complete" flag
  *  BIOCGSEESENT       Get "see packets sent" flag
  *  BIOCSSEESENT       Set "see packets sent" flag
+ *  BIOCSETTC          Set traffic class.
+ *  BIOCGETTC          Get traffic class.
+ *  BIOCSEXTHDR                Set "extended header" flag
  */
 /* ARGSUSED */
 int
@@ -1155,7 +1194,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
     struct proc *p)
 {
        struct bpf_d *d;
-       int error = 0;
+       int error = 0, int_arg;
+       struct ifreq ifr;
 
        lck_mtx_lock(bpf_mlock);
 
@@ -1178,7 +1218,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
        /*
         * Check for read packet available.
         */
-       case FIONREAD:
+       case FIONREAD:                  /* int */
                {
                        int n;
 
@@ -1186,11 +1226,11 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                        if (d->bd_hbuf)
                                n += d->bd_hlen;
 
-                       *(int *)addr = n;
+                       bcopy(&n, addr, sizeof (n));
                        break;
                }
 
-       case SIOCGIFADDR:
+       case SIOCGIFADDR:               /* struct ifreq */
                {
                        struct ifnet *ifp;
 
@@ -1206,44 +1246,47 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
        /*
         * Get buffer len [for read()].
         */
-       case BIOCGBLEN:
-               *(u_int *)addr = d->bd_bufsize;
+       case BIOCGBLEN:                 /* u_int */
+               bcopy(&d->bd_bufsize, addr, sizeof (u_int));
                break;
 
        /*
         * Set buffer length.
         */
-       case BIOCSBLEN:
-#if BSD < 199103
-               error = EINVAL;
-#else
+       case BIOCSBLEN:                 /* u_int */
                if (d->bd_bif != 0)
                        error = EINVAL;
                else {
-                       u_int size = *(u_int *)addr;
+                       u_int size;
+
+                       bcopy(addr, &size, sizeof (size));
 
                        if (size > bpf_maxbufsize)
-                               *(u_int *)addr = size = bpf_maxbufsize;
+                               size = bpf_maxbufsize;
                        else if (size < BPF_MINBUFSIZE)
-                               *(u_int *)addr = size = BPF_MINBUFSIZE;
+                               size = BPF_MINBUFSIZE;
+                       bcopy(&size, addr, sizeof (size));
                        d->bd_bufsize = size;
                }
-#endif
                break;
 
        /*
         * Set link layer read filter.
         */
-       case BIOCSETF32: {
-               struct bpf_program32 *prg32 = (struct bpf_program32 *)addr;
-               error = bpf_setf(d, prg32->bf_len,
-                   CAST_USER_ADDR_T(prg32->bf_insns));
+       case BIOCSETF32: {              /* struct bpf_program32 */
+               struct bpf_program32 prg32;
+
+               bcopy(addr, &prg32, sizeof (prg32));
+               error = bpf_setf(d, prg32.bf_len,
+                   CAST_USER_ADDR_T(prg32.bf_insns));
                break;
        }
 
-       case BIOCSETF64: {
-               struct bpf_program64 *prg64 = (struct bpf_program64 *)addr;
-               error = bpf_setf(d, prg64->bf_len, prg64->bf_insns);
+       case BIOCSETF64: {              /* struct bpf_program64 */
+               struct bpf_program64 prg64;
+
+               bcopy(addr, &prg64, sizeof (prg64));
+               error = bpf_setf(d, prg64.bf_len, prg64.bf_insns);
                break;
        }
 
@@ -1277,56 +1320,62 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
        /*
         * Get device parameters.
         */
-       case BIOCGDLT:
+       case BIOCGDLT:                  /* u_int */
                if (d->bd_bif == 0)
                        error = EINVAL;
                else
-                       *(u_int *)addr = d->bd_bif->bif_dlt;
+                       bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
                break;
 
        /*
         * Get a list of supported data link types.
         */
-       case BIOCGDLTLIST:
+       case BIOCGDLTLIST:              /* struct bpf_dltlist */
                if (d->bd_bif == NULL) {
                        error = EINVAL;
                } else {
-                       error = bpf_getdltlist(d,
-                           (struct bpf_dltlist *)addr, p);
+                       error = bpf_getdltlist(d, addr, p);
                }
                break;
 
        /*
         * Set data link type.
         */
-       case BIOCSDLT:
-                       if (d->bd_bif == NULL)
-                                       error = EINVAL;
-                       else
-                                       error = bpf_setdlt(d, *(u_int *)addr);
-                       break;
+       case BIOCSDLT:                  /* u_int */
+               if (d->bd_bif == NULL) {
+                       error = EINVAL;
+               } else {
+                       u_int dlt;
+
+                       bcopy(addr, &dlt, sizeof (dlt));
+                       error = bpf_setdlt(d, dlt);
+               }
+               break;
 
        /*
         * Get interface name.
         */
-       case BIOCGETIF:
+       case BIOCGETIF:                 /* struct ifreq */
                if (d->bd_bif == 0)
                        error = EINVAL;
                else {
                        struct ifnet *const ifp = d->bd_bif->bif_ifp;
-                       struct ifreq *const ifr = (struct ifreq *)addr;
 
-                       snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
-                           "%s%d", ifp->if_name, ifp->if_unit);
+                       snprintf(((struct ifreq *)(void *)addr)->ifr_name,
+                           sizeof (ifr.ifr_name), "%s%d", ifp->if_name,
+                           ifp->if_unit);
                }
                break;
 
        /*
         * Set interface.
         */
-       case BIOCSETIF: {
+       case BIOCSETIF: {               /* struct ifreq */
                ifnet_t ifp;
-               ifp = ifunit(((struct ifreq *)addr)->ifr_name);
+
+               bcopy(addr, &ifr, sizeof (ifr));
+               ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+               ifp = ifunit(ifr.ifr_name);
                if (ifp == NULL)
                        error = ENXIO;
                else
@@ -1337,122 +1386,145 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
        /*
         * Set read timeout.
         */
-        case BIOCSRTIMEOUT32:
-                {
-                       struct user32_timeval *_tv = (struct user32_timeval *)addr;
-                       struct timeval tv;
+        case BIOCSRTIMEOUT32: {                /* struct user32_timeval */
+               struct user32_timeval _tv;
+               struct timeval tv;
 
-                       tv.tv_sec  = _tv->tv_sec;
-                       tv.tv_usec = _tv->tv_usec;
+               bcopy(addr, &_tv, sizeof (_tv));
+               tv.tv_sec  = _tv.tv_sec;
+               tv.tv_usec = _tv.tv_usec;
+
+               /*
+                * Subtract 1 tick from tvtohz() since this isn't
+                * a one-shot timer.
+                */
+               if ((error = itimerfix(&tv)) == 0)
+                       d->bd_rtout = tvtohz(&tv) - 1;
+               break;
+       }
+
+        case BIOCSRTIMEOUT64: {                /* struct user64_timeval */
+               struct user64_timeval _tv;
+               struct timeval tv;
+
+               bcopy(addr, &_tv, sizeof (_tv));
+               tv.tv_sec  = _tv.tv_sec;
+               tv.tv_usec = _tv.tv_usec;
+
+               /*
+                * Subtract 1 tick from tvtohz() since this isn't
+                * a one-shot timer.
+                */
+               if ((error = itimerfix(&tv)) == 0)
+                       d->bd_rtout = tvtohz(&tv) - 1;
+               break;
+       }
 
-                        /*
-                        * Subtract 1 tick from tvtohz() since this isn't
-                        * a one-shot timer.
-                        */
-                       if ((error = itimerfix(&tv)) == 0)
-                               d->bd_rtout = tvtohz(&tv) - 1;
-                       break;
-                }
-
-        case BIOCSRTIMEOUT64:
-                {
-                       struct user64_timeval *_tv = (struct user64_timeval *)addr;
-                       struct timeval tv;
-                        
-                       tv.tv_sec  = _tv->tv_sec;
-                       tv.tv_usec = _tv->tv_usec;
-                        
-                       /*
-                        * Subtract 1 tick from tvtohz() since this isn't
-                        * a one-shot timer.
-                        */
-                       if ((error = itimerfix(&tv)) == 0)
-                               d->bd_rtout = tvtohz(&tv) - 1;
-                       break;
-                }
-       
         /*
         * Get read timeout.
         */
-       case BIOCGRTIMEOUT32:
-               {
-                       struct user32_timeval *tv = (struct user32_timeval *)addr;
+       case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
+               struct user32_timeval tv;
 
-                       tv->tv_sec = d->bd_rtout / hz;
-                       tv->tv_usec = (d->bd_rtout % hz) * tick;
-                       break;
-                }
+               bzero(&tv, sizeof (tv));
+               tv.tv_sec = d->bd_rtout / hz;
+               tv.tv_usec = (d->bd_rtout % hz) * tick;
+               bcopy(&tv, addr, sizeof (tv));
+               break;
+       }
 
-       case BIOCGRTIMEOUT64:
-               {
-                       struct user64_timeval *tv = (struct user64_timeval *)addr;
+       case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
+               struct user64_timeval tv;
 
-                       tv->tv_sec = d->bd_rtout / hz;
-                       tv->tv_usec = (d->bd_rtout % hz) * tick;
-                       break;
-                }
+               bzero(&tv, sizeof (tv));
+               tv.tv_sec = d->bd_rtout / hz;
+               tv.tv_usec = (d->bd_rtout % hz) * tick;
+               bcopy(&tv, addr, sizeof (tv));
+               break;
+       }
 
        /*
         * Get packet stats.
         */
-       case BIOCGSTATS:
-               {
-                       struct bpf_stat *bs = (struct bpf_stat *)addr;
+       case BIOCGSTATS: {              /* struct bpf_stat */
+               struct bpf_stat bs;
 
-                       bs->bs_recv = d->bd_rcount;
-                       bs->bs_drop = d->bd_dcount;
-                       break;
-               }
+               bzero(&bs, sizeof (bs));
+               bs.bs_recv = d->bd_rcount;
+               bs.bs_drop = d->bd_dcount;
+               bcopy(&bs, addr, sizeof (bs));
+               break;
+       }
 
        /*
         * Set immediate mode.
         */
-       case BIOCIMMEDIATE:
-               d->bd_immediate = *(u_int *)addr;
+       case BIOCIMMEDIATE:             /* u_int */
+               bcopy(addr, &d->bd_immediate, sizeof (u_int));
                break;
 
-       case BIOCVERSION:
-               {
-                       struct bpf_version *bv = (struct bpf_version *)addr;
+       case BIOCVERSION: {             /* struct bpf_version */
+               struct bpf_version bv;
 
-                       bv->bv_major = BPF_MAJOR_VERSION;
-                       bv->bv_minor = BPF_MINOR_VERSION;
-                       break;
-               }
+               bzero(&bv, sizeof (bv));
+               bv.bv_major = BPF_MAJOR_VERSION;
+               bv.bv_minor = BPF_MINOR_VERSION;
+               bcopy(&bv, addr, sizeof (bv));
+               break;
+       }
 
        /*
         * Get "header already complete" flag
         */
-       case BIOCGHDRCMPLT:
-               *(u_int *)addr = d->bd_hdrcmplt;
+       case BIOCGHDRCMPLT:             /* u_int */
+               bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
                break;
 
        /*
         * Set "header already complete" flag
         */
-       case BIOCSHDRCMPLT:
-               d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+       case BIOCSHDRCMPLT:             /* u_int */
+               bcopy(addr, &int_arg, sizeof (int_arg));
+               d->bd_hdrcmplt = int_arg ? 1 : 0;
                break;
 
        /*
         * Get "see sent packets" flag
         */
-       case BIOCGSEESENT:
-               *(u_int *)addr = d->bd_seesent;
+       case BIOCGSEESENT:              /* u_int */
+               bcopy(&d->bd_seesent, addr, sizeof (u_int));
                break;
 
        /*
         * Set "see sent packets" flag
         */
-       case BIOCSSEESENT:
-               d->bd_seesent = *(u_int *)addr;
+       case BIOCSSEESENT:              /* u_int */
+               bcopy(addr, &d->bd_seesent, sizeof (u_int));
+               break;
+
+       /*
+        * Set traffic service class
+        */
+       case BIOCSETTC: {               /* int */
+               int tc;
+
+               bcopy(addr, &tc, sizeof (int));
+               error = bpf_set_traffic_class(d, tc);
                break;
+       }
 
-       case FIONBIO:           /* Non-blocking I/O */
+       /*
+        * Get traffic service class
+        */
+       case BIOCGETTC:                 /* int */
+               bcopy(&d->bd_traffic_class, addr, sizeof (int));
                break;
 
-       case FIOASYNC:          /* Send signal on receive packets */
-               d->bd_async = *(int *)addr;
+       case FIONBIO:           /* Non-blocking I/O; int */
+               break;
+
+       case FIOASYNC:          /* Send signal on receive packets; int */
+               bcopy(addr, &d->bd_async, sizeof (int));
                break;
 #ifndef __APPLE__
        case FIOSETOWN:
@@ -1473,23 +1545,25 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                *(int *)addr = -fgetown(d->bd_sigio);
                break;
 #endif
-       case BIOCSRSIG:         /* Set receive signal */
-               {
-                       u_int sig;
+       case BIOCSRSIG: {       /* Set receive signal; u_int */
+               u_int sig;
 
-                       sig = *(u_int *)addr;
+               bcopy(addr, &sig, sizeof (u_int));
 
-                       if (sig >= NSIG)
-                               error = EINVAL;
-                       else
-                               d->bd_sig = sig;
-                       break;
-               }
-       case BIOCGRSIG:
-               *(u_int *)addr = d->bd_sig;
+               if (sig >= NSIG)
+                       error = EINVAL;
+               else
+                       d->bd_sig = sig;
                break;
        }
-       
+       case BIOCGRSIG:                 /* u_int */
+               bcopy(&d->bd_sig, addr, sizeof (u_int));
+               break;
+       case BIOCSEXTHDR:
+               bcopy(addr, &d->bd_extendedhdr, sizeof (u_int));
+               break;
+       }
+
        lck_mtx_unlock(bpf_mlock);
 
        return (error);
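
The rewritten cases copy every argument in and out with bcopy() through a properly typed local instead of dereferencing addr in place, while callers keep the usual value-result semantics. A minimal userland sketch of the BIOCSBLEN clamping behavior, assuming fd is an open /dev/bpfN descriptor (hypothetical name):

        u_int blen = 1 << 24;                   /* ask for 16 MB */

        if (ioctl(fd, BIOCSBLEN, &blen) == -1)
                err(1, "BIOCSBLEN");            /* EINVAL once an interface is bound */
        /* On success the kernel wrote back the granted size, clamped to
         * the [BPF_MINBUFSIZE, bpf_maxbufsize] range. */
        printf("buffer length: %u\n", blen);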
@@ -1592,18 +1666,20 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
  * Get a list of available data link type of the interface.
  */
 static int
-bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl, struct proc *p)
+bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
 {
        u_int           n;
        int             error;
        struct ifnet    *ifp;
        struct bpf_if   *bp;
        user_addr_t     dlist;
+       struct bpf_dltlist bfl;
 
+       bcopy(addr, &bfl, sizeof (bfl));
        if (proc_is64bit(p)) {
-               dlist = (user_addr_t)bfl->bfl_u.bflu_pad;
+               dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
        } else {
-               dlist = CAST_USER_ADDR_T(bfl->bfl_u.bflu_list);
+               dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
        }
 
        ifp = d->bd_bif->bif_ifp;
@@ -1613,16 +1689,20 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl, struct proc *p)
                if (bp->bif_ifp != ifp)
                        continue;
                if (dlist != USER_ADDR_NULL) {
-                       if (n >= bfl->bfl_len) {
+                       if (n >= bfl.bfl_len) {
                                return (ENOMEM);
                        }
                        error = copyout(&bp->bif_dlt, dlist,
                            sizeof (bp->bif_dlt));
+                       if (error != 0)
+                               break;
                        dlist += sizeof (bp->bif_dlt);
                }
                n++;
        }
-       bfl->bfl_len = n;
+       bfl.bfl_len = n;
+       bcopy(&bfl, addr, sizeof (bfl));
+
        return (error);
 }
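
bpf_getdltlist() still counts matching DLTs when no output array is supplied, so userland can size the array with a two-pass call. A sketch under that assumption, with fd an open bpf descriptor already bound via BIOCSETIF:

        struct bpf_dltlist bfl;

        memset(&bfl, 0, sizeof (bfl));          /* bfl_list == NULL: count only */
        if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
                err(1, "BIOCGDLTLIST");
        bfl.bfl_list = calloc(bfl.bfl_len, sizeof (u_int));
        if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1) /* second pass fills the array */
                err(1, "BIOCGDLTLIST");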
 
@@ -1669,6 +1749,29 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt)
        return (bp == NULL ? EINVAL : 0);
 }
 
+static int
+bpf_set_traffic_class(struct bpf_d *d, int tc)
+{
+       int error = 0;
+
+       if (!SO_VALID_TC(tc))
+               error = EINVAL;
+       else
+               d->bd_traffic_class = tc;
+
+       return (error);
+}
+
+static void
+bpf_set_packet_service_class(struct mbuf *m, int tc)
+{
+       if (!(m->m_flags & M_PKTHDR))
+               return;
+
+       VERIFY(SO_VALID_TC(tc));
+       (void) m_set_service_class(m, so_tc2msc(tc));
+}
+
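
bpf_set_traffic_class() accepts only values that pass SO_VALID_TC(), and bpf_set_packet_service_class() stamps outbound mbufs with the matching mbuf service class. A sketch of the new PRIVATE ioctls, assuming the SO_TC_* constants from the private sys/socket.h are visible:

        int tc = SO_TC_VO;                      /* voice service class */

        if (ioctl(fd, BIOCSETTC, &tc) == -1)
                err(1, "BIOCSETTC");            /* EINVAL if !SO_VALID_TC(tc) */
        if (ioctl(fd, BIOCGETTC, &tc) == 0)
                printf("traffic class: %d\n", tc);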
 /*
  * Support for select()
  *
@@ -1843,17 +1946,6 @@ filt_bpfread(struct knote *kn, long hint)
        return (ready);
 }
 
-static inline void*
-_cast_non_const(const void * ptr) {
-       union {
-               const void*             cval;
-               void*                   val;
-       } ret;
-       
-       ret.cval = ptr;
-       return (ret.val);
-}
-
 /*
  * Copy data from an mbuf chain into a buffer.  This code is derived
  * from m_copydata in sys/uipc_mbuf.c.
@@ -1861,7 +1953,7 @@ _cast_non_const(const void * ptr) {
 static void
 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
 {
-       struct mbuf *m = _cast_non_const(src_arg);
+       struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg);
        u_int count;
        u_char *dst;
 
@@ -1884,9 +1976,10 @@ bpf_tap_imp(
        mbuf_t          m,
        void*           hdr,
        size_t          hlen,
-       int                     outbound)
+       int             outbound)
 {
        struct bpf_if *bp;
+       struct mbuf *savedm = m;
 
        /*
         * It's possible that we get here after the bpf descriptor has been
@@ -1953,7 +2046,8 @@ bpf_tap_imp(
                                if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
                                        continue;
 #endif
-                               catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
+                               catchpacket(d, (u_char *)m, savedm, pktlen,
+                                   slen, outbound, bpf_mcopy);
                        }
                }
        }
@@ -1999,13 +2093,19 @@ static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
  * pkt is really an mbuf.
  */
 static void
-catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 
+catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
+       u_int snaplen, int outbound,
        void (*cpfn)(const void *, void *, size_t))
 {
        struct bpf_hdr *hp;
+       struct bpf_hdr_ext *ehp;
        int totlen, curlen;
-       int hdrlen = d->bd_bif->bif_hdrlen;
+       int hdrlen, caplen;
        int do_wakeup = 0;
+       u_char *payload;
+
+       hdrlen = d->bd_extendedhdr ? d->bd_bif->bif_exthdrlen :
+           d->bd_bif->bif_hdrlen;
        /*
         * Figure out how many bytes to move.  If the packet is
         * greater or equal to the snapshot length, transfer that
@@ -2049,17 +2149,41 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
        /*
         * Append the bpf header.
         */
-       hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
        struct timeval tv;
        microtime(&tv);
-       hp->bh_tstamp.tv_sec = tv.tv_sec;
-       hp->bh_tstamp.tv_usec = tv.tv_usec;
-       hp->bh_datalen = pktlen;
-       hp->bh_hdrlen = hdrlen;
+       if (d->bd_extendedhdr) {
+               ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
+               memset(ehp, 0, sizeof(*ehp));
+               ehp->bh_tstamp.tv_sec = tv.tv_sec;
+               ehp->bh_tstamp.tv_usec = tv.tv_usec;
+               ehp->bh_datalen = pktlen;
+               ehp->bh_hdrlen = hdrlen;
+               ehp->bh_caplen = totlen - hdrlen;
+               if (outbound) {
+                       if (m->m_pkthdr.m_fhflags & PF_TAG_FLOWHASH)
+                               ehp->bh_flowhash = m->m_pkthdr.m_flowhash;
+                       ehp->bh_svc = so_svc2tc(m->m_pkthdr.svc);
+                       ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
+                       if (m->m_pkthdr.m_fhflags & PF_TAG_TCP)
+                               ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
+               } else
+                       ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
+               payload = (u_char *)ehp + hdrlen;
+               caplen = ehp->bh_caplen;
+       } else {
+               hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
+               hp->bh_tstamp.tv_sec = tv.tv_sec;
+               hp->bh_tstamp.tv_usec = tv.tv_usec;
+               hp->bh_datalen = pktlen;
+               hp->bh_hdrlen = hdrlen;
+               hp->bh_caplen = totlen - hdrlen;
+               payload = (u_char *)hp + hdrlen;
+               caplen = hp->bh_caplen;
+       }
        /*
         * Copy the packet data into the store buffer and update its length.
         */
-       (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
+       (*cpfn)(pkt, payload, caplen);
        d->bd_slen = curlen + totlen;
 
        if (do_wakeup)
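
Since bh_hdrlen always records the header actually laid down, a reader walks the store buffer the same way whether or not the extended header was requested. A sketch, where buf and nread come from a read() on the descriptor and handle_packet() is a hypothetical callback:

        char *p = buf;

        while (p < buf + nread) {
                struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;

                handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
                /* records are word-aligned, matching catchpacket() above */
                p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
        }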
@@ -2180,6 +2304,8 @@ bpf_attach(
         * performance reasons and to alleviate alignment restrictions).
         */
        bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+       bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
+           sizeof(struct bpf_hdr_ext)) - hdrlen;
        
        /* Take a reference on the interface */
        ifnet_reference(ifp);
@@ -2258,18 +2384,10 @@ bpf_init(__unused void *unused)
 
         bpf_mlock_attr = lck_attr_alloc_init();
 
-        bpf_mlock = lck_mtx_alloc_init(bpf_mlock_grp, bpf_mlock_attr);
+        lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
 
-               if (bpf_mlock == 0) {
-                       printf("bpf_init: failed to allocate bpf_mlock\n");
-                       bpf_devsw_installed = 0;
-                       return;
-               }
-               
                maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
                if (maj == -1) {
-                       if (bpf_mlock)
-                               lck_mtx_free(bpf_mlock, bpf_mlock_grp);
                        if (bpf_mlock_attr)
                                lck_attr_free(bpf_mlock_attr);
                        if (bpf_mlock_grp)
index 92a5f31a0c945896552baf433878e57aab4892f2..3ed4d951f2b6844a3b1a66f440f7d75c953d538e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,7 @@
 
 #ifndef _NET_BPF_H_
 #define _NET_BPF_H_
+#include <sys/param.h>
 #include <sys/appleapiopts.h>
 #include <sys/types.h>
 #include <sys/time.h>
@@ -193,6 +194,11 @@ struct bpf_version {
 #define BIOCSSEESENT   _IOW('B',119, u_int)
 #define BIOCSDLT        _IOW('B',120, u_int)
 #define BIOCGDLTLIST    _IOWR('B',121, struct bpf_dltlist)
+#ifdef PRIVATE
+#define        BIOCGETTC       _IOR('B', 122, int)
+#define        BIOCSETTC       _IOW('B', 123, int)
+#define        BIOCSEXTHDR     _IOW('B', 124, u_int)
+#endif /* PRIVATE */
 
 /*
  * Structure prepended to each packet.
@@ -204,15 +210,36 @@ struct bpf_hdr {
        u_short         bh_hdrlen;      /* length of bpf header (this struct
                                           plus alignment padding) */
 };
+#ifdef KERNEL
 /*
  * Because the structure above is not a multiple of 4 bytes, some compilers
  * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
  * Only the kernel needs to know about it; applications use bh_hdrlen.
  */
-#ifdef KERNEL
 #define        SIZEOF_BPF_HDR  (sizeof(struct bpf_hdr) <= 20 ? 18 : \
     sizeof(struct bpf_hdr))
 #endif
+#ifdef PRIVATE
+/*
+ * This structure must be a multiple of 4 bytes.
+ * It includes padding and spare fields that we can use later if desired.
+ */
+struct bpf_hdr_ext {
+       struct BPF_TIMEVAL bh_tstamp;   /* time stamp */
+       bpf_u_int32     bh_caplen;      /* length of captured portion */
+       bpf_u_int32     bh_datalen;     /* original length of packet */
+       u_short         bh_hdrlen;      /* length of bpf header */
+       u_short         bh_flags;
+#define BPF_HDR_EXT_FLAGS_DIR_IN       0x0000
+#define BPF_HDR_EXT_FLAGS_DIR_OUT      0x0001
+#define        BPF_HDR_EXT_FLAGS_TCP           0x0002
+       pid_t           bh_pid;         /* process PID */
+       char            bh_comm[MAXCOMLEN+1]; /* process command */
+       u_char          _bh_pad2[3];
+       bpf_u_int32     bh_svc;         /* service class */
+       bpf_u_int32     bh_flowhash;    /* kernel reserved; 0 in userland */
+};
+#endif /* PRIVATE */
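
Once a descriptor opts in with BIOCSEXTHDR, every record in its read buffer begins with this structure instead of struct bpf_hdr. A sketch, assuming fd is an open bpf descriptor and p points at one record of a buffer filled by read():

        u_int on = 1;

        if (ioctl(fd, BIOCSEXTHDR, &on) == -1)  /* PRIVATE ioctl */
                err(1, "BIOCSEXTHDR");

        struct bpf_hdr_ext *ehp = (struct bpf_hdr_ext *)(void *)p;
        printf("%s svc=%u flowhash=%u\n",
            (ehp->bh_flags & BPF_HDR_EXT_FLAGS_DIR_OUT) ? "out" : "in",
            ehp->bh_svc, ehp->bh_flowhash);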
 
 /*
  * Data-link level type codes.
index 69d35371f271572dce6ee0289bcce2f049f8eba3..3ec0f2866e58330005375e43adea9ec7eafd1092 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  */
 
 #include <sys/param.h>
+#include <string.h>
 
 #ifdef sun
 #include <netinet/in.h>
 #endif
 
-#if defined(sparc) || defined(mips) || defined(ibm032) || defined(__alpha__)
-#define BPF_ALIGN
-#endif
+#if !defined(__i386__) && !defined(__x86_64__)
+#define BPF_ALIGN 1
+#else /* defined(__i386__) || defined(__x86_64__) */
+#define BPF_ALIGN 0
+#endif /* defined(__i386__) || defined(__x86_64__) */
 
-#ifndef BPF_ALIGN
-#define EXTRACT_SHORT(p)       ((u_int16_t)ntohs(*(u_int16_t *)p))
-#define EXTRACT_LONG(p)                (ntohl(*(u_int32_t *)p))
+#if !BPF_ALIGN
+#define EXTRACT_SHORT(p)       ((u_int16_t)ntohs(*(u_int16_t *)(void *)p))
+#define EXTRACT_LONG(p)                (ntohl(*(u_int32_t *)(void *)p))
 #else
 #define EXTRACT_SHORT(p)\
        ((u_int16_t)\
@@ -211,6 +214,8 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
        register bpf_u_int32 k;
        int32_t mem[BPF_MEMWORDS];
 
+       bzero(mem, sizeof(mem));
+
        if (pc == 0)
                /*
                 * No filter means accept all.
@@ -242,7 +247,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               A = m_xword((struct mbuf *)p, k, &merr);
+                               A = m_xword((struct mbuf *)(void *)p, k, &merr);
                                if (merr != 0)
                                        return 0;
                                continue;
@@ -255,7 +260,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
                                A = EXTRACT_LONG(&p[k]);
                        else
 #endif
-                               A = ntohl(*(int32_t *)(p + k));
+                               A = ntohl(*(int32_t *)(void *)(p + k));
                        continue;
 
                case BPF_LD|BPF_H|BPF_ABS:
@@ -266,7 +271,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               A = m_xhalf((struct mbuf *)p, k, &merr);
+                               A = m_xhalf((struct mbuf *)(void *)p, k, &merr);
                                continue;
 #else
                                return 0;
@@ -283,7 +288,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               m = (struct mbuf *)p;
+                               m = (struct mbuf *)(void *)p;
                                MINDEX(m, k);
                                A = mtod(m, u_char *)[k];
                                continue;
@@ -311,7 +316,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               A = m_xword((struct mbuf *)p, k, &merr);
+                               A = m_xword((struct mbuf *)(void *)p, k, &merr);
                                if (merr != 0)
                                        return 0;
                                continue;
@@ -324,7 +329,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
                                A = EXTRACT_LONG(&p[k]);
                        else
 #endif
-                               A = ntohl(*(int32_t *)(p + k));
+                               A = ntohl(*(int32_t *)(void *)(p + k));
                        continue;
 
                case BPF_LD|BPF_H|BPF_IND:
@@ -336,7 +341,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               A = m_xhalf((struct mbuf *)p, k, &merr);
+                               A = m_xhalf((struct mbuf *)(void *)p, k, &merr);
                                if (merr != 0)
                                        return 0;
                                continue;
@@ -355,7 +360,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               m = (struct mbuf *)p;
+                               m = (struct mbuf *)(void *)p;
                                MINDEX(m, k);
                                A = mtod(m, u_char *)[k];
                                continue;
@@ -374,7 +379,7 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 
                                if (buflen != 0)
                                        return 0;
-                               m = (struct mbuf *)p;
+                               m = (struct mbuf *)(void *)p;
                                MINDEX(m, k);
                                X = (mtod(m, u_char *)[k] & 0xf) << 2;
                                continue;
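
Userland still attaches filters through the single BIOCSETF request; because the ioctl number encodes sizeof (struct bpf_program), which contains a pointer, a 32-bit process produces the kernel's BIOCSETF32 value and a 64-bit process BIOCSETF64. A sketch of a trivial accept-all program:

        static struct bpf_insn insns[] = {
                BPF_STMT(BPF_RET | BPF_K, (u_int)-1),   /* accept whole packet */
        };
        struct bpf_program prog = { 1, insns };

        if (ioctl(fd, BIOCSETF, &prog) == -1)
                err(1, "BIOCSETF");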
index e0507f9354efa4c1689f965d0cfc214966bbb372..d96300bc6c8482d05c91fe3cea4ce5dbbd236662 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -132,6 +132,8 @@ struct bpf_d {
 #if CONFIG_MACF_NET
        struct label *  bd_label;       /* MAC label for descriptor */
 #endif
+       int             bd_traffic_class; /* traffic service class */
+       int             bd_extendedhdr; /* process requested extended header */
 };
 
 /* Values for bd_state */
@@ -154,6 +156,7 @@ struct bpf_if {
        struct bpf_d *bif_dlist;        /* descriptor list */
        u_int bif_dlt;                  /* link layer type */
        u_int bif_hdrlen;               /* length of header (with padding) */
+       u_int bif_exthdrlen;            /* length of ext header */
        struct ifnet *bif_ifp;          /* corresponding interface */
        bpf_send_func   bif_send;
        bpf_tap_func    bif_tap;
index 1d6922f28f95bf7df5e79c678369f3333cd292d6..c7fc659a757c0b2a35e1f8ac6903db1804ead24e 100644 (file)
@@ -94,7 +94,8 @@
 
 #include <kern/thread.h>
 
-static lck_mtx_t               *bstp_task_mtx = NULL;
+decl_lck_mtx_data(static, bstp_task_mtx_data);
+static lck_mtx_t               *bstp_task_mtx = &bstp_task_mtx_data;
 static lck_grp_t               *bstp_task_grp = NULL;
 static lck_attr_t              *bstp_task_attr = NULL;
 static thread_t                        bstp_task_thread;
@@ -142,10 +143,9 @@ static void bstp_task_drain(struct bstp_task *);
 #define        INFO_SAME       0
 #define        INFO_WORSE      -1
 
-const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
-
 LIST_HEAD(, bstp_state) bstp_list;
-static lck_mtx_t               *bstp_list_mtx;
+decl_lck_mtx_data(static, bstp_list_mtx_data);
+static lck_mtx_t               *bstp_list_mtx = &bstp_list_mtx_data;
 static lck_grp_t               *bstp_lock_grp = NULL;
 static lck_attr_t              *bstp_lock_attr = NULL;
 
@@ -2326,7 +2326,7 @@ bstp_sys_init(void)
 #if BRIDGE_DEBUG
        lck_attr_setdebug(bstp_lock_attr);
 #endif
-       bstp_list_mtx = lck_mtx_alloc_init(bstp_lock_grp, bstp_lock_attr);
+       lck_mtx_init(bstp_list_mtx, bstp_lock_grp, bstp_lock_attr);
        lck_grp_attr_free(lck_grp_attr);
 
        LIST_INIT(&bstp_list);
@@ -2349,7 +2349,7 @@ bstp_create_task_thread(void)
 #if BRIDGE_DEBUG
        lck_attr_setdebug(bstp_task_attr);
 #endif
-       bstp_task_mtx = lck_mtx_alloc_init(bstp_lock_grp, bstp_lock_attr);
+       lck_mtx_init(bstp_task_mtx, bstp_lock_grp, bstp_lock_attr);
        lck_grp_attr_free(lck_grp_attr);
 
        error = kernel_thread_start((thread_continue_t)bstp_task_thread_func, NULL, &bstp_task_thread);
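
decl_lck_mtx_data() places the mutex storage in the kernel's data segment, so lck_mtx_init() initializes it in place and cannot fail; that is why the NULL checks and lck_mtx_free() cleanup paths around the old lck_mtx_alloc_init() calls disappear here and in bpf_init() above. A sketch of the pattern, with my_grp and my_attr standing in for a real lock group and attributes:

        decl_lck_mtx_data(static, my_mtx_data); /* storage in the data segment */
        static lck_mtx_t *my_mtx = &my_mtx_data;

        lck_mtx_init(my_mtx, my_grp, my_attr);  /* in-place init: no allocation,
                                                   hence no failure path */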
index a70f7aabaf4cfa528c05a5f63bf1a36a40af040c..412fface87297c3e08a179d01af0f320cd1cc8f5 100644 (file)
@@ -408,8 +408,6 @@ struct bstp_state {
        bstp_rtage_cb_t         bs_rtage_cb;
 };
 
-extern const uint8_t bstp_etheraddr[];
-
 void   bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
 void   bstp_detach(struct bstp_state *);
 void   bstp_init(struct bstp_state *);
diff --git a/bsd/net/classq/Makefile b/bsd/net/classq/Makefile
new file mode 100644 (file)
index 0000000..9e99d6f
--- /dev/null
@@ -0,0 +1,44 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+INSTINC_SUBDIRS = \
+
+INSTINC_SUBDIRS_PPC = \
+
+INSTINC_SUBDIRS_I386 = \
+
+EXPINC_SUBDIRS = \
+
+EXPINC_SUBDIRS_PPC = \
+
+EXPINC_SUBDIRS_I386 = \
+
+DATAFILES= \
+
+KERNELFILES= \
+
+PRIVATE_DATAFILES = \
+       classq.h classq_blue.h classq_red.h classq_rio.h classq_sfb.h \
+       if_classq.h
+
+PRIVATE_KERNELFILES = ${KERNELFILES}
+
+INSTALL_MI_LIST        = ${DATAFILES}
+
+INSTALL_MI_DIR = net/classq
+
+EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES}
+
+EXPORT_MI_DIR = ${INSTALL_MI_DIR}
+
+INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+
+INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/bsd/net/classq/classq.c b/bsd/net/classq/classq.c
new file mode 100644 (file)
index 0000000..92b7600
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the Network Research
+ *      Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/random.h>
+#include <sys/kernel_types.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+#include <net/classq/classq.h>
+
+#include <libkern/libkern.h>
+
+u_int32_t classq_verbose;      /* more noise if greater than 1 */
+
+SYSCTL_NODE(_net, OID_AUTO, classq, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "classq");
+
+SYSCTL_UINT(_net_classq, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
+       &classq_verbose, 0, "Class queue verbosity level");
+
+void
+_qinit(class_queue_t *q, int type, int lim)
+{
+       MBUFQ_INIT(&q->mbufq);
+       qlimit(q) = lim;
+       qlen(q) = 0;
+       qsize(q) = 0;
+       qtype(q) = type;
+       qstate(q) = QS_RUNNING;
+}
+
+/* add a packet at the tail of the queue */
+void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+       MBUFQ_ENQUEUE(&q->mbufq, m);
+       qlen(q)++;
+       VERIFY(qlen(q) != 0);
+       qsize(q) += m_length(m);
+}
+
+/* add one or more packets at the tail of the queue */
+void
+_addq_multi(class_queue_t *q, struct mbuf *m_head, struct mbuf *m_tail,
+    u_int32_t cnt, u_int32_t size)
+{
+       MBUFQ_ENQUEUE_MULTI(&q->mbufq, m_head, m_tail);
+       qlen(q) += cnt;
+       qsize(q) += size;
+}
+
+/* get a packet at the head of the queue */
+struct mbuf *
+_getq(class_queue_t *q)
+{
+       struct mbuf *m;
+
+       MBUFQ_DEQUEUE(&q->mbufq, m);
+       if (m == NULL) {
+               VERIFY(qlen(q) == 0);
+               if (qsize(q) > 0)
+                       qsize(q) = 0;
+               return (NULL);
+       }
+       VERIFY(qlen(q) > 0);
+       qlen(q)--;
+
+       /* qsize is an approximation, so adjust if necessary */
+       if (((int)qsize(q) - m_length(m)) > 0)
+               qsize(q) -= m_length(m);
+       else if (qsize(q) != 0)
+               qsize(q) = 0;
+
+       return (m);
+}
+
+/* get a packet of a specific flow beginning from the head of the queue */
+struct mbuf *
+_getq_flow(class_queue_t *q, u_int32_t flow)
+{
+       struct mbuf *m, *m_tmp;
+
+       MBUFQ_FOREACH_SAFE(m, &q->mbufq, m_tmp) {
+               if (flow == 0 || ((m->m_flags & M_PKTHDR) &&
+                   m->m_pkthdr.m_flowhash == flow)) {
+                       /* remove it from the class queue */
+                       MBUFQ_REMOVE(&q->mbufq, m);
+                       MBUFQ_NEXT(m) = NULL;
+                       break;
+               }
+       }
+
+       if (m != NULL) {
+               u_int32_t l = m_length(m);
+
+               VERIFY(qlen(q) > 0);
+               qlen(q)--;
+
+               /* qsize is an approximation, so adjust if necessary */
+               if (((int)qsize(q) - l) > 0)
+                       qsize(q) -= l;
+               else if (qsize(q) != 0)
+                       qsize(q) = 0;
+       }
+
+       return (m);
+}
+
+/* get all packets starting from the head of the queue */
+struct mbuf *
+_getq_all(class_queue_t *q)
+{
+       struct mbuf *m;
+
+       m = MBUFQ_FIRST(&q->mbufq);
+       MBUFQ_INIT(&q->mbufq);
+       qlen(q) = 0;
+       qsize(q) = 0;
+
+       return (m);
+}
+
+/* drop a packet at the tail of the queue */
+struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+       struct mq_head *head = &q->mbufq;
+       struct mbuf *m = MBUFQ_LAST(head);
+
+       if (m != NULL) {
+               struct mbuf *n = MBUFQ_FIRST(head);
+
+               while (n != NULL) {
+                       struct mbuf *next = MBUFQ_NEXT(n);
+                       if (next == m) {
+                               MBUFQ_NEXT(n) = NULL;
+                               break;
+                       }
+                       n = next;
+               }
+               VERIFY(n != NULL ||
+                   (qlen(q) == 1 && m == MBUFQ_FIRST(head)));
+               VERIFY(qlen(q) > 0);
+               --qlen(q);
+
+               /* qsize is an approximation, so adjust if necessary */
+               if (((int)qsize(q) - m_length(m)) > 0)
+                       qsize(q) -= m_length(m);
+               else if (qsize(q) != 0)
+                       qsize(q) = 0;
+
+               if (qempty(q)) {
+                       VERIFY(MBUFQ_EMPTY(head));
+                       MBUFQ_INIT(head);
+               } else {
+                       VERIFY(n != NULL);
+                       head->mq_last = &MBUFQ_NEXT(n);
+               }
+       }
+       return (m);
+}
+
+/* randomly select a packet in the queue */
+struct mbuf *
+_getq_random(class_queue_t *q)
+{
+       struct mq_head *head = &q->mbufq;
+       struct mbuf *m = NULL;
+       unsigned int n;
+       u_int32_t rnd;
+
+       n = qlen(q);
+       if (n == 0) {
+               VERIFY(MBUFQ_EMPTY(head));
+               if (qsize(q) > 0)
+                       qsize(q) = 0;
+               return (NULL);
+       }
+
+       m = MBUFQ_FIRST(head);
+       read_random(&rnd, sizeof (rnd));
+       n = (rnd % n) + 1;
+
+       if (n == 1) {
+               if ((MBUFQ_FIRST(head) = MBUFQ_NEXT(m)) == NULL)
+                       (head)->mq_last = &MBUFQ_FIRST(head);
+       } else {
+               struct mbuf *p = NULL;
+
+               VERIFY(n > 1);
+               while (n--) {
+                       if (MBUFQ_NEXT(m) == NULL)
+                               break;
+                       p = m;
+                       m = MBUFQ_NEXT(m);
+               }
+               VERIFY(p != NULL && MBUFQ_NEXT(p) == m);
+
+               if ((MBUFQ_NEXT(p) = MBUFQ_NEXT(m)) == NULL)
+                       (head)->mq_last = &MBUFQ_NEXT(p);
+       }
+
+       VERIFY(qlen(q) > 0);
+       --qlen(q);
+
+       /* qsize is an approximation, so adjust if necessary */
+       if (((int)qsize(q) - m_length(m)) > 0)
+               qsize(q) -= m_length(m);
+       else if (qsize(q) != 0)
+               qsize(q) = 0;
+
+       MBUFQ_NEXT(m) = NULL;
+
+       return (m);
+}
+
+/* remove a packet from the queue */
+void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+       struct mq_head *head = &q->mbufq;
+       struct mbuf *m0, **mtail;
+
+       m0 = MBUFQ_FIRST(head);
+       if (m0 == NULL)
+               return;
+
+       if (m0 != m) {
+               while (m0 != NULL && MBUFQ_NEXT(m0) != m)
+                       m0 = MBUFQ_NEXT(m0);
+               if (m0 == NULL)
+                       return;         /* m was not on this queue */
+               mtail = &MBUFQ_NEXT(m0);
+       } else {
+               mtail = &MBUFQ_FIRST(head);
+       }
+
+       *mtail = MBUFQ_NEXT(m);
+       if (*mtail == NULL)
+               head->mq_last = mtail;
+
+       VERIFY(qlen(q) > 0);
+       --qlen(q);
+
+       /* qsize is an approximation, so adjust if necessary */
+       if (((int)qsize(q) - m_length(m)) > 0)
+               qsize(q) -= m_length(m);
+       else if (qsize(q) != 0)
+               qsize(q) = 0;
+
+       MBUFQ_NEXT(m) = NULL;
+}
+
+void
+_flushq(class_queue_t *q)
+{
+       (void) _flushq_flow(q, 0, NULL, NULL);
+}
+
+void
+_flushq_flow(class_queue_t *q, u_int32_t flow, u_int32_t *cnt, u_int32_t *len)
+{
+       MBUFQ_HEAD(mq_freeq) freeq;
+       struct mbuf *m, *m_tmp;
+       u_int32_t c = 0, l = 0;
+
+       MBUFQ_INIT(&freeq);
+
+       MBUFQ_FOREACH_SAFE(m, &q->mbufq, m_tmp) {
+               if (flow == 0 || ((m->m_flags & M_PKTHDR) &&
+                   m->m_pkthdr.m_flowhash == flow)) {
+                       /* remove it from the class queue */
+                       MBUFQ_REMOVE(&q->mbufq, m);
+                       MBUFQ_NEXT(m) = NULL;
+
+                       /* and add it to the free queue */
+                       MBUFQ_ENQUEUE(&freeq, m);
+
+                       l += m_length(m);
+                       c++;
+               }
+       }
+       VERIFY(c == 0 || !MBUFQ_EMPTY(&freeq));
+
+       if (c > 0) {
+               VERIFY(qlen(q) >= c);
+               qlen(q) -= c;
+
+               /* qsize is an approximation, so adjust if necessary */
+               if (((int)qsize(q) - l) > 0)
+                       qsize(q) -= l;
+               else if (qsize(q) != 0)
+                       qsize(q) = 0;
+       }
+
+       if (!MBUFQ_EMPTY(&freeq))
+               m_freem_list(MBUFQ_FIRST(&freeq));
+
+       if (cnt != NULL)
+               *cnt = c;
+       if (len != NULL)
+               *len = l;
+}
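
A kernel-side sketch of the queue primitives above, assuming an mbuf m and a drop-tail policy (Q_DROPTAIL and DEFAULT_QLIMIT are defined in classq.h, added below):

        class_queue_t q;

        _qinit(&q, Q_DROPTAIL, DEFAULT_QLIMIT); /* FIFO, 128-packet limit */
        _addq(&q, m);                           /* enqueue at the tail */
        if (qlen(&q) > qlimit(&q))
                m_freem(_getq_tail(&q));        /* drop from the tail on overflow */
        m = _getq(&q);                          /* dequeue from the head */

Note that qsize() is deliberately approximate; every dequeue path clamps it at zero instead of trusting the running byte count.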
diff --git a/bsd/net/classq/classq.h b/bsd/net/classq/classq.h
new file mode 100644 (file)
index 0000000..fa18ae4
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_classq.h,v 1.7 2006/10/12 19:59:08 peter Exp $    */
+/*     $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $        */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Network Research
+ *     Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _NET_CLASSQ_CLASSQ_H_
+#define        _NET_CLASSQ_CLASSQ_H_
+
+#ifdef PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types
+ */
+typedef enum classq_type {
+       Q_DROPHEAD,
+       Q_DROPTAIL,
+       Q_RED,
+       Q_RIO,
+       Q_BLUE,
+       Q_SFB
+} classq_type_t;
+
+/*
+ * Packet Queue states
+ */
+typedef enum classq_state {
+       QS_RUNNING,
+       QS_SUSPENDED
+} classq_state_t;
+
+#define        DEFAULT_QLIMIT  128 /* default */
+
+/*
+ * generic packet counter
+ */
+struct pktcntr {
+       u_int64_t       packets;
+       u_int64_t       bytes;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+#include <sys/mcache.h>
+#include <sys/mbuf.h>
+#include <sys/sysctl.h>
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+typedef struct _class_queue_ {
+       MBUFQ_HEAD(mq_head) mbufq;      /* Packet queue */
+       u_int32_t       qlen;   /* Queue length (in number of packets) */
+       u_int32_t       qsize;  /* Approx. queue size (in number of bytes) */
+       u_int32_t       qlim;   /* Queue limit (in number of packets) */
+       classq_type_t   qtype;  /* Queue type */
+       classq_state_t  qstate; /* Queue state */
+} class_queue_t;
+
+#define        qtype(q)        (q)->qtype              /* Get queue type */
+#define        qstate(q)       (q)->qstate             /* Get queue state */
+#define        qlimit(q)       (q)->qlim               /* Max packets to be queued */
+#define        qlen(q)         (q)->qlen               /* Current queue length. */
+#define        qsize(q)        (q)->qsize              /* Approx. bytes in queue */
+/* #define     qtail(q)        MBUFQ_LAST(&(q)->mbufq) */
+#define        qhead(q)        MBUFQ_FIRST(&(q)->mbufq)
+
+#define        qempty(q)       (qlen(q) == 0)  /* Is the queue empty? */
+#define        q_is_red(q)     (qtype(q) == Q_RED)     /* Is the queue a RED queue */
+#define        q_is_rio(q)     (qtype(q) == Q_RIO)     /* Is the queue a RIO queue */
+#define        q_is_blue(q)    (qtype(q) == Q_BLUE)    /* Is the queue a BLUE queue */
+#define        q_is_sfb(q)     (qtype(q) == Q_SFB)     /* Is the queue a SFB queue */
+#define        q_is_red_or_rio(q) (qtype(q) == Q_RED || qtype(q) == Q_RIO)
+#define        q_is_suspended(q) (qstate(q) == QS_SUSPENDED)
+
+#define        PKTCNTR_ADD(_cntr, _pkt, _len) do {                             \
+       (_cntr)->packets += (_pkt);                                     \
+       (_cntr)->bytes += (_len);                                       \
+} while (0)
+
+#define        PKTCNTR_CLEAR(_cntr) do {                                       \
+       (_cntr)->packets = 0;                                           \
+       (_cntr)->bytes = 0;                                             \
+} while (0)
+
+/* flags for mark_ecn() */
+#define        CLASSQF_ECN4    0x01    /* use packet marking for IPv4 packets */
+#define        CLASSQF_ECN6    0x02    /* use packet marking for IPv6 packets */
+#define        CLASSQF_ECN     (CLASSQF_ECN4 | CLASSQF_ECN6)
+
+extern u_int32_t classq_verbose;
+
+SYSCTL_DECL(_net_classq);
+
+extern void _qinit(class_queue_t *, int, int);
+extern void _addq(class_queue_t *, struct mbuf *);
+extern void _addq_multi(class_queue_t *, struct mbuf *, struct mbuf *,
+    u_int32_t, u_int32_t);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_all(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern struct mbuf *_getq_flow(class_queue_t *, u_int32_t);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+extern void _flushq_flow(class_queue_t *, u_int32_t, u_int32_t *, u_int32_t *);
+
+extern void classq_init(void);
+
+extern u_int8_t read_dsfield(struct mbuf *, struct pf_mtag *);
+extern void    write_dsfield(struct mbuf *, struct pf_mtag *, u_int8_t);
+extern int     mark_ecn(struct mbuf *, struct pf_mtag *, int);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_CLASSQ_H_ */
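
The pktcntr accumulators give the queue disciplines in this commit a common way to track per-class totals. A sketch, assuming an mbuf m:

        struct pktcntr cnt = { 0, 0 };

        PKTCNTR_ADD(&cnt, 1, m_length(m));      /* one packet, its byte length */
        PKTCNTR_CLEAR(&cnt);                    /* reset both counters */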
diff --git a/bsd/net/classq/classq_blue.c b/bsd/net/classq/classq_blue.c
new file mode 100644 (file)
index 0000000..6b67d94
--- /dev/null
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_blue.c,v 1.21 2006/11/16 01:32:37 christos Exp $  */
+/*     $KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
+
+/*
+ * Copyright (C) 1997-2002
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Computer Systems
+ *     Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#if CLASSQ_BLUE
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/kauth.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/classq/classq_blue.h>
+#include <net/net_osdep.h>
+
+/*
+ * Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
+ * more information on Blue is available from
+ * http://www.eecs.umich.edu/~wuchang/blue/
+ */
+
+#define        BLUE_LIMIT      200             /* default max queue length */
+
+#define        BLUE_ZONE_MAX   32              /* maximum elements in zone */
+#define        BLUE_ZONE_NAME  "classq_blue"   /* zone name */
+
+static unsigned int blue_size;         /* size of zone element */
+static struct zone *blue_zone;         /* zone for blue */
+
+/* internal function prototypes */
+static struct mbuf *blue_getq_flow(struct blue *, class_queue_t *,
+    u_int32_t, boolean_t);
+static int blue_drop_early(struct blue *);
+
+void
+blue_init(void)
+{
+       _CASSERT(BLUEF_ECN4 == CLASSQF_ECN4);
+       _CASSERT(BLUEF_ECN6 == CLASSQF_ECN6);
+
+       blue_size = sizeof (struct blue);
+       blue_zone = zinit(blue_size, BLUE_ZONE_MAX * blue_size,
+           0, BLUE_ZONE_NAME);
+       if (blue_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, BLUE_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(blue_zone, Z_EXPAND, TRUE);
+       zone_change(blue_zone, Z_CALLERACCT, TRUE);
+}
+
+/*
+ * blue support routines
+ */
+struct blue *
+blue_alloc(struct ifnet *ifp, u_int32_t max_pmark, u_int32_t hold_time,
+    u_int32_t flags)
+{
+       struct blue *bp;
+
+       VERIFY(ifp != NULL);
+
+       bp = zalloc(blue_zone);
+       if (bp == NULL)
+               return (NULL);
+
+       bzero(bp, blue_size);
+       bp->blue_idle = 1;
+       bp->blue_flags = (flags & BLUEF_USERFLAGS);
+       bp->blue_ifp = ifp;
+
+       if (max_pmark == 0)
+               bp->blue_max_pmark = 1000;
+       else
+               bp->blue_max_pmark = max_pmark;
+
+       if (hold_time == 0)
+               bp->blue_hold_time = 50000;
+       else
+               bp->blue_hold_time = hold_time;
+
+       microuptime(&bp->blue_last);
+
+       return (bp);
+}
+
+void
+blue_destroy(struct blue *bp)
+{
+       zfree(blue_zone, bp);
+}
+
+void
+blue_getstats(struct blue *bp, struct blue_stats *sp)
+{
+       sp->q_pmark             = bp->blue_pmark;
+       sp->drop_forced         = bp->blue_stats.drop_forced;
+       sp->drop_unforced       = bp->blue_stats.drop_unforced;
+       sp->marked_packets      = bp->blue_stats.marked_packets;
+}
+
+#define        DTYPE_NODROP    0       /* no drop */
+#define        DTYPE_FORCED    1       /* a "forced" drop */
+#define        DTYPE_EARLY     2       /* an "unforced" (early) drop */
+
+int
+blue_addq(struct blue *bp, class_queue_t *q, struct mbuf *m,
+    struct pf_mtag *tag)
+{
+       int droptype;
+
+       /*
+        * if we were idle, this is an enqueue onto an empty queue
+        * and we should decrement marking probability
+        */
+       if (bp->blue_idle) {
+               struct timeval now;
+               u_int32_t t;
+
+               bp->blue_idle = 0;
+               microuptime(&now);
+               t = (now.tv_sec - bp->blue_last.tv_sec);
+               if (t > 1) {
+                       bp->blue_pmark = 1;
+                       microuptime(&bp->blue_last);
+               } else {
+                       t = t * 1000000 + (now.tv_usec - bp->blue_last.tv_usec);
+                       if (t > bp->blue_hold_time) {
+                               bp->blue_pmark--;
+                               if (bp->blue_pmark < 0)
+                                       bp->blue_pmark = 0;
+                               microuptime(&bp->blue_last);
+                       }
+               }
+       }
+
+       /* see if we drop early */
+       droptype = DTYPE_NODROP;
+       if (blue_drop_early(bp) && qlen(q) > 1) {
+               /* mark or drop by blue */
+               if ((bp->blue_flags & BLUEF_ECN) &&
+                   (tag->pftag_flags & PF_TAG_TCP) &&  /* only for TCP */
+                   mark_ecn(m, tag, bp->blue_flags)) {
+                       /* successfully marked.  do not drop. */
+                       bp->blue_stats.marked_packets++;
+               } else {
+                       /* unforced drop by blue */
+                       droptype = DTYPE_EARLY;
+               }
+       }
+
+       /* if the queue length hits the hard limit, it's a forced drop */
+       if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+               droptype = DTYPE_FORCED;
+
+       /* if successful or forced drop, enqueue this packet. */
+       if (droptype != DTYPE_EARLY)
+               _addq(q, m);
+
+       if (droptype != DTYPE_NODROP) {
+               if (droptype == DTYPE_EARLY) {
+                       /* drop the incoming packet */
+                       bp->blue_stats.drop_unforced++;
+               } else {
+                       struct timeval now;
+                       u_int32_t t;
+                       /* forced drop, select a victim packet in the queue. */
+                       m = _getq_random(q);
+                       microuptime(&now);
+                       t = (now.tv_sec - bp->blue_last.tv_sec);
+                       t = t * 1000000 + (now.tv_usec - bp->blue_last.tv_usec);
+                       if (t > bp->blue_hold_time) {
+                               bp->blue_pmark += bp->blue_max_pmark >> 3;
+                               if (bp->blue_pmark > bp->blue_max_pmark)
+                                       bp->blue_pmark = bp->blue_max_pmark;
+                               microuptime(&bp->blue_last);
+                       }
+                       bp->blue_stats.drop_forced++;
+               }
+               IFCQ_CONVERT_LOCK(&bp->blue_ifp->if_snd);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+       /* successfully queued */
+       return (CLASSQEQ_SUCCESS);
+}
+
+static struct mbuf *
+blue_getq_flow(struct blue *bp, class_queue_t *q, u_int32_t flow,
+    boolean_t purge)
+{
+#pragma unused(purge)
+       struct mbuf *m;
+
+       /* flow of 0 means head of queue */
+       if ((m = ((flow == 0) ? _getq(q) : _getq_flow(q, flow))) == NULL) {
+               if (bp->blue_idle == 0) {
+                       bp->blue_idle = 1;
+                       microuptime(&bp->blue_last);
+               }
+               return (NULL);
+       }
+
+       bp->blue_idle = 0;
+       return (m);
+}
+
+struct mbuf *
+blue_getq(struct blue *bp, class_queue_t *q)
+{
+       return (blue_getq_flow(bp, q, 0, FALSE));
+}
+
+void
+blue_purgeq(struct blue *bp, class_queue_t *q, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       u_int32_t cnt = 0, len = 0;
+       struct mbuf *m;
+
+       IFCQ_CONVERT_LOCK(&bp->blue_ifp->if_snd);
+
+       while ((m = blue_getq_flow(bp, q, flow, TRUE)) != NULL) {
+               cnt++;
+               len += m_pktlen(m);
+               m_freem(m);
+       }
+
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+/*
+ * early-drop probability is kept in blue_pmark
+ */
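+/*
+ * e.g. with the default blue_max_pmark of 1000, a blue_pmark of 250 makes
+ * the test below succeed for roughly one in four arriving packets.
+ */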
+static int
+blue_drop_early(struct blue *bp)
+{
+       if ((random() % (unsigned)bp->blue_max_pmark) <
+           (unsigned)bp->blue_pmark) {
+               /* drop or mark */
+               return (1);
+       }
+       /* no drop/mark */
+       return (0);
+}
+
+void
+blue_updateq(struct blue *bp, cqev_t ev)
+{
+#pragma unused(bp, ev)
+       /* nothing for now */
+}
+
+int
+blue_suspendq(struct blue *bp, class_queue_t *q, boolean_t on)
+{
+#pragma unused(bp, q, on)
+       return (ENOTSUP);
+}
+#endif /* CLASSQ_BLUE */
diff --git a/bsd/net/classq/classq_blue.h b/bsd/net/classq/classq_blue.h
new file mode 100644 (file)
index 0000000..e6c546e
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_blue.h,v 1.5 2006/10/12 19:59:08 peter Exp $      */
+/*     $KAME: altq_blue.h,v 1.7 2002/11/29 04:36:22 kjc Exp $  */
+
+/*
+ * Copyright (C) 1997-2002
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_CLASSQ_CLASSQ_BLUE_H_
+#define        _NET_CLASSQ_CLASSQ_BLUE_H_
+
+#ifdef PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/classq/if_classq.h>
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct blue_stats {
+       int32_t                 q_pmark;
+       u_int32_t               _pad;
+       u_int64_t               drop_forced;
+       u_int64_t               drop_unforced;
+       u_int64_t               marked_packets;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+/* blue flags */
+#define        BLUEF_ECN4      0x01    /* use packet marking for IPv4 packets */
+#define        BLUEF_ECN6      0x02    /* use packet marking for IPv6 packets */
+#define        BLUEF_ECN       (BLUEF_ECN4 | BLUEF_ECN6)
+
+#define        BLUEF_USERFLAGS                                                 \
+       (BLUEF_ECN4 | BLUEF_ECN6)
+
+typedef struct blue {
+       u_int32_t blue_flags;   /* blue flags */
+
+       /* blue parameters */
+       int32_t   blue_pmark;     /* 0-1000 (mark probability * 1000) */
+       int32_t   blue_max_pmark; /* sets precision of marking probability */
+       u_int32_t blue_hold_time; /* hold time in usec */
+       struct ifnet *blue_ifp;   /* back pointer to ifnet */
+
+       /* variables for internal use */
+       u_int32_t blue_idle;      /* queue was empty */
+       struct timeval blue_last; /* timestamp when the queue becomes idle */
+
+       /* statistics */
+       struct {
+               struct pktcntr  xmit_cnt;
+               struct pktcntr  drop_cnt;
+               u_int64_t       drop_forced;
+               u_int64_t       drop_unforced;
+               u_int64_t       marked_packets;
+       } blue_stats;
+} blue_t;
+
+extern void blue_init(void);
+extern struct blue *blue_alloc(struct ifnet *, u_int32_t, u_int32_t, u_int32_t);
+extern void blue_destroy(struct blue *);
+extern int blue_addq(struct blue *, class_queue_t *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *blue_getq(struct blue *, class_queue_t *);
+extern void blue_purgeq(struct blue *, class_queue_t *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+extern void blue_getstats(struct blue *, struct blue_stats *);
+extern void blue_updateq(struct blue *, cqev_t);
+extern int blue_suspendq(struct blue *, class_queue_t *, boolean_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_CLASSQ_BLUE_H_ */
diff --git a/bsd/net/classq/classq_red.c b/bsd/net/classq/classq_red.c
new file mode 100644 (file)
index 0000000..825b62d
--- /dev/null
@@ -0,0 +1,615 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_red.c,v 1.14 2007/09/13 20:40:02 chl Exp $       */
+/*     $KAME: altq_red.c,v 1.10 2002/04/03 05:38:51 kjc Exp $  */
+
+/*
+ * Copyright (C) 1997-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Computer Systems
+ *     Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#if CLASSQ_RED
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kauth.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/classq/classq_red.h>
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are the values recommended by Sally.
+ * the corresponding ns config looks like:
+ *     q_weight=0.00195
+ *     minthresh=5 maxthresh=15 queue-size=60
+ *     linterm=30
+ *     dropmech=drop-tail
+ *     bytes=false (can't be handled by 32-bit fixed-point)
+ *     doubleq=false dqthresh=false
+ *     wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length goes from zero to L and then stays at L; it takes
+ * N packets for q_avg to reach 63% of L.
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets take more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
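+/*
+ * the 63% figure is the EWMA time constant: after N enqueues at length L,
+ * q_avg = (1 - (1 - q_weight)^N) * L, and (1 - q_weight)^N ~= 1/e once
+ * N ~= 1/q_weight.  hence N ~= 512 for q_weight = 1/512, ~= 128 for 1/128
+ * and ~= 64 for 1/64, matching the packet counts quoted above.
+ */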
+/* normal red parameters */
+#define        W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
+                               /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define        W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
+                               /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define        W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
+                               /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define        FP_SHIFT        12      /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define        INV_P_MAX       10      /* inverse of max drop probability */
+#define        TH_MIN          5       /* min threshold */
+#define        TH_MAX          15      /* max threshold */
+
+#define        RED_LIMIT       60      /* default max queue length */
+
+#define        RED_ZONE_MAX    32              /* maximum elements in zone */
+#define        RED_ZONE_NAME   "classq_red"    /* zone name */
+
+static unsigned int red_size;          /* size of zone element */
+static struct zone *red_zone;          /* zone for red */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
+/* default red parameter values */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+static struct mbuf *red_getq_flow(struct red *, class_queue_t *,
+    u_int32_t, boolean_t);
+
+void
+red_init(void)
+{
+       _CASSERT(REDF_ECN4 == CLASSQF_ECN4);
+       _CASSERT(REDF_ECN6 == CLASSQF_ECN6);
+
+       red_size = sizeof (red_t);
+       red_zone = zinit(red_size, RED_ZONE_MAX * red_size,
+           0, RED_ZONE_NAME);
+       if (red_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, RED_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(red_zone, Z_EXPAND, TRUE);
+       zone_change(red_zone, Z_CALLERACCT, TRUE);
+}
+
+/*
+ * red support routines
+ */
+red_t *
+red_alloc(struct ifnet *ifp, int weight, int inv_pmax, int th_min,
+    int th_max, int flags, int pkttime)
+{
+       red_t   *rp;
+       int      w, i;
+       int      npkts_per_sec;
+
+       VERIFY(ifp != NULL);
+
+       rp = zalloc(red_zone);
+       if (rp == NULL)
+               return (NULL);
+
+       bzero(rp, red_size);
+       rp->red_avg = 0;
+       rp->red_idle = 1;
+
+       if (weight == 0)
+               rp->red_weight = W_WEIGHT;
+       else
+               rp->red_weight = weight;
+       if (inv_pmax == 0)
+               rp->red_inv_pmax = default_inv_pmax;
+       else
+               rp->red_inv_pmax = inv_pmax;
+       if (th_min == 0)
+               rp->red_thmin = default_th_min;
+       else
+               rp->red_thmin = th_min;
+       if (th_max == 0)
+               rp->red_thmax = default_th_max;
+       else
+               rp->red_thmax = th_max;
+
+       rp->red_flags = (flags & REDF_USERFLAGS);
+       rp->red_ifp = ifp;
+
+       if (pkttime == 0)
+               /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 = 800 usec */
+               rp->red_pkttime = 800;
+       else
+               rp->red_pkttime = pkttime;
+
+       if (weight == 0) {
+               /* when the link is very slow, adjust red parameters */
+               npkts_per_sec = 1000000 / rp->red_pkttime;
+               if (npkts_per_sec < 50) {
+                       /* up to about 400Kbps */
+                       rp->red_weight = W_WEIGHT_2;
+               } else if (npkts_per_sec < 300) {
+                       /* up to about 2.4Mbps */
+                       rp->red_weight = W_WEIGHT_1;
+               }
+       }
+
+       /* calculate wshift.  weight must be a power of 2 */
+       w = rp->red_weight;
+       for (i = 0; w > 1; i++)
+               w = w >> 1;
+       rp->red_wshift = i;
+       w = 1 << rp->red_wshift;
+       if (w != rp->red_weight) {
+               printf("invalid weight value %d for red! use %d\n",
+                   rp->red_weight, w);
+               rp->red_weight = w;
+       }
+
+       /*
+        * thmin_s and thmax_s are scaled versions of th_min and th_max
+        * to be compared with avg.
+        */
+       rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+       rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+       /*
+        * precompute probability denominator
+        *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+        */
+       rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) *
+           rp->red_inv_pmax) << FP_SHIFT;
+
+       /* allocate weight table */
+       rp->red_wtab = wtab_alloc(rp->red_weight);
+       if (rp->red_wtab == NULL) {
+               red_destroy(rp);
+               return (NULL);
+       }
+
+       microuptime(&rp->red_last);
+       return (rp);
+}
+
+void
+red_destroy(red_t *rp)
+{
+       if (rp->red_wtab != NULL) {
+               wtab_destroy(rp->red_wtab);
+               rp->red_wtab = NULL;
+       }
+       zfree(red_zone, rp);
+}
+
+void
+red_getstats(red_t *rp, struct red_stats *sp)
+{
+       sp->q_avg               = rp->red_avg >> rp->red_wshift;
+       sp->drop_forced         = rp->red_stats.drop_forced;
+       sp->drop_unforced       = rp->red_stats.drop_unforced;
+       sp->marked_packets      = rp->red_stats.marked_packets;
+}
+
+int
+red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag)
+{
+       int avg, droptype;
+       int n;
+
+       avg = rp->red_avg;
+
+       /*
+        * if we were idle, we pretend that n packets arrived during
+        * the idle period.
+        */
+       if (rp->red_idle) {
+               struct timeval now;
+               int t;
+
+               rp->red_idle = 0;
+               microuptime(&now);
+               t = (now.tv_sec - rp->red_last.tv_sec);
+               if (t > 60) {
+                       /*
+                        * if we have been idle for more than 1 minute,
+                        * set avg to zero; this prevents t from overflowing.
+                        */
+                       avg = 0;
+               } else {
+                       t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+                       n = t / rp->red_pkttime - 1;
+
+                       /* the following line does (avg = (1 - Wq)^n * avg) */
+                       if (n > 0)
+                               avg = (avg >> FP_SHIFT) *
+                                   pow_w(rp->red_wtab, n);
+               }
+       }
+
+       /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
+       avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+       rp->red_avg = avg;              /* save the new value */
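+       /*
+        * avg is stored as (true average) << (red_wshift + FP_SHIFT), so
+        * the update above is the fixed-point form of the EWMA
+        *     avg = (1 - 1/weight) * avg + qlen / weight
+        * with Wq = 1/weight.
+        */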
+
+       /*
+        * red_count keeps a tally of arriving traffic that has not
+        * been dropped.
+        */
+       rp->red_count++;
+
+       /* see if we drop early */
+       droptype = DTYPE_NODROP;
+       if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+               if (avg >= rp->red_thmax_s) {
+                       /* avg >= th_max: forced drop */
+                       droptype = DTYPE_FORCED;
+               } else if (rp->red_old == 0) {
+                       /* first exceeds th_min */
+                       rp->red_count = 1;
+                       rp->red_old = 1;
+               } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+                   rp->red_probd, rp->red_count)) {
+                       /* mark or drop by red */
+                       if ((rp->red_flags & REDF_ECN) &&
+                           (tag->pftag_flags & PF_TAG_TCP) &&  /* only TCP */
+                           mark_ecn(m, tag, rp->red_flags)) {
+                               /* successfully marked.  do not drop. */
+                               rp->red_count = 0;
+                               rp->red_stats.marked_packets++;
+                       } else {
+                               /* unforced drop by red */
+                               droptype = DTYPE_EARLY;
+                       }
+               }
+       } else {
+               /* avg < th_min */
+               rp->red_old = 0;
+       }
+
+       /*
+        * if the queue length hits the hard limit, it's a forced drop.
+        */
+       if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+               droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+       /* if successful or forced drop, enqueue this packet. */
+       if (droptype != DTYPE_EARLY)
+               _addq(q, m);
+#else
+       /* if successful, enqueue this packet. */
+       if (droptype == DTYPE_NODROP)
+               _addq(q, m);
+#endif
+       if (droptype != DTYPE_NODROP) {
+               if (droptype == DTYPE_EARLY) {
+                       /* drop the incoming packet */
+                       rp->red_stats.drop_unforced++;
+               } else {
+                       /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+                       m = _getq_random(q);
+#endif
+                       rp->red_stats.drop_forced++;
+               }
+               rp->red_count = 0;
+               IFCQ_CONVERT_LOCK(&rp->red_ifp->if_snd);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+       /* successfully queued */
+       return (CLASSQEQ_SUCCESS);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ *   prob = p_max * (avg - th_min) / (th_max - th_min)
+ *   prob_a = prob / (2 - count*prob)
+ *         = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as successive undrop count increases.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
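+/*
+ * a numeric instance with the default parameters (th_min = 5, th_max = 15,
+ * inv_p_max = 10), with the common FP_SHIFT scaling factored out:
+ * fp_probd = 2 * 10 * 10 = 200.  when avg sits halfway between the
+ * thresholds (fp_len = 5) and count = 20, d = 200 - 20 * 5 = 100, so the
+ * drop probability is 5/100 = 5%, which is exactly prob_a at count == 1/prob.
+ */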
+int
+drop_early(int fp_len, int fp_probd, int count)
+{
+       int     d;              /* denominator of drop-probability */
+
+       d = fp_probd - count * fp_len;
+       if (d <= 0)
+               /* count exceeds the hard limit: drop or mark */
+               return (1);
+
+       /*
+        * now the range of d is [1..600] in fixed-point. (when
+        * th_max-th_min=10 and p_max=1/30)
+        * drop probability = (avg - TH_MIN) / d
+        */
+
+       if ((random() % d) < (unsigned)fp_len) {
+               /* drop or mark */
+               return (1);
+       }
+       /* no drop/mark */
+       return (0);
+}
+
+static struct mbuf *
+red_getq_flow(struct red *rp, class_queue_t *q, u_int32_t flow, boolean_t purge)
+{
+#pragma unused(purge)
+       struct mbuf *m;
+
+       /* flow of 0 means head of queue */
+       if ((m = ((flow == 0) ? _getq(q) : _getq_flow(q, flow))) == NULL) {
+               if (rp->red_idle == 0) {
+                       rp->red_idle = 1;
+                       microuptime(&rp->red_last);
+               }
+               return (NULL);
+       }
+
+       rp->red_idle = 0;
+       return (m);
+}
+
+struct mbuf *
+red_getq(red_t *rp, class_queue_t *q)
+{
+       return (red_getq_flow(rp, q, 0, FALSE));
+}
+
+void
+red_purgeq(struct red *rp, class_queue_t *q, u_int32_t flow, u_int32_t *packets,
+    u_int32_t *bytes)
+{
+       u_int32_t cnt = 0, len = 0;
+       struct mbuf *m;
+
+       IFCQ_CONVERT_LOCK(&rp->red_ifp->if_snd);
+
+       while ((m = red_getq_flow(rp, q, flow, TRUE)) != NULL) {
+               cnt++;
+               len += m_pktlen(m);
+               m_freem(m);
+       }
+
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+void
+red_updateq(red_t *rp, cqev_t ev)
+{
+#pragma unused(rp, ev)
+       /* nothing for now */
+}
+
+int
+red_suspendq(red_t *rp, class_queue_t *q, boolean_t on)
+{
+#pragma unused(rp, q, on)
+       return (ENOTSUP);
+}
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
+ * here Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
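+/*
+ * pow_w() below walks the bits of n, multiplying in w_tab[i] for each set
+ * bit; e.g. pow_w(w, 5) returns (w_tab[0] * w_tab[2]) >> FP_SHIFT, since
+ * (1 - Wq)^5 = (1 - Wq)^1 * (1 - Wq)^4.
+ */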
+static struct wtab *wtab_list = NULL;  /* pointer to wtab list */
+
+struct wtab *
+wtab_alloc(int weight)
+{
+       struct wtab     *w;
+       int              i;
+
+       for (w = wtab_list; w != NULL; w = w->w_next)
+               if (w->w_weight == weight) {
+                       w->w_refcount++;
+                       return (w);
+               }
+
+       w = _MALLOC(sizeof (struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
+       if (w == NULL)
+               return (NULL);
+
+       w->w_weight = weight;
+       w->w_refcount = 1;
+       w->w_next = wtab_list;
+       wtab_list = w;
+
+       /* initialize the weight table */
+       w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
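+       /* e.g. weight 512: w_tab[0] = (511 << 12) / 512 = 4088, ~0.998 */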
+       for (i = 1; i < 32; i++) {
+               w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+               if (w->w_tab[i] == 0 && w->w_param_max == 0)
+                       w->w_param_max = 1 << i;
+       }
+
+       return (w);
+}
+
+void
+wtab_destroy(struct wtab *w)
+{
+       struct wtab     *prev;
+
+       if (--w->w_refcount > 0)
+               return;
+
+       if (wtab_list == w)
+               wtab_list = w->w_next;
+       else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
+               if (prev->w_next == w) {
+                       prev->w_next = w->w_next;
+                       break;
+               }
+
+       _FREE(w, M_DEVBUF);
+}
+
+int32_t
+pow_w(struct wtab *w, int n)
+{
+       int     i, bit;
+       int32_t val;
+
+       if (n >= w->w_param_max)
+               return (0);
+
+       val = 1 << FP_SHIFT;
+       if (n <= 0)
+               return (val);
+
+       bit = 1;
+       i = 0;
+       while (n) {
+               if (n & bit) {
+                       val = (val * w->w_tab[i]) >> FP_SHIFT;
+                       n &= ~bit;
+               }
+               i++;
+               bit <<= 1;
+       }
+       return (val);
+}
+
+#endif /* CLASSQ_RED */
diff --git a/bsd/net/classq/classq_red.h b/bsd/net/classq/classq_red.h
new file mode 100644 (file)
index 0000000..58956b5
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_red.h,v 1.5 2006/10/12 19:59:08 peter Exp $       */
+/*     $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $   */
+
+/*
+ * Copyright (C) 1997-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_CLASSQ_CLASSQ_RED_H_
+#define        _NET_CLASSQ_CLASSQ_RED_H_
+
+#ifdef PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/classq/if_classq.h>
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+       int th_min;             /* red min threshold */
+       int th_max;             /* red max threshold */
+       int inv_pmax;           /* inverse of max drop probability */
+};
+
+struct red_stats {
+       int32_t         q_avg;
+       u_int32_t       _pad;
+       u_int32_t       drop_forced;
+       u_int32_t       drop_unforced;
+       u_int32_t       marked_packets;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+/* weight table structure for idle time calibration */
+struct wtab {
+       struct wtab     *w_next;
+       int              w_weight;
+       int              w_param_max;
+       int              w_refcount;
+       int32_t          w_tab[32];
+};
+
+/* red flags */
+#define        REDF_ECN4       0x01    /* use packet marking for IPv4 packets */
+#define        REDF_ECN6       0x02    /* use packet marking for IPv6 packets */
+#define        REDF_ECN        (REDF_ECN4 | REDF_ECN6)
+#define        REDF_FLOWVALVE  0x04    /* use flowvalve (aka penalty-box) */
+
+#define        REDF_USERFLAGS                                                  \
+       (REDF_ECN4 | REDF_ECN6 | REDF_FLOWVALVE)
+
+typedef struct red {
+       int             red_pkttime;    /* average packet time in micro sec */
+                                       /*   used for idle calibration */
+       int             red_flags;      /* red flags */
+       struct ifnet    *red_ifp;       /* back pointer to ifnet */
+
+       /* red parameters */
+       int             red_weight;     /* weight for EWMA */
+       int             red_inv_pmax;   /* inverse of max drop probability */
+       int             red_thmin;      /* red min threshold */
+       int             red_thmax;      /* red max threshold */
+
+       /* variables for internal use */
+       int             red_wshift;     /* log(red_weight) */
+       int             red_thmin_s;    /* th_min scaled by avgshift */
+       int             red_thmax_s;    /* th_max scaled by avgshift */
+       int             red_probd;      /* drop probability denominator */
+
+       int             red_avg;        /* queue len avg scaled by avgshift */
+       int             red_count;      /* packet count since last dropped/ */
+                                       /*   marked packet */
+       int             red_idle;       /* queue was empty */
+       int             red_old;        /* avg is above th_min */
+       struct wtab     *red_wtab;      /* weight table */
+       struct timeval   red_last;      /* time when the queue becomes idle */
+
+       struct {
+               struct pktcntr  xmit_cnt;
+               struct pktcntr  drop_cnt;
+               u_int32_t       drop_forced;
+               u_int32_t       drop_unforced;
+               u_int32_t       marked_packets;
+       } red_stats;
+} red_t;
+
+/* red drop types */
+#define        DTYPE_NODROP    0       /* no drop */
+#define        DTYPE_FORCED    1       /* a "forced" drop */
+#define        DTYPE_EARLY     2       /* an "unforced" (early) drop */
+
+extern void red_init(void);
+extern red_t *red_alloc(struct ifnet *, int, int, int, int, int, int);
+extern void red_destroy(red_t *);
+extern void red_getstats(red_t *, struct red_stats *);
+extern int red_addq(red_t *, class_queue_t *, struct mbuf *, struct pf_mtag *);
+extern struct mbuf *red_getq(red_t *, class_queue_t *);
+extern void red_purgeq(struct red *, class_queue_t *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+extern void red_updateq(red_t *, cqev_t);
+extern int red_suspendq(red_t *, class_queue_t *, boolean_t);
+
+extern int drop_early(int, int, int);
+extern struct wtab *wtab_alloc(int);
+extern void wtab_destroy(struct wtab *);
+extern int32_t pow_w(struct wtab *, int);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_CLASSQ_RED_H_ */
diff --git a/bsd/net/classq/classq_rio.c b/bsd/net/classq/classq_rio.c
new file mode 100644 (file)
index 0000000..20a44ee
--- /dev/null
@@ -0,0 +1,528 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_rio.c,v 1.11 2007/09/13 20:40:02 chl Exp $       */
+/*     $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $       */
+
+/*
+ * Copyright (C) 1998-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Computer Systems
+ *     Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#if CLASSQ_RIO
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kauth.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ *   described in
+ *     "Explicit Allocation of Best Effort Packet Delivery Service"
+ *     David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ *     http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ *      0   1   2   3   4   5   6   7
+ *    +---+---+---+---+---+---+---+---+
+ *    |   CLASS   |DropPre| 0 |  CU   |
+ *    +---+---+---+---+---+---+---+---+
+ *
+ *    class 1: 001
+ *    class 2: 010
+ *    class 3: 011
+ *    class 4: 100
+ *
+ *    low drop prec:    01
+ *    medium drop prec: 10
+ *    high drop prec:   11
+ */
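+/*
+ * e.g. for the AF1x codepoints (RFC 2597): an AF11 packet carries DS field
+ * byte 0x28, AF12 0x30 and AF13 0x38; masking with AF_DROPPRECMASK (0x18)
+ * and shifting in dscp2index() below yields drop precedence indices 0, 1
+ * and 2 respectively.
+ */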
+
+/* normal red parameters */
+#define        W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
+                               /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define        W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
+                               /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define        W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
+                               /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define        FP_SHIFT        12      /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define        INV_P_MAX       10      /* inverse of max drop probability */
+#define        TH_MIN           5      /* min threshold */
+#define        TH_MAX          15      /* max threshold */
+
+#define        RIO_LIMIT       60      /* default max queue length */
+
+/* default rio parameter values */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+  /* th_min,            th_max,     inv_pmax */
+  { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+  { TH_MAX + TH_MIN,    TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+  { TH_MIN,             TH_MAX,     INV_P_MAX }  /* high drop precedence */
+};
+
+#define        RIO_ZONE_MAX    32              /* maximum elements in zone */
+#define        RIO_ZONE_NAME   "classq_rio"    /* zone name */
+
+static unsigned int rio_size;          /* size of zone element */
+static struct zone *rio_zone;          /* zone for rio */
+
+/* internal function prototypes */
+static struct mbuf *rio_getq_flow(struct rio *, class_queue_t *,
+    u_int32_t, boolean_t);
+static int dscp2index(u_int8_t);
+
+void
+rio_init(void)
+{
+       _CASSERT(RIOF_ECN4 == CLASSQF_ECN4);
+       _CASSERT(RIOF_ECN6 == CLASSQF_ECN6);
+
+       rio_size = sizeof (rio_t);
+       rio_zone = zinit(rio_size, RIO_ZONE_MAX * rio_size,
+           0, RIO_ZONE_NAME);
+       if (rio_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, RIO_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(rio_zone, Z_EXPAND, TRUE);
+       zone_change(rio_zone, Z_CALLERACCT, TRUE);
+}
+
+rio_t *
+rio_alloc(struct ifnet *ifp, int weight, struct redparams *params,
+    int flags, int pkttime)
+{
+       rio_t   *rp;
+       int      w, i;
+       int      npkts_per_sec;
+
+       VERIFY(ifp != NULL);
+
+       rp = zalloc(rio_zone);
+       if (rp == NULL)
+               return (NULL);
+
+       bzero(rp, rio_size);
+       rp->rio_flags = (flags & RIOF_USERFLAGS);
+       rp->rio_ifp = ifp;
+
+       if (pkttime == 0)
+               /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 = 800 usec */
+               rp->rio_pkttime = 800;
+       else
+               rp->rio_pkttime = pkttime;
+
+       if (weight != 0)
+               rp->rio_weight = weight;
+       else {
+               /* use default */
+               rp->rio_weight = W_WEIGHT;
+
+               /* when the link is very slow, adjust red parameters */
+               npkts_per_sec = 1000000 / rp->rio_pkttime;
+               if (npkts_per_sec < 50) {
+                       /* up to about 400Kbps */
+                       rp->rio_weight = W_WEIGHT_2;
+               } else if (npkts_per_sec < 300) {
+                       /* up to about 2.4Mbps */
+                       rp->rio_weight = W_WEIGHT_1;
+               }
+       }
+
+       /* calculate wshift.  weight must be a power of 2 */
+       w = rp->rio_weight;
+       for (i = 0; w > 1; i++)
+               w = w >> 1;
+       rp->rio_wshift = i;
+       w = 1 << rp->rio_wshift;
+       if (w != rp->rio_weight) {
+               printf("invalid weight value %d for red! use %d\n",
+                   rp->rio_weight, w);
+               rp->rio_weight = w;
+       }
+
+       /* allocate weight table */
+       rp->rio_wtab = wtab_alloc(rp->rio_weight);
+       if (rp->rio_wtab == NULL) {
+               rio_destroy(rp);
+               return (NULL);
+       }
+
+       for (i = 0; i < RIO_NDROPPREC; i++) {
+               struct dropprec_state *prec = &rp->rio_precstate[i];
+
+               prec->avg = 0;
+               prec->idle = 1;
+
+               if (params == NULL || params[i].inv_pmax == 0)
+                       prec->inv_pmax = default_rio_params[i].inv_pmax;
+               else
+                       prec->inv_pmax = params[i].inv_pmax;
+               if (params == NULL || params[i].th_min == 0)
+                       prec->th_min = default_rio_params[i].th_min;
+               else
+                       prec->th_min = params[i].th_min;
+               if (params == NULL || params[i].th_max == 0)
+                       prec->th_max = default_rio_params[i].th_max;
+               else
+                       prec->th_max = params[i].th_max;
+
+               /*
+                * th_min_s and th_max_s are scaled versions of th_min
+                * and th_max to be compared with avg.
+                */
+               prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+               prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+               /*
+                * precompute probability denominator
+                *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+                */
+               prec->probd = (2 * (prec->th_max - prec->th_min) *
+                   prec->inv_pmax) << FP_SHIFT;
+
+               microuptime(&prec->last);
+       }
+
+       return (rp);
+}
+
+void
+rio_destroy(rio_t *rp)
+{
+       if (rp->rio_wtab != NULL) {
+               wtab_destroy(rp->rio_wtab);
+               rp->rio_wtab = NULL;
+       }
+       zfree(rio_zone, rp);
+}
+
+void
+rio_getstats(rio_t *rp, struct red_stats *sp)
+{
+       int     i;
+
+       for (i = 0; i < RIO_NDROPPREC; i++) {
+               bcopy(&rp->q_stats[i], sp, sizeof (struct red_stats));
+               sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
+               sp++;
+       }
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+#define        AF_DROPPRECMASK 0x18
+
+       int     dpindex = dscp & AF_DROPPRECMASK;
+
+       if (dpindex == 0)
+               return (0);
+       return ((dpindex >> 3) - 1);
+}
+#endif
+
+#define        RIOM_SET_PRECINDEX(t, idx) do {                 \
+       (t)->pftag_qpriv32 = (idx);                     \
+} while (0)
+
+#define        RIOM_GET_PRECINDEX(t)                           \
+       ({ u_int32_t idx; idx = (t)->pftag_qpriv32;     \
+       RIOM_SET_PRECINDEX(t, 0); idx; })
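+
+/*
+ * note that RIOM_GET_PRECINDEX is a destructive read: it returns the stored
+ * drop precedence index and resets it to 0, so the tag does not carry a
+ * stale index once the packet has left the queue.
+ */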
+
+int
+rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag)
+{
+#define        DSCP_MASK       0xfc
+       int                      avg, droptype;
+       u_int8_t                 dsfield, odsfield;
+       int                      dpindex, i, n, t;
+       struct timeval           now;
+       struct dropprec_state   *prec;
+
+       dsfield = odsfield = read_dsfield(m, tag);
+       dpindex = dscp2index(dsfield);
+
+       /*
+        * update avg of the precedence states whose drop precedence
+        * is larger than or equal to the drop precedence of the packet
+        */
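+       /* tv_sec == 0 is a sentinel: microuptime() is called lazily below */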
+       now.tv_sec = 0;
+       for (i = dpindex; i < RIO_NDROPPREC; i++) {
+               prec = &rp->rio_precstate[i];
+               avg = prec->avg;
+               if (prec->idle) {
+                       prec->idle = 0;
+                       if (now.tv_sec == 0)
+                               microuptime(&now);
+                       t = (now.tv_sec - prec->last.tv_sec);
+                       if (t > 60)
+                               avg = 0;
+                       else {
+                               t = t * 1000000 +
+                                   (now.tv_usec - prec->last.tv_usec);
+                               n = t / rp->rio_pkttime;
+                               /* calculate (avg = (1 - Wq)^n * avg) */
+                               if (n > 0) {
+                                       avg = (avg >> FP_SHIFT) *
+                                           pow_w(rp->rio_wtab, n);
+                               }
+                       }
+               }
+
+               /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
+               avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+               prec->avg = avg;                /* save the new value */
+               /*
+                * count keeps a tally of arriving traffic that has not
+                * been dropped.
+                */
+               prec->count++;
+       }
+
+       prec = &rp->rio_precstate[dpindex];
+       avg = prec->avg;
+
+       /* see if we drop early */
+       droptype = DTYPE_NODROP;
+       if (avg >= prec->th_min_s && prec->qlen > 1) {
+               if (avg >= prec->th_max_s) {
+                       /* avg >= th_max: forced drop */
+                       droptype = DTYPE_FORCED;
+               } else if (prec->old == 0) {
+                       /* first exceeds th_min */
+                       prec->count = 1;
+                       prec->old = 1;
+               } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+                   prec->probd, prec->count)) {
+                       /* unforced drop by red */
+                       droptype = DTYPE_EARLY;
+               }
+       } else {
+               /* avg < th_min */
+               prec->old = 0;
+       }
+
+       /*
+        * if the queue length hits the hard limit, it's a forced drop.
+        */
+       if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+               droptype = DTYPE_FORCED;
+
+       if (droptype != DTYPE_NODROP) {
+               /* always drop incoming packet (as opposed to randomdrop) */
+               for (i = dpindex; i < RIO_NDROPPREC; i++)
+                       rp->rio_precstate[i].count = 0;
+
+               if (droptype == DTYPE_EARLY)
+                       rp->q_stats[dpindex].drop_unforced++;
+               else
+                       rp->q_stats[dpindex].drop_forced++;
+
+               IFCQ_CONVERT_LOCK(&rp->rio_ifp->if_snd);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       for (i = dpindex; i < RIO_NDROPPREC; i++)
+               rp->rio_precstate[i].qlen++;
+
+       /* save drop precedence index in mbuf hdr */
+       RIOM_SET_PRECINDEX(tag, dpindex);
+
+       if (rp->rio_flags & RIOF_CLEARDSCP)
+               dsfield &= ~DSCP_MASK;
+
+       if (dsfield != odsfield)
+               write_dsfield(m, tag, dsfield);
+
+       _addq(q, m);
+
+       return (CLASSQEQ_SUCCESS);
+}
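
The estimator in rio_addq() is the classic RED exponentially weighted moving average, avg = (1 - w) * avg + w * qlen with w = 2^-rio_wshift, kept scaled by 2^(FP_SHIFT + rio_wshift) so that the update needs only shifts and adds. A minimal user-space sketch of the same update; the FP_SHIFT and wshift values here are illustrative, not taken from this diff:

    #include <stdio.h>

    #define FP_SHIFT 12                     /* illustrative Q-format shift */

    int
    main(void)
    {
            int wshift = 8;                 /* w = 1/256 */
            long long avg = 0;              /* scaled by 2^(FP_SHIFT+wshift) */
            int qlen = 10;                  /* constant queue length */
            int i;

            for (i = 0; i < 4; i++) {
                    /* same form as rio_addq(): avg += qlen*2^FP_SHIFT - avg*w */
                    avg += ((long long)qlen << FP_SHIFT) - (avg >> wshift);
                    printf("avg = %.4f packets\n",
                        (double)avg / (1LL << (FP_SHIFT + wshift)));
            }
            return (0);
    }
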
+
+static struct mbuf *
+rio_getq_flow(struct rio *rp, class_queue_t *q, u_int32_t flow, boolean_t purge)
+{
+#pragma unused(purge)
+       struct mbuf *m;
+       int dpindex, i;
+
+       /* flow of 0 means head of queue */
+       if ((m = ((flow == 0) ? _getq(q) : _getq_flow(q, flow))) == NULL)
+               return (NULL);
+
+       VERIFY(m->m_flags & M_PKTHDR);
+
+       dpindex = RIOM_GET_PRECINDEX(m_pftag(m));
+       for (i = dpindex; i < RIO_NDROPPREC; i++) {
+               if (--rp->rio_precstate[i].qlen == 0) {
+                       if (rp->rio_precstate[i].idle == 0) {
+                               rp->rio_precstate[i].idle = 1;
+                               microuptime(&rp->rio_precstate[i].last);
+                       }
+               }
+       }
+       return (m);
+}
+
+struct mbuf *
+rio_getq(rio_t *rp, class_queue_t *q)
+{
+       return (rio_getq_flow(rp, q, 0, FALSE));
+}
+
+void
+rio_purgeq(struct rio *rp, class_queue_t *q, u_int32_t flow, u_int32_t *packets,
+    u_int32_t *bytes)
+{
+       u_int32_t cnt = 0, len = 0;
+       struct mbuf *m;
+
+       IFCQ_CONVERT_LOCK(&rp->rio_ifp->if_snd);
+
+       while ((m = rio_getq_flow(rp, q, flow, TRUE)) != NULL) {
+               cnt++;
+               len += m_pktlen(m);
+               m_freem(m);
+       }
+
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+void
+rio_updateq(rio_t *rp, cqev_t ev)
+{
+#pragma unused(rp, ev)
+       /* nothing for now */
+}
+
+int
+rio_suspendq(rio_t *rp, class_queue_t *q, boolean_t on)
+{
+#pragma unused(rp, q, on)
+       return (ENOTSUP);
+}
+#endif /* CLASSQ_RIO */
diff --git a/bsd/net/classq/classq_rio.h b/bsd/net/classq/classq_rio.h
new file mode 100644
index 0000000..fb3c241
--- /dev/null
+++ b/bsd/net/classq/classq_rio.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_rio.h,v 1.5 2006/10/12 19:59:08 peter Exp $       */
+/*     $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $   */
+
+/*
+ * Copyright (C) 1998-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_CLASSQ_CLASSQ_RIO_H_
+#define        _NET_CLASSQ_CLASSQ_RIO_H_
+
+#ifdef PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/classq/if_classq.h>
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define        RIO_NDROPPREC   3       /* number of drop precedence values */
+
+#ifdef BSD_KERNEL_PRIVATE
+/* rio flags */
+#define        RIOF_ECN4       0x01    /* use packet marking for IPv4 packets */
+#define        RIOF_ECN6       0x02    /* use packet marking for IPv6 packets */
+#define        RIOF_ECN        (RIOF_ECN4 | RIOF_ECN6)
+#define        RIOF_CLEARDSCP  0x200   /* clear diffserv codepoint */
+
+#define        RIOF_USERFLAGS                                                  \
+       (RIOF_ECN4 | RIOF_ECN6 | RIOF_CLEARDSCP)
+
+typedef struct rio {
+       /* per drop precedence structure */
+       struct dropprec_state {
+               /* red parameters */
+               int     inv_pmax;       /* inverse of max drop probability */
+               int     th_min;         /* red min threshold */
+               int     th_max;         /* red max threshold */
+
+               /* variables for internal use */
+               int     th_min_s;       /* th_min scaled by avgshift */
+               int     th_max_s;       /* th_max scaled by avgshift */
+               int     probd;          /* drop probability denominator */
+
+               int     qlen;           /* queue length */
+               int     avg;            /* (scaled) queue length average */
+               int     count;          /* packet count since the last */
+                                       /*   dropped/marked packet */
+               int     idle;           /* queue was empty */
+               int     old;            /* avg is above th_min */
+               struct timeval  last;   /* timestamp when queue becomes idle */
+       } rio_precstate[RIO_NDROPPREC];
+
+       int              rio_wshift;    /* log(red_weight) */
+       int              rio_weight;    /* weight for EWMA */
+       struct wtab     *rio_wtab;      /* weight table */
+
+       int              rio_pkttime;   /* average packet time in micro sec */
+                                       /*   used for idle calibration */
+       int              rio_flags;     /* rio flags */
+       struct ifnet    *rio_ifp;       /* back pointer to ifnet */
+
+       u_int8_t         rio_codepoint; /* codepoint value to tag packets */
+       u_int8_t         rio_codepointmask;     /* codepoint mask bits */
+
+       struct red_stats q_stats[RIO_NDROPPREC];        /* statistics */
+} rio_t;
+
+extern void rio_init(void);
+extern rio_t *rio_alloc(struct ifnet *, int, struct redparams *, int, int);
+extern void rio_destroy(rio_t *);
+extern void rio_getstats(rio_t *, struct red_stats *);
+extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *, struct pf_mtag *);
+extern struct mbuf *rio_getq(rio_t *, class_queue_t *);
+extern void rio_purgeq(struct rio *, class_queue_t *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+extern void rio_updateq(rio_t *, cqev_t);
+extern int rio_suspendq(rio_t *, class_queue_t *, boolean_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_CLASSQ_RIO_H_ */
diff --git a/bsd/net/classq/classq_sfb.c b/bsd/net/classq/classq_sfb.c
new file mode 100644
index 0000000..c0f575a
--- /dev/null
+++ b/bsd/net/classq/classq_sfb.c
@@ -0,0 +1,1184 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/kauth.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h>
+#include <net/dlil.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/classq/classq_sfb.h>
+#include <net/flowhash.h>
+#include <net/net_osdep.h>
+
+/*
+ * Stochastic Fair Blue
+ *
+ * Wu-chang Feng, Dilip D. Kandlur, Debanjan Saha, Kang G. Shin
+ * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
+ *
+ * Based on the NS code with the following parameters:
+ *
+ *   bytes:    false
+ *   decrement:        0.001
+ *   increment:        0.005
+ *   hold-time:        10ms-50ms (randomized)
+ *   algorithm:        0
+ *   pbox:     1
+ *   pbox-time:        50-100ms (randomized)
+ *   hinterval:        11-23 (randomized)
+ *
+ * This implementation uses L = 2 and N = 32 for 2 sets of:
+ *
+ *     B[L][N]: L x N array of bins (L levels, N bins per level)
+ *
+ * Each set effectively creates 32^2 virtual buckets (bin combinations)
+ * while using only O(32*2) states.
+ *
+ * Given a 32-bit hash value, we divide it such that octets [0,1,2,3] are
+ * used as indices for the bins across the 2 levels, where level 0 uses
+ * octets [0,2] and level 1 uses octets [1,3].  The 2 values per level
+ * correspond to the indices for the current and warm-up sets (Section 4.4
+ * of the SFB paper, on Moving Hash Functions, explains the purposes of
+ * these 2 sets.)
+ */
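
Concretely, sfb_addq() below stores one 16-bit hash per set in pftag_qpriv16[s], and the per-level lookups read the two overlaid octets pftag_qpriv8[2s] and pftag_qpriv8[2s+1], each masked down to SFB_BINS_SHIFT bits. A standalone sketch of that split; the octet order shown assumes a little-endian overlay, purely for illustration, since the actual layout depends on the pf_mtag union:

    #include <stdint.h>
    #include <stdio.h>

    #define BINS_SHIFT      5
    #define BINMASK(x)      ((x) & ((1 << BINS_SHIFT) - 1))

    int
    main(void)
    {
            uint16_t h = 0xbeef;                    /* 16-bit hash, one set */
            uint8_t oct[2] = { h & 0xff, h >> 8 };  /* overlaid octets */

            /* one octet indexes level 0, the other level 1; 5 bits each */
            printf("level 0 bin = %u\n", BINMASK(oct[0])); /* 0xef & 0x1f = 15 */
            printf("level 1 bin = %u\n", BINMASK(oct[1])); /* 0xbe & 0x1f = 30 */
            return (0);
    }
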
+
+/*
+ * Use Murmur3A_x86_32 as the hash function.  It seems to perform
+ * consistently across platforms for a 1-word key (the 32-bit flowhash
+ * value).  See flowhash.h for other alternatives.  We only need a
+ * 16-bit hash output.
+ */
+#define        SFB_HASH        net_flowhash_mh3_x86_32
+#define        SFB_HASHMASK    HASHMASK(16)
+
+#define        SFB_BINMASK(_x) \
+       ((_x) & HASHMASK(SFB_BINS_SHIFT))
+
+#define        SFB_BINST(_sp, _l, _n, _c) \
+       (&(*(_sp)->sfb_bins)[_c].stats[_l][_n])
+
+#define        SFB_BINFT(_sp, _l, _n, _c) \
+       (&(*(_sp)->sfb_bins)[_c].freezetime[_l][_n])
+
+#define        SFB_FC_LIST(_sp, _n) \
+       (&(*(_sp)->sfb_fc_lists)[_n])
+
+/*
+ * The holdtime parameter determines the minimum time interval between
+ * two successive updates of the marking probability.  In the event the
+ * uplink speed is not known, a default value is chosen and is randomized
+ * to be within the following range.
+ */
+#define        HOLDTIME_BASE   (100ULL * 1000 * 1000)  /* 100ms */
+#define        HOLDTIME_MIN    (10ULL * 1000 * 1000)   /* 10ms */
+#define        HOLDTIME_MAX    (100ULL * 1000 * 1000)  /* 100ms */
+
+/*
+ * The pboxtime parameter determines the bandwidth allocated for rogue
+ * flows, i.e. the rate limiting bandwidth.  In the event the uplink speed
+ * is not known, a default value is chosen and is randomized to be within
+ * the following range.
+ */
+#define        PBOXTIME_BASE   (300ULL * 1000 * 1000)  /* 300ms */
+#define        PBOXTIME_MIN    (30ULL * 1000 * 1000)   /* 30ms */
+#define        PBOXTIME_MAX    (300ULL * 1000 * 1000)  /* 300ms */
+
+#define        SFB_RANDOM(sp, tmin, tmax)      ((sfb_random(sp) % (tmax)) + (tmin))
+
+#define        SFB_PKT_PBOX PF_TAG_QUEUE1      /* in penalty box */
+
+/* The following mantissa values are in SFB_FP_SHIFT Q format */
+#define        SFB_MAX_PMARK   (1 << SFB_FP_SHIFT) /* Q14 representation of 1.00 */
+
+/*
+ * These are d1 (increment) and d2 (decrement) parameters, used to determine
+ * the amount by which the marking probability is incremented when the queue
+ * overflows, or is decremented when the link is idle.  d1 is set higher than
+ * d2, because link underutilization can occur when congestion management is
+ * either too conservative or too aggressive, but packet loss occurs only
+ * when congestion management is too conservative.  By weighing heavily
+ * against packet loss, the algorithm can react quickly to a substantial
+ * increase in traffic load.
+ */
+#define        SFB_INCREMENT   82              /* Q14 representation of 0.005 */
+#define        SFB_DECREMENT   16              /* Q14 representation of 0.001 */
+
+#define        SFB_PMARK_TH    16056           /* Q14 representation of 0.98 */
+#define        SFB_PMARK_WARM  3276            /* Q14 representation of 0.2 */
+
+#define        SFB_PMARK_INC(_bin) do {                                        \
+       (_bin)->pmark += sfb_increment;                                 \
+       if ((_bin)->pmark > SFB_MAX_PMARK)                              \
+               (_bin)->pmark = SFB_MAX_PMARK;                          \
+} while (0)
+
+#define        SFB_PMARK_DEC(_bin) do {                                        \
+       if ((_bin)->pmark > 0) {                                        \
+               (_bin)->pmark -= sfb_decrement;                         \
+               if ((_bin)->pmark < 0)                                  \
+                       (_bin)->pmark = 0;                              \
+       }                                                               \
+} while (0)
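
In this Q14 format a probability p is stored as the integer p * 2^14, so pmark / 16384.0 recovers the marking probability; sfb_addq() below draws a uniform value with (sfb_random(sp) & SFB_MAX_PMARK) and marks when it falls at or below the flow's minimum pmark. A quick standalone check of the constants above:

    #include <stdio.h>

    #define FP      14                      /* Q14: scale by 2^14 */

    int
    main(void)
    {
            printf("d1 = %d\n", (int)(0.005 * (1 << FP) + 0.5));    /* 82 */
            printf("d2 = %d\n", (int)(0.001 * (1 << FP)));          /* 16 */
            printf("th = %.4f\n", 16056.0 / (1 << FP));             /* ~0.98 */
            return (0);
    }
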
+
+#define        HINTERVAL_MIN   (10)    /* 10 seconds */
+#define        HINTERVAL_MAX   (20)    /* 20 seconds */
+#define        SFB_HINTERVAL(sp) ((sfb_random(sp) % HINTERVAL_MAX) + HINTERVAL_MIN)
+
+#define        DEQUEUE_DECAY   7               /* ilog2 of EWMA decay rate, (128) */
+#define        DEQUEUE_SPIKE(_new, _old)       \
+       ((u_int64_t)ABS((int64_t)(_new) - (int64_t)(_old)) > ((_old) << 11))
+
+#define        ABS(v)  (((v) > 0) ? (v) : -(v))
+
+#define        SFB_ZONE_MAX    32              /* maximum elements in zone */
+#define        SFB_ZONE_NAME   "classq_sfb"    /* zone name */
+
+/* Place the flow control entries in current bin on level 0 */
+#define        SFB_FC_LEVEL    0
+
+static unsigned int sfb_size;          /* size of zone element */
+static struct zone *sfb_zone;          /* zone for sfb */
+
+/* internal function prototypes */
+static u_int32_t sfb_random(struct sfb *);
+static struct mbuf *sfb_getq_flow(struct sfb *, class_queue_t *, u_int32_t,
+    boolean_t);
+static void sfb_resetq(struct sfb *, cqev_t);
+static void sfb_calc_holdtime(struct sfb *, u_int64_t);
+static void sfb_calc_pboxtime(struct sfb *, u_int64_t);
+static void sfb_calc_hinterval(struct sfb *, u_int64_t *);
+static void sfb_swap_bins(struct sfb *, u_int32_t);
+static inline int sfb_pcheck(struct sfb *, struct pf_mtag *);
+static int sfb_penalize(struct sfb *, struct pf_mtag *, struct timespec *);
+static void sfb_adjust_bin(struct sfb *, struct sfbbinstats *,
+    struct timespec *, struct timespec *, boolean_t);
+static void sfb_decrement_bin(struct sfb *, struct sfbbinstats *,
+    struct timespec *, struct timespec *);
+static void sfb_increment_bin(struct sfb *, struct sfbbinstats *,
+    struct timespec *, struct timespec *);
+static inline void sfb_dq_update_bins(struct sfb *, struct pf_mtag *,
+    struct timespec *);
+static inline void sfb_eq_update_bins(struct sfb *, struct pf_mtag *);
+static int sfb_drop_early(struct sfb *, struct pf_mtag *, u_int16_t *,
+    struct timespec *);
+static boolean_t sfb_bin_addfcentry(struct sfb *, struct pf_mtag *);
+static void sfb_fclist_append(struct sfb *, struct sfb_fc_list *);
+static void sfb_fclists_clean(struct sfb *sp);
+
+SYSCTL_NODE(_net_classq, OID_AUTO, sfb, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "SFB");
+
+static u_int64_t sfb_holdtime = 0;     /* 0 indicates "automatic" */
+SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, holdtime, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_holdtime, "SFB freeze time in nanoseconds");
+
+static u_int64_t sfb_pboxtime = 0;     /* 0 indicates "automatic" */
+SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, pboxtime, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_pboxtime, "SFB penalty box time in nanoseconds");
+
+static u_int64_t sfb_hinterval;
+SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, hinterval, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_hinterval, "SFB hash interval in nanoseconds");
+
+static u_int32_t sfb_increment = SFB_INCREMENT;
+SYSCTL_UINT(_net_classq_sfb, OID_AUTO, increment, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_increment, SFB_INCREMENT, "SFB increment [d1]");
+
+static u_int32_t sfb_decrement = SFB_DECREMENT;
+SYSCTL_UINT(_net_classq_sfb, OID_AUTO, decrement, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_decrement, SFB_DECREMENT, "SFB decrement [d2]");
+
+static u_int32_t sfb_allocation = 0;   /* 0 means "automatic" */
+SYSCTL_UINT(_net_classq_sfb, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_allocation, 0, "SFB bin allocation");
+
+static u_int32_t sfb_ratelimit = 0;
+SYSCTL_UINT(_net_classq_sfb, OID_AUTO, ratelimit, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_ratelimit, 0, "SFB rate limit");
+
+#define        MBPS    (1ULL * 1000 * 1000)
+#define        GBPS    (MBPS * 1000)
+
+struct sfb_time_tbl {
+       u_int64_t       speed;          /* uplink speed */
+       u_int64_t       holdtime;       /* hold time */
+       u_int64_t       pboxtime;       /* penalty box time */
+};
+
+static struct sfb_time_tbl sfb_ttbl[] = {
+       {   1 * MBPS,   HOLDTIME_BASE * 1000,   PBOXTIME_BASE * 1000    },
+       {  10 * MBPS,   HOLDTIME_BASE * 100,    PBOXTIME_BASE * 100     },
+       { 100 * MBPS,   HOLDTIME_BASE * 10,     PBOXTIME_BASE * 10      },
+       {   1 * GBPS,   HOLDTIME_BASE,          PBOXTIME_BASE           },
+       {  10 * GBPS,   HOLDTIME_BASE / 10,     PBOXTIME_BASE / 10      },
+       { 100 * GBPS,   HOLDTIME_BASE / 100,    PBOXTIME_BASE / 100     },
+       { 0, 0, 0 }
+};
+
+void
+sfb_init(void)
+{
+       _CASSERT(SFBF_ECN4 == CLASSQF_ECN4);
+       _CASSERT(SFBF_ECN6 == CLASSQF_ECN6);
+
+       sfb_size = sizeof (struct sfb);
+       sfb_zone = zinit(sfb_size, SFB_ZONE_MAX * sfb_size,
+           0, SFB_ZONE_NAME);
+       if (sfb_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, SFB_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(sfb_zone, Z_EXPAND, TRUE);
+       zone_change(sfb_zone, Z_CALLERACCT, TRUE);
+}
+
+static u_int32_t
+sfb_random(struct sfb *sp)
+{
+       IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
+       return (random());
+}
+
+static void
+sfb_calc_holdtime(struct sfb *sp, u_int64_t outbw)
+{
+       u_int64_t holdtime;
+
+       if (sfb_holdtime != 0) {
+               holdtime = sfb_holdtime;
+       } else if (outbw == 0) {
+               holdtime = SFB_RANDOM(sp, HOLDTIME_MIN, HOLDTIME_MAX);
+       } else {
+               unsigned int n, i;
+
+               n = sfb_ttbl[0].holdtime;
+               for (i = 0; sfb_ttbl[i].speed != 0; i++) {
+                       if (outbw < sfb_ttbl[i].speed)
+                               break;
+                       n = sfb_ttbl[i].holdtime;
+               }
+               holdtime = n;
+       }
+       net_nsectimer(&holdtime, &sp->sfb_holdtime);
+}
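
The loop keeps the entry for the largest table speed that does not exceed outbw, so the hold time scales inversely with link speed: 100 Mbps selects HOLDTIME_BASE * 10 (1 sec), while 10 Gbps selects HOLDTIME_BASE / 10 (10 msec). A standalone sketch of the same lookup, with the nanosecond values copied from sfb_ttbl[] above:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            static const struct { uint64_t speed, holdtime; } tbl[] = {
                    { 1000000ULL,           100000000000ULL },  /* 1 Mbps */
                    { 10000000ULL,          10000000000ULL },   /* 10 Mbps */
                    { 100000000ULL,         1000000000ULL },    /* 100 Mbps */
                    { 1000000000ULL,        100000000ULL },     /* 1 Gbps */
                    { 10000000000ULL,       10000000ULL },      /* 10 Gbps */
                    { 100000000000ULL,      1000000ULL },       /* 100 Gbps */
                    { 0, 0 }
            };
            uint64_t outbw = 100000000ULL;  /* 100 Mbps */
            uint64_t n = tbl[0].holdtime;
            int i;

            for (i = 0; tbl[i].speed != 0; i++) {
                    if (outbw < tbl[i].speed)
                            break;
                    n = tbl[i].holdtime;
            }
            /* prints 1000000000 nsec, i.e. 1 second */
            printf("holdtime = %llu nsec\n", (unsigned long long)n);
            return (0);
    }
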
+
+static void
+sfb_calc_pboxtime(struct sfb *sp, u_int64_t outbw)
+{
+       u_int64_t pboxtime;
+
+       if (sfb_pboxtime != 0) {
+               pboxtime = sfb_pboxtime;
+       } else if (outbw == 0) {
+               pboxtime = SFB_RANDOM(sp, PBOXTIME_MIN, PBOXTIME_MAX);
+       } else {
+               unsigned int n, i;
+
+               n = sfb_ttbl[0].pboxtime;
+               for (i = 0; sfb_ttbl[i].speed != 0; i++) {
+                       if (outbw < sfb_ttbl[i].speed)
+                               break;
+                       n = sfb_ttbl[i].pboxtime;
+               }
+               pboxtime = n;
+       }
+       net_nsectimer(&pboxtime, &sp->sfb_pboxtime);
+       net_timerclear(&sp->sfb_pboxfreeze);
+}
+
+static void
+sfb_calc_hinterval(struct sfb *sp, u_int64_t *t)
+{
+       u_int64_t hinterval;
+       struct timespec now;
+
+       if (t != NULL) {
+               /*
+                * TODO adi@apple.com: use dq_avg to derive hinterval.
+                */
+               hinterval = *t;
+       }
+
+       if (sfb_hinterval != 0)
+               hinterval = sfb_hinterval;
+       else if (t == NULL || hinterval == 0)
+               hinterval = ((u_int64_t)SFB_HINTERVAL(sp) * NSEC_PER_SEC);
+
+       net_nsectimer(&hinterval, &sp->sfb_hinterval);
+
+       nanouptime(&now);
+       net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
+}
+
+/*
+ * sfb support routines
+ */
+struct sfb *
+sfb_alloc(struct ifnet *ifp, u_int32_t qid, u_int32_t qlim, u_int32_t flags)
+{
+       struct sfb *sp;
+
+       VERIFY(ifp != NULL && qlim > 0);
+
+       sp = zalloc(sfb_zone);
+       if (sp == NULL) {
+               log(LOG_ERR, "%s: SFB unable to allocate\n", if_name(ifp));
+               return (NULL);
+       }
+
+       bzero(sp, sfb_size);
+       if ((sp->sfb_bins = _MALLOC(sizeof (*sp->sfb_bins), M_DEVBUF,
+           M_WAITOK|M_ZERO)) == NULL) {
+               log(LOG_ERR, "%s: SFB unable to allocate bins\n", if_name(ifp));
+               sfb_destroy(sp);
+               return (NULL);
+       }
+
+       if ((sp->sfb_fc_lists = _MALLOC(sizeof (*sp->sfb_fc_lists), M_DEVBUF,
+           M_WAITOK|M_ZERO)) == NULL) {
+               log(LOG_ERR, "%s: SFB unable to allocate flow control lists\n",
+                   if_name(ifp));
+               sfb_destroy(sp);
+               return (NULL);
+       }
+
+       sp->sfb_flags = (flags & SFBF_USERFLAGS);
+       sp->sfb_ifp = ifp;
+       sp->sfb_qlim = qlim;
+       sp->sfb_qid = qid;
+
+       sfb_resetq(sp, -1);
+
+       return (sp);
+}
+
+static void
+sfb_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl)
+{
+       IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
+       ifnet_fclist_append(sp, fcl);
+}
+
+static void
+sfb_fclists_clean(struct sfb *sp)
+{
+       int i;
+
+       /* Move all the flow control entries to the ifnet list */
+       for (i = 0; i < SFB_BINS; ++i) {
+               struct sfb_fc_list *fcl = SFB_FC_LIST(sp, i);
+               if (!SLIST_EMPTY(fcl))
+                       sfb_fclist_append(sp, fcl);
+       }
+}
+
+void
+sfb_destroy(struct sfb *sp)
+{
+       sfb_fclists_clean(sp);
+       if (sp->sfb_bins != NULL) {
+               _FREE(sp->sfb_bins, M_DEVBUF);
+               sp->sfb_bins = NULL;
+       }
+       if (sp->sfb_fc_lists != NULL) {
+               _FREE(sp->sfb_fc_lists, M_DEVBUF);
+               sp->sfb_fc_lists = NULL;
+       }
+       zfree(sfb_zone, sp);
+}
+
+static void
+sfb_resetq(struct sfb *sp, cqev_t ev)
+{
+       struct ifnet *ifp = sp->sfb_ifp;
+       u_int64_t eff_rate;
+
+       VERIFY(ifp != NULL);
+
+       if (ev != CLASSQ_EV_LINK_DOWN) {
+               (*sp->sfb_bins)[0].fudge = sfb_random(sp);
+               (*sp->sfb_bins)[1].fudge = sfb_random(sp);
+               sp->sfb_allocation = ((sfb_allocation == 0) ?
+                   (sp->sfb_qlim / 3) : sfb_allocation);
+               sp->sfb_drop_thresh = sp->sfb_allocation +
+                   (sp->sfb_allocation >> 1);
+       }
+
+       sp->sfb_clearpkts = 0;
+       sp->sfb_current = 0;
+
+       eff_rate = ifnet_output_linkrate(ifp);
+       sp->sfb_eff_rate = eff_rate;
+
+       sfb_calc_holdtime(sp, eff_rate);
+       sfb_calc_pboxtime(sp, eff_rate);
+       sfb_calc_hinterval(sp, NULL);
+
+       if (ev == CLASSQ_EV_LINK_DOWN ||
+           ev == CLASSQ_EV_LINK_UP)
+               sfb_fclists_clean(sp);
+
+       bzero(sp->sfb_bins, sizeof (*sp->sfb_bins));
+       bzero(&sp->sfb_stats, sizeof (sp->sfb_stats));
+
+       if (ev == CLASSQ_EV_LINK_DOWN || !classq_verbose)
+               return;
+
+       log(LOG_DEBUG, "%s: SFB qid=%d, holdtime=%llu nsec, "
+           "pboxtime=%llu nsec, allocation=%d, drop_thresh=%d, "
+           "hinterval=%d sec, sfb_bins=%d bytes, eff_rate=%llu bps\n",
+           if_name(ifp), sp->sfb_qid, (u_int64_t)sp->sfb_holdtime.tv_nsec,
+           (u_int64_t)sp->sfb_pboxtime.tv_nsec,
+           (u_int32_t)sp->sfb_allocation, (u_int32_t)sp->sfb_drop_thresh,
+           (int)sp->sfb_hinterval.tv_sec, (int)sizeof (*sp->sfb_bins),
+           eff_rate);
+}
+
+void
+sfb_getstats(struct sfb *sp, struct sfb_stats *sps)
+{
+       sps->allocation = sp->sfb_allocation;
+       sps->dropthresh = sp->sfb_drop_thresh;
+       sps->clearpkts = sp->sfb_clearpkts;
+       sps->current = sp->sfb_current;
+
+       net_timernsec(&sp->sfb_holdtime, &sp->sfb_stats.hold_time);
+       net_timernsec(&sp->sfb_pboxtime, &sp->sfb_stats.pbox_time);
+       net_timernsec(&sp->sfb_hinterval, &sp->sfb_stats.rehash_intval);
+       *(&(sps->sfbstats)) = *(&(sp->sfb_stats));
+
+       _CASSERT(sizeof ((*sp->sfb_bins)[0].stats) ==
+           sizeof (sps->binstats[0].stats));
+
+       bcopy(&(*sp->sfb_bins)[0].stats, &sps->binstats[0].stats,
+           sizeof (sps->binstats[0].stats));
+       bcopy(&(*sp->sfb_bins)[1].stats, &sps->binstats[1].stats,
+           sizeof (sps->binstats[1].stats));
+}
+
+static void
+sfb_swap_bins(struct sfb *sp, u_int32_t len)
+{
+       int i, j, s;
+
+       if (sp->sfb_flags & SFBF_SUSPENDED)
+               return;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       (*sp->sfb_bins)[s].fudge = sfb_random(sp); /* recompute perturbation */
+       sp->sfb_clearpkts = len;
+       sp->sfb_stats.num_rehash++;
+
+       s = (sp->sfb_current ^= 1);     /* flip the bit (swap current) */
+
+       if (classq_verbose) {
+               log(LOG_DEBUG, "%s: SFB qid=%d, set %d is now current, "
+                   "qlen=%d\n", if_name(sp->sfb_ifp), sp->sfb_qid, s, len);
+       }
+
+       /* clear freezetime for all current bins */
+       bzero(&(*sp->sfb_bins)[s].freezetime,
+           sizeof ((*sp->sfb_bins)[s].freezetime));
+
+       /* clear/adjust bin statistics and flow control lists */
+       for (i = 0; i < SFB_BINS; i++) {
+               struct sfb_fc_list *fcl = SFB_FC_LIST(sp, i);
+
+               if (!SLIST_EMPTY(fcl))
+                       sfb_fclist_append(sp, fcl);
+
+               for (j = 0; j < SFB_LEVELS; j++) {
+                       struct sfbbinstats *cbin, *wbin;
+
+                       cbin = SFB_BINST(sp, j, i, s);          /* current */
+                       wbin = SFB_BINST(sp, j, i, s ^ 1);      /* warm-up */
+
+                       cbin->pkts = 0;
+                       if (cbin->pmark > SFB_MAX_PMARK)
+                               cbin->pmark = SFB_MAX_PMARK;
+                       if (cbin->pmark < 0)
+                               cbin->pmark = 0;
+
+                       /*
+                        * Keep pmark from before to identify
+                        * non-responsives immediately.
+                        */
+                       if (wbin->pmark > SFB_PMARK_WARM)
+                               wbin->pmark = SFB_PMARK_WARM;
+               }
+       }
+}
+
+static inline int
+sfb_pcheck(struct sfb *sp, struct pf_mtag *t)
+{
+#if SFB_LEVELS != 2
+       int i, n;
+#endif /* SFB_LEVELS != 2 */
+       int s;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       /*
+        * For current bins, returns 1 if all pmark >= SFB_PMARK_TH,
+        * 0 otherwise; optimize for SFB_LEVELS=2.
+        */
+#if SFB_LEVELS == 2
+       /*
+        * Level 0: bin index at [0] for set 0; [2] for set 1
+        * Level 1: bin index at [1] for set 0; [3] for set 1
+        */
+       if (SFB_BINST(sp, 0, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]),
+           s)->pmark < SFB_PMARK_TH ||
+           SFB_BINST(sp, 1, SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]),
+           s)->pmark < SFB_PMARK_TH)
+               return (0);
+#else /* SFB_LEVELS != 2 */
+       for (i = 0; i < SFB_LEVELS; i++) {
+               if (s == 0)             /* set 0, bin index [0,1] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i]);
+               else                    /* set 1, bin index [2,3] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+
+               if (SFB_BINST(sp, i, n, s)->pmark < SFB_PMARK_TH)
+                       return (0);
+       }
+#endif /* SFB_LEVELS != 2 */
+       return (1);
+}
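
This is the Bloom-filter property at the heart of SFB: a flow is treated as non-responsive only when every bin it hashes into, across all levels, has pmark at or above SFB_PMARK_TH. A well-behaved flow that shares one bin with a heavy hitter will still have a low pmark in its other level, so it is not penalized. A minimal standalone sketch of that all-levels check:

    #include <stdio.h>

    #define PMARK_TH        16056           /* ~0.98 in Q14, as above */
    #define LEVELS          2

    /* sketch of sfb_pcheck(): penalize only if all levels agree */
    static int
    pcheck(const int pmark[LEVELS])
    {
            int i;

            for (i = 0; i < LEVELS; i++)
                    if (pmark[i] < PMARK_TH)
                            return (0);
            return (1);
    }

    int
    main(void)
    {
            int hog[LEVELS] = { 16384, 16200 };     /* saturated everywhere */
            int victim[LEVELS] = { 16384, 300 };    /* one collision only */

            printf("hog: %d\n", pcheck(hog));       /* 1: rate-limited */
            printf("victim: %d\n", pcheck(victim)); /* 0: passes */
            return (0);
    }
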
+
+static int
+sfb_penalize(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
+{
+       struct timespec delta = { 0, 0 };
+
+       /* If minimum pmark of current bins is < SFB_PMARK_TH, we're done */
+       if (!sfb_ratelimit || !sfb_pcheck(sp, t))
+               return (0);
+
+       net_timersub(now, &sp->sfb_pboxfreeze, &delta);
+       if (net_timercmp(&delta, &sp->sfb_pboxtime, <)) {
+#if SFB_LEVELS != 2
+               int i;
+#endif /* SFB_LEVELS != 2 */
+               struct sfbbinstats *bin;
+               int n, w;
+
+               w = sp->sfb_current ^ 1;
+               VERIFY((w + (w ^ 1)) == 1);
+
+               /*
+                * Update warm-up bins; optimize for SFB_LEVELS=2
+                */
+#if SFB_LEVELS == 2
+               /* Level 0: bin index at [0] for set 0; [2] for set 1 */
+               n = SFB_BINMASK(t->pftag_qpriv8[(w << 1)]);
+               bin = SFB_BINST(sp, 0, n, w);
+               if (bin->pkts >= sp->sfb_allocation)
+                       sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, w), now);
+
+               /* Level 1: bin index at [1] for set 0; [3] for set 1 */
+               n = SFB_BINMASK(t->pftag_qpriv8[(w << 1) + 1]);
+               bin = SFB_BINST(sp, 1, n, w);
+               if (bin->pkts >= sp->sfb_allocation)
+                       sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, w), now);
+#else /* SFB_LEVELS != 2 */
+               for (i = 0; i < SFB_LEVELS; i++) {
+                       if (w == 0)     /* set 0, bin index [0,1] */
+                               n = SFB_BINMASK(t->pftag_qpriv8[i]);
+                       else            /* set 1, bin index [2,3] */
+                               n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+
+                       bin = SFB_BINST(sp, i, n, w);
+                       if (bin->pkts >= sp->sfb_allocation) {
+                               sfb_increment_bin(sp, bin,
+                                   SFB_BINFT(sp, i, n, w), now);
+                       }
+               }
+#endif /* SFB_LEVELS != 2 */
+               return (1);
+       }
+
+       /* non-conformant or else misclassified flow; queue it anyway */
+       t->pftag_flags |= SFB_PKT_PBOX;
+       *(&sp->sfb_pboxfreeze) = *now;
+
+       return (0);
+}
+
+static void
+sfb_adjust_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
+    struct timespec *now, boolean_t inc)
+{
+       struct timespec delta;
+
+       net_timersub(now, ft, &delta);
+       if (net_timercmp(&delta, &sp->sfb_holdtime, <)) {
+               if (classq_verbose > 1) {
+                       log(LOG_DEBUG, "%s: SFB qid=%d, %s update frozen "
+                           "(delta=%llu nsec)\n", if_name(sp->sfb_ifp),
+                           sp->sfb_qid, inc ?  "increment" : "decrement",
+                           (u_int64_t)delta.tv_nsec);
+               }
+               return;
+       }
+
+       /* increment/decrement marking probability */
+       *ft = *now;
+       if (inc)
+               SFB_PMARK_INC(bin);
+       else
+               SFB_PMARK_DEC(bin);
+}
+
+static void
+sfb_decrement_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
+    struct timespec *now)
+{
+       return (sfb_adjust_bin(sp, bin, ft, now, FALSE));
+}
+
+static void
+sfb_increment_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
+    struct timespec *now)
+{
+       return (sfb_adjust_bin(sp, bin, ft, now, TRUE));
+}
+
+static inline void
+sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
+{
+#if SFB_LEVELS != 2 || SFB_FC_LEVEL != 0
+       int i;
+#endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
+       struct sfbbinstats *bin;
+       int s, n;
+       struct sfb_fc_list *fcl = NULL;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       /*
+        * Update current bins; optimize for SFB_LEVELS=2 and SFB_FC_LEVEL=0
+        */
+#if SFB_LEVELS == 2 && SFB_FC_LEVEL == 0
+       /* Level 0: bin index at [0] for set 0; [2] for set 1 */
+       n = SFB_BINMASK(t->pftag_qpriv8[(s << 1)]);
+       bin = SFB_BINST(sp, 0, n, s);
+
+       VERIFY(bin->pkts > 0);
+       if (--bin->pkts == 0) {
+               sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now);
+       }
+       if (bin->pkts <= (sp->sfb_allocation >> 2)) {
+               /* deliver flow control feedback to the sockets */
+               fcl = SFB_FC_LIST(sp, n);
+               if (!SLIST_EMPTY(fcl))
+                       sfb_fclist_append(sp, fcl);
+       }
+
+       /* Level 1: bin index at [1] for set 0; [3] for set 1 */
+       n = SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]);
+       bin = SFB_BINST(sp, 1, n, s);
+
+       VERIFY(bin->pkts > 0);
+       if (--bin->pkts == 0)
+               sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now);
+#else /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
+       for (i = 0; i < SFB_LEVELS; i++) {
+               if (s == 0)             /* set 0, bin index [0,1] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i]);
+               else                    /* set 1, bin index [2,3] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+
+               bin = SFB_BINST(sp, i, n, s);
+
+               VERIFY(bin->pkts > 0);
+               if (--bin->pkts == 0) {
+                       sfb_decrement_bin(sp, bin,
+                           SFB_BINFT(sp, i, n, s), now);
+               }
+               if (bin->pkts <= (sp->sfb_allocation >> 2)) {
+                       /* deliver flow control feedback to the sockets */
+                       if (i == SFB_FC_LEVEL) {
+                               fcl = SFB_FC_LIST(sp, n);
+                               if (!SLIST_EMPTY(fcl))
+                                       sfb_fclist_append(sp, fcl);
+                       }
+               }
+       }
+#endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
+}
+
+static inline void
+sfb_eq_update_bins(struct sfb *sp, struct pf_mtag *t)
+{
+#if SFB_LEVELS != 2
+       int i, n;
+#endif /* SFB_LEVELS != 2 */
+       int s;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       /*
+        * Update current bins; optimize for SFB_LEVELS=2
+        */
+#if SFB_LEVELS == 2
+       /* Level 0: bin index at [0] for set 0; [2] for set 1 */
+       SFB_BINST(sp, 0, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]), s)->pkts++;
+
+       /* Level 1: bin index at [1] for set 0; [3] for set 1 */
+       SFB_BINST(sp, 1, SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]), s)->pkts++;
+#else /* SFB_LEVELS != 2 */
+       for (i = 0; i < SFB_LEVELS; i++) {
+               if (s == 0)             /* set 0, bin index [0,1] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i]);
+               else                    /* set 1, bin index [2,3] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+
+               SFB_BINST(sp, i, n, s)->pkts++;
+       }
+#endif /* SFB_LEVELS != 2 */
+}
+
+static boolean_t
+sfb_bin_addfcentry(struct sfb *sp, struct pf_mtag *t)
+{
+       struct sfb_bin_fcentry *fce;
+       u_int32_t flowhash;
+       struct sfb_fc_list *fcl;
+       int s;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       flowhash = t->pftag_flowhash;
+
+       if (flowhash == 0) {
+               sp->sfb_stats.null_flowhash++;
+               return (FALSE);
+       }
+
+       /*
+        * Use value at index 0 for set 0 and
+        * value at index 2 for set 1
+        */
+       fcl = SFB_FC_LIST(sp, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]));
+       SLIST_FOREACH(fce, fcl, fce_link) {
+               if (fce->fce_flowhash == flowhash) {
+                       /* Already on flow control list; just return */
+                       return (TRUE);
+               }
+       }
+
+       IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
+       fce = ifnet_fce_alloc(M_WAITOK);
+       if (fce != NULL) {
+               fce->fce_flowhash = flowhash;
+               SLIST_INSERT_HEAD(fcl, fce, fce_link);
+               sp->sfb_stats.flow_controlled++;
+       }
+
+       return (fce != NULL);
+}
+
+/*
+ * early-drop probability is kept in pmark of each bin of the flow
+ */
+static int
+sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
+    struct timespec *now)
+{
+#if SFB_LEVELS != 2
+       int i;
+#endif /* SFB_LEVELS != 2 */
+       struct sfbbinstats *bin;
+       int s, n, ret = 0;
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       *pmin = (u_int16_t)-1;
+
+       /*
+        * Update current bins; optimize for SFB_LEVELS=2
+        */
+#if SFB_LEVELS == 2
+       /* Level 0: bin index at [0] for set 0; [2] for set 1 */
+       n = SFB_BINMASK(t->pftag_qpriv8[(s << 1)]);
+       bin = SFB_BINST(sp, 0, n, s);
+       if (*pmin > (u_int16_t)bin->pmark)
+               *pmin = (u_int16_t)bin->pmark;
+
+       if (bin->pkts >= sp->sfb_allocation) {
+               if (bin->pkts >= sp->sfb_drop_thresh)
+                       ret = 1;        /* drop or mark */
+               sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now);
+       }
+
+       /* Level 1: bin index at [1] for set 0; [3] for set 1 */
+       n = SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]);
+       bin = SFB_BINST(sp, 1, n, s);
+       if (*pmin > (u_int16_t)bin->pmark)
+               *pmin = (u_int16_t)bin->pmark;
+
+       if (bin->pkts >= sp->sfb_allocation) {
+               if (bin->pkts >= sp->sfb_drop_thresh)
+                       ret = 1;        /* drop or mark */
+               sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now);
+       }
+#else /* SFB_LEVELS != 2 */
+       for (i = 0; i < SFB_LEVELS; i++) {
+               if (s == 0)             /* set 0, bin index [0,1] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i]);
+               else                    /* set 1, bin index [2,3] */
+                       n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+
+               bin = SFB_BINST(sp, i, n, s);
+               if (*pmin > (u_int16_t)bin->pmark)
+                       *pmin = (u_int16_t)bin->pmark;
+
+               if (bin->pkts >= sp->sfb_allocation) {
+                       if (bin->pkts >= sp->sfb_drop_thresh)
+                               ret = 1;        /* drop or mark */
+                       sfb_increment_bin(sp, bin,
+                           SFB_BINFT(sp, i, n, s), now);
+               }
+       }
+#endif /* SFB_LEVELS != 2 */
+
+       if (sp->sfb_flags & SFBF_SUSPENDED)
+               ret = 1;        /* drop or mark */
+
+       return (ret);
+}
+
+#define        DTYPE_NODROP    0       /* no drop */
+#define        DTYPE_FORCED    1       /* a "forced" drop */
+#define        DTYPE_EARLY     2       /* an "unforced" (early) drop */
+
+int
+sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
+{
+       struct timespec now;
+       int droptype, s;
+       u_int16_t pmin;
+       int fc_adv = 0;
+       int ret = CLASSQEQ_SUCCESS;
+
+       nanouptime(&now);
+
+       s = sp->sfb_current;
+       VERIFY((s + (s ^ 1)) == 1);
+
+       /* time to swap the bins? */
+       if (net_timercmp(&now, &sp->sfb_nextreset, >=)) {
+               net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
+               sfb_swap_bins(sp, qlen(q));
+               s = sp->sfb_current;
+               VERIFY((s + (s ^ 1)) == 1);
+       }
+
+       t->pftag_flags &= ~SFB_PKT_PBOX;
+       t->pftag_qpriv16[s] =
+           (SFB_HASH(&t->pftag_flowhash, sizeof (t->pftag_flowhash),
+           (*sp->sfb_bins)[s].fudge) & SFB_HASHMASK);
+       t->pftag_qpriv16[s ^ 1] =
+           (SFB_HASH(&t->pftag_flowhash, sizeof (t->pftag_flowhash),
+           (*sp->sfb_bins)[s ^ 1].fudge) & SFB_HASHMASK);
+
+       /* see if we drop early */
+       droptype = DTYPE_NODROP;
+       if (sfb_drop_early(sp, t, &pmin, &now)) {
+               /* flow control, mark or drop by sfb */
+               if ((sp->sfb_flags & SFBF_FLOWCTL) &&
+                   (t->pftag_flags & PF_TAG_FLOWADV)) {
+                       fc_adv = 1;
+                       /* drop all during suspension or for non-TCP */
+                       if ((sp->sfb_flags & SFBF_SUSPENDED) ||
+                           !(t->pftag_flags & PF_TAG_TCP)) {
+                               droptype = DTYPE_EARLY;
+                               sp->sfb_stats.drop_early++;
+                       }
+               } else if ((sp->sfb_flags & SFBF_ECN) &&
+                   (t->pftag_flags & PF_TAG_TCP) &&    /* only for TCP */
+                   ((sfb_random(sp) & SFB_MAX_PMARK) <= pmin) &&
+                   mark_ecn(m, t, sp->sfb_flags) &&
+                   !(sp->sfb_flags & SFBF_SUSPENDED)) {
+                       /* successfully marked; do not drop. */
+                       sp->sfb_stats.marked_packets++;
+               } else {
+                       /* unforced drop by sfb */
+                       droptype = DTYPE_EARLY;
+                       sp->sfb_stats.drop_early++;
+               }
+       }
+
+       /* non-responsive flow penalty? */
+       if (droptype == DTYPE_NODROP && sfb_penalize(sp, t, &now)) {
+               droptype = DTYPE_FORCED;
+               sp->sfb_stats.drop_pbox++;
+       }
+
+       /* if the queue length hits the hard limit, it's a forced drop */
+       if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) {
+               droptype = DTYPE_FORCED;
+               sp->sfb_stats.drop_queue++;
+       }
+
+       if (fc_adv == 1 && droptype != DTYPE_FORCED &&
+           sfb_bin_addfcentry(sp, t)) {
+               /* deliver flow control advisory error */
+               if (droptype == DTYPE_NODROP) {
+                       ret = CLASSQEQ_SUCCESS_FC;
+                       VERIFY(!(sp->sfb_flags & SFBF_SUSPENDED));
+               } else if (sp->sfb_flags & SFBF_SUSPENDED) {
+                       /* dropped due to suspension */
+                       ret = CLASSQEQ_DROPPED_SP;
+               } else {
+                       /* dropped due to flow-control */
+                       ret = CLASSQEQ_DROPPED_FC;
+               }
+       }
+
+       /* if successful enqueue this packet, else drop it */
+       if (droptype == DTYPE_NODROP) {
+               _addq(q, m);
+       } else {
+               IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
+               m_freem(m);
+               return ((ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROPPED);
+       }
+
+       if (!(t->pftag_flags & SFB_PKT_PBOX))
+               sfb_eq_update_bins(sp, t);
+       else
+               sp->sfb_stats.pbox_packets++;
+
+       /* successfully queued */
+       return (ret);
+}
+
+static struct mbuf *
+sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
+{
+       struct timespec now;
+       struct mbuf *m;
+       struct pf_mtag *t;
+
+       if (!purge && (sp->sfb_flags & SFBF_SUSPENDED))
+               return (NULL);
+
+       nanouptime(&now);
+
+       /* flow of 0 means head of queue */
+       if ((m = ((flow == 0) ? _getq(q) : _getq_flow(q, flow))) == NULL) {
+               if (!purge)
+                       net_timerclear(&sp->sfb_getqtime);
+               return (NULL);
+       }
+
+       VERIFY(m->m_flags & M_PKTHDR);
+
+       t = m_pftag(m);
+
+       if (!purge) {
+               /* calculate EWMA of dequeues */
+               if (net_timerisset(&sp->sfb_getqtime)) {
+                       struct timespec delta;
+                       u_int64_t avg, new;
+
+                       net_timersub(&now, &sp->sfb_getqtime, &delta);
+                       net_timernsec(&delta, &new);
+                       avg = sp->sfb_stats.dequeue_avg;
+                       if (avg > 0) {
+                               int decay = DEQUEUE_DECAY;
+                               /*
+                                * If the time since last dequeue is
+                                * significantly greater than the current
+                                * average, weight the average more against
+                                * the old value.
+                                */
+                               if (DEQUEUE_SPIKE(new, avg))
+                                       decay += 5;
+                               avg = (((avg << decay) - avg) + new) >> decay;
+                       } else {
+                               avg = new;
+                       }
+                       sp->sfb_stats.dequeue_avg = avg;
+               }
+               *(&sp->sfb_getqtime) = *(&now);
+       }
+
+       /*
+        * Clearpkts are the ones which were in the queue when the hash
+        * function was perturbed.  Since the perturbation value (fudge),
+        * and thus the bin information, is not known for these packets,
+        * we do not change accounting information while dequeuing them.
+        * For this reason it is important not to set the hash interval
+        * too small.  A rule of thumb is to set it to K*D, where D is
+        * the time taken to drain the queue.
+        */
+       if (t->pftag_flags & SFB_PKT_PBOX) {
+               t->pftag_flags &= ~SFB_PKT_PBOX;
+               if (sp->sfb_clearpkts > 0)
+                       sp->sfb_clearpkts--;
+       } else if (sp->sfb_clearpkts > 0) {
+               sp->sfb_clearpkts--;
+       } else {
+               sfb_dq_update_bins(sp, t, &now);
+       }
+
+       return (m);
+}
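
The dequeue-interval EWMA above computes avg = ((avg << d) - avg + new) >> d with d = DEQUEUE_DECAY, i.e. a weight of 1/128 on the newest sample, stretched to 1/4096 when DEQUEUE_SPIKE() flags a sample differing from the old average by more than a factor of 2^11. A standalone sketch of the same smoothing:

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define DECAY   7       /* weight of new sample = 2^-7 */
    #define SPIKE(n, o) \
            ((uint64_t)llabs((long long)(n) - (long long)(o)) > ((o) << 11))

    int
    main(void)
    {
            uint64_t samples[] = { 1000, 1200, 5000000000ULL, 1100 };
            uint64_t avg = 0;
            int i;

            for (i = 0; i < 4; i++) {
                    uint64_t new = samples[i];

                    if (avg > 0) {
                            /* spikes decay with 2^-12 instead of 2^-7 */
                            int d = DECAY + (SPIKE(new, avg) ? 5 : 0);
                            avg = (((avg << d) - avg) + new) >> d;
                    } else {
                            avg = new;      /* first sample seeds the average */
                    }
                    printf("avg = %" PRIu64 "\n", avg);
            }
            return (0);
    }
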
+
+struct mbuf *
+sfb_getq(struct sfb *sp, class_queue_t *q)
+{
+       return (sfb_getq_flow(sp, q, 0, FALSE));
+}
+
+void
+sfb_purgeq(struct sfb *sp, class_queue_t *q, u_int32_t flow, u_int32_t *packets,
+    u_int32_t *bytes)
+{
+       u_int32_t cnt = 0, len = 0;
+       struct mbuf *m;
+
+       IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
+
+       while ((m = sfb_getq_flow(sp, q, flow, TRUE)) != NULL) {
+               cnt++;
+               len += m_pktlen(m);
+               m_freem(m);
+       }
+
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+void
+sfb_updateq(struct sfb *sp, cqev_t ev)
+{
+       struct ifnet *ifp = sp->sfb_ifp;
+
+       VERIFY(ifp != NULL);
+
+       switch (ev) {
+       case CLASSQ_EV_LINK_SPEED: {
+               u_int64_t eff_rate = ifnet_output_linkrate(ifp);
+
+               /* update parameters only if rate has changed */
+               if (eff_rate == sp->sfb_eff_rate)
+                       break;
+
+               if (classq_verbose) {
+                       log(LOG_DEBUG, "%s: SFB qid=%d, adapting to new "
+                           "eff_rate=%llu bps\n", if_name(ifp), sp->sfb_qid,
+                           eff_rate);
+               }
+               sfb_calc_holdtime(sp, eff_rate);
+               sfb_calc_pboxtime(sp, eff_rate);
+               break;
+       }
+
+       case CLASSQ_EV_LINK_UP:
+       case CLASSQ_EV_LINK_DOWN:
+               if (classq_verbose) {
+                       log(LOG_DEBUG, "%s: SFB qid=%d, resetting due to "
+                           "link %s\n", if_name(ifp), sp->sfb_qid,
+                           (ev == CLASSQ_EV_LINK_UP) ? "UP" : "DOWN");
+               }
+               sfb_resetq(sp, ev);
+               break;
+
+       case CLASSQ_EV_LINK_MTU:
+       default:
+               break;
+       }
+}
+
+int
+sfb_suspendq(struct sfb *sp, class_queue_t *q, boolean_t on)
+{
+       struct ifnet *ifp = sp->sfb_ifp;
+
+       VERIFY(ifp != NULL);
+
+       if ((on && (sp->sfb_flags & SFBF_SUSPENDED)) ||
+           (!on && !(sp->sfb_flags & SFBF_SUSPENDED)))
+               return (0);
+
+       if (!(sp->sfb_flags & SFBF_FLOWCTL)) {
+               log(LOG_ERR, "%s: SFB qid=%d, unable to %s queue since "
+                   "flow-control is not enabled", if_name(ifp), sp->sfb_qid,
+                   (on ? "suspend" : "resume"));
+               return (ENOTSUP);
+       }
+
+       if (classq_verbose) {
+               log(LOG_DEBUG, "%s: SFB qid=%d, setting state to %s\n",
+                   if_name(ifp), sp->sfb_qid, (on ? "SUSPENDED" : "RUNNING"));
+       }
+
+       if (on) {
+               sp->sfb_flags |= SFBF_SUSPENDED;
+       } else {
+               sp->sfb_flags &= ~SFBF_SUSPENDED;
+               sfb_swap_bins(sp, qlen(q));
+       }
+
+       return (0);
+}
diff --git a/bsd/net/classq/classq_sfb.h b/bsd/net/classq/classq_sfb.h
new file mode 100644
index 0000000..911ad31
--- /dev/null
+++ b/bsd/net/classq/classq_sfb.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_CLASSQ_CLASSQ_SFB_H_
+#define        _NET_CLASSQ_CLASSQ_SFB_H_
+
+#ifdef PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/classq/if_classq.h>
+#include <stdbool.h>
+#include <sys/time.h>
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        SFB_FP_SHIFT    14                      /* fixed-point shift (Q14) */
+#define        SFB_LEVELS      2                       /* L */
+#define        SFB_BINS_SHIFT  5
+#define        SFB_BINS        (1 << SFB_BINS_SHIFT)   /* N */
+
+struct sfbstats {
+       u_int64_t               drop_early;
+       u_int64_t               drop_pbox;
+       u_int64_t               drop_queue;
+       u_int64_t               marked_packets;
+       u_int64_t               pbox_packets;
+       u_int64_t               pbox_time;
+       u_int64_t               hold_time;
+       u_int64_t               dequeue_avg;
+       u_int64_t               rehash_intval;
+       u_int64_t               num_rehash;
+       u_int64_t               null_flowhash;
+       u_int64_t               flow_controlled;
+       u_int64_t               flow_feedback;
+};
+
+struct sfbbinstats {
+       int16_t         pmark;          /* marking probability in Q format */
+       u_int16_t       pkts;           /* number of packets */
+};
+
+struct sfb_stats {
+       u_int32_t               allocation;
+       u_int32_t               dropthresh;
+       u_int32_t               clearpkts;
+       u_int32_t               current;
+       struct sfbstats         sfbstats;
+       struct sfbbins {
+               struct sfbbinstats stats[SFB_LEVELS][SFB_BINS];
+       } binstats[2] __attribute__((aligned(8)));
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+struct sfb_bin_fcentry {
+       SLIST_ENTRY(sfb_bin_fcentry) fce_link;
+       u_int32_t       fce_flowhash;
+};
+
+SLIST_HEAD(sfb_fc_list, sfb_bin_fcentry);
+
+struct sfb_bins {
+       u_int32_t               fudge;
+       struct sfbbinstats      stats[SFB_LEVELS][SFB_BINS];
+       struct timespec         freezetime[SFB_LEVELS][SFB_BINS];
+};
+
+/* SFB flags */
+#define        SFBF_ECN4       0x01    /* use packet marking for IPv4 packets */
+#define        SFBF_ECN6       0x02    /* use packet marking for IPv6 packets */
+#define        SFBF_ECN        (SFBF_ECN4 | SFBF_ECN6)
+#define        SFBF_FLOWCTL    0x04    /* enable flow control advisories */
+#define        SFBF_SUSPENDED  0x1000  /* queue is suspended */
+
+#define        SFBF_USERFLAGS                                                  \
+       (SFBF_ECN4 | SFBF_ECN6 | SFBF_FLOWCTL)
+
+typedef struct sfb {
+       /* variables for internal use */
+       u_int32_t       sfb_flags;      /* SFB flags */
+       u_int32_t       sfb_qlim;
+       u_int32_t       sfb_qid;
+       u_int16_t       sfb_allocation;
+       u_int16_t       sfb_drop_thresh;
+       u_int32_t       sfb_clearpkts;
+       u_int64_t       sfb_eff_rate;   /* last known effective rate */
+       struct timespec sfb_getqtime;   /* last dequeue timestamp */
+       struct timespec sfb_holdtime;   /* random holdtime in nsec */
+       struct ifnet    *sfb_ifp;       /* back pointer to ifnet */
+
+       /* moving hash function */
+       struct timespec sfb_hinterval;  /* random reset interval in sec */
+       struct timespec sfb_nextreset;  /* reset deadline */
+
+       /* penalty box */
+       struct timespec sfb_pboxtime;   /* random pboxtime in nsec */
+       struct timespec sfb_pboxfreeze;
+
+       /* B[L][N] bins (2 sets: current and warm-up) */
+       u_int32_t       sfb_current;    /* current set (0 or 1) */
+       struct sfb_bins (*sfb_bins)[2];
+
+       /* Flow control lists for current set */
+       struct sfb_fc_list (*sfb_fc_lists)[SFB_BINS];
+
+       /* statistics */
+       struct sfbstats sfb_stats __attribute__((aligned(8)));
+} sfb_t;
+
+extern void sfb_init(void);
+extern struct sfb *sfb_alloc(struct ifnet *, u_int32_t, u_int32_t, u_int32_t);
+extern void sfb_destroy(struct sfb *);
+extern int sfb_addq(struct sfb *, class_queue_t *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *sfb_getq(struct sfb *, class_queue_t *);
+extern void sfb_purgeq(struct sfb *, class_queue_t *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+extern void sfb_getstats(struct sfb *, struct sfb_stats *);
+extern void sfb_updateq(struct sfb *, cqev_t);
+extern int sfb_suspendq(struct sfb *, class_queue_t *, boolean_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_CLASSQ_SFB_H_ */
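A minimal standalone sketch -- not part of the commit -- of how the B[L][N]
geometry and Q14 marking probability declared above are typically used: each
of the L levels derives an independent SFB_BINS_SHIFT-bit bin index from the
packet's flow hash, and a bin's pmark is compared against a uniform random
draw in [0, 1 << SFB_FP_SHIFT).  The EX_-prefixed names, the slice-based
indexing, and the random input are illustrative assumptions, not the kernel's
actual hashing scheme.

    #include <stdint.h>

    #define EX_FP_SHIFT    14                     /* Q14, as SFB_FP_SHIFT above */
    #define EX_BINS_SHIFT  5
    #define EX_BINS        (1 << EX_BINS_SHIFT)   /* N = 32 bins per level */

    /* one bin index per level, from disjoint slices of the flow hash */
    static inline unsigned int
    ex_bin_index(uint32_t flowhash, unsigned int level)
    {
            return ((flowhash >> (level * EX_BINS_SHIFT)) & (EX_BINS - 1));
    }

    /* pmark is Q14: (1 << EX_FP_SHIFT) represents probability 1.0 */
    static inline int
    ex_should_mark(int16_t pmark, uint32_t rnd)
    {
            if (pmark <= 0)
                    return (0);     /* non-positive probability: never mark */
            return ((rnd & ((1 << EX_FP_SHIFT) - 1)) < (uint32_t)pmark);
    }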
diff --git a/bsd/net/classq/classq_subr.c b/bsd/net/classq/classq_subr.c
new file mode 100644
index 0000000..738c86e
--- /dev/null
+++ b/bsd/net/classq/classq_subr.c
@@ -0,0 +1,794 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/random.h>
+#include <sys/kernel_types.h>
+#include <sys/sysctl.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+#include <net/classq/classq.h>
+#if CLASSQ_RED
+#include <net/classq/classq_red.h>
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+#include <net/classq/classq_rio.h>
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+#include <net/classq/classq_blue.h>
+#endif /* CLASSQ_BLUE */
+#include <net/classq/classq_sfb.h>
+#include <net/pktsched/pktsched.h>
+
+#include <libkern/libkern.h>
+
+#if PF_ALTQ
+#include <net/altq/altq.h>
+#endif /* PF_ALTQ */
+
+static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
+    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *,
+    boolean_t);
+static struct mbuf *ifclassq_poll_common(struct ifclassq *,
+    mbuf_svc_class_t, boolean_t);
+static struct mbuf *ifclassq_tbr_dequeue_common(struct ifclassq *, int,
+    mbuf_svc_class_t, boolean_t);
+
+void
+classq_init(void)
+{
+       _CASSERT(MBUF_TC_BE == 0);
+       _CASSERT(MBUF_SC_BE == 0);
+       _CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
+
+#if CLASSQ_RED
+       red_init();
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       rio_init();
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       blue_init();
+#endif /* CLASSQ_BLUE */
+       sfb_init();
+}
+
+int
+ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse)
+{
+#pragma unused(reuse)
+       struct ifclassq *ifq = &ifp->if_snd;
+       int err = 0;
+
+       IFCQ_LOCK(ifq);
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+       ifq->ifcq_ifp = ifp;
+       IFCQ_LEN(ifq) = 0;
+       bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
+       bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
+
+       VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+       VERIFY(ifq->ifcq_flags == 0);
+       VERIFY(ifq->ifcq_sflags == 0);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_enqueue == NULL);
+       VERIFY(ifq->ifcq_dequeue == NULL);
+       VERIFY(ifq->ifcq_dequeue_sc == NULL);
+       VERIFY(ifq->ifcq_request == NULL);
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               u_int32_t maxlen = 0;
+
+               if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
+                       maxlen = if_sndq_maxlen;
+               IFCQ_SET_MAXLEN(ifq, maxlen);
+
+               ifq->ifcq_sflags = sflags;
+               err = ifclassq_pktsched_setup(ifq);
+               if (err == 0)
+                       ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
+       }
+
+#if PF_ALTQ
+       ifq->ifcq_drain = 0;
+       IFCQ_ALTQ(ifq)->altq_ifcq = ifq;
+       VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);
+
+       if ((ifp->if_eflags & IFEF_TXSTART) &&
+           ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)
+               ALTQ_SET_READY(IFCQ_ALTQ(ifq));
+       else
+               ALTQ_CLEAR_READY(IFCQ_ALTQ(ifq));
+#endif /* PF_ALTQ */
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+void
+ifclassq_teardown(struct ifnet *ifp)
+{
+       struct ifclassq *ifq = &ifp->if_snd;
+
+       IFCQ_LOCK(ifq);
+#if PF_ALTQ
+       if (ALTQ_IS_READY(IFCQ_ALTQ(ifq))) {
+               if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+                       altq_disable(IFCQ_ALTQ(ifq));
+               if (ALTQ_IS_ATTACHED(IFCQ_ALTQ(ifq)))
+                       altq_detach(IFCQ_ALTQ(ifq));
+               IFCQ_ALTQ(ifq)->altq_flags = 0;
+       }
+       ifq->ifcq_drain = 0;
+       IFCQ_ALTQ(ifq)->altq_ifcq = NULL;
+       VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
+       VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);
+#endif /* PF_ALTQ */
+
+       if (IFCQ_IS_READY(ifq)) {
+               if (IFCQ_TBR_IS_ENABLED(ifq)) {
+                       struct tb_profile tb = { 0, 0, 0 };
+                       (void) ifclassq_tbr_set(ifq, &tb, FALSE);
+               }
+               (void) pktsched_teardown(ifq);
+               ifq->ifcq_flags = 0;
+       }
+       ifq->ifcq_sflags = 0;
+
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+       VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+       VERIFY(ifq->ifcq_flags == 0);
+       VERIFY(ifq->ifcq_sflags == 0);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_enqueue == NULL);
+       VERIFY(ifq->ifcq_dequeue == NULL);
+       VERIFY(ifq->ifcq_dequeue_sc == NULL);
+       VERIFY(ifq->ifcq_request == NULL);
+       IFCQ_LEN(ifq) = 0;
+       IFCQ_MAXLEN(ifq) = 0;
+       bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
+       bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
+
+       IFCQ_UNLOCK(ifq);
+}
+
+int
+ifclassq_pktsched_setup(struct ifclassq *ifq)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifp->if_eflags & IFEF_TXSTART);
+
+       switch (ifp->if_output_sched_model) {
+       case IFNET_SCHED_MODEL_DRIVER_MANAGED:
+               err = pktsched_setup(ifq, PKTSCHEDT_TCQ, ifq->ifcq_sflags);
+               break;
+
+       case IFNET_SCHED_MODEL_NORMAL:
+               err = pktsched_setup(ifq, PKTSCHEDT_QFQ, ifq->ifcq_sflags);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (err);
+}
+
+void
+ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
+{
+       IFCQ_LOCK(ifq);
+       if (maxqlen == 0)
+               maxqlen = if_sndq_maxlen;
+       IFCQ_SET_MAXLEN(ifq, maxqlen);
+       IFCQ_UNLOCK(ifq);
+}
+
+u_int32_t
+ifclassq_get_maxlen(struct ifclassq *ifq)
+{
+       return (IFCQ_MAXLEN(ifq));
+}
+
+u_int32_t
+ifclassq_get_len(struct ifclassq *ifq)
+{
+       return (IFCQ_LEN(ifq));
+}
+
+errno_t
+ifclassq_enqueue(struct ifclassq *ifq, struct mbuf *m)
+{
+       errno_t err;
+
+       IFCQ_LOCK_SPIN(ifq);
+
+#if PF_ALTQ
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
+               ALTQ_ENQUEUE(IFCQ_ALTQ(ifq), m, err);
+       } else {
+               u_int32_t qlen = IFCQ_LEN(ifq);
+               IFCQ_ENQUEUE(ifq, m, err);
+               if (IFCQ_LEN(ifq) > qlen)
+                       ifq->ifcq_drain += (IFCQ_LEN(ifq) - qlen);
+       }
+#else /* !PF_ALTQ */
+       IFCQ_ENQUEUE(ifq, m, err);
+#endif /* PF_ALTQ */
+
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+errno_t
+ifclassq_dequeue(struct ifclassq *ifq, u_int32_t limit, struct mbuf **head,
+    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+       return (ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, limit, head, tail,
+           cnt, len, FALSE));
+}
+
+errno_t
+ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len)
+{
+       return (ifclassq_dequeue_common(ifq, sc, limit, head, tail,
+           cnt, len, TRUE));
+}
+
+static errno_t
+ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len, boolean_t drvmgt)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       u_int32_t i = 0, l = 0;
+       struct mbuf **first, *last;
+#if PF_ALTQ
+       struct ifaltq *altq = IFCQ_ALTQ(ifq);
+       boolean_t draining;
+#endif /* PF_ALTQ */
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+
+       *head = NULL;
+       first = &(*head);
+       last = NULL;
+
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK_SPIN(ifq);
+
+       while (i < limit) {
+               u_int64_t pktlen;
+#if PF_ALTQ
+               u_int32_t qlen;
+
+               qlen = IFCQ_LEN(ifq);
+               draining = IFCQ_IS_DRAINING(ifq);
+
+               if (drvmgt) {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
+                       else if (draining)
+                               IFCQ_DEQUEUE_SC(ifq, sc, *head);
+                       else if (ALTQ_IS_ENABLED(altq))
+                               ALTQ_DEQUEUE_SC(altq, sc, *head);
+                       else
+                               *head = NULL;
+               } else {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE(ifq, *head);
+                       else if (draining)
+                               IFCQ_DEQUEUE(ifq, *head);
+                       else if (ALTQ_IS_ENABLED(altq))
+                               ALTQ_DEQUEUE(altq, *head);
+                       else
+                               *head = NULL;
+               }
+
+               if (draining && *head != NULL) {
+                       VERIFY(ifq->ifcq_drain >= (qlen - IFCQ_LEN(ifq)));
+                       ifq->ifcq_drain -= (qlen - IFCQ_LEN(ifq));
+               }
+#else /* ! PF_ALTQ */
+               if (drvmgt) {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
+                       else
+                               IFCQ_DEQUEUE_SC(ifq, sc, *head);
+               } else {
+                       if (IFCQ_TBR_IS_ENABLED(ifq))
+                               IFCQ_TBR_DEQUEUE(ifq, *head);
+                       else
+                               IFCQ_DEQUEUE(ifq, *head);
+               }
+#endif /* !PF_ALTQ */
+
+               if (*head == NULL)
+                       break;
+
+               (*head)->m_nextpkt = NULL;
+               last = *head;
+
+               l += (*head)->m_pkthdr.len;
+               pktlen = (*head)->m_pkthdr.len;
+
+               (*head)->m_pkthdr.pf_mtag.pftag_pktseq =
+                   atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen);
+
+               head = &(*head)->m_nextpkt;
+               i++;
+       }
+
+       IFCQ_UNLOCK(ifq);
+
+       if (tail != NULL)
+               *tail = last;
+       if (cnt != NULL)
+               *cnt = i;
+       if (len != NULL)
+               *len = l;
+
+       return ((*first != NULL) ? 0 : EAGAIN);
+}
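
The dequeue loop above chains packets with a pointer-to-pointer idiom: head
always names the m_nextpkt slot where the next packet should land, so each
append is O(1) with no special case for an empty chain.  A self-contained
sketch of the same pattern, using an assumed struct rather than mbufs:

    #include <stddef.h>

    struct pkt { struct pkt *next; };

    static struct pkt *
    chain_up_to(struct pkt **src, unsigned int limit)
    {
            struct pkt *first = NULL, **slot = &first;

            while (limit-- > 0 && *src != NULL) {
                    *slot = *src;             /* land packet in current slot */
                    *src = (*src)->next;      /* pop it from the source list */
                    (*slot)->next = NULL;     /* terminate the chain */
                    slot = &(*slot)->next;    /* slot now names the new tail */
            }
            return (first);
    }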
+
+struct mbuf *
+ifclassq_poll(struct ifclassq *ifq)
+{
+       return (ifclassq_poll_common(ifq, MBUF_SC_UNSPEC, FALSE));
+}
+
+struct mbuf *
+ifclassq_poll_sc(struct ifclassq *ifq, mbuf_svc_class_t sc)
+{
+       return (ifclassq_poll_common(ifq, sc, TRUE));
+}
+
+static struct mbuf *
+ifclassq_poll_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    boolean_t drvmgt)
+{
+#if PF_ALTQ
+       struct ifaltq *altq = IFCQ_ALTQ(ifq);
+#endif /* PF_ALTQ */
+       struct mbuf *m;
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+
+#if PF_ALTQ
+       if (drvmgt) {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL_SC(ifq, sc, m);
+               else if (IFCQ_IS_DRAINING(ifq))
+                       IFCQ_POLL_SC(ifq, sc, m);
+               else if (ALTQ_IS_ENABLED(altq))
+                       ALTQ_POLL_SC(altq, sc, m);
+               else
+                       m = NULL;
+       } else {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL(ifq, m);
+               else if (IFCQ_IS_DRAINING(ifq))
+                       IFCQ_POLL(ifq, m);
+               else if (ALTQ_IS_ENABLED(altq))
+                       ALTQ_POLL(altq, m);
+               else
+                       m = NULL;
+       }
+#else /* ! PF_ALTQ */
+       if (drvmgt) {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL_SC(ifq, sc, m);
+               else
+                       IFCQ_POLL_SC(ifq, sc, m);
+       } else {
+               if (IFCQ_TBR_IS_ENABLED(ifq))
+                       IFCQ_TBR_POLL(ifq, m);
+               else
+                       IFCQ_POLL(ifq, m);
+       }
+#endif /* !PF_ALTQ */
+
+       return (m);
+}
+
+void
+ifclassq_update(struct ifclassq *ifq, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(IFCQ_IS_READY(ifq));
+
+#if PF_ALTQ
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+               ALTQ_UPDATE(IFCQ_ALTQ(ifq), ev);
+#endif /* PF_ALTQ */
+       IFCQ_UPDATE(ifq, ev);
+}
+
+int
+ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline,
+    ifclassq_enq_func enqueue, ifclassq_deq_func dequeue,
+    ifclassq_deq_sc_func dequeue_sc, ifclassq_req_func request)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(enqueue != NULL);
+       VERIFY(!(dequeue != NULL && dequeue_sc != NULL));
+       VERIFY(request != NULL);
+
+       ifq->ifcq_type = type;
+       ifq->ifcq_disc = discipline;
+       ifq->ifcq_enqueue = enqueue;
+       ifq->ifcq_dequeue = dequeue;
+       ifq->ifcq_dequeue_sc = dequeue_sc;
+       ifq->ifcq_request = request;
+
+       return (0);
+}
+
+int
+ifclassq_detach(struct ifclassq *ifq)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(ifq->ifcq_disc == NULL);
+
+       ifq->ifcq_type = PKTSCHEDT_NONE;
+       ifq->ifcq_disc = NULL;
+       ifq->ifcq_enqueue = NULL;
+       ifq->ifcq_dequeue = NULL;
+       ifq->ifcq_dequeue_sc = NULL;
+       ifq->ifcq_request = NULL;
+
+       return (0);
+}
+
+int
+ifclassq_getqstats(struct ifclassq *ifq, u_int32_t qid, void *ubuf,
+    u_int32_t *nbytes)
+{
+       struct if_ifclassq_stats *ifqs;
+       int err;
+
+       if (*nbytes < sizeof (*ifqs))
+               return (EINVAL);
+
+       ifqs = _MALLOC(sizeof (*ifqs), M_TEMP, M_WAITOK | M_ZERO);
+       if (ifqs == NULL)
+               return (ENOMEM);
+
+       IFCQ_LOCK(ifq);
+       if (!IFCQ_IS_READY(ifq)) {
+               IFCQ_UNLOCK(ifq);
+               _FREE(ifqs, M_TEMP);
+               return (ENXIO);
+       }
+
+       ifqs->ifqs_len = IFCQ_LEN(ifq);
+       ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
+       *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
+       *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
+       ifqs->ifqs_scheduler = ifq->ifcq_type;
+
+       err = pktsched_getqstats(ifq, qid, ifqs);
+       IFCQ_UNLOCK(ifq);
+
+       if (err == 0 && (err = copyout((caddr_t)ifqs,
+           (user_addr_t)(uintptr_t)ubuf, sizeof (*ifqs))) == 0)
+               *nbytes = sizeof (*ifqs);
+
+       _FREE(ifqs, M_TEMP);
+
+       return (err);
+}
+
+const char *
+ifclassq_ev2str(cqev_t ev)
+{
+       const char *c;
+
+       switch (ev) {
+       case CLASSQ_EV_LINK_SPEED:
+               c = "LINK_SPEED";
+               break;
+
+       case CLASSQ_EV_LINK_MTU:
+               c = "LINK_MTU";
+               break;
+
+       case CLASSQ_EV_LINK_UP:
+               c = "LINK_UP";
+               break;
+
+       case CLASSQ_EV_LINK_DOWN:
+               c = "LINK_DOWN";
+               break;
+
+       default:
+               c = "UNKNOWN";
+               break;
+       }
+
+       return (c);
+}
+
+/*
+ * internal representation of token bucket parameters
+ *     rate:   bytes_per_unittime << 32
+ *             = (((bits_per_sec) / 8) << 32) / machclk_freq
+ *     depth:  bytes << 32
+ */
+#define        TBR_SHIFT       32
+#define        TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
+#define        TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
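
Worked example of the Q32 scaling, under an assumed 1 GHz machine clock: a
100 Mbit/s profile is 12,500,000 bytes/s, so tbr_rate becomes
(12500000 << 32) / 10^9 = 53,687,091 scaled units, i.e. 0.0125 bytes of
credit per clock tick.  A standalone sketch (machclk_freq value assumed):

    #include <stdint.h>

    #define TBR_SHIFT       32
    #define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
    #define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)

    static const uint64_t machclk_freq = 1000000000ULL;  /* 1 GHz, assumed */

    static int64_t
    tbr_rate_for(uint64_t bits_per_sec)
    {
            /* scaled bytes credited to the bucket per clock tick (Q32) */
            return (TBR_SCALE(bits_per_sec / 8) / (int64_t)machclk_freq);
    }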
+
+struct mbuf *
+ifclassq_tbr_dequeue(struct ifclassq *ifq, int op)
+{
+       return (ifclassq_tbr_dequeue_common(ifq, op, MBUF_SC_UNSPEC, FALSE));
+}
+
+struct mbuf *
+ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, int op, mbuf_svc_class_t sc)
+{
+       return (ifclassq_tbr_dequeue_common(ifq, op, sc, TRUE));
+}
+
+static struct mbuf *
+ifclassq_tbr_dequeue_common(struct ifclassq *ifq, int op,
+    mbuf_svc_class_t sc, boolean_t drvmgt)
+{
+       struct tb_regulator *tbr;
+       struct mbuf *m;
+       int64_t interval;
+       u_int64_t now;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(!drvmgt || MBUF_VALID_SC(sc));
+       VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
+
+       tbr = &ifq->ifcq_tbr;
+       if (op == CLASSQDQ_REMOVE && tbr->tbr_lastop == CLASSQDQ_POLL) {
+               /* if this is a remove after poll, bypass tbr check */
+       } else {
+               /* update token only when it is negative */
+               if (tbr->tbr_token <= 0) {
+                       now = read_machclk();
+                       interval = now - tbr->tbr_last;
+                       if (interval >= tbr->tbr_filluptime) {
+                               tbr->tbr_token = tbr->tbr_depth;
+                       } else {
+                               tbr->tbr_token += interval * tbr->tbr_rate;
+                               if (tbr->tbr_token > tbr->tbr_depth)
+                                       tbr->tbr_token = tbr->tbr_depth;
+                       }
+                       tbr->tbr_last = now;
+               }
+               /* if token is still negative, don't allow dequeue */
+               if (tbr->tbr_token <= 0)
+                       return (NULL);
+       }
+
+       /*
+        * ifclassq takes precedence over ALTQ queue;
+        * ifcq_drain count is adjusted by the caller.
+        */
+#if PF_ALTQ
+       if (IFCQ_IS_DRAINING(ifq)) {
+#endif /* PF_ALTQ */
+               if (op == CLASSQDQ_POLL) {
+                       if (drvmgt)
+                               IFCQ_POLL_SC(ifq, sc, m);
+                       else
+                               IFCQ_POLL(ifq, m);
+               } else {
+                       if (drvmgt)
+                               IFCQ_DEQUEUE_SC(ifq, sc, m);
+                       else
+                               IFCQ_DEQUEUE(ifq, m);
+               }
+#if PF_ALTQ
+       } else {
+               struct ifaltq *altq = IFCQ_ALTQ(ifq);
+               if (ALTQ_IS_ENABLED(altq)) {
+                       if (drvmgt)
+                               m = (*altq->altq_dequeue_sc)(altq, sc, op);
+                       else
+                               m = (*altq->altq_dequeue)(altq, op);
+               } else {
+                       m = NULL;
+               }
+       }
+#endif /* PF_ALTQ */
+
+       if (m != NULL && op == CLASSQDQ_REMOVE)
+               tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+       tbr->tbr_lastop = op;
+
+       return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
+    boolean_t update)
+{
+       struct tb_regulator *tbr;
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       u_int64_t rate, old_rate;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(IFCQ_IS_READY(ifq));
+
+       VERIFY(machclk_freq != 0);
+
+       tbr = &ifq->ifcq_tbr;
+       old_rate = tbr->tbr_rate_raw;
+
+       rate = profile->rate;
+       if (profile->percent > 0) {
+               u_int64_t eff_rate;
+
+               if (profile->percent > 100)
+                       return (EINVAL);
+               if ((eff_rate = ifp->if_output_bw.eff_bw) == 0)
+                       return (ENODEV);
+               rate = (eff_rate * profile->percent) / 100;
+       }
+
+       if (rate == 0) {
+               if (!IFCQ_TBR_IS_ENABLED(ifq))
+                       return (ENOENT);
+
+               if (pktsched_verbose)
+                       printf("%s: TBR disabled\n", if_name(ifp));
+
+               /* disable this TBR */
+               ifq->ifcq_flags &= ~IFCQF_TBR;
+               bzero(tbr, sizeof (*tbr));
+               ifnet_set_start_cycle(ifp, NULL);
+               if (update)
+                       ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+               return (0);
+       }
+
+       if (pktsched_verbose) {
+               printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
+                   (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
+                   "enabled", rate, profile->depth);
+       }
+
+       /* set the new TBR */
+       bzero(tbr, sizeof (*tbr));
+       tbr->tbr_rate_raw = rate;
+       tbr->tbr_percent = profile->percent;
+       ifq->ifcq_flags |= IFCQF_TBR;
+
+       /*
+        * Note that the TBR fill up time (hence the ifnet restart time)
+        * is directly related to the specified TBR depth.  The ideal
+        * depth value should be computed such that the interval time
+        * between each successive wakeup is adequately spaced apart,
+        * in order to reduce scheduling overheads.  A target interval
+        * of 10 ms seems to provide good performance balance.  This can be
+        * overridden by specifying the depth profile.  Values smaller than
+        * the ideal depth will reduce delay at the expense of CPU cycles.
+        */
+       tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
+       if (tbr->tbr_rate > 0) {
+               u_int32_t mtu = ifp->if_mtu;
+               int64_t ival, idepth = 0;
+               int i;
+
+               if (mtu < IF_MINMTU)
+                       mtu = IF_MINMTU;
+
+               ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */
+
+               for (i = 1; ; i++) {
+                       idepth = TBR_SCALE(i * mtu);
+                       if ((idepth / tbr->tbr_rate) > ival)
+                               break;
+               }
+               VERIFY(idepth > 0);
+
+               tbr->tbr_depth = TBR_SCALE(profile->depth);
+               if (tbr->tbr_depth == 0) {
+                       tbr->tbr_filluptime = idepth / tbr->tbr_rate;
+                       /* a little fudge factor to get closer to rate */
+                       tbr->tbr_depth = idepth + (idepth >> 3);
+               } else {
+                       tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+               }
+       } else {
+               tbr->tbr_depth = TBR_SCALE(profile->depth);
+               tbr->tbr_filluptime = 0xffffffffffffffffLL;
+       }
+       tbr->tbr_token = tbr->tbr_depth;
+       tbr->tbr_last = read_machclk();
+       tbr->tbr_lastop = CLASSQDQ_REMOVE;
+
+       if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
+               struct timespec ts =
+                   { 0, pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
+               if (pktsched_verbose) {
+                       printf("%s: TBR calculated tokens %lld "
+                           "filluptime %llu ns\n", if_name(ifp),
+                           TBR_UNSCALE(tbr->tbr_token),
+                           pktsched_abs_to_nsecs(tbr->tbr_filluptime));
+               }
+               ifnet_set_start_cycle(ifp, &ts);
+       } else {
+               if (pktsched_verbose) {
+                       if (tbr->tbr_rate == 0) {
+                               printf("%s: TBR calculated tokens %lld "
+                                   "infinite filluptime\n", if_name(ifp),
+                                   TBR_UNSCALE(tbr->tbr_token));
+                       } else if (!(ifp->if_flags & IFF_UP)) {
+                               printf("%s: TBR suspended (link is down)\n",
+                                   if_name(ifp));
+                       }
+               }
+               ifnet_set_start_cycle(ifp, NULL);
+       }
+       if (update && tbr->tbr_rate_raw != old_rate)
+               ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+
+       return (0);
+}
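
Worked example of the depth computation above, under assumed numbers
(100 Mbit/s effective rate, 1500-byte MTU, 1 GHz clock): the link drains
12.5 MB/s, so i * 1500 bytes take i * 120 us to send; the loop stops at
i = 84, the first multiple whose drain time exceeds the 10 ms target.  That
yields idepth = 126,000 bytes (scaled), a fill-up time of about 10.08 ms,
and, after the one-eighth fudge factor, a final depth of roughly 141,750
bytes.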
diff --git a/bsd/net/classq/classq_util.c b/bsd/net/classq/classq_util.c
new file mode 100644
index 0000000..e8bf3d5
--- /dev/null
+++ b/bsd/net/classq/classq_util.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the Network Research
+ *      Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/random.h>
+#include <sys/kernel_types.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+#include <net/classq/classq.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#if INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <libkern/libkern.h>
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(struct mbuf *m, struct pf_mtag *t)
+{
+       struct mbuf *m0;
+       u_int8_t ds_field = 0;
+
+       if (t->pftag_hdr == NULL ||
+           !(t->pftag_flags & (PF_TAG_HDR_INET|PF_TAG_HDR_INET6)))
+               return ((u_int8_t)0);
+
+       /* verify that hdr is within the mbuf data */
+       for (m0 = m; m0 != NULL; m0 = m0->m_next)
+               if (((caddr_t)t->pftag_hdr >= m0->m_data) &&
+                   ((caddr_t)t->pftag_hdr < m0->m_data + m0->m_len))
+                       break;
+       if (m0 == NULL) {
+               /* ick, tag info is stale */
+               printf("%s: can't locate header!\n", __func__);
+               return ((u_int8_t)0);
+       }
+
+       if (t->pftag_flags & PF_TAG_HDR_INET) {
+               struct ip *ip = (struct ip *)(void *)t->pftag_hdr;
+
+               if (((uintptr_t)ip + sizeof (*ip)) >
+                   ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                       return (0);             /* out of bounds */
+
+               if (ip->ip_v != 4)
+                       return ((u_int8_t)0);   /* version mismatch! */
+               ds_field = ip->ip_tos;
+       }
+#if INET6
+       else if (t->pftag_flags & PF_TAG_HDR_INET6) {
+               struct ip6_hdr *ip6 = (struct ip6_hdr *)(void *)t->pftag_hdr;
+               u_int32_t flowlabel;
+
+               if (((uintptr_t)ip6 + sizeof (*ip6)) >
+                   ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                       return (0);             /* out of bounds */
+
+               flowlabel = ntohl(ip6->ip6_flow);
+               if ((flowlabel >> 28) != 6)
+                       return ((u_int8_t)0);   /* version mismatch! */
+               ds_field = (flowlabel >> 20) & 0xff;
+       }
+#endif
+       return (ds_field);
+}
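
Worked example of the IPv6 extraction above: the first 32-bit word of the
header packs version (4 bits), traffic class (8 bits), and flow label
(20 bits).  For a host-order value of 0x6B812345, (flowlabel >> 28) yields 6
(the version check passes) and (flowlabel >> 20) & 0xff yields 0xB8, i.e.
DSCP 46 (EF) with the ECN bits clear.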
+
+void
+write_dsfield(struct mbuf *m, struct pf_mtag *t, u_int8_t dsfield)
+{
+       struct mbuf *m0;
+
+       if (t->pftag_hdr == NULL ||
+           !(t->pftag_flags & (PF_TAG_HDR_INET|PF_TAG_HDR_INET6)))
+               return;
+
+       /* verify that hdr is within the mbuf data */
+       for (m0 = m; m0 != NULL; m0 = m0->m_next)
+               if (((caddr_t)t->pftag_hdr >= m0->m_data) &&
+                   ((caddr_t)t->pftag_hdr < m0->m_data + m0->m_len))
+                       break;
+       if (m0 == NULL) {
+               /* ick, tag info is stale */
+               printf("%s: can't locate header!\n", __func__);
+               return;
+       }
+
+       if (t->pftag_flags & PF_TAG_HDR_INET) {
+               struct ip *ip = (struct ip *)(void *)t->pftag_hdr;
+               u_int8_t old;
+               int32_t sum;
+
+               if (((uintptr_t)ip + sizeof (*ip)) >
+                   ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                       return;         /* out of bounds */
+
+               if (ip->ip_v != 4)
+                       return;         /* version mismatch! */
+               old = ip->ip_tos;
+               dsfield |= old & 3;     /* leave CU bits */
+               if (old == dsfield)
+                       return;
+               ip->ip_tos = dsfield;
+               /*
+                * update checksum (from RFC1624)
+                *         HC' = ~(~HC + ~m + m')
+                */
+               sum = ~ntohs(ip->ip_sum) & 0xffff;
+               sum += 0xff00 + (~old & 0xff) + dsfield;
+               sum = (sum >> 16) + (sum & 0xffff);
+               sum += (sum >> 16);  /* add carry */
+
+               ip->ip_sum = htons(~sum & 0xffff);
+       }
+#if INET6
+       else if (t->pftag_flags & PF_TAG_HDR_INET6) {
+               struct ip6_hdr *ip6 = (struct ip6_hdr *)t->pftag_hdr;
+               u_int32_t flowlabel;
+
+               if (((uintptr_t)ip6 + sizeof (*ip6)) >
+                   ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                       return;         /* out of bounds */
+
+               flowlabel = ntohl(ip6->ip6_flow);
+               if ((flowlabel >> 28) != 6)
+                       return;         /* version mismatch! */
+               flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+               ip6->ip6_flow = htonl(flowlabel);
+       }
+#endif
+}
+
+/*
+ * try to mark the CE bit in the packet.
+ *    returns 1 if successfully marked, 0 otherwise.
+ */
+int
+mark_ecn(struct mbuf *m, struct pf_mtag *t, int flags)
+{
+       struct mbuf     *m0;
+       void            *hdr;
+       int             af;
+
+       if ((hdr = t->pftag_hdr) == NULL ||
+           !(t->pftag_flags & (PF_TAG_HDR_INET|PF_TAG_HDR_INET6)))
+               return (0);
+
+       /* verify that hdr is within the mbuf data */
+       for (m0 = m; m0 != NULL; m0 = m0->m_next)
+               if (((caddr_t)hdr >= m0->m_data) &&
+                   ((caddr_t)hdr < m0->m_data + m0->m_len))
+                       break;
+       if (m0 == NULL) {
+               /* ick, tag info is stale */
+               printf("%s: can't locate header!\n", __func__);
+               return (0);
+       }
+
+       if (t->pftag_flags & PF_TAG_HDR_INET)
+               af = AF_INET;
+       else if (t->pftag_flags & PF_TAG_HDR_INET6)
+               af = AF_INET6;
+       else
+               af = AF_UNSPEC;
+
+       switch (af) {
+       case AF_INET:
+               if (flags & CLASSQF_ECN4) {     /* REDF_ECN4 == BLUEF_ECN4 */
+                       struct ip *ip = hdr;
+                       u_int8_t otos;
+                       int sum;
+
+                       if (((uintptr_t)ip + sizeof (*ip)) >
+                           ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                               return (0);     /* out of bounds */
+
+                       if (ip->ip_v != 4)
+                               return (0);     /* version mismatch! */
+                       if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+                               return (0);     /* not-ECT */
+                       if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+                               return (1);     /* already marked */
+
+                       /*
+                        * ecn-capable but not marked,
+                        * mark CE and update checksum
+                        */
+                       otos = ip->ip_tos;
+                       ip->ip_tos |= IPTOS_ECN_CE;
+                       /*
+                        * update checksum (from RFC1624)
+                        *         HC' = ~(~HC + ~m + m')
+                        */
+                       sum = ~ntohs(ip->ip_sum) & 0xffff;
+                       sum += (~otos & 0xffff) + ip->ip_tos;
+                       sum = (sum >> 16) + (sum & 0xffff);
+                       sum += (sum >> 16);  /* add carry */
+                       ip->ip_sum = htons(~sum & 0xffff);
+                       return (1);
+               }
+               break;
+#if INET6
+       case AF_INET6:
+               if (flags & CLASSQF_ECN6) {     /* REDF_ECN6 == BLUEF_ECN6 */
+                       struct ip6_hdr *ip6 = hdr;
+                       u_int32_t flowlabel;
+
+                       if (((uintptr_t)ip6 + sizeof (*ip6)) >
+                           ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0)))
+                               return (0);     /* out of bounds */
+
+                       flowlabel = ntohl(ip6->ip6_flow);
+                       if ((flowlabel >> 28) != 6)
+                               return (0);     /* version mismatch! */
+                       if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+                           (IPTOS_ECN_NOTECT << 20))
+                               return (0);     /* not-ECT */
+                       if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+                           (IPTOS_ECN_CE << 20))
+                               return (1);     /* already marked */
+                       /*
+                        * ecn-capable but not marked,  mark CE
+                        */
+                       flowlabel |= (IPTOS_ECN_CE << 20);
+                       ip6->ip6_flow = htonl(flowlabel);
+                       return (1);
+               }
+               break;
+#endif  /* INET6 */
+       }
+
+       /* not marked */
+       return (0);
+}
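
Both write_dsfield() and mark_ecn() above apply the RFC 1624 incremental
update HC' = ~(~HC + ~m + m') instead of recomputing the full IP checksum.
A standalone sketch of the same arithmetic on one 16-bit header word
(host order, for illustration only):

    #include <stdint.h>

    /* patch checksum hc incrementally when a 16-bit word changes m -> m_new */
    static uint16_t
    cksum_update16(uint16_t hc, uint16_t m, uint16_t m_new)
    {
            uint32_t sum;

            sum  = (uint16_t)~hc;               /* ~HC  */
            sum += (uint16_t)~m;                /* + ~m */
            sum += m_new;                       /* + m' */
            sum  = (sum >> 16) + (sum & 0xffff);
            sum += (sum >> 16);                 /* fold the carry back in */

            return ((uint16_t)~sum);            /* HC' = ~(~HC + ~m + m') */
    }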
diff --git a/bsd/net/classq/if_classq.h b/bsd/net/classq/if_classq.h
new file mode 100644
index 0000000..9eb32d8
--- /dev/null
+++ b/bsd/net/classq/if_classq.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_CLASSQ_IF_CLASSQ_H_
+#define        _NET_CLASSQ_IF_CLASSQ_H_
+
+#ifdef PRIVATE
+#define        IFCQ_SC_MAX             10              /* max number of queues */
+
+#ifdef BSD_KERNEL_PRIVATE
+#include <net/classq/classq.h>
+/* classq dequeue op arg */
+typedef enum cqdq_op {
+       CLASSQDQ_REMOVE =       1,      /* dequeue mbuf from the queue */
+       CLASSQDQ_POLL =         2,      /* don't dequeue mbuf from the queue */
+} cqdq_op_t;
+
+/* classq request types */
+typedef enum cqrq {
+       CLASSQRQ_PURGE =        1,      /* purge all packets */
+       CLASSQRQ_PURGE_SC =     2,      /* purge service class (and flow) */
+       CLASSQRQ_EVENT =        3,      /* interface events */
+       CLASSQRQ_THROTTLE =     4,      /* throttle packets */
+} cqrq_t;
+
+/* classq purge_sc request argument */
+typedef struct cqrq_purge_sc {
+       mbuf_svc_class_t        sc;     /* (in) service class */
+       u_int32_t               flow;   /* (in) 0 means all flows */
+       u_int32_t               packets; /* (out) purged packets */
+       u_int32_t               bytes;  /* (out) purged bytes */
+} cqrq_purge_sc_t;
+
+/* classq throttle request argument */
+typedef struct cqrq_throttle {
+       u_int32_t               set;    /* set or get */
+       u_int32_t               level;  /* (in/out) throttling level */
+} cqrq_throttle_t;
+
+#if PF_ALTQ
+#include <net/altq/if_altq.h>
+#endif /* PF_ALTQ */
+
+/*
+ * A token-bucket regulator limits the rate that a network driver can
+ * dequeue packets from the output queue.  Modern cards are able to buffer
+ * a large number of packets and dequeue too many packets at a time.  This
+ * bursty dequeue behavior makes it impossible to schedule packets by
+ * queueing disciplines.  A token-bucket is used to control the burst size
+ * in a device independent manner.
+ */
+struct tb_regulator {
+       u_int64_t       tbr_rate_raw;   /* (unscaled) token bucket rate */
+       u_int32_t       tbr_percent;    /* token bucket rate in percentage */
+       int64_t         tbr_rate;       /* (scaled) token bucket rate */
+       int64_t         tbr_depth;      /* (scaled) token bucket depth */
+
+       int64_t         tbr_token;      /* (scaled) current token */
+       int64_t         tbr_filluptime; /* (scaled) time to fill up bucket */
+       u_int64_t       tbr_last;       /* last time token was updated */
+
+       int             tbr_lastop;     /* last dequeue operation type */
+                                       /*   needed for poll-and-dequeue */
+};
+
+/* simple token bucket meter profile */
+struct tb_profile {
+       u_int64_t       rate;   /* rate in bit-per-sec */
+       u_int32_t       percent; /* rate in percentage */
+       u_int32_t       depth;  /* depth in bytes */
+};
+
+struct ifclassq;
+enum cqdq_op;
+enum cqrq;
+
+typedef int (*ifclassq_enq_func)(struct ifclassq *, struct mbuf *);
+typedef struct mbuf *(*ifclassq_deq_func)(struct ifclassq *, enum cqdq_op);
+typedef struct mbuf *(*ifclassq_deq_sc_func)(struct ifclassq *,
+    mbuf_svc_class_t, enum cqdq_op);
+typedef int (*ifclassq_req_func)(struct ifclassq *, enum cqrq, void *);
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifclassq {
+       decl_lck_mtx_data(, ifcq_lock);
+
+       struct ifnet    *ifcq_ifp;      /* back pointer to interface */
+       u_int32_t       ifcq_len;
+       u_int32_t       ifcq_maxlen;
+       struct pktcntr  ifcq_xmitcnt;
+       struct pktcntr  ifcq_dropcnt;
+
+       u_int32_t       ifcq_type;      /* scheduler type */
+       u_int32_t       ifcq_flags;     /* flags */
+       u_int32_t       ifcq_sflags;    /* scheduler flags */
+       void            *ifcq_disc;     /* for scheduler-specific use */
+       /*
+        * ifcq_disc_slots[] represents the leaf classes configured for the
+        * corresponding discipline/scheduler, ordered by their corresponding
+        * service class index.  Each slot holds the queue ID used to identify
+        * the class instance, as well as the class instance pointer itself.
+        * The latter is used during enqueue and dequeue in order to avoid the
+        * costs associated with looking up the class pointer based on the
+        * queue ID.  The queue ID is used when querying the statistics from
+        * user space.
+        *
+        * Avoiding the use of queue ID during enqueue and dequeue is made
+        * possible by virtue of knowing the particular mbuf service class
+        * associated with the packets.  The service class index of the
+        * packet is used as the index to ifcq_disc_slots[].
+        *
+        * ifcq_disc_slots[] therefore also acts as a lookup table which
+        * provides for the mapping between MBUF_SC values and the actual
+        * scheduler classes.
+        */
+       struct ifclassq_disc_slot {
+               u_int32_t       qid;
+               void            *cl;
+       } ifcq_disc_slots[IFCQ_SC_MAX]; /* for discipline use */
+
+       ifclassq_enq_func       ifcq_enqueue;
+       ifclassq_deq_func       ifcq_dequeue;
+       ifclassq_deq_sc_func    ifcq_dequeue_sc;
+       ifclassq_req_func       ifcq_request;
+
+       /* token bucket regulator */
+       struct tb_regulator     ifcq_tbr;       /* TBR */
+
+#if PF_ALTQ
+       u_int32_t       ifcq_drain;
+       struct ifaltq   ifcq_altq;
+#endif /* PF_ALTQ */
+};
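
A sketch of the fast-path lookup that the ifcq_disc_slots[] comment above
describes; ifclassq_class_for_idx is a hypothetical helper, and sc_idx stands
in for the packet's service class index (0 .. IFCQ_SC_MAX-1), which the
kernel derives from the mbuf's MBUF_SC value:

    /* the service class index maps straight to the scheduler class
     * instance; enqueue/dequeue never search the table by qid */
    static inline void *
    ifclassq_class_for_idx(struct ifclassq *ifq, u_int32_t sc_idx)
    {
            return (ifq->ifcq_disc_slots[sc_idx].cl);
    }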
+
+#if PF_ALTQ
+#define        IFCQ_ALTQ(_ifcq)                (&(_ifcq)->ifcq_altq)
+#define        IFCQ_IS_DRAINING(_ifcq)         ((_ifcq)->ifcq_drain > 0)
+#endif /* PF_ALTQ */
+
+/* ifcq_flags */
+#define        IFCQF_READY      0x01           /* ifclassq supports discipline */
+#define        IFCQF_ENABLED    0x02           /* ifclassq is in use */
+#define        IFCQF_TBR        0x04           /* Token Bucket Regulator is in use */
+
+#define        IFCQ_IS_READY(_ifcq)            ((_ifcq)->ifcq_flags & IFCQF_READY)
+#define        IFCQ_IS_ENABLED(_ifcq)          ((_ifcq)->ifcq_flags & IFCQF_ENABLED)
+#define        IFCQ_TBR_IS_ENABLED(_ifcq)      ((_ifcq)->ifcq_flags & IFCQF_TBR)
+
+/* classq enqueue return value */
+#define CLASSQEQ_DROPPED       (-1)    /* packet dropped (freed)  */
+#define CLASSQEQ_SUCCESS       0       /* success, packet enqueued */
+#define CLASSQEQ_SUCCESS_FC    1       /* packet enqueued; */
+                                       /*   give flow control feedback */
+#define CLASSQEQ_DROPPED_FC    2       /* packet dropped; */
+                                       /*  give flow control feedback */
+#define        CLASSQEQ_DROPPED_SP     3       /* packet dropped due to suspension; */
+                                       /*  give flow control feedback */
+
+/* interface event argument for CLASSQRQ_EVENT */
+typedef enum cqev {
+       CLASSQ_EV_LINK_SPEED =  1,      /* link speed has changed */
+       CLASSQ_EV_LINK_MTU =    2,      /* link MTU has changed */
+       CLASSQ_EV_LINK_UP =     3,      /* link is now up */
+       CLASSQ_EV_LINK_DOWN =   4,      /* link is now down */
+} cqev_t;
+#endif /* BSD_KERNEL_PRIVATE */
+
+#include <net/pktsched/pktsched_priq.h>
+#include <net/pktsched/pktsched_fairq.h>
+#include <net/pktsched/pktsched_tcq.h>
+#include <net/pktsched/pktsched_cbq.h>
+#include <net/pktsched/pktsched_hfsc.h>
+#include <net/pktsched/pktsched_qfq.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct if_ifclassq_stats {
+       u_int32_t       ifqs_len;
+       u_int32_t       ifqs_maxlen;
+       struct pktcntr  ifqs_xmitcnt;
+       struct pktcntr  ifqs_dropcnt;
+       u_int32_t       ifqs_scheduler;
+       union {
+               struct priq_classstats  ifqs_priq_stats;
+               struct fairq_classstats ifqs_fairq_stats;
+               struct tcq_classstats   ifqs_tcq_stats;
+               struct cbq_classstats   ifqs_cbq_stats;
+               struct hfsc_classstats  ifqs_hfsc_stats;
+               struct qfq_classstats   ifqs_qfq_stats;
+       };
+} __attribute__((aligned(8)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef BSD_KERNEL_PRIVATE
+/*
+ * For ifclassq lock
+ */
+#define        IFCQ_LOCK_ASSERT_HELD(_ifcq)                                    \
+       lck_mtx_assert(&(_ifcq)->ifcq_lock, LCK_MTX_ASSERT_OWNED)
+
+#define        IFCQ_LOCK_ASSERT_NOTHELD(_ifcq)                                 \
+       lck_mtx_assert(&(_ifcq)->ifcq_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define        IFCQ_LOCK(_ifcq)                                                \
+       lck_mtx_lock(&(_ifcq)->ifcq_lock)
+
+#define        IFCQ_LOCK_SPIN(_ifcq)                                           \
+       lck_mtx_lock_spin(&(_ifcq)->ifcq_lock)
+
+#define        IFCQ_CONVERT_LOCK(_ifcq) do {                                   \
+       IFCQ_LOCK_ASSERT_HELD(_ifcq);                                   \
+       lck_mtx_convert_spin(&(_ifcq)->ifcq_lock);                      \
+} while (0)
+
+#define        IFCQ_UNLOCK(_ifcq)                                              \
+       lck_mtx_unlock(&(_ifcq)->ifcq_lock)
+
+/*
+ * For ifclassq operations
+ */
+#define        IFCQ_ENQUEUE(_ifq, _m, _err) do {                               \
+       (_err) = (*(_ifq)->ifcq_enqueue)(_ifq, _m);                     \
+} while (0)
+
+#define        IFCQ_DEQUEUE(_ifq, _m) do {                                     \
+       (_m) = (*(_ifq)->ifcq_dequeue)(_ifq, CLASSQDQ_REMOVE);          \
+} while (0)
+
+#define        IFCQ_DEQUEUE_SC(_ifq, _sc, _m) do {                             \
+       (_m) = (*(_ifq)->ifcq_dequeue_sc)(_ifq, _sc, CLASSQDQ_REMOVE);  \
+} while (0)
+
+#define        IFCQ_TBR_DEQUEUE(_ifcq, _m) do {                                \
+       (_m) = ifclassq_tbr_dequeue(_ifcq, CLASSQDQ_REMOVE);            \
+} while (0)
+
+#define        IFCQ_TBR_DEQUEUE_SC(_ifcq, _sc, _m) do {                        \
+       (_m) = ifclassq_tbr_dequeue_sc(_ifcq, CLASSQDQ_REMOVE, _sc);    \
+} while (0)
+
+#define        IFCQ_POLL(_ifq, _m) do {                                        \
+       (_m) = (*(_ifq)->ifcq_dequeue)(_ifq, CLASSQDQ_POLL);            \
+} while (0)
+
+#define        IFCQ_POLL_SC(_ifq, _sc, _m) do {                                \
+       (_m) = (*(_ifq)->ifcq_dequeue_sc)(_ifq, _sc, CLASSQDQ_POLL);    \
+} while (0)
+
+#define        IFCQ_TBR_POLL(_ifcq, _m) do {                                   \
+       (_m) = ifclassq_tbr_dequeue(_ifcq, CLASSQDQ_POLL);              \
+} while (0)
+
+#define        IFCQ_TBR_POLL_SC(_ifcq, _sc, _m) do {                           \
+       (_m) = ifclassq_tbr_dequeue_sc(_ifcq, CLASSQDQ_POLL, _sc);      \
+} while (0)
+
+#define        IFCQ_PURGE(_ifq) do {                                           \
+       (void) (*(_ifq)->ifcq_request)(_ifq, CLASSQRQ_PURGE, NULL);     \
+} while (0)
+
+#define        IFCQ_PURGE_SC(_ifq, _sc, _flow, _packets, _bytes) do {          \
+       cqrq_purge_sc_t _req = { _sc, _flow, 0, 0 };                    \
+       (void) (*(_ifq)->ifcq_request)(_ifq, CLASSQRQ_PURGE_SC, &_req); \
+       (_packets) = _req.packets;                                      \
+       (_bytes) = _req.bytes;                                          \
+} while (0)
+
+#define        IFCQ_UPDATE(_ifq, _ev) do {                                     \
+       (void) (*(_ifq)->ifcq_request)(_ifq, CLASSQRQ_EVENT,            \
+           (void *)(_ev));                                             \
+} while (0)
+
+#define        IFCQ_SET_THROTTLE(_ifq, _level, _err) do {                      \
+       cqrq_throttle_t _req = { 1, _level };                           \
+       (_err) = (*(_ifq)->ifcq_request)                                \
+           (_ifq, CLASSQRQ_THROTTLE, &_req);                           \
+} while (0)
+
+#define        IFCQ_GET_THROTTLE(_ifq, _level, _err) do {                      \
+       cqrq_throttle_t _req = { 0, IFNET_THROTTLE_OFF };               \
+       (_err) = (*(_ifq)->ifcq_request)                                \
+           (_ifq, CLASSQRQ_THROTTLE, &_req);                           \
+       (_level) = _req.level;                                          \
+} while (0)
+
+#define        IFCQ_LEN(_ifcq)         ((_ifcq)->ifcq_len)
+#define        IFCQ_QFULL(_ifcq)       (IFCQ_LEN(_ifcq) >= (_ifcq)->ifcq_maxlen)
+#define        IFCQ_IS_EMPTY(_ifcq)    (IFCQ_LEN(_ifcq) == 0)
+#define        IFCQ_INC_LEN(_ifcq)     (IFCQ_LEN(_ifcq)++)
+#define        IFCQ_DEC_LEN(_ifcq)     (IFCQ_LEN(_ifcq)--)
+#define        IFCQ_MAXLEN(_ifcq)      ((_ifcq)->ifcq_maxlen)
+#define        IFCQ_SET_MAXLEN(_ifcq, _len) ((_ifcq)->ifcq_maxlen = (_len))
+
+#define        IFCQ_XMIT_ADD(_ifcq, _pkt, _len) do {                           \
+       PKTCNTR_ADD(&(_ifcq)->ifcq_xmitcnt, _pkt, _len);                \
+} while (0)
+
+#define        IFCQ_DROP_ADD(_ifcq, _pkt, _len) do {                           \
+       PKTCNTR_ADD(&(_ifcq)->ifcq_dropcnt, _pkt, _len);                \
+} while (0)
+
+extern int ifclassq_setup(struct ifnet *, u_int32_t, boolean_t);
+extern void ifclassq_teardown(struct ifnet *);
+extern int ifclassq_pktsched_setup(struct ifclassq *);
+extern void ifclassq_set_maxlen(struct ifclassq *, u_int32_t);
+extern u_int32_t ifclassq_get_maxlen(struct ifclassq *);
+extern u_int32_t ifclassq_get_len(struct ifclassq *);
+extern errno_t ifclassq_enqueue(struct ifclassq *, struct mbuf *);
+extern errno_t ifclassq_dequeue(struct ifclassq *, u_int32_t, struct mbuf **,
+    struct mbuf **, u_int32_t *, u_int32_t *);
+extern errno_t ifclassq_dequeue_sc(struct ifclassq *, mbuf_svc_class_t,
+    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
+extern struct mbuf *ifclassq_poll(struct ifclassq *);
+extern struct mbuf *ifclassq_poll_sc(struct ifclassq *, mbuf_svc_class_t);
+extern void ifclassq_update(struct ifclassq *, cqev_t);
+extern int ifclassq_attach(struct ifclassq *, u_int32_t, void *,
+    ifclassq_enq_func, ifclassq_deq_func, ifclassq_deq_sc_func,
+    ifclassq_req_func);
+extern int ifclassq_detach(struct ifclassq *);
+extern int ifclassq_getqstats(struct ifclassq *, u_int32_t,
+    void *, u_int32_t *);
+extern const char *ifclassq_ev2str(cqev_t);
+extern int ifclassq_tbr_set(struct ifclassq *, struct tb_profile *, boolean_t);
+extern struct mbuf *ifclassq_tbr_dequeue(struct ifclassq *, int);
+extern struct mbuf *ifclassq_tbr_dequeue_sc(struct ifclassq *, int,
+    mbuf_svc_class_t);
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* PRIVATE */
+#endif /* _NET_CLASSQ_IF_CLASSQ_H_ */
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 51c2d976e4b798af99b95d89ded1e2e039e1e37f..5da68ac6881f3ed9cbc2c6630ab0f987ad9bba52 100644
--- a/bsd/net/dlil.c
+++ b/bsd/net/dlil.c
 #include <sys/domain.h>
 #include <sys/user.h>
 #include <sys/random.h>
+#include <sys/socketvar.h>
 #include <net/if_dl.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_var.h>
 #include <net/dlil.h>
 #include <net/if_arp.h>
+#include <net/iptap.h>
 #include <sys/kern_event.h>
 #include <sys/kdebug.h>
 #include <sys/mcache.h>
 #include <net/if_types.h>
 #include <net/if_llreach.h>
 #include <net/kpi_interfacefilter.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_sfb.h>
 
 #if INET
 #include <netinet/in_var.h>
 #include <netinet/igmp_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/if_ether.h>
+#include <netinet/in_pcb.h>
 #endif /* INET */
 
 #if INET6
 #if PF
 #include <net/pfvar.h>
 #endif /* PF */
+#if PF_ALTQ
+#include <net/altq/altq.h>
+#endif /* PF_ALTQ */
+#include <net/pktsched/pktsched.h>
 
 #define DBG_LAYER_BEG          DLILDBG_CODE(DBG_DLIL_STATIC, 0)
 #define DBG_LAYER_END          DLILDBG_CODE(DBG_DLIL_STATIC, 2)
 #define DLIL_PRINTF    kprintf
 #endif
 
-#define        _CASSERT(x)     \
-       switch (0) { case 0: case (x): ; }
-
 #define        IF_DATA_REQUIRE_ALIGNED_64(f)   \
        _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
 
 #define        IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)     \
        _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
 
-#define IFNET_IF_TC_REQUIRE_ALIGNED_64(f) \
-       _CASSERT(!(offsetof(struct ifnet, if_tc.f) % sizeof (u_int64_t)))
-
 enum {
        kProtoKPI_v1    = 1,
        kProtoKPI_v2    = 2
@@ -171,7 +180,7 @@ SLIST_HEAD(proto_hash_entry, if_proto);
 struct dlil_ifnet {
        struct ifnet    dl_if;                  /* public ifnet */
        /*
-        * dlil private fields, protected by dl_if_lock
+        * DLIL private fields, protected by dl_if_lock
         */
        decl_lck_mtx_data(, dl_if_lock);
        TAILQ_ENTRY(dlil_ifnet) dl_if_link;     /* dlil_ifnet link */
@@ -186,6 +195,8 @@ struct dlil_ifnet {
                u_int8_t        asdl[DLIL_SDLMAXLEN]; /* addr storage */
                u_int8_t        msdl[DLIL_SDLMAXLEN]; /* mask storage */
        } dl_if_lladdr;
+       u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
+       struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
        ctrace_t        dl_if_attach;           /* attach PC stacktrace */
        ctrace_t        dl_if_detach;           /* detach PC stacktrace */
 };
@@ -234,12 +245,26 @@ static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
 static lck_grp_t *dlil_lock_group;
 lck_grp_t *ifnet_lock_group;
 static lck_grp_t *ifnet_head_lock_group;
+static lck_grp_t *ifnet_snd_lock_group;
+static lck_grp_t *ifnet_rcv_lock_group;
 lck_attr_t *ifnet_lock_attr;
 decl_lck_rw_data(static, ifnet_head_lock);
 decl_lck_mtx_data(static, dlil_ifnet_lock);
 u_int32_t dlil_filter_count = 0;
 extern u_int32_t       ipv4_ll_arp_aware;
 
+struct sfb_fc_list ifnet_fclist;
+decl_lck_mtx_data(static, ifnet_fclist_lock);
+
+static unsigned int ifnet_fcezone_size;                /* size of ifnet_fce */
+static struct zone *ifnet_fcezone;             /* zone for ifnet_fce */
+
+#define IFNET_FCEZONE_MAX      32              /* maximum elements in zone */
+#define IFNET_FCEZONE_NAME     "ifnet_fcezone" /* zone name */
+
+static void ifnet_fc_thread_func(void *, wait_result_t);
+static void ifnet_fc_init(void);
+
 #if DEBUG
 static unsigned int ifnet_debug = 1;   /* debugging (enabled) */
 #else
@@ -258,12 +283,6 @@ static struct zone *dlif_filt_zone;        /* zone for ifnet_filter */
 #define        DLIF_FILT_ZONE_MAX      8               /* maximum elements in zone */
 #define        DLIF_FILT_ZONE_NAME     "ifnet_filter"  /* zone name */
 
-static unsigned int dlif_inp_size;     /* size of dlil_threading_info */
-static struct zone *dlif_inp_zone;     /* zone for dlil_threading_info */
-
-#define        DLIF_INP_ZONE_MAX       DLIF_ZONE_MAX   /* maximum elements in zone */
-#define        DLIF_INP_ZONE_NAME      "ifnet_thread"  /* zone name */
-
 static unsigned int dlif_phash_size;   /* size of ifnet proto hash table */
 static struct zone *dlif_phash_zone;   /* zone for ifnet proto hash table */
 
@@ -276,6 +295,20 @@ static struct zone *dlif_proto_zone;       /* zone for if_proto */
 #define        DLIF_PROTO_ZONE_MAX     (DLIF_ZONE_MAX*2) /* maximum elements in zone */
 #define        DLIF_PROTO_ZONE_NAME    "ifnet_proto"   /* zone name */
 
+static unsigned int dlif_tcpstat_size;         /* size of tcpstat_local to allocate */
+static unsigned int dlif_tcpstat_bufsize;     /* dlif_tcpstat_size + alignment headroom */
+static struct zone *dlif_tcpstat_zone;         /* zone for tcpstat_local */
+
+#define        DLIF_TCPSTAT_ZONE_MAX   1               /* maximum elements in zone */
+#define        DLIF_TCPSTAT_ZONE_NAME  "ifnet_tcpstat" /* zone name */
+
+static unsigned int dlif_udpstat_size;         /* size of udpstat_local to allocate */
+static unsigned int dlif_udpstat_bufsize;     /* dlif_udpstat_size + alignment headroom */
+static struct zone *dlif_udpstat_zone;         /* zone for udpstat_local */
+
+#define        DLIF_UDPSTAT_ZONE_MAX   1               /* maximum elements in zone */
+#define        DLIF_UDPSTAT_ZONE_NAME  "ifnet_udpstat" /* zone name */
+
 /*
  * Updating this variable should be done by first acquiring the global
  * radix node head (rnh_lock), in tandem with setting/clearing the
@@ -284,11 +317,9 @@ static struct zone *dlif_proto_zone;       /* zone for if_proto */
 u_int32_t ifnet_aggressive_drainers;
 static u_int32_t net_rtref;
 
-static struct dlil_threading_info dlil_lo_thread;
-__private_extern__  struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_thread;
-
-static struct mbuf *dlil_lo_input_mbuf_head = NULL;
-static struct mbuf *dlil_lo_input_mbuf_tail = NULL;
+static struct dlil_main_threading_info dlil_main_input_thread_info;
+__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
+    (struct dlil_threading_info *)&dlil_main_input_thread_info;
 
 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
@@ -327,6 +358,10 @@ static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
     const struct sockaddr_dl *, const struct sockaddr *);
 
 static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
+static void ifp_if_start(struct ifnet *);
+static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
+    struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
+static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
     protocol_family_t *);
 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
@@ -334,20 +369,42 @@ static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
-    const struct sockaddr *, const char *, const char *);
-static errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
+    const struct sockaddr *, const char *, const char *
+#if CONFIG_EMBEDDED
+    ,
+    u_int32_t *, u_int32_t *
+#endif /* CONFIG_EMBEDDED */
+    );
 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
 static void ifp_if_free(struct ifnet *);
 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
+static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
+static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
 
-static void dlil_input_thread_func(struct dlil_threading_info *inpthread);
+static void dlil_main_input_thread_func(void *, wait_result_t);
+static void dlil_input_thread_func(void *, wait_result_t);
+static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
+static void dlil_rxpoll_calc_limits(struct dlil_threading_info *);
 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
-
-static void ifnet_delayed_thread_func(void);
+static void dlil_terminate_input_thread(struct dlil_threading_info *);
+static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
+    struct dlil_threading_info *, boolean_t);
+static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
+static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
+    u_int32_t, ifnet_model_t, boolean_t);
+static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
+    const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
+
+static void ifnet_detacher_thread_func(void *, wait_result_t);
+static int ifnet_detacher_thread_cont(int);
 static void ifnet_detach_final(struct ifnet *);
 static void ifnet_detaching_enqueue(struct ifnet *);
 static struct ifnet *ifnet_detaching_dequeue(void);
 
+static void ifnet_start_thread_fn(void *, wait_result_t);
+static void ifnet_poll_thread_fn(void *, wait_result_t);
+static void ifnet_poll(struct ifnet *);
+
 static void ifp_src_route_copyout(struct ifnet *, struct route *);
 static void ifp_src_route_copyin(struct ifnet *, struct route *);
 #if INET6
@@ -355,6 +412,10 @@ static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
 #endif /* INET6 */
 
+static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
+static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
+static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
+
 /* The following are protected by dlil_ifnet_lock */
 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
 static u_int32_t ifnet_detaching_cnt;
@@ -363,6 +424,11 @@ static void *ifnet_delayed_run;    /* wait channel for detaching thread */
 extern void bpfdetach(struct ifnet*);
 extern void proto_input_run(void);
 
+extern uint32_t udp_count_opportunistic(unsigned int ifindex, 
+       u_int32_t flags);
+extern uint32_t tcp_count_opportunistic(unsigned int ifindex, 
+       u_int32_t flags);
+
 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
 
 #if DEBUG
@@ -370,28 +436,108 @@ static int dlil_verbose = 1;
 #else
 static int dlil_verbose = 0;
 #endif /* DEBUG */
-static int dlil_multithreaded_input = 1;
-static int cur_dlil_input_threads = 0;
 #if IFNET_INPUT_SANITY_CHK
-static int dlil_lo_input_mbuf_count = 0;
 /* sanity checking of input packet lists received */
-static int dlil_input_sanity_check = 0;
-#endif
+static u_int32_t dlil_input_sanity_check = 0;
+#endif /* IFNET_INPUT_SANITY_CHK */
+/* rate limit debug messages */
+struct timespec dlil_dbgrate = { 1, 0 };
 
 SYSCTL_DECL(_net_link_generic_system);
 
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW,
-    &dlil_verbose, 0, "Log DLIL error messages");
-
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, multi_threaded_input, CTLFLAG_RW,
-    &dlil_multithreaded_input , 0, "Uses multiple input thread for DLIL input");
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
+
+#define        IF_SNDQ_MINLEN  32
+u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
+    sysctl_sndq_maxlen, "I", "Default transmit queue max length");
+
+#define        IF_RCVQ_MINLEN  32
+#define IF_RCVQ_MAXLEN 256
+u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
+    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
+
+#define        IF_RXPOLL_DECAY 2               /* ilog2 of EWMA decay rate (4) */
+static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
+    "ilog2 of EWMA decay rate of avg inbound packets");
+
+#define        IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
+static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
+    "input poll mode freeze time");
+
+#define        IF_RXPOLL_SAMPLETIME    (10ULL * 1000 * 1000)   /* 10 ms */
+static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
+    "input poll sampling time");
+
+#define        IF_RXPOLL_INTERVAL_TIME (1ULL * 1000 * 1000)    /* 1 ms */
+static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVAL_TIME;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
+    "input poll interval (time)");
+
+#define        IF_RXPOLL_INTERVAL_PKTS 0                       /* 0 (disabled) */
+static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
+    IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
+
+#define        IF_RXPOLL_WLOWAT                5
+static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, IF_RXPOLL_WLOWAT,
+    "input poll wakeup low watermark");
+
+#define        IF_RXPOLL_WHIWAT                100
+static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, IF_RXPOLL_WHIWAT,
+    "input poll wakeup high watermark");
+
+static u_int32_t if_rxpoll_max = 0;                    /* 0 (automatic) */
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
+    "max packets per poll call");
+
+static u_int32_t if_rxpoll = 1;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
+    sysctl_rxpoll, "I", "enable opportunistic input polling");
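
These tunables surface under net.link.generic.system. As a quick sanity
check, a userland program can read the rxpoll knob declared above with
sysctlbyname(); the OID name below is derived from the SYSCTL_PROC
declaration, and its availability on any given build is an assumption,
hence the error check.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned int val;
	size_t len = sizeof (val);

	if (sysctlbyname("net.link.generic.system.rxpoll",
	    &val, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("rxpoll=%u\n", val);
	return (0);
}

Writes through the same OID are mediated by the sysctl_rxpoll handler
named in the declaration (its body is outside this hunk).
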
+
+u_int32_t if_bw_smoothing_val = 3;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, "");
+
+u_int32_t if_bw_measure_size = 10;
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, "");
+
+static u_int32_t cur_dlil_input_threads = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
+    "Current number of DLIL input threads");
 
 #if IFNET_INPUT_SANITY_CHK
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
-    CTLFLAG_RW, &dlil_input_sanity_check , 0,
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
     "Turn on sanity checking in DLIL input");
-#endif
+#endif /* IFNET_INPUT_SANITY_CHK */
 
+static u_int32_t if_flowadv = 1;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
+    "enable flow-advisory mechanism");
+
+unsigned int net_rxpoll = 1;
 unsigned int net_affinity = 1;
 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
 
@@ -399,10 +545,47 @@ extern u_int32_t  inject_buckets;
 
 static lck_grp_attr_t  *dlil_grp_attributes = NULL;
 static lck_attr_t      *dlil_lck_attributes = NULL;
-static lck_grp_t       *dlil_input_lock_grp = NULL;
 
 #define PROTO_HASH_SLOTS       0x5
 
+#define        DLIL_INPUT_CHECK(m, ifp) {                                      \
+       struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
+       if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
+           !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
+               panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
+               /* NOTREACHED */                                        \
+       }                                                               \
+}
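
DLIL_INPUT_CHECK centralizes the per-mbuf sanity test that the pre-change
ifnet_input() open-coded (visible in the removed block further down): each
packet must carry a pkthdr whose rcvif matches the receiving interface,
with lo0 exempted. A hypothetical call site, walking a driver-supplied
chain (m_head and ifp are assumed inputs):

	struct mbuf *m;

	for (m = m_head; m != NULL; m = mbuf_nextpkt(m))
		DLIL_INPUT_CHECK(m, ifp);
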
+
+#define        DLIL_EWMA(old, new, decay) do {                                 \
+       u_int32_t _avg;                                                 \
+       if ((_avg = (old)) > 0)                                         \
+               _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
+       else                                                            \
+               _avg = (new);                                           \
+       (old) = _avg;                                                   \
+} while (0)
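
A quick standalone check of the DLIL_EWMA arithmetic: with decay = 2 (the
IF_RXPOLL_DECAY default above), the update reduces to avg = (3*avg + new)/4,
i.e. an exponentially weighted moving average with weight 1/4 on the new
sample. A minimal userland sketch:

#include <stdio.h>

#define	DLIL_EWMA(old, new, decay) do {					\
	unsigned int _avg;						\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)

int
main(void)
{
	unsigned int avg = 0;
	unsigned int samples[] = { 100, 100, 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 5; i++) {
		DLIL_EWMA(avg, samples[i], 2);
		printf("sample=%u avg=%u\n", samples[i], avg);
	}
	/* avg decays 100, 100, 75, 56, 42 once the samples go idle */
	return (0);
}
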
+
+#define        MBPS    (1ULL * 1000 * 1000)
+#define        GBPS    (MBPS * 1000)
+
+struct rxpoll_time_tbl {
+       u_int64_t       speed;          /* downlink speed */
+       u_int32_t       plowat;         /* packets low watermark */
+       u_int32_t       phiwat;         /* packets high watermark */
+       u_int32_t       blowat;         /* bytes low watermark */
+       u_int32_t       bhiwat;         /* bytes high watermark */
+};
+
+static struct rxpoll_time_tbl rxpoll_tbl[] = {
+       {  10 * MBPS,   2,      8,      (1 * 1024),     (6 * 1024)      },
+       { 100 * MBPS,   10,     40,     (4 * 1024),     (64 * 1024)     },
+       {   1 * GBPS,   10,     40,     (4 * 1024),     (64 * 1024)     },
+       {  10 * GBPS,   10,     40,     (4 * 1024),     (64 * 1024)     },
+       { 100 * GBPS,   10,     40,     (4 * 1024),     (64 * 1024)     },
+       { 0, 0, 0, 0, 0 }
+};
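
The zero-terminated table above supplies per-link-speed watermarks which
dlil_rxpoll_calc_limits() (declared earlier; its body is outside this hunk)
presumably installs into the per-thread rxpoll_{p,b}{lo,hi}wat fields. A
hedged sketch of such a lookup, assuming first fit on downlink speed:

static const struct rxpoll_time_tbl *
rxpoll_lookup(u_int64_t link_speed)
{
	int i;

	for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
		if (link_speed <= rxpoll_tbl[i].speed)
			return (&rxpoll_tbl[i]);
	}
	return (&rxpoll_tbl[i - 1]);	/* faster than 100 Gbps: last row */
}

The low/high split is what gives the input model its hysteresis: in
dlil_rxpoll_input_thread_func() below, all three averages must fall to
the low watermarks before polling turns off, while turning it on requires
the packet average to reach its high watermark with bytes or wakeups
corroborating.
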
+
 /*
  * Internal functions.
  */
@@ -634,38 +817,154 @@ dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
        dlil_event_internal(ifp, &ev_msg);
 }
 
+__private_extern__ int
+dlil_alloc_local_stats(struct ifnet *ifp)
+{
+       int ret = EINVAL;
+       void *buf, *base, **pbuf;
+
+       if (ifp == NULL)
+               goto end;
+
+       if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
+               /* allocate tcpstat_local structure */
+               buf = zalloc(dlif_tcpstat_zone);
+               if (buf == NULL) {
+                       ret = ENOMEM;
+                       goto end;
+               }
+               bzero(buf, dlif_tcpstat_bufsize);
+
+               /* Get the 64-bit aligned base address for this object */
+               base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+                   sizeof (u_int64_t));
+               VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
+                   ((intptr_t)buf + dlif_tcpstat_bufsize));
+
+               /*
+                * Wind back a pointer size from the aligned base and
+                * save the original address so we can free it later.
+                */
+               pbuf = (void **)((intptr_t)base - sizeof (void *));
+               *pbuf = buf;
+               ifp->if_tcp_stat = base;
+
+               /* allocate udpstat_local structure */
+               buf = zalloc(dlif_udpstat_zone);
+               if (buf == NULL) {
+                       ret = ENOMEM;
+                       goto end;
+               }
+               bzero(buf, dlif_udpstat_bufsize);
+
+               /* Get the 64-bit aligned base address for this object */
+               base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+                   sizeof (u_int64_t));
+               VERIFY(((intptr_t)base + dlif_udpstat_size) <=
+                   ((intptr_t)buf + dlif_udpstat_bufsize));
+
+               /*
+                * Wind back a pointer size from the aligned base and
+                * save the original address so we can free it later.
+                */
+               pbuf = (void **)((intptr_t)base - sizeof (void *));
+               *pbuf = buf;
+               ifp->if_udp_stat = base;
+
+               VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
+                   IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
+
+               ret = 0;
+       }
+
+end:
+       if (ret != 0) {
+               if (ifp->if_tcp_stat != NULL) {
+                       pbuf = (void **)
+                           ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
+                       zfree(dlif_tcpstat_zone, *pbuf);
+                       ifp->if_tcp_stat = NULL;
+               }
+               if (ifp->if_udp_stat != NULL) {
+                       pbuf = (void **)
+                           ((intptr_t)ifp->if_udp_stat - sizeof (void *));
+                       zfree(dlif_udpstat_zone, *pbuf);
+                       ifp->if_udp_stat = NULL;
+               }
+       }
+
+       return (ret);
+}
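
The scheme above over-allocates each stats buffer by sizeof (void *) +
sizeof (u_int64_t), rounds the base up to an 8-byte boundary, and stashes
the raw zalloc() pointer one word below the aligned base so the free path
can recover it. A self-contained userland sketch of the same pattern
(hypothetical helper names; malloc/free stand in for zalloc/zfree):

#include <stdint.h>
#include <stdlib.h>

#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

static void *
aligned_alloc_stash(size_t size)
{
	size_t bufsize = size + sizeof (void *) + sizeof (uint64_t);
	void *buf = malloc(bufsize);
	void *base, **pbuf;

	if (buf == NULL)
		return (NULL);
	/* 64-bit aligned base, leaving headroom for the stashed pointer */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (uint64_t),
	    sizeof (uint64_t));
	/* wind back one pointer size and save the original address */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	return (base);
}

static void
aligned_free_stash(void *base)
{
	void **pbuf = (void **)((intptr_t)base - sizeof (void *));

	free(*pbuf);
}
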
+
 static int
-dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inputthread)
+dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
 {
+       thread_continue_t func;
+       u_int32_t limit;
        int error;
 
-       bzero(inputthread, sizeof(*inputthread));
-       /* loopback ifp may not be configured at dlil_init time. */
-       if (ifp == lo_ifp) {
-               (void) strlcat(inputthread->input_name,
-                   "dlil_input_main_thread_mtx", DLIL_THREADNAME_LEN);
+       /* NULL ifp indicates the main input thread, called at dlil_init time */
+       if (ifp == NULL) {
+               func = dlil_main_input_thread_func;
+               VERIFY(inp == dlil_main_input_thread);
+               (void) strlcat(inp->input_name,
+                   "main_input", DLIL_THREADNAME_LEN);
+       } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
+               func = dlil_rxpoll_input_thread_func;
+               VERIFY(inp != dlil_main_input_thread);
+               (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
+                   "%s%d_input_poll", ifp->if_name, ifp->if_unit);
        } else {
-               (void) snprintf(inputthread->input_name, DLIL_THREADNAME_LEN,
-                   "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);
+               func = dlil_input_thread_func;
+               VERIFY(inp != dlil_main_input_thread);
+               (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
+                   "%s%d_input", ifp->if_name, ifp->if_unit);
        }
+       VERIFY(inp->input_thr == THREAD_NULL);
 
-       inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name,
-           dlil_grp_attributes);
-       lck_mtx_init(&inputthread->input_lck, inputthread->lck_grp,
-           dlil_lck_attributes);
+       inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
+       lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
+
+       inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
+       inp->ifp = ifp;         /* NULL for main input thread */
+
+       net_timerclear(&inp->mode_holdtime);
+       net_timerclear(&inp->mode_lasttime);
+       net_timerclear(&inp->sample_holdtime);
+       net_timerclear(&inp->sample_lasttime);
+       net_timerclear(&inp->dbg_lasttime);
+
+       /*
+        * For interfaces that support opportunistic polling, set the
+        * low and high watermarks for outstanding inbound packets/bytes.
+        * Also define freeze times for transitioning between modes
+        * and updating the average.
+        */
+       if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
+               limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
+               dlil_rxpoll_calc_limits(inp);
+       } else {
+               limit = (u_int32_t)-1;
+       }
+
+       _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
+       if (inp == dlil_main_input_thread) {
+               struct dlil_main_threading_info *inpm =
+                   (struct dlil_main_threading_info *)inp;
+               _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
+       }
 
-       error= kernel_thread_start((thread_continue_t)dlil_input_thread_func,
-           inputthread, &inputthread->input_thread);
-       if (error == 0) {
-               ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
+       error = kernel_thread_start(func, inp, &inp->input_thr);
+       if (error == KERN_SUCCESS) {
+               ml_thread_policy(inp->input_thr, MACHINE_GROUP,
                    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
                /*
-                * Except for the loopback dlil input thread, we create
-                * an affinity set so that the matching workloop thread
-                * can be scheduled on the same processor set.
+                * We create an affinity set so that the matching workloop
+                * thread or the starter thread (for loopback) can be
+                * scheduled on the same processor set as the input thread.
                 */
-               if (net_affinity && inputthread != dlil_lo_thread_ptr) {
-                       struct thread *tp = inputthread->input_thread;
+               if (net_affinity) {
+                       struct thread *tp = inp->input_thr;
                        u_int32_t tag;
                        /*
                         * Randomize to reduce the probability
@@ -674,23 +973,79 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inputthread)
                        read_random(&tag, sizeof (tag));
                        if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
                                thread_reference(tp);
-                               inputthread->tag = tag;
-                               inputthread->net_affinity = TRUE;
+                               inp->tag = tag;
+                               inp->net_affinity = TRUE;
                        }
                }
+       } else if (inp == dlil_main_input_thread) {
+               panic_plain("%s: couldn't create main input thread", __func__);
+               /* NOTREACHED */
        } else {
-               panic("%s: couldn't create thread", __func__);
+               panic_plain("%s: couldn't create %s%d input thread", __func__,
+                   ifp->if_name, ifp->if_unit);
                /* NOTREACHED */
        }
        OSAddAtomic(1, &cur_dlil_input_threads);
-#if DLIL_DEBUG
-       printf("%s: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
-           __func__, inputthread, inputthread->input_thread,
-           dlil_multithreaded_input, cur_dlil_input_threads);
-#endif
+
        return (error);
 }
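
Note the queue sizing above: polling-capable interfaces get a bounded
receive queue (if_rcvq_maxlen, floored at IF_RCVQ_MINLEN), while the main
and legacy threads use an effectively unlimited (u_int32_t)-1 limit.
Q_DROPTAIL means an arriving packet is refused once the queue is full; a
minimal userland sketch of that policy (hypothetical names; the real
_qinit() classq implementation is introduced elsewhere in this commit):

#include <stddef.h>

struct pkt {
	struct pkt *next;
};

struct droptail_q {
	struct pkt *head, *tail;
	unsigned int len, limit;
};

static int
droptail_enqueue(struct droptail_q *q, struct pkt *p)
{
	if (q->len >= q->limit)
		return (-1);	/* full: drop the newcomer, keep the queue */
	p->next = NULL;
	if (q->tail != NULL)
		q->tail->next = p;
	else
		q->head = p;
	q->tail = p;
	q->len++;
	return (0);
}
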
 
+static void
+dlil_terminate_input_thread(struct dlil_threading_info *inp)
+{
+       struct ifnet *ifp;
+
+       VERIFY(current_thread() == inp->input_thr);
+       VERIFY(inp != dlil_main_input_thread);
+
+       OSAddAtomic(-1, &cur_dlil_input_threads);
+
+       lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
+       lck_grp_free(inp->lck_grp);
+
+       inp->input_waiting = 0;
+       inp->wtot = 0;
+       bzero(inp->input_name, sizeof (inp->input_name));
+       ifp = inp->ifp;
+       inp->ifp = NULL;
+       VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
+       qlimit(&inp->rcvq_pkts) = 0;
+       bzero(&inp->stats, sizeof (inp->stats));
+
+       VERIFY(!inp->net_affinity);
+       inp->input_thr = THREAD_NULL;
+       VERIFY(inp->wloop_thr == THREAD_NULL);
+       VERIFY(inp->poll_thr == THREAD_NULL);
+       VERIFY(inp->tag == 0);
+
+       inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
+       bzero(&inp->tstats, sizeof (inp->tstats));
+       bzero(&inp->pstats, sizeof (inp->pstats));
+       bzero(&inp->sstats, sizeof (inp->sstats));
+
+       net_timerclear(&inp->mode_holdtime);
+       net_timerclear(&inp->mode_lasttime);
+       net_timerclear(&inp->sample_holdtime);
+       net_timerclear(&inp->sample_lasttime);
+       net_timerclear(&inp->dbg_lasttime);
+
+#if IFNET_INPUT_SANITY_CHK
+       inp->input_mbuf_cnt = 0;
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+       if (dlil_verbose) {
+               printf("%s%d: input thread terminated\n",
+                   ifp->if_name, ifp->if_unit);
+       }
+
+       /* for the extra refcnt from kernel_thread_start() */
+       thread_deallocate(current_thread());
+
+       /* this is the end */
+       thread_terminate(current_thread());
+       /* NOTREACHED */
+}
+
 static kern_return_t
 dlil_affinity_set(struct thread *tp, u_int32_t tag)
 {
@@ -721,6 +1076,7 @@ dlil_init(void)
        IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
        IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
        IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+       IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
 
        IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
        IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
@@ -733,19 +1089,7 @@ dlil_init(void)
        IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
        IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
        IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
-
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkpackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkbytes);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkpackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkbytes);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivipackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivibytes);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovipackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovibytes);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivopackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivobytes);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovopackets);
-       IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovobytes);
+       IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
 
        /*
         * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
@@ -765,10 +1109,13 @@ dlil_init(void)
         * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
         */
        _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
+       _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
 
        PE_parse_boot_argn("net_affinity", &net_affinity,
            sizeof (net_affinity));
 
+       PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
+
        PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
 
        PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
@@ -781,7 +1128,8 @@ dlil_init(void)
        dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
            0, DLIF_ZONE_NAME);
        if (dlif_zone == NULL) {
-               panic("%s: failed allocating %s", __func__, DLIF_ZONE_NAME);
+               panic_plain("%s: failed allocating %s", __func__,
+                   DLIF_ZONE_NAME);
                /* NOTREACHED */
        }
        zone_change(dlif_zone, Z_EXPAND, TRUE);
@@ -791,28 +1139,18 @@ dlil_init(void)
        dlif_filt_zone = zinit(dlif_filt_size,
            DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
        if (dlif_filt_zone == NULL) {
-               panic("%s: failed allocating %s", __func__,
+               panic_plain("%s: failed allocating %s", __func__,
                    DLIF_FILT_ZONE_NAME);
                /* NOTREACHED */
        }
        zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
        zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
 
-       dlif_inp_size = sizeof (struct dlil_threading_info);
-       dlif_inp_zone = zinit(dlif_inp_size,
-           DLIF_INP_ZONE_MAX * dlif_inp_size, 0, DLIF_INP_ZONE_NAME);
-       if (dlif_inp_zone == NULL) {
-               panic("%s: failed allocating %s", __func__, DLIF_INP_ZONE_NAME);
-               /* NOTREACHED */
-       }
-       zone_change(dlif_inp_zone, Z_EXPAND, TRUE);
-       zone_change(dlif_inp_zone, Z_CALLERACCT, FALSE);
-
        dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
        dlif_phash_zone = zinit(dlif_phash_size,
            DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
        if (dlif_phash_zone == NULL) {
-               panic("%s: failed allocating %s", __func__,
+               panic_plain("%s: failed allocating %s", __func__,
                    DLIF_PHASH_ZONE_NAME);
                /* NOTREACHED */
        }
@@ -823,13 +1161,47 @@ dlil_init(void)
        dlif_proto_zone = zinit(dlif_proto_size,
            DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
        if (dlif_proto_zone == NULL) {
-               panic("%s: failed allocating %s", __func__,
+               panic_plain("%s: failed allocating %s", __func__,
                    DLIF_PROTO_ZONE_NAME);
                /* NOTREACHED */
        }
        zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
        zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
 
+       dlif_tcpstat_size = sizeof (struct tcpstat_local);
+       /* Enforce 64-bit alignment for tcpstat_local structure */
+       dlif_tcpstat_bufsize =
+           dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
+       dlif_tcpstat_bufsize =
+           P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
+       dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
+           DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
+           DLIF_TCPSTAT_ZONE_NAME);
+       if (dlif_tcpstat_zone == NULL) {
+               panic_plain("%s: failed allocating %s", __func__,
+                   DLIF_TCPSTAT_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
+       zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
+
+       dlif_udpstat_size = sizeof (struct udpstat_local);
+       /* Enforce 64-bit alignment for udpstat_local structure */
+       dlif_udpstat_bufsize =
+           dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
+       dlif_udpstat_bufsize =
+           P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
+       dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
+           DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
+           DLIF_UDPSTAT_ZONE_NAME);
+       if (dlif_udpstat_zone == NULL) {
+               panic_plain("%s: failed allocating %s", __func__,
+                   DLIF_UDPSTAT_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
+       zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
+
        ifnet_llreach_init();
 
        TAILQ_INIT(&dlil_ifnet_head);
@@ -839,13 +1211,15 @@ dlil_init(void)
        /* Setup the lock groups we will use */
        dlil_grp_attributes = lck_grp_attr_alloc_init();
 
-       dlil_lock_group = lck_grp_alloc_init("dlil internal locks",
+       dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
            dlil_grp_attributes);
        ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
            dlil_grp_attributes);
        ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
            dlil_grp_attributes);
-       dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock",
+       ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
+           dlil_grp_attributes);
+       ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
            dlil_grp_attributes);
 
        /* Setup the lock attributes we will use */
@@ -857,20 +1231,21 @@ dlil_init(void)
            dlil_lck_attributes);
        lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
 
+       ifnet_fc_init();
+
        lck_attr_free(dlil_lck_attributes);
        dlil_lck_attributes = NULL;
 
        ifa_init();
-
        /*
-        * Create and start up the first dlil input thread once everything
-        * is initialized.
+        * Create and start up the main DLIL input thread and the interface
+        * detacher thread once everything is initialized.
         */
-       dlil_create_input_thread(lo_ifp, dlil_lo_thread_ptr);
+       dlil_create_input_thread(NULL, dlil_main_input_thread);
 
-       if (kernel_thread_start((thread_continue_t)ifnet_delayed_thread_func,
-           NULL, &thread) != 0) {
-               panic("%s: couldn't create detach thread", __func__);
+       if (kernel_thread_start(ifnet_detacher_thread_func,
+           NULL, &thread) != KERN_SUCCESS) {
+               panic_plain("%s: couldn't create detacher thread", __func__);
                /* NOTREACHED */
        }
        thread_deallocate(thread);
@@ -879,6 +1254,12 @@ dlil_init(void)
        /* Initialize the packet filter */
        pfinit();
 #endif /* PF */
+
+       /* Initialize queue algorithms */
+       classq_init();
+
+       /* Initialize packet schedulers */
+       pktsched_init();
 }
 
 static void
@@ -1080,276 +1461,1157 @@ dlil_detach_filter(interface_filter_t filter)
        dlil_detach_filter_internal(filter, 0);
 }
 
+/*
+ * Main input thread:
+ *
+ *   a) handles all inbound packets for lo0
+ *   b) handles all inbound packets for interfaces with no dedicated
+ *     input thread (i.e. anything other than Ethernet/PDP or interfaces
+ *     that support opportunistic polling)
+ *   c) protocol registrations
+ *   d) packet injections
+ */
 static void
-dlil_input_thread_func(struct dlil_threading_info *inputthread)
+dlil_main_input_thread_func(void *v, wait_result_t w)
 {
+#pragma unused(w)
+       struct dlil_main_threading_info *inpm = v;
+       struct dlil_threading_info *inp = v;
+
+       VERIFY(inp == dlil_main_input_thread);
+       VERIFY(inp->ifp == NULL);
+       VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+
        while (1) {
                struct mbuf *m = NULL, *m_loop = NULL;
-#if IFNET_INPUT_SANITY_CHK
-               int             loop_cnt = 0, mbuf_cnt;
-               int             count;
-               struct mbuf *m1;
-#endif /* IFNET_INPUT_SANITY_CHK */
+               u_int32_t m_cnt, m_cnt_loop;
+               boolean_t proto_req;
 
-               lck_mtx_lock_spin(&inputthread->input_lck);
+               lck_mtx_lock_spin(&inp->input_lck);
 
                /* Wait until there is work to be done */
-               while (!(inputthread->input_waiting & ~DLIL_INPUT_RUNNING)) {
-                       inputthread->input_waiting &= ~DLIL_INPUT_RUNNING;
-                       msleep(&inputthread->input_waiting,
-                           &inputthread->input_lck, 0,
-                           inputthread->input_name, 0);
+               while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+                       inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+                       (void) msleep(&inp->input_waiting, &inp->input_lck,
+                           (PZERO - 1) | PSPIN, inp->input_name, NULL);
                }
 
-               lck_mtx_assert(&inputthread->input_lck, LCK_MTX_ASSERT_OWNED);
+               inp->input_waiting |= DLIL_INPUT_RUNNING;
+               inp->input_waiting &= ~DLIL_INPUT_WAITING;
 
-               m = inputthread->mbuf_head;
-               inputthread->mbuf_head = NULL;
-               inputthread->mbuf_tail = NULL;
+               /* Main input thread cannot be terminated */
+               VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
 
-               if (inputthread->input_waiting & DLIL_INPUT_TERMINATE) {
-                       lck_mtx_unlock(&inputthread->input_lck);
+               proto_req = (inp->input_waiting &
+                   (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
 
-                       if (m != NULL)
-                               mbuf_freem_list(m);
+               /* Packets for non-dedicated interfaces other than lo0 */
+               m_cnt = qlen(&inp->rcvq_pkts);
+               m = _getq_all(&inp->rcvq_pkts);
 
-                       OSAddAtomic(-1, &cur_dlil_input_threads);
+               /* Packets exclusive for lo0 */
+               m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
+               m_loop = _getq_all(&inpm->lo_rcvq_pkts);
 
-                       lck_mtx_destroy(&inputthread->input_lck,
-                           inputthread->lck_grp);
-                       lck_grp_free(inputthread->lck_grp);
+               inp->wtot = 0;
 
-                       zfree(dlif_inp_zone, inputthread);
+               lck_mtx_unlock(&inp->input_lck);
 
-                       /* for the extra refcnt from kernel_thread_start() */
-                       thread_deallocate(current_thread());
+               /*
+                * NOTE warning %%% attention !!!!
+                * We should think about putting some thread starvation
+                * safeguards if we deal with long chains of packets.
+                */
+               if (m_loop != NULL)
+                       dlil_input_packet_list_extended(lo_ifp, m_loop,
+                           m_cnt_loop, inp->mode);
 
-                       /* this is the end */
-                       thread_terminate(current_thread());
-                       /* NOTREACHED */
-                       return;
-               }
+               if (m != NULL)
+                       dlil_input_packet_list_extended(NULL, m,
+                           m_cnt, inp->mode);
+
+               if (proto_req)
+                       proto_input_run();
+       }
+
+       /* NOTREACHED */
+       VERIFY(0);      /* we should never get here */
+}
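
The wait loop above is a flag-driven handshake: producers set
DLIL_INPUT_WAITING and wake the thread, which marks itself
DLIL_INPUT_RUNNING, clears WAITING, and drains the queues outside the
lock. A userland analogue, with pthreads standing in for the kernel's
msleep()/wakeup() and illustrative flag names:

#include <pthread.h>

#define	INP_WAITING	0x1
#define	INP_RUNNING	0x2

static pthread_mutex_t inp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t inp_cv = PTHREAD_COND_INITIALIZER;
static unsigned int inp_flags;

static void
producer_wakeup(void)
{
	pthread_mutex_lock(&inp_lock);
	inp_flags |= INP_WAITING;
	if (!(inp_flags & INP_RUNNING))
		pthread_cond_signal(&inp_cv);
	pthread_mutex_unlock(&inp_lock);
}

static void *
consumer_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&inp_lock);
	for (;;) {
		while (!(inp_flags & ~INP_RUNNING)) {
			inp_flags &= ~INP_RUNNING;
			pthread_cond_wait(&inp_cv, &inp_lock);
		}
		inp_flags |= INP_RUNNING;
		inp_flags &= ~INP_WAITING;
		pthread_mutex_unlock(&inp_lock);
		/* drain queued work here, as the DLIL threads drain
		 * rcvq_pkts via _getq_all() */
		pthread_mutex_lock(&inp_lock);
	}
	/* NOTREACHED */
	return (NULL);
}
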
+
+/*
+ * Input thread for interfaces with legacy input model.
+ */
+static void
+dlil_input_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(w)
+       struct dlil_threading_info *inp = v;
+       struct ifnet *ifp = inp->ifp;
+
+       VERIFY(inp != dlil_main_input_thread);
+       VERIFY(ifp != NULL);
+       VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
+       VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
 
-               inputthread->input_waiting |= DLIL_INPUT_RUNNING;
-               inputthread->input_waiting &= ~DLIL_INPUT_WAITING;
+       while (1) {
+               struct mbuf *m = NULL;
+               u_int32_t m_cnt;
+
+               lck_mtx_lock_spin(&inp->input_lck);
 
-               if (inputthread == dlil_lo_thread_ptr) {
-                       m_loop = dlil_lo_input_mbuf_head;
-                       dlil_lo_input_mbuf_head = NULL;
-                       dlil_lo_input_mbuf_tail = NULL;
+               /* Wait until there is work to be done */
+               while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+                       inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+                       (void) msleep(&inp->input_waiting, &inp->input_lck,
+                           (PZERO - 1) | PSPIN, inp->input_name, NULL);
                }
 
-#if IFNET_INPUT_SANITY_CHK
-               if (dlil_input_sanity_check != 0) {
-                       mbuf_cnt = inputthread->mbuf_count;
-                       inputthread->mbuf_count = 0;
-                       if (inputthread == dlil_lo_thread_ptr) {
-                               loop_cnt = dlil_lo_input_mbuf_count;
-                               dlil_lo_input_mbuf_count = 0;
-                       }
+               inp->input_waiting |= DLIL_INPUT_RUNNING;
+               inp->input_waiting &= ~DLIL_INPUT_WAITING;
 
-                       lck_mtx_unlock(&inputthread->input_lck);
+               /*
+                * Protocol registration and injection must always use
+                * the main input thread; in theory the latter could use
+                * the input thread on which the packet arrived, but that
+                * requires knowing the interface in advance (and the
+                * benefits might not be worth the trouble.)
+                */
+               VERIFY(!(inp->input_waiting &
+                   (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
 
-                       for (m1 = m, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
-                               count++;
-                       }
-                       if (count != mbuf_cnt) {
-                               panic("%s - thread=%p reg. loop queue "
-                                   "has %d packets, should have %d\n",
-                                   __func__, inputthread, count, mbuf_cnt);
-                               /* NOTREACHED */
-                       }
+               /* Packets for this interface */
+               m_cnt = qlen(&inp->rcvq_pkts);
+               m = _getq_all(&inp->rcvq_pkts);
 
-                       if (inputthread == dlil_lo_thread_ptr) {
-                               for (m1 = m_loop, count = 0; m1;
-                                   m1 = mbuf_nextpkt(m1)) {
-                                       count++;
-                               }
-                               if (count != loop_cnt) {
-                                       panic("%s - thread=%p loop queue "
-                                           "has %d packets, should have %d\n",
-                                           __func__, inputthread, count,
-                                           loop_cnt);
-                                       /* NOTREACHED */
-                               }
-                       }
-               } else
-#endif /* IFNET_INPUT_SANITY_CHK */
-               {
-                       lck_mtx_unlock(&inputthread->input_lck);
+               if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+                       lck_mtx_unlock(&inp->input_lck);
+
+                       /* Free up pending packets */
+                       if (m != NULL)
+                               mbuf_freem_list(m);
+
+                       dlil_terminate_input_thread(inp);
+                       /* NOTREACHED */
+                       return;
                }
 
+               inp->wtot = 0;
+
+               dlil_input_stats_sync(ifp, inp);
+
+               lck_mtx_unlock(&inp->input_lck);
 
                /*
                 * NOTE warning %%% attention !!!!
                 * We should think about putting some thread starvation
                 * safeguards if we deal with long chains of packets.
                 */
-               if (m_loop) {
-                       if (inputthread == dlil_lo_thread_ptr) {
-                               dlil_input_packet_list(lo_ifp, m_loop);
-                       }
-#if IFNET_INPUT_SANITY_CHK
-                       else {
-                               panic("%s - thread=%p loop queue has %d "
-                                   "packets, should have none!\n", __func__,
-                                   inputthread, loop_cnt);
-                               /* NOTREACHED */
-                       }
-#endif /* IFNET_INPUT_SANITY_CHK */
-               }
-
                if (m != NULL)
-                       dlil_input_packet_list(0, m);
-
-               lck_mtx_lock_spin(&inputthread->input_lck);
-
-               if (inputthread->input_waiting &
-                   (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))  {
-                       lck_mtx_unlock(&inputthread->input_lck);
-                       proto_input_run();
-               } else {
-                       lck_mtx_unlock(&inputthread->input_lck);
-               }
+                       dlil_input_packet_list_extended(NULL, m,
+                           m_cnt, inp->mode);
        }
+
+       /* NOTREACHED */
+       VERIFY(0);      /* we should never get here */
 }
 
-errno_t
-ifnet_input(ifnet_t ifp, mbuf_t m_head,
-    const struct ifnet_stat_increment_param *stats)
+/*
+ * Input thread for interfaces with opportunistic polling input model.
+ */
+static void
+dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
 {
-       struct thread *tp = current_thread();
-       mbuf_t          m_tail;
-       struct dlil_threading_info *inp;
-#if IFNET_INPUT_SANITY_CHK
-       u_int32_t       pkt_count = 0;
-#endif /* IFNET_INPUT_SANITY_CHK */
+#pragma unused(w)
+       struct dlil_threading_info *inp = v;
+       struct ifnet *ifp = inp->ifp;
+       struct timespec ts;
 
-       if (ifp == NULL || m_head == NULL) {
-               if (m_head != NULL)
-                       mbuf_freem_list(m_head);
-               return (EINVAL);
-       }
+       VERIFY(inp != dlil_main_input_thread);
+       VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
 
-       m_tail = m_head;
        while (1) {
-#if IFNET_INPUT_SANITY_CHK
-               if (dlil_input_sanity_check != 0) {
-                       ifnet_t rcvif;
+               struct mbuf *m = NULL;
+               u_int32_t m_cnt, m_size, poll_req = 0;
+               ifnet_model_t mode;
+               struct timespec now, delta;
 
-                       rcvif = mbuf_pkthdr_rcvif(m_tail);
-                       pkt_count++;
+               lck_mtx_lock_spin(&inp->input_lck);
 
-                       if (rcvif == NULL ||
-                           (ifp->if_type != IFT_LOOP && rcvif != ifp) ||
-                           !(mbuf_flags(m_head) & MBUF_PKTHDR)) {
-                               panic("%s - invalid mbuf %p\n",
-                                   __func__, m_tail);
-                               /* NOTREACHED */
-                       }
+               /* Link parameters changed? */
+               if (ifp->if_poll_update != 0) {
+                       ifp->if_poll_update = 0;
+                       dlil_rxpoll_calc_limits(inp);
                }
-#endif /* IFNET_INPUT_SANITY_CHK */
-               if (mbuf_nextpkt(m_tail) == NULL)
-                       break;
-               m_tail = mbuf_nextpkt(m_tail);
-       }
 
-       inp = ifp->if_input_thread;
+               /* Current operating mode */
+               mode = inp->mode;
 
-       if (dlil_multithreaded_input == 0 || inp == NULL)
-               inp = dlil_lo_thread_ptr;
-
-       /*
-        * If there is a matching dlil input thread associated with an
-        * affinity set, associate this workloop thread with the same set.
-        * We will only do this once.
-        */
-       lck_mtx_lock_spin(&inp->input_lck);
-       if (inp->net_affinity && inp->workloop_thread == NULL) {
-               u_int32_t tag = inp->tag;
-               inp->workloop_thread = tp;
-               lck_mtx_unlock(&inp->input_lck);
+               /* Wait until there is work to be done */
+               while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING) &&
+                   qempty(&inp->rcvq_pkts)) {
+                       inp->input_waiting &= ~DLIL_INPUT_RUNNING;
+                       (void) msleep(&inp->input_waiting, &inp->input_lck,
+                           (PZERO - 1) | PSPIN, inp->input_name, NULL);
+               }
 
-               /* Associated the current thread with the new affinity tag */
-               (void) dlil_affinity_set(tp, tag);
+               inp->input_waiting |= DLIL_INPUT_RUNNING;
+               inp->input_waiting &= ~DLIL_INPUT_WAITING;
 
                /*
-                * Take a reference on the workloop (current) thread; during
-                * detach, we will need to refer to it in order ot tear down
-                * its affinity.
+                * Protocol registration and injection must always use
+                * the main input thread; in theory the latter could use
+                * the input thread on which the packet arrived, but that
+                * requires knowing the interface in advance (and the
+                * benefits might not be worth the trouble.)
                 */
-               thread_reference(tp);
-               lck_mtx_lock_spin(&inp->input_lck);
-       }
-
-        /* WARNING
-        * Because of loopbacked multicast we cannot stuff the ifp in
-        * the rcvif of the packet header: loopback has its own dlil
-        * input queue
-        */
+               VERIFY(!(inp->input_waiting &
+                   (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
 
-       if (inp == dlil_lo_thread_ptr && ifp->if_type == IFT_LOOP) {
-               if (dlil_lo_input_mbuf_head == NULL)
-                       dlil_lo_input_mbuf_head = m_head;
-               else if (dlil_lo_input_mbuf_tail != NULL)
-                       dlil_lo_input_mbuf_tail->m_nextpkt = m_head;
-               dlil_lo_input_mbuf_tail = m_tail;
-#if IFNET_INPUT_SANITY_CHK
-               if (dlil_input_sanity_check != 0) {
-                       dlil_lo_input_mbuf_count += pkt_count;
-                       inp->input_mbuf_cnt += pkt_count;
-                       inp->input_wake_cnt++;
+               if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
+                       /* Free up pending packets */
+                       _flushq(&inp->rcvq_pkts);
+                       lck_mtx_unlock(&inp->input_lck);
 
-                       lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+                       dlil_terminate_input_thread(inp);
+                       /* NOTREACHED */
+                       return;
                }
-#endif
-       } else {
-               if (inp->mbuf_head == NULL)
-                       inp->mbuf_head = m_head;
-               else if (inp->mbuf_tail != NULL)
-                       inp->mbuf_tail->m_nextpkt = m_head;
-               inp->mbuf_tail = m_tail;
-#if IFNET_INPUT_SANITY_CHK
-               if (dlil_input_sanity_check != 0) {
-                       inp->mbuf_count += pkt_count;
-                       inp->input_mbuf_cnt += pkt_count;
-                       inp->input_wake_cnt++;
 
-                       lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
-               }
-#endif
-       }
+               /* Total count of all packets */
+               m_cnt = qlen(&inp->rcvq_pkts);
+
+               /* Total bytes of all packets */
+               m_size = qsize(&inp->rcvq_pkts);
+
+               /* Packets for this interface */
+               m = _getq_all(&inp->rcvq_pkts);
+               VERIFY(m != NULL || m_cnt == 0);
+
+               nanouptime(&now);
+               if (!net_timerisset(&inp->sample_lasttime))
+                       *(&inp->sample_lasttime) = *(&now);
+
+               net_timersub(&now, &inp->sample_lasttime, &delta);
+               if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
+                       u_int32_t ptot, btot;
+
+                       /* Accumulate statistics for current sampling */
+                       PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
+
+                       if (net_timercmp(&delta, &inp->sample_holdtime, <))
+                               goto skip;
+
+                       *(&inp->sample_lasttime) = *(&now);
+
+                       /* Calculate min/max of inbound bytes */
+                       btot = (u_int32_t)inp->sstats.bytes;
+                       if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
+                               inp->rxpoll_bmin = btot;
+                       if (btot > inp->rxpoll_bmax)
+                               inp->rxpoll_bmax = btot;
+
+                       /* Calculate EWMA of inbound bytes */
+                       DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
+
+                       /* Calculate min/max of inbound packets */
+                       ptot = (u_int32_t)inp->sstats.packets;
+                       if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
+                               inp->rxpoll_pmin = ptot;
+                       if (ptot > inp->rxpoll_pmax)
+                               inp->rxpoll_pmax = ptot;
+
+                       /* Calculate EWMA of inbound packets */
+                       DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
+
+                       /* Reset sampling statistics */
+                       PKTCNTR_CLEAR(&inp->sstats);
+
+                       /* Calculate EWMA of wakeup requests */
+                       DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
+                       inp->wtot = 0;
+
+                       if (dlil_verbose) {
+                               if (!net_timerisset(&inp->dbg_lasttime))
+                                       *(&inp->dbg_lasttime) = *(&now);
+                               net_timersub(&now, &inp->dbg_lasttime, &delta);
+                               if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
+                                       *(&inp->dbg_lasttime) = *(&now);
+                                       printf("%s%d: [%s] pkts avg %d max %d "
+                                           "limits [%d/%d], wreq avg %d "
+                                           "limits [%d/%d], bytes avg %d "
+                                           "limits [%d/%d]\n", ifp->if_name,
+                                           ifp->if_unit, (inp->mode ==
+                                           IFNET_MODEL_INPUT_POLL_ON) ?
+                                           "ON" : "OFF", inp->rxpoll_pavg,
+                                           inp->rxpoll_pmax,
+                                           inp->rxpoll_plowat,
+                                           inp->rxpoll_phiwat,
+                                           inp->rxpoll_wavg,
+                                           inp->rxpoll_wlowat,
+                                           inp->rxpoll_whiwat,
+                                           inp->rxpoll_bavg,
+                                           inp->rxpoll_blowat,
+                                           inp->rxpoll_bhiwat);
+                               }
+                       }
 
-       inp->input_waiting |= DLIL_INPUT_WAITING;
-       if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
-               wakeup((caddr_t)&inp->input_waiting);
-       }
-       lck_mtx_unlock(&inp->input_lck);
+                       /* Perform mode transition, if necessary */
+                       if (!net_timerisset(&inp->mode_lasttime))
+                               *(&inp->mode_lasttime) = *(&now);
+
+                       net_timersub(&now, &inp->mode_lasttime, &delta);
+                       if (net_timercmp(&delta, &inp->mode_holdtime, <))
+                               goto skip;
+
+                       if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
+                           inp->rxpoll_bavg <= inp->rxpoll_blowat &&
+                           inp->rxpoll_wavg <= inp->rxpoll_wlowat &&
+                           inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
+                               mode = IFNET_MODEL_INPUT_POLL_OFF;
+                       } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
+                           (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
+                           inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
+                           inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
+                               mode = IFNET_MODEL_INPUT_POLL_ON;
+                       }
 
-       if (stats) {
-               atomic_add_64(&ifp->if_data.ifi_ipackets, stats->packets_in);
-               atomic_add_64(&ifp->if_data.ifi_ibytes, stats->bytes_in);
-               atomic_add_64(&ifp->if_data.ifi_ierrors, stats->errors_in);
+                       if (mode != inp->mode) {
+                               inp->mode = mode;
+                               *(&inp->mode_lasttime) = *(&now);
+                               poll_req++;
+                       }
+               }
+skip:
+               dlil_input_stats_sync(ifp, inp);
 
-               atomic_add_64(&ifp->if_data.ifi_opackets, stats->packets_out);
-               atomic_add_64(&ifp->if_data.ifi_obytes, stats->bytes_out);
-               atomic_add_64(&ifp->if_data.ifi_oerrors, stats->errors_out);
+               lck_mtx_unlock(&inp->input_lck);
 
-               atomic_add_64(&ifp->if_data.ifi_collisions, stats->collisions);
-               atomic_add_64(&ifp->if_data.ifi_iqdrops, stats->dropped);
-       }
+               /*
+                * If there's a mode change and the interface is still
+                * attached, perform a downcall to the driver for the new
+                * mode.  Also hold an IO refcnt on the interface to prevent
+                * it from being detached (will be released below.)
+                */
+               if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
+                       struct ifnet_model_params p = { mode, { 0 } };
+                       errno_t err;
+
+                       if (dlil_verbose) {
+                               printf("%s%d: polling is now %s, "
+                                   "pkts avg %d max %d limits [%d/%d], "
+                                   "wreq avg %d limits [%d/%d], "
+                                   "bytes avg %d limits [%d/%d]\n",
+                                   ifp->if_name, ifp->if_unit,
+                                   (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+                                   "ON" : "OFF", inp->rxpoll_pavg,
+                                   inp->rxpoll_pmax, inp->rxpoll_plowat,
+                                   inp->rxpoll_phiwat, inp->rxpoll_wavg,
+                                   inp->rxpoll_wlowat, inp->rxpoll_whiwat,
+                                   inp->rxpoll_bavg, inp->rxpoll_blowat,
+                                   inp->rxpoll_bhiwat);
+                       }
 
-       return (0);
-}
+                       if ((err = ((*ifp->if_input_ctl)(ifp,
+                           IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
+                               printf("%s%d: error setting polling mode "
+                                   "to %s (%d)\n", ifp->if_name, ifp->if_unit,
+                                   (mode == IFNET_MODEL_INPUT_POLL_ON) ?
+                                   "ON" : "OFF", err);
+                       }
 
-static int
-dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
-    char **frame_header_p, protocol_family_t protocol_family)
-{
-       struct ifnet_filter *filter;
+                       switch (mode) {
+                       case IFNET_MODEL_INPUT_POLL_OFF:
+                               ifnet_set_poll_cycle(ifp, NULL);
+                               inp->rxpoll_offreq++;
+                               if (err != 0)
+                                       inp->rxpoll_offerr++;
+                               break;
 
-       /*
-        * Pass the inbound packet to the interface filters
+                       case IFNET_MODEL_INPUT_POLL_ON:
+                               net_nsectimer(&if_rxpoll_interval_time, &ts);
+                               ifnet_set_poll_cycle(ifp, &ts);
+                               ifnet_poll(ifp);
+                               inp->rxpoll_onreq++;
+                               if (err != 0)
+                                       inp->rxpoll_onerr++;
+                               break;
+
+                       default:
+                               VERIFY(0);
+                               /* NOTREACHED */
+                       }
+
+                       /* Release the IO refcnt */
+                       ifnet_decr_iorefcnt(ifp);
+               }
+
+               /*
+                * NOTE: we should consider adding thread-starvation
+                * safeguards here for long chains of packets.
+                */
+               if (m != NULL)
+                       dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
+       }
+
+       /* NOTREACHED */
+       VERIFY(0);      /* we should never get here */
+}
+
+static void
+dlil_rxpoll_calc_limits(struct dlil_threading_info *inp)
+{
+       struct ifnet *ifp = inp->ifp;
+       u_int64_t sample_holdtime, inbw;
+
+       VERIFY(inp != dlil_main_input_thread);
+       VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
+
+       if ((inbw = ifnet_input_linkrate(ifp)) == 0) {
+               sample_holdtime = 0;    /* polling is disabled */
+               inp->rxpoll_wlowat = inp->rxpoll_plowat =
+                   inp->rxpoll_blowat = 0;
+               inp->rxpoll_whiwat = inp->rxpoll_phiwat =
+                   inp->rxpoll_bhiwat = (u_int32_t)-1;
+       } else {
+               unsigned int n, i;
+
+               n = 0;
+               for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
+                       if (inbw < rxpoll_tbl[i].speed)
+                               break;
+                       n = i;
+               }
+               sample_holdtime = if_rxpoll_sample_holdtime;
+               inp->rxpoll_wlowat = if_rxpoll_wlowat;
+               inp->rxpoll_whiwat = if_rxpoll_whiwat;
+               inp->rxpoll_plowat = rxpoll_tbl[n].plowat;
+               inp->rxpoll_phiwat = rxpoll_tbl[n].phiwat;
+               inp->rxpoll_blowat = rxpoll_tbl[n].blowat;
+               inp->rxpoll_bhiwat = rxpoll_tbl[n].bhiwat;
+       }
+
+       net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
+       net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
+
+       if (dlil_verbose) {
+               printf("%s%d: speed %llu bps, sample per %llu nsec, "
+                   "pkt limits [%d/%d], wreq limits [%d/%d], "
+                   "bytes limits [%d/%d]\n", ifp->if_name, ifp->if_unit,
+                   inbw, sample_holdtime, inp->rxpoll_plowat,
+                   inp->rxpoll_phiwat, inp->rxpoll_wlowat, inp->rxpoll_whiwat,
+                   inp->rxpoll_blowat, inp->rxpoll_bhiwat);
+       }
+}
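+
+/*
+ * Hypothetical illustration of the rxpoll_tbl lookup above (the actual
+ * table contents are defined elsewhere in this file and may differ):
+ * given rows for 10 Mbps, 100 Mbps and 1 Gbps and an input link rate
+ * of 300 Mbps, the loop breaks at the 1 Gbps row and leaves "n" at the
+ * 100 Mbps row, i.e. the highest row whose speed does not exceed the
+ * link rate; that row then supplies the packet and byte watermarks.
+ */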
+
+errno_t
+ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
+    const struct ifnet_stat_increment_param *s)
+{
+       return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
+}
+
+errno_t
+ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
+    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
+{
+       return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
+}
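+
+/*
+ * Hedged usage sketch for a hypothetical driver (the variable names are
+ * illustrative, not part of this change): after assembling a chain of
+ * received packets, the driver hands the whole chain to DLIL at once.
+ *
+ *     struct ifnet_stat_increment_param s;
+ *
+ *     bzero(&s, sizeof (s));
+ *     s.packets_in = cnt;             (must match the chain length)
+ *     s.bytes_in = totlen;            (may include link-layer headers)
+ *     (void) ifnet_input_extended(ifp, m_head, m_tail, &s);
+ */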
+
+static errno_t
+ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
+    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
+{
+       struct thread *tp = current_thread();
+       struct mbuf *last;
+       struct dlil_threading_info *inp;
+       u_int32_t m_cnt = 0, m_size = 0;
+
+       /*
+        * Drop the packet(s) if the parameters are invalid, or if the
+        * interface is no longer attached; else hold an IO refcnt to
+        * prevent it from being detached (will be released below.)
+        */
+       if (ifp == NULL || m_head == NULL || (s == NULL && ext) ||
+           (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
+               if (m_head != NULL)
+                       mbuf_freem_list(m_head);
+               return (EINVAL);
+       }
+
+       VERIFY(m_tail == NULL || ext);
+       VERIFY(s != NULL || !ext);
+
+       if (m_tail == NULL) {
+               last = m_head;
+               while (1) {
+#if IFNET_INPUT_SANITY_CHK
+                       if (dlil_input_sanity_check != 0)
+                               DLIL_INPUT_CHECK(last, ifp);
+#endif /* IFNET_INPUT_SANITY_CHK */
+                       m_cnt++;
+                       m_size += m_length(last);
+                       if (mbuf_nextpkt(last) == NULL)
+                               break;
+                       last = mbuf_nextpkt(last);
+               }
+               m_tail = last;
+       } else {
+#if IFNET_INPUT_SANITY_CHK
+               if (dlil_input_sanity_check != 0) {
+                       last = m_head;
+                       while (1) {
+                               DLIL_INPUT_CHECK(last, ifp);
+                               m_cnt++;
+                               m_size += m_length(last);
+                               if (mbuf_nextpkt(last) == NULL)
+                                       break;
+                               last = mbuf_nextpkt(last);
+                       }
+               } else {
+                       m_cnt = s->packets_in;
+                       m_size = s->bytes_in;
+                       last = m_tail;
+               }
+#else
+               m_cnt = s->packets_in;
+               m_size = s->bytes_in;
+               last = m_tail;
+#endif /* IFNET_INPUT_SANITY_CHK */
+       }
+
+       if (last != m_tail) {
+               panic_plain("%s: invalid input packet chain for %s%d, "
+                   "tail mbuf %p instead of %p\n", __func__, ifp->if_name,
+                   ifp->if_unit, m_tail, last);
+       }
+
+       /*
+        * Assert packet count only for the extended variant, for backwards
+        * compatibility, since this came directly from the device driver.
+        * Relax this assertion for input bytes, as the driver may have
+        * included the link-layer headers in the computation; hence
+        * m_size is just an approximation.
+        */
+       if (ext && s->packets_in != m_cnt) {
+               panic_plain("%s: input packet count mismatch for %s%d, "
+                   "%d instead of %d\n", __func__, ifp->if_name,
+                   ifp->if_unit, s->packets_in, m_cnt);
+       }
+
+       if ((inp = ifp->if_inp) == NULL)
+               inp = dlil_main_input_thread;
+
+       /*
+        * If there is a matching DLIL input thread associated with an
+        * affinity set, associate this thread with the same set.  We
+        * will only do this once.
+        */
+       lck_mtx_lock_spin(&inp->input_lck);
+       if (inp != dlil_main_input_thread && inp->net_affinity &&
+           ((!poll && inp->wloop_thr == THREAD_NULL) ||
+           (poll && inp->poll_thr == THREAD_NULL))) {
+               u_int32_t tag = inp->tag;
+
+               if (poll) {
+                       VERIFY(inp->poll_thr == THREAD_NULL);
+                       inp->poll_thr = tp;
+               } else {
+                       VERIFY(inp->wloop_thr == THREAD_NULL);
+                       inp->wloop_thr = tp;
+               }
+               lck_mtx_unlock(&inp->input_lck);
+
+               /* Associate the current thread with the new affinity tag */
+               (void) dlil_affinity_set(tp, tag);
+
+               /*
+                * Take a reference on the current thread; during detach,
+                * we will need to refer to it in order to tear down its
+                * affinity.
+                */
+               thread_reference(tp);
+               lck_mtx_lock_spin(&inp->input_lck);
+       }
+
+       /*
+        * Because of looped-back multicast we cannot stuff the ifp in
+        * the rcvif of the packet header: loopback (lo0) packets use a
+        * dedicated list so that we can later associate them with lo_ifp
+        * on their way up the stack.  Packets for other interfaces without
+        * dedicated input threads go to the regular list.
+        */
+       if (inp == dlil_main_input_thread && ifp == lo_ifp) {
+               struct dlil_main_threading_info *inpm =
+                   (struct dlil_main_threading_info *)inp;
+               _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, m_cnt, m_size);
+       } else {
+               _addq_multi(&inp->rcvq_pkts, m_head, m_tail, m_cnt, m_size);
+       }
+
+#if IFNET_INPUT_SANITY_CHK
+       if (dlil_input_sanity_check != 0) {
+               u_int32_t count;
+               struct mbuf *m0;
+
+               for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
+                       count++;
+
+               if (count != m_cnt) {
+                       panic_plain("%s%d: invalid packet count %d "
+                           "(expected %d)\n", ifp->if_name, ifp->if_unit,
+                           count, m_cnt);
+                       /* NOTREACHED */
+               }
+
+               inp->input_mbuf_cnt += m_cnt;
+       }
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+       if (s != NULL) {
+               dlil_input_stats_add(s, inp, poll);
+               /*
+                * If we're using the main input thread, synchronize the
+                * stats now since we have the interface context.  All
+                * other cases involving dedicated input threads will
+                * have their stats synchronized there.
+                */
+               if (inp == dlil_main_input_thread)
+                       dlil_input_stats_sync(ifp, inp);
+       }
+
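+       /*
+        * Signal the input thread if it is not already running; the
+        * wakeup counter (wtot) accumulated here feeds the EWMA that
+        * the poller uses when deciding on input mode transitions.
+        */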
+       inp->input_waiting |= DLIL_INPUT_WAITING;
+       if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+               inp->wtot++;
+               wakeup_one((caddr_t)&inp->input_waiting);
+       }
+       lck_mtx_unlock(&inp->input_lck);
+
+       if (ifp != lo_ifp) {
+               /* Release the IO refcnt */
+               ifnet_decr_iorefcnt(ifp);
+       }
+
+       return (0);
+}
+
+void
+ifnet_start(struct ifnet *ifp)
+{
+       /*
+        * If the starter thread is inactive, signal it to do work.
+        */
+       lck_mtx_lock_spin(&ifp->if_start_lock);
+       ifp->if_start_req++;
+       if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
+               wakeup_one((caddr_t)&ifp->if_start_thread);
+       }
+       lck_mtx_unlock(&ifp->if_start_lock);
+}
+
+static void
+ifnet_start_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+       struct ifnet *ifp = v;
+       char ifname[IFNAMSIZ + 1];
+       struct timespec *ts = NULL;
+       struct ifclassq *ifq = &ifp->if_snd;
+
+       /*
+        * Treat the dedicated starter thread for lo0 as equivalent to
+        * the driver workloop thread; if net_affinity is enabled for
+        * the main input thread, associate this starter thread with it
+        * by binding them with the same affinity tag.  This is done
+        * only once (as we only have one lo_ifp which never goes away.)
+        */
+       if (ifp == lo_ifp) {
+               struct dlil_threading_info *inp = dlil_main_input_thread;
+               struct thread *tp = current_thread();
+
+               lck_mtx_lock(&inp->input_lck);
+               if (inp->net_affinity) {
+                       u_int32_t tag = inp->tag;
+
+                       VERIFY(inp->wloop_thr == THREAD_NULL);
+                       VERIFY(inp->poll_thr == THREAD_NULL);
+                       inp->wloop_thr = tp;
+                       lck_mtx_unlock(&inp->input_lck);
+
+                       /* Associate this thread with the affinity tag */
+                       (void) dlil_affinity_set(tp, tag);
+               } else {
+                       lck_mtx_unlock(&inp->input_lck);
+               }
+       }
+
+       snprintf(ifname, sizeof (ifname), "%s%d_starter",
+           ifp->if_name, ifp->if_unit);
+
+       lck_mtx_lock_spin(&ifp->if_start_lock);
+
+       for (;;) {
+               (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
+                   (PZERO - 1) | PSPIN, ifname, ts);
+
+               /* interface is detached? */
+               if (ifp->if_start_thread == THREAD_NULL) {
+                       ifnet_set_start_cycle(ifp, NULL);
+                       lck_mtx_unlock(&ifp->if_start_lock);
+                       ifnet_purge(ifp);
+
+                       if (dlil_verbose) {
+                               printf("%s%d: starter thread terminated\n",
+                                   ifp->if_name, ifp->if_unit);
+                       }
+
+                       /* for the extra refcnt from kernel_thread_start() */
+                       thread_deallocate(current_thread());
+                       /* this is the end */
+                       thread_terminate(current_thread());
+                       /* NOTREACHED */
+                       return;
+               }
+
+               ifp->if_start_active = 1;
+               for (;;) {
+                       u_int32_t req = ifp->if_start_req;
+
+                       lck_mtx_unlock(&ifp->if_start_lock);
+                       /* invoke the driver's start routine */
+                       ((*ifp->if_start)(ifp));
+                       lck_mtx_lock_spin(&ifp->if_start_lock);
+
+                       /* if there's no pending request, we're done */
+                       if (req == ifp->if_start_req)
+                               break;
+               }
+               ifp->if_start_req = 0;
+               ifp->if_start_active = 0;
+               /*
+                * Wakeup N ns from now if rate-controlled by TBR, and if
+                * there are still packets in the send queue which haven't
+                * been dequeued so far; else sleep indefinitely (ts = NULL)
+                * until ifnet_start() is called again.
+                */
+               ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
+                   &ifp->if_start_cycle : NULL);
+
+               if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
+                       ts = NULL;
+       }
+
+       /* NOTREACHED */
+       lck_mtx_unlock(&ifp->if_start_lock);
+       VERIFY(0);      /* we should never get here */
+}
+
+void
+ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+       if (ts == NULL)
+               bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
+       else
+               *(&ifp->if_start_cycle) = *ts;
+
+       if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
+               printf("%s%d: restart interval set to %lu nsec\n",
+                   ifp->if_name, ifp->if_unit, ts->tv_nsec);
+}
+
+static void
+ifnet_poll(struct ifnet *ifp)
+{
+       /*
+        * If the poller thread is inactive, signal it to do work.
+        */
+       lck_mtx_lock_spin(&ifp->if_poll_lock);
+       ifp->if_poll_req++;
+       if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
+               wakeup_one((caddr_t)&ifp->if_poll_thread);
+       }
+       lck_mtx_unlock(&ifp->if_poll_lock);
+}
+
+static void
+ifnet_poll_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(w)
+       struct dlil_threading_info *inp;
+       struct ifnet *ifp = v;
+       char ifname[IFNAMSIZ + 1];
+       struct timespec *ts = NULL;
+       struct ifnet_stat_increment_param s;
+
+       snprintf(ifname, sizeof (ifname), "%s%d_poller",
+           ifp->if_name, ifp->if_unit);
+       bzero(&s, sizeof (s));
+
+       lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+       inp = ifp->if_inp;
+       VERIFY(inp != NULL);
+
+       for (;;) {
+               if (ifp->if_poll_thread != THREAD_NULL) {
+                       (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
+                           (PZERO - 1) | PSPIN, ifname, ts);
+               }
+
+               /* interface is detached (maybe while asleep)? */
+               if (ifp->if_poll_thread == THREAD_NULL) {
+                       ifnet_set_poll_cycle(ifp, NULL);
+                       lck_mtx_unlock(&ifp->if_poll_lock);
+
+                       if (dlil_verbose) {
+                               printf("%s%d: poller thread terminated\n",
+                                   ifp->if_name, ifp->if_unit);
+                       }
+
+                       /* for the extra refcnt from kernel_thread_start() */
+                       thread_deallocate(current_thread());
+                       /* this is the end */
+                       thread_terminate(current_thread());
+                       /* NOTREACHED */
+                       return;
+               }
+
+               ifp->if_poll_active = 1;
+               for (;;) {
+                       struct mbuf *m_head, *m_tail;
+                       u_int32_t m_lim, m_cnt, m_totlen;
+                       u_int16_t req = ifp->if_poll_req;
+
+                       lck_mtx_unlock(&ifp->if_poll_lock);
+
+                       /*
+                        * If no longer attached, there's nothing to do;
+                        * else hold an IO refcnt to prevent the interface
+                        * from being detached (will be released below.)
+                        */
+                       if (!ifnet_is_attached(ifp, 1))
+                               break;
+
+                       m_lim = (if_rxpoll_max != 0) ? if_rxpoll_max :
+                           MAX((qlimit(&inp->rcvq_pkts)),
+                           (inp->rxpoll_phiwat << 2));
+
+                       if (dlil_verbose > 1) {
+                               printf("%s%d: polling up to %d pkts, "
+                                   "pkts avg %d max %d, wreq avg %d, "
+                                   "bytes avg %d\n",
+                                   ifp->if_name, ifp->if_unit, m_lim,
+                                   inp->rxpoll_pavg, inp->rxpoll_pmax,
+                                   inp->rxpoll_wavg, inp->rxpoll_bavg);
+                       }
+
+                       /* invoke the driver's input poll routine */
+                       ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
+                           &m_cnt, &m_totlen));
+
+                       if (m_head != NULL) {
+                               VERIFY(m_tail != NULL && m_cnt > 0);
+
+                               if (dlil_verbose > 1) {
+                                       printf("%s%d: polled %d pkts, "
+                                           "pkts avg %d max %d, wreq avg %d, "
+                                           "bytes avg %d\n",
+                                           ifp->if_name, ifp->if_unit, m_cnt,
+                                           inp->rxpoll_pavg, inp->rxpoll_pmax,
+                                           inp->rxpoll_wavg, inp->rxpoll_bavg);
+                               }
+
+                               /* stats are required for extended variant */
+                               s.packets_in = m_cnt;
+                               s.bytes_in = m_totlen;
+
+                               (void) ifnet_input_common(ifp, m_head, m_tail,
+                                   &s, TRUE, TRUE);
+                       } else if (dlil_verbose > 1) {
+                               printf("%s%d: no packets, pkts avg %d max %d, "
+                                   "wreq avg %d, bytes avg %d\n", ifp->if_name,
+                                   ifp->if_unit, inp->rxpoll_pavg,
+                                   inp->rxpoll_pmax, inp->rxpoll_wavg,
+                                   inp->rxpoll_bavg);
+                       }
+
+                       /* Release the IO refcnt */
+                       ifnet_decr_iorefcnt(ifp);
+
+                       lck_mtx_lock_spin(&ifp->if_poll_lock);
+
+                       /* if there's no pending request, we're done */
+                       if (req == ifp->if_poll_req)
+                               break;
+               }
+               ifp->if_poll_req = 0;
+               ifp->if_poll_active = 0;
+
+               /*
+                * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
+                * until ifnet_poll() is called again.
+                */
+               ts = &ifp->if_poll_cycle;
+               if (ts->tv_sec == 0 && ts->tv_nsec == 0)
+                       ts = NULL;
+       }
+
+       /* NOTREACHED */
+       lck_mtx_unlock(&ifp->if_poll_lock);
+       VERIFY(0);      /* we should never get here */
+}
+
+void
+ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
+{
+       if (ts == NULL)
+               bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
+       else
+               *(&ifp->if_poll_cycle) = *ts;
+
+       if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
+               printf("%s%d: poll interval set to %lu nsec\n",
+                   ifp->if_name, ifp->if_unit, ts->tv_nsec);
+}
+
+void
+ifnet_purge(struct ifnet *ifp)
+{
+       if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
+               if_qflush(ifp, 0);
+}
+
+void
+ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!(IFCQ_IS_READY(ifq)))
+               return;
+
+       if (IFCQ_TBR_IS_ENABLED(ifq)) {
+               struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
+                   ifq->ifcq_tbr.tbr_percent, 0 };
+               (void) ifclassq_tbr_set(ifq, &tb, FALSE);
+       }
+
+       ifclassq_update(ifq, ev);
+}
+
+void
+ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
+{
+       switch (ev) {
+       case CLASSQ_EV_LINK_SPEED:
+               if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
+                       ifp->if_poll_update++;
+               break;
+
+       default:
+               break;
+       }
+}
+
+errno_t
+ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
+{
+       struct ifclassq *ifq;
+       u_int32_t omodel;
+       errno_t err;
+
+       if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
+           model != IFNET_SCHED_MODEL_NORMAL))
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK(ifq);
+       omodel = ifp->if_output_sched_model;
+       ifp->if_output_sched_model = model;
+       if ((err = ifclassq_pktsched_setup(ifq)) != 0)
+               ifp->if_output_sched_model = omodel;
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
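+
+/*
+ * Illustrative sketch, not part of this change: a driver that dequeues
+ * per service class would opt into the driver-managed model (typically
+ * at attach time) before using ifnet_dequeue_service_class() below.
+ *
+ *     (void) ifnet_set_output_sched_model(ifp,
+ *         IFNET_SCHED_MODEL_DRIVER_MANAGED);
+ */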
+
+errno_t
+ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+       if (ifp == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
+
+       return (0);
+}
+
+errno_t
+ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+       if (ifp == NULL || maxqlen == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
+
+       return (0);
+}
+
+errno_t
+ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *qlen)
+{
+       if (ifp == NULL || qlen == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       *qlen = ifclassq_get_len(&ifp->if_snd);
+
+       return (0);
+}
+
+errno_t
+ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
+{
+       struct dlil_threading_info *inp;
+
+       if (ifp == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+               return (ENXIO);
+
+       if (maxqlen == 0)
+               maxqlen = if_rcvq_maxlen;
+       else if (maxqlen < IF_RCVQ_MINLEN)
+               maxqlen = IF_RCVQ_MINLEN;
+
+       inp = ifp->if_inp;
+       lck_mtx_lock(&inp->input_lck);
+       qlimit(&inp->rcvq_pkts) = maxqlen;
+       lck_mtx_unlock(&inp->input_lck);
+
+       return (0);
+}
+
+errno_t
+ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
+{
+       struct dlil_threading_info *inp;
+
+       if (ifp == NULL || maxqlen == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
+               return (ENXIO);
+
+       inp = ifp->if_inp;
+       lck_mtx_lock(&inp->input_lck);
+       *maxqlen = qlimit(&inp->rcvq_pkts);
+       lck_mtx_unlock(&inp->input_lck);
+       return (0);
+}
+
+errno_t
+ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+       int error;
+
+       if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
+           m->m_nextpkt != NULL) {
+               if (m != NULL)
+                       m_freem_list(m);
+               return (EINVAL);
+       } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+           !(ifp->if_refflags & IFRF_ATTACHED)) {
+               /* flag tested without lock for performance */
+               m_freem(m);
+               return (ENXIO);
+       } else if (!(ifp->if_flags & IFF_UP)) {
+               m_freem(m);
+               return (ENETDOWN);
+       }
+
+       /* enqueue the packet */
+       error = ifclassq_enqueue(&ifp->if_snd, m);
+
+       /*
+        * Tell the driver to start dequeueing; do this even when the queue
+        * for the packet is suspended (EQSUSPENDED), as the driver could still
+        * be dequeueing from other unsuspended queues.
+        */
+       if (error == 0 || error == EQFULL || error == EQSUSPENDED)
+               ifnet_start(ifp);
+
+       return (error);
+}
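+
+/*
+ * Hedged sketch of the new transmit model from the stack's side: a
+ * caller enqueues the packet and, unless the queue reports a drop,
+ * ifnet_enqueue() itself kicks the starter thread, which invokes the
+ * driver's if_start routine to drain the queue.
+ *
+ *     error = ifnet_enqueue(ifp, m);
+ *
+ * EQFULL/EQSUSPENDED still reach the starter thread, since the driver
+ * may be able to dequeue from other, unsuspended queues.
+ */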
+
+errno_t
+ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
+{
+       if (ifp == NULL || mp == NULL)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+           (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+               return (ENXIO);
+
+       return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL));
+}
+
+errno_t
+ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
+    struct mbuf **mp)
+{
+       if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+           (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+               return (ENXIO);
+
+       return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL));
+}
+
+errno_t
+ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
+    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
+{
+       if (ifp == NULL || head == NULL || limit < 1)
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+           (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
+               return (ENXIO);
+
+       return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len));
+}
+
+errno_t
+ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
+    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
+    u_int32_t *len)
+{
+       if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
+               return (EINVAL);
+       else if (!(ifp->if_eflags & IFEF_TXSTART) ||
+           (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
+               return (ENXIO);
+
+       return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
+           tail, cnt, len));
+}
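+
+/*
+ * Hedged sketch of a driver's if_start callback built on the dequeue
+ * KPIs above; the callback body and names are assumptions made purely
+ * for illustration (IFNET_SCHED_MODEL_NORMAL case):
+ *
+ *     static void
+ *     mydrv_start(struct ifnet *ifp)
+ *     {
+ *             struct mbuf *m;
+ *
+ *             while (ifnet_dequeue(ifp, &m) == 0)
+ *                     mydrv_tx_submit(ifp, m);
+ *     }
+ */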
+
+static int
+dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
+    char **frame_header_p, protocol_family_t protocol_family)
+{
+       struct ifnet_filter *filter;
+
+       /*
+        * Pass the inbound packet to the interface filters
         */
        lck_mtx_lock_spin(&ifp->if_flt_lock);
        /* prevent filter list from changing in case we drop the lock */
@@ -1458,8 +2720,110 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
        return;
 }
 
+static void
+dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
+    struct dlil_threading_info *inp, boolean_t poll)
+{
+       struct ifnet_stat_increment_param *d = &inp->stats;
+
+       if (s->packets_in != 0)
+               d->packets_in += s->packets_in;
+       if (s->bytes_in != 0)
+               d->bytes_in += s->bytes_in;
+       if (s->errors_in != 0)
+               d->errors_in += s->errors_in;
+
+       if (s->packets_out != 0)
+               d->packets_out += s->packets_out;
+       if (s->bytes_out != 0)
+               d->bytes_out += s->bytes_out;
+       if (s->errors_out != 0)
+               d->errors_out += s->errors_out;
+
+       if (s->collisions != 0)
+               d->collisions += s->collisions;
+       if (s->dropped != 0)
+               d->dropped += s->dropped;
+
+       if (poll)
+               PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
+}
+
+static void
+dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
+{
+       struct ifnet_stat_increment_param *s = &inp->stats;
+
+       /*
+        * Use of atomic operations is unavoidable here because
+        * these stats may also be incremented elsewhere via KPIs.
+        */
+       if (s->packets_in != 0) {
+               atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
+               s->packets_in = 0;
+       }
+       if (s->bytes_in != 0) {
+               atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
+               s->bytes_in = 0;
+       }
+       if (s->errors_in != 0) {
+               atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
+               s->errors_in = 0;
+       }
+
+       if (s->packets_out != 0) {
+               atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
+               s->packets_out = 0;
+       }
+       if (s->bytes_out != 0) {
+               atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
+               s->bytes_out = 0;
+       }
+       if (s->errors_out != 0) {
+               atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
+               s->errors_out = 0;
+       }
+
+       if (s->collisions != 0) {
+               atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
+               s->collisions = 0;
+       }
+       if (s->dropped != 0) {
+               atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
+               s->dropped = 0;
+       }
+
+       /*
+        * No need for atomic operations as they are modified here
+        * only from within the DLIL input thread context.
+        */
+       if (inp->tstats.packets != 0) {
+               inp->pstats.ifi_poll_packets += inp->tstats.packets;
+               inp->tstats.packets = 0;
+       }
+       if (inp->tstats.bytes != 0) {
+               inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
+               inp->tstats.bytes = 0;
+       }
+}
+
+__private_extern__ void
+dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
+{
+       return (dlil_input_packet_list_common(ifp, m, 0,
+           IFNET_MODEL_INPUT_POLL_OFF, FALSE));
+}
+
 __private_extern__ void
-dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
+dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
+    u_int32_t cnt, ifnet_model_t mode)
+{
+       return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
+}
+
+static void
+dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
+    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
 {
        int                             error = 0;
        protocol_family_t               protocol_family;
@@ -1469,9 +2833,13 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
        struct if_proto *               last_ifproto = NULL;
        mbuf_t                          pkt_first = NULL;
        mbuf_t *                        pkt_next = NULL;
+       u_int32_t                       poll_thresh = 0, poll_ival = 0;
 
        KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
 
+       if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
+           (poll_ival = if_rxpoll_interval_pkts) > 0)
+               poll_thresh = cnt;
 
        while (m != NULL) {
                struct if_proto *ifproto = NULL;
@@ -1480,16 +2848,22 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
                if (ifp_param == NULL)
                        ifp = m->m_pkthdr.rcvif;
 
+               if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
+                   poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
+                       ifnet_poll(ifp);
+
                /* Check if this mbuf looks valid */
-               MBUF_INPUT_CHECK(m, ifp); 
+               MBUF_INPUT_CHECK(m, ifp);
 
                next_packet = m->m_nextpkt;
                m->m_nextpkt = NULL;
                frame_header = m->m_pkthdr.header;
                m->m_pkthdr.header = NULL;
 
-               /* Get an IO reference count if the interface is not 
-                * loopback and it is attached.
+               /*
+                * Get an IO reference count if the interface is not
+                * loopback (lo0) and it is attached; lo0 never goes
+                * away, so optimize for that.
                 */
                if (ifp != lo_ifp) {
                        if (!ifnet_is_attached(ifp, 1)) {
@@ -1499,22 +2873,7 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
                        iorefcnt = 1;
                }
 
-               switch (m->m_pkthdr.prio) {
-                       case MBUF_TC_BK:
-                               atomic_add_64(&ifp->if_tc.ifi_ibkpackets, 1);
-                               atomic_add_64(&ifp->if_tc.ifi_ibkbytes, m->m_pkthdr.len);
-                               break;
-                       case MBUF_TC_VI:
-                               atomic_add_64(&ifp->if_tc.ifi_ivipackets, 1);
-                               atomic_add_64(&ifp->if_tc.ifi_ivibytes, m->m_pkthdr.len);
-                               break;
-                       case MBUF_TC_VO:
-                               atomic_add_64(&ifp->if_tc.ifi_ivopackets, 1);
-                               atomic_add_64(&ifp->if_tc.ifi_ivobytes, m->m_pkthdr.len);
-                               break;
-                       default:
-                               break;
-               }
+               ifp_inc_traffic_class_in(ifp, m);
 
                /* find which protocol family this packet is for */
                ifnet_lock_shared(ifp);
@@ -1527,6 +2886,10 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
                        protocol_family = 0;
                }
 
+#if CONFIG_EMBEDDED
+               iptap_ipf_input(ifp, protocol_family, m, frame_header);
+#endif /* CONFIG_EMBEDDED */
+
                if (m->m_flags & (M_BCAST|M_MCAST))
                        atomic_add_64(&ifp->if_imcasts, 1);
 
@@ -1595,7 +2958,7 @@ next:
                        if_proto_free(ifproto);
                        ifproto = NULL;
                }
-               
+
                m = next_packet;
 
                /* update the driver's multicast filter, if needed */
@@ -1606,7 +2969,6 @@ next:
        }
 
        KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
-       return;
 }
 
 errno_t
@@ -1760,27 +3122,78 @@ dlil_get_socket_type(struct mbuf **mp, int family, int raw)
 }
 #endif
 
-static void
-if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
+/*
+ * This is mostly called from the context of the DLIL input thread;
+ * because of that there is no need for atomic operations.
+ */
+static __inline void
+ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
 {
        if (!(m->m_flags & M_PKTHDR))
                return;
 
-       switch (m->m_pkthdr.prio) {
-               case MBUF_TC_BK:
-                       atomic_add_64(&ifp->if_tc.ifi_obkpackets, 1);
-                       atomic_add_64(&ifp->if_tc.ifi_obkbytes, m->m_pkthdr.len);
-                       break;
-               case MBUF_TC_VI:
-                       atomic_add_64(&ifp->if_tc.ifi_ovipackets, 1);
-                       atomic_add_64(&ifp->if_tc.ifi_ovibytes, m->m_pkthdr.len);
-                       break;
-               case MBUF_TC_VO:
-                       atomic_add_64(&ifp->if_tc.ifi_ovopackets, 1);
-                       atomic_add_64(&ifp->if_tc.ifi_ovobytes, m->m_pkthdr.len);
-                       break;
-               default:
-                       break;
+       switch (m_get_traffic_class(m)) {
+       case MBUF_TC_BE:
+               ifp->if_tc.ifi_ibepackets++;
+               ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_BK:
+               ifp->if_tc.ifi_ibkpackets++;
+               ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_VI:
+               ifp->if_tc.ifi_ivipackets++;
+               ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_VO:
+               ifp->if_tc.ifi_ivopackets++;
+               ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
+               break;
+       default:
+               break;
+       }
+
+       if (mbuf_is_traffic_class_privileged(m)) {
+               ifp->if_tc.ifi_ipvpackets++;
+               ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
+       }
+}
+
+/*
+ * This is called from DLIL output, hence multiple threads could end
+ * up modifying the statistics.  We trade off accuracy for performance
+ * by not using atomic operations here.
+ */
+static __inline void
+ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
+{
+       if (!(m->m_flags & M_PKTHDR))
+               return;
+
+       switch (m_get_traffic_class(m)) {
+       case MBUF_TC_BE:
+               ifp->if_tc.ifi_obepackets++;
+               ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_BK:
+               ifp->if_tc.ifi_obkpackets++;
+               ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_VI:
+               ifp->if_tc.ifi_ovipackets++;
+               ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
+               break;
+       case MBUF_TC_VO:
+               ifp->if_tc.ifi_ovopackets++;
+               ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
+               break;
+       default:
+               break;
+       }
+
+       if (mbuf_is_traffic_class_privileged(m)) {
+               ifp->if_tc.ifi_opvpackets++;
+               ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
        }
 }
 
@@ -1796,10 +3209,14 @@ if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
  * an interface lock if we're going to take both. This makes sense
  * because a protocol is likely to interact with an ifp while it
  * is under the protocol lock.
+ *
+ * An advisory code will be returned if adv is not null.  This
+ * can be used to provide feedback about interface queues to the
+ * application.
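+ *
+ * Hedged caller sketch (the setup shown is an assumption for
+ * illustration, not part of this change):
+ *
+ *     struct flowadv adv;
+ *
+ *     adv.code = FADV_SUCCESS;
+ *     error = dlil_output(ifp, PF_INET, m, NULL, dest, 0, &adv);
+ *     if (error == 0 && adv.code != FADV_SUCCESS)
+ *             the flow is being flow-controlled or suspended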
  */
 errno_t
 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
-    void *route, const struct sockaddr *dest, int raw)
+    void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
 {
        char *frame_type = NULL;
        char *dst_linkaddr = NULL;
@@ -1811,6 +3228,9 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
        mbuf_t  send_head = NULL;
        mbuf_t  *send_tail = &send_head;
        int iorefcnt = 0;
+#if CONFIG_EMBEDDED
+       u_int32_t pre = 0, post = 0;
+#endif /* CONFIG_EMBEDDED */
 
        KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
 
@@ -1877,13 +3297,13 @@ preout_again:
 
        do {
 #if CONFIG_DTRACE
-               if (proto_family == PF_INET) {
+               if (!raw && proto_family == PF_INET) {
                        struct ip *ip = mtod(m, struct ip*);
                        DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
                                struct ip *, ip, struct ifnet *, ifp,
                                struct ip *, ip, struct ip6_hdr *, NULL);
 
-               } else if (proto_family == PF_INET6) {
+               } else if (!raw && proto_family == PF_INET6) {
                        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
                        DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
                                struct ip6_hdr *, ip6, struct ifnet*, ifp,
@@ -1909,7 +3329,12 @@ preout_again:
                        }
 
                        retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
-                           frame_type);
+                           frame_type
+#if CONFIG_EMBEDDED
+                           ,
+                           &pre, &post
+#endif /* CONFIG_EMBEDDED */
+                           );
                        if (retval) {
                                if (retval != EJUSTRETURN)
                                        m_freem(m);
@@ -1987,10 +3412,22 @@ preout_again:
                        *send_tail = m;
                        send_tail = &m->m_nextpkt;
                } else {
-                       if_inc_traffic_class_out(ifp, m);
+#if CONFIG_EMBEDDED
+                       iptap_ipf_output(ifp, proto_family, (struct mbuf *)m,
+                           pre, post);
+#endif /* CONFIG_EMBEDDED */
+                       ifp_inc_traffic_class_out(ifp, m);
                        KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
                            0,0,0,0,0);
-                       retval = ifp->if_output(ifp, m);
+                       retval = (*ifp->if_output)(ifp, m);
+                       if (retval == EQFULL || retval == EQSUSPENDED) {
+                               if (adv != NULL && adv->code == FADV_SUCCESS) {
+                                       adv->code = (retval == EQFULL ?
+                                           FADV_FLOW_CONTROLLED :
+                                           FADV_SUSPENDED);
+                               }
+                               retval = 0;
+                       }
                        if (retval && dlil_verbose) {
                                printf("%s: output error on %s%d retval = %d\n",
                                    __func__, ifp->if_name, ifp->if_unit,
@@ -2010,10 +3447,21 @@ next:
        } while (m);
 
        if (send_head) {
-               if_inc_traffic_class_out(ifp, send_head);
+#if CONFIG_EMBEDDED
+               iptap_ipf_output(ifp, proto_family, (struct mbuf *)send_head,
+                   pre, post);
+#endif /* CONFIG_EMBEDDED */
+               ifp_inc_traffic_class_out(ifp, send_head);
 
                KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
-               retval = ifp->if_output(ifp, send_head);
+               retval = (*ifp->if_output)(ifp, send_head);
+               if (retval == EQFULL || retval == EQSUSPENDED) {
+                       if (adv != NULL) {
+                               adv->code = (retval == EQFULL ?
+                                   FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
+                       }
+                       retval = 0;
+               }
                if (retval && dlil_verbose) {
                        printf("%s: output error on %s%d retval = %d\n",
                            __func__, ifp->if_name, ifp->if_unit, retval);
@@ -2183,7 +3631,7 @@ dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
                    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
                if (resolvep != NULL)
                        result = resolvep(ifp, proto_addr,
-                           (struct sockaddr_dl*)ll_addr, ll_len);
+                           (struct sockaddr_dl*)(void *)ll_addr, ll_len);
                if_proto_free(proto);
        }
 
@@ -2229,6 +3677,31 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
        return (result);
 }
 
+__private_extern__ errno_t
+net_thread_check_lock(u_int32_t flag)
+{
+       struct uthread *uth = get_bsdthread_info(current_thread());
+       return ((uth->uu_network_lock_held & flag) == flag);
+}
+
+__private_extern__ void
+net_thread_set_lock(u_int32_t flag)
+{
+       struct uthread *uth = get_bsdthread_info(current_thread());
+
+       VERIFY((uth->uu_network_lock_held & flag) != flag);
+       uth->uu_network_lock_held |= flag;
+}
+
+__private_extern__ void
+net_thread_unset_lock(u_int32_t flag)
+{
+       struct uthread *uth = get_bsdthread_info(current_thread());
+
+       VERIFY((uth->uu_network_lock_held & flag) == flag);
+       uth->uu_network_lock_held &= (~flag);
+}
+
 static __inline__ int
 _is_announcement(const struct sockaddr_in * sender_sin,
     const struct sockaddr_in * target_sin)
@@ -2242,24 +3715,37 @@ _is_announcement(const struct sockaddr_in * sender_sin,
 __private_extern__ errno_t
 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
     const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
-    const struct sockaddr* target_proto)
+    const struct sockaddr* target_proto0, u_int32_t rtflags)
 {
        errno_t result = 0;
        const struct sockaddr_in * sender_sin;
        const struct sockaddr_in * target_sin;
+       struct sockaddr_inarp target_proto_sinarp;
+       struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
 
        if (target_proto == NULL || (sender_proto != NULL &&
            sender_proto->sa_family != target_proto->sa_family))
                return (EINVAL);
 
+       /*
+        * If the target is a (default) router, provide that
+        * information to the send_arp callback routine.
+        */
+       if (rtflags & RTF_ROUTER) {
+               bcopy(target_proto, &target_proto_sinarp,
+                   sizeof (struct sockaddr_in));
+               target_proto_sinarp.sin_other |= SIN_ROUTER;
+               target_proto = (struct sockaddr *)&target_proto_sinarp;
+       }
+
        /*
         * If this is an ARP request and the target IP is IPv4LL,
         * send the request on all interfaces.  The exception is
         * an announcement, which must only appear on the specific
         * interface.
         */
-       sender_sin = (const struct sockaddr_in *)sender_proto;
-       target_sin = (const struct sockaddr_in *)target_proto;
+       sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
+       target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
        if (target_proto->sa_family == AF_INET &&
            IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
            ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
@@ -2298,7 +3784,7 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
                                                /* Copy the source IP address */
                                                source_ip_copy =
                                                    *(struct sockaddr_in *)
-                                                   source_ip->ifa_addr;
+                                                   (void *)source_ip->ifa_addr;
                                                IFA_UNLOCK(source_ip);
                                                break;
                                        }
@@ -2316,8 +3802,8 @@ dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
 
                                /* Send the ARP */
                                new_result = dlil_send_arp_internal(cur_ifp,
-                                   arpop,
-                                   (struct sockaddr_dl *)source_hw->ifa_addr,
+                                   arpop, (struct sockaddr_dl *)(void *)
+                                   source_hw->ifa_addr,
                                    (struct sockaddr *)&source_ip_copy, NULL,
                                    target_proto);
 
@@ -2385,10 +3871,6 @@ ifnet_decr_iorefcnt(struct ifnet *ifp)
         */
        if (ifp->if_refio == 0 && 
                (ifp->if_refflags & IFRF_DETACHING) != 0) {
-               /* Convert the spinlock to a regular mutex if we have
-                * to wait for any reason while doing a wakeup.
-                */
-               lck_mtx_convert_spin(&ifp->if_ref_lock);
                wakeup(&(ifp->if_refio));
        }
        lck_mtx_unlock(&ifp->if_ref_lock);
@@ -2767,6 +4249,9 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        struct ifaddr *ifa;
        struct if_data_internal if_data_saved;
        struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+       struct dlil_threading_info *dl_inp;
+       u_int32_t sflags = 0;
+       int err;
 
        if (ifp == NULL)
                return (EINVAL);
@@ -2790,7 +4275,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
 
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        if (ifp->if_refflags & IFRF_ATTACHED) {
-               panic("%s: flags mismatch (attached set) ifp=%p",
+               panic_plain("%s: flags mismatch (attached set) ifp=%p",
                    __func__, ifp);
                /* NOTREACHED */
        }
@@ -2856,9 +4341,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
        TAILQ_INIT(&ifp->if_addrhead);
 
-       if (ifp->if_snd.ifq_maxlen == 0)
-               ifp->if_snd.ifq_maxlen = ifqmaxlen;
-
        if (ifp->if_index == 0) {
                int idx = if_next_index();
 
@@ -2905,52 +4387,125 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        /* Hold a reference to the underlying dlil_ifnet */
        ifnet_reference(ifp);
 
+       /* Clear stats (save and restore other fields that we care about) */
+       if_data_saved = ifp->if_data;
+       bzero(&ifp->if_data, sizeof (ifp->if_data));
+       ifp->if_data.ifi_type = if_data_saved.ifi_type;
+       ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
+       ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
+       ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
+       ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
+       ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
+       ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
+       ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
+       ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
+       ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
+       ifnet_touch_lastchange(ifp);
+
+       VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
+           ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);
+
+       /* By default, use SFB and enable flow advisory */
+       sflags = PKTSCHEDF_QALG_SFB;
+       if (if_flowadv)
+               sflags |= PKTSCHEDF_QALG_FLOWCTL;
+
+       /* Initialize transmit queue(s) */
+       err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
+       if (err != 0) {
+               panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
+                   "err=%d", __func__, ifp, err);
+               /* NOTREACHED */
+       }
+
+       /* Sanity checks on the input thread storage */
+       dl_inp = &dl_if->dl_if_inpstorage;
+       bzero(&dl_inp->stats, sizeof (dl_inp->stats));
+       VERIFY(dl_inp->input_waiting == 0);
+       VERIFY(dl_inp->wtot == 0);
+       VERIFY(dl_inp->ifp == NULL);
+       VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
+       VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
+       VERIFY(!dl_inp->net_affinity);
+       VERIFY(ifp->if_inp == NULL);
+       VERIFY(dl_inp->input_thr == THREAD_NULL);
+       VERIFY(dl_inp->wloop_thr == THREAD_NULL);
+       VERIFY(dl_inp->poll_thr == THREAD_NULL);
+       VERIFY(dl_inp->tag == 0);
+       VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
+       bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
+       bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
+       bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
+#if IFNET_INPUT_SANITY_CHK
+       VERIFY(dl_inp->input_mbuf_cnt == 0);
+#endif /* IFNET_INPUT_SANITY_CHK */
+
+       /*
+        * A specific DLIL input thread is created per Ethernet/cellular
+        * interface or for an interface which supports opportunistic
+        * input polling.  Pseudo interfaces or other types of interfaces
+        * use the main input thread instead.
+        */
+       if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
+           ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
+               ifp->if_inp = dl_inp;
+               err = dlil_create_input_thread(ifp, ifp->if_inp);
+               if (err != 0) {
+                       panic_plain("%s: ifp=%p couldn't get an input thread; "
+                           "err=%d", __func__, ifp, err);
+                       /* NOTREACHED */
+               }
+       }
+
        /*
-        * A specific dlil input thread is created per Ethernet/cellular
-        * interface.  pseudo interfaces or other types of interfaces use
-        * the main ("loopback") thread.
-        *
-        * If the sysctl "net.link.generic.system.multi_threaded_input" is set
-        * to zero, all packets will be handled by the main loopback thread,
-        * reverting to 10.4.x behaviour.
+        * If the driver supports the new transmit model, create a workloop
+        * starter thread to invoke the if_start callback where the packets
+        * may be dequeued and transmitted.
         */
-       if (dlil_multithreaded_input &&
-           (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR)) {
-               int err;
-
-               ifp->if_input_thread = zalloc(dlif_inp_zone);
-               if (ifp->if_input_thread == NULL) {
-                       panic("%s: ifp=%p couldn't alloc threading",
-                           __func__, ifp);
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               VERIFY(ifp->if_start != NULL);
+               VERIFY(ifp->if_start_thread == THREAD_NULL);
+
+               ifnet_set_start_cycle(ifp, NULL);
+               ifp->if_start_active = 0;
+               ifp->if_start_req = 0;
+               if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
+                   &ifp->if_start_thread)) != KERN_SUCCESS) {
+                       panic_plain("%s: ifp=%p couldn't get a start thread; "
+                           "err=%d", __func__, ifp, err);
                        /* NOTREACHED */
                }
-               bzero(ifp->if_input_thread, dlif_inp_size);
-               err = dlil_create_input_thread(ifp, ifp->if_input_thread);
-               if (err != 0) {
-                       panic("%s: ifp=%p couldn't get a thread. "
+               ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
+                   (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
+       }
+
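
For context: a driver that advertises IFEF_TXSTART hands DLIL an if_start callback, and the starter thread created above invokes it whenever packets are enqueued. A hedged sketch of such a callback, assuming the ifnet_dequeue() KPI and a hypothetical mydrv_tx() hardware-transmit routine:

	static void
	mydrv_start(struct ifnet *ifp)
	{
		struct mbuf *m;

		/* drain the classq set up by ifclassq_setup() */
		while (ifnet_dequeue(ifp, &m) == 0)
			mydrv_tx(ifp, m);	/* hypothetical hardware TX */
	}
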
+       /*
+        * If the driver supports the new receive model, create a poller
+        * thread to invoke if_input_poll callback where the packets may
+        * be dequeued from the driver and processed for reception.
+        */
+       if (ifp->if_eflags & IFEF_RXPOLL) {
+               VERIFY(ifp->if_input_poll != NULL);
+               VERIFY(ifp->if_input_ctl != NULL);
+               VERIFY(ifp->if_poll_thread == THREAD_NULL);
+
+               ifnet_set_poll_cycle(ifp, NULL);
+               ifp->if_poll_update = 0;
+               ifp->if_poll_active = 0;
+               ifp->if_poll_req = 0;
+               if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
+                   &ifp->if_poll_thread)) != KERN_SUCCESS) {
+                       panic_plain("%s: ifp=%p couldn't get a poll thread; "
                            "err=%d", __func__, ifp, err);
                        /* NOTREACHED */
                }
-#ifdef DLIL_DEBUG
-               printf("%s: dlil thread for ifp=%p if_index=%d\n",
-                   __func__, ifp, ifp->if_index);
-#endif
+               ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
+                   (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
        }
 
-       /* Clear stats (save and restore other fields that we care) */
-       if_data_saved = ifp->if_data;
-       bzero(&ifp->if_data, sizeof (ifp->if_data));
-       ifp->if_data.ifi_type = if_data_saved.ifi_type;
-       ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
-       ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
-       ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
-       ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
-       ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
-       ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
-       ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
-       ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
-       ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
-       ifnet_touch_lastchange(ifp);
+       VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+       VERIFY(ifp->if_desc.ifd_len == 0);
+       VERIFY(ifp->if_desc.ifd_desc != NULL);
 
        /* Record attach PC stacktrace */
        ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
@@ -3024,6 +4579,9 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
         */
        lck_mtx_lock(rnh_lock);
        ifnet_lock_exclusive(ifp);
+       /* Initialize Link Quality Metric (loopback [lo0] is always good) */
+       ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
+           IFNET_LQM_THRESH_UNKNOWN;
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        ifp->if_refflags = IFRF_ATTACHED;
        lck_mtx_unlock(&ifp->if_ref_lock);
@@ -3113,7 +4671,8 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
                /* address and mask sockaddr_dl locations */
                asdl = (struct sockaddr_dl *)(ifa + 1);
                bzero(asdl, SOCK_MAXADDRLEN);
-               msdl = (struct sockaddr_dl *)((char *)asdl + SOCK_MAXADDRLEN);
+               msdl = (struct sockaddr_dl *)(void *)
+                   ((char *)asdl + SOCK_MAXADDRLEN);
                bzero(msdl, SOCK_MAXADDRLEN);
        } else {
                VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
@@ -3129,9 +4688,9 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
                }
                IFA_LOCK(ifa);
                /* address and mask sockaddr_dl locations */
-               asdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.asdl;
+               asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
                bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
-               msdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.msdl;
+               msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
                bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
        }
 
@@ -3188,8 +4747,8 @@ ifnet_detach(ifnet_t ifp)
        if (ifp == NULL)
                return (EINVAL);
 
-       ifnet_head_lock_exclusive();
        lck_mtx_lock(rnh_lock);
+       ifnet_head_lock_exclusive();
        ifnet_lock_exclusive(ifp);
 
        /*
@@ -3240,12 +4799,27 @@ ifnet_detach(ifnet_t ifp)
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
 
+       /* Reset Link Quality Metric (unless loopback [lo0]) */
+       if (ifp != lo_ifp)
+               if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+
+       /* Reset TCP local statistics */
+       if (ifp->if_tcp_stat != NULL)
+               bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
+
+       /* Reset UDP local statistics */
+       if (ifp->if_udp_stat != NULL)
+               bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
+
        /* Let BPF know we're detaching */
        bpfdetach(ifp);
 
        /* Mark the interface as DOWN */
        if_down(ifp);
 
+       /* Drain send queue */
+       ifclassq_teardown(ifp);
+
        /* Disable forwarding cached route */
        lck_mtx_lock(&ifp->if_cached_route_lock);
        ifp->if_fwd_cacheok = 0;
@@ -3303,26 +4877,48 @@ ifnet_detaching_dequeue(void)
        return (ifp);
 }
 
-static void
-ifnet_delayed_thread_func(void)
+static int
+ifnet_detacher_thread_cont(int err)
 {
+#pragma unused(err)
        struct ifnet *ifp;
 
        for (;;) {
-               dlil_if_lock();
+               dlil_if_lock_assert();
                while (ifnet_detaching_cnt == 0) {
-                       (void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock,
-                           (PZERO - 1), "ifnet_delayed_thread", NULL);
+                       (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+                           (PZERO - 1), "ifnet_detacher_cont", 0,
+                           ifnet_detacher_thread_cont);
+                       /* NOTREACHED */
                }
 
                VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
 
                /* Take care of detaching ifnet */
                ifp = ifnet_detaching_dequeue();
-               dlil_if_unlock();
-               if (ifp != NULL)
+               if (ifp != NULL) {
+                       dlil_if_unlock();
                        ifnet_detach_final(ifp);
+                       dlil_if_lock();
+               }
        }
+       /* NOTREACHED */
+       return (0);
+}
+
+static void
+ifnet_detacher_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+       dlil_if_lock();
+       (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
+           (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
+       /*
+        * msleep0() shouldn't have returned as PCATCH was not set;
+        * therefore assert in this case.
+        */
+       dlil_if_unlock();
+       VERIFY(0);
 }
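
The detacher relies on the continuation form of msleep0(): the blocked thread gives up its kernel stack, and on wakeup the scheduler re-enters the supplied continuation from the top, so control never returns from the call — which is why both call sites above are followed by /* NOTREACHED */ or a VERIFY(0). A minimal sketch of the same pattern, with illustrative names (example_lock, example_chan, example_cont):

	decl_lck_mtx_data(static, example_lock);
	static int example_chan;

	static int
	example_cont(int err)
	{
	#pragma unused(err)
		for (;;) {
			/* consume pending work; example_lock is held here */
			(void) msleep0(&example_chan, &example_lock,
			    (PZERO - 1), "example_cont", 0, example_cont);
			/* NOTREACHED: wakeup restarts example_cont() */
		}
	}
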
 
 static void
@@ -3330,7 +4926,7 @@ ifnet_detach_final(struct ifnet *ifp)
 {
        struct ifnet_filter *filter, *filter_next;
        struct ifnet_filter_head fhead;
-       struct dlil_threading_info *inputthread;
+       struct dlil_threading_info *inp;
        struct ifaddr *ifa;
        ifnet_detached_func if_free;
        int i;
@@ -3342,8 +4938,10 @@ ifnet_detach_final(struct ifnet *ifp)
                /* NOTREACHED */
        }
 
-       /* Wait until the existing IO references get released
-        * before we proceed with ifnet_detach
+       /*
+        * Wait until the existing IO references get released
+        * before we proceed with ifnet_detach.  This is not a
+        * common case, so block without using a continuation.
         */
        while (ifp->if_refio > 0) {
                printf("%s: Waiting for IO references on %s%d interface "
@@ -3420,65 +5018,87 @@ ifnet_detach_final(struct ifnet *ifp)
        /* There should not be any addresses left */
        VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
 
+       /*
+        * Signal the starter thread to terminate itself.
+        */
+       if (ifp->if_start_thread != THREAD_NULL) {
+               lck_mtx_lock_spin(&ifp->if_start_lock);
+               ifp->if_start_thread = THREAD_NULL;
+               wakeup_one((caddr_t)&ifp->if_start_thread);
+               lck_mtx_unlock(&ifp->if_start_lock);
+       }
+
+       /*
+        * Signal the poller thread to terminate itself.
+        */
+       if (ifp->if_poll_thread != THREAD_NULL) {
+               lck_mtx_lock_spin(&ifp->if_poll_lock);
+               ifp->if_poll_thread = THREAD_NULL;
+               wakeup_one((caddr_t)&ifp->if_poll_thread);
+               lck_mtx_unlock(&ifp->if_poll_lock);
+       }
+
        /*
         * If thread affinity was set for the workloop thread, we will need
         * to tear down the affinity and release the extra reference count
-        * taken at attach time;
+        * taken at attach time.  This does not apply to lo0 or other
+        * interfaces without dedicated input threads.
         */
-       if ((inputthread = ifp->if_input_thread) != NULL) {
-               if (inputthread->net_affinity) {
-                       struct thread *tp;
-
-                       if (inputthread == dlil_lo_thread_ptr) {
-                               panic("%s: Thread affinity should not be "
-                                   "enabled on the loopback dlil input "
-                                   "thread", __func__);
-                               /* NOTREACHED */
+       if ((inp = ifp->if_inp) != NULL) {
+               VERIFY(inp != dlil_main_input_thread);
+
+               if (inp->net_affinity) {
+                       struct thread *tp, *wtp, *ptp;
+
+                       lck_mtx_lock_spin(&inp->input_lck);
+                       wtp = inp->wloop_thr;
+                       inp->wloop_thr = THREAD_NULL;
+                       ptp = inp->poll_thr;
+                       inp->poll_thr = THREAD_NULL;
+                       tp = inp->input_thr;    /* don't nullify now */
+                       inp->tag = 0;
+                       inp->net_affinity = FALSE;
+                       lck_mtx_unlock(&inp->input_lck);
+
+                       /* Tear down poll thread affinity */
+                       if (ptp != NULL) {
+                               VERIFY(ifp->if_eflags & IFEF_RXPOLL);
+                               (void) dlil_affinity_set(ptp,
+                                   THREAD_AFFINITY_TAG_NULL);
+                               thread_deallocate(ptp);
                        }
 
-                       lck_mtx_lock_spin(&inputthread->input_lck);
-                       tp = inputthread->workloop_thread;
-                       inputthread->workloop_thread = NULL;
-                       inputthread->tag = 0;
-                       inputthread->net_affinity = FALSE;
-                       lck_mtx_unlock(&inputthread->input_lck);
-
                        /* Tear down workloop thread affinity */
-                       if (tp != NULL) {
-                               (void) dlil_affinity_set(tp,
+                       if (wtp != NULL) {
+                               (void) dlil_affinity_set(wtp,
                                    THREAD_AFFINITY_TAG_NULL);
-                               thread_deallocate(tp);
+                               thread_deallocate(wtp);
                        }
 
-                       /* Tear down dlil input thread affinity */
-                       tp = inputthread->input_thread;
+                       /* Tear down DLIL input thread affinity */
                        (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
                        thread_deallocate(tp);
                }
 
-               /* cleanup ifp dlil input thread, if any */
-               ifp->if_input_thread = NULL;
-
-               if (inputthread != dlil_lo_thread_ptr) {
-#ifdef DLIL_DEBUG
-                       printf("%s: wakeup thread threadinfo: %p "
-                           "input_thread=%p threads: cur=%d max=%d\n",
-                           __func__, inputthread, inputthread->input_thread,
-                           dlil_multithreaded_input, cur_dlil_input_threads);
-#endif
-                       lck_mtx_lock_spin(&inputthread->input_lck);
-
-                       inputthread->input_waiting |= DLIL_INPUT_TERMINATE;
-                       if (!(inputthread->input_waiting & DLIL_INPUT_RUNNING))
-                               wakeup((caddr_t)&inputthread->input_waiting);
+               /* disassociate ifp DLIL input thread */
+               ifp->if_inp = NULL;
 
-                       lck_mtx_unlock(&inputthread->input_lck);
+               lck_mtx_lock_spin(&inp->input_lck);
+               inp->input_waiting |= DLIL_INPUT_TERMINATE;
+               if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
+                       wakeup_one((caddr_t)&inp->input_waiting);
                }
+               lck_mtx_unlock(&inp->input_lck);
        }
 
        /* The driver might unload, so point these to ourselves */
        if_free = ifp->if_free;
        ifp->if_output = ifp_if_output;
+       ifp->if_pre_enqueue = ifp_if_output;
+       ifp->if_start = ifp_if_start;
+       ifp->if_output_ctl = ifp_if_ctl;
+       ifp->if_input_poll = ifp_if_input_poll;
+       ifp->if_input_ctl = ifp_if_ctl;
        ifp->if_ioctl = ifp_if_ioctl;
        ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
        ifp->if_free = ifp_if_free;
@@ -3489,6 +5109,12 @@ ifnet_detach_final(struct ifnet *ifp)
        ifp->if_del_proto = ifp_if_del_proto;
        ifp->if_check_multi = ifp_if_check_multi;
 
+       /* wipe out interface description */
+       VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
+       ifp->if_desc.ifd_len = 0;
+       VERIFY(ifp->if_desc.ifd_desc != NULL);
+       bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
+
        ifnet_lock_done(ifp);
 
 #if PF
@@ -3505,6 +5131,9 @@ ifnet_detach_final(struct ifnet *ifp)
        VERIFY(ifp->if_flt_waiters == 0);
        lck_mtx_unlock(&ifp->if_flt_lock);
 
+       /* Last chance to drain send queue */
+       if_qflush(ifp, 0);
+
        /* Last chance to cleanup any cached route */
        lck_mtx_lock(&ifp->if_cached_route_lock);
        VERIFY(!ifp->if_fwd_cacheok);
@@ -3553,6 +5182,34 @@ ifp_if_output(struct ifnet *ifp, struct mbuf *m)
        return (0);
 }
 
+static void
+ifp_if_start(struct ifnet *ifp)
+{
+       ifnet_purge(ifp);
+}
+
+static void
+ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
+    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
+{
+#pragma unused(ifp, flags, max_cnt)
+       if (m_head != NULL)
+               *m_head = NULL;
+       if (m_tail != NULL)
+               *m_tail = NULL;
+       if (cnt != NULL)
+               *cnt = 0;
+       if (len != NULL)
+               *len = 0;
+}
+
+static errno_t
+ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
+{
+#pragma unused(ifp, cmd, arglen, arg)
+       return (EOPNOTSUPP);
+}
+
 static errno_t
 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
 {
@@ -3583,17 +5240,25 @@ ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
        return (EOPNOTSUPP);
 }
 
-static errno_t
-ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
-    const struct sockaddr *sa, const char *ll, const char *t)
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *sa, const char *ll, const char *t
+#if CONFIG_EMBEDDED
+    , u_int32_t *pre, u_int32_t *post
+#endif /* CONFIG_EMBEDDED */
+    )
 {
 #pragma unused(ifp, m, sa, ll, t)
        m_freem(*m);
        *m = NULL;
+#if CONFIG_EMBEDDED
+       *pre = 0;
+       *post = 0;
+#endif /* CONFIG_EMBEDDED */
        return (EJUSTRETURN);
 }
 
-static errno_t
+errno_t
 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
 {
 #pragma unused(ifp, cmd, arg)
@@ -3697,20 +5362,43 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
                dlifp1->dl_if_trace = dlil_if_trace;
        }
        ifp1->if_name = dlifp1->dl_if_namestorage;
+
+       /* initialize interface description */
+       ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
+       ifp1->if_desc.ifd_len = 0;
+       ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
+
 #if CONFIG_MACF_NET
        mac_ifnet_label_init(ifp1);
 #endif
 
+       if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
+               DLIL_PRINTF("%s: failed to allocate if local stats, "
+                   "error: %d\n", __func__, ret);
+               /* This probably shouldn't be fatal */
+               ret = 0;
+       }
+
        lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
        lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
        lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
        lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
-       lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_lock_group,
-           ifnet_lock_attr);
        lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
            ifnet_lock_attr);
        lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
 
+       /* for send data paths */
+       lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
+           ifnet_lock_attr);
+       lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
+           ifnet_lock_attr);
+       lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
+           ifnet_lock_attr);
+
+       /* for receive data paths */
+       lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
+           ifnet_lock_attr);
+
        TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
 
        *ifp = ifp1;
@@ -3846,7 +5534,9 @@ struct rtentry *
 ifnet_cached_rtlookup_inet(struct ifnet        *ifp, struct in_addr src_ip)
 {
        struct route            src_rt;
-       struct sockaddr_in      *dst = (struct sockaddr_in *)(&src_rt.ro_dst);
+       struct sockaddr_in      *dst;
+
+       dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
 
        ifp_src_route_copyout(ifp, &src_rt);
 
@@ -3900,7 +5590,8 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
                        src_rt.ro_dst.sin6_family = AF_INET6;
                }
                src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
-               src_rt.ro_dst.sin6_addr = *src_ip6;
+               bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
+                   sizeof (src_rt.ro_dst.sin6_addr));
 
                if (src_rt.ro_rt == NULL) {
                        src_rt.ro_rt = rtalloc1_scoped(
@@ -3920,3 +5611,394 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
        return (src_rt.ro_rt);
 }
 #endif /* INET6 */
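
The bcopy() above, like the (void *) and (uintptr_t) casts elsewhere in this change, avoids unaligned memory access through pointers derived from packet buffers. A hedged illustration of the idiom:

	/*
	 * Sketch: copy an IPv6 address from a possibly misaligned source.
	 * A direct struct assignment may fault on strict-alignment CPUs;
	 * bcopy() tolerates any alignment.
	 */
	static void
	copy_in6_unaligned(const void *src, struct in6_addr *dst)
	{
		bcopy(src, dst, sizeof (*dst));
	}
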
+
+void
+if_lqm_update(struct ifnet *ifp, int lqm)
+{
+       struct kev_dl_link_quality_metric_data ev_lqm_data;
+
+       VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
+
+       /* Normalize to edge */
+       if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR)
+               lqm = IFNET_LQM_THRESH_POOR;
+       else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
+               lqm = IFNET_LQM_THRESH_GOOD;
+
+       ifnet_lock_exclusive(ifp);
+       if (lqm == ifp->if_lqm) {
+               ifnet_lock_done(ifp);
+               return;         /* nothing to update */
+       }
+       ifp->if_lqm = lqm;
+       ifnet_lock_done(ifp);
+
+       bzero(&ev_lqm_data, sizeof (ev_lqm_data));
+       ev_lqm_data.link_quality_metric = lqm;
+
+       dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
+           (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
+}
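
The normalization step above collapses raw metrics onto threshold edges before the compare-and-post, so repeated reports within one band do not generate kevents. Restated as a pure function (sketch only, same constants as the hunk):

	static int
	lqm_normalize(int lqm)
	{
		/* (UNKNOWN, POOR] -> POOR; (POOR, GOOD] -> GOOD */
		if (lqm > IFNET_LQM_THRESH_UNKNOWN &&
		    lqm <= IFNET_LQM_THRESH_POOR)
			return (IFNET_LQM_THRESH_POOR);
		if (lqm > IFNET_LQM_THRESH_POOR &&
		    lqm <= IFNET_LQM_THRESH_GOOD)
			return (IFNET_LQM_THRESH_GOOD);
		return (lqm);
	}
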
+
+/* for uuid.c */
+int
+uuid_get_ethernet(u_int8_t *node)
+{
+       struct ifnet *ifp;
+       struct sockaddr_dl *sdl;
+
+       ifnet_head_lock_shared();
+       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+               ifnet_lock_shared(ifp);
+               IFA_LOCK_SPIN(ifp->if_lladdr);
+               sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
+               if (sdl->sdl_type == IFT_ETHER) {
+                       memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
+                       IFA_UNLOCK(ifp->if_lladdr);
+                       ifnet_lock_done(ifp);
+                       ifnet_head_done();
+                       return (0);
+               }
+               IFA_UNLOCK(ifp->if_lladdr);
+               ifnet_lock_done(ifp);
+       }
+       ifnet_head_done();
+
+       return (-1);
+}
+
+static int
+sysctl_rxpoll SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int i, err;
+
+       i = if_rxpoll;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (net_rxpoll == 0)
+               return (ENXIO);
+
+       if_rxpoll = i;
+       return (err);
+}
+
+static int
+sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int i, err;
+
+       i = if_sndq_maxlen;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (i < IF_SNDQ_MINLEN)
+               i = IF_SNDQ_MINLEN;
+
+       if_sndq_maxlen = i;
+       return (err);
+}
+
+static int
+sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int i, err;
+
+       i = if_rcvq_maxlen;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (i < IF_RCVQ_MINLEN)
+               i = IF_RCVQ_MINLEN;
+
+       if_rcvq_maxlen = i;
+       return (err);
+}
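
All three handlers above share one shape: snapshot the current value, let sysctl_handle_int() copy it out (and possibly in), bail early on error or a read-only request, clamp, then commit. A generic sketch of that shape; some_tunable and SOME_MINIMUM are placeholders:

	static int
	sysctl_clamped_int SYSCTL_HANDLER_ARGS
	{
	#pragma unused(arg1, arg2)
		int i, err;

		i = some_tunable;			/* snapshot */
		err = sysctl_handle_int(oidp, &i, 0, req);
		if (err != 0 || req->newptr == USER_ADDR_NULL)
			return (err);			/* error or read-only */
		if (i < SOME_MINIMUM)
			i = SOME_MINIMUM;		/* clamp */
		some_tunable = i;			/* commit */
		return (err);
	}
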
+
+void
+ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl)
+{
+       struct sfb_bin_fcentry *fce, *tfce;
+
+       lck_mtx_lock_spin(&ifnet_fclist_lock);
+
+       SLIST_FOREACH_SAFE(fce, fcl, fce_link, tfce) {
+               SLIST_REMOVE(fcl, fce, sfb_bin_fcentry, fce_link);
+               SLIST_INSERT_HEAD(&ifnet_fclist, fce, fce_link);
+               sp->sfb_stats.flow_feedback++;
+       }
+       VERIFY(SLIST_EMPTY(fcl) && !SLIST_EMPTY(&ifnet_fclist));
+
+       wakeup(&ifnet_fclist);
+
+       lck_mtx_unlock(&ifnet_fclist_lock);
+}
+
+struct sfb_bin_fcentry *
+ifnet_fce_alloc(int how)
+{
+       struct sfb_bin_fcentry *fce;
+
+       fce = (how == M_WAITOK) ? zalloc(ifnet_fcezone) :
+           zalloc_noblock(ifnet_fcezone);
+       if (fce != NULL)
+               bzero(fce, ifnet_fcezone_size);
+
+       return (fce);
+}
+
+void
+ifnet_fce_free(struct sfb_bin_fcentry *fce)
+{
+       zfree(ifnet_fcezone, fce);
+}
+
+static void
+ifnet_fc_init(void)
+{
+       thread_t thread = THREAD_NULL;
+
+       SLIST_INIT(&ifnet_fclist);
+       lck_mtx_init(&ifnet_fclist_lock, ifnet_snd_lock_group, NULL);
+
+       ifnet_fcezone_size = P2ROUNDUP(sizeof (struct sfb_bin_fcentry),
+           sizeof (u_int64_t));
+       ifnet_fcezone = zinit(ifnet_fcezone_size,
+           IFNET_FCEZONE_MAX * ifnet_fcezone_size, 0, IFNET_FCEZONE_NAME);
+       if (ifnet_fcezone == NULL) {
+               panic("%s: failed allocating %s", __func__, IFNET_FCEZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(ifnet_fcezone, Z_EXPAND, TRUE);
+       zone_change(ifnet_fcezone, Z_CALLERACCT, FALSE);
+
+       if (kernel_thread_start(ifnet_fc_thread_func,
+           NULL, &thread) != KERN_SUCCESS) {
+               panic("%s: couldn't create flow event advisory thread",
+                   __func__);
+               /* NOTREACHED */
+       }
+       thread_deallocate(thread);
+}
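
Note the element sizing above: rounding up to an 8-byte multiple keeps every zone element 64-bit aligned. Illustration only (the 20-byte figure is hypothetical):

	/* P2ROUNDUP(20, 8) == 24: a 20-byte sfb_bin_fcentry would occupy
	 * a 24-byte zone element */
	size_t esize = P2ROUNDUP(sizeof (struct sfb_bin_fcentry),
	    sizeof (u_int64_t));
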
+
+static int
+ifnet_fc_thread_cont(int err)
+{
+#pragma unused(err)
+       struct sfb_bin_fcentry *fce;
+       struct inp_fc_entry *infc;
+
+       for (;;) {
+               lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED);
+               while (SLIST_EMPTY(&ifnet_fclist)) {
+                       (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
+                           (PSOCK | PSPIN), "ifnet_fc_cont", 0,
+                           ifnet_fc_thread_cont);
+                       /* NOTREACHED */
+               }
+
+               fce = SLIST_FIRST(&ifnet_fclist);
+               SLIST_REMOVE(&ifnet_fclist, fce, sfb_bin_fcentry, fce_link);
+               SLIST_NEXT(fce, fce_link) = NULL;
+               lck_mtx_unlock(&ifnet_fclist_lock);
+
+               infc = inp_fc_getinp(fce->fce_flowhash);
+               if (infc == NULL) {
+                       ifnet_fce_free(fce);
+                       lck_mtx_lock_spin(&ifnet_fclist_lock);
+                       continue;
+               }
+               VERIFY(infc->infc_inp != NULL);
+
+               inp_fc_feedback(infc->infc_inp);
+
+               inp_fc_entry_free(infc);
+               ifnet_fce_free(fce);
+               lck_mtx_lock_spin(&ifnet_fclist_lock);
+       }
+}
+
+static void
+ifnet_fc_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+       lck_mtx_lock(&ifnet_fclist_lock);
+       (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock,
+           (PSOCK | PSPIN), "ifnet_fc", 0, ifnet_fc_thread_cont);
+       /*
+        * msleep0() shouldn't have returned as PCATCH was not set;
+        * therefore assert in this case.
+        */
+       lck_mtx_unlock(&ifnet_fclist_lock);
+       VERIFY(0);
+}
+
+void
+dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
+    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
+{
+       struct kev_dl_node_presence kev;
+       struct sockaddr_dl *sdl;
+       struct sockaddr_in6 *sin6;
+
+       VERIFY(ifp);
+       VERIFY(sa);
+       VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+
+       bzero(&kev, sizeof (kev));
+       sin6 = &kev.sin6_node_address;
+       sdl = &kev.sdl_node_address;
+       nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
+       kev.rssi = rssi;
+       kev.link_quality_metric = lqm;
+       kev.node_proximity_metric = npm;
+       bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
+
+       nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
+       dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
+           &kev.link_data, sizeof (kev));
+}
+
+void
+dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
+{
+       struct kev_dl_node_absence kev;
+       struct sockaddr_in6 *sin6;
+       struct sockaddr_dl *sdl;
+
+       VERIFY(ifp);
+       VERIFY(sa);
+       VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
+
+       bzero(&kev, sizeof (kev));
+       sin6 = &kev.sin6_node_address;
+       sdl = &kev.sdl_node_address;
+       nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
+
+       nd6_alt_node_absent(ifp, sin6);
+       dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
+           &kev.link_data, sizeof (kev));
+}
+
+errno_t
+ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
+    struct proc *p)
+{
+       u_int32_t level = IFNET_THROTTLE_OFF;
+       errno_t result = 0;
+
+       VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
+
+       if (cmd == SIOCSIFOPPORTUNISTIC) {
+               /*
+                * XXX: Use priv_check_cred() instead of root check?
+                */
+               if ((result = proc_suser(p)) != 0)
+                       return (result);
+
+               if (ifr->ifr_opportunistic.ifo_flags ==
+                   IFRIFOF_BLOCK_OPPORTUNISTIC)
+                       level = IFNET_THROTTLE_OPPORTUNISTIC;
+               else if (ifr->ifr_opportunistic.ifo_flags == 0)
+                       level = IFNET_THROTTLE_OFF;
+               else
+                       result = EINVAL;
+
+               if (result == 0)
+                       result = ifnet_set_throttle(ifp, level);
+       } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
+               ifr->ifr_opportunistic.ifo_flags = 0;
+               if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
+                       ifr->ifr_opportunistic.ifo_flags |=
+                           IFRIFOF_BLOCK_OPPORTUNISTIC;
+               }
+       }
+
+       /*
+        * Return the count of current opportunistic connections
+        * over the interface.
+        */
+       if (result == 0) {
+               uint32_t flags = 0;
+               flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
+                   INPCB_OPPORTUNISTIC_SETCMD : 0;
+               flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
+                   INPCB_OPPORTUNISTIC_THROTTLEON : 0;
+               ifr->ifr_opportunistic.ifo_inuse =
+                   udp_count_opportunistic(ifp->if_index, flags) +
+                   tcp_count_opportunistic(ifp->if_index, flags);
+       }
+
+       if (result == EALREADY)
+               result = 0;
+
+       return (result);
+}
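
From userland the getter half of this handler is reached through a socket ioctl. A hedged sketch; the en0 name is arbitrary and the ifr_opportunistic fields are assumed visible via private headers:

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <string.h>

	/* returns >0 if throttled, 0 if not, -1 on error */
	static int
	get_opportunistic(int s)
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof (ifr));
		strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
		if (ioctl(s, SIOCGIFOPPORTUNISTIC, &ifr) == -1)
			return (-1);
		return (ifr.ifr_opportunistic.ifo_flags &
		    IFRIFOF_BLOCK_OPPORTUNISTIC);
	}
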
+
+int
+ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
+{
+       struct ifclassq *ifq;
+       int err = 0;
+
+       if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       *level = IFNET_THROTTLE_OFF;
+
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK(ifq);
+       /* Throttling works only for IFCQ, not ALTQ instances */
+       if (IFCQ_IS_ENABLED(ifq))
+               IFCQ_GET_THROTTLE(ifq, *level, err);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+int
+ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
+{
+       struct ifclassq *ifq;
+       int err = 0;
+
+       if (!(ifp->if_eflags & IFEF_TXSTART))
+               return (ENXIO);
+
+       ifq = &ifp->if_snd;     /* fetch early; the ALTQ check below uses it */
+
+       switch (level) {
+       case IFNET_THROTTLE_OFF:
+       case IFNET_THROTTLE_OPPORTUNISTIC:
+#if PF_ALTQ
+               /* Throttling works only for IFCQ, not ALTQ instances */
+               if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+                       return (ENXIO);
+#endif /* PF_ALTQ */
+               break;
+       default:
+               return (EINVAL);
+       }
+
+       IFCQ_LOCK(ifq);
+       if (IFCQ_IS_ENABLED(ifq))
+               IFCQ_SET_THROTTLE(ifq, level, err);
+       IFCQ_UNLOCK(ifq);
+
+       if (err == 0) {
+               printf("%s%d: throttling level set to %d\n", ifp->if_name,
+                   ifp->if_unit, level);
+               if (level == IFNET_THROTTLE_OFF)
+                       ifnet_start(ifp);
+       }
+
+       return (err);
+}
index db1060db88a04432f5a3971ff275028e5c2d3293..98ca8e87848a4996057d54b1122abbfdb5812372 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -39,10 +39,6 @@ enum {
        BPF_TAP_INPUT_OUTPUT
 };
 
-/* Ethernet specific types */
-#define DLIL_DESC_ETYPE2       4
-#define DLIL_DESC_SAP          5
-#define DLIL_DESC_SNAP         6
 /*
  * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
  *                    variants.native_type_length must be set to 2
@@ -58,46 +54,138 @@ enum {
  * The length of the protocol data specified at native_type must be set in
  * variants.native_type_length.
  */
+/* Ethernet specific types */
+#define DLIL_DESC_ETYPE2       4
+#define DLIL_DESC_SAP          5
+#define DLIL_DESC_SNAP         6
 
 #ifdef KERNEL_PRIVATE
-
 #include <net/if.h>
 #include <net/if_var.h>
+#include <net/classq/classq.h>
+#include <net/flowadv.h>
 #include <sys/kern_event.h>
 #include <kern/thread.h>
 #include <kern/locks.h>
 
-#if __STDC__
+#ifdef BSD_KERNEL_PRIVATE
+/* Operations on timespecs. */
+#define        net_timerclear(tvp)     (tvp)->tv_sec = (tvp)->tv_nsec = 0
+
+#define        net_timerisset(tvp)     ((tvp)->tv_sec || (tvp)->tv_nsec)
+
+#define        net_timercmp(tvp, uvp, cmp)                                     \
+       (((tvp)->tv_sec == (uvp)->tv_sec) ?                             \
+           ((tvp)->tv_nsec cmp (uvp)->tv_nsec) :                       \
+           ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+#define        net_timeradd(tvp, uvp, vvp) do {                                \
+       (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;                  \
+       (vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;               \
+       if ((vvp)->tv_nsec >= NSEC_PER_SEC) {                           \
+               (vvp)->tv_sec++;                                        \
+               (vvp)->tv_nsec -= NSEC_PER_SEC;                         \
+       }                                                               \
+} while (0)
+
+#define        net_timersub(tvp, uvp, vvp) do {                                \
+       (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                  \
+       (vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;               \
+       if ((vvp)->tv_nsec < 0) {                                       \
+               (vvp)->tv_sec--;                                        \
+               (vvp)->tv_nsec += NSEC_PER_SEC;                         \
+       }                                                               \
+} while (0)
+
+#define        net_timernsec(tvp, nsp) do {                                    \
+       *(nsp) = (tvp)->tv_nsec;                                        \
+       if ((tvp)->tv_sec > 0)                                          \
+               *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);               \
+} while (0)
+
+#define        net_nsectimer(nsp, tvp) do {                                    \
+       u_int64_t __nsp = *(nsp);                                       \
+       net_timerclear(tvp);                                            \
+       while ((__nsp) >= NSEC_PER_SEC) {                               \
+               (tvp)->tv_sec++;                                        \
+               (__nsp) -= NSEC_PER_SEC;                                \
+       }                                                               \
+       (tvp)->tv_nsec = (__nsp);                                       \
+} while (0)
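
These mirror the classic timeval macros, but in nanoseconds with carry/borrow against NSEC_PER_SEC. A worked example of net_timeradd (values illustrative):

	struct timespec a = { .tv_sec = 1, .tv_nsec = 800000000 };
	struct timespec b = { .tv_sec = 0, .tv_nsec = 700000000 };
	struct timespec c;

	net_timeradd(&a, &b, &c);  /* c == { 2, 500000000 } after the carry */
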
 
 struct ifnet;
 struct mbuf;
 struct ether_header;
 struct sockaddr_dl;
-
-#endif
-
 struct iff_filter;
 
 #define        DLIL_THREADNAME_LEN     32
 
+/*
+ * DLIL input thread info
+ */
 struct dlil_threading_info {
        decl_lck_mtx_data(, input_lck);
        lck_grp_t       *lck_grp;       /* lock group (for lock stats) */
-       mbuf_t          mbuf_head;      /* start of mbuf list from if */
-       mbuf_t          mbuf_tail;
-       u_int32_t       mbuf_count;
-       boolean_t       net_affinity;   /* affinity set is available */
        u_int32_t       input_waiting;  /* DLIL condition of thread */
-       struct thread   *input_thread;  /* thread data for this input */
-       struct thread   *workloop_thread; /* current workloop thread */
-       u_int32_t       tag;            /* current affinity tag */
-       char            input_name[DLIL_THREADNAME_LEN];
+       u_int32_t       wtot;           /* # of wakeup requests */
+       char            input_name[DLIL_THREADNAME_LEN]; /* name storage */
+       struct ifnet    *ifp;           /* pointer to interface */
+       class_queue_t   rcvq_pkts;      /* queue of pkts */
+       struct ifnet_stat_increment_param stats; /* incremental statistics */
+       /*
+        * Thread affinity (workloop and DLIL threads).
+        */
+       boolean_t       net_affinity;   /* affinity set is available */
+       struct thread   *input_thr;     /* input thread */
+       struct thread   *wloop_thr;     /* workloop thread */
+       struct thread   *poll_thr;      /* poll thread */
+       u_int32_t       tag;            /* affinity tag */
+       /*
+        * Opportunistic polling.
+        */
+       ifnet_model_t   mode;           /* current mode */
+       struct pktcntr  tstats;         /* incremental polling statistics */
+       struct if_rxpoll_stats pstats;  /* polling statistics */
+#define        rxpoll_offreq   pstats.ifi_poll_off_req
+#define        rxpoll_offerr   pstats.ifi_poll_off_err
+#define        rxpoll_onreq    pstats.ifi_poll_on_req
+#define        rxpoll_onerr    pstats.ifi_poll_on_err
+#define        rxpoll_wavg     pstats.ifi_poll_wakeups_avg
+#define        rxpoll_wlowat   pstats.ifi_poll_wakeups_lowat
+#define        rxpoll_whiwat   pstats.ifi_poll_wakeups_hiwat
+#define        rxpoll_pavg     pstats.ifi_poll_packets_avg
+#define        rxpoll_pmin     pstats.ifi_poll_packets_min
+#define        rxpoll_pmax     pstats.ifi_poll_packets_max
+#define        rxpoll_plowat   pstats.ifi_poll_packets_lowat
+#define        rxpoll_phiwat   pstats.ifi_poll_packets_hiwat
+#define        rxpoll_bavg     pstats.ifi_poll_bytes_avg
+#define        rxpoll_bmin     pstats.ifi_poll_bytes_min
+#define        rxpoll_bmax     pstats.ifi_poll_bytes_max
+#define        rxpoll_blowat   pstats.ifi_poll_bytes_lowat
+#define        rxpoll_bhiwat   pstats.ifi_poll_bytes_hiwat
+       struct pktcntr  sstats;         /* packets and bytes per sampling */
+       struct timespec mode_holdtime;  /* mode holdtime in nsec */
+       struct timespec mode_lasttime;  /* last mode change time in nsec */
+       struct timespec sample_holdtime; /* sampling holdtime in nsec */
+       struct timespec sample_lasttime; /* last sampling time in nsec */
+       struct timespec dbg_lasttime;   /* last debug message time in nsec */
 #if IFNET_INPUT_SANITY_CHK
-       u_int32_t       input_wake_cnt; /* number of times the thread was awaken with packets to process */
-       u_long          input_mbuf_cnt; /* total number of mbuf packets processed by this thread */
+       /*
+        * For debugging.
+        */
+       u_int64_t       input_mbuf_cnt; /* total # of packets processed */
 #endif
 };
 
+/*
+ * DLIL input thread info (for main/loopback input thread)
+ */
+struct dlil_main_threading_info {
+       struct dlil_threading_info      inp;
+       class_queue_t                   lo_rcvq_pkts; /* queue of lo0 pkts */
+};
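
Because 'inp' is the leading member, generic DLIL code can treat the main input thread uniformly while lo0-only state (the lo_rcvq_pkts queue) rides alongside. Sketch of the intended relationship (initialization elided; consistent with the dlil_main_input_thread extern below):

	static struct dlil_main_threading_info dlil_main_info;

	/* generic code sees only the embedded threading info ... */
	struct dlil_threading_info *dlil_main_input_thread =
	    &dlil_main_info.inp;

	/* ... lo0 code recovers the outer struct by containment */
	#define	MAIN_INFO(inp) ((struct dlil_main_threading_info *)(inp))
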
+
 /*
  * The following are shared with kpi_protocol.c so that it may wakeup
  * the input thread to run through packets queued for protocol input.
@@ -108,8 +196,12 @@ struct dlil_threading_info {
 #define        DLIL_PROTO_WAITING      0x10000000
 #define        DLIL_INPUT_TERMINATE    0x08000000
 
+__private_extern__ struct dlil_threading_info *dlil_main_input_thread;
+
 extern void dlil_init(void);
 
+extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
+
 extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
 
 /*
@@ -119,17 +211,30 @@ extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
     const struct sockaddr_dl *, const struct sockaddr *,
     const struct sockaddr_dl *, const struct sockaddr *);
 
+/*
+ * The following flags are used to check whether a network thread
+ * already holds the given lock.
+ */
+#define        NET_THREAD_HELD_PF      0x1     /* thread is holding PF lock */
+#define        NET_THREAD_HELD_DOMAIN  0x2     /* thread is holding domain_proto_mtx */
+
+extern errno_t net_thread_check_lock(u_int32_t);
+extern void net_thread_set_lock(u_int32_t);
+extern void net_thread_unset_lock(u_int32_t);
+
 extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
-    const struct sockaddr *, int);
+    const struct sockaddr *, int, struct flowadv *);
 
 extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
+extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
+    u_int32_t, ifnet_model_t);
 
 extern errno_t dlil_resolve_multi(struct ifnet *,
     const struct sockaddr *, struct sockaddr *, size_t);
 
 extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
     const struct sockaddr *, const struct sockaddr_dl *,
-    const struct sockaddr *);
+    const struct sockaddr *, u_int32_t);
 
 extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
     interface_filter_t *);
@@ -140,6 +245,8 @@ extern void dlil_proto_unplumb_all(ifnet_t);
 extern void dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
     struct net_event_data *, u_int32_t);
 
+extern int dlil_alloc_local_stats(struct ifnet *);
+
 /*
  * dlil_if_acquire is obsolete. Use ifnet_allocate.
  */
@@ -155,6 +262,11 @@ extern u_int32_t ifnet_aggressive_drainers;
 extern errno_t dlil_if_ref(struct ifnet *);
 extern errno_t dlil_if_free(struct ifnet *);
 
+extern void dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
+    int, u_int8_t[48]);
+extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
+
+#endif /* BSD_KERNEL_PRIVATE */
 #endif /* KERNEL_PRIVATE */
 #endif /* KERNEL */
 #endif /* DLIL_H */
diff --git a/bsd/net/dlil_pvt.h b/bsd/net/dlil_pvt.h
deleted file mode 100644
index 192b272..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef DLIL_PVT_H
-#define DLIL_PVT_H
-#include <sys/appleapiopts.h>
-#ifdef KERNEL_PRIVATE
-
-#include <net/dlil.h>
-#include <sys/queue.h>
-
-struct dlil_family_mod_str {
-    TAILQ_ENTRY(dlil_family_mod_str)   dl_fam_next;
-    char       *interface_family;
-    int (*add_if)(struct ifnet_ptr  *ifp);
-    int (*del_if)(struct ifnet    *ifp);
-    int (*add_proto)(struct ifnet *ifp, uint32_t protocol_family,
-                                struct ddesc_head_str *demux_desc_head);
-    int (*del_proto)(struct ifnet *ifp, uint32_t proto_family);
-}
-
-#endif /* KERNEL_PRIVATE */
-#endif
index 1adcbe27e9309569ea904d3057f70e1324b5d9d5..e7daa051a62b8acd50386612bb0e9be50389bdfa 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -186,12 +186,16 @@ ether_at_prmod_ioctl(
     u_int32_t                                  command,
     void                                               *data)
 {
-    struct ifreq *ifr = data;
     int error = 0;
 
     switch (command) {
 
-    case SIOCSIFADDR:
+    case SIOCSIFADDR:          /* struct ifaddr pointer */
+       /*
+        * Note: caller of ifnet_ioctl() passes in pointer to
+        * struct ifaddr as parameter to SIOCSIFADDR, for legacy
+        * reasons.
+        */
         if ((ifp->if_flags & IFF_RUNNING) == 0) {
              ifnet_set_flags(ifp, IFF_UP, IFF_UP);
              ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
@@ -199,9 +203,12 @@ ether_at_prmod_ioctl(
 
        break;
 
-    case SIOCGIFADDR:
+    case SIOCGIFADDR: {                /* struct ifreq */
+       struct ifreq *ifr = data;
+
        ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN);
        break;
+    }
 
     default:
        error = EOPNOTSUPP;
index a1cbfb3d1508f03ddf00db57e0d1d550d553faa8..60b6846d293649ccc3eef179ed97bf0e14e09df8 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -97,7 +97,9 @@
 #include <net/ether_if_module.h>
 #include <sys/socketvar.h>
 #include <net/if_vlan_var.h>
-#include <net/if_bond_var.h>
+#if BOND
+#include <net/if_bond_internal.h>
+#endif /* BOND */
 #if IF_BRIDGE
 #include <net/if_bridgevar.h>
 #endif /* IF_BRIDGE */
@@ -357,7 +359,7 @@ ether_demux(
        char                            *frame_header,
        protocol_family_t       *protocol_family)
 {
-       struct ether_header *eh = (struct ether_header *)frame_header;
+       struct ether_header *eh = (struct ether_header *)(void *)frame_header;
        u_short                 ether_type = eh->ether_type;
        u_int16_t               type;
        u_int8_t                *data;
@@ -416,7 +418,7 @@ ether_demux(
        else if (ether_type == htons(ETHERTYPE_VLAN)) {
                struct ether_vlan_header *      evl;
 
-               evl = (struct ether_vlan_header *)frame_header;
+               evl = (struct ether_vlan_header *)(void *)frame_header;
                if (m->m_len < ETHER_VLAN_ENCAP_LEN
                    || ntohs(evl->evl_proto) == ETHERTYPE_VLAN
                    || EVL_VLANOFTAG(ntohs(evl->evl_tag)) != 0) {
@@ -443,13 +445,13 @@ ether_demux(
        */
        
        if (ntohs(ether_type) <= 1500) {
-               extProto1 = *(u_int32_t*)data;
+               bcopy(data, &extProto1, sizeof (u_int32_t));
                
                // SAP or SNAP
                if ((extProto1 & htonl(0xFFFFFF00)) == htonl(0xAAAA0300)) {
                        // SNAP
                        type = DLIL_DESC_SNAP;
-                       extProto2 = *(u_int32_t*)(data + sizeof(u_int32_t));
+                       bcopy(data + sizeof(u_int32_t), &extProto2, sizeof (u_int32_t));
                        extProto1 &= htonl(0x000000FF);
                } else {
                        type = DLIL_DESC_SAP;
@@ -504,11 +506,17 @@ ether_demux(
  */
 int
 ether_frameout(
-       struct ifnet                    *ifp,
-       struct mbuf                             **m,
-       const struct sockaddr   *ndest,
-       const char                              *edst,
-       const char                              *ether_type)
+       struct ifnet            *ifp,
+       struct mbuf             **m,
+       const struct sockaddr   *ndest,
+       const char              *edst,
+       const char              *ether_type
+#if KPI_INTERFACE_EMBEDDED
+       ,
+       u_int32_t               *prepend_len,
+       u_int32_t               *postpend_len
+#endif /* KPI_INTERFACE_EMBEDDED */
+       )
 {
        struct ether_header *eh;
        int hlen;       /* link layer header length */
@@ -530,11 +538,11 @@ ether_frameout(
             if ((*m)->m_flags & M_BCAST) {
                 struct mbuf *n = m_copy(*m, 0, (int)M_COPYALL);
                 if (n != NULL)
-                    dlil_output(lo_ifp, ndest->sa_family, n, NULL, ndest, 0);
+                    dlil_output(lo_ifp, ndest->sa_family, n, NULL, ndest, 0, NULL);
             }
             else {
                                        if (_ether_cmp(edst, ifnet_lladdr(ifp)) == 0) {
-                    dlil_output(lo_ifp, ndest->sa_family, *m, NULL, ndest, 0);
+                    dlil_output(lo_ifp, ndest->sa_family, *m, NULL, ndest, 0, NULL);
                     return EJUSTRETURN;
                 }
             }
@@ -550,7 +558,11 @@ ether_frameout(
            return (EJUSTRETURN);
        }
 
-
+#if KPI_INTERFACE_EMBEDDED
+       *prepend_len = sizeof (struct ether_header);
+       *postpend_len = 0;
+#endif /* KPI_INTERFACE_EMBEDDED */
+
        eh = mtod(*m, struct ether_header *);
        (void)memcpy(&eh->ether_type, ether_type,
                sizeof(eh->ether_type));
@@ -582,7 +594,8 @@ ether_check_multi(
                        break;
                
                case AF_LINK:
-                       e_addr = CONST_LLADDR((const struct sockaddr_dl*)proto_addr); 
+                       e_addr = CONST_LLADDR((const struct sockaddr_dl*)
+                           (uintptr_t)(size_t)proto_addr);
                        if ((e_addr[0] & 0x01) != 0x01)
                                result = EADDRNOTAVAIL;
                        else
index e8411dec64657da761378d4e04e720506fc9f86f..78a2b3f079138a56caa469891625981acc378711 100644
@@ -111,9 +111,12 @@ ether_inet6_input(ifnet_t ifp, protocol_family_t protocol,
     mbuf_t packet, char *header)
 {
 #pragma unused(ifp, protocol)
-       struct ether_header *eh = (struct ether_header *)header;
+       struct ether_header *eh = (struct ether_header *)(void *)header;
+       u_int16_t ether_type;
 
-       if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
+       bcopy(&eh->ether_type, &ether_type, sizeof (ether_type));
+
+       if (ether_type == htons(ETHERTYPE_IPV6)) {
                struct ifnet *mifp;
                /*
                 * Trust the ifp in the mbuf, rather than ifproto's
@@ -155,11 +158,13 @@ ether_inet6_pre_output(ifnet_t ifp, protocol_family_t protocol_family,
         */
        m->m_flags |= M_LOOP;
 
-       result = nd6_lookup_ipv6(ifp, (const struct sockaddr_in6 *)dst_netaddr,
-           &sdl, sizeof (sdl), route, *m0);
+       result = nd6_lookup_ipv6(ifp, (const struct sockaddr_in6 *)
+           (uintptr_t)(size_t)dst_netaddr, &sdl, sizeof (sdl), route, *m0);
 
        if (result == 0) {
-               *(u_int16_t *)type = htons(ETHERTYPE_IPV6);
+               u_int16_t ethertype_ipv6 = htons(ETHERTYPE_IPV6);
+
+               bcopy(&ethertype_ipv6, type, sizeof (ethertype_ipv6));
                bcopy(LLADDR(&sdl), edst, sdl.sdl_alen);
        }
 
@@ -173,7 +178,7 @@ ether_inet6_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
        static const size_t minsize =
            offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
        const struct sockaddr_in6 *sin6 =
-           (const struct sockaddr_in6 *)proto_addr;
+           (const struct sockaddr_in6 *)(uintptr_t)(size_t)proto_addr;
 
        if (proto_addr->sa_family != AF_INET6)
                return (EAFNOSUPPORT);
@@ -202,21 +207,28 @@ ether_inet6_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
     u_long command, void *data)
 {
 #pragma unused(protocol_family)
-       struct ifreq *ifr = (struct ifreq *)data;
        int error = 0;
 
        switch (command) {
-       case SIOCSIFADDR:
+       case SIOCSIFADDR:               /* struct ifaddr pointer */
+               /*
+                * Note: caller of ifnet_ioctl() passes in pointer to
+                * struct ifaddr as parameter to SIOCSIFADDR, for legacy
+                * reasons.
+                */
                if ((ifp->if_flags & IFF_RUNNING) == 0) {
                        ifnet_set_flags(ifp, IFF_UP, IFF_UP);
                        ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
                }
                break;
 
-       case SIOCGIFADDR:
+       case SIOCGIFADDR: {             /* struct ifreq */
+               struct ifreq *ifr = (struct ifreq *)(void *)data;
+
                (void) ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data,
                    ETHER_ADDR_LEN);
                break;
+       }
 
        default:
                error = EOPNOTSUPP;
index 12a8ead3cd8dd39822d797088c434997af1727b4..b8820a5a5505178bd4036bd602683663f8035726 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -109,7 +109,7 @@ extern void kdp_set_ip_and_mac_addresses(struct in_addr *ipaddr,
     struct ether_addr *macaddr);
 
 #define        _ip_copy(dst, src)      \
-       (*(dst) = *(src))
+       bcopy(src, dst, sizeof (struct in_addr))
 
 static void
 ether_inet_arp_input(struct ifnet *ifp, struct mbuf *m)
@@ -142,9 +142,9 @@ ether_inet_arp_input(struct ifnet *ifp, struct mbuf *m)
        bzero(&sender_ip, sizeof (sender_ip));
        sender_ip.sin_len = sizeof (sender_ip);
        sender_ip.sin_family = AF_INET;
-       _ip_copy(&sender_ip.sin_addr, (const struct in_addr *)ea->arp_spa);
+       _ip_copy(&sender_ip.sin_addr, ea->arp_spa);
        target_ip = sender_ip;
-       _ip_copy(&target_ip.sin_addr, (const struct in_addr *)ea->arp_tpa);
+       _ip_copy(&target_ip.sin_addr, ea->arp_tpa);
 
        bzero(&sender_hw, sizeof (sender_hw));
        sender_hw.sdl_len = sizeof (sender_hw);
@@ -247,21 +247,24 @@ ether_inet_pre_output(ifnet_t ifp, protocol_family_t protocol_family,
                struct sockaddr_dl ll_dest;
 
                result = arp_lookup_ip(ifp,
-                   (const struct sockaddr_in *)dst_netaddr, &ll_dest,
-                   sizeof (ll_dest), (route_t)route, *m0);
+                   (const struct sockaddr_in *)(uintptr_t)(size_t)dst_netaddr,
+                   &ll_dest, sizeof (ll_dest), (route_t)route, *m0);
                if (result == 0) {
+                       u_int16_t ethertype_ip = htons(ETHERTYPE_IP);
+
                        bcopy(LLADDR(&ll_dest), edst, ETHER_ADDR_LEN);
-                       *(u_int16_t *)type = htons(ETHERTYPE_IP);
+                       bcopy(&ethertype_ip, type, sizeof (ethertype_ip));
                }
-       break;
+               break;
        }
 
        case pseudo_AF_HDRCMPLT:
        case AF_UNSPEC:
                m->m_flags &= ~M_LOOP;
-               eh = (const struct ether_header *)dst_netaddr->sa_data;
+               eh = (const struct ether_header *)(uintptr_t)(size_t)
+                   dst_netaddr->sa_data;
                (void) memcpy(edst, eh->ether_dhost, 6);
-               *(u_short *)type = eh->ether_type;
+               bcopy(&eh->ether_type, type, sizeof (u_short));
                break;
 
        default:
@@ -281,7 +284,8 @@ ether_inet_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
 {
        static const size_t minsize =
            offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
-       const struct sockaddr_in *sin = (const struct sockaddr_in *)proto_addr;
+       const struct sockaddr_in *sin =
+           (const struct sockaddr_in *)(uintptr_t)(size_t)proto_addr;
 
        if (proto_addr->sa_family != AF_INET)
                return (EAFNOSUPPORT);
@@ -310,13 +314,18 @@ ether_inet_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
     u_long command, void *data)
 {
 #pragma unused(protocol_family)
-       ifaddr_t ifa = data;
-       struct ifreq *ifr = data;
        int error = 0;
 
        switch (command) {
-       case SIOCSIFADDR:
-       case SIOCAIFADDR:
+       case SIOCSIFADDR:               /* struct ifaddr pointer */
+       case SIOCAIFADDR: {             /* struct ifaddr pointer */
+               /*
+                * Note: caller of ifnet_ioctl() passes in pointer to
+                * struct ifaddr as parameter to SIOC{A,S}IFADDR, for
+                * legacy reasons.
+                */
+               struct ifaddr *ifa = data;
+
                if (!(ifnet_flags(ifp) & IFF_RUNNING)) {
                        ifnet_set_flags(ifp, IFF_UP, IFF_UP);
                        ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
@@ -326,6 +335,10 @@ ether_inet_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
                        break;
 
                inet_arp_init_ifaddr(ifp, ifa);
+
+               if (command != SIOCSIFADDR)
+                       break;
+
                /*
                 * Register new IP and MAC addresses with the kernel
                 * debugger if the interface is the same as was registered
@@ -334,18 +347,21 @@ ether_inet_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
                 * Do this only for the first address of the interface
                 * and not for aliases.
                 */
-               if (command == SIOCSIFADDR &&
-                   ((kdp_get_interface() != 0 &&
+               if ((kdp_get_interface() != 0 &&
                    kdp_get_interface() == ifp->if_softc) ||
-                   (kdp_get_interface() == 0 && ifp->if_unit == 0)))
+                   (kdp_get_interface() == 0 && ifp->if_unit == 0))
                        kdp_set_ip_and_mac_addresses(&(IA_SIN(ifa)->sin_addr),
                            ifnet_lladdr(ifp));
                break;
+       }
+
+       case SIOCGIFADDR: {             /* struct ifreq */
+               struct ifreq *ifr = data;
 
-       case SIOCGIFADDR:
                ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data,
                    ETHER_ADDR_LEN);
                break;
+       }
 
        default:
                error = EOPNOTSUPP;
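
The restructured ioctl handler above documents a subtle convention: for legacy reasons, SIOCSIFADDR and SIOCAIFADDR deliver a pointer to a struct ifaddr in the void *data argument, while SIOCGIFADDR delivers a struct ifreq, so each case now declares its own correctly typed pointer in a scoped block instead of casting data up front for every command. A minimal standalone sketch of that per-case typing pattern (the command codes and payload structs here are hypothetical, for illustration only):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical commands and their per-command payload types. */
    enum { CMD_SET_ADDR, CMD_GET_NAME };

    struct addr_req { unsigned int addr; };
    struct name_req { char name[16]; };

    static int
    example_ioctl(unsigned long cmd, void *data)
    {
            int error = 0;

            switch (cmd) {
            case CMD_SET_ADDR: {    /* data is a struct addr_req pointer */
                    struct addr_req *ar = data;

                    printf("set addr 0x%x\n", ar->addr);
                    break;
            }

            case CMD_GET_NAME: {    /* data is a struct name_req pointer */
                    struct name_req *nr = data;

                    strncpy(nr->name, "en0", sizeof (nr->name));
                    break;
            }

            default:
                    error = -1;     /* analogous to EOPNOTSUPP */
                    break;
            }
            return (error);
    }
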
@@ -390,9 +406,9 @@ ether_inet_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
        struct ether_header *eh;
        struct ether_arp *ea;
        const struct sockaddr_in *sender_ip =
-           (const struct sockaddr_in *)sender_proto;
-       const struct sockaddr_in *target_ip =
-           (const struct sockaddr_in *)target_proto;
+           (const struct sockaddr_in *)(uintptr_t)(size_t)sender_proto;
+       const struct sockaddr_inarp *target_ip =
+           (const struct sockaddr_inarp *)(uintptr_t)(size_t)target_proto;
        char *datap;
 
        if (target_ip == NULL)
@@ -459,8 +475,9 @@ ether_inet_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
                        IFA_LOCK(ifa);
                        if (ifa->ifa_addr != NULL &&
                            ifa->ifa_addr->sa_family == AF_INET) {
-                               bcopy(&((struct sockaddr_in *)ifa->ifa_addr)->
-                                   sin_addr, ea->arp_spa, sizeof(ea->arp_spa));
+                               bcopy(&((struct sockaddr_in *)(void *)
+                                   ifa->ifa_addr)->sin_addr, ea->arp_spa,
+                                   sizeof (ea->arp_spa));
                                IFA_UNLOCK(ifa);
                                break;
                        }
@@ -489,6 +506,23 @@ ether_inet_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
        /* Target IP */
        bcopy(&target_ip->sin_addr, ea->arp_tpa, sizeof (ea->arp_tpa));
 
+       /*
+        * If this is an ARP request for a (default) router, mark
+        * the packet accordingly so that the driver can find out,
+        * in case it needs to perform driver-specific action(s).
+        */
+       if (arpop == ARPOP_REQUEST && (target_ip->sin_other & SIN_ROUTER)) {
+               m->m_pkthdr.aux_flags |= MAUXF_INET_RESOLVE_RTR;
+               VERIFY(!(m->m_pkthdr.aux_flags & MAUXF_INET6_RESOLVE_RTR));
+       }
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               /*
+                * Use the control service class if the interface
+                * supports the transmit-start model.
+                */
+               (void) m_set_service_class(m, MBUF_SC_CTL);
+       }
+
        ifnet_output_raw(ifp, PF_INET, m);
 
        return (0);
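
The common thread in the hunks above is replacing direct loads and stores through cast pointers (e.g. *(u_int16_t *)type = ...) with bcopy()/memcpy(), because payloads such as sa_data and arp_spa are only byte-aligned; on strict-alignment CPUs a misaligned 16- or 32-bit access can fault. A minimal standalone sketch of the pattern, not part of the patch:

    #include <string.h>

    /* May fault on strict-alignment architectures if 'type' is not
     * 2-byte aligned; misaligned access is also undefined behavior
     * in C. */
    static void
    unsafe_store16(char *type, unsigned short ethertype)
    {
            *(unsigned short *)(void *)type = ethertype;
    }

    /* Expresses the same two-byte move with no alignment assumption;
     * compilers lower this to a single store when that is safe. */
    static void
    safe_store16(char *type, unsigned short ethertype)
    {
            memcpy(type, &ethertype, sizeof (ethertype));
    }
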
diff --git a/bsd/net/flowadv.h b/bsd/net/flowadv.h
new file mode 100644 (file)
index 0000000..96e6e9e
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_FLOWADV_H_
+#define        _NET_FLOWADV_H_
+
+#include <sys/types.h>
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#define        FADV_SUCCESS            0       /* success */
+#define        FADV_FLOW_CONTROLLED    1       /* regular flow control */
+#define        FADV_SUSPENDED          2       /* flow control due to suspension */
+
+struct flowadv {
+       int32_t         code;           /* FADV advisory code */
+};
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* _NET_FLOWADV_H_ */
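
The three advisory codes are the whole interface of this new header. A hedged sketch of how a transmit path might consume them; the helper below is hypothetical and not part of xnu:

    #include <net/flowadv.h>

    /* Hypothetical consumer: translate an advisory code from a
     * driver into a send decision for the caller. */
    static const char *
    flowadv_action(const struct flowadv *fadv)
    {
            switch (fadv->code) {
            case FADV_SUCCESS:
                    return ("continue transmitting");
            case FADV_FLOW_CONTROLLED:
                    return ("hold until flow control lifts");
            case FADV_SUSPENDED:
                    return ("hold; flow is suspended");
            default:
                    return ("unknown advisory");
            }
    }
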
diff --git a/bsd/net/flowhash.c b/bsd/net/flowhash.c
new file mode 100644 (file)
index 0000000..e634624
--- /dev/null
@@ -0,0 +1,825 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * http://code.google.com/p/smhasher/
+ *
+ * Copyright (c) 2009-2011 Austin Appleby.
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ */
+
+/*
+ * http://burtleburtle.net/bob/hash/
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * You can use this free for any purpose.  It's in the public domain.
+ * It has no warranty.
+ */
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <machine/endian.h>
+#include <net/flowhash.h>
+
+static inline u_int32_t getblock32(const u_int32_t *, int);
+static inline u_int64_t getblock64(const u_int64_t *, int);
+static inline u_int32_t mh3_fmix32(u_int32_t);
+static inline u_int64_t mh3_fmix64(u_int64_t);
+
+#define        ALIGNED16(v)    ((((uintptr_t)(v)) & 1) == 0)
+#define        ALIGNED32(v)    ((((uintptr_t)(v)) & 3) == 0)
+#define        ALIGNED64(v)    ((((uintptr_t)(v)) & 7) == 0)
+
+#define        ROTL32(x, r)    (((x) << (r)) | ((x) >> (32 - (r))))
+#define        ROTL64(x, r)    (((x) << (r)) | ((x) >> (64 - (r))))
+
+/*
+ * The following hash algorithms are selected based on performance:
+ *
+ * Intel 32-bit:       MurmurHash3_x86_32
+ * Intel 64-bit:       MurmurHash3_x64_128
+ * ARM, et al:         JHash
+ */
+#if defined(__i386__)
+net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x86_32;
+#elif defined(__x86_64__)
+net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
+#else /* !__i386__ && !__x86_64__ */
+net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
+#endif /* !__i386__ && !__x86_64__ */
+
+#if defined(__i386__) || defined(__x86_64__)
+static inline u_int32_t
+getblock32(const u_int32_t *p, int i)
+{
+       return (p[i]);
+}
+
+static inline u_int64_t
+getblock64(const u_int64_t *p, int i)
+{
+       return (p[i]);
+}
+#else /* !__i386__ && !__x86_64 */
+static inline u_int32_t
+getblock32(const u_int32_t *p, int i)
+{
+       const u_int8_t *bytes = (u_int8_t *)(void *)(uintptr_t)(p + i);
+       u_int32_t value;
+
+       if (ALIGNED32(p)) {
+               value = p[i];
+       } else {
+#if BYTE_ORDER == BIG_ENDIAN
+               value =
+                   (((u_int32_t)bytes[0]) << 24) |
+                   (((u_int32_t)bytes[1]) << 16) |
+                   (((u_int32_t)bytes[2]) << 8) |
+                   ((u_int32_t)bytes[3]);
+#else /* LITTLE_ENDIAN */
+               value =
+                   (((u_int32_t)bytes[3]) << 24) |
+                   (((u_int32_t)bytes[2]) << 16) |
+                   (((u_int32_t)bytes[1]) << 8) |
+                   ((u_int32_t)bytes[0]);
+#endif /* LITTLE_ENDIAN */
+       }
+       return (value);
+}
+
+static inline u_int64_t
+getblock64(const u_int64_t *p, int i)
+{
+       const u_int8_t *bytes = (const u_int8_t *)(void *)(uintptr_t)(p + i);
+       u_int64_t value;
+
+       if (ALIGNED64(p)) {
+               value = p[i];
+       } else {
+#if BYTE_ORDER == BIG_ENDIAN
+               value =
+                   (((u_int64_t)bytes[0]) << 56) |
+                   (((u_int64_t)bytes[1]) << 48) |
+                   (((u_int64_t)bytes[2]) << 40) |
+                   (((u_int64_t)bytes[3]) << 32) |
+                   (((u_int64_t)bytes[4]) << 24) |
+                   (((u_int64_t)bytes[5]) << 16) |
+                   (((u_int64_t)bytes[6]) << 8) |
+                   ((u_int64_t)bytes[7]);
+#else /* LITTLE_ENDIAN */
+               value =
+                   (((u_int64_t)bytes[7]) << 56) |
+                   (((u_int64_t)bytes[6]) << 48) |
+                   (((u_int64_t)bytes[5]) << 40) |
+                   (((u_int64_t)bytes[4]) << 32) |
+                   (((u_int64_t)bytes[3]) << 24) |
+                   (((u_int64_t)bytes[2]) << 16) |
+                   (((u_int64_t)bytes[1]) << 8) |
+                   ((u_int64_t)bytes[0]);
+#endif /* LITTLE_ENDIAN */
+       }
+       return (value);
+}
+#endif /* !__i386__ && !__x86_64 */
+
+static inline u_int32_t
+mh3_fmix32(u_int32_t h)
+{
+       h ^= h >> 16;
+       h *= 0x85ebca6b;
+       h ^= h >> 13;
+       h *= 0xc2b2ae35;
+       h ^= h >> 16;
+
+       return (h);
+}
+
+static inline u_int64_t
+mh3_fmix64(u_int64_t k)
+{
+       k ^= k >> 33;
+       k *= 0xff51afd7ed558ccdLLU;
+       k ^= k >> 33;
+       k *= 0xc4ceb9fe1a85ec53LLU;
+       k ^= k >> 33;
+
+       return (k);
+}
+
+/*
+ * MurmurHash3_x86_32
+ */
+#define        MH3_X86_32_C1   0xcc9e2d51
+#define        MH3_X86_32_C2   0x1b873593
+
+u_int32_t
+net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
+{
+       const u_int8_t *data = (const u_int8_t *)key;
+       const u_int32_t nblocks = len / 4;
+       const u_int32_t *blocks;
+       const u_int8_t *tail;
+       u_int32_t h1 = seed, k1;
+       int i;
+
+       /* body */
+       blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);
+
+       for (i = -nblocks; i; i++) {
+               k1 = getblock32(blocks, i);
+
+               k1 *= MH3_X86_32_C1;
+               k1 = ROTL32(k1, 15);
+               k1 *= MH3_X86_32_C2;
+
+               h1 ^= k1;
+               h1 = ROTL32(h1, 13);
+               h1 = h1 * 5 + 0xe6546b64;
+       }
+
+       /* tail */
+       tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
+       k1 = 0;
+
+       switch (len & 3) {
+       case 3:
+               k1 ^= tail[2] << 16;
+               /* FALLTHRU */
+       case 2:
+               k1 ^= tail[1] << 8;
+               /* FALLTHRU */
+       case 1:
+               k1 ^= tail[0];
+               k1 *= MH3_X86_32_C1;
+               k1 = ROTL32(k1, 15);
+               k1 *= MH3_X86_32_C2;
+               h1 ^= k1;
+       }
+
+       /* finalization */
+       h1 ^= len;
+
+       h1 = mh3_fmix32(h1);
+
+       return (h1);
+}
+
+/*
+ * MurmurHash3_x64_128
+ */
+#define        MH3_X64_128_C1  0x87c37b91114253d5LLU
+#define        MH3_X64_128_C2  0x4cf5ad432745937fLLU
+
+u_int32_t
+net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
+{
+       const u_int8_t *data = (const u_int8_t *)key;
+       const u_int32_t nblocks = len / 16;
+       const u_int64_t *blocks;
+       const u_int8_t *tail;
+       u_int64_t h1 = seed, k1;
+       u_int64_t h2 = seed, k2;
+       u_int32_t i;
+
+       /* body */
+       blocks = (const u_int64_t *)(const void *)data;
+
+       for (i = 0; i < nblocks; i++) {
+               k1 = getblock64(blocks, i * 2 + 0);
+               k2 = getblock64(blocks, i * 2 + 1);
+
+               k1 *= MH3_X64_128_C1;
+               k1 = ROTL64(k1, 31);
+               k1 *= MH3_X64_128_C2;
+               h1 ^= k1;
+
+               h1 = ROTL64(h1, 27);
+               h1 += h2;
+               h1 = h1 * 5 + 0x52dce729;
+
+               k2 *= MH3_X64_128_C2;
+               k2 = ROTL64(k2, 33);
+               k2 *= MH3_X64_128_C1;
+               h2 ^= k2;
+
+               h2 = ROTL64(h2, 31);
+               h2 += h1;
+               h2 = h2 * 5 + 0x38495ab5;
+       }
+
+       /* tail */
+       tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
+       k1 = 0;
+       k2 = 0;
+
+       switch (len & 15) {
+       case 15:
+               k2 ^= ((u_int64_t)tail[14]) << 48;
+               /* FALLTHRU */
+       case 14:
+               k2 ^= ((u_int64_t)tail[13]) << 40;
+               /* FALLTHRU */
+       case 13:
+               k2 ^= ((u_int64_t)tail[12]) << 32;
+               /* FALLTHRU */
+       case 12:
+               k2 ^= ((u_int64_t)tail[11]) << 24;
+               /* FALLTHRU */
+       case 11:
+               k2 ^= ((u_int64_t)tail[10]) << 16;
+               /* FALLTHRU */
+       case 10:
+               k2 ^= ((u_int64_t)tail[9]) << 8;
+               /* FALLTHRU */
+       case 9:
+               k2 ^= ((u_int64_t)tail[8]) << 0;
+               k2 *= MH3_X64_128_C2;
+               k2 = ROTL64(k2, 33);
+               k2 *= MH3_X64_128_C1;
+               h2 ^= k2;
+               /* FALLTHRU */
+       case 8:
+               k1 ^= ((u_int64_t)tail[7]) << 56;
+               /* FALLTHRU */
+       case 7:
+               k1 ^= ((u_int64_t)tail[6]) << 48;
+               /* FALLTHRU */
+       case 6:
+               k1 ^= ((u_int64_t)tail[5]) << 40;
+               /* FALLTHRU */
+       case 5:
+               k1 ^= ((u_int64_t)tail[4]) << 32;
+               /* FALLTHRU */
+       case 4:
+               k1 ^= ((u_int64_t)tail[3]) << 24;
+               /* FALLTHRU */
+       case 3:
+               k1 ^= ((u_int64_t)tail[2]) << 16;
+               /* FALLTHRU */
+       case 2:
+               k1 ^= ((u_int64_t)tail[1]) << 8;
+               /* FALLTHRU */
+       case 1:
+               k1 ^= ((u_int64_t)tail[0]) << 0;
+               k1 *= MH3_X64_128_C1;
+               k1 = ROTL64(k1, 31);
+               k1 *= MH3_X64_128_C2;
+               h1 ^= k1;
+       }
+
+       /* finalization */
+       h1 ^= len;
+       h2 ^= len;
+
+       h1 += h2;
+       h2 += h1;
+
+       h1 = mh3_fmix64(h1);
+       h2 = mh3_fmix64(h2);
+
+       h1 += h2;
+       h2 += h1;
+
+       /* throw away all but the lowest 32 bits */
+       return (h1 & 0xffffffff);
+}
+
+#define        JHASH_INIT      0xdeadbeef
+
+#define        JHASH_MIX(a, b, c) {                    \
+       a -= c;  a ^= ROTL32(c, 4);   c += b;   \
+       b -= a;  b ^= ROTL32(a, 6);   a += c;   \
+       c -= b;  c ^= ROTL32(b, 8);   b += a;   \
+       a -= c;  a ^= ROTL32(c, 16);  c += b;   \
+       b -= a;  b ^= ROTL32(a, 19);  a += c;   \
+       c -= b;  c ^= ROTL32(b, 4);   b += a;   \
+}
+
+#define        JHASH_FINAL(a, b, c) {                  \
+       c ^= b;  c -= ROTL32(b, 14);            \
+       a ^= c;  a -= ROTL32(c, 11);            \
+       b ^= a;  b -= ROTL32(a, 25);            \
+       c ^= b;  c -= ROTL32(b, 16);            \
+       a ^= c;  a -= ROTL32(c, 4);             \
+       b ^= a;  b -= ROTL32(a, 14);            \
+       c ^= b;  c -= ROTL32(b, 24);            \
+}
+
+#if BYTE_ORDER == BIG_ENDIAN
+/*
+ * hashbig()
+ */
+u_int32_t
+net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
+{
+       u_int32_t a, b, c;
+
+       /* Set up the internal state */
+       a = b = c = JHASH_INIT + len + seed;
+
+       if (ALIGNED32(key)) {
+               /* read 32-bit chunks */
+               const u_int32_t *k = (const u_int32_t *)key;
+
+               /*
+                * all but last block:
+                * aligned reads and affect 32 bits of (a,b,c)
+                */
+               while (len > 12) {
+                       a += k[0];
+                       b += k[1];
+                       c += k[2];
+                       JHASH_MIX(a, b, c);
+                       len -= 12;
+                       k += 3;
+               }
+
+               /*
+                * handle the last (probably partial) block
+                *
+                * "k[2] & 0xffffff00" actually reads beyond the end of the
+                * string, but then masks off the part it's not allowed to
+                * read.  Because the string is aligned, the illegal read is
+                * in the same word as the rest of the string.  The masking
+                * trick does make the hash noticeably faster for short
+                * strings (like English words).
+                */
+               switch (len) {
+               case 12:
+                       c += k[2];
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 11:
+                       c += k[2] & 0xffffff00;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 10:
+                       c += k[2] & 0xffff0000;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 9:
+                       c += k[2] & 0xff000000;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 8:
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 7:
+                       b += k[1] & 0xffffff00;
+                       a += k[0];
+                       break;
+
+               case 6:
+                       b += k[1] & 0xffff0000;
+                       a += k[0];
+                       break;
+
+               case 5:
+                       b += k[1] & 0xff000000;
+                       a += k[0];
+                       break;
+
+               case 4:
+                       a += k[0];
+                       break;
+
+               case 3:
+                       a += k[0] & 0xffffff00;
+                       break;
+
+               case 2:
+                       a += k[0] & 0xffff0000;
+                       break;
+
+               case 1:
+                       a += k[0] & 0xff000000;
+                       break;
+
+               case 0:
+                       /* zero length requires no mixing */
+                       return (c);
+               }
+
+               JHASH_FINAL(a, b, c);
+
+               return (c);
+       }
+
+       /* need to read the key one byte at a time */
+       const u_int8_t *k = (const u_int8_t *)key;
+
+       /* all but the last block: affect some 32 bits of (a,b,c) */
+       while (len > 12) {
+               a += ((u_int32_t)k[0]) << 24;
+               a += ((u_int32_t)k[1]) << 16;
+               a += ((u_int32_t)k[2]) << 8;
+               a += ((u_int32_t)k[3]);
+               b += ((u_int32_t)k[4]) << 24;
+               b += ((u_int32_t)k[5]) << 16;
+               b += ((u_int32_t)k[6]) << 8;
+               b += ((u_int32_t)k[7]);
+               c += ((u_int32_t)k[8]) << 24;
+               c += ((u_int32_t)k[9]) << 16;
+               c += ((u_int32_t)k[10]) << 8;
+               c += ((u_int32_t)k[11]);
+               JHASH_MIX(a, b, c);
+               len -= 12;
+               k += 12;
+       }
+
+       /* last block: affect all 32 bits of (c) */
+       switch (len) {
+       case 12:
+               c += k[11];
+               /* FALLTHRU */
+       case 11:
+               c += ((u_int32_t)k[10]) << 8;
+               /* FALLTHRU */
+       case 10:
+               c += ((u_int32_t)k[9]) << 16;
+               /* FALLTHRU */
+       case 9:
+               c += ((u_int32_t)k[8]) << 24;
+               /* FALLTHRU */
+       case 8:
+               b += k[7];
+               /* FALLTHRU */
+       case 7:
+               b += ((u_int32_t)k[6]) << 8;
+               /* FALLTHRU */
+       case 6:
+               b += ((u_int32_t)k[5]) << 16;
+               /* FALLTHRU */
+       case 5:
+               b += ((u_int32_t)k[4]) << 24;
+               /* FALLTHRU */
+       case 4:
+               a += k[3];
+               /* FALLTHRU */
+       case 3:
+               a += ((u_int32_t)k[2]) << 8;
+               /* FALLTHRU */
+       case 2:
+               a += ((u_int32_t)k[1]) << 16;
+               /* FALLTHRU */
+       case 1:
+               a += ((u_int32_t)k[0]) << 24;
+               break;
+
+       case 0:
+               /* zero length requires no mixing */
+               return (c);
+       }
+
+       JHASH_FINAL(a, b, c);
+
+       return (c);
+}
+#else /* LITTLE_ENDIAN */
+/*
+ * hashlittle()
+ */
+u_int32_t
+net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
+{
+       u_int32_t a, b, c;
+
+       /* Set up the internal state */
+       a = b = c = JHASH_INIT + len + seed;
+
+#if defined(__i386__) || defined(__x86_64__)
+       /*
+        * On i386/x86_64, it is faster to read 32-bit chunks when the key
+        * is 32-bit aligned or not even 16-bit aligned, and to use 16-bit
+        * reads only when the key is exactly 16-bit aligned.
+        */
+       if (ALIGNED32(key) || !ALIGNED16(key)) {
+#else /* !defined(__i386__) && !defined(__x86_64__) */
+       if (ALIGNED32(key)) {
+#endif /* !defined(__i386__) && !defined(__x86_64__) */
+               /* read 32-bit chunks */
+               const u_int32_t *k = (const u_int32_t *)key;
+
+               /*
+                * all but last block:
+                * aligned reads and affect 32 bits of (a,b,c)
+                */
+               while (len > 12) {
+                       a += k[0];
+                       b += k[1];
+                       c += k[2];
+                       JHASH_MIX(a, b, c);
+                       len -= 12;
+                       k += 3;
+               }
+
+               /*
+                * handle the last (probably partial) block
+                *
+                * "k[2] & 0xffffff" actually reads beyond the end of the
+                * string, but then masks off the part it's not allowed
+                * to read.  Because the string is aligned, the masked-off
+                * tail is in the same word as the rest of the string.
+                * The masking trick does make the hash noticeably faster
+                * for short strings (like English words).
+                */
+               switch (len) {
+               case 12:
+                       c += k[2];
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 11:
+                       c += k[2] & 0xffffff;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 10:
+                       c += k[2] & 0xffff;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 9:
+                       c += k[2] & 0xff;
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 8:
+                       b += k[1];
+                       a += k[0];
+                       break;
+
+               case 7:
+                       b += k[1] & 0xffffff;
+                       a += k[0];
+                       break;
+
+               case 6:
+                       b += k[1] & 0xffff;
+                       a += k[0];
+                       break;
+
+               case 5:
+                       b += k[1] & 0xff;
+                       a += k[0];
+                       break;
+
+               case 4:
+                       a += k[0];
+                       break;
+
+               case 3:
+                       a += k[0] & 0xffffff;
+                       break;
+
+               case 2:
+                       a += k[0] & 0xffff;
+                       break;
+
+               case 1:
+                       a += k[0] & 0xff;
+                       break;
+
+               case 0:
+                       /* zero length requires no mixing */
+                       return (c);
+               }
+
+               JHASH_FINAL(a, b, c);
+
+               return (c);
+       }
+#if !defined(__i386__) && !defined(__x86_64__)
+       else if (ALIGNED16(key)) {
+#endif /* !defined(__i386__) && !defined(__x86_64__) */
+               /* read 16-bit chunks */
+               const u_int16_t *k = (const u_int16_t *)key;
+               const u_int8_t *k8;
+
+               /* all but last block: aligned reads and different mixing */
+               while (len > 12) {
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       b += k[2] + (((u_int32_t)k[3]) << 16);
+                       c += k[4] + (((u_int32_t)k[5]) << 16);
+                       JHASH_MIX(a, b, c);
+                       len -= 12;
+                       k += 6;
+               }
+
+               /* handle the last (probably partial) block */
+               k8 = (const u_int8_t *)k;
+               switch (len) {
+               case 12:
+                       c += k[4] + (((u_int32_t)k[5]) << 16);
+                       b += k[2] + (((u_int32_t)k[3]) << 16);
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       break;
+
+               case 11:
+                       c += ((u_int32_t)k8[10]) << 16;
+                       /* FALLTHRU */
+               case 10:
+                       c += k[4];
+                       b += k[2] + (((u_int32_t)k[3]) << 16);
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       break;
+
+               case 9:
+                       c += k8[8];
+                       /* FALLTHRU */
+               case 8:
+                       b += k[2] + (((u_int32_t)k[3]) << 16);
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       break;
+
+               case 7:
+                       b += ((u_int32_t)k8[6]) << 16;
+                       /* FALLTHRU */
+               case 6:
+                       b += k[2];
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       break;
+
+               case 5:
+                       b += k8[4];
+                       /* FALLTHRU */
+               case 4:
+                       a += k[0] + (((u_int32_t)k[1]) << 16);
+                       break;
+
+               case 3:
+                       a += ((u_int32_t)k8[2]) << 16;
+                       /* FALLTHRU */
+               case 2:
+                       a += k[0];
+                       break;
+
+               case 1:
+                       a += k8[0];
+                       break;
+
+               case 0:
+                       /* zero length requires no mixing */
+                       return (c);
+               }
+
+               JHASH_FINAL(a, b, c);
+
+               return (c);
+#if !defined(__i386__) && !defined(__x86_64__)
+       }
+
+       /* need to read the key one byte at a time */
+       const u_int8_t *k = (const u_int8_t *)key;
+
+       /* all but the last block: affect some 32 bits of (a,b,c) */
+       while (len > 12) {
+               a += k[0];
+               a += ((u_int32_t)k[1]) << 8;
+               a += ((u_int32_t)k[2]) << 16;
+               a += ((u_int32_t)k[3]) << 24;
+               b += k[4];
+               b += ((u_int32_t)k[5]) << 8;
+               b += ((u_int32_t)k[6]) << 16;
+               b += ((u_int32_t)k[7]) << 24;
+               c += k[8];
+               c += ((u_int32_t)k[9]) << 8;
+               c += ((u_int32_t)k[10]) << 16;
+               c += ((u_int32_t)k[11]) << 24;
+               JHASH_MIX(a, b, c);
+               len -= 12;
+               k += 12;
+       }
+
+       /* last block: affect all 32 bits of (c) */
+       switch (len) {
+       case 12:
+               c += ((u_int32_t)k[11]) << 24;
+               /* FALLTHRU */
+       case 11:
+               c += ((u_int32_t)k[10]) << 16;
+               /* FALLTHRU */
+       case 10:
+               c += ((u_int32_t)k[9]) << 8;
+               /* FALLTHRU */
+       case 9:
+               c += k[8];
+               /* FALLTHRU */
+       case 8:
+               b += ((u_int32_t)k[7]) << 24;
+               /* FALLTHRU */
+       case 7:
+               b += ((u_int32_t)k[6]) << 16;
+               /* FALLTHRU */
+       case 6:
+               b += ((u_int32_t)k[5]) << 8;
+               /* FALLTHRU */
+       case 5:
+               b += k[4];
+               /* FALLTHRU */
+       case 4:
+               a += ((u_int32_t)k[3]) << 24;
+               /* FALLTHRU */
+       case 3:
+               a += ((u_int32_t)k[2]) << 16;
+               /* FALLTHRU */
+       case 2:
+               a += ((u_int32_t)k[1]) << 8;
+               /* FALLTHRU */
+       case 1:
+               a += k[0];
+               break;
+
+       case 0:
+               /* zero length requires no mixing */
+               return (c);
+       }
+
+       JHASH_FINAL(a, b, c);
+
+       return (c);
+#endif /* !defined(__i386__) && !defined(__x86_64__) */
+}
+#endif /* LITTLE_ENDIAN */
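
The mh3_fmix32()/mh3_fmix64() finalizers above exist to avalanche the accumulated state so that near-identical keys still spread across buckets. A standalone userland sketch (illustrative only) that duplicates the fmix32 constants and counts how many output bits flip when a single input bit changes:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t
    fmix32(uint32_t h)
    {
            h ^= h >> 16;
            h *= 0x85ebca6b;
            h ^= h >> 13;
            h *= 0xc2b2ae35;
            h ^= h >> 16;
            return (h);
    }

    int
    main(void)
    {
            uint32_t x = 0x12345678;
            /* XOR of the two hashes has a 1 wherever an output bit flipped */
            uint32_t diff = fmix32(x) ^ fmix32(x ^ 1);

            printf("%d of 32 output bits changed\n",
                __builtin_popcount(diff));      /* expect roughly 16 */
            return (0);
    }
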
diff --git a/bsd/net/flowhash.h b/bsd/net/flowhash.h
new file mode 100644 (file)
index 0000000..dc7c3b5
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_FLOWHASH_H_
+#define        _NET_FLOWHASH_H_
+
+#include <sys/types.h>
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/*
+ * If the 32-bit hash value is too large, use this macro to truncate
+ * it to n bits; masking is a faster operation than modulus.
+ */
+#define        HASHMASK(n)     ((1UL << (n)) - 1)
+
+/*
+ * Returns a 32-bit hash value.  Hashes capable of returning more
+ * bits currently have their results truncated to 32 bits.
+ */
+typedef u_int32_t net_flowhash_fn_t(const void *, u_int32_t, const u_int32_t);
+
+extern net_flowhash_fn_t *net_flowhash;
+extern net_flowhash_fn_t net_flowhash_mh3_x86_32;
+extern net_flowhash_fn_t net_flowhash_mh3_x64_128;
+extern net_flowhash_fn_t net_flowhash_jhash;
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* _NET_FLOWHASH_H_ */
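
A sketch of this header's intended use: hash whatever contiguous bytes identify a flow, then mask the result down to a table index with HASHMASK. The 5-tuple struct below is hypothetical; kernel callers pass their own key bytes:

    #include <net/flowhash.h>

    /* Hypothetical flow key; any contiguous run of bytes works. */
    struct flow_key {
            u_int32_t src_ip, dst_ip;
            u_int16_t src_port, dst_port;
            u_int8_t  proto;
    } __attribute__((__packed__));

    static u_int32_t
    flow_bucket(const struct flow_key *fk, u_int32_t seed)
    {
            /* net_flowhash dispatches to the per-arch hash selected in
             * flowhash.c; HASHMASK(8) keeps the low 8 bits -> 256 slots */
            return (net_flowhash(fk, sizeof (*fk), seed) & HASHMASK(8));
    }
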
index 595fcaea994c6467ab3ad9585a3a9b411135ba9c..1097ff570257765b22e5e52c7d96b40a5a27f6fc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #if INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <security/mac_framework.h>
 #endif
 
+#if PF_ALTQ
+#include <net/altq/if_altq.h>
+#endif /* PF_ALTQ */
 
 /*
  * System initialization
@@ -132,8 +140,9 @@ lck_attr_t  *ifa_mtx_attr;
 lck_grp_t      *ifa_mtx_grp;
 static lck_grp_attr_t  *ifa_mtx_grp_attr;
 
+static int ifioctl_ifreq(struct socket *, u_long, struct ifreq *,
+    struct proc *);
 static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space);
-static void if_qflush(struct ifqueue *);
 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
 void if_rtproto_del(struct ifnet *ifp, int protocol);
 
@@ -151,7 +160,6 @@ static int  if_clone_list(int count, int * total, user_addr_t dst);
 
 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
 
-int    ifqmaxlen = IFQ_MAXLEN;
 struct ifnethead ifnet_head = TAILQ_HEAD_INITIALIZER(ifnet_head);
 
 static int     if_cloners_count;
@@ -413,8 +421,8 @@ if_next_index(void)
                }
 
                /* switch to the new tables and size */
-               ifnet_addrs = (struct ifaddr **)new_ifnet_addrs;
-               ifindex2ifnet = (struct ifnet **)new_ifindex2ifnet;
+               ifnet_addrs = (struct ifaddr **)(void *)new_ifnet_addrs;
+               ifindex2ifnet = (struct ifnet **)(void *)new_ifindex2ifnet;
                if_indexlim = new_if_indexlim;
 
                /* release the old data */
@@ -951,7 +959,8 @@ ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
         * so do that if we can.
         */
        if (af == AF_LINK) {
-               const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
+               const struct sockaddr_dl *sdl =
+                   (const struct sockaddr_dl *)(uintptr_t)(size_t)addr;
                if (sdl->sdl_index && sdl->sdl_index <= if_index) {
                        ifa = ifnet_addrs[sdl->sdl_index - 1];
                        if (ifa != NULL)
@@ -1220,6 +1229,7 @@ if_updown(
        int i;
        struct ifaddr **ifa;
        struct timespec tv;
+       struct ifclassq *ifq = &ifp->if_snd;
 
        /* Wait until no one else is changing the up/down state */
        while ((ifp->if_eflags & IFEF_UPDOWNCHANGE) != 0) {
@@ -1229,16 +1239,16 @@ if_updown(
                msleep(&ifp->if_eflags, NULL, 0, "if_updown", &tv);
                ifnet_lock_exclusive(ifp);
        }
-       
+
        /* Verify that the interface isn't already in the right state */
        if ((!up && (ifp->if_flags & IFF_UP) == 0) ||
                (up && (ifp->if_flags & IFF_UP) == IFF_UP)) {
                return;
        }
-       
+
        /* Indicate that the up/down state is changing */
        ifp->if_eflags |= IFEF_UPDOWNCHANGE;
-       
+
        /* Mark interface up or down */
        if (up) {
                ifp->if_flags |= IFF_UP;
@@ -1246,9 +1256,9 @@ if_updown(
        else {
                ifp->if_flags &= ~IFF_UP;
        }
-       
+
        ifnet_touch_lastchange(ifp);
-       
+
        /* Drop the lock to notify addresses and route */
        ifnet_lock_done(ifp);
        if (ifnet_get_address_list(ifp, &ifa) == 0) {
@@ -1258,15 +1268,19 @@ if_updown(
                ifnet_free_address_list(ifa);
        }
        rt_ifmsg(ifp);
-       
-       /* Aquire the lock to clear the changing flag and flush the send queue */
-       ifnet_lock_exclusive(ifp);
+
        if (!up)
-               if_qflush(&ifp->if_snd);
+               if_qflush(ifp, 0);
+
+       /* Inform all transmit queues about the new link state */
+       IFCQ_LOCK(ifq);
+       ifnet_update_sndq(ifq, up ? CLASSQ_EV_LINK_UP : CLASSQ_EV_LINK_DOWN);
+       IFCQ_UNLOCK(ifq);
+
+       /* Acquire the lock to clear the changing flag */
+       ifnet_lock_exclusive(ifp);
        ifp->if_eflags &= ~IFEF_UPDOWNCHANGE;
        wakeup(&ifp->if_eflags);
-       
-       return;
 }
 
 /*
@@ -1298,19 +1312,61 @@ if_up(
 /*
  * Flush an interface queue.
  */
-static void
-if_qflush(struct ifqueue *ifq)
+void
+if_qflush(struct ifnet *ifp, int ifq_locked)
 {
-       struct mbuf *m, *n;
+       struct ifclassq *ifq = &ifp->if_snd;
+
+       if (!ifq_locked)
+               IFCQ_LOCK(ifq);
+
+       if (IFCQ_IS_ENABLED(ifq))
+               IFCQ_PURGE(ifq);
+#if PF_ALTQ
+       if (IFCQ_IS_DRAINING(ifq))
+               ifq->ifcq_drain = 0;
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+               ALTQ_PURGE(IFCQ_ALTQ(ifq));
+#endif /* PF_ALTQ */
 
-       n = ifq->ifq_head;
-       while ((m = n) != 0) {
-               n = m->m_act;
-               m_freem(m);
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+
+       if (!ifq_locked)
+               IFCQ_UNLOCK(ifq);
+}
+
+void
+if_qflush_sc(struct ifnet *ifp, mbuf_svc_class_t sc, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes, int ifq_locked)
+{
+       struct ifclassq *ifq = &ifp->if_snd;
+       u_int32_t cnt = 0, len = 0;
+       u_int32_t a_cnt = 0, a_len = 0;
+
+       VERIFY(sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sc));
+       VERIFY(flow != 0);
+
+       if (!ifq_locked)
+               IFCQ_LOCK(ifq);
+
+       if (IFCQ_IS_ENABLED(ifq))
+               IFCQ_PURGE_SC(ifq, sc, flow, cnt, len);
+#if PF_ALTQ
+       if (IFCQ_IS_DRAINING(ifq)) {
+               VERIFY((signed)(ifq->ifcq_drain - cnt) >= 0);
+               ifq->ifcq_drain -= cnt;
        }
-       ifq->ifq_head = NULL;
-       ifq->ifq_tail = NULL;
-       ifq->ifq_len = 0;
+       if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
+               ALTQ_PURGE_SC(IFCQ_ALTQ(ifq), sc, flow, a_cnt, a_len);
+#endif /* PF_ALTQ */
+
+       if (!ifq_locked)
+               IFCQ_UNLOCK(ifq);
+
+       if (packets != NULL)
+               *packets = cnt + a_cnt;
+       if (bytes != NULL)
+               *bytes = len + a_len;
 }
 
 /*
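
The new if_qflush_sc() reports how much it removed through its packets/bytes out-parameters. A hedged sketch of a hypothetical in-kernel caller (not from this patch) purging one flow's background-class packets and logging the result:

    /* Hypothetical caller, using the declarations added above.
     * Note: flowhash must be nonzero (VERIFY'd by if_qflush_sc). */
    static void
    example_flush_flow(struct ifnet *ifp, u_int32_t flowhash)
    {
            u_int32_t packets = 0, bytes = 0;

            /* sc = MBUF_SC_BK, queue not yet locked (ifq_locked = 0) */
            if_qflush_sc(ifp, MBUF_SC_BK, flowhash, &packets, &bytes, 0);
            printf("flushed %u packets (%u bytes)\n", packets, bytes);
    }
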
@@ -1371,7 +1427,7 @@ struct ifnet *
 if_withname(struct sockaddr *sa)
 {
        char ifname[IFNAMSIZ+1];
-       struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
+       struct sockaddr_dl *sdl = (struct sockaddr_dl *)(void *)sa;
 
        if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
             (sdl->sdl_nlen > IFNAMSIZ) )
@@ -1396,69 +1452,466 @@ if_withname(struct sockaddr *sa)
 int
 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 {
-       struct ifnet *ifp;
-       struct ifreq *ifr;
-       struct ifstat *ifs;
+       char ifname[IFNAMSIZ + 1];
+       struct ifnet *ifp = NULL;
+       struct ifstat *ifs = NULL;
        int error = 0;
-       short oif_flags;
-       struct kev_msg        ev_msg;
-       struct net_event_data ev_data;
 
-       bzero(&ev_data, sizeof(struct net_event_data));
-       bzero(&ev_msg, sizeof(struct kev_msg));
+       bzero(ifname, sizeof (ifname));
+
+       /*
+        * ioctls which don't require an ifp, or which take a struct ifreq
+        */
        switch (cmd) {
-       case OSIOCGIFCONF32:
-       case SIOCGIFCONF32: {
-               struct ifconf32 *ifc = (struct ifconf32 *)data;
-               return (ifconf(cmd, CAST_USER_ADDR_T(ifc->ifc_req),
-                   &ifc->ifc_len));
-               /* NOTREACHED */
+       case OSIOCGIFCONF32:                    /* struct ifconf32 */
+       case SIOCGIFCONF32: {                   /* struct ifconf32 */
+               struct ifconf32 ifc;
+               bcopy(data, &ifc, sizeof (ifc));
+               error = ifconf(cmd, CAST_USER_ADDR_T(ifc.ifc_req),
+                   &ifc.ifc_len);
+               bcopy(&ifc, data, sizeof (ifc));
+               goto done;
        }
-       case SIOCGIFCONF64:
-       case OSIOCGIFCONF64: {
-               struct ifconf64 *ifc = (struct ifconf64 *)data;
-               return (ifconf(cmd, ifc->ifc_req, &ifc->ifc_len));
-               /* NOTREACHED */
+
+       case SIOCGIFCONF64:                     /* struct ifconf64 */
+       case OSIOCGIFCONF64: {                  /* struct ifconf64 */
+               struct ifconf64 ifc;
+               bcopy(data, &ifc, sizeof (ifc));
+               error = ifconf(cmd, ifc.ifc_req, &ifc.ifc_len);
+               bcopy(&ifc, data, sizeof (ifc));
+               goto done;
        }
+
+#if IF_CLONE_LIST
+       case SIOCIFGCLONERS32: {                /* struct if_clonereq32 */
+               struct if_clonereq32 ifcr;
+               bcopy(data, &ifcr, sizeof (ifcr));
+               error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total,
+                   CAST_USER_ADDR_T(ifcr.ifcru_buffer));
+               bcopy(&ifcr, data, sizeof (ifcr));
+               goto done;
+       }
+
+       case SIOCIFGCLONERS64: {                /* struct if_clonereq64 */
+               struct if_clonereq64 ifcr;
+               bcopy(data, &ifcr, sizeof (ifcr));
+               error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total,
+                   ifcr.ifcru_buffer);
+               bcopy(&ifcr, data, sizeof (ifcr));
+               goto done;
        }
-       ifr = (struct ifreq *)data;
+#endif /* IF_CLONE_LIST */
+
+       case SIOCSIFDSTADDR:                    /* struct ifreq */
+       case SIOCSIFADDR:                       /* struct ifreq */
+       case SIOCSIFBRDADDR:                    /* struct ifreq */
+       case SIOCSIFNETMASK:                    /* struct ifreq */
+       case OSIOCGIFADDR:                      /* struct ifreq */
+       case OSIOCGIFDSTADDR:                   /* struct ifreq */
+       case OSIOCGIFBRDADDR:                   /* struct ifreq */
+       case OSIOCGIFNETMASK:                   /* struct ifreq */
+       case SIOCSIFKPI:                        /* struct ifreq */
+               if (so->so_proto == NULL) {
+                       error = EOPNOTSUPP;
+                       goto done;
+               }
+               /* FALLTHRU */
+       case SIOCIFCREATE:                      /* struct ifreq */
+       case SIOCIFCREATE2:                     /* struct ifreq */
+       case SIOCIFDESTROY:                     /* struct ifreq */
+       case SIOCGIFFLAGS:                      /* struct ifreq */
+       case SIOCGIFEFLAGS:                     /* struct ifreq */
+       case SIOCGIFCAP:                        /* struct ifreq */
+       case SIOCGIFMAC:                        /* struct ifreq */
+       case SIOCGIFMETRIC:                     /* struct ifreq */
+       case SIOCGIFMTU:                        /* struct ifreq */
+       case SIOCGIFPHYS:                       /* struct ifreq */
+       case SIOCSIFFLAGS:                      /* struct ifreq */
+       case SIOCSIFCAP:                        /* struct ifreq */
+       case SIOCSIFPHYS:                       /* struct ifreq */
+       case SIOCSIFMTU:                        /* struct ifreq */
+       case SIOCADDMULTI:                      /* struct ifreq */
+       case SIOCDELMULTI:                      /* struct ifreq */
+       case SIOCDIFPHYADDR:                    /* struct ifreq */
+       case SIOCSIFMEDIA:                      /* struct ifreq */
+       case SIOCSIFGENERIC:                    /* struct ifreq */
+       case SIOCSIFLLADDR:                     /* struct ifreq */
+       case SIOCSIFALTMTU:                     /* struct ifreq */
+       case SIOCSIFVLAN:                       /* struct ifreq */
+       case SIOCSIFBOND:                       /* struct ifreq */
+       case SIOCGIFPSRCADDR:                   /* struct ifreq */
+       case SIOCGIFPDSTADDR:                   /* struct ifreq */
+       case SIOCGIFGENERIC:                    /* struct ifreq */
+       case SIOCGIFDEVMTU:                     /* struct ifreq */
+       case SIOCGIFVLAN:                       /* struct ifreq */
+       case SIOCGIFBOND:                       /* struct ifreq */
+       case SIOCGIFWAKEFLAGS:                  /* struct ifreq */
+       case SIOCGIFGETRTREFCNT:                /* struct ifreq */
+       case SIOCSIFOPPORTUNISTIC:              /* struct ifreq */
+       case SIOCGIFOPPORTUNISTIC:              /* struct ifreq */
+       case SIOCGIFLINKQUALITYMETRIC: {        /* struct ifreq */
+               struct ifreq ifr;
+               bcopy(data, &ifr, sizeof (ifr));
+               error = ifioctl_ifreq(so, cmd, &ifr, p);
+               bcopy(&ifr, data, sizeof (ifr));
+               goto done;
+       }
+       }
+
+       /*
+        * ioctls which require ifp.  Note that we hold dlil_if_lock()
+        * here to ensure that the ifnet, if found, has been fully attached.
+        */
+       dlil_if_lock();
+       switch (cmd) {
+       case SIOCSIFPHYADDR: {                  /* struct ifaliasreq */
+               bcopy(((struct ifaliasreq *)(void *)data)->ifra_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+#if INET6
+       case SIOCSIFPHYADDR_IN6_32: {           /* struct in6_aliasreq_32 */
+               bcopy(((struct in6_aliasreq_32 *)(void *)data)->ifra_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCSIFPHYADDR_IN6_64: {           /* struct in6_aliasreq_64 */
+               bcopy(((struct in6_aliasreq_64 *)(void *)data)->ifra_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+#endif
+
+       case SIOCSLIFPHYADDR:                   /* struct if_laddrreq */
+       case SIOCGLIFPHYADDR: {                 /* struct if_laddrreq */
+               bcopy(((struct if_laddrreq *)(void *)data)->iflr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCGIFSTATUS: {                   /* struct ifstat */
+               ifs = _MALLOC(sizeof (*ifs), M_DEVBUF, M_WAITOK);
+               if (ifs == NULL) {
+                       error = ENOMEM;
+                       dlil_if_unlock();
+                       goto done;
+               }
+               bcopy(data, ifs, sizeof (*ifs));
+               ifs->ifs_name[IFNAMSIZ - 1] = '\0';
+               ifp = ifunit(ifs->ifs_name);
+               break;
+       }
+
+       case SIOCGIFMEDIA32: {                  /* struct ifmediareq32 */
+               bcopy(((struct ifmediareq32 *)(void *)data)->ifm_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCGIFMEDIA64: {                  /* struct ifmediareq64 */
+               bcopy(((struct ifmediareq64 *)(void *)data)->ifm_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCSIFDESC:                       /* struct if_descreq */
+       case SIOCGIFDESC: {                     /* struct if_descreq */
+               bcopy(((struct if_descreq *)(void *)data)->ifdr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCSIFLINKPARAMS:                 /* struct if_linkparamsreq */
+       case SIOCGIFLINKPARAMS: {               /* struct if_linkparamsreq */
+               bcopy(((struct if_linkparamsreq *)(void *)data)->iflpr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCGIFQUEUESTATS: {               /* struct if_qstatsreq */
+               bcopy(((struct if_qstatsreq *)(void *)data)->ifqr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       case SIOCSIFTHROTTLE:                   /* struct if_throttlereq */
+       case SIOCGIFTHROTTLE: {                 /* struct if_throttlereq */
+               bcopy(((struct if_throttlereq *)(void *)data)->ifthr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+
+       default: {
+               /*
+                * This is a bad assumption, but the code seems to
+                * have been doing this in the past; caveat emptor.
+                */
+               bcopy(((struct ifreq *)(void *)data)->ifr_name,
+                   ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+       }
+       }
+       dlil_if_unlock();
+
+       if (ifp == NULL) {
+               error = ENXIO;
+               goto done;
+       }
+
+       switch (cmd) {
+       case SIOCSIFPHYADDR:                    /* struct ifaliasreq */
+#if INET6
+       case SIOCSIFPHYADDR_IN6_32:             /* struct in6_aliasreq_32 */
+       case SIOCSIFPHYADDR_IN6_64:             /* struct in6_aliasreq_64 */
+#endif
+       case SIOCSLIFPHYADDR:                   /* struct if_laddrreq */
+               error = proc_suser(p);
+               if (error != 0)
+                       break;
+
+               error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+                   cmd, data);
+               if (error != 0)
+                       break;
+
+               ifnet_touch_lastchange(ifp);
+               break;
+
+       case SIOCGIFSTATUS:                     /* struct ifstat */
+               VERIFY(ifs != NULL);
+               ifs->ascii[0] = '\0';
+
+               error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+                   cmd, (caddr_t)ifs);
+
+               bcopy(ifs, data, sizeof (*ifs));
+               break;
+
+       case SIOCGLIFPHYADDR:                   /* struct if_laddrreq */
+       case SIOCGIFMEDIA32:                    /* struct ifmediareq32 */
+       case SIOCGIFMEDIA64:                    /* struct ifmediareq64 */
+               error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+                   cmd, data);
+               break;
+
+       case SIOCSIFDESC: {                     /* struct if_descreq */
+               struct if_descreq *ifdr = (struct if_descreq *)(void *)data;
+               u_int32_t ifdr_len;
+
+               if ((error = proc_suser(p)) != 0)
+                       break;
+
+               ifnet_lock_exclusive(ifp);
+               bcopy(&ifdr->ifdr_len, &ifdr_len, sizeof (ifdr_len));
+               if (ifdr_len > sizeof (ifdr->ifdr_desc) ||
+                   ifdr_len > ifp->if_desc.ifd_maxlen) {
+                       error = EINVAL;
+                       ifnet_lock_done(ifp);
+                       break;
+               }
+
+               bzero(ifp->if_desc.ifd_desc, ifp->if_desc.ifd_maxlen);
+               if ((ifp->if_desc.ifd_len = ifdr_len) > 0) {
+                       bcopy(ifdr->ifdr_desc, ifp->if_desc.ifd_desc,
+                           MIN(ifdr_len, ifp->if_desc.ifd_maxlen));
+               }
+               ifnet_lock_done(ifp);
+               break;
+       }
+
+       case SIOCGIFDESC: {                     /* struct if_descreq */
+               struct if_descreq *ifdr = (struct if_descreq *)(void *)data;
+               u_int32_t ifdr_len;
+
+               ifnet_lock_shared(ifp);
+               ifdr_len = MIN(ifp->if_desc.ifd_len, sizeof (ifdr->ifdr_desc));
+               bcopy(&ifdr_len, &ifdr->ifdr_len, sizeof (ifdr_len));
+               bzero(&ifdr->ifdr_desc, sizeof (ifdr->ifdr_desc));
+               if (ifdr_len > 0) {
+                       bcopy(ifp->if_desc.ifd_desc, ifdr->ifdr_desc, ifdr_len);
+               }
+               ifnet_lock_done(ifp);
+               break;
+       }
+
+       case SIOCSIFLINKPARAMS: {               /* struct if_linkparamsreq */
+               struct if_linkparamsreq *iflpr =
+                   (struct if_linkparamsreq *)(void *)data;
+               struct ifclassq *ifq = &ifp->if_snd;
+               struct tb_profile tb = { 0, 0, 0 };
+
+               if ((error = proc_suser(p)) != 0)
+                       break;
+
+               IFCQ_LOCK(ifq);
+               if (!IFCQ_IS_READY(ifq)) {
+                       error = ENXIO;
+                       IFCQ_UNLOCK(ifq);
+                       break;
+               }
+               bcopy(&iflpr->iflpr_output_tbr_rate, &tb.rate,
+                   sizeof (tb.rate));
+               bcopy(&iflpr->iflpr_output_tbr_percent, &tb.percent,
+                   sizeof (tb.percent));
+               error = ifclassq_tbr_set(ifq, &tb, TRUE);
+               IFCQ_UNLOCK(ifq);
+               break;
+       }
+
+       case SIOCGIFLINKPARAMS: {               /* struct if_linkparamsreq */
+               struct if_linkparamsreq *iflpr =
+                   (struct if_linkparamsreq *)(void *)data;
+               struct ifclassq *ifq = &ifp->if_snd;
+               u_int32_t sched_type = PKTSCHEDT_NONE, flags = 0;
+               u_int64_t tbr_bw = 0, tbr_pct = 0;
+
+               IFCQ_LOCK(ifq);
+#if PF_ALTQ
+               if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
+                       sched_type = IFCQ_ALTQ(ifq)->altq_type;
+                       flags |= IFLPRF_ALTQ;
+               } else
+#endif /* PF_ALTQ */
+               {
+                       if (IFCQ_IS_ENABLED(ifq))
+                               sched_type = ifq->ifcq_type;
+               }
+               bcopy(&sched_type, &iflpr->iflpr_output_sched,
+                   sizeof (iflpr->iflpr_output_sched));
+
+               if (IFCQ_TBR_IS_ENABLED(ifq)) {
+                       tbr_bw = ifq->ifcq_tbr.tbr_rate_raw;
+                       tbr_pct = ifq->ifcq_tbr.tbr_percent;
+               }
+               bcopy(&tbr_bw, &iflpr->iflpr_output_tbr_rate,
+                   sizeof (iflpr->iflpr_output_tbr_rate));
+               bcopy(&tbr_pct, &iflpr->iflpr_output_tbr_percent,
+                   sizeof (iflpr->iflpr_output_tbr_percent));
+               IFCQ_UNLOCK(ifq);
+
+               if (ifp->if_output_sched_model ==
+                   IFNET_SCHED_MODEL_DRIVER_MANAGED)
+                       flags |= IFLPRF_DRVMANAGED;
+               bcopy(&flags, &iflpr->iflpr_flags, sizeof (iflpr->iflpr_flags));
+               bcopy(&ifp->if_output_bw, &iflpr->iflpr_output_bw,
+                   sizeof (iflpr->iflpr_output_bw));
+               bcopy(&ifp->if_input_bw, &iflpr->iflpr_input_bw,
+                   sizeof (iflpr->iflpr_input_bw));
+               break;
+       }
+
+       case SIOCGIFQUEUESTATS: {               /* struct if_qstatsreq */
+               struct if_qstatsreq *ifqr = (struct if_qstatsreq *)(void *)data;
+               u_int32_t ifqr_len, ifqr_slot;
+
+               bcopy(&ifqr->ifqr_slot, &ifqr_slot, sizeof (ifqr_slot));
+               bcopy(&ifqr->ifqr_len, &ifqr_len, sizeof (ifqr_len));
+               error = ifclassq_getqstats(&ifp->if_snd, ifqr_slot,
+                   ifqr->ifqr_buf, &ifqr_len);
+               if (error != 0)
+                       ifqr_len = 0;
+               bcopy(&ifqr_len, &ifqr->ifqr_len, sizeof (ifqr_len));
+               break;
+       }
+
+       case SIOCSIFTHROTTLE: {                 /* struct if_throttlereq */
+               struct if_throttlereq *ifthr =
+                   (struct if_throttlereq *)(void *)data;
+               u_int32_t ifthr_level;
+
+               /*
+                * XXX: Use priv_check_cred() instead of root check?
+                */
+               if ((error = proc_suser(p)) != 0)
+                        break;
+
+               bcopy(&ifthr->ifthr_level, &ifthr_level, sizeof (ifthr_level));
+               error = ifnet_set_throttle(ifp, ifthr_level);
+               if (error == EALREADY)
+                       error = 0;
+               break;
+       }
+
+       case SIOCGIFTHROTTLE: {                 /* struct if_throttlereq */
+               struct if_throttlereq *ifthr =
+                   (struct if_throttlereq *)(void *)data;
+               u_int32_t ifthr_level;
+
+               if ((error = ifnet_get_throttle(ifp, &ifthr_level)) == 0) {
+                       bcopy(&ifthr_level, &ifthr->ifthr_level,
+                           sizeof (ifthr_level));
+               }
+               break;
+       }
+
+       default:
+               if (so->so_proto == NULL) {
+                       error = EOPNOTSUPP;
+                       break;
+               }
+
+               socket_lock(so, 1);
+               error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
+                   data, ifp, p));
+               socket_unlock(so, 1);
+
+               if (error == EOPNOTSUPP || error == ENOTSUP) {
+                       error = ifnet_ioctl(ifp,
+                           so->so_proto->pr_domain->dom_family, cmd, data);
+               }
+               break;
+       }
+
+done:
+       if (ifs != NULL)
+               _FREE(ifs, M_DEVBUF);
+
+       return (error);
+}
+
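A pattern worth noting in the handlers above: scalar fields of the user-supplied request are moved with bcopy() rather than dereferenced directly, since the `data` buffer passed down the ioctl path is only guaranteed byte alignment. A minimal sketch of that copy-in/copy-out discipline (the request structure and compute_foo() are hypothetical):

	/*
	 * Sketch only: read a possibly misaligned 32-bit field from an
	 * ioctl argument buffer, transform it, and write it back.  A
	 * direct assignment through the cast pointer could fault on CPUs
	 * with strict alignment requirements.
	 */
	struct if_fooreq {
		char		iffr_name[IFNAMSIZ];
		u_int32_t	iffr_value;
	};

	static int
	handle_siocxiffoo(struct ifnet *ifp, caddr_t data)
	{
		struct if_fooreq *iffr = (struct if_fooreq *)(void *)data;
		u_int32_t value;

		bcopy(&iffr->iffr_value, &value, sizeof (value)); /* copy in */
		value = compute_foo(ifp, value);    /* hypothetical helper */
		bcopy(&value, &iffr->iffr_value, sizeof (value)); /* copy out */
		return (0);
	}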
+static int
+ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
+{
+       struct ifnet *ifp;
+       u_long ocmd = cmd;
+       int error = 0;
+       struct kev_msg ev_msg;
+       struct net_event_data ev_data;
+
+       bzero(&ev_data, sizeof (struct net_event_data));
+       bzero(&ev_msg, sizeof (struct kev_msg));
+
+       ifr->ifr_name[IFNAMSIZ - 1] = '\0';
+
        switch (cmd) {
        case SIOCIFCREATE:
        case SIOCIFCREATE2:
                 error = proc_suser(p);
                 if (error)
                         return (error);
-                return if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
-                        cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
+                return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
+                   cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
        case SIOCIFDESTROY:
                error = proc_suser(p);
                if (error)
                        return (error);
-               return if_clone_destroy(ifr->ifr_name);
-#if IF_CLONE_LIST
-       case SIOCIFGCLONERS32: {
-               struct if_clonereq32 *ifcr = (struct if_clonereq32 *)data;
-               return (if_clone_list(ifcr->ifcr_count, &ifcr->ifcr_total,
-                   CAST_USER_ADDR_T(ifcr->ifcru_buffer)));
-               /* NOTREACHED */
-
-       }
-       case SIOCIFGCLONERS64: {
-               struct if_clonereq64 *ifcr = (struct if_clonereq64 *)data;
-               return (if_clone_list(ifcr->ifcr_count, &ifcr->ifcr_total,
-                   ifcr->ifcru_buffer));
-               /* NOTREACHED */
-           }
-#endif /* IF_CLONE_LIST */
+               return (if_clone_destroy(ifr->ifr_name));
        }
 
-       /*
-        * ioctls which require ifp.  Note that we acquire dlil_ifnet_lock
-        * here to ensure that the ifnet, if found, has been fully attached.
-        */
-       dlil_if_lock();
        ifp = ifunit(ifr->ifr_name);
-       dlil_if_unlock();
        if (ifp == NULL)
                return (ENXIO);
 
@@ -1469,6 +1922,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                ifnet_lock_done(ifp);
                break;
 
+       case SIOCGIFEFLAGS:
+               ifnet_lock_shared(ifp);
+               ifr->ifr_eflags = ifp->if_eflags;
+               ifnet_lock_done(ifp);
+               break;
+
        case SIOCGIFCAP:
                ifnet_lock_shared(ifp);
                ifr->ifr_reqcap = ifp->if_capabilities;
@@ -1499,6 +1958,24 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                ifnet_lock_done(ifp);
                break;
 
+       case SIOCGIFWAKEFLAGS:
+               ifnet_lock_shared(ifp);
+               ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp);
+               ifnet_lock_done(ifp);
+               break;
+
+       case SIOCGIFGETRTREFCNT:
+               ifnet_lock_shared(ifp);
+               ifr->ifr_route_refcnt = ifp->if_route_refcnt;
+               ifnet_lock_done(ifp);
+               break;
+
+       case SIOCGIFLINKQUALITYMETRIC:
+               ifnet_lock_shared(ifp);
+               ifr->ifr_link_quality_metric = ifp->if_lqm;
+               ifnet_lock_done(ifp);
+               break;
+
        case SIOCSIFFLAGS:
                error = proc_suser(p);
                if (error != 0)
@@ -1512,7 +1989,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                 * for the SIOCSIFFLAGS case.
                 */
                (void) ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
 
                /*
                 * Send the event even upon error from the driver because
@@ -1544,7 +2021,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                        break;
                }
                error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
 
                ifnet_touch_lastchange(ifp);
                break;
@@ -1584,7 +2061,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                        break;
 
                error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
                if (error != 0)
                        break;
 
@@ -1604,9 +2081,9 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                ifnet_touch_lastchange(ifp);
                break;
 
-       case SIOCSIFMTU:
-       {
+       case SIOCSIFMTU: {
                u_int32_t oldmtu = ifp->if_mtu;
+               struct ifclassq *ifq = &ifp->if_snd;
 
                error = proc_suser(p);
                if (error != 0)
@@ -1621,7 +2098,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                        break;
                }
                error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
                if (error != 0)
                        break;
 
@@ -1651,6 +2128,10 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 #if INET6
                        nd6_setmtu(ifp);
 #endif
+                       /* Inform all transmit queues about the new MTU */
+                       IFCQ_LOCK(ifq);
+                       ifnet_update_sndq(ifq, CLASSQ_EV_LINK_MTU);
+                       IFCQ_UNLOCK(ifq);
                }
                break;
        }
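For reference, the user-space side of this case is a plain SIOCSIFMTU ioctl on a datagram socket; a minimal sketch (interface name supplied by the caller):

	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>

	int
	set_mtu(const char *name, int mtu)
	{
		struct ifreq ifr;
		int s, err;

		if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
			return (-1);
		memset(&ifr, 0, sizeof (ifr));
		strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
		ifr.ifr_mtu = mtu;	/* requires root (proc_suser above) */
		err = ioctl(s, SIOCSIFMTU, &ifr);
		close(s);
		return (err);
	}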
@@ -1710,13 +2191,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                ifnet_touch_lastchange(ifp);
                break;
 
-       case SIOCSIFPHYADDR:
        case SIOCDIFPHYADDR:
-#if INET6
-       case SIOCSIFPHYADDR_IN6_32:
-       case SIOCSIFPHYADDR_IN6_64:
-#endif
-       case SIOCSLIFPHYADDR:
        case SIOCSIFMEDIA:
        case SIOCSIFGENERIC:
        case SIOCSIFLLADDR:
@@ -1728,60 +2203,41 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                        break;
 
                error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
                if (error != 0)
                        break;
 
                ifnet_touch_lastchange(ifp);
                break;
 
-       case SIOCGIFSTATUS:
-               ifs = (struct ifstat *)data;
-               ifs->ascii[0] = '\0';
-
        case SIOCGIFPSRCADDR:
        case SIOCGIFPDSTADDR:
-       case SIOCGLIFPHYADDR:
-       case SIOCGIFMEDIA32:
-       case SIOCGIFMEDIA64:
        case SIOCGIFGENERIC:
        case SIOCGIFDEVMTU:
-               error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
-               break;
-
        case SIOCGIFVLAN:
        case SIOCGIFBOND:
                error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-                   cmd, data);
+                   cmd, (caddr_t)ifr);
                break;
 
-       case SIOCGIFWAKEFLAGS:
-               ifnet_lock_shared(ifp);
-               ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp);
-               ifnet_lock_done(ifp);
+       case SIOCSIFOPPORTUNISTIC:
+       case SIOCGIFOPPORTUNISTIC:
+               error = ifnet_getset_opportunistic(ifp, cmd, ifr, p);
                break;
 
-       case SIOCGIFGETRTREFCNT:
-               ifnet_lock_shared(ifp);
-               ifr->ifr_route_refcnt = ifp->if_route_refcnt;
-               ifnet_lock_done(ifp);
-               break;
-
-       default:
-               oif_flags = ifp->if_flags;
-               if (so->so_proto == NULL) {
-                       error = EOPNOTSUPP;
-                       break;
-               }
-           {
-               u_long ocmd = cmd;
-
-               switch (cmd) {
-               case SIOCSIFDSTADDR:
-               case SIOCSIFADDR:
-               case SIOCSIFBRDADDR:
-               case SIOCSIFNETMASK:
+       case SIOCSIFDSTADDR:
+       case SIOCSIFADDR:
+       case SIOCSIFBRDADDR:
+       case SIOCSIFNETMASK:
+       case OSIOCGIFADDR:
+       case OSIOCGIFDSTADDR:
+       case OSIOCGIFBRDADDR:
+       case OSIOCGIFNETMASK:
+       case SIOCSIFKPI:
+               VERIFY(so->so_proto != NULL);
+
+               if (cmd == SIOCSIFDSTADDR || cmd == SIOCSIFADDR ||
+                   cmd == SIOCSIFBRDADDR || cmd == SIOCSIFNETMASK) {
 #if BYTE_ORDER != BIG_ENDIAN
                        if (ifr->ifr_addr.sa_family == 0 &&
                            ifr->ifr_addr.sa_len < 16) {
@@ -1792,27 +2248,19 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                        if (ifr->ifr_addr.sa_len == 0)
                                ifr->ifr_addr.sa_len = 16;
 #endif
-                       break;
-
-               case OSIOCGIFADDR:
-                       cmd = SIOCGIFADDR;
-                       break;
-
-               case OSIOCGIFDSTADDR:
-                       cmd = SIOCGIFDSTADDR;
-                       break;
-
-               case OSIOCGIFBRDADDR:
-                       cmd = SIOCGIFBRDADDR;
-                       break;
-
-               case OSIOCGIFNETMASK:
-                       cmd = SIOCGIFNETMASK;
+               } else if (cmd == OSIOCGIFADDR) {
+                       cmd = SIOCGIFADDR;      /* struct ifreq */
+               } else if (cmd == OSIOCGIFDSTADDR) {
+                       cmd = SIOCGIFDSTADDR;   /* struct ifreq */
+               } else if (cmd == OSIOCGIFBRDADDR) {
+                       cmd = SIOCGIFBRDADDR;   /* struct ifreq */
+               } else if (cmd == OSIOCGIFNETMASK) {
+                       cmd = SIOCGIFNETMASK;   /* struct ifreq */
                }
 
                socket_lock(so, 1);
                error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
-                   data, ifp, p));
+                   (caddr_t)ifr, ifp, p));
                socket_unlock(so, 1);
 
                switch (ocmd) {
@@ -1820,22 +2268,28 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                case OSIOCGIFDSTADDR:
                case OSIOCGIFBRDADDR:
                case OSIOCGIFNETMASK:
-                       *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
-
+                       bcopy(&ifr->ifr_addr.sa_family, &ifr->ifr_addr,
+                           sizeof (u_short));
                }
-           }
+
                if (cmd == SIOCSIFKPI) {
                        int temperr = proc_suser(p);
                        if (temperr != 0)
                                error = temperr;
                }
 
-               if (error == EOPNOTSUPP || error == ENOTSUP)
+               if (error == EOPNOTSUPP || error == ENOTSUP) {
                        error = ifnet_ioctl(ifp,
-                           so->so_proto->pr_domain->dom_family, cmd, data);
-
+                           so->so_proto->pr_domain->dom_family, cmd,
+                           (caddr_t)ifr);
+               }
                break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
+
        return (error);
 }
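The OSIOCGIF* cases above exist for pre-4.3BSD compatibility: the old sockaddr layout ("osockaddr") had no sa_len and began with a 16-bit family field, which is why the handler copies sa_family over the first two bytes of the result. A sketch of the legacy layout, shown for illustration only (the real definition lives in the compatibility headers):

	struct osockaddr_sketch {
		u_short	sa_family;	/* 16-bit family, no sa_len byte */
		char	sa_data[14];	/* address value */
	};

	/*
	 * bcopy(&ifr->ifr_addr.sa_family, &ifr->ifr_addr, sizeof (u_short))
	 * rewrites the leading sa_len/sa_family byte pair of the modern
	 * sockaddr into the single 16-bit family the old ABI expects.
	 */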
 
@@ -1959,7 +2413,7 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
                        addrs++;
                        if (cmd == OSIOCGIFCONF32 || cmd == OSIOCGIFCONF64) {
                                struct osockaddr *osa =
-                                        (struct osockaddr *)&ifr.ifr_addr;
+                                   (struct osockaddr *)(void *)&ifr.ifr_addr;
                                ifr.ifr_addr = *sa;
                                osa->sa_family = sa->sa_family;
                                error = copyout((caddr_t)&ifr, ifrp,
@@ -2018,7 +2472,7 @@ if_allmulti(struct ifnet *ifp, int onswitch)
 {
        int error = 0;
        int     modified = 0;
-       
+
        ifnet_lock_exclusive(ifp);
 
        if (onswitch) {
@@ -2036,7 +2490,7 @@ if_allmulti(struct ifnet *ifp, int onswitch)
                }
        }
        ifnet_lock_done(ifp);
-       
+
        if (modified)
                error = ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
 
@@ -2345,56 +2799,58 @@ if_addmulti_doesexist(struct ifnet *ifp, const struct sockaddr *sa,
  * Radar 3642395, make sure all multicasts are in a standard format.
  */
 static struct sockaddr*
-copy_and_normalize(
-       const struct sockaddr   *original)
+copy_and_normalize(const struct sockaddr *original)
 {
-       int                                     alen = 0;
+       int                     alen = 0;
        const u_char            *aptr = NULL;
        struct sockaddr         *copy = NULL;
        struct sockaddr_dl      *sdl_new = NULL;
-       int                                     len = 0;
-       
+       int                     len = 0;
+
        if (original->sa_family != AF_LINK &&
-               original->sa_family != AF_UNSPEC) {
+           original->sa_family != AF_UNSPEC) {
                /* Just make a copy */
-               MALLOC(copy, struct sockaddr*, original->sa_len, M_IFADDR, M_WAITOK);
+               MALLOC(copy, struct sockaddr*, original->sa_len,
+                   M_IFADDR, M_WAITOK);
                if (copy != NULL)
                        bcopy(original, copy, original->sa_len);
-               return copy;
+               return (copy);
        }
-       
+
        switch (original->sa_family) {
                case AF_LINK: {
-                       const struct sockaddr_dl        *sdl_original =
-                                                                                       (const struct sockaddr_dl*)original;
-                       
-                       if (sdl_original->sdl_nlen + sdl_original->sdl_alen + sdl_original->sdl_slen +
-                               offsetof(struct sockaddr_dl, sdl_data) > sdl_original->sdl_len)
-                               return NULL;
-                       
+                       const struct sockaddr_dl *sdl_original =
+                           (struct sockaddr_dl*)(uintptr_t)(size_t)original;
+
+                       if (sdl_original->sdl_nlen + sdl_original->sdl_alen +
+                           sdl_original->sdl_slen +
+                           offsetof(struct sockaddr_dl, sdl_data) >
+                           sdl_original->sdl_len)
+                               return (NULL);
+
                        alen = sdl_original->sdl_alen;
                        aptr = CONST_LLADDR(sdl_original);
                }
                break;
-               
+
                case AF_UNSPEC: {
                        if (original->sa_len < ETHER_ADDR_LEN +
-                               offsetof(struct sockaddr, sa_data)) {
-                               return NULL;
+                           offsetof(struct sockaddr, sa_data)) {
+                               return (NULL);
                        }
-                       
+
                        alen = ETHER_ADDR_LEN;
                        aptr = (const u_char*)original->sa_data;
                }
                break;
        }
-       
+
        if (alen == 0 || aptr == NULL)
-               return NULL;
-       
+               return (NULL);
+
        len = alen + offsetof(struct sockaddr_dl, sdl_data);
        MALLOC(sdl_new, struct sockaddr_dl*, len, M_IFADDR, M_WAITOK);
-       
+
        if (sdl_new != NULL) {
                bzero(sdl_new, len);
                sdl_new->sdl_len = len;
@@ -2402,8 +2858,8 @@ copy_and_normalize(
                sdl_new->sdl_alen = alen;
                bcopy(aptr, LLADDR(sdl_new), alen);
        }
-       
-       return (struct sockaddr*)sdl_new;
+
+       return ((struct sockaddr*)sdl_new);
 }
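A usage sketch for the normalizer above (hypothetical caller): an AF_UNSPEC Ethernet multicast comes back as a freshly allocated AF_LINK sockaddr_dl, which the caller owns and must release to M_IFADDR:

	struct sockaddr sa;
	struct sockaddr *dl;

	bzero(&sa, sizeof (sa));
	sa.sa_family = AF_UNSPEC;
	sa.sa_len = offsetof(struct sockaddr, sa_data) + ETHER_ADDR_LEN;
	bcopy(ether_mcast, sa.sa_data, ETHER_ADDR_LEN);	/* hypothetical source */

	dl = copy_and_normalize(&sa);
	if (dl != NULL) {
		/* ... use dl as a normalized link-layer address ... */
		FREE(dl, M_IFADDR);
	}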
 
 /*
@@ -2888,9 +3344,9 @@ if_data_internal_to_if_data(struct ifnet *ifp,
 #define COPYFIELD(fld)         if_data->fld = if_data_int->fld
 #define COPYFIELD32(fld)       if_data->fld = (u_int32_t)(if_data_int->fld)
 /* compiler will cast down to 32-bit */
-#define        COPYFIELD32_ATOMIC(fld) do {                                            \
-       atomic_get_64(if_data->fld,                                             \
-           (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);                 \
+#define        COPYFIELD32_ATOMIC(fld) do {                                    \
+       atomic_get_64(if_data->fld,                                     \
+           (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);         \
 } while (0)
 
        COPYFIELD(ifi_type);
@@ -2923,7 +3379,7 @@ if_data_internal_to_if_data(struct ifnet *ifp,
 
        COPYFIELD(ifi_recvtiming);
        COPYFIELD(ifi_xmittiming);
-       
+
        if_data->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec;
        if_data->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec;
 
@@ -2947,9 +3403,9 @@ if_data_internal_to_if_data64(struct ifnet *ifp,
 {
 #pragma unused(ifp)
 #define COPYFIELD64(fld)       if_data64->fld = if_data_int->fld
-#define COPYFIELD64_ATOMIC(fld) do {                                           \
-       atomic_get_64(if_data64->fld,                                           \
-       (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);                     \
+#define COPYFIELD64_ATOMIC(fld) do {                                   \
+       atomic_get_64(if_data64->fld,                                   \
+           (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);         \
 } while (0)
 
        COPYFIELD64(ifi_type);
@@ -2996,11 +3452,16 @@ __private_extern__ void
 if_copy_traffic_class(struct ifnet *ifp,
     struct if_traffic_class *if_tc)
 {
-#define COPY_IF_TC_FIELD64_ATOMIC(fld) do {                            \
-       atomic_get_64(if_tc->fld,                                                       \
-       (u_int64_t *)(void *)(uintptr_t)&ifp->if_tc.fld);       \
+#define COPY_IF_TC_FIELD64_ATOMIC(fld) do {                    \
+       atomic_get_64(if_tc->fld,                               \
+           (u_int64_t *)(void *)(uintptr_t)&ifp->if_tc.fld);   \
 } while (0)
 
+       bzero(if_tc, sizeof (*if_tc));
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_ibepackets);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_ibebytes);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_obepackets);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_obebytes);
        COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkpackets);
        COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkbytes);
        COPY_IF_TC_FIELD64_ATOMIC(ifi_obkpackets);
@@ -3013,10 +3474,83 @@ if_copy_traffic_class(struct ifnet *ifp,
        COPY_IF_TC_FIELD64_ATOMIC(ifi_ivobytes);
        COPY_IF_TC_FIELD64_ATOMIC(ifi_ovopackets);
        COPY_IF_TC_FIELD64_ATOMIC(ifi_ovobytes);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_ipvpackets);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_ipvbytes);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_opvpackets);
+       COPY_IF_TC_FIELD64_ATOMIC(ifi_opvbytes);
 
 #undef COPY_IF_TC_FIELD64_ATOMIC
 }
 
+void
+if_copy_data_extended(struct ifnet *ifp, struct if_data_extended *if_de)
+{
+#define COPY_IF_DE_FIELD64_ATOMIC(fld) do {                    \
+       atomic_get_64(if_de->fld,                               \
+           (u_int64_t *)(void *)(uintptr_t)&ifp->if_data.fld); \
+} while (0)
+
+       bzero(if_de, sizeof (*if_de));
+       COPY_IF_DE_FIELD64_ATOMIC(ifi_alignerrs);
+
+#undef COPY_IF_DE_FIELD64_ATOMIC
+}
+
+void
+if_copy_packet_stats(struct ifnet *ifp, struct if_packet_stats *if_ps)
+{
+#define COPY_IF_PS_TCP_FIELD64_ATOMIC(fld) do {                                \
+       atomic_get_64(if_ps->ifi_tcp_##fld,                             \
+           (u_int64_t *)(void *)(uintptr_t)&ifp->if_tcp_stat->fld);    \
+} while (0)
+
+#define COPY_IF_PS_UDP_FIELD64_ATOMIC(fld) do {                                \
+       atomic_get_64(if_ps->ifi_udp_##fld,                             \
+           (u_int64_t *)(void *)(uintptr_t)&ifp->if_udp_stat->fld);    \
+} while (0)
+
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(badformat);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(unspecv6);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(synfin);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(badformatipsec);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(noconnnolist);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(noconnlist);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(listbadsyn);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(icmp6unreach);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(deprecate6);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(ooopacket);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(rstinsynrcv);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(dospacket);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(cleanup);
+       COPY_IF_PS_TCP_FIELD64_ATOMIC(synwindow);
+
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(port_unreach);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(faithprefix);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(port0);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(badlength);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(badchksum);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(badmcast);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(cleanup);
+       COPY_IF_PS_UDP_FIELD64_ATOMIC(badipsec);
+
+#undef COPY_IF_PS_TCP_FIELD64_ATOMIC
+#undef COPY_IF_PS_UDP_FIELD64_ATOMIC
+}
+
+void
+if_copy_rxpoll_stats(struct ifnet *ifp, struct if_rxpoll_stats *if_rs)
+{
+       bzero(if_rs, sizeof (*if_rs));
+       if (!(ifp->if_eflags & IFEF_RXPOLL) || !ifnet_is_attached(ifp, 1))
+               return;
+
+       /* by now, ifnet will stay attached so if_inp must be valid */
+       VERIFY(ifp->if_inp != NULL);
+       bcopy(&ifp->if_inp->pstats, if_rs, sizeof (*if_rs));
+
+       /* Release the IO refcnt */
+       ifnet_decr_iorefcnt(ifp);
+}
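The ifnet_is_attached(ifp, 1)/ifnet_decr_iorefcnt() pairing above is the general pattern for pinning an ifnet across a read of its internals; a condensed sketch:

	if (ifnet_is_attached(ifp, 1)) {
		/*
		 * The refio=1 argument took an I/O reference, so the
		 * interface cannot fully detach here and fields such as
		 * if_inp remain valid.
		 */
		/* ... read interface state ... */
		ifnet_decr_iorefcnt(ifp);	/* drop the I/O reference */
	}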
 
 struct ifaddr *
 ifa_remref(struct ifaddr *ifa, int locked)
index a7974460cc237a20172d59695f998b5d7d993fea..8f80f7b2fb613973c455f0ebacf7330bcd53939c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #define KEV_DL_SUBCLASS 2
 
-#define KEV_DL_SIFFLAGS            1
-#define KEV_DL_SIFMETRICS   2
-#define KEV_DL_SIFMTU      3
-#define KEV_DL_SIFPHYS     4
-#define KEV_DL_SIFMEDIA            5
-#define KEV_DL_SIFGENERIC   6
-#define KEV_DL_ADDMULTI            7
-#define KEV_DL_DELMULTI            8
-#define KEV_DL_IF_ATTACHED  9
-#define KEV_DL_IF_DETACHING 10
-#define KEV_DL_IF_DETACHED  11
-#define KEV_DL_LINK_OFF            12
-#define KEV_DL_LINK_ON     13
-#define KEV_DL_PROTO_ATTACHED  14
-#define KEV_DL_PROTO_DETACHED  15
-#define KEV_DL_LINK_ADDRESS_CHANGED    16
-#define KEV_DL_WAKEFLAGS_CHANGED       17
-#define KEV_DL_IF_IDLE_ROUTE_REFCNT    18
-#define KEV_DL_IFCAP_CHANGED           19
+#define KEV_DL_SIFFLAGS                                1
+#define KEV_DL_SIFMETRICS                      2
+#define KEV_DL_SIFMTU                          3
+#define KEV_DL_SIFPHYS                         4
+#define KEV_DL_SIFMEDIA                                5
+#define KEV_DL_SIFGENERIC                      6
+#define KEV_DL_ADDMULTI                                7
+#define KEV_DL_DELMULTI                                8
+#define KEV_DL_IF_ATTACHED                     9
+#define KEV_DL_IF_DETACHING                    10
+#define KEV_DL_IF_DETACHED                     11
+#define KEV_DL_LINK_OFF                                12
+#define KEV_DL_LINK_ON                         13
+#define KEV_DL_PROTO_ATTACHED                  14
+#define KEV_DL_PROTO_DETACHED                  15
+#define KEV_DL_LINK_ADDRESS_CHANGED            16
+#define KEV_DL_WAKEFLAGS_CHANGED               17
+#define KEV_DL_IF_IDLE_ROUTE_REFCNT            18
+#define KEV_DL_IFCAP_CHANGED                   19
+#define KEV_DL_LINK_QUALITY_METRIC_CHANGED     20
+#define KEV_DL_NODE_PRESENCE                   21
+#define KEV_DL_NODE_ABSENCE                    22
+#define KEV_DL_MASTER_ELECTED                  23
 
 #include <net/if_var.h>
 #include <sys/types.h>
+
+#ifdef PRIVATE
+#include <net/if_dl.h>
+#include <netinet/in.h>
+#endif
 #endif
 
 #ifdef KERNEL_PRIVATE
@@ -142,24 +151,43 @@ struct if_clonereq32 {
 #define        IFF_ALTPHYS     IFF_LINK2       /* use alternate physical connection */
 #define        IFF_MULTICAST   0x8000          /* supports multicast */
 
-#ifdef KERNEL_PRIVATE
+#ifdef PRIVATE
 /* extended flags definitions:  (all bits are reserved for internal/future use) */
-#define IFEF_AUTOCONFIGURING   0x1
-#define IFEF_DVR_REENTRY_OK    0x20    /* When set, driver may be reentered from its own thread */
-#define IFEF_ACCEPT_RTADVD     0x40    /* set to accept IPv6 router advertisement on the interface */
-#define _IFEF_DETACHING                0x80    /* deprecated */
-#define IFEF_USEKPI            0x100   /* Set when interface is created through the KPIs */
+#define IFEF_AUTOCONFIGURING   0x1     /* allow BOOTP/DHCP replies to enter */
+#define _IFEF_DVR_REENTRY_OK   0x20    /* deprecated */
+#define IFEF_ACCEPT_RTADV      0x40    /* set to accept IPv6 Router Advertisement on the interface */
+#define IFEF_TXSTART           0x80    /* interface has start callback */
+#define IFEF_RXPOLL            0x100   /* interface supports opportunistic input polling */
 #define IFEF_VLAN              0x200   /* interface has one or more vlans */
 #define IFEF_BOND              0x400   /* interface is part of bond */
 #define        IFEF_ARPLL              0x800   /* ARP for IPv4LL addresses on this port */
 #define        IFEF_NOWINDOWSCALE      0x1000  /* Don't scale TCP window on iface */
 #define        IFEF_NOAUTOIPV6LL       0x2000  /* Interface IPv6 LinkLocal address not provided by kernel */
-#define IFEF_SERVICE_TRIGGERED 0x20000 /* interface is on-demand dynamically created/destroyed */
+#define        IFEF_IPV4_ROUTER        0x8000  /* set on internal-network-facing interface when in IPv4 router mode */
+#define        IFEF_IPV6_ROUTER        0x10000 /* set on internal-network-facing interface when in IPv6 router mode */
+#define IFEF_LOCALNET_PRIVATE  0x20000 /* local private network */
+#define IFEF_IPV6_ND6ALT       0x40000 /* alternative KPI for IPv6 neighbor discovery */
+#define IFEF_SERVICE_TRIGGERED IFEF_LOCALNET_PRIVATE
+#define        IFEF_RESTRICTED_RECV    0x80000 /* interface restricts inbound pkts */
+#define        IFEF_AWDL               0x100000   /* Apple Wireless Direct Link */
+#define        IFEF_NOACKPRI           0x200000   /* Don't use TCP ACK prioritization on interface */
 #define        IFEF_SENDLIST           0x10000000 /* Interface supports sending a list of packets */
 #define _IFEF_REUSE            0x20000000 /* deprecated */
 #define _IFEF_INUSE            0x40000000 /* deprecated */
 #define IFEF_UPDOWNCHANGE      0x80000000 /* Interface's up/down state is changing */
+#ifdef XNU_KERNEL_PRIVATE
+/*
+ * Current requirements for an AWDL interface.  Setting/clearing IFEF_AWDL
+ * will also trigger the setting/clearing of the rest of the flags.  Once
+ * IFEF_AWDL is set, the rest of the flags cannot be cleared, by definition.
+ */
+#define        IFEF_AWDL_MASK \
+       (IFEF_LOCALNET_PRIVATE | IFEF_IPV6_ND6ALT | IFEF_RESTRICTED_RECV | \
+       IFEF_AWDL)
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* PRIVATE */
 
+#ifdef KERNEL_PRIVATE
 /*
  * !!! NOTE !!!
  *
@@ -176,7 +204,6 @@ struct if_clonereq32 {
 #define        IFF_CANTCHANGE \
        (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\
            IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI)
-
 #endif /* KERNEL_PRIVATE */
 
 /*
@@ -213,7 +240,7 @@ struct if_clonereq32 {
 #define IFCAP_VALID (IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO | IFCAP_VLAN_MTU | \
        IFCAP_VLAN_HWTAGGING | IFCAP_JUMBO_MTU | IFCAP_AV)
 
-#define        IFQ_MAXLEN      50
+#define        IFQ_MAXLEN      128
 #define        IFNET_SLOWHZ    1               /* granularity is 1 second */
 
 /*
@@ -368,7 +395,7 @@ struct      ifreq {
                int     ifru_mtu;
                int     ifru_phys;
                int     ifru_media;
-               int     ifru_intval;
+               int     ifru_intval;
                caddr_t ifru_data;
 #ifdef KERNEL_PRIVATE
                u_int64_t ifru_data64;  /* 64-bit ifru_data */
@@ -377,7 +404,18 @@ struct     ifreq {
                struct  ifkpi   ifru_kpi;
                u_int32_t ifru_wake_flags;
                u_int32_t ifru_route_refcnt;
+#ifdef PRIVATE
+               int     ifru_link_quality_metric;
+#endif /* PRIVATE */
                int     ifru_cap[2];
+#ifdef PRIVATE
+               struct {
+                       uint32_t        ifo_flags;
+#define IFRIFOF_BLOCK_OPPORTUNISTIC    0x00000001
+                       uint32_t        ifo_inuse;
+               } ifru_opportunistic;
+               u_int64_t ifru_eflags;
+#endif /* PRIVATE */
        } ifr_ifru;
 #define        ifr_addr        ifr_ifru.ifru_addr      /* address */
 #define        ifr_dstaddr     ifr_ifru.ifru_dstaddr   /* other end of p-to-p link */
@@ -401,8 +439,15 @@ struct     ifreq {
 #define ifr_kpi                ifr_ifru.ifru_kpi
 #define ifr_wake_flags ifr_ifru.ifru_wake_flags /* wake capabilities of device */
 #define ifr_route_refcnt ifr_ifru.ifru_route_refcnt /* route references on interface */
+#ifdef PRIVATE
+#define ifr_link_quality_metric ifr_ifru.ifru_link_quality_metric /* LQM */
+#endif /* PRIVATE */
 #define ifr_reqcap      ifr_ifru.ifru_cap[0]    /* requested capabilities */
 #define ifr_curcap      ifr_ifru.ifru_cap[1]    /* current capabilities */
+#ifdef PRIVATE
+#define ifr_opportunistic      ifr_ifru.ifru_opportunistic    /* opportunistic parameters */
+#define        ifr_eflags      ifr_ifru.ifru_eflags    /* extended flags */
+#endif
 };
 
 #define        _SIZEOF_ADDR_IFREQ(ifr) \
@@ -562,6 +607,166 @@ struct if_laddrreq {
        struct sockaddr_storage dstaddr; /* out */
 };
 
+#ifdef PRIVATE
+/*
+ *     Link Quality Metrics
+ *
+ *     IFNET_LQM_THRESH_OFF    Metric is not available; device is off.
+ *     IFNET_LQM_THRESH_UNKNOWN Metric is not (yet) known.
+ *     IFNET_LQM_THRESH_POOR   Link quality is considered poor by driver.
+ *     IFNET_LQM_THRESH_GOOD   Link quality is considered good by driver.
+ */
+enum {
+       IFNET_LQM_THRESH_OFF            = (-2),
+       IFNET_LQM_THRESH_UNKNOWN        = (-1),
+       IFNET_LQM_THRESH_POOR           = 50,
+       IFNET_LQM_THRESH_GOOD           = 100
+};
+#ifdef XNU_KERNEL_PRIVATE
+#define        IFNET_LQM_MIN   IFNET_LQM_THRESH_OFF
+#define        IFNET_LQM_MAX   IFNET_LQM_THRESH_GOOD
+#endif /* XNU_KERNEL_PRIVATE */
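Paired with the SIOCGIFLINKQUALITYMETRIC handler added earlier, a user-space reader might classify the value against these thresholds; a sketch (this is a PRIVATE interface, and the banding between the thresholds is an assumed interpretation):

	struct ifreq ifr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
	if (s >= 0 && ioctl(s, SIOCGIFLINKQUALITYMETRIC, &ifr) == 0) {
		if (ifr.ifr_link_quality_metric >= IFNET_LQM_THRESH_GOOD)
			printf("link quality: good\n");
		else if (ifr.ifr_link_quality_metric > IFNET_LQM_THRESH_POOR)
			printf("link quality: fair\n"); /* banding assumed */
		else if (ifr.ifr_link_quality_metric >= 0)
			printf("link quality: poor\n");
		else
			printf("link quality: unknown or off\n");
	}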
+
+/*
+ * DLIL KEV_DL_LINK_QUALITY_METRIC_CHANGED structure
+ */
+struct kev_dl_link_quality_metric_data {
+       struct net_event_data   link_data;
+       int                     link_quality_metric;
+};
+
+#define        IF_DESCSIZE     128
+
+/*
+ * Structure for SIOC[SG]IFDESC
+ */
+struct if_descreq {
+       char                    ifdr_name[IFNAMSIZ];    /* interface name */
+       u_int32_t               ifdr_len;               /* up to IF_DESCSIZE */
+       u_int8_t                ifdr_desc[IF_DESCSIZE]; /* opaque data */
+};
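A sketch of attaching an opaque description with SIOC[SG]IFDESC (superuser is required on the set side, per the handler shown earlier; s is an AF_INET datagram socket):

	struct if_descreq ifdr;

	memset(&ifdr, 0, sizeof (ifdr));
	strlcpy(ifdr.ifdr_name, "en0", sizeof (ifdr.ifdr_name));
	ifdr.ifdr_len = strlen("uplink");
	memcpy(ifdr.ifdr_desc, "uplink", ifdr.ifdr_len);
	ioctl(s, SIOCSIFDESC, &ifdr);	/* set */

	memset(ifdr.ifdr_desc, 0, sizeof (ifdr.ifdr_desc));
	ioctl(s, SIOCGIFDESC, &ifdr);	/* get: ifdr_len rewritten by kernel */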
+
+/*
+ *     Output packet scheduling models
+ *
+ *     IFNET_SCHED_MODEL_NORMAL The default output packet scheduling model,
+ *             where the driver or media does not require a strict scheduling
+ *             strategy and the networking stack is free to choose the most
+ *             appropriate scheduling and queueing algorithm, including
+ *             shaping traffic.
+ *     IFNET_SCHED_MODEL_DRIVER_MANAGED The alternative output packet
+ *             scheduling model, where the driver or media requires a strict
+ *             scheduling strategy (e.g. 802.11 WMM) and the networking stack
+ *             is only responsible for creating multiple queues for the
+ *             corresponding service classes.
+ */
+enum {
+       IFNET_SCHED_MODEL_NORMAL                = 0,
+       IFNET_SCHED_MODEL_DRIVER_MANAGED        = 1,
+#ifdef XNU_KERNEL_PRIVATE
+       IFNET_SCHED_MODEL_MAX                   = 2,
+#endif /* XNU_KERNEL_PRIVATE */
+};
+
+/*
+ * Values for iflpr_flags
+ */
+#define        IFLPRF_ALTQ             0x1     /* configured via PF/ALTQ */
+#define        IFLPRF_DRVMANAGED       0x2     /* output queue scheduled by drv */
+
+/*
+ * Structure for SIOCGIFLINKPARAMS
+ */
+struct if_linkparamsreq {
+       char            iflpr_name[IFNAMSIZ];   /* interface name */
+       u_int32_t       iflpr_flags;
+       u_int32_t       iflpr_output_sched;
+       u_int64_t       iflpr_output_tbr_rate;
+       u_int32_t       iflpr_output_tbr_percent;
+       struct if_bandwidths iflpr_output_bw;
+       struct if_bandwidths iflpr_input_bw;
+};
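A read-side sketch for SIOCGIFLINKPARAMS, matching the handler shown earlier (s as before):

	struct if_linkparamsreq iflpr;

	memset(&iflpr, 0, sizeof (iflpr));
	strlcpy(iflpr.iflpr_name, "en0", sizeof (iflpr.iflpr_name));
	if (ioctl(s, SIOCGIFLINKPARAMS, &iflpr) == 0) {
		if (iflpr.iflpr_flags & IFLPRF_DRVMANAGED)
			printf("driver-managed output scheduling\n");
		printf("sched %u, TBR rate %llu\n", iflpr.iflpr_output_sched,
		    (unsigned long long)iflpr.iflpr_output_tbr_rate);
	}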
+
+/*
+ * Structure for SIOCGIFQUEUESTATS
+ */
+struct if_qstatsreq {
+       char            ifqr_name[IFNAMSIZ];    /* interface name */
+       u_int32_t       ifqr_slot;
+       void            *ifqr_buf               __attribute__((aligned(8)));
+       int              ifqr_len               __attribute__((aligned(8)));
+};
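The queue-stats request is caller-buffered: user space points ifqr_buf at scratch memory and the kernel rewrites ifqr_len with the number of bytes produced (the handler zeroes it on error). A sketch, where the buffer size and slot value are assumptions:

	char buf[2048];			/* scheduler-specific stats land here */
	struct if_qstatsreq ifqr;

	memset(&ifqr, 0, sizeof (ifqr));
	strlcpy(ifqr.ifqr_name, "en0", sizeof (ifqr.ifqr_name));
	ifqr.ifqr_slot = 0;		/* service class / queue index */
	ifqr.ifqr_buf = buf;
	ifqr.ifqr_len = sizeof (buf);
	if (ioctl(s, SIOCGIFQUEUESTATS, &ifqr) == 0)
		;			/* ifqr.ifqr_len: bytes written */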
+
+/*
+ * Node Proximity Metrics
+ */
+enum {
+       IFNET_NPM_THRESH_UNKNOWN        = (-1),
+       IFNET_NPM_THRESH_NEAR           = 30,
+       IFNET_NPM_THRESH_GENERAL        = 70,
+       IFNET_NPM_THRESH_FAR            = 100,
+};
+
+/*
+ *     Received Signal Strength Indication [special values]
+ *
+ *     IFNET_RSSI_UNKNOWN      Metric is not (yet) known.
+ */
+enum {
+       IFNET_RSSI_UNKNOWN      = ((-2147483647)-1),    /* INT32_MIN */
+};
+
+
+/*
+ * DLIL KEV_DL_NODE_PRESENCE/KEV_DL_NODE_ABSENCE event structures
+ */
+struct kev_dl_node_presence {
+       struct net_event_data   link_data;
+       struct sockaddr_in6     sin6_node_address;
+       struct sockaddr_dl      sdl_node_address;
+       int32_t                 rssi;
+       int                     link_quality_metric;
+       int                     node_proximity_metric;
+       u_int8_t                node_service_info[48];
+};
+
+struct kev_dl_node_absence {
+       struct net_event_data   link_data;
+       struct sockaddr_in6     sin6_node_address;
+       struct sockaddr_dl      sdl_node_address;
+};
+
+/*
+ * Structure for SIOC[SG]IFTHROTTLE
+ */
+struct if_throttlereq {
+       char            ifthr_name[IFNAMSIZ];   /* interface name */
+       u_int32_t       ifthr_level;
+};
+
+/*
+ *     Interface throttling levels
+ *
+ *     IFNET_THROTTLE_OFF The default throttling level (no throttling).
+ *             All service class queues operate normally according to the
+ *             standard packet scheduler configuration.
+ *     IFNET_THROTTLE_OPPORTUNISTIC One or more service class queues that
+ *             are responsible for managing "opportunistic" traffic are
+ *             suspended.  Packets enqueued on those queues will be dropped
+ *             and a flow advisory error will be generated for the data
+ *             source.  Existing packets in the queues will stay enqueued
+ *             until the interface is no longer throttled, or until they
+ *             are explicitly flushed.
+ */
+enum {
+       IFNET_THROTTLE_OFF                      = 0,
+       IFNET_THROTTLE_OPPORTUNISTIC            = 1,
+#ifdef XNU_KERNEL_PRIVATE
+       IFNET_THROTTLE_MAX                      = 2,
+#endif /* XNU_KERNEL_PRIVATE */
+};
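A set-side sketch for SIOCSIFTHROTTLE (superuser required; the handler shown earlier maps EALREADY to success, so re-applying the current level is harmless):

	struct if_throttlereq ifthr;

	memset(&ifthr, 0, sizeof (ifthr));
	strlcpy(ifthr.ifthr_name, "en0", sizeof (ifthr.ifthr_name));
	ifthr.ifthr_level = IFNET_THROTTLE_OPPORTUNISTIC;
	ioctl(s, SIOCSIFTHROTTLE, &ifthr); /* suspend opportunistic queues */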
+#endif /* PRIVATE */
+
 #ifdef KERNEL
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_IFADDR);
index 91790bd3a39f59d98d413678dc7c2a343f27c0df..1964a55240816ecad0f59b0027970faaa0892753 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -512,11 +512,15 @@ packet_buffer_allocate(int length)
     /* leave room for ethernet header */
     size = length + sizeof(struct ether_header);
     if (size > (int)MHLEN) {
-       /* XXX doesn't handle large payloads */
-       printf("bond: packet_buffer_allocate size %d > max %u\n", size, MHLEN);
-       return (NULL);
+       if (size > (int)MCLBYTES) {
+           printf("bond: packet_buffer_allocate size %d > max %u\n",
+                  size, MCLBYTES);
+           return (NULL);
+       }
+       m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
+    } else {
+       m = m_gethdr(M_WAITOK, MT_DATA);
     }
-    m = m_gethdr(M_WAITOK, MT_DATA);
     if (m == NULL) {
        return (NULL);
     }
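The revised allocator now selects the mbuf tier by payload size: anything that fits in a packet-header mbuf (MHLEN) uses m_gethdr(), larger frames up to a cluster (MCLBYTES) use m_getcl(), and anything bigger is still rejected. A condensed sketch of that decision:

	struct mbuf *m;

	if (size <= (int)MHLEN)
		m = m_gethdr(M_WAITOK, MT_DATA);	  /* plain mbuf */
	else if (size <= (int)MCLBYTES)
		m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR); /* mbuf + cluster */
	else
		m = NULL;				  /* still unsupported */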
@@ -1470,6 +1474,8 @@ bond_output(struct ifnet * ifp, struct mbuf * m)
     uint32_t                   h;
     ifbond_ref                 ifb;
     struct ifnet *             port_ifp = NULL;
+    int                                err;
+    struct flowadv             adv = { FADV_SUCCESS };
        
     if (m == 0) {
        return (0);
@@ -1517,7 +1523,17 @@ bond_output(struct ifnet * ifp, struct mbuf * m)
     }
     bond_bpf_output(ifp, m, bpf_func);
 
-    return (ifnet_output_raw(port_ifp, PF_BOND, m));
+    err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);
+
+    if (err == 0) {
+       if (adv.code == FADV_FLOW_CONTROLLED) {
+           err = EQFULL;
+       } else if (adv.code == FADV_SUSPENDED) {
+           err = EQSUSPENDED;
+       }
+    }
+
+    return (err);
 
  done:
     bond_unlock();
@@ -2561,10 +2577,6 @@ static int
 bond_set_promisc(__unused struct ifnet *ifp)
 {
     int                error = 0;
-    /*
-     * The benefit of doing this currently does not warrant
-     * the added code complexity. Do nothing and return.
-     */
     return (error);
 }
 
diff --git a/bsd/net/if_bond_internal.h b/bsd/net/if_bond_internal.h
new file mode 100644 (file)
index 0000000..99e6058
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _NET_IF_BOND_INTERNAL_H_
+
+#ifdef KERNEL_PRIVATE
+int bond_family_init(void) __attribute__((section("__TEXT, initcode")));
+#endif /* KERNEL_PRIVATE */
+
+#endif /* _NET_IF_BOND_INTERNAL_H_ */
+
index fb17c9a905f85af4838cb47b0e11f4467263d348..f92a3f24c0eb0b2e85245c8232e63f00d90592de 100644 (file)
@@ -95,8 +95,6 @@ struct if_bond_req {
 
 #pragma pack()
 
-#ifdef KERNEL_PRIVATE
-int bond_family_init(void) __attribute__((section("__TEXT, initcode")));
-#endif /* KERNEL_PRIVATE */
+#include <net/if_bond_internal.h>
 
 #endif /* _NET_IF_BOND_VAR_H_ */
index fd546fa0e048763f86623030300b99d957348b4f..db581d740625b3ac7dfe071d10c9e5006627b28f 100644 (file)
@@ -1,6 +1,5 @@
-/*     $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $       */
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -27,6 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+/*     $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $       */
 /*
  * Copyright 2001 Wasabi Systems, Inc.
  * All rights reserved.
  */
 
 #include <sys/cdefs.h>
-//__FBSDID("$FreeBSD$");
-
-//#include "opt_inet.h"
-//#include "opt_inet6.h"
-//#include "opt_carp.h"
 
 #define BRIDGE_DEBUG 1
 #ifndef BRIDGE_DEBUG
 #include <sys/time.h>
 #include <sys/socket.h> /* for net/if.h */
 #include <sys/sockio.h>
-//#include <sys/ctype.h>  /* string functions */
 #include <sys/kernel.h>
 #include <sys/random.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
-//#include <vm/uma.h>
-//#include <sys/module.h>
-//#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
-//#include <sys/mutex.h>
 #include <sys/mcache.h>
 
 #include <sys/kauth.h>
 #include <net/bpf.h>
 #endif
 #include <net/if.h>
-//#include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
-//#include <net/pfil.h>
 
 #include <netinet/in.h> /* for struct arpcom */
 #include <netinet/in_systm.h>
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
-//#include <machine/in_cksum.h>
 #include <netinet/if_ether.h> /* for struct arpcom */
 #include <net/bridgestp.h>
 #include <net/if_bridgevar.h>
 #include <net/if_llc.h>
+#if NVLAN > 0
 #include <net/if_vlan_var.h>
+#endif /* NVLAN > 0 */
 
 #include <net/if_ether.h>
 #include <net/dlil.h>
 
 #if BRIDGE_DEBUG
 
-#define BR_LCKDBG_MAX                          4
+#define BR_LCKDBG_MAX                  4
 
-#define BRIDGE_LOCK(_sc)                       bridge_lock(_sc)
-#define BRIDGE_UNLOCK(_sc)                     bridge_unlock(_sc)
-#define BRIDGE_LOCK_ASSERT(_sc)                lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
+#define BRIDGE_LOCK(_sc)               bridge_lock(_sc)
+#define BRIDGE_UNLOCK(_sc)             bridge_unlock(_sc)
+#define BRIDGE_LOCK_ASSERT(_sc)                \
+       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
 #define        BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
-#define        BRIDGE_UNREF(_sc)                       bridge_unref(_sc)
-#define        BRIDGE_XLOCK(_sc)                       bridge_xlock(_sc)
-#define        BRIDGE_XDROP(_sc)                       bridge_xdrop(_sc)
+#define        BRIDGE_UNREF(_sc)               bridge_unref(_sc)
+#define        BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
+#define        BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
 
 #else /* BRIDGE_DEBUG */
 
 #define BRIDGE_LOCK(_sc)               lck_mtx_lock((_sc)->sc_mtx)
 #define BRIDGE_UNLOCK(_sc)             lck_mtx_unlock((_sc)->sc_mtx)
-#define BRIDGE_LOCK_ASSERT(_sc)                lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
-#define        BRIDGE_LOCK2REF(_sc, _err)      do {    \
-       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);    \
-       if ((_sc)->sc_iflist_xcnt > 0)          \
-               (_err) = EBUSY;                 \
-       else                                    \
-               (_sc)->sc_iflist_ref++;         \
-       lck_mtx_unlock((_sc)->sc_mtx);          \
+#define BRIDGE_LOCK_ASSERT(_sc)                \
+       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
+#define        BRIDGE_LOCK2REF(_sc, _err)      do {                            \
+       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);            \
+       if ((_sc)->sc_iflist_xcnt > 0)                                  \
+               (_err) = EBUSY;                                         \
+       else                                                            \
+               (_sc)->sc_iflist_ref++;                                 \
+       lck_mtx_unlock((_sc)->sc_mtx);                                  \
 } while (0)
 #define        BRIDGE_UNREF(_sc)               do {                            \
        lck_mtx_lock((_sc)->sc_mtx);                                    \
        (_sc)->sc_iflist_ref--;                                         \
        if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
-               lck_mtx_unlock((_sc)->sc_mtx);                                  \
-               wakeup(&(_sc)->sc_cv);                          \
-       } else                                                                  \
-               lck_mtx_unlock((_sc)->sc_mtx);                                  \
+               lck_mtx_unlock((_sc)->sc_mtx);                          \
+               wakeup(&(_sc)->sc_cv);                                  \
+       } else                                                          \
+               lck_mtx_unlock((_sc)->sc_mtx);                          \
 } while (0)
-#define        BRIDGE_XLOCK(_sc)               do {            \
+#define        BRIDGE_XLOCK(_sc)               do {                            \
        lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);            \
-       (_sc)->sc_iflist_xcnt++;                        \
-       while ((_sc)->sc_iflist_ref > 0)                \
-               msleep(&(_sc)->sc_cv, (_sc)->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);      \
+       (_sc)->sc_iflist_xcnt++;                                        \
+       while ((_sc)->sc_iflist_ref > 0)                                \
+               msleep(&(_sc)->sc_cv, (_sc)->sc_mtx, PZERO,             \
+                   "BRIDGE_XLOCK", NULL);                              \
 } while (0)
-#define        BRIDGE_XDROP(_sc)               do {    \
-       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);    \
-       (_sc)->sc_iflist_xcnt--;                \
+#define        BRIDGE_XDROP(_sc)               do {                            \
+       lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);            \
+       (_sc)->sc_iflist_xcnt--;                                        \
 } while (0)
 
 #endif /* BRIDGE_DEBUG */
 
 #if NBPFILTER > 0
-#define BRIDGE_BPF_MTAP_INPUT(sc, m) \
-       if (sc->sc_bpf_input) \
+#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                   \
+       if (sc->sc_bpf_input)                                           \
                bridge_bpf_input(sc->sc_ifp, m)
 #else /* NBPFILTER */
 #define BRIDGE_BPF_MTAP_INPUT(ifp, m)
@@ -294,17 +286,17 @@ struct bridge_iflist {
        TAILQ_ENTRY(bridge_iflist) bif_next;
        struct ifnet            *bif_ifp;       /* member if */
        struct bstp_port        bif_stp;        /* STP state */
-       uint32_t                        bif_flags;      /* member if flags */
-       int                                     bif_savedcaps;  /* saved capabilities */
-       uint32_t                        bif_addrmax;    /* max # of addresses */
-       uint32_t                        bif_addrcnt;    /* cur. # of addresses */
-       uint32_t                        bif_addrexceeded;/* # of address violations */
-
-       interface_filter_t      bif_iff_ref;
-       struct bridge_softc *bif_sc;
-       char                            bif_promisc;                    /* promiscuous mode set */
-       char                            bif_proto_attached;             /* protocol attached */
-       char                            bif_filter_attached;    /* interface filter attached */
+       uint32_t                bif_flags;      /* member if flags */
+       int                     bif_savedcaps;  /* saved capabilities */
+       uint32_t                bif_addrmax;    /* max # of addresses */
+       uint32_t                bif_addrcnt;    /* cur. # of addresses */
+       uint32_t                bif_addrexceeded;/* # of address violations */
+
+       interface_filter_t      bif_iff_ref;
+       struct bridge_softc     *bif_sc;
+       char            bif_promisc;            /* promiscuous mode set */
+       char            bif_proto_attached;     /* protocol attached */
+       char            bif_filter_attached;    /* interface filter attached */
 };
 
 /*
@@ -314,10 +306,10 @@ struct bridge_rtnode {
        LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
        LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
        struct bridge_iflist    *brt_dst;       /* destination if */
-       unsigned long                   brt_expire;     /* expiration time */
-       uint8_t                                 brt_flags;      /* address flags */
-       uint8_t                                 brt_addr[ETHER_ADDR_LEN];
-       uint16_t                                brt_vlan;       /* vlan id */
+       unsigned long           brt_expire;     /* expiration time */
+       uint8_t                 brt_flags;      /* address flags */
+       uint8_t                 brt_addr[ETHER_ADDR_LEN];
+       uint16_t                brt_vlan;       /* vlan id */
 
 };
 #define        brt_ifp                 brt_dst->bif_ifp
@@ -326,41 +318,41 @@ struct bridge_rtnode {
  * Software state for each bridge.
  */
 struct bridge_softc {
-       struct ifnet                            *sc_ifp;        /* make this an interface */
-       LIST_ENTRY(bridge_softc)        sc_list;
-       lck_mtx_t                                       *sc_mtx;
-       void                                            *sc_cv;
-       uint32_t                                        sc_brtmax;      /* max # of addresses */
-       uint32_t                                        sc_brtcnt;      /* cur. # of addresses */
-       uint32_t                                        sc_brttimeout;  /* rt timeout in seconds */
-       uint32_t                                        sc_iflist_ref;  /* refcount for sc_iflist */
-       uint32_t                                        sc_iflist_xcnt; /* refcount for sc_iflist */
-       TAILQ_HEAD(, bridge_iflist)     sc_iflist;      /* member interface list */
-       LIST_HEAD(, bridge_rtnode)      *sc_rthash;     /* our forwarding table */
-       LIST_HEAD(, bridge_rtnode)      sc_rtlist;      /* list version of above */
-       uint32_t                                        sc_rthash_key;  /* key for hash */
-       TAILQ_HEAD(, bridge_iflist)     sc_spanlist;    /* span ports list */
-       struct bstp_state                       sc_stp;         /* STP state */
-       uint32_t                                        sc_brtexceeded; /* # of cache drops */
-       uint32_t                                        sc_filter_flags; /* ipf and flags */
-       
-       char                                            sc_if_xname[IFNAMSIZ];
-    bpf_packet_func                            sc_bpf_input;
-    bpf_packet_func                            sc_bpf_output;
-    u_int32_t                                  sc_flags;
+       struct ifnet            *sc_ifp;        /* make this an interface */
+       LIST_ENTRY(bridge_softc) sc_list;
+       lck_mtx_t               *sc_mtx;
+       void                    *sc_cv;
+       uint32_t                sc_brtmax;      /* max # of addresses */
+       uint32_t                sc_brtcnt;      /* cur. # of addresses */
+       uint32_t                sc_brttimeout;  /* rt timeout in seconds */
+       uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
+       uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
+       TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
+       LIST_HEAD(, bridge_rtnode) *sc_rthash;  /* our forwarding table */
+       LIST_HEAD(, bridge_rtnode) sc_rtlist;   /* list version of above */
+       uint32_t                sc_rthash_key;  /* key for hash */
+       TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
+       struct bstp_state       sc_stp;         /* STP state */
+       uint32_t                sc_brtexceeded; /* # of cache drops */
+       uint32_t                sc_filter_flags; /* ipf and flags */
+
+       char                    sc_if_xname[IFNAMSIZ];
+       bpf_packet_func         sc_bpf_input;
+       bpf_packet_func         sc_bpf_output;
+       u_int32_t               sc_flags;
 
 #if BRIDGE_DEBUG
-       void                                            *lock_lr[BR_LCKDBG_MAX];        /* locking calling history */
-       int                                             next_lock_lr;
-       void                                            *unlock_lr[BR_LCKDBG_MAX];      /* unlocking caller history */
-       int                                             next_unlock_lr;
+       void                    *lock_lr[BR_LCKDBG_MAX];        /* locking calling history */
+       int                     next_lock_lr;
+       void                    *unlock_lr[BR_LCKDBG_MAX];      /* unlocking caller history */
+       int                     next_unlock_lr;
 #endif /* BRIDGE_DEBUG */
 };
 
 #define SCF_DETACHING 0x1
 
-static lck_mtx_t       *bridge_list_mtx;
-//eventhandler_tag     bridge_detach_cookie = NULL;
+decl_lck_mtx_data(static, bridge_list_mtx_data);
+static lck_mtx_t       *bridge_list_mtx = &bridge_list_mtx_data;
 
 int    bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
 
@@ -380,15 +372,17 @@ static int        bridge_init(struct ifnet *);
 #if HAS_BRIDGE_DUMMYNET
 static void    bridge_dummynet(struct mbuf *, struct ifnet *);
 #endif
-static void    bridge_stop(struct ifnet *, int);
-static errno_t bridge_start(struct ifnet *, struct mbuf *);
+static void    bridge_ifstop(struct ifnet *, int);
+static int     bridge_output(struct ifnet *, struct mbuf *);
+static void    bridge_start(struct ifnet *);
 __private_extern__ errno_t bridge_input(struct ifnet *, struct mbuf *, void *);
 #if BRIDGE_MEMBER_OUT_FILTER
-static errno_t bridge_iff_output(void *, ifnet_t , protocol_family_t , mbuf_t *);
-static int     bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
-                   struct rtentry *);
+static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t ,
+    mbuf_t *);
+static int     bridge_member_output(struct ifnet *, struct mbuf *,
+                   struct sockaddr *, struct rtentry *);
 #endif
-static void    bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int     bridge_enqueue(struct bridge_softc *, struct ifnet *,
                    struct mbuf *);
 static void    bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
 
@@ -421,8 +415,10 @@ static int bridge_rtnode_insert(struct bridge_softc *,
                    struct bridge_rtnode *);
 static void    bridge_rtnode_destroy(struct bridge_softc *,
                    struct bridge_rtnode *);
+#if BRIDGESTP
 static void    bridge_rtable_expire(struct ifnet *, int);
 static void    bridge_state_change(struct ifnet *, int);
+#endif /* BRIDGESTP */
 
 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
                    const char *name);
@@ -495,13 +491,19 @@ static void bridge_detach(ifnet_t ifp);
 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
 #define        VLANTAGOF(_m)   0
 
+u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
+    { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+#if BRIDGESTP
 static struct bstp_cb_ops bridge_ops = {
        .bcb_state = bridge_state_change,
        .bcb_rtage = bridge_rtable_expire
 };
+#endif /* BRIDGESTP */
 
 SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
+SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+    "Bridge");
 
 #if defined(PFIL_HOOKS)
 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
@@ -511,25 +513,27 @@ static int pfil_ipfw = 0;   /* layer2 filter with ipfw */
 static int pfil_ipfw_arp = 0;   /* layer2 filter with ipfw */
 static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
                                    locally destined packets */
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW|CTLFLAG_LOCKED,
     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
-SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW|CTLFLAG_LOCKED,
     &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW|CTLFLAG_LOCKED,
     &pfil_bridge, 0, "Packet filter on the bridge interface");
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW|CTLFLAG_LOCKED,
     &pfil_member, 0, "Packet filter on the member interface");
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
-    &pfil_local_phys, 0,
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
+    CTLFLAG_RW|CTLFLAG_LOCKED, &pfil_local_phys, 0,
     "Packet filter on the physical interface for locally destined packets");
 #endif /* PFIL_HOOKS */
 
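Each sysctl in the hunk above gains CTLFLAG_LOCKED, which marks the handler as doing its own synchronization so the sysctl layer need not serialize it behind the legacy global lock. A hypothetical knob following the same macro shape used throughout this file (the name, variable, and description are illustrative only, not part of the commit):

    /*
     * Hypothetical knob, illustrative only: a read/write integer sysctl
     * that declares itself safe without the legacy global lock.
     */
    static int bridge_example_knob = 0;
    SYSCTL_INT(_net_link_bridge, OID_AUTO, example_knob,
        CTLFLAG_RW | CTLFLAG_LOCKED, &bridge_example_knob, 0,
        "Example knob (illustrative)");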
+#if BRIDGESTP
 static int log_stp   = 0;   /* log STP state changes */
 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
     &log_stp, 0, "Log STP state changes");
+#endif /* BRIDGESTP */
 
 struct bridge_control {
-       int                             (*bc_func)(struct bridge_softc *, void *);
+       int             (*bc_func)(struct bridge_softc *, void *);
        unsigned int    bc_argsize;
        unsigned int    bc_flags;
 };
@@ -539,213 +543,216 @@ struct bridge_control {
 #define        BC_F_SUSER              0x04    /* do super-user check */
 
 static const struct bridge_control bridge_control_table32[] = {
-       { bridge_ioctl_add,             sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_del,             sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_gifs32,          sizeof(struct ifbifconf32),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       { bridge_ioctl_rts32,           sizeof(struct ifbaconf32),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       
-       { bridge_ioctl_saddr32,         sizeof(struct ifbareq32),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_daddr32,         sizeof(struct ifbareq32),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_flush,           sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
+       { bridge_ioctl_add,             sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gifflags,        sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_scache,          sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_gifs32,          sizeof (struct ifbifconf32),
+           BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts32,           sizeof (struct ifbaconf32),
+           BC_F_COPYIN|BC_F_COPYOUT },
+
+       { bridge_ioctl_saddr32,         sizeof (struct ifbareq32),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sto,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_daddr32,         sizeof (struct ifbareq32),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_flush,           sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gpri,            sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_ght,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gfd,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gma,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifprio,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifcost,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gfilt,           sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sfilt,           sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_purge,           sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_addspan,         sizeof (struct ifbreq),
                BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
+       { bridge_ioctl_delspan,         sizeof (struct ifbreq),
                BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gfilt,           sizeof(struct ifbrparam),
-         BC_F_COPYOUT },
-       { bridge_ioctl_sfilt,           sizeof(struct ifbrparam),
-         BC_F_COPYIN|BC_F_SUSER },
 
-       { bridge_ioctl_purge,   sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gbparam32,       sizeof (struct ifbropreq32),
+           BC_F_COPYOUT },
 
-       { bridge_ioctl_addspan,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_delspan,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gbparam32,               sizeof(struct ifbropreq32),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_grte,            sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_gifsstp32,               sizeof(struct ifbpstpconf32),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       
-       { bridge_ioctl_sproto,          sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_stxhc,           sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifmaxaddr,      sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_grte,            sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_gifsstp32,       sizeof (struct ifbpstpconf32),
+           BC_F_COPYIN|BC_F_COPYOUT },
+
+       { bridge_ioctl_sproto,          sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_stxhc,           sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifmaxaddr,      sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
 };
 
 static const struct bridge_control bridge_control_table64[] = {
-       { bridge_ioctl_add,             sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_del,             sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_scache,          sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_gifs64,          sizeof(struct ifbifconf64),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       { bridge_ioctl_rts64,           sizeof(struct ifbaconf64),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       
-       { bridge_ioctl_saddr64,         sizeof(struct ifbareq64),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sto,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_gto,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_daddr64,         sizeof(struct ifbareq64),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_flush,           sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_spri,            sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_ght,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sht,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gma,             sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       { bridge_ioctl_sma,             sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gfilt,           sizeof(struct ifbrparam),
-         BC_F_COPYOUT },
-       { bridge_ioctl_sfilt,           sizeof(struct ifbrparam),
-         BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_add,             sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_del,             sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
 
-       { bridge_ioctl_purge,   sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gifflags,        sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_sifflags,        sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
 
-       { bridge_ioctl_addspan,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       { bridge_ioctl_delspan,         sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_gbparam64,               sizeof(struct ifbropreq64),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_grte,            sizeof(struct ifbrparam),
-               BC_F_COPYOUT },
-       
-       { bridge_ioctl_gifsstp64,               sizeof(struct ifbpstpconf64),
-               BC_F_COPYIN|BC_F_COPYOUT },
-       
-       { bridge_ioctl_sproto,          sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_stxhc,           sizeof(struct ifbrparam),
-               BC_F_COPYIN|BC_F_SUSER },
-       
-       { bridge_ioctl_sifmaxaddr,      sizeof(struct ifbreq),
-               BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_scache,          sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gcache,          sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_gifs64,          sizeof (struct ifbifconf64),
+           BC_F_COPYIN|BC_F_COPYOUT },
+       { bridge_ioctl_rts64,           sizeof (struct ifbaconf64),
+           BC_F_COPYIN|BC_F_COPYOUT },
+
+       { bridge_ioctl_saddr64,         sizeof (struct ifbareq64),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sto,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_gto,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_daddr64,         sizeof (struct ifbareq64),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_flush,           sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gpri,            sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_spri,            sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_ght,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sht,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gfd,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sfd,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gma,             sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sma,             sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifprio,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifcost,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gfilt,           sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+       { bridge_ioctl_sfilt,           sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_purge,   sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_addspan,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+       { bridge_ioctl_delspan,         sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_gbparam64,       sizeof (struct ifbropreq64),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_grte,            sizeof (struct ifbrparam),
+           BC_F_COPYOUT },
+
+       { bridge_ioctl_gifsstp64,       sizeof (struct ifbpstpconf64),
+           BC_F_COPYIN|BC_F_COPYOUT },
+
+       { bridge_ioctl_sproto,          sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_stxhc,           sizeof (struct ifbrparam),
+           BC_F_COPYIN|BC_F_SUSER },
+
+       { bridge_ioctl_sifmaxaddr,      sizeof (struct ifbreq),
+           BC_F_COPYIN|BC_F_SUSER },
 };
 
 static const unsigned int bridge_control_table_size =
-sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
+    sizeof (bridge_control_table32) / sizeof (bridge_control_table32[0]);
 
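bridge_control_table_size is computed with the usual sizeof(array)/sizeof(array[0]) element-count idiom; note that it is taken from the 32-bit table only, which assumes the two tables are kept in lock-step. A standalone sketch of the idiom:

    #include <stdio.h>

    #define ARRAY_COUNT(a)  (sizeof (a) / sizeof ((a)[0]))

    int
    main(void)
    {
            int table[] = { 10, 20, 30, 40 };

            /* Prints 4: the element count, independent of element size. */
            printf("%zu\n", ARRAY_COUNT(table));
            return (0);
    }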
-static LIST_HEAD(, bridge_softc) bridge_list = LIST_HEAD_INITIALIZER(bridge_list);
+static LIST_HEAD(, bridge_softc) bridge_list =
+    LIST_HEAD_INITIALIZER(bridge_list);
 
 static lck_grp_t *bridge_lock_grp = NULL;
 static lck_attr_t *bridge_lock_attr = NULL;
 
 static if_clone_t bridge_cloner = NULL;
 
-__private_extern__ int _if_brige_debug = 0;
-
-SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
-           &_if_brige_debug, 0, "Bridge debug");
+static int if_bridge_txstart = 0;
+SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
 
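The old misspelled debug global (_if_brige_debug) is renamed and moved under BRIDGE_DEBUG below, and a new txstart knob selects between two transmit models when a bridge is cloned: the legacy path, where the stack calls a synchronous output routine per packet, and the TXSTART model, where packets are enqueued and a start routine drains the queue. A toy sketch of the two shapes, with hypothetical names:

    #include <stdio.h>

    struct pkt { int id; };

    /* Legacy model: the stack hands each packet straight to output(). */
    static int
    example_output(struct pkt *p)
    {
            printf("tx %d\n", p->id);
            return (0);
    }

    /* TXSTART model: packets are enqueued; start() drains the queue. */
    static struct pkt queue[8];
    static int qlen;

    static void
    example_start(void)
    {
            while (qlen > 0)
                    example_output(&queue[--qlen]);
    }

    int
    main(void)
    {
            queue[qlen++] = (struct pkt){ 1 };
            queue[qlen++] = (struct pkt){ 2 };
            example_start();        /* drains in LIFO order: tx 2, tx 1 */
            return (0);
    }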
 #if BRIDGE_DEBUG
+static int if_bridge_debug = 0;
+SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &if_bridge_debug, 0, "Bridge debug");
 
 static void printf_ether_header(struct ether_header *eh);
 static void printf_mbuf_data(mbuf_t m, size_t offset, size_t len);
 static void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix);
 static void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix);
-static void link_print(struct sockaddr_dl * dl_p);
+static void link_print(struct sockaddr_dl *dl_p);
 
 static void bridge_lock(struct bridge_softc *);
 static void bridge_unlock(struct bridge_softc *);
@@ -754,35 +761,38 @@ static void bridge_unref(struct bridge_softc *);
 static void bridge_xlock(struct bridge_softc *);
 static void bridge_xdrop(struct bridge_softc *);
 
-static void bridge_lock(struct bridge_softc *sc)
+static void
+bridge_lock(struct bridge_softc *sc)
 {
        void *lr_saved = __builtin_return_address(0);
-       
+
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
 
        lck_mtx_lock(sc->sc_mtx);
-       
+
        sc->lock_lr[sc->next_lock_lr] = lr_saved;
        sc->next_lock_lr = (sc->next_lock_lr+1) % SO_LCKDBG_MAX;
 }
 
-static void bridge_unlock(struct bridge_softc *sc)
+static void
+bridge_unlock(struct bridge_softc *sc)
 {
        void *lr_saved = __builtin_return_address(0);
-       
+
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
 
        sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
        sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX;
-       
+
        lck_mtx_unlock(sc->sc_mtx);
 }
 
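bridge_lock and bridge_unlock record __builtin_return_address(0) into small ring buffers (lock_lr/unlock_lr) so a debugger or panic log can show the most recent lock and unlock call sites. Note the index wraps modulo SO_LCKDBG_MAX while the arrays are declared with BR_LCKDBG_MAX entries, which is only safe while the two constants agree. A compilable sketch of just the ring-buffer bookkeeping:

    #include <stdio.h>

    #define LCKDBG_MAX      4       /* stands in for BR_LCKDBG_MAX */

    static void *lock_lr[LCKDBG_MAX];
    static int next_lock_lr;

    /* Remember who called us; older entries are overwritten in a ring. */
    static void
    record_caller(void)
    {
            lock_lr[next_lock_lr] = __builtin_return_address(0);
            next_lock_lr = (next_lock_lr + 1) % LCKDBG_MAX;
    }

    int
    main(void)
    {
            int i;

            for (i = 0; i < 6; i++)         /* 6 calls, only last 4 kept */
                    record_caller();
            for (i = 0; i < LCKDBG_MAX; i++)
                    printf("%p\n", lock_lr[i]);
            return (0);
    }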
-static int bridge_lock2ref(struct bridge_softc *sc)
+static int
+bridge_lock2ref(struct bridge_softc *sc)
 {
        int error = 0;
        void *lr_saved = __builtin_return_address(0);
-       
+
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
 
        if (sc->sc_iflist_xcnt > 0)
@@ -793,11 +803,12 @@ static int bridge_lock2ref(struct bridge_softc *sc)
        sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
        sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX;
        lck_mtx_unlock(sc->sc_mtx);
-       
-       return error;
+
+       return (error);
 }
 
-static void bridge_unref(struct bridge_softc *sc)
+static void
+bridge_unref(struct bridge_softc *sc)
 {
        void *lr_saved = __builtin_return_address(0);
 
@@ -808,17 +819,18 @@ static void bridge_unref(struct bridge_softc *sc)
        sc->next_lock_lr = (sc->next_lock_lr+1) % SO_LCKDBG_MAX;
 
        sc->sc_iflist_ref--;
-       
+
        sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
        sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX;
-       if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0))       {
+       if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
                lck_mtx_unlock(sc->sc_mtx);
                wakeup(&sc->sc_cv);
        } else
                lck_mtx_unlock(sc->sc_mtx);
 }
 
-static void bridge_xlock(struct bridge_softc *sc)
+static void
+bridge_xlock(struct bridge_softc *sc)
 {
        void *lr_saved = __builtin_return_address(0);
 
@@ -828,7 +840,7 @@ static void bridge_xlock(struct bridge_softc *sc)
        while (sc->sc_iflist_ref > 0) {
                sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
                sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX;
-               
+
                msleep(&sc->sc_cv, sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
 
                sc->lock_lr[sc->next_lock_lr] = lr_saved;
@@ -836,7 +848,8 @@ static void bridge_xlock(struct bridge_softc *sc)
        }
 }
 
-static void bridge_xdrop(struct bridge_softc *sc)
+static void
+bridge_xdrop(struct bridge_softc *sc)
 {
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
 
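bridge_lock2ref converts a held mutex into a reference count so the caller can walk the member list unlocked, returning EBUSY if an exclusive waiter (sc_iflist_xcnt) is pending; bridge_unref wakes that waiter when the last reference drains, and bridge_xlock sleeps on sc_cv until it does. A condensed pthreads sketch of this convert-lock-to-ref pattern (all names hypothetical):

    #include <pthread.h>

    static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    static int refs, want_excl;

    /* Reader: trade the held mutex for a reference, as lock2ref does. */
    static int
    lock2ref(void)
    {
            pthread_mutex_lock(&mtx);
            if (want_excl) {
                    pthread_mutex_unlock(&mtx);
                    return (-1);    /* the kernel code returns EBUSY */
            }
            refs++;
            pthread_mutex_unlock(&mtx);
            return (0);
    }

    static void
    unref(void)
    {
            pthread_mutex_lock(&mtx);
            if (--refs == 0 && want_excl)
                    pthread_cond_signal(&cv);
            pthread_mutex_unlock(&mtx);
    }

    /* Writer: block until every outstanding reference is gone. */
    static void
    xlock(void)
    {
            pthread_mutex_lock(&mtx);
            want_excl = 1;
            while (refs > 0)
                    pthread_cond_wait(&cv, &mtx);
    }

    static void
    xdrop(void)
    {
            want_excl = 0;
            pthread_mutex_unlock(&mtx);
    }

    int
    main(void)
    {
            if (lock2ref() == 0)
                    unref();
            xlock();
            xdrop();
            return (0);
    }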
@@ -848,9 +861,9 @@ printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
 {
        if (m)
                printf("%spktlen: %u rcvif: %p header: %p nextpkt: %p%s",
-                          prefix ? prefix : "",
-                          (unsigned int)mbuf_pkthdr_len(m), mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m), mbuf_nextpkt(m),
-                          suffix ? suffix : "");
+                   prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
+                   mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m),
+                   mbuf_nextpkt(m), suffix ? suffix : "");
        else
                printf("%s<NULL>%s\n", prefix, suffix);
 }
@@ -859,11 +872,12 @@ void
 printf_mbuf(mbuf_t m, const char *prefix, const char *suffix)
 {
        if (m) {
-               printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u datastart: %p next: %p%s",
-                          prefix ? prefix : "",
-                          m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), mbuf_data(m), 
-                          (unsigned int)mbuf_maxlen(m), mbuf_datastart(m), mbuf_next(m), 
-                          !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
+               printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u "
+                   "datastart: %p next: %p%s", prefix ? prefix : "",
+                   m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
+                   mbuf_data(m), (unsigned int)mbuf_maxlen(m),
+                   mbuf_datastart(m), mbuf_next(m),
+                   !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
                if ((mbuf_flags(m) & MBUF_PKTHDR))
                        printf_mbuf_pkthdr(m, " ", suffix);
        } else
@@ -877,12 +891,12 @@ printf_mbuf_data(mbuf_t m, size_t offset, size_t len)
        size_t                  i, j;
        size_t                  pktlen, mlen, maxlen;
        unsigned char   *ptr;
-       
+
        pktlen = mbuf_pkthdr_len(m);
-       
+
        if (offset > pktlen)
                return;
-       
+
        maxlen = (pktlen - offset > len) ? len : pktlen;
        n = m;
        mlen = mbuf_len(n);
@@ -900,25 +914,25 @@ printf_mbuf_data(mbuf_t m, size_t offset, size_t len)
                        printf("%02x%s", ptr[j], i % 2 ? " " : "");
                }
        }
-       return;
 }
 
 static void
 printf_ether_header(struct ether_header *eh)
 {
-       printf("%02x:%02x:%02x:%02x:%02x:%02x > %02x:%02x:%02x:%02x:%02x:%02x 0x%04x ", 
-                  eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], 
-                  eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5], 
-                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
-                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5], 
-                  eh->ether_type);
+       printf("%02x:%02x:%02x:%02x:%02x:%02x > "
+           "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
+           eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
+           eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
+           eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
+           eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
+           eh->ether_type);
 }
 
 static void
-link_print(struct sockaddr_dl * dl_p)
+link_print(struct sockaddr_dl *dl_p)
 {
        int i;
-       
+
 #if 1
        printf("sdl len %d index %d family %d type 0x%x nlen %d alen %d"
            " slen %d addr ", dl_p->sdl_len,
@@ -926,10 +940,8 @@ link_print(struct sockaddr_dl * dl_p)
            dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen);
 #endif
        for (i = 0; i < dl_p->sdl_alen; i++)
-        printf("%s%x", i ? ":" : "",
-               (CONST_LLADDR(dl_p))[i]);
+        printf("%s%x", i ? ":" : "", (CONST_LLADDR(dl_p))[i]);
        printf("\n");
-       return;
 }
 
 #endif /* BRIDGE_DEBUG */
@@ -945,39 +957,41 @@ bridgeattach(__unused int n)
        int error;
        lck_grp_attr_t *lck_grp_attr = NULL;
        struct ifnet_clone_params ifnet_clone_params;
-       
-       bridge_rtnode_pool = zinit(sizeof(struct bridge_rtnode), 1024 * sizeof(struct bridge_rtnode),
-                               0, "bridge_rtnode");
+
+       bridge_rtnode_pool = zinit(sizeof (struct bridge_rtnode),
+           1024 * sizeof (struct bridge_rtnode), 0, "bridge_rtnode");
        zone_change(bridge_rtnode_pool, Z_CALLERACCT, FALSE);
 
        lck_grp_attr = lck_grp_attr_alloc_init();
-       
+
        bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
-       
+
        bridge_lock_attr = lck_attr_alloc_init();
-       
+
 #if BRIDGE_DEBUG
        lck_attr_setdebug(bridge_lock_attr);
 #endif
 
-       bridge_list_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
-       
-       // can free the attributes once we've allocated the group lock
+       lck_mtx_init(bridge_list_mtx, bridge_lock_grp, bridge_lock_attr);
+
+       /* can free the attributes once we've allocated the group lock */
        lck_grp_attr_free(lck_grp_attr);
-       
+
        LIST_INIT(&bridge_list);
-       
+
+#if BRIDGESTP
        bstp_sys_init();
-       
+#endif /* BRIDGESTP */
+
        ifnet_clone_params.ifc_name = "bridge";
        ifnet_clone_params.ifc_create = bridge_clone_create;
        ifnet_clone_params.ifc_destroy = bridge_clone_destroy;
-       
+
        error = ifnet_clone_attach(&ifnet_clone_params, &bridge_cloner);
        if (error != 0)
-               printf("bridgeattach: ifnet_clone_attach failed %d\n", error);
+               printf("%s: ifnet_clone_attach failed %d\n", __func__, error);
 
-       return error;
+       return (error);
 }
 
 #if defined(PFIL_HOOKS)
@@ -987,7 +1001,7 @@ bridgeattach(__unused int n)
 static int
 sysctl_pfil_ipfw SYSCTL_HANDLER_ARGS
 {
-#pragma unused(arg1,arg2)
+#pragma unused(arg1, arg2)
        int enable = pfil_ipfw;
        int error;
 
@@ -1012,6 +1026,7 @@ sysctl_pfil_ipfw SYSCTL_HANDLER_ARGS
 
        return (error);
 }
+
 SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
            &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
 #endif /* PFIL_HOOKS */
@@ -1027,13 +1042,14 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
        struct ifnet *ifp = NULL;
        struct bridge_softc *sc;
        u_char eaddr[6];
-       struct ifnet_init_params init_params;
+       struct ifnet_init_eparams init_params;
        errno_t error = 0;
-       uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) + IFNAMSIZ + ETHER_ADDR_LEN];
+       uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
+           IFNAMSIZ + ETHER_ADDR_LEN];
        struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
 
-       sc = _MALLOC(sizeof(*sc), M_DEVBUF, M_WAITOK);
-       memset(sc, 0, sizeof(*sc));
+       sc = _MALLOC(sizeof (*sc), M_DEVBUF, M_WAITOK);
+       memset(sc, 0, sizeof (*sc));
 
        sc->sc_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
        sc->sc_brtmax = BRIDGE_RTABLE_MAX;
@@ -1051,66 +1067,70 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
        /* Initialize our routing table. */
        error = bridge_rtable_init(sc);
        if (error != 0) {
-               printf("bridge_clone_create: bridge_rtable_init failed %d\n", error);
+               printf("%s: bridge_rtable_init failed %d\n", __func__, error);
                goto done;
        }
-       
+
        TAILQ_INIT(&sc->sc_iflist);
        TAILQ_INIT(&sc->sc_spanlist);
 
        /* use the interface name as the unique id for ifp recycle */
-       snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
+       snprintf(sc->sc_if_xname, sizeof (sc->sc_if_xname), "%s%d",
              ifc->ifc_name, unit);
-       memset(&init_params, 0, sizeof(struct ifnet_init_params));
-       init_params.uniqueid = sc->sc_if_xname;
-       init_params.uniqueid_len = strlen(sc->sc_if_xname);
-       init_params.name = ifc->ifc_name;
-       init_params.unit = unit;
-       init_params.family = IFNET_FAMILY_ETHERNET;
-       init_params.type = IFT_BRIDGE;
-       init_params.output = bridge_start;
-       init_params.demux = ether_demux;
-       init_params.add_proto = ether_add_proto;
-       init_params.del_proto = ether_del_proto;
-       init_params.check_multi = ether_check_multi;
-       init_params.framer = ether_frameout;
-       init_params.softc = sc;
-       init_params.ioctl = bridge_ioctl;
-       init_params.set_bpf_tap = bridge_set_bpf_tap;
-       init_params.detach = bridge_detach;
-       init_params.broadcast_addr = etherbroadcastaddr;
-       init_params.broadcast_len = ETHER_ADDR_LEN;
-       error = ifnet_allocate(&init_params, &ifp);
+       bzero(&init_params, sizeof (init_params));
+       init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
+       init_params.len                 = sizeof (init_params);
+       if (if_bridge_txstart) {
+               init_params.start       = bridge_start;
+       } else {
+               init_params.flags       = IFNET_INIT_LEGACY;
+               init_params.output      = bridge_output;
+       }
+       init_params.uniqueid            = sc->sc_if_xname;
+       init_params.uniqueid_len        = strlen(sc->sc_if_xname);
+       init_params.sndq_maxlen         = IFQ_MAXLEN;
+       init_params.name                = ifc->ifc_name;
+       init_params.unit                = unit;
+       init_params.family              = IFNET_FAMILY_ETHERNET;
+       init_params.type                = IFT_BRIDGE;
+       init_params.demux               = ether_demux;
+       init_params.add_proto           = ether_add_proto;
+       init_params.del_proto           = ether_del_proto;
+       init_params.check_multi         = ether_check_multi;
+       init_params.framer              = ether_frameout;
+       init_params.softc               = sc;
+       init_params.ioctl               = bridge_ioctl;
+       init_params.set_bpf_tap         = bridge_set_bpf_tap;
+       init_params.detach              = bridge_detach;
+       init_params.broadcast_addr      = etherbroadcastaddr;
+       init_params.broadcast_len       = ETHER_ADDR_LEN;
+       error = ifnet_allocate_extended(&init_params, &ifp);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_allocate failed %d\n", error);
+               printf("%s: ifnet_allocate failed %d\n", __func__, error);
                goto done;
        }
        sc->sc_ifp = ifp;
-       
+
        error = ifnet_set_mtu(ifp, ETHERMTU);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_mtu failed %d\n", error);
+               printf("%s: ifnet_set_mtu failed %d\n", __func__, error);
                goto done;
        }
        error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_addrlen failed %d\n", error);
-               goto done;
-       }
-       error = ifnet_set_baudrate(ifp, 10000000) ;     // XXX: this is what IONetworking does
-       if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_baudrate failed %d\n", error);
+               printf("%s: ifnet_set_addrlen failed %d\n", __func__, error);
                goto done;
        }
        error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_hdrlen failed %d\n", error);
+               printf("%s: ifnet_set_hdrlen failed %d\n", __func__, error);
                goto done;
        }
-       error = ifnet_set_flags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST, 
-                                                       0xffff);
+       error = ifnet_set_flags(ifp,
+           IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
+           0xffff);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_flags failed %d\n", error);
+               printf("%s: ifnet_set_flags failed %d\n", __func__, error);
                goto done;
        }
 
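ifnet_init_eparams replaces ifnet_init_params and is self-describing: callers stamp ver and len so ifnet_allocate_extended can validate, and later extend, the layout without breaking existing clients. A toy version-checked parameter block in the same spirit (all names hypothetical):

    #include <stdio.h>
    #include <string.h>

    #define EXAMPLE_INIT_CURRENT_VERSION    1

    struct example_init_eparams {
            unsigned int    ver;    /* layout version */
            unsigned int    len;    /* caller's idea of the struct size */
            const char      *name;
    };

    static int
    example_allocate(const struct example_init_eparams *p)
    {
            /* Reject layouts this consumer does not understand. */
            if (p->ver != EXAMPLE_INIT_CURRENT_VERSION ||
                p->len < sizeof (*p))
                    return (-1);
            printf("allocating %s\n", p->name);
            return (0);
    }

    int
    main(void)
    {
            struct example_init_eparams p;

            memset(&p, 0, sizeof (p));
            p.ver = EXAMPLE_INIT_CURRENT_VERSION;
            p.len = sizeof (p);
            p.name = "bridge0";
            return (example_allocate(&p));
    }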
@@ -1125,11 +1145,11 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
         */
        {
                int retry;
-               
+
                for (retry = 1; retry != 0;) {
                        struct ifnet *bifp;
                        struct bridge_softc *sc2;
-               
+
                        read_random(eaddr, ETHER_ADDR_LEN);
                        eaddr[0] &= ~1;         /* clear multicast bit */
                        eaddr[0] |= 2;          /* set the LAA bit */
@@ -1137,7 +1157,8 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
                        lck_mtx_lock(bridge_list_mtx);
                        LIST_FOREACH(sc2, &bridge_list, sc_list) {
                                bifp = sc2->sc_ifp;
-                               if (memcmp(eaddr, ifnet_lladdr(bifp), ETHER_ADDR_LEN) == 0)
+                               if (memcmp(eaddr, ifnet_lladdr(bifp),
+                                   ETHER_ADDR_LEN) == 0)
                                        retry = 1;
                        }
                        lck_mtx_unlock(bridge_list_mtx);
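The retry loop above draws six random bytes, clears the multicast bit, and sets the locally-administered bit in the first octet before checking the resulting address against every existing bridge. The bit fix-up in isolation, with rand() standing in for the kernel's read_random():

    #include <stdio.h>
    #include <stdlib.h>

    #define ETHER_ADDR_LEN  6

    int
    main(void)
    {
            unsigned char eaddr[ETHER_ADDR_LEN];
            int i;

            for (i = 0; i < ETHER_ADDR_LEN; i++)
                    eaddr[i] = rand() & 0xff;
            eaddr[0] &= ~1;         /* clear the multicast bit */
            eaddr[0] |= 2;          /* set the locally-administered bit */

            for (i = 0; i < ETHER_ADDR_LEN; i++)
                    printf("%02x%s", eaddr[i],
                        i < ETHER_ADDR_LEN - 1 ? ":" : "\n");
            return (0);
    }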
@@ -1150,8 +1171,8 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
         */
        {
                uint32_t r;
-               
-               read_random(&r, sizeof(r));
+
+               read_random(&r, sizeof (r));
                eaddr[0] = 0xAC;
                eaddr[1] = 0xDE;
                eaddr[2] = 0x48;
@@ -1161,59 +1182,65 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
        }
 #endif
 
-       memset(sdl, 0, sizeof(sdl_buffer));
+       memset(sdl, 0, sizeof (sdl_buffer));
        sdl->sdl_family = AF_LINK;
        sdl->sdl_nlen = strlen(sc->sc_if_xname);
        sdl->sdl_alen = ETHER_ADDR_LEN;
        sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
        memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
        memcpy(LLADDR(sdl), eaddr, ETHER_ADDR_LEN);
-       
+
 #if BRIDGE_DEBUG
-       link_print(sdl);
+       if (if_bridge_debug)
+               link_print(sdl);
 #endif
 
        error = ifnet_attach(ifp, NULL);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_attach failed %d\n", error);
+               printf("%s: ifnet_attach failed %d\n", __func__, error);
                goto done;
        }
-       
-       error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN, IFT_ETHER);
+
+       error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN,
+           IFT_ETHER);
        if (error != 0) {
-               printf("bridge_clone_create: ifnet_set_lladdr_and_type failed %d\n", error);
+               printf("%s: ifnet_set_lladdr_and_type failed %d\n", __func__,
+                   error);
                goto done;
        }
-       
+
 #if APPLE_BRIDGE_HWCKSUM_SUPPORT
-       /* 
-        * APPLE MODIFICATION - our bridge can support HW checksums 
+       /*
+        * APPLE MODIFICATION - our bridge can support HW checksums
         * (useful if underlying interfaces support them) on TX,
         * RX is not that interesting, since the stack just looks to
         * see if the packet has been checksummed already (I think)
         * but we might as well indicate we support it
         */
        ifp->if_capabilities =
-               IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx |
-               IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx ;
+           IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx |
+           IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx;
 #endif
-       
+
+#if BRIDGESTP
        bstp_attach(&sc->sc_stp, &bridge_ops);
+#endif /* BRIDGESTP */
 
        lck_mtx_lock(bridge_list_mtx);
        LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
        lck_mtx_unlock(bridge_list_mtx);
 
        /* attach as ethernet */
-       error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header), NULL, NULL);
+       error = bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header),
+           NULL, NULL);
 
 done:
        if (error != 0) {
-        printf("bridge_clone_create failed error %d\n", error);
+               printf("%s failed error %d\n", __func__, error);
                /* Cleanup TBD */
        }
-       
-       return error;
+
+       return (error);
 }
 
 /*
@@ -1231,15 +1258,15 @@ bridge_clone_destroy(struct ifnet *ifp)
        BRIDGE_LOCK(sc);
        if ((sc->sc_flags & SCF_DETACHING)) {
                BRIDGE_UNLOCK(sc);
-               return 0;
+               return (0);
        }
        sc->sc_flags |= SCF_DETACHING;
 
-       bridge_stop(ifp, 1);
+       bridge_ifstop(ifp, 1);
 
        error = ifnet_set_flags(ifp, 0, IFF_UP);
        if (error != 0) {
-               printf("bridge_clone_destroy: ifnet_set_flags failed %d\n", error);
+               printf("%s: ifnet_set_flags failed %d\n", __func__, error);
        }
 
        while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL)
@@ -1253,63 +1280,64 @@ bridge_clone_destroy(struct ifnet *ifp)
 
        error = ifnet_detach(ifp);
        if (error != 0) {
-               panic("bridge_clone_destroy: ifnet_detach(%p) failed %d\n", ifp, error);
+               panic("bridge_clone_destroy: ifnet_detach(%p) failed %d\n",
+                   ifp, error);
                if ((sc = (struct bridge_softc *)ifnet_softc(ifp)) != NULL) {
                        BRIDGE_LOCK(sc);
                        sc->sc_flags &= ~SCF_DETACHING;
                        BRIDGE_UNLOCK(sc);
                }
-               return 0;
+               return (0);
        }
 
-       return 0;
+       return (0);
 }
 
 #define DRVSPEC do { \
-               if (ifd->ifd_cmd >= bridge_control_table_size) { \
-                       error = EINVAL; \
-                       break; \
-               } \
-               bc = &bridge_control_table[ifd->ifd_cmd]; \
- \
-               if (cmd == SIOCGDRVSPEC && \
-                   (bc->bc_flags & BC_F_COPYOUT) == 0) { \
-                       error = EINVAL; \
-                       break; \
-               } \
-               else if (cmd == SIOCSDRVSPEC && \
-                   (bc->bc_flags & BC_F_COPYOUT) != 0) { \
-                       error = EINVAL; \
-                       break; \
-               } \
- \
-               if (bc->bc_flags & BC_F_SUSER) { \
-                       error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); \
-                       if (error) \
-                               break; \
-               } \
- \
-               if (ifd->ifd_len != bc->bc_argsize || \
-                   ifd->ifd_len > sizeof(args)) { \
-                       error = EINVAL; \
-                       break; \
-               } \
- \
-               bzero(&args, sizeof(args)); \
-               if (bc->bc_flags & BC_F_COPYIN) { \
-                       error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
-                       if (error) \
-                               break; \
-               } \
- \
-               BRIDGE_LOCK(sc); \
-               error = (*bc->bc_func)(sc, &args); \
-               BRIDGE_UNLOCK(sc); \
-               if (error) \
-                       break; \
- \
-               if (bc->bc_flags & BC_F_COPYOUT) \
-                       error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
+       if (ifd->ifd_cmd >= bridge_control_table_size) {                \
+               error = EINVAL;                                         \
+               break;                                                  \
+       }                                                               \
+       bc = &bridge_control_table[ifd->ifd_cmd];                       \
+                                                                       \
+       if (cmd == SIOCGDRVSPEC &&                                      \
+           (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
+               error = EINVAL;                                         \
+               break;                                                  \
+       } else if (cmd == SIOCSDRVSPEC &&                               \
+           (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
+               error = EINVAL;                                         \
+               break;                                                  \
+       }                                                               \
+                                                                       \
+       if (bc->bc_flags & BC_F_SUSER) {                                \
+               error = kauth_authorize_generic(kauth_cred_get(),       \
+                   KAUTH_GENERIC_ISSUSER);                             \
+               if (error)                                              \
+                       break;                                          \
+       }                                                               \
+                                                                       \
+       if (ifd->ifd_len != bc->bc_argsize ||                           \
+           ifd->ifd_len > sizeof (args)) {                             \
+               error = EINVAL;                                         \
+               break;                                                  \
+       }                                                               \
+                                                                       \
+       bzero(&args, sizeof (args));                                    \
+       if (bc->bc_flags & BC_F_COPYIN) {                               \
+               error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
+               if (error)                                              \
+                       break;                                          \
+       }                                                               \
+                                                                       \
+       BRIDGE_LOCK(sc);                                                \
+       error = (*bc->bc_func)(sc, &args);                              \
+       BRIDGE_UNLOCK(sc);                                              \
+       if (error)                                                      \
+               break;                                                  \
+                                                                       \
+       if (bc->bc_flags & BC_F_COPYOUT)                                \
+               error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
 } while (0)
 
 
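The reflowed DRVSPEC macro is the shared body of the 32- and 64-bit SIOCGDRVSPEC/SIOCSDRVSPEC paths: it bounds-checks the command index, rejects direction mismatches, authorizes super-user commands, copies the argument in, dispatches through the control table under the bridge lock, and copies the result back out. Keeping it a macro lets it bind to whichever bridge_control_table and ifd types are in scope at each expansion site. A reduced, compilable sketch of the table-dispatch core (names are hypothetical):

    #include <errno.h>
    #include <stdio.h>

    struct softc { int dummy; };

    struct control {
            int     (*func)(struct softc *, void *);
            size_t  argsize;
    };

    static int
    do_hello(struct softc *sc, void *arg)
    {
            (void)sc;
            printf("hello %d\n", *(int *)arg);
            return (0);
    }

    static const struct control table[] = {
            { do_hello, sizeof (int) },
    };

    /* Validate the command index and argument size, then dispatch. */
    static int
    dispatch(struct softc *sc, unsigned int cmd, void *data, size_t len)
    {
            if (cmd >= sizeof (table) / sizeof (table[0]))
                    return (EINVAL);
            if (len != table[cmd].argsize)
                    return (EINVAL);
            return ((*table[cmd].func)(sc, data));
    }

    int
    main(void)
    {
            struct softc sc;
            int arg = 42;

            return (dispatch(&sc, 0, &arg, sizeof (arg)));
    }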
@@ -1322,25 +1350,21 @@ static errno_t
 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 {
        struct bridge_softc *sc = ifp->if_softc;
-       struct ifreq *ifr = (struct ifreq *) data;
+       struct ifreq *ifr = (struct ifreq *)data;
        int error = 0;
 
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
 
 #if BRIDGE_DEBUG
-       if (_if_brige_debug)
-               printf("bridge_ioctl: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n", 
-                       ifp, 
-                       cmd, 
-                       (cmd & IOC_IN) ? 'I' : ' ',
-                       (cmd & IOC_OUT) ? 'O' : ' ',
-                       IOCPARM_LEN(cmd),
-                       (char)IOCGROUP(cmd),
-                       cmd & 0xff);
+       if (if_bridge_debug)
+               printf("%s: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n",
+                   __func__, ifp, cmd, (cmd & IOC_IN) ? 'I' : ' ',
+                   (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
+                   (char)IOCGROUP(cmd), cmd & 0xff);
 #endif
-       
+
        switch (cmd) {
-       
+
        case SIOCSIFADDR:
        case SIOCAIFADDR:
                ifnet_set_flags(ifp, IFF_UP, IFF_UP);
@@ -1365,9 +1389,10 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
                        struct ifbrparam ifbrparam;
                        struct ifbropreq32 ifbropreq;
                } args;
-               struct ifdrv32 *ifd = (struct ifdrv32 *) data;
-               const struct bridge_control *bridge_control_table = bridge_control_table32, *bc;
-               
+               struct ifdrv32 *ifd = (struct ifdrv32 *)data;
+               const struct bridge_control *bridge_control_table =
+                   bridge_control_table32, *bc;
+
                DRVSPEC;
 
                break;
@@ -1382,11 +1407,12 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
                        struct ifbrparam ifbrparam;
                        struct ifbropreq64 ifbropreq;
                } args;
-               struct ifdrv64 *ifd = (struct ifdrv64 *) data;
-               const struct bridge_control *bridge_control_table = bridge_control_table64, *bc;
-               
+               struct ifdrv64 *ifd = (struct ifdrv64 *)data;
+               const struct bridge_control *bridge_control_table =
+                   bridge_control_table64, *bc;
+
                DRVSPEC;
-               
+
                break;
        }
 
@@ -1398,7 +1424,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
                         * then stop and disable it.
                         */
                        BRIDGE_LOCK(sc);
-                       bridge_stop(ifp, 1);
+                       bridge_ifstop(ifp, 1);
                        BRIDGE_UNLOCK(sc);
                } else if ((ifp->if_flags & IFF_UP) &&
                    !(ifp->if_flags & IFF_RUNNING)) {
@@ -1413,9 +1439,11 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
                break;
 
        case SIOCSIFLLADDR:
-               error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
+               error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
+                   ifr->ifr_addr.sa_len);
                if (error != 0)
-                       printf("bridge_ioctl: ifnet_set_lladdr failed %d\n", error);
+                       printf("%s: ifnet_set_lladdr failed %d\n", __func__,
+                           error);
                break;
 
        case SIOCSIFMTU:
@@ -1424,22 +1452,15 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
                break;
 
        default:
-               /*
-                * drop the lock as ether_ioctl() will call bridge_start() and
-                * cause the lock to be recursed.
-                */
                error = ether_ioctl(ifp, cmd, data);
 #if BRIDGE_DEBUG
-               if (error != 0)
-                       printf("bridge_ioctl: ether_ioctl ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu) failed error: %d\n", 
-                                  ifp, 
-                                  cmd, 
-                                  (cmd & IOC_IN) ? 'I' : ' ',
-                                  (cmd & IOC_OUT) ? 'O' : ' ',
-                                  IOCPARM_LEN(cmd),
-                                  (char) IOCGROUP(cmd),
-                                  cmd & 0xff,
-                                  error);
+               if (error != 0 && error != EOPNOTSUPP)
+                       printf("%s: ether_ioctl ifp %p cmd 0x%08lx "
+                           "(%c%c [%lu] %c %lu) failed error: %d\n",
+                           __func__, ifp, cmd, (cmd & IOC_IN) ? 'I' : ' ',
+                           (cmd & IOC_OUT) ? 'O' : ' ',
+                           IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
+                           cmd & 0xff, error);
 #endif /* BRIDGE_DEBUG */
                break;
        }
@@ -1487,7 +1508,7 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
        struct ifreq ifr;
        int error;
 
-       bzero(&ifr, sizeof(ifr));
+       bzero(&ifr, sizeof (ifr));
        ifr.ifr_reqcap = set;
 
        if (ifp->if_capenable != set) {
@@ -1495,9 +1516,9 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
                error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
                IFF_UNLOCKGIANT(ifp);
                if (error)
-                       printf("error setting interface capabilities on %s\n",
-                               ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                           ifp->if_xname);
+                       printf("%s: error setting interface capabilities "
+                           "on %s\n", __func__, ifnet_name(sc->sc_ifp),
+                           ifnet_unit(sc->sc_ifp), ifp->if_xname);
        }
 }
 #endif /* HAS_IF_CAP */
@@ -1518,9 +1539,9 @@ bridge_lookup_member(struct bridge_softc *sc, const char *name)
 
        TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
                ifp = bif->bif_ifp;
-               snprintf(if_xname, sizeof(if_xname), "%s%d", 
+               snprintf(if_xname, sizeof (if_xname), "%s%d",
                  ifnet_name(ifp), ifnet_unit(ifp));
-               if (strncmp(if_xname, name, sizeof(if_xname)) == 0)
+               if (strncmp(if_xname, name, sizeof (if_xname)) == 0)
                        return (bif);
        }
 
@@ -1547,9 +1568,9 @@ bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
        return (NULL);
 }
 
-static errno_t 
-bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
-                 mbuf_t *data, char **frame_ptr)
+static errno_t
+bridge_iff_input(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+    mbuf_t *data, char **frame_ptr)
 {
        errno_t error = 0;
        struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
@@ -1560,21 +1581,24 @@ bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
 
        if ((m->m_flags & M_PROTO1))
                goto out;
-       
-       if (*frame_ptr >= (char *)mbuf_datastart(m) && *frame_ptr <= (char *)mbuf_data(m)) {
+
+       if (*frame_ptr >= (char *)mbuf_datastart(m) &&
+           *frame_ptr <= (char *)mbuf_data(m)) {
                included = 1;
                frmlen = (char *)mbuf_data(m) - *frame_ptr;
        }
 #if BRIDGE_DEBUG
-       if (_if_brige_debug) {
-               printf("bridge_iff_input %s%d from %s%d m %p data %p frame %p %s frmlen %lu\n", 
-                          ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                          ifnet_name(ifp), ifnet_unit(ifp), 
-                          m, mbuf_data(m), *frame_ptr, included ? "inside" : "outside", frmlen);
-               
-               if (_if_brige_debug > 1) {
+       if (if_bridge_debug) {
+               printf("%s: %s%d from %s%d m %p data %p frame %p %s "
+                   "frmlen %lu\n", __func__, ifnet_name(sc->sc_ifp),
+                   ifnet_unit(sc->sc_ifp), ifnet_name(ifp), ifnet_unit(ifp),
+                   m, mbuf_data(m), *frame_ptr,
+                   included ? "inside" : "outside", frmlen);
+
+               if (if_bridge_debug > 1) {
                        printf_mbuf(m, "bridge_iff_input[", "\n");
-                       printf_ether_header((struct ether_header *)*frame_ptr);
+                       printf_ether_header((struct ether_header *)
+                           (void *)*frame_ptr);
                        printf_mbuf_data(m, 0, 20);
                        printf("\n");
                }
@@ -1583,22 +1607,24 @@ bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
 
        /* Move data pointer to start of frame to the link layer header */
        if (included) {
-               (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen, mbuf_len(m) + frmlen);
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
+                   mbuf_len(m) + frmlen);
                (void) mbuf_pkthdr_adjustlen(m, frmlen);
        } else {
-               printf("bridge_iff_input: frame_ptr outside mbuf\n");
+               printf("%s: frame_ptr outside mbuf\n", __func__);
                goto out;
        }
-       
+
        error = bridge_input(ifp, m, *frame_ptr);
-       
+
        /* Adjust packet back to original */
        if (error == 0) {
-               (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen, mbuf_len(m) - frmlen);
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
+                   mbuf_len(m) - frmlen);
                (void) mbuf_pkthdr_adjustlen(m, -frmlen);
        }
 #if BRIDGE_DEBUG
-       if (_if_brige_debug > 1) {
+       if (if_bridge_debug > 1) {
                printf("\n");
                printf_mbuf(m, "bridge_iff_input]", "\n");
        }
@@ -1606,8 +1632,8 @@ bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
 
 out:
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       
-       return error;
+
+       return (error);
 }
 
 
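bridge_iff_input receives the mbuf with its data pointer already advanced past the Ethernet header; when frame_ptr still lies inside the mbuf, the filter temporarily rewinds it with mbuf_setdata/mbuf_pkthdr_adjustlen to re-expose the header for bridge_input, then restores the original offsets on return. A toy illustration of that rewind-and-restore, using a hypothetical buffer type:

    #include <stdio.h>
    #include <string.h>

    /* Toy stand-in for an mbuf: data points somewhere inside buf. */
    struct toybuf {
            char    buf[64];
            char    *data;
            size_t  len;
    };

    /*
     * Temporarily widen the buffer to re-expose a header sitting just in
     * front of data, then restore, mirroring the mbuf_setdata() dance.
     */
    static void
    with_header(struct toybuf *m, size_t hdrlen, void (*fn)(struct toybuf *))
    {
            m->data -= hdrlen;
            m->len += hdrlen;
            fn(m);
            m->data += hdrlen;
            m->len -= hdrlen;
    }

    static void
    dump(struct toybuf *m)
    {
            printf("%.*s\n", (int)m->len, m->data);
    }

    int
    main(void)
    {
            struct toybuf m;

            strcpy(m.buf, "HDRpayload");
            m.data = m.buf + 3;             /* past the 3-byte header */
            m.len = strlen("payload");
            with_header(&m, 3, dump);       /* prints HDRpayload */
            dump(&m);                       /* prints payload */
            return (0);
    }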
@@ -1619,39 +1645,39 @@ bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol
        struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
        struct bridge_softc *sc = bif->bif_sc;
        mbuf_t m = *data;
-       
+
        if ((m->m_flags & M_PROTO1))
                goto out;
-       
+
 #if BRIDGE_DEBUG
-       if (_if_brige_debug) {
-               printf("bridge_iff_output %s%d from %s%d m %p data %p\n", 
-                               ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                               ifnet_name(ifp), ifnet_unit(ifp), 
-                               m, mbuf_data(m));
+       if (if_bridge_debug) {
+               printf("%s: %s%d from %s%d m %p data %p\n", __func__,
+                   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+                   ifnet_name(ifp), ifnet_unit(ifp), m, mbuf_data(m));
        }
 #endif /* BRIDGE_DEBUG */
 
-       error = bridge_output(sc, ifp, m);
+       error = bridge_member_output(sc, ifp, m);
        if (error != 0) {
-               printf("bridge_iff_output: bridge_output failed error %d\n", error);
+               printf("%s: bridge_member_output failed error %d\n", __func__,
+                   error);
        }
 
-out:   
+out:
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
 
-       return error;
+       return (error);
 }
 #endif /* BRIDGE_MEMBER_OUT_FILTER */
 
 
-static void 
-bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
-                 const struct kev_msg *event_msg)
+static void
+bridge_iff_event(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+    const struct kev_msg *event_msg)
 {
        struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
-       
-       if (event_msg->vendor_code == KEV_VENDOR_APPLE && 
+
+       if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
                event_msg->kev_class == KEV_NETWORK_CLASS &&
                event_msg->kev_subclass == KEV_DL_SUBCLASS) {
                switch (event_msg->event_code) {
@@ -1659,30 +1685,37 @@ bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
                        case KEV_DL_IF_DETACHED:
                                bridge_ifdetach(bif, ifp);
                                break;
-                               
+
                        case KEV_DL_LINK_OFF:
                        case KEV_DL_LINK_ON: {
+#if BRIDGESTP
                                bstp_linkstate(ifp, event_msg->event_code);
+#endif /* BRIDGESTP */
                                break;
                        }
-                       
+
                        case KEV_DL_SIFFLAGS: {
-                               if (bif->bif_promisc == 0 && (ifp->if_flags & IFF_UP)) {
-                                       errno_t error = ifnet_set_promiscuous(ifp, 1);
+                               if (bif->bif_promisc == 0 &&
+                                   (ifp->if_flags & IFF_UP)) {
+                                       errno_t error =
+                                           ifnet_set_promiscuous(ifp, 1);
                                        if (error != 0) {
-                                               printf("bridge_iff_event: ifnet_set_promiscuous(%s%d) failed %d\n",
-                                                       ifnet_name(ifp), ifnet_unit(ifp), error);
+                                               printf("%s: "
+                                                   "ifnet_set_promiscuous"
+                                                   "(%s%d) failed %d\n",
+                                                   __func__, ifnet_name(ifp),
+                                                   ifnet_unit(ifp), error);
                                        } else {
                                                bif->bif_promisc = 1;
                                        }
                                }
                                break;
                        }
-                       
+
                        default:
                                break;
                }
-       }               
+       }
 }
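
One detail worth calling out in the event handler above: on KEV_DL_SIFFLAGS the filter re-arms promiscuous mode in case the member interface came up before the bridge could enable it, and remembers success so the handler stays idempotent. The same shape as a standalone sketch (hypothetical helper name, bif layout as used in this file):

        /*
         * Sketch: enable promiscuous mode exactly once per member,
         * recording success in bif_promisc so repeated SIFFLAGS events
         * are harmless.
         */
        static void
        member_rearm_promisc(struct bridge_iflist *bif, ifnet_t ifp)
        {
                errno_t error;

                if (bif->bif_promisc != 0 || (ifp->if_flags & IFF_UP) == 0)
                        return;         /* already on, or still down */

                error = ifnet_set_promiscuous(ifp, 1);
                if (error != 0)
                        printf("%s: ifnet_set_promiscuous(%s%d) failed %d\n",
                            __func__, ifnet_name(ifp), ifnet_unit(ifp), error);
                else
                        bif->bif_promisc = 1;
        }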
 
 /*
@@ -1691,48 +1724,44 @@ bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
  *     Detach an interface from a bridge.  Called when a member
  *     interface is detaching.
  */
-static void 
-bridge_iff_detached(void* cookie, __unused ifnet_t ifp)
+static void
+bridge_iff_detached(void *cookie, __unused ifnet_t ifp)
 {
        struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
 
-#if BRIDGE_DEBUG       
-       printf("bridge_iff_detached: %s%d\n",
-               ifnet_name(ifp), ifnet_unit(ifp));
+#if BRIDGE_DEBUG
+       printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp));
 #endif
 
        bridge_ifdetach(bif, ifp);
 
        _FREE(bif, M_DEVBUF);
-       
-       return;
 }
 
 static errno_t
-bridge_proto_input(ifnet_t ifp, __unused protocol_family_t protocol, 
-                                       __unused mbuf_t packet, __unused char *header)
+bridge_proto_input(ifnet_t ifp, __unused protocol_family_t protocol,
+    __unused mbuf_t packet, __unused char *header)
 {
-       printf("bridge_proto_input: unexpected packet from %s%d\n",
-               ifnet_name(ifp), ifnet_unit(ifp));
-       return 0;
+       printf("%s: unexpected packet from %s%d\n", __func__,
+           ifnet_name(ifp), ifnet_unit(ifp));
+       return (0);
 }
 
 static int
 bridge_attach_protocol(struct ifnet *ifp)
 {
-       int                                                             error;
+       int     error;
        struct ifnet_attach_proto_param reg;
 
-       printf("bridge_attach_protocol: %s%d\n",
-               ifnet_name(ifp), ifnet_unit(ifp));
-       
-       bzero(&reg, sizeof(reg));
+       printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp));
+
+       bzero(&reg, sizeof (reg));
        reg.input = bridge_proto_input;
-       
+
        error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
        if (error)
-               printf("bridge_attach_protocol: ifnet_attach_protocol(%s%d) failed, %d\n",
-                       ifnet_name(ifp), ifnet_unit(ifp), error);
+               printf("%s: ifnet_attach_protocol(%s%d) failed, %d\n",
+                   __func__, ifnet_name(ifp), ifnet_unit(ifp), error);
 
        return (error);
 }
@@ -1742,13 +1771,12 @@ bridge_detach_protocol(struct ifnet *ifp)
 {
        int         error;
 
-       printf("bridge_detach_protocol: %s%d\n",
-               ifnet_name(ifp), ifnet_unit(ifp));
+       printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp));
 
        error = ifnet_detach_protocol(ifp, PF_BRIDGE);
        if (error)
-               printf("bridge_attach_protocol: ifnet_detach_protocol(%s%d) failed, %d\n",
-                       ifnet_name(ifp), ifnet_unit(ifp), error);
+               printf("%s: ifnet_detach_protocol(%s%d) failed, %d\n",
+                   __func__, ifnet_name(ifp), ifnet_unit(ifp), error);
 
        return (error);
 }
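
Note that the old message in bridge_detach_protocol carried a copied-and-pasted "bridge_attach_protocol" prefix; switching every prefix to __func__, as this change does throughout, fixes that class of bug by construction. A toy illustration (hypothetical function name):

        /* Sketch: __func__ keeps log prefixes honest across copy/paste. */
        static int
        demo_detach(struct ifnet *ifp)
        {
                int error = ifnet_detach_protocol(ifp, PF_BRIDGE);

                if (error != 0)
                        printf("%s: ifnet_detach_protocol(%s%d) failed, %d\n",
                            __func__, ifnet_name(ifp), ifnet_unit(ifp), error);
                return (error);
        }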
@@ -1799,8 +1827,10 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
                (void) bridge_detach_protocol(ifs);
                BRIDGE_LOCK(sc);
        }
+#if BRIDGESTP
        if (bif->bif_flags & IFBIF_STP)
                bstp_disable(&bif->bif_stp);
+#endif /* BRIDGESTP */
 
        ifs->if_bridge = NULL;
        BRIDGE_XLOCK(sc);
@@ -1816,10 +1846,12 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
        KASSERT(bif->bif_addrcnt == 0,
            ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
 
+#if BRIDGESTP
        BRIDGE_UNLOCK(sc);
        bstp_destroy(&bif->bif_stp);    /* prepare to free */
        BRIDGE_LOCK(sc);
-       
+#endif /* BRIDGESTP */
+
        if (bif->bif_filter_attached) {
                /* Respect lock ordering with DLIL lock */
                BRIDGE_UNLOCK(sc);
@@ -1874,9 +1906,9 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
                if (TAILQ_EMPTY(&sc->sc_iflist))
                        sc->sc_ifp->if_mtu = ifs->if_mtu;
                else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
-                       printf("%s%d: invalid MTU for %s%d", 
-                 ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                 ifnet_name(ifs), ifnet_unit(ifs));
+                       printf("%s: %s%d: invalid MTU for %s%d", __func__,
+                           ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+                           ifnet_name(ifs), ifnet_unit(ifs));
                        return (EINVAL);
                }
        }
@@ -1887,7 +1919,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        if (ifs->if_bridge != NULL)
                return (EBUSY);
 
-       bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+       bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
        if (bif == NULL)
                return (ENOMEM);
 
@@ -1901,7 +1933,9 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        ifnet_reference(ifs);
 
        ifs->if_bridge = sc;
+#if BRIDGESTP
        bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
+#endif /* BRIDGESTP */
        /*
         * XXX: XLOCK HERE!?!
         */
@@ -1912,7 +1946,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        bridge_mutecaps(sc);
 #endif /* HAS_IF_CAP */
 
-       
+
        switch (ifs->if_type) {
        case IFT_ETHER:
        case IFT_L2VLAN:
@@ -1946,7 +1980,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        /*
         * install an interface filter
         */
-       memset(&iff, 0, sizeof(struct iff_filter));
+       memset(&iff, 0, sizeof (struct iff_filter));
        iff.iff_cookie = bif;
        iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
        iff.iff_input = bridge_iff_input;
@@ -1957,7 +1991,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
        iff.iff_detached = bridge_iff_detached;
        error = iflt_attach(ifs, &iff, &bif->bif_iff_ref);
        if (error != 0) {
-               printf("bridge_ioctl_add: iflt_attach failed %d\n", error);
+               printf("%s: iflt_attach failed %d\n", __func__, error);
                BRIDGE_LOCK(sc);
                goto out;
        }
@@ -1968,7 +2002,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
         */
        if ((error = bridge_attach_protocol(ifs)) != 0) {
                if (error != 0) {
-                       printf("bridge_ioctl_add: bridge_attach_protocol failed %d\n", error);
+                       printf("%s: bridge_attach_protocol failed %d\n",
+                           __func__, error);
                        BRIDGE_LOCK(sc);
                        goto out;
                }
@@ -1980,7 +2015,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
 out:
        if (error && bif != NULL)
                bridge_delete_member(sc, bif, 1);
-       
+
        return (error);
 }
 
@@ -2001,7 +2036,7 @@ bridge_ioctl_del(struct bridge_softc *sc, void *arg)
 
 static int
 bridge_ioctl_purge(__unused struct bridge_softc *sc, __unused void *arg)
-{      
+{
        return (0);
 }
 
@@ -2050,19 +2085,21 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
 {
        struct ifbreq *req = arg;
        struct bridge_iflist *bif;
+#if BRIDGESTP
        struct bstp_port *bp;
        int error;
+#endif /* BRIDGESTP */
 
        bif = bridge_lookup_member(sc, req->ifbr_ifsname);
        if (bif == NULL)
                return (ENOENT);
-       bp = &bif->bif_stp;
 
        if (req->ifbr_ifsflags & IFBIF_SPAN)
                /* SPAN is readonly */
                return (EINVAL);
 
-       
+
+#if BRIDGESTP
        if (req->ifbr_ifsflags & IFBIF_STP) {
                if ((bif->bif_flags & IFBIF_STP) == 0) {
                        error = bstp_enable(&bif->bif_stp);
@@ -2075,10 +2112,15 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
        }
 
        /* Pass on STP flags */
+       bp = &bif->bif_stp;
        bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
        bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
        bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
        bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
+#else /* !BRIDGESTP */
+       if (req->ifbr_ifsflags & IFBIF_STP)
+               return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 
        /* Save the bits relating to the bridge */
        bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
@@ -2110,64 +2152,66 @@ bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
 
 
 #define BRIDGE_IOCTL_GIFS do { \
-       struct bridge_iflist *bif; \
-       struct ifbreq breq; \
-       char *buf, *outbuf; \
-       unsigned int count, buflen, len; \
- \
-       count = 0; \
-       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
-               count++; \
-       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
-               count++; \
- \
-       buflen = sizeof(breq) * count; \
-       if (bifc->ifbic_len == 0) { \
-               bifc->ifbic_len = buflen; \
-               return (0); \
-       } \
-       BRIDGE_UNLOCK(sc); \
-       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
-       BRIDGE_LOCK(sc); \
- \
-       count = 0; \
-       buf = outbuf; \
-       len = min(bifc->ifbic_len, buflen); \
-       bzero(&breq, sizeof(breq)); \
-       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
-               if (len < sizeof(breq)) \
-                       break; \
- \
-               snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
-                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
-               /* Fill in the ifbreq structure */ \
-               error = bridge_ioctl_gifflags(sc, &breq); \
-               if (error) \
-                       break; \
-               memcpy(buf, &breq, sizeof(breq)); \
-               count++; \
-               buf += sizeof(breq); \
-               len -= sizeof(breq); \
-       } \
-       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
-               if (len < sizeof(breq)) \
-                       break; \
- \
-               snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
-                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
-               breq.ifbr_ifsflags = bif->bif_flags; \
-               breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; \
-               memcpy(buf, &breq, sizeof(breq)); \
-               count++; \
-               buf += sizeof(breq); \
-               len -= sizeof(breq); \
-       } \
- \
-       BRIDGE_UNLOCK(sc); \
-       bifc->ifbic_len = sizeof(breq) * count; \
-       error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
-       BRIDGE_LOCK(sc); \
-       _FREE(outbuf, M_TEMP); \
+       struct bridge_iflist *bif;                                      \
+       struct ifbreq breq;                                             \
+       char *buf, *outbuf;                                             \
+       unsigned int count, buflen, len;                                \
+                                                                       \
+       count = 0;                                                      \
+       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
+               count++;                                                \
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
+               count++;                                                \
+                                                                       \
+       buflen = sizeof (breq) * count;                                 \
+       if (bifc->ifbic_len == 0) {                                     \
+               bifc->ifbic_len = buflen;                               \
+               return (0);                                             \
+       }                                                               \
+       BRIDGE_UNLOCK(sc);                                              \
+       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO);            \
+       BRIDGE_LOCK(sc);                                                \
+                                                                       \
+       count = 0;                                                      \
+       buf = outbuf;                                                   \
+       len = min(bifc->ifbic_len, buflen);                             \
+       bzero(&breq, sizeof (breq));                                    \
+       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
+               if (len < sizeof (breq))                                \
+                       break;                                          \
+                                                                       \
+               snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
+                   "%s%d", ifnet_name(bif->bif_ifp),                   \
+                   ifnet_unit(bif->bif_ifp));                          \
+               /* Fill in the ifbreq structure */                      \
+               error = bridge_ioctl_gifflags(sc, &breq);               \
+               if (error)                                              \
+                       break;                                          \
+               memcpy(buf, &breq, sizeof (breq));                      \
+               count++;                                                \
+               buf += sizeof (breq);                                   \
+               len -= sizeof (breq);                                   \
+       }                                                               \
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
+               if (len < sizeof (breq))                                \
+                       break;                                          \
+                                                                       \
+               snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
+                   "%s%d", ifnet_name(bif->bif_ifp),                   \
+                   ifnet_unit(bif->bif_ifp));                          \
+               breq.ifbr_ifsflags = bif->bif_flags;                    \
+               breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;      \
+               memcpy(buf, &breq, sizeof (breq));                      \
+               count++;                                                \
+               buf += sizeof (breq);                                   \
+               len -= sizeof (breq);                                   \
+       }                                                               \
+                                                                       \
+       BRIDGE_UNLOCK(sc);                                              \
+       bifc->ifbic_len = sizeof (breq) * count;                        \
+       error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
+       BRIDGE_LOCK(sc);                                                \
+       _FREE(outbuf, M_TEMP);                                          \
 } while (0)
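
The macro above follows the usual two-pass sizing contract for variable-length bridge ioctls: a caller passing ifbic_len == 0 gets back the required buffer size, a second call copies out at most min(caller length, needed length) records, and the kernel drops the bridge lock around the blocking _MALLOC and the copyout. A hypothetical userland caller sketched against that contract; bridge_ioctl stands in for the SIOCGDRVSPEC plumbing and the field names follow the classic if_bridgevar.h layout, so treat every name here as illustrative:

        /* Sketch only: two-pass fetch of the member list. */
        static struct ifbreq *
        fetch_members(int sock, unsigned int *countp)
        {
                struct ifbifconf bifc;

                bzero(&bifc, sizeof (bifc));
                if (bridge_ioctl(sock, BRDGGIFS, &bifc) == -1)  /* pass 1: size */
                        return (NULL);
                bifc.ifbic_buf = malloc(bifc.ifbic_len);        /* kernel set len */
                if (bifc.ifbic_buf == NULL)
                        return (NULL);
                if (bridge_ioctl(sock, BRDGGIFS, &bifc) == -1) { /* pass 2: data */
                        free(bifc.ifbic_buf);
                        return (NULL);
                }
                *countp = bifc.ifbic_len / sizeof (struct ifbreq);
                return ((struct ifbreq *)(void *)bifc.ifbic_buf);
        }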
 
 static int
@@ -2175,7 +2219,7 @@ bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
 {
        struct ifbifconf64 *bifc = arg;
        int error = 0;
-       
+
        BRIDGE_IOCTL_GIFS;
 
        return (error);
@@ -2193,55 +2237,57 @@ bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
 }
 
 
-#define BRIDGE_IOCTL_RTS do { \
-       struct bridge_rtnode *brt; \
-       char *buf, *outbuf; \
-       unsigned int count, buflen, len; \
-       struct timespec now; \
- \
-       if (bac->ifbac_len == 0) \
-               return (0); \
- \
-       count = 0; \
-       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
-               count++; \
-       buflen = sizeof(bareq) * count; \
- \
-       BRIDGE_UNLOCK(sc); \
-       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
-       BRIDGE_LOCK(sc); \
- \
-       count = 0; \
-       buf = outbuf; \
-       len = min(bac->ifbac_len, buflen); \
-       bzero(&bareq, sizeof(bareq)); \
-       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
-               if (len < sizeof(bareq)) \
-                       goto out; \
-               snprintf(bareq.ifba_ifsname, sizeof(bareq.ifba_ifsname), "%s%d", \
-                 ifnet_name(brt->brt_ifp), ifnet_unit(brt->brt_ifp)); \
-               memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); \
-               bareq.ifba_vlan = brt->brt_vlan; \
-               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
-                       nanouptime(&now); \
-                       if ((unsigned long)now.tv_sec < brt->brt_expire) \
-                               bareq.ifba_expire = brt->brt_expire - now.tv_sec; \
-               } else \
-                       bareq.ifba_expire = 0; \
-               bareq.ifba_flags = brt->brt_flags; \
- \
-               memcpy(buf, &bareq, sizeof(bareq)); \
-               count++; \
-               buf += sizeof(bareq); \
-               len -= sizeof(bareq); \
-       } \
-out: \
-       BRIDGE_UNLOCK(sc); \
-       bac->ifbac_len = sizeof(bareq) * count; \
-       error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
-       BRIDGE_LOCK(sc); \
-       _FREE(outbuf, M_TEMP); \
-       return (error); \
+#define BRIDGE_IOCTL_RTS do {                                              \
+       struct bridge_rtnode *brt;                                          \
+       char *buf, *outbuf;                                                 \
+       unsigned int count, buflen, len;                                    \
+       struct timespec now;                                                \
+                                                                           \
+       if (bac->ifbac_len == 0)                                            \
+               return (0);                                                 \
+                                                                           \
+       count = 0;                                                          \
+       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
+               count++;                                                    \
+       buflen = sizeof (bareq) * count;                                    \
+                                                                           \
+       BRIDGE_UNLOCK(sc);                                                  \
+       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO);                \
+       BRIDGE_LOCK(sc);                                                    \
+                                                                           \
+       count = 0;                                                          \
+       buf = outbuf;                                                       \
+       len = min(bac->ifbac_len, buflen);                                  \
+       bzero(&bareq, sizeof (bareq));                                      \
+       LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
+               if (len < sizeof (bareq))                                   \
+                       goto out;                                           \
+               snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),    \
+                   "%s%d", ifnet_name(brt->brt_ifp),                       \
+                   ifnet_unit(brt->brt_ifp));                              \
+               memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
+               bareq.ifba_vlan = brt->brt_vlan;                            \
+               if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
+                       nanouptime(&now);                                   \
+                       if ((unsigned long)now.tv_sec < brt->brt_expire)    \
+                               bareq.ifba_expire =                         \
+                                   brt->brt_expire - now.tv_sec;           \
+               } else                                                      \
+                       bareq.ifba_expire = 0;                              \
+               bareq.ifba_flags = brt->brt_flags;                          \
+                                                                           \
+               memcpy(buf, &bareq, sizeof (bareq));                        \
+               count++;                                                    \
+               buf += sizeof (bareq);                                      \
+               len -= sizeof (bareq);                                      \
+       }                                                                   \
+out:                                                                       \
+       BRIDGE_UNLOCK(sc);                                                  \
+       bac->ifbac_len = sizeof (bareq) * count;                            \
+       error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);            \
+       BRIDGE_LOCK(sc);                                                    \
+       _FREE(outbuf, M_TEMP);                                              \
+       return (error);                                                     \
 } while (0)
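
The expiry reporting in the macro above deserves a gloss: brt_expire holds an absolute uptime in seconds, the ioctl reports seconds remaining (clamped at zero), and only dynamically learned entries ever expire. The same math as a small sketch, using the field names from this file:

        /*
         * Sketch of the expiry computation: time remaining until a
         * dynamically learned route entry ages out.
         */
        static unsigned long
        rt_seconds_remaining(const struct bridge_rtnode *brt)
        {
                struct timespec now;

                if ((brt->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
                        return (0);     /* static entries never expire */
                nanouptime(&now);
                if ((unsigned long)now.tv_sec < brt->brt_expire)
                        return (brt->brt_expire - now.tv_sec);
                return (0);
        }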
 
 static int
@@ -2250,7 +2296,7 @@ bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
        struct ifbaconf64 *bac = arg;
        struct ifbareq64 bareq;
        int error = 0;
-       
+
        BRIDGE_IOCTL_RTS;
 
        return (error);
@@ -2262,7 +2308,7 @@ bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
        struct ifbaconf32 *bac = arg;
        struct ifbareq32 bareq;
        int error = 0;
-       
+
        BRIDGE_IOCTL_RTS;
 
        return (error);
@@ -2358,9 +2404,14 @@ bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
 static int
 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
@@ -2376,9 +2427,14 @@ bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
 static int
 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
@@ -2394,9 +2450,14 @@ bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
 static int
 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
@@ -2412,14 +2473,20 @@ bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
 static int
 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbreq *req = arg;
        struct bridge_iflist *bif;
 
@@ -2428,11 +2495,16 @@ bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
                return (ENOENT);
 
        return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbreq *req = arg;
        struct bridge_iflist *bif;
 
@@ -2441,6 +2513,10 @@ bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
                return (ENOENT);
 
        return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
@@ -2512,7 +2588,7 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
                        return (EINVAL);
        }
 
-       bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+       bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
        if (bif == NULL)
                return (ENOMEM);
 
@@ -2549,29 +2625,29 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
        return (0);
 }
 
-#define BRIDGE_IOCTL_GBPARAM do { \
-       struct bstp_state *bs = &sc->sc_stp; \
-       struct bstp_port *root_port; \
- \
-       req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
-       req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
-       req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
- \
-       root_port = bs->bs_root_port; \
-       if (root_port == NULL) \
-               req->ifbop_root_port = 0; \
-       else \
-               req->ifbop_root_port = root_port->bp_ifp->if_index; \
- \
-       req->ifbop_holdcount = bs->bs_txholdcount; \
-       req->ifbop_priority = bs->bs_bridge_priority; \
-       req->ifbop_protocol = bs->bs_protover; \
-       req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
-       req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
-       req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
-       req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
-       req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
-       req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
+#define BRIDGE_IOCTL_GBPARAM do {                                      \
+       struct bstp_state *bs = &sc->sc_stp;                            \
+       struct bstp_port *root_port;                                    \
+                                                                       \
+       req->ifbop_maxage = bs->bs_bridge_max_age >> 8;                 \
+       req->ifbop_hellotime = bs->bs_bridge_htime >> 8;                \
+       req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;                \
+                                                                       \
+       root_port = bs->bs_root_port;                                   \
+       if (root_port == NULL)                                          \
+               req->ifbop_root_port = 0;                               \
+       else                                                            \
+               req->ifbop_root_port = root_port->bp_ifp->if_index;     \
+                                                                       \
+       req->ifbop_holdcount = bs->bs_txholdcount;                      \
+       req->ifbop_priority = bs->bs_bridge_priority;                   \
+       req->ifbop_protocol = bs->bs_protover;                          \
+       req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;             \
+       req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;           \
+       req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;         \
+       req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;    \
+       req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;    \
+       req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;  \
 } while (0)
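
About the right shifts above: 802.1D encodes spanning-tree timer values in units of 1/256 s, so the bstp state keeps them shifted left by 8 and the ioctl shifts them back down to whole seconds before reporting. A sketch of the conversion (hypothetical macro names):

        /* Sketch: 802.1D timers are carried in 1/256-second units. */
        #define BSTP_TO_SECONDS(v)      ((v) >> 8)
        #define SECONDS_TO_BSTP(v)      ((v) << 8)

        /* e.g. req->ifbop_maxage = BSTP_TO_SECONDS(bs->bs_bridge_max_age); */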
 
 static int
@@ -2580,7 +2656,7 @@ bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
        struct ifbropreq32 *req = arg;
 
        BRIDGE_IOCTL_GBPARAM;
-       
+
        return (0);
 }
 
@@ -2594,7 +2670,6 @@ bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
        return (0);
 }
 
-
 static int
 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
 {
@@ -2604,60 +2679,60 @@ bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
        return (0);
 }
 
-#define BRIDGE_IOCTL_GIFSSTP do { \
-       struct bridge_iflist *bif; \
-       struct bstp_port *bp; \
-       struct ifbpstpreq bpreq; \
-       char *buf, *outbuf; \
-       unsigned int count, buflen, len; \
- \
-       count = 0; \
-       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
-               if ((bif->bif_flags & IFBIF_STP) != 0) \
-                       count++; \
-       } \
- \
-       buflen = sizeof(bpreq) * count; \
-       if (bifstp->ifbpstp_len == 0) { \
-               bifstp->ifbpstp_len = buflen; \
-               return (0); \
-       } \
- \
-       BRIDGE_UNLOCK(sc); \
-       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
-       BRIDGE_LOCK(sc); \
- \
-       count = 0; \
-       buf = outbuf; \
-       len = min(bifstp->ifbpstp_len, buflen); \
-       bzero(&bpreq, sizeof(bpreq)); \
-       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
-               if (len < sizeof(bpreq)) \
-                       break; \
- \
-               if ((bif->bif_flags & IFBIF_STP) == 0) \
-                       continue; \
- \
-               bp = &bif->bif_stp; \
-               bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
-               bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
-               bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
-               bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
+#define BRIDGE_IOCTL_GIFSSTP do {                                      \
+       struct bridge_iflist *bif;                                      \
+       struct bstp_port *bp;                                           \
+       struct ifbpstpreq bpreq;                                        \
+       char *buf, *outbuf;                                             \
+       unsigned int count, buflen, len;                                \
+                                                                       \
+       count = 0;                                                      \
+       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
+               if ((bif->bif_flags & IFBIF_STP) != 0)                  \
+                       count++;                                        \
+       }                                                               \
+                                                                       \
+       buflen = sizeof (bpreq) * count;                                \
+       if (bifstp->ifbpstp_len == 0) {                                 \
+               bifstp->ifbpstp_len = buflen;                           \
+               return (0);                                             \
+       }                                                               \
+                                                                       \
+       BRIDGE_UNLOCK(sc);                                              \
+       outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO);            \
+       BRIDGE_LOCK(sc);                                                \
+                                                                       \
+       count = 0;                                                      \
+       buf = outbuf;                                                   \
+       len = min(bifstp->ifbpstp_len, buflen);                         \
+       bzero(&bpreq, sizeof (bpreq));                                  \
+       TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
+               if (len < sizeof (bpreq))                               \
+                       break;                                          \
+                                                                       \
+               if ((bif->bif_flags & IFBIF_STP) == 0)                  \
+                       continue;                                       \
+                                                                       \
+               bp = &bif->bif_stp;                                     \
+               bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
+               bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
+               bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
+               bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
                bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
-               bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
- \
-               memcpy(buf, &bpreq, sizeof(bpreq)); \
-               count++; \
-               buf += sizeof(bpreq); \
-               len -= sizeof(bpreq); \
-       } \
- \
-       BRIDGE_UNLOCK(sc); \
-       bifstp->ifbpstp_len = sizeof(bpreq) * count; \
+               bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
+                                                                       \
+               memcpy(buf, &bpreq, sizeof (bpreq));                    \
+               count++;                                                \
+               buf += sizeof (bpreq);                                  \
+               len -= sizeof (bpreq);                                  \
+       }                                                               \
+                                                                       \
+       BRIDGE_UNLOCK(sc);                                              \
+       bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
-       BRIDGE_LOCK(sc); \
-       _FREE(outbuf, M_TEMP); \
-       return (error); \
+       BRIDGE_LOCK(sc);                                                \
+       _FREE(outbuf, M_TEMP);                                          \
+       return (error);                                                 \
 } while (0)
 
 static int
@@ -2685,17 +2760,27 @@ bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
 static int
 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
 
 static int
 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
 {
+#if BRIDGESTP
        struct ifbrparam *param = arg;
 
        return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
+#else /* !BRIDGESTP */
+#pragma unused(sc, arg)
+       return (EOPNOTSUPP);
+#endif /* !BRIDGESTP */
 }
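
The two ioctls above show the pattern this change applies to every STP entry point: when BRIDGESTP is compiled out, the handler keeps its slot in the ioctl table but reports EOPNOTSUPP, with #pragma unused silencing the unused-parameter warnings. The pattern in isolation (bstp_set_something and ifbrp_value are placeholders, not real symbols):

        /* Sketch of the compile-out stub pattern used throughout. */
        static int
        bridge_ioctl_example(struct bridge_softc *sc, void *arg)
        {
        #if BRIDGESTP
                struct ifbrparam *param = arg;

                return (bstp_set_something(&sc->sc_stp, param->ifbrp_value));
        #else /* !BRIDGESTP */
        #pragma unused(sc, arg)
                return (EOPNOTSUPP);
        #endif /* !BRIDGESTP */
        }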
 
 /*
@@ -2710,7 +2795,7 @@ bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp)
        struct bridge_softc *sc = ifp->if_bridge;
 
 #if BRIDGE_DEBUG
-       printf("bridge_ifdetach %s%d\n", ifnet_name(ifp), ifnet_unit(ifp));
+       printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp));
 #endif
 
        /* Check if the interface is a bridge member */
@@ -2755,26 +2840,28 @@ bridge_init(struct ifnet *ifp)
        BRIDGE_LOCK_ASSERT(sc);
 
        if ((ifnet_flags(ifp) & IFF_RUNNING))
-               return 0;
+               return (0);
 
        ts.tv_sec = bridge_rtable_prune_period;
        ts.tv_nsec = 0;
        bsd_timeout(bridge_timer, sc, &ts);
 
        error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
+#if BRIDGESTP
        if (error == 0)
                bstp_init(&sc->sc_stp);         /* Initialize Spanning Tree */
+#endif /* BRIDGESTP */
 
-       return error;
+       return (error);
 }
 
 /*
- * bridge_stop:
+ * bridge_ifstop:
  *
  *     Stop the bridge interface.
  */
 static void
-bridge_stop(struct ifnet *ifp, __unused int disable)
+bridge_ifstop(struct ifnet *ifp, __unused int disable)
 {
        struct bridge_softc *sc = ifp->if_softc;
 
@@ -2784,7 +2871,9 @@ bridge_stop(struct ifnet *ifp, __unused int disable)
                return;
 
        bsd_untimeout(bridge_timer, sc);
+#if BRIDGESTP
        bstp_stop(&sc->sc_stp);
+#endif /* BRIDGESTP */
 
        bridge_rtflush(sc, IFBF_FLUSHDYN);
 
@@ -2797,21 +2886,30 @@ bridge_stop(struct ifnet *ifp, __unused int disable)
  *     Enqueue a packet on a bridge member interface.
  *
  */
-static void
+static int
 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
 {
        int len, error = 0;
        short mflags;
        struct mbuf *m0;
 
-       /* We may be sending a fragment so traverse the mbuf */
+       VERIFY(dst_ifp != NULL);
+
+       /*
+        * We may be sending a fragment so traverse the mbuf
+        *
+        * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
+        */
        for (; m; m = m0) {
+               errno_t _error;
+               struct flowadv adv = { FADV_SUCCESS };
+
                m0 = m->m_nextpkt;
                m->m_nextpkt = NULL;
 
                len = m->m_pkthdr.len;
                mflags = m->m_flags;
-               m->m_flags |= M_PROTO1; //set to avoid loops 
+               m->m_flags |= M_PROTO1; /* set to avoid loops */
 
 #if HAS_IF_CAP
                /*
@@ -2822,32 +2920,45 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
                    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
                        m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
                        if (m == NULL) {
-                               printf("%s%d: unable to prepend VLAN header\n",
-                                   ifnet_name(dst_ifp), ifnet_unit(dst_ifp));
-                               (void) ifnet_stat_increment_out(dst_ifp, 0, 0, 1);
+                               printf("%s: %s%d: unable to prepend VLAN "
+                                   "header\n", __func__, ifnet_name(dst_ifp),
+                                   ifnet_unit(dst_ifp));
+                               (void) ifnet_stat_increment_out(dst_ifp,
+                                   0, 0, 1);
                                continue;
                        }
                        m->m_flags &= ~M_VLANTAG;
                }
 #endif /* HAS_IF_CAP */
 
-               error = ifnet_output_raw(dst_ifp, 0, m);
+               _error = dlil_output(dst_ifp, 0, m, NULL, NULL, 1, &adv);
+
+               /* Preserve existing error value */
                if (error == 0) {
+                       if (_error != 0)
+                               error = _error;
+                       else if (adv.code == FADV_FLOW_CONTROLLED)
+                               error = EQFULL;
+                       else if (adv.code == FADV_SUSPENDED)
+                               error = EQSUSPENDED;
+               }
+
+               if (_error == 0) {
                        (void) ifnet_stat_increment_out(sc->sc_ifp, 1, len, 0);
                } else {
                        (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
                }
        }
 
-       return;
+       return (error);
 }
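
The error handling added to bridge_enqueue above folds three signals into one return value, per fragment: the first hard error from dlil_output wins, and a flow-control advisory only becomes the result (EQFULL or EQSUSPENDED) when no hard error has occurred yet. The folding rule as a standalone sketch (hypothetical helper name):

        /*
         * Sketch: keep the first meaningful status across all fragments;
         * advisories rank below hard errors.
         */
        static errno_t
        fold_enqueue_status(errno_t cur, errno_t hard, const struct flowadv *adv)
        {
                if (cur != 0)
                        return (cur);           /* first status wins */
                if (hard != 0)
                        return (hard);
                if (adv->code == FADV_FLOW_CONTROLLED)
                        return (EQFULL);
                if (adv->code == FADV_SUSPENDED)
                        return (EQSUSPENDED);
                return (0);
        }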
 
 #if HAS_BRIDGE_DUMMYNET
 /*
  * bridge_dummynet:
  *
- *     Receive a queued packet from dummynet and pass it on to the output
- *     interface.
+ *     Receive a queued packet from dummynet and pass it on to the output
+ *     interface.
  *
  *     The mbuf has the Ethernet header already attached.
  */
@@ -2879,13 +2990,13 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
                        return;
        }
 
-       bridge_enqueue(sc, ifp, m);
+       (void) bridge_enqueue(sc, ifp, m);
 }
 #endif /* HAS_BRIDGE_DUMMYNET */
 
 #if BRIDGE_MEMBER_OUT_FILTER
 /*
- * bridge_output:
+ * bridge_member_output:
  *
  *     Send output from a bridge member interface.  This
  *     performs the bridging function for locally originated
@@ -2895,8 +3006,8 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
  *     enqueue or free the mbuf before returning.
  */
 static int
-bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
-    __unused struct rtentry *rt)
+bridge_member_output(struct ifnet *ifp, struct mbuf *m,
+    __unused struct sockaddr *sa, __unused struct rtentry *rt)
 {
        struct ether_header *eh;
        struct ifnet *dst_if;
@@ -2904,10 +3015,11 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
        uint16_t vlan;
 
 #if BRIDGE_DEBUG
-       if (_if_brige_debug)
-               printf("bridge_output ifp %p %s%d\n", ifp, ifnet_name(ifp), ifnet_unit(ifp));
+       if (if_bridge_debug)
+               printf("%s: ifp %p %s%d\n", __func__, ifp, ifnet_name(ifp),
+                   ifnet_unit(ifp));
 #endif /* BRIDGE_DEBUG */
-       
+
        if (m->m_len < ETHER_HDR_LEN) {
                m = m_pullup(m, ETHER_HDR_LEN);
                if (m == NULL)
@@ -2920,7 +3032,8 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
 
        BRIDGE_LOCK(sc);
 
-       /* APPLE MODIFICATION 
+       /*
+        * APPLE MODIFICATION
         * If the packet is an 802.1X ethertype, then only send on the
         * original output interface.
         */
@@ -2928,7 +3041,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
                dst_if = ifp;
                goto sendunicast;
        }
-       
+
        /*
         * If bridge is down, but the original output interface is up,
         * go ahead and send out that interface.  Otherwise, the packet
@@ -2984,12 +3097,13 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
                        } else {
                                mc = m_copypacket(m, M_DONTWAIT);
                                if (mc == NULL) {
-                                       (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+                                       (void) ifnet_stat_increment_out(
+                                           sc->sc_ifp, 0, 0, 1);
                                        continue;
                                }
                        }
 
-                       bridge_enqueue(sc, dst_if, mc);
+                       (void) bridge_enqueue(sc, dst_if, mc);
                }
                if (used == 0)
                        m_freem(m);
@@ -3010,64 +3124,66 @@ sendunicast:
        }
 
        BRIDGE_UNLOCK(sc);
-       bridge_enqueue(sc, dst_if, m);
+       (void) bridge_enqueue(sc, dst_if, m);
        return (0);
 }
 #endif /* BRIDGE_MEMBER_OUT_FILTER */
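
The "APPLE MODIFICATION" in bridge_member_output above pins 802.1X traffic to its originating interface: EAPOL frames are port-specific by definition, so they must never be bridged to other members. The test itself is a one-liner; a sketch, assuming ETHERTYPE_PAE (0x888e) is available from net/ethernet.h:

        /* Sketch: EAPOL frames bypass the bridge lookup entirely. */
        static int
        is_eapol_frame(const struct ether_header *eh)
        {
                return (ntohs(eh->ether_type) == ETHERTYPE_PAE);
        }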
 
 #if APPLE_BRIDGE_HWCKSUM_SUPPORT
-static struct mbuf* bridge_fix_txcsum( struct mbuf *m )
+static struct mbuf *
+bridge_fix_txcsum(struct mbuf *m)
 {
-       //      basic tests indicate that the vast majority of packets being processed
-       //      here have an Ethernet header mbuf pre-pended to them (the first case below)
-       //      the second highest are those where the Ethernet and IP/TCP/UDP headers are 
-       //      all in one mbuf (second case below)
-       //      the third case has, in fact, never hit for me -- although if I comment out 
-       //      the first two cases, that code works for them, so I consider it a 
-       //      decent general solution
-       
+       /*
+        * basic tests indicate that the vast majority of packets being
+        * processed here have an Ethernet header mbuf pre-pended to them
+        * (the first case below)
+        *
+        * the second highest are those where the Ethernet and IP/TCP/UDP
+        * headers are all in one mbuf (second case below)
+        *
+        * the third case has, in fact, never hit for me -- although if I
+        * comment out the first two cases, that code works for them, so I
+        * consider it a decent general solution
+        */
        int amt = ETHER_HDR_LEN;
-       int hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
-       int off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
-       
-       /* 
+       int hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
+       int off = M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
+
+       /*
         * NOTE we should never get vlan-attached packets here;
         * support for those COULD be added, but we don't use them
         * and it really kinda slows things down to worry about them
         */
-       
+
 #ifdef DIAGNOSTIC
-       if ( m_tag_find( m, PACKET_TAG_VLAN, NULL ) != NULL )
-       {
-               printf( "bridge: transmitting packet tagged with VLAN?\n" );
-               KASSERT( 0 );
-               m_freem( m );
-               return NULL;
+       if (m_tag_find(m, PACKET_TAG_VLAN, NULL) != NULL) {
+               printf("%s: transmitting packet tagged with VLAN?\n", __func__);
+               KASSERT(0);
+               m_freem(m);
+               return (NULL);
        }
 #endif
-       
-       if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
-       {
+
+       if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) {
                amt += hlen;
        }
-       if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
-       {
-               amt += off + sizeof( uint16_t );
+       if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4) {
+               amt += off + sizeof (uint16_t);
        }
-       
-       if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
-       {
-               amt += off + sizeof( uint16_t );
+
+       if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) {
+               amt += off + sizeof (uint16_t);
        }
-       
-       if ( m->m_len == ETHER_HDR_LEN )
-       {
-               // this is the case where there's an Ethernet header in an mbuf
-        
-               // the first mbuf is the Ethernet header -- just strip it off and do the checksum
+
+       if (m->m_len == ETHER_HDR_LEN) {
+               /*
+                * this is the case where there's an Ethernet header in an
+                * mbuf the first mbuf is the Ethernet header -- just strip
+                * it off and do the checksum
+                */
+               /* set up m_ip so the cksum operations work */
                struct mbuf *m_ip = m->m_next;
-        
-               // set up m_ip so the cksum operations work
+
                /* APPLE MODIFICATION 22 Apr 2008 <mvega@apple.com>
                 *  <rdar://5817385> Clear the m_tag list before setting
                 *  M_PKTHDR.
@@ -3097,298 +3213,306 @@ static struct mbuf* bridge_fix_txcsum( struct mbuf *m )
                m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
                m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
                m_ip->m_pkthdr.len = m->m_pkthdr.len - ETHER_HDR_LEN;
-        
-               // set up the header mbuf so we can prepend it back on again later
+
+               /*
+                * set up the header mbuf so we can prepend it
+                * back on again later
+                */
                m->m_pkthdr.csum_flags = 0;
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.len = ETHER_HDR_LEN;
                m->m_next = NULL;
-        
-        
-               // now do the checksums we need -- first IP
-               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
-               {
-                       // make sure the IP header (or at least the part with the cksum) is there
-                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
-                       if ( m_ip == NULL )
-                       {
-                               printf( "bridge: failed to flatten header\n ");
-                               m_freem( m );
-                               return NULL;
+
+               /* now do the checksums we need -- first IP */
+               if (m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4) {
+                       /*
+                        * make sure the IP header (or at least the part with
+                        * the cksum) is there
+                        */
+                       m_ip = m_pullup(m_ip, sizeof (struct ip));
+                       if (m_ip == NULL) {
+                               printf("%s: failed to flatten header\n",
+                                   __func__);
+                               m_freem(m);
+                               return (NULL);
                        }
-                       
-                       // now do the checksum
+
+                       /* now do the checksum */
                        {
-                               struct ip *ip = mtod( m_ip, struct ip* );
-                               ip->ip_sum = in_cksum( m_ip, hlen );
-                
+                               struct ip *ip = mtod(m_ip, struct ip *);
+                               ip->ip_sum = in_cksum(m_ip, hlen);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                               printf( "bridge: performed IPv4 checksum\n" );
+                               printf("%s: performed IPv4 checksum\n",
+                                   __func__);
 #endif
                        }
                }
-        
-               // now do a TCP or UDP delayed checksum
-               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
-               {
-                       in_delayed_cksum( m_ip );
-            
+
+               /* now do a TCP or UDP delayed checksum */
+               if (m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
+                       in_delayed_cksum(m_ip);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+                       printf("%s: performed TCPv4/UDPv4 checksum\n",
+                           __func__);
 #endif
                }
-        
-               // now attach the ethernet header back onto the IP packet
+
+               /* now attach the ethernet header back onto the IP packet */
                m->m_next = m_ip;
-               m->m_pkthdr.len += m_length( m_ip );    
-        
-               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m->m_pkthdr.len += m_length(m_ip);
+
+               /*
+                * clear the M_PKTHDR flags on the ip packet (again,
+                * we re-attach later)
+                */
                m_ip->m_flags &= ~M_PKTHDR;
-        
-               // and clear any csum flags
-               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
-       }
-       else if ( m->m_len >= amt )
-       {
-               // everything fits in the first mbuf, so futz with m->m_data, m->m_len and m->m_pkthdr.len to
-               // make it work
+
+               /* and clear any csum flags */
+               m->m_pkthdr.csum_flags &=
+                   ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       } else if (m->m_len >= amt) {
+               /*
+                * everything fits in the first mbuf, so futz with
+                * m->m_data, m->m_len and m->m_pkthdr.len to make it work
+                */
                m->m_len -= ETHER_HDR_LEN;
                m->m_data += ETHER_HDR_LEN;
                m->m_pkthdr.len -= ETHER_HDR_LEN;
-        
-               // now do the checksums we need -- first IP
-               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
-               {
-                       struct ip *ip = mtod( m, struct ip* );
-                       ip->ip_sum = in_cksum( m, hlen );
-            
+
+               /* now do the checksums we need -- first IP */
+               if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) {
+                       struct ip *ip = mtod(m, struct ip *);
+                       ip->ip_sum = in_cksum(m, hlen);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                       printf( "bridge: performed IPv4 checksum\n" );
+                       printf("%s: performed IPv4 checksum\n", __func__);
 #endif
                }
-        
+
                // now do a TCP or UDP delayed checksum
-               if ( m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
-               {
-                       in_delayed_cksum( m );
-            
+               if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
+                       in_delayed_cksum(m);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+                       printf("%s: performed TCPv4/UDPv4 checksum\n",
+                           __func__);
 #endif
                }
-               
-               // now stick the ethernet header back on
+
+               /* now stick the ethernet header back on */
                m->m_len += ETHER_HDR_LEN;
                m->m_data -= ETHER_HDR_LEN;
                m->m_pkthdr.len += ETHER_HDR_LEN;
-        
-               // and clear any csum flags
-               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
-       }
-       else
-       {
+
+               /* and clear any csum flags */
+               m->m_pkthdr.csum_flags &=
+                   ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+       } else {
                struct mbuf *m_ip;
-        
-               // general case -- need to simply split it off and deal
-        
-               // first, calculate how much needs to be made writable (we may have a read-only mbuf here)
-               hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+
+               /*
+                * general case -- need to simply split it off and deal
+                * first, calculate how much needs to be made writable
+                * (we may have a read-only mbuf here)
+                */
+               hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
 #if PARANOID
-               off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
-               
-               if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
-               {
+               off = M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
+
+               if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) {
                        amt += hlen;
                }
-               
-               if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
-               {
-                       amt += sizeof( struct tcphdr * );
+
+               if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4) {
+                       amt += sizeof (struct tcphdr *);
                        amt += off;
                }
-               
-               if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
-               {
-                       amt += sizeof( struct udphdr * );
+
+               if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) {
+                       amt += sizeof (struct udphdr *);
                        amt += off;
                }
 #endif
-        
-               // now split the ethernet header off of the IP packet (we'll re-attach later)
-               m_ip = m_split( m, ETHER_HDR_LEN, M_NOWAIT );
-               if ( m_ip == NULL )
-               {
-                       printf( "bridge_fix_txcsum: could not split ether header\n" );
-            
-                       m_freem( m );
-                       return NULL;
+
+               /*
+                * now split the ethernet header off of the IP packet
+                * (we'll re-attach later)
+                */
+               m_ip = m_split(m, ETHER_HDR_LEN, M_NOWAIT);
+               if (m_ip == NULL) {
+                       printf("%s: could not split ether header\n", __func__);
+
+                       m_freem(m);
+                       return (NULL);
                }
-        
+
 #if PARANOID
-               // make sure that the IP packet is writable for the portion we need
-               if ( m_makewritable( &m_ip, 0, amt, M_DONTWAIT ) != 0 )
-               {
-                       printf( "bridge_fix_txcsum: could not make %d bytes writable\n", amt );
-            
-                       m_freem( m );
-                       m_freem( m_ip );
-                       return NULL;
+               /*
+                * make sure that the IP packet is writable
+                * for the portion we need
+                */
+               if (m_makewritable(&m_ip, 0, amt, M_DONTWAIT) != 0) {
+                       printf("%s: could not make %d bytes writable\n",
+                           __func__, amt);
+
+                       m_freem(m);
+                       m_freem(m_ip);
+                       return (NULL);
                }
 #endif
-               
+
                m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
                m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
-        
+
                m->m_pkthdr.csum_flags = 0;
                m->m_pkthdr.csum_data = 0;
-        
-               // now do the checksums we need -- first IP
-               if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
-               {
-                       // make sure the IP header (or at least the part with the cksum) is there
-                       m_ip = m_pullup( m_ip, sizeof( struct ip ) );
-                       if ( m_ip == NULL )
-                       {
-                               printf( "bridge: failed to flatten header\n ");
-                               m_freem( m );
-                               return NULL;
+
+               /* now do the checksums we need -- first IP */
+               if (m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4) {
+                       /*
+                        * make sure the IP header (or at least the part
+                        * with the cksum) is there
+                        */
+                       m_ip = m_pullup(m_ip, sizeof (struct ip));
+                       if (m_ip == NULL) {
+                               printf("%s: failed to flatten header\n",
+                                   __func__);
+                               m_freem(m);
+                               return (NULL);
                        }
-                       
-                       // now do the checksum
+
+                       /* now do the checksum */
                        {
-                               struct ip *ip = mtod( m_ip, struct ip* );
-                               ip->ip_sum = in_cksum( m_ip, hlen );
-                
+                               struct ip *ip = mtod(m_ip, struct ip *);
+                               ip->ip_sum = in_cksum(m_ip, hlen);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                               printf( "bridge: performed IPv4 checksum\n" );
+                               printf("%s: performed IPv4 checksum\n",
+                                   __func__);
 #endif
                        }
                }
-        
-               // now do a TCP or UDP delayed checksum
-               if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
-               {
-                       in_delayed_cksum( m_ip );
-            
+
+               /* now do a TCP or UDP delayed checksum */
+               if (m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
+                       in_delayed_cksum(m_ip);
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                       printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+                       printf("%s: performed TCPv4/UDPv4 checksum\n",
+                           __func__);
 #endif
                }
-        
+
                // now attach the ethernet header back onto the IP packet
                m->m_next = m_ip;
-               m->m_pkthdr.len += m_length( m_ip );    
-        
-               // clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+               m->m_pkthdr.len += m_length(m_ip);
+
+               /*
+                * clear the M_PKTHDR flags on the ip packet
+                * (again, we re-attach later)
+                */
                m_ip->m_flags &= ~M_PKTHDR;
-        
-               // and clear any csum flags
-               m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+
+               /* and clear any csum flags */
+               m->m_pkthdr.csum_flags &=
+                   ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
        }
-       
-       return m;
+
+       return (m);
 }
 #endif
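
For reference: the fix-up path above ends by writing in_cksum(m, hlen) into
ip->ip_sum once the offload flags are cleared. in_cksum() computes the
standard RFC 1071 Internet checksum over an mbuf chain; a minimal
contiguous-buffer sketch (a hypothetical helper, not the kernel routine)
looks like this:

#include <stddef.h>
#include <stdint.h>

/*
 * RFC 1071 ones'-complement checksum over a flat buffer -- a sketch
 * only; the in-kernel in_cksum() additionally walks mbuf chains.
 */
static uint16_t
cksum_sketch(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {		/* sum 16-bit words */
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len == 1)			/* pad a trailing odd byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)		/* fold carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);	/* ones' complement of the sum */
}

To checksum an IPv4 header, ip_sum is zeroed first and the result stored
back -- which is what both branches above do through in_cksum().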
 
 /*
- * bridge_start:
- *
- *     Start output on a bridge.
+ * Output callback.
  *
+ * This routine is called externally from above only when if_bridge_txstart
+ * is disabled; otherwise it is called internally by bridge_start().
  */
-static errno_t
-bridge_start(struct ifnet *ifp, struct mbuf *m)
+static int
+bridge_output(struct ifnet *ifp, struct mbuf *m)
 {
        struct bridge_softc *sc = ifnet_softc(ifp);
        struct ether_header *eh;
        struct ifnet *dst_if;
-       
-       lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+       int error = 0;
 
        eh = mtod(m, struct ether_header *);
-       
+       dst_if = NULL;
+
        BRIDGE_LOCK(sc);
-       
-       if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
-               (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0)) != NULL) {
-               
-               {
+       if (!(m->m_flags & (M_BCAST|M_MCAST))) {
+               dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
+       }
+
 #if APPLE_BRIDGE_HWCKSUM_SUPPORT
-                       /* 
-                        * APPLE MODIFICATION - if the packet needs a checksum (i.e., 
-                        * checksum has been deferred for HW support) AND the destination
-                        * interface doesn't support HW checksums, then we 
-                        * need to fix-up the checksum here
-                        */
-                       if (
-                               ( (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4) ) != 0 ) &&
-                               ( (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags ) != m->m_pkthdr.csum_flags )
-                               )
-                       {
-                               m = bridge_fix_txcsum( m );
-                               if ( m == NULL )
-                               {
-                                       goto done;
-                               }
-                       }
-                       
-#else
-                       if (eh->ether_type == htons(ETHERTYPE_IP))
-                               mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
-                       else
-                               m->m_pkthdr.csum_flags = 0;
-#endif
-                       #if NBPFILTER > 0
-                               if (sc->sc_bpf_output)
-                                       bridge_bpf_output(ifp, m);
-                       #endif
+       /*
+        * APPLE MODIFICATION - if the packet needs a checksum
+        * (i.e., checksum has been deferred for HW support)
+        * AND the destination interface doesn't support HW
+        * checksums, then we need to fix-up the checksum here
+        */
+       if ((m->m_pkthdr.csum_flags &
+           (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) &&
+           (dst_if == NULL ||
+           (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags) !=
+           m->m_pkthdr.csum_flags)) {
+               m = bridge_fix_txcsum(m);
+               if (m == NULL) {
                        BRIDGE_UNLOCK(sc);
-                       bridge_enqueue(sc, dst_if, m);
-               }
-       } else
-       {
-#if APPLE_BRIDGE_HWCKSUM_SUPPORT
-               
-               /* 
-                * APPLE MODIFICATION - if the MULTICAST packet needs a checksum (i.e., 
-                * checksum has been deferred for HW support) AND at least one destination
-                * interface doesn't support HW checksums, then we go ahead and fix it up
-                * here, since it doesn't make sense to do it more than once
-                */
-               
-               if (
-                       (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) &&
-                       /*
-                        * XXX FIX ME: keep track of whether or not we have any interfaces that 
-                        * do not support checksums (for now, assume we do)
-                        */
-                       ( 1 )
-                       )
-               {
-                       m = bridge_fix_txcsum( m );
-                       if ( m == NULL )
-                       {
-                               goto done;
-                       }
+                       return (0);
                }
+       }
 #else
-               if (eh->ether_type == htons(ETHERTYPE_IP))
-                       mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
-               else
-                       m->m_pkthdr.csum_flags = 0;
+       if (eh->ether_type == htons(ETHERTYPE_IP))
+               mbuf_outbound_finalize(m, PF_INET, sizeof (*eh));
+       else
+               m->m_pkthdr.csum_flags = 0;
+#endif /* APPLE_BRIDGE_HWCKSUM_SUPPORT */
+
+       atomic_add_64(&ifp->if_obytes, m->m_pkthdr.len);
+       atomic_add_64(&ifp->if_opackets, 1);
+
+#if NBPFILTER > 0
+       if (sc->sc_bpf_output)
+               bridge_bpf_output(ifp, m);
 #endif
-               
-               #if NBPFILTER > 0
-                       if (sc->sc_bpf_output)
-                               bridge_bpf_output(ifp, m);
-               #endif
+
+       if (dst_if == NULL) {
+               /* callee will unlock */
                bridge_broadcast(sc, ifp, m, 0);
+       } else {
+               BRIDGE_UNLOCK(sc);
+               error = bridge_enqueue(sc, dst_if, m);
        }
-#if APPLE_BRIDGE_HWCKSUM_SUPPORT
-done:
-#endif
 
-       return 0;
+       return (error);
+}
+
+/*
+ * bridge_start:
+ *
+ *     Start output on a bridge.
+ *
+ * This routine is invoked by the start worker thread; because we never call
+ * it directly, there is no need to deploy any serialization mechanism other
+ * than what's already used by the worker thread, i.e. this is already single
+ * threaded.
+ *
+ * This routine is called only when if_bridge_txstart is enabled.
+ */
+static void
+bridge_start(struct ifnet *ifp)
+{
+       struct mbuf *m;
+
+       for (;;) {
+               if (ifnet_dequeue(ifp, &m) != 0)
+                       break;
+
+               (void) bridge_output(ifp, m);
+       }
 }
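
The two transmit paths above correspond to the two driver models: with
if_bridge_txstart enabled, the stack enqueues packets and the worker thread
drains them through bridge_start(); otherwise bridge_output() is called
directly. A rough registration sketch for the txstart model (field and
constant names per this era's kpi_interface.h -- treat them as assumptions):

#include <net/if_types.h>
#include <net/kpi_interface.h>

static void my_start(ifnet_t ifp);	/* drains queue via ifnet_dequeue() */

static errno_t
my_attach(ifnet_t *ifpp)
{
	struct ifnet_init_eparams ep;

	bzero(&ep, sizeof (ep));
	ep.ver = IFNET_INIT_CURRENT_VERSION;	/* assumed constant name */
	ep.len = sizeof (ep);
	ep.name = "mybr";			/* hypothetical interface */
	ep.unit = 0;
	ep.family = IFNET_FAMILY_ETHERNET;
	ep.type = IFT_ETHER;
	ep.start = my_start;			/* txstart model */
	/* legacy model instead: ep.flags |= IFNET_INIT_LEGACY; ep.output */

	return (ifnet_allocate_extended(&ep, ifpp));
}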
 
 /*
@@ -3412,10 +3536,11 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
        lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
 
 #if BRIDGE_DEBUG
-       if (_if_brige_debug)
-        printf("bridge_forward %s%d m%p\n", ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), m);
+       if (if_bridge_debug)
+               printf("%s: %s%d m%p\n", __func__, ifnet_name(sc->sc_ifp),
+                   ifnet_unit(sc->sc_ifp), m);
 #endif /* BRIDGE_DEBUG */
-       
+
        src_if = m->m_pkthdr.rcvif;
        ifp = sc->sc_ifp;
 
@@ -3488,10 +3613,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
         */
 #if NBPFILTER > 0
        if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
-               dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
-        m->m_pkthdr.rcvif = ifp;
+           dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
+               m->m_pkthdr.rcvif = ifp;
                if (sc->sc_bpf_input)
-            bridge_bpf_input(ifp, m);
+                       bridge_bpf_input(ifp, m);
        }
 #endif /* NBPFILTER */
 
@@ -3512,13 +3637,13 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
 #endif /* PFIL_HOOKS */
 
        if (dst_if == NULL) {
-        /*
-         * Clear any in-bound checksum flags for this packet.
-         */
+               /*
+                * Clear any in-bound checksum flags for this packet.
+                */
                mbuf_inbound_modified(m);
-        
+
                bridge_broadcast(sc, src_if, m, 1);
-               
+
                return;
        }
 
@@ -3573,7 +3698,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
         */
        mbuf_inbound_modified(m);
 
-       bridge_enqueue(sc, dst_if, m);
+       (void) bridge_enqueue(sc, dst_if, m);
        return;
 
 drop:
@@ -3583,15 +3708,15 @@ drop:
 
 #if BRIDGE_DEBUG
 
-char * ether_ntop(char *, size_t , const u_char *);
+char *ether_ntop(char *, size_t, const u_char *);
 
 __private_extern__ char *
 ether_ntop(char *buf, size_t len, const u_char *ap)
 {
-       snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x", 
-                        ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
-       
-       return buf;
+       snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
+           ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
+
+       return (buf);
 }
 
 #endif /* BRIDGE_DEBUG */
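
ether_ntop() above is a bounded helper that formats a 6-byte link-layer
address into the caller's buffer and returns that buffer; 18 bytes (six
two-digit groups, five colons, a NUL) is the minimum. Usage sketch:

	u_char mac[ETHER_ADDR_LEN] = { 0x00, 0x16, 0xcb, 0x00, 0x01, 0x02 };
	char buf[18];	/* "xx:xx:xx:xx:xx:xx" + NUL */

	printf("dst %s\n", ether_ntop(buf, sizeof (buf), mac));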
@@ -3614,22 +3739,22 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
        int error;
 
 #if BRIDGE_DEBUG
-       if (_if_brige_debug)
-               printf("bridge_input: %s%d from %s%d m %p data %p\n", 
-                          ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                          ifnet_name(ifp), ifnet_unit(ifp), 
-                          m, mbuf_data(m));
+       if (if_bridge_debug)
+               printf("%s: %s%d from %s%d m %p data %p\n", __func__,
+                   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+                   ifnet_name(ifp), ifnet_unit(ifp), m, mbuf_data(m));
 #endif /* BRIDGE_DEBUG */
 
        if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
 #if BRIDGE_DEBUG
-               if (_if_brige_debug)
-                       printf( "bridge_input: %s%d not running passing along\n",
-                                  ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+               if (if_bridge_debug)
+                       printf("%s: %s%d not running passing along\n",
+                           __func__, ifnet_name(sc->sc_ifp),
+                           ifnet_unit(sc->sc_ifp));
 #endif /* BRIDGE_DEBUG */
-               return 0;
+               return (0);
        }
-       
+
        bifp = sc->sc_ifp;
        vlan = VLANTAGOF(m);
 
@@ -3645,27 +3770,28 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
                BRIDGE_BPF_MTAP_INPUT(sc, m);
                (void) ifnet_stat_increment_in(bifp, 1, m->m_pkthdr.len, 0);
                m_freem(m);
-               return EJUSTRETURN;
+               return (EJUSTRETURN);
        }
 #endif /* IFF_MONITOR */
 
-       /* 
-        * Need to clear the promiscous flags otherwise it will be 
-        * dropped by DLIL after processing filters 
+       /*
+        * Need to clear the promiscuous flags, otherwise it will be
+        * dropped by DLIL after processing filters
         */
        if ((mbuf_flags(m) & MBUF_PROMISC))
                mbuf_setflags_mask(m, 0, MBUF_PROMISC);
-       
+
        BRIDGE_LOCK(sc);
        bif = bridge_lookup_member_if(sc, ifp);
        if (bif == NULL) {
                BRIDGE_UNLOCK(sc);
 #if BRIDGE_DEBUG
-               if (_if_brige_debug)
-                       printf( "bridge_input: %s%d bridge_lookup_member_if failed\n",
-                                  ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+               if (if_bridge_debug)
+                       printf("%s: %s%d bridge_lookup_member_if failed\n",
+                           __func__, ifnet_name(sc->sc_ifp),
+                           ifnet_unit(sc->sc_ifp));
 #endif /* BRIDGE_DEBUG */
-               return 0;
+               return (0);
        }
 
        eh = mtod(m, struct ether_header *);
@@ -3675,28 +3801,35 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
        if (m->m_flags & (M_BCAST|M_MCAST)) {
 
 #if BRIDGE_DEBUG
-               if (_if_brige_debug)
+               if (if_bridge_debug)
                        if ((m->m_flags & M_MCAST))
-                               printf("mulicast: %02x:%02x:%02x:%02x:%02x:%02x\n",
-                                  eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
-                                  eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5]);
-               
+                               printf("%s: mulicast: "
+                                   "%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                   __func__,
+                                   eh->ether_dhost[0], eh->ether_dhost[1],
+                                   eh->ether_dhost[2], eh->ether_dhost[3],
+                                   eh->ether_dhost[4], eh->ether_dhost[5]);
 #endif /* BRIDGE_DEBUG */
 
                /* Tap off 802.1D packets; they do not get forwarded. */
                if (memcmp(eh->ether_dhost, bstp_etheraddr,
                    ETHER_ADDR_LEN) == 0) {
+#if BRIDGESTP
                        m = bstp_input(&bif->bif_stp, ifp, m);
+#else /* !BRIDGESTP */
+                       m_freem(m);
+                       m = NULL;
+#endif /* !BRIDGESTP */
                        if (m == NULL) {
                                BRIDGE_UNLOCK(sc);
-                               return EJUSTRETURN;
+                               return (EJUSTRETURN);
                        }
                }
 
                if ((bif->bif_flags & IFBIF_STP) &&
                    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
                        BRIDGE_UNLOCK(sc);
-                       return 0;
+                       return (0);
                }
 
                /*
@@ -3707,16 +3840,16 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
                mc = m_dup(m, M_DONTWAIT);
                if (mc == NULL) {
                        BRIDGE_UNLOCK(sc);
-                       return 0;
+                       return (0);
                }
 
-               /* 
-                * Perform the bridge forwarding function with the copy. 
+               /*
+                * Perform the bridge forwarding function with the copy.
                 *
                 * Note that bridge_forward calls BRIDGE_UNLOCK
                 */
                bridge_forward(sc, bif, mc);
-               
+
                /*
                 * Reinject the mbuf as arriving on the bridge so we have a
                 * chance at claiming multicast packets. We can not loop back
@@ -3735,33 +3868,37 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
                        // mark packet as arriving on the bridge
                        mc2->m_pkthdr.rcvif = bifp;
                        mc2->m_pkthdr.header = mbuf_data(mc2);
-                       
+
 #if NBPFILTER > 0
                        if (sc->sc_bpf_input)
                                bridge_bpf_input(bifp, mc2);
 #endif /* NBPFILTER */
-                       (void) mbuf_setdata(mc2, (char *)mbuf_data(mc2) + ETHER_HDR_LEN, mbuf_len(mc2) - ETHER_HDR_LEN);
+                       (void) mbuf_setdata(mc2,
+                           (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
+                           mbuf_len(mc2) - ETHER_HDR_LEN);
                        (void) mbuf_pkthdr_adjustlen(mc2, - ETHER_HDR_LEN);
-                       
-                       (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(mc2), 0);
-                                               
+
+                       (void) ifnet_stat_increment_in(bifp, 1,
+                           mbuf_pkthdr_len(mc2), 0);
+
 #if BRIDGE_DEBUG
-                       if (_if_brige_debug)
-                               printf( "bridge_input: %s%d mcast for us\n",
-                                          ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+                       if (if_bridge_debug)
+                               printf("%s: %s%d mcast for us\n", __func__,
+                                   ifnet_name(sc->sc_ifp),
+                                   ifnet_unit(sc->sc_ifp));
 #endif /* BRIDGE_DEBUG */
-                       
+
                        dlil_input_packet_list(bifp, mc2);
                }
 
                /* Return the original packet for local processing. */
-               return 0;
+               return (0);
        }
 
        if ((bif->bif_flags & IFBIF_STP) &&
            bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
                BRIDGE_UNLOCK(sc);
-               return 0;
+               return (0);
        }
 
 #ifdef DEV_CARP
@@ -3784,52 +3921,49 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
 #endif
 
 #if defined(PFIL_HOOKS)
-#define PFIL_PHYS(sc, ifp, m) do { \
-               if (pfil_local_phys &&                          \
-                       (PFIL_HOOKED(&inet_pfil_hook)           \
-                        OR_PFIL_HOOKED_INET6)) {                       \
-                       if (bridge_pfil(&m, NULL, ifp,          \
-                               PFIL_IN) != 0 || m == NULL) {   \
-                               BRIDGE_UNLOCK(sc);              \
-                               return (NULL);                  \
-                       }                                       \
-               } \
-       } while (0)
+#define        PFIL_PHYS(sc, ifp, m) do {                                      \
+       if (pfil_local_phys &&                                          \
+       (PFIL_HOOKED(&inet_pfil_hook) OR_PFIL_HOOKED_INET6)) {          \
+               if (bridge_pfil(&m, NULL, ifp,                          \
+                   PFIL_IN) != 0 || m == NULL) {                       \
+                       BRIDGE_UNLOCK(sc);                              \
+                       return (NULL);                                  \
+               }                                                       \
+       }                                                               \
+} while (0)
 #else /* PFIL_HOOKS */
 #define        PFIL_PHYS(sc, ifp, m)
 #endif /* PFIL_HOOKS */
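
The reworked PFIL_PHYS() keeps the do { ... } while (0) wrapper, the usual
idiom for making a multi-statement macro parse as exactly one statement;
without it, using the macro as an unbraced if/else body mis-nests. In
miniature (hypothetical FOO()):

#define	FOO(x) do {							\
	step_one(x);							\
	step_two(x);							\
} while (0)

	if (cond)
		FOO(m);		/* one statement; trailing ';' expected */
	else
		handle_other();	/* still pairs with the if above */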
 
-#define GRAB_OUR_PACKETS(iface) \
-       if ((iface)->if_type == IFT_GIF) \
-               continue; \
-       /* It is destined for us. */ \
-       if (memcmp(ifnet_lladdr((iface)), eh->ether_dhost,  ETHER_ADDR_LEN) == 0 \
-           OR_CARP_CHECK_WE_ARE_DST((iface))                           \
-           ) {                                                         \
+#define        GRAB_OUR_PACKETS(iface)                                         \
+       if ((iface)->if_type == IFT_GIF)                                \
+               continue;                                               \
+       /* It is destined for us. */                                    \
+       if (memcmp(ifnet_lladdr((iface)), eh->ether_dhost,              \
+           ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_DST((iface))) {   \
                if ((iface)->if_type == IFT_BRIDGE) {                   \
                        BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
                        /* Filter on the physical interface. */         \
-                       PFIL_PHYS(sc, iface, m);                                                \
+                       PFIL_PHYS(sc, iface, m);                        \
                }                                                       \
                if (bif->bif_flags & IFBIF_LEARNING) {                  \
                        error = bridge_rtupdate(sc, eh->ether_shost,    \
                            vlan, bif, 0, IFBAF_DYNAMIC);               \
                        if (error && bif->bif_addrmax) {                \
                                BRIDGE_UNLOCK(sc);                      \
-                               return EJUSTRETURN;                             \
+                               return (EJUSTRETURN);                   \
                        }                                               \
                }                                                       \
                m->m_pkthdr.rcvif = iface;                              \
                BRIDGE_UNLOCK(sc);                                      \
-               return 0;                                               \
+               return (0);                                             \
        }                                                               \
                                                                        \
        /* We just received a packet that we sent out. */               \
-       if (memcmp(ifnet_lladdr((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \
-           OR_CARP_CHECK_WE_ARE_SRC((iface))                   \
-           ) {                                                         \
+       if (memcmp(ifnet_lladdr((iface)), eh->ether_shost,              \
+           ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_SRC((iface))) {   \
                BRIDGE_UNLOCK(sc);                                      \
-               return EJUSTRETURN;                                             \
+               return (EJUSTRETURN);                                   \
        }
 
        /*
@@ -3841,68 +3975,68 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
         * local processing.
         */
        if (memcmp(eh->ether_dhost, ifnet_lladdr(bifp),
-                          ETHER_ADDR_LEN) == 0
-               OR_CARP_CHECK_WE_ARE_DST(bifp)) {
-               
+           ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_DST(bifp)) {
+
                /* Mark the packet as arriving on the bridge interface */
                (void) mbuf_pkthdr_setrcvif(m, bifp);
                mbuf_pkthdr_setheader(m, frame_header);
-               
+
                /*
                 * If the interface is learning, and the source
                 * address is valid and not multicast, record
                 * the address.
                 */
                if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
-                       ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
-                       (eh->ether_shost[0] | eh->ether_shost[1] |
-                        eh->ether_shost[2] | eh->ether_shost[3] |
-                        eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
-                               (void) bridge_rtupdate(sc, eh->ether_shost,
-                                                                          vlan, bif, 0, IFBAF_DYNAMIC);
-                       }
-               
+                   ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+                   (eh->ether_shost[0] | eh->ether_shost[1] |
+                   eh->ether_shost[2] | eh->ether_shost[3] |
+                   eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+                       (void) bridge_rtupdate(sc, eh->ether_shost,
+                           vlan, bif, 0, IFBAF_DYNAMIC);
+               }
+
                BRIDGE_BPF_MTAP_INPUT(sc, m);
 
-               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+               (void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
+                   mbuf_len(m) - ETHER_HDR_LEN);
                (void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
-               
+
                (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
 
                BRIDGE_UNLOCK(sc);
-                               
+
 #if BRIDGE_DEBUG
-               if (_if_brige_debug)
-                       printf( "bridge_input: %s%d packet for bridge\n",
-                                  ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+               if (if_bridge_debug)
+                       printf("%s: %s%d packet for bridge\n", __func__,
+                           ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
 #endif /* BRIDGE_DEBUG */
-               
+
                dlil_input_packet_list(bifp, m);
-               
-               return EJUSTRETURN;
+
+               return (EJUSTRETURN);
        }
 
        /*
-        * if the destination of the packet is for the MAC address of 
+        * if the destination of the packet is for the MAC address of
         * the member interface itself, then we don't need to forward
         * it -- just pass it back.  Note that it'll likely just be
-        * dropped by the stack, but if something else is bound to 
+        * dropped by the stack, but if something else is bound to
         * the interface directly (for example, the wireless stats
-        * protocol -- although that actually uses BPF right now), 
+        * protocol -- although that actually uses BPF right now),
         * then it will consume the packet
         *
-        * ALSO, note that we do this check AFTER checking for the 
+        * ALSO, note that we do this check AFTER checking for the
         * bridge's own MAC address, because the bridge may be
         * using the SAME MAC address as one of its interfaces
         */
-       if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp),
-                          ETHER_ADDR_LEN) == 0) {
-                       
+       if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp), ETHER_ADDR_LEN) == 0) {
+
 #ifdef VERY_VERY_VERY_DIAGNOSTIC
-                       printf("bridge_input: not forwarding packet bound for member interface\n" );
+                       printf("%s: not forwarding packet bound for member "
+                           "interface\n", __func__);
 #endif
                        BRIDGE_UNLOCK(sc);
-                       return 0;
+                       return (0);
        }
 
        /* Now check all the bridge members. */
@@ -3915,14 +4049,14 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
 #undef OR_PFIL_HOOKED_INET6
 #undef GRAB_OUR_PACKETS
 
-       /* 
-        * Perform the bridge forwarding function. 
+       /*
+        * Perform the bridge forwarding function.
         *
         * Note that bridge_forward calls BRIDGE_UNLOCK
         */
        bridge_forward(sc, bif, m);
 
-       return EJUSTRETURN;
+       return (EJUSTRETURN);
 }
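
bridge_input() follows the DLIL input-hook convention visible in the returns
above: 0 hands the packet back for normal protocol input, while EJUSTRETURN
signals that the bridge consumed or forwarded the mbuf and the caller must
not touch it again. A caller-side sketch (hypothetical dispatcher, not the
actual DLIL code):

	errno_t err = bridge_input(ifp, m, frame_header);

	if (err == EJUSTRETURN)
		return;			/* bridge took ownership of m */
	if (err == 0)
		proto_input_continue(ifp, m);	/* hypothetical: normal path */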
 
 /*
@@ -3994,7 +4128,8 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                } else {
                        mc = m_dup(m, M_DONTWAIT);
                        if (mc == NULL) {
-                               (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+                               (void) ifnet_stat_increment_out(sc->sc_ifp,
+                                   0, 0, 1);
                                continue;
                        }
                }
@@ -4015,7 +4150,8 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                                int i = min(mc->m_pkthdr.len, max_protohdr);
                                mc = m_copyup(mc, i, ETHER_ALIGN);
                                if (mc == NULL) {
-                                       (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+                                       (void) ifnet_stat_increment_out(
+                                           sc->sc_ifp, 0, 0, 1);
                                        continue;
                                }
                        }
@@ -4026,7 +4162,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                }
 #endif /* PFIL_HOOKS */
 
-               bridge_enqueue(sc, dst_if, mc);
+               (void) bridge_enqueue(sc, dst_if, mc);
        }
        if (used == 0)
                m_freem(m);
@@ -4066,7 +4202,7 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m)
                        continue;
                }
 
-               bridge_enqueue(sc, dst_if, mc);
+               (void) bridge_enqueue(sc, dst_if, mc);
        }
 }
 
@@ -4089,7 +4225,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
        /* Check the source address is valid and not multicast. */
        if (ETHER_IS_MULTICAST(dst) ||
            (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
-            dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
+           dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
                return (EINVAL);
 
 
@@ -4147,14 +4283,14 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
 
        if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
                struct timespec now;
-       
+
                nanouptime(&now);
                brt->brt_expire = now.tv_sec + sc->sc_brttimeout;
        }
        if (setflags)
                brt->brt_flags = flags;
 
-       
+
        return (0);
 }
 
@@ -4226,7 +4362,7 @@ bridge_timer(void *arg)
 
        if (sc->sc_ifp->if_flags & IFF_RUNNING) {
                struct timespec ts;
-       
+
                ts.tv_sec = bridge_rtable_prune_period;
                ts.tv_nsec = 0;
                bsd_timeout(bridge_timer, sc, &ts);
@@ -4248,7 +4384,7 @@ bridge_rtage(struct bridge_softc *sc)
        LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
                if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
                        struct timespec now;
-       
+
                        nanouptime(&now);
                        if ((unsigned long)now.tv_sec >= brt->brt_expire)
                                bridge_rtnode_destroy(sc, brt);
@@ -4313,7 +4449,7 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
 
        LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
                if (brt->brt_ifp == ifp && (full ||
-                           (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
+                   (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
                        bridge_rtnode_destroy(sc, brt);
        }
 }
@@ -4328,7 +4464,7 @@ bridge_rtable_init(struct bridge_softc *sc)
 {
        int i;
 
-       sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
+       sc->sc_rthash = _MALLOC(sizeof (*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
            M_DEVBUF, M_NOWAIT);
        if (sc->sc_rthash == NULL)
                return (ENOMEM);
@@ -4412,7 +4548,8 @@ bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
  *     vlan id or if zero then just return the first match.
  */
 static struct bridge_rtnode *
-bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
+    uint16_t vlan)
 {
        struct bridge_rtnode *brt;
        uint32_t hash;
@@ -4499,6 +4636,7 @@ bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
        zfree(bridge_rtnode_pool, brt);
 }
 
+#if BRIDGESTP
 /*
  * bridge_rtable_expire:
  *
@@ -4516,18 +4654,19 @@ bridge_rtable_expire(struct ifnet *ifp, int age)
         * If the age is zero then flush, otherwise set all the expiry times to
         * age for the interface
         */
-       if (age == 0)
+       if (age == 0) {
                bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
-       else {
+       } else {
                LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
                        struct timespec now;
-       
+
                        nanouptime(&now);
                        /* Cap the expiry time to 'age' */
                        if (brt->brt_ifp == ifp &&
                            brt->brt_expire > (unsigned long)now.tv_sec + age &&
                            (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
-                               brt->brt_expire = (unsigned long)now.tv_sec + age;
+                               brt->brt_expire =
+                                   (unsigned long)now.tv_sec + age;
                }
        }
        BRIDGE_UNLOCK(sc);
@@ -4553,10 +4692,10 @@ bridge_state_change(struct ifnet *ifp, int state)
 
        if (log_stp)
                log(LOG_NOTICE, "%s%d: state changed to %s on %s%d\n",
-                       ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
-                   stpstates[state], 
-                   ifnet_name(ifp), ifnet_unit(ifp));
+                   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+                   stpstates[state], ifnet_name(ifp), ifnet_unit(ifp));
 }
+#endif /* BRIDGESTP */
 
 #ifdef PFIL_HOOKS
 /*
@@ -4588,11 +4727,11 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
 
        i = min((*mp)->m_pkthdr.len, max_protohdr);
        if ((*mp)->m_len < i) {
-           *mp = m_pullup(*mp, i);
-           if (*mp == NULL) {
-               printf("%s: m_pullup failed\n", __func__);
-               return (-1);
-           }
+               *mp = m_pullup(*mp, i);
+               if (*mp == NULL) {
+                       printf("%s: m_pullup failed\n", __func__);
+                       return (-1);
+               }
        }
 
        eh1 = mtod(*mp, struct ether_header *);
@@ -4645,13 +4784,13 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
        }
 
        /* Strip off the Ethernet header and keep a copy. */
-       m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
+       m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
        m_adj(*mp, ETHER_HDR_LEN);
 
        /* Strip off snap header, if present */
        if (snap) {
-               m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
-               m_adj(*mp, sizeof(struct llc));
+               m_copydata(*mp, 0, sizeof (struct llc), (caddr_t)&llc1);
+               m_adj(*mp, sizeof (struct llc));
        }
 
        /*
@@ -4704,7 +4843,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
                         * packet will return to us via bridge_dummynet().
                         */
                        args.oif = ifp;
-                       ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args);
+                       ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args, DN_CLIENT_IPFW);
                        return (error);
                }
 
@@ -4738,21 +4877,21 @@ ipfwpass:
                 */
                if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
                        error = pfil_run_hooks(&inet_pfil_hook, mp, bifp,
-                                       dir, NULL);
+                           dir, NULL);
 
                if (*mp == NULL || error != 0) /* filter may consume */
                        break;
 
                if (pfil_member && ifp != NULL)
                        error = pfil_run_hooks(&inet_pfil_hook, mp, ifp,
-                                       dir, NULL);
+                           dir, NULL);
 
                if (*mp == NULL || error != 0) /* filter may consume */
                        break;
 
                if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
                        error = pfil_run_hooks(&inet_pfil_hook, mp, bifp,
-                                       dir, NULL);
+                           dir, NULL);
 
                if (*mp == NULL || error != 0) /* filter may consume */
                        break;
@@ -4762,7 +4901,7 @@ ipfwpass:
                        i = (*mp)->m_pkthdr.len;
                        if (i > ifp->if_mtu) {
                                error = bridge_fragment(ifp, *mp, &eh2, snap,
-                                           &llc1);
+                                   &llc1);
                                return (error);
                        }
                }
@@ -4770,7 +4909,7 @@ ipfwpass:
                /* Recalculate the ip checksum and restore byte ordering */
                ip = mtod(*mp, struct ip *);
                hlen = ip->ip_hl << 2;
-               if (hlen < sizeof(struct ip))
+               if (hlen < sizeof (struct ip))
                        goto bad;
                if (hlen > (*mp)->m_len) {
                        if ((*mp = m_pullup(*mp, hlen)) == 0)
@@ -4782,7 +4921,7 @@ ipfwpass:
                ip->ip_len = htons(ip->ip_len);
                ip->ip_off = htons(ip->ip_off);
                ip->ip_sum = 0;
-               if (hlen == sizeof(struct ip))
+               if (hlen == sizeof (struct ip))
                        ip->ip_sum = in_cksum_hdr(ip);
                else
                        ip->ip_sum = in_cksum(*mp, hlen);
@@ -4792,21 +4931,21 @@ ipfwpass:
        case ETHERTYPE_IPV6:
                if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
                        error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
-                                       dir, NULL);
+                           dir, NULL);
 
                if (*mp == NULL || error != 0) /* filter may consume */
                        break;
 
                if (pfil_member && ifp != NULL)
                        error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
-                                       dir, NULL);
+                           dir, NULL);
 
                if (*mp == NULL || error != 0) /* filter may consume */
                        break;
 
                if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
                        error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
-                                       dir, NULL);
+                           dir, NULL);
                break;
 #endif
        default:
@@ -4825,10 +4964,10 @@ ipfwpass:
         * Finally, put everything back the way it was and return
         */
        if (snap) {
-               M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+               M_PREPEND(*mp, sizeof (struct llc), M_DONTWAIT);
                if (*mp == NULL)
                        return (error);
-               bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
+               bcopy(&llc1, mtod(*mp, caddr_t), sizeof (struct llc));
        }
 
        M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
@@ -4869,8 +5008,9 @@ bridge_ip_checkbasic(struct mbuf **mp)
                return (-1);
 
        if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
-               if ((m = m_copyup(m, sizeof(struct ip),
-                       (max_linkhdr + 3) & ~3)) == NULL) {
+               /* max_linkhdr is already rounded up to a 4-byte boundary */
+               if ((m = m_copyup(m, sizeof (struct ip),
+                   max_linkhdr)) == NULL) {
                        /* XXXJRT new stat, please */
                        ipstat.ips_toosmall++;
                        goto bad;
@@ -4889,7 +5029,7 @@ bridge_ip_checkbasic(struct mbuf **mp)
                goto bad;
        }
        hlen = ip->ip_hl << 2;
-       if (hlen < sizeof(struct ip)) { /* minimum header length */
+       if (hlen < sizeof (struct ip)) { /* minimum header length */
                ipstat.ips_badhlen++;
                goto bad;
        }
@@ -4905,7 +5045,7 @@ bridge_ip_checkbasic(struct mbuf **mp)
        if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
                sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
        } else {
-               if (hlen == sizeof(struct ip)) {
+               if (hlen == sizeof (struct ip)) {
                        sum = in_cksum_hdr(ip);
                } else {
                        sum = in_cksum(m, hlen);
@@ -4966,16 +5106,17 @@ bridge_ip6_checkbasic(struct mbuf **mp)
         */
        if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
                struct ifnet *inifp = m->m_pkthdr.rcvif;
-               if ((m = m_copyup(m, sizeof(struct ip6_hdr),
-                           (max_linkhdr + 3) & ~3)) == NULL) {
+               /* max_linkhdr is already rounded up to a 4-byte boundary */
+               if ((m = m_copyup(m, sizeof (struct ip6_hdr),
+                   max_linkhdr)) == NULL) {
                        /* XXXJRT new stat, please */
                        ip6stat.ip6s_toosmall++;
                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
                        goto bad;
                }
-       } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
+       } else if (__predict_false(m->m_len < sizeof (struct ip6_hdr))) {
                struct ifnet *inifp = m->m_pkthdr.rcvif;
-               if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
+               if ((m = m_pullup(m, sizeof (struct ip6_hdr))) == NULL) {
                        ip6stat.ip6s_toosmall++;
                        in6_ifstat_inc(inifp, ifs6_in_hdrerr);
                        goto bad;
@@ -5013,13 +5154,13 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
        struct ip *ip;
        int error = -1;
 
-       if (m->m_len < sizeof(struct ip) &&
-           (m = m_pullup(m, sizeof(struct ip))) == NULL)
+       if (m->m_len < sizeof (struct ip) &&
+           (m = m_pullup(m, sizeof (struct ip))) == NULL)
                goto out;
        ip = mtod(m, struct ip *);
 
        error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
-                   CSUM_DELAY_IP);
+           CSUM_DELAY_IP);
        if (error)
                goto out;
 
@@ -5027,13 +5168,13 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
        for (m0 = m; m0; m0 = m0->m_nextpkt) {
                if (error == 0) {
                        if (snap) {
-                               M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+                               M_PREPEND(m0, sizeof (struct llc), M_DONTWAIT);
                                if (m0 == NULL) {
                                        error = ENOBUFS;
                                        continue;
                                }
                                bcopy(llc, mtod(m0, caddr_t),
-                                   sizeof(struct llc));
+                                   sizeof (struct llc));
                        }
                        M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
                        if (m0 == NULL) {
@@ -5041,8 +5182,9 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
                                continue;
                        }
                        bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
-               } else 
+               } else {
                        m_freem(m);
+               }
        }
 
        if (error == 0)
@@ -5061,78 +5203,81 @@ static errno_t
 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
-       
-       //printf("bridge_set_bpf_tap ifp %p mode %d\n", ifp, mode);
-       
+
        /* TBD locking */
        if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
-               return ENODEV;
+               return (ENODEV);
        }
-       
+
        switch (mode) {
                case BPF_TAP_DISABLE:
                        sc->sc_bpf_input = sc->sc_bpf_output = NULL;
                        break;
-                       
+
                case BPF_TAP_INPUT:
                        sc->sc_bpf_input = bpf_callback;
                        break;
-                       
+
                case BPF_TAP_OUTPUT:
                        sc->sc_bpf_output = bpf_callback;
                        break;
-                       
+
                case BPF_TAP_INPUT_OUTPUT:
                        sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
                        break;
-                       
+
                default:
                        break;
        }
-       
-       return 0;
+
+       return (0);
 }
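
bridge_set_bpf_tap() simply stores the callback for the requested
direction(s); the tap itself is driven from bridge_bpf_input() and
bridge_bpf_output() below. A sketch of a conforming callback (the
bpf_packet_func shape is taken from this era's net/bpf.h -- an assumption
worth checking; registration normally happens through the BPF attach path
rather than by calling this function directly):

static errno_t
my_tap(ifnet_t ifp, mbuf_t m)
{
	/* observe only -- the caller still owns and transmits m */
	printf("tap: %lu bytes on %s%d\n",
	    (unsigned long)mbuf_pkthdr_len(m),
	    ifnet_name(ifp), ifnet_unit(ifp));
	return (0);
}

Enabling it for both directions would then amount to
bridge_set_bpf_tap(ifp, BPF_TAP_INPUT_OUTPUT, my_tap).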
 
 static void
 bridge_detach(ifnet_t ifp)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
-       
+
+#if BRIDGESTP
        bstp_detach(&sc->sc_stp);
+#endif /* BRIDGESTP */
 
        /* Tear down the routing table. */
        bridge_rtable_fini(sc);
-       
+
        lck_mtx_lock(bridge_list_mtx);
        LIST_REMOVE(sc, sc_list);
        lck_mtx_unlock(bridge_list_mtx);
-       
+
        ifnet_release(ifp);
-       
+
        lck_mtx_free(sc->sc_mtx, bridge_lock_grp);
-       
+
        _FREE(sc, M_DEVBUF);
-       return;
 }
 
-__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+__private_extern__ errno_t
+bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
-       
+
        if (sc->sc_bpf_input) {
-               if (mbuf_pkthdr_rcvif(m) != ifp)
-                       printf("bridge_bpf_input rcvif: %p != ifp %p\n", mbuf_pkthdr_rcvif(m), ifp);
+               if (mbuf_pkthdr_rcvif(m) != ifp) {
+                       printf("%s: rcvif: %p != ifp %p\n", __func__,
+                           mbuf_pkthdr_rcvif(m), ifp);
+               }
                (*sc->sc_bpf_input)(ifp, m);
        }
-       return 0;
+       return (0);
 }
 
-__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
+__private_extern__ errno_t
+bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
 {
        struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
-       
+
        if (sc->sc_bpf_output) {
                (*sc->sc_bpf_output)(ifp, m);
        }
-       return 0;
+       return (0);
 }
index 3d1375aed1b116588ea5c7f37f2a585ea63043de..f3774c5aaf1732e26e633dc4aefa10a1ce6a2b2a 100644 (file)
@@ -492,6 +492,8 @@ struct ifbpstpconf64 {
 
 #ifdef XNU_KERNEL_PRIVATE
 
+extern u_int8_t        bstp_etheraddr[ETHER_ADDR_LEN];
+
 int    bridgeattach(int);
 
 #endif /* XNU_KERNEL_PRIVATE */
index 1dd0d8c07f5b320f5da9ad6862cacdd089b13b7e..3d086e4021d299df75fbff0fa8c17fbfabcccc76 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -109,7 +109,7 @@ struct sockaddr_dl {
 #endif
 
 #ifdef BSD_KERNEL_PRIVATE
-#define SDL(s) ((struct sockaddr_dl *)s)
+#define SDL(s) ((struct sockaddr_dl *)(void *)s)
 #endif
 
 #ifndef KERNEL
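
The SDL() change above -- like the multidest() changes further down --
inserts an intermediate (void *) cast. That is the conventional way to tell
the compiler's alignment checker (-Wcast-align) that raising the alignment
requirement of a generic sockaddr pointer is intentional:

	struct sockaddr *sa = lookup_sa();	/* hypothetical source */

	/* may warn: sockaddr_dl is more strictly aligned than sockaddr */
	struct sockaddr_dl *sdl1 = (struct sockaddr_dl *)sa;

	/* laundering through void * silences the alignment check */
	struct sockaddr_dl *sdl2 = (struct sockaddr_dl *)(void *)sa;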
index eb29560d286396d33e857d25ecd9b08b85fa6350..a0235a74d0b8646ccf1d6d0598e2e9e6acb76c5d 100644 (file)
@@ -50,7 +50,12 @@ errno_t      ether_add_proto(ifnet_t interface, protocol_family_t protocol,
 errno_t        ether_del_proto(ifnet_t interface, protocol_family_t protocol);
 errno_t ether_frameout(ifnet_t interface, mbuf_t *packet,
                                           const struct sockaddr *dest, const char *dest_lladdr,
-                                          const char *frame_type);
+                                          const char *frame_type
+#if KPI_INTERFACE_EMBEDDED
+                                          , 
+                                          u_int32_t *prepend_len, u_int32_t *postpend_len
+#endif /* KPI_INTERFACE_EMBEDDED */
+                                          );
 errno_t        ether_ioctl(ifnet_t interface, u_int32_t command, void* data);
 errno_t        ether_check_multi(ifnet_t ifp, const struct sockaddr *multicast);
 
index b25ecb3a5c6e193116ede25a433387e9931e884d..c638758a2b8567cda568b481eab1ad888f873e34 100644 (file)
@@ -737,10 +737,10 @@ gif_ioctl(
 
                        /* can't configure multiple multi-dest interfaces */
 #define multidest(x) \
-       (((struct sockaddr_in *)(x))->sin_addr.s_addr == INADDR_ANY)
+       (((struct sockaddr_in *)(void *)(x))->sin_addr.s_addr == INADDR_ANY)
 #if INET6
 #define multidest6(x) \
-       (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)(x))->sin6_addr))
+       (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)(void *)(x))->sin6_addr))
 #endif
                        if (dst->sa_family == AF_INET &&
                            multidest(dst) && multidest(sc2->gif_pdst)) {
index 669beb0f41df49f1bf9ea43f6808df8e7ec24bde..db81aa0836010e01c3a6d5bef7aa9f721ba118f1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <net/if_var.h>
 #include <net/if_llreach.h>
 #include <net/dlil.h>
+#include <net/kpi_interface.h>
+#include <net/route.h>
 
 #include <kern/assert.h>
 #include <kern/locks.h>
@@ -343,6 +345,9 @@ found:
        lr->lr_ifp = ifp;
        lr->lr_key.proto = llproto;
        bcopy(addr, &lr->lr_key.addr, IF_LLREACH_MAXLEN);
+       lr->lr_rssi = IFNET_RSSI_UNKNOWN;
+       lr->lr_lqm = IFNET_LQM_THRESH_UNKNOWN;
+       lr->lr_npm = IFNET_NPM_THRESH_UNKNOWN;
        RB_INSERT(ll_reach_tree, &ifp->if_ll_srcs, lr);
        IFLR_UNLOCK(lr);
        lck_rw_done(&ifp->if_llreach_lock);
@@ -386,7 +391,7 @@ ifnet_llreach_free(struct if_llreach *lr)
 }
 
 u_int64_t
-ifnet_llreach_up2cal(struct if_llreach *lr, u_int64_t uptime)
+ifnet_llreach_up2calexp(struct if_llreach *lr, u_int64_t uptime)
 {
        u_int64_t calendar = 0;
 
@@ -411,6 +416,62 @@ ifnet_llreach_up2cal(struct if_llreach *lr, u_int64_t uptime)
        return (calendar);
 }
 
+u_int64_t
+ifnet_llreach_up2upexp(struct if_llreach *lr, u_int64_t uptime)
+{
+       return (lr->lr_reachable + uptime);
+}
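
Of the pair above, up2upexp() keeps the expiry on the monotonic uptime
clock, while up2calexp() (the renamed up2cal()) shifts it onto the calendar
clock. The usual conversion offsets the deadline by the gap between the two
clocks sampled "now"; a userland sketch with POSIX clocks (an assumption --
the kernel uses its own time sources):

#include <stdint.h>
#include <time.h>

/* Convert an uptime-based expiry (seconds) to calendar time -- sketch. */
static uint64_t
up2cal_sketch(uint64_t up_expire)
{
	struct timespec cal, up;

	clock_gettime(CLOCK_REALTIME, &cal);	/* calendar "now" */
	clock_gettime(CLOCK_MONOTONIC, &up);	/* uptime "now" */

	/* the same instant measured on both clocks fixes the offset */
	return (up_expire + (uint64_t)cal.tv_sec - (uint64_t)up.tv_sec);
}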
+
+int
+ifnet_llreach_get_defrouter(struct ifnet *ifp, int af,
+    struct ifnet_llreach_info *iflri)
+{
+       struct radix_node_head *rnh;
+       struct sockaddr_storage dst_ss, mask_ss;
+       struct rtentry *rt;
+       int error = ESRCH;
+
+       VERIFY(ifp != NULL && iflri != NULL &&
+           (af == AF_INET || af == AF_INET6));
+
+       bzero(iflri, sizeof (*iflri));
+
+       if ((rnh = rt_tables[af]) == NULL)
+               return (error);
+
+       bzero(&dst_ss, sizeof (dst_ss));
+       bzero(&mask_ss, sizeof (mask_ss));
+       dst_ss.ss_family = af;
+       dst_ss.ss_len = (af == AF_INET) ? sizeof (struct sockaddr_in) :
+           sizeof (struct sockaddr_in6);
+
+       lck_mtx_lock(rnh_lock);
+       rt = rt_lookup(TRUE, SA(&dst_ss), SA(&mask_ss), rnh, ifp->if_index);
+       if (rt != NULL) {
+               struct rtentry *gwrt;
+
+               RT_LOCK(rt);
+               if ((rt->rt_flags & RTF_GATEWAY) &&
+                   (gwrt = rt->rt_gwroute) != NULL &&
+                   rt_key(rt)->sa_family == rt_key(gwrt)->sa_family &&
+                   (gwrt->rt_flags & RTF_UP)) {
+                       RT_UNLOCK(rt);
+                       RT_LOCK(gwrt);
+                       if (gwrt->rt_llinfo_get_iflri != NULL) {
+                               (*gwrt->rt_llinfo_get_iflri)(gwrt, iflri);
+                               error = 0;
+                       }
+                       RT_UNLOCK(gwrt);
+               } else {
+                       RT_UNLOCK(rt);
+               }
+               rtfree_locked(rt);
+       }
+       lck_mtx_unlock(rnh_lock);
+
+       return (error);
+}
+
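
A calling sketch for the new KPI above -- it fills an ifnet_llreach_info for
the default router reached through ifp and returns ESRCH when no usable
gateway route (or no link-layer info callback) exists:

	struct ifnet_llreach_info iflri;

	if (ifnet_llreach_get_defrouter(ifp, AF_INET, &iflri) == 0) {
		printf("defrouter: rssi %d lqm %d npm %d\n",
		    iflri.iflri_rssi, iflri.iflri_lqm, iflri.iflri_npm);
	}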
 static struct if_llreach *
 iflr_alloc(int how)
 {
@@ -495,6 +556,44 @@ ifnet_lr2ri(struct if_llreach *lr, struct rt_reach_info *ri)
        ri->ri_refcnt = lri.lri_refcnt;
        ri->ri_probes = lri.lri_probes;
        ri->ri_rcv_expire = lri.lri_expire;
+       ri->ri_rssi = lri.lri_rssi;
+       ri->ri_lqm = lri.lri_lqm;
+       ri->ri_npm = lri.lri_npm;
+}
+
+void
+ifnet_lr2iflri(struct if_llreach *lr, struct ifnet_llreach_info *iflri)
+{
+       IFLR_LOCK_ASSERT_HELD(lr);
+
+       bzero(iflri, sizeof (*iflri));
+       /*
+        * Note here we return request count, not actual memory refcnt.
+        */
+       iflri->iflri_refcnt = lr->lr_reqcnt;
+       iflri->iflri_probes = lr->lr_probes;
+       iflri->iflri_rcv_expire = ifnet_llreach_up2upexp(lr, lr->lr_lastrcvd);
+       iflri->iflri_curtime = net_uptime();
+       switch (lr->lr_key.proto) {
+       case ETHERTYPE_IP:
+               iflri->iflri_netproto = PF_INET;
+               break;
+       case ETHERTYPE_IPV6:
+               iflri->iflri_netproto = PF_INET6;
+               break;
+       default:
+               /*
+                * This shouldn't be possible for the time being,
+                * since link-layer reachability records are only
+                * kept for ARP and ND6.
+                */
+               iflri->iflri_netproto = PF_UNSPEC;
+               break;
+       }
+       bcopy(&lr->lr_key.addr, &iflri->iflri_addr, IF_LLREACH_MAXLEN);
+       iflri->iflri_rssi = lr->lr_rssi;
+       iflri->iflri_lqm = lr->lr_lqm;
+       iflri->iflri_npm = lr->lr_npm;
 }
 
 void
@@ -509,9 +608,12 @@ ifnet_lr2lri(struct if_llreach *lr, struct if_llreach_info *lri)
        lri->lri_refcnt = lr->lr_reqcnt;
        lri->lri_ifindex = lr->lr_ifp->if_index;
        lri->lri_probes = lr->lr_probes;
-       lri->lri_expire = ifnet_llreach_up2cal(lr, lr->lr_lastrcvd);
+       lri->lri_expire = ifnet_llreach_up2calexp(lr, lr->lr_lastrcvd);
        lri->lri_proto = lr->lr_key.proto;
        bcopy(&lr->lr_key.addr, &lri->lri_addr, IF_LLREACH_MAXLEN);
+       lri->lri_rssi = lr->lr_rssi;
+       lri->lri_lqm = lr->lr_lqm;
+       lri->lri_npm = lr->lr_npm;
 }
 
 static int
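The hunks above split the old ifnet_llreach_up2cal() into two converters: ifnet_llreach_up2calexp() maps an uptime timestamp to a calendar-clock expiration, while the new ifnet_llreach_up2upexp() keeps the expiration on the monotonic uptime clock (lr_reachable + uptime). Since ifnet_lr2iflri() pairs iflri_rcv_expire (uptime-based) with iflri_curtime (net_uptime()), a consumer can compute the remaining reachability window with plain subtraction; a minimal sketch, where remaining_secs() is an illustrative helper and not part of the commit:

#include <stdint.h>

/*
 * Given an uptime-based expiration as produced by ifnet_llreach_up2upexp()
 * and the current net_uptime() value, return the seconds of link-layer
 * reachability left (0 if the record has already expired).
 */
static uint64_t
remaining_secs(uint64_t rcv_expire, uint64_t curtime)
{
        return ((rcv_expire > curtime) ? (rcv_expire - curtime) : 0);
}
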
index e922fb0e4b054665661b02b234b99b8103689635..b36612ce1f39c72638592b50a87deca2976ec6c0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -39,7 +39,8 @@ extern "C" {
 /*
  * Per-interface link-layer reachability information (private).
  */
-#define        IF_LLREACHINFO_ADDRLEN  64              /* max ll addr len */
+#define        IF_LLREACHINFO_ADDRLEN          64      /* max ll addr len */
+#define        IF_LLREACHINFO_RESERVED2        16      /* more reserved bits */
 
 struct if_llreach_info {
        u_int32_t               lri_refcnt;     /* reference count */
@@ -49,6 +50,10 @@ struct if_llreach_info {
        u_int16_t               lri_reserved;   /* for future use */
        u_int16_t               lri_proto;      /* ll proto */
        u_int8_t                lri_addr[IF_LLREACHINFO_ADDRLEN]; /* ll addr */
+       int32_t                 lri_rssi;       /* received signal strength */
+       int32_t                 lri_lqm;        /* link quality metric */
+       int32_t                 lri_npm;        /* node proximity metric */
+       u_int8_t                lri_reserved2[IF_LLREACHINFO_RESERVED2];
 };
 
 #ifdef XNU_KERNEL_PRIVATE
@@ -92,6 +97,9 @@ struct if_llreach {
                u_int16_t       proto;          /* ll proto */
                u_int8_t        addr[IF_LLREACH_MAXLEN]; /* ll addr */
        } lr_key;
+       int32_t                 lr_rssi;        /* received signal strength */
+       int32_t                 lr_lqm;         /* link quality metric */
+       int32_t                 lr_npm;         /* node proximity metric */
 };
 
 RB_PROTOTYPE_SC_PREV(__private_extern__, ll_reach_tree, if_llreach,
@@ -126,6 +134,8 @@ RB_PROTOTYPE_SC_PREV(__private_extern__, ll_reach_tree, if_llreach,
 #define        IFLR_REMREF(_iflr)                                              \
        iflr_remref(_iflr)
 
+struct ifnet_llreach_info;     /* forward declaration */
+
 extern void ifnet_llreach_init(void);
 extern void ifnet_llreach_ifattach(struct ifnet *, boolean_t);
 extern void ifnet_llreach_ifdetach(struct ifnet *);
@@ -136,8 +146,12 @@ extern int ifnet_llreach_reachable(struct if_llreach *);
 extern int ifnet_llreach_reachable_delta(struct if_llreach *, u_int64_t);
 extern void ifnet_llreach_set_reachable(struct ifnet *, u_int16_t, void *,
     unsigned int);
-extern u_int64_t ifnet_llreach_up2cal(struct if_llreach *, u_int64_t);
+extern u_int64_t ifnet_llreach_up2calexp(struct if_llreach *, u_int64_t);
+extern u_int64_t ifnet_llreach_up2upexp(struct if_llreach *, u_int64_t);
+extern int ifnet_llreach_get_defrouter(struct ifnet *, int,
+    struct ifnet_llreach_info *);
 extern void ifnet_lr2ri(struct if_llreach *, struct rt_reach_info *);
+extern void ifnet_lr2iflri(struct if_llreach *, struct ifnet_llreach_info *);
 extern void ifnet_lr2lri(struct if_llreach *, struct if_llreach_info *);
 extern void iflr_addref(struct if_llreach *, int);
 extern void iflr_remref(struct if_llreach *);
index 5ba5b11a5dfe7747be0ece40199f6e05282c5e3d..d051c7611b5f42afc611c79f93b24b12a641c9b3 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
 #include "loop.h"
 #if NLOOP > 0
 
+#if NLOOP != 1
+#error "More than one loopback interface is not supported."
+#endif
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -80,6 +84,7 @@
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mcache.h>
+#include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -87,7 +92,7 @@
 #include <net/bpf.h>
 #include <sys/malloc.h>
 
-#if    INET
+#if INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #endif
 #include <net/dlil.h>
 #include <net/kpi_protocol.h>
 
-#if NETAT
-extern struct ifqueue atalkintrq;
-#endif
-
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
 #endif
 
-#define NLOOP_ATTACHMENTS (NLOOP * 12)
+#include <pexpert/pexpert.h>
+
+#define        LOMTU           16384
+#define        LOSNDQ_MAXLEN   256
+
+#define        LO_BPF_TAP_OUT(_m) {                                            \
+       if (lo_statics[0].bpf_callback != NULL) {                       \
+               bpf_tap_out(lo_ifp, DLT_NULL, _m,                       \
+                   &((struct loopback_header *)_m->m_pkthdr.header)->protocol,\
+                   sizeof (u_int32_t));                                \
+       }                                                               \
+}
+
+#define        LO_BPF_TAP_OUT_MULTI(_m) {                                      \
+       if (lo_statics[0].bpf_callback != NULL) {                       \
+               struct mbuf *_n;                                        \
+               for (_n = _m; _n != NULL; _n = _n->m_nextpkt)           \
+                       LO_BPF_TAP_OUT(_n);                             \
+       }                                                               \
+}
 
 struct lo_statics_str {
-       int                             bpf_mode;
+       int             bpf_mode;
        bpf_packet_func bpf_callback;
 };
 
-void loopattach(void);
-
 static struct lo_statics_str lo_statics[NLOOP];
-int loopattach_done = 0; /* used to sync ip6_init2 loopback configuration */
+static int lo_txstart = 0;
 
-#ifdef TINY_LOMTU
-#define        LOMTU   (1024+512)
-#else
-#define LOMTU  16384
-#endif
-
-ifnet_t        lo_ifp = NULL;
+struct ifnet *lo_ifp = NULL;
 
 struct loopback_header {
        protocol_family_t       protocol;
 };
 
+/* Local forward declarations */
+void loopattach(void);
+static errno_t lo_demux(struct ifnet *, struct mbuf *, char *,
+    protocol_family_t *);
+#if !KPI_INTERFACE_EMBEDDED
+static errno_t lo_framer(struct ifnet *, struct mbuf **,
+    const struct sockaddr *,
+    const char *, const char *);
+#else
+static errno_t
+lo_framer(struct ifnet *, struct mbuf **, const struct sockaddr *,
+    const char *, const char *, u_int32_t *, u_int32_t *);
+#endif
+static errno_t lo_add_proto(struct ifnet *, protocol_family_t,
+    const struct ifnet_demux_desc *, u_int32_t);
+static errno_t lo_del_proto(struct ifnet *, protocol_family_t);
+static int lo_output(struct ifnet *, struct mbuf *);
+static errno_t lo_pre_enqueue(struct ifnet *, struct mbuf *);
+static void lo_start(struct ifnet *);
+static errno_t lo_pre_output(struct ifnet *, protocol_family_t, struct mbuf **,
+    const struct sockaddr *, void *, char *, char *);
+static errno_t lo_input(struct ifnet *, protocol_family_t, struct mbuf *);
+static void lo_rtrequest(int, struct rtentry *, struct sockaddr *);
+static errno_t lo_ioctl(struct ifnet *, u_long, void *);
+static errno_t lo_attach_proto(struct ifnet *, protocol_family_t);
 static void lo_reg_if_mods(void);
+static errno_t lo_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
+static int sysctl_dequeue_max SYSCTL_HANDLER_ARGS;
+static int sysctl_sched_model SYSCTL_HANDLER_ARGS;
+static int sysctl_dequeue_scidx SYSCTL_HANDLER_ARGS;
 
-/* Local forward declarations */
+SYSCTL_DECL(_net_link);
+
+SYSCTL_NODE(_net_link, OID_AUTO, loopback, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+    "loopback interface");
+
+#define        LO_BW_SLEEP     10
+static u_int32_t lo_bw_sleep_usec = LO_BW_SLEEP;
+SYSCTL_UINT(_net_link_loopback, OID_AUTO, bw_sleep_usec,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &lo_bw_sleep_usec, LO_BW_SLEEP, "");
+
+static u_int32_t lo_bw_measure = 0;
+SYSCTL_UINT(_net_link_loopback, OID_AUTO, bw_measure,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &lo_bw_measure, 0, "");
+
+static u_int32_t lo_dequeue_max = LOSNDQ_MAXLEN;
+SYSCTL_PROC(_net_link_loopback, OID_AUTO, max_dequeue,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &lo_dequeue_max, LOSNDQ_MAXLEN,
+    sysctl_dequeue_max, "I", "Maximum number of packets dequeued at a time");
+
+static u_int32_t lo_sched_model = IFNET_SCHED_MODEL_NORMAL;
+SYSCTL_PROC(_net_link_loopback, OID_AUTO, sched_model,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &lo_sched_model,
+    IFNET_SCHED_MODEL_NORMAL, sysctl_sched_model, "I", "Scheduling model");
+
+static u_int32_t lo_dequeue_sc = MBUF_SC_BE;
+static int lo_dequeue_scidx = MBUF_SCIDX(MBUF_SC_BE);
+SYSCTL_PROC(_net_link_loopback, OID_AUTO, dequeue_sc,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &lo_dequeue_scidx,
+    MBUF_SC_BE, sysctl_dequeue_scidx, "I", "Dequeue a specific SC index");
 
 static errno_t
-lo_demux(
-    __unused ifnet_t   ifp,
-    __unused mbuf_t            m,
-    char                               *frame_header,
-    protocol_family_t  *protocol_family)
+lo_demux(struct ifnet *ifp, struct mbuf *m, char *frame_header,
+    protocol_family_t *protocol_family)
 {
-       struct loopback_header *header = (struct loopback_header *)frame_header;
-       
+#pragma unused(ifp, m)
+       struct loopback_header *header =
+           (struct loopback_header *)(void *)frame_header;
+
        *protocol_family = header->protocol;
-       
-       return 0;
-}
 
+       return (0);
+}
 
+#if !KPI_INTERFACE_EMBEDDED
 static errno_t
-lo_framer(
-    __unused ifnet_t                           ifp,
-    mbuf_t                                                     *m,
-    __unused const struct sockaddr     *dest,
-    __unused const char                *dest_linkaddr,
-    const char                                         *frame_type)
+lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest,
+    const char *dest_linkaddr, const char *frame_type)
+#else
+static errno_t
+lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest,
+    const char *dest_linkaddr, const char *frame_type,
+    u_int32_t *prepend_len, u_int32_t *postpend_len)
+#endif
 {
+#pragma unused(ifp, dest, dest_linkaddr)
        struct loopback_header  *header;
 
-       M_PREPEND(*m, sizeof(struct loopback_header), M_WAITOK);
-       if (*m == NULL)
-               return EJUSTRETURN; /* Tell caller not to try to free passed-in mbuf */
-       header = mtod(*m, struct loopback_header*);
-       header->protocol = *(const u_int32_t*)frame_type;
-       return 0;
+       M_PREPEND(*m, sizeof (struct loopback_header), M_WAITOK);
+       if (*m == NULL) {
+               /* Tell caller not to try to free passed-in mbuf */
+               return (EJUSTRETURN);
+       }
+
+#if KPI_INTERFACE_EMBEDDED
+       *prepend_len = sizeof (struct loopback_header);
+       *postpend_len = 0;
+#endif /* KPI_INTERFACE_EMBEDDED */
+
+       header = mtod(*m, struct loopback_header *);
+       bcopy(frame_type, &header->protocol, sizeof (u_int32_t));
+       return (0);
 }
 
 static errno_t
-lo_add_proto(
-    __unused ifnet_t                                           interface,
-       __unused protocol_family_t                              protocol_family,
-       __unused const struct ifnet_demux_desc  *demux_array,
-       __unused u_int32_t                                              demux_count)
+lo_add_proto(struct ifnet *interface, protocol_family_t protocol_family,
+    const struct ifnet_demux_desc *demux_array, u_int32_t demux_count)
 {
-    return 0;
+#pragma unused(interface, protocol_family, demux_array, demux_count)
+       return (0);
 }
 
-
 static errno_t
-lo_del_proto(
-       __unused ifnet_t                        ifp,
-       __unused protocol_family_t      protocol)
+lo_del_proto(struct ifnet *ifp, protocol_family_t protocol)
 {
-       return 0;
+#pragma unused(ifp, protocol)
+       return (0);
 }
 
+/*
+ * Output callback.
+ *
+ * This routine is called only when lo_txstart is disabled.
+ */
 static int
-lo_output(
-       ifnet_t ifp,
-       mbuf_t  m_list)
+lo_output(struct ifnet *ifp, struct mbuf *m_list)
 {
-       mbuf_t  m;
-       
+       struct mbuf *m, *m_tail = NULL;
+       struct ifnet_stat_increment_param s;
+       u_int32_t cnt = 0, len = 0;
+
+       bzero(&s, sizeof(s));
+
        for (m = m_list; m; m = m->m_nextpkt) {
                if ((m->m_flags & M_PKTHDR) == 0)
                        panic("lo_output: no HDR");
+               cnt++;
+               len += m->m_pkthdr.len;
 
                /*
                 * Don't overwrite the rcvif field if it is in use.
@@ -210,51 +294,149 @@ lo_output(
                if (m->m_pkthdr.rcvif == NULL)
                        m->m_pkthdr.rcvif = ifp;
 
-               atomic_add_64(&ifp->if_ibytes, m->m_pkthdr.len);
-               atomic_add_64(&ifp->if_obytes, m->m_pkthdr.len);
+               m->m_pkthdr.header = mtod(m, char *);
+               if (apple_hwcksum_tx != 0) {
+                       /* loopback checksums are always OK */
+                       m->m_pkthdr.csum_data = 0xffff;
+                       m->m_pkthdr.csum_flags =
+                           CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
+                           CSUM_IP_CHECKED | CSUM_IP_VALID;
+               }
+               m_adj(m, sizeof (struct loopback_header));
+
+               LO_BPF_TAP_OUT(m);
+               if (m->m_nextpkt == NULL) {
+                       m_tail = m;
+               }
+       }
+
+       s.packets_in = cnt;
+       s.packets_out = cnt;
+       s.bytes_in = len;
+       s.bytes_out = len;
+
+       return (ifnet_input_extended(ifp, m_list, m_tail, &s));
+}
+
+/*
+ * Pre-enqueue callback.
+ *
+ * This routine is called only when lo_txstart is enabled.
+ */
+static errno_t
+lo_pre_enqueue(struct ifnet *ifp, struct mbuf *m0)
+{
+       struct mbuf *m = m0, *n;
+       int error = 0;
+
+       while (m != NULL) {
+               VERIFY((m->m_flags & M_PKTHDR));
 
-               atomic_add_64(&ifp->if_opackets, 1);
-               atomic_add_64(&ifp->if_ipackets, 1);
+               n = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+
+               /*
+                * Don't overwrite the rcvif field if it is in use.
+                *  This is used to match multicast packets, sent looping
+                *  back, with the appropriate group record on input.
+                */
+               if (m->m_pkthdr.rcvif == NULL)
+                       m->m_pkthdr.rcvif = ifp;
 
                m->m_pkthdr.header = mtod(m, char *);
                if (apple_hwcksum_tx != 0) {
                        /* loopback checksums are always OK */
                        m->m_pkthdr.csum_data = 0xffff;
-                       m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
-                               CSUM_IP_CHECKED | CSUM_IP_VALID;
-               }
-               m_adj(m, sizeof(struct loopback_header));
-
-               {
-                       /* We need to prepend the address family as a four byte field. */
-                       u_int32_t protocol_family =
-                               ((struct loopback_header*)m->m_pkthdr.header)->protocol;
-               
-                       bpf_tap_out(ifp, DLT_NULL, m, &protocol_family, sizeof(protocol_family));
+                       m->m_pkthdr.csum_flags =
+                           CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
+                           CSUM_IP_CHECKED | CSUM_IP_VALID;
                }
+               m_adj(m, sizeof (struct loopback_header));
+
+               /*
+                * Let the callee free it in case of error,
+                * and perform any necessary accounting.
+                */
+               (void) ifnet_enqueue(ifp, m);
+
+               m = n;
        }
 
-       return ifnet_input(ifp, m_list, NULL);
+       return (error);
 }
 
+/*
+ * Start output callback.
+ *
+ * This routine is invoked by the start worker thread; because we never call
+ * it directly, there is no need to deploy any serialization mechanism other
+ * than what's already used by the worker thread, i.e. this is already
+ * single-threaded.
+ *
+ * This routine is called only when lo_txstart is enabled.
+ */
+static void
+lo_start(struct ifnet *ifp)
+{
+       struct ifnet_stat_increment_param s;
+
+       bzero(&s, sizeof (s));
+
+       for (;;) {
+               struct mbuf *m = NULL, *m_tail = NULL;
+               u_int32_t cnt, len = 0;
+               int sleep_chan = 0;
+               struct timespec ts;
+
+               if (lo_sched_model == IFNET_SCHED_MODEL_NORMAL) {
+                       if (ifnet_dequeue_multi(ifp, lo_dequeue_max, &m,
+                           &m_tail, &cnt, &len) != 0)
+                               break;
+               } else {
+                       if (ifnet_dequeue_service_class_multi(ifp,
+                           lo_dequeue_sc, lo_dequeue_max, &m,
+                           &m_tail, &cnt, &len) != 0)
+                               break;
+               }
+
+               LO_BPF_TAP_OUT_MULTI(m);
+
+               if (lo_bw_measure) {
+                       if (cnt >= if_bw_measure_size)
+                               ifnet_transmit_burst_start(ifp, m);
+                       if (lo_bw_sleep_usec > 0) {
+                               bzero(&ts, sizeof(ts));
+                               ts.tv_nsec = (lo_bw_sleep_usec << 10) * cnt;
+
+                               /* Add msleep with timeout */
+                               (void) msleep(&sleep_chan, NULL,
+                                   PSOCK, "lo_start", &ts);
+                       }
+                       if (cnt >= if_bw_measure_size)
+                               ifnet_transmit_burst_end(ifp, m_tail);
+               }
+
+               /* stats are required for extended variant */
+               s.packets_in = cnt;
+               s.packets_out = cnt;
+               s.bytes_in = len;
+               s.bytes_out = len;
+
+               (void) ifnet_input_extended(ifp, m, m_tail, &s);
+       }
+}
 
 /*
  * This is a common pre-output routine used by INET and INET6. This could
  * (should?) be split into separate pre-output routines for each protocol.
  */
-
 static errno_t
-lo_pre_output(
-       __unused ifnet_t        ifp,
-       protocol_family_t       protocol_family,
-       mbuf_t                          *m,
-       __unused const struct sockaddr  *dst,
-       void                            *route,
-       char                            *frame_type,
-       __unused char           *dst_addr)
-
+lo_pre_output(struct ifnet *ifp, protocol_family_t protocol_family,
+    struct mbuf **m, const struct sockaddr *dst, void *route, char *frame_type,
+    char *dst_addr)
 {
-       register struct rtentry *rt = route;
+#pragma unused(ifp, dst, dst_addr)
+       struct rtentry *rt = route;
 
        (*m)->m_flags |= M_LOOP;
 
@@ -266,7 +448,7 @@ lo_pre_output(
                if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
                        if (rt_flags & RTF_BLACKHOLE) {
                                m_freem(*m);
-                               return EJUSTRETURN;
+                               return (EJUSTRETURN);
                        } else {
                                return ((rt_flags & RTF_HOST) ?
                                    EHOSTUNREACH : ENETUNREACH);
@@ -274,9 +456,9 @@ lo_pre_output(
                }
        }
 
-       *(protocol_family_t*)frame_type = protocol_family;
+       bcopy(&protocol_family, frame_type, sizeof (protocol_family));
 
-       return 0;
+       return (0);
 }
 
 /*
@@ -284,26 +466,19 @@ lo_pre_output(
  *             ifq/schednetisr input mechanism.
  */
 static errno_t
-lo_input(
-       __unused ifnet_t                        ifp,
-       __unused protocol_family_t      protocol_family,
-       mbuf_t                                          m)
+lo_input(struct ifnet *ifp, protocol_family_t protocol_family, struct mbuf *m)
 {
+#pragma unused(ifp, protocol_family)
        if (proto_input(protocol_family, m) != 0)
                m_freem(m);
        return (0);
 }
 
-
-
-
 /* ARGSUSED */
 static void
-lortrequest(
-       __unused int cmd,
-       struct rtentry *rt,
-       __unused struct sockaddr *sa)
+lo_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa)
 {
+#pragma unused(cmd, sa)
        if (rt != NULL) {
                RT_LOCK_ASSERT_HELD(rt);
                rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
@@ -312,8 +487,7 @@ lortrequest(
                 * should be at least twice the MTU plus a little more for
                 * overhead.
                 */
-               rt->rt_rmx.rmx_recvpipe = 
-                       rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
+               rt->rt_rmx.rmx_recvpipe = rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
        }
 }
 
@@ -321,31 +495,30 @@ lortrequest(
  * Process an ioctl request.
  */
 static errno_t
-loioctl(
-       ifnet_t         ifp,
-       u_long          cmd,
-       void*           data)
+lo_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 {
-       register struct ifaddr *ifa;
-       register struct ifreq *ifr = (struct ifreq *)data;
-       register int error = 0;
+       int error = 0;
 
        switch (cmd) {
 
-       case SIOCSIFADDR:
-               ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING);
-               ifa = (struct ifaddr *)data;
+       case SIOCSIFADDR: {             /* struct ifaddr pointer */
+               struct ifaddr *ifa = data;
+
+               ifnet_set_flags(ifp, IFF_UP|IFF_RUNNING, IFF_UP|IFF_RUNNING);
                IFA_LOCK_SPIN(ifa);
-               ifa->ifa_rtrequest = lortrequest;
+               ifa->ifa_rtrequest = lo_rtrequest;
                IFA_UNLOCK(ifa);
                /*
                 * Everything else is done at a higher level.
                 */
                break;
+       }
 
-       case SIOCADDMULTI:
-       case SIOCDELMULTI:
-               if (ifr == 0) {
+       case SIOCADDMULTI:              /* struct ifreq */
+       case SIOCDELMULTI: {            /* struct ifreq */
+               struct ifreq *ifr = data;
+
+               if (ifr == NULL) {
                        error = EAFNOSUPPORT;           /* XXX */
                        break;
                }
@@ -365,12 +538,16 @@ loioctl(
                        break;
                }
                break;
+       }
+
+       case SIOCSIFMTU: {              /* struct ifreq */
+               struct ifreq *ifr = data;
 
-       case SIOCSIFMTU:
-               ifp->if_mtu = ifr->ifr_mtu;
+               bcopy(&ifr->ifr_mtu, &ifp->if_mtu, sizeof (int));
                break;
+       }
 
-       case SIOCSIFFLAGS:
+       case SIOCSIFFLAGS:              /* struct ifreq */
                break;
 
        default:
@@ -382,113 +559,208 @@ loioctl(
 #endif /* NLOOP > 0 */
 
 
-static errno_t  lo_attach_proto(ifnet_t ifp, protocol_family_t protocol_family)
+static errno_t
+lo_attach_proto(struct ifnet *ifp, protocol_family_t protocol_family)
 {
        struct ifnet_attach_proto_param_v2      proto;
        errno_t                                                 result = 0;
-       
-       bzero(&proto, sizeof(proto));
+
+       bzero(&proto, sizeof (proto));
        proto.input = lo_input;
        proto.pre_output = lo_pre_output;
-       
+
        result = ifnet_attach_protocol_v2(ifp, protocol_family, &proto);
 
        if (result && result != EEXIST) {
-               printf("lo_attach_proto: ifnet_attach_protocol for %u returned=%d\n",
-                          protocol_family, result);
+               printf("lo_attach_proto: ifnet_attach_protocol for %u "
+                   "returned=%d\n", protocol_family, result);
        }
-       
-       return result;
+
+       return (result);
 }
 
-static void lo_reg_if_mods(void)
+static void
+lo_reg_if_mods(void)
 {
-     int error;
+       int error;
 
        /* Register protocol registration functions */
-       if ((error = proto_register_plumber(PF_INET, APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0)
-               printf("proto_register_plumber failed for AF_INET error=%d\n", error);
-
-       if ((error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0)
-               printf("proto_register_plumber failed for AF_INET6 error=%d\n", error);
+       if ((error = proto_register_plumber(PF_INET,
+           APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0)
+               printf("proto_register_plumber failed for AF_INET "
+                   "error=%d\n", error);
+
+       if ((error = proto_register_plumber(PF_INET6,
+           APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0)
+               printf("proto_register_plumber failed for AF_INET6 "
+                   "error=%d\n", error);
 }
 
 static errno_t
-lo_set_bpf_tap(
-       ifnet_t                 ifp,
-       bpf_tap_mode    mode,
-       bpf_packet_func bpf_callback)
+lo_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode mode,
+    bpf_packet_func bpf_callback)
 {
+       VERIFY(ifp == lo_ifp);
 
-  /*
-   * NEED MUTEX HERE XXX
-   */
-       if (mode == BPF_TAP_DISABLE) {
-               lo_statics[ifp->if_unit].bpf_mode = mode;
-               lo_statics[ifp->if_unit].bpf_callback = bpf_callback;
-       }
-       else {
-               lo_statics[ifp->if_unit].bpf_callback = bpf_callback;
-               lo_statics[ifp->if_unit].bpf_mode = mode;               
+       lo_statics[0].bpf_mode = mode;
+
+       switch (mode) {
+               case BPF_TAP_DISABLE:
+               case BPF_TAP_INPUT:
+                       lo_statics[0].bpf_callback = NULL;
+                       break;
+
+               case BPF_TAP_OUTPUT:
+               case BPF_TAP_INPUT_OUTPUT:
+                       lo_statics[0].bpf_callback = bpf_callback;
+                       break;
        }
 
-       return 0;
+       return (0);
 }
 
-
 /* ARGSUSED */
 void
 loopattach(void)
 {
-       struct ifnet_init_params        lo_init;
+       struct ifnet_init_eparams lo_init;
        errno_t result = 0;
 
-#if NLOOP != 1
-More than one loopback interface is not supported.
-#endif
+       PE_parse_boot_argn("lo_txstart", &lo_txstart, sizeof (lo_txstart));
 
        lo_reg_if_mods();
-       
-       lo_statics[0].bpf_callback = 0;
-       lo_statics[0].bpf_mode      = BPF_TAP_DISABLE;
-       
-       bzero(&lo_init, sizeof(lo_init));
-       lo_init.name = "lo";
-       lo_init.unit = 0;
-       lo_init.family = IFNET_FAMILY_LOOPBACK;
-       lo_init.type = IFT_LOOP;
-       lo_init.output = lo_output;
-       lo_init.demux = lo_demux;
-       lo_init.add_proto = lo_add_proto;
-       lo_init.del_proto = lo_del_proto;
-       lo_init.framer = lo_framer;
-       lo_init.softc = &lo_statics[0];
-       lo_init.ioctl = loioctl;
-       lo_init.set_bpf_tap = lo_set_bpf_tap;
-       result = ifnet_allocate(&lo_init, &lo_ifp);
+
+       lo_statics[0].bpf_callback = NULL;
+       lo_statics[0].bpf_mode = BPF_TAP_DISABLE;
+
+       bzero(&lo_init, sizeof (lo_init));
+       lo_init.ver                     = IFNET_INIT_CURRENT_VERSION;
+       lo_init.len                     = sizeof (lo_init);
+       lo_init.sndq_maxlen             = LOSNDQ_MAXLEN;
+       if (lo_txstart) {
+               lo_init.flags           = 0;
+               lo_init.pre_enqueue     = lo_pre_enqueue;
+               lo_init.start           = lo_start;
+               lo_init.output_sched_model = lo_sched_model;
+       } else {
+               lo_init.flags           = IFNET_INIT_LEGACY;
+               lo_init.output          = lo_output;
+       }
+       lo_init.name                    = "lo";
+       lo_init.unit                    = 0;
+       lo_init.family                  = IFNET_FAMILY_LOOPBACK;
+       lo_init.type                    = IFT_LOOP;
+       lo_init.demux                   = lo_demux;
+       lo_init.add_proto               = lo_add_proto;
+       lo_init.del_proto               = lo_del_proto;
+       lo_init.framer                  = lo_framer;
+       lo_init.softc                   = &lo_statics[0];
+       lo_init.ioctl                   = lo_ioctl;
+       lo_init.set_bpf_tap             = lo_set_bpf_tap;
+
+       result = ifnet_allocate_extended(&lo_init, &lo_ifp);
        if (result != 0) {
-               printf("ifnet_allocate for lo0 failed - %d\n", result);
-               return;
+               panic("%s: couldn't allocate loopback ifnet (%d)\n",
+                   __func__, result);
+               /* NOTREACHED */
        }
-       
+
        ifnet_set_mtu(lo_ifp, LOMTU);
-       ifnet_set_flags(lo_ifp, IFF_LOOPBACK | IFF_MULTICAST, IFF_LOOPBACK | IFF_MULTICAST);
-       ifnet_set_offload(lo_ifp, IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
-               IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_IPV6_FRAGMENT |
-               IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_MULTIPAGES);
-       ifnet_set_hdrlen(lo_ifp, sizeof(struct loopback_header));
+       ifnet_set_flags(lo_ifp, IFF_LOOPBACK | IFF_MULTICAST,
+           IFF_LOOPBACK | IFF_MULTICAST);
+       ifnet_set_offload(lo_ifp,
+           IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
+           IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_IPV6_FRAGMENT |
+           IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_MULTIPAGES);
+       ifnet_set_hdrlen(lo_ifp, sizeof (struct loopback_header));
        ifnet_set_eflags(lo_ifp, IFEF_SENDLIST, IFEF_SENDLIST);
 
 #if CONFIG_MACF_NET
-               mac_ifnet_label_init(ifp);
+       mac_ifnet_label_init(lo_ifp);
 #endif
 
        result = ifnet_attach(lo_ifp, NULL);
        if (result != 0) {
-               printf("ifnet_attach lo0 failed - %d\n", result);
-               return;
+               panic("%s: couldn't attach loopback ifnet (%d)\n",
+                   __func__, result);
+               /* NOTREACHED */
+       }
+       bpfattach(lo_ifp, DLT_NULL, sizeof (u_int32_t));
+}
+
+static int
+sysctl_dequeue_max SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       u_int32_t i;
+       int err;
+
+       i = lo_dequeue_max;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (i < 1)
+               i = 1;
+       else if (i > LOSNDQ_MAXLEN)
+               i = LOSNDQ_MAXLEN;
+
+       lo_dequeue_max = i;
+
+       return (err);
+}
+
+static int
+sysctl_sched_model SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       u_int32_t i;
+       int err;
+
+       i = lo_sched_model;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       switch (i) {
+       case IFNET_SCHED_MODEL_NORMAL:
+       case IFNET_SCHED_MODEL_DRIVER_MANAGED:
+               break;
+
+       default:
+               err = EINVAL;
+               break;
        }
-       bpfattach(lo_ifp, DLT_NULL, sizeof(u_int));
-       
-       loopattach_done = 1;
+
+       if (err == 0 && (err = ifnet_set_output_sched_model(lo_ifp, i)) == 0)
+               lo_sched_model = i;
+
+       return (err);
+}
+
+static int
+sysctl_dequeue_scidx SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       u_int32_t i;
+       int err;
+
+       i = lo_dequeue_scidx;
+
+       err = sysctl_handle_int(oidp, &i, 0, req);
+       if (err != 0 || req->newptr == USER_ADDR_NULL)
+               return (err);
+
+       if (!MBUF_VALID_SCIDX(i))
+               return (EINVAL);
+
+       if (lo_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)
+               return (ENODEV);
+
+       lo_dequeue_sc = m_service_class_from_idx(i);
+       lo_dequeue_scidx = MBUF_SCIDX(lo_dequeue_sc);
+
+       return (err);
 }
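The three SYSCTL_PROC handlers above back the new net.link.loopback node. A hedged userland sketch (not part of the commit) of reading and adjusting the dequeue batch size with sysctlbyname(3); per sysctl_dequeue_max() the kernel clamps the value to the range 1..LOSNDQ_MAXLEN, and the txstart path these knobs tune is only wired up when the lo_txstart boot-arg is set in loopattach():

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
        u_int32_t cur, want = 128;
        size_t len = sizeof (cur);

        /* read the current maximum dequeue batch size */
        if (sysctlbyname("net.link.loopback.max_dequeue",
            &cur, &len, NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("max_dequeue: %u\n", cur);

        /* request a new value; sysctl_dequeue_max() clamps it */
        if (sysctlbyname("net.link.loopback.max_dequeue",
            NULL, NULL, &want, sizeof (want)) == -1)
                perror("sysctlbyname(set)");
        return (0);
}
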
index 32afe224d7ee604ca6ab91e744ca2df4740a4932..12cbc871bee38b850390442d0881983fe4bf51a6 100644 (file)
@@ -221,7 +221,7 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
 #define IFM_FDX                0x00100000      /* Force full duplex */
 #define        IFM_HDX         0x00200000      /* Force half duplex */
 #define        IFM_FLOW        0x00400000      /* enable hardware flow control */
-#define IFM_EEE                0x00800000      /* Driver defined flag */
+#define IFM_EEE                0x00800000      /* Support energy efficient ethernet */
 #define IFM_FLAG0      0x01000000      /* Driver defined flag */
 #define IFM_FLAG1      0x02000000      /* Driver defined flag */
 #define IFM_FLAG2      0x04000000      /* Driver defined flag */
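IFM_EEE repurposes a bit previously documented as driver-defined, so drivers can now advertise Energy Efficient Ethernet (IEEE 802.3az) as a media option. A sketch of testing the bit from userland with the standard SIOCGIFMEDIA ioctl; the interface name is illustrative, and the flag only appears when the driver reports it:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_media.h>

int
main(void)
{
        struct ifmediareq ifmr;
        int s = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&ifmr, 0, sizeof (ifmr));
        strlcpy(ifmr.ifm_name, "en0", sizeof (ifmr.ifm_name));
        if (s == -1 || ioctl(s, SIOCGIFMEDIA, &ifmr) == -1) {
                perror("SIOCGIFMEDIA");
                return (1);
        }
        if (ifmr.ifm_active & IFM_EEE)
                printf("%s: energy efficient ethernet active\n", ifmr.ifm_name);
        close(s);
        return (0);
}
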
index 9ab76f698f3872dec2db9148c00538d4c3221848..a7fd2db5c3cfa01925dcf978470c3e3c97905cd9 100644 (file)
@@ -108,12 +108,12 @@ SYSCTL_NODE(_net_link_generic, IFMIB_IFALLDATA, ifalldata, CTLFLAG_RD | CTLFLAG_
 
 static int make_ifmibdata(struct ifnet *, int *, struct sysctl_req *);
 
-int 
+int
 make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
 {
        struct ifmibdata        ifmd;
        int error = 0;
-       
+
        switch(name[1]) {
        default:
                error = ENOENT;
@@ -127,15 +127,15 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
                if (ifnet_is_attached(ifp, 0)) {
                        snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d",
                                ifp->if_name, ifp->if_unit);
-       
+
 #define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld
                        COPY(pcount);
                        COPY(flags);
                        if_data_internal_to_if_data64(ifp, &ifp->if_data, &ifmd.ifmd_data);
 #undef COPY
-                       ifmd.ifmd_snd_len = ifp->if_snd.ifq_len;
-                       ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen;
-                       ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops;
+                       ifmd.ifmd_snd_len = IFCQ_LEN(&ifp->if_snd);
+                       ifmd.ifmd_snd_maxlen = IFCQ_MAXLEN(&ifp->if_snd);
+                       ifmd.ifmd_snd_drops = ifp->if_snd.ifcq_dropcnt.packets;
                }
                error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
                if (error || !req->newptr)
@@ -176,15 +176,25 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
                break;
 
        case IFDATA_SUPPLEMENTAL: {
-               struct if_traffic_class if_tc;
+               struct ifmibdata_supplemental *ifmd_supp;
+
+               if ((ifmd_supp = _MALLOC(sizeof (*ifmd_supp), M_TEMP,
+                   M_NOWAIT | M_ZERO)) == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
 
-               if_copy_traffic_class(ifp, &if_tc);
-               
-               error = SYSCTL_OUT(req, &if_tc, sizeof(struct if_traffic_class));
+               if_copy_traffic_class(ifp, &ifmd_supp->ifmd_traffic_class);
+               if_copy_data_extended(ifp, &ifmd_supp->ifmd_data_extended);
+               if_copy_packet_stats(ifp, &ifmd_supp->ifmd_packet_stats);
+               if_copy_rxpoll_stats(ifp, &ifmd_supp->ifmd_rxpoll_stats);
+
+               error = SYSCTL_OUT(req, ifmd_supp, sizeof (*ifmd_supp));
+               _FREE(ifmd_supp, M_TEMP);
                break;
        }
        }
-       
+
        return error;
 }
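With this change IFDATA_SUPPLEMENTAL copies out a full struct ifmibdata_supplemental (traffic-class counters plus the new extended, packet, and rxpoll stats) instead of a bare struct if_traffic_class, heap-allocating the buffer to keep the large struct off the kernel stack. A sketch of fetching it through the interface MIB; ifmibdata_supplemental is a PRIVATE structure, so this only builds against private headers, and the OID constants are assumed to come from net/if_mib.h as in the classic ifmib(4) example:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_mib.h>

int
main(void)
{
        struct ifmibdata_supplemental ifmd_supp;
        size_t len = sizeof (ifmd_supp);
        int name[6];

        name[0] = CTL_NET;
        name[1] = PF_LINK;
        name[2] = NETLINK_GENERIC;
        name[3] = IFMIB_IFDATA;
        name[4] = (int)if_nametoindex("lo0");   /* row = interface index */
        name[5] = IFDATA_SUPPLEMENTAL;

        memset(&ifmd_supp, 0, sizeof (ifmd_supp));
        if (sysctl(name, 6, &ifmd_supp, &len, NULL, 0) == -1) {
                perror("sysctl");
                return (1);
        }
        printf("read %zu bytes of supplemental ifmib data\n", len);
        return (0);
}
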
 
index 5b773bddf9591dc09a9495ef4eb62053951a2bbe..3dbf262a21f3e052a9bf5c9dc3df12897565c28a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -74,6 +74,9 @@ struct ifmibdata {
 #ifdef PRIVATE
 struct ifmibdata_supplemental {
        struct if_traffic_class ifmd_traffic_class;
+       struct if_data_extended ifmd_data_extended;
+       struct if_packet_stats  ifmd_packet_stats;
+       struct if_rxpoll_stats  ifmd_rxpoll_stats;
 };
 #endif /* PRIVATE */
 
index ae2f9254cef736d9504a64d60701bd16f56e4630..18d6435e0ab38295619ace1ed78f6f758c8181a3 100644 (file)
@@ -125,10 +125,6 @@ pfloginit(void)
 {
        int i;
 
-       if (pf_perim_lock == NULL || pf_lock == NULL) {
-               panic("%s: called before PF is initialized", __func__);
-               /* NOTREACHED */
-       }
        LIST_INIT(&pflogif_list);
        for (i = 0; i < PFLOGIFS_MAX; i++)
                pflogifs[i] = NULL;
index c9d24e2495a9752b4832050056b7eedcfa20e05d..41d1c15db43ee4f0bcde2a231e7beb2287c52818 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <security/mac_framework.h>
 #endif
 
-#define GET_V4(x)      ((const struct in_addr *)(&(x)->s6_addr16[1]))
+#define GET_V4(x) ((const struct in_addr *)(const void *)(&(x)->s6_addr16[1])) 
 
 static lck_grp_t *stf_mtx_grp;
 
@@ -473,7 +473,7 @@ stf_getsrcifa6(struct ifnet *ifp)
                        IFA_UNLOCK(ia);
                        continue;
                }
-               sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
+               sin6 = (struct sockaddr_in6 *)(void *)ia->ifa_addr;
                if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
                        IFA_UNLOCK(ia);
                        continue;
@@ -524,11 +524,11 @@ stf_pre_output(
        struct ip6_hdr *ip6;
        struct in6_ifaddr *ia6;
        struct sockaddr_in      *dst4;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
        errno_t                         result = 0;
 
        sc = ifnet_softc(ifp);
-       dst6 = (const struct sockaddr_in6 *)dst;
+       dst6 = (const struct sockaddr_in6 *)(const void *)dst;
 
        /* just in case */
        if ((ifnet_flags(ifp) & IFF_UP) == 0) {
@@ -603,7 +603,7 @@ stf_pre_output(
                ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
        lck_mtx_lock(&sc->sc_ro_mtx);
-       dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
+       dst4 = (struct sockaddr_in *)(void *)&sc->sc_ro.ro_dst;
        if (dst4->sin_family != AF_INET ||
            bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
                /* cache route doesn't match: always the case during the first use */
@@ -616,7 +616,8 @@ stf_pre_output(
                }
        }
 
-       result = ip_output_list(m, 0, NULL, &sc->sc_ro, IP_OUTARGS, NULL, &ipoa);
+       result = ip_output_list(m, 0, NULL, &sc->sc_ro, IP_OUTARGS, NULL,
+           &ipoa);
        lck_mtx_unlock(&sc->sc_ro_mtx);
 
        /* Assumption: ip_output will free mbuf on errors */
@@ -865,7 +866,7 @@ stf_ioctl(
                        error = EAFNOSUPPORT;
                        break;
                }
-               sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+               sin6 = (struct sockaddr_in6 *)(void *)ifa->ifa_addr;
                if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
                         if ( !(ifnet_flags( ifp ) & IFF_UP) ) {
                                 /* do this only if the interface is not already up */
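Most of the if_stf.c churn above routes sockaddr casts through an intermediate (void *). The pattern raises no alignment warning because casting via void * tells the compiler the increase in alignment requirement is intentional; xnu spells these casts out inline, and the helper below is only an illustration of the idiom:

#include <sys/socket.h>
#include <netinet/in.h>

/*
 * (struct sockaddr_in6 *)sa would trip -Wcast-align, since sockaddr_in6
 * has stricter alignment than sockaddr; the (void *) hop asserts that
 * the underlying storage is suitably aligned.
 */
static struct sockaddr_in6 *
satosin6(struct sockaddr *sa)
{
        return ((struct sockaddr_in6 *)(void *)sa);
}
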
index a8667845ba71509834cc3198c0f976bf89a44872..8f766ba3c340eac76ec127ca358d13f6d3336e3a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,7 +73,12 @@ static errno_t       utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
                                                   protocol_family_t *protocol);
 static errno_t utun_framer(ifnet_t     interface, mbuf_t *packet,
                                                        const struct sockaddr *dest, const char *dest_linkaddr,
-                                                       const char *frame_type);
+                                                       const char *frame_type
+#if KPI_INTERFACE_EMBEDDED
+                                                       ,
+                                                       u_int32_t *prepend_len, u_int32_t *postpend_len
+#endif /* KPI_INTERFACE_EMBEDDED */
+                                                       );
 static errno_t utun_add_proto(ifnet_t interface, protocol_family_t protocol,
                                                           const struct ifnet_demux_desc *demux_array,
                                                           u_int32_t demux_count);
@@ -88,15 +93,7 @@ static errno_t       utun_proto_input(ifnet_t interface, protocol_family_t protocol,
 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol, 
                                         mbuf_t *packet, const struct sockaddr *dest, void *route,
                                         char *frame_type, char *link_layer_dest);
-
-/* Control block allocated for each kernel control connection */
-struct utun_pcb {
-       kern_ctl_ref    utun_ctlref;
-       ifnet_t                 utun_ifp;
-       u_int32_t               utun_unit;
-       u_int32_t               utun_flags;
-       int                             utun_ext_ifdata_stats;
-};
+__private_extern__ errno_t utun_pkt_input (struct utun_pcb *pcb, mbuf_t m);
 
 static kern_ctl_ref    utun_kctlref;
 static u_int32_t       utun_family;
@@ -104,7 +101,7 @@ static OSMallocTag  utun_malloc_tag;
 static SInt32          utun_ifcount = 0;
 
 /* Prepend length */
-static void*
+void*
 utun_alloc(size_t size)
 {
        size_t  *mem = OSMalloc(size + sizeof(size_t), utun_malloc_tag);
@@ -117,7 +114,7 @@ utun_alloc(size_t size)
        return (void*)mem;
 }
 
-static void
+void
 utun_free(void *ptr)
 {
        size_t  *size = ptr;
@@ -423,7 +420,9 @@ utun_ctl_disconnect(
        struct utun_pcb *pcb = unitinfo;
        ifnet_t                 ifp = pcb->utun_ifp;
        errno_t                 result = 0;
-       
+
+       utun_cleanup_crypto(pcb);
+
        pcb->utun_ctlref = NULL;
        pcb->utun_unit = 0;
        
@@ -455,37 +454,7 @@ utun_ctl_send(
        mbuf_t                                  m,
        __unused int                    flags)
 {
-       struct utun_pcb                                         *pcb = unitinfo;
-       errno_t                                                         result;
-       
-       mbuf_pkthdr_setrcvif(m, pcb->utun_ifp);
-       
-       bpf_tap_in(pcb->utun_ifp, DLT_NULL, m, 0, 0);
-       
-       if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
-               /* flush data */
-               mbuf_freem(m);
-               return 0;
-       }
-       
-       if (!pcb->utun_ext_ifdata_stats) {
-               struct ifnet_stat_increment_param       incs;
-       
-               bzero(&incs, sizeof(incs));
-               incs.packets_in = 1;
-               incs.bytes_in = mbuf_pkthdr_len(m);
-               result = ifnet_input(pcb->utun_ifp, m, &incs);
-       } else {
-               result = ifnet_input(pcb->utun_ifp, m, NULL);
-       }
-       if (result != 0) {
-               ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
-               
-               printf("utun_ctl_send - ifnet_input failed: %d\n", result);
-               mbuf_freem(m);
-       }
-       
-       return 0;
+       return utun_pkt_input((struct utun_pcb *)unitinfo, m);
 }
 
 static errno_t
@@ -518,6 +487,30 @@ utun_ctl_setopt(
                                pcb->utun_flags = *(u_int32_t *)data;
                        break;
 
+               case UTUN_OPT_ENABLE_CRYPTO:
+                       result = utun_ctl_enable_crypto(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
+               case UTUN_OPT_CONFIG_CRYPTO_KEYS:
+                       result = utun_ctl_config_crypto_keys(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
+               case UTUN_OPT_UNCONFIG_CRYPTO_KEYS:
+                       result = utun_ctl_unconfig_crypto_keys(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
+               case UTUN_OPT_DISABLE_CRYPTO:
+                       result = utun_ctl_disable_crypto(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
+               case UTUN_OPT_STOP_CRYPTO_DATA_TRAFFIC:
+                       result = utun_ctl_stop_crypto_data_traffic(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
+               case UTUN_OPT_START_CRYPTO_DATA_TRAFFIC:
+                       result = utun_ctl_start_crypto_data_traffic(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
                case UTUN_OPT_EXT_IFDATA_STATS:
                        if (len != sizeof(int)) {
                                result = EMSGSIZE;
@@ -586,6 +579,10 @@ utun_ctl_getopt(
                        *len = snprintf(data, *len, "%s%d", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp)) + 1;
                        break;
 
+               case UTUN_OPT_GENERATE_CRYPTO_KEYS_IDX:
+                       result = utun_ctl_generate_crypto_keys_idx(kctlref, unit, unitinfo, opt, data, len);
+                       break;
+
                default:
                        result = ENOPROTOOPT;
                        break;
@@ -611,8 +608,18 @@ utun_output(
                return 0;
        }
 
+       // otherwise, fall through to ctl_enqueuembuf
        if (pcb->utun_ctlref) {
-               int     length = mbuf_pkthdr_len(data);
+               int     length;
+
+               // only pass packets to utun-crypto if crypto is enabled and 'stop data traffic' is not set.
+               if ((pcb->utun_flags & (UTUN_FLAGS_CRYPTO | UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC)) == UTUN_FLAGS_CRYPTO) {
+                       if (utun_pkt_crypto_output(pcb, &data) == 0) {
+                               return 0;
+                       }
+               }
+
+               length = mbuf_pkthdr_len(data);
                result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
                if (result != 0) {
                        mbuf_freem(data);
@@ -657,7 +664,13 @@ utun_framer(
                   mbuf_t                               *packet,
                        __unused const struct sockaddr *dest, 
                        __unused const char *dest_linkaddr,
-                       const char *frame_type)
+                       const char *frame_type
+#if KPI_INTERFACE_EMBEDDED
+                       ,
+                       u_int32_t *prepend_len, 
+                       u_int32_t *postpend_len
+#endif /* KPI_INTERFACE_EMBEDDED */
+                       )
 {
     if (mbuf_prepend(packet, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) {
                printf("utun_framer - ifnet_output prepend failed\n");
@@ -667,6 +680,10 @@ utun_framer(
                // just return, because the buffer was freed in mbuf_prepend
         return EJUSTRETURN;    
     }
+#if KPI_INTERFACE_EMBEDDED
+       *prepend_len = sizeof(protocol_family_t);
+       *postpend_len = 0;
+#endif /* KPI_INTERFACE_EMBEDDED */
        
     // place protocol number at the beginning of the mbuf
     *(protocol_family_t *)mbuf_data(*packet) = htonl(*(protocol_family_t *)(uintptr_t)(size_t)frame_type);
@@ -791,3 +808,48 @@ utun_attach_proto(
        return result;
 }
 
+errno_t
+utun_pkt_input (struct utun_pcb *pcb, mbuf_t m)
+{
+       errno_t result;
+       protocol_family_t protocol;
+
+       mbuf_pkthdr_setrcvif(m, pcb->utun_ifp);
+
+       bpf_tap_in(pcb->utun_ifp, DLT_NULL, m, 0, 0);
+
+       if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
+               /* flush data */
+               mbuf_freem(m);
+               return 0;
+       }
+       protocol = ntohl(*(u_int32_t *)mbuf_data(m));
+
+       // quick exit for keepalive packets
+       if (protocol == AF_UTUN && pcb->utun_flags & UTUN_FLAGS_CRYPTO) {
+               if (utun_pkt_crypto_output(pcb, &m) == 0) {
+                       return 0;
+               }
+               printf("%s: utun_pkt_crypto_output failed, flags %x\n", __FUNCTION__, pcb->utun_flags);
+               return EINVAL;
+       }
+
+       if (!pcb->utun_ext_ifdata_stats) {
+               struct ifnet_stat_increment_param       incs;
+               
+               bzero(&incs, sizeof(incs));
+               incs.packets_in = 1;
+               incs.bytes_in = mbuf_pkthdr_len(m);
+               result = ifnet_input(pcb->utun_ifp, m, &incs);
+       } else {
+               result = ifnet_input(pcb->utun_ifp, m, NULL);
+       }
+       if (result != 0) {
+               ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
+               
+               printf("%s - ifnet_input failed: %d\n", __FUNCTION__, result);
+               mbuf_freem(m);
+       }
+
+       return 0;
+}
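utun_ctl_send() is now a thin wrapper over the new utun_pkt_input(), which strips a 4-byte protocol family header off the front of each packet; the ntohl() above means the prefix travels in network byte order, mirroring the htonl() in utun_framer() on the output side. A hedged sketch of handing an IPv4 packet to a utun from userland; the descriptor is assumed to be a kernel-control socket already connected via the usual PF_SYSTEM/SYSPROTO_CONTROL setup, which is outside this commit:

#include <stdint.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <arpa/inet.h>

/* Prepend the AF header and write one IPv4 packet to the utun socket. */
static ssize_t
utun_write_pkt(int fd, const void *pkt, size_t pktlen)
{
        uint32_t af = htonl(AF_INET);   /* matches the ntohl() in utun_pkt_input() */
        struct iovec iov[2];

        iov[0].iov_base = &af;
        iov[0].iov_len = sizeof (af);
        iov[1].iov_base = (void *)(uintptr_t)pkt;
        iov[1].iov_len = pktlen;
        return (writev(fd, iov, 2));
}
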
index d1860e11a90aed42fc74fe4d39324862f7908e95..32379a8824f870a8d5360e8024f19e2388108ed7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifndef        _NET_IF_UTUN_H_
 #define        _NET_IF_UTUN_H_
 
+#include <net/if_utun_crypto.h>
+
 #ifdef KERNEL_PRIVATE
 
+#include <sys/kern_control.h>
+
+/* Control block allocated for each kernel control connection */
+struct utun_pcb {
+       kern_ctl_ref    utun_ctlref;
+       ifnet_t                 utun_ifp;
+       u_int32_t               utun_unit;
+       u_int32_t               utun_flags;
+       int                             utun_ext_ifdata_stats;
+       utun_crypto_ctx_t utun_crypto_ctx[UTUN_CRYPTO_CTX_NUM_DIRS];
+};
+
+void* utun_alloc(size_t size);
+void utun_free(void *ptr);
 errno_t utun_register_control(void);
 
 #endif
@@ -44,17 +60,34 @@ errno_t utun_register_control(void);
 /*
  * Socket option names to manage utun
  */
-#define UTUN_OPT_FLAGS                                 1
-#define UTUN_OPT_IFNAME                                        2
-#define UTUN_OPT_EXT_IFDATA_STATS              3       /* get|set (type int) */
-#define UTUN_OPT_INC_IFDATA_STATS_IN   4       /* set to increment stat counters (type struct utun_stats_param) */ 
-#define UTUN_OPT_INC_IFDATA_STATS_OUT  5       /* set to increment stat counters (type struct utun_stats_param) */ 
+#define UTUN_OPT_FLAGS                                                 1
+#define UTUN_OPT_IFNAME                                                        2
+#define UTUN_OPT_EXT_IFDATA_STATS                              3       /* get|set (type int) */
+#define UTUN_OPT_INC_IFDATA_STATS_IN                   4       /* set to increment stat counters (type struct utun_stats_param) */ 
+#define UTUN_OPT_INC_IFDATA_STATS_OUT                  5       /* set to increment stat counters (type struct utun_stats_param) */ 
+#define UTUN_OPT_ENABLE_CRYPTO                                 6
+#define UTUN_OPT_CONFIG_CRYPTO_KEYS                            7
+#define UTUN_OPT_UNCONFIG_CRYPTO_KEYS                  8
+#define UTUN_OPT_GENERATE_CRYPTO_KEYS_IDX              9
+#define UTUN_OPT_DISABLE_CRYPTO                                        10
+#define UTUN_OPT_STOP_CRYPTO_DATA_TRAFFIC              11
+#define UTUN_OPT_START_CRYPTO_DATA_TRAFFIC             12
 
 /*
  * Flags for UTUN_OPT_FLAGS
  */
 #define        UTUN_FLAGS_NO_OUTPUT            0x0001
 #define UTUN_FLAGS_NO_INPUT                    0x0002
+#define UTUN_FLAGS_CRYPTO                      0x0004
+#define UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC    0x0008
+
+/*
+ * utun packet type flags
+ */
+#define UTUN_PKT_TYPE_KEEPALIVE                                        0x0001
+#define UTUN_PKT_TYPE_IPSEC                                            0x0002
+#define UTUN_PKT_TYPE_DTLS                                             0x0004
+
 
 /*
  * utun stats parameter structure
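The new UTUN_OPT_*CRYPTO* options travel the same kernel-control setsockopt()/getsockopt() path as the existing stats options (see utun_ctl_setopt() above). A sketch, assuming a socket already connected to the utun control (UTUN_CONTROL_NAME in this header) and a utun_crypto_args_t filled in per net/if_utun_crypto.h; that header is only partially visible in this diff, so the field and macro names here are taken on trust from the .c file:

#include <sys/socket.h>
#include <sys/sys_domain.h>     /* SYSPROTO_CONTROL */
#include <net/if_utun.h>
#include <net/if_utun_crypto.h>

/* Enable the IPSec crypto context on a connected utun control socket. */
static int
utun_enable_ipsec_crypto(int fd, utun_crypto_args_t *args)
{
        /*
         * utun_ctl_enable_crypto() validates args->ver, args->type and
         * args->args_ulen; type must be UTUN_CRYPTO_TYPE_IPSEC, and a
         * successful enable leaves data traffic stopped until
         * UTUN_OPT_START_CRYPTO_DATA_TRAFFIC is issued.
         */
        return (setsockopt(fd, SYSPROTO_CONTROL, UTUN_OPT_ENABLE_CRYPTO,
            args, (socklen_t)UTUN_CRYPTO_ARGS_TOTAL_SIZE(args)));
}
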
diff --git a/bsd/net/if_utun_crypto.c b/bsd/net/if_utun_crypto.c
new file mode 100644 (file)
index 0000000..176f4cd
--- /dev/null
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+
+#include <sys/systm.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_utun.h>
+#include <sys/mbuf.h> 
+#include <net/if_utun_crypto.h>
+#include <net/if_utun_crypto_ipsec.h>
+
+void
+utun_cleanup_crypto (struct utun_pcb *pcb)
+{
+       utun_cleanup_all_crypto_ipsec(pcb);
+       // utun_cleanup_all_crypto_dtls(pcb);
+       pcb->utun_flags &= ~UTUN_FLAGS_CRYPTO;
+}
+
+errno_t
+utun_ctl_enable_crypto (__unused kern_ctl_ref  kctlref,
+                       __unused u_int32_t     unit, 
+                       __unused void         *unitinfo,
+                       __unused int           opt, 
+                       void                  *data, 
+                       size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto context args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the direction (IN or OUT)
+        *    - check the type (IPSec or DTLS)
+        * - ensure that the crypto context is *not* already valid (don't recreate already valid context).
+        *    - we have only one context per direction and type.
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               int                 idx;
+               utun_crypto_args_t *crypto_args = (__typeof__(crypto_args))data;
+               utun_crypto_ctx_t  *crypto_ctx;
+
+               if (crypto_args->ver == 0 || crypto_args->ver >= UTUN_CRYPTO_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_args->type == 0 || crypto_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args));
+                       return EINVAL;
+               }
+               if (crypto_args->args_ulen != sizeof(crypto_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+               if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) {
+                       utun_ctl_enable_crypto_ipsec(pcb, crypto_args);
+               } else {
+                       // unsupported
+                       return EPROTONOSUPPORT;
+               }
+               for (idx = 0; idx < UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_MAX); idx++) {
+                       crypto_ctx = &pcb->utun_crypto_ctx[idx];
+                       if (crypto_ctx->valid) {
+                               return EBADF;
+                       }
+
+                       crypto_ctx->type = crypto_args->type;
+                       LIST_INIT(&crypto_ctx->keys_listhead);
+                       crypto_ctx->valid = 1;
+               }
+               // data traffic is stopped by default
+               pcb->utun_flags |= (UTUN_FLAGS_CRYPTO | UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC);
+               return 0;
+       }
+}
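+
+/*
+ * Illustrative sketch only (not part of this change): how a user-space
+ * client might drive utun_ctl_enable_crypto() over the utun kernel-control
+ * socket.  The option name UTUN_OPT_ENABLE_CRYPTO and the userland headers
+ * are assumptions; only the utun_crypto_args_t layout comes from
+ * if_utun_crypto.h below.
+ */
+#if 0
+static int
+utun_example_enable_ipsec_crypto(int utun_fd)
+{
+       utun_crypto_args_t args;
+
+       bzero(&args, sizeof(args));
+       args.ver = UTUN_CRYPTO_VER_1;       // must be non-zero and < UTUN_CRYPTO_ARGS_VER_MAX
+       args.type = UTUN_CRYPTO_TYPE_IPSEC; // DTLS currently returns EPROTONOSUPPORT
+       args.args_ulen = sizeof(args.u);    // any other value logs "compatibility mode"
+       args.varargs_buflen = 0;            // enable carries no variable-length payload
+
+       // the kernel requires len >= UTUN_CRYPTO_ARGS_TOTAL_SIZE(&args)
+       return setsockopt(utun_fd, SYSPROTO_CONTROL, UTUN_OPT_ENABLE_CRYPTO,
+                         &args, sizeof(args));
+}
+#endif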
+
+errno_t
+utun_ctl_disable_crypto (__unused kern_ctl_ref  kctlref,
+                        __unused u_int32_t     unit, 
+                        __unused void         *unitinfo,
+                        __unused int           opt, 
+                        void                  *data, 
+                        size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto context args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the type (IPSec or DTLS).
+        * - ensure that the crypto context *is* already valid (don't release an invalid context).
+        *    - we have only one context per direction and type.
+        * - ensure that the crypto context holds no crypto material.
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               utun_crypto_args_t *crypto_args = (__typeof__(crypto_args))data;
+
+               if (crypto_args->ver == 0 || crypto_args->ver >= UTUN_CRYPTO_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_args->type == 0 || crypto_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args));
+                       return EINVAL;
+               }
+               if (crypto_args->args_ulen != sizeof(crypto_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) {
+                       utun_ctl_disable_crypto_ipsec(pcb);
+               } else {
+                       // unsupported
+                       return EPROTONOSUPPORT;
+               }
+       }
+       pcb->utun_flags &= ~(UTUN_FLAGS_CRYPTO | UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC);
+       return 0;
+}
+
+errno_t
+utun_ctl_config_crypto_keys (__unused kern_ctl_ref  kctlref,
+                            __unused u_int32_t     unit, 
+                            __unused void         *unitinfo,
+                            __unused int           opt, 
+                            void                  *data, 
+                            size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto material args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the direction (IN or OUT)
+        *    - check the type (IPSec or DTLS)
+        *    - crypto material direction and type must match the associated crypto context's.
+        *        - we can have a list of crypto materials per context.
+        * - ensure that the crypto context is already valid (don't add crypto material to invalid context).
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               int                      idx;
+               utun_crypto_keys_args_t *crypto_keys_args = (__typeof__(crypto_keys_args))data;
+               utun_crypto_ctx_t       *crypto_ctx;
+               utun_crypto_keys_t      *crypto_keys = NULL;
+
+               if (crypto_keys_args->ver == 0 || crypto_keys_args->ver >= UTUN_CRYPTO_KEYS_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_keys_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_keys_args->dir == 0 || crypto_keys_args->dir >= UTUN_CRYPTO_DIR_MAX) {
+                       printf("%s: dir check failed %d\n", __FUNCTION__, crypto_keys_args->dir);
+                       return EINVAL;
+               }
+               if (crypto_keys_args->type == 0 || crypto_keys_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_keys_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(crypto_keys_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(crypto_keys_args));
+                       return EINVAL;
+               }
+               idx = UTUN_CRYPTO_DIR_TO_IDX(crypto_keys_args->dir);
+               crypto_ctx = &pcb->utun_crypto_ctx[idx];
+               if (!crypto_ctx->valid) {
+                       return EBADF;
+               }
+               if (crypto_keys_args->type != crypto_ctx->type) {
+                       // can't add keymat to context with different crypto type
+                       return ENOENT;
+               }
+               crypto_keys = utun_alloc(sizeof(*crypto_keys));
+               if (!crypto_keys) {
+                       return ENOBUFS;
+               }
+               bzero(crypto_keys, sizeof(*crypto_keys));
+               if (crypto_keys_args->args_ulen != sizeof(crypto_keys_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               // branch-off for ipsec vs. dtls
+               if (crypto_keys_args->type == UTUN_CRYPTO_TYPE_IPSEC) {
+                       errno_t err;
+                       if ((err = utun_ctl_config_crypto_keys_ipsec(pcb, crypto_keys_args, crypto_keys))) {
+                               utun_free(crypto_keys);
+                               return err;
+                       }
+               } else {
+                       // unsupported
+                       utun_free(crypto_keys);
+                       return EPROTONOSUPPORT;
+               }
+               crypto_keys->type = crypto_keys_args->type;
+               LIST_INSERT_HEAD(&crypto_ctx->keys_listhead, crypto_keys, chain);
+               crypto_keys->valid = 1;
+       }
+
+       return 0;
+}
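+
+/*
+ * Illustrative sketch only (not part of this change): building the
+ * variable-length request that utun_ctl_config_crypto_keys() expects.
+ * The v1 IPSec handler insists that varargs_buflen ==
+ * BITSTOBYTES(key_auth_len) + BITSTOBYTES(key_enc_len), with the auth key
+ * followed by the enc key in varargs_buf; malloc()/bzero() stand in for
+ * whatever allocator the real caller uses.
+ */
+#if 0
+static utun_crypto_keys_args_t *
+utun_example_build_keys_args(const u_int8_t *key_auth, u_int16_t key_auth_bits,
+                             const u_int8_t *key_enc,  u_int16_t key_enc_bits)
+{
+       size_t                   vlen = BITSTOBYTES(key_auth_bits) + BITSTOBYTES(key_enc_bits);
+       utun_crypto_keys_args_t *args = malloc(sizeof(*args) + vlen);
+
+       if (args == NULL) {
+               return NULL;
+       }
+       bzero(args, sizeof(*args) + vlen);
+       args->ver = UTUN_CRYPTO_KEYS_IPSEC_VER_1;
+       args->type = UTUN_CRYPTO_TYPE_IPSEC;
+       args->dir = UTUN_CRYPTO_DIR_OUT;
+       args->args_ulen = sizeof(args->u);
+       args->varargs_buflen = (u_int32_t)vlen;
+       args->u.ipsec_v1.key_auth_len = key_auth_bits; // in bits, e.g. 160 for HMAC-SHA1
+       args->u.ipsec_v1.key_enc_len = key_enc_bits;   // in bits, e.g. 256 for AES256
+       bcopy(key_auth, args->varargs_buf, BITSTOBYTES(key_auth_bits));
+       bcopy(key_enc, args->varargs_buf + BITSTOBYTES(key_auth_bits),
+             BITSTOBYTES(key_enc_bits));
+       return args;
+}
+#endif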
+
+errno_t
+utun_ctl_unconfig_crypto_keys (__unused kern_ctl_ref  kctlref,
+                              __unused u_int32_t     unit, 
+                              __unused void         *unitinfo,
+                              __unused int           opt, 
+                              void                  *data, 
+                              size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto material args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the direction (IN or OUT)
+        *    - check the type (IPSec or DTLS)
+        *    - crypto material direction and type must match the associated crypto context's.
+        *        - we can have a list of crypto materials per context.
+        * - ensure that the crypto context is already valid (don't remove crypto material from an invalid context).
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               int                      idx;
+               utun_crypto_keys_args_t *crypto_keys_args = (__typeof__(crypto_keys_args))data;
+               utun_crypto_ctx_t       *crypto_ctx;
+               utun_crypto_keys_t      *cur_crypto_keys, *nxt_crypto_keys;
+
+               if (crypto_keys_args->ver == 0 || crypto_keys_args->ver >= UTUN_CRYPTO_KEYS_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_keys_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_keys_args->dir == 0 || crypto_keys_args->dir >= UTUN_CRYPTO_DIR_MAX) {
+                       printf("%s: dir check failed %d\n", __FUNCTION__, crypto_keys_args->dir);
+                       return EINVAL;
+               }
+               if (crypto_keys_args->type == 0 || crypto_keys_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_keys_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(crypto_keys_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(crypto_keys_args));
+                       return EINVAL;
+               }
+               idx = UTUN_CRYPTO_DIR_TO_IDX(crypto_keys_args->dir);
+               crypto_ctx = &pcb->utun_crypto_ctx[idx];
+               if (!crypto_ctx->valid) {
+                       return EBADF;
+               }
+               if (crypto_keys_args->type != crypto_ctx->type) {
+                       // can't remove keymat from a context with a different crypto type
+                       return ENOENT;
+               }
+               if (crypto_keys_args->args_ulen != sizeof(crypto_keys_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               // traverse crypto materials looking for the right one
+               for (cur_crypto_keys = (__typeof__(cur_crypto_keys))LIST_FIRST(&crypto_ctx->keys_listhead);
+                        cur_crypto_keys != NULL;
+                        cur_crypto_keys = nxt_crypto_keys) {
+                       nxt_crypto_keys = (__typeof__(nxt_crypto_keys))LIST_NEXT(cur_crypto_keys, chain);
+                       // branch-off for ipsec vs. dtls
+                       if (crypto_keys_args->type == UTUN_CRYPTO_TYPE_IPSEC) {
+                               if (crypto_keys_args->u.ipsec_v1.spi == cur_crypto_keys->state.u.ipsec.spi) {
+                                       errno_t err;
+                                       if ((err = utun_ctl_unconfig_crypto_keys_ipsec(crypto_keys_args, cur_crypto_keys))) {
+                                               return err;
+                                       }
+                                       LIST_REMOVE(cur_crypto_keys, chain);
+                                       bzero(cur_crypto_keys, sizeof(*cur_crypto_keys));
+                                       utun_free(cur_crypto_keys);
+                                       return 0;
+                               }
+                       } else {
+                               // unsupported
+                               return EPROTONOSUPPORT;
+                       }
+               }
+               // TODO: if there is no SA left, ensure utun can't decrypt/encrypt packets directly. it should rely on the vpnplugin for that.
+       }
+
+       return 0;
+}
+
+errno_t
+utun_ctl_generate_crypto_keys_idx (__unused kern_ctl_ref   kctlref,
+                                  __unused u_int32_t      unit, 
+                                  __unused void          *unitinfo,
+                                  __unused int            opt, 
+                                  void                   *data, 
+                                  size_t                 *len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto material index args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the direction (IN or OUT)
+        *    - check the type (IPSec or DTLS)
+        *    - crypto material direction and type must match the associated crypto context's.
+        *        - we can have a list of crypto materials per context.
+        * - any error should be equivalent to noop.
+        */
+       if (*len < UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               int                          idx;
+               utun_crypto_keys_idx_args_t *crypto_keys_idx_args = (__typeof__(crypto_keys_idx_args))data;
+               utun_crypto_ctx_t           *crypto_ctx;
+
+               if (crypto_keys_idx_args->ver == 0 || crypto_keys_idx_args->ver >= UTUN_CRYPTO_KEYS_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_keys_idx_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_keys_idx_args->dir == 0 || crypto_keys_idx_args->dir >= UTUN_CRYPTO_DIR_MAX) {
+                       printf("%s: dir check failed %d\n", __FUNCTION__, crypto_keys_idx_args->dir);
+                       return EINVAL;
+               }
+               if (crypto_keys_idx_args->type == 0 || crypto_keys_idx_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_keys_idx_args->type);
+                       return EINVAL;
+               }
+               if (*len < UTUN_CRYPTO_KEYS_IDX_ARGS_TOTAL_SIZE(crypto_keys_idx_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)*len, (int)UTUN_CRYPTO_KEYS_IDX_ARGS_TOTAL_SIZE(crypto_keys_idx_args));
+                       return EINVAL;
+               }
+               idx = UTUN_CRYPTO_DIR_TO_IDX(crypto_keys_idx_args->dir);
+               crypto_ctx = &pcb->utun_crypto_ctx[idx];
+               if (!crypto_ctx->valid) {
+                       return EBADF;
+               }
+               if (crypto_keys_idx_args->type != crypto_ctx->type) {
+                       // can't generate a keymat index for a context with a different crypto type
+                       return ENOENT;
+               }
+               if (crypto_keys_idx_args->args_ulen != sizeof(crypto_keys_idx_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               // traverse crypto materials looking for the right one
+               // branch-off for ipsec vs. dtls
+               if (crypto_keys_idx_args->type == UTUN_CRYPTO_TYPE_IPSEC) {
+                       errno_t err;
+                       if ((err = utun_ctl_generate_crypto_keys_idx_ipsec(crypto_keys_idx_args))) {
+                               return err;
+                       }
+               } else {
+                       // unsupported
+                       return EPROTONOSUPPORT;
+               }
+       }
+
+       return 0;
+}
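+
+/*
+ * Illustrative sketch only (not part of this change): the keys-index call
+ * is a read/write round trip (note the size_t *len parameter), so a
+ * getsockopt()-style exchange fits it.  UTUN_OPT_GENERATE_CRYPTO_KEYS_IDX
+ * is an assumed option name; the kernel writes the reserved SPI back into
+ * the caller's buffer.
+ */
+#if 0
+static int
+utun_example_reserve_spi(int utun_fd, utun_crypto_keys_idx_args_t *args)
+{
+       socklen_t len = sizeof(*args);
+
+       args->ver = UTUN_CRYPTO_VER_1;
+       args->type = UTUN_CRYPTO_TYPE_IPSEC;
+       args->dir = UTUN_CRYPTO_DIR_IN;
+       args->args_ulen = sizeof(args->u);
+       args->varargs_buflen = 0;
+       args->u.ipsec_v1.spi = 0;                  // 0 == "kernel, pick an SPI"
+       args->u.ipsec_v1.spirange_min = 0;
+       args->u.ipsec_v1.spirange_max = 0xffffffff;
+       if (getsockopt(utun_fd, SYSPROTO_CONTROL, UTUN_OPT_GENERATE_CRYPTO_KEYS_IDX,
+                      args, &len) != 0) {
+               return -1;
+       }
+       return 0;                                  // args->u.ipsec_v1.spi now holds the index
+}
+#endif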
+
+errno_t
+utun_ctl_stop_crypto_data_traffic (__unused kern_ctl_ref  kctlref,
+                                  __unused u_int32_t     unit, 
+                                  __unused void         *unitinfo,
+                                  __unused int           opt, 
+                                  void                  *data, 
+                                  size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto context args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the type (IPSec or DTLS).
+        * - ensure that crypto is already enabled on this utun before
+        *   marking data traffic as stopped.
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               utun_crypto_args_t *crypto_args = (__typeof__(crypto_args))data;
+
+               if (crypto_args->ver == 0 || crypto_args->ver >= UTUN_CRYPTO_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_args->type == 0 || crypto_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args));
+                       return EINVAL;
+               }
+               if (crypto_args->args_ulen != sizeof(crypto_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               if ((pcb->utun_flags & UTUN_FLAGS_CRYPTO) == 0) {
+                       printf("%s: crypto is already disabled\n", __FUNCTION__);
+                       return EINVAL;
+               }
+
+               if (crypto_args->type != UTUN_CRYPTO_TYPE_IPSEC) {
+                       // unsupported
+                       return EPROTONOSUPPORT;
+               }
+       }
+       pcb->utun_flags |= UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC;
+       return 0;
+}
+
+errno_t
+utun_ctl_start_crypto_data_traffic (__unused kern_ctl_ref  kctlref,
+                                   __unused u_int32_t     unit, 
+                                   __unused void         *unitinfo,
+                                   __unused int           opt, 
+                                   void                  *data, 
+                                   size_t                 len)
+{
+       struct utun_pcb *pcb = unitinfo;
+
+       /*
+        * - verify the crypto context args passed from user-land.
+        *    - check the size of the argument buffer.
+        *    - check the type (IPSec or DTLS).
+        * - ensure that crypto is already enabled on this utun before
+        *   resuming data traffic.
+        * - any error should be equivalent to noop.
+        */
+       if (len < UTUN_CRYPTO_ARGS_HDR_SIZE) {
+               return EMSGSIZE;
+       } else {
+               utun_crypto_args_t *crypto_args = (__typeof__(crypto_args))data;
+
+               if (crypto_args->ver == 0 || crypto_args->ver >= UTUN_CRYPTO_ARGS_VER_MAX) {
+                       printf("%s: ver check failed %d\n", __FUNCTION__, crypto_args->ver);
+                       return EINVAL;
+               }
+               if (crypto_args->type == 0 || crypto_args->type >= UTUN_CRYPTO_TYPE_MAX) {
+                       printf("%s: type check failed %d\n", __FUNCTION__, crypto_args->type);
+                       return EINVAL;
+               }
+               if (len < UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args)) {
+                       printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__,
+                                  (int)len, (int)UTUN_CRYPTO_ARGS_TOTAL_SIZE(crypto_args));
+                       return EINVAL;
+               }
+               if (crypto_args->args_ulen != sizeof(crypto_args->u)) {
+                       printf("%s: compatibility mode\n", __FUNCTION__);
+               }
+
+               if ((pcb->utun_flags & UTUN_FLAGS_CRYPTO) == 0) {
+                       printf("%s: crypto is already disabled\n", __FUNCTION__);
+                       return EINVAL;
+               }
+
+               if (crypto_args->type != UTUN_CRYPTO_TYPE_IPSEC) {
+                       // unsupported
+                       return EPROTONOSUPPORT;
+               }
+       }
+       pcb->utun_flags &= ~UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC;
+       return 0;
+}
+
+int
+utun_pkt_crypto_output (struct utun_pcb *pcb, mbuf_t *m)
+{
+       int idx = UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT);
+       if (!pcb->utun_crypto_ctx[idx].valid) {
+               printf("%s: context is invalid %d\n", __FUNCTION__, pcb->utun_crypto_ctx[idx].valid);
+               return -1;
+       }
+       if (pcb->utun_crypto_ctx[idx].type == UTUN_CRYPTO_TYPE_IPSEC) {
+               return (utun_pkt_ipsec_output(pcb, m));
+       } else {
+               // unsupported
+               printf("%s: type is invalid %d\n", __FUNCTION__, pcb->utun_crypto_ctx[idx].type);
+       }
+       return -1;
+}
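+
+// Note: utun_pkt_crypto_output() is the single outbound dispatch point; it
+// returns utun_pkt_ipsec_output()'s result for the IPSec case and -1 when
+// there is no valid outbound context or the context type is unsupported.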
diff --git a/bsd/net/if_utun_crypto.h b/bsd/net/if_utun_crypto.h
new file mode 100644 (file)
index 0000000..804ffa9
--- /dev/null
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _NET_IF_UTUN_CRYPTO_H_
+#define        _NET_IF_UTUN_CRYPTO_H_
+
+// constants used in configuring the crypto context
+typedef enum utun_crypto_ver {
+       UTUN_CRYPTO_VER_1 = 1,
+       UTUN_CRYPTO_VER_MAX,
+} utun_crypto_ver_t;
+
+#define UTUN_CRYPTO_KEYS_IPSEC_VER_1                  UTUN_CRYPTO_VER_1
+#define UTUN_CRYPTO_IPSEC_VER_1                       UTUN_CRYPTO_VER_1
+
+#define UTUN_CRYPTO_ARGS_VER_MAX                      UTUN_CRYPTO_VER_MAX
+#define UTUN_CRYPTO_KEYS_ARGS_VER_MAX                 UTUN_CRYPTO_VER_MAX
+
+typedef enum utun_crypto_dir {
+       UTUN_CRYPTO_DIR_IN = 1,
+       UTUN_CRYPTO_DIR_OUT,
+       UTUN_CRYPTO_DIR_MAX,
+} utun_crypto_dir_t;
+
+#define UTUN_CRYPTO_CTX_NUM_DIRS 2
+
+#define BITSTOBYTES(n)                                ((n) >> 3)
+#define BYTESTOBITS(n)                                ((n) << 3)
+
+#define MAX_KEY_AUTH_LEN_BITS                         512 // corresponds to SHA512
+#define MAX_KEY_AUTH_LEN_BYTES                        (BITSTOBYTES(MAX_KEY_AUTH_LEN_BITS))
+#define MAX_KEY_ENC_LEN_BITS                          256 // corresponds to AES256
+#define MAX_KEY_ENC_LEN_BYTES                         (BITSTOBYTES(MAX_KEY_ENC_LEN_BITS))
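+// Worked example: BITSTOBYTES(512) == 64, the byte length of an
+// HMAC-SHA512 key, and BITSTOBYTES(256) == 32 for an AES256 key.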
+
+typedef enum utun_crypto_type {
+       UTUN_CRYPTO_TYPE_IPSEC = 1,
+       UTUN_CRYPTO_TYPE_DTLS,
+       UTUN_CRYPTO_TYPE_MAX,
+} utun_crypto_type_t;
+
+typedef enum if_utun_crypto_ipsec_mode {
+       IF_UTUN_CRYPTO_IPSEC_MODE_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_MODE_TRANSPORT,
+       IF_UTUN_CRYPTO_IPSEC_MODE_TUNNEL,
+       IF_UTUN_CRYPTO_IPSEC_MODE_MAX,
+} if_utun_crypto_ipsec_mode_t;
+
+typedef enum if_utun_crypto_ipsec_proto {
+       IF_UTUN_CRYPTO_IPSEC_PROTO_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_PROTO_ESP,
+       IF_UTUN_CRYPTO_IPSEC_PROTO_AH,
+       IF_UTUN_CRYPTO_IPSEC_PROTO_MAX,
+} if_utun_crypto_ipsec_proto_t;
+
+typedef enum if_utun_crypto_ipsec_auth {
+       IF_UTUN_CRYPTO_IPSEC_AUTH_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_MD5,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_SHA1,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_SHA256,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_SHA384,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_SHA512,
+       IF_UTUN_CRYPTO_IPSEC_AUTH_MAX,
+} if_utun_crypto_ipsec_auth_t;
+
+typedef enum if_utun_crypto_ipsec_enc {
+       IF_UTUN_CRYPTO_IPSEC_ENC_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_ENC_DES,
+       IF_UTUN_CRYPTO_IPSEC_ENC_3DES,
+       IF_UTUN_CRYPTO_IPSEC_ENC_AES128,
+       IF_UTUN_CRYPTO_IPSEC_ENC_AES256,
+       IF_UTUN_CRYPTO_IPSEC_ENC_MAX,
+} if_utun_crypto_ipsec_enc_t;
+
+typedef enum if_utun_crypto_ipsec_keepalive {
+       IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_NATT,
+       IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_ESP,
+       IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_MAX,
+} if_utun_crypto_ipsec_keepalive_t;
+
+typedef enum if_utun_crypto_ipsec_natd {
+       IF_UTUN_CRYPTO_IPSEC_NATD_NONE = 0,
+       IF_UTUN_CRYPTO_IPSEC_NATD_MINE,
+       IF_UTUN_CRYPTO_IPSEC_NATD_PEER,
+       IF_UTUN_CRYPTO_IPSEC_NATD_BOTH,
+       IF_UTUN_CRYPTO_IPSEC_NATD_MAX,
+} if_utun_crypto_ipsec_natd_t;
+
+// structures used for storing the App's keying index arguments
+typedef struct utun_crypto_keys_idx_ipsec_args_v1 {
+       struct sockaddr_storage                       src_addr; // v4 or v6 socket address (ignore port numbers)
+       struct sockaddr_storage                       dst_addr; // v4 or v6 socket address (ignore port numbers)
+       if_utun_crypto_ipsec_proto_t                  proto;
+       if_utun_crypto_ipsec_mode_t                   mode;
+       u_int32_t                                     reqid; // policy's reqid, default to 0 for now since we are avoiding policies.
+       u_int32_t                                     spi;                // 0 when requesting the index, otherwise it contains the resulting index
+       u_int32_t                                     spirange_min; // default to 0
+       u_int32_t                                     spirange_max; // default to 0xffffffff
+} __attribute__((packed)) utun_crypto_keys_idx_ipsec_args_v1_t;
+
+typedef struct utun_crypto_keys_idx_dtls_args_v1 {
+       // stub for DTLS keying index arguments
+       u_int32_t                                     unused; // placeholder
+} __attribute__((packed)) utun_crypto_keys_idx_dtls_args_v1_t;
+
+// App's parent structure for sending/storing keying index arguments
+typedef struct utun_crypto_keys_idx_args {
+       utun_crypto_ver_t                             ver;
+       utun_crypto_type_t                            type;
+       utun_crypto_dir_t                             dir;
+       u_int32_t                                     args_ulen;
+       u_int32_t                                     varargs_buflen;
+       union {
+               // don't change the order, number, or size of the elements above this line (in this struct); otherwise UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE breaks backwards compatibility
+               utun_crypto_keys_idx_ipsec_args_v1_t  ipsec_v1;
+               utun_crypto_keys_idx_dtls_args_v1_t   dtls_v1;
+               // future (additional) versions of the arguments may be placed here
+       } u;
+       u_int8_t                                      varargs_buf[0];
+} __attribute__((aligned(4), packed)) utun_crypto_keys_idx_args_t;
+
+// structures used for storing the App's keying material arguments
+typedef struct utun_crypto_keys_ipsec_args_v1 {
+       struct sockaddr_storage                       src_addr; // v4 or v6 socket address (ignore port numbers)
+       struct sockaddr_storage                       dst_addr; // v4 or v6 socket address (ignore port numbers)
+       if_utun_crypto_ipsec_proto_t                  proto;
+       if_utun_crypto_ipsec_mode_t                   mode;
+       if_utun_crypto_ipsec_auth_t                   alg_auth;
+       if_utun_crypto_ipsec_enc_t                    alg_enc;
+       if_utun_crypto_ipsec_keepalive_t              keepalive;
+       if_utun_crypto_ipsec_natd_t                   natd;
+       u_int8_t                                      replay;   // window size default to 4
+       u_int8_t                                      punt_rx_keepalive;
+       u_int16_t                                     interval_tx_keepalive;
+       u_int16_t                                     key_auth_len; // 128 or 160 or 192 or 256 or 384 or 512
+       u_int16_t                                     key_enc_len;  // 64 or 128 or 192 or 256
+       u_int16_t                                     natt_port; // if non-zero flags will be set to include SADB_X_EXT_NATT
+       u_int16_t                                     unused;
+       u_int32_t                                     seq;        // default to 0
+       u_int32_t                                     spi;
+       u_int32_t                                     pid;      // vpnagent's process id
+       u_int32_t                                     reqid; // policy's reqid, default to 0 for now since we are avoiding policies.
+       u_int64_t                                     lifetime_hard; // value in seconds
+       u_int64_t                                     lifetime_soft; // value in seconds
+       // key_auth and key_enc are actually stored in utun_crypto_keys_args_t.varargs_buf
+} __attribute__((packed)) utun_crypto_keys_ipsec_args_v1_t;
+
+typedef struct utun_crypto_keys_dtls_args_v1 {
+       // stub for DTLS keying material arguments
+       u_int32_t                                     unused; // placeholder
+} __attribute__((packed)) utun_crypto_keys_dtls_args_v1_t;
+
+// App's parent structure for sending/storing keying material arguments
+typedef struct utun_crypto_keys_args {
+       utun_crypto_ver_t                             ver;
+       utun_crypto_type_t                            type;
+       utun_crypto_dir_t                             dir;
+       u_int32_t                                     args_ulen;
+       u_int32_t                                     varargs_buflen;
+       union {
+               // don't change the order, number, or size of elements above this line (in this struct). otherwise UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE breaks backwards compatibility
+               utun_crypto_keys_ipsec_args_v1_t      ipsec_v1;
+               utun_crypto_keys_dtls_args_v1_t       dtls_v1;
+               // future (additional) versions of the arguments may be placed here
+       } u;
+       u_int8_t                                      varargs_buf[0];
+} __attribute__((aligned(4), packed)) utun_crypto_keys_args_t;
+
+// structures used for storing the App's crypto arguments
+typedef struct utun_crypto_ipsec_args_v1 {
+       // stub for IPSec crypto context arguments
+       u_int32_t                                     unused; // placeholder
+} __attribute__((packed)) utun_crypto_ipsec_args_v1_t;
+
+typedef struct utun_crypto_dtls_args_v1 {
+       // stub for DTLS crypto context arguments
+       u_int32_t                                     unused; // placeholder
+} __attribute__((packed)) utun_crypto_dtls_args_v1_t;
+
+// App's parent structure for starting/stopping crypto
+typedef struct utun_crypto_args {
+       utun_crypto_ver_t                             ver;
+       utun_crypto_type_t                            type;
+       u_int32_t                                     stop_data_traffic;
+       u_int32_t                                     args_ulen;
+       u_int32_t                                     varargs_buflen;
+       union {
+               // don't change the order, number, or size of elements above this line (in this struct). otherwise UTUN_CRYPTO_ARGS_HDR_SIZE breaks backwards compatibility
+               utun_crypto_ipsec_args_v1_t           ipsec_v1;
+               utun_crypto_dtls_args_v1_t            dtls_v1;
+               // future (additional) versions of the arguments may be placed here
+       } u;
+       u_int8_t                                      varargs_buf[0]; // must be at the end of this struct
+} __attribute__((aligned(4), packed)) utun_crypto_args_t;
+
+#ifdef KERNEL_PRIVATE
+
+#include <sys/kern_control.h>
+#include <net/kpi_protocol.h>
+#include <net/kpi_interface.h>
+#include <net/pfkeyv2.h>
+#include <netkey/key.h>
+#include <netkey/keydb.h>
+
+struct utun_pcb;
+
+// structures used for storing kernel's keying material runtime state
+typedef struct utun_crypto_keys_ipsec_state {
+       // kernel's ipsec keying material state
+       u_int32_t                                     spi;
+       struct secashead                             *sah;
+       struct secasvar                              *sav;
+       u_int8_t                                      proto;
+       u_int8_t                                      ifamily;
+       u_int8_t                                      mode;
+       u_int8_t                                      unused;
+} __attribute__((packed)) utun_crypto_keys_ipsec_state_t;
+
+typedef struct utun_crypto_keys_dtls_state {
+       // stub for kernel's DTLS keying material state
+       u_int32_t                                     unused; // placeholder
+} __attribute__((packed)) utun_crypto_keys_dtls_state_t;
+
+// kernel's parent structure for keying material state
+typedef struct utun_crypto_keys_state {
+       union {
+               utun_crypto_keys_ipsec_state_t        ipsec;
+               utun_crypto_keys_dtls_state_t         dtls;
+       } u;
+} __attribute__((aligned(4), packed)) utun_crypto_keys_state_t;
+
+// kernel's parent structure for keying material
+typedef struct utun_crypto_keys {
+       int                                           valid; // is valid?
+       utun_crypto_type_t                            type;
+       u_int16_t                                     unused;
+       utun_crypto_keys_state_t                      state; // runtime state
+       LIST_ENTRY(utun_crypto_keys)                  chain;
+} __attribute__((aligned(4), packed)) utun_crypto_keys_t;
+
+// kernel's parent structure for all crypto stuff
+typedef struct utun_crypto_ctx {
+       int                                           valid;
+       utun_crypto_type_t                            type;
+       u_int16_t                                     unused;
+       LIST_HEAD(chain, utun_crypto_keys)            keys_listhead;
+} __attribute__((aligned(4), packed)) utun_crypto_ctx_t;
+
+#define UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE            ((size_t)(&((utun_crypto_keys_idx_args_t *)0)->u))
+#define UTUN_CRYPTO_KEYS_IDX_ARGS_VARARGS_BUF(args)   ((u_int8_t *)(args) + UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE + (args)->args_ulen)
+#define UTUN_CRYPTO_KEYS_IDX_ARGS_TOTAL_SIZE(args)    ((size_t)(UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE + (args)->args_ulen + (args)->varargs_buflen))
+
+#define UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE                ((size_t)(&((utun_crypto_keys_args_t *)0)->u))
+#define UTUN_CRYPTO_KEYS_ARGS_VARARGS_BUF(args)       ((u_int8_t *)(args) + UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE + (args)->args_ulen)
+#define UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(args)        ((size_t)(UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE + (args)->args_ulen + (args)->varargs_buflen))
+
+#define UTUN_CRYPTO_ARGS_HDR_SIZE                     ((size_t)(&((utun_crypto_args_t *)0)->u))
+#define UTUN_CRYPTO_ARGS_VARARGS_BUF(args)            ((u_int8_t *)(args) + UTUN_CRYPTO_ARGS_HDR_SIZE + (args)->args_ulen)
+#define UTUN_CRYPTO_ARGS_TOTAL_SIZE(args)             ((size_t)(UTUN_CRYPTO_ARGS_HDR_SIZE + (args)->args_ulen + (args)->varargs_buflen))
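+// Buffer layout assumed by the three size-macro groups above (derived from
+// the packed struct definitions earlier in this header):
+//
+//     |<--- *_HDR_SIZE --->|<--- args_ulen --->|<--- varargs_buflen --->|
+//     | fixed fields       | union u           | varargs_buf            |
+//
+// so *_TOTAL_SIZE(args) == *_HDR_SIZE + args->args_ulen + args->varargs_buflen.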
+
+#define UTUN_CRYPTO_DIR_TO_IDX(dir)                   ((dir) - 1)
+#define UTUN_CRYPTO_IDX_TO_DIR(idx)                   ((idx) + 1)
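+// e.g. UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_IN) == 0 and
+// UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT) == 1, which is how the
+// per-direction utun_crypto_ctx[] array in struct utun_pcb (presumably
+// UTUN_CRYPTO_CTX_NUM_DIRS entries) is indexed in if_utun_crypto.c.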
+
+void
+utun_cleanup_crypto(struct utun_pcb *pcb);
+
+errno_t
+utun_ctl_enable_crypto(__unused kern_ctl_ref  kctlref,
+                      __unused u_int32_t     unit, 
+                      __unused void         *unitinfo,
+                      __unused int           opt, 
+                      void                  *data, 
+                      size_t                 len);
+
+errno_t
+utun_ctl_disable_crypto(__unused kern_ctl_ref  kctlref,
+                       __unused u_int32_t     unit, 
+                       __unused void         *unitinfo,
+                       __unused int           opt, 
+                       void                  *data, 
+                       size_t                 len);
+
+errno_t
+utun_ctl_config_crypto_keys(__unused kern_ctl_ref  kctlref,
+                           __unused u_int32_t     unit, 
+                           __unused void         *unitinfo,
+                           __unused int           opt, 
+                           void                  *data, 
+                           size_t                 len);
+
+errno_t
+utun_ctl_unconfig_crypto_keys(__unused kern_ctl_ref  kctlref,
+                             __unused u_int32_t     unit, 
+                             __unused void         *unitinfo,
+                             __unused int           opt, 
+                             void                  *data, 
+                             size_t                 len);
+
+errno_t
+utun_ctl_generate_crypto_keys_idx(__unused kern_ctl_ref  kctlref,
+                                 __unused u_int32_t     unit, 
+                                 __unused void         *unitinfo,
+                                 __unused int           opt, 
+                                 void                  *data, 
+                                 size_t                *len);
+
+errno_t
+utun_ctl_stop_crypto_data_traffic(__unused kern_ctl_ref  kctlref,
+                                 __unused u_int32_t     unit, 
+                                 __unused void         *unitinfo,
+                                 __unused int           opt, 
+                                 void                  *data, 
+                                 size_t                 len);
+
+errno_t
+utun_ctl_start_crypto_data_traffic(__unused kern_ctl_ref  kctlref,
+                                  __unused u_int32_t     unit, 
+                                  __unused void         *unitinfo,
+                                  __unused int           opt, 
+                                  void                  *data, 
+                                  size_t                 len);
+
+int
+utun_pkt_crypto_output(struct utun_pcb *pcb, mbuf_t *m);
+
+#endif // KERNEL_PRIVATE
+
+#endif // _NET_IF_UTUN_CRYPTO_H_
diff --git a/bsd/net/if_utun_crypto_ipsec.c b/bsd/net/if_utun_crypto_ipsec.c
new file mode 100644 (file)
index 0000000..0166ba1
--- /dev/null
@@ -0,0 +1,1088 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_utun.h>
+#include <sys/mbuf.h> 
+#include <netinet/in.h>
+#include <netinet6/in6_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_var.h>
+#include <net/if_utun_crypto_ipsec.h>
+#include <netinet6/esp.h>
+#include <netinet6/esp6.h>
+#include <netinet6/ipsec.h>
+#include <net/bpf.h>
+
+extern lck_mtx_t *sadb_mutex;
+extern int        esp_udp_encap_port; // udp encap listening port
+extern int        ipsec_policy_count;
+extern int        ipsec_bypass;
+extern int        natt_keepalive_interval;
+
+static int        utun_punt_rx_keepalive = 0; // optional global control
+
+extern errno_t utun_pkt_input (struct utun_pcb *pcb, mbuf_t m);
+
+static u_int8_t
+utun_ipsec_mode_to_sadb_mode (if_utun_crypto_ipsec_mode_t mode)
+{
+       switch (mode) {
+       case IF_UTUN_CRYPTO_IPSEC_MODE_TRANSPORT:
+               return IPSEC_MODE_TRANSPORT;
+       case IF_UTUN_CRYPTO_IPSEC_MODE_TUNNEL:
+               return IPSEC_MODE_TUNNEL;
+       default:
+               return 0;
+       }
+}
+
+static u_int16_t
+utun_ipsec_proto_to_sadb_proto (if_utun_crypto_ipsec_proto_t proto)
+{
+       switch (proto) {
+       case IF_UTUN_CRYPTO_IPSEC_PROTO_ESP:
+               return IPPROTO_ESP;
+       case IF_UTUN_CRYPTO_IPSEC_PROTO_AH:
+               return IPPROTO_AH;
+       default:
+               return 0;
+       }
+}
+
+static u_int8_t
+utun_ipsec_proto_to_sadb_satype (if_utun_crypto_ipsec_proto_t proto)
+{
+       switch (proto) {
+       case IF_UTUN_CRYPTO_IPSEC_PROTO_ESP:
+               return SADB_SATYPE_ESP;
+       case IF_UTUN_CRYPTO_IPSEC_PROTO_AH:
+               return SADB_SATYPE_AH;
+       default:
+               return 0;
+       }
+}
+
+static u_int8_t
+utun_ipsec_auth_to_sadb_aalg (if_utun_crypto_ipsec_auth_t auth)
+{
+       switch (auth) {
+       case IF_UTUN_CRYPTO_IPSEC_AUTH_MD5:
+               return SADB_AALG_MD5HMAC;
+       case IF_UTUN_CRYPTO_IPSEC_AUTH_SHA1:
+               return SADB_AALG_SHA1HMAC;
+       case IF_UTUN_CRYPTO_IPSEC_AUTH_SHA256:
+               return SADB_X_AALG_SHA2_256;
+       case IF_UTUN_CRYPTO_IPSEC_AUTH_SHA384:
+               return SADB_X_AALG_SHA2_384;
+       case IF_UTUN_CRYPTO_IPSEC_AUTH_SHA512:
+               return SADB_X_AALG_SHA2_512;
+       default:
+               return 0;
+       }
+}
+
+static u_int8_t
+utun_ipsec_enc_to_sadb_ealg (if_utun_crypto_ipsec_enc_t enc)
+{
+       switch (enc) {
+       case IF_UTUN_CRYPTO_IPSEC_ENC_DES:
+               return SADB_EALG_DESCBC;
+       case IF_UTUN_CRYPTO_IPSEC_ENC_3DES:
+               return SADB_EALG_3DESCBC;
+       case IF_UTUN_CRYPTO_IPSEC_ENC_AES128:
+       case IF_UTUN_CRYPTO_IPSEC_ENC_AES256:
+               return SADB_X_EALG_AESCBC;
+       default:
+               return 0;
+       }
+}
+
+static u_int32_t
+utun_ipsec_keepalive_and_nat_info_to_sadb_flags (if_utun_crypto_ipsec_keepalive_t keepalive,
+                                                int                              punt_rx_keepalive,
+                                                if_utun_crypto_ipsec_natd_t      natd,
+                                                u_int16_t                        natt_port)
+{
+       u_int32_t flags = 0;
+
+       if (natt_port && natt_port != 500) {
+               flags |= SADB_X_EXT_NATT;
+
+               switch (keepalive) {
+               case IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_NATT:
+                       flags |= SADB_X_EXT_NATT_KEEPALIVE; // normal keepalive packet
+                       break;
+               case IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_ESP:
+                       flags |= (SADB_X_EXT_ESP_KEEPALIVE | SADB_X_EXT_PUNT_RX_KEEPALIVE); // use an EMPTY ESP as a keepalive 
+                       break;
+               default:
+                       break;
+               }
+
+               switch (natd) {
+               case IF_UTUN_CRYPTO_IPSEC_NATD_PEER:
+                       flags |= SADB_X_EXT_NATT_DETECTED_PEER;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (punt_rx_keepalive) {
+               flags |= SADB_X_EXT_PUNT_RX_KEEPALIVE;
+       }
+
+       return flags;
+}
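+
+// Worked example: natt_port == 4500 with IF_UTUN_CRYPTO_IPSEC_KEEPALIVE_NATT
+// and IF_UTUN_CRYPTO_IPSEC_NATD_PEER yields SADB_X_EXT_NATT |
+// SADB_X_EXT_NATT_KEEPALIVE | SADB_X_EXT_NATT_DETECTED_PEER, while
+// natt_port == 500 (plain IKE, no NAT-T float) sets no NAT-T flags at all.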
+
+static errno_t
+utun_ipsec_set_sah (struct secashead        **sah,
+                   u_int8_t                  dir,
+                   u_int16_t                 proto,
+                   u_int8_t                  mode,
+                   u_int32_t                 reqid,
+                   struct sockaddr_storage  *src_addr,
+                   struct sockaddr_storage  *dst_addr)
+{
+       struct secasindex saidx;
+
+       // currently only support tunnel mode and ESP
+       if (proto != IPPROTO_ESP ||
+           mode != IPSEC_MODE_TUNNEL) {
+               return EINVAL;
+       }
+       if ((((struct sockaddr *)src_addr)->sa_family != AF_INET &&
+            ((struct sockaddr *)src_addr)->sa_family != AF_INET6) ||
+           (((struct sockaddr *)dst_addr)->sa_family != AF_INET &&
+            ((struct sockaddr *)dst_addr)->sa_family != AF_INET6)) {
+               return EINVAL;
+       }
+
+       bzero(&saidx, sizeof(saidx));
+       saidx.proto = proto;
+       saidx.mode = mode;
+       saidx.reqid = reqid;
+       bcopy(src_addr, &saidx.src, sizeof(saidx.src)); 
+       bcopy(dst_addr, &saidx.dst, sizeof(saidx.dst)); 
+
+       lck_mtx_lock(sadb_mutex);
+       // TODO: add sah and policy (collision) check and prevention. ensure that there is no conflicting policy.
+       // TODO: ensure that key_spdaddxxx doesn't add a policy that conflicts with any of our sahs.
+       *sah = key_newsah2(&saidx, dir);
+       lck_mtx_unlock(sadb_mutex);
+       return 0;
+}
+
+static int
+utun_ipsec_clr_sahs (struct secashead **sah)
+{
+       struct secasvar *sav;
+       struct secasvar *nextsav;
+       u_int            state;
+
+       lck_mtx_lock(sadb_mutex);
+       for (state = 0; state < SADB_SASTATE_MAX; state++) {
+               for (sav = LIST_FIRST(&(*sah)->savtree[state]);
+                    sav != NULL;
+                    sav = nextsav) {
+                       nextsav = LIST_NEXT(sav, chain);
+                       if (sav->state == SADB_SASTATE_LARVAL ||
+                               sav->state == SADB_SASTATE_DEAD) {
+                               continue;
+                       }
+
+                       if (sav->utun_pcb) {
+                               sav->utun_pcb = NULL;
+                               sav->utun_is_keepalive_fn = NULL;
+                               sav->utun_in_fn = NULL;
+                               sav->refcnt--; // unlinked from pcb
+                       } else {
+                               printf("%s: SAV inconsistency\n", __FUNCTION__);
+                       }
+
+                       key_sa_chgstate(sav, SADB_SASTATE_DEAD);
+                       key_freesav(sav, KEY_SADB_LOCKED);
+               }
+       }
+
+       // clear the rest of the SAs
+       key_delsah(*sah);
+       lck_mtx_unlock(sadb_mutex);
+       return 0;
+}
+
+static void
+utun_ipsec_set_udp_encap_listen_port (utun_crypto_dir_t dir,
+                                     u_int16_t         natt_port)
+{
+       if (dir == UTUN_CRYPTO_DIR_IN) {
+               if (natt_port && natt_port != 500) {
+                       esp_udp_encap_port = natt_port;
+               }
+       }       
+}
+
+static void
+utun_set_lifetime (struct sadb_lifetime *lfh,
+                  int                   type,
+                  u_int64_t             l_time)
+{
+       lfh->sadb_lifetime_len = (sizeof(*lfh) >> 3); // convert to words
+       lfh->sadb_lifetime_exttype = type;
+       lfh->sadb_lifetime_allocations = 0;
+       lfh->sadb_lifetime_bytes = 0;
+       lfh->sadb_lifetime_addtime = l_time;
+       lfh->sadb_lifetime_usetime = l_time;
+}
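+
+// (struct sadb_lifetime is 32 bytes, so the ">> 3" above yields 4 --
+// PF_KEY extension lengths are counted in 8-byte words.)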
+
+static struct sadb_key *
+utun_ipsec_set_keybuf (u_int16_t  type,
+                      u_int8_t  *key,
+                      u_int16_t  key_len)
+{
+       struct sadb_key *new;
+       int len = sizeof(*new) + BITSTOBYTES(key_len);
+
+       lck_mtx_lock(sadb_mutex);
+       new = utun_alloc(len);
+       if (new == NULL) {
+               // don't leak sadb_mutex on allocation failure
+               lck_mtx_unlock(sadb_mutex);
+               return NULL;
+       }
+       lck_mtx_unlock(sadb_mutex);
+       bzero(new, len);
+       new->sadb_key_len = BITSTOBYTES(key_len);
+       new->sadb_key_exttype = type;
+       new->sadb_key_bits = key_len;
+       bcopy(key, &new[1], new->sadb_key_len);
+       return new;
+}
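+
+// The buffer built above is a struct sadb_key header immediately followed
+// by the raw key bytes (the bcopy into &new[1]); note that this helper
+// stores the byte count in sadb_key_len and the bit count in sadb_key_bits.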
+
+static errno_t
+utun_ipsec_alloc_sav (struct secashead                *sah,
+                     struct secasvar                **sav,
+                     struct utun_pcb                 *pcb,
+                     u_int8_t                         satype,
+                     u_int8_t                         alg_auth,
+                     u_int8_t                         alg_enc,
+                     u_int32_t                        flags,
+                     u_int8_t                         replay,
+                     u_int8_t                        *key_auth,
+                     u_int16_t                        key_auth_len,
+                     u_int8_t                        *key_enc,
+                     u_int16_t                        key_enc_len,
+                     u_int16_t                        natt_port,
+                     u_int32_t                        seq,
+                     u_int32_t                        spi,
+                     u_int32_t                        pid,
+                     u_int64_t                        lifetime_hard,
+                     u_int64_t                        lifetime_soft)
+{
+       struct sadb_key      *keye, *keya;
+       struct sadb_lifetime  lfh, lfs;
+
+       if (*sav) {
+               return EINVAL;
+       }
+
+       bzero(&lfh, sizeof(lfh));
+       utun_set_lifetime(&lfh, SADB_EXT_LIFETIME_HARD, lifetime_hard);
+       bzero(&lfs, sizeof(lfs));
+       utun_set_lifetime(&lfs, SADB_EXT_LIFETIME_SOFT, lifetime_soft);
+
+       if ((keya = utun_ipsec_set_keybuf(SADB_EXT_KEY_AUTH, key_auth, key_auth_len)) == NULL) {
+               return ENOBUFS;
+       }
+       if ((keye = utun_ipsec_set_keybuf(SADB_EXT_KEY_ENCRYPT, key_enc, key_enc_len)) == NULL) {
+               utun_free(keya);
+               return ENOBUFS;
+       }
+
+       lck_mtx_lock(sadb_mutex);
+       if ((*sav = key_newsav2(sah,
+                               satype,
+                               alg_auth,
+                               alg_enc,
+                               flags,
+                               replay,
+                               keya,
+                               key_auth_len,
+                               keye,
+                               key_enc_len,
+                               natt_port,
+                               seq,
+                               spi,
+                               pid,
+                               &lfh,
+                               &lfs)) == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               utun_free(keya);
+               utun_free(keye);
+               return ENOBUFS;
+       }
+       (*sav)->utun_pcb = (__typeof__((*sav)->utun_pcb))pcb;
+       (*sav)->utun_is_keepalive_fn = (__typeof__((*sav)->utun_is_keepalive_fn))utun_pkt_is_ipsec_keepalive;
+       (*sav)->utun_in_fn = (__typeof__((*sav)->utun_in_fn))utun_pkt_ipsec_input;
+       (*sav)->refcnt++; // for the pcb
+       lck_mtx_unlock(sadb_mutex);
+       utun_free(keya);
+       utun_free(keye);
+       return 0;
+}
+
+static int
+utun_ipsec_free_sav (struct secasvar  **sav)
+{
+       lck_mtx_lock(sadb_mutex);
+       if ((*sav)->utun_pcb) {
+               (*sav)->utun_pcb = NULL;
+               (*sav)->utun_is_keepalive_fn = NULL;
+               (*sav)->utun_in_fn = NULL;
+       }
+       (*sav)->refcnt--; // unlinked from pcb
+       key_sa_chgstate(*sav, SADB_SASTATE_DEAD);
+       key_freesav(*sav, KEY_SADB_LOCKED);
+       lck_mtx_unlock(sadb_mutex);
+       *sav = NULL;
+       return 0;
+}
+
+static int
+utun_ipsec_num_savs (struct secashead **sah)
+{
+       struct secasvar *sav;
+       struct secasvar *nextsav;
+       u_int            state;
+       int              n = 0;
+
+       lck_mtx_lock(sadb_mutex);
+       for (state = 0; state < SADB_SASTATE_MAX; state++) {
+               for (sav = LIST_FIRST(&(*sah)->savtree[state]);
+                    sav != NULL;
+                    sav = nextsav) {
+                       nextsav = LIST_NEXT(sav, chain);
+                       if (sav->state == SADB_SASTATE_LARVAL ||
+                           sav->state == SADB_SASTATE_DYING ||
+                           sav->state == SADB_SASTATE_DEAD) {
+                               continue;
+                       }
+
+                       if (sav->utun_pcb) {
+                               n++;
+                       } else {
+                               printf("%s: SAV inconsistency\n", __FUNCTION__);
+                       }
+               }
+       }
+       lck_mtx_unlock(sadb_mutex);
+
+       return n;
+}
+
+static errno_t
+utun_ctl_config_crypto_keys_ipsec_v1 (struct utun_pcb         *pcb,
+                                     utun_crypto_keys_args_t *args,
+                                     utun_crypto_keys_t      *crypto_keys)
+{
+       utun_crypto_keys_ipsec_args_v1_t *args_ipsec_v1 = &args->u.ipsec_v1;
+       u_int8_t                         *varargs_buf = UTUN_CRYPTO_KEYS_ARGS_VARARGS_BUF(args);
+       errno_t                           err;
+       struct secashead                 *sah;
+       u_int16_t                         proto;
+       u_int8_t                          mode;
+       u_int8_t                          satype, aalg, ealg;
+       u_int32_t                         flags;
+       
+       if (args_ipsec_v1->key_auth_len > MAX_KEY_AUTH_LEN_BITS) {
+               printf("%s: invalid auth key len %d, max %d\n", __FUNCTION__,
+                      args_ipsec_v1->key_auth_len, MAX_KEY_AUTH_LEN_BITS);
+               return EINVAL;
+       }
+       if (args_ipsec_v1->key_enc_len > MAX_KEY_ENC_LEN_BITS) {
+               printf("%s: invalid enc key len %d, max %d\n", __FUNCTION__,
+                      args_ipsec_v1->key_enc_len, MAX_KEY_ENC_LEN_BITS);
+               return EINVAL;
+       }
+       if (args->varargs_buflen != (__typeof__(args->varargs_buflen))((BITSTOBYTES(args_ipsec_v1->key_auth_len) + 
+                                                                       BITSTOBYTES(args_ipsec_v1->key_enc_len)))) {
+               printf("%s: len check failed (%d,%d, %d)\n", __FUNCTION__,
+                      args->varargs_buflen, args_ipsec_v1->key_auth_len, args_ipsec_v1->key_enc_len);
+               return EINVAL;
+       }
+       sah = IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys);
+       if (!sah) {
+               // TODO: make sure we pass through this once
+               proto = utun_ipsec_proto_to_sadb_proto(args_ipsec_v1->proto);
+               mode = utun_ipsec_mode_to_sadb_mode(args_ipsec_v1->mode);
+
+               if ((err = utun_ipsec_set_sah(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys),
+                                             UTUN_CRYPTO_DIR_TO_IPSEC_DIR(args->dir),
+                                             proto,
+                                             mode,
+                                             args_ipsec_v1->reqid,
+                                             &args_ipsec_v1->src_addr,
+                                             &args_ipsec_v1->dst_addr))) {
+                       return err;
+               }
+               sah = IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys);
+               if (!sah) {
+                       return EBADF;
+               }
+       }
+
+       satype = utun_ipsec_proto_to_sadb_satype(args_ipsec_v1->proto);
+       aalg = utun_ipsec_auth_to_sadb_aalg(args_ipsec_v1->alg_auth);
+       ealg = utun_ipsec_enc_to_sadb_ealg(args_ipsec_v1->alg_enc);
+       flags = utun_ipsec_keepalive_and_nat_info_to_sadb_flags(args_ipsec_v1->keepalive,
+                                                               args_ipsec_v1->punt_rx_keepalive,
+                                                               args_ipsec_v1->natd,
+                                                               args_ipsec_v1->natt_port);
+
+       if ((err = utun_ipsec_alloc_sav(sah,
+                                       &IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(crypto_keys),
+                                       pcb,
+                                       satype,
+                                       aalg,
+                                       ealg,
+                                       flags,
+                                       args_ipsec_v1->replay,
+                                       varargs_buf,
+                                       args_ipsec_v1->key_auth_len,
+                                       (varargs_buf + BITSTOBYTES(args_ipsec_v1->key_auth_len)),
+                                       args_ipsec_v1->key_enc_len,
+                                       args_ipsec_v1->natt_port,
+                                       args_ipsec_v1->seq,
+                                       args_ipsec_v1->spi,
+                                       args_ipsec_v1->pid,
+                                       args_ipsec_v1->lifetime_hard,
+                                       args_ipsec_v1->lifetime_soft))) {
+               return err;
+       }
+       crypto_keys->state.u.ipsec.proto = sah->saidx.proto;
+       crypto_keys->state.u.ipsec.mode = sah->saidx.mode;
+       if (((struct sockaddr *)&sah->saidx.src)->sa_family == AF_INET) {
+               crypto_keys->state.u.ipsec.ifamily = IPPROTO_IPV4;
+       } else {
+               crypto_keys->state.u.ipsec.ifamily = IPPROTO_IPV6;
+       }
+       crypto_keys->state.u.ipsec.spi = args_ipsec_v1->spi;
+       utun_ipsec_set_udp_encap_listen_port(args->dir, args_ipsec_v1->natt_port);
+       return 0;
+}
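
The length check above pins down the layout of the variable-length buffer: the authentication key bytes come first, immediately followed by the encryption key bytes, and varargs_buflen must equal the sum of the two byte-rounded key lengths. A hedged sketch of the same validate-and-split step, with an assumed rounding-up BITSTOBYTES definition:

    #include <stddef.h>
    #include <stdint.h>

    #define BITSTOBYTES(n)  (((n) + 7) / 8)   /* assumed rounding-up definition */

    /* returns 0 and sets the two key pointers on success, -1 on a bad length */
    static int split_key_blob(uint8_t *buf, size_t buflen,
                              uint16_t key_auth_bits, uint16_t key_enc_bits,
                              uint8_t **key_auth, uint8_t **key_enc)
    {
        size_t auth_len = BITSTOBYTES(key_auth_bits);
        size_t enc_len  = BITSTOBYTES(key_enc_bits);

        if (buflen != auth_len + enc_len)
            return -1;              /* mirrors the EINVAL path above */
        *key_auth = buf;            /* auth key at the front ... */
        *key_enc  = buf + auth_len; /* ... enc key right behind it */
        return 0;
    }
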
+
+static errno_t
+utun_ctl_unconfig_crypto_keys_ipsec_v1 (utun_crypto_keys_t *crypto_keys)
+{
+       if (!IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys)) {
+               return EBADF;
+       }
+       if (!IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(crypto_keys)) {
+               return EBADF;
+       }
+       if (utun_ipsec_free_sav(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(crypto_keys))) {
+               return EADDRNOTAVAIL;
+       }
+       if (!utun_ipsec_num_savs(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys))) {
+               (void)utun_ipsec_clr_sahs(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys));
+
+               // release sah
+               IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(crypto_keys) = NULL;
+       }
+
+       return 0;
+}
+
+static void
+utun_set_spirange (struct sadb_spirange *spirange,
+                  u_int32_t             spirange_min,
+                  u_int32_t             spirange_max)
+{
+       spirange->sadb_spirange_min = spirange_min;
+       spirange->sadb_spirange_max = spirange_max;
+}
+
+static u_int32_t
+utun_ipsec_get_spi (struct sockaddr_storage  *src_addr,
+                   struct sockaddr_storage  *dst_addr,
+                   u_int16_t                 proto,
+                   u_int8_t                  mode,
+                   u_int32_t                 reqid,
+                   u_int32_t                 spirange_min,
+                   u_int32_t                 spirange_max)
+{
+       struct sadb_spirange spirange;
+       utun_set_spirange(&spirange, spirange_min, spirange_max);
+       // TODO: should this allocate an SAH?
+       return key_getspi2((struct sockaddr *)src_addr,
+                          (struct sockaddr *)dst_addr,
+                          proto,
+                          mode,
+                          reqid,
+                          &spirange);
+}
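
key_getspi2() is expected to hand back an unused SPI inside the caller's [spirange_min, spirange_max] window, or 0 on failure, which is why the caller below treats a zero SPI as ENOBUFS. A standalone sketch of range-bound SPI selection (the availability check is hypothetical; standard C rand() is used purely for brevity):

    #include <stdint.h>
    #include <stdlib.h>

    /* hypothetical predicate: is this SPI already in use? */
    static int spi_in_use(uint32_t spi) { (void)spi; return 0; }

    /* pick an unused SPI in [min, max]; 0 means failure (SPI 0 is reserved) */
    static uint32_t pick_spi(uint32_t min, uint32_t max)
    {
        if (min == 0 || min > max)
            return 0;
        for (int tries = 0; tries < 1000; tries++) {
            uint32_t spi = min + (uint32_t)rand() % (max - min + 1);
            if (!spi_in_use(spi))
                return spi;
        }
        return 0;   /* caller maps this to ENOBUFS */
    }
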
+
+static errno_t
+utun_ctl_generate_crypto_keys_idx_ipsec_v1 (utun_crypto_keys_idx_args_t *args)
+{
+       utun_crypto_keys_idx_ipsec_args_v1_t *args_ipsec_v1 = &args->u.ipsec_v1;
+       u_int16_t                             proto;
+       u_int8_t                              mode;
+
+       proto = utun_ipsec_proto_to_sadb_proto(args_ipsec_v1->proto);
+       mode = utun_ipsec_mode_to_sadb_mode(args_ipsec_v1->mode);
+
+       args_ipsec_v1->spi = 0;
+       if ((args_ipsec_v1->spi = utun_ipsec_get_spi(&args_ipsec_v1->src_addr,
+                                                    &args_ipsec_v1->dst_addr,
+                                                    proto,
+                                                    mode,
+                                                    args_ipsec_v1->reqid,
+                                                    args_ipsec_v1->spirange_min,
+                                                    args_ipsec_v1->spirange_max)) == 0) {
+               return ENOBUFS;
+       }
+       return 0;
+}
+
+void
+utun_cleanup_all_crypto_ipsec (struct utun_pcb   *pcb)
+{
+       int                 idx;
+       utun_crypto_ctx_t  *crypto_ctx;
+       utun_crypto_keys_t *cur_crypto_keys, *nxt_crypto_keys;
+
+       for (idx = 0; idx < UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_MAX); idx++) {
+               crypto_ctx = &pcb->utun_crypto_ctx[idx];
+               if (!crypto_ctx->valid ||
+                   crypto_ctx->type != UTUN_CRYPTO_TYPE_IPSEC) {
+                       continue;
+               }
+
+               // flush all crypto materials
+               for (cur_crypto_keys = (__typeof__(cur_crypto_keys))LIST_FIRST(&crypto_ctx->keys_listhead);
+                    cur_crypto_keys != NULL;
+                    cur_crypto_keys = nxt_crypto_keys) {
+                       nxt_crypto_keys = (__typeof__(nxt_crypto_keys))LIST_NEXT(cur_crypto_keys, chain);
+
+                       if (!cur_crypto_keys->valid) {
+                               continue;
+                       }
+
+                       if (IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(cur_crypto_keys)) {
+                               (void)utun_ipsec_free_sav(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(cur_crypto_keys));
+                       }
+
+                       if (IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(cur_crypto_keys)) {               
+                               (void)utun_ipsec_clr_sahs(&IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(cur_crypto_keys));
+                       }
+                       
+                       LIST_REMOVE(cur_crypto_keys, chain);
+                       bzero(cur_crypto_keys, sizeof(*cur_crypto_keys));
+                       utun_free(cur_crypto_keys);
+               }
+
+               bzero(crypto_ctx, sizeof(*crypto_ctx));
+       }
+}
+
+static errno_t
+utun_ctl_enable_crypto_ipsec_v1 (__unused utun_crypto_args_t *args)
+{
+       return 0;
+}
+
+/*
+ * Summary: enables ipsec crypto info for the specified utun.
+ */
+void
+utun_ctl_enable_crypto_ipsec(__unused struct utun_pcb    *pcb,
+                            utun_crypto_args_t *args)
+{
+       lck_mtx_lock(sadb_mutex);
+       /* Turn off the ipsec bypass, if already on */
+       if (ipsec_bypass) {
+               ipsec_bypass = 0;
+       }
+       if (args->ver == UTUN_CRYPTO_KEYS_IPSEC_VER_1) {
+               (void)utun_ctl_enable_crypto_ipsec_v1(args);
+       }
+       lck_mtx_unlock(sadb_mutex);
+}
+
+/*
+ * Summary: disables ipsec crypto info for the specified utun.
+ */
+void
+utun_ctl_disable_crypto_ipsec(__unused struct utun_pcb   *pcb)
+{
+       utun_cleanup_all_crypto_ipsec(pcb);
+       lck_mtx_lock(sadb_mutex);
+       /* Turn on the ipsec bypass, if there are no other policies */
+       if (!ipsec_policy_count && !ipsec_bypass) // TODO: ipsec_policy_count may be 1 by default
+               ipsec_bypass = 1;
+       utun_punt_rx_keepalive = 0;
+       lck_mtx_unlock(sadb_mutex);
+}
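
Enable and disable bracket the global ipsec_bypass flag: enabling utun crypto forces bypass off so ESP input is processed, and disabling turns it back on only when no other IPsec policies are installed, with both updates serialized under sadb_mutex. A userland sketch of that guarded toggle, using pthreads in place of the kernel mutex:

    #include <pthread.h>

    static pthread_mutex_t sadb_lock = PTHREAD_MUTEX_INITIALIZER;
    static int ipsec_bypass = 1;     /* bypass on: ESP input short-circuits */
    static int ipsec_policy_count;   /* other installed policies */

    static void crypto_enable(void)
    {
        pthread_mutex_lock(&sadb_lock);
        if (ipsec_bypass)
            ipsec_bypass = 0;        /* start processing ESP */
        pthread_mutex_unlock(&sadb_lock);
    }

    static void crypto_disable(void)
    {
        pthread_mutex_lock(&sadb_lock);
        if (ipsec_policy_count == 0 && !ipsec_bypass)
            ipsec_bypass = 1;        /* nothing else needs IPsec */
        pthread_mutex_unlock(&sadb_lock);
    }
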
+
+errno_t
+utun_ctl_config_crypto_keys_ipsec (struct utun_pcb         *pcb,
+                                  utun_crypto_keys_args_t *args,
+                                  utun_crypto_keys_t      *crypto_keys)
+{
+       if (args->ver == UTUN_CRYPTO_KEYS_IPSEC_VER_1) {
+               return(utun_ctl_config_crypto_keys_ipsec_v1(pcb, args, crypto_keys));                      
+       } else {
+               printf("%s: ver unsupported (%d, %d)\n", __FUNCTION__, args->ver, UTUN_CRYPTO_KEYS_IPSEC_VER_1);
+               return EINVAL;
+       }
+}
+
+errno_t
+utun_ctl_unconfig_crypto_keys_ipsec (utun_crypto_keys_args_t *args,
+                                    utun_crypto_keys_t      *crypto_keys)
+{
+       if (args->ver == UTUN_CRYPTO_KEYS_IPSEC_VER_1) {
+               return(utun_ctl_unconfig_crypto_keys_ipsec_v1(crypto_keys));                       
+       } else {
+               printf("%s: ver unsupported (%d, %d)\n", __FUNCTION__, args->ver, UTUN_CRYPTO_KEYS_IPSEC_VER_1);
+               return EINVAL;
+       }
+}
+
+errno_t
+utun_ctl_generate_crypto_keys_idx_ipsec (utun_crypto_keys_idx_args_t *args)
+{
+       if (args->ver == UTUN_CRYPTO_KEYS_IPSEC_VER_1) {
+               return(utun_ctl_generate_crypto_keys_idx_ipsec_v1(args));                          
+       } else {
+               printf("%s: ver unsupported (%d, %d)\n", __FUNCTION__, args->ver, UTUN_CRYPTO_KEYS_IPSEC_VER_1);
+               return EINVAL;
+       }
+}
+
+int
+utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt)
+{
+       utun_crypto_keys_t *crypto_keys = IF_UTUN_GET_TX_CRYPTO_KEYS(pcb);
+       struct secasvar    *sav;
+       protocol_family_t   proto;
+       mbuf_t              new;
+       int                 err;
+       struct route       *ro = NULL;
+       struct route        ro_copy;
+       struct ip_out_args  ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
+
+       if (crypto_keys &&
+           crypto_keys->state.u.ipsec.proto == IPPROTO_ESP &&
+           (sav = IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(crypto_keys)) &&
+           sav->state == SADB_SASTATE_MATURE) {
+               // TODO: update stats to increment outgoing packets
+               // TODO: allow empty packets thru
+
+               proto = ntohl(*(mtod(*pkt, protocol_family_t *)));
+               m_adj(*pkt, sizeof(protocol_family_t));
+
+               bzero(&ro_copy, sizeof(ro_copy));
+
+               if ((proto == AF_UTUN || proto == AF_INET) && crypto_keys->state.u.ipsec.ifamily == IPPROTO_IPV4) {
+                       struct ip          *ip;
+                       struct sockaddr_in *dst4;
+
+                       if (proto == AF_INET) {
+                               if ((*pkt)->m_len < (__typeof__((*pkt)->m_len))sizeof(*ip)) {
+                                       if (!(*pkt = m_pullup(*pkt, sizeof(*ip)))) {
+                                               printf("%s: m_pullup failed\n", __FUNCTION__);
+                                               return 0;
+                                       }
+                               }
+
+                               // split the mbuf chain to put the ip header and payloads in separate mbufs
+                               new = ipsec4_splithdr(*pkt);
+                               if (!new) {
+                                       printf("%s: ipsec4_splithdr(1) failed\n", __FUNCTION__);
+                                       if (ro_copy.ro_rt != NULL) {
+                                               rtfree(ro_copy.ro_rt);
+                                       }
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+                               *pkt = new;
+
+                               // encapsulate with the outer header
+                               if ((err = ipsec4_encapsulate(new, sav))) {
+                                       printf("%s: ipsec4_encapsulate failed (%d)\n", __FUNCTION__, err);
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+
+                       } else {
+                               // otherwise it's AF_UTUN which will be a keepalive packet to be encapsulated, encrypted and sent
+                               // encapsulate with the outer header
+                               if ((err = ipsec4_encapsulate_utun_esp_keepalive(pkt, sav))) {
+                                       printf("%s: ipsec4_encapsulate failed (%d)\n", __FUNCTION__, err);
+                                       return 0;
+                               }
+                               new = *pkt;
+                       }
+
+                       ip = mtod(new, __typeof__(ip));
+                       // grab sadb_mutex, to update sah's route cache and get a local copy of it
+                       lck_mtx_lock(sadb_mutex);
+                       ro = &sav->sah->sa_route;
+                       dst4 = (struct sockaddr_in *)(void *)&ro->ro_dst;
+                       if (ro->ro_rt) {
+                               RT_LOCK(ro->ro_rt);
+                       }
+                       if (ro->ro_rt != NULL &&
+                           (ro->ro_rt->generation_id != route_generation ||
+                            !(ro->ro_rt->rt_flags & RTF_UP) ||
+                            dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+                               RT_UNLOCK(ro->ro_rt);
+                               rtfree(ro->ro_rt);
+                               ro->ro_rt = NULL;
+                       }
+                       if (ro->ro_rt == NULL) {
+                               dst4->sin_family = AF_INET;
+                               dst4->sin_len = sizeof(*dst4);
+                               dst4->sin_addr = ip->ip_dst;
+                               rtalloc(ro);
+                               if (ro->ro_rt) {
+                                       RT_LOCK(ro->ro_rt);
+                               } else {
+                                       printf("%s: rtalloc(1) failed\n", __FUNCTION__);
+                                       mbuf_freem(new);
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+                       }
+                       if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
+                               dst4 = (struct sockaddr_in *)(void *)ro->ro_rt->rt_gateway;
+                       }
+                       RT_UNLOCK(ro->ro_rt);
+                       route_copyout(&ro_copy, ro, sizeof(ro_copy));
+                       // release sadb_mutex, after updating sah's route cache and getting a local copy
+                       lck_mtx_unlock(sadb_mutex);
+
+                       // split the mbuf chain to put the ip header and payloads in separate mbufs
+                       new = ipsec4_splithdr(*pkt);
+                       if (!new) {
+                               printf("%s: ipsec4_splithdr(2) failed\n", __FUNCTION__);
+                               if (ro_copy.ro_rt != NULL) {
+                                       rtfree(ro_copy.ro_rt);
+                               }
+                               *pkt = NULL;
+                               return 0;
+                       }
+                       *pkt = new;
+
+                       if ((err = esp4_output(new, sav))) {
+                               printf("%s: esp4_output failed (%d)\n", __FUNCTION__, err);
+                               if (ro_copy.ro_rt != NULL) {
+                                       rtfree(ro_copy.ro_rt);
+                               }
+                               *pkt = NULL;
+                               return 0; // drop
+                       }
+
+                       ip = mtod(new, __typeof__(ip));
+                       ip->ip_len = ntohs(ip->ip_len);  /* flip len field before calling ip_output */
+               } else if ((proto == AF_UTUN || proto == AF_INET6) && crypto_keys->state.u.ipsec.ifamily == IPPROTO_IPV6) {
+                       int                  plen;
+                       struct ip6_hdr      *ip6;
+                       struct sockaddr_in6 *dst6;
+
+                       if (proto == AF_INET6) {
+                               // split the mbuf chain to put the ip header and payloads in separate mbufs
+                               new = ipsec6_splithdr(*pkt);
+                               if (!new) {
+                                       printf("%s: ipsec6_splithdr(1) failed\n", __FUNCTION__);
+                                       if (ro_copy.ro_rt != NULL) {
+                                               rtfree(ro_copy.ro_rt);
+                                       }
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+                               *pkt = new;
+
+                               // encapsulate with the outer header
+                               if ((err = ipsec6_encapsulate(new, sav))) {
+                                       printf("%s: ipsec6_encapsulate failed (%d)\n", __FUNCTION__, err);
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+
+                       } else {
+                               // otherwise it's AF_UTUN which will be a keepalive packet to be encapsulated, encrypted and sent
+                               // encapsulate with the outer header
+                               if ((err = ipsec6_encapsulate_utun_esp_keepalive(pkt, sav))) {
+                                       printf("%s: ipsec6_encapsulate failed (%d)\n", __FUNCTION__, err);
+                                       return 0;
+                               }
+                               new = *pkt;
+                       }
+
+                       ip6 = mtod(new, __typeof__(ip6));
+                       // grab sadb_mutex, before updating sah's route cache
+                       lck_mtx_lock(sadb_mutex);
+                       ro = &sav->sah->sa_route;
+                       dst6 = (struct sockaddr_in6 *)(void *)&ro->ro_dst;
+                       if (ro->ro_rt) {
+                               RT_LOCK(ro->ro_rt);
+                       }
+                       if (ro->ro_rt != NULL &&
+                           (ro->ro_rt->generation_id != route_generation ||
+                            !(ro->ro_rt->rt_flags & RTF_UP) ||
+                            !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) {
+                               RT_UNLOCK(ro->ro_rt);
+                               rtfree(ro->ro_rt);
+                               ro->ro_rt = NULL;
+                       }
+                       if (ro->ro_rt == NULL) {
+                               bzero(dst6, sizeof(*dst6));
+                               dst6->sin6_family = AF_INET6;
+                               dst6->sin6_len = sizeof(*dst6);
+                               dst6->sin6_addr = ip6->ip6_dst;
+                               rtalloc(ro);
+                               if (ro->ro_rt) {
+                                       RT_LOCK(ro->ro_rt);
+                               } else {
+                                       printf("%s: rtalloc(2) failed\n", __FUNCTION__);
+                                       mbuf_freem(new);
+                                       *pkt = NULL;
+                                       return 0;
+                               }
+                       }
+                       if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
+                               dst6 = (struct sockaddr_in6 *)(void *)ro->ro_rt->rt_gateway;
+                       }
+                       RT_UNLOCK(ro->ro_rt);
+                       route_copyout(&ro_copy, ro, sizeof(ro_copy));
+                       // release sadb_mutex, after updating sah's route cache and getting a local copy
+                       lck_mtx_unlock(sadb_mutex);
+
+                       // split the mbuf chain to put the ip header and payloads in separate mbufs
+                       new = ipsec6_splithdr(*pkt);
+                       if (!new) {
+                               printf("%s: ipsec6_splithdr failed\n", __FUNCTION__);
+                               if (ro_copy.ro_rt != NULL) {
+                                       rtfree(ro_copy.ro_rt);
+                               }
+                               *pkt = NULL;
+                               return 0;
+                       }
+                       *pkt = new;
+                       
+                       if ((err = esp6_output(new, mtod(new, u_char *), new->m_next, sav))) {
+                               printf("%s: esp6_output failed (%d)\n", __FUNCTION__, err);
+                               if (ro_copy.ro_rt != NULL) {
+                                       rtfree(ro_copy.ro_rt);
+                               }
+                               *pkt = NULL;
+                               return 0; // drop
+                       }
+
+                       plen = new->m_pkthdr.len - sizeof(struct ip6_hdr);
+                       if (plen > IPV6_MAXPACKET) {
+                               printf("%s: esp6_output failed due to invalid len (%d)\n", __FUNCTION__, plen);
+                               if (ro_copy.ro_rt != NULL) {
+                                       rtfree(ro_copy.ro_rt);
+                               }
+                               mbuf_freem(new);
+                               *pkt = NULL;
+                               return 0;
+                       }
+                       ip6 = mtod(new, __typeof__(ip6));
+                       ip6->ip6_plen = ntohs(ip6->ip6_plen);  /* flip len field before calling ip_output */
+               } else {
+                       printf("%s: packet's proto (%d) mismatched the context's proto (%d)\n", __FUNCTION__,
+                                  proto, crypto_keys->state.u.ipsec.ifamily);
+                       mbuf_freem(*pkt);
+                       *pkt = NULL;
+                       return 0;
+               }
+
+               if (pcb->utun_ifp) {
+                       ifnet_stat_increment_out(pcb->utun_ifp, 1, mbuf_pkthdr_len(new), 0);
+               }
+
+               if ((err = ip_output(new, NULL, &ro_copy,
+                   (IP_OUTARGS | IP_NOIPSEC), NULL, &ipoa))) {
+                       printf("%s: ip_output failed (%d)\n", __FUNCTION__, err);
+               }
+               lck_mtx_lock(sadb_mutex);
+               route_copyin(&ro_copy, ro, sizeof(*ro));
+               lck_mtx_unlock(sadb_mutex);
+               return 0;
+       } else {
+               printf("%s: no suitable crypto-mat\n", __FUNCTION__);
+       }
+       return -1;
+}
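
Both address families reuse the SA header's cached route only after checking that it is still current: the global route generation must match, the route must be up, and the cached destination must still equal the packet's destination; otherwise the entry is freed and looked up again. A compact sketch of that validate-or-refresh pattern with a hypothetical, pared-down route entry:

    #include <stdint.h>

    /* hypothetical stand-in for a cached struct route */
    struct cached_route {
        int      valid;        /* stands in for ro_rt != NULL */
        int      up;           /* stands in for RTF_UP */
        uint32_t generation;   /* snapshot of route_generation */
        uint32_t dst;          /* cached destination address */
    };

    static uint32_t route_generation;  /* bumped whenever the routing table changes */

    /* usable only if still current, still up, and still pointing at dst */
    static int route_cache_usable(const struct cached_route *ro, uint32_t dst)
    {
        return ro->valid &&
               ro->generation == route_generation &&
               ro->up &&
               ro->dst == dst;
    }

    /* caller pattern: if (!route_cache_usable(ro, dst)) { free it, re-lookup } */
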
+
+// returns 0 if false, 1 if true, and -1 if there was a failure
+int
+utun_pkt_is_ipsec_keepalive (struct utun_pcb *pcb, mbuf_t *pkt, u_int16_t nxt, u_int32_t flags, size_t offs)
+{
+       int result;
+       u_int8_t *data;
+       int size_diff;
+
+       if (!pcb->utun_ctlref) {
+               printf("%s - utun ctlref cleared\n", __FUNCTION__);
+               return 0;
+       }
+
+       if (!(pcb->utun_flags & UTUN_FLAGS_CRYPTO)) {
+               printf("%s - crypto disabled\n", __FUNCTION__);
+               return 0;
+       }
+
+       if ((*pkt)->m_pkthdr.len < 0) {
+               printf("%s - invalid hdr len, len %d, offs %lu\n", __FUNCTION__, (*pkt)->m_pkthdr.len, offs);
+               return 0;
+       }
+
+       if ((size_t)(*pkt)->m_pkthdr.len <= offs) {
+               printf("%s - invalid offset, len %d, offs %lu\n", __FUNCTION__, (*pkt)->m_pkthdr.len, offs);
+               return 0;
+       }
+
+       if ((*pkt)->m_len < 0) {
+               printf("%s - invalid len, len %d, offs %lu\n", __FUNCTION__, (*pkt)->m_len, offs);
+               return 0;
+       }
+
+       // pullup offs + 1 bytes
+       if ((size_t)(*pkt)->m_len < (offs + 1)) {
+               if ((*pkt = m_pullup(*pkt, (offs + 1))) == NULL) {
+                       printf("%s: m_pullup failed\n", __FUNCTION__);
+                       return -1;
+               }
+       }
+
+       if (pcb->utun_ifp) {
+               ifnet_stat_increment_in(pcb->utun_ifp, 1, mbuf_pkthdr_len(*pkt), 0);
+       }
+
+       size_diff = (*pkt)->m_pkthdr.len - offs;
+       data = mtod(*pkt, __typeof(data));
+       data += offs;
+
+       // An ESP keepalive meets all of these conditions: the SA allows ESP keepalives, the ESP trailer's next proto indicates IP, and the decrypted payload is a single zeroed byte.
+       if (flags & SADB_X_EXT_ESP_KEEPALIVE &&
+           nxt == IPPROTO_IPV4 &&
+           size_diff == 1 &&
+           *data == 0) {
+               // TODO: update stats to increment keepalives and current timestamp
+               if (utun_punt_rx_keepalive ||
+                       flags & SADB_X_EXT_PUNT_RX_KEEPALIVE) {
+
+                       // strip all headers
+                       if ((size_t)(*pkt)->m_len >= (offs + size_diff)) {
+                               ovbcopy((caddr_t)data, (data + offs), size_diff);
+                               (*pkt)->m_data += offs;
+                               (*pkt)->m_len -= offs;
+                               (*pkt)->m_pkthdr.len -= offs;
+                       } else {
+                               struct mbuf *n;
+
+                               n = m_split(*pkt, offs, M_DONTWAIT);
+                               if (n == NULL) {
+                                       /* *pkt is retained by m_split */
+                                       mbuf_freem(*pkt);
+                                       *pkt = NULL;
+                                       return -1;
+                               }
+                               m_adj(n, offs);
+                               mbuf_freem(*pkt);
+                               *pkt = n;
+                       }
+
+                       // keepalive is being punted up to the control socket; prepend it with a special packet type (PF_UTUN)
+                       if (mbuf_prepend(pkt, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) {
+                               printf("%s - ifnet_output prepend failed\n", __FUNCTION__);
+                               return -1;
+                       }
+                       if ((size_t)(*pkt)->m_len < (sizeof(protocol_family_t) + size_diff)) {
+                               if ((*pkt = m_pullup(*pkt, (sizeof(protocol_family_t) + size_diff))) == NULL) {
+                                       printf("%s: m_pullup failed\n", __FUNCTION__);
+                                       return -1;
+                               }
+                       }
+
+                       // mark UTUN/Keepalive packet
+                       *(protocol_family_t *)mbuf_data(*pkt) = htonl(PF_UTUN);
+
+                       result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, *pkt, CTL_DATA_EOR);
+                       if (result != 0) {
+                               printf("%s: - ctl_enqueuembuf failed: %d\n", __FUNCTION__, result);
+                               mbuf_freem(*pkt);
+                               return -1;
+                       }
+                       *pkt = NULL;
+               }
+               return 1;
+       }
+       return 0;
+}
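
Condensed, the keepalive test above is a pure predicate: the SA must have been configured for ESP keepalives, the ESP trailer's next protocol must be IPv4, and the decrypted payload must be exactly one zero byte. A standalone sketch of that predicate (the flag and protocol values are hypothetical stand-ins for the SADB/IPPROTO constants):

    #include <stdint.h>
    #include <stddef.h>

    #define KA_ESP_KEEPALIVE  0x1   /* stand-in for SADB_X_EXT_ESP_KEEPALIVE */
    #define PROTO_IPV4        4     /* stand-in for IPPROTO_IPV4 */

    static int is_esp_keepalive(uint32_t sa_flags, uint16_t nxt,
                                const uint8_t *payload, size_t payload_len)
    {
        return (sa_flags & KA_ESP_KEEPALIVE) &&
               nxt == PROTO_IPV4 &&
               payload_len == 1 &&
               payload[0] == 0;
    }
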
+
+int
+utun_pkt_ipsec_input (struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
+{
+       if (!m_tag_locate(*pkt, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPSEC, NULL)) {
+               return EINVAL;
+       }
+
+       if (!(pcb->utun_flags & UTUN_FLAGS_CRYPTO)) {
+               printf("%s - crypto disabled\n", __FUNCTION__);
+               return EINVAL;
+       }
+
+       if (!pcb->utun_ifp) {
+               printf("%s - utun ifp cleared\n", __FUNCTION__);
+               return EINVAL;
+       }
+
+       // place protocol number at the beginning of the mbuf
+       if (mbuf_prepend(pkt, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) {
+               printf("%s - ifnet_output prepend failed\n", __FUNCTION__);
+               return ENOBUFS;
+       }
+       *(protocol_family_t *)mbuf_data(*pkt) = htonl(family);
+
+       (void)utun_pkt_input(pcb, *pkt);
+       return 0;
+}
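
Both the input path above and the earlier keepalive punt rely on the utun framing convention: every packet handed up the stack is prefixed with a 4-byte protocol family in network byte order (AF_INET, AF_INET6, or PF_UTUN for control traffic). A userland sketch that builds such a frame around a raw IP packet:

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>
    #include <sys/socket.h>

    /* prepend the 4-byte network-order family header; returns the frame
       length, or 0 if the output buffer is too small */
    static size_t utun_frame(uint8_t *out, size_t outlen,
                             uint32_t family, const uint8_t *pkt, size_t pktlen)
    {
        uint32_t hdr = htonl(family);

        if (outlen < sizeof(hdr) + pktlen)
            return 0;
        memcpy(out, &hdr, sizeof(hdr));
        memcpy(out + sizeof(hdr), pkt, pktlen);
        return sizeof(hdr) + pktlen;
    }

    /* usage: utun_frame(buf, sizeof(buf), AF_INET, ip_pkt, ip_len); */
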
diff --git a/bsd/net/if_utun_crypto_ipsec.h b/bsd/net/if_utun_crypto_ipsec.h
new file mode 100644 (file)
index 0000000..7a4c5f2
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _NET_IF_UTUN_CRYPTO_IPSEC_H_
+#define        _NET_IF_UTUN_CRYPTO_IPSEC_H_
+
+#ifdef KERNEL_PRIVATE
+
+struct utun_pcb;
+
+#define UTUN_CRYPTO_DIR_TO_IPSEC_DIR(dir)       (((dir) == UTUN_CRYPTO_DIR_IN)? IPSEC_DIR_INBOUND : IPSEC_DIR_OUTBOUND)
+#define IF_UTUN_GET_TX_CRYPTO_KEYS(pcb)         LIST_FIRST(&pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)].keys_listhead)
+#define IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAH(keys) keys->state.u.ipsec.sah
+#define IF_UTUN_GET_CRYPTO_KEYS_IPSEC_SAV(keys) keys->state.u.ipsec.sav
+
+/*
+ * Summary: cleans up all crypto info for the specified utun.
+ */
+void
+utun_cleanup_all_crypto_ipsec(struct utun_pcb   *pcb);
+
+/*
+ * Summary: enables ipsec crypto info for the specified utun.
+ */
+void
+utun_ctl_enable_crypto_ipsec(struct utun_pcb   *pcb, utun_crypto_args_t *args);
+
+/*
+ * Summary: disables ipsec crypto info for the specified utun.
+ */
+void
+utun_ctl_disable_crypto_ipsec(struct utun_pcb   *pcb);
+
+/*
+ * Summary: configures an ipsec crypto context for the specified utun, with keying material
+ *          (needed for traffic encrypt/decrypt).
+ * Args:
+ *      pcb - the specified utun state info
+ *      args - the ipsec crypto context keying arguments as passed down from userland.
+ *      crypto_ctx_mat - the ipsec crypto context's keying material to be filled.
+ * Returns: 0 if successful, otherwise returns an appropriate errno.
+ */
+errno_t
+utun_ctl_config_crypto_keys_ipsec(struct utun_pcb         *pcb,
+                                 utun_crypto_keys_args_t *args,
+                                 utun_crypto_keys_t      *crypto_ctx_mat);
+
+/*
+ * Summary: unconfigures the keying material in an ipsec crypto context for the specified utun.
+ * Args:
+ *      args - the ipsec crypto context keying arguments as passed down from userland.
+ *      crypto_ctx_mat - the ipsec crypto context's keying material to be cleared.
+ * Returns: 0 if successful, otherwise returns an appropriate errno.
+ */
+errno_t
+utun_ctl_unconfig_crypto_keys_ipsec(utun_crypto_keys_args_t *args,
+                                   utun_crypto_keys_t      *crypto_ctx_mat);
+
+/*
+ * Summary: generates an SPI/index to be used by the keying material in an ipsec crypto context
+ *          for the specified utun.
+ * Args:
+ *      args - the ipsec crypto context key index arguments as passed down from userland.
+ * Returns: 0 if successful, otherwise returns an appropriate errno.
+ */
+errno_t
+utun_ctl_generate_crypto_keys_idx_ipsec(utun_crypto_keys_idx_args_t *args);
+
+int
+utun_pkt_ipsec_output(struct utun_pcb *pcb, mbuf_t *pkt);
+
+int
+utun_pkt_is_ipsec_keepalive(struct utun_pcb *pcb, mbuf_t *pkt, u_int16_t nxt, u_int32_t flags, size_t off);
+
+int
+utun_pkt_ipsec_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
+
+#endif // KERNEL_PRIVATE
+
+#endif // _NET_IF_UTUN_CRYPTO_IPSEC_H_
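
Taken together, the prototypes above imply a fixed ordering on the control path: enable crypto for the pcb, optionally reserve an SPI, install keying material per direction, let the packet hooks carry traffic, then unconfigure and disable in reverse. A hedged sketch of that sequence (crypto_args, idx_args, keys_args, and crypto_keys are hypothetical, suitably initialized objects; error handling elided; not a compilable unit):

    utun_ctl_enable_crypto_ipsec(pcb, &crypto_args);

    /* reserve an SPI for the new SA; the result lands in idx_args */
    utun_ctl_generate_crypto_keys_idx_ipsec(&idx_args);

    /* install keys for one direction (repeat per direction) */
    utun_ctl_config_crypto_keys_ipsec(pcb, &keys_args, crypto_keys);

    /* data path: utun_pkt_ipsec_output() / utun_pkt_ipsec_input() */

    utun_ctl_unconfig_crypto_keys_ipsec(&keys_args, crypto_keys);
    utun_ctl_disable_crypto_ipsec(pcb);
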
index f3e64b0e34ea6b5abe71a63a0d3d6ee7119984a4..426a78bb5b0d677025341bd828bd8adbaa01d9e5 100644 (file)
@@ -91,7 +91,7 @@
 #define APPLE_IF_FAM_DISC      8
 #define APPLE_IF_FAM_MDECAP    9
 #define APPLE_IF_FAM_GIF       10
-#define APPLE_IF_FAM_FAITH     11
+#define APPLE_IF_FAM_FAITH     11      /* deprecated */
 #define APPLE_IF_FAM_STF       12
 #define APPLE_IF_FAM_FIREWIRE  13
 #define APPLE_IF_FAM_BOND      14
@@ -225,6 +225,10 @@ struct if_data64 {
 
 #ifdef PRIVATE
 struct if_traffic_class {
+       u_int64_t               ifi_ibepackets; /* TC_BE packets received on interface */
+       u_int64_t               ifi_ibebytes;   /* TC_BE bytes received on interface */
+       u_int64_t               ifi_obepackets; /* TC_BE packets sent on interface */
+       u_int64_t               ifi_obebytes;   /* TC_BE bytes sent on interface */
        u_int64_t               ifi_ibkpackets; /* TC_BK packets received on interface */
        u_int64_t               ifi_ibkbytes;   /* TC_BK bytes received on interface */
        u_int64_t               ifi_obkpackets; /* TC_BK packet sent on interface */
@@ -237,6 +241,77 @@ struct if_traffic_class {
        u_int64_t               ifi_ivobytes;   /* TC_VO bytes received on interface */
        u_int64_t               ifi_ovopackets; /* TC_VO packets sent on interface */
        u_int64_t               ifi_ovobytes;   /* TC_VO bytes sent on interface */
+       u_int64_t               ifi_ipvpackets; /* TC priv packets received on interface */
+       u_int64_t               ifi_ipvbytes;   /* TC priv bytes received on interface */
+       u_int64_t               ifi_opvpackets; /* TC priv packets sent on interface */
+       u_int64_t               ifi_opvbytes;   /* TC priv bytes sent on interface */
+};
+
+struct if_data_extended {
+       u_int64_t       ifi_alignerrs;  /* unaligned (32-bit) input pkts */
+};
+
+struct if_packet_stats {
+       /* TCP */
+       u_int64_t               ifi_tcp_badformat;
+       u_int64_t               ifi_tcp_unspecv6;
+       u_int64_t               ifi_tcp_synfin;
+       u_int64_t               ifi_tcp_badformatipsec;
+       u_int64_t               ifi_tcp_noconnnolist;
+       u_int64_t               ifi_tcp_noconnlist;
+       u_int64_t               ifi_tcp_listbadsyn;
+       u_int64_t               ifi_tcp_icmp6unreach;
+       u_int64_t               ifi_tcp_deprecate6;
+       u_int64_t               ifi_tcp_rstinsynrcv;
+       u_int64_t               ifi_tcp_ooopacket;
+       u_int64_t               ifi_tcp_dospacket;
+       u_int64_t               ifi_tcp_cleanup;
+       u_int64_t               ifi_tcp_synwindow;
+       /* UDP */
+       u_int64_t               ifi_udp_port_unreach;
+       u_int64_t               ifi_udp_faithprefix;
+       u_int64_t               ifi_udp_port0;
+       u_int64_t               ifi_udp_badlength;
+       u_int64_t               ifi_udp_badchksum;
+       u_int64_t               ifi_udp_badmcast;
+       u_int64_t               ifi_udp_cleanup;
+       u_int64_t               ifi_udp_badipsec;
+};
+
+struct if_description {
+       u_int32_t       ifd_maxlen;     /* must be IF_DESCSIZE */
+       u_int32_t       ifd_len;        /* actual ifd_desc length */
+       u_int8_t        *ifd_desc;      /* ptr to desc buffer */
+};
+
+struct if_bandwidths {
+       u_int64_t       eff_bw;         /* effective bandwidth */
+       u_int64_t       max_bw;         /* maximum theoretical bandwidth */
+};
+
+struct if_rxpoll_stats {
+       u_int32_t       ifi_poll_off_req;       /* total # of POLL_OFF reqs */
+       u_int32_t       ifi_poll_off_err;       /* total # of POLL_OFF errors */
+       u_int32_t       ifi_poll_on_req;        /* total # of POLL_ON reqs */
+       u_int32_t       ifi_poll_on_err;        /* total # of POLL_ON errors */
+
+       u_int32_t       ifi_poll_wakeups_avg;   /* avg # of wakeup reqs */
+       u_int32_t       ifi_poll_wakeups_lowat; /* wakeups low watermark */
+       u_int32_t       ifi_poll_wakeups_hiwat; /* wakeups high watermark */
+
+       u_int64_t       ifi_poll_packets;       /* total # of polled packets */
+       u_int32_t       ifi_poll_packets_avg;   /* average polled packets */
+       u_int32_t       ifi_poll_packets_min;   /* smallest polled packets */
+       u_int32_t       ifi_poll_packets_max;   /* largest polled packets */
+       u_int32_t       ifi_poll_packets_lowat; /* packets low watermark */
+       u_int32_t       ifi_poll_packets_hiwat; /* packets high watermark */
+
+       u_int64_t       ifi_poll_bytes;         /* total # of polled bytes */
+       u_int32_t       ifi_poll_bytes_avg;     /* average polled bytes */
+       u_int32_t       ifi_poll_bytes_min;     /* smallest polled bytes */
+       u_int32_t       ifi_poll_bytes_max;     /* largest polled bytes */
+       u_int32_t       ifi_poll_bytes_lowat;   /* bytes low watermark */
+       u_int32_t       ifi_poll_bytes_hiwat;   /* bytes high watermark */
 };
 #endif /* PRIVATE */
 
@@ -253,7 +328,7 @@ struct      ifqueue {
        int     ifq_drops;
 };
 
-#ifdef XNU_KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 /*
  * Internal storage of if_data. This is bound to change. Various places in the
  * stack will translate this data structure in to the externally visible
@@ -289,13 +364,33 @@ struct if_data_internal {
        u_int64_t       ifi_noproto;    /* destined for unsupported protocol */
        u_int32_t       ifi_recvtiming; /* usec spent receiving when timing */
        u_int32_t       ifi_xmittiming; /* usec spent xmitting when timing */
+       u_int64_t       ifi_alignerrs;  /* unaligned (32-bit) input pkts */
 #define IF_LASTCHANGEUPTIME    1       /* lastchange: 1-uptime 0-calendar time */
        struct  timeval ifi_lastchange; /* time of last administrative change */
        u_int32_t       ifi_hwassist;   /* HW offload capabilities */
        u_int32_t       ifi_tso_v4_mtu; /* TCP Segment Offload IPv4 maximum segment size */
        u_int32_t       ifi_tso_v6_mtu; /* TCP Segment Offload IPv6 maximum segment size */
 };
-#endif /* XNU_KERNEL_PRIVATE */
+
+/*
+ * Fields per interface to measure perceived bandwidth.
+ */
+
+struct if_measured_bw {
+       u_int64_t       bw;             /* measured bandwidth in bytes per ms */
+       u_int64_t       bytes;          /* XXX not needed */
+       u_int64_t       ts;             /* XXX not needed */
+       u_int64_t       cur_seq __attribute__((aligned(8)));    /* current sequence for marking a packet */
+       u_int64_t       start_ts;       /* time at which a measurement started */
+       u_int64_t       start_seq;      /* sequence at which a measurement should start */
+       u_int64_t       last_seq;       /* last recorded seq */
+       u_int64_t       last_ts;        /* last recorded ts */
+       u_int32_t       flags __attribute__((aligned(4)));              /* flags */
+#define IF_MEASURED_BW_INPROGRESS 0x1
+#define IF_MEASURED_BW_CALCULATION 0x2
+};
+
+#endif /* BSD_KERNEL_PRIVATE */
 
 #ifdef PRIVATE
 #define        if_mtu          if_data.ifi_mtu
@@ -322,12 +417,13 @@ struct if_data_internal {
 #define if_recvquota   if_data.ifi_recvquota
 #define        if_xmitquota    if_data.ifi_xmitquota
 #endif /* PRIVATE */
-#ifdef XNU_KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 #define        if_tso_v4_mtu   if_data.ifi_tso_v4_mtu
 #define        if_tso_v6_mtu   if_data.ifi_tso_v6_mtu
-#endif /* XNU_KERNEL_PRIVATE */
+#define        if_alignerrs    if_data.ifi_alignerrs
+#endif /* BSD_KERNEL_PRIVATE */
 
-#ifdef XNU_KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 /*
  * Forward structure declarations for function prototypes [sic].
  */
@@ -340,6 +436,8 @@ struct ifaddr;
 struct tqdummy;
 struct proto_hash_entry;
 struct dlil_threading_info;
+struct tcpstat_local;
+struct udpstat_local;
 #if PF
 struct pfi_kif;
 #endif /* PF */
@@ -352,7 +450,7 @@ LIST_HEAD(ifmultihead, ifmultiaddr);
 TAILQ_HEAD(tailq_head, tqdummy);
 TAILQ_HEAD(ifnet_filter_head, ifnet_filter);
 TAILQ_HEAD(ddesc_head_name, dlil_demux_desc);
-#endif /* XNU_KERNEL_PRIVATE */
+#endif /* BSD_KERNEL_PRIVATE */
 
 #ifdef PRIVATE
 /*
@@ -386,9 +484,13 @@ TAILQ_HEAD(ddesc_head_name, dlil_demux_desc);
 #define IF_HWASSIST_TSO_V6             0x00400000      /* will do TCP Segment offload for IPv6, IFNET_TSO_IPV6 */
 #endif /* PRIVATE */
 
-#ifdef XNU_KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
+/*
+ * ifnet is private to BSD portion of kernel
+ */
 #include <sys/tree.h>
 #include <netinet/in.h>
+#include <net/classq/if_classq.h>
 
 RB_HEAD(ll_reach_tree, if_llreach);    /* define struct ll_reach_tree */
 
@@ -404,6 +506,7 @@ struct ifnet {
        decl_lck_rw_data(, if_lock);
        void            *if_softc;      /* pointer to driver state */
        const char      *if_name;       /* name, e.g. ``en'' or ``lo'' */
+       struct if_description if_desc;  /* extended description */
        TAILQ_ENTRY(ifnet) if_link;     /* all struct ifnets are chained */
        TAILQ_ENTRY(ifnet) if_detaching_link; /* list of detaching ifnets */
 
@@ -435,6 +538,11 @@ struct ifnet {
        ifnet_family_t          if_family;      /* value assigned by Apple */
        uintptr_t               if_family_cookie;
        ifnet_output_func       if_output;
+       ifnet_pre_enqueue_func  if_pre_enqueue;
+       ifnet_start_func        if_start;
+       ifnet_ctl_func          if_output_ctl;
+       ifnet_input_poll_func   if_input_poll;
+       ifnet_ctl_func          if_input_ctl;
        ifnet_ioctl_func        if_ioctl;
        ifnet_set_bpf_tap       if_set_bpf_tap;
        ifnet_detached_func     if_free;
@@ -447,6 +555,18 @@ struct ifnet {
        struct proto_hash_entry *if_proto_hash;
        void                    *if_kpi_storage;
 
+       decl_lck_mtx_data(, if_start_lock);
+       u_int32_t               if_start_req;
+       u_int32_t               if_start_active; /* output is active */
+       struct timespec         if_start_cycle;  /* restart interval */
+       struct thread           *if_start_thread;
+
+       struct ifclassq         if_snd;         /* transmit queue */
+       u_int32_t               if_output_sched_model;  /* tx sched model */
+
+       struct if_bandwidths    if_output_bw;
+       struct if_bandwidths    if_input_bw;
+
        decl_lck_mtx_data(, if_flt_lock)
        u_int32_t               if_flt_busy;
        u_int32_t               if_flt_waiters;
@@ -458,9 +578,14 @@ struct ifnet {
        decl_lck_mtx_data(, if_addrconfig_lock); /* for serializing addr config */
        struct in_multi         *if_allhostsinm; /* store all-hosts inm for this ifp */
 
-       struct dlil_threading_info *if_input_thread;
+       decl_lck_mtx_data(, if_poll_lock);
+       u_int16_t               if_poll_req;
+       u_int16_t               if_poll_update; /* link update */
+       u_int32_t               if_poll_active; /* polling is active */
+       struct timespec         if_poll_cycle;  /* poll interval */
+       struct thread           *if_poll_thread;
 
-       struct ifqueue          if_snd;
+       struct dlil_threading_info *if_inp;
 
        struct  ifprefixhead    if_prefixhead;  /* list of prefixes per if */
        struct {
@@ -476,7 +601,6 @@ struct ifnet {
 
        u_int32_t               if_wake_properties;
 #if PF
-       struct thread           *if_pf_curthread;
        struct pfi_kif          *if_pf_kif;
 #endif /* PF */
 
@@ -504,6 +628,11 @@ struct ifnet {
 #if INET6
        struct mld_ifinfo       *if_mli;        /* for MLDv2 */
 #endif /* INET6 */
+
+       int                     if_lqm;         /* link quality metric */
+       struct if_measured_bw   if_bw;
+       struct tcpstat_local    *if_tcp_stat;   /* TCP specific stats */
+       struct udpstat_local    *if_udp_stat;   /* UDP specific stats */
 };
 
 /*
@@ -542,7 +671,8 @@ struct if_clone {
  */
 #define        IF_QFULL(ifq)           ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
 #define        IF_DROP(ifq)            ((ifq)->ifq_drops++)
-#define        IF_ENQUEUE(ifq, m) {                                            \
+
+#define        IF_ENQUEUE(ifq, m) do {                                         \
        (m)->m_nextpkt = NULL;                                          \
        if ((ifq)->ifq_tail == NULL)                                    \
                (ifq)->ifq_head = m;                                    \
@@ -550,15 +680,17 @@ struct if_clone {
                ((struct mbuf*)(ifq)->ifq_tail)->m_nextpkt = m;         \
        (ifq)->ifq_tail = m;                                            \
        (ifq)->ifq_len++;                                               \
-}
-#define        IF_PREPEND(ifq, m) {                                            \
+} while (0)
+
+#define        IF_PREPEND(ifq, m) do {                                         \
        (m)->m_nextpkt = (ifq)->ifq_head;                               \
        if ((ifq)->ifq_tail == NULL)                                    \
                (ifq)->ifq_tail = (m);                                  \
        (ifq)->ifq_head = (m);                                          \
        (ifq)->ifq_len++;                                               \
-}
-#define        IF_DEQUEUE(ifq, m) {                                            \
+} while (0)
+
+#define        IF_DEQUEUE(ifq, m) do {                                         \
        (m) = (ifq)->ifq_head;                                          \
        if (m != NULL) {                                                \
                if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL)         \
@@ -566,8 +698,9 @@ struct if_clone {
                (m)->m_nextpkt = NULL;                                  \
                (ifq)->ifq_len--;                                       \
        }                                                               \
-}
-#define        IF_REMQUEUE(ifq, m) {                                           \
+} while (0)
+
+#define        IF_REMQUEUE(ifq, m) do {                                        \
        struct mbuf *_p = (ifq)->ifq_head;                              \
        struct mbuf *_n = (m)->m_nextpkt;                               \
        if ((m) == _p)                                                  \
@@ -588,14 +721,15 @@ struct if_clone {
        if (_p != NULL)                                                 \
                _p->m_nextpkt = _n;                                     \
        (m)->m_nextpkt = NULL;                                          \
-}
+} while (0)
+
 #define IF_DRAIN(ifq) do {                                             \
-       struct mbuf *m;                                                 \
+       struct mbuf *_m;                                                \
        for (;;) {                                                      \
-               IF_DEQUEUE(ifq, m);                                     \
-               if (m == NULL)                                          \
+               IF_DEQUEUE(ifq, _m);                                    \
+               if (_m == NULL)                                         \
                        break;                                          \
-               m_freem(m);                                             \
+               m_freem(_m);                                            \
        }                                                               \
 } while (0)
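
The queue macros above were rewrapped in do { ... } while (0) so that each expands to a single statement, and IF_DRAIN's scratch variable was renamed to _m so it cannot shadow a caller's m. A minimal demonstration of the pitfall the do/while form prevents (LOG_BAD/LOG_OK are illustrative names):

    #include <stdio.h>

    /* brace-only multi-statement macro: breaks under if/else */
    #define LOG_BAD(x)  { printf("%d", (x)); printf("\n"); }

    /* do/while(0) form: always a single statement, so else binds correctly */
    #define LOG_OK(x)   do { printf("%d", (x)); printf("\n"); } while (0)

    int main(void)
    {
        int n = 1;
        /* with LOG_BAD this would not compile: the ';' after the block
           terminates the if, leaving a dangling else */
        if (n)
            LOG_OK(n);
        else
            LOG_OK(-n);
        return 0;
    }
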
 
@@ -749,7 +883,8 @@ struct ifmultiaddr {
 
 __private_extern__ struct ifnethead ifnet_head;
 __private_extern__ struct ifnet **ifindex2ifnet;
-__private_extern__ int ifqmaxlen;
+__private_extern__ u_int32_t if_sndq_maxlen;
+__private_extern__ u_int32_t if_rcvq_maxlen;
 __private_extern__ int if_index;
 __private_extern__ struct ifaddr **ifnet_addrs;
 __private_extern__ lck_attr_t *ifa_mtx_attr;
@@ -757,6 +892,8 @@ __private_extern__ lck_grp_t *ifa_mtx_grp;
 __private_extern__ lck_grp_t *ifnet_lock_group;
 __private_extern__ lck_attr_t *ifnet_lock_attr;
 extern ifnet_t lo_ifp;
+extern uint32_t if_bw_measure_size;
+extern u_int32_t if_bw_smoothing_val;
 
 extern int if_addmulti(struct ifnet *, const struct sockaddr *,
     struct ifmultiaddr **);
@@ -774,6 +911,9 @@ extern int ifioctl(struct socket *, u_long, caddr_t, struct proc *);
 extern int ifioctllocked(struct socket *, u_long, caddr_t, struct proc *);
 extern struct ifnet *ifunit(const char *);
 extern struct ifnet *if_withname(struct sockaddr *);
+extern void if_qflush(struct ifnet *, int);
+extern void if_qflush_sc(struct ifnet *, mbuf_svc_class_t, u_int32_t,
+    u_int32_t *, u_int32_t *, int);
 
 extern struct if_clone *if_clone_lookup(const char *, u_int32_t *);
 extern int if_clone_attach(struct if_clone *);
@@ -801,6 +941,10 @@ __private_extern__ errno_t ifnet_set_idle_flags_locked(ifnet_t, u_int32_t,
     u_int32_t);
 __private_extern__ int ifnet_is_attached(struct ifnet *, int refio);
 __private_extern__ void ifnet_decr_iorefcnt(struct ifnet *);
+__private_extern__ void ifnet_set_start_cycle(struct ifnet *,
+    struct timespec *);
+__private_extern__ void ifnet_set_poll_cycle(struct ifnet *,
+    struct timespec *);
 
 __private_extern__ void if_attach_ifa(struct ifnet *, struct ifaddr *);
 __private_extern__ void if_attach_link_ifa(struct ifnet *, struct ifaddr *);
@@ -812,15 +956,18 @@ __private_extern__ void dlil_if_unlock(void);
 __private_extern__ void dlil_if_lock_assert(void);
 
 extern struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
-extern struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int);
+extern struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *,
+    unsigned int);
 extern struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *);
 extern struct ifaddr *ifa_ifwithnet(const struct sockaddr *);
-extern struct ifaddr *ifa_ifwithnet_scoped(const struct sockaddr *, unsigned int);
+extern struct ifaddr *ifa_ifwithnet_scoped(const struct sockaddr *,
+    unsigned int);
 extern struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *,
     const struct sockaddr *);
-extern struct  ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *, const struct sockaddr *);
-extern struct ifaddr *ifa_ifwithroute_scoped_locked(int, const struct sockaddr *,
-    const struct sockaddr *, unsigned int);
+extern struct  ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *,
+    const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithroute_scoped_locked(int,
+    const struct sockaddr *, const struct sockaddr *, unsigned int);
 extern struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
 __private_extern__ struct ifaddr *ifa_ifpgetprimary(struct ifnet *, int);
 extern void ifa_addref(struct ifaddr *, int);
@@ -836,6 +983,16 @@ __private_extern__ struct in_ifaddr *ifa_foraddr(unsigned int);
 __private_extern__ struct in_ifaddr *ifa_foraddr_scoped(unsigned int,
     unsigned int);
 
+extern void ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl);
+extern struct sfb_bin_fcentry* ifnet_fce_alloc(int how);
+extern void ifnet_fce_free(struct sfb_bin_fcentry *);
+
+struct ifreq;
+extern errno_t ifnet_getset_opportunistic(struct ifnet *, u_long,
+    struct ifreq *, struct proc *);
+extern int ifnet_get_throttle(struct ifnet *, u_int32_t *);
+extern int ifnet_set_throttle(struct ifnet *, u_int32_t);
+
 #if INET6
 struct in6_addr;
 __private_extern__ struct in6_ifaddr *ifa_foraddr6(struct in6_addr *);
@@ -849,6 +1006,12 @@ __private_extern__ void   if_data_internal_to_if_data64(struct ifnet *ifp,
     const struct if_data_internal *if_data_int, struct if_data64 *if_data64);
 __private_extern__ void        if_copy_traffic_class(struct ifnet *ifp,
     struct if_traffic_class *if_tc);
+__private_extern__ void        if_copy_data_extended(struct ifnet *ifp,
+    struct if_data_extended *if_de);
+__private_extern__ void if_copy_packet_stats(struct ifnet *ifp,
+    struct if_packet_stats *if_ps);
+__private_extern__ void if_copy_rxpoll_stats(struct ifnet *ifp,
+    struct if_rxpoll_stats *if_rs);
 
 __private_extern__ struct rtentry *ifnet_cached_rtlookup_inet(struct ifnet *,
     struct in_addr);
@@ -857,5 +1020,19 @@ __private_extern__ struct rtentry *ifnet_cached_rtlookup_inet6(struct ifnet *,
     struct in6_addr *);
 #endif /* INET6 */
 
+__private_extern__ void if_lqm_update(struct ifnet *, int32_t);
+__private_extern__ void ifnet_update_sndq(struct ifclassq *, cqev_t);
+__private_extern__ void ifnet_update_rcv(struct ifnet *, cqev_t);
+
+__private_extern__ errno_t ifnet_set_input_bandwidths(struct ifnet *,
+    struct if_bandwidths *);
+__private_extern__ errno_t ifnet_set_output_bandwidths(struct ifnet *,
+    struct if_bandwidths *, boolean_t);
+__private_extern__ u_int64_t ifnet_output_linkrate(struct ifnet *);
+__private_extern__ u_int64_t ifnet_input_linkrate(struct ifnet *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef XNU_KERNEL_PRIVATE
+/* for uuid.c */
+__private_extern__ int uuid_get_ethernet(u_int8_t *);
 #endif /* XNU_KERNEL_PRIVATE */
 #endif /* !_NET_IF_VAR_H_ */
index cf090602d57821ebb41944997cce34e1171751ef..c1a5f0a20c4ef38fa23731662854ebc77f45e22b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1090,6 +1090,8 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
     int                        soft_vlan;
     u_short                    tag;
     vlan_parent_ref            vlp = NULL;
+    int                                err;
+    struct flowadv             adv = { FADV_SUCCESS };
        
     if (m == 0) {
        return (0);
@@ -1167,7 +1169,18 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
        evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
        evl->evl_tag = htons(tag);
     }
-    return (ifnet_output_raw(p, PF_VLAN, m));
+
+    err = dlil_output(p, PF_VLAN, m, NULL, NULL, 1, &adv);
+
+    if (err == 0) {
+       if (adv.code == FADV_FLOW_CONTROLLED) {
+           err = EQFULL;
+       } else if (adv.code == FADV_SUSPENDED) {
+           err = EQSUSPENDED;
+       }
+    }
+
+    return (err);
 
  unlock_done:
     vlan_unlock();
@@ -1208,7 +1221,7 @@ vlan_input(ifnet_t p, __unused protocol_family_t protocol,
                m_freem(m);
                return 0;
            }
-           evl = (struct ether_vlan_header *)frame_header;
+           evl = (struct ether_vlan_header *)(void *)frame_header;
            if (ntohs(evl->evl_proto) == ETHERTYPE_VLAN) {
                /* don't allow VLAN within VLAN */
                m_freem(m);
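
The vlan_output() change above switches from ifnet_output_raw() to dlil_output() so the caller can observe flow-control advisories: a FADV_FLOW_CONTROLLED advisory is surfaced as EQFULL and FADV_SUSPENDED as EQSUSPENDED, letting upper layers back off instead of silently dropping. A small sketch of that mapping (the advisory enum and errno values here are stand-ins for the kernel's constants):

    enum fadv_code { FADV_SUCCESS, FADV_FLOW_CONTROLLED, FADV_SUSPENDED };

    #define EQFULL       100   /* hypothetical errno values for the sketch */
    #define EQSUSPENDED  101

    static int advisory_to_errno(int err, enum fadv_code code)
    {
        if (err != 0)
            return err;                 /* a hard error wins */
        if (code == FADV_FLOW_CONTROLLED)
            return EQFULL;              /* transient: queue is full */
        if (code == FADV_SUSPENDED)
            return EQSUSPENDED;         /* longer-lived: flow suspended */
        return 0;
    }
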
diff --git a/bsd/net/iptap.c b/bsd/net/iptap.c
new file mode 100644 (file)
index 0000000..c665af1
--- /dev/null
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <mach/mach_types.h>
+#include <kern/locks.h>
+#include <sys/kernel.h>
+#include <sys/param.h>
+#include <sys/sockio.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/cdefs.h>
+#include <sys/kern_control.h>
+#include <sys/uio_internal.h>
+#include <sys/mbuf.h>
+#include <net/if_types.h>
+#include <net/if.h>
+#include <net/kpi_interface.h>
+#include <net/bpf.h>
+#include <net/iptap.h>
+#include <netinet/kpi_ipfilter.h>
+#include <libkern/libkern.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+
+#include <IOKit/IOLib.h>
+
+#define        IPTAP_IF_NAME                   "iptap"
+#define IPTAP_PRINTF                   printf
+#define IP_TAP_NOT_USED                        0
+
+#define VALID_PACKET(type, label)\
+                       if (iptap_clients == 0)         \
+                               goto label;                             \
+                                                                               \
+                       if (type != IFT_ETHER &&        \
+                               type != IFT_CELLULAR)   \
+                               goto label
+
+static void                            *iptap_alloc(size_t);
+static void                            iptap_free(void *);
+static errno_t                 iptap_register_control(void);
+static inline void             iptap_lock_shared(void);
+static inline void             iptap_lock_exclusive(void);
+static inline void             iptap_lock_done(void);
+static void                            iptap_alloc_lock(void);
+static void                            iptap_free_lock(void);
+
+static void                            iptap_enqueue_mbuf(struct ifnet *, protocol_family_t, struct mbuf *, u_int32_t, u_int32_t, u_int8_t);
+
+/* kernctl callbacks */
+static errno_t                 iptap_ctl_connect(kern_ctl_ref, struct sockaddr_ctl *, void **);
+static errno_t                 iptap_ctl_disconnect(kern_ctl_ref, u_int32_t, void *);
+
+#if IP_TAP_NOT_USED
+
+static errno_t                 iptap_deregister_control(void);
+
+static errno_t                 iptap_ctl_send(kern_ctl_ref, u_int32_t, void *, mbuf_t, int);
+static errno_t                 iptap_ctl_setopt(kern_ctl_ref, u_int32_t, void *, int, void *, size_t);
+static errno_t                 iptap_ctl_getopt(kern_ctl_ref, u_int32_t, void *, int, void *, size_t *);
+
+#endif /* IP_TAP_NOT_USED */
+
+decl_lck_rw_data(static, iptap_mtx);
+static lck_grp_t               *iptap_grp;
+static kern_ctl_ref            iptap_kernctl;
+static unsigned int            iptap_clients;
+static OSMallocTag             iptap_malloc_tag;
+
+struct iptap_client_t {
+       LIST_ENTRY(iptap_client_t)              _cle;
+       u_int32_t                                               _unit;
+};
+
+static LIST_HEAD(, iptap_client_t)     _s_iptap_clients;
+
+
+__private_extern__ void
+iptap_init(void)
+{
+       iptap_alloc_lock();
+       
+       iptap_malloc_tag = OSMalloc_Tagalloc(IPTAP_CONTROL_NAME, OSMT_DEFAULT);
+       if (iptap_malloc_tag == NULL) {
+               iptap_free_lock();
+               IPTAP_PRINTF("iptap_init failed: unable to allocate malloc tag.\n");
+               return;
+       }
+       
+       if (iptap_register_control() != 0) {
+               iptap_free_lock();
+               OSMalloc_Tagfree(iptap_malloc_tag);
+               IPTAP_PRINTF("iptap_init failed: iptap_register_control failure.\n");
+               return;
+       }
+       
+       iptap_clients = 0;
+}
+
+__private_extern__ void
+iptap_ipf_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, char *frame_header)
+{      
+       VALID_PACKET(ifp->if_type, done);
+
+       do {
+               char *hdr = (char *)mbuf_data(mp);
+               size_t start = (size_t)((char*)mbuf_datastart(mp));
+               size_t o_len = mp->m_len;
+               
+               if (frame_header != NULL && (size_t)frame_header >= start && (size_t)frame_header <= (size_t)hdr) {
+                       if (mbuf_setdata(mp, frame_header, o_len + ((size_t)hdr - (size_t)frame_header)) == 0) {
+                               iptap_enqueue_mbuf(ifp, proto, mp, ((size_t)hdr - (size_t)frame_header), 0, IPTAP_INPUT_TAG);
+                               mbuf_setdata(mp, hdr, o_len);
+                       }
+               } else {
+                       iptap_enqueue_mbuf(ifp, proto, mp, 0, 0, IPTAP_INPUT_TAG);
+               }
+               
+       } while (0);
+
+done:
+       return;
+}
+
+__private_extern__ void
+iptap_ipf_output(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, u_int32_t pre, u_int32_t post)
+{      
+       VALID_PACKET(ifp->if_type, done);
+       
+       iptap_enqueue_mbuf(ifp, proto, mp, pre, post, IPTAP_OUTPUT_TAG);
+       
+done:
+       return;
+}
+
+static void
+iptap_enqueue_mbuf(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, u_int32_t pre, u_int32_t post, u_int8_t io)
+{
+       errno_t err = 0;
+       struct iptap_client_t *client = NULL;
+       mbuf_t copy, itr = (mbuf_t)mp;
+       iptap_hdr_t header;
+       u_int32_t len = 0;
+       
+       memset(&header, 0x0, sizeof(header));
+       header.version = IPTAP_VERSION_1;
+       header.type = ifp->if_type;
+       header.unit = ifp->if_unit;
+       strlcpy(header.if_name, ifp->if_name, sizeof(header.if_name));
+       header.hdr_length = sizeof(header);
+       header.protocol_family = proto;
+       header.frame_pre_length = pre;
+       header.frame_pst_length = post;
+       header.io = io;
+       
+       do {
+               len += mbuf_len(itr);
+               itr = mbuf_next(itr);
+       } while (itr != NULL);
+       
+       /*
+        * Convert the header to network byte order once, outside the
+        * client loop: HTONS()/HTONL() swap in place, so converting
+        * inside the loop would hand every client after the first a
+        * header swapped back to host order.
+        */
+       HTONS(header.unit);
+       HTONL(header.hdr_length);
+       HTONL(header.protocol_family);
+       HTONL(header.frame_pre_length);
+       HTONL(header.frame_pst_length);
+       header.length = htonl(len);
+       
+       iptap_lock_shared();
+       
+       LIST_FOREACH(client, &_s_iptap_clients, _cle) {
+               
+               mbuf_dup((mbuf_t)mp, MBUF_DONTWAIT, &copy);
+               if (copy == NULL)
+                       continue;
+               
+               err = mbuf_prepend(&copy, sizeof(header), MBUF_DONTWAIT);
+               if (err != 0) {
+                       if (copy != NULL) {
+                               mbuf_freem(copy);
+                               copy = NULL;
+                       }
+                       continue;
+               }
+               
+               memcpy(mbuf_data(copy), &header, sizeof(header));
+               
+               err = ctl_enqueuembuf(iptap_kernctl, client->_unit, copy, CTL_DATA_EOR);
+               if (err != 0) {
+                       mbuf_freem(copy);
+                       copy = NULL;
+                       IPTAP_PRINTF("iptap_enqueue_mbuf failed: %d\n", (err));
+                       continue;
+               }
+       }
+       
+       iptap_lock_done();
+}
+
+static void*
+iptap_alloc(size_t size)
+{
+       size_t *mem = OSMalloc(size + sizeof(size_t), iptap_malloc_tag);
+       
+       if (mem) {
+               *mem = size + sizeof(size_t);
+               mem++;
+               memset(mem, 0x0, size);
+       }
+       
+       return (void*)mem;
+}
+
+static void
+iptap_free(void *ptr)
+{
+       size_t *size = ptr;
+       size--;
+       OSFree(size, *size, iptap_malloc_tag);
+       ptr = NULL;
+}
+
+static void
+iptap_alloc_lock(void)
+{
+       lck_grp_attr_t *grp_attr;
+       lck_attr_t *attr;
+       
+       grp_attr = lck_grp_attr_alloc_init();
+       lck_grp_attr_setdefault(grp_attr);
+       iptap_grp = lck_grp_alloc_init(IPTAP_IF_NAME, grp_attr);
+       lck_grp_attr_free(grp_attr);
+       
+       attr = lck_attr_alloc_init();
+       lck_attr_setdefault(attr);
+       
+       lck_rw_init(&iptap_mtx, iptap_grp, attr);
+       lck_attr_free(attr);
+}
+
+static void
+iptap_free_lock(void)
+{
+       lck_rw_destroy(&iptap_mtx, iptap_grp);
+       lck_grp_free(iptap_grp);
+       iptap_grp = NULL;
+}
+
+static inline void
+iptap_lock_shared(void)
+{
+       lck_rw_lock_shared(&iptap_mtx);
+}
+
+static inline void
+iptap_lock_exclusive(void)
+{
+       lck_rw_lock_exclusive(&iptap_mtx);
+}
+
+static inline void
+iptap_lock_done(void)
+{
+       lck_rw_done(&iptap_mtx);
+}
+
+static errno_t
+iptap_register_control(void)
+{
+       errno_t err = 0;
+       struct kern_ctl_reg kern_ctl;
+       
+       bzero(&kern_ctl, sizeof(kern_ctl));
+       strlcpy(kern_ctl.ctl_name, IPTAP_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
+       kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
+       kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;
+       kern_ctl.ctl_recvsize = IPTAP_BUFFERSZ;
+       kern_ctl.ctl_connect = iptap_ctl_connect;
+       kern_ctl.ctl_disconnect = iptap_ctl_disconnect;
+       kern_ctl.ctl_send = NULL;
+       kern_ctl.ctl_setopt = NULL;
+       kern_ctl.ctl_getopt = NULL;
+       
+       err = ctl_register(&kern_ctl, &iptap_kernctl);
+       
+       return (err);
+}
+
+static errno_t
+iptap_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo)
+{
+#pragma unused(kctlref)
+#pragma unused(unitinfo)
+       errno_t err = 0;
+       struct iptap_client_t *client = NULL;
+       
+       client = (struct iptap_client_t *)iptap_alloc(sizeof(struct iptap_client_t));
+       if (client != NULL) {
+               iptap_lock_exclusive();
+               
+               iptap_clients++;
+               client->_unit = sac->sc_unit;
+               LIST_INSERT_HEAD(&_s_iptap_clients, client, _cle);
+               
+               iptap_lock_done();
+       } else {
+               err = ENOMEM;
+       }
+       
+       return (err);
+}
+
+static errno_t
+iptap_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo)
+{
+#pragma unused(kctlref)
+#pragma unused(unitinfo)
+       errno_t err = 0;
+       struct iptap_client_t *client = NULL;
+       
+       iptap_lock_exclusive();
+       
+       LIST_FOREACH(client, &_s_iptap_clients, _cle) {
+               if (client->_unit == unit) {
+                       iptap_clients--;
+                       LIST_REMOVE(client, _cle);
+                       break;
+               }
+       }
+       
+       iptap_lock_done();
+       
+       if (client == NULL)
+               panic("iptap_ctl_disconnect: received a disconnect notification without a cache entry.\n");
+       
+       /* get rid of all the interfaces before freeing */
+       iptap_free(client);
+       
+       return (err);
+}
+
+#if IP_TAP_NOT_USED
+
+__private_extern__ void
+iptap_destroy(void)
+{
+       if (iptap_clients != 0) {
+               IPTAP_PRINTF("iptap_destroy failed: there are still outstanding clients.\n");
+               return;
+       }
+       
+       if (iptap_deregister_control() != 0) {
+               IPTAP_PRINTF("iptap_destroy failed: iptap_deregister_control failed.\n");
+       }
+       
+       OSMalloc_Tagfree(iptap_malloc_tag);
+       
+       iptap_free_lock();
+}
+
+static errno_t
+iptap_deregister_control(void)
+{
+       errno_t err = 0;
+       
+       if (iptap_kernctl != NULL) {
+               err = ctl_deregister(iptap_kernctl);
+       } else {
+               err = EINVAL;
+       }
+       
+       return (err); 
+}
+
+static errno_t
+iptap_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t m, int flags)
+{
+#pragma unused(kctlref)
+#pragma unused(unit)
+#pragma unused(unitinfo)
+#pragma unused(m)
+#pragma unused(flags)
+       return (KERN_SUCCESS);
+}
+
+static errno_t
+iptap_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t len)
+{      
+#pragma unused(kctlref)
+#pragma unused(unit)
+#pragma unused(unitinfo)
+#pragma unused(opt)
+#pragma unused(data)
+#pragma unused(len)
+       return (KERN_SUCCESS);
+}
+
+static errno_t
+iptap_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t *len)
+{
+#pragma unused(kctlref)
+#pragma unused(unit)
+#pragma unused(unitinfo)
+#pragma unused(opt)
+#pragma unused(data)
+#pragma unused(len)
+       return (KERN_SUCCESS);
+}
+
+#endif /* IP_TAP_NOT_USED */
+
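iptap_alloc()/iptap_free() above rely on OSFree() being told the original allocation size, so they hide that size in a leading size_t word. A standalone restatement of the same trick in plain C, with malloc()/free() standing in for OSMalloc()/OSFree() (illustrative, not from this commit):

#include <stdlib.h>
#include <string.h>

static void *
sized_alloc(size_t size)
{
	/* layout: [size_t total | size bytes handed to the caller] */
	size_t *mem = malloc(size + sizeof (size_t));

	if (mem != NULL) {
		*mem = size + sizeof (size_t);	/* remember total size */
		mem++;				/* hide the header word */
		memset(mem, 0, size);
	}
	return (mem);
}

static void
sized_free(void *ptr)
{
	size_t *mem = ptr;

	mem--;		/* step back to the hidden size header */
	/* in the kernel: OSFree(mem, *mem, tag); here: */
	free(mem);
}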
diff --git a/bsd/net/iptap.h b/bsd/net/iptap.h
new file mode 100644 (file)
index 0000000..db8b0a2
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef IPTAP_H
+#define IPTAP_H
+
+#include <net/if.h>
+
+#define IPTAP_CONTROL_NAME "com.apple.net.iptap_control"
+
+#define IPTAP_BUFFERSZ (128 * 1024)
+#define IPTAP_VERSION_1                0x1
+
+enum {
+       IPTAP_OUTPUT_TAG        =       0x01,
+       IPTAP_INPUT_TAG         =       0x10,
+       IPTAP_UNKNOWN_TAG       =       0x11
+};
+
+#pragma pack(push)
+#pragma pack(1)
+
+typedef struct iptap_hdr_t {
+       uint32_t        hdr_length;
+       uint8_t         version;
+       uint32_t        length;
+       uint8_t         type;
+       uint16_t        unit;
+       uint8_t         io;
+       uint32_t        protocol_family;
+       uint32_t        frame_pre_length;
+       uint32_t        frame_pst_length;
+       char            if_name[IFNAMSIZ];
+} __attribute__ ((__packed__)) iptap_hdr_t;
+
+#pragma pack(pop)
+
+#ifdef KERNEL_PRIVATE
+
+extern void iptap_init(void);
+extern void iptap_ipf_input(struct ifnet *, protocol_family_t, struct mbuf *, char *);
+extern void iptap_ipf_output(struct ifnet *, protocol_family_t, struct mbuf *, u_int32_t, u_int32_t);
+#if 0
+extern void iptap_destroy(void);
+#endif
+
+#endif /* KERNEL_PRIVATE */
+#endif /* IPTAP_H */
\ No newline at end of file
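A userspace sketch (not part of this commit) of a client for the kernel control defined above, using the standard PF_SYSTEM/SYSPROTO_CONTROL pattern: resolve the control name to its dynamic id with CTLIOCGINFO, connect, then read iptap_hdr_t-framed records. The control is registered CTL_FLAG_PRIVILEGED, so this must run as root; error handling is elided for brevity, and the literals mirror IPTAP_CONTROL_NAME and IPTAP_BUFFERSZ from the header.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>

int
main(void)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	char buf[128 * 1024];
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

	/* resolve the control name to its dynamically assigned id */
	memset(&info, 0, sizeof (info));
	strlcpy(info.ctl_name, "com.apple.net.iptap_control",
	    sizeof (info.ctl_name));
	ioctl(fd, CTLIOCGINFO, &info);

	memset(&addr, 0, sizeof (addr));
	addr.sc_len = sizeof (addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;		/* 0: kernel picks the unit */
	connect(fd, (struct sockaddr *)&addr, sizeof (addr));

	for (;;) {
		ssize_t n = recv(fd, buf, sizeof (buf), 0);
		if (n <= 0)
			break;
		/*
		 * Each record starts with an iptap_hdr_t whose
		 * multi-byte fields are in network byte order.
		 */
		printf("captured %zd bytes\n", n);
	}
	close(fd);
	return (0);
}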
index 82ba11b039d67c3b105653a772a35ff8a01fb4ba..e613eec82424a8124e64e2beceddf3930485442c 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
 #include <sys/malloc.h>
 #include <sys/kpi_mbuf.h>
 #include <sys/mcache.h>
+#include <sys/protosw.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/dlil.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/if_arp.h>
+#include <net/if_llreach.h>
+#include <net/route.h>
 #include <libkern/libkern.h>
 #include <libkern/OSAtomic.h>
 #include <kern/locks.h>
+#include <kern/clock.h>
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#ifdef INET
+#include <netinet/igmp_var.h>
+#endif
+#ifdef INET6
+#include <netinet6/mld6_var.h>
+#endif
 
 #include "net/net_str_id.h"
 
 #if IF_LASTCHANGEUPTIME
-#define TOUCHLASTCHANGE(__if_lastchange) microuptime(__if_lastchange)
+#define        TOUCHLASTCHANGE(__if_lastchange) {                              \
+       (__if_lastchange)->tv_sec = net_uptime();                       \
+       (__if_lastchange)->tv_usec = 0;                                 \
+}
 #else
-#define TOUCHLASTCHANGE(__if_lastchange) microtime(__if_lastchange)
+#define        TOUCHLASTCHANGE(__if_lastchange) microtime(__if_lastchange)
 #endif
 
-static errno_t
-ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **, u_int32_t *);
+#define        _cast_non_const(p) ((void *)(uintptr_t)(p))
+
+static errno_t ifnet_defrouter_llreachinfo(ifnet_t, int,
+    struct ifnet_llreach_info *);
+static void ifnet_kpi_free(ifnet_t);
+static errno_t ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **,
+    u_int32_t *);
+static errno_t ifnet_set_lladdr_internal(ifnet_t, const void *, size_t,
+    u_char, int);
+static errno_t ifnet_awdl_check_eflags(ifnet_t, u_int32_t *, u_int32_t *);
 
 /*
-	Temporary work around until we have real reference counting
-	
-	We keep the bits about calling dlil_if_release (which should be
-	called recycle) transparent by calling it from our if_free function
-	pointer. We have to keep the client's original detach function
-	somewhere so we can call it.
+ * Temporary work around until we have real reference counting
+ *
+ * We keep the bits about calling dlil_if_release (which should be
+ * called recycle) transparent by calling it from our if_free function
+ * pointer. We have to keep the client's original detach function
+ * somewhere so we can call it.
  */
 static void
-ifnet_kpi_free(
-       ifnet_t ifp)
+ifnet_kpi_free(ifnet_t ifp)
 {
-       ifnet_detached_func     detach_func = ifp->if_kpi_storage;
-       
-       if (detach_func)
+       ifnet_detached_func detach_func = ifp->if_kpi_storage;
+
+       if (detach_func != NULL)
                detach_func(ifp);
-       
-       if (ifp->if_broadcast.length > sizeof(ifp->if_broadcast.u.buffer)) {
+
+       if (ifp->if_broadcast.length > sizeof (ifp->if_broadcast.u.buffer)) {
                FREE(ifp->if_broadcast.u.ptr, M_IFADDR);
                ifp->if_broadcast.u.ptr = NULL;
        }
-       
+
        dlil_if_release(ifp);
 }
 
-static __inline__ void*
-_cast_non_const(const void * ptr) {
-       union {
-               const void*             cval;
-               void*                   val;
-       } ret;
-       
-       ret.cval = ptr;
-       return (ret.val);
+errno_t
+ifnet_allocate(const struct ifnet_init_params *init, ifnet_t *interface)
+{
+       struct ifnet_init_eparams einit;
+
+       bzero(&einit, sizeof (einit));
+
+       einit.ver               = IFNET_INIT_CURRENT_VERSION;
+       einit.len               = sizeof (einit);
+       einit.flags             = IFNET_INIT_LEGACY;
+       einit.uniqueid          = init->uniqueid;
+       einit.uniqueid_len      = init->uniqueid_len;
+       einit.name              = init->name;
+       einit.unit              = init->unit;
+       einit.family            = init->family;
+       einit.type              = init->type;
+       einit.output            = init->output;
+       einit.demux             = init->demux;
+       einit.add_proto         = init->add_proto;
+       einit.del_proto         = init->del_proto;
+       einit.check_multi       = init->check_multi;
+       einit.framer            = init->framer;
+       einit.softc             = init->softc;
+       einit.ioctl             = init->ioctl;
+       einit.set_bpf_tap       = init->set_bpf_tap;
+       einit.detach            = init->detach;
+       einit.event             = init->event;
+       einit.broadcast_addr    = init->broadcast_addr;
+       einit.broadcast_len     = init->broadcast_len;
+
+       return (ifnet_allocate_extended(&einit, interface));
 }
 
 errno_t
-ifnet_allocate(
-       const struct ifnet_init_params *init,
-       ifnet_t *interface)
+ifnet_allocate_extended(const struct ifnet_init_eparams *einit0,
+    ifnet_t *interface)
 {
-       int error;
+       struct ifnet_init_eparams einit;
        struct ifnet *ifp = NULL;
-       
-       if (init->family == 0)
-               return EINVAL;
-       if (init->name == NULL ||
-               init->output == NULL)
-               return EINVAL;
-       if (strlen(init->name) >= IFNAMSIZ)
-               return EINVAL;
-       if ((init->type & 0xFFFFFF00) != 0 || init->type == 0)
-               return EINVAL;
-       
-       error = dlil_if_acquire(init->family, init->uniqueid, init->uniqueid_len, &ifp);
-       if (error == 0)
-       {
+       int error;
+
+       einit = *einit0;
+
+       if (einit.ver != IFNET_INIT_CURRENT_VERSION ||
+           einit.len < sizeof (einit))
+               return (EINVAL);
+
+       if (einit.family == 0 || einit.name == NULL ||
+           strlen(einit.name) >= IFNAMSIZ ||
+           (einit.type & 0xFFFFFF00) != 0 || einit.type == 0)
+               return (EINVAL);
+
+       if (einit.flags & IFNET_INIT_LEGACY) {
+               if (einit.output == NULL || einit.flags != IFNET_INIT_LEGACY)
+                       return (EINVAL);
+
+               einit.pre_enqueue = NULL;
+               einit.start = NULL;
+               einit.output_ctl = NULL;
+               einit.output_sched_model = IFNET_SCHED_MODEL_NORMAL;
+               einit.input_poll = NULL;
+               einit.input_ctl = NULL;
+       } else {
+               if (einit.start == NULL)
+                       return (EINVAL);
+
+               einit.output = NULL;
+               if (einit.output_sched_model >= IFNET_SCHED_MODEL_MAX)
+                       return (EINVAL);
+
+               if (einit.flags & IFNET_INIT_INPUT_POLL) {
+                       if (einit.input_poll == NULL || einit.input_ctl == NULL)
+                               return (EINVAL);
+               } else {
+                       einit.input_poll = NULL;
+                       einit.input_ctl = NULL;
+               }
+       }
+
+       error = dlil_if_acquire(einit.family, einit.uniqueid,
+           einit.uniqueid_len, &ifp);
+
+       if (error == 0) {
+               u_int64_t br;
+
                /*
                 * Cast ifp->if_name as non const. dlil_if_acquire sets it up
                 * to point to storage of at least IFNAMSIZ bytes. It is safe
                 * to write to this.
                 */
-               strncpy(_cast_non_const(ifp->if_name), init->name, IFNAMSIZ);
-               ifp->if_type = init->type;
-               ifp->if_family = init->family;
-               ifp->if_unit = init->unit;
-               ifp->if_output = init->output;
-               ifp->if_demux = init->demux;
-               ifp->if_add_proto = init->add_proto;
-               ifp->if_del_proto = init->del_proto;
-               ifp->if_check_multi = init->check_multi;
-               ifp->if_framer = init->framer;
-               ifp->if_softc = init->softc;
-               ifp->if_ioctl = init->ioctl;
-               ifp->if_set_bpf_tap = init->set_bpf_tap;
-               ifp->if_free = ifnet_kpi_free;
-               ifp->if_event = init->event;
-               ifp->if_kpi_storage = init->detach;
-               ifp->if_eflags |= IFEF_USEKPI;
-               
-               if (init->broadcast_len && init->broadcast_addr) {
-                       if (init->broadcast_len > sizeof(ifp->if_broadcast.u.buffer)) {
-                               MALLOC(ifp->if_broadcast.u.ptr, u_char*, init->broadcast_len, M_IFADDR, M_NOWAIT);
+               strncpy(_cast_non_const(ifp->if_name), einit.name, IFNAMSIZ);
+               ifp->if_type            = einit.type;
+               ifp->if_family          = einit.family;
+               ifp->if_unit            = einit.unit;
+               ifp->if_output          = einit.output;
+               ifp->if_pre_enqueue     = einit.pre_enqueue;
+               ifp->if_start           = einit.start;
+               ifp->if_output_ctl      = einit.output_ctl;
+               ifp->if_output_sched_model = einit.output_sched_model;
+               ifp->if_output_bw.eff_bw = einit.output_bw;
+               ifp->if_output_bw.max_bw = einit.output_bw_max;
+               ifp->if_input_poll      = einit.input_poll;
+               ifp->if_input_ctl       = einit.input_ctl;
+               ifp->if_input_bw.eff_bw = einit.input_bw;
+               ifp->if_input_bw.max_bw = einit.input_bw_max;
+               ifp->if_demux           = einit.demux;
+               ifp->if_add_proto       = einit.add_proto;
+               ifp->if_del_proto       = einit.del_proto;
+               ifp->if_check_multi     = einit.check_multi;
+               ifp->if_framer          = einit.framer;
+               ifp->if_softc           = einit.softc;
+               ifp->if_ioctl           = einit.ioctl;
+               ifp->if_set_bpf_tap     = einit.set_bpf_tap;
+               ifp->if_free            = ifnet_kpi_free;
+               ifp->if_event           = einit.event;
+               ifp->if_kpi_storage     = einit.detach;
+
+               if (ifp->if_output_bw.eff_bw > ifp->if_output_bw.max_bw)
+                       ifp->if_output_bw.max_bw = ifp->if_output_bw.eff_bw;
+               else if (ifp->if_output_bw.eff_bw == 0)
+                       ifp->if_output_bw.eff_bw = ifp->if_output_bw.max_bw;
+
+               if (ifp->if_input_bw.eff_bw > ifp->if_input_bw.max_bw)
+                       ifp->if_input_bw.max_bw = ifp->if_input_bw.eff_bw;
+               else if (ifp->if_input_bw.eff_bw == 0)
+                       ifp->if_input_bw.eff_bw = ifp->if_input_bw.max_bw;
+
+               if (ifp->if_output_bw.max_bw == 0)
+                       ifp->if_output_bw = ifp->if_input_bw;
+               else if (ifp->if_input_bw.max_bw == 0)
+                       ifp->if_input_bw = ifp->if_output_bw;
+
+               if (ifp->if_ioctl == NULL)
+                       ifp->if_ioctl = ifp_if_ioctl;
+
+               /* Pin if_baudrate to 32 bits */
+               br = MAX(ifp->if_output_bw.max_bw, ifp->if_input_bw.max_bw);
+               if (br != 0)
+                       ifp->if_baudrate = (br > 0xFFFFFFFF) ? 0xFFFFFFFF : br;
+
+               if (ifp->if_start != NULL) {
+                       ifp->if_eflags |= IFEF_TXSTART;
+                       if (ifp->if_pre_enqueue == NULL)
+                               ifp->if_pre_enqueue = ifnet_enqueue;
+                       ifp->if_output = ifp->if_pre_enqueue;
+               } else {
+                       ifp->if_eflags &= ~IFEF_TXSTART;
+               }
+
+               if (ifp->if_input_poll != NULL)
+                       ifp->if_eflags |= IFEF_RXPOLL;
+               else
+                       ifp->if_eflags &= ~IFEF_RXPOLL;
+
+               VERIFY(!(einit.flags & IFNET_INIT_LEGACY) ||
+                   (ifp->if_pre_enqueue == NULL && ifp->if_start == NULL &&
+                   ifp->if_output_ctl == NULL && ifp->if_input_poll == NULL &&
+                   ifp->if_input_ctl == NULL));
+               VERIFY(!(einit.flags & IFNET_INIT_INPUT_POLL) ||
+                   (ifp->if_input_poll != NULL && ifp->if_input_ctl != NULL));
+
+               if (einit.broadcast_len && einit.broadcast_addr) {
+                       if (einit.broadcast_len >
+                           sizeof (ifp->if_broadcast.u.buffer)) {
+                               MALLOC(ifp->if_broadcast.u.ptr, u_char *,
+                                   einit.broadcast_len, M_IFADDR, M_NOWAIT);
                                if (ifp->if_broadcast.u.ptr == NULL) {
                                        error = ENOMEM;
+                               } else {
+                                       bcopy(einit.broadcast_addr,
+                                           ifp->if_broadcast.u.ptr,
+                                           einit.broadcast_len);
                                }
-                               else {
-                                       bcopy(init->broadcast_addr, ifp->if_broadcast.u.ptr, init->broadcast_len);
-                               }
+                       } else {
+                               bcopy(einit.broadcast_addr,
+                                   ifp->if_broadcast.u.buffer,
+                                   einit.broadcast_len);
                        }
-                       else {
-                               bcopy(init->broadcast_addr, ifp->if_broadcast.u.buffer, init->broadcast_len);
-                       }
-                       ifp->if_broadcast.length = init->broadcast_len;
-               }
-               else {
-                       bzero(&ifp->if_broadcast, sizeof(ifp->if_broadcast));
+                       ifp->if_broadcast.length = einit.broadcast_len;
+               } else {
+                       bzero(&ifp->if_broadcast, sizeof (ifp->if_broadcast));
                }
-               
+
+               IFCQ_MAXLEN(&ifp->if_snd) = einit.sndq_maxlen;
+
                if (error == 0) {
                        *interface = ifp;
-                       ifnet_reference(ifp); // temporary - this should be done in dlil_if_acquire
-               }
-               else {
+                       // temporary - this should be done in dlil_if_acquire
+                       ifnet_reference(ifp);
+               } else {
                        dlil_if_release(ifp);
-                       *interface = 0;
+                       *interface = NULL;
                }
        }
-       
+
        /*
-         Note: We should do something here to indicate that we haven't been
-         attached yet. By doing so, we can catch the case in ifnet_release
-         where the reference count reaches zero and call the recycle
-         function. If the interface is attached, the interface will be
-         recycled when the interface's if_free function is called. If the
-         interface is never attached, the if_free function will never be
-         called and the interface will never be recycled.
-       */
-       
-       return error;
+        * Note: We should do something here to indicate that we haven't been
+        * attached yet. By doing so, we can catch the case in ifnet_release
+        * where the reference count reaches zero and call the recycle
+        * function. If the interface is attached, the interface will be
+        * recycled when the interface's if_free function is called. If the
+        * interface is never attached, the if_free function will never be
+        * called and the interface will never be recycled.
+        */
+
+       return (error);
 }
 
 errno_t
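An illustrative sketch (mydrv_* names are hypothetical) of the two init models ifnet_allocate_extended() accepts above: the legacy model supplies if_output directly, while the new model supplies a start callback and lets the stack enqueue to if_snd, setting IFEF_TXSTART.

#include <net/kpi_interface.h>
#include <net/if_types.h>

/* hypothetical driver callbacks (signatures per net/kpi_interface.h) */
static errno_t mydrv_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
static errno_t mydrv_add_proto(ifnet_t, protocol_family_t,
    const struct ifnet_demux_desc *, u_int32_t);
static errno_t mydrv_del_proto(ifnet_t, protocol_family_t);
static void mydrv_start(ifnet_t);

static errno_t
mydrv_create(ifnet_t *ifpp)
{
	struct ifnet_init_eparams ep;

	bzero(&ep, sizeof (ep));
	ep.ver		= IFNET_INIT_CURRENT_VERSION;
	ep.len		= sizeof (ep);
	ep.name		= "mydrv";
	ep.unit		= 0;
	ep.family	= IFNET_FAMILY_ETHERNET;
	ep.type		= IFT_ETHER;
	ep.demux	= mydrv_demux;
	ep.add_proto	= mydrv_add_proto;
	ep.del_proto	= mydrv_del_proto;

	/*
	 * New model: leave flags clear and provide a start routine;
	 * ifnet_allocate_extended() then points if_output at
	 * ifnet_enqueue and sets IFEF_TXSTART.  For the legacy model,
	 * set ep.flags = IFNET_INIT_LEGACY and ep.output instead,
	 * leaving start/pre_enqueue NULL.
	 */
	ep.start = mydrv_start;
	ep.output_sched_model = IFNET_SCHED_MODEL_NORMAL;

	return (ifnet_allocate_extended(&ep, ifpp));
}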
@@ -193,53 +328,54 @@ ifnet_release(ifnet_t ifp)
        return (dlil_if_free(ifp));
 }
 
-errno_t 
-ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id)
+errno_t
+ifnet_interface_family_find(const char *module_string,
+    ifnet_family_t *family_id)
 {
        if (module_string == NULL || family_id == NULL)
-               return EINVAL;
-       return net_str_id_find_internal(module_string, family_id, NSI_IF_FAM_ID, 1);
-       
+               return (EINVAL);
+
+       return (net_str_id_find_internal(module_string, family_id,
+           NSI_IF_FAM_ID, 1));
 }
 
-void*
-ifnet_softc(
-       ifnet_t interface)
+void *
+ifnet_softc(ifnet_t interface)
 {
-       return interface == NULL ? NULL : interface->if_softc;
+       return ((interface == NULL) ? NULL : interface->if_softc);
 }
 
-const char*
-ifnet_name(
-       ifnet_t interface)
+const char *
+ifnet_name(ifnet_t interface)
 {
-       return interface == NULL ? NULL : interface->if_name;
+       return ((interface == NULL) ? NULL : interface->if_name);
 }
 
 ifnet_family_t
-ifnet_family(
-       ifnet_t interface)
+ifnet_family(ifnet_t interface)
 {
-       return interface == NULL ? 0 : interface->if_family;
+       return ((interface == NULL) ? 0 : interface->if_family);
 }
 
 u_int32_t
-ifnet_unit(
-       ifnet_t interface)
+ifnet_unit(ifnet_t interface)
 {
-       return interface == NULL ? (u_int32_t)0xffffffff : (u_int32_t)interface->if_unit;
+       return ((interface == NULL) ? (u_int32_t)0xffffffff :
+           (u_int32_t)interface->if_unit);
 }
 
 u_int32_t
-ifnet_index(
-       ifnet_t interface)
+ifnet_index(ifnet_t interface)
 {
-       return interface == NULL ? (u_int32_t)0xffffffff : interface->if_index;
+       return ((interface == NULL) ? (u_int32_t)0xffffffff :
+           interface->if_index);
 }
 
 errno_t
 ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask)
 {
+       uint16_t old_flags;
+
        if (interface == NULL)
                return (EINVAL);
 
@@ -250,17 +386,69 @@ ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask)
                if_updown(interface, (new_flags & IFF_UP) == IFF_UP);
        }
 
+       old_flags = interface->if_flags;
        interface->if_flags = (new_flags & mask) | (interface->if_flags & ~mask);
+       /* If we are modifying the multicast flag, set/unset the silent flag */
+       if ((old_flags & IFF_MULTICAST) !=
+           (interface->if_flags & IFF_MULTICAST)) {
+#if INET
+               if (IGMP_IFINFO(interface) != NULL)
+                       igmp_initsilent(interface, IGMP_IFINFO(interface));
+#endif /* INET */
+#if INET6
+               if (MLD_IFINFO(interface) != NULL)
+                       mld6_initsilent(interface, MLD_IFINFO(interface));
+#endif /* INET6 */
+       }
+
        ifnet_lock_done(interface);
 
        return (0);
 }
 
 u_int16_t
-ifnet_flags(
-       ifnet_t interface)
+ifnet_flags(ifnet_t interface)
 {
-       return interface == NULL ? 0 : interface->if_flags;
+       return ((interface == NULL) ? 0 : interface->if_flags);
+}
+
+/*
+ * This routine ensures the following:
+ *
+ * If IFEF_AWDL is set by the caller, also set the rest of flags as
+ * defined in IFEF_AWDL_MASK.
+ *
+ * If IFEF_AWDL has been set on the interface and the caller attempts
+ * to clear one or more of the associated flags in IFEF_AWDL_MASK,
+ * return failure.
+ *
+ * All other flags not associated with AWDL are not affected.
+ *
+ * See <net/if.h> for current definition of IFEF_AWDL_MASK.
+ */
+static errno_t
+ifnet_awdl_check_eflags(ifnet_t ifp, u_int32_t *new_eflags, u_int32_t *mask)
+{
+       u_int32_t eflags;
+
+       ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+
+       eflags = (*new_eflags & *mask) | (ifp->if_eflags & ~(*mask));
+
+       if (ifp->if_eflags & IFEF_AWDL) {
+               if (eflags & IFEF_AWDL) {
+                       if ((eflags & IFEF_AWDL_MASK) != IFEF_AWDL_MASK)
+                               return (1);
+               } else {
+                       *new_eflags &= ~IFEF_AWDL_MASK;
+                       *mask |= IFEF_AWDL_MASK;
+               }
+       } else if (eflags & IFEF_AWDL) {
+               *new_eflags |= IFEF_AWDL_MASK;
+               *mask |= IFEF_AWDL_MASK;
+       }
+
+       return (0);
 }
 
 errno_t
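A behavior sketch for the invariant enforced above (illustrative only; ifp is assumed attached):

static void
awdl_eflags_demo(ifnet_t ifp)
{
	/* setting IFEF_AWDL drags in every flag in IFEF_AWDL_MASK */
	(void) ifnet_set_eflags(ifp, IFEF_AWDL, IFEF_AWDL);

	/*
	 * While IFEF_AWDL is set, a request that would clear any one
	 * flag inside IFEF_AWDL_MASK while keeping IFEF_AWDL fails
	 * with EINVAL in ifnet_set_eflags().
	 */

	/* clearing IFEF_AWDL itself clears the whole mask atomically */
	(void) ifnet_set_eflags(ifp, 0, IFEF_AWDL);
}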
@@ -270,17 +458,24 @@ ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask)
                return (EINVAL);
 
        ifnet_lock_exclusive(interface);
-       interface->if_eflags = (new_flags & mask) | (interface->if_eflags & ~mask);
+       /*
+        * Sanity checks for IFEF_AWDL and its related flags.
+        */
+       if (ifnet_awdl_check_eflags(interface, &new_flags, &mask) != 0) {
+               ifnet_lock_done(interface);
+               return (EINVAL);
+       }
+       interface->if_eflags =
+           (new_flags & mask) | (interface->if_eflags & ~mask);
        ifnet_lock_done(interface);
 
        return (0);
 }
 
 u_int32_t
-ifnet_eflags(
-       ifnet_t interface)
+ifnet_eflags(ifnet_t interface)
 {
-       return interface == NULL ? 0 : interface->if_eflags;
+       return ((interface == NULL) ? 0 : interface->if_eflags);
 }
 
 errno_t
@@ -349,15 +544,76 @@ ifnet_idle_flags(ifnet_t ifp)
        return ((ifp == NULL) ? 0 : ifp->if_idle_flags);
 }
 
-errno_t ifnet_set_capabilities_supported(ifnet_t ifp, u_int32_t new_caps,
+errno_t
+ifnet_set_link_quality(ifnet_t ifp, int quality)
+{
+       errno_t err = 0;
+
+       if (ifp == NULL || quality < IFNET_LQM_MIN || quality > IFNET_LQM_MAX) {
+               err = EINVAL;
+               goto done;
+       }
+
+       if (!ifnet_is_attached(ifp, 0)) {
+               err = ENXIO;
+               goto done;
+       }
+
+       if_lqm_update(ifp, quality);
+
+done:
+       return (err);
+}
+
+int
+ifnet_link_quality(ifnet_t ifp)
+{
+       int lqm;
+
+       if (ifp == NULL)
+               return (IFNET_LQM_THRESH_OFF);
+
+       ifnet_lock_shared(ifp);
+       lqm = ifp->if_lqm;
+       ifnet_lock_done(ifp);
+
+       return (lqm);
+}
+
+static errno_t
+ifnet_defrouter_llreachinfo(ifnet_t ifp, int af,
+    struct ifnet_llreach_info *iflri)
+{
+       if (ifp == NULL || iflri == NULL)
+               return (EINVAL);
+
+       VERIFY(af == AF_INET || af == AF_INET6);
+
+       return (ifnet_llreach_get_defrouter(ifp, af, iflri));
+}
+
+errno_t
+ifnet_inet_defrouter_llreachinfo(ifnet_t ifp, struct ifnet_llreach_info *iflri)
+{
+       return (ifnet_defrouter_llreachinfo(ifp, AF_INET, iflri));
+}
+
+errno_t
+ifnet_inet6_defrouter_llreachinfo(ifnet_t ifp, struct ifnet_llreach_info *iflri)
+{
+       return (ifnet_defrouter_llreachinfo(ifp, AF_INET6, iflri));
+}
+
+errno_t
+ifnet_set_capabilities_supported(ifnet_t ifp, u_int32_t new_caps,
     u_int32_t mask)
 {
        errno_t error = 0;
        int tmp;
 
        if (ifp == NULL)
-               return EINVAL;
-       
+               return (EINVAL);
+
        ifnet_lock_exclusive(ifp);
        tmp = (new_caps & mask) | (ifp->if_capabilities & ~mask);
        if ((tmp & ~IFCAP_VALID))
@@ -365,27 +621,29 @@ errno_t ifnet_set_capabilities_supported(ifnet_t ifp, u_int32_t new_caps,
        else
                ifp->if_capabilities = tmp;
        ifnet_lock_done(ifp);
-       
-       return error;
+
+       return (error);
 }
 
-u_int32_t ifnet_capabilities_supported(ifnet_t ifp)
+u_int32_t
+ifnet_capabilities_supported(ifnet_t ifp)
 {
        return ((ifp == NULL) ? 0 : ifp->if_capabilities);
 }
 
 
-errno_t ifnet_set_capabilities_enabled(ifnet_t ifp, u_int32_t new_caps,
+errno_t
+ifnet_set_capabilities_enabled(ifnet_t ifp, u_int32_t new_caps,
     u_int32_t mask)
 {
        errno_t error = 0;
        int tmp;
-       struct kev_msg        ev_msg;
+       struct kev_msg ev_msg;
        struct net_event_data ev_data;
 
        if (ifp == NULL)
-               return EINVAL;
-       
+               return (EINVAL);
+
        ifnet_lock_exclusive(ifp);
        tmp = (new_caps & mask) | (ifp->if_capenable & ~mask);
        if ((tmp & ~IFCAP_VALID) || (tmp & ~ifp->if_capabilities))
@@ -393,55 +651,52 @@ errno_t ifnet_set_capabilities_enabled(ifnet_t ifp, u_int32_t new_caps,
        else
                ifp->if_capenable = tmp;
        ifnet_lock_done(ifp);
-       
+
        /* Notify application of the change */
-       bzero(&ev_data, sizeof(struct net_event_data));
-       bzero(&ev_msg, sizeof(struct kev_msg));
-       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-       ev_msg.kev_class      = KEV_NETWORK_CLASS;
-       ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+       bzero(&ev_data, sizeof (struct net_event_data));
+       bzero(&ev_msg, sizeof (struct kev_msg));
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_DL_SUBCLASS;
 
-       ev_msg.event_code = KEV_DL_IFCAP_CHANGED;
+       ev_msg.event_code       = KEV_DL_IFCAP_CHANGED;
        strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
-       ev_data.if_family = ifp->if_family;
-       ev_data.if_unit   = (u_int32_t) ifp->if_unit;
-       ev_msg.dv[0].data_length = sizeof(struct net_event_data);
-       ev_msg.dv[0].data_ptr    = &ev_data;
+       ev_data.if_family       = ifp->if_family;
+       ev_data.if_unit         = (u_int32_t)ifp->if_unit;
+       ev_msg.dv[0].data_length = sizeof (struct net_event_data);
+       ev_msg.dv[0].data_ptr = &ev_data;
        ev_msg.dv[1].data_length = 0;
        kev_post_msg(&ev_msg);
 
-       return error;
+       return (error);
 }
 
-u_int32_t ifnet_capabilities_enabled(ifnet_t ifp)
+u_int32_t
+ifnet_capabilities_enabled(ifnet_t ifp)
 {
        return ((ifp == NULL) ? 0 : ifp->if_capenable);
-       
-       return 0;
 }
 
-static const ifnet_offload_t offload_mask = IFNET_CSUM_IP | IFNET_CSUM_TCP |
-                       IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT |
-                       IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_IPV6_FRAGMENT |
-                       IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING | IFNET_VLAN_MTU |
-                       IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
-
-static const ifnet_offload_t any_offload_csum = IFNET_CSUM_IP | IFNET_CSUM_TCP |
-                       IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT |
-                       IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 |
-                       IFNET_CSUM_SUM16;
+static const ifnet_offload_t offload_mask =
+       (IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT |
+       IFNET_IP_FRAGMENT | IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 |
+       IFNET_IPV6_FRAGMENT | IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING |
+       IFNET_VLAN_MTU | IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6);
 
+static const ifnet_offload_t any_offload_csum =
+       (IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT |
+       IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_CSUM_SUM16);
 
 errno_t
 ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
 {
        u_int32_t ifcaps = 0;
-       
+
        if (interface == NULL)
                return (EINVAL);
 
        ifnet_lock_exclusive(interface);
-       interface->if_hwassist = (offload & offload_mask);      
+       interface->if_hwassist = (offload & offload_mask);
        ifnet_lock_done(interface);
 
        if ((offload & any_offload_csum))
@@ -455,133 +710,130 @@ ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
        if ((offload & IFNET_VLAN_TAGGING))
                ifcaps |= IFCAP_VLAN_HWTAGGING;
        if (ifcaps != 0) {
-               (void) ifnet_set_capabilities_supported(interface, ifcaps, IFCAP_VALID);
-               (void) ifnet_set_capabilities_enabled(interface, ifcaps, IFCAP_VALID);
+               (void) ifnet_set_capabilities_supported(interface, ifcaps,
+                   IFCAP_VALID);
+               (void) ifnet_set_capabilities_enabled(interface, ifcaps,
+                   IFCAP_VALID);
        }
 
        return (0);
 }
 
 ifnet_offload_t
-ifnet_offload(
-       ifnet_t interface)
+ifnet_offload(ifnet_t interface)
 {
-       return interface == NULL ? 0 : (interface->if_hwassist & offload_mask);
+       return ((interface == NULL) ?
+           0 : (interface->if_hwassist & offload_mask));
 }
 
-errno_t 
-ifnet_set_tso_mtu(
-       ifnet_t interface, 
-       sa_family_t     family,
-       u_int32_t mtuLen)
+errno_t
+ifnet_set_tso_mtu(ifnet_t interface, sa_family_t family, u_int32_t mtuLen)
 {
        errno_t error = 0;
 
-       if (interface == NULL) return EINVAL;
-
-       if (mtuLen < interface->if_mtu)
-               return EINVAL;
-       
+       if (interface == NULL || mtuLen < interface->if_mtu)
+               return (EINVAL);
 
        switch (family) {
+       case AF_INET:
+               if (interface->if_hwassist & IFNET_TSO_IPV4)
+                       interface->if_tso_v4_mtu = mtuLen;
+               else
+                       error = EINVAL;
+               break;
 
-               case AF_INET: 
-                       if (interface->if_hwassist & IFNET_TSO_IPV4)
-                               interface->if_tso_v4_mtu = mtuLen;
-                       else
-                               error = EINVAL;
-                       break;
-
-               case AF_INET6:
-                       if (interface->if_hwassist & IFNET_TSO_IPV6)
-                               interface->if_tso_v6_mtu = mtuLen;
-                       else
-                               error = EINVAL;
-                       break;
+       case AF_INET6:
+               if (interface->if_hwassist & IFNET_TSO_IPV6)
+                       interface->if_tso_v6_mtu = mtuLen;
+               else
+                       error = EINVAL;
+               break;
 
-               default:
-                       error = EPROTONOSUPPORT;
+       default:
+               error = EPROTONOSUPPORT;
+               break;
        }
 
-       return error;
+       return (error);
 }
-       
-errno_t 
-ifnet_get_tso_mtu(
-       ifnet_t interface, 
-       sa_family_t     family,
-       u_int32_t *mtuLen)
+
+errno_t
+ifnet_get_tso_mtu(ifnet_t interface, sa_family_t family, u_int32_t *mtuLen)
 {
        errno_t error = 0;
 
-       if (interface == NULL || mtuLen == NULL) return EINVAL;
-       
+       if (interface == NULL || mtuLen == NULL)
+               return (EINVAL);
+
        switch (family) {
+       case AF_INET:
+               if (interface->if_hwassist & IFNET_TSO_IPV4)
+                       *mtuLen = interface->if_tso_v4_mtu;
+               else
+                       error = EINVAL;
+               break;
 
-               case AF_INET: 
-                       if (interface->if_hwassist & IFNET_TSO_IPV4)
-                               *mtuLen = interface->if_tso_v4_mtu;
-                       else
-                               error = EINVAL;
-                       break;
+       case AF_INET6:
+               if (interface->if_hwassist & IFNET_TSO_IPV6)
+                       *mtuLen = interface->if_tso_v6_mtu;
+               else
+                       error = EINVAL;
+               break;
 
-               case AF_INET6:
-                       if (interface->if_hwassist & IFNET_TSO_IPV6)
-                               *mtuLen = interface->if_tso_v6_mtu;
-                       else
-                               error = EINVAL;
-                       break;
-               default:
-                       error = EPROTONOSUPPORT;
+       default:
+               error = EPROTONOSUPPORT;
+               break;
        }
 
-       return error;
+       return (error);
 }
 
 errno_t
 ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
 {
-       struct kev_msg        ev_msg;
+       struct kev_msg ev_msg;
        struct net_event_data ev_data;
 
-       bzero(&ev_data, sizeof(struct net_event_data));
-       bzero(&ev_msg, sizeof(struct kev_msg));
+       bzero(&ev_data, sizeof (struct net_event_data));
+       bzero(&ev_msg, sizeof (struct kev_msg));
+
        if (interface == NULL)
-               return EINVAL;
+               return (EINVAL);
 
        /* Do not accept wacky values */
        if ((properties & mask) & ~IF_WAKE_VALID_FLAGS)
-               return EINVAL;
+               return (EINVAL);
 
        ifnet_lock_exclusive(interface);
 
-       interface->if_wake_properties = (properties & mask) | (interface->if_wake_properties & ~mask);
+       interface->if_wake_properties =
+           (properties & mask) | (interface->if_wake_properties & ~mask);
 
        ifnet_lock_done(interface);
 
        (void) ifnet_touch_lastchange(interface);
 
        /* Notify application of the change */
-       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-       ev_msg.kev_class      = KEV_NETWORK_CLASS;
-       ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_DL_SUBCLASS;
 
-       ev_msg.event_code = KEV_DL_WAKEFLAGS_CHANGED;
+       ev_msg.event_code       = KEV_DL_WAKEFLAGS_CHANGED;
        strlcpy(&ev_data.if_name[0], interface->if_name, IFNAMSIZ);
-       ev_data.if_family = interface->if_family;
-       ev_data.if_unit   = (u_int32_t) interface->if_unit;
-       ev_msg.dv[0].data_length = sizeof(struct net_event_data);
-       ev_msg.dv[0].data_ptr    = &ev_data;
+       ev_data.if_family       = interface->if_family;
+       ev_data.if_unit         = (u_int32_t)interface->if_unit;
+       ev_msg.dv[0].data_length = sizeof (struct net_event_data);
+       ev_msg.dv[0].data_ptr   = &ev_data;
        ev_msg.dv[1].data_length = 0;
        kev_post_msg(&ev_msg);
 
-       return 0;
+       return (0);
 }
 
 u_int32_t
 ifnet_get_wake_flags(ifnet_t interface)
 {
-       return interface == NULL ? 0 : interface->if_wake_properties;
+       return ((interface == NULL) ? 0 : interface->if_wake_properties);
 }
 
 /*
@@ -624,269 +876,358 @@ ifnet_get_link_mib_data(ifnet_t interface, void *mibData, u_int32_t *mibLen)
 }
 
 u_int32_t
-ifnet_get_link_mib_data_length(
-       ifnet_t interface)
+ifnet_get_link_mib_data_length(ifnet_t interface)
 {
-       return interface == NULL ? 0 : interface->if_linkmiblen;
+       return ((interface == NULL) ? 0 : interface->if_linkmiblen);
 }
 
 errno_t
-ifnet_output(
-       ifnet_t interface,
-       protocol_family_t protocol_family,
-       mbuf_t m,
-       void *route,
-       const struct sockaddr *dest)
+ifnet_output(ifnet_t interface, protocol_family_t protocol_family,
+    mbuf_t m, void *route, const struct sockaddr *dest)
 {
        if (interface == NULL || protocol_family == 0 || m == NULL) {
-               if (m)
+               if (m != NULL)
                        mbuf_freem_list(m);
-               return EINVAL;
+               return (EINVAL);
        }
-       return dlil_output(interface, protocol_family, m, route, dest, 0);
+       return (dlil_output(interface, protocol_family, m, route, dest, 0, NULL));
 }
 
 errno_t
-ifnet_output_raw(
-       ifnet_t interface,
-       protocol_family_t protocol_family,
-       mbuf_t m)
+ifnet_output_raw(ifnet_t interface, protocol_family_t protocol_family, mbuf_t m)
 {
        if (interface == NULL || m == NULL) {
-               if (m)
+               if (m != NULL)
                        mbuf_freem_list(m);
-               return EINVAL;
+               return (EINVAL);
        }
-       return dlil_output(interface, protocol_family, m, NULL, NULL, 1);
+       return (dlil_output(interface, protocol_family, m, NULL, NULL, 1, NULL));
 }
 
 errno_t
-ifnet_set_mtu(
-       ifnet_t interface,
-       u_int32_t mtu)
+ifnet_set_mtu(ifnet_t interface, u_int32_t mtu)
 {
-       if (interface == NULL) return EINVAL;
-       interface->if_data.ifi_mtu = mtu;
-       return 0;
+       if (interface == NULL)
+               return (EINVAL);
+
+       interface->if_mtu = mtu;
+       return (0);
 }
 
 u_int32_t
-ifnet_mtu(
-       ifnet_t interface)
+ifnet_mtu(ifnet_t interface)
 {
-       u_int32_t retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_mtu;
-       return retval;
+       return ((interface == NULL) ? 0 : interface->if_mtu);
 }
 
 u_char
-ifnet_type(
-       ifnet_t interface)
+ifnet_type(ifnet_t interface)
 {
-       u_char retval;
-       
-       retval = interface == NULL ? 0 : interface->if_data.ifi_type;
-       return retval;
+       return ((interface == NULL) ? 0 : interface->if_data.ifi_type);
 }
 
-#if 0
 errno_t
-ifnet_set_typelen(ifnet_t interface, u_char typelen)
+ifnet_set_addrlen(ifnet_t interface, u_char addrlen)
 {
-       ifnet_lock_exclusive(interface);
-       interface->if_data.ifi_typelen = typelen;
-       ifnet_lock_done(interface);
+       if (interface == NULL)
+               return (EINVAL);
+
+       interface->if_data.ifi_addrlen = addrlen;
        return (0);
 }
 
 u_char
-ifnet_typelen(
-       ifnet_t interface)
+ifnet_addrlen(ifnet_t interface)
 {
-       u_char retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_typelen;
-       return retval;
+       return ((interface == NULL) ? 0 : interface->if_data.ifi_addrlen);
 }
-#endif
 
 errno_t
-ifnet_set_addrlen(
-       ifnet_t interface,
-       u_char addrlen)
+ifnet_set_hdrlen(ifnet_t interface, u_char hdrlen)
 {
-       if (interface == NULL) return EINVAL;
-       interface->if_data.ifi_addrlen = addrlen;
-       return 0;
+       if (interface == NULL)
+               return (EINVAL);
+
+       interface->if_data.ifi_hdrlen = hdrlen;
+       return (0);
 }
 
 u_char
-ifnet_addrlen(
-       ifnet_t interface)
+ifnet_hdrlen(ifnet_t interface)
 {
-       u_char retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_addrlen;
-       return retval;
+       return ((interface == NULL) ? 0 : interface->if_data.ifi_hdrlen);
 }
 
 errno_t
-ifnet_set_hdrlen(
-       ifnet_t interface,
-       u_char hdrlen)
+ifnet_set_metric(ifnet_t interface, u_int32_t metric)
 {
-       if (interface == NULL) return EINVAL;
-       interface->if_data.ifi_hdrlen = hdrlen;
-       return 0;
+       if (interface == NULL)
+               return (EINVAL);
+
+       interface->if_data.ifi_metric = metric;
+       return (0);
 }
 
-u_char
-ifnet_hdrlen(
-       ifnet_t interface)
+u_int32_t
+ifnet_metric(ifnet_t interface)
 {
-       u_char retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_hdrlen;
-       return retval;
+       return ((interface == NULL) ? 0 : interface->if_data.ifi_metric);
 }
 
 errno_t
-ifnet_set_metric(
-       ifnet_t interface,
-       u_int32_t metric)
+ifnet_set_baudrate(struct ifnet *ifp, u_int64_t baudrate)
 {
-       if (interface == NULL) return EINVAL;
-       interface->if_data.ifi_metric = metric;
-       return 0;
+       if (ifp == NULL)
+               return (EINVAL);
+
+       ifp->if_output_bw.max_bw = ifp->if_input_bw.max_bw =
+           ifp->if_output_bw.eff_bw = ifp->if_input_bw.eff_bw = baudrate;
+
+       /* Pin if_baudrate to 32 bits until we can change the storage size */
+       ifp->if_baudrate = (baudrate > 0xFFFFFFFF) ? 0xFFFFFFFF : baudrate;
+
+       return (0);
 }
 
-u_int32_t
-ifnet_metric(
-       ifnet_t interface)
+u_int64_t
+ifnet_baudrate(struct ifnet *ifp)
+{
+       return ((ifp == NULL) ? 0 : ifp->if_baudrate);
+}
+
+errno_t
+ifnet_set_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw,
+    struct if_bandwidths *input_bw)
 {
-       u_int32_t retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_metric;
-       return retval;
+       if (ifp == NULL)
+               return (EINVAL);
+
+       if (input_bw != NULL)
+               (void) ifnet_set_input_bandwidths(ifp, input_bw);
+
+       if (output_bw != NULL)
+               (void) ifnet_set_output_bandwidths(ifp, output_bw, FALSE);
+
+       return (0);
+}
+
+errno_t
+ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw,
+    boolean_t locked)
+{
+       struct if_bandwidths old_bw;
+       struct ifclassq *ifq;
+       u_int64_t br;
+
+       ifq = &ifp->if_snd;
+       if (!locked)
+               IFCQ_LOCK(ifq);
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       old_bw = ifp->if_output_bw;
+       if (bw != NULL) {
+               if (bw->eff_bw != 0)
+                       ifp->if_output_bw.eff_bw = bw->eff_bw;
+               if (bw->max_bw != 0)
+                       ifp->if_output_bw.max_bw = bw->max_bw;
+               if (ifp->if_output_bw.eff_bw > ifp->if_output_bw.max_bw)
+                       ifp->if_output_bw.max_bw = ifp->if_output_bw.eff_bw;
+               else if (ifp->if_output_bw.eff_bw == 0)
+                       ifp->if_output_bw.eff_bw = ifp->if_output_bw.max_bw;
+       }
+
+       /* Pin if_baudrate to 32 bits */
+       br = MAX(ifp->if_output_bw.max_bw, ifp->if_input_bw.max_bw);
+       if (br != 0)
+               ifp->if_baudrate = (br > 0xFFFFFFFF) ? 0xFFFFFFFF : br;
+
+       /* Adjust queue parameters if needed */
+       if (old_bw.eff_bw != ifp->if_output_bw.eff_bw ||
+           old_bw.max_bw != ifp->if_output_bw.max_bw)
+               ifnet_update_sndq(ifq, CLASSQ_EV_LINK_SPEED);
+
+       if (!locked)
+               IFCQ_UNLOCK(ifq);
+
+       return (0);
 }
 
 errno_t
-ifnet_set_baudrate(
-       ifnet_t interface,
-       u_int64_t baudrate)
+ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw)
 {
-       if (interface == NULL) return EINVAL;
-       /* Pin baudrate to 32 bits until we can change the storage size */
-       interface->if_data.ifi_baudrate = baudrate > 0xFFFFFFFF ? 0xFFFFFFFF : baudrate;
-       return 0;
+       struct if_bandwidths old_bw;
+
+       old_bw = ifp->if_input_bw;
+       if (bw->eff_bw != 0)
+               ifp->if_input_bw.eff_bw = bw->eff_bw;
+       if (bw->max_bw != 0)
+               ifp->if_input_bw.max_bw = bw->max_bw;
+       if (ifp->if_input_bw.eff_bw > ifp->if_input_bw.max_bw)
+               ifp->if_input_bw.max_bw = ifp->if_input_bw.eff_bw;
+       else if (ifp->if_input_bw.eff_bw == 0)
+               ifp->if_input_bw.eff_bw = ifp->if_input_bw.max_bw;
+
+       if (old_bw.eff_bw != ifp->if_input_bw.eff_bw ||
+           old_bw.max_bw != ifp->if_input_bw.max_bw)
+               ifnet_update_rcv(ifp, CLASSQ_EV_LINK_SPEED);
+
+       return (0);
 }
 
 u_int64_t
-ifnet_baudrate(
-       ifnet_t interface)
+ifnet_output_linkrate(struct ifnet *ifp)
 {
-       u_int64_t retval;
-       retval = interface == NULL ? 0 : interface->if_data.ifi_baudrate;
-       return retval;
+       struct ifclassq *ifq = &ifp->if_snd;
+       u_int64_t rate;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       rate = ifp->if_output_bw.eff_bw;
+       if (IFCQ_TBR_IS_ENABLED(ifq)) {
+               u_int64_t tbr_rate = ifp->if_snd.ifcq_tbr.tbr_rate_raw;
+               VERIFY(tbr_rate > 0);
+               rate = MIN(rate, tbr_rate);
+       }
+
+       return (rate);
+}
+
+u_int64_t
+ifnet_input_linkrate(struct ifnet *ifp)
+{
+       return (ifp->if_input_bw.eff_bw);
 }
 
 errno_t
-ifnet_stat_increment(ifnet_t interface,
-    const struct ifnet_stat_increment_param *counts)
+ifnet_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw,
+    struct if_bandwidths *input_bw)
 {
-       if (interface == NULL)
+       if (ifp == NULL)
                return (EINVAL);
 
-       atomic_add_64(&interface->if_data.ifi_ipackets, counts->packets_in);
-       atomic_add_64(&interface->if_data.ifi_ibytes, counts->bytes_in);
-       atomic_add_64(&interface->if_data.ifi_ierrors, counts->errors_in);
+       if (output_bw != NULL)
+               *output_bw = ifp->if_output_bw;
+       if (input_bw != NULL)
+               *input_bw = ifp->if_input_bw;
+
+       return (0);
+}
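
A usage sketch (illustrative, not part of this commit; ifp and the 100 Mbps
figures are placeholders): a driver that knows its link rates can report
them through ifnet_set_bandwidths as follows.

        struct if_bandwidths out_bw, in_bw;

        bzero(&out_bw, sizeof (out_bw));
        bzero(&in_bw, sizeof (in_bw));
        out_bw.eff_bw = out_bw.max_bw = 100000000ULL;   /* 100 Mbps, in bps */
        in_bw.eff_bw = in_bw.max_bw = 100000000ULL;
        (void) ifnet_set_bandwidths(ifp, &out_bw, &in_bw);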
 
-       atomic_add_64(&interface->if_data.ifi_opackets, counts->packets_out);
-       atomic_add_64(&interface->if_data.ifi_obytes, counts->bytes_out);
-       atomic_add_64(&interface->if_data.ifi_oerrors, counts->errors_out);
+errno_t
+ifnet_stat_increment(struct ifnet *ifp,
+    const struct ifnet_stat_increment_param *s)
+{
+       if (ifp == NULL)
+               return (EINVAL);
 
-       atomic_add_64(&interface->if_data.ifi_collisions, counts->collisions);
-       atomic_add_64(&interface->if_data.ifi_iqdrops, counts->dropped);
+       if (s->packets_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
+       if (s->bytes_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
+       if (s->errors_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
+
+       if (s->packets_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
+       if (s->bytes_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
+       if (s->errors_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
+
+       if (s->collisions != 0)
+               atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
+       if (s->dropped != 0)
+               atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
 
        /* Touch the last change time. */
-       TOUCHLASTCHANGE(&interface->if_lastchange);
+       TOUCHLASTCHANGE(&ifp->if_lastchange);
 
        return (0);
 }
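
A usage sketch (illustrative counts, not part of this commit): a driver's
receive-completion path can batch its counter updates through this call:

        struct ifnet_stat_increment_param stats;

        bzero(&stats, sizeof (stats));
        stats.packets_in = 32;          /* frames completed this interrupt */
        stats.bytes_in = 48 * 1024;
        (void) ifnet_stat_increment(ifp, &stats);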
 
 errno_t
-ifnet_stat_increment_in(ifnet_t interface, u_int32_t packets_in,
+ifnet_stat_increment_in(struct ifnet *ifp, u_int32_t packets_in,
     u_int32_t bytes_in, u_int32_t errors_in)
 {
-       if (interface == NULL)
+       if (ifp == NULL)
                return (EINVAL);
 
-       atomic_add_64(&interface->if_data.ifi_ipackets, packets_in);
-       atomic_add_64(&interface->if_data.ifi_ibytes, bytes_in);
-       atomic_add_64(&interface->if_data.ifi_ierrors, errors_in);
+       if (packets_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ipackets, packets_in);
+       if (bytes_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ibytes, bytes_in);
+       if (errors_in != 0)
+               atomic_add_64(&ifp->if_data.ifi_ierrors, errors_in);
 
-       TOUCHLASTCHANGE(&interface->if_lastchange);
+       TOUCHLASTCHANGE(&ifp->if_lastchange);
 
        return (0);
 }
 
 errno_t
-ifnet_stat_increment_out(ifnet_t interface, u_int32_t packets_out,
+ifnet_stat_increment_out(struct ifnet *ifp, u_int32_t packets_out,
     u_int32_t bytes_out, u_int32_t errors_out)
 {
-       if (interface == NULL)
+       if (ifp == NULL)
                return (EINVAL);
 
-       atomic_add_64(&interface->if_data.ifi_opackets, packets_out);
-       atomic_add_64(&interface->if_data.ifi_obytes, bytes_out);
-       atomic_add_64(&interface->if_data.ifi_oerrors, errors_out);
+       if (packets_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_opackets, packets_out);
+       if (bytes_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_obytes, bytes_out);
+       if (errors_out != 0)
+               atomic_add_64(&ifp->if_data.ifi_oerrors, errors_out);
 
-       TOUCHLASTCHANGE(&interface->if_lastchange);
+       TOUCHLASTCHANGE(&ifp->if_lastchange);
 
        return (0);
 }
 
 errno_t
-ifnet_set_stat(ifnet_t interface, const struct ifnet_stats_param *stats)
+ifnet_set_stat(struct ifnet *ifp, const struct ifnet_stats_param *s)
 {
-       if (interface == NULL)
+       if (ifp == NULL)
                return (EINVAL);
 
-       atomic_set_64(&interface->if_data.ifi_ipackets, stats->packets_in);
-       atomic_set_64(&interface->if_data.ifi_ibytes, stats->bytes_in);
-       atomic_set_64(&interface->if_data.ifi_imcasts, stats->multicasts_in);
-       atomic_set_64(&interface->if_data.ifi_ierrors, stats->errors_in);
+       atomic_set_64(&ifp->if_data.ifi_ipackets, s->packets_in);
+       atomic_set_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
+       atomic_set_64(&ifp->if_data.ifi_imcasts, s->multicasts_in);
+       atomic_set_64(&ifp->if_data.ifi_ierrors, s->errors_in);
 
-       atomic_set_64(&interface->if_data.ifi_opackets, stats->packets_out);
-       atomic_set_64(&interface->if_data.ifi_obytes, stats->bytes_out);
-       atomic_set_64(&interface->if_data.ifi_omcasts, stats->multicasts_out);
-       atomic_set_64(&interface->if_data.ifi_oerrors, stats->errors_out);
+       atomic_set_64(&ifp->if_data.ifi_opackets, s->packets_out);
+       atomic_set_64(&ifp->if_data.ifi_obytes, s->bytes_out);
+       atomic_set_64(&ifp->if_data.ifi_omcasts, s->multicasts_out);
+       atomic_set_64(&ifp->if_data.ifi_oerrors, s->errors_out);
 
-       atomic_set_64(&interface->if_data.ifi_collisions, stats->collisions);
-       atomic_set_64(&interface->if_data.ifi_iqdrops, stats->dropped);
-       atomic_set_64(&interface->if_data.ifi_noproto, stats->no_protocol);
+       atomic_set_64(&ifp->if_data.ifi_collisions, s->collisions);
+       atomic_set_64(&ifp->if_data.ifi_iqdrops, s->dropped);
+       atomic_set_64(&ifp->if_data.ifi_noproto, s->no_protocol);
 
        /* Touch the last change time. */
-       TOUCHLASTCHANGE(&interface->if_lastchange);
+       TOUCHLASTCHANGE(&ifp->if_lastchange);
 
-       return 0;
+       return (0);
 }
 
 errno_t
-ifnet_stat(ifnet_t interface, struct ifnet_stats_param *stats)
+ifnet_stat(struct ifnet *ifp, struct ifnet_stats_param *s)
 {
-       if (interface == NULL)
+       if (ifp == NULL)
                return (EINVAL);
 
-       atomic_get_64(stats->packets_in, &interface->if_data.ifi_ipackets);
-       atomic_get_64(stats->bytes_in, &interface->if_data.ifi_ibytes);
-       atomic_get_64(stats->multicasts_in, &interface->if_data.ifi_imcasts);
-       atomic_get_64(stats->errors_in, &interface->if_data.ifi_ierrors);
+       atomic_get_64(s->packets_in, &ifp->if_data.ifi_ipackets);
+       atomic_get_64(s->bytes_in, &ifp->if_data.ifi_ibytes);
+       atomic_get_64(s->multicasts_in, &ifp->if_data.ifi_imcasts);
+       atomic_get_64(s->errors_in, &ifp->if_data.ifi_ierrors);
 
-       atomic_get_64(stats->packets_out, &interface->if_data.ifi_opackets);
-       atomic_get_64(stats->bytes_out, &interface->if_data.ifi_obytes);
-       atomic_get_64(stats->multicasts_out, &interface->if_data.ifi_omcasts);
-       atomic_get_64(stats->errors_out, &interface->if_data.ifi_oerrors);
+       atomic_get_64(s->packets_out, &ifp->if_data.ifi_opackets);
+       atomic_get_64(s->bytes_out, &ifp->if_data.ifi_obytes);
+       atomic_get_64(s->multicasts_out, &ifp->if_data.ifi_omcasts);
+       atomic_get_64(s->errors_out, &ifp->if_data.ifi_oerrors);
 
-       atomic_get_64(stats->collisions, &interface->if_data.ifi_collisions);
-       atomic_get_64(stats->dropped, &interface->if_data.ifi_iqdrops);
-       atomic_get_64(stats->no_protocol, &interface->if_data.ifi_noproto);
+       atomic_get_64(s->collisions, &ifp->if_data.ifi_collisions);
+       atomic_get_64(s->dropped, &ifp->if_data.ifi_iqdrops);
+       atomic_get_64(s->no_protocol, &ifp->if_data.ifi_noproto);
 
        return (0);
 }
@@ -1060,38 +1401,37 @@ ifnet_lladdr(ifnet_t interface)
         */
        ifa = interface->if_lladdr;
        IFA_LOCK_SPIN(ifa);
-       lladdr = LLADDR(SDL(ifa->ifa_addr));
+       lladdr = LLADDR(SDL((void *)ifa->ifa_addr));
        IFA_UNLOCK(ifa);
 
        return (lladdr);
 }
 
 errno_t
-ifnet_llbroadcast_copy_bytes(
-       ifnet_t interface,
-       void    *addr,
-       size_t  buffer_len,
-       size_t  *out_len)
+ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr, size_t buffer_len,
+    size_t *out_len)
 {
-       if (interface == NULL || addr == NULL || out_len == NULL) return EINVAL;
-       
+       if (interface == NULL || addr == NULL || out_len == NULL)
+               return (EINVAL);
+
        *out_len = interface->if_broadcast.length;
-       
-       if (buffer_len < interface->if_broadcast.length) {
-               return EMSGSIZE;
-       }
-       
+
+       if (buffer_len < interface->if_broadcast.length)
+               return (EMSGSIZE);
+
        if (interface->if_broadcast.length == 0)
-               return ENXIO;
-       
-       if (interface->if_broadcast.length <= sizeof(interface->if_broadcast.u.buffer)) {
-               bcopy(interface->if_broadcast.u.buffer, addr, interface->if_broadcast.length);
-       }
-       else {
-               bcopy(interface->if_broadcast.u.ptr, addr, interface->if_broadcast.length);
+               return (ENXIO);
+
+       if (interface->if_broadcast.length <=
+           sizeof (interface->if_broadcast.u.buffer)) {
+               bcopy(interface->if_broadcast.u.buffer, addr,
+                   interface->if_broadcast.length);
+       } else {
+               bcopy(interface->if_broadcast.u.ptr, addr,
+                   interface->if_broadcast.length);
        }
-       
-       return 0;
+
+       return (0);
 }
 
 errno_t
@@ -1109,7 +1449,7 @@ ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr, size_t   lladdr_len)
         */
        ifa = interface->if_lladdr;
        IFA_LOCK_SPIN(ifa);
-       sdl = SDL(ifa->ifa_addr);
+       sdl = SDL((void *)ifa->ifa_addr);
        if (lladdr_len != sdl->sdl_alen) {
                bzero(lladdr, lladdr_len);
                IFA_UNLOCK(ifa);
@@ -1144,7 +1484,7 @@ ifnet_set_lladdr_internal(ifnet_t interface, const void *lladdr,
                struct sockaddr_dl *sdl;
 
                IFA_LOCK_SPIN(ifa);
-               sdl = (struct sockaddr_dl*)ifa->ifa_addr;
+               sdl = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
                if (lladdr_len != 0) {
                        bcopy(lladdr, LLADDR(sdl), lladdr_len);
                } else {
@@ -1165,29 +1505,24 @@ ifnet_set_lladdr_internal(ifnet_t interface, const void *lladdr,
        /* Generate a kernel event */
        if (error == 0) {
                dlil_post_msg(interface, KEV_DL_SUBCLASS,
-                       KEV_DL_LINK_ADDRESS_CHANGED, NULL, 0);
+                   KEV_DL_LINK_ADDRESS_CHANGED, NULL, 0);
        }
 
        return (error);
 }
 
 errno_t
-ifnet_set_lladdr(
-       ifnet_t interface,
-       const void* lladdr,
-       size_t lladdr_len)
+ifnet_set_lladdr(ifnet_t interface, const void *lladdr, size_t lladdr_len)
 {
-       return ifnet_set_lladdr_internal(interface, lladdr, lladdr_len, 0, 0);
+       return (ifnet_set_lladdr_internal(interface, lladdr, lladdr_len, 0, 0));
 }
 
 errno_t
-ifnet_set_lladdr_and_type(
-       ifnet_t interface,
-       const void* lladdr,
-       size_t lladdr_len,
-       u_char type)
+ifnet_set_lladdr_and_type(ifnet_t interface, const void *lladdr,
+    size_t lladdr_len, u_char type)
 {
-       return ifnet_set_lladdr_internal(interface, lladdr, lladdr_len, type, 1);
+       return (ifnet_set_lladdr_internal(interface, lladdr,
+           lladdr_len, type, 1));
 }
 
 errno_t
@@ -1256,18 +1591,16 @@ ifnet_get_multicast_list(ifnet_t ifp, ifmultiaddr_t **addresses)
 }
 
 void
-ifnet_free_multicast_list(
-       ifmultiaddr_t *addresses)
+ifnet_free_multicast_list(ifmultiaddr_t *addresses)
 {
        int i;
-       
-       if (addresses == NULL) return;
-       
+
+       if (addresses == NULL)
+               return;
+
        for (i = 0; addresses[i] != NULL; i++)
-       {
                ifmaddr_release(addresses[i]);
-       }
-       
+
        FREE(addresses, M_TEMP);
 }
 
@@ -1294,10 +1627,10 @@ ifnet_find_by_name(const char *ifname, ifnet_t *ifpp)
                        continue;
 
                IFA_LOCK(ifa);
-               ll_addr = (struct sockaddr_dl *)ifa->ifa_addr;
+               ll_addr = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
 
-               if (namelen == ll_addr->sdl_nlen &&
-                   !strncmp(ll_addr->sdl_data, ifname, ll_addr->sdl_nlen)) {
+               if (namelen == ll_addr->sdl_nlen && strncmp(ll_addr->sdl_data,
+                   ifname, ll_addr->sdl_nlen) == 0) {
                        IFA_UNLOCK(ifa);
                        *ifpp = ifp;
                        ifnet_reference(*ifpp);
@@ -1406,8 +1739,91 @@ ifnet_list_free(ifnet_t *interfaces)
        FREE(interfaces, M_TEMP);
 }
 
+void
+ifnet_transmit_burst_start(ifnet_t ifp, mbuf_t pkt)
+{
+       uint32_t orig_flags;
+
+       if (ifp == NULL || !(pkt->m_flags & M_PKTHDR))
+               return;
+
+       orig_flags = OSBitOrAtomic(IF_MEASURED_BW_INPROGRESS,
+           &ifp->if_bw.flags);
+       if (orig_flags & IF_MEASURED_BW_INPROGRESS) {
+               /* There is already a measurement in progress; skip this one */
+               return;
+       }
+
+       ifp->if_bw.start_seq = pkt->m_pkthdr.pf_mtag.pftag_pktseq;
+       ifp->if_bw.start_ts = mach_absolute_time();
+}
+
+void
+ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt)
+{
+       uint64_t oseq, ots, bytes, ts, t;
+       uint32_t flags;
+
+       if (ifp == NULL || !(pkt->m_flags & M_PKTHDR))
+               return;
+
+       flags = OSBitOrAtomic(IF_MEASURED_BW_CALCULATION, &ifp->if_bw.flags);
+
+       /* If a calculation is already in progress, just return */
+       if (flags & IF_MEASURED_BW_CALCULATION)
+               return;
+
+       /* Check if a measurement was started at all */
+       if (!(flags & IF_MEASURED_BW_INPROGRESS)) {
+               /*
+                * It is an error to call burst_end before burst_start.
+                * Reset the calculation flag and return.
+                */
+               goto done;
+       }
+
+       oseq = pkt->m_pkthdr.pf_mtag.pftag_pktseq;
+       ots = mach_absolute_time();
+
+       if (ifp->if_bw.start_seq > 0 && oseq > ifp->if_bw.start_seq) {
+               ts = ots - ifp->if_bw.start_ts;
+               if (ts > 0) {
+                       absolutetime_to_nanoseconds(ts, &t);
+                       bytes = oseq - ifp->if_bw.start_seq;
+                       ifp->if_bw.bytes = bytes;
+                       ifp->if_bw.ts = ts;
+
+                       if (t > 0) {
+                               uint64_t bw = 0;
+
+                               /* Compute bandwidth as bytes/ms */
+                               bw = (bytes * NSEC_PER_MSEC) / t;
+                               if (bw > 0) {
+                                       if (ifp->if_bw.bw > 0) {
+                                               u_int32_t shft;
+
+                                               shft = if_bw_smoothing_val;
+                                               /* Compute EWMA of bw */
+                                               ifp->if_bw.bw = (bw +
+                                                   ((ifp->if_bw.bw << shft) -
+                                                   ifp->if_bw.bw)) >> shft;
+                                       } else {
+                                               ifp->if_bw.bw = bw;
+                                       }
+                               }
+                       }
+                       ifp->if_bw.last_seq = oseq;
+                       ifp->if_bw.last_ts = ots;
+               }
+       }
+
+done:
+       flags = ~(IF_MEASURED_BW_INPROGRESS | IF_MEASURED_BW_CALCULATION);
+       OSBitAndAtomic(flags, &ifp->if_bw.flags);
+}
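
The smoothing step above is an exponentially weighted moving average:
new = (sample + old * (2^shft - 1)) / 2^shft, with shft taken from
if_bw_smoothing_val. A standalone restatement (sketch only):

        /* EWMA update equivalent to the code in ifnet_transmit_burst_end() */
        static uint64_t
        ewma_update(uint64_t old_bw, uint64_t sample_bw, uint32_t shft)
        {
                if (old_bw == 0)
                        return (sample_bw);  /* first sample seeds the average */
                return ((sample_bw + ((old_bw << shft) - old_bw)) >> shft);
        }

For example, with shft = 2, old_bw = 800 bytes/ms and sample_bw = 1200
bytes/ms, the new average is (1200 + 800 * 3) / 4 = 900 bytes/ms.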
+
 /****************************************************************************/
-/* ifaddr_t accessors                                                                                                          */
+/* ifaddr_t accessors                                                      */
 /****************************************************************************/
 
 errno_t
@@ -1540,46 +1956,49 @@ ifaddr_ifnet(ifaddr_t ifa)
 }
 
 ifaddr_t
-ifaddr_withaddr(
-       const struct sockaddr* address)
+ifaddr_withaddr(const struct sockaddr *address)
 {
-       if (address == NULL) return NULL;
-       return ifa_ifwithaddr(address);
+       if (address == NULL)
+               return (NULL);
+
+       return (ifa_ifwithaddr(address));
 }
 
 ifaddr_t
-ifaddr_withdstaddr(
-       const struct sockaddr* address)
+ifaddr_withdstaddr(const struct sockaddr *address)
 {
-       if (address == NULL) return NULL;
-       return ifa_ifwithdstaddr(address);
+       if (address == NULL)
+               return (NULL);
+
+       return (ifa_ifwithdstaddr(address));
 }
 
 ifaddr_t
-ifaddr_withnet(
-       const struct sockaddr* net)
+ifaddr_withnet(const struct sockaddr *net)
 {
-       if (net == NULL) return NULL;
-       return ifa_ifwithnet(net);
+       if (net == NULL)
+               return (NULL);
+
+       return (ifa_ifwithnet(net));
 }
 
 ifaddr_t
-ifaddr_withroute(
-       int flags,
-       const struct sockaddr* destination,
-       const struct sockaddr* gateway)
+ifaddr_withroute(int flags, const struct sockaddr *destination,
+    const struct sockaddr *gateway)
 {
-       if (destination == NULL || gateway == NULL) return NULL;
-       return ifa_ifwithroute(flags, destination, gateway);
+       if (destination == NULL || gateway == NULL)
+               return (NULL);
+
+       return (ifa_ifwithroute(flags, destination, gateway));
 }
 
 ifaddr_t
-ifaddr_findbestforaddr(
-       const struct sockaddr *addr,
-       ifnet_t interface)
+ifaddr_findbestforaddr(const struct sockaddr *addr, ifnet_t interface)
 {
-       if (addr == NULL || interface == NULL) return NULL;
-       return ifaof_ifpforaddr(addr, interface);
+       if (addr == NULL || interface == NULL)
+               return (NULL);
+
+       return (ifaof_ifpforaddr(addr, interface));
 }
 
 errno_t
@@ -1646,37 +2065,42 @@ ifmaddr_lladdress(ifmultiaddr_t ifma, struct sockaddr *out_addr,
 ifnet_t
 ifmaddr_ifnet(ifmultiaddr_t ifma)
 {
-       return (ifma == NULL ? NULL : ifma->ifma_ifp);
+       return ((ifma == NULL) ? NULL : ifma->ifma_ifp);
 }
 
 /******************************************************************************/
 /* interface cloner                                                           */
 /******************************************************************************/
 
-errno_t 
-ifnet_clone_attach(struct ifnet_clone_params *cloner_params, if_clone_t *ifcloner)
+errno_t
+ifnet_clone_attach(struct ifnet_clone_params *cloner_params,
+    if_clone_t *ifcloner)
 {
        errno_t error = 0;
        struct if_clone *ifc = NULL;
        size_t namelen;
-       
-       if (cloner_params == NULL || ifcloner == NULL || cloner_params->ifc_name == NULL ||
-               cloner_params->ifc_create == NULL || cloner_params->ifc_destroy == NULL ||
-               (namelen = strlen(cloner_params->ifc_name)) >= IFNAMSIZ) {
+
+       if (cloner_params == NULL || ifcloner == NULL ||
+           cloner_params->ifc_name == NULL ||
+           cloner_params->ifc_create == NULL ||
+           cloner_params->ifc_destroy == NULL ||
+           (namelen = strlen(cloner_params->ifc_name)) >= IFNAMSIZ) {
                error = EINVAL;
                goto fail;
        }
-       
+
        if (if_clone_lookup(cloner_params->ifc_name, NULL) != NULL) {
-               printf("ifnet_clone_attach: already a cloner for %s\n", cloner_params->ifc_name);
+               printf("%s: already a cloner for %s\n", __func__,
+                   cloner_params->ifc_name);
                error = EEXIST;
                goto fail;
        }
 
        /* Make room for name string */
-       ifc = _MALLOC(sizeof(struct if_clone) + IFNAMSIZ + 1, M_CLONE, M_WAITOK | M_ZERO);
+       ifc = _MALLOC(sizeof (struct if_clone) + IFNAMSIZ + 1, M_CLONE,
+           M_WAITOK | M_ZERO);
        if (ifc == NULL) {
-               printf("ifnet_clone_attach: _MALLOC failed\n");
+               printf("%s: _MALLOC failed\n", __func__);
                error = ENOBUFS;
                goto fail;
        }
@@ -1689,41 +2113,97 @@ ifnet_clone_attach(struct ifnet_clone_params *cloner_params, if_clone_t *ifclone
 
        error = if_clone_attach(ifc);
        if (error != 0) {
-               printf("ifnet_clone_attach: if_clone_attach failed %d\n", error);
+               printf("%s: if_clone_attach failed %d\n", __func__, error);
                goto fail;
        }
        *ifcloner = ifc;
-       
-       return 0;
+
+       return (0);
 fail:
        if (ifc != NULL)
                FREE(ifc, M_CLONE);
-       return error;   
+       return (error);
 }
 
-errno_t 
+errno_t
 ifnet_clone_detach(if_clone_t ifcloner)
 {
        errno_t error = 0;
        struct if_clone *ifc = ifcloner;
-       
+
        if (ifc == NULL || ifc->ifc_name == NULL)
-               return EINVAL;
-       
+               return (EINVAL);
+
        if ((if_clone_lookup(ifc->ifc_name, NULL)) == NULL) {
-               printf("ifnet_clone_attach: no cloner for %s\n", ifc->ifc_name);
+               printf("%s: no cloner for %s\n", __func__, ifc->ifc_name);
                error = EINVAL;
                goto fail;
        }
 
        if_clone_detach(ifc);
-       
+
        FREE(ifc, M_CLONE);
 
-       return 0;
 fail:
-       return error;   
+       return (error);
 }
 
+/******************************************************************************/
+/* misc                                                                       */
+/******************************************************************************/
 
+extern void udp_get_ports_used(unsigned int ifindex, uint8_t *bitfield);
+extern void tcp_get_ports_used(unsigned int ifindex, uint8_t *bitfield);
 
+errno_t
+ifnet_get_local_ports(ifnet_t ifp, uint8_t *bitfield)
+{
+       if (bitfield == NULL)
+               return (EINVAL);
+
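+       /* 65536 ports / 8 bits per byte = 8192 bytes, one bit per port */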
+       bzero(bitfield, 8192);
+
+       udp_get_ports_used(ifp ? ifp->if_index : 0, bitfield);
+       tcp_get_ports_used(ifp ? ifp->if_index : 0, bitfield);
+
+       return (0);
+}
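
How a caller might consume the bitfield (a sketch; it assumes the
conventional layout where bit i of byte n maps to port n * 8 + i, which
this commit does not spell out):

        uint8_t bitfield[8192];
        uint16_t port = 80;     /* illustrative */

        if (ifnet_get_local_ports(ifp, bitfield) == 0 &&
            (bitfield[port / 8] & (1 << (port % 8))) != 0)
                printf("port %u in use\n", (unsigned int)port);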
+
+errno_t
+ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr *sa, int32_t rssi,
+    int lqm, int npm, u_int8_t srvinfo[48])
+{
+       if (ifp == NULL || sa == NULL || srvinfo == NULL)
+               return (EINVAL);
+       if (sa->sa_len > sizeof (struct sockaddr_storage))
+               return (EINVAL);
+       if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6)
+               return (EINVAL);
+
+       dlil_node_present(ifp, sa, rssi, lqm, npm, srvinfo);
+       return (0);
+}
+
+errno_t
+ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr *sa)
+{
+       if (ifp == NULL || sa == NULL)
+               return (EINVAL);
+       if (sa->sa_len > sizeof (struct sockaddr_storage))
+               return (EINVAL);
+       if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6)
+               return (EINVAL);
+
+       dlil_node_absent(ifp, sa);
+       return (0);
+}
+
+errno_t
+ifnet_notice_master_elected(ifnet_t ifp)
+{
+       if (ifp == NULL)
+               return (EINVAL);
+
+       dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_MASTER_ELECTED, NULL, 0);
+       return (0);
+}
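
A caller-side sketch (the RSSI value and the zeroed metrics are
placeholders): the sockaddr must be AF_LINK or AF_INET6 and no larger
than a sockaddr_storage:

        struct sockaddr_dl sdl;
        u_int8_t srvinfo[48];

        bzero(&sdl, sizeof (sdl));
        sdl.sdl_len = sizeof (sdl);
        sdl.sdl_family = AF_LINK;
        /* ... fill in sdl_data with the neighbor's link-layer address ... */
        bzero(srvinfo, sizeof (srvinfo));
        (void) ifnet_notice_node_presence(ifp, (struct sockaddr *)&sdl,
            -40 /* dBm, illustrative */, 0 /* lqm */, 0 /* npm */, srvinfo);
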
index e2fd084b63cf2ae9b3ddc403385ef4eae2ab4763..d4df862dacf5eac9b259ebed5d96d9daa86ac45a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #ifndef __KPI_INTERFACE__
 #define __KPI_INTERFACE__
+
+#ifndef XNU_KERNEL_PRIVATE
+#include <TargetConditionals.h>
+#endif
+
 #include <sys/kernel_types.h>
 
+#ifdef KERNEL_PRIVATE
+#include <sys/kpi_mbuf.h>
+#endif /* KERNEL_PRIVATE */
+
 #ifndef _SA_FAMILY_T
 #define _SA_FAMILY_T
 typedef __uint8_t              sa_family_t;
 #endif
 
+#ifdef XNU_KERNEL_PRIVATE
+#if CONFIG_EMBEDDED
+       #define KPI_INTERFACE_EMBEDDED 1
+#else
+       #define KPI_INTERFACE_EMBEDDED 0
+#endif
+#else
+#if TARGET_OS_EMBEDDED
+       #define KPI_INTERFACE_EMBEDDED 1
+#else
+       #define KPI_INTERFACE_EMBEDDED 0
+#endif
+#endif
+
 struct timeval;
 struct sockaddr;
 struct sockaddr_dl;
@@ -81,7 +104,7 @@ enum {
        IFNET_FAMILY_DISC               = 8,
        IFNET_FAMILY_MDECAP             = 9,
        IFNET_FAMILY_GIF                = 10,
-       IFNET_FAMILY_FAITH              = 11,
+       IFNET_FAMILY_FAITH              = 11,   /* deprecated */
        IFNET_FAMILY_STF                = 12,
        IFNET_FAMILY_FIREWIRE           = 13,
        IFNET_FAMILY_BOND               = 14,
@@ -310,6 +333,10 @@ typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg);
                protocol's pre-output function.
        @param frame_type The frame type as determined by the protocol's
                pre-output function.
+       @param prepend_len The length of the bytes prepended to the mbuf
+               (used ONLY if KPI_INTERFACE_EMBEDDED is defined to 1).
+       @param postpend_len The length of the bytes postpended to the mbuf
+               (used ONLY if KPI_INTERFACE_EMBEDDED is defined to 1).
        @result
                If the result is zero, processing will continue normally.
                If the result is EJUSTRETURN, processing will stop but the
@@ -318,8 +345,11 @@ typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg);
                        the packet will be freed.
  */
 typedef errno_t (*ifnet_framer_func)(ifnet_t interface, mbuf_t *packet,
-    const struct sockaddr *dest, const char *desk_linkaddr,
-    const char *frame_type);
+       const struct sockaddr *dest, const char *desk_linkaddr, const char *frame_type
+#if KPI_INTERFACE_EMBEDDED
+       , u_int32_t *prepend_len, u_int32_t *postpend_len
+#endif /* KPI_INTERFACE_EMBEDDED */
+       );
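
For illustration only (not from this commit): under the non-embedded
prototype, an Ethernet-style framer might prepend its header as below.
my_framer is hypothetical, and mbuf_prepend's failure semantics should be
confirmed against the mbuf KPI:

        static errno_t
        my_framer(ifnet_t ifp, mbuf_t *packet, const struct sockaddr *dest,
            const char *dest_linkaddr, const char *frame_type)
        {
        #pragma unused(dest)
                struct ether_header *eh;

                if (mbuf_prepend(packet, sizeof (*eh), MBUF_DONTWAIT) != 0)
                        return (EJUSTRETURN);   /* stop; see @result above */

                eh = mbuf_data(*packet);
                bcopy(dest_linkaddr, eh->ether_dhost, ETHER_ADDR_LEN);
                bcopy(ifnet_lladdr(ifp), eh->ether_shost, ETHER_ADDR_LEN);
                bcopy(frame_type, &eh->ether_type, sizeof (eh->ether_type));
                return (0);
        }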
 
 /*!
        @typedef ifnet_add_proto_func
@@ -560,9 +590,9 @@ struct ifnet_stat_increment_param {
 /*!
        @struct ifnet_init_params
        @discussion This structure is used to define various properties of
-               the interface when calling ifnet_init. A copy of these values
-               will be stored in the ifnet and can not be modified while the
-               interface is attached.
+               the interface when calling ifnet_allocate. A copy of these
+               values will be stored in the ifnet and cannot be modified
+               while the interface is attached.
        @field uniqueid An identifier unique to this instance of the
                interface.
        @field uniqueid_len The length, in bytes, of the uniqueid.
@@ -618,6 +648,245 @@ struct ifnet_init_params {
        u_int32_t               broadcast_len;          /* required for non point-to-point interfaces */
 };
 
+#ifdef KERNEL_PRIVATE
+/* Valid values for version */
+#define        IFNET_INIT_VERSION_2            2
+#define        IFNET_INIT_CURRENT_VERSION      IFNET_INIT_VERSION_2
+
+/* Valid values for flags */
+#define        IFNET_INIT_LEGACY       0x1     /* legacy network interface model */
+#define        IFNET_INIT_INPUT_POLL   0x2     /* opportunistic input polling model */
+
+/*
+       @typedef ifnet_pre_enqueue_func
+       @discussion ifnet_pre_enqueue_func is called for each outgoing packet
+               for the interface. The driver may perform last-minute changes
+               on the (fully formed) packet, but it is responsible for calling
+               ifnet_enqueue() to enqueue the packet upon completion.
+       @param interface The interface being sent on.
+       @param data The packet to be sent.
+ */
+typedef errno_t (*ifnet_pre_enqueue_func)(ifnet_t interface, mbuf_t data);
+
+/*
+       @typedef ifnet_start_func
+       @discussion ifnet_start_func is used to indicate to the driver that
+               one or more packets may be dequeued by calling ifnet_dequeue()
+               or ifnet_dequeue_multi(). This routine gets invoked when
+               ifnet_start() is called; the ifnet_start_func callback will
+               be executed within the context of a dedicated kernel thread,
+               hence it is guaranteed to be single threaded. The driver must
+               employ additional serializations if this callback routine is
+               to be called directly from another context, in order to prevent
+               race-condition-related issues (e.g. out-of-order packets).
+               The dequeued packets will be fully formed packets (including
+               frame headers). The packets must be freed by the driver.
+       @param interface The interface being sent on.
+ */
+typedef void (*ifnet_start_func)(ifnet_t interface);
+
+/*
+       @typedef ifnet_input_poll_func
+       @discussion ifnet_input_poll_func is called by the network stack to
+               retrieve one or more packets from the driver which implements
+               the new driver input model.
+       @param interface The interface to retrieve the packets from.
+       @param flags For future use.
+       @param max_count The maximum number of packets to be dequeued.
+       @param first_packet Pointer to the first packet being dequeued.
+       @param last_packet Pointer to the last packet being dequeued.
+       @param cnt Pointer to a storage for the number of packets dequeued.
+       @param len Pointer to a storage for the total length (in bytes)
+               of the dequeued packets.
+ */
+typedef void (*ifnet_input_poll_func)(ifnet_t interface, u_int32_t flags,
+    u_int32_t max_count, mbuf_t *first_packet, mbuf_t *last_packet,
+    u_int32_t *cnt, u_int32_t *len);
+
+/*
+       @enum Interface control commands
+       @abstract Constants defining control commands.
+       @constant IFNET_CTL_SET_INPUT_MODEL Set input model.
+       @constant IFNET_CTL_GET_INPUT_MODEL Get input model.
+ */
+enum {
+       IFNET_CTL_SET_INPUT_MODEL       = 1,
+       IFNET_CTL_GET_INPUT_MODEL       = 2,
+};
+
+/*
+       @typedef ifnet_ctl_cmd_t
+       @abstract Storage type for the interface control command.
+ */
+typedef u_int32_t ifnet_ctl_cmd_t;
+
+/*
+       @enum Interface model sub-commands
+       @abstract Constants defining model sub-commands.
+       @constant IFNET_MODEL_INPUT_POLL_OFF Polling is inactive.  When set,
+               the network stack will no longer invoke the input_poll callback
+               until the next time polling is turned on; the driver should
+               proceed to pushing the packets up to the network stack as in
+               the legacy input model, and if applicable, the driver should
+               also enable receive interrupt for the hardware.  During get,
+               this indicates that the driver is currently operating in
+               the legacy/push input model.
+       @constant IFNET_MODEL_INPUT_POLL_ON Polling is active.  When set, the
+               network stack will begin to invoke the input_poll callback to
+               retrieve packets from the driver until the next time polling
+               is turned off; the driver should no longer be pushing packets
+               up to the network stack, and if applicable, the driver should
+               also disable receive interrupt for the hardware.  During get,
+               this indicates that the driver is currently operating in
+               the new/pull input model.
+ */
+enum {
+       IFNET_MODEL_INPUT_POLL_OFF      = 0,
+       IFNET_MODEL_INPUT_POLL_ON       = 1,
+};
+
+/*
+       @typedef ifnet_model_t
+       @abstract Storage type for the interface model sub-command.
+ */
+typedef u_int32_t ifnet_model_t;
+
+/*
+       @struct ifnet_model_params
+       @discussion This structure is used as parameter to the ifnet model
+               sub-commands.
+       @field model The interface model.
+ */
+struct ifnet_model_params {
+       ifnet_model_t           model;
+       u_int32_t               reserved[3];
+};
+
+/*
+       @typedef ifnet_ctl_func
+       @discussion ifnet_ctl_func is called by the network stack to inform
+               about changes in parameters, or retrieve the parameters
+               related to the output or input processing or capabilities.
+       @param interface The interface.
+       @param cmd The ifnet_ctl_cmd_t interface control command.
+       @param arglen The length of the command argument.
+       @param arg The command argument.
+       @result 0 upon success, otherwise errno error.
+ */
+typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd,
+    u_int32_t arglen, void *arg);
+
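
A driver-side sketch of an input_ctl handler for these sub-commands
(my_enable_rxirq and my_disable_rxirq are hypothetical helpers):

        static errno_t
        my_input_ctl(ifnet_t ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen,
            void *arg)
        {
                struct ifnet_model_params *p = arg;

                if (cmd != IFNET_CTL_SET_INPUT_MODEL || p == NULL ||
                    arglen < sizeof (*p))
                        return (EINVAL);

                if (p->model == IFNET_MODEL_INPUT_POLL_ON)
                        my_disable_rxirq(ifp); /* stack pulls via input_poll */
                else
                        my_enable_rxirq(ifp);  /* resume pushing packets up */
                return (0);
        }
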
+/*
+       @struct ifnet_init_eparams
+       @discussion This structure is used to define various properties of
+               the interface when calling ifnet_allocate_extended. A copy of
+               these values will be stored in the ifnet and cannot be modified
+               while the interface is attached.
+       @field ver The current structure version (IFNET_INIT_CURRENT_VERSION)
+       @field len The length of this structure.
+       @field flags See above values for flags.
+       @field uniqueid An identifier unique to this instance of the
+               interface.
+       @field uniqueid_len The length, in bytes, of the uniqueid.
+       @field name The interface name (i.e. en).
+       @field unit The interface unit number (en0's unit number is 0).
+       @field family The interface family.
+       @field type The interface type (see sys/if_types.h). Must be less
+               than 256. For new types, use IFT_OTHER.
+       @field sndq_maxlen The maximum size of the output queue; valid only
+               if IFNET_INIT_LEGACY is not set.
+       @field output The output function for the interface. Every packet the
+               stack attempts to send through this interface will go out
+               through this function.
+       @field pre_enqueue The pre_enqueue function for the interface, valid
+               only if IFNET_INIT_LEGACY is not set, and optional if it is set.
+       @field start The start function for the interface, valid only if
+               IFNET_INIT_LEGACY is not set, and required if it is set.
+       @field output_ctl The output control function for the interface, valid
+               only if IFNET_INIT_LEGACY is not set.
+       @field output_sched_model The IFNET_SCHED_MODEL value for the output
+               queue, as defined in net/if.h
+       @field output_bw The effective output bandwidth (in bits per second.)
+       @field output_bw_max The maximum theoretical output bandwidth
+               (in bits per second.)
+       @field input_poll The poll function for the interface, valid only if
+               IFNET_INIT_LEGACY is not set and only if IFNET_INIT_INPUT_POLL
+               is set.
+       @field input_ctl The input control function for the interface, valid
+               only if IFNET_INIT_LEGACY is not set and only if opportunistic
+               input polling is enabled via IFNET_INIT_INPUT_POLL flag.
+       @field rcvq_maxlen The size of the driver's receive ring or the total
+               count of descriptors used in the receive path; valid only if
+               IFNET_INIT_INPUT_POLL is set.
+       @field input_bw The effective input bandwidth (in bits per second.)
+       @field input_bw_max The maximum theoretical input bandwidth
+               (in bits per second.)
+       @field demux The function used to determine the protocol family of an
+               incoming packet.
+       @field add_proto The function used to attach a protocol to this
+               interface.
+       @field del_proto The function used to remove a protocol from this
+               interface.
+       @field framer The function used to frame outbound packets, may be NULL.
+       @field softc Driver specific storage. This value can be retrieved from
+               the ifnet using the ifnet_softc function.
+       @field ioctl The function used to handle ioctls.
+       @field set_bpf_tap The function used to set the bpf_tap function.
+       @field detach The function called to let the driver know the interface
+               has been detached.
+       @field event The function to notify the interface of various interface
+               specific kernel events.
+       @field broadcast_addr The link-layer broadcast address for this
+               interface.
+       @field broadcast_len The length of the link-layer broadcast address.
+*/
+struct ifnet_init_eparams {
+       u_int32_t               ver;                    /* required */
+       u_int32_t               len;                    /* required */
+       u_int32_t               flags;                  /* optional */
+
+       /* used to match recycled interface */
+       const void              *uniqueid;              /* optional */
+       u_int32_t               uniqueid_len;           /* optional */
+
+       /* used to fill out initial values for interface */
+       const char              *name;                  /* required */
+       u_int32_t               unit;                   /* required */
+       ifnet_family_t          family;                 /* required */
+       u_int32_t               type;                   /* required */
+       u_int32_t               sndq_maxlen;            /* optional, only for new model */
+       ifnet_output_func       output;                 /* required only for legacy model */
+       ifnet_pre_enqueue_func  pre_enqueue;            /* optional, only for new model */
+       ifnet_start_func        start;                  /* required only for new model */
+       ifnet_ctl_func          output_ctl;             /* optional, only for new model */
+       u_int32_t               output_sched_model;     /* optional, only for new model */
+       u_int32_t               reserved;               /* for future use */
+       u_int64_t               output_bw;              /* optional */
+       u_int64_t               output_bw_max;          /* optional */
+       u_int64_t               _reserved[4];           /* for future use */
+       ifnet_input_poll_func   input_poll;             /* optional, ignored for legacy model */
+       ifnet_ctl_func          input_ctl;              /* required for opportunistic polling */
+       u_int32_t               rcvq_maxlen;            /* optional, only for opportunistic polling */
+       u_int32_t               __reserved;             /* for future use */
+       u_int64_t               input_bw;               /* optional */
+       u_int64_t               input_bw_max;           /* optional */
+       u_int64_t               ___reserved[4];         /* for future use */
+       ifnet_demux_func        demux;                  /* required  */
+       ifnet_add_proto_func    add_proto;              /* required  */
+       ifnet_del_proto_func    del_proto;              /* required  */
+       ifnet_check_multi       check_multi;            /* required for non point-to-point interfaces */
+       ifnet_framer_func       framer;                 /* optional */
+       void                    *softc;                 /* optional */
+       ifnet_ioctl_func        ioctl;                  /* optional */
+       ifnet_set_bpf_tap       set_bpf_tap;            /* deprecated */
+       ifnet_detached_func     detach;                 /* optional */
+       ifnet_event_func        event;                  /* optional */
+       const void              *broadcast_addr;        /* required for non point-to-point interfaces */
+       u_int32_t               broadcast_len;          /* required for non point-to-point interfaces */
+       u_int64_t               ____reserved[4];        /* for future use */
+};
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @struct ifnet_stats_param
        @discussion This structure is used get and set the interface
@@ -744,6 +1013,250 @@ __BEGIN_DECLS
 extern errno_t ifnet_allocate(const struct ifnet_init_params *init,
     ifnet_t *interface);
 
+#ifdef KERNEL_PRIVATE
+/*
+       @function ifnet_allocate_extended
+       @discussion An extended/newer variant of ifnet_allocate, with additional
+               support for the new output and input driver models.
+       @param init The initial values for the interface. These values can
+               not be changed after the interface has been allocated.
+       @param interface The interface allocated upon success.
+       @result May return ENOMEM if there is insufficient memory or EEXIST
+               if an interface with the same uniqueid and family has already
+               been allocated and is in use.
+ */
+extern errno_t ifnet_allocate_extended(const struct ifnet_init_eparams *init,
+    ifnet_t *interface);
+
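
Putting ifnet_init_eparams to use (a sketch; my_start, my_demux,
my_add_proto and my_del_proto are hypothetical driver callbacks, and only
a subset of fields is shown):

        struct ifnet_init_eparams ep;
        ifnet_t ifp;

        bzero(&ep, sizeof (ep));
        ep.ver = IFNET_INIT_CURRENT_VERSION;
        ep.len = sizeof (ep);
        ep.flags = 0;                   /* new model: IFNET_INIT_LEGACY unset */
        ep.name = "foo";
        ep.unit = 0;
        ep.family = IFNET_FAMILY_ETHERNET;
        ep.type = IFT_ETHER;            /* from sys/if_types.h */
        ep.start = my_start;            /* required for the new model */
        ep.demux = my_demux;
        ep.add_proto = my_add_proto;
        ep.del_proto = my_del_proto;

        if (ifnet_allocate_extended(&ep, &ifp) == 0) {
                /* ... finish setup and attach the interface ... */
        }
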
+/*
+       @function ifnet_purge
+       @discussion Purge the output queue of an interface which implements
+               the new driver output model.
+       @param interface The interface to purge.
+ */
+extern void ifnet_purge(ifnet_t interface);
+
+/*
+       @function ifnet_enqueue
+       @discussion Enqueue a packet to the output queue of an interface
+               which implements the new driver output model.
+       @param interface The interface to enqueue the packet to.
+       @param packet The packet being enqueued; only one packet is allowed
+               to be enqueued at a time.
+       @result May return EINVAL if the parameters are invalid; ENXIO if
+               the interface doesn't implement the new driver output model;
+               EQFULL if the output queue is flow-controlled; or EQSUSPENDED
+               if the output queue is suspended.  This routine either frees
+               or consumes the packet; the caller must not modify or free
+               it after calling this routine.  Any attempt to enqueue more
+               than one packet will cause the entire packet chain to be freed.
+ */
+extern errno_t ifnet_enqueue(ifnet_t interface, mbuf_t packet);
+
+/*
+       @function ifnet_dequeue
+       @discussion Dequeue a packet from the output queue of an interface
+               which implements the new driver output model, and that the
+               output scheduling model is set to IFNET_SCHED_MODEL_NORMAL.
+       @param interface The interface to dequeue the packet from.
+       @param packet Pointer to the packet being dequeued.
+       @result May return EINVAL if the parameters are invalid, ENXIO if
+               the interface doesn't implement the new driver output model
+               or the output scheduling model isn't IFNET_SCHED_MODEL_NORMAL,
+               or EAGAIN if there is currently no packet available to
+               be dequeued.
+ */
+extern errno_t ifnet_dequeue(ifnet_t interface, mbuf_t *packet);
+
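
A sketch of a matching start callback for the normal scheduling model
(my_tx_hw is a hypothetical hardware-transmit helper that ultimately
frees each packet):

        static void
        my_start(ifnet_t ifp)
        {
                mbuf_t pkt;

                /* drain until ifnet_dequeue() returns EAGAIN (queue empty) */
                while (ifnet_dequeue(ifp, &pkt) == 0)
                        my_tx_hw(ifp, pkt);
        }
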
+/*
+       @function ifnet_dequeue_service_class
+       @discussion Dequeue a packet of a particular service class from the
+               appropriate output queue of an interface which implements the
+               new driver output model and whose output scheduling model
+               is set to IFNET_SCHED_MODEL_DRIVER_MANAGED.
+       @param interface The interface to dequeue the packet from.
+       @param tc The service class.
+       @param packet Pointer to the packet being dequeued.
+       @result May return EINVAL if the parameters are invalid, ENXIO if
+               the interface doesn't implement the new driver output model
+               or if the output scheduling model isn't configured to
+               IFNET_SCHED_MODEL_DRIVER_MANAGED, or EAGAIN if there
+               is currently no packet available to be dequeued.
+ */
+extern errno_t ifnet_dequeue_service_class(ifnet_t interface,
+    mbuf_svc_class_t tc, mbuf_t *packet);
+
+/*
+       @function ifnet_dequeue_multi
+       @discussion Dequeue one or more packets from the output queue of an
+               interface which implements the new driver output model and
+               whose output scheduling model is set to IFNET_SCHED_MODEL_NORMAL.
+               The returned packet chain is traversable with mbuf_nextpkt().
+       @param interface The interface to dequeue the packets from.
+       @param first_packet Pointer to the first packet being dequeued.
+       @param last_packet Pointer to the last packet being dequeued.  Caller
+               may supply NULL if not interested in value.
+       @param cnt Pointer to a storage for the number of packets dequeued.
+               Caller may supply NULL if not interested in value.
+       @param len Pointer to a storage for the total length (in bytes)
+               of the dequeued packets.  Caller may supply NULL if not
+               interested in value.
+       @result May return EINVAL if the parameters are invalid, ENXIO if
+               the interface doesn't implement the new driver output model
+               or the output scheduling model isn't IFNET_SCHED_MODEL_NORMAL,
+               or EAGAIN if there is currently no packet available to
+               be dequeued.
+ */
+extern errno_t ifnet_dequeue_multi(ifnet_t interface, u_int32_t max,
+    mbuf_t *first_packet, mbuf_t *last_packet, u_int32_t *cnt, u_int32_t *len);
+
+/*
+       @function ifnet_dequeue_service_class_multi
+       @discussion Dequeue one or more packets of a particular service class
+               from the appropriate output queue of an interface which
+               implements the new driver output model and whose output
+               scheduling model is set to IFNET_SCHED_MODEL_DRIVER_MANAGED.
+               The returned packet chain is traversable with mbuf_nextpkt().
+       @param interface The interface to dequeue the packets from.
+       @param tc The service class.
+       @param first_packet Pointer to the first packet being dequeued.
+       @param last_packet Pointer to the last packet being dequeued.  Caller
+               may supply NULL if not interested in value.
+       @param cnt Pointer to a storage for the number of packets dequeued.
+               Caller may supply NULL if not interested in value.
+       @param len Pointer to a storage for the total length (in bytes)
+               of the dequeued packets.  Caller may supply NULL if not
+               interested in value.
+       @result May return EINVAL if the parameters are invalid, ENXIO if
+               the interface doesn't implement the new driver output model
+               or if the output scheduling model isn't configured to
+               IFNET_SCHED_MODEL_DRIVER_MANAGED, or EAGAIN if there
+               is currently no packet available to be dequeued.
+ */
+extern errno_t ifnet_dequeue_service_class_multi(ifnet_t interface,
+    mbuf_svc_class_t tc, u_int32_t max, mbuf_t *first_packet,
+    mbuf_t *last_packet, u_int32_t *cnt, u_int32_t *len);
+
+/*
+       @function ifnet_set_output_sched_model
+       @discussion Set the output scheduling model of an interface which
+               implements the new driver output model.
+       @param interface The interface to set scheduling model on.
+       @param model The IFNET_SCHED_MODEL value as defined in net/if.h
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver output model.
+ */
+extern errno_t ifnet_set_output_sched_model(ifnet_t interface,
+    u_int32_t model);
+
+/*
+       @function ifnet_set_sndq_maxlen
+       @discussion Set the maximum length of the output queue of an
+               interface which implements the new driver output model.
+               This call may be issued after ifnet_allocate_extended in
+               order to modify the maximum output queue length previously
+               set at registration time.
+       @param interface The interface to set the max queue length on.
+       @param maxqlen The maximum number of packets in the output queue.
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver output model.
+ */
+extern errno_t ifnet_set_sndq_maxlen(ifnet_t interface, u_int32_t maxqlen);
+
+/*
+       @function ifnet_get_sndq_maxlen
+       @discussion Get the maximum length of the output queue of an
+               interface which implements the new driver output model.
+       @param interface The interface to get the max queue length on.
+       @param maxqlen Pointer to a storage for the maximum number of packets
+               in the output queue.
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver output model.
+ */
+extern errno_t ifnet_get_sndq_maxlen(ifnet_t interface, u_int32_t *maxqlen);
+
+/*
+       @function ifnet_get_sndq_len
+       @discussion Get the current length of the output queue of an
+               interface which implements the new driver output model.
+       @param interface The interface to get the current queue length on.
+       @param qlen Pointer to a storage for the current number of packets
+               in the output queue.
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver output model.
+ */
+extern errno_t ifnet_get_sndq_len(ifnet_t interface, u_int32_t *qlen);
+
+/*
+       @function ifnet_set_rcvq_maxlen
+       @discussion Set the maximum length of the input queue of an
+               interface which implements the new driver input model.
+               This call may be issued after ifnet_allocate_extended in
+               order to modify the maximum input queue length previously
+               set at registration time.
+       @param interface The interface to set the max queue length on.
+       @param maxqlen The maximum number of packets in the input queue.
+               Drivers typically set this to the size of the receive ring
+               or the total number of descriptors used for the input path.
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver input model.
+ */
+extern errno_t ifnet_set_rcvq_maxlen(ifnet_t interface, u_int32_t maxqlen);
+
+/*
+       @function ifnet_get_rcvq_maxlen
+       @discussion Get the maximum length of the input queue of an
+               interface which implements the new driver input model.
+       @param interface The interface to get the max queue length on.
+       @param maxqlen Pointer to a storage for the maximum number of packets
+               in the input queue.
+       @result May return EINVAL if the parameters are invalid or ENXIO if
+               the interface doesn't implement the new driver input model.
+ */
+extern errno_t ifnet_get_rcvq_maxlen(ifnet_t interface, u_int32_t *maxqlen);
+
+/*
+       @function ifnet_start
+       @discussion Trigger the transmission at the driver layer on an
+               interface which implements the new driver output model.
+       @param interface The interface to start the transmission on.
+ */
+extern void ifnet_start(ifnet_t interface);
+
+/*
+       @function ifnet_transmit_burst_start
+       @discussion Inform the kernel about the beginning of transmission
+               of a burst.  This function should be called when a burst of
+               packets is scheduled to be transmitted over the link. The
+               callback will be used by the system to start measuring
+               bandwidth available on that link.  The driver may choose to
+               adopt this scheme for uplink bandwidth measurement when the
+               information can't be obtained from the hardware; otherwise it
+               may inform the network stack directly using
+               ifnet_set_bandwidths.
+       @param interface The interface.
+       @param pkt The first packet in a burst of packets that has been
+               scheduled to transmit.
+*/
+extern void ifnet_transmit_burst_start(ifnet_t interface, mbuf_t pkt);
+
+/*
+       @function ifnet_transmit_burst_end
+       @discussion Inform the kernel about the end of transmission of a burst.
+               This function should be called when the transmission of a burst
+               of packets is done. This information will be used by the
+               system to estimate bandwidth available on that link.  The
+               driver may choose to adopt this scheme for uplink bandwidth
+               measurement when the information can't be obtained from the
+               hardware; otherwise it may inform the network stack directly
+               using ifnet_set_bandwidths.
+       @param interface The interface.
+       @param pkt The last packet in the burst that has been successfully
+               transmitted.
+*/
+extern void ifnet_transmit_burst_end(ifnet_t interface, mbuf_t pkt);
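
A sketch of how the two calls bracket a burst in a driver's transmit path
(first_pkt and last_pkt are whatever the driver just handed to the
hardware):

        ifnet_transmit_burst_start(ifp, first_pkt);
        /* ... program the hardware with first_pkt .. last_pkt ... */
        ifnet_transmit_burst_end(ifp, last_pkt);
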
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @function ifnet_reference
        @discussion Increment the reference count of the ifnet to assure
@@ -903,7 +1416,7 @@ extern u_int16_t ifnet_flags(ifnet_t interface);
 
 
 #ifdef KERNEL_PRIVATE
-/*!
+/*
        @function ifnet_set_eflags
        @discussion Sets the extended interface flags to new_flags. This
                function lets you specify which flags you want to change using
@@ -919,7 +1432,7 @@ extern u_int16_t ifnet_flags(ifnet_t interface);
 extern errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags,
     u_int32_t mask);
 
-/*!
+/*
        @function ifnet_eflags
        @discussion Returns the extended interface flags that are set.
        @param interface Interface to retrieve the flags from.
@@ -927,7 +1440,7 @@ extern errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags,
  */
 extern u_int32_t ifnet_eflags(ifnet_t interface);
 
-/*!
+/*
        @function ifnet_set_idle_flags
        @discussion Sets the if_idle_flags to new_flags. This function
                lets you specify which flags you want to change using the
@@ -947,7 +1460,7 @@ extern u_int32_t ifnet_eflags(ifnet_t interface);
 extern errno_t ifnet_set_idle_flags(ifnet_t interface, u_int32_t new_flags,
     u_int32_t mask);
 
-/*!
+/*
        @function ifnet_idle_flags
        @discussion Returns the value of if_idle_flags.
        @param interface Interface to retrieve the flags from.
@@ -955,6 +1468,101 @@ extern errno_t ifnet_set_idle_flags(ifnet_t interface, u_int32_t new_flags,
 */
 extern u_int32_t ifnet_idle_flags(ifnet_t interface);
 
+/*
+       @function ifnet_set_link_quality
+       @discussion Sets the Link Quality Metric for the ifnet.
+       @param interface Interface with which the Link Quality Metric should
+               be associated.
+       @param quality IFNET_LQM value as defined in net/if.h.
+       @result 0 on success otherwise the errno error.  EINVAL if quality
+               is not a valid value.  ENXIO if the interface is not attached.
+*/
+extern errno_t ifnet_set_link_quality(ifnet_t interface, int quality);
+
+/*
+       @function ifnet_link_quality
+       @discussion Returns the Link Quality Metric for the ifnet.
+       @param interface Interface to retrieve the value from.
+       @result IFNET_LQM as defined in net/if.h
+*/
+extern int ifnet_link_quality(ifnet_t interface);
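
A sketch of a driver feeding its own signal reading into the metric,
assuming the IFNET_LQM_THRESH_* values from net/if.h (the -75 dBm cutoff
is invented):

	static void
	mydrv_update_lqm(ifnet_t ifp, int rssi_dbm)
	{
		int lqm = (rssi_dbm > -75) ?
		    IFNET_LQM_THRESH_GOOD : IFNET_LQM_THRESH_POOR;

		(void) ifnet_set_link_quality(ifp, lqm);
	}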
+
+/*
+       @struct ifnet_llreach_info
+       @discussion This structure is used to describe the link-layer
+               reachability information of an on-link node.
+       @field iflri_refcnt The number of network-layer objects referring
+               to this link-layer reachability record.
+       @field iflri_probes The total number of outstanding probes.
+       @field iflri_snd_expire The send expiration time.  This is calculated
+               based on the last time the system transmitted a packet to the
+               node.  A zero value indicates that a packet has not been sent
+               to the node.  A non-zero value indicates the time before the
+               record is determined to be invalid.  When the record is no
+               longer valid, the system will send probe(s) to resolve the
+               node again.  This value is relative to the current time
+               specified in iflri_curtime.
+       @field iflri_rcv_expire The receive expiration time.  This is
+               calculated based on the last time the system received a packet
+               from the node.  A zero value indicates that a packet has not
+               been received from the node.  A non-zero value indicates the
+               time before the record is determined to be invalid.  When the
+               record is no longer valid, the system will send probe(s) to
+               resolve the node again.  This value is relative to the current
+               time specified in iflri_curtime.
+       @field iflri_curtime The current time when this record was retrieved.
+       @field iflri_netproto The protocol number of the network-layer object.
+       @field iflri_addr The link-layer address of the node.
+       @field iflri_rssi The received signal strength indication (RSSI) of the
+               node in dBm.  The special value IFNET_RSSI_UNKNOWN is used when
+               the RSSI is either unknown or inapplicable for the interface.
+       @field iflri_lqm The link quality metric (LQM) to the node.  The
+               special value IFNET_LQM_UNKNOWN is used when the LQM is not
+               currently known.  The special value IFNET_LQM_OFF is used when
+               the link quality metric is inapplicable to nodes attached to
+               the network at this interface.
+       @field iflri_npm The node proximity metric (NPM) to the node.  The
+               special value IFNET_NPM_UNKNOWN is used when the NPM is not
+               currently known.
+ */
+#define        IFNET_LLREACHINFO_ADDRLEN       64      /* max ll addr len */
+
+struct ifnet_llreach_info {
+       u_int32_t iflri_refcnt;
+       u_int32_t iflri_probes;
+       u_int64_t iflri_snd_expire;
+       u_int64_t iflri_rcv_expire;
+       u_int64_t iflri_curtime;
+       u_int32_t iflri_netproto;
+       u_int8_t  iflri_addr[IFNET_LLREACHINFO_ADDRLEN];
+       int32_t   iflri_rssi;
+       int32_t   iflri_lqm;
+       int32_t   iflri_npm;
+};
+
+/*
+       @function ifnet_inet_defrouter_llreachinfo
+       @discussion Retrieve link-layer reachability information of the
+               default IPv4 router specific to the interface.
+       @param interface The interface associated with the default IPv4 router.
+       @param pinfo Pointer to the ifnet_llreach_info structure in which the
+               information is returned upon success.
+       @result 0 upon success, otherwise errno error.
+ */
+extern errno_t ifnet_inet_defrouter_llreachinfo(ifnet_t interface,
+    struct ifnet_llreach_info *pinfo);
+
+/*
+       @function ifnet_inet6_defrouter_llreachinfo
+       @discussion Retrieve link-layer reachability information of the
+               default IPv6 router specific to the interface.
+       @param interface The interface associated with the default IPv6 router.
+       @param pinfo Pointer to the ifnet_llreach_info structure in which the
+               information is returned upon success.
+       @result 0 upon success, otherwise errno error.
+ */
+extern errno_t ifnet_inet6_defrouter_llreachinfo(ifnet_t interface,
+    struct ifnet_llreach_info *pinfo);
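
A sketch of reading the record back (illustrative only; field semantics
per the struct description above, where the expiration values are
relative to iflri_curtime):

	static void
	mydrv_check_v6_router(ifnet_t ifp)
	{
		struct ifnet_llreach_info lri;

		if (ifnet_inet6_defrouter_llreachinfo(ifp, &lri) == 0 &&
		    lri.iflri_rcv_expire != 0)
			printf("v6 router record expires %llu past "
			    "curtime %llu\n", lri.iflri_rcv_expire,
			    lri.iflri_curtime);
	}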
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -1258,6 +1866,28 @@ extern errno_t ifnet_output_raw(ifnet_t interface,
 extern errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet,
     const struct ifnet_stat_increment_param *stats);
 
+#ifdef KERNEL_PRIVATE
+/*
+       @function ifnet_input_extended
+       @discussion Inputs packets from the interface. The interface's demux
+               will be called to determine the protocol. Once the protocol is
+               determined, the interface filters and protocol filters will be
+               called. From there, the packet will be passed to the registered
+               protocol. If there is an error, the mbuf chain will be freed.
+       @param interface The interface.
+       @param first_packet The first packet in a chain of packets.
+       @param last_packet The last packet in a chain of packets.  This may be
+               set to NULL if the driver does not have the information.
+       @param stats Counts to be integrated into the stats. The interface
+               statistics will be incremented by the amounts specified in
+               stats. Unlike ifnet_input(), this parameter is required by
+               this extended variant.
+       @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_input_extended(ifnet_t interface, mbuf_t first_packet,
+    mbuf_t last_packet, const struct ifnet_stat_increment_param *stats);
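
A sketch of a driver's RX path using the extended variant; the packet and
byte tallies are assumed to be kept by the caller, and the field names
are assumed from struct ifnet_stat_increment_param:

	static void
	mydrv_rx_deliver(ifnet_t ifp, mbuf_t head, mbuf_t tail,
	    u_int32_t npkts, u_int32_t nbytes)
	{
		struct ifnet_stat_increment_param stats;

		bzero(&stats, sizeof (stats));
		stats.packets_in = npkts;
		stats.bytes_in = nbytes;
		(void) ifnet_input_extended(ifp, head, tail, &stats);
	}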
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @function ifnet_ioctl
        @discussion Calls the interface's ioctl function with the parameters
@@ -1392,6 +2022,38 @@ extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate);
  */
 extern u_int64_t ifnet_baudrate(ifnet_t interface);
 
+#ifdef KERNEL_PRIVATE
+typedef struct if_bandwidths if_bandwidths_t;
+
+/*
+       @function ifnet_set_bandwidths
+       @param interface The interface.
+       @param output_bw The output bandwidth values (in bits per second).
+               May be set to NULL if the caller does not want to alter the
+               existing output bandwidth values.
+       @param input_bw The input bandwidth values (in bits per second).
+               May be set to NULL if the caller does not want to alter the
+               existing input bandwidth values.
+       @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_set_bandwidths(ifnet_t interface,
+    if_bandwidths_t *output_bw, if_bandwidths_t *input_bw);
+
+/*
+       @function ifnet_bandwidths
+       @param interface The interface.
+       @param output_bw The output bandwidth values (in bits per second).
+               May be set to NULL if the caller does not want to retrieve the
+               output bandwidth value.
+       @param input_bw The input bandwidth values (in bits per second).
+               May be set to NULL if the caller does not want to retrieve the
+               input bandwidth value.
+       @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_bandwidths(ifnet_t interface, if_bandwidths_t *output_bw,
+    if_bandwidths_t *input_bw);
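
A sketch of a driver reporting a 100 Mbit/s uplink currently running at
20 Mbit/s effective; the eff_bw/max_bw field names are assumed from
struct if_bandwidths in net/if.h:

	static void
	mydrv_report_bw(ifnet_t ifp)
	{
		if_bandwidths_t out_bw;

		out_bw.max_bw = 100 * 1000 * 1000ULL;	/* bits per second */
		out_bw.eff_bw = 20 * 1000 * 1000ULL;
		(void) ifnet_set_bandwidths(ifp, &out_bw, NULL);
	}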
+#endif /* KERNEL_PRIVATE */
+
 /*!
        @function ifnet_stat_increment
        @discussion
@@ -2010,6 +2672,71 @@ extern errno_t ifnet_clone_attach(struct ifnet_clone_params *cloner_params, if_c
  */
 extern errno_t ifnet_clone_detach(if_clone_t ifcloner);
 
+/******************************************************************************/
+/* misc                                                                       */
+/******************************************************************************/
+
+/*
+       @function ifnet_get_local_ports
+       @discussion Returns a bitfield indicating which ports have sockets
+               open. An interface that supports waking the host on unicast traffic may
+               use this information to discard incoming unicast packets that don't have
+               a corresponding bit set instead of waking up the host. For port 0x0001,
+               bit 1 of the first byte would be set. For port n, bit 1 << (n % 8) of
+               the (n / 8)'th byte would be set.
+       @param ifp The interface in question.
+       @param bitfield A pointer to 8192 bytes.
+       @result Returns 0 on success.
+ */
+extern errno_t ifnet_get_local_ports(ifnet_t ifp, uint8_t *bitfield);
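
The bit layout described above reduces to a one-line test; a sketch:

	/* True if some socket has the given port open, per the
	 * bit 1 << (n % 8) of byte n / 8 layout described above. */
	static int
	port_is_open(const uint8_t bitfield[8192], uint16_t port)
	{
		return ((bitfield[port / 8] & (1 << (port % 8))) != 0);
	}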
+/******************************************************************************/
+/* for interfaces that support dynamic node absence/presence events           */
+/******************************************************************************/
+
+/*
+       @function ifnet_notice_node_presence
+       @discussion Provided for network interface drivers to notify the
+               system of a change detected in the presence of the specified
+               node.
+       @param ifp The interface attached to the link where the specified node
+               is present.
+       @param sa The AF_LINK family address of the node whose presence is
+               changing.
+       @param rssi The received signal strength indication as measured in
+               dBm by a radio receiver.
+       @param lqm A link quality metric associated with the specified node.
+       @param npm A node proximity metric associated with the specified node.
+       @param srvinfo A fixed-size array of octets containing opaque service
+               information data used by the mDNS responder subsystem.
+       @result Returns 0 on success, or EINVAL if arguments are invalid.
+ */
+extern errno_t
+ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi,
+    int lqm, int npm, u_int8_t srvinfo[48]);
+
+/*
+       @function ifnet_notice_node_absence
+       @discussion Provided for network interface drivers to notify the
+               system that the absence of the specified node has been detected.
+       @param ifp The interface attached to the link where the absence of the
+               specified node has been detected.
+       @param sa The AF_LINK family address of the node whose absence has been
+               detected.
+       @result Returns 0 on success, or EINVAL if arguments are invalid.
+ */
+extern errno_t ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa);
+
+/*
+       @function ifnet_notice_master_elected
+       @discussion Provided for network interface drivers to notify the system
+               that the nodes with a locally detected presence on the attached
+               link have elected a new master.
+       @param ifp The interface attached to the link where the new master has
+               been elected.
+       @result Returns 0 on success, or EINVAL if arguments are invalid.
+ */
+extern errno_t ifnet_notice_master_elected(ifnet_t ifp);
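
A sketch of a driver reporting a discovered peer; sdl is assumed to be an
AF_LINK address built by the driver, and the *_UNKNOWN sentinels are the
ones named in the field descriptions above:

	static void
	mydrv_peer_found(ifnet_t ifp, struct sockaddr_dl *sdl,
	    int32_t rssi_dbm, u_int8_t srvinfo[48])
	{
		(void) ifnet_notice_node_presence(ifp,
		    (struct sockaddr *)sdl, rssi_dbm,
		    IFNET_LQM_UNKNOWN, IFNET_NPM_UNKNOWN, srvinfo);
	}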
+
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
index 6c3043c94575684f94fdab8b2ed98293a43c38d0..982dc905ab06ca68d1b3021099c6b3948ca55ca0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -44,167 +44,156 @@ typedef int (*attach_t)(struct ifnet *ifp, uint32_t protocol_family);
 typedef int (*detach_t)(struct ifnet *ifp, uint32_t protocol_family);
 
 struct proto_input_entry {
-       struct proto_input_entry                *next;
-       int                                                             detach;
-       struct domain                                   *domain;
-       int                                                             hash;
-       int                                                             chain;
-       
-       protocol_family_t                               protocol;
-       proto_input_handler                             input;
+       struct proto_input_entry        *next;
+       int                             detach;
+       struct domain                   *domain;
+       int                             hash;
+       int                             chain;
+
+       protocol_family_t               protocol;
+       proto_input_handler             input;
        proto_input_detached_handler    detached;
-       
-       mbuf_t                                                  inject_first;
-       mbuf_t                                                  inject_last;
-       
-       struct proto_input_entry                *input_next;
-       mbuf_t                                                  input_first;
-       mbuf_t                                                  input_last;
+
+       mbuf_t                          inject_first;
+       mbuf_t                          inject_last;
+
+       struct proto_input_entry        *input_next;
+       mbuf_t                          input_first;
+       mbuf_t                          input_last;
 };
 
 
 struct proto_family_str {
        TAILQ_ENTRY(proto_family_str)   proto_fam_next;
-       protocol_family_t                               proto_family;
-       ifnet_family_t                                  if_family;
-       proto_plumb_handler                             attach_proto;
-       proto_unplumb_handler                   detach_proto;
+       protocol_family_t               proto_family;
+       ifnet_family_t                  if_family;
+       proto_plumb_handler             attach_proto;
+       proto_unplumb_handler           detach_proto;
 };
 
-#define PROTO_HASH_SLOTS       5
+#define        PROTO_HASH_SLOTS        5
 
-static struct proto_input_entry                        *proto_hash[PROTO_HASH_SLOTS];
-static int                                                             proto_total_waiting = 0;
-static struct proto_input_entry                *proto_input_add_list = NULL;
-static lck_mtx_t                                               *proto_family_mutex = 0;
-static TAILQ_HEAD(, proto_family_str)  proto_family_head =
-                       TAILQ_HEAD_INITIALIZER(proto_family_head);
-
-extern lck_mtx_t       *domain_proto_mtx;
-extern struct dlil_threading_info *dlil_lo_thread_ptr;
+static struct proto_input_entry *proto_hash[PROTO_HASH_SLOTS];
+static int proto_total_waiting = 0;
+static struct proto_input_entry        *proto_input_add_list = NULL;
+decl_lck_mtx_data(static, proto_family_mutex_data);
+static lck_mtx_t *proto_family_mutex = &proto_family_mutex_data;
+static TAILQ_HEAD(, proto_family_str) proto_family_head =
+    TAILQ_HEAD_INITIALIZER(proto_family_head);
 
 static int
-proto_hash_value(
-       protocol_family_t protocol)
+proto_hash_value(protocol_family_t protocol)
 {
-       switch(protocol) {
+       switch (protocol) {
                case PF_INET:
-                       return 0;
+                       return (0);
                case PF_INET6:
-                       return 1;
+                       return (1);
                case PF_APPLETALK:
-                       return 2;
+                       return (2);
                case PF_VLAN:
-                       return 3;
+                       return (3);
        }
-       return 4;
+       return (4);
 }
 
 __private_extern__ void
 proto_kpi_init(void)
 {
-       lck_grp_attr_t  *grp_attrib = 0;
-       lck_attr_t              *lck_attrib = 0;
-       lck_grp_t               *lck_group = 0;
-       
+       lck_grp_attr_t  *grp_attrib = NULL;
+       lck_attr_t      *lck_attrib = NULL;
+       lck_grp_t       *lck_group = NULL;
+
        /* Allocate a mtx lock */
        grp_attrib = lck_grp_attr_alloc_init();
        lck_group = lck_grp_alloc_init("protocol kpi", grp_attrib);
        lck_grp_attr_free(grp_attrib);
        lck_attrib = lck_attr_alloc_init();
-       proto_family_mutex = lck_mtx_alloc_init(lck_group, lck_attrib);
+       lck_mtx_init(proto_family_mutex, lck_group, lck_attrib);
        lck_grp_free(lck_group);
        lck_attr_free(lck_attrib);
-       
-       bzero(proto_hash, sizeof(proto_hash));
+
+       bzero(proto_hash, sizeof (proto_hash));
 }
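
The rewrite above trades the old heap-allocated mutex for one embedded in
the data segment; the pattern in isolation (names hypothetical):

	decl_lck_mtx_data(static, example_mtx_data);
	static lck_mtx_t *example_mtx = &example_mtx_data;

	/* ...after the group/attr setup shown above... */
	lck_mtx_init(example_mtx, lck_group, lck_attrib);	/* no _MALLOC */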
 
 __private_extern__ errno_t
-proto_register_input(
-       protocol_family_t protocol,
-       proto_input_handler input,
-       proto_input_detached_handler detached,
-       int     chains)
+proto_register_input(protocol_family_t protocol, proto_input_handler input,
+    proto_input_detached_handler detached, int chains)
 {
-       
        struct proto_input_entry *entry;
-       struct dlil_threading_info *thread = dlil_lo_thread_ptr;
-       
-       entry = _MALLOC(sizeof(*entry), M_IFADDR, M_WAITOK);
-       
+       struct dlil_threading_info *inp = dlil_main_input_thread;
+       struct domain *dp = domains;
+       int do_unlock;
+
+       entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK);
+
        if (entry == NULL)
-               return ENOMEM;
-       
-       bzero(entry, sizeof(*entry));
+               return (ENOMEM);
+
+       bzero(entry, sizeof (*entry));
        entry->protocol = protocol;
        entry->input = input;
        entry->detached = detached;
        entry->hash = proto_hash_value(protocol);
        entry->chain = chains;
-       
-       {
-               struct domain *dp = domains;
-
-               lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-               lck_mtx_lock(domain_proto_mtx);
-               while (dp && (protocol_family_t)dp->dom_family != protocol)
-                       dp = dp->dom_next;
-               entry->domain = dp;
-               lck_mtx_unlock(domain_proto_mtx);       
-       }
 
-       
-       lck_mtx_lock(&thread->input_lck);
+       do_unlock = domain_proto_mtx_lock();
+       while (dp && (protocol_family_t)dp->dom_family != protocol)
+               dp = dp->dom_next;
+       entry->domain = dp;
+       domain_proto_mtx_unlock(do_unlock);
+
+       lck_mtx_lock(&inp->input_lck);
        entry->next = proto_input_add_list;
        proto_input_add_list = entry;
-       
-       thread->input_waiting |= DLIL_PROTO_REGISTER;
-       if ((thread->input_waiting & DLIL_INPUT_RUNNING) == 0)
-               wakeup((caddr_t)&thread->input_waiting);
-       lck_mtx_unlock(&thread->input_lck);
-       
-       return 0;
-}
 
+       inp->input_waiting |= DLIL_PROTO_REGISTER;
+       if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0)
+               wakeup((caddr_t)&inp->input_waiting);
+       lck_mtx_unlock(&inp->input_lck);
+
+       return (0);
+}
 
 __private_extern__ void
-proto_unregister_input(
-       protocol_family_t       protocol)
+proto_unregister_input(protocol_family_t protocol)
 {
        struct proto_input_entry *entry = NULL;
-       
-       for (entry = proto_hash[proto_hash_value(protocol)]; entry; entry = entry->next)
+
+       for (entry = proto_hash[proto_hash_value(protocol)]; entry != NULL;
+           entry = entry->next) {
                if (entry->protocol == protocol)
                        break;
-       
-       if (entry)
+       }
+
+       if (entry != NULL)
                entry->detach = 1;
 }
 
-
 static void
-proto_delayed_attach(
-       struct proto_input_entry *entry)
+proto_delayed_attach(struct proto_input_entry *entry)
 {
        struct proto_input_entry *next_entry;
-       for (next_entry = entry->next; entry; entry = next_entry) {
+
+       for (next_entry = entry->next; entry != NULL; entry = next_entry) {
                struct proto_input_entry *exist;
                int hash_slot;
-               
+
                hash_slot = proto_hash_value(entry->protocol);
                next_entry = entry->next;
-               
-               for (exist = proto_hash[hash_slot]; exist; exist = exist->next)
+
+               for (exist = proto_hash[hash_slot]; exist != NULL;
+                   exist = exist->next) {
                        if (exist->protocol == entry->protocol)
                                break;
-               
+               }
+
                /* If the entry already exists, call detached and dispose */
-               if (exist) {
+               if (exist != NULL) {
                        if (entry->detached)
                                entry->detached(entry->protocol);
                        FREE(entry, M_IFADDR);
-               }
-               else {
+               } else {
                        entry->next = proto_hash[hash_slot];
                        proto_hash[hash_slot] = entry;
                }
@@ -214,31 +203,32 @@ proto_delayed_attach(
 __private_extern__ void
 proto_input_run(void)
 {
-       struct proto_input_entry        *entry;
-       struct dlil_threading_info *thread = dlil_lo_thread_ptr;
+       struct proto_input_entry *entry;
+       struct dlil_threading_info *inp = dlil_main_input_thread;
        mbuf_t packet_list;
        int i, locked = 0;
 
-       lck_mtx_assert(&thread->input_lck,  LCK_MTX_ASSERT_NOTOWNED);
+       lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_NOTOWNED);
 
-       if ((thread->input_waiting & DLIL_PROTO_REGISTER) != 0) {
-               lck_mtx_lock_spin(&thread->input_lck);
+       if (inp->input_waiting & DLIL_PROTO_REGISTER) {
+               lck_mtx_lock_spin(&inp->input_lck);
                entry = proto_input_add_list;
                proto_input_add_list = NULL;
-               thread->input_waiting &= ~DLIL_PROTO_REGISTER;
-               lck_mtx_unlock(&thread->input_lck);
+               inp->input_waiting &= ~DLIL_PROTO_REGISTER;
+               lck_mtx_unlock(&inp->input_lck);
                proto_delayed_attach(entry);
        }
+
        /*
-         Move everything from the lock protected list to the thread
-         specific list.
+        * Move everything from the lock protected list to the thread
+        * specific list.
         */
        for (i = 0; proto_total_waiting != 0 && i < PROTO_HASH_SLOTS; i++) {
-               for (entry = proto_hash[i]; entry && proto_total_waiting;
-                        entry = entry->next) {
-                       if (entry->inject_first) {
-                               lck_mtx_lock_spin(&thread->input_lck);
-                               thread->input_waiting &= ~DLIL_PROTO_WAITING;
+               for (entry = proto_hash[i];
+                   entry != NULL && proto_total_waiting; entry = entry->next) {
+                       if (entry->inject_first != NULL) {
+                               lck_mtx_lock_spin(&inp->input_lck);
+                               inp->input_waiting &= ~DLIL_PROTO_WAITING;
 
                                packet_list = entry->inject_first;
 
@@ -246,160 +236,156 @@ proto_input_run(void)
                                entry->inject_last = NULL;
                                proto_total_waiting--;
 
-                               lck_mtx_unlock(&thread->input_lck);
+                               lck_mtx_unlock(&inp->input_lck);
 
-                               if (entry->domain && (entry->domain->dom_flags & DOM_REENTRANT) == 0) {
+                               if (entry->domain != NULL && !(entry->domain->
+                                   dom_flags & DOM_REENTRANT)) {
                                        lck_mtx_lock(entry->domain->dom_mtx);
                                        locked = 1;
                                }
-               
+
                                if (entry->chain) {
-                                       entry->input(entry->protocol, packet_list);
-                               }
-                               else {
+                                       entry->input(entry->protocol,
+                                           packet_list);
+                               } else {
                                        mbuf_t  packet;
-                               
-                                       for (packet = packet_list; packet; packet = packet_list) {
-                                               packet_list = mbuf_nextpkt(packet);
+
+                                       for (packet = packet_list;
+                                           packet != NULL;
+                                           packet = packet_list) {
+                                               packet_list =
+                                                   mbuf_nextpkt(packet);
                                                mbuf_setnextpkt(packet, NULL);
-                                               entry->input(entry->protocol, packet);
+                                               entry->input(entry->protocol,
+                                                   packet);
                                        }
                                }
                                if (locked) {
                                        locked = 0;
                                        lck_mtx_unlock(entry->domain->dom_mtx);
-                               }       
+                               }
+                       }
                }
        }
-       }
-
 }
 
 errno_t
-proto_input(
-       protocol_family_t       protocol,
-       mbuf_t                          packet_list)
+proto_input(protocol_family_t protocol, mbuf_t packet_list)
 {
-       struct proto_input_entry        *entry;
-       errno_t                         locked =0, result = 0;
+       struct proto_input_entry *entry;
+       errno_t locked = 0, result = 0;
 
-       for (entry = proto_hash[proto_hash_value(protocol)]; entry;
-                entry = entry->next) {
+       for (entry = proto_hash[proto_hash_value(protocol)]; entry != NULL;
+           entry = entry->next) {
                if (entry->protocol == protocol)
                        break;
        }
 
-       if (entry->domain && (entry->domain->dom_flags & DOM_REENTRANT) == 0) {
+       /* Guard against a protocol with no registered input handler */
+       if (entry == NULL)
+               return (ENOENT);
+
+       if (entry->domain && !(entry->domain->dom_flags & DOM_REENTRANT)) {
                lck_mtx_lock(entry->domain->dom_mtx);
                locked = 1;
        }
-       
+
        if (entry->chain) {
                entry->input(entry->protocol, packet_list);
-       }
-       else {
+       } else {
                mbuf_t  packet;
-               
-               for (packet = packet_list; packet; packet = packet_list) {
+
+               for (packet = packet_list; packet != NULL;
+                   packet = packet_list) {
                        packet_list = mbuf_nextpkt(packet);
                        mbuf_setnextpkt(packet, NULL);
                        entry->input(entry->protocol, packet);
                }
        }
-       
+
        if (locked) {
                lck_mtx_unlock(entry->domain->dom_mtx);
-       }       
-       return result;
+       }
+       return (result);
 }
 
 errno_t
-proto_inject(
-       protocol_family_t       protocol,
-       mbuf_t                          packet_list)
+proto_inject(protocol_family_t protocol, mbuf_t packet_list)
 {
-       struct proto_input_entry        *entry;
-       mbuf_t                          last_packet;
-       int                             hash_slot = proto_hash_value(protocol);
-       struct dlil_threading_info      *thread = dlil_lo_thread_ptr;
-       
-       for (last_packet = packet_list; mbuf_nextpkt(last_packet);
-                last_packet = mbuf_nextpkt(last_packet))
+       struct proto_input_entry *entry;
+       mbuf_t last_packet;
+       int hash_slot = proto_hash_value(protocol);
+       struct dlil_threading_info *inp = dlil_main_input_thread;
+
+       for (last_packet = packet_list; mbuf_nextpkt(last_packet) != NULL;
+           last_packet = mbuf_nextpkt(last_packet))
                /* find the last packet */;
-       
-       for (entry = proto_hash[hash_slot]; entry; entry = entry->next) {
+
+       for (entry = proto_hash[hash_slot]; entry != NULL;
+           entry = entry->next) {
                if (entry->protocol == protocol)
                        break;
        }
-       
-       if (entry) {
-               lck_mtx_lock(&thread->input_lck);
+
+       if (entry != NULL) {
+               lck_mtx_lock(&inp->input_lck);
                if (entry->inject_first == NULL) {
                        proto_total_waiting++;
-                       thread->input_waiting |= DLIL_PROTO_WAITING;
+                       inp->input_waiting |= DLIL_PROTO_WAITING;
                        entry->inject_first = packet_list;
-               }
-               else {
+               } else {
                        mbuf_setnextpkt(entry->inject_last, packet_list);
                }
                entry->inject_last = last_packet;
-               if ((thread->input_waiting & DLIL_INPUT_RUNNING) == 0) {
-                       wakeup((caddr_t)&thread->input_waiting);
+               if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
+                       wakeup((caddr_t)&inp->input_waiting);
                }
-               lck_mtx_unlock(&thread->input_lck);
-       }
-       else
-       {
-               return ENOENT;
+               lck_mtx_unlock(&inp->input_lck);
+       } else {
+               return (ENOENT);
        }
 
-       return 0;
+       return (0);
 }
 
-static struct proto_family_str*
-proto_plumber_find(
-       protocol_family_t       proto_family,
-       ifnet_family_t          if_family)
+static struct proto_family_str *
+proto_plumber_find(protocol_family_t proto_family, ifnet_family_t if_family)
 {
        struct proto_family_str  *mod = NULL;
 
        TAILQ_FOREACH(mod, &proto_family_head, proto_fam_next) {
-               if ((mod->proto_family == (proto_family & 0xffff)) 
-                       && (mod->if_family == (if_family & 0xffff))) 
+               if ((mod->proto_family == (proto_family & 0xffff)) &&
+                   (mod->if_family == (if_family & 0xffff)))
                        break;
-               }
+       }
 
-       return mod;
+       return (mod);
 }
 
 errno_t
-proto_register_plumber(
-       protocol_family_t               protocol_family,
-       ifnet_family_t                  interface_family, 
-       proto_plumb_handler             attach,
-       proto_unplumb_handler   detach)
+proto_register_plumber(protocol_family_t protocol_family,
+    ifnet_family_t interface_family, proto_plumb_handler attach,
+    proto_unplumb_handler detach)
 {
        struct proto_family_str *proto_family;
 
-       if (attach == NULL) return EINVAL;
+       if (attach == NULL)
+               return (EINVAL);
 
        lck_mtx_lock(proto_family_mutex);
-       
+
        TAILQ_FOREACH(proto_family, &proto_family_head, proto_fam_next) {
                if (proto_family->proto_family == protocol_family &&
-                       proto_family->if_family == interface_family) {
+                   proto_family->if_family == interface_family) {
                        lck_mtx_unlock(proto_family_mutex);
-                       return EEXIST;
+                       return (EEXIST);
                }
        }
 
-       proto_family = (struct proto_family_str *) _MALLOC(sizeof(struct proto_family_str), M_IFADDR, M_WAITOK);
+       proto_family = (struct proto_family_str *)
+           _MALLOC(sizeof (struct proto_family_str), M_IFADDR, M_WAITOK);
        if (!proto_family) {
                lck_mtx_unlock(proto_family_mutex);
-               return ENOMEM;
+               return (ENOMEM);
        }
 
-       bzero(proto_family, sizeof(struct proto_family_str));
+       bzero(proto_family, sizeof (struct proto_family_str));
        proto_family->proto_family      = protocol_family;
        proto_family->if_family         = interface_family & 0xffff;
        proto_family->attach_proto      = attach;
@@ -407,57 +393,51 @@ proto_register_plumber(
 
        TAILQ_INSERT_TAIL(&proto_family_head, proto_family, proto_fam_next);
        lck_mtx_unlock(proto_family_mutex);
-       return 0;
+       return (0);
 }
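
A sketch of a client of this KPI (handler names hypothetical; the plumber
for a given protocol/interface family pair may only be registered once,
hence the EEXIST check above):

	static errno_t my_attach(ifnet_t ifp, protocol_family_t pf);
	static void my_detach(ifnet_t ifp, protocol_family_t pf);

	static errno_t
	example_register(void)
	{
		return (proto_register_plumber(PF_INET,
		    IFNET_FAMILY_ETHERNET, my_attach, my_detach));
	}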
 
 void
-proto_unregister_plumber(
-       protocol_family_t       protocol_family,
-       ifnet_family_t          interface_family)
+proto_unregister_plumber(protocol_family_t protocol_family,
+    ifnet_family_t interface_family)
 {
        struct proto_family_str  *proto_family;
 
        lck_mtx_lock(proto_family_mutex);
 
        proto_family = proto_plumber_find(protocol_family, interface_family);
-       if (proto_family == 0) {
+       if (proto_family == NULL) {
                lck_mtx_unlock(proto_family_mutex);
                return;
        }
 
        TAILQ_REMOVE(&proto_family_head, proto_family, proto_fam_next);
        FREE(proto_family, M_IFADDR);
-       
+
        lck_mtx_unlock(proto_family_mutex);
-       return;
 }
 
 __private_extern__ errno_t
-proto_plumb(
-       protocol_family_t       protocol_family,
-       ifnet_t                         ifp)
+proto_plumb(protocol_family_t protocol_family, ifnet_t ifp)
 {
        struct proto_family_str  *proto_family;
        int ret = 0;
 
        lck_mtx_lock(proto_family_mutex);
        proto_family = proto_plumber_find(protocol_family, ifp->if_family);
-       if (proto_family == 0) {
+       if (proto_family == NULL) {
                lck_mtx_unlock(proto_family_mutex);
-               return ENXIO;
+               return (ENXIO);
        }
 
        ret = proto_family->attach_proto(ifp, protocol_family);
 
        lck_mtx_unlock(proto_family_mutex);
-       return ret;
+       return (ret);
 }
 
 
 __private_extern__ errno_t
-proto_unplumb(
-       protocol_family_t       protocol_family,
-       ifnet_t                         ifp)
+proto_unplumb(protocol_family_t protocol_family, ifnet_t ifp)
 {
        struct proto_family_str  *proto_family;
        int ret = 0;
@@ -465,11 +445,11 @@ proto_unplumb(
        lck_mtx_lock(proto_family_mutex);
 
        proto_family = proto_plumber_find(protocol_family, ifp->if_family);
-       if (proto_family && proto_family->detach_proto)
+       if (proto_family != NULL && proto_family->detach_proto)
                proto_family->detach_proto(ifp, protocol_family);
        else
                ret = ifnet_detach_protocol(ifp, protocol_family);
-    
+
        lck_mtx_unlock(proto_family_mutex);
-       return ret;
+       return (ret);
 }
index 73fb8a1ab4fad62513a1bbe98140e5c69ffa3874..04c81c167feac888cfcd15544e438c65ed32eebb 100644 (file)
@@ -43,6 +43,7 @@
 #define        _NET_LACP_H_
 
 #include <sys/types.h>
+#include <string.h>
 
 /**
  ** Link Aggregation Control Protocol (LACP) definitions
@@ -248,6 +249,61 @@ lacp_actor_partner_state_expired(lacp_actor_partner_state state)
     return ((state & LACP_ACTOR_PARTNER_STATE_EXPIRED) != 0);
 }
 
+/*
+ * Function: lacp_uint16_set
+ * Purpose:
+ *   Set a field in a structure that's at least 16 bits to the given
+ *   value, putting it into network byte order
+ */
+static __inline__ void
+lacp_uint16_set(uint8_t * field, uint16_t value)
+{
+    uint16_t tmp_value = htons(value);
+    memcpy((void *)field, (void *)&tmp_value, sizeof(uint16_t));
+    return;
+}
+
+/*
+ * Function: lacp_uint16_get
+ * Purpose:
+ *   Get a field in a structure that's at least 16 bits, converting
+ *   to host byte order.
+ */
+static __inline__ uint16_t
+lacp_uint16_get(const uint8_t * field)
+{
+    uint16_t tmp_field;
+    memcpy((void *)&tmp_field, (void *)field, sizeof(uint16_t));
+    return (ntohs(tmp_field));
+}
+
+/*
+ * Function: lacp_uint32_set
+ * Purpose:
+ *   Set a field in a structure that's at least 32 bits to the given
+ *   value, putting it into network byte order
+ */
+static __inline__ void
+lacp_uint32_set(uint8_t * field, uint32_t value)
+{
+    uint32_t tmp_value = htonl(value);
+    memcpy((void *)field, (void *)&tmp_value, sizeof(uint32_t));
+    return;
+}
+
+/*
+ * Function: lacp_uint32_get
+ * Purpose:
+ *   Get a field in a structure that's at least 32 bits, converting
+ *   to host byte order.
+ */
+static __inline__ uint32_t
+lacp_uint32_get(const uint8_t * field)
+{
+    uint32_t tmp_field;
+    memcpy((void *)&tmp_field, (void *)field, sizeof(uint32_t));
+    return (ntohl(tmp_field));
+}
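
These helpers exist so that TLV fields, which are unaligned byte arrays on
the wire, are never dereferenced through a cast pointer; a sketch of use:

    /* Read a 16-bit TLV field without risking an unaligned load. */
    static __inline__ uint16_t
    example_get_key(const lacp_actor_partner_tlv_ref tlv)
    {
        return (lacp_uint16_get(tlv->lap_key));    /* memcpy + ntohs */
    }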
 
 /*
  * LACP Actor/Partner TLV access functions
@@ -256,57 +312,54 @@ static __inline__ void
 lacp_actor_partner_tlv_set_system_priority(lacp_actor_partner_tlv_ref tlv, 
                                           lacp_system_priority system_priority)
 {
-    *((lacp_system_priority *)tlv->lap_system_priority) 
-       = (lacp_system_priority)htons(system_priority);
+    lacp_uint16_set(tlv->lap_system_priority, system_priority);
     return;
 }
 
 static __inline__ lacp_system_priority
 lacp_actor_partner_tlv_get_system_priority(const lacp_actor_partner_tlv_ref tlv)
 {
-    return ((lacp_system_priority)
-           ntohs(*((u_short *)tlv->lap_system_priority)));
+    return (lacp_system_priority)lacp_uint16_get(tlv->lap_system_priority);
 }
 
 static __inline__ void
 lacp_actor_partner_tlv_set_key(lacp_actor_partner_tlv_ref tlv, lacp_key key)
 {
-    *((lacp_key *)tlv->lap_key) = (lacp_key)htons(key);
+    lacp_uint16_set(tlv->lap_key, key);
     return;
 }
 
 static __inline__ lacp_key
 lacp_actor_partner_tlv_get_key(const lacp_actor_partner_tlv_ref tlv)
 {
-    return ((lacp_key)ntohs(*((u_short *)tlv->lap_key)));
+    return (lacp_key)lacp_uint16_get(tlv->lap_key);
 }
 
 static __inline__ void
 lacp_actor_partner_tlv_set_port_priority(lacp_actor_partner_tlv_ref tlv, 
                                         lacp_port_priority port_priority)
 {
-    *((lacp_port_priority *)tlv->lap_port_priority) 
-       = (lacp_port_priority)htons(port_priority);
+    lacp_uint16_set(tlv->lap_port_priority, port_priority);
     return;
 }
 
 static __inline__ lacp_port_priority
 lacp_actor_partner_tlv_get_port_priority(const lacp_actor_partner_tlv_ref tlv)
 {
-    return ((lacp_port_priority)ntohs(*((u_short *)tlv->lap_port_priority)));
+    return (lacp_port_priority)lacp_uint16_get(tlv->lap_port_priority);
 }
 
 static __inline__ void
 lacp_actor_partner_tlv_set_port(lacp_actor_partner_tlv_ref tlv, lacp_port port)
 {
-    *((lacp_port *)tlv->lap_port) = (lacp_port)htons(port);
+    lacp_uint16_set(tlv->lap_port, port);
     return;
 }
 
 static __inline__ lacp_port
 lacp_actor_partner_tlv_get_port(const lacp_actor_partner_tlv_ref tlv)
 {
-    return ((lacp_port)ntohs(*((u_short *)tlv->lap_port)));
+    return (lacp_port)lacp_uint16_get(tlv->lap_port);
 }
 
 /*
@@ -316,15 +369,14 @@ static __inline__ void
 lacp_collector_tlv_set_max_delay(lacp_collector_tlv_ref tlv, 
                                 lacp_collector_max_delay delay)
 {
-    *((lacp_collector_max_delay *)tlv->lac_max_delay) 
-       = (lacp_collector_max_delay)htons(delay);
+    lacp_uint16_set(tlv->lac_max_delay, delay);
     return;
 }
 
 static __inline__ lacp_collector_max_delay
 lacp_collector_tlv_get_max_delay(const lacp_collector_tlv_ref tlv)
 {
-    return ((lacp_collector_max_delay)ntohs(*((u_short *)tlv->lac_max_delay)));
+    return (lacp_collector_max_delay)lacp_uint16_get(tlv->lac_max_delay);
 }
 
 typedef struct lacpdu_s {
@@ -380,32 +432,28 @@ typedef struct la_marker_pdu_s {
 static __inline__ void
 la_marker_pdu_set_requestor_port(la_marker_pdu_ref lmpdu, lacp_port port)
 {
-    *((lacp_port *)lmpdu->lm_requestor_port) = (lacp_port)htons(port);
+    lacp_uint16_set(lmpdu->lm_requestor_port, port);
     return;
 }
 
 static __inline__ lacp_port
 la_marker_pdu_get_requestor_port(la_marker_pdu_ref lmpdu)
 {
-    return ((lacp_port)ntohs(*((lacp_port *)lmpdu->lm_requestor_port)));
+    return (lacp_port)lacp_uint16_get(lmpdu->lm_requestor_port);
 }
 
 static __inline__ void
 la_marker_pdu_set_requestor_transaction_id(la_marker_pdu_ref lmpdu, 
                                           la_marker_transaction_id xid)
 {
-    *((la_marker_transaction_id *)lmpdu->lm_requestor_transaction_id) 
-       = (la_marker_transaction_id)htonl(xid);
+    lacp_uint32_set(lmpdu->lm_requestor_transaction_id, xid);
     return;
 }
 
 static __inline__ la_marker_transaction_id
 la_marker_pdu_get_requestor_transaction_id(la_marker_pdu_ref lmpdu)
 {
-    la_marker_transaction_id * xid_p;
-
-    xid_p = (la_marker_transaction_id *)lmpdu->lm_requestor_transaction_id;
-    return ((la_marker_transaction_id)ntohl(*xid_p));
+    return (la_marker_transaction_id)lacp_uint32_get(lmpdu->lm_requestor_transaction_id);
 }
 
 static __inline__ void
index 51c2189104915c184162b0b196a7a99c92041485..dc49773edef4ebdef2da1790a274f477e887ad14 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1997-2008, 2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -57,6 +57,7 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/ioctl.h>
+#include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/proc.h>
 
 #include <machine/spl.h>
 
+static unsigned int ndrv_multi_max_count = NDRV_DMUX_MAX_DESCR;
+SYSCTL_UINT(_net, OID_AUTO, ndrv_multi_max_count, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NDRV socket");
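
Assuming the default OID naming that SYSCTL_UINT(_net, OID_AUTO, ...)
produces, the limit should then be tunable at runtime, e.g.
`sysctl -w net.ndrv_multi_max_count=2048`.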
+
 static int ndrv_do_detach(struct ndrv_cb *);
 static int ndrv_do_disconnect(struct ndrv_cb *);
 static struct ndrv_cb *ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol_family);
@@ -98,7 +103,6 @@ TAILQ_HEAD(, ndrv_cb)        ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
 
 extern struct domain ndrvdomain;
 extern struct protosw ndrvsw;
-extern lck_mtx_t *domain_proto_mtx;
 
 #define NDRV_PROTODEMUX_COUNT  10
 
@@ -594,7 +598,7 @@ ndrv_do_disconnect(struct ndrv_cb *np)
 }
 
 /* Hackery - return a string version of a decimal number */
-static char *
+static void
 sprint_d(u_int n, char *buf, int buflen)
 {      char dbuf[IFNAMSIZ];
        char *cp = dbuf+IFNAMSIZ-1;
@@ -606,7 +610,7 @@ sprint_d(u_int n, char *buf, int buflen)
                 n /= 10;
         } while (n != 0 && buflen > 0);
        strncpy(buf, cp, IFNAMSIZ-buflen);
-        return (cp);
+        return;
 }
 
 /*
@@ -622,7 +626,7 @@ static int name_cmp(struct ifnet *ifp, char *q)
        len = strlen(ifnet_name(ifp));
        strncpy(r, ifnet_name(ifp), IFNAMSIZ);
        r += len;
-       (void)sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf));
+       sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf));
 #if NDRV_DEBUG
        kprintf("Comparing %s, %s\n", buf, q);
 #endif
@@ -885,10 +889,12 @@ ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt)
     int                                                result;
     
     if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
-        sopt->sopt_level != SOL_NDRVPROTO)
+        sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN)
         return EINVAL;
     if (np->nd_if == NULL)
         return ENXIO;
+       if (!(np->nd_dlist_cnt < ndrv_multi_max_count))
+               return EPERM;
     
     // Allocate storage
     MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) -
@@ -918,6 +924,7 @@ ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt)
         // Add to our linked list
         ndrv_multi->next = np->nd_multiaddrs;
         np->nd_multiaddrs = ndrv_multi;
+               np->nd_dlist_cnt++;
     }
     else
     {
@@ -938,7 +945,7 @@ ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt)
     if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
         sopt->sopt_level != SOL_NDRVPROTO)
         return EINVAL;
-    if (np->nd_if == NULL)
+    if (np->nd_if == NULL || np->nd_dlist_cnt == 0)
         return ENXIO;
     
     // Allocate storage
@@ -992,6 +999,8 @@ ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt)
             }
         }
         
+               np->nd_dlist_cnt--;
+               
         // Free the memory
         FREE(ndrv_entry, M_IFADDR);
     }
index 7e9fc970083be34f22ec1c523e3bbabc29fd2df9..a201a2fd70908d55b72ed58b9a94c9201deec2f6 100644 (file)
@@ -169,4 +169,12 @@ struct ndrv_protocol_desc32 {
  * you a second or two.
  */
 
+/* Max number of descriptors allowed by default */
+#define NDRV_DMUX_MAX_DESCR    1024
+/*
+ * sysctl MIB tags at the kern.ipc.nrdv level
+ */
+#define NRDV_MULTICAST_ADDRS_PER_SOCK 1        /* to toggle NDRV_DMUX_MAX_DESCR value */
+
 #endif /* _NET_NDRV_H */
index e12a0e0ef5b4550a39b77b773193248014c94812..c2c2085954c6dc7d19a2b7d2f20bb7b2b7ffba82 100644 (file)
@@ -62,11 +62,12 @@ struct ndrv_cb
        struct sockproto nd_proto;      /* proto family, protocol */
        int nd_descrcnt;                /* # elements in nd_dlist - Obsolete */
        TAILQ_HEAD(dlist, dlil_demux_desc) nd_dlist; /* Descr. list */
+       u_int32_t nd_dlist_cnt; /* Descr. list count */
        struct ifnet *nd_if; /* obsolete, maintained for binary compatibility */
-    u_int32_t  nd_proto_family;
-    u_int32_t  nd_family;
-    struct ndrv_multiaddr*     nd_multiaddrs;
-    short      nd_unit;
+       u_int32_t nd_proto_family;
+       u_int32_t nd_family;
+       struct ndrv_multiaddr* nd_multiaddrs;
+       short nd_unit;
 };
 
 #define        sotondrvcb(so)          ((struct ndrv_cb *)(so)->so_pcb)
index bc28f03c4f9de27779319cd4d64256782f607b7a..e1ed7e907604f77a0361164b92a752d3ae74f156 100644 (file)
@@ -47,7 +47,8 @@
 
 #define        FIRST_NET_STR_ID                                1000
 static SLIST_HEAD(,net_str_id_entry)   net_str_id_list = {NULL};
-static lck_mtx_t                                               *net_str_id_lock = NULL;
+decl_lck_mtx_data(static, net_str_id_lock_data);
+static lck_mtx_t       *net_str_id_lock = &net_str_id_lock_data;
 
 static u_int32_t nsi_kind_next[NSI_MAX_KIND] = { FIRST_NET_STR_ID, FIRST_NET_STR_ID, FIRST_NET_STR_ID };
 static u_int32_t nsi_next_id = FIRST_NET_STR_ID;
@@ -71,7 +72,7 @@ net_str_id_init(void)
        lck_grp_attr_free(grp_attrib);
        lck_attrb = lck_attr_alloc_init();
        
-       net_str_id_lock = lck_mtx_alloc_init(lck_group, lck_attrb);
+       lck_mtx_init(net_str_id_lock, lck_group, lck_attrb);
        
        lck_grp_free(lck_group);
        lck_attr_free(lck_attrb);
index 2c1037c262f556a20c1cc16ca58174c9e7a2d681..7501053e35bad5d9ad99db4b16afc436d28167b8 100644 (file)
@@ -227,6 +227,8 @@ netsrc_ipv6(kern_ctl_ref kctl, uint32_t unit, struct netsrc_req *nrq)
                                nrp.nrp_flags |= NETSRC_IP6_FLAG_TENTATIVE;
                        if (ia->ia6_flags & IN6_IFF_DEPRECATED)
                                nrp.nrp_flags |= NETSRC_IP6_FLAG_DEPRECATED;
+                       if (ia->ia6_flags & IN6_IFF_OPTIMISTIC)
+                               nrp.nrp_flags |= NETSRC_IP6_FLAG_OPTIMISTIC;
                        sin6.sin6_family = AF_INET6;
                        sin6.sin6_len    = sizeof(sin6);
                        memcpy(&sin6.sin6_addr, in6, sizeof(*in6));
index 54ba8d8be27af6e8458dda097fbc217ca1626a58..d93c4a01477bff19937385fc5372bd661724a984 100644 (file)
@@ -53,6 +53,7 @@ struct netsrc_rep {
 #define        NETSRC_IP6_FLAG_TENTATIVE       0x0001
 #define        NETSRC_IP6_FLAG_TEMPORARY       0x0002
 #define        NETSRC_IP6_FLAG_DEPRECATED      0x0004
+#define        NETSRC_IP6_FLAG_OPTIMISTIC      0x0008
        uint16_t nrp_flags;
        uint16_t nrp_label;
        uint16_t nrp_precedence;
index 833b8ca34e753c69eb77618a779e96b277ed0786..eb83ac3c957f4ebdba749fd6b610854376f8f448 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -62,24 +62,62 @@ __private_extern__ int      nstat_collect = 1;
 SYSCTL_INT(_net, OID_AUTO, statistics, CTLFLAG_RW | CTLFLAG_LOCKED,
     &nstat_collect, 0, "Collect detailed statistics");
 
+enum
+{
+       NSTAT_FLAG_CLEANUP              = (0x1 << 0),
+       NSTAT_FLAG_REQCOUNTS    = (0x1 << 1)
+};
+
 typedef struct nstat_control_state
 {
-       struct nstat_control_state      *next;
-       u_int32_t                                       watching;
+       struct nstat_control_state      *ncs_next;
+       u_int32_t                                       ncs_watching;
        decl_lck_mtx_data(, mtx);
-       kern_ctl_ref                            kctl;
-       u_int32_t                                       unit;
-       nstat_src_ref_t                         next_srcref;
-       struct nstat_src                        *srcs;
-       int                                                     cleanup;
-       int                                                     suser;
+       kern_ctl_ref                            ncs_kctl;
+       u_int32_t                                       ncs_unit;
+       nstat_src_ref_t                         ncs_next_srcref;
+       struct nstat_src                        *ncs_srcs;
+       u_int32_t                                       ncs_flags;
 } nstat_control_state;
 
+typedef struct nstat_provider
+{
+       struct nstat_provider   *next;
+       nstat_provider_id_t             nstat_provider_id;
+       size_t                                  nstat_descriptor_length;
+       errno_t                                 (*nstat_lookup)(const void *data, u_int32_t length, nstat_provider_cookie_t *out_cookie);
+       int                                             (*nstat_gone)(nstat_provider_cookie_t cookie);
+       errno_t                                 (*nstat_counts)(nstat_provider_cookie_t cookie, struct nstat_counts *out_counts, int *out_gone);
+       errno_t                                 (*nstat_watcher_add)(nstat_control_state *state);
+       void                                    (*nstat_watcher_remove)(nstat_control_state *state);
+       errno_t                                 (*nstat_copy_descriptor)(nstat_provider_cookie_t cookie, void *data, u_int32_t len);
+       void                                    (*nstat_release)(nstat_provider_cookie_t cookie, boolean_t locked);
+} nstat_provider;
+
+
+typedef struct nstat_src
+{
+       struct nstat_src                *next;
+       nstat_src_ref_t                 srcref;
+       nstat_provider                  *provider;
+       nstat_provider_cookie_t cookie;
+} nstat_src;
+
+static errno_t         nstat_control_send_counts(nstat_control_state *,
+                           nstat_src *, unsigned long long, int *); 
+static int             nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context);
+static errno_t         nstat_control_send_removed(nstat_control_state *, nstat_src *);
+static void            nstat_control_cleanup_source(nstat_control_state *state, nstat_src *src,
+                               boolean_t);
+
+static u_int32_t       nstat_udp_watchers = 0;
+static u_int32_t       nstat_tcp_watchers = 0;
+
 static void nstat_control_register(void);
 
 static volatile OSMallocTag    nstat_malloc_tag = NULL;
 static nstat_control_state     *nstat_controls = NULL;
-static uint64_t                                nstat_idle_time = 0ULL;
+static uint64_t                                nstat_idle_time = 0;
 static decl_lck_mtx_data(, nstat_mtx);
 
 static void
@@ -94,7 +132,7 @@ nstat_copy_sa_out(
        if (src->sa_family == AF_INET6 &&
                src->sa_len >= sizeof(struct sockaddr_in6))
        {
-               struct sockaddr_in6     *sin6 = (struct sockaddr_in6*)dst;
+               struct sockaddr_in6     *sin6 = (struct sockaddr_in6*)(void *)dst;
                if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr))
                {
                        if (sin6->sin6_scope_id == 0)
@@ -143,20 +181,6 @@ nstat_ip6_to_sockaddr(
 
 #pragma mark -- Network Statistic Providers --
 
-typedef struct nstat_provider
-{
-       struct nstat_provider   *next;
-       nstat_provider_id_t             nstat_provider_id;
-       size_t                                  nstat_descriptor_length;
-       errno_t                                 (*nstat_lookup)(const void *data, u_int32_t length, nstat_provider_cookie_t *out_cookie);
-       int                                             (*nstat_gone)(nstat_provider_cookie_t cookie);
-       errno_t                                 (*nstat_counts)(nstat_provider_cookie_t cookie, struct nstat_counts *out_counts, int *out_gone);
-       errno_t                                 (*nstat_watcher_add)(nstat_control_state *state);
-       void                                    (*nstat_watcher_remove)(nstat_control_state *state);
-       errno_t                                 (*nstat_copy_descriptor)(nstat_provider_cookie_t cookie, void *data, u_int32_t len);
-       void                                    (*nstat_release)(nstat_provider_cookie_t cookie);
-} nstat_provider;
-
 static errno_t nstat_control_source_add(u_int64_t context, nstat_control_state *state, nstat_provider *provider, nstat_provider_cookie_t cookie);
 struct nstat_provider  *nstat_providers = NULL;
 
@@ -186,7 +210,6 @@ nstat_lookup_entry(
        *out_provider = nstat_find_provider_by_id(id);
        if (*out_provider == NULL)
        {
-               printf("%s:%d: provider %u not found\n", __FUNCTION__, __LINE__, id);
                return ENOENT;
        }
        
@@ -197,7 +220,7 @@ static void nstat_init_route_provider(void);
 static void nstat_init_tcp_provider(void);
 static void nstat_init_udp_provider(void);
 
-static void
+__private_extern__ void
 nstat_init(void)
 {
        if (nstat_malloc_tag != NULL) return;
@@ -241,7 +264,7 @@ nstat_malloc_aligned(
        u_int8_t        *aligned = buffer + sizeof(*hdr);
        aligned = (u_int8_t*)P2ROUNDUP(aligned, alignment);
        
-       hdr = (struct align_header*)(aligned - sizeof(*hdr));
+       hdr = (struct align_header*)(void *)(aligned - sizeof(*hdr));
        hdr->offset = aligned - buffer;
        hdr->length = size;
        
@@ -253,7 +276,7 @@ nstat_free_aligned(
        void            *buffer,
        OSMallocTag     tag)
 {
-       struct align_header *hdr = (struct align_header*)((u_int8_t*)buffer - sizeof(*hdr));
+       struct align_header *hdr = (struct align_header*)(void *)((u_int8_t*)buffer - sizeof(*hdr));
        OSFree(((char*)buffer) - hdr->offset, hdr->length, tag);
 }
 
@@ -280,7 +303,6 @@ nstat_route_lookup(
        
        if (length < sizeof(*param))
        {
-               printf("%s:%d: expected %lu byte param, received %u\n", __FUNCTION__, __LINE__, sizeof(*param), length);
                return EINVAL;
        }
        
@@ -288,16 +310,13 @@ nstat_route_lookup(
                param->dst.v4.sin_family > AF_MAX ||
                (param->mask.v4.sin_family != 0 && param->mask.v4.sin_family != param->dst.v4.sin_family))
        {
-               printf("%s:%d invalid family (dst=%d, mask=%d)\n", __FUNCTION__, __LINE__,
-                       param->dst.v4.sin_family, param->mask.v4.sin_family);
                return EINVAL;
        }
        
        if (param->dst.v4.sin_len > sizeof(param->dst) ||
                (param->mask.v4.sin_family && param->mask.v4.sin_len > sizeof(param->mask.v4.sin_len)))
        {
-               printf("%s:%d invalid length (dst=%d, mask=%d)\n", __FUNCTION__, __LINE__,
-                       param->dst.v4.sin_len, param->mask.v4.sin_len);
+               return EINVAL;
        }
        
        // TBD: Need to validate length of sockaddr for different families?
@@ -360,7 +379,8 @@ nstat_route_counts(
 
 static void
 nstat_route_release(
-       nstat_provider_cookie_t cookie)
+       nstat_provider_cookie_t cookie,
+       __unused int locked)
 {
        rtfree((struct rtentry*)cookie);
 }
@@ -421,7 +441,6 @@ nstat_route_add_watcher(
                result = rnh->rnh_walktree(rnh, nstat_route_walktree_add, state);
                if (result != 0)
                {
-                       printf("%s:%d rnh_walktree failed: %d\n", __FUNCTION__, __LINE__, result);
                        break;
                }
        }
@@ -441,9 +460,9 @@ nstat_route_new_entry(
        if ((rt->rt_flags & RTF_UP) != 0)
        {
                nstat_control_state     *state;
-               for (state = nstat_controls; state; state = state->next)
+               for (state = nstat_controls; state; state = state->ncs_next)
                {
-                       if ((state->watching & (1 << NSTAT_PROVIDER_ROUTE)) != 0)
+                       if ((state->ncs_watching & (1 << NSTAT_PROVIDER_ROUTE)) != 0)
                        {
                                // this client is watching routes
                                // acquire a reference for the route
@@ -474,7 +493,6 @@ nstat_route_copy_descriptor(
        nstat_route_descriptor  *desc = (nstat_route_descriptor*)data;
        if (len < sizeof(*desc))
        {
-               printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(*desc), len);
                return EINVAL;
        }
        bzero(desc, sizeof(*desc));
@@ -710,6 +728,7 @@ nstat_route_rtt(
        }
 }
 
+
 #pragma mark -- TCP Provider --
 
 static nstat_provider  nstat_tcp_provider;
@@ -725,7 +744,6 @@ nstat_tcpudp_lookup(
        const nstat_tcp_add_param       *param = (const nstat_tcp_add_param*)data;
        if (length < sizeof(*param))
        {
-               printf("%s:%d expected %lu byte param, received %u\n", __FUNCTION__, __LINE__, sizeof(*param), length);
                return EINVAL;
        }
        
@@ -733,8 +751,6 @@ nstat_tcpudp_lookup(
        if (param->remote.v4.sin_family != 0 &&
                param->remote.v4.sin_family != param->local.v4.sin_family)
        {
-               printf("%s:%d src family (%d) and dst family (%d) don't match\n",
-                       __FUNCTION__, __LINE__, param->local.v4.sin_family, param->remote.v4.sin_family);
                return EINVAL;
        }
        
@@ -748,9 +764,6 @@ nstat_tcpudp_lookup(
                                (param->remote.v4.sin_family != 0 &&
                                 param->remote.v4.sin_len != sizeof(param->remote.v4)))
                        {
-                               printf("%s:%d invalid length for v4 src (%d) or dst (%d), should be %lu\n",
-                                       __FUNCTION__, __LINE__, param->local.v4.sin_len, param->remote.v4.sin_len,
-                                       sizeof(param->remote.v4));
                                return EINVAL;
                        }
                        
@@ -772,9 +785,6 @@ nstat_tcpudp_lookup(
                                (param->remote.v6.sin6_family != 0 &&
                                 param->remote.v6.sin6_len != sizeof(param->remote.v6)))
                        {
-                               printf("%s:%d invalid length for v6 src (%d) or dst (%d), should be %lu\n",
-                                       __FUNCTION__, __LINE__, param->local.v6.sin6_len, param->remote.v6.sin6_len,
-                                       sizeof(param->remote.v6));
                                return EINVAL;
                        }
                        
@@ -788,7 +798,6 @@ nstat_tcpudp_lookup(
 #endif
                
                default:
-                       printf("%s:%d unsupported address family %d\n", __FUNCTION__, __LINE__, param->local.v4.sin_family);
                        return EINVAL;
        }
        
@@ -836,35 +845,33 @@ nstat_tcp_counts(
                *out_gone = 1;
        }
        
-       if (tp->t_state > TCPS_LISTEN)
-       {
-               atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets);
-               atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes);
-               atomic_get_64(out_counts->nstat_txpackets, &inp->inp_stat->txpackets);
-               atomic_get_64(out_counts->nstat_txbytes, &inp->inp_stat->txbytes);
-               out_counts->nstat_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
-               out_counts->nstat_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
-               out_counts->nstat_txretransmit = tp->t_stat.txretransmitbytes;
-               out_counts->nstat_connectattempts = tp->t_state >= TCPS_SYN_SENT ? 1 : 0;
-               out_counts->nstat_connectsuccesses = tp->t_state >= TCPS_ESTABLISHED ? 1 : 0;
-               out_counts->nstat_avg_rtt = tp->t_srtt;
-               out_counts->nstat_min_rtt = tp->t_rttbest;
-               out_counts->nstat_var_rtt = tp->t_rttvar;
-       }
+       atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets);
+       atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes);
+       atomic_get_64(out_counts->nstat_txpackets, &inp->inp_stat->txpackets);
+       atomic_get_64(out_counts->nstat_txbytes, &inp->inp_stat->txbytes);
+       out_counts->nstat_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
+       out_counts->nstat_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
+       out_counts->nstat_txretransmit = tp->t_stat.txretransmitbytes;
+       out_counts->nstat_connectattempts = tp->t_state >= TCPS_SYN_SENT ? 1 : 0;
+       out_counts->nstat_connectsuccesses = tp->t_state >= TCPS_ESTABLISHED ? 1 : 0;
+       out_counts->nstat_avg_rtt = tp->t_srtt;
+       out_counts->nstat_min_rtt = tp->t_rttbest;
+       out_counts->nstat_var_rtt = tp->t_rttvar;
+       if (out_counts->nstat_avg_rtt < out_counts->nstat_min_rtt)
+               out_counts->nstat_min_rtt = out_counts->nstat_avg_rtt;
        
        return 0;
 }
 
 static void
 nstat_tcp_release(
-       nstat_provider_cookie_t cookie)
+       nstat_provider_cookie_t cookie,
+       int locked)
 {
        struct inpcb *inp = (struct inpcb*)cookie;
-       in_pcb_checkstate(inp, WNT_RELEASE, 0);
+       in_pcb_checkstate(inp, WNT_RELEASE, locked);
 }
 
-static u_int32_t       nstat_tcp_watchers = 0;
-
 static errno_t
 nstat_tcp_add_watcher(
        nstat_control_state     *state)
@@ -908,9 +915,9 @@ nstat_tcp_new_pcb(
        
        lck_mtx_lock(&nstat_mtx);
        nstat_control_state     *state;
-       for (state = nstat_controls; state; state = state->next)
+       for (state = nstat_controls; state; state = state->ncs_next)
        {
-               if ((state->watching & (1 << NSTAT_PROVIDER_TCP)) != 0)
+               if ((state->ncs_watching & (1 << NSTAT_PROVIDER_TCP)) != 0)
                {
                        // this client is watching tcp
                        // acquire a reference for it
@@ -928,6 +935,54 @@ nstat_tcp_new_pcb(
        lck_mtx_unlock(&nstat_mtx);
 }
 
+__private_extern__ void
+nstat_pcb_detach(struct inpcb *inp)
+{
+       nstat_control_state *state;
+       nstat_src *src, *prevsrc;
+       nstat_src *dead_list = NULL;
+
+       if (inp == NULL || (nstat_tcp_watchers == 0 && nstat_udp_watchers == 0))
+               return;
+
+       lck_mtx_lock(&nstat_mtx);
+       for (state = nstat_controls; state; state = state->ncs_next) {
+               lck_mtx_lock(&state->mtx);
+               for (prevsrc = NULL, src = state->ncs_srcs; src;
+                   prevsrc = src, src = src->next)
+                       if (src->cookie == inp)
+                               break;
+
+               if (src) {
+                       // send one last counts notification
+                       nstat_control_send_counts(state, src, 0, NULL);
+
+                       // send a last description
+                       nstat_control_send_description(state, src, 0);
+
+                       // send the source removed notification
+                       nstat_control_send_removed(state, src);
+
+                       if (prevsrc)
+                               prevsrc->next = src->next;
+                       else
+                               state->ncs_srcs = src->next;
+
+                       src->next = dead_list;
+                       dead_list = src;
+               }
+               lck_mtx_unlock(&state->mtx);
+       }
+       lck_mtx_unlock(&nstat_mtx);
+
+       while (dead_list) {
+               src = dead_list;
+               dead_list = src->next;
+
+               nstat_control_cleanup_source(NULL, src, TRUE);
+       }
+}
+
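nstat_pcb_detach() above uses the same two-phase teardown as the idle check: matching sources are unlinked onto a private dead_list while the nstat locks are held, and the release callbacks, which can reach back into in_pcb_checkstate(), run only after every lock is dropped (the TRUE passed to nstat_control_cleanup_source() presumably tells the provider that the caller already holds the relevant inpcb lock). A compilable toy of that unlink-under-lock, free-outside-lock shape, generalized to remove every match; all names here are hypothetical:

    #include <stdlib.h>

    struct node { struct node *next; void *cookie; };

    /* Phase 1 -- imagine the list lock held here: unlink each match
     * and park it on a caller-private list. */
    static void
    unlink_matches(struct node **list, void *cookie, struct node **dead_list)
    {
            struct node **pp = list;
            while (*pp != NULL) {
                    if ((*pp)->cookie == cookie) {
                            struct node *dead = *pp;
                            *pp = dead->next;            /* unlink */
                            dead->next = *dead_list;     /* park for later */
                            *dead_list = dead;
                    } else {
                            pp = &(*pp)->next;
                    }
            }
    }

    /* Phase 2 -- no locks held: release at leisure. */
    static void
    reap(struct node *dead_list)
    {
            while (dead_list != NULL) {
                    struct node *next = dead_list->next;
                    free(dead_list);
                    dead_list = next;
            }
    }

The point of the split is lock ordering: nothing that might sleep or re-enter the PCB code runs while the nstat mutexes are held.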
 static errno_t
 nstat_tcp_copy_descriptor(
        nstat_provider_cookie_t cookie,
@@ -936,13 +991,15 @@ nstat_tcp_copy_descriptor(
 {
        if (len < sizeof(nstat_tcp_descriptor))
        {
-               printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(nstat_tcp_descriptor), len);
                return EINVAL;
        }
        
        nstat_tcp_descriptor    *desc = (nstat_tcp_descriptor*)data;
        struct inpcb                    *inp = (struct inpcb*)cookie;
        struct tcpcb                    *tp = intotcpcb(inp);
+
+       if (inp->inp_state == INPCB_STATE_DEAD)
+               return EINVAL;
        
        bzero(desc, sizeof(*desc));
        
@@ -962,8 +1019,8 @@ nstat_tcp_copy_descriptor(
        }
        
        desc->state = intotcpcb(inp)->t_state;
-       if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_ifp)
-               desc->ifindex = inp->inp_route.ro_rt->rt_ifp->if_index;
+       desc->ifindex = (inp->inp_last_outifp == NULL) ? 0 :
+           inp->inp_last_outifp->if_index;
        
        // danger - not locked, values could be bogus
        desc->txunacked = tp->snd_max - tp->snd_una;
@@ -977,6 +1034,7 @@ nstat_tcp_copy_descriptor(
                // they're in sync?
                desc->upid = so->last_upid;
                desc->pid = so->last_pid;
+               desc->traffic_class = so->so_traffic_class;
                
                proc_name(desc->pid, desc->pname, sizeof(desc->pname));
                desc->pname[sizeof(desc->pname) - 1] = 0;
@@ -1054,14 +1112,13 @@ nstat_udp_counts(
 
 static void
 nstat_udp_release(
-       nstat_provider_cookie_t cookie)
+       nstat_provider_cookie_t cookie,
+       int locked)
 {
        struct inpcb *inp = (struct inpcb*)cookie;
-       in_pcb_checkstate(inp, WNT_RELEASE, 0);
+       in_pcb_checkstate(inp, WNT_RELEASE, locked);
 }
 
-static u_int32_t       nstat_udp_watchers = 0;
-
 static errno_t
 nstat_udp_add_watcher(
        nstat_control_state     *state)
@@ -1105,9 +1162,9 @@ nstat_udp_new_pcb(
        
        lck_mtx_lock(&nstat_mtx);
        nstat_control_state     *state;
-       for (state = nstat_controls; state; state = state->next)
+       for (state = nstat_controls; state; state = state->ncs_next)
        {
-               if ((state->watching & (1 << NSTAT_PROVIDER_UDP)) != 0)
+               if ((state->ncs_watching & (1 << NSTAT_PROVIDER_UDP)) != 0)
                {
                        // this client is watching udp
                        // acquire a reference for it
@@ -1133,13 +1190,15 @@ nstat_udp_copy_descriptor(
 {
        if (len < sizeof(nstat_udp_descriptor))
        {
-               printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(nstat_tcp_descriptor), len);
                return EINVAL;
        }
        
        nstat_udp_descriptor    *desc = (nstat_udp_descriptor*)data;
        struct inpcb                    *inp = (struct inpcb*)cookie;
        
+       if (inp->inp_state == INPCB_STATE_DEAD)
+               return EINVAL;
+
        bzero(desc, sizeof(*desc));
        
        if (inp->inp_vflag & INP_IPV6)
@@ -1157,9 +1216,9 @@ nstat_udp_copy_descriptor(
                        &desc->remote.v4, sizeof(desc->remote));
        }
        
-       if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_ifp)
-               desc->ifindex = inp->inp_route.ro_rt->rt_ifp->if_index;
-       
+       desc->ifindex = (inp->inp_last_outifp == NULL) ? 0 :
+           inp->inp_last_outifp->if_index;
+               
        struct socket *so = inp->inp_socket;
        if (so)
        {
@@ -1170,6 +1229,7 @@ nstat_udp_copy_descriptor(
                
                desc->rcvbufsize = so->so_rcv.sb_hiwat;
                desc->rcvbufused = so->so_rcv.sb_cc;
+               desc->traffic_class = so->so_traffic_class;
                
                proc_name(desc->pid, desc->pname, sizeof(desc->pname));
                desc->pname[sizeof(desc->pname) - 1] = 0;
@@ -1197,22 +1257,12 @@ nstat_init_udp_provider(void)
 
 #pragma mark -- Kernel Control Socket --
 
-typedef struct nstat_src
-{
-       struct nstat_src                *next;
-       nstat_src_ref_t                 srcref;
-       nstat_provider                  *provider;
-       nstat_provider_cookie_t cookie;
-} nstat_src;
-
 static kern_ctl_ref    nstat_ctlref = NULL;
 static lck_grp_t       *nstat_lck_grp = NULL;
 
 static errno_t nstat_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac, void **uinfo);
 static errno_t nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo);
 static errno_t nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags);
-static int             nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context);
-static void            nstat_control_cleanup_source(nstat_control_state *state, struct nstat_src *src);
 
 
 static void*
@@ -1222,43 +1272,47 @@ nstat_idle_check(
 {
        lck_mtx_lock(&nstat_mtx);
        
-       nstat_idle_time = 0ULL;
+       nstat_idle_time = 0;
        
        nstat_control_state *control;
        nstat_src       *dead = NULL;
        nstat_src       *dead_list = NULL;
-       for (control = nstat_controls; control; control = control->next)
+       for (control = nstat_controls; control; control = control->ncs_next)
        {
                lck_mtx_lock(&control->mtx);
-               nstat_src       **srcpp = &control->srcs;
+               nstat_src       **srcpp = &control->ncs_srcs;
                
-               while(*srcpp != NULL)
+               if (!(control->ncs_flags & NSTAT_FLAG_REQCOUNTS))
                {
-                       if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie))
+                       while(*srcpp != NULL)
                        {
-                               // Pull it off the list
-                               dead = *srcpp;
-                               *srcpp = (*srcpp)->next;
-                               
-                               // send a last description
-                               nstat_control_send_description(control, dead, 0ULL);
-                               
-                               // send the source removed notification
-                               nstat_msg_src_removed   removed;
-                               removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
-                               removed.hdr.context = 0;
-                               removed.srcref = dead->srcref;
-                               (void)ctl_enqueuedata(control->kctl, control->unit, &removed, sizeof(removed), CTL_DATA_EOR);
-                               
-                               // Put this on the list to release later
-                               dead->next = dead_list;
-                               dead_list = dead;
-                       }
-                       else
-                       {
-                               srcpp = &(*srcpp)->next;
+                               if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie))
+                               {
+                                       // Pull it off the list
+                                       dead = *srcpp;
+                                       *srcpp = (*srcpp)->next;
+                                       
+                                       // send one last counts notification
+                                       nstat_control_send_counts(control, dead,
+                                           0, NULL);
+                                               
+                                       // send a last description
+                                       nstat_control_send_description(control, dead, 0);
+                                       
+                                       // send the source removed notification
+                                       nstat_control_send_removed(control, dead);
+                                       
+                                       // Put this on the list to release later
+                                       dead->next = dead_list;
+                                       dead_list = dead;
+                               }
+                               else
+                               {
+                                       srcpp = &(*srcpp)->next;
+                               }
                        }
                }
+               control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
                lck_mtx_unlock(&control->mtx);
        }
        
@@ -1276,7 +1330,7 @@ nstat_idle_check(
                dead = dead_list;
                dead_list = dead->next;
                
-               nstat_control_cleanup_source(NULL, dead);
+               nstat_control_cleanup_source(NULL, dead, FALSE);
        }
        
        return NULL;
@@ -1301,27 +1355,20 @@ nstat_control_register(void)
        nstat_control.ctl_disconnect = nstat_control_disconnect;
        nstat_control.ctl_send = nstat_control_send;
        
-       errno_t result = ctl_register(&nstat_control, &nstat_ctlref);
-       if (result != 0)
-               printf("%s:%d ctl_register failed: %d", __FUNCTION__, __LINE__, result);
+       ctl_register(&nstat_control, &nstat_ctlref);
 }
 
 static void
 nstat_control_cleanup_source(
        nstat_control_state     *state,
-       struct nstat_src        *src)
+       struct nstat_src        *src,
+       boolean_t               locked)
 {
        if (state)
-       {
-               nstat_msg_src_removed   removed;
-               removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
-               removed.hdr.context = 0;
-               removed.srcref = src->srcref;
-               (void)ctl_enqueuedata(state->kctl, state->unit, &removed, sizeof(removed), CTL_DATA_EOR);
-       }
+               nstat_control_send_removed(state, src);
        
        // Cleanup the source if we found it.
-       src->provider->nstat_release(src->cookie);
+       src->provider->nstat_release(src->cookie, locked);
        OSFree(src, sizeof(*src), nstat_malloc_tag);
 }
 
@@ -1336,20 +1383,16 @@ nstat_control_connect(
        
        bzero(state, sizeof(*state));
        lck_mtx_init(&state->mtx, nstat_lck_grp, NULL);
-       state->kctl = kctl;
-       state->unit = sac->sc_unit;
+       state->ncs_kctl = kctl;
+       state->ncs_unit = sac->sc_unit;
+       state->ncs_flags = NSTAT_FLAG_REQCOUNTS;
        *uinfo = state;
        
-       // check if we're super user
-       proc_t  pself = proc_self();
-       state->suser = proc_suser(pself) == 0;
-       proc_rele(pself);
-       
        lck_mtx_lock(&nstat_mtx);
-       state->next = nstat_controls;
+       state->ncs_next = nstat_controls;
        nstat_controls = state;
        
-       if (nstat_idle_time == 0ULL)
+       if (nstat_idle_time == 0)
        {
                clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time);
                thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time);
@@ -1372,11 +1415,11 @@ nstat_control_disconnect(
        // pull it out of the global list of states
        lck_mtx_lock(&nstat_mtx);
        nstat_control_state     **statepp;
-       for (statepp = &nstat_controls; *statepp; statepp = &(*statepp)->next)
+       for (statepp = &nstat_controls; *statepp; statepp = &(*statepp)->ncs_next)
        {
                if (*statepp == state)
                {
-                       *statepp = state->next;
+                       *statepp = state->ncs_next;
                        break;
                }
        }
@@ -1385,8 +1428,8 @@ nstat_control_disconnect(
        lck_mtx_lock(&state->mtx);
        // Stop watching for sources
        nstat_provider  *provider;
-       watching = state->watching;
-       state->watching = 0;
+       watching = state->ncs_watching;
+       state->ncs_watching = 0;
        for (provider = nstat_providers; provider && watching;  provider = provider->next)
        {
                if ((watching & (1 << provider->nstat_provider_id)) != 0)
@@ -1397,11 +1440,11 @@ nstat_control_disconnect(
        }
        
        // set cleanup flags
-       state->cleanup = TRUE;
+       state->ncs_flags |= NSTAT_FLAG_CLEANUP;
        
        // Copy out the list of sources
-       nstat_src       *srcs = state->srcs;
-       state->srcs = NULL;
+       nstat_src       *srcs = state->ncs_srcs;
+       state->ncs_srcs = NULL;
        lck_mtx_unlock(&state->mtx);
        
        while (srcs)
@@ -1413,7 +1456,7 @@ nstat_control_disconnect(
                srcs = src->next;
                
                // clean it up
-               nstat_control_cleanup_source(NULL, src);
+               nstat_control_cleanup_source(NULL, src, FALSE);
        }
        
        OSFree(state, sizeof(*state), nstat_malloc_tag);
@@ -1430,26 +1473,51 @@ nstat_control_next_src_ref(
        
        for (i = 0; i < 1000 && toReturn == NSTAT_SRC_REF_INVALID; i++)
        {
-               if (state->next_srcref == NSTAT_SRC_REF_INVALID ||
-                       state->next_srcref == NSTAT_SRC_REF_ALL)
+               if (state->ncs_next_srcref == NSTAT_SRC_REF_INVALID ||
+                       state->ncs_next_srcref == NSTAT_SRC_REF_ALL)
                {
-                       state->next_srcref = 1;
+                       state->ncs_next_srcref = 1;
                }
                
                nstat_src       *src;
-               for (src = state->srcs; src; src = src->next)
+               for (src = state->ncs_srcs; src; src = src->next)
                {
-                       if (src->srcref == state->next_srcref)
+                       if (src->srcref == state->ncs_next_srcref)
                                break;
                }
                
-               if (src == NULL) toReturn = state->next_srcref;
-               state->next_srcref++;
+               if (src == NULL) toReturn = state->ncs_next_srcref;
+               state->ncs_next_srcref++;
        }
        
        return toReturn;
 }
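nstat_control_next_src_ref() above is a bounded linear allocator: it hands out the next counter value that is neither of the two sentinel refs nor attached to a live source, and it gives up after 1000 probes instead of spinning forever on a nearly full ref space. The same scheme as a self-contained toy; the sentinel values are assumptions:

    #include <stdbool.h>
    #include <stdint.h>

    #define REF_INVALID 0u            /* sentinel values assumed for illustration */
    #define REF_ALL     0xffffffffu

    /* in_use() stands in for the walk over the state's source list. */
    static uint32_t
    next_ref(uint32_t *counter, bool (*in_use)(uint32_t))
    {
            uint32_t ref = REF_INVALID;
            int i;
            for (i = 0; i < 1000 && ref == REF_INVALID; i++) {
                    if (*counter == REF_INVALID || *counter == REF_ALL)
                            *counter = 1;             /* skip the sentinels on wrap */
                    if (!in_use(*counter))
                            ref = *counter;
                    (*counter)++;
            }
            return ref;   /* REF_INVALID if nothing free after 1000 probes */
    }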
 
+static errno_t
+nstat_control_send_counts(
+       nstat_control_state     *state,
+       nstat_src               *src,
+       unsigned long long      context,
+       int *gone)
+{      
+       nstat_msg_src_counts counts;
+       int localgone = 0;
+       errno_t result = 0;
+
+       counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS;
+       counts.hdr.context = context;
+       counts.srcref = src->srcref;
+       bzero(&counts.counts, sizeof(counts.counts));
+       if (src->provider->nstat_counts(src->cookie, &counts.counts,
+           &localgone) == 0) {
+               result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &counts,
+                   sizeof(counts), CTL_DATA_EOR);
+       }
+       if (gone)
+               *gone = localgone;
+       return result;
+}
+
 static int
 nstat_control_send_description(
        nstat_control_state     *state,
@@ -1460,8 +1528,6 @@ nstat_control_send_description(
        if (src->provider->nstat_descriptor_length == 0 ||
                src->provider->nstat_copy_descriptor == NULL)
        {
-               lck_mtx_unlock(&state->mtx);
-               printf("%s:%d - provider doesn't support descriptions\n", __FUNCTION__, __LINE__);
                return EOPNOTSUPP;
        }
        
@@ -1471,8 +1537,6 @@ nstat_control_send_description(
        u_int32_t               size = offsetof(nstat_msg_src_description, data) + src->provider->nstat_descriptor_length;
        if (mbuf_allocpacket(MBUF_WAITOK, size, &one, &msg) != 0)
        {
-               lck_mtx_unlock(&state->mtx);
-               printf("%s:%d - failed to allocate response\n", __FUNCTION__, __LINE__);
                return ENOMEM;
        }
        
@@ -1486,7 +1550,6 @@ nstat_control_send_description(
        if (result != 0)
        {
                mbuf_freem(msg);
-               printf("%s:%d - provider failed to copy descriptor %d\n", __FUNCTION__, __LINE__, result);
                return result;
        }
        
@@ -1495,16 +1558,32 @@ nstat_control_send_description(
        desc->srcref = src->srcref;
        desc->provider = src->provider->nstat_provider_id;
        
-       result = ctl_enqueuembuf(state->kctl, state->unit, msg, CTL_DATA_EOR);
+       result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR);
        if (result != 0)
        {
-               printf("%s:%d ctl_enqueuembuf returned error %d\n", __FUNCTION__, __LINE__, result);
                mbuf_freem(msg);
        }
        
        return result;
 }
 
+static errno_t
+nstat_control_send_removed(
+       nstat_control_state     *state,
+       nstat_src               *src)
+{
+       nstat_msg_src_removed removed;
+       errno_t result;
+
+       removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
+       removed.hdr.context = 0;
+       removed.srcref = src->srcref;
+       result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &removed,
+           sizeof(removed), CTL_DATA_EOR);
+
+       return result;
+}
+
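Both new helpers, like the success replies elsewhere in this diff, lead with the same small header: a 64-bit context the client chose, echoed back verbatim so replies can be matched to requests, plus a 32-bit type discriminator. A sketch of that layout, inferred from the field accesses in this file rather than quoted from ntstat.h, so the padding and ordering are assumptions:

    #include <stdint.h>

    /* Inferred shape; field order and padding are assumptions. */
    typedef struct nstat_msg_hdr_sketch {
            uint64_t context;   /* caller-chosen token, echoed in every reply */
            uint32_t type;      /* NSTAT_MSG_TYPE_* discriminator */
            uint32_t pad;       /* presumably keeps the header 8-byte aligned */
    } nstat_msg_hdr_sketch;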
 static errno_t
 nstat_control_handle_add_request(
        nstat_control_state     *state,
@@ -1515,8 +1594,6 @@ nstat_control_handle_add_request(
        // Verify the header fits in the first mbuf
        if (mbuf_len(m) < offsetof(nstat_msg_add_src_req, param))
        {
-               printf("mbuf_len(m)=%lu, offsetof(nstat_msg_add_src_req*, param)=%lu\n",
-                       mbuf_len(m), offsetof(nstat_msg_add_src_req, param));
                return EINVAL;
        }
        
@@ -1524,7 +1601,6 @@ nstat_control_handle_add_request(
        int32_t paramlength = mbuf_pkthdr_len(m) - offsetof(nstat_msg_add_src_req, param);
        if (paramlength < 0 || paramlength > 2 * 1024)
        {
-               printf("invalid paramlength=%d\n", paramlength);
                return EINVAL;
        }
        
@@ -1554,33 +1630,11 @@ nstat_control_handle_add_request(
        
        result = nstat_control_source_add(req->hdr.context, state, provider, cookie);
        if (result != 0)
-               provider->nstat_release(cookie);
+               provider->nstat_release(cookie, 0);
        
        return result;
 }
 
-static int
-nstat_perm_check(
-       __unused nstat_control_state    *state)
-{
-       int allow = 0;
-#if !REQUIRE_ROOT_FOR_STATS
-       allow = 1;
-#else
-       // If the socket was created by a priv process, allow
-       if (state->suser) return 1;
-       
-       // If the current process is priv, allow
-       proc_t  self = proc_self();
-       allow = proc_suser(self) == 0;
-       proc_rele(self);
-       
-       // TBD: check for entitlement, root check is too coarse
-#endif /* REQUIRE_ROOT_FOR_STATS */
-       
-       return allow;
-}
-
 static errno_t
 nstat_control_handle_add_all(
        nstat_control_state     *state,
@@ -1588,16 +1642,9 @@ nstat_control_handle_add_all(
 {
        errno_t result = 0;
        
-       if (!nstat_perm_check(state))
-       {
-               return EPERM;
-       }
-       
        // Verify the header fits in the first mbuf
        if (mbuf_len(m) < sizeof(nstat_msg_add_all_srcs))
        {
-               printf("mbuf_len(m)=%lu, sizeof(nstat_msg_add_all_srcs)=%lu\n",
-                       mbuf_len(m), sizeof(nstat_msg_add_all_srcs));
                return EINVAL;
        }
        
@@ -1609,9 +1656,9 @@ nstat_control_handle_add_all(
        
        // Make sure we don't add the provider twice
        lck_mtx_lock(&state->mtx);
-       if ((state->watching & (1 << provider->nstat_provider_id)) != 0)
+       if ((state->ncs_watching & (1 << provider->nstat_provider_id)) != 0)
                result = EALREADY;
-       state->watching |= (1 << provider->nstat_provider_id);
+       state->ncs_watching |= (1 << provider->nstat_provider_id);
        lck_mtx_unlock(&state->mtx);
        if (result != 0) return result;
        
@@ -1619,7 +1666,7 @@ nstat_control_handle_add_all(
        if (result != 0)
        {
                lck_mtx_lock(&state->mtx);
-               state->watching &= ~(1 << provider->nstat_provider_id);
+               state->ncs_watching &= ~(1 << provider->nstat_provider_id);
                lck_mtx_unlock(&state->mtx);
        }
        
@@ -1630,8 +1677,7 @@ nstat_control_handle_add_all(
                success.context = req->hdr.context;
                success.type = NSTAT_MSG_TYPE_SUCCESS;
                success.pad = 0;
-               if (ctl_enqueuedata(state->kctl, state->unit, &success, sizeof(success), CTL_DATA_EOR) != 0)
-                       printf("%s:%d - failed to enqueue success message\n", __FUNCTION__, __LINE__);
+               ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR);
        }
        
        return result;
@@ -1671,7 +1717,7 @@ nstat_control_source_add(
        lck_mtx_lock(&state->mtx);
        
        add->srcref = src->srcref = nstat_control_next_src_ref(state);
-       if (state->cleanup || src->srcref == NSTAT_SRC_REF_INVALID)
+       if (state->ncs_flags & NSTAT_FLAG_CLEANUP || src->srcref == NSTAT_SRC_REF_INVALID)
        {
                lck_mtx_unlock(&state->mtx);
                OSFree(src, sizeof(*src), nstat_malloc_tag);
@@ -1682,23 +1728,22 @@ nstat_control_source_add(
        src->cookie = cookie;
        
        // send the source added message
-       errno_t result = ctl_enqueuembuf(state->kctl, state->unit, msg, CTL_DATA_EOR);
+       errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR);
        if (result != 0)
        {
                lck_mtx_unlock(&state->mtx);
-               printf("%s:%d ctl_enqueuembuf failed: %d\n", __FUNCTION__, __LINE__, result);
                OSFree(src, sizeof(*src), nstat_malloc_tag);
                mbuf_freem(msg);
                return result;
        }
        
        // Put the source in the list
-       src->next = state->srcs;
-       state->srcs = src;
+       src->next = state->ncs_srcs;
+       state->ncs_srcs = src;
        
        // send the description message
        // not useful as the source is often not complete
-//     nstat_control_send_description(state, src, 0ULL);
+//     nstat_control_send_description(state, src, 0);
        
        lck_mtx_unlock(&state->mtx);
        
@@ -1714,7 +1759,6 @@ nstat_control_handle_remove_request(
        
        if (mbuf_copydata(m, offsetof(nstat_msg_rem_src_req, srcref), sizeof(srcref), &srcref) != 0)
        {
-               printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(nstat_msg_rem_src_req));
                return EINVAL;
        }
        
@@ -1723,7 +1767,7 @@ nstat_control_handle_remove_request(
        // Remove this source as we look for it
        nstat_src       **nextp;
        nstat_src       *src = NULL;
-       for (nextp = &state->srcs; *nextp; nextp = &(*nextp)->next)
+       for (nextp = &state->ncs_srcs; *nextp; nextp = &(*nextp)->next)
        {
                if ((*nextp)->srcref == srcref)
                {
@@ -1735,7 +1779,7 @@ nstat_control_handle_remove_request(
        
        lck_mtx_unlock(&state->mtx);
        
-       if (src) nstat_control_cleanup_source(state, src);
+       if (src) nstat_control_cleanup_source(state, src, FALSE);
        
        return src ? 0 : ENOENT;
 }
@@ -1758,12 +1802,13 @@ nstat_control_handle_query_request(
        nstat_msg_query_src_req req;
        if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
        {
-               printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(req));
                return EINVAL;
        }
        
        lck_mtx_lock(&state->mtx);
-       nstat_src       **srcpp = &state->srcs;
+       if (req.srcref == NSTAT_SRC_REF_ALL)
+               state->ncs_flags |= NSTAT_FLAG_REQCOUNTS;
+       nstat_src       **srcpp = &state->ncs_srcs;
        while (*srcpp != NULL)
        {
                int     gone;
@@ -1772,26 +1817,20 @@ nstat_control_handle_query_request(
                if (req.srcref == NSTAT_SRC_REF_ALL ||
                        (*srcpp)->srcref == req.srcref)
                {
-                       nstat_msg_src_counts    counts;
-                       counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS;
-                       counts.hdr.context = req.hdr.context;
-                       counts.srcref = (*srcpp)->srcref;
-                       bzero(&counts.counts, sizeof(counts.counts));
-                       result = (*srcpp)->provider->nstat_counts((*srcpp)->cookie, &counts.counts, &gone);
+                       result = nstat_control_send_counts(state, *srcpp,
+                           req.hdr.context, &gone);
                        
-                       if (result == 0)
-                       {
-                               result = ctl_enqueuedata(state->kctl, state->unit, &counts, sizeof(counts), CTL_DATA_EOR);
-                       }
-                       else
-                       {
-                               printf("%s:%d provider->nstat_counts failed: %d\n", __FUNCTION__, __LINE__, result);
-                       }
+                       // If the counts message failed to enqueue then we should clear our flag so
+                       // that a client doesn't miss anything on idle cleanup.
+                       if (result != 0)
+                               state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
                        
                        if (gone)
                        {
                                // send one last descriptor message so client may see last state
-                               nstat_control_send_description(state, *srcpp, 0ULL);
+
+                               nstat_control_send_description(state, *srcpp,
+                                   0);
                                
                                // pull src out of the list
                                nstat_src       *src = *srcpp;
@@ -1818,7 +1857,7 @@ nstat_control_handle_query_request(
                dead_srcs = src->next;
                
                // release src and send notification
-               nstat_control_cleanup_source(state, src);
+               nstat_control_cleanup_source(state, src, FALSE);
        }
        
        if (req.srcref == NSTAT_SRC_REF_ALL)
@@ -1827,8 +1866,7 @@ nstat_control_handle_query_request(
                success.context = req.hdr.context;
                success.type = NSTAT_MSG_TYPE_SUCCESS;
                success.pad = 0;
-               if (ctl_enqueuedata(state->kctl, state->unit, &success, sizeof(success), CTL_DATA_EOR) != 0)
-                       printf("%s:%d - failed to enqueue success message\n", __FUNCTION__, __LINE__);
+               ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR);
                result = 0;
        }
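The NSTAT_FLAG_REQCOUNTS bookkeeping added in this hunk pairs with the rewritten idle check earlier: polling all sources marks the control state, the next 60-second idle pass then skips its own gone-source sweep (the poll already delivered final counts and reaped dead sources) and clears the mark, and an enqueue failure during the poll clears it early so the sweep still runs and no removal is lost. A compilable toy of that one-shot handshake; the flag's bit value and the helper names are assumptions:

    #include <stdio.h>

    #define FLAG_REQCOUNTS 0x2   /* bit value assumed for illustration */

    struct ctl { unsigned flags; };

    /* Client polled all sources and every counts message was enqueued. */
    static void poll_all_ok(struct ctl *c)   { c->flags |= FLAG_REQCOUNTS; }
    /* An enqueue failed mid-poll: make sure the sweep still happens. */
    static void poll_all_fail(struct ctl *c) { c->flags &= ~FLAG_REQCOUNTS; }

    static void
    idle_sweep(struct ctl *c)
    {
            if (!(c->flags & FLAG_REQCOUNTS))
                    printf("sweep: reap gone sources\n");
            else
                    printf("sweep: skipped, a poll just covered it\n");
            c->flags &= ~FLAG_REQCOUNTS;   /* the exemption is one-shot */
    }

    int main(void)
    {
            struct ctl c = { FLAG_REQCOUNTS };  /* new connections start marked */
            idle_sweep(&c);   /* skipped */
            idle_sweep(&c);   /* reaps */
            poll_all_ok(&c);
            idle_sweep(&c);   /* skipped */
            poll_all_fail(&c);
            idle_sweep(&c);   /* reaps */
            return 0;
    }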
        
@@ -1843,14 +1881,13 @@ nstat_control_handle_get_src_description(
        nstat_msg_get_src_description   req;
        if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
        {
-               printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(req));
                return EINVAL;
        }
        
        // Find the source
        lck_mtx_lock(&state->mtx);
        nstat_src       *src;
-       for (src = state->srcs; src; src = src->next)
+       for (src = state->ncs_srcs; src; src = src->next)
        {
                if (src->srcref == req.srcref)
                        break;
@@ -1860,7 +1897,6 @@ nstat_control_handle_get_src_description(
        if (!src)
        {
                lck_mtx_unlock(&state->mtx);
-               printf("%s:%d - no matching source\n", __FUNCTION__, __LINE__);
                return ENOENT;
        }
        
@@ -1886,8 +1922,6 @@ nstat_control_send(
        if (mbuf_pkthdr_len(m) < sizeof(hdr))
        {
                // Is this the right thing to do?
-               printf("%s:%d - message too short, was %ld expected %lu\n", __FUNCTION__, __LINE__,
-                       mbuf_pkthdr_len(m), sizeof(*hdr));
                mbuf_freem(m);
                return EINVAL;
        }
@@ -1925,7 +1959,6 @@ nstat_control_send(
                        break;
                
                default:
-                       printf("%s:%d - unknown message type %d\n", __FUNCTION__, __LINE__, hdr->type);
                        result = EINVAL;
                        break;
        }
index 4bbb3dc1bde451cf311334d84764727e8f4fa62a..4696d89ff91d0548f4b2e57c4f686fe922fafed1 100644 (file)
@@ -121,6 +121,7 @@ typedef struct nstat_tcp_descriptor
        u_int32_t       txunacked;
        u_int32_t       txwindow;
        u_int32_t       txcwindow;
+       u_int32_t       traffic_class;
        
        u_int64_t       upid;
        u_int32_t       pid;
@@ -147,6 +148,7 @@ typedef struct nstat_udp_descriptor
        
        u_int32_t       rcvbufsize;
        u_int32_t       rcvbufused;
+       u_int32_t       traffic_class;
        
        u_int64_t       upid;
        u_int32_t       pid;
@@ -191,7 +193,7 @@ typedef struct nstat_route_descriptor
 
 enum
 {
-       // generice respnse messages
+       // generic response messages
        NSTAT_MSG_TYPE_SUCCESS                  = 0
        ,NSTAT_MSG_TYPE_ERROR                   = 1
        
@@ -315,6 +317,8 @@ enum
 // indicates whether or not collection of statistics is enabled
 extern int     nstat_collect;
 
+void nstat_init(void);
+
 // Route collection routines
 void nstat_route_connect_attempt(struct rtentry *rte);
 void nstat_route_connect_success(struct rtentry *rte);
@@ -328,6 +332,7 @@ struct inpcb;
 void nstat_tcp_new_pcb(struct inpcb *inp);
 void nstat_udp_new_pcb(struct inpcb *inp);
 void nstat_route_new_entry(struct rtentry *rt);
+void nstat_pcb_detach(struct inpcb *inp);
 
 // locked_add_64 uses atomic operations on 32bit so the 64bit
 // value can be properly read. The values are only ever incremented
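The comment above explains why these counters need helpers at all: on a 32-bit kernel a bare 64-bit load or add can tear. Because the values only ever grow and readers tolerate slightly stale data, relaxed atomics suffice. An equivalent rendered in portable C11, as an illustration rather than the kernel's implementation:

    #include <stdatomic.h>
    #include <stdint.h>

    /* A relaxed atomic load cannot tear, which is all a statistics
     * reader of a monotonically increasing counter needs. */
    static inline uint64_t
    stat_read_64(_Atomic uint64_t *p)
    {
            return atomic_load_explicit(p, memory_order_relaxed);
    }

    static inline void
    stat_add_64(_Atomic uint64_t *p, uint64_t n)
    {
            atomic_fetch_add_explicit(p, n, memory_order_relaxed);
    }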
index 0597ffd4dd1322594e9938a729fdf88bebcb7289..b13db985e4fa6cf1b83a39de53b60b991553298f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <netinet/icmp_var.h>
 #include <net/if_ether.h>
 #include <net/ethernet.h>
-
+#include <net/flowhash.h>
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
 
 #include <netinet6/nd6.h>
 #endif /* INET6 */
 
-#ifndef NO_APPLE_EXTENSIONS
-#define DPFPRINTF(n, x)        (pf_status.debug >= (n) ? printf x : ((void)0))
-#else
-#define DPFPRINTF(n, x)        if (pf_status.debug >= (n)) printf x
-#endif
+#if DUMMYNET
+#include <netinet/ip_dummynet.h>
+#endif /* DUMMYNET */
 
-/* XXX: should be in header somewhere */
-#define        satosin(sa)     ((struct sockaddr_in *)(sa))
-#define sintosa(sin)   ((struct sockaddr *)(sin))
+#define DPFPRINTF(n, x)        (pf_status.debug >= (n) ? printf x : ((void)0))
 
 /*
  * On Mac OS X, the rtableid value is treated as the interface scope
 /*
  * Global variables
  */
-lck_mtx_t *pf_lock;
-lck_rw_t *pf_perim_lock;
+decl_lck_mtx_data(,pf_lock_data);
+decl_lck_rw_data(,pf_perim_lock_data);
+lck_mtx_t *pf_lock = &pf_lock_data;
+lck_rw_t *pf_perim_lock = &pf_perim_lock_data;
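pf's two global locks switch here from pointers that had to be filled in by an allocation during initialization to pointers aimed at lock storage embedded in the kernel image; the exported pointer names survive, so no other pf code changes. The idiom in miniature, assuming the usual expansion of the decl_lck_* macros to a plain variable definition:

    /* Assumed expansion: decl_lck_mtx_data(class, name) declares
     * "class lck_mtx_t name". The lock lives in static storage, so
     * there is no init-time allocation or failure path, while existing
     * users keep dereferencing the same pointer as before. */
    decl_lck_mtx_data(static, example_lock_data);
    lck_mtx_t *example_lock = &example_lock_data;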
 
 /* state tables */
 struct pf_state_tree_lan_ext    pf_statetbl_lan_ext;
@@ -155,14 +153,14 @@ struct pf_state_tree_ext_gwy       pf_statetbl_ext_gwy;
 struct pf_palist        pf_pabuf;
 struct pf_status        pf_status;
 
-#if ALTQ
+#if PF_ALTQ
 struct pf_altqqueue     pf_altqs[2];
 struct pf_altqqueue    *pf_altqs_active;
 struct pf_altqqueue    *pf_altqs_inactive;
 u_int32_t               ticket_altqs_active;
 u_int32_t               ticket_altqs_inactive;
 int                     altqs_inactive_open;
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 u_int32_t               ticket_pabuf;
 
 static MD5_CTX          pf_tcp_secret_ctx;
@@ -179,11 +177,10 @@ static struct pf_anchor_stackframe {
 
 struct pool             pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
 struct pool             pf_state_pl, pf_state_key_pl;
-#if ALTQ
+#if PF_ALTQ
 struct pool             pf_altq_pl;
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 
-#ifndef NO_APPLE_EXTENSIONS
 typedef void (*hook_fn_t)(void *);
 
 struct hook_desc {
@@ -204,7 +201,6 @@ struct pool          pf_app_state_pl;
 static void             pf_print_addr(struct pf_addr *addr, sa_family_t af);
 static void             pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
                            u_int8_t);
-#endif
 
 static void             pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
 
@@ -233,7 +229,6 @@ static void          pf_send_tcp(const struct pf_rule *, sa_family_t,
                            u_int16_t, struct ether_header *, struct ifnet *);
 static void             pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
                            sa_family_t, struct pf_rule *);
-#ifndef NO_APPLE_EXTENSIONS
 static struct pf_rule  *pf_match_translation(struct pf_pdesc *, struct mbuf *,
                            int, int, struct pfi_kif *, struct pf_addr *,
                            union pf_state_xport *, struct pf_addr *,
@@ -244,17 +239,6 @@ static struct pf_rule      *pf_get_translation_aux(struct pf_pdesc *,
                            union pf_state_xport *, struct pf_addr *,
                            union pf_state_xport *, struct pf_addr *,
                            union pf_state_xport *);
-#else
-struct pf_rule         *pf_match_translation(struct pf_pdesc *, struct mbuf *,
-                           int, int, struct pfi_kif *,
-                           struct pf_addr *, u_int16_t, struct pf_addr *,
-                           u_int16_t, int);
-struct pf_rule         *pf_get_translation(struct pf_pdesc *, struct mbuf *,
-                           int, int, struct pfi_kif *, struct pf_src_node **,
-                           struct pf_addr *, u_int16_t,
-                           struct pf_addr *, u_int16_t,
-                           struct pf_addr *, u_int16_t *);
-#endif
 static void             pf_attach_state(struct pf_state_key *,
                            struct pf_state *, int);
 static void             pf_detach_state(struct pf_state *, int);
@@ -263,6 +247,11 @@ static int          pf_test_rule(struct pf_rule **, struct pf_state **,
                            int, struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, struct pf_rule **,
                            struct pf_ruleset **, struct ifqueue *);
+#if DUMMYNET
+static int              pf_test_dummynet(struct pf_rule **, int, 
+                           struct pfi_kif *, struct mbuf **, 
+                           struct pf_pdesc *, struct ip_fw_args *);
+#endif /* DUMMYNET */
 static int              pf_test_fragment(struct pf_rule **, int,
                            struct pfi_kif *, struct mbuf *, void *,
                            struct pf_pdesc *, struct pf_rule **,
@@ -270,15 +259,9 @@ static int          pf_test_fragment(struct pf_rule **, int,
 static int              pf_test_state_tcp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, u_short *);
-#ifndef NO_APPLE_EXTENSIONS
 static int              pf_test_state_udp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, u_short *);
-#else
-static int              pf_test_state_udp(struct pf_state **, int,
-                           struct pfi_kif *, struct mbuf *, int,
-                           void *, struct pf_pdesc *);
-#endif
 static int              pf_test_state_icmp(struct pf_state **, int,
                            struct pfi_kif *, struct mbuf *, int,
                            void *, struct pf_pdesc *, u_short *);
@@ -291,18 +274,11 @@ static void                pf_hash(struct pf_addr *, struct pf_addr *,
 static int              pf_map_addr(u_int8_t, struct pf_rule *,
                            struct pf_addr *, struct pf_addr *,
                            struct pf_addr *, struct pf_src_node **);
-#ifndef NO_APPLE_EXTENSIONS
 static int              pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
                            struct pf_rule *, struct pf_addr *,
                            union pf_state_xport *, struct pf_addr *,
                            union pf_state_xport *, struct pf_addr *,
                            union pf_state_xport *, struct pf_src_node **);
-#else
-int                     pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
-                           struct pf_addr *, struct pf_addr *, u_int16_t,
-                           struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
-                           struct pf_src_node **);
-#endif
 static void             pf_route(struct mbuf **, struct pf_rule *, int,
                            struct ifnet *, struct pf_state *,
                            struct pf_pdesc *);
@@ -330,7 +306,6 @@ static void          pf_stateins_err(const char *, struct pf_state *,
                            struct pfi_kif *);
 static int              pf_check_congestion(struct ifqueue *);
 
-#ifndef NO_APPLE_EXTENSIONS
 #if 0
 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
 #endif
@@ -344,7 +319,6 @@ static int          pf_ike_compare(struct pf_app_state *,
                            struct pf_app_state *);
 static int             pf_test_state_esp(struct pf_state **, int,
                            struct pfi_kif *, int, struct pf_pdesc *);
-#endif
 
 extern struct pool pfr_ktable_pl;
 extern struct pool pfr_kentry_pl;
@@ -356,10 +330,9 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
        { &pf_src_tree_pl, PFSNODE_HIWAT },
        { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
        { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
-       { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
+       { &pfr_kentry_pl, PFR_KENTRY_HIWAT },
 };
 
-#ifndef NO_APPLE_EXTENSIONS
 struct mbuf *
 pf_lazy_makewritable(struct pf_pdesc *pd, struct mbuf *m, int len)
 {
@@ -425,6 +398,10 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
        do {                                                             \
                int action;                                              \
                *state = pf_find_state(kif, &key, direction);            \
+               if (*state != NULL && pd != NULL &&                      \
+                       pd->flowhash == 0) {                             \
+                       pd->flowhash = (*state)->state_key->flowhash;    \
+               }                                                        \
                if (pf_state_lookup_aux(state, kif, direction, &action)) \
                        return (action);                                 \
        } while (0)
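With the new <net/flowhash.h> include above, this STATE_LOOKUP change back-fills pd->flowhash from the matched state's key whenever the packet descriptor does not already carry one, so every packet of an established flow sees the same hash; presumably this feeds the dummynet hooks this commit introduces (pf_test_dummynet), where a stable per-flow key is what makes per-flow queueing deterministic. A hypothetical consumer, not pf code:

    #include <stdint.h>

    /* Once every packet of a flow carries the same hash, per-flow
     * scheduling reduces to a modulo pick over the available queues. */
    static inline uint32_t
    pick_queue(uint32_t flowhash, uint32_t nqueues)
    {
            return (nqueues != 0) ? (flowhash % nqueues) : 0;
    }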
@@ -444,31 +421,6 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
        (STATE_ADDR_TRANSLATE(sk) ||                                    \
        (sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)
 
-#else
-#define STATE_LOOKUP()                                                 \
-       do {                                                            \
-               *state = pf_find_state(kif, &key, direction);           \
-               if (*state == NULL || (*state)->timeout == PFTM_PURGE)  \
-                       return (PF_DROP);                               \
-               if (direction == PF_OUT &&                              \
-                   (((*state)->rule.ptr->rt == PF_ROUTETO &&           \
-                   (*state)->rule.ptr->direction == PF_OUT) ||         \
-                   ((*state)->rule.ptr->rt == PF_REPLYTO &&            \
-                   (*state)->rule.ptr->direction == PF_IN)) &&         \
-                   (*state)->rt_kif != NULL &&                         \
-                   (*state)->rt_kif != kif)                            \
-                       return (PF_PASS);                               \
-       } while (0)
-
-#define        STATE_TRANSLATE(sk) \
-       (sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
-       ((sk)->af == AF_INET6 && \
-       ((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
-       (sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
-       (sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \
-       (sk)->lan.port != (sk)->gwy.port
-#endif
-
 #define BOUND_IFACE(r, k) \
        ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
 
@@ -524,7 +476,6 @@ RB_GENERATE(pf_state_tree_id, pf_state,
 #define        PF_DT_SKIP_LANEXT       0x01
 #define        PF_DT_SKIP_EXTGWY       0x02
 
-#ifndef NO_APPLE_EXTENSIONS
 static const u_int16_t PF_PPTP_PORT = 1723;
 static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
 
@@ -807,7 +758,6 @@ struct pf_esp_hdr {
        u_int32_t seqno;
        u_int8_t payload[];
 };
-#endif
 
 static __inline int
 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
@@ -857,16 +807,13 @@ static __inline int
 pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
 {
        int     diff;
-#ifndef NO_APPLE_EXTENSIONS
-       int extfilter;
-#endif
+       int     extfilter;
 
        if ((diff = a->proto - b->proto) != 0)
                return (diff);
        if ((diff = a->af - b->af) != 0)
                return (diff);
 
-#ifndef NO_APPLE_EXTENSIONS
        extfilter = PF_EXTFILTER_APD;
 
        switch (a->proto) {
@@ -911,7 +858,6 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
        default:
                break;
        }
-#endif
 
        switch (a->af) {
 #if INET
@@ -920,24 +866,16 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
                        return (1);
                if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
                        return (-1);
-#ifndef NO_APPLE_EXTENSIONS
                if (extfilter < PF_EXTFILTER_EI) {
                        if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
                                return (1);
                        if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
                                return (-1);
                }
-#else
-               if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                       return (1);
-               if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                       return (-1);
-#endif
                break;
 #endif /* INET */
 #if INET6
        case AF_INET6:
-#ifndef NO_APPLE_EXTENSIONS
                if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
                        return (1);
                if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
@@ -973,45 +911,10 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
                        if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
                                return (-1);
                }
-#else
-               if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
-                       return (1);
-               if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
-                       return (-1);
-               if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
-                       return (1);
-               if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
-                       return (-1);
-               if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
-                       return (1);
-               if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
-                       return (-1);
-               if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
-                       return (1);
-               if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
-                       return (-1);
-               if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
-                       return (1);
-               if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
-                       return (-1);
-               if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
-                       return (1);
-               if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
-                       return (-1);
-               if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
-                       return (1);
-               if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
-                       return (-1);
-               if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                       return (1);
-               if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                       return (-1);
-#endif
                break;
 #endif /* INET6 */
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (a->app_state && b->app_state) {
                if (a->app_state->compare_lan_ext &&
                    b->app_state->compare_lan_ext) {
@@ -1025,12 +928,6 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
                                return (diff);
                }
        }
-#else
-       if ((diff = a->lan.port - b->lan.port) != 0)
-               return (diff);
-       if ((diff = a->ext.port - b->ext.port) != 0)
-               return (diff);
-#endif
 
        return (0);
 }
@@ -1039,9 +936,7 @@ static __inline int
 pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
 {
        int     diff;
-#ifndef NO_APPLE_EXTENSIONS
-       int extfilter;
-#endif
+       int     extfilter;
 
        if ((diff = a->proto - b->proto) != 0)
                return (diff);
@@ -1049,7 +944,6 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
        if ((diff = a->af - b->af) != 0)
                return (diff);
 
-#ifndef NO_APPLE_EXTENSIONS
        extfilter = PF_EXTFILTER_APD;
 
        switch (a->proto) {
@@ -1094,12 +988,10 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
        default:
                break;
        }
-#endif
 
        switch (a->af) {
 #if INET
        case AF_INET:
-#ifndef NO_APPLE_EXTENSIONS
                if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
                        return (1);
                if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
@@ -1110,21 +1002,10 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                        if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
                                return (-1);
                }
-#else
-               if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                       return (1);
-               if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                       return (-1);
-               if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
-                       return (1);
-               if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
-                       return (-1);
-#endif
                break;
 #endif /* INET */
 #if INET6
        case AF_INET6:
-#ifndef NO_APPLE_EXTENSIONS
                if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
                        return (1);
                if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
@@ -1160,45 +1041,10 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                        if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
                                return (-1);
                }
-#else
-               if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
-                       return (1);
-               if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
-                       return (-1);
-               if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
-                       return (1);
-               if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
-                       return (-1);
-               if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
-                       return (1);
-               if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
-                       return (-1);
-               if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
-                       return (1);
-               if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
-                       return (-1);
-               if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
-                       return (1);
-               if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
-                       return (-1);
-               if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
-                       return (1);
-               if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
-                       return (-1);
-               if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                       return (1);
-               if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                       return (-1);
-               if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
-                       return (1);
-               if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
-                       return (-1);
-#endif
                break;
 #endif /* INET6 */
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (a->app_state && b->app_state) {
                if (a->app_state->compare_ext_gwy &&
                    b->app_state->compare_ext_gwy) {
@@ -1212,12 +1058,6 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                                return (diff);
                }
        }
-#else
-       if ((diff = a->ext.port - b->ext.port) != 0)
-               return (diff);
-       if ((diff = a->gwy.port - b->gwy.port) != 0)
-               return (diff);
-#endif
 
        return (0);
 }
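
/*
 * The AF_INET6 arms above order 128-bit addresses by comparing their
 * four 32-bit words individually, with unsigned greater/less tests
 * rather than subtraction, which could overflow.  A hedged standalone
 * sketch of the same word-wise ordering (the addr32-style layout is
 * assumed):
 */
#include <stdint.h>
#include <stdio.h>

/* Four 32-bit words, as in pf_addr's addr32 view of an IPv6 address. */
struct addr128 {
	uint32_t w[4];
};

/* Word-wise three-way compare, highest-index word first as above. */
static int
addr128_compare(const struct addr128 *a, const struct addr128 *b)
{
	int i;

	for (i = 3; i >= 0; i--) {
		if (a->w[i] > b->w[i])
			return (1);
		if (a->w[i] < b->w[i])
			return (-1);
	}
	return (0);
}

int
main(void)
{
	struct addr128 a = { { 1, 0, 0, 0 } };
	struct addr128 b = { { 2, 0, 0, 0 } };

	printf("%d\n", addr128_compare(&a, &b));	/* -1: a orders first */
	return (0);
}
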
@@ -1262,7 +1102,8 @@ pf_find_state_byid(struct pf_state_cmp *key)
 {
        pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
-       return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
+       return (RB_FIND(pf_state_tree_id, &tree_id,
+           (struct pf_state *)(void *)key));
 }
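
/*
 * This commit routes many pointer casts through (void *), here and in
 * pf_send_tcp(), pf_calc_mss() and pf_map_addr() below.  The likely
 * intent is to quiet alignment-cast warnings: a direct cast between
 * pointer types of different alignment can warn, while a cast via
 * void * does not.  A small sketch of the pattern (types illustrative):
 */
#include <stdio.h>

/* Illustrative header type with stricter alignment than char. */
struct tcp_like {
	unsigned short sport, dport;
	unsigned int seq;
};

int
main(void)
{
	/* Storage that really is aligned for the target type. */
	static struct tcp_like storage;
	char *p = (char *)&storage;	/* what mtod()-style code hands out */

	/*
	 * A direct (struct tcp_like *)p can draw an alignment-cast
	 * warning; going through void * marks the conversion as
	 * deliberate.  The pointer must still truly be aligned, which
	 * it is here by construction.
	 */
	struct tcp_like *t = (struct tcp_like *)(void *)p;

	t->sport = 80;
	printf("%u\n", (unsigned)t->sport);
	return (0);
}
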
 
 static struct pf_state *
@@ -1529,7 +1370,6 @@ pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
        struct pf_state_key     *sk = s->state_key;
 
        if (pf_status.debug >= PF_DEBUG_MISC) {
-#ifndef NO_APPLE_EXTENSIONS
                printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
                switch (sk->proto) {
                case IPPROTO_TCP:
@@ -1557,18 +1397,6 @@ pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
                printf(" ext: ");
                pf_print_sk_host(&sk->ext, sk->af, sk->proto,
                    sk->proto_variant);
-#else
-               printf("pf: state insert failed: %s %s", tree, kif->pfik_name);
-               printf(" lan: ");
-               pf_print_host(&sk->lan.addr, sk->lan.port,
-                   sk->af);
-               printf(" gwy: ");
-               pf_print_host(&sk->gwy.addr, sk->gwy.port,
-                   sk->af);
-               printf(" ext: ");
-               pf_print_host(&sk->ext.addr, sk->ext.port,
-                   sk->af);
-#endif
                if (s->sync_flags & PFSTATE_FROMSYNC)
                        printf(" (from sync)");
                printf("\n");
@@ -1634,64 +1462,76 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
        return (0);
 }
 
-void
-pf_purge_thread_fn(void *v, wait_result_t w)
+static int
+pf_purge_thread_cont(int err)
 {
-#pragma unused(v, w)
-       u_int32_t nloops = 0;
-       int t = 0;
-
-       for (;;) {
-               (void) tsleep(pf_purge_thread_fn, PWAIT, "pftm", t * hz);
-
-               lck_rw_lock_shared(pf_perim_lock);
-               lck_mtx_lock(pf_lock);
-
-               /* purge everything if not running */
-               if (!pf_status.running) {
-                       pf_purge_expired_states(pf_status.states);
-                       pf_purge_expired_fragments();
-                       pf_purge_expired_src_nodes();
-
-                       /* terminate thread (we don't currently do this) */
-                       if (pf_purge_thread == NULL) {
-                               lck_mtx_unlock(pf_lock);
-                               lck_rw_done(pf_perim_lock);
-
-                               thread_deallocate(current_thread());
-                               thread_terminate(current_thread());
-                               /* NOTREACHED */
-                               return;
-                       } else {
-                               /* if there's nothing left, sleep w/o timeout */
-                               if (pf_status.states == 0 &&
-                                   pf_normalize_isempty() &&
-                                   RB_EMPTY(&tree_src_tracking))
-                                       t = 0;
-
-                               lck_mtx_unlock(pf_lock);
-                               lck_rw_done(pf_perim_lock);
-                               continue;
+#pragma unused(err)
+       static u_int32_t nloops = 0;
+       int t = 1;      /* 1 second */
+
+       lck_rw_lock_shared(pf_perim_lock);
+       lck_mtx_lock(pf_lock);
+
+       /* purge everything if not running */
+       if (!pf_status.running) {
+               pf_purge_expired_states(pf_status.states);
+               pf_purge_expired_fragments();
+               pf_purge_expired_src_nodes();
+
+               /* terminate thread (we don't currently do this) */
+               if (pf_purge_thread == NULL) {
+                       lck_mtx_unlock(pf_lock);
+                       lck_rw_done(pf_perim_lock);
+
+                       thread_deallocate(current_thread());
+                       thread_terminate(current_thread());
+                       /* NOTREACHED */
+                       return (0);
+               } else {
+                       /* if there's nothing left, sleep w/o timeout */
+                       if (pf_status.states == 0 &&
+                           pf_normalize_isempty() &&
+                           RB_EMPTY(&tree_src_tracking)) {
+                               nloops = 0;
+                               t = 0;
                        }
-               } else if (t == 0) {
-                       /* Set timeout to 1 second */
-                       t = 1;
+                       goto done;
                }
+       }
 
-               /* process a fraction of the state table every second */
-               pf_purge_expired_states(1 + (pf_status.states
-                   / pf_default_rule.timeout[PFTM_INTERVAL]));
-
-               /* purge other expired types every PFTM_INTERVAL seconds */
-               if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-                       pf_purge_expired_fragments();
-                       pf_purge_expired_src_nodes();
-                       nloops = 0;
-               }
+       /* process a fraction of the state table every second */
+       pf_purge_expired_states(1 + (pf_status.states
+           / pf_default_rule.timeout[PFTM_INTERVAL]));
 
-               lck_mtx_unlock(pf_lock);
-               lck_rw_done(pf_perim_lock);
+       /* purge other expired types every PFTM_INTERVAL seconds */
+       if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+               pf_purge_expired_fragments();
+               pf_purge_expired_src_nodes();
+               nloops = 0;
        }
+done:
+       lck_mtx_unlock(pf_lock);
+       lck_rw_done(pf_perim_lock);
+
+       (void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
+           t * hz, pf_purge_thread_cont);
+       /* NOTREACHED */
+       VERIFY(0);
+
+       return (0);
+}
+
+void
+pf_purge_thread_fn(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+       (void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
+           pf_purge_thread_cont);
+       /*
+        * tsleep0() shouldn't have returned as PCATCH was not set;
+        * therefore assert in this case.
+        */
+       VERIFY(0);
 }
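
/*
 * The purge thread is rewritten from a for (;;) loop around tsleep()
 * into continuation style: pf_purge_thread_fn() parks itself once via
 * tsleep0() and names pf_purge_thread_cont() to run on each wakeup, so
 * the kernel need not keep the blocked thread's stack alive; neither
 * call is expected to return, hence the VERIFY(0)s.  A hedged userland
 * sketch of the control flow (fake_tsleep0() and the bounded loop are
 * inventions of this sketch, not XNU APIs):
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* The continuation: one purge pass per wakeup.  The kernel version
 * re-arms itself with another tsleep0() call instead of returning. */
static int
purge_cont(int err)
{
	static unsigned int nloops = 0;

	(void)err;
	printf("purge pass %u\n", ++nloops);
	if (nloops >= 3)
		exit(0);	/* bound the demo; the real thread never exits */
	return (0);
}

/* Stand-in for tsleep0(): block, then run the continuation.  The kernel
 * primitive also discards the sleeping stack, so its callers treat a
 * return as impossible, which is what VERIFY(0) asserts above. */
static void
fake_tsleep0(unsigned int seconds, int (*cont)(int))
{
	for (;;) {
		sleep(seconds);
		(void)cont(0);
	}
}

int
main(void)
{
	fake_tsleep0(1, purge_cont);
	/* NOTREACHED */
	return (0);
}
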
 
 u_int64_t
@@ -1795,7 +1635,6 @@ pf_unlink_state(struct pf_state *cur)
 {
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
-#ifndef NO_APPLE_EXTENSIONS
        if (cur->src.state == PF_TCPS_PROXY_DST) {
                pf_send_tcp(cur->rule.ptr, cur->state_key->af,
                    &cur->state_key->ext.addr, &cur->state_key->lan.addr,
@@ -1806,15 +1645,6 @@ pf_unlink_state(struct pf_state *cur)
        }
 
        hook_runloop(&cur->unlink_hooks, HOOK_REMOVE|HOOK_FREE);
-#else
-       if (cur->src.state == PF_TCPS_PROXY_DST) {
-               pf_send_tcp(cur->rule.ptr, cur->state_key->af,
-                   &cur->state_key->ext.addr, &cur->state_key->lan.addr,
-                   cur->state_key->ext.port, cur->state_key->lan.port,
-                   cur->src.seqhi, cur->src.seqlo + 1,
-                   TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
-       }
-#endif
        RB_REMOVE(pf_state_tree_id, &tree_id, cur);
 #if NPFSYNC
        if (cur->creatorid == pf_status.hostid)
@@ -1933,7 +1763,6 @@ pf_tbladdr_copyout(struct pf_addr_wrap *aw)
            kt->pfrkt_cnt : -1;
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 static void
 pf_print_addr(struct pf_addr *addr, sa_family_t af)
 {
@@ -2016,76 +1845,13 @@ pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
                break;
        }
 }
-#endif
 
 static void
 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
 {
-#ifndef NO_APPLE_EXTENSIONS
        pf_print_addr(addr, af);
        if (p)
                printf("[%u]", ntohs(p));
-#else
-       switch (af) {
-#if INET
-       case AF_INET: {
-               u_int32_t a = ntohl(addr->addr32[0]);
-               printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
-                   (a>>8)&255, a&255);
-               if (p) {
-                       p = ntohs(p);
-                       printf(":%u", p);
-               }
-               break;
-       }
-#endif /* INET */
-#if INET6
-       case AF_INET6: {
-               u_int16_t b;
-               u_int8_t i, curstart = 255, curend = 0,
-                   maxstart = 0, maxend = 0;
-               for (i = 0; i < 8; i++) {
-                       if (!addr->addr16[i]) {
-                               if (curstart == 255)
-                                       curstart = i;
-                               else
-                                       curend = i;
-                       } else {
-                               if (curstart) {
-                                       if ((curend - curstart) >
-                                           (maxend - maxstart)) {
-                                               maxstart = curstart;
-                                               maxend = curend;
-                                               curstart = 255;
-                                       }
-                               }
-                       }
-               }
-               for (i = 0; i < 8; i++) {
-                       if (i >= maxstart && i <= maxend) {
-                               if (maxend != 7) {
-                                       if (i == maxstart)
-                                               printf(":");
-                               } else {
-                                       if (i == maxend)
-                                               printf(":");
-                               }
-                       } else {
-                               b = ntohs(addr->addr16[i]);
-                               printf("%x", b);
-                               if (i < 7)
-                                       printf(":");
-                       }
-               }
-               if (p) {
-                       p = ntohs(p);
-                       printf("[%u]", p);
-               }
-               break;
-       }
-#endif /* INET6 */
-       }
-#endif
 }
 
 void
@@ -2093,14 +1859,12 @@ pf_print_state(struct pf_state *s)
 {
        struct pf_state_key *sk = s->state_key;
        switch (sk->proto) {
-#ifndef NO_APPLE_EXTENSIONS
        case IPPROTO_ESP:
                printf("ESP ");
                break;
        case IPPROTO_GRE:
                printf("GRE%u ", sk->proto_variant);
                break;
-#endif
        case IPPROTO_TCP:
                printf("TCP ");
                break;
@@ -2117,19 +1881,11 @@ pf_print_state(struct pf_state *s)
                printf("%u ", sk->proto);
                break;
        }
-#ifndef NO_APPLE_EXTENSIONS
        pf_print_sk_host(&sk->lan, sk->af, sk->proto, sk->proto_variant);
        printf(" ");
        pf_print_sk_host(&sk->gwy, sk->af, sk->proto, sk->proto_variant);
        printf(" ");
        pf_print_sk_host(&sk->ext, sk->af, sk->proto, sk->proto_variant);
-#else
-       pf_print_host(&sk->lan.addr, sk->lan.port, sk->af);
-       printf(" ");
-       pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af);
-       printf(" ");
-       pf_print_host(&sk->ext.addr, sk->ext.port, sk->af);
-#endif
        printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
            s->src.seqhi, s->src.max_win, s->src.seqdiff);
        if (s->src.wscale && s->dst.wscale)
@@ -2197,7 +1953,6 @@ pf_calc_skip_steps(struct pf_rulequeue *rules)
                if (cur->src.neg != prev->src.neg ||
                    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
                        PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
-#ifndef NO_APPLE_EXTENSIONS
                {
                        union pf_rule_xport *cx = &cur->src.xport;
                        union pf_rule_xport *px = &prev->src.xport;
@@ -2217,16 +1972,9 @@ pf_calc_skip_steps(struct pf_rulequeue *rules)
                                break;
                        }
                }
-#else
-               if (cur->src.port[0] != prev->src.port[0] ||
-                   cur->src.port[1] != prev->src.port[1] ||
-                   cur->src.port_op != prev->src.port_op)
-                       PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
-#endif
                if (cur->dst.neg != prev->dst.neg ||
                    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
                        PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
-#ifndef NO_APPLE_EXTENSIONS
                {
                        union pf_rule_xport *cx = &cur->dst.xport;
                        union pf_rule_xport *px = &prev->dst.xport;
@@ -2252,12 +2000,6 @@ pf_calc_skip_steps(struct pf_rulequeue *rules)
                                break;
                        }
                }
-#else
-               if (cur->dst.port[0] != prev->dst.port[0] ||
-                   cur->dst.port[1] != prev->dst.port[1] ||
-                   cur->dst.port_op != prev->dst.port_op)
-                       PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
-#endif
 
                prev = cur;
                cur = TAILQ_NEXT(cur, entries);
@@ -2266,6 +2008,32 @@ pf_calc_skip_steps(struct pf_rulequeue *rules)
                PF_SET_SKIP_STEPS(i);
 }
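
/*
 * pf_calc_skip_steps() precomputes, per rule, how far evaluation may
 * jump when a given field cannot match, so a run of rules sharing the
 * same AF, proto, address or xport is rejected in one step.  A hedged
 * sketch of the idea reduced to a single skip field (rule layout and
 * numbers are illustrative):
 */
#include <stdio.h>

#define NRULES 5

/* Hypothetical rule: match a protocol; skip says where to resume when
 * the protocol test fails, as computed below. */
struct rule {
	int proto;
	int skip;
};

int
main(void)
{
	struct rule rules[NRULES] = {
		{ 6, 0 }, { 6, 0 }, { 6, 0 }, { 17, 0 }, { 17, 0 }
	};
	int i, j, pkt_proto = 17, matched = -1;

	/* Precompute: each rule skips to the first rule whose proto
	 * differs from its own. */
	for (i = 0; i < NRULES; i++) {
		for (j = i + 1;
		    j < NRULES && rules[j].proto == rules[i].proto; j++)
			;
		rules[i].skip = j;
	}

	/* Evaluate: a failed proto test leaps over the whole run. */
	for (i = 0; i < NRULES; /* advanced in the body */) {
		if (rules[i].proto != pkt_proto) {
			i = rules[i].skip;
			continue;
		}
		matched = i;	/* pf keeps the last matching rule */
		i++;
	}
	printf("matched rule %d\n", matched);
	return (0);
}
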
 
+u_int32_t
+pf_calc_state_key_flowhash(struct pf_state_key *sk)
+{
+       struct pf_flowhash_key fh __attribute__((aligned(8)));
+
+       bzero(&fh, sizeof (fh));
+       if (PF_ALEQ(&sk->lan.addr, &sk->ext.addr, sk->af)) {
+               bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
+               bcopy(&sk->ext.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
+       } else {
+               bcopy(&sk->ext.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
+               bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
+       }
+       if (sk->lan.xport.spi <= sk->ext.xport.spi) {
+               fh.ap1.xport.spi = sk->lan.xport.spi;
+               fh.ap2.xport.spi = sk->ext.xport.spi;
+       } else {
+               fh.ap1.xport.spi = sk->ext.xport.spi;
+               fh.ap2.xport.spi = sk->lan.xport.spi;
+       }
+       fh.af = sk->af;
+       fh.proto = sk->proto;
+
+       return (net_flowhash(&fh, sizeof (fh), pf_hash_seed));
+}
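
/*
 * The flow hash above must come out the same no matter which direction
 * of a connection builds the key, so the lesser address/SPI pair is
 * always placed first, and the key is zeroed before filling so padding
 * hashes deterministically.  A hedged standalone sketch with a toy hash
 * in place of net_flowhash() (the endpoint layout is illustrative):
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical endpoint; pf hashes full pf_addr/xport pairs. */
struct endpoint {
	uint32_t addr;
	uint16_t port;
};

/* Toy stand-in for net_flowhash(); any seeded hash would do. */
static uint32_t
toy_hash(const void *buf, size_t len, uint32_t seed)
{
	const uint8_t *p = buf;
	uint32_t h = seed;

	while (len-- > 0)
		h = h * 31 + *p++;
	return (h);
}

static uint32_t
flow_hash(const struct endpoint *a, const struct endpoint *b, uint32_t seed)
{
	struct endpoint key[2];
	const struct endpoint *lo, *hi;

	/* Zero first, mirroring the bzero() of fh above. */
	memset(key, 0, sizeof (key));

	/* Canonical order: lesser endpoint first, so (a, b) and (b, a)
	 * produce identical bytes and therefore identical hashes. */
	if (a->addr < b->addr || (a->addr == b->addr && a->port <= b->port)) {
		lo = a; hi = b;
	} else {
		lo = b; hi = a;
	}
	key[0].addr = lo->addr; key[0].port = lo->port;
	key[1].addr = hi->addr; key[1].port = hi->port;

	return (toy_hash(key, sizeof (key), seed));
}

int
main(void)
{
	struct endpoint x = { 0x0a000001, 80 }, y = { 0x0a000002, 5555 };

	/* Both directions of the same flow hash identically. */
	printf("%u %u\n", flow_hash(&x, &y, 7), flow_hash(&y, &x, 7));
	return (0);
}
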
+
 static int
 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 {
@@ -2556,11 +2324,7 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
                                            dst->seqdiff), 0);
                                        memcpy(&opt[i], &sack, sizeof (sack));
                                }
-#ifndef NO_APPLE_EXTENSIONS
                                copyback = off + sizeof (*th) + thoptlen;
-#else
-                               copyback = 1;
-#endif
                        }
                        /* FALLTHROUGH */
                default:
@@ -2571,17 +2335,12 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
                }
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (copyback) {
                m = pf_lazy_makewritable(pd, m, copyback);
                if (!m)
                        return (-1);
                m_copyback(m, off + sizeof (*th), thoptlen, opts);
        }
-#else
-       if (copyback)
-               m_copyback(m, off + sizeof (*th), thoptlen, opts);
-#endif
        return (copyback);
 }
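
/*
 * Note the shape of the copyback path above: 'copyback' now carries the
 * number of bytes that must become writable rather than a boolean, and
 * pf_lazy_makewritable() runs only once a modification is actually
 * pending.  A hedged sketch of that lazy copy-on-write pattern (the
 * lazybuf type stands in for an mbuf chain):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* 'shared' stands in for an mbuf chain that other references may see. */
struct lazybuf {
	unsigned char *data;
	size_t len;
	int shared;
};

/* Like pf_lazy_makewritable(): take the expensive private copy only
 * when a writer actually needs one, and fail cleanly if we cannot. */
static unsigned char *
lazy_makewritable(struct lazybuf *b, size_t need)
{
	unsigned char *copy;

	if (need > b->len)
		return (NULL);
	if (b->shared) {
		if ((copy = malloc(b->len)) == NULL)
			return (NULL);
		memcpy(copy, b->data, b->len);
		b->data = copy;
		b->shared = 0;
	}
	return (b->data);
}

int
main(void)
{
	unsigned char orig[] = "read-mostly packet";
	struct lazybuf b = { orig, sizeof (orig), 1 };
	unsigned char *w;

	/* Only the modifying path pays for the copy, as in the copyback
	 * branch above. */
	if ((w = lazy_makewritable(&b, 5)) != NULL)
		memcpy(w, "fresh", 5);
	printf("%s\n", (char *)b.data);
	if (b.data != orig)
		free(b.data);
	return (0);
}
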
 
@@ -2637,19 +2396,37 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
        }
 
        if (tag)
-               pf_mtag->flags |= PF_TAG_GENERATED;
-       pf_mtag->tag = rtag;
+               pf_mtag->pftag_flags |= PF_TAG_GENERATED;
+       pf_mtag->pftag_tag = rtag;
 
        if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid))
-               pf_mtag->rtableid = r->rtableid;
+               pf_mtag->pftag_rtableid = r->rtableid;
 
-#if ALTQ
-       if (r != NULL && r->qid) {
-               pf_mtag->qid = r->qid;
-               /* add hints for ecn */
-               pf_mtag->hdr = mtod(m, struct ip *);
+#if PF_ALTQ
+       if (altq_allowed && r != NULL && r->qid)
+               pf_mtag->pftag_qid = r->qid;
+#endif /* PF_ALTQ */
+
+       /* add hints for ecn */
+       pf_mtag->pftag_hdr = mtod(m, struct ip *);
+       /* record address family */
+       pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
+       switch (af) {
+#if INET
+       case AF_INET:
+               pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
+               break;
+#endif /* INET */
+#if INET6
+       case AF_INET6:
+               pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
+               break;
+#endif /* INET6 */
        }
-#endif /* ALTQ */
+       /* indicate this is TCP */
+       pf_mtag->pftag_flags |= PF_TAG_TCP;
+
+       /* Make sure headers are 32-bit aligned */
        m->m_data += max_linkhdr;
        m->m_pkthdr.len = m->m_len = len;
        m->m_pkthdr.rcvif = NULL;
@@ -2665,7 +2442,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
                h->ip_src.s_addr = saddr->v4.s_addr;
                h->ip_dst.s_addr = daddr->v4.s_addr;
 
-               th = (struct tcphdr *)((caddr_t)h + sizeof (struct ip));
+               th = (struct tcphdr *)(void *)((caddr_t)h + sizeof (struct ip));
                break;
 #endif /* INET */
 #if INET6
@@ -2678,7 +2455,8 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
                memcpy(&h6->ip6_src, &saddr->v6, sizeof (struct in6_addr));
                memcpy(&h6->ip6_dst, &daddr->v6, sizeof (struct in6_addr));
 
-               th = (struct tcphdr *)((caddr_t)h6 + sizeof (struct ip6_hdr));
+               th = (struct tcphdr *)(void *)
+                   ((caddr_t)h6 + sizeof (struct ip6_hdr));
                break;
 #endif /* INET6 */
        }
@@ -2764,18 +2542,34 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
        if ((pf_mtag = pf_get_mtag(m0)) == NULL)
                return;
 
-       pf_mtag->flags |= PF_TAG_GENERATED;
+       pf_mtag->pftag_flags |= PF_TAG_GENERATED;
 
        if (PF_RTABLEID_IS_VALID(r->rtableid))
-               pf_mtag->rtableid = r->rtableid;
-
-#if ALTQ
-       if (r->qid) {
-               pf_mtag->qid = r->qid;
-               /* add hints for ecn */
-               pf_mtag->hdr = mtod(m0, struct ip *);
+               pf_mtag->pftag_rtableid = r->rtableid;
+
+#if PF_ALTQ
+       if (altq_allowed && r->qid)
+               pf_mtag->pftag_qid = r->qid;
+#endif /* PF_ALTQ */
+
+       /* add hints for ecn */
+       pf_mtag->pftag_hdr = mtod(m0, struct ip *);
+       /* record address family */
+       pf_mtag->pftag_flags &=
+           ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6 | PF_TAG_TCP);
+       switch (af) {
+#if INET
+       case AF_INET:
+               pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
+               break;
+#endif /* INET */
+#if INET6
+       case AF_INET6:
+               pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
+               break;
+#endif /* INET6 */
        }
-#endif /* ALTQ */
+
        switch (af) {
 #if INET
        case AF_INET:
@@ -2911,7 +2705,6 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
        return (pf_match(op, a1, a2, p));
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 int
 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
     union pf_state_xport *sx)
@@ -2946,7 +2739,6 @@ pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
 
        return (d);
 }
-#endif
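
/*
 * pf_match_xport() works because union pf_state_xport overlays whichever
 * selector a protocol uses: a TCP/UDP port, a GRE call ID or an ESP SPI.
 * A hedged sketch of dispatching on the live union member (protocol
 * numbers written out for clarity):
 */
#include <stdint.h>
#include <stdio.h>

/* Illustrative counterpart of pf_state_xport. */
union xport {
	uint16_t port;		/* TCP/UDP */
	uint16_t call_id;	/* GRE/PPTP */
	uint32_t spi;		/* ESP */
};

/* Dispatch on protocol and compare only the live member. */
static int
xport_match(uint8_t proto, const union xport *a, const union xport *b)
{
	switch (proto) {
	case 6:			/* IPPROTO_TCP */
	case 17:		/* IPPROTO_UDP */
		return (a->port == b->port);
	case 47:		/* IPPROTO_GRE */
		return (a->call_id == b->call_id);
	case 50:		/* IPPROTO_ESP */
		return (a->spi == b->spi);
	default:
		return (1);	/* no selector: always matches */
	}
}

int
main(void)
{
	union xport a = { .port = 80 }, b = { .port = 80 };

	printf("%d\n", xport_match(6, &a, &b));
	return (0);
}
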
 
 int
 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
@@ -2970,7 +2762,7 @@ pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
 {
 #pragma unused(m)
        if (*tag == -1)
-               *tag = pf_mtag->tag;
+               *tag = pf_mtag->pftag_tag;
 
        return ((!r->match_tag_not && r->match_tag == *tag) ||
            (r->match_tag_not && r->match_tag != *tag));
@@ -2978,18 +2770,25 @@ pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
 
 int
 pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag,
-    unsigned int rtableid)
+    unsigned int rtableid, struct pf_pdesc *pd)
 {
-       if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid))
+	if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
+	    (pd == NULL || pd->flowhash == 0))
                return (0);
 
        if (pf_mtag == NULL && (pf_mtag = pf_get_mtag(m)) == NULL)
                return (1);
 
        if (tag > 0)
-               pf_mtag->tag = tag;
+               pf_mtag->pftag_tag = tag;
        if (PF_RTABLEID_IS_VALID(rtableid))
-               pf_mtag->rtableid = rtableid;
+               pf_mtag->pftag_rtableid = rtableid;
+       if (pd != NULL && pd->flowhash != 0) {
+               pf_mtag->pftag_flags |= PF_TAG_FLOWHASH;
+               pf_mtag->pftag_flowhash = pd->flowhash;
+               pf_mtag->pftag_flags |= (pd->flags & PFDESC_FLOW_ADV) ?
+                       PF_TAG_FLOWADV : 0;
+       }
 
        return (0);
 }
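
/*
 * pf_tag_packet() now also stamps the packet's flow hash and, when the
 * descriptor asks for it, a flow-advisory bit into the pf mtag flags.
 * A hedged sketch of that tagging step in isolation (flag values and
 * the mtag layout are illustrative):
 */
#include <stdint.h>
#include <stdio.h>

/* Illustrative tag bits in the spirit of PF_TAG_FLOWHASH/PF_TAG_FLOWADV. */
#define TAG_FLOWHASH	0x1U
#define TAG_FLOWADV	0x2U

struct mtag {
	uint32_t flags;
	uint32_t flowhash;
};

/* Record the flow hash only when there is one, mirroring the early
 * return above when there is nothing to tag. */
static void
tag_flow(struct mtag *t, uint32_t flowhash, int want_adv)
{
	if (flowhash == 0)
		return;
	t->flags |= TAG_FLOWHASH;
	t->flowhash = flowhash;
	if (want_adv)
		t->flags |= TAG_FLOWADV;
}

int
main(void)
{
	struct mtag t = { 0, 0 };

	tag_flow(&t, 0x5eed, 1);
	printf("flags=%#x hash=%#x\n", t.flags, t.flowhash);
	return (0);
}
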
@@ -3297,8 +3096,10 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                }
                break;
        case PF_POOL_SRCHASH:
-               pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
-               PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+               pf_hash(saddr, (struct pf_addr *)(void *)&hash,
+                   &rpool->key, af);
+               PF_POOLMASK(naddr, raddr, rmask,
+                   (struct pf_addr *)(void *)&hash, af);
                break;
        case PF_POOL_ROUNDROBIN:
                if (rpool->cur->addr.type == PF_ADDR_TABLE) {
@@ -3363,32 +3164,20 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
        return (0);
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 static int
 pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
     struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
     union pf_state_xport *dxport, struct pf_addr *naddr,
     union pf_state_xport *nxport, struct pf_src_node **sn)
-#else
-int
-pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
-    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
-    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
-    struct pf_src_node **sn)
-#endif
 {
 #pragma unused(kif)
        struct pf_state_key_cmp key;
        struct pf_addr          init_addr;
-#ifndef NO_APPLE_EXTENSIONS
        unsigned int cut;
        sa_family_t af = pd->af;
        u_int8_t proto = pd->proto;
        unsigned int low = r->rpool.proxy_port[0];
        unsigned int high = r->rpool.proxy_port[1];
-#else
-       u_int16_t               cut;
-#endif
 
        bzero(&init_addr, sizeof (init_addr));
        if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
@@ -3399,7 +3188,6 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                high = 65535;
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (!nxport)
                return (0); /* No output necessary. */
 
@@ -3459,13 +3247,11 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                        return (0);
                }
        }
-#endif
        do {
                key.af = af;
                key.proto = proto;
                PF_ACPY(&key.ext.addr, daddr, key.af);
                PF_ACPY(&key.gwy.addr, naddr, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                switch (proto) {
                        case IPPROTO_UDP:
                                key.proto_variant = r->extfilter;
@@ -3478,54 +3264,31 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                        key.ext.xport = *dxport;
                else
                        memset(&key.ext.xport, 0, sizeof (key.ext.xport));
-#else
-               key.ext.port = dport;
-#endif
                /*
                 * port search; start at a random point in the range and
                 * step, similar to the port loop in in_pcbbind
                 */
                if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
                    proto == IPPROTO_ICMP)) {
-#ifndef NO_APPLE_EXTENSIONS
                        if (dxport)
                                key.gwy.xport = *dxport;
                        else
                                memset(&key.gwy.xport, 0,
                                    sizeof (key.ext.xport));
-#else
-                       key.gwy.port = dport;
-#endif
                        if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
                                return (0);
                } else if (low == 0 && high == 0) {
-#ifndef NO_APPLE_EXTENSIONS
                        key.gwy.xport = *nxport;
-#else
-                       key.gwy.port = *nport;
-#endif
                        if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
                                return (0);
                } else if (low == high) {
-#ifndef NO_APPLE_EXTENSIONS
                        key.gwy.xport.port = htons(low);
                        if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
                                nxport->port = htons(low);
                                return (0);
                        }
-#else
-                       key.gwy.port = htons(low);
-                       if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
-                               *nport = htons(low);
-                               return (0);
-                       }
-#endif
                } else {
-#ifndef NO_APPLE_EXTENSIONS
                        unsigned int tmp;
-#else
-                       u_int16_t tmp;
-#endif
                        if (low > high) {
                                tmp = low;
                                low = high;
@@ -3535,38 +3298,20 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
                        cut = htonl(random()) % (1 + high - low) + low;
                        /* low <= cut <= high */
                        for (tmp = cut; tmp <= high; ++(tmp)) {
-#ifndef NO_APPLE_EXTENSIONS
                                key.gwy.xport.port = htons(tmp);
                                if (pf_find_state_all(&key, PF_IN, NULL) ==
                                    NULL) {
                                        nxport->port = htons(tmp);
                                        return (0);
                                }
-#else
-                               key.gwy.port = htons(tmp);
-                               if (pf_find_state_all(&key, PF_IN, NULL) ==
-                                   NULL) {
-                                       *nport = htons(tmp);
-                                       return (0);
-                               }
-#endif
                        }
                        for (tmp = cut - 1; tmp >= low; --(tmp)) {
-#ifndef NO_APPLE_EXTENSIONS
                                key.gwy.xport.port = htons(tmp);
                                if (pf_find_state_all(&key, PF_IN, NULL) ==
                                    NULL) {
                                        nxport->port = htons(tmp);
                                        return (0);
                                }
-#else
-                               key.gwy.port = htons(tmp);
-                               if (pf_find_state_all(&key, PF_IN, NULL) ==
-                                   NULL) {
-                                       *nport = htons(tmp);
-                                       return (0);
-                               }
-#endif
                        }
                }
 
@@ -3587,18 +3332,11 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
        return (1);                                     /* none available */
 }
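
/*
 * The port search above starts at a random cut point in [low, high],
 * probes upward to high, then downward from cut - 1 to low, failing
 * only when the whole range is in use.  A hedged standalone sketch of
 * that probe order (port_in_use() stands in for the pf_find_state_all()
 * lookups; low must be nonzero, matching the 1..65535 range above):
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Stand-in for the state-table collision checks above. */
static int
port_in_use(unsigned int port)
{
	return (port >= 40000 && port <= 40004);	/* toy busy range */
}

/* Random-start probe: up from cut to high, then down from cut - 1 to
 * low.  With low > 0 the unsigned downward loop cannot wrap. */
static int
pick_port(unsigned int low, unsigned int high, unsigned int *out)
{
	unsigned int cut, tmp;

	cut = (unsigned int)random() % (1 + high - low) + low;
	for (tmp = cut; tmp <= high; ++tmp) {
		if (!port_in_use(tmp)) {
			*out = tmp;
			return (0);
		}
	}
	for (tmp = cut - 1; tmp >= low; --tmp) {
		if (!port_in_use(tmp)) {
			*out = tmp;
			return (0);
		}
	}
	return (1);	/* none available */
}

int
main(void)
{
	unsigned int port;

	srandom((unsigned int)time(NULL));
	if (pick_port(40000, 40010, &port) == 0)
		printf("picked %u\n", port);
	return (0);
}
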
 
-#ifndef NO_APPLE_EXTENSIONS
 static struct pf_rule *
 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
     int direction, struct pfi_kif *kif, struct pf_addr *saddr,
     union pf_state_xport *sxport, struct pf_addr *daddr,
     union pf_state_xport *dxport, int rs_num)
-#else
-struct pf_rule *
-pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
-    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
-    struct pf_addr *daddr, u_int16_t dport, int rs_num)
-#endif
 {
        struct pf_rule          *r, *rm = NULL;
        struct pf_ruleset       *ruleset = NULL;
@@ -3610,16 +3348,13 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
        while (r && rm == NULL) {
                struct pf_rule_addr     *src = NULL, *dst = NULL;
                struct pf_addr_wrap     *xdst = NULL;
-#ifndef NO_APPLE_EXTENSIONS
                struct pf_addr_wrap     *xsrc = NULL;
                union pf_rule_xport     rdrxport;
-#endif
 
                if (r->action == PF_BINAT && direction == PF_IN) {
                        src = &r->dst;
                        if (r->rpool.cur != NULL)
                                xdst = &r->rpool.cur->addr;
-#ifndef NO_APPLE_EXTENSIONS
                } else if (r->action == PF_RDR && direction == PF_OUT) {
                        dst = &r->src;
                        src = &r->dst;
@@ -3629,7 +3364,6 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                                    htons(r->rpool.proxy_port[0]);
                                xsrc = &r->rpool.cur->addr;
                        }
-#endif
                } else {
                        src = &r->src;
                        dst = &r->dst;
@@ -3644,7 +3378,6 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                        r = r->skip[PF_SKIP_AF].ptr;
                else if (r->proto && r->proto != pd->proto)
                        r = r->skip[PF_SKIP_PROTO].ptr;
-#ifndef NO_APPLE_EXTENSIONS
                else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL))
                        r = TAILQ_NEXT(r, entries);
                else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
@@ -3656,14 +3389,6 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                        r = TAILQ_NEXT(r, entries);
                else if (!xsrc && !pf_match_xport(r->proto,
                    r->proto_variant, &src->xport, sxport))
-#else
-               else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
-                   src->neg, kif))
-                       r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
-                           PF_SKIP_DST_ADDR].ptr;
-               else if (src->port_op && !pf_match_port(src->port_op,
-                   src->port[0], src->port[1], sport))
-#endif
                        r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
                            PF_SKIP_DST_PORT].ptr;
                else if (dst != NULL &&
@@ -3672,14 +3397,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
                    0, NULL))
                        r = TAILQ_NEXT(r, entries);
-#ifndef NO_APPLE_EXTENSIONS
                else if (dst && !pf_match_xport(r->proto, r->proto_variant,
                    &dst->xport, dxport))
-#else
-               else if (dst != NULL && dst->port_op &&
-                   !pf_match_port(dst->port_op, dst->port[0],
-                   dst->port[1], dport))
-#endif
                        r = r->skip[PF_SKIP_DST_PORT].ptr;
                else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
                        r = TAILQ_NEXT(r, entries);
@@ -3702,7 +3421,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
                        pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
                            NULL, NULL);
        }
-       if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
+       if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, NULL))
                return (NULL);
        if (rm != NULL && (rm->action == PF_NONAT ||
            rm->action == PF_NORDR || rm->action == PF_NOBINAT))
@@ -3710,25 +3429,15 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
        return (rm);
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 static struct pf_rule *
 pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
     int direction, struct pfi_kif *kif, struct pf_src_node **sn,
     struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
     union pf_state_xport *dxport, struct pf_addr *naddr,
     union pf_state_xport *nxport)
-#else
-struct pf_rule *
-pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
-    struct pfi_kif *kif, struct pf_src_node **sn,
-    struct pf_addr *saddr, u_int16_t sport,
-    struct pf_addr *daddr, u_int16_t dport,
-    struct pf_addr *naddr, u_int16_t *nport)
-#endif
 {
        struct pf_rule  *r = NULL;
 
-#ifndef NO_APPLE_EXTENSIONS
        if (direction == PF_OUT) {
                r = pf_match_translation(pd, m, off, direction, kif, saddr,
                    sxport, daddr, dxport, PF_RULESET_BINAT);
@@ -3745,21 +3454,6 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
                        r = pf_match_translation(pd, m, off, direction, kif,
                            saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
        }
-#else
-       if (direction == PF_OUT) {
-               r = pf_match_translation(pd, m, off, direction, kif, saddr,
-                   sport, daddr, dport, PF_RULESET_BINAT);
-               if (r == NULL)
-                       r = pf_match_translation(pd, m, off, direction, kif,
-                           saddr, sport, daddr, dport, PF_RULESET_NAT);
-       } else {
-               r = pf_match_translation(pd, m, off, direction, kif, saddr,
-                   sport, daddr, dport, PF_RULESET_RDR);
-               if (r == NULL)
-                       r = pf_match_translation(pd, m, off, direction, kif,
-                           saddr, sport, daddr, dport, PF_RULESET_BINAT);
-       }
-#endif
 
        if (r != NULL) {
                switch (r->action) {
@@ -3768,14 +3462,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
                case PF_NORDR:
                        return (NULL);
                case PF_NAT:
-#ifndef NO_APPLE_EXTENSIONS
                        if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
                            dxport, naddr, nxport, sn)) {
-#else
-                       if (pf_get_sport(pd->af, pd->proto, r, saddr,
-                           daddr, dport, naddr, nport, r->rpool.proxy_port[0],
-                           r->rpool.proxy_port[1], sn)) {
-#endif
                                DPFPRINTF(PF_DEBUG_MISC,
                                    ("pf: NAT proxy port allocation "
                                    "(%u-%u) failed\n",
@@ -3863,7 +3551,6 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
                        }
                        break;
                case PF_RDR: {
-#ifndef NO_APPLE_EXTENSIONS
                        switch (direction) {
                        case PF_OUT:
                                if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
@@ -3939,31 +3626,6 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
                                }
                                break;
                        }
-#else
-                       if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
-                               return (NULL);
-                       if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
-                           PF_POOL_BITMASK)
-                               PF_POOLMASK(naddr, naddr,
-                                   &r->rpool.cur->addr.v.a.mask, daddr,
-                                   pd->af);
-
-                       if (r->rpool.proxy_port[1]) {
-                               u_int32_t       tmp_nport;
-
-                               tmp_nport = ((ntohs(dport) -
-                                   ntohs(r->dst.port[0])) %
-                                   (r->rpool.proxy_port[1] -
-                                   r->rpool.proxy_port[0] + 1)) +
-                                   r->rpool.proxy_port[0];
-
-                               /* wrap around if necessary */
-                               if (tmp_nport > 65535)
-                                       tmp_nport -= 65535;
-                               *nport = htons((u_int16_t)tmp_nport);
-                       } else if (r->rpool.proxy_port[0])
-                               *nport = htons(r->rpool.proxy_port[0]);
-#endif
                        break;
                }
                default:
@@ -4183,7 +3845,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
        case AF_INET:
                hlen = sizeof (struct ip);
                bzero(&ro, sizeof (ro));
-               dst = (struct sockaddr_in *)&ro.ro_dst;
+               dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
                dst->sin_family = AF_INET;
                dst->sin_len = sizeof (*dst);
                dst->sin_addr = addr->v4;
@@ -4195,7 +3857,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
        case AF_INET6:
                hlen = sizeof (struct ip6_hdr);
                bzero(&ro6, sizeof (ro6));
-               dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
+               dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
                dst6->sin6_family = AF_INET6;
                dst6->sin6_len = sizeof (*dst6);
                dst6->sin6_addr = addr->v6;
@@ -4274,16 +3936,14 @@ pf_detach_state(struct pf_state *s, int flags)
                if (!(flags & PF_DT_SKIP_LANEXT))
                        RB_REMOVE(pf_state_tree_lan_ext,
                            &pf_statetbl_lan_ext, sk);
-#ifndef NO_APPLE_EXTENSIONS
                if (sk->app_state)
                        pool_put(&pf_app_state_pl, sk->app_state);
-#endif
                pool_put(&pf_state_key_pl, sk);
        }
 }
 
 struct pf_state_key *
-pf_alloc_state_key(struct pf_state *s)
+pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
 {
        struct pf_state_key     *sk;
 
@@ -4293,6 +3953,20 @@ pf_alloc_state_key(struct pf_state *s)
        TAILQ_INIT(&sk->states);
        pf_attach_state(sk, s, 0);
 
+       /* initialize state key from psk, if provided */
+       if (psk != NULL) {
+               bcopy(&psk->lan, &sk->lan, sizeof (sk->lan));
+               bcopy(&psk->gwy, &sk->gwy, sizeof (sk->gwy));
+               bcopy(&psk->ext, &sk->ext, sizeof (sk->ext));
+               sk->af = psk->af;
+               sk->proto = psk->proto;
+               sk->direction = psk->direction;
+               sk->proto_variant = psk->proto_variant;
+               VERIFY(psk->app_state == NULL);
+               sk->flowhash = psk->flowhash;
+               /* don't touch tree entries, states and refcnt on sk */
+       }
+
        return (sk);
 }
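
/*
 * pf_alloc_state_key() now accepts an optional template and copies only
 * the payload fields from it, deliberately leaving tree entries, state
 * lists and refcounts to the allocator.  A hedged sketch of that
 * template-copy pattern (the skey type is illustrative):
 */
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical object with payload fields plus linkage the copy must
 * never touch. */
struct skey {
	int af, proto;		/* payload: copied from a template */
	struct skey *tree_link;	/* linkage: owned by the allocator */
};

/* Allocate; if a template is given, copy payload fields only, as
 * pf_alloc_state_key() now does with psk. */
static struct skey *
alloc_skey(const struct skey *tmpl)
{
	struct skey *sk = calloc(1, sizeof (*sk));

	if (sk == NULL)
		return (NULL);
	if (tmpl != NULL) {
		sk->af = tmpl->af;
		sk->proto = tmpl->proto;
		/* tree_link intentionally left alone */
	}
	return (sk);
}

int
main(void)
{
	struct skey tmpl = { 2, 6, NULL };
	struct skey *sk = alloc_skey(&tmpl);

	if (sk != NULL) {
		printf("af=%d proto=%d\n", sk->af, sk->proto);
		free(sk);
	}
	return (0);
}
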
 
@@ -4334,9 +4008,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #pragma unused(h)
        struct pf_rule          *nr = NULL;
        struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
-#ifdef NO_APPLE_EXTENSIONS
-       u_int16_t                bport, nport = 0;
-#endif
        sa_family_t              af = pd->af;
        struct pf_rule          *r, *a = NULL;
        struct pf_ruleset       *ruleset = NULL;
@@ -4350,15 +4021,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        int                      match = 0;
        int                      state_icmp = 0;
        u_int16_t                mss = tcp_mssdflt;
-#ifdef NO_APPLE_EXTENSIONS
-       u_int16_t                sport, dport;
-#endif
        u_int8_t                 icmptype = 0, icmpcode = 0;
 
-#ifndef NO_APPLE_EXTENSIONS
        struct pf_grev1_hdr     *grev1 = pd->hdr.grev1;
        union pf_state_xport bxport, nxport, sxport, dxport;
-#endif
+       struct pf_state_key      psk;
 
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
@@ -4367,46 +4034,28 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                return (PF_DROP);
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        hdrlen = 0;
        sxport.spi = 0;
        dxport.spi = 0;
        nxport.spi = 0;
-#else
-       sport = dport = hdrlen = 0;
-#endif
 
        switch (pd->proto) {
        case IPPROTO_TCP:
-#ifndef NO_APPLE_EXTENSIONS
                sxport.port = th->th_sport;
                dxport.port = th->th_dport;
-#else
-               sport = th->th_sport;
-               dport = th->th_dport;
-#endif
                hdrlen = sizeof (*th);
                break;
        case IPPROTO_UDP:
-#ifndef NO_APPLE_EXTENSIONS
                sxport.port = pd->hdr.udp->uh_sport;
                dxport.port = pd->hdr.udp->uh_dport;
-#else
-               sport = pd->hdr.udp->uh_sport;
-               dport = pd->hdr.udp->uh_dport;
-#endif
                hdrlen = sizeof (*pd->hdr.udp);
                break;
 #if INET
        case IPPROTO_ICMP:
                if (pd->af != AF_INET)
                        break;
-#ifndef NO_APPLE_EXTENSIONS
                sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
                hdrlen = ICMP_MINLEN;
-#else
-               sport = dport = pd->hdr.icmp->icmp_id;
-#endif
                icmptype = pd->hdr.icmp->icmp_type;
                icmpcode = pd->hdr.icmp->icmp_code;
 
@@ -4422,11 +4071,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        case IPPROTO_ICMPV6:
                if (pd->af != AF_INET6)
                        break;
-#ifndef NO_APPLE_EXTENSIONS
                sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
-#else
-               sport = dport = pd->hdr.icmp6->icmp6_id;
-#endif
                hdrlen = sizeof (*pd->hdr.icmp6);
                icmptype = pd->hdr.icmp6->icmp6_type;
                icmpcode = pd->hdr.icmp6->icmp6_code;
@@ -4438,7 +4083,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        state_icmp++;
                break;
 #endif /* INET6 */
-#ifndef NO_APPLE_EXTENSIONS
        case IPPROTO_GRE:
                if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
                        sxport.call_id = dxport.call_id =
@@ -4451,80 +4095,55 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                dxport.spi = pd->hdr.esp->spi;
                hdrlen = sizeof (*pd->hdr.esp);
                break;
-#endif
        }
 
        r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
        if (direction == PF_OUT) {
-#ifndef NO_APPLE_EXTENSIONS
                bxport = nxport = sxport;
                /* check outgoing packet for BINAT/NAT */
                if ((nr = pf_get_translation_aux(pd, m, off, PF_OUT, kif, &nsn,
                    saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
                    NULL) {
-#else
-               bport = nport = sport;
-               /* check outgoing packet for BINAT/NAT */
-               if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
-                   saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
-#endif
                        PF_ACPY(&pd->baddr, saddr, af);
                        switch (pd->proto) {
                        case IPPROTO_TCP:
-#ifndef NO_APPLE_EXTENSIONS
                                pf_change_ap(direction, pd->mp, saddr,
                                    &th->th_sport, pd->ip_sum, &th->th_sum,
                                    &pd->naddr, nxport.port, 0, af);
                                sxport.port = th->th_sport;
-#else
-                               pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
-                                   &th->th_sum, &pd->naddr, nport, 0, af);
-                               sport = th->th_sport;
-#endif
                                rewrite++;
                                break;
                        case IPPROTO_UDP:
-#ifndef NO_APPLE_EXTENSIONS
                                pf_change_ap(direction, pd->mp, saddr,
                                    &pd->hdr.udp->uh_sport, pd->ip_sum,
                                    &pd->hdr.udp->uh_sum, &pd->naddr,
                                    nxport.port, 1, af);
                                sxport.port = pd->hdr.udp->uh_sport;
-#else
-                               pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
-                                   pd->ip_sum, &pd->hdr.udp->uh_sum,
-                                   &pd->naddr, nport, 1, af);
-                               sport = pd->hdr.udp->uh_sport;
-#endif
                                rewrite++;
                                break;
 #if INET
                        case IPPROTO_ICMP:
-                               pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
-                                   pd->naddr.v4.s_addr, 0);
-#ifndef NO_APPLE_EXTENSIONS
-                               pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
-                                   pd->hdr.icmp->icmp_cksum, sxport.port,
-                                   nxport.port, 0);
-                               pd->hdr.icmp->icmp_id = nxport.port;
-                               ++rewrite;
-#else
-                               pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
-                                   pd->hdr.icmp->icmp_cksum, sport, nport, 0);
-                               pd->hdr.icmp->icmp_id = nport;
-                               m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
-#endif
+                               if (pd->af == AF_INET) {
+                                       pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+                                           pd->naddr.v4.s_addr, 0);
+                                       pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+                                           pd->hdr.icmp->icmp_cksum, sxport.port,
+                                           nxport.port, 0);
+                                       pd->hdr.icmp->icmp_id = nxport.port;
+                                       ++rewrite;
+                               }
                                break;
 #endif /* INET */
 #if INET6
                        case IPPROTO_ICMPV6:
-                               pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
-                                   &pd->naddr, 0);
-                               rewrite++;
+                               if (pd->af == AF_INET6) {
+                                       pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
+                                           &pd->naddr, 0);
+                                       rewrite++;
+                               }
                                break;
 #endif /* INET6 */
-#ifndef NO_APPLE_EXTENSIONS
                        case IPPROTO_GRE:
                                switch (af) {
 #if INET
@@ -4557,7 +4176,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif /* INET6 */
                                }
                                break;
-#endif
                        default:
                                switch (af) {
 #if INET
@@ -4580,63 +4198,45 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        pd->nat_rule = nr;
                }
        } else {
-#ifndef NO_APPLE_EXTENSIONS
                bxport.port = nxport.port = dxport.port;
                /* check incoming packet for BINAT/RDR */
                if ((nr = pf_get_translation_aux(pd, m, off, PF_IN, kif, &nsn,
                    saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
                    NULL) {
-#else
-               bport = nport = dport;
-               /* check incoming packet for BINAT/RDR */
-               if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
-                   saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
-#endif
                        PF_ACPY(&pd->baddr, daddr, af);
                        switch (pd->proto) {
                        case IPPROTO_TCP:
-#ifndef NO_APPLE_EXTENSIONS
                                pf_change_ap(direction, pd->mp, daddr,
                                    &th->th_dport, pd->ip_sum, &th->th_sum,
                                    &pd->naddr, nxport.port, 0, af);
                                dxport.port = th->th_dport;
-#else
-                               pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
-                                   &th->th_sum, &pd->naddr, nport, 0, af);
-                               dport = th->th_dport;
-#endif
                                rewrite++;
                                break;
                        case IPPROTO_UDP:
-#ifndef NO_APPLE_EXTENSIONS
                                pf_change_ap(direction, pd->mp, daddr,
                                    &pd->hdr.udp->uh_dport, pd->ip_sum,
                                    &pd->hdr.udp->uh_sum, &pd->naddr,
                                    nxport.port, 1, af);
                                dxport.port = pd->hdr.udp->uh_dport;
-#else
-                               pf_change_ap(direction, daddr,
-                                   &pd->hdr.udp->uh_dport,
-                                   pd->ip_sum, &pd->hdr.udp->uh_sum,
-                                   &pd->naddr, nport, 1, af);
-                               dport = pd->hdr.udp->uh_dport;
-#endif
                                rewrite++;
                                break;
 #if INET
                        case IPPROTO_ICMP:
-                               pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
-                                   pd->naddr.v4.s_addr, 0);
+                               if (pd->af == AF_INET) {
+                                       pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
+                                           pd->naddr.v4.s_addr, 0);
+                               }
                                break;
 #endif /* INET */
 #if INET6
                        case IPPROTO_ICMPV6:
-                               pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
-                                   &pd->naddr, 0);
-                               rewrite++;
+                               if (pd->af == AF_INET6) {
+                                       pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
+                                           &pd->naddr, 0);
+                                       rewrite++;
+                               }
                                break;
 #endif /* INET6 */
-#ifndef NO_APPLE_EXTENSIONS
                        case IPPROTO_GRE:
                                if (pd->proto_variant == PF_GRE_PPTP_VARIANT)
                                        grev1->call_id = nxport.call_id;
@@ -4671,7 +4271,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif /* INET6 */
                                }
                                break;
-#endif
                        default:
                                switch (af) {
 #if INET
@@ -4695,10 +4294,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                }
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (nr && nr->tag > 0)
                tag = nr->tag;
-#endif
 
        while (r != NULL) {
                r->evaluations++;
@@ -4714,33 +4311,23 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                    r->src.neg, kif))
                        r = r->skip[PF_SKIP_SRC_ADDR].ptr;
                /* tcp/udp only. port_op always 0 in other cases */
-#ifndef NO_APPLE_EXTENSIONS
                else if (r->proto == pd->proto &&
                    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
                    r->src.xport.range.op &&
                    !pf_match_port(r->src.xport.range.op,
                    r->src.xport.range.port[0], r->src.xport.range.port[1],
                    th->th_sport))
-#else
-               else if (r->src.port_op && !pf_match_port(r->src.port_op,
-                   r->src.port[0], r->src.port[1], th->th_sport))
-#endif
                        r = r->skip[PF_SKIP_SRC_PORT].ptr;
                else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
                    r->dst.neg, NULL))
                        r = r->skip[PF_SKIP_DST_ADDR].ptr;
                /* tcp/udp only. port_op always 0 in other cases */
-#ifndef NO_APPLE_EXTENSIONS
                else if (r->proto == pd->proto &&
                    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
                    r->dst.xport.range.op &&
                    !pf_match_port(r->dst.xport.range.op,
                    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
                    th->th_dport))
-#else
-               else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
-                   r->dst.port[0], r->dst.port[1], th->th_dport))
-#endif
                        r = r->skip[PF_SKIP_DST_PORT].ptr;
                /* icmp only. type always 0 in other cases */
                else if (r->type && r->type != icmptype + 1)
@@ -4748,7 +4335,14 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                /* icmp only. type always 0 in other cases */
                else if (r->code && r->code != icmpcode + 1)
                        r = TAILQ_NEXT(r, entries);
-               else if (r->tos && !(r->tos == pd->tos))
+               else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
+                   !(r->tos & pd->tos))
+                       r = TAILQ_NEXT(r, entries);
+               else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
+                   !(r->tos & (pd->tos & DSCP_MASK)))
+                       r = TAILQ_NEXT(r, entries);
+               else if ((r->rule_flag & PFRULE_SC) && r->tos &&
+                   ((r->tos & SCIDX_MASK) != pd->sc))
                        r = TAILQ_NEXT(r, entries);
                else if (r->rule_flag & PFRULE_FRAGMENT)
                        r = TAILQ_NEXT(r, entries);
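
/*
 * The rewritten tos test above distinguishes three rule flags:
 * PFRULE_TOS matches bits of the whole TOS byte, PFRULE_DSCP first
 * masks the packet's TOS down to its DSCP bits, and PFRULE_SC compares
 * a service-class index.  A hedged sketch of the TOS versus DSCP
 * masking (0xfc is assumed for DSCP_MASK, i.e. the top six bits of the
 * byte; ECN occupies the bottom two):
 */
#include <stdint.h>
#include <stdio.h>

/* Assumed to match the kernel's DSCP_MASK placement. */
#define TOY_DSCP_MASK	0xfc

/* Bitwise match on the raw TOS byte, as with PFRULE_TOS above. */
static int
match_tos(uint8_t rule_tos, uint8_t pkt_tos)
{
	return ((rule_tos & pkt_tos) != 0);
}

/* Mask to the DSCP bits first, as with PFRULE_DSCP above. */
static int
match_dscp(uint8_t rule_tos, uint8_t pkt_tos)
{
	return ((rule_tos & (pkt_tos & TOY_DSCP_MASK)) != 0);
}

int
main(void)
{
	uint8_t pkt = 0xba;	/* DSCP EF (46) with an ECN bit set */

	/* Raw TOS matching sees the ECN bit; DSCP matching masks it off. */
	printf("tos: %d dscp: %d\n",
	    match_tos(0x02, pkt), match_dscp(0x02, pkt));
	return (0);
}
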
@@ -4804,7 +4398,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        REASON_SET(&reason, PFRES_MATCH);
 
        if (r->log || (nr != NULL && nr->log)) {
-#ifndef NO_APPLE_EXTENSIONS
                if (rewrite > 0) {
                        if (rewrite < off + hdrlen)
                                rewrite = off + hdrlen;
@@ -4817,10 +4410,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 
                        m_copyback(m, off, hdrlen, pd->hdr.any);
                }
-#else
-               if (rewrite)
-                       m_copyback(m, off, hdrlen, pd->hdr.any);
-#endif
                PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
                    a, ruleset, pd);
        }
@@ -4834,34 +4423,19 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        if (direction == PF_OUT) {
                                switch (pd->proto) {
                                case IPPROTO_TCP:
-#ifndef NO_APPLE_EXTENSIONS
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &th->th_sport, pd->ip_sum,
                                            &th->th_sum, &pd->baddr,
                                            bxport.port, 0, af);
                                        sxport.port = th->th_sport;
-#else
-                                       pf_change_ap(saddr, &th->th_sport,
-                                           pd->ip_sum, &th->th_sum,
-                                           &pd->baddr, bport, 0, af);
-                                       sport = th->th_sport;
-#endif
                                        rewrite++;
                                        break;
                                case IPPROTO_UDP:
-#ifndef NO_APPLE_EXTENSIONS
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &pd->hdr.udp->uh_sport, pd->ip_sum,
                                            &pd->hdr.udp->uh_sum, &pd->baddr,
                                            bxport.port, 1, af);
                                        sxport.port = pd->hdr.udp->uh_sport;
-#else
-                                       pf_change_ap(saddr,
-                                           &pd->hdr.udp->uh_sport, pd->ip_sum,
-                                           &pd->hdr.udp->uh_sum, &pd->baddr,
-                                           bport, 1, af);
-                                       sport = pd->hdr.udp->uh_sport;
-#endif
                                        rewrite++;
                                        break;
                                case IPPROTO_ICMP:
@@ -4870,7 +4444,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif
                                        /* nothing! */
                                        break;
-#ifndef NO_APPLE_EXTENSIONS
                                case IPPROTO_GRE:
                                        PF_ACPY(&pd->baddr, saddr, af);
                                        ++rewrite;
@@ -4908,7 +4481,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif /* INET6 */
                                        }
                                        break;
-#endif
                                default:
                                        switch (af) {
                                        case AF_INET:
@@ -4924,34 +4496,19 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        } else {
                                switch (pd->proto) {
                                case IPPROTO_TCP:
-#ifndef NO_APPLE_EXTENSIONS
                                        pf_change_ap(direction, pd->mp, daddr,
                                            &th->th_dport, pd->ip_sum,
                                            &th->th_sum, &pd->baddr,
                                            bxport.port, 0, af);
                                        dxport.port = th->th_dport;
-#else
-                                       pf_change_ap(daddr, &th->th_dport,
-                                           pd->ip_sum, &th->th_sum,
-                                           &pd->baddr, bport, 0, af);
-                                       dport = th->th_dport;
-#endif
                                        rewrite++;
                                        break;
                                case IPPROTO_UDP:
-#ifndef NO_APPLE_EXTENSIONS
                                        pf_change_ap(direction, pd->mp, daddr,
                                            &pd->hdr.udp->uh_dport, pd->ip_sum,
                                            &pd->hdr.udp->uh_sum, &pd->baddr,
                                            bxport.port, 1, af);
                                        dxport.port = pd->hdr.udp->uh_dport;
-#else
-                                       pf_change_ap(daddr,
-                                           &pd->hdr.udp->uh_dport, pd->ip_sum,
-                                           &pd->hdr.udp->uh_sum, &pd->baddr,
-                                           bport, 1, af);
-                                       dport = pd->hdr.udp->uh_dport;
-#endif
                                        rewrite++;
                                        break;
                                case IPPROTO_ICMP:
@@ -4960,7 +4517,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif
                                        /* nothing! */
                                        break;
-#ifndef NO_APPLE_EXTENSIONS
                                case IPPROTO_GRE:
                                        if (pd->proto_variant ==
                                            PF_GRE_PPTP_VARIANT)
@@ -4999,7 +4555,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif /* INET6 */
                                        }
                                        break;
-#endif
                                default:
                                        switch (af) {
                                        case AF_INET:
@@ -5054,16 +4609,12 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
                        }
                } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
-#ifndef NO_APPLE_EXTENSIONS
                    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
-#endif
                    r->return_icmp)
                        pf_send_icmp(m, r->return_icmp >> 8,
                            r->return_icmp & 255, af, r);
                else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
-#ifndef NO_APPLE_EXTENSIONS
                    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
-#endif
                    r->return_icmp6)
                        pf_send_icmp(m, r->return_icmp6 >> 8,
                            r->return_icmp6 & 255, af, r);
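
Note also that ESP and AH are now excluded from ICMP error generation, and that r->return_icmp packs the ICMP type in its high byte and the code in its low byte, as the shift and mask above show. A small runnable sketch of that packing, using standard ICMP numbering:

#include <stdio.h>
#include <stdint.h>

/* r->return_icmp carries both halves of the ICMP reply in one 16-bit
 * value: type in the high byte, code in the low byte. */
static void unpack_return_icmp(uint16_t ri, uint8_t *type, uint8_t *code)
{
        *type = ri >> 8;
        *code = ri & 255;
}

int main(void)
{
        uint8_t t, c;

        /* type 3 (unreachable), code 13 (administratively prohibited) */
        unpack_return_icmp((3 << 8) | 13, &t, &c);
        printf("type=%u code=%u\n", t, c);
        return 0;
}
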
@@ -5072,7 +4623,91 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        if (r->action == PF_DROP)
                return (PF_DROP);
 
-       if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+       /* prepare state key, for flowhash and/or the state (if created) */
+       bzero(&psk, sizeof (psk));
+       psk.proto = pd->proto;
+       psk.direction = direction;
+       psk.af = af;
+       if (pd->proto == IPPROTO_UDP) {
+               if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
+                   ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
+                       psk.proto_variant = PF_EXTFILTER_APD;
+               } else {
+                       psk.proto_variant = nr ? nr->extfilter : r->extfilter;
+                       if (psk.proto_variant < PF_EXTFILTER_APD)
+                               psk.proto_variant = PF_EXTFILTER_APD;
+               }
+       } else if (pd->proto == IPPROTO_GRE) {
+               psk.proto_variant = pd->proto_variant;
+       }
+       if (direction == PF_OUT) {
+               PF_ACPY(&psk.gwy.addr, saddr, af);
+               PF_ACPY(&psk.ext.addr, daddr, af);
+               switch (pd->proto) {
+               case IPPROTO_UDP:
+                       psk.gwy.xport = sxport;
+                       psk.ext.xport = dxport;
+                       break;
+               case IPPROTO_ESP:
+                       psk.gwy.xport.spi = 0;
+                       psk.ext.xport.spi = pd->hdr.esp->spi;
+                       break;
+               case IPPROTO_ICMP:
+#if INET6
+               case IPPROTO_ICMPV6:
+#endif
+                       psk.gwy.xport.port = nxport.port;
+                       psk.ext.xport.spi = 0;
+                       break;
+               default:
+                       psk.gwy.xport = sxport;
+                       psk.ext.xport = dxport;
+                       break;
+               }
+               if (nr != NULL) {
+                       PF_ACPY(&psk.lan.addr, &pd->baddr, af);
+                       psk.lan.xport = bxport;
+               } else {
+                       PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
+                       psk.lan.xport = psk.gwy.xport;
+               }
+       } else {
+               PF_ACPY(&psk.lan.addr, daddr, af);
+               PF_ACPY(&psk.ext.addr, saddr, af);
+               switch (pd->proto) {
+               case IPPROTO_ICMP:
+#if INET6
+               case IPPROTO_ICMPV6:
+#endif
+                       psk.lan.xport = nxport;
+                       psk.ext.xport.spi = 0;
+                       break;
+               case IPPROTO_ESP:
+                       psk.ext.xport.spi = 0;
+                       psk.lan.xport.spi = pd->hdr.esp->spi;
+                       break;
+               default:
+                       psk.lan.xport = dxport;
+                       psk.ext.xport = sxport;
+                       break;
+               }
+               if (nr != NULL) {
+                       PF_ACPY(&psk.gwy.addr, &pd->baddr, af);
+                       psk.gwy.xport = bxport;
+               } else {
+                       PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
+                       psk.gwy.xport = psk.lan.xport;
+               }
+       }
+       if (pd->flowhash != 0) {
+               /* flowhash was already computed by upper layers */
+               psk.flowhash = pd->flowhash;
+       } else {
+               psk.flowhash = pf_calc_state_key_flowhash(&psk);
+               pd->flowhash = psk.flowhash;
+       }
+
+       if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, pd)) {
                REASON_SET(&reason, PFRES_MEMORY);
                return (PF_DROP);
        }
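
The new psk block above builds the state key before any state exists, so every packet can be tagged with a flow hash: pd->flowhash is either inherited from upper layers or derived from the key, and the same psk is later handed to pf_alloc_state_key(). A toy illustration of the one property the hash needs (same key, same nonzero value), assuming nothing about the kernel's actual pf_calc_state_key_flowhash():

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Toy stand-in for pf_calc_state_key_flowhash(); only the stability of
 * the result per key matters here, not the mixing function (FNV-1a). */
struct toy_key {
        uint32_t laddr, faddr;      /* v4 addresses, for brevity */
        uint16_t lport, fport;
        uint8_t  proto;
};

static uint32_t fnv1a(uint32_t h, const void *buf, size_t len)
{
        const unsigned char *p = buf;

        while (len--)
                h = (h ^ *p++) * 16777619u;
        return h;
}

static uint32_t toy_flowhash(const struct toy_key *k)
{
        uint32_t h = 2166136261u;

        h = fnv1a(h, &k->laddr, sizeof (k->laddr));
        h = fnv1a(h, &k->faddr, sizeof (k->faddr));
        h = fnv1a(h, &k->lport, sizeof (k->lport));
        h = fnv1a(h, &k->fport, sizeof (k->fport));
        h = fnv1a(h, &k->proto, sizeof (k->proto));
        return h ? h : 1;           /* keep 0 free to mean "unset" */
}

int main(void)
{
        struct toy_key k = { 0x0a000001, 0x0a000002, 80, 12345, 6 };

        printf("flowhash=%#x\n", (unsigned)toy_flowhash(&k));
        return 0;
}
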
@@ -5083,7 +4718,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                struct pf_state *s = NULL;
                struct pf_state_key *sk = NULL;
                struct pf_src_node *sn = NULL;
-#ifndef NO_APPLE_EXTENSIONS
                struct pf_ike_hdr ike;
 
                if (pd->proto == IPPROTO_UDP) {
@@ -5124,7 +4758,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                goto cleanup;
                        }
                }
-#endif
 
                /* check maximums */
                if (r->max_states && (r->states >= r->max_states)) {
@@ -5142,9 +4775,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                /* src node for translation rule */
                if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
                    ((direction == PF_OUT &&
-#ifndef NO_APPLE_EXTENSIONS
                    nr->action != PF_RDR &&
-#endif
                    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
                    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
                        REASON_SET(&reason, PFRES_SRCLIMIT);
@@ -5168,19 +4799,15 @@ cleanup:
                                pool_put(&pf_src_tree_pl, nsn);
                        }
                        if (sk != NULL) {
-#ifndef NO_APPLE_EXTENSIONS
                                if (sk->app_state)
                                        pool_put(&pf_app_state_pl,
                                            sk->app_state);
-#endif
                                pool_put(&pf_state_key_pl, sk);
                        }
                        return (PF_DROP);
                }
                bzero(s, sizeof (*s));
-#ifndef NO_APPLE_EXTENSIONS
                TAILQ_INIT(&s->unlink_hooks);
-#endif
                s->rule.ptr = r;
                s->nat_rule.ptr = nr;
                s->anchor.ptr = a;
@@ -5236,7 +4863,6 @@ cleanup:
 #endif
                        s->timeout = PFTM_ICMP_FIRST_PACKET;
                        break;
-#ifndef NO_APPLE_EXTENSIONS
                case IPPROTO_GRE:
                        s->src.state = PFGRE1S_INITIATING;
                        s->dst.state = PFGRE1S_NO_TRAFFIC;
@@ -5247,7 +4873,6 @@ cleanup:
                        s->dst.state = PFESPS_NO_TRAFFIC;
                        s->timeout = PFTM_ESP_FIRST_PACKET;
                        break;
-#endif
                default:
                        s->src.state = PFOTHERS_SINGLE;
                        s->dst.state = PFOTHERS_NO_TRAFFIC;
@@ -5293,136 +4918,14 @@ cleanup:
                        }
                }
 
-               if ((sk = pf_alloc_state_key(s)) == NULL) {
+               /* allocate state key and import values from psk */
+               if ((sk = pf_alloc_state_key(s, &psk)) == NULL) {
                        REASON_SET(&reason, PFRES_MEMORY);
                        goto cleanup;
                }
 
-               sk->proto = pd->proto;
-               sk->direction = direction;
-               sk->af = af;
-#ifndef NO_APPLE_EXTENSIONS
-               if (pd->proto == IPPROTO_UDP) {
-                       if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
-                           ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
-                               sk->proto_variant = PF_EXTFILTER_APD;
-                       } else {
-                               sk->proto_variant = nr ? nr->extfilter :
-                                   r->extfilter;
-                               if (sk->proto_variant < PF_EXTFILTER_APD)
-                                       sk->proto_variant = PF_EXTFILTER_APD;
-                       }
-               } else if (pd->proto == IPPROTO_GRE) {
-                       sk->proto_variant = pd->proto_variant;
-               }
-#endif
-               if (direction == PF_OUT) {
-                       PF_ACPY(&sk->gwy.addr, saddr, af);
-                       PF_ACPY(&sk->ext.addr, daddr, af);
-                       switch (pd->proto) {
-#ifndef NO_APPLE_EXTENSIONS
-                       case IPPROTO_UDP:
-                               sk->gwy.xport = sxport;
-                               sk->ext.xport = dxport;
-                               break;
-                       case IPPROTO_ESP:
-                               sk->gwy.xport.spi = 0;
-                               sk->ext.xport.spi = pd->hdr.esp->spi;
-                               break;
-#endif
-                       case IPPROTO_ICMP:
-#if INET6
-                       case IPPROTO_ICMPV6:
-#endif
-#ifndef NO_APPLE_EXTENSIONS
-                               sk->gwy.xport.port = nxport.port;
-                               sk->ext.xport.spi = 0;
-#else
-                               sk->gwy.port = nport;
-                               sk->ext.port = 0;
-#endif
-                               break;
-                       default:
-#ifndef NO_APPLE_EXTENSIONS
-                               sk->gwy.xport = sxport;
-                               sk->ext.xport = dxport;
-                               break;
-#else
-                               sk->gwy.port = sport;
-                               sk->ext.port = dport;
-#endif
-                       }
-#ifndef NO_APPLE_EXTENSIONS
-                       if (nr != NULL) {
-                               PF_ACPY(&sk->lan.addr, &pd->baddr, af);
-                               sk->lan.xport = bxport;
-                       } else {
-                               PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
-                               sk->lan.xport = sk->gwy.xport;
-                       }
-#else
-                       if (nr != NULL) {
-                               PF_ACPY(&sk->lan.addr, &pd->baddr, af);
-                               sk->lan.port = bport;
-                       } else {
-                               PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
-                               sk->lan.port = sk->gwy.port;
-                       }
-#endif
-               } else {
-                       PF_ACPY(&sk->lan.addr, daddr, af);
-                       PF_ACPY(&sk->ext.addr, saddr, af);
-                       switch (pd->proto) {
-                       case IPPROTO_ICMP:
-#if INET6
-                       case IPPROTO_ICMPV6:
-#endif
-#ifndef NO_APPLE_EXTENSIONS
-                               sk->lan.xport = nxport;
-                               sk->ext.xport.spi = 0;
-#else
-                               sk->lan.port = nport;
-                               sk->ext.port = 0;
-#endif
-                               break;
-#ifndef NO_APPLE_EXTENSIONS
-                       case IPPROTO_ESP:
-                               sk->ext.xport.spi = 0;
-                               sk->lan.xport.spi = pd->hdr.esp->spi;
-                               break;
-                       default:
-                               sk->lan.xport = dxport;
-                               sk->ext.xport = sxport;
-                               break;
-#else
-                       default:
-                               sk->lan.port = dport;
-                               sk->ext.port = sport;
-#endif
-                       }
-#ifndef NO_APPLE_EXTENSIONS
-                       if (nr != NULL) {
-                               PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
-                               sk->gwy.xport = bxport;
-                       } else {
-                               PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
-                               sk->gwy.xport = sk->lan.xport;
-                       }
-               }
-#else
-                       if (nr != NULL) {
-                               PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
-                               sk->gwy.port = bport;
-                       } else {
-                               PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
-                               sk->gwy.port = sk->lan.port;
-                       }
-               }
-#endif
-
                pf_set_rt_ifp(s, saddr);        /* needs s->state_key set */
 
-#ifndef NO_APPLE_EXTENSIONS
                m = pd->mp;
 
                if (sk->app_state == 0) {
@@ -5484,7 +4987,6 @@ cleanup:
                                break;
                        }
                }
-#endif
 
                if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
                        if (pd->proto == IPPROTO_TCP)
@@ -5505,7 +5007,6 @@ cleanup:
                    r->keep_state == PF_STATE_SYNPROXY) {
                        s->src.state = PF_TCPS_PROXY_SRC;
                        if (nr != NULL) {
-#ifndef NO_APPLE_EXTENSIONS
                                if (direction == PF_OUT) {
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &th->th_sport, pd->ip_sum,
@@ -5519,19 +5020,6 @@ cleanup:
                                            bxport.port, 0, af);
                                        sxport.port = th->th_dport;
                                }
-#else
-                               if (direction == PF_OUT) {
-                                       pf_change_ap(saddr, &th->th_sport,
-                                           pd->ip_sum, &th->th_sum, &pd->baddr,
-                                           bport, 0, af);
-                                       sport = th->th_sport;
-                               } else {
-                                       pf_change_ap(daddr, &th->th_dport,
-                                           pd->ip_sum, &th->th_sum, &pd->baddr,
-                                           bport, 0, af);
-                                       sport = th->th_dport;
-                               }
-#endif
                        }
                        s->src.seqhi = htonl(random());
                        /* Find mss option */
@@ -5546,7 +5034,6 @@ cleanup:
                        return (PF_SYNPROXY_DROP);
                }
 
-#ifndef NO_APPLE_EXTENSIONS
                if (sk->app_state && sk->app_state->handler) {
                        int offx = off;
 
@@ -5572,11 +5059,9 @@ cleanup:
                                m = pd->mp;
                        }
                }
-#endif
        }
 
        /* copy back packet headers if we performed NAT operations */
-#ifndef NO_APPLE_EXTENSIONS
        if (rewrite) {
                if (rewrite < off + hdrlen)
                        rewrite = off + hdrlen;
@@ -5589,14 +5074,282 @@ cleanup:
 
                m_copyback(m, off, hdrlen, pd->hdr.any);
        }
-#else
-       if (rewrite)
-               m_copyback(m, off, hdrlen, pd->hdr.any);
-#endif
 
        return (PF_PASS);
 }
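
The copy-back step above first clamps rewrite so that it covers at least the protocol header (off + hdrlen); the lines elided between the hunks apparently run the mbuf through pf_lazy_makewritable(), as the other call sites in this diff do, before m_copyback() writes the rewritten header back. A toy model of that pattern, with a flat buffer standing in for the mbuf chain:

#include <stdio.h>
#include <stddef.h>
#include <string.h>

/* Toy model: ensure the "mbuf" is writable for at least `need` bytes,
 * then copy the rewritten header back at offset `off`. The kernel's
 * pf_lazy_makewritable() defers copy-on-write until a rewrite really
 * happened; this sketch only checks capacity. */
static char *lazy_makewritable(char *pkt, size_t cap, size_t need)
{
        return (need <= cap) ? pkt : NULL;  /* kernel would unshare */
}

int main(void)
{
        char pkt[64] = "IPHDR...TCPHDR...payload";
        const char newhdr[] = "TCPHD2";
        size_t off = 8, hdrlen = sizeof (newhdr) - 1;

        char *w = lazy_makewritable(pkt, sizeof (pkt), off + hdrlen);
        if (w != NULL)
                memcpy(w + off, newhdr, hdrlen);    /* m_copyback analogue */
        printf("%s\n", pkt);
        return 0;
}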
 
+#if DUMMYNET
+/*
+ * When pf_test_dummynet() returns PF_PASS, the rule-matching parameter "rm"
+ * remains unchanged, meaning the packet did not match a dummynet rule.
+ * When the packet does match a dummynet rule, pf_test_dummynet() returns
+ * PF_PASS and zeroes out the mbuf pointer, as the packet is effectively
+ * siphoned off by dummynet.
+ */
+static int
+pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, 
+    struct mbuf **m0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
+{
+       struct mbuf             *m = *m0;
+       struct pf_rule          *am = NULL;
+       struct pf_ruleset       *rsm = NULL;
+       struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
+       sa_family_t              af = pd->af;
+       struct pf_rule          *r, *a = NULL;
+       struct pf_ruleset       *ruleset = NULL;
+       struct tcphdr           *th = pd->hdr.tcp;
+       u_short                  reason;
+       int                      hdrlen = 0;
+       int                      tag = -1;
+       unsigned int             rtableid = IFSCOPE_NONE;
+       int                      asd = 0;
+       int                      match = 0;
+       u_int8_t                 icmptype = 0, icmpcode = 0;
+       union pf_state_xport    nxport, sxport, dxport;
+       struct ip_fw_args       dnflow;
+       struct pf_rule          *prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
+       int                     found_prev_rule = (prev_matching_rule) ? 0 : 1;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+       if (!DUMMYNET_LOADED)
+               return (PF_PASS);
+       
+       if (TAILQ_EMPTY(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr)) {
+               return (PF_PASS);
+       }
+       bzero(&dnflow, sizeof(dnflow));
+
+       hdrlen = 0;
+       sxport.spi = 0;
+       dxport.spi = 0;
+       nxport.spi = 0;
+
+       /* Fragments don't have protocol headers */
+       if (!(pd->flags & PFDESC_IP_FRAG))      
+               switch (pd->proto) {
+               case IPPROTO_TCP:
+                       dnflow.fwa_id.flags = pd->hdr.tcp->th_flags;
+                       dnflow.fwa_id.dst_port = pd->hdr.tcp->th_dport;
+                       dnflow.fwa_id.src_port = pd->hdr.tcp->th_sport;
+                       sxport.port = pd->hdr.tcp->th_sport;
+                       dxport.port = pd->hdr.tcp->th_dport;
+                       hdrlen = sizeof (*th);
+                       break;
+               case IPPROTO_UDP:
+                       dnflow.fwa_id.dst_port = pd->hdr.udp->uh_dport;
+                       dnflow.fwa_id.src_port = pd->hdr.udp->uh_sport;
+                       sxport.port = pd->hdr.udp->uh_sport;
+                       dxport.port = pd->hdr.udp->uh_dport;
+                       hdrlen = sizeof (*pd->hdr.udp);
+                       break;
+#if INET
+               case IPPROTO_ICMP:
+                       if (pd->af != AF_INET)
+                               break;
+                       sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
+                       hdrlen = ICMP_MINLEN;
+                       icmptype = pd->hdr.icmp->icmp_type;
+                       icmpcode = pd->hdr.icmp->icmp_code;
+                       break;
+#endif /* INET */
+#if INET6
+               case IPPROTO_ICMPV6:
+                       if (pd->af != AF_INET6)
+                               break;
+                       sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
+                       hdrlen = sizeof (*pd->hdr.icmp6);
+                       icmptype = pd->hdr.icmp6->icmp6_type;
+                       icmpcode = pd->hdr.icmp6->icmp6_code;
+                       break;
+#endif /* INET6 */
+               case IPPROTO_GRE:
+                       if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
+                               sxport.call_id = dxport.call_id =
+                                   pd->hdr.grev1->call_id;
+                               hdrlen = sizeof (*pd->hdr.grev1);
+                       }
+                       break;
+               case IPPROTO_ESP:
+                       sxport.spi = 0;
+                       dxport.spi = pd->hdr.esp->spi;
+                       hdrlen = sizeof (*pd->hdr.esp);
+                       break;
+               }
+
+       r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);
+
+       while (r != NULL) {
+               r->evaluations++;
+               if (pfi_kif_match(r->kif, kif) == r->ifnot)
+                       r = r->skip[PF_SKIP_IFP].ptr;
+               else if (r->direction && r->direction != direction)
+                       r = r->skip[PF_SKIP_DIR].ptr;
+               else if (r->af && r->af != af)
+                       r = r->skip[PF_SKIP_AF].ptr;
+               else if (r->proto && r->proto != pd->proto)
+                       r = r->skip[PF_SKIP_PROTO].ptr;
+               else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+                   r->src.neg, kif))
+                       r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+               /* tcp/udp only. port_op always 0 in other cases */
+               else if (r->proto == pd->proto && 
+                   (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
+                   ((pd->flags & PFDESC_IP_FRAG) ||
+                   ((r->src.xport.range.op &&
+                   !pf_match_port(r->src.xport.range.op,
+                   r->src.xport.range.port[0], r->src.xport.range.port[1],
+                   th->th_sport)))))
+                       r = r->skip[PF_SKIP_SRC_PORT].ptr;
+               else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+                   r->dst.neg, NULL))
+                       r = r->skip[PF_SKIP_DST_ADDR].ptr;
+               /* tcp/udp only. port_op always 0 in other cases */
+               else if (r->proto == pd->proto &&
+                   (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
+                   r->dst.xport.range.op &&
+                   ((pd->flags & PFDESC_IP_FRAG) ||
+                   !pf_match_port(r->dst.xport.range.op,
+                   r->dst.xport.range.port[0], r->dst.xport.range.port[1],
+                   th->th_dport)))
+                       r = r->skip[PF_SKIP_DST_PORT].ptr;
+               /* icmp only. type always 0 in other cases */
+               else if (r->type && 
+                       ((pd->flags & PFDESC_IP_FRAG) ||
+                       r->type != icmptype + 1))
+                       r = TAILQ_NEXT(r, entries);
+               /* icmp only. code always 0 in other cases */
+               else if (r->code && 
+                       ((pd->flags & PFDESC_IP_FRAG) ||
+                       r->code != icmpcode + 1))
+                       r = TAILQ_NEXT(r, entries);
+               else if (r->tos && !(r->tos == pd->tos))
+                       r = TAILQ_NEXT(r, entries);
+               else if (r->rule_flag & PFRULE_FRAGMENT)
+                       r = TAILQ_NEXT(r, entries);
+               else if (pd->proto == IPPROTO_TCP &&
+                   ((pd->flags & PFDESC_IP_FRAG) ||
+                   (r->flagset & th->th_flags) != r->flags))
+                       r = TAILQ_NEXT(r, entries);
+               else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+                       r = TAILQ_NEXT(r, entries);
+               else {
+                       /* 
+                        * Need to go past the previous matching dummynet rule
+                        */
+                       if (r->anchor == NULL) {
+                               if (found_prev_rule) {
+                                       if (r->tag)
+                                               tag = r->tag;
+                                       if (PF_RTABLEID_IS_VALID(r->rtableid))
+                                               rtableid = r->rtableid;
+                                       match = 1;
+                                       *rm = r;
+                                       am = a;
+                                       rsm = ruleset;
+                                       if ((*rm)->quick)
+                                               break;
+                               } else if (r == prev_matching_rule) {
+                                       found_prev_rule = 1;
+                               }
+                               r = TAILQ_NEXT(r, entries);
+                       } else {
+                               pf_step_into_anchor(&asd, &ruleset,
+                                   PF_RULESET_DUMMYNET, &r, &a, &match);
+                       }
+               }
+               if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+                   PF_RULESET_DUMMYNET, &r, &a, &match))
+                       break;
+       }
+       r = *rm;
+       a = am;
+       ruleset = rsm;
+
+       if (!match)
+               return (PF_PASS);
+
+       REASON_SET(&reason, PFRES_DUMMYNET);
+
+       if (r->log) {
+               PFLOG_PACKET(kif, h, m, af, direction, reason, r,
+                   a, ruleset, pd);
+       }
+
+       if (r->action == PF_NODUMMYNET) {
+               int dirndx = (direction == PF_OUT);
+               
+               r->packets[dirndx]++;
+               r->bytes[dirndx] += pd->tot_len;
+
+               return (PF_PASS);
+       }
+       if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, pd)) {
+               REASON_SET(&reason, PFRES_MEMORY);
+
+               return (PF_DROP);
+       }
+
+       if (r->dnpipe && ip_dn_io_ptr != NULL) {
+               int dirndx = (direction == PF_OUT);
+               
+               r->packets[dirndx]++;
+               r->bytes[dirndx] += pd->tot_len;
+               
+               dnflow.fwa_cookie = r->dnpipe;
+               dnflow.fwa_pf_rule = r;
+               dnflow.fwa_id.addr_type = (af == AF_INET) ? 4 : 6;
+               dnflow.fwa_id.proto = pd->proto;
+               dnflow.fwa_flags = r->dntype;
+               
+               if (fwa != NULL) {
+                       dnflow.fwa_oif = fwa->fwa_oif;
+                       dnflow.fwa_oflags = fwa->fwa_oflags;
+                       /*
+                        * Note that fwa_ro, fwa_dst and fwa_ipoa are 
+                        * actually in a union, so the following works
+                        * for both IPv4 and IPv6.
+                        */
+                       dnflow.fwa_ro = fwa->fwa_ro;
+                       dnflow.fwa_dst = fwa->fwa_dst;
+                       dnflow.fwa_ipoa = fwa->fwa_ipoa;
+                       dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
+                       dnflow.fwa_origifp = fwa->fwa_origifp;
+                       dnflow.fwa_mtu = fwa->fwa_mtu;
+                       dnflow.fwa_alwaysfrag = fwa->fwa_alwaysfrag;
+                       dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
+                       dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
+               }
+               
+               if (af == AF_INET) {
+                       struct ip *iphdr = mtod(m, struct ip *);
+                       NTOHS(iphdr->ip_len);
+                       NTOHS(iphdr->ip_off);
+               }
+               /* 
+                * Don't need to unlock pf_lock as NET_THREAD_HELD_PF 
+                * allows for recursive behavior
+                */
+               ip_dn_io_ptr(m,
+                       dnflow.fwa_cookie,
+                       af == AF_INET ? 
+                               direction == PF_IN ? DN_TO_IP_IN : DN_TO_IP_OUT :
+                               direction == PF_IN ? DN_TO_IP6_IN : DN_TO_IP6_OUT,
+                       &dnflow, DN_CLIENT_PF);
+               
+               /*
+                * The packet has been siphoned off by dummynet; return a
+                * NULL mbuf so the caller can still return success.
+                */
+               *m0 = NULL;
+                
+               return (PF_PASS);
+       }
+
+       return (PF_PASS);
+}
+#endif /* DUMMYNET */
+
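
The heart of pf_test_dummynet() is the resume logic around prev_matching_rule: when dummynet re-injects a packet, fwa_pf_rule remembers the rule that already matched, and the walk must skip everything up to and including that rule before it may match again. A minimal runnable model of that walk, with a plain array standing in for the TAILQ of rules:

#include <stdio.h>
#include <stddef.h>

struct rule { const char *name; int matches; };

/* Return the first matching rule strictly after `prev`; with no
 * previous rule, the first match wins, as in the kernel loop. */
static const struct rule *next_match(const struct rule *rules, size_t n,
    const struct rule *prev)
{
        int found_prev = (prev == NULL);

        for (size_t i = 0; i < n; i++) {
                if (!rules[i].matches)
                        continue;
                if (found_prev)
                        return &rules[i];
                if (&rules[i] == prev)
                        found_prev = 1;         /* resume after this one */
        }
        return NULL;
}

int main(void)
{
        struct rule rs[] = { {"r0", 1}, {"r1", 0}, {"r2", 1}, {"r3", 1} };
        const struct rule *m1 = next_match(rs, 4, NULL);   /* r0 */
        const struct rule *m2 = next_match(rs, 4, m1);     /* r2 */

        printf("%s then %s\n", m1->name, m2->name);
        return 0;
}
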
 static int
 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
@@ -5628,11 +5381,17 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
                else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
                    r->dst.neg, NULL))
                        r = r->skip[PF_SKIP_DST_ADDR].ptr;
-               else if (r->tos && !(r->tos == pd->tos))
+                else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
+                   !(r->tos & pd->tos))
+                       r = TAILQ_NEXT(r, entries);
+                else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
+                   !(r->tos & (pd->tos & DSCP_MASK)))
+                       r = TAILQ_NEXT(r, entries);
+                else if ((r->rule_flag & PFRULE_SC) && r->tos &&
+                   ((r->tos & SCIDX_MASK) != pd->sc))
                        r = TAILQ_NEXT(r, entries);
                else if (r->os_fingerprint != PF_OSFP_ANY)
                        r = TAILQ_NEXT(r, entries);
-#ifndef NO_APPLE_EXTENSIONS
                else if (pd->proto == IPPROTO_UDP &&
                    (r->src.xport.range.op || r->dst.xport.range.op))
                        r = TAILQ_NEXT(r, entries);
@@ -5640,14 +5399,6 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
                    (r->src.xport.range.op || r->dst.xport.range.op ||
                    r->flagset))
                        r = TAILQ_NEXT(r, entries);
-#else
-               else if (pd->proto == IPPROTO_UDP &&
-                   (r->src.port_op || r->dst.port_op))
-                       r = TAILQ_NEXT(r, entries);
-               else if (pd->proto == IPPROTO_TCP &&
-                   (r->src.port_op || r->dst.port_op || r->flagset))
-                       r = TAILQ_NEXT(r, entries);
-#endif
                else if ((pd->proto == IPPROTO_ICMP ||
                    pd->proto == IPPROTO_ICMPV6) &&
                    (r->type || r->code))
@@ -5686,7 +5437,7 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
        if (r->action != PF_PASS)
                return (PF_DROP);
 
-       if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) {
+       if (pf_tag_packet(m, pd->pf_mtag, tag, -1, NULL)) {
                REASON_SET(&reason, PFRES_MEMORY);
                return (PF_DROP);
        }
@@ -5694,7 +5445,6 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
        return (PF_PASS);
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 static void
 pf_pptp_handler(struct pf_state *s, int direction, int off,
     struct pf_pdesc *pd, struct pfi_kif *kif)
@@ -5763,7 +5513,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                        return;
                }
 
-               gsk = pf_alloc_state_key(gs);
+               gsk = pf_alloc_state_key(gs, NULL);
                if (!gsk) {
                        pool_put(&pf_app_state_pl, gas);
                        pool_put(&pf_state_pl, gs);
@@ -5780,6 +5530,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gsk->lan.xport.call_id = 0;
                gsk->gwy.xport.call_id = 0;
                gsk->ext.xport.call_id = 0;
+               gsk->flowhash = pf_calc_state_key_flowhash(gsk);
                memset(gas, 0, sizeof (*gas));
                gas->u.grev1.pptp_state = s;
                STATE_INC_COUNTERS(gs);
@@ -6038,7 +5789,6 @@ pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
        int64_t d = a->u.ike.cookie - b->u.ike.cookie;
        return ((d > 0) ? 1 : ((d < 0) ? -1 : 0));
 }
-#endif
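
pf_ike_compare() maps the difference of two 64-bit IKE cookies onto {-1, 0, 1}, the usual contract for tree and sort comparators. The same pattern in a standalone sketch, including the caveat the subtraction trick carries:

#include <stdio.h>
#include <stdint.h>

/* Maps a 64-bit cookie difference onto {-1, 0, 1}. Caveat of the
 * subtraction trick: it orders correctly only while the two values
 * differ by less than 2^63; otherwise compare the operands directly. */
static int ike_cmp(uint64_t a, uint64_t b)
{
        int64_t d = (int64_t)(a - b);

        return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
}

int main(void)
{
        printf("%d %d %d\n", ike_cmp(5, 3), ike_cmp(3, 5), ike_cmp(7, 7));
        return 0;
}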
 
 static int
 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
@@ -6055,31 +5805,19 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
        int                      copyback = 0;
        struct pf_state_peer    *src, *dst;
 
-#ifndef NO_APPLE_EXTENSIONS
        key.app_state = 0;
-#endif
        key.af = pd->af;
        key.proto = IPPROTO_TCP;
        if (direction == PF_IN) {
                PF_ACPY(&key.ext.addr, pd->src, key.af);
                PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.ext.xport.port = th->th_sport;
                key.gwy.xport.port = th->th_dport;
-#else
-               key.ext.port = th->th_sport;
-               key.gwy.port = th->th_dport;
-#endif
        } else {
                PF_ACPY(&key.lan.addr, pd->src, key.af);
                PF_ACPY(&key.ext.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.lan.xport.port = th->th_sport;
                key.ext.xport.port = th->th_dport;
-#else
-               key.lan.port = th->th_sport;
-               key.ext.port = th->th_dport;
-#endif
        }
 
        STATE_LOOKUP();
@@ -6142,11 +5880,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if ((*state)->dst.seqhi == 1)
                                (*state)->dst.seqhi = htonl(random());
                        pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
-#ifndef NO_APPLE_EXTENSIONS
                            &pdst->addr, psrc->xport.port, pdst->xport.port,
-#else
-                           &pdst->addr, psrc->port, pdst->port,
-#endif
                            (*state)->dst.seqhi, 0, TH_SYN, 0,
                            (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
                        REASON_SET(reason, PFRES_SYNPROXY);
@@ -6165,11 +5899,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            TH_ACK, (*state)->src.max_win, 0, 0, 0,
                            (*state)->tag, NULL, NULL);
                        pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
-#ifndef NO_APPLE_EXTENSIONS
                            &pdst->addr, psrc->xport.port, pdst->xport.port,
-#else
-                           &pdst->addr, psrc->port, pdst->port,
-#endif
                            (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
                            TH_ACK, (*state)->dst.max_win, 0, 0, 1,
                            0, NULL, NULL);
@@ -6259,10 +5989,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                            >> sws;
                                        dws = dst->wscale & PF_WSCALE_MASK;
                                } else {
-#ifndef NO_APPLE_MODIFICATION
                                        /*
-                                        * <rdar://5786370>
-                                        *
                                         * Window scale negotiation has failed,
                                         * therefore we must restore the window
                                         * scale in the state record that we
@@ -6270,17 +5997,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                         * pf_test_rule().  Care is required to
                                         * prevent arithmetic overflow from
                                         * zeroing the window when it's
-                                        * truncated down to 16-bits.   --jhw
+                                        * truncated down to 16-bits.
                                         */
                                        u_int32_t max_win = dst->max_win;
                                        max_win <<=
                                            dst->wscale & PF_WSCALE_MASK;
                                        dst->max_win = MIN(0xffff, max_win);
-#else
-                                       /* fixup other window */
-                                       dst->max_win <<= dst->wscale &
-                                           PF_WSCALE_MASK;
-#endif
                                        /* in case of a retrans SYN|ACK */
                                        dst->wscale = 0;
                                }
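
The restored comment describes a real overflow: max_win is a 16-bit quantity, and shifting it by the window scale without widening first can truncate to zero. A standalone sketch of the arithmetic, mirroring the widen-then-clamp fix above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint16_t max_win = 0x8000;  /* 16-bit window from the TCP header */
        int wscale = 3;             /* negotiated window-scale shift */

        /* Shifting then truncating back to 16 bits wraps to zero... */
        uint16_t wrapped = (uint16_t)(max_win << wscale);
        /* ...widening first, then clamping, keeps a sane ceiling. */
        uint32_t widened = (uint32_t)max_win << wscale;
        uint16_t clamped = (widened > 0xffff) ? 0xffff : (uint16_t)widened;

        printf("wrapped=%#x widened=%#x clamped=%#x\n",
            (unsigned)wrapped, (unsigned)widened, (unsigned)clamped);
        return 0;
}
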
@@ -6298,16 +6020,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                 * the crappy stack check or if we picked up the connection
                 * after establishment)
                 */
-#ifndef NO_APPLE_MODIFICATIONS
                if (src->seqhi == 1 ||
                    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
                    src->seqhi))
                        src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
-#else
-               if (src->seqhi == 1 ||
-                   SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
-                       src->seqhi = end + MAX(1, dst->max_win << dws);
-#endif
                if (win > src->max_win)
                        src->max_win = win;
 
@@ -6363,7 +6079,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
         * options anyway.
         */
        if (dst->seqdiff && (th->th_off << 2) > (int)sizeof (struct tcphdr)) {
-#ifndef NO_APPLE_EXTENSIONS
                copyback = pf_modulate_sack(m, off, pd, th, dst);
                if (copyback == -1) {
                        REASON_SET(reason, PFRES_MEMORY);
@@ -6371,21 +6086,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                }
 
                m = pd->mp;
-#else
-               if (pf_modulate_sack(m, off, pd, th, dst))
-                       copyback = 1;
-#endif
        }
 
 
 #define MAXACKWINDOW (0xffff + 1500)   /* 1500 is an arbitrary fudge factor */
        if (SEQ_GEQ(src->seqhi, end) &&
            /* Last octet inside other's window space */
-#ifndef NO_APPLE_MODIFICATIONS
            SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
-#else
-           SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
-#endif
            /* Retrans: not more than one window back */
            (ackskew >= -MAXACKWINDOW) &&
            /* Acking not more than one reassembled fragment backwards */
@@ -6401,9 +6108,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            *state, src, dst, &copyback))
                                return (PF_DROP);
 
-#ifndef NO_APPLE_EXTENSIONS
                        m = pd->mp;
-#endif
                }
 
                /* update max window */
@@ -6413,13 +6118,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
-#ifndef NO_APPLE_MODIFICATIONS
                if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
-#else
-               if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
-                       dst->seqhi = ack + MAX((win << sws), 1);
-#endif
 
                /* update states */
                if (th->th_flags & TH_SYN)
@@ -6507,9 +6207,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
                            *state, src, dst, &copyback))
                                return (PF_DROP);
-#ifndef NO_APPLE_EXTENSIONS
                        m = pd->mp;
-#endif
                }
 
                /* update max window */
@@ -6519,13 +6217,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (SEQ_GT(end, src->seqlo))
                        src->seqlo = end;
                /* slide the window of what the other end can send */
-#ifndef NO_APPLE_MODIFICATIONS
                if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi))
                        dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
-#else
-               if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
-                       dst->seqhi = ack + MAX((win << sws), 1);
-#endif
 
                /*
                 * Cannot set dst->seqhi here since this could be a shotgunned
@@ -6567,12 +6260,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            "fwd" : "rev");
                        printf("pf: State failure on: %c %c %c %c | %c %c\n",
                            SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
-#ifndef NO_APPLE_MODIFICATIONS
                            SEQ_GEQ(seq,
                            src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
-#else
-                           SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
-#endif
                            ' ': '2',
                            (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
                            (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
@@ -6585,7 +6274,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
        /* Any packets which have gotten here are to be passed */
 
-#ifndef NO_APPLE_EXTENSIONS
        if ((*state)->state_key->app_state &&
            (*state)->state_key->app_state->handler) {
                (*state)->state_key->app_state->handler(*state, direction,
@@ -6622,74 +6310,39 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                /* Copyback sequence modulation or stateful scrub changes */
                m_copyback(m, off, sizeof (*th), th);
        }
-#else
-       /* translate source/destination address, if necessary */
-       if (STATE_TRANSLATE((*state)->state_key)) {
-               if (direction == PF_OUT)
-                       pf_change_ap(pd->src, pd->mp, &th->th_sport, pd->ip_sum,
-                           &th->th_sum, &(*state)->state_key->gwy.addr,
-                           (*state)->state_key->gwy.port, 0, pd->af);
-               else
-                       pf_change_ap(pd->dst, pd->mp, &th->th_dport, pd->ip_sum,
-                           &th->th_sum, &(*state)->state_key->lan.addr,
-                           (*state)->state_key->lan.port, 0, pd->af);
-               m_copyback(m, off, sizeof (*th), th);
-       } else if (copyback) {
-               /* Copyback sequence modulation or stateful scrub changes */
-               m_copyback(m, off, sizeof (*th), th);
-       }
-#endif
 
        return (PF_PASS);
 }
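
The window checks in this function rely throughout on serial-number arithmetic: TCP sequence numbers wrap, so ordering is decided by the sign of the 32-bit difference. A standalone sketch of the classic SEQ_GEQ definition:

#include <stdio.h>
#include <stdint.h>

/* Classic serial-number comparison: a is "at or after" b when the
 * wrapped 32-bit difference is non-negative. */
#define SEQ_GEQ(a, b)   ((int32_t)((a) - (b)) >= 0)

int main(void)
{
        uint32_t near_wrap  = 0xfffffff0u;
        uint32_t after_wrap = 0x00000010u;

        /* after_wrap is "greater" despite being numerically smaller */
        printf("%d %d\n", SEQ_GEQ(after_wrap, near_wrap),
            SEQ_GEQ(near_wrap, after_wrap));    /* prints: 1 0 */
        return 0;
}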
 
-#ifndef NO_APPLE_EXTENSIONS
 static int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
-#else
-pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
-    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
-#endif
 {
 #pragma unused(h)
        struct pf_state_peer    *src, *dst;
        struct pf_state_key_cmp  key;
        struct udphdr           *uh = pd->hdr.udp;
-#ifndef NO_APPLE_EXTENSIONS
        struct pf_app_state as;
        int dx, action, extfilter;
        key.app_state = 0;
        key.proto_variant = PF_EXTFILTER_APD;
-#endif
 
        key.af = pd->af;
        key.proto = IPPROTO_UDP;
        if (direction == PF_IN) {
                PF_ACPY(&key.ext.addr, pd->src, key.af);
                PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.ext.xport.port = uh->uh_sport;
                key.gwy.xport.port = uh->uh_dport;
                dx = PF_IN;
-#else
-               key.ext.port = uh->uh_sport;
-               key.gwy.port = uh->uh_dport;
-#endif
        } else {
                PF_ACPY(&key.lan.addr, pd->src, key.af);
                PF_ACPY(&key.ext.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.lan.xport.port = uh->uh_sport;
                key.ext.xport.port = uh->uh_dport;
                dx = PF_OUT;
-#else
-               key.lan.port = uh->uh_sport;
-               key.ext.port = uh->uh_dport;
-#endif
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
            ntohs(uh->uh_dport) == PF_IKE_PORT) {
                struct pf_ike_hdr ike;
@@ -6734,11 +6387,12 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
                *state = pf_find_state(kif, &key, dx);
        }
 
+       if ((*state) != NULL && pd != NULL &&
+               pd->flowhash == 0)
+               pd->flowhash = (*state)->state_key->flowhash;
+
        if (pf_state_lookup_aux(state, kif, direction, &action))
                return (action);
-#else
-       STATE_LOOKUP();
-#endif
 
        if (direction == (*state)->state_key->direction) {
                src = &(*state)->src;
@@ -6761,7 +6415,6 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        else
                (*state)->timeout = PFTM_UDP_SINGLE;
 
-#ifndef NO_APPLE_EXTENSIONS
        extfilter = (*state)->state_key->proto_variant;
        if (extfilter > PF_EXTFILTER_APD) {
                (*state)->state_key->ext.xport.port = key.ext.xport.port;
@@ -6801,20 +6454,6 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            (*state)->state_key->lan.xport.port, 1, pd->af);
                m_copyback(m, off, sizeof (*uh), uh);
        }
-#else
-       /* translate source/destination address, if necessary */
-       if (STATE_TRANSLATE((*state)->state_key)) {
-               if (direction == PF_OUT)
-                       pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
-                           &uh->uh_sum, &(*state)->state_key->gwy.addr,
-                           (*state)->state_key->gwy.port, 1, pd->af);
-               else
-                       pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
-                           &uh->uh_sum, &(*state)->state_key->lan.addr,
-                           (*state)->state_key->lan.port, 1, pd->af);
-               m_copyback(m, off, sizeof (*uh), uh);
-       }
-#endif
 
        return (PF_PASS);
 }
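
The IKE special case above fires only when both UDP ports equal PF_IKE_PORT; only then does pf pull the IKE header and attach an app_state comparator. A standalone sketch of the gate, assuming the conventional IKE port 500:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* PF_IKE_PORT is assumed here to be the conventional IKE port. */
#define PF_IKE_PORT 500

/* Ports arrive in network byte order, exactly as in the UDP header. */
static int is_ike(uint16_t uh_sport, uint16_t uh_dport)
{
        return ntohs(uh_sport) == PF_IKE_PORT &&
            ntohs(uh_dport) == PF_IKE_PORT;
}

int main(void)
{
        printf("%d %d\n", is_ike(htons(500), htons(500)),
            is_ike(htons(500), htons(4500)));   /* prints: 1 0 */
        return 0;
}
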
@@ -6830,10 +6469,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
        int              state_icmp = 0;
        struct pf_state_key_cmp key;
 
-#ifndef NO_APPLE_EXTENSIONS
        struct pf_app_state as;
        key.app_state = 0;
-#endif
 
        switch (pd->proto) {
 #if INET
@@ -6876,23 +6513,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                if (direction == PF_IN) {
                        PF_ACPY(&key.ext.addr, pd->src, key.af);
                        PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                        key.ext.xport.port = 0;
                        key.gwy.xport.port = icmpid;
-#else
-                       key.ext.port = 0;
-                       key.gwy.port = icmpid;
-#endif
                } else {
                        PF_ACPY(&key.lan.addr, pd->src, key.af);
                        PF_ACPY(&key.ext.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                        key.lan.xport.port = icmpid;
                        key.ext.xport.port = 0;
-#else
-                       key.lan.port = icmpid;
-                       key.ext.port = 0;
-#endif
                }
 
                STATE_LOOKUP();
@@ -6909,7 +6536,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        pf_change_a(&saddr->v4.s_addr,
                                            pd->ip_sum,
                                            (*state)->state_key->gwy.addr.v4.s_addr, 0);
-#ifndef NO_APPLE_EXTENSIONS
                                        pd->hdr.icmp->icmp_cksum =
                                            pf_cksum_fixup(
                                            pd->hdr.icmp->icmp_cksum, icmpid,
@@ -6920,14 +6546,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                            off + ICMP_MINLEN);
                                        if (!m)
                                                return (PF_DROP);
-#else
-                                       pd->hdr.icmp->icmp_cksum =
-                                           pf_cksum_fixup(
-                                           pd->hdr.icmp->icmp_cksum, icmpid,
-                                           (*state)->state_key->gwy.port, 0);
-                                       pd->hdr.icmp->icmp_id =
-                                           (*state)->state_key->gwy.port;
-#endif
                                        m_copyback(m, off, ICMP_MINLEN,
                                            pd->hdr.icmp);
                                        break;
@@ -6937,12 +6555,10 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        pf_change_a6(saddr,
                                            &pd->hdr.icmp6->icmp6_cksum,
                                            &(*state)->state_key->gwy.addr, 0);
-#ifndef NO_APPLE_EXTENSIONS
                                        m = pf_lazy_makewritable(pd, m,
                                            off + sizeof (struct icmp6_hdr));
                                        if (!m)
                                                return (PF_DROP);
-#endif
                                        m_copyback(m, off,
                                            sizeof (struct icmp6_hdr),
                                            pd->hdr.icmp6);
@@ -6956,7 +6572,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        pf_change_a(&daddr->v4.s_addr,
                                            pd->ip_sum,
                                            (*state)->state_key->lan.addr.v4.s_addr, 0);
-#ifndef NO_APPLE_EXTENSIONS
                                        pd->hdr.icmp->icmp_cksum =
                                            pf_cksum_fixup(
                                            pd->hdr.icmp->icmp_cksum, icmpid,
@@ -6967,14 +6582,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                            off + ICMP_MINLEN);
                                        if (!m)
                                                return (PF_DROP);
-#else
-                                       pd->hdr.icmp->icmp_cksum =
-                                           pf_cksum_fixup(
-                                           pd->hdr.icmp->icmp_cksum, icmpid,
-                                           (*state)->state_key->lan.port, 0);
-                                       pd->hdr.icmp->icmp_id =
-                                           (*state)->state_key->lan.port;
-#endif
                                        m_copyback(m, off, ICMP_MINLEN,
                                            pd->hdr.icmp);
                                        break;
@@ -6984,12 +6591,10 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        pf_change_a6(daddr,
                                            &pd->hdr.icmp6->icmp6_cksum,
                                            &(*state)->state_key->lan.addr, 0);
-#ifndef NO_APPLE_EXTENSIONS
                                        m = pf_lazy_makewritable(pd, m,
                                            off + sizeof (struct icmp6_hdr));
                                        if (!m)
                                                return (PF_DROP);
-#endif
                                        m_copyback(m, off,
                                            sizeof (struct icmp6_hdr),
                                            pd->hdr.icmp6);
@@ -7134,23 +6739,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, pd2.dst, key.af);
                                PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.ext.xport.port = th.th_dport;
                                key.gwy.xport.port = th.th_sport;
-#else
-                               key.ext.port = th.th_dport;
-                               key.gwy.port = th.th_sport;
-#endif
                        } else {
                                PF_ACPY(&key.lan.addr, pd2.dst, key.af);
                                PF_ACPY(&key.ext.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.lan.xport.port = th.th_dport;
                                key.ext.xport.port = th.th_sport;
-#else
-                               key.lan.port = th.th_dport;
-                               key.ext.port = th.th_sport;
-#endif
                        }
 
                        STATE_LOOKUP();
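[Annotation] The recurring removal of the NO_APPLE_EXTENSIONS guards in these hunks replaces the old per-peer `port` field with the `xport` union, so a single state-key layout can carry a TCP/UDP port, a GREv1 call id, or an ESP SPI. A hedged sketch of the shape this implies (field names as used by the diff; the authoritative declaration lives in pfvar.h):

	/* Sketch only: union as implied by key.ext.xport.port et al. */
	union pf_state_xport {
		u_int16_t	port;		/* TCP/UDP */
		u_int16_t	call_id;	/* GREv1/PPTP */
		u_int32_t	spi;		/* ESP */
	};

	struct pf_state_host {
		struct pf_addr		addr;
		union pf_state_xport	xport;	/* was: u_int16_t port */
	};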
@@ -7177,12 +6772,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        }
 
                        if (!SEQ_GEQ(src->seqhi, seq) ||
-#ifndef NO_APPLE_MODIFICATION
                            !SEQ_GEQ(seq,
                            src->seqlo - ((u_int32_t)dst->max_win << dws))) {
-#else
-                           !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
-#endif
                                if (pf_status.debug >= PF_DEBUG_MISC) {
                                        printf("pf: BAD ICMP %d:%d ",
                                            icmptype, pd->hdr.icmp->icmp_code);
@@ -7201,21 +6792,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &th.th_sport,
                                            daddr, &(*state)->state_key->lan.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->lan.xport.port, NULL,
-#else
-                                           (*state)->state_key->lan.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, pd2.af);
                                } else {
                                        pf_change_icmp(pd2.dst, &th.th_dport,
                                            saddr, &(*state)->state_key->gwy.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->gwy.xport.port, NULL,
-#else
-                                           (*state)->state_key->gwy.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, pd2.af);
                                }
@@ -7223,11 +6806,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        }
 
                        if (copyback) {
-#ifndef NO_APPLE_EXTENSIONS
                                m = pf_lazy_makewritable(pd, m, off2 + 8);
                                if (!m)
                                        return (PF_DROP);
-#endif
                                switch (pd2.af) {
 #if INET
                                case AF_INET:
@@ -7255,9 +6836,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                }
                case IPPROTO_UDP: {
                        struct udphdr           uh;
-#ifndef NO_APPLE_EXTENSIONS
                        int dx, action;
-#endif
                        if (!pf_pull_hdr(m, off2, &uh, sizeof (uh),
                            NULL, reason, pd2.af)) {
                                DPFPRINTF(PF_DEBUG_MISC,
@@ -7271,28 +6850,17 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, pd2.dst, key.af);
                                PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.ext.xport.port = uh.uh_dport;
                                key.gwy.xport.port = uh.uh_sport;
                                dx = PF_IN;
-#else
-                               key.ext.port = uh.uh_dport;
-                               key.gwy.port = uh.uh_sport;
-#endif
                        } else {
                                PF_ACPY(&key.lan.addr, pd2.dst, key.af);
                                PF_ACPY(&key.ext.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.lan.xport.port = uh.uh_dport;
                                key.ext.xport.port = uh.uh_sport;
                                dx = PF_OUT;
-#else
-                               key.lan.port = uh.uh_dport;
-                               key.ext.port = uh.uh_sport;
-#endif
                        }
 
-#ifndef NO_APPLE_EXTENSIONS
                        key.proto_variant = PF_EXTFILTER_APD;
 
                        if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
@@ -7335,40 +6903,31 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                *state = pf_find_state(kif, &key, dx);
                        }
 
+                       if (*state != NULL && pd != NULL &&
+                               pd->flowhash == 0)
+                               pd->flowhash = (*state)->state_key->flowhash;
+
                        if (pf_state_lookup_aux(state, kif, direction, &action))
                                return (action);
-#else
-                       STATE_LOOKUP();
-#endif
 
                        if (STATE_TRANSLATE((*state)->state_key)) {
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &uh.uh_sport,
                                            daddr, &(*state)->state_key->lan.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->lan.xport.port, &uh.uh_sum,
-#else
-                                           (*state)->state_key->lan.port, &uh.uh_sum,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 1, pd2.af);
                                } else {
                                        pf_change_icmp(pd2.dst, &uh.uh_dport,
                                            saddr, &(*state)->state_key->gwy.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->gwy.xport.port, &uh.uh_sum,
-#else
-                                           (*state)->state_key->gwy.port, &uh.uh_sum,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 1, pd2.af);
                                }
-#ifndef NO_APPLE_EXTENSIONS
                                m = pf_lazy_makewritable(pd, m,
                                    off2 + sizeof (uh));
                                if (!m)
                                        return (PF_DROP);
-#endif
                                switch (pd2.af) {
 #if INET
                                case AF_INET:
@@ -7410,23 +6969,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, pd2.dst, key.af);
                                PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.ext.xport.port = 0;
                                key.gwy.xport.port = iih.icmp_id;
-#else
-                               key.ext.port = 0;
-                               key.gwy.port = iih.icmp_id;
-#endif
                        } else {
                                PF_ACPY(&key.lan.addr, pd2.dst, key.af);
                                PF_ACPY(&key.ext.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.lan.xport.port = iih.icmp_id;
                                key.ext.xport.port = 0;
-#else
-                               key.lan.port = iih.icmp_id;
-                               key.ext.port = 0;
-#endif
                        }
 
                        STATE_LOOKUP();
@@ -7435,29 +6984,19 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &iih.icmp_id,
                                            daddr, &(*state)->state_key->lan.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->lan.xport.port, NULL,
-#else
-                                           (*state)->state_key->lan.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET);
                                } else {
                                        pf_change_icmp(pd2.dst, &iih.icmp_id,
                                            saddr, &(*state)->state_key->gwy.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->gwy.xport.port, NULL,
-#else
-                                           (*state)->state_key->gwy.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET);
                                }
-#ifndef NO_APPLE_EXTENSIONS
                                m = pf_lazy_makewritable(pd, m, off2 + ICMP_MINLEN);
                                if (!m)
                                        return (PF_DROP);
-#endif
                                m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
                                m_copyback(m, ipoff2, sizeof (h2), &h2);
                                m_copyback(m, off2, ICMP_MINLEN, &iih);
@@ -7484,23 +7023,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, pd2.dst, key.af);
                                PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.ext.xport.port = 0;
                                key.gwy.xport.port = iih.icmp6_id;
-#else
-                               key.ext.port = 0;
-                               key.gwy.port = iih.icmp6_id;
-#endif
                        } else {
                                PF_ACPY(&key.lan.addr, pd2.dst, key.af);
                                PF_ACPY(&key.ext.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.lan.xport.port = iih.icmp6_id;
                                key.ext.xport.port = 0;
-#else
-                               key.lan.port = iih.icmp6_id;
-                               key.ext.port = 0;
-#endif
                        }
 
                        STATE_LOOKUP();
@@ -7509,30 +7038,20 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &iih.icmp6_id,
                                            daddr, &(*state)->state_key->lan.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->lan.xport.port, NULL,
-#else
-                                           (*state)->state_key->lan.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET6);
                                } else {
                                        pf_change_icmp(pd2.dst, &iih.icmp6_id,
                                            saddr, &(*state)->state_key->gwy.addr,
-#ifndef NO_APPLE_EXTENSIONS
                                            (*state)->state_key->gwy.xport.port, NULL,
-#else
-                                           (*state)->state_key->gwy.port, NULL,
-#endif
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET6);
                                }
-#ifndef NO_APPLE_EXTENSIONS
                                m = pf_lazy_makewritable(pd, m, off2 +
                                    sizeof (struct icmp6_hdr));
                                if (!m)
                                        return (PF_DROP);
-#endif
                                m_copyback(m, off, sizeof (struct icmp6_hdr),
                                    pd->hdr.icmp6);
                                m_copyback(m, ipoff2, sizeof (h2_6), &h2_6);
@@ -7550,23 +7069,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, pd2.dst, key.af);
                                PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.ext.xport.port = 0;
                                key.gwy.xport.port = 0;
-#else
-                               key.ext.port = 0;
-                               key.gwy.port = 0;
-#endif
                        } else {
                                PF_ACPY(&key.lan.addr, pd2.dst, key.af);
                                PF_ACPY(&key.ext.addr, pd2.src, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                                key.lan.xport.port = 0;
                                key.ext.xport.port = 0;
-#else
-                               key.lan.port = 0;
-                               key.ext.port = 0;
-#endif
                        }
 
                        STATE_LOOKUP();
@@ -7588,25 +7097,17 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                switch (pd2.af) {
 #if INET
                                case AF_INET:
-#ifndef NO_APPLE_EXTENSIONS
                                        m = pf_lazy_makewritable(pd, m,
                                            ipoff2 + sizeof (h2));
                                        if (!m)
                                                return (PF_DROP);
-#endif
-                                       m_copyback(m, off, ICMP_MINLEN,
-                                           pd->hdr.icmp);
-                                       m_copyback(m, ipoff2, sizeof (h2), &h2);
-                                       break;
 #endif /* INET */
 #if INET6
                                case AF_INET6:
-#ifndef NO_APPLE_EXTENSIONS
                                        m = pf_lazy_makewritable(pd, m,
                                            ipoff2 + sizeof (h2_6));
                                        if (!m)
                                                return (PF_DROP);
-#endif
                                        m_copyback(m, off,
                                            sizeof (struct icmp6_hdr),
                                            pd->hdr.icmp6);
@@ -7624,7 +7125,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
        }
 }
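[Annotation] Every copyback path in pf_test_state_icmp() now goes through pf_lazy_makewritable() unconditionally. The call makes the mbuf chain writable (copying if it is shared) only when a header actually has to be rewritten, tracks the writable length in pd->lmw, and returns NULL on failure, which every caller maps to PF_DROP. The idiom, reduced to a sketch drawn from the hunks above:

	/* Hedged sketch of the rewrite idiom used throughout */
	m = pf_lazy_makewritable(pd, m, off + ICMP_MINLEN);	/* CoW up to len */
	if (m == NULL)
		return (PF_DROP);	/* could not obtain a writable chain */
	pd->hdr.icmp->icmp_id = (*state)->state_key->lan.xport.port;
	m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);	/* flush edited header */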
 
-#ifndef NO_APPLE_EXTENSIONS
 static int
 pf_test_state_grev1(struct pf_state **state, int direction,
     struct pfi_kif *kif, int off, struct pf_pdesc *pd)
@@ -7726,7 +7226,7 @@ pf_test_state_grev1(struct pf_state **state, int direction,
        return (PF_PASS);
 }
 
-int
+static int
 pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
     int off, struct pf_pdesc *pd)
 {
@@ -7814,6 +7314,11 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
                }
        }
 
+       if (*state != NULL && pd != NULL &&
+               pd->flowhash == 0) {
+               pd->flowhash = (*state)->state_key->flowhash;
+       }
+
        if (pf_state_lookup_aux(state, kif, direction, &action))
                return (action);
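[Annotation] This added block (and its twin in the UDP-over-ICMP path above) back-fills pd->flowhash from the matched state when the packet arrived without one, so the tagging code at the end of pf_test()/pf_test6() can stamp the mbuf with a stable per-flow hash:

	/* Sketch: inherit the flow hash from the state on first sight */
	if (*state != NULL && pd != NULL && pd->flowhash == 0)
		pd->flowhash = (*state)->state_key->flowhash;
	/* later, pf_tag_packet() copies pd->flowhash into pftag_flowhash */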
 
@@ -7878,7 +7383,6 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
        return (PF_PASS);
 }
-#endif
 
 static int
 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
@@ -7887,31 +7391,19 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
        struct pf_state_peer    *src, *dst;
        struct pf_state_key_cmp  key;
 
-#ifndef NO_APPLE_EXTENSIONS
        key.app_state = 0;
-#endif
        key.af = pd->af;
        key.proto = pd->proto;
        if (direction == PF_IN) {
                PF_ACPY(&key.ext.addr, pd->src, key.af);
                PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.ext.xport.port = 0;
                key.gwy.xport.port = 0;
-#else
-               key.ext.port = 0;
-               key.gwy.port = 0;
-#endif
        } else {
                PF_ACPY(&key.lan.addr, pd->src, key.af);
                PF_ACPY(&key.ext.addr, pd->dst, key.af);
-#ifndef NO_APPLE_EXTENSIONS
                key.lan.xport.port = 0;
                key.ext.xport.port = 0;
-#else
-               key.lan.port = 0;
-               key.ext.port = 0;
-#endif
        }
 
        STATE_LOOKUP();
@@ -7938,11 +7430,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
                (*state)->timeout = PFTM_OTHER_SINGLE;
 
        /* translate source/destination address, if necessary */
-#ifndef NO_APPLE_EXTENSIONS
        if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
-#else
-       if (STATE_TRANSLATE((*state)->state_key)) {
-#endif
                if (direction == PF_OUT) {
                        switch (pd->af) {
 #if INET
@@ -8143,7 +7631,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
            (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
                panic("pf_route: invalid parameters");
 
-       if (pd->pf_mtag->routed++ > 3) {
+       if (pd->pf_mtag->pftag_routed++ > 3) {
                m0 = *m;
                *m = NULL;
                goto bad;
@@ -8168,7 +7656,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 
        ro = &iproute;
        bzero((caddr_t)ro, sizeof (*ro));
-       dst = satosin(&ro->ro_dst);
+       dst = satosin((void *)&ro->ro_dst);
        dst->sin_family = AF_INET;
        dst->sin_len = sizeof (*dst);
        dst->sin_addr = ip->ip_dst;
@@ -8185,7 +7673,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
                ro->ro_rt->rt_use++;
 
                if (ro->ro_rt->rt_flags & RTF_GATEWAY)
-                       dst = satosin(ro->ro_rt->rt_gateway);
+                       dst = satosin((void *)ro->ro_rt->rt_gateway);
                RT_UNLOCK(ro->ro_rt);
        } else {
                if (TAILQ_EMPTY(&r->rpool.list)) {
@@ -8211,7 +7699,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
                goto bad;
 
        if (oifp != ifp) {
-               if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+               if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS)
                        goto bad;
                else if (m0 == NULL)
                        goto done;
@@ -8279,7 +7767,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
                ip->ip_sum = 0;
                if (sw_csum & CSUM_DELAY_IP)
                        ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
-               error = ifnet_output(ifp, PF_INET, m0, ro, sintosa(dst));
+               error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
                goto done;
        }
 
@@ -8300,12 +7788,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
        m1 = m0;
 
        /* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
-#if BYTE_ORDER != BIG_ENDIAN           
+#if BYTE_ORDER != BIG_ENDIAN
        NTOHS(ip->ip_off);
        NTOHS(ip->ip_len);
 #endif
        error = ip_fragment(m0, ifp, ifp->if_mtu, sw_csum);
-       
+
        if (error) {
                m0 = NULL;
                goto bad;
@@ -8315,7 +7803,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
                m1 = m0->m_nextpkt;
                m0->m_nextpkt = 0;
                if (error == 0)
-                       error = ifnet_output(ifp, PF_INET, m0, ro,
+                       error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
                            sintosa(dst));
                else
                        m_freem(m0);
@@ -8357,7 +7845,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
            (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
                panic("pf_route6: invalid parameters");
 
-       if (pd->pf_mtag->routed++ > 3) {
+       if (pd->pf_mtag->pftag_routed++ > 3) {
                m0 = *m;
                *m = NULL;
                goto bad;
@@ -8392,7 +7880,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 
                if ((pf_mtag = pf_get_mtag(m0)) == NULL)
                        goto bad;
-               pf_mtag->flags |= PF_TAG_GENERATED;
+               pf_mtag->pftag_flags |= PF_TAG_GENERATED;
                ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
                return;
        }
@@ -8419,7 +7907,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
                goto bad;
 
        if (oifp != ifp) {
-               if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+               if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS)
                        goto bad;
                else if (m0 == NULL)
                        goto done;
@@ -8438,7 +7926,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
        if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
                dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
        if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
-               error = nd6_output(ifp, ifp, m0, dst, NULL);
+               error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
        } else {
                in6_ifstat_inc(ifp, ifs6_in_toobig);
                if (r->rt != PF_DUPTO)
@@ -8549,22 +8037,24 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
 }
 
 #if INET
-#ifndef NO_APPLE_EXTENSIONS
 #define PF_APPLE_UPDATE_PDESC_IPv4()                           \
        do {                                                    \
                if (m && pd.mp && m != pd.mp) {                 \
                        m = pd.mp;                              \
                        h = mtod(m, struct ip *);               \
+                       pd.pf_mtag = pf_get_mtag(m);            \
                }                                               \
        } while (0)
-#endif
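[Annotation] Because pf_lazy_makewritable() may substitute a different mbuf (tracked in pd.mp), pf_test() must resynchronize its locals after every helper that can write; the macro above does exactly that, now also refreshing pd.pf_mtag. Its call sites all follow the same three-line pattern:

	action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
	if (pd.lmw < 0)			/* writable-copy failure inside helper */
		goto done;
	PF_APPLE_UPDATE_PDESC_IPv4();	/* refresh m, h, pd.pf_mtag from pd.mp */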
 
 int
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
-    struct ether_header *eh)
+    struct ether_header *eh, struct ip_fw_args *fwa)
 {
+#if !DUMMYNET
+#pragma unused(fwa)
+#endif
        struct pfi_kif          *kif;
-       u_short                  action, reason = 0, log = 0;
+       u_short                  action = PF_PASS, reason = 0, log = 0;
        struct mbuf             *m = *m0;
        struct ip               *h = 0;
        struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
@@ -8587,7 +8077,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                return (PF_DROP);
        }
 
-       if (pd.pf_mtag->flags & PF_TAG_GENERATED)
+       if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED)
                return (PF_PASS);
 
        kif = (struct pfi_kif *)ifp->if_pf_kif;
@@ -8605,6 +8095,22 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                panic("non-M_PKTHDR is passed to pf_test");
 #endif /* DIAGNOSTIC */
 
+       /* initialize enough of pd for the done label */
+       h = mtod(m, struct ip *);
+       pd.mp = m;
+       pd.lmw = 0;
+       pd.pf_mtag = pf_get_mtag(m);
+       pd.src = (struct pf_addr *)&h->ip_src;
+       pd.dst = (struct pf_addr *)&h->ip_dst;
+       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
+       pd.ip_sum = &h->ip_sum;
+       pd.proto = h->ip_p;
+       pd.proto_variant = 0;
+       pd.af = AF_INET;
+       pd.tos = h->ip_tos;
+       pd.tot_len = ntohs(h->ip_len);
+       pd.eh = eh;
+
        if (m->m_pkthdr.len < (int)sizeof (*h)) {
                action = PF_DROP;
                REASON_SET(&reason, PFRES_SHORT);
@@ -8612,11 +8118,22 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                goto done;
        }
 
+#if DUMMYNET
+       if (fwa != NULL && fwa->fwa_pf_rule != NULL)
+               goto nonormalize;
+#endif /* DUMMYNET */
+
        /* We do IP header normalization and packet reassembly here */
-       if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
+       action = pf_normalize_ip(m0, dir, kif, &reason, &pd);
+       pd.mp = m = *m0;
+       if (action != PF_PASS || pd.lmw < 0) {
                action = PF_DROP;
                goto done;
        }
+
+#if DUMMYNET
+nonormalize:
+#endif /* DUMMYNET */
        m = *m0;        /* pf_normalize messes with m0 */
        h = mtod(m, struct ip *);
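[Annotation] The new ip_fw_args parameter threads dummynet state through pf. A packet re-injected by dummynet arrives with fwa->fwa_pf_rule set and jumps straight to the nonormalize label, since it was already normalized before being queued; fresh packets instead pass through the per-protocol gate repeated below for fragments, TCP, UDP, ICMP, ESP, GRE and the default case:

	#if DUMMYNET
		/* divert to dummynet before state/rule evaluation */
		action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
		if (action == PF_DROP || m == NULL) {
			*m0 = NULL;	/* consumed (queued) or dropped */
			return (action);
		}
	#endif /* DUMMYNET */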
 
@@ -8633,18 +8150,32 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
        PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
        pd.ip_sum = &h->ip_sum;
        pd.proto = h->ip_p;
-#ifndef NO_APPLE_EXTENSIONS
        pd.proto_variant = 0;
        pd.mp = m;
        pd.lmw = 0;
-#endif
+       pd.pf_mtag = pf_get_mtag(m);
        pd.af = AF_INET;
        pd.tos = h->ip_tos;
+       pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
        pd.tot_len = ntohs(h->ip_len);
        pd.eh = eh;
+       if (pd.pf_mtag != NULL && pd.pf_mtag->pftag_flowhash != 0) {
+               pd.flowhash = pd.pf_mtag->pftag_flowhash;
+               pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ?
+                   PFDESC_FLOW_ADV : 0;
+       }
 
        /* handle fragments that didn't get reassembled by normalization */
        if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
+               pd.flags |= PFDESC_IP_FRAG;
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_fragment(&r, dir, kif, m, h,
                    &pd, &a, &ruleset);
                goto done;
@@ -8663,21 +8194,25 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                pd.p_len = pd.tot_len - off - (th.th_off << 2);
                if ((th.th_flags & TH_ACK) && pd.p_len == 0)
                        pqid = 1;
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
-#endif
                if (action == PF_DROP)
                        goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -8707,15 +8242,19 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-#ifndef NO_APPLE_EXTENSIONS
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
                    &reason);
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
-#else
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -8738,13 +8277,19 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = action != PF_PASS;
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
                    &reason);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -8758,7 +8303,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                break;
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        case IPPROTO_ESP: {
                struct pf_esp_hdr       esp;
 
@@ -8768,6 +8312,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = action != PF_PASS;
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_esp(&s, dir, kif, off, &pd);
                if (pd.lmw < 0)
                        goto done;
@@ -8793,6 +8345,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = (action != PF_PASS);
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
                    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
                        if (ntohs(grev1.payload_length) >
@@ -8823,15 +8383,20 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
 
                /* not GREv1/PPTP, so treat as ordinary GRE... */
        }
-#endif
 
        default:
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_other(&s, dir, kif, &pd);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -8846,10 +8411,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
-#ifndef NO_APPLE_EXTENSIONS
        *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv4();
-#endif
 
        if (action == PF_PASS && h->ip_hl > 5 &&
            !((s && s->allow_opts) || r->allow_opts)) {
@@ -8861,20 +8424,31 @@ done:
                    (unsigned int) h->ip_hl));
        }
 
-       if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid))
+       if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
+           pd.flowhash != 0)
                (void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0,
-                   r->rtableid);
+                   r->rtableid, &pd);
 
-#if ALTQ
-       if (action == PF_PASS && r->qid) {
-               if (pqid || (pd.tos & IPTOS_LOWDELAY))
-                       pd.pf_mtag->qid = r->pqid;
-               else
-                       pd.pf_mtag->qid = r->qid;
+       if (action == PF_PASS) {
+#if PF_ALTQ
+               if (altq_allowed && r->qid) {
+                       if (pqid || (pd.tos & IPTOS_LOWDELAY))
+                               pd.pf_mtag->pftag_qid = r->pqid;
+                       else
+                               pd.pf_mtag->pftag_qid = r->qid;
+               }
+#endif /* PF_ALTQ */
                /* add hints for ecn */
-               pd.pf_mtag->hdr = h;
+               pd.pf_mtag->pftag_hdr = h;
+               /* record address family */
+               pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
+               pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
+               /* record TCP vs. non-TCP */
+               if (pd.proto == IPPROTO_TCP)
+                       pd.pf_mtag->pftag_flags |= PF_TAG_TCP;
+               else
+                       pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP;
        }
-#endif /* ALTQ */
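[Annotation] All pf_mtag members now carry a pftag_ prefix (flags, qid, hdr, routed, flowhash), reflecting the tag's move into the packet header itself (see pf_find_mtag() further down). The PF_PASS block also records the address family and TCP-ness so the new classq/PF_ALTQ scheduler code can do ECN marking without re-parsing headers. A plausible shape of the structure, hedged (field order and types here are assumptions; the authoritative definition is in the mbuf headers):

	/* Sketch only; layout is an assumption */
	struct pf_mtag {
		void		*pftag_hdr;	/* saved IP header pos, for ECN */
		unsigned int	pftag_rtableid;	/* alternate routing table id */
		u_int16_t	pftag_tag;	/* pf tag value */
		u_int16_t	pftag_routed;	/* pf_route() recursion guard */
		u_int32_t	pftag_flowhash;	/* per-flow hash */
		u_int16_t	pftag_qid;	/* ALTQ queue id */
		u_int16_t	pftag_flags;	/* PF_TAG_* */
	};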
 
        /*
         * connections redirected to loopback should not match sockets
@@ -8886,7 +8460,7 @@ done:
            (s->nat_rule.ptr->action == PF_RDR ||
            s->nat_rule.ptr->action == PF_BINAT) &&
            (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
-               pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+               pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
 
        if (log) {
                struct pf_rule *lr;
@@ -8966,7 +8540,6 @@ done:
                            tr->dst.neg);
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
 
        if (*m0) {
@@ -8983,7 +8556,6 @@ done:
 
                *m0 = m;
        }
-#endif
 
        if (action == PF_SYNPROXY_DROP) {
                m_freem(*m0);
@@ -8998,7 +8570,6 @@ done:
 #endif /* INET */
 
 #if INET6
-#ifndef NO_APPLE_EXTENSIONS
 #define PF_APPLE_UPDATE_PDESC_IPv6()                           \
        do {                                                    \
                if (m && pd.mp && m != pd.mp) {                 \
@@ -9008,14 +8579,16 @@ done:
                        h = mtod(m, struct ip6_hdr *);          \
                }                                               \
        } while (0)
-#endif
 
 int
 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
-    struct ether_header *eh)
+    struct ether_header *eh, struct ip_fw_args *fwa)
 {
+#if !DUMMYNET
+#pragma unused(fwa)
+#endif
        struct pfi_kif          *kif;
-       u_short                  action, reason = 0, log = 0;
+       u_short                  action = PF_PASS, reason = 0, log = 0;
        struct mbuf             *m = *m0, *n = NULL;
        struct ip6_hdr          *h;
        struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
@@ -9024,6 +8597,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
        struct pf_ruleset       *ruleset = NULL;
        struct pf_pdesc          pd;
        int                      off, terminal = 0, dirndx, rh_cnt = 0;
+       u_int8_t                 nxt;
 
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
@@ -9038,7 +8612,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                return (PF_DROP);
        }
 
-       if (pd.pf_mtag->flags & PF_TAG_GENERATED)
+       if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED)
                return (PF_PASS);
 
        kif = (struct pfi_kif *)ifp->if_pf_kif;
@@ -9058,6 +8632,29 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
 
        h = mtod(m, struct ip6_hdr *);
 
+       nxt = h->ip6_nxt;
+       off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
+       pd.mp = m;
+       pd.lmw = 0;
+       pd.pf_mtag = pf_get_mtag(m);
+       pd.src = (struct pf_addr *)&h->ip6_src;
+       pd.dst = (struct pf_addr *)&h->ip6_dst;
+       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
+       pd.ip_sum = NULL;
+       pd.af = AF_INET6;
+       pd.proto = nxt;
+       pd.proto_variant = 0;
+       pd.tos = 0;
+       pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
+       pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
+       pd.eh = eh;
+
+       if (pd.pf_mtag->pftag_flowhash != 0) {
+               pd.flowhash = pd.pf_mtag->pftag_flowhash;
+               pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ?
+                   PFDESC_FLOW_ADV : 0;
+       }
+
        if (m->m_pkthdr.len < (int)sizeof (*h)) {
                action = PF_DROP;
                REASON_SET(&reason, PFRES_SHORT);
@@ -9065,12 +8662,22 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                goto done;
        }
 
+#if DUMMYNET
+       if (fwa != NULL && fwa->fwa_pf_rule != NULL)
+               goto nonormalize;
+#endif /* DUMMYNET */
+
        /* We do IP header normalization and packet reassembly here */
-       if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
+       action = pf_normalize_ip6(m0, dir, kif, &reason, &pd);
+       pd.mp = m = *m0;
+       if (action != PF_PASS || pd.lmw < 0) {
                action = PF_DROP;
                goto done;
        }
-       m = *m0;        /* pf_normalize messes with m0 */
+
+#if DUMMYNET
+nonormalize:
+#endif /* DUMMYNET */
        h = mtod(m, struct ip6_hdr *);
 
 #if 1
@@ -9096,56 +8703,54 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
 
        off = ((caddr_t)h - m->m_data) + sizeof (struct ip6_hdr);
        pd.proto = h->ip6_nxt;
-#ifndef NO_APPLE_EXTENSIONS
        pd.proto_variant = 0;
        pd.mp = m;
        pd.lmw = 0;
-#endif
-       do {
-               switch (pd.proto) {
-               case IPPROTO_FRAGMENT:
-                       action = pf_test_fragment(&r, dir, kif, m, h,
-                           &pd, &a, &ruleset);
-                       if (action == PF_DROP)
-                               REASON_SET(&reason, PFRES_FRAG);
-                       goto done;
-               case IPPROTO_ROUTING: {
-                       struct ip6_rthdr rthdr;
+       pd.pf_mtag = pf_get_mtag(m);
 
-                       if (rh_cnt++) {
-                               DPFPRINTF(PF_DEBUG_MISC,
-                                   ("pf: IPv6 more than one rthdr\n"));
-                               action = PF_DROP;
-                               REASON_SET(&reason, PFRES_IPOPTIONS);
-                               log = 1;
-                               goto done;
-                       }
-                       if (!pf_pull_hdr(m, off, &rthdr, sizeof (rthdr), NULL,
+       do {
+               switch (nxt) {
+               case IPPROTO_FRAGMENT: {
+                       struct ip6_frag ip6f;
+
+                       pd.flags |= PFDESC_IP_FRAG;
+                       if (!pf_pull_hdr(m, off, &ip6f, sizeof ip6f, NULL,
                            &reason, pd.af)) {
                                DPFPRINTF(PF_DEBUG_MISC,
-                                   ("pf: IPv6 short rthdr\n"));
+                                   ("pf: IPv6 short fragment header\n"));
                                action = PF_DROP;
                                REASON_SET(&reason, PFRES_SHORT);
                                log = 1;
                                goto done;
                        }
-                       if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
-                               DPFPRINTF(PF_DEBUG_MISC,
-                                   ("pf: IPv6 rthdr0\n"));
-                               action = PF_DROP;
-                               REASON_SET(&reason, PFRES_IPOPTIONS);
+                       pd.proto = nxt = ip6f.ip6f_nxt;
+#if DUMMYNET
+                       /* Traffic goes through dummynet first */
+                       action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+                       if (action == PF_DROP || m == NULL) {
+                               *m0 = NULL;
+                               return (action);
+                       }
+#endif /* DUMMYNET */
+                       action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a,
+                           &ruleset);
+                       if (action == PF_DROP) {
+                               REASON_SET(&reason, PFRES_FRAG);
                                log = 1;
-                               goto done;
                        }
-                       /* FALLTHROUGH */
+                       goto done;
                }
+               case IPPROTO_ROUTING:
+                       ++rh_cnt;
+                       /* FALL THROUGH */
+
                case IPPROTO_AH:
                case IPPROTO_HOPOPTS:
                case IPPROTO_DSTOPTS: {
                        /* get next header and header length */
                        struct ip6_ext  opt6;
 
-                       if (!pf_pull_hdr(m, off, &opt6, sizeof (opt6),
+                       if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
                            NULL, &reason, pd.af)) {
                                DPFPRINTF(PF_DEBUG_MISC,
                                    ("pf: IPv6 short opt\n"));
@@ -9157,7 +8762,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                                off += (opt6.ip6e_len + 2) * 4;
                        else
                                off += (opt6.ip6e_len + 1) * 8;
-                       pd.proto = opt6.ip6e_nxt;
+                       nxt = opt6.ip6e_nxt;
                        /* goto the next header */
                        break;
                }
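[Annotation] pf_test6() now walks the IPv6 extension-header chain with a local nxt instead of overwriting pd.proto at every step, and a fragment header no longer short-circuits processing: it sets PFDESC_IP_FRAG, runs the dummynet gate, then falls into pf_test_fragment(). The scan, reduced to a hedged sketch:

	/* Sketch of the extension-header walk */
	nxt = h->ip6_nxt;
	off = sizeof (struct ip6_hdr);
	while (nxt == IPPROTO_AH || nxt == IPPROTO_HOPOPTS ||
	    nxt == IPPROTO_ROUTING || nxt == IPPROTO_DSTOPTS) {
		struct ip6_ext opt6;

		if (!pf_pull_hdr(m, off, &opt6, sizeof (opt6), NULL,
		    &reason, AF_INET6))
			return (PF_DROP);
		off += (nxt == IPPROTO_AH) ?
		    (opt6.ip6e_len + 2) * 4 :	/* AH counts 32-bit words */
		    (opt6.ip6e_len + 1) * 8;	/* others count 64-bit units */
		nxt = opt6.ip6e_nxt;		/* goto the next header */
	}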
@@ -9183,21 +8788,25 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        goto done;
                }
                pd.p_len = pd.tot_len - off - (th.th_off << 2);
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
-#endif
                if (action == PF_DROP)
                        goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -9227,15 +8836,19 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        REASON_SET(&reason, PFRES_SHORT);
                        goto done;
                }
-#ifndef NO_APPLE_EXTENSIONS
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
                    &reason);
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
-#else
-               action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -9258,13 +8871,19 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = action != PF_PASS;
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_icmp(&s, dir, kif,
                    m, off, h, &pd, &reason);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -9278,7 +8897,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                break;
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        case IPPROTO_ESP: {
                struct pf_esp_hdr       esp;
 
@@ -9288,6 +8906,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = action != PF_PASS;
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_esp(&s, dir, kif, off, &pd);
                if (pd.lmw < 0)
                        goto done;
@@ -9314,6 +8940,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
                        log = (action != PF_PASS);
                        goto done;
                }
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
                    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
                        if (ntohs(grev1.payload_length) >
@@ -9344,15 +8978,20 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
 
                /* not GREv1/PPTP, so treat as ordinary GRE... */
        }
-#endif
 
        default:
+#if DUMMYNET
+               /* Traffic goes through dummynet first */
+               action = pf_test_dummynet(&r, dir, kif, &m, &pd, fwa);
+               if (action == PF_DROP || m == NULL) {
+                       *m0 = NULL;
+                       return (action);
+               }
+#endif /* DUMMYNET */
                action = pf_test_state_other(&s, dir, kif, &pd);
-#ifndef NO_APPLE_EXTENSIONS
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
-#endif
                if (action == PF_PASS) {
 #if NPFSYNC
                        pfsync_update_state(s);
@@ -9367,10 +9006,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
        }
 
 done:
-#ifndef NO_APPLE_EXTENSIONS
        *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv6();
-#endif
 
        if (n != m) {
                m_freem(n);
@@ -9387,27 +9024,37 @@ done:
                    ("pf: dropping packet with dangerous v6 headers\n"));
        }
 
-       if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid))
+       if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) || pd.flowhash != 0)
                (void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0,
-                   r->rtableid);
+                   r->rtableid, &pd);
 
-#if ALTQ
-       if (action == PF_PASS && r->qid) {
-               if (pd.tos & IPTOS_LOWDELAY)
-                       pd.pf_mtag->qid = r->pqid;
-               else
-                       pd.pf_mtag->qid = r->qid;
+       if (action == PF_PASS) {
+#if PF_ALTQ
+               if (altq_allowed && r->qid) {
+                       if (pd.tos & IPTOS_LOWDELAY)
+                               pd.pf_mtag->pftag_qid = r->pqid;
+                       else
+                               pd.pf_mtag->pftag_qid = r->qid;
+               }
+#endif /* PF_ALTQ */
                /* add hints for ecn */
-               pd.pf_mtag->hdr = h;
+               pd.pf_mtag->pftag_hdr = h;
+               /* record address family */
+               pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
+               pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
+               /* record TCP vs. non-TCP */
+               if (pd.proto == IPPROTO_TCP)
+                       pd.pf_mtag->pftag_flags |= PF_TAG_TCP;
+               else
+                       pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP;
        }
-#endif /* ALTQ */
 
        if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
            pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
            (s->nat_rule.ptr->action == PF_RDR ||
            s->nat_rule.ptr->action == PF_BINAT) &&
            IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
-               pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+               pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
 
        if (log) {
                struct pf_rule *lr;
@@ -9495,7 +9142,6 @@ done:
                /* pf_route6 can free the mbuf causing *m0 to become NULL */
                pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
 #else
-#ifndef NO_APPLE_EXTENSIONS
        VERIFY(m == NULL || pd.mp == NULL || pd.mp == m);
 
        if (*m0) {
@@ -9526,23 +9172,7 @@ done:
                /* pf_route6 can free the mbuf causing *m0 to become NULL */
                pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
        }
-#else
-       if (action != PF_SYNPROXY_DROP && r->rt)
-               /* pf_route6 can free the mbuf causing *m0 to become NULL */
-               pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
-
-       if (action == PF_PASS) {
-               m = *m0;
-               h = mtod(m, struct ip6_hdr *);
-       }
-
-       if (action == PF_SYNPROXY_DROP) {
-               m_freem(*m0);
-               *m0 = NULL;
-               action = PF_PASS;
-       }
-#endif
-#endif
+#endif /* 0 */
 
        return (action);
 }
@@ -9627,41 +9257,16 @@ pool_put(struct pool *pp, void *v)
 struct pf_mtag *
 pf_find_mtag(struct mbuf *m)
 {
-#if !PF_PKTHDR
-       struct m_tag    *mtag;
-
-       if ((mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
-           KERNEL_TAG_TYPE_PF, NULL)) == NULL)
-               return (NULL);
-
-       return ((struct pf_mtag *)(mtag + 1));
-#else
        if (!(m->m_flags & M_PKTHDR))
                return (NULL);
 
-       return (&m->m_pkthdr.pf_mtag);
-#endif /* PF_PKTHDR */
+       return (m_pftag(m));
 }
 
 struct pf_mtag *
 pf_get_mtag(struct mbuf *m)
 {
-#if !PF_PKTHDR
-       struct m_tag    *mtag;
-
-       if ((mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
-           NULL)) == NULL) {
-               mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
-                   sizeof (struct pf_mtag), M_NOWAIT, m);
-               if (mtag == NULL)
-                       return (NULL);
-               bzero(mtag + 1, sizeof (struct pf_mtag));
-               m_tag_prepend(m, mtag);
-       }
-       return ((struct pf_mtag *)(mtag + 1));
-#else
        return (pf_find_mtag(m));
-#endif /* PF_PKTHDR */
 }
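[Annotation] pf_find_mtag()/pf_get_mtag() shrink to trivial accessors because the tag is now a built-in member of every packet header rather than an allocated m_tag: lookup cannot fail once the mbuf has M_PKTHDR, and the hot path no longer needs an M_NOWAIT allocation. A sketch of the accessor this relies on (assuming a pkthdr-resident field, as the M_PKTHDR check implies):

	/* Sketch; the real m_pftag() lives in the mbuf layer */
	static inline struct pf_mtag *
	m_pftag(struct mbuf *m)
	{
		return (&m->m_pkthdr.pf_mtag);	/* no allocation, never NULL */
	}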
 
 uint64_t
index 4c05205baf47a7c00cf1933fd706c00072e792ed..66d939f92542d4306e326c51dbf960cad2b8b2ef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -148,7 +148,8 @@ pfi_kif_get(const char *kif_name)
 
        bzero(&s, sizeof (s));
        strlcpy(s.pfik_name, kif_name, sizeof (s.pfik_name));
-       if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL)
+       if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs,
+           (struct pfi_kif *)(void *)&s)) != NULL)
                return (kif);
 
        /* create new one */
@@ -461,8 +462,8 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
                        continue;
                }
                if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
-                   IN6_IS_ADDR_LINKLOCAL(
-                   &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) {
+                   IN6_IS_ADDR_LINKLOCAL(&((struct sockaddr_in6 *)
+                   (void *)ia->ifa_addr)->sin6_addr)) {
                        IFA_UNLOCK(ia);
                        continue;
                }
@@ -484,10 +485,10 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
                if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
                        if (af == AF_INET)
                                net2 = pfi_unmask(&((struct sockaddr_in *)
-                                   ia->ifa_netmask)->sin_addr);
+                                   (void *)ia->ifa_netmask)->sin_addr);
                        else if (af == AF_INET6)
                                net2 = pfi_unmask(&((struct sockaddr_in6 *)
-                                   ia->ifa_netmask)->sin6_addr);
+                                   (void *)ia->ifa_netmask)->sin6_addr);
                }
                if (af == AF_INET && net2 > 32)
                        net2 = 32;
@@ -536,9 +537,10 @@ pfi_address_add(struct sockaddr *sa, int af, int net)
        p->pfra_af = af;
        p->pfra_net = net;
        if (af == AF_INET)
-               p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
+               p->pfra_ip4addr = ((struct sockaddr_in *)(void *)sa)->sin_addr;
        else if (af == AF_INET6) {
-               p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
+               p->pfra_ip6addr =
+                   ((struct sockaddr_in6 *)(void *)sa)->sin6_addr;
                if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
                        p->pfra_ip6addr.s6_addr16[1] = 0;
        }
@@ -601,7 +603,7 @@ pfi_update_status(const char *name, struct pf_status *pfs)
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        strlcpy(key.pfik_name, name, sizeof (key.pfik_name));
-       p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key);
+       p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)(void *)&key);
        if (p == NULL)
                return;
 
index 25763d8f55c52932d800da8f2cb43326ce6e204a..6e76775a746642b7149573fff0c11d1737ffb3ac 100644 (file)
@@ -83,6 +83,7 @@
 
 #include <mach/vm_param.h>
 
+#include <net/dlil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/if_ether.h>
 
+#if DUMMYNET
+#include <netinet/ip_dummynet.h>
+#else
+struct ip_fw_args;
+#endif /* DUMMYNET */
+
 #include <libkern/crypto/md5.h>
 
+#include <machine/machine_routines.h>
+
 #include <miscfs/devfs/devfs.h>
 
 #include <net/pfvar.h>
 #include <netinet/in_pcb.h>
 #endif /* INET6 */
 
-#if ALTQ
-#include <altq/altq.h>
-#endif /* ALTQ */
+#if PF_ALTQ
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+#endif /* PF_ALTQ */
 
 #if 0
 static void pfdetach(void);
@@ -124,12 +138,34 @@ static void pfdetach(void);
 static int pfopen(dev_t, int, int, struct proc *);
 static int pfclose(dev_t, int, int, struct proc *);
 static int pfioctl(dev_t, u_long, caddr_t, int, struct proc *);
+static int pfioctl_ioc_table(u_long, struct pfioc_table_32 *,
+    struct pfioc_table_64 *, struct proc *);
+static int pfioctl_ioc_tokens(u_long, struct pfioc_tokens_32 *,
+    struct pfioc_tokens_64 *, struct proc *);
+static int pfioctl_ioc_rule(u_long, int, struct pfioc_rule *, struct proc *);
+static int pfioctl_ioc_state_kill(u_long, struct pfioc_state_kill *,
+    struct proc *);
+static int pfioctl_ioc_state(u_long, struct pfioc_state *, struct proc *);
+static int pfioctl_ioc_states(u_long, struct pfioc_states_32 *,
+    struct pfioc_states_64 *, struct proc *);
+static int pfioctl_ioc_natlook(u_long, struct pfioc_natlook *, struct proc *);
+static int pfioctl_ioc_tm(u_long, struct pfioc_tm *, struct proc *);
+static int pfioctl_ioc_limit(u_long, struct pfioc_limit *, struct proc *);
+static int pfioctl_ioc_pooladdr(u_long, struct pfioc_pooladdr *, struct proc *);
+static int pfioctl_ioc_ruleset(u_long, struct pfioc_ruleset *, struct proc *);
+static int pfioctl_ioc_trans(u_long, struct pfioc_trans_32 *,
+    struct pfioc_trans_64 *, struct proc *);
+static int pfioctl_ioc_src_nodes(u_long, struct pfioc_src_nodes_32 *,
+    struct pfioc_src_nodes_64 *, struct proc *);
+static int pfioctl_ioc_src_node_kill(u_long, struct pfioc_src_node_kill *,
+    struct proc *);
+static int pfioctl_ioc_iface(u_long, struct pfioc_iface_32 *,
+    struct pfioc_iface_64 *, struct proc *);
 static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t,
     u_int8_t, u_int8_t, u_int8_t);
-
 static void pf_mv_pool(struct pf_palist *, struct pf_palist *);
 static void pf_empty_pool(struct pf_palist *);
-#if ALTQ
+#if PF_ALTQ
 static int pf_begin_altq(u_int32_t *);
 static int pf_rollback_altq(u_int32_t);
 static int pf_commit_altq(u_int32_t);
@@ -137,18 +173,15 @@ static int pf_enable_altq(struct pf_altq *);
 static int pf_disable_altq(struct pf_altq *);
 static void pf_altq_copyin(struct pf_altq *, struct pf_altq *);
 static void pf_altq_copyout(struct pf_altq *, struct pf_altq *);
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 static int pf_begin_rules(u_int32_t *, int, const char *);
 static int pf_rollback_rules(u_int32_t, int, char *);
 static int pf_setup_pfsync_matching(struct pf_ruleset *);
 static void pf_hash_rule(MD5_CTX *, struct pf_rule *);
-#ifndef NO_APPLE_EXTENSIONS
 static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *, u_int8_t);
-#else
-static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
-#endif
 static int pf_commit_rules(u_int32_t, int, char *);
-static void pf_rule_copyin(struct pf_rule *, struct pf_rule *, struct proc *);
+static void pf_rule_copyin(struct pf_rule *, struct pf_rule *, struct proc *,
+    int);
 static void pf_rule_copyout(struct pf_rule *, struct pf_rule *);
 static void pf_state_export(struct pfsync_state *, struct pf_state_key *,
     struct pf_state *);
@@ -156,6 +189,16 @@ static void pf_state_import(struct pfsync_state *, struct pf_state_key *,
     struct pf_state *);
 static void pf_pooladdr_copyin(struct pf_pooladdr *, struct pf_pooladdr *);
 static void pf_pooladdr_copyout(struct pf_pooladdr *, struct pf_pooladdr *);
+static void pf_expire_states_and_src_nodes(struct pf_rule *);
+static void pf_delete_rule_from_ruleset(struct pf_ruleset *,
+    int, struct pf_rule *);
+static int pf_rule_setup(struct pfioc_rule *, struct pf_rule *,
+    struct pf_ruleset *);
+static void pf_delete_rule_by_owner(char *);
+static int pf_delete_rule_by_ticket(struct pfioc_rule *);
+static void pf_ruleset_cleanup(struct pf_ruleset *, int);
+static void pf_deleterule_anchor_step_out(struct pf_ruleset **,
+    int, struct pf_rule **);
 
 #define        PF_CDEV_MAJOR   (-1)
 
@@ -189,27 +232,37 @@ static void pf_detach_hooks(void);
  */
 int pf_is_enabled = 0;
 
+#if PF_ALTQ
+u_int32_t altq_allowed = 0;
+#endif /* PF_ALTQ */
+
+u_int32_t pf_hash_seed;
+
 /*
  * These are the pf enabled reference counting variables
  */
 static u_int64_t pf_enabled_ref_count;
 static u_int32_t nr_tokens = 0;
+static u_int64_t pffwrules;
+static u_int32_t pfdevcnt;
 
 SLIST_HEAD(list_head, pfioc_kernel_token);
 static struct list_head token_list_head;
 
 struct pf_rule          pf_default_rule;
-#if ALTQ
+#if PF_ALTQ
 static int              pf_altq_running;
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 
 #define        TAGID_MAX        50000
+#if !PF_ALTQ
 static TAILQ_HEAD(pf_tags, pf_tagname) pf_tags =
     TAILQ_HEAD_INITIALIZER(pf_tags);
-#if ALTQ
-static TAILQ_HEAD(pf_tags, pf_tagname) pf_qids =
-    TAILQ_HEAD_INITIALIZER(pf_qids);
-#endif /* ALTQ */
+#else /* PF_ALTQ */
+static TAILQ_HEAD(pf_tags, pf_tagname)
+    pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
+    pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
+#endif /* PF_ALTQ */
 
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
@@ -222,17 +275,77 @@ static void                pf_rtlabel_remove(struct pf_addr_wrap *);
 static void             pf_rtlabel_copyout(struct pf_addr_wrap *);
 
 #if INET
-static int pf_inet_hook(struct ifnet *, struct mbuf **, int);
+static int pf_inet_hook(struct ifnet *, struct mbuf **, int,
+    struct ip_fw_args *);
 #endif /* INET */
 #if INET6
-static int pf_inet6_hook(struct ifnet *, struct mbuf **, int);
+static int pf_inet6_hook(struct ifnet *, struct mbuf **, int,
+    struct ip_fw_args *);
 #endif /* INET6 */
 
-#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+#define        DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+
+/*
+ * Helper macros for ioctl structures which vary in size (32-bit vs. 64-bit)
+ */
+#define        PFIOCX_STRUCT_DECL(s)                                           \
+struct {                                                               \
+       union {                                                         \
+               struct s##_32   _s##_32;                                \
+               struct s##_64   _s##_64;                                \
+       } _u;                                                           \
+} *s##_un = NULL                                                       \
+
+#define        PFIOCX_STRUCT_BEGIN(a, s, _action) {                            \
+       VERIFY(s##_un == NULL);                                         \
+       s##_un = _MALLOC(sizeof (*s##_un), M_TEMP, M_WAITOK|M_ZERO);    \
+       if (s##_un == NULL) {                                           \
+               _action                                                 \
+       } else {                                                        \
+               if (p64)                                                \
+                       bcopy(a, &s##_un->_u._s##_64,                   \
+                           sizeof (struct s##_64));                    \
+               else                                                    \
+                       bcopy(a, &s##_un->_u._s##_32,                   \
+                           sizeof (struct s##_32));                    \
+       }                                                               \
+}
+
+#define        PFIOCX_STRUCT_END(s, a) {                                       \
+       VERIFY(s##_un != NULL);                                         \
+       if (p64)                                                        \
+               bcopy(&s##_un->_u._s##_64, a, sizeof (struct s##_64));  \
+       else                                                            \
+               bcopy(&s##_un->_u._s##_32, a, sizeof (struct s##_32));  \
+       _FREE(s##_un, M_TEMP);                                          \
+       s##_un = NULL;                                                  \
+}
+
+#define        PFIOCX_STRUCT_ADDR32(s)         (&s##_un->_u._s##_32)
+#define        PFIOCX_STRUCT_ADDR64(s)         (&s##_un->_u._s##_64)
+
+/*
+ * Helper macros for regular ioctl structures.
+ */
+#define        PFIOC_STRUCT_BEGIN(a, v, _action) {                             \
+       VERIFY((v) == NULL);                                            \
+       (v) = _MALLOC(sizeof (*(v)), M_TEMP, M_WAITOK|M_ZERO);          \
+       if ((v) == NULL) {                                              \
+               _action                                                 \
+       } else {                                                        \
+               bcopy(a, v, sizeof (*(v)));                             \
+       }                                                               \
+}
+
+#define        PFIOC_STRUCT_END(v, a) {                                        \
+       VERIFY((v) != NULL);                                            \
+       bcopy(v, a, sizeof (*(v)));                                     \
+       _FREE(v, M_TEMP);                                               \
+       (v) = NULL;                                                     \
+}
 
-#define        PF_USER_ADDR(a, s, f)                                   \
-       (proc_is64bit(current_proc()) ?                         \
-       ((struct s##_64 *)a)->f : ((struct s##_32 *)a)->f)
+#define        PFIOC_STRUCT_ADDR32(s)          (&s##_un->_u._s##_32)
+#define        PFIOC_STRUCT_ADDR64(s)          (&s##_un->_u._s##_64)
 
 static lck_attr_t *pf_perim_lock_attr;
 static lck_grp_t *pf_perim_lock_grp;
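
The PFIOCX_STRUCT_* helpers above exist because several pf ioctl payloads embed user pointers and therefore have different layouts for 32-bit and 64-bit callers; the old PF_USER_ADDR() macro only papered over individual fields. A minimal sketch of the pattern the new macros encapsulate, using a hypothetical foo_32/foo_64 pair that is not part of this change:

        struct foo_32 { u_int32_t size; user32_addr_t buf; }; /* ILP32 caller */
        struct foo_64 { u_int32_t size; user64_addr_t buf; }; /* LP64 caller */

        static int
        handle_foo(caddr_t addr, int p64)
        {
                union { struct foo_32 f32; struct foo_64 f64; } *un;

                un = _MALLOC(sizeof (*un), M_TEMP, M_WAITOK|M_ZERO);
                if (un == NULL)
                        return (ENOMEM);
                /* bcopy(), not assignment: addr may be unaligned */
                if (p64)
                        bcopy(addr, &un->f64, sizeof (un->f64));
                else
                        bcopy(addr, &un->f32, sizeof (un->f32));
                /* ... operate on &un->f32 or &un->f64 ... */
                if (p64)
                        bcopy(&un->f64, addr, sizeof (un->f64));
                else
                        bcopy(&un->f32, addr, sizeof (un->f32));
                _FREE(un, M_TEMP);
                return (0);
        }
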
@@ -247,38 +360,39 @@ struct thread *pf_purge_thread;
 extern void pfi_kifaddr_update(void *);
 
 /* pf enable ref-counting helper functions */
-static u_int64_t                generate_token(void);
-static int                      remove_token(struct pfioc_remove_token *);
-static void                     invalidate_all_tokens(void);
+static u_int64_t               generate_token(struct proc *);
+static int                     remove_token(struct pfioc_remove_token *);
+static void                    invalidate_all_tokens(void);
 
 static u_int64_t
-generate_token(void)
+generate_token(struct proc *p)
 {
        u_int64_t token_value;
        struct pfioc_kernel_token *new_token;
 
-       new_token = _MALLOC(sizeof (struct pfioc_kernel_token), M_TEMP, M_WAITOK|M_ZERO);
+       new_token = _MALLOC(sizeof (struct pfioc_kernel_token), M_TEMP,
+           M_WAITOK|M_ZERO);
 
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if (new_token == NULL) {
                /* malloc failed! bail! */
                printf("%s: unable to allocate pf token structure!", __func__);
-               return 0;
+               return (0);
        }
 
        token_value = (u_int64_t)(uintptr_t)new_token;
 
        new_token->token.token_value = token_value;
-       new_token->token.pid = proc_pid(current_proc());
+       new_token->token.pid = proc_pid(p);
        proc_name(new_token->token.pid, new_token->token.proc_name,
-               sizeof (new_token->token.proc_name));
+           sizeof (new_token->token.proc_name));
        new_token->token.timestamp = pf_calendar_time_second();
 
        SLIST_INSERT_HEAD(&token_list_head, new_token, next);
        nr_tokens++;
 
-       return token_value;
+       return (token_value);
 }
 
 static int
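
generate_token() hands back the kernel address of the bookkeeping record as an opaque 64-bit handle, now tagged with the pid of the proc passed down from pfioctl() rather than current_proc(). Roughly how the reference pair is driven from userland (illustrative only, error handling elided):

        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <net/pfvar.h>

        int fd = open("/dev/pf", O_RDWR);
        u_int64_t token;
        struct pfioc_remove_token rt;

        ioctl(fd, DIOCSTARTREF, &token);   /* take a pf-enable reference */
        /* ... pf stays enabled while any token is outstanding ... */
        rt.token_value = token;
        ioctl(fd, DIOCSTOPREF, &rt);       /* rt.refcount: references left */
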
@@ -290,15 +404,16 @@ remove_token(struct pfioc_remove_token *tok)
 
        SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
                if (tok->token_value == entry->token.token_value) {
-                       SLIST_REMOVE(&token_list_head, entry, pfioc_kernel_token, next);
+                       SLIST_REMOVE(&token_list_head, entry,
+                           pfioc_kernel_token, next);
                        _FREE(entry, M_TEMP);
                        nr_tokens--;
-                       return 0;    /* success */
+                       return (0);    /* success */
                }
        }
 
        printf("pf : remove failure\n");
-       return ESRCH;    /* failure */
+       return (ESRCH);    /* failure */
 }
 
 static void
@@ -314,8 +429,6 @@ invalidate_all_tokens(void)
        }
 
        nr_tokens = 0;
-
-       return;
 }
 
 void
@@ -328,13 +441,12 @@ pfinit(void)
        pf_perim_lock_grp = lck_grp_alloc_init("pf_perim",
            pf_perim_lock_grp_attr);
        pf_perim_lock_attr = lck_attr_alloc_init();
-       pf_perim_lock = lck_rw_alloc_init(pf_perim_lock_grp,
-           pf_perim_lock_attr);
+       lck_rw_init(pf_perim_lock, pf_perim_lock_grp, pf_perim_lock_attr);
 
        pf_lock_grp_attr = lck_grp_attr_alloc_init();
        pf_lock_grp = lck_grp_alloc_init("pf", pf_lock_grp_attr);
        pf_lock_attr = lck_attr_alloc_init();
-       pf_lock = lck_mtx_alloc_init(pf_lock_grp, pf_lock_attr);
+       lck_mtx_init(pf_lock, pf_lock_grp, pf_lock_attr);
 
        pool_init(&pf_rule_pl, sizeof (struct pf_rule), 0, 0, 0, "pfrulepl",
            NULL);
@@ -344,14 +456,12 @@ pfinit(void)
            NULL);
        pool_init(&pf_state_key_pl, sizeof (struct pf_state_key), 0, 0, 0,
            "pfstatekeypl", NULL);
-#ifndef NO_APPLE_EXTENSIONS
        pool_init(&pf_app_state_pl, sizeof (struct pf_app_state), 0, 0, 0,
            "pfappstatepl", NULL);
-#endif
-#if ALTQ
+#if PF_ALTQ
        pool_init(&pf_altq_pl, sizeof (struct pf_altq), 0, 0, 0, "pfaltqpl",
            NULL);
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
        pool_init(&pf_pooladdr_pl, sizeof (struct pf_pooladdr), 0, 0, 0,
            "pfpooladdrpl", NULL);
        pfr_initialize();
@@ -370,12 +480,32 @@ pfinit(void)
        pf_init_ruleset(&pf_main_ruleset);
        TAILQ_INIT(&pf_pabuf);
        TAILQ_INIT(&state_list);
-#if ALTQ
+#if PF_ALTQ
        TAILQ_INIT(&pf_altqs[0]);
        TAILQ_INIT(&pf_altqs[1]);
        pf_altqs_active = &pf_altqs[0];
        pf_altqs_inactive = &pf_altqs[1];
-#endif /* ALTQ */
+
+       PE_parse_boot_argn("altq", &altq_allowed, sizeof (altq_allowed));
+
+       _CASSERT(ALTRQ_PURGE == CLASSQRQ_PURGE);
+       _CASSERT(ALTRQ_PURGE_SC == CLASSQRQ_PURGE_SC);
+       _CASSERT(ALTRQ_EVENT == CLASSQRQ_EVENT);
+
+       _CASSERT(ALTDQ_REMOVE == CLASSQDQ_REMOVE);
+       _CASSERT(ALTDQ_POLL == CLASSQDQ_POLL);
+#endif /* PF_ALTQ */
+
+       _CASSERT((SC_BE & SCIDX_MASK) == SCIDX_BE);
+       _CASSERT((SC_BK_SYS & SCIDX_MASK) == SCIDX_BK_SYS);
+       _CASSERT((SC_BK & SCIDX_MASK) == SCIDX_BK);
+       _CASSERT((SC_RD & SCIDX_MASK) == SCIDX_RD);
+       _CASSERT((SC_OAM & SCIDX_MASK) == SCIDX_OAM);
+       _CASSERT((SC_AV & SCIDX_MASK) == SCIDX_AV);
+       _CASSERT((SC_RV & SCIDX_MASK) == SCIDX_RV);
+       _CASSERT((SC_VI & SCIDX_MASK) == SCIDX_VI);
+       _CASSERT((SC_VO & SCIDX_MASK) == SCIDX_VO);
+       _CASSERT((SC_CTL & SCIDX_MASK) == SCIDX_CTL);
 
        /* default rule should never be garbage collected */
        pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
@@ -395,14 +525,12 @@ pfinit(void)
        t[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
        t[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
        t[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
-#ifndef NO_APPLE_EXTENSIONS
        t[PFTM_GREv1_FIRST_PACKET] = PFTM_GREv1_FIRST_PACKET_VAL;
        t[PFTM_GREv1_INITIATING] = PFTM_GREv1_INITIATING_VAL;
        t[PFTM_GREv1_ESTABLISHED] = PFTM_GREv1_ESTABLISHED_VAL;
        t[PFTM_ESP_FIRST_PACKET] = PFTM_ESP_FIRST_PACKET_VAL;
        t[PFTM_ESP_INITIATING] = PFTM_ESP_INITIATING_VAL;
        t[PFTM_ESP_ESTABLISHED] = PFTM_ESP_ESTABLISHED_VAL;
-#endif
        t[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
        t[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
        t[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
@@ -416,6 +544,7 @@ pfinit(void)
        pf_normalize_init();
        bzero(&pf_status, sizeof (pf_status));
        pf_status.debug = PF_DEBUG_URGENT;
+       pf_hash_seed = random();
 
        /* XXX do our best to avoid a conflict */
        pf_status.hostid = random();
@@ -431,9 +560,12 @@ pfinit(void)
                printf("%s: failed to allocate major number!\n", __func__);
                return;
        }
-       (void) devfs_make_node(makedev(maj, 0), DEVFS_CHAR,
+       (void) devfs_make_node(makedev(maj, PFDEV_PF), DEVFS_CHAR,
            UID_ROOT, GID_WHEEL, 0600, "pf", 0);
 
+       (void) devfs_make_node(makedev(maj, PFDEV_PFM), DEVFS_CHAR,
+           UID_ROOT, GID_WHEEL, 0600, "pfm", 0);
+
        pf_attach_hooks();
 }
 
@@ -458,10 +590,10 @@ pfdetach(void)
        for (i = 0; i < PF_RULESET_MAX; i++)
                if (pf_begin_rules(&ticket, i, &r) == 0)
                                pf_commit_rules(ticket, i, &r);
-#if ALTQ
+#if PF_ALTQ
        if (pf_begin_altq(&ticket) == 0)
                pf_commit_altq(ticket);
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 
        /* clear states */
        RB_FOREACH(state, pf_state_tree_id, &tree_id) {
@@ -503,9 +635,9 @@ pfdetach(void)
 
        /* destroy the pools */
        pool_destroy(&pf_pooladdr_pl);
-#if ALTQ
+#if PF_ALTQ
        pool_destroy(&pf_altq_pl);
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
        pool_destroy(&pf_state_pl);
        pool_destroy(&pf_rule_pl);
        pool_destroy(&pf_src_tree_pl);
@@ -522,8 +654,18 @@ static int
 pfopen(dev_t dev, int flags, int fmt, struct proc *p)
 {
 #pragma unused(flags, fmt, p)
-       if (minor(dev) >= 1)
+       if (minor(dev) >= PFDEV_MAX)
                return (ENXIO);
+
+       if (minor(dev) == PFDEV_PFM) {
+               lck_mtx_lock(pf_lock);
+               if (pfdevcnt != 0) {
+                       lck_mtx_unlock(pf_lock);
+                       return (EBUSY);
+               }
+               pfdevcnt++;
+               lck_mtx_unlock(pf_lock);
+       }
        return (0);
 }
 
@@ -531,8 +673,15 @@ static int
 pfclose(dev_t dev, int flags, int fmt, struct proc *p)
 {
 #pragma unused(flags, fmt, p)
-       if (minor(dev) >= 1)
+       if (minor(dev) >= PFDEV_MAX)
                return (ENXIO);
+
+       if (minor(dev) == PFDEV_PFM) {
+               lck_mtx_lock(pf_lock);
+               VERIFY(pfdevcnt > 0);
+               pfdevcnt--;
+               lck_mtx_unlock(pf_lock);
+       }
        return (0);
 }
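
Note the asymmetry with /dev/pf: the new PFDEV_PFM minor is exclusive-open, gated by pfdevcnt under pf_lock, and rules installed through it are tagged PFRULE_PFM in pf_rule_copyin() below. From userland the effect looks like this (hypothetical sketch):

        int fd1 = open("/dev/pfm", O_RDWR);   /* succeeds; pfdevcnt -> 1 */
        int fd2 = open("/dev/pfm", O_RDWR);   /* fails with errno == EBUSY */
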
 
@@ -630,11 +779,13 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
                return;
        pf_tag_unref(rule->tag);
        pf_tag_unref(rule->match_tag);
-#if ALTQ
-       if (rule->pqid != rule->qid)
-               pf_qid_unref(rule->pqid);
-       pf_qid_unref(rule->qid);
-#endif /* ALTQ */
+#if PF_ALTQ
+       if (altq_allowed) {
+               if (rule->pqid != rule->qid)
+                       pf_qid_unref(rule->pqid);
+               pf_qid_unref(rule->qid);
+       }
+#endif /* PF_ALTQ */
        pf_rtlabel_remove(&rule->src.addr);
        pf_rtlabel_remove(&rule->dst.addr);
        pfi_dynaddr_remove(&rule->src.addr);
@@ -775,22 +926,28 @@ pf_rtlabel_copyout(struct pf_addr_wrap *a)
 #pragma unused(a)
 }
 
-#if ALTQ
+#if PF_ALTQ
 u_int32_t
 pf_qname2qid(char *qname)
 {
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
        return ((u_int32_t)tagname2tag(&pf_qids, qname));
 }
 
 void
 pf_qid2qname(u_int32_t qid, char *p)
 {
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
        tag2tagname(&pf_qids, (u_int16_t)qid, p);
 }
 
 void
 pf_qid_unref(u_int32_t qid)
 {
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
        tag_unref(&pf_qids, (u_int16_t)qid);
 }
 
@@ -800,10 +957,12 @@ pf_begin_altq(u_int32_t *ticket)
        struct pf_altq  *altq;
        int              error = 0;
 
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
        /* Purge the old altq list */
        while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
                TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
-               if (altq->qname[0] == 0) {
+               if (altq->qname[0] == '\0') {
                        /* detach and destroy the discipline */
                        error = altq_remove(altq);
                } else
@@ -823,12 +982,14 @@ pf_rollback_altq(u_int32_t ticket)
        struct pf_altq  *altq;
        int              error = 0;
 
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
        if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
                return (0);
        /* Purge the old altq list */
        while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
                TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
-               if (altq->qname[0] == 0) {
+               if (altq->qname[0] == '\0') {
                        /* detach and destroy the discipline */
                        error = altq_remove(altq);
                } else
@@ -844,13 +1005,14 @@ pf_commit_altq(u_int32_t ticket)
 {
        struct pf_altqqueue     *old_altqs;
        struct pf_altq          *altq;
-       int                      s, err, error = 0;
+       int                      err, error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
                return (EBUSY);
 
        /* swap altqs, keep the old. */
-       s = splnet();
        old_altqs = pf_altqs_active;
        pf_altqs_active = pf_altqs_inactive;
        pf_altqs_inactive = old_altqs;
@@ -858,13 +1020,12 @@ pf_commit_altq(u_int32_t ticket)
 
        /* Attach new disciplines */
        TAILQ_FOREACH(altq, pf_altqs_active, entries) {
-               if (altq->qname[0] == 0) {
+               if (altq->qname[0] == '\0') {
                        /* attach the discipline */
                        error = altq_pfattach(altq);
                        if (error == 0 && pf_altq_running)
                                error = pf_enable_altq(altq);
                        if (error != 0) {
-                               splx(s);
                                return (error);
                        }
                }
@@ -873,7 +1034,7 @@ pf_commit_altq(u_int32_t ticket)
        /* Purge the old altq list */
        while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
                TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
-               if (altq->qname[0] == 0) {
+               if (altq->qname[0] == '\0') {
                        /* detach and destroy the discipline */
                        if (pf_altq_running)
                                error = pf_disable_altq(altq);
@@ -887,7 +1048,6 @@ pf_commit_altq(u_int32_t ticket)
                        pf_qid_unref(altq->qid);
                pool_put(&pf_altq_pl, altq);
        }
-       splx(s);
 
        altqs_inactive_open = 0;
        return (error);
@@ -897,23 +1057,40 @@ static int
 pf_enable_altq(struct pf_altq *altq)
 {
        struct ifnet            *ifp;
-       struct tb_profile        tb;
-       int                      s, error = 0;
+       struct ifclassq         *ifq;
+       int                      error = 0;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if ((ifp = ifunit(altq->ifname)) == NULL)
                return (EINVAL);
 
-       if (ifp->if_snd.altq_type != ALTQT_NONE)
-               error = altq_enable(&ifp->if_snd);
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK(ifq);
+       if (IFCQ_ALTQ(ifq)->altq_type != ALTQT_NONE)
+               error = altq_enable(IFCQ_ALTQ(ifq));
+
+       /* set or clear tokenbucket regulator */
+       if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
+               struct tb_profile tb = { 0, 0, 0 };
 
-       /* set tokenbucket regulator */
-       if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
-               tb.rate = altq->ifbandwidth;
-               tb.depth = altq->tbrsize;
-               s = splnet();
-               error = tbr_set(&ifp->if_snd, &tb);
-               splx(s);
+               if (altq->aflags & PF_ALTQF_TBR) {
+                       if (altq->bwtype != PF_ALTQ_BW_ABSOLUTE &&
+                           altq->bwtype != PF_ALTQ_BW_PERCENT) {
+                               error = EINVAL;
+                       } else {
+                               if (altq->bwtype == PF_ALTQ_BW_ABSOLUTE)
+                                       tb.rate = altq->ifbandwidth;
+                               else
+                                       tb.percent = altq->ifbandwidth;
+                               tb.depth = altq->tbrsize;
+                               error = ifclassq_tbr_set(ifq, &tb, TRUE);
+                       }
+               } else if (IFCQ_TBR_IS_ENABLED(ifq)) {
+                       error = ifclassq_tbr_set(ifq, &tb, TRUE);
+               }
        }
+       IFCQ_UNLOCK(ifq);
 
        return (error);
 }
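
pf_enable_altq() now programs the token-bucket regulator through the ifclassq layer under IFCQ_LOCK instead of tbr_set() under splnet(), and a profile can carry either an absolute rate or a percentage of link bandwidth. A sketch with made-up numbers, assuming ifp and the locking shown above:

        struct ifclassq *ifq = &ifp->if_snd;
        struct tb_profile tb = { 0, 0, 0 };
        int error;

        tb.rate = 10 * 1000 * 1000;     /* PF_ALTQ_BW_ABSOLUTE: 10 Mbit/s */
        tb.depth = 8 * 1500;            /* burst depth (tbrsize) */
        error = ifclassq_tbr_set(ifq, &tb, TRUE);

        /* a zeroed profile tears the regulator back down */
        tb.rate = tb.percent = tb.depth = 0;
        error = ifclassq_tbr_set(ifq, &tb, TRUE);
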
@@ -922,8 +1099,10 @@ static int
 pf_disable_altq(struct pf_altq *altq)
 {
        struct ifnet            *ifp;
-       struct tb_profile        tb;
-       int                      s, error;
+       struct ifclassq         *ifq;
+       int                      error;
+
+       lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if ((ifp = ifunit(altq->ifname)) == NULL)
                return (EINVAL);
@@ -932,18 +1111,21 @@ pf_disable_altq(struct pf_altq *altq)
         * when the discipline is no longer referenced, it was overridden
         * by a new one.  if so, just return.
         */
-       if (altq->altq_disc != ifp->if_snd.altq_disc)
+       ifq = &ifp->if_snd;
+       IFCQ_LOCK(ifq);
+       if (altq->altq_disc != IFCQ_ALTQ(ifq)->altq_disc) {
+               IFCQ_UNLOCK(ifq);
                return (0);
+       }
 
-       error = altq_disable(&ifp->if_snd);
+       error = altq_disable(IFCQ_ALTQ(ifq));
 
-       if (error == 0) {
+       if (error == 0 && IFCQ_TBR_IS_ENABLED(ifq)) {
                /* clear tokenbucket regulator */
-               tb.rate = 0;
-               s = splnet();
-               error = tbr_set(&ifp->if_snd, &tb);
-               splx(s);
+               struct tb_profile  tb = { 0, 0, 0 };
+               error = ifclassq_tbr_set(ifq, &tb, TRUE);
        }
+       IFCQ_UNLOCK(ifq);
 
        return (error);
 }
@@ -957,18 +1139,22 @@ pf_altq_copyin(struct pf_altq *src, struct pf_altq *dst)
        dst->qname[sizeof (dst->qname) - 1] = '\0';
        dst->parent[sizeof (dst->parent) - 1] = '\0';
        dst->altq_disc = NULL;
-       TAILQ_INIT(&dst->entries);
+       dst->entries.tqe_next = NULL;
+       dst->entries.tqe_prev = NULL;
 }
 
 static void
 pf_altq_copyout(struct pf_altq *src, struct pf_altq *dst)
 {
-       bcopy(src, dst, sizeof (struct pf_altq));
+       struct pf_altq pa;
 
-       dst->altq_disc = NULL;
-       TAILQ_INIT(&dst->entries);
+       bcopy(src, &pa, sizeof (struct pf_altq));
+       pa.altq_disc = NULL;
+       pa.entries.tqe_next = NULL;
+       pa.entries.tqe_prev = NULL;
+       bcopy(&pa, dst, sizeof (struct pf_altq));
 }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 
 static int
 pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
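
pf_altq_copyout() above now stages through a stack copy so that altq_disc and the queue-entry pointers are scrubbed before anything reaches the caller-visible buffer, closing a kernel-pointer disclosure. The same idiom generically, for a hypothetical struct with a kernel-only pointer:

        struct kobj { u_int32_t stats; void *kptr; /* kernel-only */ };

        static void
        kobj_copyout(struct kobj *src, struct kobj *dst)
        {
                struct kobj tmp;

                bcopy(src, &tmp, sizeof (tmp));
                tmp.kptr = NULL;                /* scrub before it leaves */
                bcopy(&tmp, dst, sizeof (tmp));
        }
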
@@ -1010,29 +1196,24 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
        return (0);
 }
 
-#define PF_MD5_UPD(st, elm)                                            \
+#define        PF_MD5_UPD(st, elm)                                             \
        MD5Update(ctx, (u_int8_t *)&(st)->elm, sizeof ((st)->elm))
 
-#define PF_MD5_UPD_STR(st, elm)                                                \
+#define        PF_MD5_UPD_STR(st, elm)                                         \
        MD5Update(ctx, (u_int8_t *)(st)->elm, strlen((st)->elm))
 
-#define PF_MD5_UPD_HTONL(st, elm, stor) do {                           \
+#define        PF_MD5_UPD_HTONL(st, elm, stor) do {                            \
        (stor) = htonl((st)->elm);                                      \
        MD5Update(ctx, (u_int8_t *)&(stor), sizeof (u_int32_t));        \
 } while (0)
 
-#define PF_MD5_UPD_HTONS(st, elm, stor) do {                           \
+#define        PF_MD5_UPD_HTONS(st, elm, stor) do {                            \
        (stor) = htons((st)->elm);                                      \
        MD5Update(ctx, (u_int8_t *)&(stor), sizeof (u_int16_t));        \
 } while (0)
 
-#ifndef NO_APPLE_EXTENSIONS
 static void
 pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr, u_int8_t proto)
-#else
-static void
-pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
-#endif
 {
        PF_MD5_UPD(pfr, addr.type);
        switch (pfr->addr.type) {
@@ -1053,7 +1234,6 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
                break;
        }
 
-#ifndef NO_APPLE_EXTENSIONS
        switch (proto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
@@ -1067,12 +1247,6 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
        }
 
        PF_MD5_UPD(pfr, neg);
-#else
-       PF_MD5_UPD(pfr, port[0]);
-       PF_MD5_UPD(pfr, port[1]);
-       PF_MD5_UPD(pfr, neg);
-       PF_MD5_UPD(pfr, port_op);
-#endif
 }
 
 static void
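
The PF_MD5_UPD* macros hash rule fields one at a time, forcing multi-byte values to network order first, so the ruleset checksum used for pfsync matching is stable across endianness and struct padding. The idea in miniature, for a toy two-field struct:

        struct toy { u_int32_t a; u_int16_t b; };

        static void
        toy_hash(struct toy *t, u_int8_t digest[16])
        {
                MD5_CTX ctx;
                u_int32_t a = htonl(t->a);      /* normalize byte order */
                u_int16_t b = htons(t->b);

                MD5Init(&ctx);
                MD5Update(&ctx, (u_int8_t *)&a, sizeof (a));
                MD5Update(&ctx, (u_int8_t *)&b, sizeof (b));
                MD5Final(digest, &ctx);
        }
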
@@ -1081,13 +1255,8 @@ pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
        u_int16_t x;
        u_int32_t y;
 
-#ifndef NO_APPLE_EXTENSIONS
        pf_hash_rule_addr(ctx, &rule->src, rule->proto);
        pf_hash_rule_addr(ctx, &rule->dst, rule->proto);
-#else
-       pf_hash_rule_addr(ctx, &rule->src);
-       pf_hash_rule_addr(ctx, &rule->dst);
-#endif
        PF_MD5_UPD_STR(rule, label);
        PF_MD5_UPD_STR(rule, ifname);
        PF_MD5_UPD_STR(rule, match_tagname);
@@ -1177,7 +1346,8 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
 }
 
 static void
-pf_rule_copyin(struct pf_rule *src, struct pf_rule *dst, struct proc *p)
+pf_rule_copyin(struct pf_rule *src, struct pf_rule *dst, struct proc *p,
+    int minordev)
 {
        bcopy(src, dst, sizeof (struct pf_rule));
 
@@ -1205,6 +1375,8 @@ pf_rule_copyin(struct pf_rule *src, struct pf_rule *dst, struct proc *p)
 
        dst->entries.tqe_prev = NULL;
        dst->entries.tqe_next = NULL;
+       if ((uint8_t)minordev == PFDEV_PFM)
+               dst->rule_flag |= PFRULE_PFM;
 }
 
 static void
@@ -1231,7 +1403,6 @@ pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk,
        bzero(sp, sizeof (struct pfsync_state));
 
        /* copy from state key */
-#ifndef NO_APPLE_EXTENSIONS
        sp->lan.addr = sk->lan.addr;
        sp->lan.xport = sk->lan.xport;
        sp->gwy.addr = sk->gwy.addr;
@@ -1240,17 +1411,10 @@ pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk,
        sp->ext.xport = sk->ext.xport;
        sp->proto_variant = sk->proto_variant;
        sp->tag = s->tag;
-#else
-       sp->lan.addr = sk->lan.addr;
-       sp->lan.port = sk->lan.port;
-       sp->gwy.addr = sk->gwy.addr;
-       sp->gwy.port = sk->gwy.port;
-       sp->ext.addr = sk->ext.addr;
-       sp->ext.port = sk->ext.port;
-#endif
        sp->proto = sk->proto;
        sp->af = sk->af;
        sp->direction = sk->direction;
+       sp->flowhash = sk->flowhash;
 
        /* copy from state */
        memcpy(&sp->id, &s->id, sizeof (sp->id));
@@ -1292,7 +1456,6 @@ pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk,
     struct pf_state *s)
 {
        /* copy to state key */
-#ifndef NO_APPLE_EXTENSIONS
        sk->lan.addr = sp->lan.addr;
        sk->lan.xport = sp->lan.xport;
        sk->gwy.addr = sp->gwy.addr;
@@ -1301,17 +1464,10 @@ pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk,
        sk->ext.xport = sp->ext.xport;
        sk->proto_variant = sp->proto_variant;
        s->tag = sp->tag;
-#else
-       sk->lan.addr = sp->lan.addr;
-       sk->lan.port = sp->lan.port;
-       sk->gwy.addr = sp->gwy.addr;
-       sk->gwy.port = sp->gwy.port;
-       sk->ext.addr = sp->ext.addr;
-       sk->ext.port = sp->ext.port;
-#endif
        sk->proto = sp->proto;
        sk->af = sp->af;
        sk->direction = sp->direction;
+       sk->flowhash = pf_calc_state_key_flowhash(sk);
 
        /* copy to state */
        memcpy(&s->id, &sp->id, sizeof (sp->id));
@@ -1429,9 +1585,9 @@ static int
 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 {
 #pragma unused(dev)
-       struct pf_pooladdr      *pa = NULL;
-       struct pf_pool          *pool = NULL;
-       int                      error = 0;
+       int p64 = proc_is64bit(p);
+       int error = 0;
+       int minordev = minor(dev);
 
        if (kauth_cred_issuser(kauth_cred_get()) == 0)
                return (EPERM);
@@ -1450,6 +1606,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                case DIOCNATLOOK:
                case DIOCSETDEBUG:
                case DIOCGETSTATES:
+               case DIOCINSERTRULE:
+               case DIOCDELETERULE:
                case DIOCGETTIMEOUT:
                case DIOCCLRRULECTRS:
                case DIOCGETLIMIT:
@@ -1473,17 +1631,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                case DIOCGETSRCNODES:
                case DIOCCLRSRCNODES:
                case DIOCIGETIFACES:
+               case DIOCGIFSPEED:
                case DIOCSETIFFLAG:
                case DIOCCLRIFFLAG:
                        break;
                case DIOCRCLRTABLES:
                case DIOCRADDTABLES:
                case DIOCRDELTABLES:
-               case DIOCRSETTFLAGS:
-                       if (((struct pfioc_table *)addr)->pfrio_flags &
-                           PFR_FLAG_DUMMY)
+               case DIOCRSETTFLAGS: {
+                       int pfrio_flags;
+
+                       bcopy(&((struct pfioc_table *)(void *)addr)->
+                           pfrio_flags, &pfrio_flags, sizeof (pfrio_flags));
+
+                       if (pfrio_flags & PFR_FLAG_DUMMY)
                                break; /* dummy operation ok */
                        return (EPERM);
+               }
                default:
                        return (EPERM);
                }
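
The recurring rewrite in this hunk and the ones below: ((struct pfioc_table *)addr)->pfrio_flags dereferences the raw ioctl buffer directly, which assumes addr is suitably aligned for the struct; copying the field out with bcopy() makes no such assumption. Equivalently, with the offset arithmetic spelled out (illustrative, with addr as the raw caddr_t ioctl argument):

        int pfrio_flags;

        /* field-sized byte copy from a possibly unaligned user buffer */
        bcopy(addr + offsetof(struct pfioc_table, pfrio_flags),
            &pfrio_flags, sizeof (pfrio_flags));
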
@@ -1501,6 +1665,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                case DIOCGETSTATE:
                case DIOCGETSTATUS:
                case DIOCGETSTATES:
+               case DIOCINSERTRULE:
+               case DIOCDELETERULE:
                case DIOCGETTIMEOUT:
                case DIOCGETLIMIT:
                case DIOCGETALTQS:
@@ -1517,6 +1683,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                case DIOCOSFPGET:
                case DIOCGETSRCNODES:
                case DIOCIGETIFACES:
+               case DIOCGIFSPEED:
                        break;
                case DIOCRCLRTABLES:
                case DIOCRADDTABLES:
@@ -1526,22 +1693,48 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                case DIOCRADDADDRS:
                case DIOCRDELADDRS:
                case DIOCRSETADDRS:
-               case DIOCRSETTFLAGS:
-                       if (((struct pfioc_table *)addr)->pfrio_flags &
-                       PFR_FLAG_DUMMY) {
+               case DIOCRSETTFLAGS: {
+                       int pfrio_flags;
+
+                       bcopy(&((struct pfioc_table *)(void *)addr)->
+                           pfrio_flags, &pfrio_flags, sizeof (pfrio_flags));
+
+                       if (pfrio_flags & PFR_FLAG_DUMMY) {
                                flags |= FWRITE; /* need write lock for dummy */
                                break; /* dummy operation ok */
                        }
                        return (EACCES);
-               case DIOCGETRULE:
-                       if (((struct pfioc_rule *)addr)->action ==
-                           PF_GET_CLR_CNTR)
+               }
+               case DIOCGETRULE: {
+                       u_int32_t action;
+
+                       bcopy(&((struct pfioc_rule *)(void *)addr)->action,
+                           &action, sizeof (action));
+
+                       if (action == PF_GET_CLR_CNTR)
                                return (EACCES);
                        break;
+               }
                default:
                        return (EACCES);
                }
 
+#if PF_ALTQ
+       switch (cmd) {
+       case DIOCSTARTALTQ:
+       case DIOCSTOPALTQ:
+       case DIOCADDALTQ:
+       case DIOCGETALTQS:
+       case DIOCGETALTQ:
+       case DIOCCHANGEALTQ:
+       case DIOCGETQSTATS:
+               /* fail if ALTQ is disabled */
+               if (!altq_allowed)
+                       return (ENODEV);
+               break;
+       }
+#endif /* PF_ALTQ */
+
        if (flags & FWRITE)
                lck_rw_lock_exclusive(pf_perim_lock);
        else
@@ -1573,11 +1766,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                }
                break;
 
-       case DIOCSTARTREF:    /* returns a token */
+       case DIOCSTARTREF:              /* u_int64_t */
                if (pf_purge_thread == NULL) {
                        error = ENOMEM;
                } else {
-                       if ((*(u_int64_t *)addr = generate_token()) != 0) {
+                       u_int64_t token;
+
+                       /* small enough to be on stack */
+                       if ((token = generate_token(p)) != 0) {
                                if (pf_is_enabled == 0) {
                                        pf_start();
                                }
@@ -1586,8 +1782,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        } else {
                                error = ENOMEM;
                                DPFPRINTF(PF_DEBUG_URGENT,
-                                       ("pf: unable to generate token\n"));
+                                   ("pf: unable to generate token\n"));
                        }
+                       bcopy(&token, addr, sizeof (token));
                }
                break;
 
@@ -1601,230 +1798,1332 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                }
                break;
 
-       case DIOCSTOPREF:
+       case DIOCSTOPREF:               /* struct pfioc_remove_token */
                if (!pf_status.running) {
                        error = ENOENT;
                } else {
-                       if ((error = remove_token(
-                                       (struct pfioc_remove_token*)addr))==0) {
+                       struct pfioc_remove_token pfrt;
+
+                       /* small enough to be on stack */
+                       bcopy(addr, &pfrt, sizeof (pfrt));
+                       if ((error = remove_token(&pfrt)) == 0) {
                                VERIFY(pf_enabled_ref_count != 0);
                                pf_enabled_ref_count--;
-                               // return currently held references
-                               ((struct pfioc_remove_token *)addr)->refcount
-                                       = pf_enabled_ref_count;
+                               /* return currently held references */
+                               pfrt.refcount = pf_enabled_ref_count;
                                DPFPRINTF(PF_DEBUG_MISC,
-                                       ("pf: enabled refcount decremented\n"));
+                                   ("pf: enabled refcount decremented\n"));
                        } else {
                                error = EINVAL;
                                DPFPRINTF(PF_DEBUG_URGENT,
-                                       ("pf: token mismatch\n"));
-                               break;
+                                   ("pf: token mismatch\n"));
                        }
+                       bcopy(&pfrt, addr, sizeof (pfrt));
 
-                       if (pf_enabled_ref_count  == 0)
+                       if (error == 0 && pf_enabled_ref_count == 0)
                                pf_stop();
                }
                break;
 
-       case DIOCGETSTARTERS: {
-               struct pfioc_tokens             *g_token = (struct pfioc_tokens *)addr;
-               struct pfioc_token              *tokens;
-               struct pfioc_kernel_token       *entry, *tmp;
-               user_addr_t                     token_buf;
-               int                             g_token_size_copy;
-               char                            *ptr;
+       case DIOCGETSTARTERS: {         /* struct pfioc_tokens */
+               PFIOCX_STRUCT_DECL(pfioc_tokens);
 
-               if (nr_tokens == 0) {
-                       error = ENOENT;
-                       break;
-               }
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_tokens, error = ENOMEM; break;);
+               error = pfioctl_ioc_tokens(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_tokens),
+                   PFIOCX_STRUCT_ADDR64(pfioc_tokens), p);
+               PFIOCX_STRUCT_END(pfioc_tokens, addr);
+               break;
+       }
+
+       case DIOCADDRULE:               /* struct pfioc_rule */
+       case DIOCGETRULES:              /* struct pfioc_rule */
+       case DIOCGETRULE:               /* struct pfioc_rule */
+       case DIOCCHANGERULE:            /* struct pfioc_rule */
+       case DIOCINSERTRULE:            /* struct pfioc_rule */
+       case DIOCDELETERULE: {          /* struct pfioc_rule */
+               struct pfioc_rule *pr = NULL;
+
+               PFIOC_STRUCT_BEGIN(addr, pr, error = ENOMEM; break;);
+               error = pfioctl_ioc_rule(cmd, minordev, pr, p);
+               PFIOC_STRUCT_END(pr, addr);
+               break;
+       }
 
-               g_token_size_copy = g_token->size;
+       case DIOCCLRSTATES:             /* struct pfioc_state_kill */
+       case DIOCKILLSTATES: {          /* struct pfioc_state_kill */
+               struct pfioc_state_kill *psk = NULL;
 
-               if (g_token->size == 0) {
-                       g_token->size = sizeof (struct pfioc_token) * nr_tokens;
-                       break;
-               }
+               PFIOC_STRUCT_BEGIN(addr, psk, error = ENOMEM; break;);
+               error = pfioctl_ioc_state_kill(cmd, psk, p);
+               PFIOC_STRUCT_END(psk, addr);
+               break;
+       }
 
-               token_buf = PF_USER_ADDR(addr, pfioc_tokens, pgt_buf);
-               tokens = _MALLOC(sizeof(struct pfioc_token) * nr_tokens,
-                       M_TEMP, M_WAITOK);
+       case DIOCADDSTATE:              /* struct pfioc_state */
+       case DIOCGETSTATE: {            /* struct pfioc_state */
+               struct pfioc_state *ps = NULL;
 
-               if (tokens == NULL) {
-                       error = ENOMEM;
+               PFIOC_STRUCT_BEGIN(addr, ps, error = ENOMEM; break;);
+               error = pfioctl_ioc_state(cmd, ps, p);
+               PFIOC_STRUCT_END(ps, addr);
+               break;
+       }
+
+       case DIOCGETSTATES: {           /* struct pfioc_states */
+               PFIOCX_STRUCT_DECL(pfioc_states);
+
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_states, error = ENOMEM; break;);
+               error = pfioctl_ioc_states(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_states),
+                   PFIOCX_STRUCT_ADDR64(pfioc_states), p);
+               PFIOCX_STRUCT_END(pfioc_states, addr);
+               break;
+       }
+
+       case DIOCGETSTATUS: {           /* struct pf_status */
+               struct pf_status *s = NULL;
+
+               PFIOC_STRUCT_BEGIN(&pf_status, s, error = ENOMEM; break;);
+               pfi_update_status(s->ifname, s);
+               PFIOC_STRUCT_END(s, addr);
+               break;
+       }
+
+       case DIOCSETSTATUSIF: {         /* struct pfioc_if */
+               struct pfioc_if *pi = (struct pfioc_if *)(void *)addr;
+
+               /* OK for unaligned accesses */
+               if (pi->ifname[0] == 0) {
+                       bzero(pf_status.ifname, IFNAMSIZ);
                        break;
                }
+               strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ);
+               break;
+       }
 
-               ptr = (void *)tokens;
-               SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
-                       if ((unsigned)g_token_size_copy 
-                               < sizeof(struct pfioc_token))
-                       break;    /* no more buffer space left */
+       case DIOCCLRSTATUS: {
+               bzero(pf_status.counters, sizeof (pf_status.counters));
+               bzero(pf_status.fcounters, sizeof (pf_status.fcounters));
+               bzero(pf_status.scounters, sizeof (pf_status.scounters));
+               pf_status.since = pf_calendar_time_second();
+               if (*pf_status.ifname)
+                       pfi_update_status(pf_status.ifname, NULL);
+               break;
+       }
 
-                       ((struct pfioc_token *)(ptr))->token_value = entry->token.token_value;
-                       ((struct pfioc_token *)(ptr))->timestamp = entry->token.timestamp;
-                       ((struct pfioc_token *)(ptr))->pid = entry->token.pid;
-                       memcpy(((struct pfioc_token *)(ptr))->proc_name, entry->token.proc_name, 
-                               PFTOK_PROCNAME_LEN);
-                       ptr += sizeof(struct pfioc_token);
+       case DIOCNATLOOK: {             /* struct pfioc_natlook */
+               struct pfioc_natlook *pnl = NULL;
 
-                       g_token_size_copy -= sizeof(struct pfioc_token);
-               }
+               PFIOC_STRUCT_BEGIN(addr, pnl, error = ENOMEM; break;);
+               error = pfioctl_ioc_natlook(cmd, pnl, p);
+               PFIOC_STRUCT_END(pnl, addr);
+               break;
+       }
 
-               if (g_token_size_copy < g_token->size) {
-                       error = copyout(tokens, token_buf, 
-                               g_token->size - g_token_size_copy);
-               }
+       case DIOCSETTIMEOUT:            /* struct pfioc_tm */
+       case DIOCGETTIMEOUT: {          /* struct pfioc_tm */
+               struct pfioc_tm pt;
 
-               g_token->size -= g_token_size_copy;
-               _FREE(tokens, M_TEMP);
+               /* small enough to be on stack */
+               bcopy(addr, &pt, sizeof (pt));
+               error = pfioctl_ioc_tm(cmd, &pt, p);
+               bcopy(&pt, addr, sizeof (pt));
+               break;
+       }
+
+       case DIOCGETLIMIT:              /* struct pfioc_limit */
+       case DIOCSETLIMIT: {            /* struct pfioc_limit */
+               struct pfioc_limit pl;
 
+               /* small enough to be on stack */
+               bcopy(addr, &pl, sizeof (pl));
+               error = pfioctl_ioc_limit(cmd, &pl, p);
+               bcopy(&pl, addr, sizeof (pl));
                break;
-               }
+       }
 
-       case DIOCADDRULE: {
-               struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
-               struct pf_ruleset       *ruleset;
-               struct pf_rule          *rule, *tail;
-               struct pf_pooladdr      *apa;
-               int                     rs_num;
+       case DIOCSETDEBUG: {            /* u_int32_t */
+               bcopy(addr, &pf_status.debug, sizeof (u_int32_t));
+               break;
+       }
 
-               pr->anchor[sizeof (pr->anchor) - 1] = '\0';
-               pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
-               ruleset = pf_find_ruleset(pr->anchor);
-               if (ruleset == NULL) {
-                       error = EINVAL;
-                       break;
+       case DIOCCLRRULECTRS: {
+               /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
+               struct pf_ruleset       *ruleset = &pf_main_ruleset;
+               struct pf_rule          *rule;
+
+               TAILQ_FOREACH(rule,
+                   ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
+                       rule->evaluations = 0;
+                       rule->packets[0] = rule->packets[1] = 0;
+                       rule->bytes[0] = rule->bytes[1] = 0;
                }
-               rs_num = pf_get_ruleset_number(pr->rule.action);
-               if (rs_num >= PF_RULESET_MAX) {
+               break;
+       }
+
+       case DIOCGIFSPEED: {
+               struct pf_ifspeed *psp = (struct pf_ifspeed *)(void *)addr;
+               struct pf_ifspeed ps;
+               struct ifnet *ifp;
+               u_int64_t baudrate;
+
+               if (psp->ifname[0] != '\0') {
+                       /* Can we completely trust user-land? */
+                       strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
+                       ps.ifname[IFNAMSIZ - 1] = '\0';
+                       ifp = ifunit(ps.ifname);
+                       if (ifp != NULL) {
+                               baudrate = ifp->if_output_bw.max_bw;
+                               bcopy(&baudrate, &psp->baudrate,
+                                   sizeof (baudrate));
+                       } else {
+                               error = EINVAL;
+                       }
+               } else {
                        error = EINVAL;
-                       break;
                }
-               if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
-                       error = EINVAL;
-                       break;
+               break;
+       }
+
+#if PF_ALTQ
+       case DIOCSTARTALTQ: {
+               struct pf_altq          *altq;
+
+               VERIFY(altq_allowed);
+               /* enable all altq interfaces on active list */
+               TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+                       if (altq->qname[0] == '\0') {
+                               error = pf_enable_altq(altq);
+                               if (error != 0)
+                                       break;
+                       }
                }
-               if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) {
-                       error = EBUSY;
-                       break;
+               if (error == 0)
+                       pf_altq_running = 1;
+               DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
+               break;
+       }
+
+       case DIOCSTOPALTQ: {
+               struct pf_altq          *altq;
+
+               VERIFY(altq_allowed);
+               /* disable all altq interfaces on active list */
+               TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+                       if (altq->qname[0] == '\0') {
+                               error = pf_disable_altq(altq);
+                               if (error != 0)
+                                       break;
+                       }
                }
-               if (pr->pool_ticket != ticket_pabuf) {
+               if (error == 0)
+                       pf_altq_running = 0;
+               DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
+               break;
+       }
+
+       case DIOCADDALTQ: {             /* struct pfioc_altq */
+               struct pfioc_altq       *pa = (struct pfioc_altq *)(void *)addr;
+               struct pf_altq          *altq, *a;
+               u_int32_t               ticket;
+
+               VERIFY(altq_allowed);
+               bcopy(&pa->ticket, &ticket, sizeof (ticket));
+               if (ticket != ticket_altqs_inactive) {
                        error = EBUSY;
                        break;
                }
-               rule = pool_get(&pf_rule_pl, PR_WAITOK);
-               if (rule == NULL) {
+               altq = pool_get(&pf_altq_pl, PR_WAITOK);
+               if (altq == NULL) {
                        error = ENOMEM;
                        break;
                }
-               pf_rule_copyin(&pr->rule, rule, p);
-#if !INET
-               if (rule->af == AF_INET) {
-                       pool_put(&pf_rule_pl, rule);
-                       error = EAFNOSUPPORT;
-                       break;
-               }
-#endif /* INET */
-#if !INET6
-               if (rule->af == AF_INET6) {
-                       pool_put(&pf_rule_pl, rule);
-                       error = EAFNOSUPPORT;
-                       break;
-               }
-#endif /* INET6 */
-               tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
-                   pf_rulequeue);
-               if (tail)
-                       rule->nr = tail->nr + 1;
-               else
-                       rule->nr = 0;
-               if (rule->ifname[0]) {
-                       rule->kif = pfi_kif_get(rule->ifname);
-                       if (rule->kif == NULL) {
-                               pool_put(&pf_rule_pl, rule);
-                               error = EINVAL;
+               pf_altq_copyin(&pa->altq, altq);
+
+               /*
+                * if this is for a queue, find the discipline and
+                * copy the necessary fields
+                */
+               if (altq->qname[0] != '\0') {
+                       if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
+                               error = EBUSY;
+                               pool_put(&pf_altq_pl, altq);
                                break;
                        }
-                       pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE);
+                       altq->altq_disc = NULL;
+                       TAILQ_FOREACH(a, pf_altqs_inactive, entries) {
+                               if (strncmp(a->ifname, altq->ifname,
+                                   IFNAMSIZ) == 0 && a->qname[0] == '\0') {
+                                       altq->altq_disc = a->altq_disc;
+                                       break;
+                               }
+                       }
+               }
+
+               error = altq_add(altq);
+               if (error) {
+                       pool_put(&pf_altq_pl, altq);
+                       break;
+               }
+
+               TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
+               pf_altq_copyout(altq, &pa->altq);
+               break;
+       }
+
+       case DIOCGETALTQS: {
+               struct pfioc_altq       *pa = (struct pfioc_altq *)(void *)addr;
+               struct pf_altq          *altq;
+               u_int32_t               nr;
+
+               VERIFY(altq_allowed);
+               nr = 0;
+               TAILQ_FOREACH(altq, pf_altqs_active, entries)
+                       nr++;
+               bcopy(&nr, &pa->nr, sizeof (nr));
+               bcopy(&ticket_altqs_active, &pa->ticket, sizeof (pa->ticket));
+               break;
+       }
+
+       case DIOCGETALTQ: {
+               struct pfioc_altq       *pa = (struct pfioc_altq *)(void *)addr;
+               struct pf_altq          *altq;
+               u_int32_t                nr, pa_nr, ticket;
+
+               VERIFY(altq_allowed);
+               bcopy(&pa->ticket, &ticket, sizeof (ticket));
+               if (ticket != ticket_altqs_active) {
+                       error = EBUSY;
+                       break;
+               }
+               bcopy(&pa->nr, &pa_nr, sizeof (pa_nr));
+               nr = 0;
+               altq = TAILQ_FIRST(pf_altqs_active);
+               while ((altq != NULL) && (nr < pa_nr)) {
+                       altq = TAILQ_NEXT(altq, entries);
+                       nr++;
+               }
+               if (altq == NULL) {
+                       error = EBUSY;
+                       break;
+               }
+               pf_altq_copyout(altq, &pa->altq);
+               break;
+       }
+
+       case DIOCCHANGEALTQ:
+               VERIFY(altq_allowed);
+               /* CHANGEALTQ not supported yet! */
+               error = ENODEV;
+               break;
+
+       case DIOCGETQSTATS: {
+               struct pfioc_qstats *pq = (struct pfioc_qstats *)(void *)addr;
+               struct pf_altq          *altq;
+               u_int32_t                nr, pq_nr, ticket;
+               int                      nbytes;
+
+               VERIFY(altq_allowed);
+               bcopy(&pq->ticket, &ticket, sizeof (ticket));
+               if (ticket != ticket_altqs_active) {
+                       error = EBUSY;
+                       break;
+               }
+               bcopy(&pq->nr, &pq_nr, sizeof (pq_nr));
+               nr = 0;
+               altq = TAILQ_FIRST(pf_altqs_active);
+               while ((altq != NULL) && (nr < pq_nr)) {
+                       altq = TAILQ_NEXT(altq, entries);
+                       nr++;
+               }
+               if (altq == NULL) {
+                       error = EBUSY;
+                       break;
+               }
+               bcopy(&pq->nbytes, &nbytes, sizeof (nbytes));
+               error = altq_getqstats(altq, pq->buf, &nbytes);
+               if (error == 0) {
+                       pq->scheduler = altq->scheduler;
+                       bcopy(&nbytes, &pq->nbytes, sizeof (nbytes));
+               }
+               break;
+       }
+#endif /* PF_ALTQ */
+
+       case DIOCBEGINADDRS:            /* struct pfioc_pooladdr */
+       case DIOCADDADDR:               /* struct pfioc_pooladdr */
+       case DIOCGETADDRS:              /* struct pfioc_pooladdr */
+       case DIOCGETADDR:               /* struct pfioc_pooladdr */
+       case DIOCCHANGEADDR: {          /* struct pfioc_pooladdr */
+               struct pfioc_pooladdr *pp = NULL;
+
+               PFIOC_STRUCT_BEGIN(addr, pp, error = ENOMEM; break;)
+               error = pfioctl_ioc_pooladdr(cmd, pp, p);
+               PFIOC_STRUCT_END(pp, addr);
+               break;
+       }
+
+       case DIOCGETRULESETS:           /* struct pfioc_ruleset */
+       case DIOCGETRULESET: {          /* struct pfioc_ruleset */
+               struct pfioc_ruleset *pr = NULL;
+
+               PFIOC_STRUCT_BEGIN(addr, pr, error = ENOMEM; break;);
+               error = pfioctl_ioc_ruleset(cmd, pr, p);
+               PFIOC_STRUCT_END(pr, addr);
+               break;
+       }
+
+       case DIOCRCLRTABLES:            /* struct pfioc_table */
+       case DIOCRADDTABLES:            /* struct pfioc_table */
+       case DIOCRDELTABLES:            /* struct pfioc_table */
+       case DIOCRGETTABLES:            /* struct pfioc_table */
+       case DIOCRGETTSTATS:            /* struct pfioc_table */
+       case DIOCRCLRTSTATS:            /* struct pfioc_table */
+       case DIOCRSETTFLAGS:            /* struct pfioc_table */
+       case DIOCRCLRADDRS:             /* struct pfioc_table */
+       case DIOCRADDADDRS:             /* struct pfioc_table */
+       case DIOCRDELADDRS:             /* struct pfioc_table */
+       case DIOCRSETADDRS:             /* struct pfioc_table */
+       case DIOCRGETADDRS:             /* struct pfioc_table */
+       case DIOCRGETASTATS:            /* struct pfioc_table */
+       case DIOCRCLRASTATS:            /* struct pfioc_table */
+       case DIOCRTSTADDRS:             /* struct pfioc_table */
+       case DIOCRINADEFINE: {          /* struct pfioc_table */
+               PFIOCX_STRUCT_DECL(pfioc_table);
+
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_table, error = ENOMEM; break;);
+               error = pfioctl_ioc_table(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_table),
+                   PFIOCX_STRUCT_ADDR64(pfioc_table), p);
+               PFIOCX_STRUCT_END(pfioc_table, addr);
+               break;
+       }
+
+       case DIOCOSFPADD:               /* struct pf_osfp_ioctl */
+       case DIOCOSFPGET: {             /* struct pf_osfp_ioctl */
+               struct pf_osfp_ioctl *io = NULL;
+
+               PFIOC_STRUCT_BEGIN(addr, io, error = ENOMEM; break;);
+               if (cmd == DIOCOSFPADD) {
+                       error = pf_osfp_add(io);
+               } else {
+                       VERIFY(cmd == DIOCOSFPGET);
+                       error = pf_osfp_get(io);
+               }
+               PFIOC_STRUCT_END(io, addr);
+               break;
+       }
+
+       case DIOCXBEGIN:                /* struct pfioc_trans */
+       case DIOCXROLLBACK:             /* struct pfioc_trans */
+       case DIOCXCOMMIT: {             /* struct pfioc_trans */
+               PFIOCX_STRUCT_DECL(pfioc_trans);
+
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_trans, error = ENOMEM; break;);
+               error = pfioctl_ioc_trans(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_trans),
+                   PFIOCX_STRUCT_ADDR64(pfioc_trans), p);
+               PFIOCX_STRUCT_END(pfioc_trans, addr);
+               break;
+       }
+
+       case DIOCGETSRCNODES: {         /* struct pfioc_src_nodes */
+               PFIOCX_STRUCT_DECL(pfioc_src_nodes);
+
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_src_nodes,
+                   error = ENOMEM; break;);
+               error = pfioctl_ioc_src_nodes(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_src_nodes),
+                   PFIOCX_STRUCT_ADDR64(pfioc_src_nodes), p);
+               PFIOCX_STRUCT_END(pfioc_src_nodes, addr);
+               break;
+       }
+
+       case DIOCCLRSRCNODES: {
+               struct pf_src_node      *n;
+               struct pf_state         *state;
+
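+               /*
+                * Detach every state from its source node(s), then mark
+                * all source nodes as expired and purge them.
+                */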
+               RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+                       state->src_node = NULL;
+                       state->nat_src_node = NULL;
+               }
+               RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+                       n->expire = 1;
+                       n->states = 0;
+               }
+               pf_purge_expired_src_nodes();
+               pf_status.src_nodes = 0;
+               break;
+       }
+
+       case DIOCKILLSRCNODES: {        /* struct pfioc_src_node_kill */
+               struct pfioc_src_node_kill *psnk = NULL;
+
+               PFIOC_STRUCT_BEGIN(addr, psnk, error = ENOMEM; break;);
+               error = pfioctl_ioc_src_node_kill(cmd, psnk, p);
+               PFIOC_STRUCT_END(psnk, addr);
+               break;
+       }
+
+       case DIOCSETHOSTID: {           /* u_int32_t */
+               u_int32_t hid;
+
+               /* small enough to be on stack */
+               bcopy(addr, &hid, sizeof (hid));
+               if (hid == 0)
+                       pf_status.hostid = random();
+               else
+                       pf_status.hostid = hid;
+               break;
+       }
+
+       case DIOCOSFPFLUSH:
+               pf_osfp_flush();
+               break;
+
+       case DIOCIGETIFACES:            /* struct pfioc_iface */
+       case DIOCSETIFFLAG:             /* struct pfioc_iface */
+       case DIOCCLRIFFLAG: {           /* struct pfioc_iface */
+               PFIOCX_STRUCT_DECL(pfioc_iface);
+
+               PFIOCX_STRUCT_BEGIN(addr, pfioc_iface, error = ENOMEM; break;);
+               error = pfioctl_ioc_iface(cmd,
+                   PFIOCX_STRUCT_ADDR32(pfioc_iface),
+                   PFIOCX_STRUCT_ADDR64(pfioc_iface), p);
+               PFIOCX_STRUCT_END(pfioc_iface, addr);
+               break;
+       }
+
+       default:
+               error = ENODEV;
+               break;
+       }
+
+       lck_mtx_unlock(pf_lock);
+       lck_rw_done(pf_perim_lock);
+
+       return (error);
+}
+
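+/*
+ * Handle the table ioctls (DIOCR*) on behalf of pfioctl().  Both the
+ * 32-bit and 64-bit views of the user structure are passed in; only
+ * the one matching the calling process's ABI is used, which is why the
+ * two switch bodies below are intentionally parallel.
+ */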
+static int
+pfioctl_ioc_table(u_long cmd, struct pfioc_table_32 *io32,
+    struct pfioc_table_64 *io64, struct proc *p)
+{
+       int p64 = proc_is64bit(p);
+       int error = 0;
+
+       if (!p64)
+               goto struct32;
+
+       /*
+        * 64-bit structure processing
+        */
+       switch (cmd) {
+       case DIOCRCLRTABLES:
+               if (io64->pfrio_esize != 0) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_clr_tables(&io64->pfrio_table, &io64->pfrio_ndel,
+                   io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRADDTABLES:
+               if (io64->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_add_tables(io64->pfrio_buffer, io64->pfrio_size,
+                   &io64->pfrio_nadd, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRDELTABLES:
+               if (io64->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_del_tables(io64->pfrio_buffer, io64->pfrio_size,
+                   &io64->pfrio_ndel, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETTABLES:
+               if (io64->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_get_tables(&io64->pfrio_table, io64->pfrio_buffer,
+                   &io64->pfrio_size, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETTSTATS:
+               if (io64->pfrio_esize != sizeof (struct pfr_tstats)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_get_tstats(&io64->pfrio_table, io64->pfrio_buffer,
+                   &io64->pfrio_size, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRTSTATS:
+               if (io64->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_clr_tstats(io64->pfrio_buffer, io64->pfrio_size,
+                   &io64->pfrio_nzero, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRSETTFLAGS:
+               if (io64->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_set_tflags(io64->pfrio_buffer, io64->pfrio_size,
+                   io64->pfrio_setflag, io64->pfrio_clrflag,
+                   &io64->pfrio_nchange, &io64->pfrio_ndel,
+                   io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRADDRS:
+               if (io64->pfrio_esize != 0) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_clr_addrs(&io64->pfrio_table, &io64->pfrio_ndel,
+                   io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRADDADDRS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_add_addrs(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_nadd, io64->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRDELADDRS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_del_addrs(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_ndel, io64->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRSETADDRS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_set_addrs(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_size2, &io64->pfrio_nadd,
+                   &io64->pfrio_ndel, &io64->pfrio_nchange, io64->pfrio_flags |
+                   PFR_FLAG_USERIOCTL, 0);
+               break;
+
+       case DIOCRGETADDRS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_get_addrs(&io64->pfrio_table, io64->pfrio_buffer,
+                   &io64->pfrio_size, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETASTATS:
+               if (io64->pfrio_esize != sizeof (struct pfr_astats)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_get_astats(&io64->pfrio_table, io64->pfrio_buffer,
+                   &io64->pfrio_size, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRASTATS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_clr_astats(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_nzero, io64->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRTSTADDRS:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_tst_addrs(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_nmatch, io64->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRINADEFINE:
+               if (io64->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io64->pfrio_table);
+               error = pfr_ina_define(&io64->pfrio_table, io64->pfrio_buffer,
+                   io64->pfrio_size, &io64->pfrio_nadd, &io64->pfrio_naddr,
+                   io64->pfrio_ticket, io64->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+       goto done;
+
+struct32:
+       /*
+        * 32-bit structure processing
+        */
+       switch (cmd) {
+       case DIOCRCLRTABLES:
+               if (io32->pfrio_esize != 0) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_clr_tables(&io32->pfrio_table, &io32->pfrio_ndel,
+                   io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRADDTABLES:
+               if (io32->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_add_tables(io32->pfrio_buffer, io32->pfrio_size,
+                   &io32->pfrio_nadd, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRDELTABLES:
+               if (io32->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_del_tables(io32->pfrio_buffer, io32->pfrio_size,
+                   &io32->pfrio_ndel, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETTABLES:
+               if (io32->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_get_tables(&io32->pfrio_table, io32->pfrio_buffer,
+                   &io32->pfrio_size, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETTSTATS:
+               if (io32->pfrio_esize != sizeof (struct pfr_tstats)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_get_tstats(&io32->pfrio_table, io32->pfrio_buffer,
+                   &io32->pfrio_size, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRTSTATS:
+               if (io32->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_clr_tstats(io32->pfrio_buffer, io32->pfrio_size,
+                   &io32->pfrio_nzero, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRSETTFLAGS:
+               if (io32->pfrio_esize != sizeof (struct pfr_table)) {
+                       error = ENODEV;
+                       break;
+               }
+               error = pfr_set_tflags(io32->pfrio_buffer, io32->pfrio_size,
+                   io32->pfrio_setflag, io32->pfrio_clrflag,
+                   &io32->pfrio_nchange, &io32->pfrio_ndel,
+                   io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRADDRS:
+               if (io32->pfrio_esize != 0) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_clr_addrs(&io32->pfrio_table, &io32->pfrio_ndel,
+                   io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRADDADDRS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_add_addrs(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_nadd, io32->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRDELADDRS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_del_addrs(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_ndel, io32->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRSETADDRS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_set_addrs(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_size2, &io32->pfrio_nadd,
+                   &io32->pfrio_ndel, &io32->pfrio_nchange, io32->pfrio_flags |
+                   PFR_FLAG_USERIOCTL, 0);
+               break;
+
+       case DIOCRGETADDRS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_get_addrs(&io32->pfrio_table, io32->pfrio_buffer,
+                   &io32->pfrio_size, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRGETASTATS:
+               if (io32->pfrio_esize != sizeof (struct pfr_astats)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_get_astats(&io32->pfrio_table, io32->pfrio_buffer,
+                   &io32->pfrio_size, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRCLRASTATS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_clr_astats(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_nzero, io32->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRTSTADDRS:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_tst_addrs(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_nmatch, io32->pfrio_flags |
+                   PFR_FLAG_USERIOCTL);
+               break;
+
+       case DIOCRINADEFINE:
+               if (io32->pfrio_esize != sizeof (struct pfr_addr)) {
+                       error = ENODEV;
+                       break;
+               }
+               pfr_table_copyin_cleanup(&io32->pfrio_table);
+               error = pfr_ina_define(&io32->pfrio_table, io32->pfrio_buffer,
+                   io32->pfrio_size, &io32->pfrio_nadd, &io32->pfrio_naddr,
+                   io32->pfrio_ticket, io32->pfrio_flags | PFR_FLAG_USERIOCTL);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+done:
+       return (error);
+}
+
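+/*
+ * DIOCGETSTARTERS: copy out a pfioc_token record for each outstanding
+ * PF starter token.  A request with size 0 simply reports the buffer
+ * size needed to hold the current token list.
+ */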
+static int
+pfioctl_ioc_tokens(u_long cmd, struct pfioc_tokens_32 *tok32,
+    struct pfioc_tokens_64 *tok64, struct proc *p)
+{
+       struct pfioc_token *tokens;
+       struct pfioc_kernel_token *entry, *tmp;
+       user_addr_t token_buf;
+       int ocnt, cnt, error = 0, p64 = proc_is64bit(p);
+       char *ptr;
+
+       switch (cmd) {
+       case DIOCGETSTARTERS: {
+               int size;
+
+               if (nr_tokens == 0) {
+                       error = ENOENT;
+                       break;
+               }
+
+               size = sizeof (struct pfioc_token) * nr_tokens;
+               ocnt = cnt = (p64 ? tok64->size : tok32->size);
+               if (cnt == 0) {
+                       if (p64)
+                               tok64->size = size;
+                       else
+                               tok32->size = size;
+                       break;
+               }
+
+               token_buf = (p64 ? tok64->pgt_buf : tok32->pgt_buf);
+               tokens = _MALLOC(size, M_TEMP, M_WAITOK|M_ZERO);
+               if (tokens == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
+
+               ptr = (void *)tokens;
+               SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
+                       struct pfioc_token *t;
+
+                       if ((unsigned)cnt < sizeof (*tokens))
+                               break;    /* no more buffer space left */
+
+                       t = (struct pfioc_token *)(void *)ptr;
+                       t->token_value  = entry->token.token_value;
+                       t->timestamp    = entry->token.timestamp;
+                       t->pid          = entry->token.pid;
+                       bcopy(entry->token.proc_name, t->proc_name,
+                           PFTOK_PROCNAME_LEN);
+                       ptr += sizeof (struct pfioc_token);
+
+                       cnt -= sizeof (struct pfioc_token);
+               }
+
+               if (cnt < ocnt)
+                       error = copyout(tokens, token_buf, ocnt - cnt);
+
+               if (p64)
+                       tok64->size = ocnt - cnt;
+               else
+                       tok32->size = ocnt - cnt;
+
+               _FREE(tokens, M_TEMP);
+               break;
+       }
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
+
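+/*
+ * Mark every state created by `rule' for purging, then expire the
+ * source nodes referencing the rule once their states have been
+ * detached.
+ */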
+static void
+pf_expire_states_and_src_nodes(struct pf_rule *rule)
+{
+       struct pf_state         *state;
+       struct pf_src_node      *sn;
+       int                      killed = 0;
+
+       /* expire the states */
+       state = TAILQ_FIRST(&state_list);
+       while (state) {
+               if (state->rule.ptr == rule)
+                       state->timeout = PFTM_PURGE;
+               state = TAILQ_NEXT(state, entry_list);
+       }
+       pf_purge_expired_states(pf_status.states);
+
+       /* expire the src_nodes */
+       RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
+               if (sn->rule.ptr != rule)
+                       continue;
+               if (sn->states != 0) {
+                       RB_FOREACH(state, pf_state_tree_id,
+                           &tree_id) {
+                               if (state->src_node == sn)
+                                       state->src_node = NULL;
+                               if (state->nat_src_node == sn)
+                                       state->nat_src_node = NULL;
+                       }
+                       sn->states = 0;
                }
+               sn->expire = 1;
+               killed++;
+       }
+       if (killed)
+               pf_purge_expired_src_nodes();
+}
 
-#if ALTQ
-               /* set queue IDs */
-               if (rule->qname[0] != 0) {
-                       if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
-                               error = EBUSY;
-                       else if (rule->pqname[0] != 0) {
-                               if ((rule->pqid =
-                                   pf_qname2qid(rule->pqname)) == 0)
-                                       error = EBUSY;
-                       } else
-                               rule->pqid = rule->qid;
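+/*
+ * Unlink `rule' from the active ruleset after expiring its states and
+ * source nodes, then renumber the rules that remain.  Callers are
+ * responsible for the subsequent pf_ruleset_cleanup().
+ */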
+static void
+pf_delete_rule_from_ruleset(struct pf_ruleset *ruleset, int rs_num,
+    struct pf_rule *rule)
+{
+       struct pf_rule *r;
+       int nr = 0;
+
+       pf_expire_states_and_src_nodes(rule);
+
+       pf_rm_rule(ruleset->rules[rs_num].active.ptr, rule);
+       if (ruleset->rules[rs_num].active.rcount-- == 0)
+               panic("%s: rcount value broken!", __func__);
+       r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+
+       while (r) {
+               r->nr = nr++;
+               r = TAILQ_NEXT(r, entries);
+       }
+}
+
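+/*
+ * Recalculate the skip steps and advance the active ticket past the
+ * inactive one so that stale userland tickets are invalidated.
+ */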
+static void
+pf_ruleset_cleanup(struct pf_ruleset *ruleset, int rs)
+{
+       pf_calc_skip_steps(ruleset->rules[rs].active.ptr);
+       ruleset->rules[rs].active.ticket =
+           ++ruleset->rules[rs].inactive.ticket;
+}
+
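+/*
+ * Delete the rule matching pr->rule.ticket, provided the caller owns
+ * it.  If the deletion leaves an unowned anchor empty, the anchor rule
+ * referencing it is deleted as well, repeating up the anchor tree.
+ */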
+static int
+pf_delete_rule_by_ticket(struct pfioc_rule *pr)
+{
+       struct pf_ruleset       *ruleset;
+       struct pf_rule          *rule;
+       int                      rs_num;
+       int                      is_anchor;
+       int                      error;
+
+       is_anchor = (pr->anchor_call[0] != '\0');
+       if ((ruleset = pf_find_ruleset_with_owner(pr->anchor,
+           pr->rule.owner, is_anchor, &error)) == NULL)
+               return (error);
+
+       rs_num = pf_get_ruleset_number(pr->rule.action);
+       if (rs_num >= PF_RULESET_MAX) {
+               return (EINVAL);
+       }
+
+       if (pr->rule.ticket) {
+               rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+               while (rule && (rule->ticket != pr->rule.ticket))
+                       rule = TAILQ_NEXT(rule, entries);
+               if (rule == NULL)
+                       return (ENOENT);
+
+               if (strcmp(rule->owner, pr->rule.owner))
+                       return (EACCES);
+
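+               /*
+                * If this is the last rule of an unowned anchor, delete
+                * it, then also delete the rule in the parent ruleset
+                * that references the now-empty anchor, repeating up the
+                * tree.
+                */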
+delete_rule:
+               if (rule->anchor && (ruleset != &pf_main_ruleset) &&
+                   ((strcmp(ruleset->anchor->owner, "")) == 0) &&
+                   ((ruleset->rules[rs_num].active.rcount - 1) == 0)) {
+                       /* set rule & ruleset to parent and repeat */
+                       struct pf_rule *delete_rule = rule;
+                       struct pf_ruleset *delete_ruleset = ruleset;
+
+#define        parent_ruleset          ruleset->anchor->parent->ruleset
+                       if (ruleset->anchor->parent == NULL)
+                               ruleset = &pf_main_ruleset;
+                       else
+                               ruleset = &parent_ruleset;
+
+                       rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+                       while (rule &&
+                           (rule->anchor != delete_ruleset->anchor))
+                               rule = TAILQ_NEXT(rule, entries);
+                       if (rule == NULL)
+                               panic("%s: rule not found!", __func__);
+
+                       if (delete_rule->rule_flag & PFRULE_PFM)
+                               pffwrules--;
+
+                       pf_delete_rule_from_ruleset(delete_ruleset,
+                           rs_num, delete_rule);
+                       delete_ruleset->rules[rs_num].active.ticket =
+                           ++delete_ruleset->rules[rs_num].inactive.ticket;
+
+                       goto delete_rule;
+               } else {
+                       if (rule->rule_flag & PFRULE_PFM)
+                               pffwrules--;
+                       pf_delete_rule_from_ruleset(ruleset, rs_num,
+                           rule);
+                       pf_ruleset_cleanup(ruleset, rs_num);
                }
-#endif /* ALTQ */
-               if (rule->tagname[0])
-                       if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
-                               error = EBUSY;
-               if (rule->match_tagname[0])
-                       if ((rule->match_tag =
-                           pf_tagname2tag(rule->match_tagname)) == 0)
+       }
+
+       return (0);
+}
+
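+/*
+ * Delete every rule owned by `owner' across all rulesets, stepping
+ * into anchors as they are encountered and cleaning up each ruleset
+ * once deletions have been made in it.
+ */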
+static void
+pf_delete_rule_by_owner(char *owner)
+{
+       struct pf_ruleset       *ruleset;
+       struct pf_rule          *rule, *next;
+       int                      deleted = 0;
+
+       for (int rs = 0; rs < PF_RULESET_MAX; rs++) {
+               rule = TAILQ_FIRST(pf_main_ruleset.rules[rs].active.ptr);
+               ruleset = &pf_main_ruleset;
+               while (rule) {
+                       next = TAILQ_NEXT(rule, entries);
+                       if (rule->anchor) {
+                               if (((strcmp(rule->owner, owner)) == 0) ||
+                                   ((strcmp(rule->owner, "")) == 0)) {
+                                       if (rule->anchor->ruleset.rules[rs].active.rcount > 0) {
+                                               if (deleted) {
+                                                       pf_ruleset_cleanup(ruleset, rs);
+                                                       deleted = 0;
+                                               }
+                                               /* step into anchor */
+                                               ruleset =
+                                                   &rule->anchor->ruleset;
+                                               rule = TAILQ_FIRST(ruleset->rules[rs].active.ptr);
+                                               continue;
+                                       } else {
+                                               if (rule->rule_flag &
+                                                   PFRULE_PFM)
+                                                       pffwrules--;
+                                               pf_delete_rule_from_ruleset(ruleset, rs, rule);
+                                               deleted = 1;
+                                               rule = next;
+                                       }
+                               } else
+                                       rule = next;
+                       } else {
+                               if (((strcmp(rule->owner, owner)) == 0)) {
+                                       /* delete rule */
+                                       if (rule->rule_flag & PFRULE_PFM)
+                                               pffwrules--;
+                                       pf_delete_rule_from_ruleset(ruleset,
+                                           rs, rule);
+                                       deleted = 1;
+                               }
+                               rule = next;
+                       }
+                       if (rule == NULL) {
+                               if (deleted) {
+                                       pf_ruleset_cleanup(ruleset, rs);
+                                       deleted = 0;
+                               }
+                               if (ruleset != &pf_main_ruleset)
+                                       pf_deleterule_anchor_step_out(&ruleset,
+                                           rs, &rule);
+                       }
+               }
+       }
+}
+
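+/*
+ * Helper for pf_delete_rule_by_owner(): leave the current anchor and
+ * resume the walk in the parent ruleset, at or after the anchor rule
+ * that was stepped into.
+ */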
+static void
+pf_deleterule_anchor_step_out(struct pf_ruleset **ruleset_ptr,
+    int rs, struct pf_rule **rule_ptr)
+{
+       struct pf_ruleset *ruleset = *ruleset_ptr;
+       struct pf_rule *rule = *rule_ptr;
+
+       /* step out of anchor */
+       struct pf_ruleset *rs_copy = ruleset;
+       ruleset = ruleset->anchor->parent ?
+           &ruleset->anchor->parent->ruleset : &pf_main_ruleset;
+
+       rule = TAILQ_FIRST(ruleset->rules[rs].active.ptr);
+       while (rule && (rule->anchor != rs_copy->anchor))
+               rule = TAILQ_NEXT(rule, entries);
+       if (rule == NULL)
+               panic("%s: parent rule of anchor not found!", __func__);
+       if (rule->anchor->ruleset.rules[rs].active.rcount > 0)
+               rule = TAILQ_NEXT(rule, entries);
+
+       *ruleset_ptr = ruleset;
+       *rule_ptr = rule;
+}
+
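+/*
+ * Resolve the symbolic references of a freshly copied-in rule (kif,
+ * queue IDs, tags, dynamic/table addresses, anchor, overload table,
+ * address pool) and validate them.  On failure the rule has already
+ * been freed; on success its counters are zeroed and the current
+ * pool address is primed.
+ */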
+static int
+pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule,
+    struct pf_ruleset *ruleset)
+{
+       struct pf_pooladdr      *apa;
+       int                      error = 0;
+
+       if (rule->ifname[0]) {
+               rule->kif = pfi_kif_get(rule->ifname);
+               if (rule->kif == NULL) {
+                       pool_put(&pf_rule_pl, rule);
+                       return (EINVAL);
+               }
+               pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE);
+       }
+#if PF_ALTQ
+       /* set queue IDs */
+       if (altq_allowed && rule->qname[0] != '\0') {
+               if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
+                       error = EBUSY;
+               else if (rule->pqname[0] != '\0') {
+                       if ((rule->pqid =
+                           pf_qname2qid(rule->pqname)) == 0)
                                error = EBUSY;
-               if (rule->rt && !rule->direction)
-                       error = EINVAL;
+               } else
+                       rule->pqid = rule->qid;
+       }
+#endif /* PF_ALTQ */
+       if (rule->tagname[0])
+               if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
+                       error = EBUSY;
+       if (rule->match_tagname[0])
+               if ((rule->match_tag =
+                   pf_tagname2tag(rule->match_tagname)) == 0)
+                       error = EBUSY;
+       if (rule->rt && !rule->direction)
+               error = EINVAL;
 #if PFLOG
-               if (!rule->log)
-                       rule->logif = 0;
-               if (rule->logif >= PFLOGIFS_MAX)
-                       error = EINVAL;
+       if (!rule->log)
+               rule->logif = 0;
+       if (rule->logif >= PFLOGIFS_MAX)
+               error = EINVAL;
 #endif /* PFLOG */
-               if (pf_rtlabel_add(&rule->src.addr) ||
-                   pf_rtlabel_add(&rule->dst.addr))
-                       error = EBUSY;
-               if (pfi_dynaddr_setup(&rule->src.addr, rule->af))
-                       error = EINVAL;
-               if (pfi_dynaddr_setup(&rule->dst.addr, rule->af))
+       if (pf_rtlabel_add(&rule->src.addr) ||
+           pf_rtlabel_add(&rule->dst.addr))
+               error = EBUSY;
+       if (pfi_dynaddr_setup(&rule->src.addr, rule->af))
+               error = EINVAL;
+       if (pfi_dynaddr_setup(&rule->dst.addr, rule->af))
+               error = EINVAL;
+       if (pf_tbladdr_setup(ruleset, &rule->src.addr))
+               error = EINVAL;
+       if (pf_tbladdr_setup(ruleset, &rule->dst.addr))
+               error = EINVAL;
+       if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
+               error = EINVAL;
+       TAILQ_FOREACH(apa, &pf_pabuf, entries)
+               if (pf_tbladdr_setup(ruleset, &apa->addr))
                        error = EINVAL;
-               if (pf_tbladdr_setup(ruleset, &rule->src.addr))
+
+       if (rule->overload_tblname[0]) {
+               if ((rule->overload_tbl = pfr_attach_table(ruleset,
+                   rule->overload_tblname)) == NULL)
                        error = EINVAL;
-               if (pf_tbladdr_setup(ruleset, &rule->dst.addr))
+               else
+                       rule->overload_tbl->pfrkt_flags |=
+                           PFR_TFLAG_ACTIVE;
+       }
+
+       pf_mv_pool(&pf_pabuf, &rule->rpool.list);
+       if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
+           (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
+           (rule->rt > PF_FASTROUTE)) &&
+           (TAILQ_FIRST(&rule->rpool.list) == NULL))
+               error = EINVAL;
+
+       if (error) {
+               pf_rm_rule(NULL, rule);
+               return (error);
+       }
+       rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
+       rule->evaluations = rule->packets[0] = rule->packets[1] =
+           rule->bytes[0] = rule->bytes[1] = 0;
+
+       return (0);
+}
+
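+/*
+ * Handle the rule ioctls on behalf of pfioctl().  DIOCADDRULE stages a
+ * rule on the inactive ruleset; DIOCCHANGERULE, DIOCINSERTRULE and
+ * DIOCDELETERULE operate on the active one; DIOCGETRULES and
+ * DIOCGETRULE report it.
+ */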
+static int
+pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr,
+    struct proc *p)
+{
+       int error = 0;
+
+       switch (cmd) {
+       case DIOCADDRULE: {
+               struct pf_ruleset       *ruleset;
+               struct pf_rule          *rule, *tail;
+               int                     rs_num;
+
+               pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+               pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
+               ruleset = pf_find_ruleset(pr->anchor);
+               if (ruleset == NULL) {
                        error = EINVAL;
-               if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
+                       break;
+               }
+               rs_num = pf_get_ruleset_number(pr->rule.action);
+               if (rs_num >= PF_RULESET_MAX) {
                        error = EINVAL;
-               TAILQ_FOREACH(apa, &pf_pabuf, entries)
-                       if (pf_tbladdr_setup(ruleset, &apa->addr))
-                               error = EINVAL;
-
-               if (rule->overload_tblname[0]) {
-                       if ((rule->overload_tbl = pfr_attach_table(ruleset,
-                           rule->overload_tblname)) == NULL)
-                               error = EINVAL;
-                       else
-                               rule->overload_tbl->pfrkt_flags |=
-                                   PFR_TFLAG_ACTIVE;
+                       break;
                }
-
-               pf_mv_pool(&pf_pabuf, &rule->rpool.list);
-               if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
-                   (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
-                   (rule->rt > PF_FASTROUTE)) &&
-                   (TAILQ_FIRST(&rule->rpool.list) == NULL))
+               if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
                        error = EINVAL;
-
-               if (error) {
-                       pf_rm_rule(NULL, rule);
                        break;
                }
-               rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
-               rule->evaluations = rule->packets[0] = rule->packets[1] =
-                   rule->bytes[0] = rule->bytes[1] = 0;
+               if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) {
+                       error = EBUSY;
+                       break;
+               }
+               if (pr->pool_ticket != ticket_pabuf) {
+                       error = EBUSY;
+                       break;
+               }
+               rule = pool_get(&pf_rule_pl, PR_WAITOK);
+               if (rule == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
+               pf_rule_copyin(&pr->rule, rule, p, minordev);
+#if !INET
+               if (rule->af == AF_INET) {
+                       pool_put(&pf_rule_pl, rule);
+                       error = EAFNOSUPPORT;
+                       break;
+               }
+#endif /* INET */
+#if !INET6
+               if (rule->af == AF_INET6) {
+                       pool_put(&pf_rule_pl, rule);
+                       error = EAFNOSUPPORT;
+                       break;
+               }
+#endif /* INET6 */
+               tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
+                   pf_rulequeue);
+               if (tail)
+                       rule->nr = tail->nr + 1;
+               else
+                       rule->nr = 0;
+
+               if ((error = pf_rule_setup(pr, rule, ruleset)))
+                       break;
+
                TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
                    rule, entries);
                ruleset->rules[rs_num].inactive.rcount++;
+               if (rule->rule_flag & PFRULE_PFM)
+                       pffwrules++;
                break;
        }
 
        case DIOCGETRULES: {
-               struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
                struct pf_ruleset       *ruleset;
                struct pf_rule          *tail;
                int                      rs_num;
@@ -1852,7 +3151,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETRULE: {
-               struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
                struct pf_ruleset       *ruleset;
                struct pf_rule          *rule;
                int                      rs_num, i;
@@ -1907,9 +3205,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCCHANGERULE: {
-               struct pfioc_rule       *pcr = (struct pfioc_rule *)addr;
+               struct pfioc_rule       *pcr = pr;
                struct pf_ruleset       *ruleset;
                struct pf_rule          *oldrule = NULL, *newrule = NULL;
+               struct pf_pooladdr      *pa;
                u_int32_t                nr = 0;
                int                      rs_num;
 
@@ -1959,7 +3258,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                                error = ENOMEM;
                                break;
                        }
-                       pf_rule_copyin(&pcr->rule, newrule, p);
+                       pf_rule_copyin(&pcr->rule, newrule, p, minordev);
 #if !INET
                        if (newrule->af == AF_INET) {
                                pool_put(&pf_rule_pl, newrule);
@@ -1985,20 +3284,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        } else
                                newrule->kif = NULL;
 
-#if ALTQ
+#if PF_ALTQ
                        /* set queue IDs */
-                       if (newrule->qname[0] != 0) {
+                       if (altq_allowed && newrule->qname[0] != '\0') {
                                if ((newrule->qid =
                                    pf_qname2qid(newrule->qname)) == 0)
                                        error = EBUSY;
-                               else if (newrule->pqname[0] != 0) {
+                               else if (newrule->pqname[0] != '\0') {
                                        if ((newrule->pqid =
                                            pf_qname2qid(newrule->pqname)) == 0)
                                                error = EBUSY;
                                } else
                                        newrule->pqid = newrule->qid;
                        }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
                        if (newrule->tagname[0])
                                if ((newrule->tag =
                                    pf_tagname2tag(newrule->tagname)) == 0)
@@ -2112,9 +3411,149 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       case DIOCINSERTRULE: {
+               struct pf_ruleset       *ruleset;
+               struct pf_rule          *rule, *tail, *r;
+               int                     rs_num;
+               int                     is_anchor;
+
+               pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+               pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
+               is_anchor = (pr->anchor_call[0] != '\0');
+
+               if ((ruleset = pf_find_ruleset_with_owner(pr->anchor,
+                   pr->rule.owner, is_anchor, &error)) == NULL)
+                       break;
+
+               rs_num = pf_get_ruleset_number(pr->rule.action);
+               if (rs_num >= PF_RULESET_MAX) {
+                       error = EINVAL;
+                       break;
+               }
+               if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+                       error = EINVAL;
+                       break;
+               }
+
+               /* make sure this anchor rule doesn't exist already */
+               if (is_anchor) {
+                       r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+                       while (r) {
+                               if (r->anchor &&
+                                   ((strcmp(r->anchor->name,
+                                   pr->anchor_call)) == 0)) {
+                                       if (((strcmp(pr->rule.owner,
+                                           r->owner)) == 0) ||
+                                           ((strcmp(r->owner, "")) == 0))
+                                               error = EEXIST;
+                                       else
+                                               error = EPERM;
+                                       break;
+                               }
+                               r = TAILQ_NEXT(r, entries);
+                       }
+               }
+
+               rule = pool_get(&pf_rule_pl, PR_WAITOK);
+               if (rule == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
+               pf_rule_copyin(&pr->rule, rule, p, minordev);
+#if !INET
+               if (rule->af == AF_INET) {
+                       pool_put(&pf_rule_pl, rule);
+                       error = EAFNOSUPPORT;
+                       break;
+               }
+#endif /* INET */
+#if !INET6
+               if (rule->af == AF_INET6) {
+                       pool_put(&pf_rule_pl, rule);
+                       error = EAFNOSUPPORT;
+                       break;
+               }
+#endif /* INET6 */
+               r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+               while ((r != NULL) && (rule->priority >= (unsigned)r->priority))
+                       r = TAILQ_NEXT(r, entries);
+               if (r == NULL) {
+                       if ((tail =
+                           TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
+                           pf_rulequeue)) != NULL)
+                               rule->nr = tail->nr + 1;
+                       else
+                               rule->nr = 0;
+               } else {
+                       rule->nr = r->nr;
+               }
+
+               if ((error = pf_rule_setup(pr, rule, ruleset)))
+                       break;
+
+               if (rule->anchor != NULL)
+                       strncpy(rule->anchor->owner, rule->owner,
+                           PF_OWNER_NAME_SIZE);
+
+               if (r) {
+                       TAILQ_INSERT_BEFORE(r, rule, entries);
+                       while (r && ++r->nr)
+                               r = TAILQ_NEXT(r, entries);
+               } else
+                       TAILQ_INSERT_TAIL(ruleset->rules[rs_num].active.ptr,
+                           rule, entries);
+               ruleset->rules[rs_num].active.rcount++;
+
+               /* Calculate checksum for the main ruleset */
+               if (ruleset == &pf_main_ruleset)
+                       error = pf_setup_pfsync_matching(ruleset);
+
+               pf_ruleset_cleanup(ruleset, rs_num);
+               rule->ticket = ruleset->rules[rs_num].active.ticket;
+
+               pr->rule.ticket = rule->ticket;
+               pf_rule_copyout(rule, &pr->rule);
+               if (rule->rule_flag & PFRULE_PFM)
+                       pffwrules++;
+               break;
+       }
+
+       case DIOCDELETERULE: {
+               pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+               pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
+
+               if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+                       error = EINVAL;
+                       break;
+               }
+
+               if (pr->rule.ticket) {
+                       if ((error = pf_delete_rule_by_ticket(pr)))
+                               break;
+               } else
+                       pf_delete_rule_by_owner(pr->rule.owner);
+               pr->nr = pffwrules;
+               break;
+       }
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
+
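+/*
+ * Handle DIOCCLRSTATES and DIOCKILLSTATES on behalf of pfioctl():
+ * expire states, optionally restricted by interface, address family,
+ * protocol, addresses and ports.
+ */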
+static int
+pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCCLRSTATES: {
                struct pf_state         *s, *nexts;
-               struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
                int                      killed = 0;
 
                psk->psk_ifname[sizeof (psk->psk_ifname) - 1] = '\0';
@@ -2142,7 +3581,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                struct pf_state         *s, *nexts;
                struct pf_state_key     *sk;
                struct pf_state_host    *src, *dst;
-               struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
                int                      killed = 0;
 
                for (s = RB_MIN(pf_state_tree_id, &tree_id); s;
@@ -2167,23 +3605,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                            &psk->psk_dst.addr.v.a.addr,
                            &psk->psk_dst.addr.v.a.mask,
                            &dst->addr, sk->af) &&
-#ifndef NO_APPLE_EXTENSIONS
                            (pf_match_xport(psk->psk_proto,
                            psk->psk_proto_variant, &psk->psk_src.xport,
                            &src->xport)) &&
                            (pf_match_xport(psk->psk_proto,
                            psk->psk_proto_variant, &psk->psk_dst.xport,
                            &dst->xport)) &&
-#else
-                           (psk->psk_src.port_op == 0 ||
-                           pf_match_port(psk->psk_src.port_op,
-                           psk->psk_src.port[0], psk->psk_src.port[1],
-                           src->port)) &&
-                           (psk->psk_dst.port_op == 0 ||
-                           pf_match_port(psk->psk_dst.port_op,
-                           psk->psk_dst.port[0], psk->psk_dst.port[1],
-                           dst->port)) &&
-#endif
                            (!psk->psk_ifname[0] || strcmp(psk->psk_ifname,
                            s->kif->pfik_name) == 0)) {
 #if NPFSYNC
@@ -2199,9 +3626,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
+
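+/*
+ * Handle DIOCADDSTATE and DIOCGETSTATE on behalf of pfioctl(): import
+ * a single state from, or export one to, the pfsync_state
+ * representation shared with userland.
+ */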
+static int
+pfioctl_ioc_state(u_long cmd, struct pfioc_state *ps, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCADDSTATE: {
-               struct pfioc_state      *ps = (struct pfioc_state *)addr;
-               struct pfsync_state     *sp = &ps->state;
+               struct pfsync_state     *sp = &ps->state;
                struct pf_state         *s;
                struct pf_state_key     *sk;
                struct pfi_kif          *kif;
@@ -2217,7 +3658,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        break;
                }
                bzero(s, sizeof (struct pf_state));
-               if ((sk = pf_alloc_state_key(s)) == NULL) {
+               if ((sk = pf_alloc_state_key(s, NULL)) == NULL) {
                        pool_put(&pf_state_pl, s);
                        error = ENOMEM;
                        break;
@@ -2230,10 +3671,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        error = ENOENT;
                        break;
                }
-#ifndef NO_APPLE_EXTENSIONS
                TAILQ_INIT(&s->unlink_hooks);
                s->state_key->app_state = 0;
-#endif
                if (pf_insert_state(kif, s)) {
                        pfi_kif_unref(kif, PFI_KIF_REF_NONE);
                        pool_put(&pf_state_pl, s);
@@ -2246,7 +3685,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETSTATE: {
-               struct pfioc_state      *ps = (struct pfioc_state *)addr;
                struct pf_state         *s;
                struct pf_state_cmp      id_key;
 
@@ -2263,27 +3701,50 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
-       case DIOCGETSTATES: {
-               struct pfioc_states     *ps = (struct pfioc_states *)addr;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
+
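+/*
+ * DIOCGETSTATES: export the state table as an array of pfsync_state
+ * records.  A request with ps_len 0 reports the size needed; otherwise
+ * as many states as fit into the user buffer are copied out.
+ */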
+static int
+pfioctl_ioc_states(u_long cmd, struct pfioc_states_32 *ps32,
+    struct pfioc_states_64 *ps64, struct proc *p)
+{
+       int p64 = proc_is64bit(p);
+       int error = 0;
+
+       switch (cmd) {
+       case DIOCGETSTATES: {           /* struct pfioc_states */
                struct pf_state         *state;
                struct pfsync_state     *pstore;
                user_addr_t              buf;
                u_int32_t                nr = 0;
+               int                      len, size;
 
-               if (ps->ps_len == 0) {
-                       nr = pf_status.states;
-                       ps->ps_len = sizeof (struct pfsync_state) * nr;
+               len = (p64 ? ps64->ps_len : ps32->ps_len);
+               if (len == 0) {
+                       size = sizeof (struct pfsync_state) * pf_status.states;
+                       if (p64)
+                               ps64->ps_len = size;
+                       else
+                               ps32->ps_len = size;
                        break;
                }
 
                pstore = _MALLOC(sizeof (*pstore), M_TEMP, M_WAITOK);
-               buf = PF_USER_ADDR(addr, pfioc_states, ps_buf);
+               if (pstore == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
+               buf = (p64 ? ps64->ps_buf : ps32->ps_buf);
 
                state = TAILQ_FIRST(&state_list);
                while (state) {
                        if (state->timeout != PFTM_UNLINKED) {
-                               if ((nr + 1) * sizeof (*pstore) >
-                                   (unsigned)ps->ps_len)
+                               if ((nr + 1) * sizeof (*pstore) > (unsigned)len)
                                        break;
 
                                pf_state_export(pstore,
@@ -2299,42 +3760,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        state = TAILQ_NEXT(state, entry_list);
                }
 
-               ps->ps_len = sizeof (struct pfsync_state) * nr;
+               size = sizeof (struct pfsync_state) * nr;
+               if (p64)
+                       ps64->ps_len = size;
+               else
+                       ps32->ps_len = size;
 
                _FREE(pstore, M_TEMP);
                break;
        }
 
-       case DIOCGETSTATUS: {
-               struct pf_status *s = (struct pf_status *)addr;
-               bcopy(&pf_status, s, sizeof (struct pf_status));
-               pfi_update_status(s->ifname, s);
-               break;
-       }
-
-       case DIOCSETSTATUSIF: {
-               struct pfioc_if *pi = (struct pfioc_if *)addr;
-
-               if (pi->ifname[0] == 0) {
-                       bzero(pf_status.ifname, IFNAMSIZ);
-                       break;
-               }
-               strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ);
-               break;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
+fail:
+       return (error);
+}
 
-       case DIOCCLRSTATUS: {
-               bzero(pf_status.counters, sizeof (pf_status.counters));
-               bzero(pf_status.fcounters, sizeof (pf_status.fcounters));
-               bzero(pf_status.scounters, sizeof (pf_status.scounters));
-               pf_status.since = pf_calendar_time_second();
-               if (*pf_status.ifname)
-                       pfi_update_status(pf_status.ifname, NULL);
-               break;
-       }
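+/*
+ * DIOCNATLOOK: given the addresses and ports of a flow as observed on
+ * the wire, find the matching state and report the corresponding
+ * addresses and ports on the other side of the translation.
+ */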
+static int
+pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
 
+       switch (cmd) {
        case DIOCNATLOOK: {
-               struct pfioc_natlook    *pnl = (struct pfioc_natlook *)addr;
                struct pf_state_key     *sk;
                struct pf_state         *state;
                struct pf_state_key_cmp  key;
@@ -2342,21 +3793,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 
                key.af = pnl->af;
                key.proto = pnl->proto;
-
-#ifndef NO_APPLE_EXTENSIONS
                key.proto_variant = pnl->proto_variant;
-#endif
 
                if (!pnl->proto ||
                    PF_AZERO(&pnl->saddr, pnl->af) ||
                    PF_AZERO(&pnl->daddr, pnl->af) ||
                    ((pnl->proto == IPPROTO_TCP ||
                    pnl->proto == IPPROTO_UDP) &&
-#ifndef NO_APPLE_EXTENSIONS
                    (!pnl->dxport.port || !pnl->sxport.port)))
-#else
-                   (!pnl->dport || !pnl->sport)))
-#endif
                        error = EINVAL;
                else {
                        /*
@@ -2367,35 +3811,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                         */
                        if (direction == PF_IN) {
                                PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                memcpy(&key.ext.xport, &pnl->dxport,
                                    sizeof (key.ext.xport));
-#else
-                               key.ext.port = pnl->dport;
-#endif
                                PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                memcpy(&key.gwy.xport, &pnl->sxport,
                                    sizeof (key.gwy.xport));
-#else
-                               key.gwy.port = pnl->sport;
-#endif
                                state = pf_find_state_all(&key, PF_IN, &m);
                        } else {
                                PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                memcpy(&key.lan.xport, &pnl->dxport,
                                    sizeof (key.lan.xport));
-#else
-                               key.lan.port = pnl->dport;
-#endif
                                PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                memcpy(&key.ext.xport, &pnl->sxport,
                                    sizeof (key.ext.xport));
-#else
-                               key.ext.port = pnl->sport;
-#endif
                                state = pf_find_state_all(&key, PF_OUT, &m);
                        }
                        if (m > 1)
@@ -2405,37 +3833,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                                if (direction == PF_IN) {
                                        PF_ACPY(&pnl->rsaddr, &sk->lan.addr,
                                            sk->af);
-#ifndef NO_APPLE_EXTENSIONS
                                        memcpy(&pnl->rsxport, &sk->lan.xport,
                                            sizeof (pnl->rsxport));
-#else
-                                       pnl->rsport = sk->lan.port;
-#endif
                                        PF_ACPY(&pnl->rdaddr, &pnl->daddr,
                                            pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                        memcpy(&pnl->rdxport, &pnl->dxport,
                                            sizeof (pnl->rdxport));
-#else
-                                       pnl->rdport = pnl->dport;
-#endif
                                } else {
                                        PF_ACPY(&pnl->rdaddr, &sk->gwy.addr,
                                            sk->af);
-#ifndef NO_APPLE_EXTENSIONS
                                        memcpy(&pnl->rdxport, &sk->gwy.xport,
                                            sizeof (pnl->rdxport));
-#else
-                                       pnl->rdport = sk->gwy.port;
-#endif
                                        PF_ACPY(&pnl->rsaddr, &pnl->saddr,
                                            pnl->af);
-#ifndef NO_APPLE_EXTENSIONS
                                        memcpy(&pnl->rsxport, &pnl->sxport,
                                            sizeof (pnl->rsxport));
-#else
-                                       pnl->rsport = pnl->sport;
-#endif
                                }
                        } else
                                error = ENOENT;
@@ -2443,9 +3855,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
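DIOCNATLOOK is the lookup a transparent proxy performs to recover a connection's pre-rdr destination. A hedged sketch using the Apple-extended sxport/dxport fields seen above (peer/local would come from getpeername(2)/getsockname(2); helper name is hypothetical):

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <net/pfvar.h>
	#include <string.h>

	int
	lookup_original_dst(int pfdev, const struct sockaddr_in *peer,
	    const struct sockaddr_in *local, struct sockaddr_in *orig_dst)
	{
		struct pfioc_natlook pnl;

		memset(&pnl, 0, sizeof (pnl));
		pnl.af = AF_INET;
		pnl.proto = IPPROTO_TCP;
		pnl.direction = PF_OUT;	/* rdr lookups go this way */
		pnl.saddr.v4.s_addr = peer->sin_addr.s_addr;
		pnl.sxport.port = peer->sin_port;	/* network order */
		pnl.daddr.v4.s_addr = local->sin_addr.s_addr;
		pnl.dxport.port = local->sin_port;

		if (ioctl(pfdev, DIOCNATLOOK, &pnl) == -1)
			return (-1);

		memset(orig_dst, 0, sizeof (*orig_dst));
		orig_dst->sin_family = AF_INET;
		orig_dst->sin_len = sizeof (*orig_dst);
		orig_dst->sin_addr.s_addr = pnl.rdaddr.v4.s_addr;
		orig_dst->sin_port = pnl.rdxport.port;
		return (0);
	}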
+
+static int
+pfioctl_ioc_tm(u_long cmd, struct pfioc_tm *pt, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCSETTIMEOUT: {
-               struct pfioc_tm *pt = (struct pfioc_tm *)addr;
-               int              old;
+               int old;
 
                if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
                    pt->seconds < 0) {
@@ -2463,8 +3889,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETTIMEOUT: {
-               struct pfioc_tm *pt = (struct pfioc_tm *)addr;
-
                if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
                        error = EINVAL;
                        goto fail;
@@ -2473,8 +3897,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+fail:
+       return (error);
+}
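A single ioctl drives the timeout handler above; DIOCSETTIMEOUT hands the previous value back in pt.seconds, which is what the saved `old' is for. A minimal sketch, assuming pfdev is an open /dev/pf descriptor:

	#include <sys/ioctl.h>
	#include <net/pfvar.h>
	#include <string.h>

	/* returns the previous tcp.first timeout, or -1 on error */
	int
	pf_set_tcp_first_timeout(int pfdev, int seconds)
	{
		struct pfioc_tm pt;

		memset(&pt, 0, sizeof (pt));
		pt.timeout = PFTM_TCP_FIRST_PACKET;	/* tcp.first */
		pt.seconds = seconds;
		if (ioctl(pfdev, DIOCSETTIMEOUT, &pt) == -1)
			return (-1);
		return (pt.seconds);	/* old value, written back above */
	}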
+
+static int
+pfioctl_ioc_limit(u_long cmd, struct pfioc_limit *pl, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCGETLIMIT: {
-               struct pfioc_limit      *pl = (struct pfioc_limit *)addr;
 
                if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
                        error = EINVAL;
@@ -2485,8 +3923,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCSETLIMIT: {
-               struct pfioc_limit      *pl = (struct pfioc_limit *)addr;
-               int                      old_limit;
+               int old_limit;
 
                if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
                    pf_pool_limits[pl->index].pp == NULL) {
@@ -2501,188 +3938,30 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
-       case DIOCSETDEBUG: {
-               u_int32_t       *level = (u_int32_t *)addr;
-
-               pf_status.debug = *level;
-               break;
-       }
-
-       case DIOCCLRRULECTRS: {
-               /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
-               struct pf_ruleset       *ruleset = &pf_main_ruleset;
-               struct pf_rule          *rule;
-
-               TAILQ_FOREACH(rule,
-                   ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
-                       rule->evaluations = 0;
-                       rule->packets[0] = rule->packets[1] = 0;
-                       rule->bytes[0] = rule->bytes[1] = 0;
-               }
-               break;
-       }
-
-#if ALTQ
-       case DIOCSTARTALTQ: {
-               struct pf_altq          *altq;
-
-               /* enable all altq interfaces on active list */
-               TAILQ_FOREACH(altq, pf_altqs_active, entries) {
-                       if (altq->qname[0] == 0) {
-                               error = pf_enable_altq(altq);
-                               if (error != 0)
-                                       break;
-                       }
-               }
-               if (error == 0)
-                       pf_altq_running = 1;
-               DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
-               break;
-       }
-
-       case DIOCSTOPALTQ: {
-               struct pf_altq          *altq;
-
-               /* disable all altq interfaces on active list */
-               TAILQ_FOREACH(altq, pf_altqs_active, entries) {
-                       if (altq->qname[0] == 0) {
-                               error = pf_disable_altq(altq);
-                               if (error != 0)
-                                       break;
-                       }
-               }
-               if (error == 0)
-                       pf_altq_running = 0;
-               DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
-               break;
-       }
-
-       case DIOCADDALTQ: {
-               struct pfioc_altq       *pa = (struct pfioc_altq *)addr;
-               struct pf_altq          *altq, *a;
-
-               if (pa->ticket != ticket_altqs_inactive) {
-                       error = EBUSY;
-                       break;
-               }
-               altq = pool_get(&pf_altq_pl, PR_WAITOK);
-               if (altq == NULL) {
-                       error = ENOMEM;
-                       break;
-               }
-               pf_altq_copyin(&pa->altq, altq);
-
-               /*
-                * if this is for a queue, find the discipline and
-                * copy the necessary fields
-                */
-               if (altq->qname[0] != 0) {
-                       if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
-                               error = EBUSY;
-                               pool_put(&pf_altq_pl, altq);
-                               break;
-                       }
-                       altq->altq_disc = NULL;
-                       TAILQ_FOREACH(a, pf_altqs_inactive, entries) {
-                               if (strncmp(a->ifname, altq->ifname,
-                                   IFNAMSIZ) == 0 && a->qname[0] == 0) {
-                                       altq->altq_disc = a->altq_disc;
-                                       break;
-                               }
-                       }
-               }
-
-               error = altq_add(altq);
-               if (error) {
-                       pool_put(&pf_altq_pl, altq);
-                       break;
-               }
-
-               TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
-               pf_altq_copyout(altq, &pa->altq);
-               break;
-       }
-
-       case DIOCGETALTQS: {
-               struct pfioc_altq       *pa = (struct pfioc_altq *)addr;
-               struct pf_altq          *altq;
-
-               pa->nr = 0;
-               TAILQ_FOREACH(altq, pf_altqs_active, entries)
-                       pa->nr++;
-               pa->ticket = ticket_altqs_active;
-               break;
-       }
-
-       case DIOCGETALTQ: {
-               struct pfioc_altq       *pa = (struct pfioc_altq *)addr;
-               struct pf_altq          *altq;
-               u_int32_t                nr;
-
-               if (pa->ticket != ticket_altqs_active) {
-                       error = EBUSY;
-                       break;
-               }
-               nr = 0;
-               altq = TAILQ_FIRST(pf_altqs_active);
-               while ((altq != NULL) && (nr < pa->nr)) {
-                       altq = TAILQ_NEXT(altq, entries);
-                       nr++;
-               }
-               if (altq == NULL) {
-                       error = EBUSY;
-                       break;
-               }
-               pf_altq_copyout(altq, &pa->altq);
-               break;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
+fail:
+       return (error);
+}
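pl->index selects an entry of pf_pool_limits[], and DIOCSETLIMIT installs a new hard limit on that entry's zone while reporting the old one back in pl.limit. Sketch of capping the state table, again assuming an open /dev/pf descriptor:

	#include <sys/ioctl.h>
	#include <net/pfvar.h>
	#include <string.h>

	/* returns the previous limit, or -1 on error */
	int
	pf_set_state_limit(int pfdev, unsigned int max_states)
	{
		struct pfioc_limit pl;

		memset(&pl, 0, sizeof (pl));
		pl.index = PF_LIMIT_STATES;
		pl.limit = max_states;
		if (ioctl(pfdev, DIOCSETLIMIT, &pl) == -1)
			return (-1);
		return ((int)pl.limit);	/* old value, written back above */
	}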
 
-       case DIOCCHANGEALTQ:
-               /* CHANGEALTQ not supported yet! */
-               error = ENODEV;
-               break;
-
-       case DIOCGETQSTATS: {
-               struct pfioc_qstats     *pq = (struct pfioc_qstats *)addr;
-               struct pf_altq          *altq;
-               u_int32_t                nr;
-               int                      nbytes;
-
-               if (pq->ticket != ticket_altqs_active) {
-                       error = EBUSY;
-                       break;
-               }
-               nbytes = pq->nbytes;
-               nr = 0;
-               altq = TAILQ_FIRST(pf_altqs_active);
-               while ((altq != NULL) && (nr < pq->nr)) {
-                       altq = TAILQ_NEXT(altq, entries);
-                       nr++;
-               }
-               if (altq == NULL) {
-                       error = EBUSY;
-                       break;
-               }
-               error = altq_getqstats(altq, pq->buf, &nbytes);
-               if (error == 0) {
-                       pq->scheduler = altq->scheduler;
-                       pq->nbytes = nbytes;
-               }
-               break;
-       }
-#endif /* ALTQ */
+static int
+pfioctl_ioc_pooladdr(u_long cmd, struct pfioc_pooladdr *pp, struct proc *p)
+{
+#pragma unused(p)
+       struct pf_pooladdr *pa = NULL;
+       struct pf_pool *pool = NULL;
+       int error = 0;
 
+       switch (cmd) {
        case DIOCBEGINADDRS: {
-               struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
-
                pf_empty_pool(&pf_pabuf);
                pp->ticket = ++ticket_pabuf;
                break;
        }
 
        case DIOCADDADDR: {
-               struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
-
                pp->anchor[sizeof (pp->anchor) - 1] = '\0';
                if (pp->ticket != ticket_pabuf) {
                        error = EBUSY;
@@ -2733,8 +4012,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETADDRS: {
-               struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
-
                pp->nr = 0;
                pp->anchor[sizeof (pp->anchor) - 1] = '\0';
                pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
@@ -2749,7 +4026,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETADDR: {
-               struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
                u_int32_t                nr = 0;
 
                pp->anchor[sizeof (pp->anchor) - 1] = '\0';
@@ -2776,7 +4052,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCCHANGEADDR: {
-               struct pfioc_pooladdr   *pca = (struct pfioc_pooladdr *)addr;
+               struct pfioc_pooladdr   *pca = pp;
                struct pf_pooladdr      *oldpa = NULL, *newpa = NULL;
                struct pf_ruleset       *ruleset;
 
@@ -2886,8 +4162,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
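The ticket handshake enforced above pairs every DIOCADDADDR with the DIOCBEGINADDRS that emptied the staging pool: the begin call returns ++ticket_pabuf in pp.ticket, and an add quoting any other ticket fails with EBUSY. A hedged sketch (pf_addr_wrap layout per pfvar.h; pfdev and the helper name are hypothetical):

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <arpa/inet.h>
	#include <net/pfvar.h>
	#include <err.h>
	#include <string.h>

	void
	stage_pool_address(int pfdev)
	{
		struct pfioc_pooladdr pp;

		memset(&pp, 0, sizeof (pp));
		if (ioctl(pfdev, DIOCBEGINADDRS, &pp) == -1)
			err(1, "DIOCBEGINADDRS");	/* fills pp.ticket */

		pp.af = AF_INET;
		inet_pton(AF_INET, "192.0.2.1", &pp.addr.addr.v.a.addr.v4);
		pp.addr.addr.v.a.mask.v4.s_addr = htonl(0xffffffff);
		if (ioctl(pfdev, DIOCADDADDR, &pp) == -1)	/* same ticket */
			err(1, "DIOCADDADDR");
	}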
+
+static int
+pfioctl_ioc_ruleset(u_long cmd, struct pfioc_ruleset *pr, struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCGETRULESETS: {
-               struct pfioc_ruleset    *pr = (struct pfioc_ruleset *)addr;
                struct pf_ruleset       *ruleset;
                struct pf_anchor        *anchor;
 
@@ -2912,7 +4202,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCGETRULESET: {
-               struct pfioc_ruleset    *pr = (struct pfioc_ruleset *)addr;
                struct pf_ruleset       *ruleset;
                struct pf_anchor        *anchor;
                u_int32_t                nr = 0;
@@ -2945,259 +4234,39 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
-       case DIOCRCLRTABLES: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-
-               if (io->pfrio_esize != 0) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
-                   io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRADDTABLES: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_table)) {
-                       error = ENODEV;
-                       break;
-               }
-               error = pfr_add_tables(buf, io->pfrio_size,
-                   &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRDELTABLES: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_table)) {
-                       error = ENODEV;
-                       break;
-               }
-               error = pfr_del_tables(buf, io->pfrio_size,
-                   &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRGETTABLES: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_table)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_get_tables(&io->pfrio_table, buf,
-                   &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRGETTSTATS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_tstats)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_get_tstats(&io->pfrio_table, buf,
-                   &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRCLRTSTATS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_table)) {
-                       error = ENODEV;
-                       break;
-               }
-               error = pfr_clr_tstats(buf, io->pfrio_size,
-                   &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRSETTFLAGS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_table)) {
-                       error = ENODEV;
-                       break;
-               }
-               error = pfr_set_tflags(buf, io->pfrio_size,
-                   io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
-                   &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRCLRADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-
-               if (io->pfrio_esize != 0) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
-                   io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRADDADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_add_addrs(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
-                   PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRDELADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_del_addrs(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
-                   PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRSETADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_set_addrs(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
-                   &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
-                   PFR_FLAG_USERIOCTL, 0);
-               break;
-       }
-
-       case DIOCRGETADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_get_addrs(&io->pfrio_table, buf,
-                   &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRGETASTATS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_astats)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_get_astats(&io->pfrio_table, buf,
-                   &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRCLRASTATS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_clr_astats(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
-                   PFR_FLAG_USERIOCTL);
-               break;
-       }
-
-       case DIOCRTSTADDRS: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_tst_addrs(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
-                   PFR_FLAG_USERIOCTL);
-               break;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
 
-       case DIOCRINADEFINE: {
-               struct pfioc_table *io = (struct pfioc_table *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_table, pfrio_buffer);
-
-               if (io->pfrio_esize != sizeof (struct pfr_addr)) {
-                       error = ENODEV;
-                       break;
-               }
-               pfr_table_copyin_cleanup(&io->pfrio_table);
-               error = pfr_ina_define(&io->pfrio_table, buf,
-                   io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
-                   io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               break;
-       }
+       return (error);
+}
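DIOCGETRULESETS and DIOCGETRULESET expose the anchor tree one level at a time: the first call counts the children of pr.path, and the second is repeated per index to fetch each name. Sketch (hypothetical helper, open /dev/pf descriptor assumed):

	#include <sys/ioctl.h>
	#include <net/pfvar.h>
	#include <err.h>
	#include <stdio.h>
	#include <string.h>

	void
	print_anchors(int pfdev, const char *path)
	{
		struct pfioc_ruleset pr;
		u_int32_t i, total;

		memset(&pr, 0, sizeof (pr));
		strlcpy(pr.path, path, sizeof (pr.path));
		if (ioctl(pfdev, DIOCGETRULESETS, &pr) == -1)
			err(1, "DIOCGETRULESETS");
		total = pr.nr;
		for (i = 0; i < total; i++) {
			pr.nr = i;	/* select the i-th child anchor */
			if (ioctl(pfdev, DIOCGETRULESET, &pr) == -1)
				err(1, "DIOCGETRULESET");
			printf("%s/%s\n", path, pr.name);
		}
	}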
 
-       case DIOCOSFPADD: {
-               struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
-               error = pf_osfp_add(io);
-               break;
-       }
+static int
+pfioctl_ioc_trans(u_long cmd, struct pfioc_trans_32 *io32,
+    struct pfioc_trans_64 *io64, struct proc *p)
+{
+       int p64 = proc_is64bit(p);
+       int error = 0, esize, size;
+       user_addr_t buf;
 
-       case DIOCOSFPGET: {
-               struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
-               error = pf_osfp_get(io);
-               break;
-       }
+       esize = (p64 ? io64->esize : io32->esize);
+       size = (p64 ? io64->size : io32->size);
+       buf = (p64 ? io64->array : io32->array);
 
+       switch (cmd) {
        case DIOCXBEGIN: {
-               struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioe;
                struct pfr_table        *table;
-               user_addr_t              buf;
                int                      i;
 
-               if (io->esize != sizeof (*ioe)) {
+               if (esize != sizeof (*ioe)) {
                        error = ENODEV;
                        goto fail;
                }
                ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK);
                table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK);
-               buf = PF_USER_ADDR(addr, pfioc_trans, array);
-               for (i = 0; i < io->size; i++, buf += sizeof (*ioe)) {
+               for (i = 0; i < size; i++, buf += sizeof (*ioe)) {
                        if (copyin(buf, ioe, sizeof (*ioe))) {
                                _FREE(table, M_TEMP);
                                _FREE(ioe, M_TEMP);
@@ -3207,19 +4276,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ALTQ:
-#if ALTQ
-                               if (ioe->anchor[0]) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       error = EINVAL;
-                                       goto fail;
-                               }
-                               if ((error = pf_begin_altq(&ioe->ticket))) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       goto fail;
+#if PF_ALTQ
+                               if (altq_allowed) {
+                                       if (ioe->anchor[0]) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               error = EINVAL;
+                                               goto fail;
+                                       }
+                                       error = pf_begin_altq(&ioe->ticket);
+                                       if (error != 0) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               goto fail;
+                                       }
                                }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
                                break;
                        case PF_RULESET_TABLE:
                                bzero(table, sizeof (*table));
@@ -3254,20 +4326,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCXROLLBACK: {
-               struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioe;
                struct pfr_table        *table;
-               user_addr_t              buf;
                int                      i;
 
-               if (io->esize != sizeof (*ioe)) {
+               if (esize != sizeof (*ioe)) {
                        error = ENODEV;
                        goto fail;
                }
                ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK);
                table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK);
-               buf = PF_USER_ADDR(addr, pfioc_trans, array);
-               for (i = 0; i < io->size; i++, buf += sizeof (*ioe)) {
+               for (i = 0; i < size; i++, buf += sizeof (*ioe)) {
                        if (copyin(buf, ioe, sizeof (*ioe))) {
                                _FREE(table, M_TEMP);
                                _FREE(ioe, M_TEMP);
@@ -3277,19 +4346,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ALTQ:
-#if ALTQ
-                               if (ioe->anchor[0]) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       error = EINVAL;
-                                       goto fail;
-                               }
-                               if ((error = pf_rollback_altq(ioe->ticket))) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       goto fail; /* really bad */
+#if PF_ALTQ
+                               if (altq_allowed) {
+                                       if (ioe->anchor[0]) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               error = EINVAL;
+                                               goto fail;
+                                       }
+                                       error = pf_rollback_altq(ioe->ticket);
+                                       if (error != 0) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               goto fail; /* really bad */
+                                       }
                                }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
                                break;
                        case PF_RULESET_TABLE:
                                bzero(table, sizeof (*table));
@@ -3318,22 +4390,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
        }
 
        case DIOCXCOMMIT: {
-               struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioe;
                struct pfr_table        *table;
                struct pf_ruleset       *rs;
-               user_addr_t              _buf, buf;
+               user_addr_t              _buf = buf;
                int                      i;
 
-               if (io->esize != sizeof (*ioe)) {
+               if (esize != sizeof (*ioe)) {
                        error = ENODEV;
                        goto fail;
                }
                ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK);
                table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK);
-               buf = _buf = PF_USER_ADDR(addr, pfioc_trans, array);
                /* first makes sure everything will succeed */
-               for (i = 0; i < io->size; i++, buf += sizeof (*ioe)) {
+               for (i = 0; i < size; i++, buf += sizeof (*ioe)) {
                        if (copyin(buf, ioe, sizeof (*ioe))) {
                                _FREE(table, M_TEMP);
                                _FREE(ioe, M_TEMP);
@@ -3343,21 +4413,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ALTQ:
-#if ALTQ
-                               if (ioe->anchor[0]) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       error = EINVAL;
-                                       goto fail;
-                               }
-                               if (!altqs_inactive_open || ioe->ticket !=
-                                   ticket_altqs_inactive) {
-                                       _FREE(table, M_TEMP);
-                                       _FREE(ioe, M_TEMP);
-                                       error = EBUSY;
-                                       goto fail;
+#if PF_ALTQ
+                               if (altq_allowed) {
+                                       if (ioe->anchor[0]) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               error = EINVAL;
+                                               goto fail;
+                                       }
+                                       if (!altqs_inactive_open ||
+                                           ioe->ticket !=
+                                           ticket_altqs_inactive) {
+                                               _FREE(table, M_TEMP);
+                                               _FREE(ioe, M_TEMP);
+                                               error = EBUSY;
+                                               goto fail;
+                                       }
                                }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
                                break;
                        case PF_RULESET_TABLE:
                                rs = pf_find_ruleset(ioe->anchor);
@@ -3392,7 +4465,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                }
                buf = _buf;
                /* now do the commit - no errors should happen here */
-               for (i = 0; i < io->size; i++, buf += sizeof (*ioe)) {
+               for (i = 0; i < size; i++, buf += sizeof (*ioe)) {
                        if (copyin(buf, ioe, sizeof (*ioe))) {
                                _FREE(table, M_TEMP);
                                _FREE(ioe, M_TEMP);
@@ -3402,13 +4475,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ALTQ:
-#if ALTQ
-                               if ((error = pf_commit_altq(ioe->ticket))) {
+#if PF_ALTQ
+                               if (altq_allowed &&
+                                   (error = pf_commit_altq(ioe->ticket))) {
                                        _FREE(table, M_TEMP);
                                        _FREE(ioe, M_TEMP);
                                        goto fail; /* really bad */
                                }
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
                                break;
                        case PF_RULESET_TABLE:
                                bzero(table, sizeof (*table));
@@ -3436,28 +4510,52 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+fail:
+       return (error);
+}
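Userland still fills a single struct pfioc_trans; the _32/_64 pair above is only the kernel's view of those bytes once esize/size/array have been normalized. The transaction itself is the familiar three-step: DIOCXBEGIN opens a ticketed inactive ruleset per array entry, rules are loaded against those tickets, and DIOCXCOMMIT swaps everything in atomically (DIOCXROLLBACK discards). Sketch for a one-entry transaction (rule loading elided):

	#include <sys/ioctl.h>
	#include <net/pfvar.h>
	#include <err.h>
	#include <string.h>

	void
	reload_filter_rules(int pfdev)
	{
		struct pfioc_trans_e te;
		struct pfioc_trans tr;

		memset(&te, 0, sizeof (te));
		te.rs_num = PF_RULESET_FILTER;	/* main filter ruleset */
		memset(&tr, 0, sizeof (tr));
		tr.size = 1;
		tr.esize = sizeof (te);		/* must match or ENODEV */
		tr.array = &te;

		if (ioctl(pfdev, DIOCXBEGIN, &tr) == -1)
			err(1, "DIOCXBEGIN");	/* te.ticket now valid */
		/* ... rule-load calls quoting te.ticket go here ... */
		if (ioctl(pfdev, DIOCXCOMMIT, &tr) == -1) {
			(void) ioctl(pfdev, DIOCXROLLBACK, &tr);
			err(1, "DIOCXCOMMIT");
		}
	}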
+
+static int
+pfioctl_ioc_src_nodes(u_long cmd, struct pfioc_src_nodes_32 *psn32,
+    struct pfioc_src_nodes_64 *psn64, struct proc *p)
+{
+       int p64 = proc_is64bit(p);
+       int error = 0;
+
+       switch (cmd) {
        case DIOCGETSRCNODES: {
-               struct pfioc_src_nodes  *psn = (struct pfioc_src_nodes *)addr;
                struct pf_src_node      *n, *pstore;
                user_addr_t              buf;
                u_int32_t                nr = 0;
-               int                      space = psn->psn_len;
+               int                      space, size;
 
+               space = (p64 ? psn64->psn_len : psn32->psn_len);
                if (space == 0) {
                        RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
                                nr++;
-                       psn->psn_len = sizeof (struct pf_src_node) * nr;
+
+                       size = sizeof (struct pf_src_node) * nr;
+                       if (p64)
+                               psn64->psn_len = size;
+                       else
+                               psn32->psn_len = size;
                        break;
                }
 
                pstore = _MALLOC(sizeof (*pstore), M_TEMP, M_WAITOK);
-               buf = PF_USER_ADDR(addr, pfioc_src_nodes, psn_buf);
+               if (pstore == NULL) {
+                       error = ENOMEM;
+                       break;
+               }
+               buf = (p64 ? psn64->psn_buf : psn32->psn_buf);
 
                RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
                        uint64_t secs = pf_time_second(), diff;
 
-                       if ((nr + 1) * sizeof (*pstore) >
-                           (unsigned)psn->psn_len)
+                       if ((nr + 1) * sizeof (*pstore) > (unsigned)space)
                                break;
 
                        bcopy(n, pstore, sizeof (*pstore));
@@ -3490,34 +4588,37 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        buf += sizeof (*pstore);
                        nr++;
                }
-               psn->psn_len = sizeof (struct pf_src_node) * nr;
+
+               size = sizeof (struct pf_src_node) * nr;
+               if (p64)
+                       psn64->psn_len = size;
+               else
+                       psn32->psn_len = size;
 
                _FREE(pstore, M_TEMP);
                break;
        }
 
-       case DIOCCLRSRCNODES: {
-               struct pf_src_node      *n;
-               struct pf_state         *state;
-
-               RB_FOREACH(state, pf_state_tree_id, &tree_id) {
-                       state->src_node = NULL;
-                       state->nat_src_node = NULL;
-               }
-               RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
-                       n->expire = 1;
-                       n->states = 0;
-               }
-               pf_purge_expired_src_nodes();
-               pf_status.src_nodes = 0;
-               break;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
+fail:
+       return (error);
 
+}
+
+static int
+pfioctl_ioc_src_node_kill(u_long cmd, struct pfioc_src_node_kill *psnk,
+    struct proc *p)
+{
+#pragma unused(p)
+       int error = 0;
+
+       switch (cmd) {
        case DIOCKILLSRCNODES: {
                struct pf_src_node      *sn;
                struct pf_state         *s;
-               struct pfioc_src_node_kill *psnk =
-                   (struct pfioc_src_node_kill *)addr;
                int                     killed = 0;
 
                RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
@@ -3552,77 +4653,91 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                break;
        }
 
-       case DIOCSETHOSTID: {
-               u_int32_t       *hid = (u_int32_t *)addr;
-
-               if (*hid == 0)
-                       pf_status.hostid = random();
-               else
-                       pf_status.hostid = *hid;
-               break;
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
        }
 
-       case DIOCOSFPFLUSH:
-               pf_osfp_flush();
-               break;
+       return (error);
+}
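A zeroed address/mask pair in psnk_src/psnk_dst matches every node (PF_MATCHA with an all-zero mask), so the smallest useful caller of the handler above clears all source tracking. Sketch with hypothetical names; return-value conventions omitted:

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/pfvar.h>
	#include <err.h>
	#include <string.h>

	void
	kill_all_src_nodes(int pfdev)
	{
		struct pfioc_src_node_kill psnk;

		memset(&psnk, 0, sizeof (psnk));
		psnk.psnk_af = AF_INET;	/* zero masks act as wildcards */
		if (ioctl(pfdev, DIOCKILLSRCNODES, &psnk) == -1)
			err(1, "DIOCKILLSRCNODES");
	}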
+
+static int
+pfioctl_ioc_iface(u_long cmd, struct pfioc_iface_32 *io32,
+    struct pfioc_iface_64 *io64, struct proc *p)
+{
+       int p64 = proc_is64bit(p);
+       int error = 0;
 
+       switch (cmd) {
        case DIOCIGETIFACES: {
-               struct pfioc_iface *io = (struct pfioc_iface *)addr;
-               user_addr_t buf = PF_USER_ADDR(addr, pfioc_iface, pfiio_buffer);
+               user_addr_t buf;
+               int esize;
+
+               buf = (p64 ? io64->pfiio_buffer : io32->pfiio_buffer);
+               esize = (p64 ? io64->pfiio_esize : io32->pfiio_esize);
 
                /* esize must be that of the user space version of pfi_kif */
-               if (io->pfiio_esize != sizeof (struct pfi_uif)) {
+               if (esize != sizeof (struct pfi_uif)) {
                        error = ENODEV;
                        break;
                }
-               io->pfiio_name[sizeof (io->pfiio_name) - 1] = '\0';
-               error = pfi_get_ifaces(io->pfiio_name, buf, &io->pfiio_size);
+               if (p64)
+                       io64->pfiio_name[sizeof (io64->pfiio_name) - 1] = '\0';
+               else
+                       io32->pfiio_name[sizeof (io32->pfiio_name) - 1] = '\0';
+               error = pfi_get_ifaces(
+                   p64 ? io64->pfiio_name : io32->pfiio_name, buf,
+                   p64 ? &io64->pfiio_size : &io32->pfiio_size);
                break;
        }
 
        case DIOCSETIFFLAG: {
-               struct pfioc_iface *io = (struct pfioc_iface *)addr;
+               if (p64)
+                       io64->pfiio_name[sizeof (io64->pfiio_name) - 1] = '\0';
+               else
+                       io32->pfiio_name[sizeof (io32->pfiio_name) - 1] = '\0';
 
-               io->pfiio_name[sizeof (io->pfiio_name) - 1] = '\0';
-               error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
+               error = pfi_set_flags(
+                   p64 ? io64->pfiio_name : io32->pfiio_name,
+                   p64 ? io64->pfiio_flags : io32->pfiio_flags);
                break;
        }
 
        case DIOCCLRIFFLAG: {
-               struct pfioc_iface *io = (struct pfioc_iface *)addr;
+               if (p64)
+                       io64->pfiio_name[sizeof (io64->pfiio_name) - 1] = '\0';
+               else
+                       io32->pfiio_name[sizeof (io32->pfiio_name) - 1] = '\0';
 
-               io->pfiio_name[sizeof (io->pfiio_name) - 1] = '\0';
-               error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
+               error = pfi_clear_flags(
+                   p64 ? io64->pfiio_name : io32->pfiio_name,
+                   p64 ? io64->pfiio_flags : io32->pfiio_flags);
                break;
        }
 
        default:
-               error = ENODEV;
-               break;
+               VERIFY(0);
+               /* NOTREACHED */
        }
-fail:
-       lck_mtx_unlock(pf_lock);
-       lck_rw_done(pf_perim_lock);
 
        return (error);
 }
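Each helper ends in VERIFY(0) because, after this refactoring, the top-level pfioctl() routes every command to exactly one helper, so a helper's default arm is unreachable from userland. An illustrative (not the actual) dispatch shape:

	static int
	pfioctl_dispatch(u_long cmd, caddr_t addr, struct proc *p)
	{
		switch (cmd) {
		case DIOCNATLOOK:
			return (pfioctl_ioc_natlook(cmd,
			    (struct pfioc_natlook *)(void *)addr, p));
		case DIOCSETTIMEOUT:
		case DIOCGETTIMEOUT:
			return (pfioctl_ioc_tm(cmd,
			    (struct pfioc_tm *)(void *)addr, p));
		case DIOCGETLIMIT:
		case DIOCSETLIMIT:
			return (pfioctl_ioc_limit(cmd,
			    (struct pfioc_limit *)(void *)addr, p));
		default:
			return (ENODEV);	/* unknown commands fail here */
		}
	}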
 
 int
 pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp,
-    unsigned int af, int input)
+    unsigned int af, int input, struct ip_fw_args *fwa)
 {
        int error = 0, reentry;
-       struct thread *curthread = current_thread();
        struct mbuf *nextpkt;
 
-       reentry = (ifp->if_pf_curthread == curthread);
+       reentry = net_thread_check_lock(NET_THREAD_HELD_PF);
        if (!reentry) {
                lck_rw_lock_shared(pf_perim_lock);
                if (!pf_is_enabled)
                        goto done;
 
                lck_mtx_lock(pf_lock);
-               ifp->if_pf_curthread = curthread;
+               net_thread_set_lock(NET_THREAD_HELD_PF);
        }
 
        if (mppn != NULL && *mppn != NULL)
@@ -3633,34 +4748,35 @@ pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp,
        switch (af) {
 #if INET
        case AF_INET: {
-               error = pf_inet_hook(ifp, mp, input);
+               error = pf_inet_hook(ifp, mp, input, fwa);
                break;
        }
 #endif /* INET */
 #if INET6
        case AF_INET6:
-               error = pf_inet6_hook(ifp, mp, input);
+               error = pf_inet6_hook(ifp, mp, input, fwa);
                break;
 #endif /* INET6 */
        default:
                break;
        }
 
-       if (nextpkt != NULL) {
-               if (*mp != NULL) {
-                       struct mbuf *m = *mp;
-                       while (m->m_nextpkt != NULL)
-                               m = m->m_nextpkt;
-                       m->m_nextpkt = nextpkt;
-               } else {
-                       *mp = nextpkt;
-               }
+       /* If the packet is valid, link it to the next packet in the chain */
+       if (*mp != NULL && nextpkt != NULL) {
+               struct mbuf *m = *mp;
+               while (m->m_nextpkt != NULL)
+                       m = m->m_nextpkt;
+               m->m_nextpkt = nextpkt;
+       }
+       /* Fix up linkage of previous packet in the chain */
+       if (mppn != NULL) {
+               if (*mp != NULL)
+                       *mppn = *mp;
+               else
+                       *mppn = nextpkt;
        }
-       if (mppn != NULL && *mppn != NULL)
-               *mppn = *mp;
-
        if (!reentry) {
-               ifp->if_pf_curthread = NULL;
+               net_thread_unset_lock(NET_THREAD_HELD_PF);
                lck_mtx_unlock(pf_lock);
        }
 done:
@@ -3673,7 +4789,8 @@ done:
 
 #if INET
 static int
-pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input)
+pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input,
+    struct ip_fw_args *fwa)
 {
        struct mbuf *m = *mp;
 #if BYTE_ORDER != BIG_ENDIAN
@@ -3703,7 +4820,7 @@ pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input)
        HTONS(ip->ip_len);
        HTONS(ip->ip_off);
 #endif
-       if (pf_test(input ? PF_IN : PF_OUT, ifp, mp, NULL) != PF_PASS) {
+       if (pf_test(input ? PF_IN : PF_OUT, ifp, mp, NULL, fwa) != PF_PASS) {
                if (*mp != NULL) {
                        m_freem(*mp);
                        *mp = NULL;
@@ -3727,7 +4844,8 @@ pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input)
 
 #if INET6
 int
-pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input)
+pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input,
+    struct ip_fw_args *fwa)
 {
        int error = 0;
 
@@ -3749,7 +4867,7 @@ pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input)
                }
        }
 
-       if (pf_test6(input ? PF_IN : PF_OUT, ifp, mp, NULL) != PF_PASS) {
+       if (pf_test6(input ? PF_IN : PF_OUT, ifp, mp, NULL, fwa) != PF_PASS) {
                if (*mp != NULL) {
                        m_freem(*mp);
                        *mp = NULL;
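The relinking change in pf_af_hook() separates two concerns the old code conflated: splicing the saved continuation back onto whatever survived the filter, and repointing the previous packet's m_nextpkt even when the head was consumed. A standalone sketch of the rule, with a hypothetical pkt type standing in for mbuf:

	struct pkt { struct pkt *m_nextpkt; };

	static void
	relink(struct pkt **prevp, struct pkt **headp, struct pkt *nextpkt)
	{
		if (*headp != NULL && nextpkt != NULL) {
			struct pkt *m = *headp;
			while (m->m_nextpkt != NULL)	/* tail of survivors */
				m = m->m_nextpkt;
			m->m_nextpkt = nextpkt;
		}
		if (prevp != NULL)	/* head may have been freed by pf */
			*prevp = (*headp != NULL) ? *headp : nextpkt;
	}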
diff --git a/bsd/net/pf_mtag.h b/bsd/net/pf_mtag.h
deleted file mode 100644
index 218ca4e..0000000
--- a/bsd/net/pf_mtag.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _NET_PF_MTAG_H_
-#define _NET_PF_MTAG_H_
-
-#if PF
-#if KERNEL_PRIVATE
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-#define        PF_TAG_GENERATED                0x01
-#define        PF_TAG_FRAGCACHE                0x02
-#define        PF_TAG_TRANSLATE_LOCALHOST      0x04
-
-struct pf_mtag {
-       void            *hdr;           /* saved hdr pos in mbuf, for ECN */
-       unsigned int    rtableid;       /* alternate routing table id */
-       u_int32_t       qid;            /* queue id */
-       u_int16_t       tag;            /* tag id */
-       u_int8_t        flags;
-       u_int8_t        routed;
-};
-
-__private_extern__ struct pf_mtag *pf_find_mtag(struct mbuf *);
-__private_extern__ struct pf_mtag *pf_get_mtag(struct mbuf *);
-
-#ifdef  __cplusplus
-}
-#endif
-#endif /* KERNEL_PRIVATE */
-#endif /* PF */
-#endif /* _NET_PF_MTAG_H_ */
diff --git a/bsd/net/pf_norm.c b/bsd/net/pf_norm.c
index 053cdc13c3aafdb977e1b8d7ac8300b8a6ff3607..69283ce6ebe0b03ccd3f5eb489d11955faede15f 100644
--- a/bsd/net/pf_norm.c
+++ b/bsd/net/pf_norm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <net/pfvar.h>
 
 struct pf_frent {
-       LIST_ENTRY(pf_frent) fr_next;
-       struct ip *fr_ip;
-       struct mbuf *fr_m;
+       LIST_ENTRY(pf_frent)    fr_next;
+       struct mbuf             *fr_m;
+#define fr_ip          fr_u.fru_ipv4
+#define fr_ip6         fr_u.fru_ipv6
+       union {
+               struct ip       *fru_ipv4;
+               struct ip6_hdr  *fru_ipv6;
+       } fr_u;
+       struct ip6_frag         fr_ip6f_opt;
+       int                     fr_ip6f_hlen;
 };
 
 struct pf_frcache {
@@ -108,12 +115,18 @@ struct pf_frcache {
 struct pf_fragment {
        RB_ENTRY(pf_fragment) fr_entry;
        TAILQ_ENTRY(pf_fragment) frag_next;
-       struct in_addr  fr_src;
-       struct in_addr  fr_dst;
+       struct pf_addr  fr_srcx;
+       struct pf_addr  fr_dstx;
        u_int8_t        fr_p;           /* protocol of this fragment */
        u_int8_t        fr_flags;       /* status flags */
-       u_int16_t       fr_id;          /* fragment id for reassemble */
        u_int16_t       fr_max;         /* fragment data max */
+#define fr_id          fr_uid.fru_id4
+#define fr_id6         fr_uid.fru_id6
+       union {
+               u_int16_t       fru_id4;
+               u_int32_t       fru_id6;
+       } fr_uid;
+       int             fr_af;
        u_int32_t       fr_timeout;
 #define fr_queue       fr_u.fru_queue
 #define fr_cache       fr_u.fru_cache
@@ -134,22 +147,29 @@ RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 /* Private prototypes */
+static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
+    struct ip6_frag *);
 static void pf_ip2key(struct pf_fragment *, struct ip *);
 static void pf_remove_fragment(struct pf_fragment *);
 static void pf_flush_fragments(void);
 static void pf_free_fragment(struct pf_fragment *);
-static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
+static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
+    struct pf_frag_tree *);
+static __inline struct pf_fragment *
+    pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
+static __inline struct pf_fragment *
+    pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
+    struct pf_frag_tree *);
 static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
     struct pf_frent *, int);
 static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
     struct pf_fragment **, int, int, int *);
-#ifndef NO_APPLE_MODIFICATIONS
+static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
+    struct pf_frent *, int);
+static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
+    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
 static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
     struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *);
-#else
-static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
-    struct tcphdr *, int, sa_family_t);
-#endif
 
 #define        DPFPRINTF(x) do {                               \
        if (pf_status.debug >= PF_DEBUG_MISC) {         \
@@ -211,18 +231,74 @@ pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 {
        int     diff;
 
-       if ((diff = a->fr_id - b->fr_id))
+       if ((diff = a->fr_af - b->fr_af))
                return (diff);
        else if ((diff = a->fr_p - b->fr_p))
                return (diff);
-       else if (a->fr_src.s_addr < b->fr_src.s_addr)
-               return (-1);
-       else if (a->fr_src.s_addr > b->fr_src.s_addr)
-               return (1);
-       else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
-               return (-1);
-       else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
-               return (1);
+       else {
+               struct pf_addr *sa = &a->fr_srcx;
+               struct pf_addr *sb = &b->fr_srcx;
+               struct pf_addr *da = &a->fr_dstx;
+               struct pf_addr *db = &b->fr_dstx;
+
+               switch (a->fr_af) {
+#ifdef INET
+               case AF_INET:
+                       if ((diff = a->fr_id - b->fr_id))
+                               return (diff);
+                       else if (sa->v4.s_addr < sb->v4.s_addr)
+                               return (-1);
+                       else if (sa->v4.s_addr > sb->v4.s_addr)
+                               return (1);
+                       else if (da->v4.s_addr < db->v4.s_addr)
+                               return (-1);
+                       else if (da->v4.s_addr > db->v4.s_addr)
+                               return (1);
+                       break;
+#endif
+#ifdef INET6
+               case AF_INET6:
+                       /* compare, don't subtract: fr_id6 is unsigned */
+                       if (a->fr_id6 < b->fr_id6)
+                               return (-1);
+                       else if (a->fr_id6 > b->fr_id6)
+                               return (1);
+                       else if (sa->addr32[3] < sb->addr32[3])
+                               return (-1);
+                       else if (sa->addr32[3] > sb->addr32[3])
+                               return (1);
+                       else if (sa->addr32[2] < sb->addr32[2])
+                               return (-1);
+                       else if (sa->addr32[2] > sb->addr32[2])
+                               return (1);
+                       else if (sa->addr32[1] < sb->addr32[1])
+                               return (-1);
+                       else if (sa->addr32[1] > sb->addr32[1])
+                               return (1);
+                       else if (sa->addr32[0] < sb->addr32[0])
+                               return (-1);
+                       else if (sa->addr32[0] > sb->addr32[0])
+                               return (1);
+                       else if (da->addr32[3] < db->addr32[3])
+                               return (-1);
+                       else if (da->addr32[3] > db->addr32[3])
+                               return (1);
+                       else if (da->addr32[2] < db->addr32[2])
+                               return (-1);
+                       else if (da->addr32[2] > db->addr32[2])
+                               return (1);
+                       else if (da->addr32[1] < db->addr32[1])
+                               return (-1);
+                       else if (da->addr32[1] > db->addr32[1])
+                               return (1);
+                       else if (da->addr32[0] < db->addr32[0])
+                               return (-1);
+                       else if (da->addr32[0] > db->addr32[0])
+                               return (1);
+                       break;
+#endif
+               default:
+                       VERIFY(0 && "only IPv4 and IPv6 supported!");
+                       break;
+               }
+       }
        return (0);
 }
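The IPv6 arm above is an unrolled word-by-word comparison, highest word first. A hypothetical loop form of the same ordering (not in the source; struct pf_addr per pfvar.h) makes the intent clearer:

	static int
	pf_addr6_cmp(const struct pf_addr *a, const struct pf_addr *b)
	{
		int i;

		for (i = 3; i >= 0; i--) {	/* addr32[3] compared first */
			if (a->addr32[i] < b->addr32[i])
				return (-1);
			if (a->addr32[i] > b->addr32[i])
				return (1);
		}
		return (0);
	}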
 
@@ -238,7 +314,19 @@ pf_purge_expired_fragments(void)
                if (frag->fr_timeout > expire)
                        break;
 
-               DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+               switch (frag->fr_af) {
+               case AF_INET:
+                     DPFPRINTF(("expiring IPv4 %d(%p) from queue.\n",
+                         ntohs(frag->fr_id), frag));
+                     break;
+               case AF_INET6:
+                     DPFPRINTF(("expiring IPv6 %d(%p) from queue.\n",
+                         ntohl(frag->fr_id6), frag));
+                     break;
+               default:
+                     VERIFY(0 && "only IPv4 and IPv6 supported");
+                     break;
+               }
                pf_free_fragment(frag);
        }
 
@@ -247,7 +335,19 @@ pf_purge_expired_fragments(void)
                if (frag->fr_timeout > expire)
                        break;
 
-               DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+               switch (frag->fr_af) {
+               case AF_INET:
+                     DPFPRINTF(("expiring IPv4 %d(%p) from cache.\n",
+                         ntohs(frag->fr_id), frag));
+                     break;
+               case AF_INET6:
+                     DPFPRINTF(("expiring IPv6 %d(%p) from cache.\n",
+                         ntohl(frag->fr_id6), frag));
+                     break;
+               default:
+                     VERIFY(0 && "only IPv4 and IPv6 supported");
+                     break;
+               }
                pf_free_fragment(frag);
                VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
                    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
@@ -321,24 +421,33 @@ pf_free_fragment(struct pf_fragment *frag)
        pf_remove_fragment(frag);
 }
 
+static void
+pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
+    struct ip6_frag *fh)
+{
+       key->fr_p = fh->ip6f_nxt;
+       key->fr_id6 = fh->ip6f_ident;
+       key->fr_af = AF_INET6;
+       key->fr_srcx.v6 = ip6->ip6_src;
+       key->fr_dstx.v6 = ip6->ip6_dst;
+}
 static void
 pf_ip2key(struct pf_fragment *key, struct ip *ip)
 {
        key->fr_p = ip->ip_p;
        key->fr_id = ip->ip_id;
-       key->fr_src.s_addr = ip->ip_src.s_addr;
-       key->fr_dst.s_addr = ip->ip_dst.s_addr;
+       key->fr_af = AF_INET;
+       key->fr_srcx.v4.s_addr = ip->ip_src.s_addr;
+       key->fr_dstx.v4.s_addr = ip->ip_dst.s_addr;
 }
 
 static struct pf_fragment *
-pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
+pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
 {
-       struct pf_fragment       key;
-       struct pf_fragment      *frag;
-
-       pf_ip2key(&key, ip);
-
-       frag = RB_FIND(pf_frag_tree, tree, &key);
+       struct pf_fragment *frag;
+       
+       frag = RB_FIND(pf_frag_tree, tree, key);
        if (frag != NULL) {
                /* XXX Are we sure we want to update the timeout? */
                frag->fr_timeout = pf_time_second();
@@ -350,9 +459,26 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
                        TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
                }
        }
-
+       
        return (frag);
 }
+  
+static __inline struct pf_fragment *
+pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
+{
+       struct pf_fragment key;
+       pf_ip2key(&key, ip);
+       return pf_find_fragment_by_key(&key, tree);
+}
+
+static __inline struct pf_fragment *
+pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
+    struct pf_frag_tree *tree)
+{
+       struct pf_fragment key;
+       pf_ip6hdr2key(&key, ip6, fh);
+       return pf_find_fragment_by_key(&key, tree);
+}
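pf_ip2key() and pf_ip6hdr2key() marshal a header into a stack key that the shared lookup then hands to RB_FIND; the IPv6 variant also needs the fragment extension header, since that is where the 32-bit identifier lives. A hedged call-site sketch (not standalone; it assumes this file's declarations and an already pulled-up ip6_frag header fh):

    struct pf_fragment *frag;

    /* IPv4: the key is (fr_id, fr_p, fr_af, src, dst) */
    frag = pf_find_fragment_by_ipv4_header(ip, &pf_frag_tree);

    /* IPv6: fr_id6 comes from fh->ip6f_ident, not from ip6_hdr */
    frag = pf_find_fragment_by_ipv6_header(ip6, &fh, &pf_frag_tree);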
 
 /* Removes a fragment from the fragment queue and frees the fragment */
 
@@ -402,8 +528,9 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
 
                (*frag)->fr_flags = 0;
                (*frag)->fr_max = 0;
-               (*frag)->fr_src = frent->fr_ip->ip_src;
-               (*frag)->fr_dst = frent->fr_ip->ip_dst;
+               (*frag)->fr_af = AF_INET;
+               (*frag)->fr_srcx.v4 = frent->fr_ip->ip_src;
+               (*frag)->fr_dstx.v4 = frent->fr_ip->ip_dst;
                (*frag)->fr_p = frent->fr_ip->ip_p;
                (*frag)->fr_id = frent->fr_ip->ip_id;
                (*frag)->fr_timeout = pf_time_second();
@@ -534,8 +661,8 @@ insert:
                m_cat(m, m2);
        }
 
-       ip->ip_src = (*frag)->fr_src;
-       ip->ip_dst = (*frag)->fr_dst;
+       ip->ip_src = (*frag)->fr_srcx.v4;
+       ip->ip_dst = (*frag)->fr_dstx.v4;
 
        /* Remove from fragment queue */
        pf_remove_fragment(*frag);
@@ -600,8 +727,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
 
                (*frag)->fr_flags = PFFRAG_NOBUFFER;
                (*frag)->fr_max = 0;
-               (*frag)->fr_src = h->ip_src;
-               (*frag)->fr_dst = h->ip_dst;
+               (*frag)->fr_af = AF_INET;
+               (*frag)->fr_srcx.v4 = h->ip_src;
+               (*frag)->fr_dstx.v4 = h->ip_dst;
                (*frag)->fr_p = h->ip_p;
                (*frag)->fr_id = h->ip_id;
                (*frag)->fr_timeout = pf_time_second();
@@ -865,6 +993,535 @@ drop_fragment:
        return (NULL);
 }
 
+#define FR_IP6_OFF(fr) \
+       (ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
+#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
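FR_IP6_OFF() hides a unit conversion: ip6f_offlg keeps the fragment offset in 8-octet units in the upper 13 bits of the field, so the host-order value masked by IP6F_OFF_MASK is already the byte offset (units << 3). A standalone illustration with an invented offset; the kernel macro masks in network byte order with the endian-adjusted IP6F_OFF_MASK, which yields the same result:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    #define OFF_MASK 0xfff8         /* host-order view of IP6F_OFF_MASK */

    int
    main(void)
    {
            /* a fragment starting at byte 1504, i.e. 188 8-octet units */
            uint16_t offlg = htons(188 << 3);

            /* what FR_IP6_OFF() computes, modulo the pf_frent indirection */
            printf("%u\n", ntohs(offlg) & OFF_MASK);        /* prints 1504 */
            return (0);
    }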
+struct mbuf *
+pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
+    struct pf_frent *frent, int mff)
+{
+       struct mbuf *m, *m2;
+       struct pf_frent *frea, *frep, *next;
+       struct ip6_hdr *ip6;
+       int plen, off, fr_max;
+       
+       VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
+       m = *m0;
+       frep = NULL;
+       ip6 = frent->fr_ip6;
+       off = FR_IP6_OFF(frent);
+       plen = FR_IP6_PLEN(frent);
+       fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);
+
+       DPFPRINTF(("%p IPv6 frag plen %u off %u fr_ip6f_hlen %u fr_max %u "
+           "m_len %u\n", m, plen, off, frent->fr_ip6f_hlen, fr_max, m->m_len));
+       
+       /* strip off headers up to the fragment payload */
+       m->m_data += frent->fr_ip6f_hlen;
+       m->m_len -= frent->fr_ip6f_hlen;
+       
+       /* Create a new reassembly queue for this packet */
+       if (*frag == NULL) {
+               *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+               if (*frag == NULL) {
+                       pf_flush_fragments();
+                       *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+                       if (*frag == NULL)
+                               goto drop_fragment;
+               }
+               
+               (*frag)->fr_flags = 0;
+               (*frag)->fr_max = 0;
+               (*frag)->fr_af = AF_INET6;
+               (*frag)->fr_srcx.v6 = frent->fr_ip6->ip6_src;
+               (*frag)->fr_dstx.v6 = frent->fr_ip6->ip6_dst;
+               (*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
+               (*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
+               (*frag)->fr_timeout = pf_time_second();
+               LIST_INIT(&(*frag)->fr_queue);
+               
+               RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
+               TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
+               
+               /* We do not have a previous fragment */
+               frep = NULL;
+               goto insert;
+       }
+       
+       /*
+        * Find a fragment after the current one:
+        *  - off contains the real shifted offset.
+        */
+       LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
+               if (FR_IP6_OFF(frea) > off)
+                       break;
+               frep = frea;
+       }
+       
+       VERIFY(frep != NULL || frea != NULL);
+       
+       if (frep != NULL &&
+           FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off)
+       {
+               u_int16_t precut;
+               
+               precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
+                   frep->fr_ip6f_hlen - off;
+               if (precut >= plen)
+                       goto drop_fragment;
+               m_adj(frent->fr_m, precut);
+               DPFPRINTF(("overlap -%d\n", precut));
+               /* Enforce 8 byte boundaries */
+               frent->fr_ip6f_opt.ip6f_offlg =
+                   htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
+                   ((precut >> 3) << 3));
+               off = FR_IP6_OFF(frent);
+               plen -= precut;
+               ip6->ip6_plen = htons(plen);
+       }
+       
+       for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
+               u_int16_t       aftercut;
+               
+               aftercut = plen + off - FR_IP6_OFF(frea);
+               DPFPRINTF(("adjust overlap %d\n", aftercut));
+               if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
+                       frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
+                               aftercut);
+                       frea->fr_ip6f_opt.ip6f_offlg =
+                           htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
+                           ((aftercut >> 3) << 3));
+                       m_adj(frea->fr_m, aftercut);
+                       break;
+               }
+               
+               /* This fragment is completely overlapped, lose it */
+               next = LIST_NEXT(frea, fr_next);
+               m_freem(frea->fr_m);
+               LIST_REMOVE(frea, fr_next);
+               pool_put(&pf_frent_pl, frea);
+               pf_nfrents--;
+       }
+       
+  insert:
+       /* Update maximum data size */
+       if ((*frag)->fr_max < fr_max)
+               (*frag)->fr_max = fr_max;
+       /* This is the last segment */
+       if (!mff)
+               (*frag)->fr_flags |= PFFRAG_SEENLAST;
+       
+       if (frep == NULL)
+               LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+       else
+               LIST_INSERT_AFTER(frep, frent, fr_next);
+       
+       /* Check if we are completely reassembled */
+       if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
+               return (NULL);
+       
+       /* Check if we have all the data */
+       off = 0;
+       for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
+               next = LIST_NEXT(frep, fr_next);
+               off += FR_IP6_PLEN(frep) - (frep->fr_ip6f_hlen - sizeof *ip6);
+               DPFPRINTF(("frep at %d, next %d, max %d\n",
+                       off, next == NULL ? -1 : FR_IP6_OFF(next),
+                       (*frag)->fr_max));
+               if (off < (*frag)->fr_max &&
+                   (next == NULL || FR_IP6_OFF(next) != off)) {
+                       DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
+                           off, next == NULL ? -1 : FR_IP6_OFF(next),
+                           (*frag)->fr_max));
+                       return (NULL);
+               }
+       }
+       DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
+       if (off < (*frag)->fr_max)
+               return (NULL);
+       
+       /* We have all the data */
+       frent = LIST_FIRST(&(*frag)->fr_queue);
+       VERIFY(frent != NULL);
+       if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
+               DPFPRINTF(("drop: too big: %d\n", off));
+               pf_free_fragment(*frag);
+               *frag = NULL;
+               return (NULL);
+       }
+       
+       ip6 = frent->fr_ip6;
+       ip6->ip6_nxt = (*frag)->fr_p;
+       ip6->ip6_plen = htons(off);
+       ip6->ip6_src = (*frag)->fr_srcx.v6;
+       ip6->ip6_dst = (*frag)->fr_dstx.v6;
+       
+       /* Remove from fragment queue */
+       pf_remove_fragment(*frag);
+       *frag = NULL;
+       
+       m = frent->fr_m;
+       m->m_len += sizeof(struct ip6_hdr);
+       m->m_data -= sizeof(struct ip6_hdr);
+       memmove(m->m_data, ip6, sizeof(struct ip6_hdr));
+       
+       next = LIST_NEXT(frent, fr_next);
+       pool_put(&pf_frent_pl, frent);
+       pf_nfrents--;
+       for (frent = next; next != NULL; frent = next) {
+               m2 = frent->fr_m;
+
+               m_cat(m, m2);
+               next = LIST_NEXT(frent, fr_next);
+               pool_put(&pf_frent_pl, frent);
+               pf_nfrents--;
+       }
+       
+       /* XXX this should be done elsewhere */
+       if (m->m_flags & M_PKTHDR) {
+               int pktlen = 0;
+               for (m2 = m; m2; m2 = m2->m_next)
+                       pktlen += m2->m_len;
+               m->m_pkthdr.len = pktlen;
+       }
+       
+       DPFPRINTF(("complete: %p ip6_plen %d m_pkthdr.len %d\n",
+               m, ntohs(ip6->ip6_plen), m->m_pkthdr.len));
+
+       return m;
+       
+ drop_fragment:
+       /* Oops - fail safe - drop packet */
+       pool_put(&pf_frent_pl, frent);
+       --pf_nfrents;
+       m_freem(m);
+       return NULL;
+}
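The precut path above is easier to follow with numbers. A standalone worked example (all values invented): the previous fragment covers bytes [0, 1200) and the new one claims [1184, 2384), so 16 bytes must be trimmed from its head and its offset nudged forward on an 8-octet boundary:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned prev_off = 0, prev_payload = 1200;     /* frep */
            unsigned off = 1184, plen = 1200;               /* frent */

            unsigned precut = prev_off + prev_payload - off;        /* 16 */
            if (precut >= plen)
                    return (1);     /* fully duplicated: drop */

            /* trim the overlap, keeping the mandatory 8-octet alignment */
            off += (precut >> 3) << 3;
            plen -= precut;
            printf("off %u plen %u\n", off, plen);  /* off 1200 plen 1184 */
            return (0);
    }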
+
+static struct mbuf *
+pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
+    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
+{
+       struct mbuf *m = *m0;
+       u_int16_t plen, off, fr_max;
+       struct pf_frcache *frp, *fra, *cur = NULL;
+       int hosed = 0;
+       
+       VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
+       m = *m0;
+       off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
+       plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
+
+       /*
+        * Apple Modification: dimambro@apple.com. The hlen being passed
+        * into this function includes all the headers associated with
+        * the packet, and may include routing headers, so to get to
+        * the data payload as stored in the original IPv6 header we need
+        * to subtract all those headers and the IP header.
+        *
+        * The 'fr_max' local variable should contain the offset from the
+        * start of the reassembled packet to the octet just past the end
+        * of the octets in the current fragment, where:
+        * - 'off' is the offset from the start of the reassembled packet
+        *   to the first octet in the fragment,
+        * - 'plen' is the length of the "payload data length" excluding
+        *   all the IPv6 headers of the fragment,
+        * - 'hlen' is computed in pf_normalize_ip6() as the offset from
+        *   the start of the IPv6 packet to the beginning of the data.
+        */
+       fr_max = off + plen;
+       
+       DPFPRINTF(("%p plen %u off %u fr_max %u\n", m,
+               plen, off, fr_max));
+
+       /* Create a new range queue for this packet */
+       if (*frag == NULL) {
+               *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+               if (*frag == NULL) {
+                       pf_flush_fragments();
+                       *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+                       if (*frag == NULL)
+                               goto no_mem;
+               }
+               
+               /* Get an entry for the queue */
+               cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+               if (cur == NULL) {
+                       pool_put(&pf_cache_pl, *frag);
+                       *frag = NULL;
+                       goto no_mem;
+               }
+               pf_ncache++;
+               
+               (*frag)->fr_flags = PFFRAG_NOBUFFER;
+               (*frag)->fr_max = 0;
+               (*frag)->fr_af = AF_INET6;
+               (*frag)->fr_srcx.v6 = h->ip6_src;
+               (*frag)->fr_dstx.v6 = h->ip6_dst;
+               (*frag)->fr_p = fh->ip6f_nxt;
+               (*frag)->fr_id6 = fh->ip6f_ident;
+               (*frag)->fr_timeout = pf_time_second();
+               
+               cur->fr_off = off;
+               cur->fr_end = fr_max;
+               LIST_INIT(&(*frag)->fr_cache);
+               LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
+               
+               RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
+               TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
+               
+               DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
+                   off, fr_max));
+               
+               goto pass;
+       }
+       
+       /*
+        * Find a fragment after the current one:
+        *  - off contains the real shifted offset.
+        */
+       frp = NULL;
+       LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
+               if (fra->fr_off > off)
+                       break;
+               frp = fra;
+       }
+       
+       VERIFY(frp != NULL || fra != NULL);
+       
+       if (frp != NULL) {
+               int precut;
+               
+               precut = frp->fr_end - off;
+               if (precut >= plen) {
+                       /* Fragment is entirely a duplicate */
+                       DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
+                           ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
+                           off, fr_max));
+                       goto drop_fragment;
+               }
+               if (precut == 0) {
+                       /* They are adjacent.  Fixup cache entry */
+                       DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
+                           ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
+                           off, fr_max));
+                       frp->fr_end = fr_max;
+               } else if (precut > 0) {
+                       /* The first part of this payload overlaps with a
+                        * fragment that has already been passed.
+                        * Need to trim off the first part of the payload.
+                        * But to do so easily, we need to create another
+                        * mbuf to throw the original header into.
+                        */
+                       
+                       DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
+                           ntohl(fh->ip6f_ident), precut, frp->fr_off,
+                           frp->fr_end, off, fr_max));
+                       
+                       off += precut;
+                       fr_max -= precut;
+                       /* Update the previous frag to encompass this one */
+                       frp->fr_end = fr_max;
+                       
+                       if (!drop) {
+                               /* XXX Optimization opportunity
+                                * This is a very heavy way to trim the payload.
+                                * we could do it much faster by diddling mbuf
+                                * internals but that would be even less legible
+                                * than this mbuf magic.  For my next trick,
+                                * I'll pull a rabbit out of my laptop.
+                                */
+                               *m0 = m_copym(m, 0, hlen, M_NOWAIT);
+                               if (*m0 == NULL)
+                                       goto no_mem;
+                               VERIFY((*m0)->m_next == NULL);
+                               m_adj(m, precut + hlen);
+                               m_cat(*m0, m);
+                               m = *m0;
+                               if (m->m_flags & M_PKTHDR) {
+                                       int pktlen = 0;
+                                       struct mbuf *t;
+                                       for (t = m; t; t = t->m_next)
+                                               pktlen += t->m_len;
+                                       m->m_pkthdr.len = pktlen;
+                               }
+                               
+                               h = mtod(m, struct ip6_hdr *);
+                               
+                               VERIFY((int)m->m_len ==
+                                   ntohs(h->ip6_plen) - precut);
+                               fh->ip6f_offlg =
+                                   (fh->ip6f_offlg & ~IP6F_OFF_MASK) |
+                                   htons(ntohs(fh->ip6f_offlg &
+                                   IP6F_OFF_MASK) + ((precut >> 3) << 3));
+                               h->ip6_plen = htons(ntohs(h->ip6_plen) -
+                                   precut);
+                       } else {
+                               hosed++;
+                       }
+               } else {
+                       /* There is a gap between fragments */
+                       
+                       DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
+                           ntohl(fh->ip6f_ident), -precut, frp->fr_off,
+                           frp->fr_end, off, fr_max));
+                       
+                       cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+                       if (cur == NULL)
+                               goto no_mem;
+                       pf_ncache++;
+                       
+                       cur->fr_off = off;
+                       cur->fr_end = fr_max;
+                       LIST_INSERT_AFTER(frp, cur, fr_next);
+               }
+       }
+       
+       if (fra != NULL) {
+               int     aftercut;
+               int     merge = 0;
+               
+               aftercut = fr_max - fra->fr_off;
+               if (aftercut == 0) {
+                       /* Adjacent fragments */
+                       DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
+                           ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
+                           fra->fr_end));
+                       fra->fr_off = off;
+                       merge = 1;
+               } else if (aftercut > 0) {
+                       /* Need to chop off the tail of this fragment */
+                       DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
+                           ntohl(fh->ip6f_ident), aftercut, off, fr_max,
+                           fra->fr_off, fra->fr_end));
+                       fra->fr_off = off;
+                       fr_max -= aftercut;
+                       
+                       merge = 1;
+                       
+                       if (!drop) {
+                               m_adj(m, -aftercut);
+                               if (m->m_flags & M_PKTHDR) {
+                                       int pktlen = 0;
+                                       struct mbuf *t;
+                                       for (t = m; t; t = t->m_next)
+                                               pktlen += t->m_len;
+                                       m->m_pkthdr.len = pktlen;
+                               }
+                               h = mtod(m, struct ip6_hdr *);
+                               VERIFY((int)m->m_len ==
+                                   ntohs(h->ip6_plen) - aftercut);
+                               h->ip6_plen =
+                                   htons(ntohs(h->ip6_plen) - aftercut);
+                       } else {
+                               hosed++;
+                       }
+               } else if (frp == NULL) {
+                       /* There is a gap between fragments */
+                       DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
+                           ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
+                           fra->fr_off, fra->fr_end));
+                       
+                       cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+                       if (cur == NULL)
+                               goto no_mem;
+                       pf_ncache++;
+                       
+                       cur->fr_off = off;
+                       cur->fr_end = fr_max;
+                       LIST_INSERT_BEFORE(fra, cur, fr_next);
+               }
+               
+               /* Need to glue together two separate fragment descriptors */
+               if (merge) {
+                       if (cur && fra->fr_off <= cur->fr_end) {
+                               /* Need to merge in a previous 'cur' */
+                               DPFPRINTF(("frag6cache[%u]: adjacent(merge "
+                                   "%d-%d) %d-%d (%d-%d)\n",
+                                   ntohl(fh->ip6f_ident), cur->fr_off,
+                                   cur->fr_end, off, fr_max, fra->fr_off,
+                                   fra->fr_end));
+                               fra->fr_off = cur->fr_off;
+                               LIST_REMOVE(cur, fr_next);
+                               pool_put(&pf_cent_pl, cur);
+                               pf_ncache--;
+                               cur = NULL;
+                       } else if (frp && fra->fr_off <= frp->fr_end) {
+                               /* Need to merge in a modified 'frp' */
+                               VERIFY(cur == NULL);
+                               DPFPRINTF(("frag6cache[%u]: adjacent(merge "
+                                   "%d-%d) %d-%d (%d-%d)\n",
+                                   ntohl(fh->ip6f_ident), frp->fr_off,
+                                   frp->fr_end, off, fr_max, fra->fr_off,
+                                   fra->fr_end));
+                               fra->fr_off = frp->fr_off;
+                               LIST_REMOVE(frp, fr_next);
+                               pool_put(&pf_cent_pl, frp);
+                               pf_ncache--;
+                               frp = NULL;
+                       }
+               }
+       }
+       
+       if (hosed) {
+               /*
+                * We must keep tracking the overall fragment even when
+                * we're going to drop it anyway so that we know when to
+                * free the overall descriptor.  Thus we drop the frag late.
+                */
+               goto drop_fragment;
+       }
+       
+ pass:
+       /* Update maximum data size */
+       if ((*frag)->fr_max < fr_max)
+               (*frag)->fr_max = fr_max;
+       
+       /* This is the last segment */
+       if (!mff)
+               (*frag)->fr_flags |= PFFRAG_SEENLAST;
+       
+       /* Check if we are completely reassembled */
+       if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
+           LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
+           LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
+               /* Remove from fragment queue */
+               DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
+                   ntohl(fh->ip6f_ident), (*frag)->fr_max));
+               pf_free_fragment(*frag);
+               *frag = NULL;
+       }
+       
+       return (m);
+       
+ no_mem:
+       *nomem = 1;
+       
+       /* Still need to pay attention to !IP6F_MORE_FRAG */
+       if (!mff && *frag != NULL)
+               (*frag)->fr_flags |= PFFRAG_SEENLAST;
+       
+       m_freem(m);
+       return (NULL);
+       
+ drop_fragment:
+       
+       /* Still need to pay attention to !IP6F_MORE_FRAG */
+       if (!mff && *frag != NULL)
+               (*frag)->fr_flags |= PFFRAG_SEENLAST;
+       
+       if (drop) {
+               /* This fragment has been deemed bad.  Don't reass */
+               if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
+                       DPFPRINTF(("frag6cache[%u]: dropping overall "
+                           "fragment\n", ntohl(fh->ip6f_ident)));
+               (*frag)->fr_flags |= PFFRAG_DROP;
+       }
+       
+       m_freem(m);
+       return (NULL);
+}
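pf_frag6cache() never buffers payload; it only records which byte ranges of the reassembled packet have been seen, as a sorted list of pf_frcache entries that get merged once they touch. A toy standalone model of that bookkeeping, with a fixed array standing in for the pf_cent_pl pool:

    #include <stdio.h>

    struct range { unsigned off, end; };

    static struct range seen[16];
    static int nseen;

    static void
    note_fragment(unsigned off, unsigned end)
    {
            int i, j;

            /* insert keeping the list sorted by offset */
            for (i = 0; i < nseen && seen[i].off < off; i++)
                    ;
            for (j = nseen; j > i; j--)
                    seen[j] = seen[j - 1];
            seen[i] = (struct range){ off, end };
            nseen++;

            /* merge neighbours that now touch or overlap */
            for (i = 0; i + 1 < nseen; ) {
                    if (seen[i + 1].off <= seen[i].end) {
                            if (seen[i + 1].end > seen[i].end)
                                    seen[i].end = seen[i + 1].end;
                            for (j = i + 1; j + 1 < nseen; j++)
                                    seen[j] = seen[j + 1];
                            nseen--;
                    } else
                            i++;
            }
    }

    int
    main(void)
    {
            note_fragment(0, 1200);
            note_fragment(2400, 3600);
            note_fragment(1200, 2400);      /* bridges the gap */
            printf("%d range(s): [%u, %u)\n", nseen, seen[0].off,
                seen[0].end);               /* 1 range(s): [0, 3600) */
            return (0);
    }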
+
 int
 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
     struct pf_pdesc *pd)
@@ -969,8 +1626,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
        if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
                /* Fully buffer all of the fragments */
 
-               frag = pf_find_fragment(h, &pf_frag_tree);
-
+               frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
                /* Check if we saw the last fragment already */
                if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
                    fr_max > frag->fr_max)
@@ -987,8 +1643,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
                frent->fr_m = m;
 
                /* Might return a completely reassembled mbuf, or NULL */
-               DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff,
-                   fr_max));
+               DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
+                   fragoff, fr_max));
                *m0 = m = pf_reassemble(m0, &frag, frent, mff);
 
                if (m == NULL)
@@ -1014,7 +1670,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
                /* non-buffering fragment cache (drops or masks overlaps) */
                int     nomem = 0;
 
-               if (dir == PF_OUT && (pd->pf_mtag->flags & PF_TAG_FRAGCACHE)) {
+               if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
                        /*
                         * Already passed the fragment cache in the
                         * input direction.  If we continued, it would
@@ -1023,7 +1679,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
                        goto fragment_pass;
                }
 
-               frag = pf_find_fragment(h, &pf_cache_tree);
+               frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
 
                /* Check if we saw the last fragment already */
                if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
@@ -1054,7 +1710,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
                }
 #endif
                if (dir == PF_IN)
-                       pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
+                       pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
 
                if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
                        goto drop;
@@ -1117,7 +1773,7 @@ drop:
        return (PF_DROP);
 
 bad:
-       DPFPRINTF(("dropping bad fragment\n"));
+       DPFPRINTF(("dropping bad IPv4 fragment\n"));
 
        /* Free associated fragments */
        if (frag != NULL)
@@ -1152,6 +1808,10 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
        u_int16_t                fragoff = 0;
        u_int8_t                 proto;
        int                      terminal;
+       struct pf_frent         *frent;
+       struct pf_fragment      *pff = NULL;
+       int                      mff = 0, rh_cnt = 0;
+       u_int16_t                fr_max;
        int                      asd = 0;
        struct pf_ruleset       *ruleset = NULL;
 
@@ -1203,6 +1863,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
        proto = h->ip6_nxt;
        terminal = 0;
        do {
+               pd->proto = proto;
                switch (proto) {
                case IPPROTO_FRAGMENT:
                        goto fragment;
@@ -1213,19 +1874,20 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
                        if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
                            NULL, AF_INET6))
                                goto shortpkt;
-#ifndef NO_APPLE_EXTENSIONS
                        /*
                         * <jhw@apple.com>
+                        * Multiple routing headers not allowed.
                         * Routing header type zero considered harmful.
                         */
                        if (proto == IPPROTO_ROUTING) {
                                const struct ip6_rthdr *rh =
                                    (const struct ip6_rthdr *)&ext;
+                               if (rh_cnt++)
+                                       goto drop;
                                if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
                                        goto drop;
                        }
                        else
-#endif
                        if (proto == IPPROTO_AH)
                                off += (ext.ip6e_len + 2) * 4;
                        else
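Two length formulas appear in this extension-header walk: for IPPROTO_AH the ip6e_len field counts 32-bit words minus two (RFC 4302), while the else arm that continues past this hunk advances by 8-octet units, (ip6e_len + 1) * 8 (RFC 2460). A standalone check of both conversions, header values invented:

    #include <stdio.h>

    int
    main(void)
    {
            /* AH with ip6e_len == 4:  (4 + 2) * 4 = 24 bytes */
            unsigned ah_len = 4;
            printf("AH header: %u bytes\n", (ah_len + 2) * 4);

            /* any other extension header with ip6e_len == 2:
             * (2 + 1) * 8 = 24 bytes */
            unsigned ext_len = 2;
            printf("ext header: %u bytes\n", (ext_len + 1) * 8);
            return (0);
    }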
@@ -1311,32 +1973,110 @@ fragment:
        if (!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
                goto shortpkt;
        fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
-       if (fragoff + (plen - off - sizeof (frag)) > IPV6_MAXPACKET)
-               goto badfrag;
-
-       /* do something about it */
-       /* remember to set pd->flags |= PFDESC_IP_REAS */
+       pd->proto = frag.ip6f_nxt;
+       mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
+       off += sizeof frag;
+       if (fragoff + (plen - off) > IPV6_MAXPACKET)
+              goto badfrag;
+       
+       fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
+       DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u "
+           "fr_max %u\n", m, plen, mff, off, fragoff, fr_max));
+       
+       if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
+               /* Fully buffer all of the fragments */
+               pd->flags |= PFDESC_IP_REAS;
+               
+               pff = pf_find_fragment_by_ipv6_header(h, &frag,
+                  &pf_frag_tree);
+               
+               /* Check if we saw the last fragment already */
+               if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
+                   fr_max > pff->fr_max)
+                       goto badfrag;
+               
+               /* Get an entry for the fragment queue */
+               frent = pool_get(&pf_frent_pl, PR_NOWAIT);
+               if (frent == NULL) {
+                       REASON_SET(reason, PFRES_MEMORY);
+                       return (PF_DROP);
+               }
+               pf_nfrents++;
+               frent->fr_ip6 = h;
+               frent->fr_m = m;
+               frent->fr_ip6f_opt = frag;
+               frent->fr_ip6f_hlen = off;
+               
+               /* Might return a completely reassembled mbuf, or NULL */
+               DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
+                    ntohl(frag.ip6f_ident), fragoff, fr_max));
+               *m0 = m = pf_reassemble6(m0, &pff, frent, mff);
+               
+               if (m == NULL)
+                       return (PF_DROP);
+               
+               if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
+                       goto drop;
+               
+               h = mtod(m, struct ip6_hdr *);
+       }
+       else if (dir == PF_IN || !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
+               /* non-buffering fragment cache (overlaps: see RFC 5722) */
+               int nomem = 0;
+               
+               pff = pf_find_fragment_by_ipv6_header(h, &frag,
+                   &pf_cache_tree);
+               
+               /* Check if we saw the last fragment already */
+               if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
+                   fr_max > pff->fr_max) {
+                      if (r->rule_flag & PFRULE_FRAGDROP)
+                               pff->fr_flags |= PFFRAG_DROP;
+                      goto badfrag;
+               }
+               
+               *m0 = m = pf_frag6cache(m0, h, &frag, &pff, off, mff,
+                    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
+               if (m == NULL) {
+                       if (nomem)
+                               goto no_mem;
+                       goto drop;
+               }
+               
+               if (dir == PF_IN)
+                       pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
+               
+               if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
+                       goto drop;
+       }
+       
+       /* Enforce a minimum ttl, may cause endless packet loops */
+       if (r->min_ttl && h->ip6_hlim < r->min_ttl)
+               h->ip6_hlim = r->min_ttl;
        return (PF_PASS);
 
-shortpkt:
+  no_mem:
+       REASON_SET(reason, PFRES_MEMORY);
+       goto dropout;
+       
+  shortpkt:
        REASON_SET(reason, PFRES_SHORT);
-       if (r != NULL && r->log)
-               PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
-                   NULL, NULL, pd);
-       return (PF_DROP);
-
-drop:
+       goto dropout;
+       
+  drop:
        REASON_SET(reason, PFRES_NORM);
-       if (r != NULL && r->log)
-               PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
-                   NULL, NULL, pd);
-       return (PF_DROP);
-
-badfrag:
+       goto dropout;
+       
+  badfrag:
+       DPFPRINTF(("dropping bad IPv6 fragment\n"));
        REASON_SET(reason, PFRES_FRAG);
+       goto dropout;
+       
+  dropout:
+       if (pff != NULL)
+               pf_free_fragment(pff);
        if (r != NULL && r->log)
-               PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
-                   NULL, NULL, pd);
+               PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
        return (PF_DROP);
 }
 #endif /* INET6 */
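The fr_max arithmetic above mixes three reference points: fragoff counts from the start of the reassembled payload, plen from the end of the fixed IPv6 header, and off from the start of the packet. A standalone worked example with invented values matching the common layout (40-byte base header plus an 8-byte fragment header):

    #include <stdio.h>

    int
    main(void)
    {
            unsigned off = 48;      /* base header + fragment header */
            unsigned plen = 1456;   /* ip6_plen, past the base header */
            unsigned fragoff = 1504;

            /* end of this fragment in the reassembled packet:
             * 1504 + 1456 - (48 - 40) = 2952, as fr_max above */
            printf("fr_max %u\n", fragoff + plen - (off - 40));
            return (0);
    }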
@@ -1354,12 +2094,10 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
        u_int8_t         flags;
        sa_family_t      af = pd->af;
        struct pf_ruleset *ruleset = NULL;
-#ifndef NO_APPLE_EXTENSIONS
        union pf_state_xport sxport, dxport;
 
        sxport.port = th->th_sport;
        dxport.port = th->th_dport;
-#endif
 
        r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
        while (r != NULL) {
@@ -1375,26 +2113,16 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
                else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
                    r->src.neg, kif))
                        r = r->skip[PF_SKIP_SRC_ADDR].ptr;
-#ifndef NO_APPLE_EXTENSIONS
                else if (r->src.xport.range.op &&
                    !pf_match_xport(r->src.xport.range.op, r->proto_variant,
                    &r->src.xport, &sxport))
-#else
-               else if (r->src.port_op && !pf_match_port(r->src.port_op,
-                   r->src.port[0], r->src.port[1], th->th_sport))
-#endif
                        r = r->skip[PF_SKIP_SRC_PORT].ptr;
                else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
                    r->dst.neg, NULL))
                        r = r->skip[PF_SKIP_DST_ADDR].ptr;
-#ifndef NO_APPLE_EXTENSIONS
                else if (r->dst.xport.range.op &&
                    !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
                    &r->dst.xport, &dxport))
-#else
-               else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
-                   r->dst.port[0], r->dst.port[1], th->th_dport))
-#endif
                        r = r->skip[PF_SKIP_DST_PORT].ptr;
                else if (r->os_fingerprint != PF_OSFP_ANY &&
                    !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th),
@@ -1469,7 +2197,6 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
        }
 
        /* copy back packet headers if we sanitized */
-#ifndef NO_APPLE_EXTENSIONS
        /* Process options */
        if (r->max_mss) {
                int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off,
@@ -1492,14 +2219,6 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
 
                m_copyback(mw, off, sizeof (*th), th);
        }
-#else
-       /* Process options */
-       if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
-               rewrite = 1;
-
-       if (rewrite)
-               m_copyback(m, off, sizeof (*th), th);
-#endif
 
        return (PF_PASS);
 
@@ -1721,7 +2440,6 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
                }
                if (copyback) {
                        /* Copyback the options, caller copys back header */
-#ifndef NO_APPLE_EXTENSIONS
                        int optoff = off + sizeof (*th);
                        int optlen = (th->th_off << 2) - sizeof (*th);
                        m = pf_lazy_makewritable(pd, m, optoff + optlen);
@@ -1731,12 +2449,6 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
                        }
                        *writeback = optoff + optlen;
                        m_copyback(m, optoff, optlen, hdr + sizeof (*th));
-#else
-                       *writeback = 1;
-                       m_copyback(m, off + sizeof (struct tcphdr),
-                           (th->th_off << 2) - sizeof (struct tcphdr), hdr +
-                           sizeof (struct tcphdr));
-#endif
                }
        }
 
@@ -2012,7 +2724,6 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
        return (0);
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 static int
 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
     struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off,
@@ -2020,12 +2731,6 @@ pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
 {
 #pragma unused(dir, kif)
        sa_family_t af = pd->af;
-#else
-static int
-pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
-    int off, sa_family_t af)
-{
-#endif
        u_int16_t       *mss;
        int             thoff;
        int             opt, cnt, optlen = 0;
@@ -2036,15 +2741,9 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
        thoff = th->th_off << 2;
        cnt = thoff - sizeof (struct tcphdr);
 
-#ifndef NO_APPLE_MODIFICATIONS
        if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
            NULL, NULL, af))
                return PF_DROP;
-#else
-       if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt,
-           NULL, NULL, af))
-               return (rewrite);
-#endif
 
        for (; cnt > 0; cnt -= optlen, optp += optlen) {
                opt = optp[0];
@@ -2061,9 +2760,8 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
                }
                switch (opt) {
                case TCPOPT_MAXSEG:
-                       mss = (u_int16_t *)(optp + 2);
+                       mss = (u_int16_t *)(void *)(optp + 2);
                        if ((ntohs(*mss)) > r->max_mss) {
-#ifndef NO_APPLE_MODIFICATIONS
                                /*
                                 * <jhw@apple.com>
                                 *  Only do the TCP checksum fixup if delayed
@@ -2073,10 +2771,6 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
                                    !(m->m_pkthdr.csum_flags & CSUM_TCP))
                                        th->th_sum = pf_cksum_fixup(th->th_sum,
                                            *mss, htons(r->max_mss), 0);
-#else
-                               th->th_sum = pf_cksum_fixup(th->th_sum,
-                                   *mss, htons(r->max_mss), 0);
-#endif
                                *mss = htons(r->max_mss);
                                rewrite = 1;
                        }
@@ -2086,7 +2780,6 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
                }
        }
 
-#ifndef NO_APPLE_MODIFICATIONS
        if (rewrite) {
                struct mbuf *mw;
                u_short reason;
@@ -2106,10 +2799,4 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
        }
 
        return PF_PASS;
-#else
-       if (rewrite)
-               m_copyback(m, off + sizeof (*th), thoff - sizeof (*th), opts);
-
-       return (rewrite);
-#endif
 }
index 89d71e8897835964a73da9d82c7faf47b9b51715..e04a94e0fd6aa634ad48ffcfe1199f32d19999c4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -109,7 +109,7 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off,
            pd->af))
                return (NULL);
 
-       return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr));
+       return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)(void *)hdr));
 }
 
 struct pf_osfp_enlist *
index 2ac10962e62291776241c35f25a5817cbb93bb25..27121f77902d87ec041ca6ad1e164c80a41b9c1a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -168,6 +168,11 @@ pf_get_ruleset_number(u_int8_t action)
        case PF_NORDR:
                return (PF_RULESET_RDR);
                break;
+#if DUMMYNET
+       case PF_DUMMYNET:
+       case PF_NODUMMYNET:
+               return (PF_RULESET_DUMMYNET);
+#endif /* DUMMYNET */
        default:
                return (PF_RULESET_MAX);
                break;
@@ -217,6 +222,29 @@ pf_find_ruleset(const char *path)
                return (&anchor->ruleset);
 }
 
+struct pf_ruleset *
+pf_find_ruleset_with_owner(const char *path, const char *owner, int is_anchor,
+    int *error)
+{
+       struct pf_anchor        *anchor;
+
+       while (*path == '/')
+               path++;
+       if (!*path)
+               return (&pf_main_ruleset);
+       anchor = pf_find_anchor(path);
+       if (anchor == NULL) {
+               *error = EINVAL;
+               return (NULL);
+       } else {
+               if ((owner && anchor->owner && (!strcmp(owner, anchor->owner)))
+                   || (is_anchor && !strcmp(anchor->owner, "")))
+                       return (&anchor->ruleset);
+               *error = EPERM;
+               return NULL;
+       }
+}
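pf_find_ruleset_with_owner() layers an ownership check over the plain lookup: the caller gets the ruleset only if its owner string matches the anchor's, or if it asks for an anchor that nobody owns; otherwise *error distinguishes a missing anchor (EINVAL) from a foreign one (EPERM). A hedged call-site sketch (not standalone; caller and anchor names invented, error handling abbreviated):

    int error = 0;
    struct pf_ruleset *rs;

    rs = pf_find_ruleset_with_owner("example.anchor",
        "com.example.agent", 0 /* not requesting an anchor */, &error);
    if (rs == NULL)
            return (error);         /* EINVAL or EPERM, per above */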
+
 struct pf_ruleset *
 pf_find_or_create_ruleset(const char *path)
 {
index ea3b529f5cf45940f4ce9e674be4904a31d319f3..427cc65670670140ce2ec2c33a4629d11a53117b 100644 (file)
@@ -1517,7 +1517,7 @@ pfr_ina_define(struct pfr_table *tbl, user_addr_t addr, int size,
                return (EBUSY);
        tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
        SLIST_INIT(&tableq);
-       kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
+       kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)(void *)tbl);
        if (kt == NULL) {
                kt = pfr_create_ktable(tbl, 0, 1);
                if (kt == NULL)
@@ -2005,7 +2005,7 @@ pfr_lookup_table(struct pfr_table *tbl)
 
        /* struct pfr_ktable start like a struct pfr_table */
        return (RB_FIND(pfr_ktablehead, &pfr_ktables,
-           (struct pfr_ktable *)tbl));
+           (struct pfr_ktable *)(void *)tbl));
 }
 
 int
index e452e1d2e6cf2aef3bd3a3ca3873078f9166c69f..3f8089c8feb2f1a11f19e3de0d7be8b40d6ff110 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -412,7 +412,6 @@ struct sadb_sastat {
 #define SADB_X_EXT_NATT_KEEPALIVE      0x0004  /* Local node is behind NAT, send keepalives */
                                                                                        /* Should only be set for outbound SAs */
 #define SADB_X_EXT_NATT_MULTIPLEUSERS 0x0008   /* For use on VPN server - support multiple users       */
-#define SADB_X_EXT_NATT_DETECTED_PEER 0x0010
 
 #endif /* PRIVATE */   
 
@@ -426,6 +425,12 @@ struct sadb_sastat {
 #define SADB_X_EXT_PZERO       0x0200  /* zero padding for ESP */
 #define SADB_X_EXT_PMASK       0x0300  /* mask for padding flag */
 
+#ifdef PRIVATE
+#define SADB_X_EXT_NATT_DETECTED_PEER 0x1000
+#define SADB_X_EXT_ESP_KEEPALIVE      0x2000
+#define SADB_X_EXT_PUNT_RX_KEEPALIVE  0x4000
+#endif /* PRIVATE */   
+
 #if 1
 #define SADB_X_EXT_RAWCPI      0x0080  /* use well known CPI (IPComp) */
 #endif
index 4483dc867fe2837be17692be8ca43c8932afb2ae..a1f77f8a0990329e4da2f58e3a442aa87dd0c904 100644 (file)
@@ -74,6 +74,7 @@
 extern "C" {
 #endif
 
+#include <stdbool.h>
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/queue.h>
@@ -89,7 +90,6 @@ extern "C" {
 
 #include <machine/endian.h>
 #include <sys/systm.h>
-#include <net/pf_mtag.h>
 
 #if BYTE_ORDER == BIG_ENDIAN
 #define        htobe64(x)      (x)
@@ -146,17 +146,19 @@ union sockaddr_union {
 struct ip;
 struct ip6_hdr;
 struct tcphdr;
-#ifndef NO_APPLE_EXTENSIONS
 struct pf_grev1_hdr;
 struct pf_esp_hdr;
-#endif /* !NO_APPLE_EXTENSIONS */
 #endif /* KERNEL */
 
+#define PF_GRE_PPTP_VARIANT    0x01
+
 enum   { PF_INOUT, PF_IN, PF_OUT };
 enum   { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
-         PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP };
+         PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP,
+         PF_DUMMYNET, PF_NODUMMYNET };
 enum   { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
-         PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
+         PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_DUMMYNET, 
+         PF_RULESET_MAX };
 enum   { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
          PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG };
 enum   { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY };
@@ -173,11 +175,9 @@ enum       { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
          PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED,
          PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE,
          PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY,
-#ifndef NO_APPLE_EXTENSIONS
-         PFTM_GREv1_FIRST_PACKET, PFTM_GREv1_INITIATING, PFTM_GREv1_ESTABLISHED,
-         PFTM_ESP_FIRST_PACKET, PFTM_ESP_INITIATING, PFTM_ESP_ESTABLISHED,
-#endif /* !NO_APPLE_EXTENSIONS */
-         PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
+         PFTM_GREv1_FIRST_PACKET, PFTM_GREv1_INITIATING,
+         PFTM_GREv1_ESTABLISHED, PFTM_ESP_FIRST_PACKET, PFTM_ESP_INITIATING,
+         PFTM_ESP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
          PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL,
          PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE,
          PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED,
@@ -195,14 +195,12 @@ enum      { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
 #define PFTM_UDP_MULTIPLE_VAL          60      /* Bidirectional */
 #define PFTM_ICMP_FIRST_PACKET_VAL     20      /* First ICMP packet */
 #define PFTM_ICMP_ERROR_REPLY_VAL      10      /* Got error response */
-#ifndef NO_APPLE_EXTENSIONS
 #define PFTM_GREv1_FIRST_PACKET_VAL    120
 #define PFTM_GREv1_INITIATING_VAL      30
 #define PFTM_GREv1_ESTABLISHED_VAL     1800
 #define PFTM_ESP_FIRST_PACKET_VAL      120
 #define PFTM_ESP_INITIATING_VAL                30
 #define PFTM_ESP_ESTABLISHED_VAL       900
-#endif /* !NO_APPLE_EXTENSIONS */
 #define PFTM_OTHER_FIRST_PACKET_VAL    60      /* First packet */
 #define PFTM_OTHER_SINGLE_VAL          30      /* Unidirectional */
 #define PFTM_OTHER_MULTIPLE_VAL                60      /* Bidirectional */
@@ -213,9 +211,7 @@ enum        { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
 
 enum   { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
 enum   { PF_LIMIT_STATES,
-#ifndef NO_APPLE_EXTENSIONS
          PF_LIMIT_APP_STATES,
-#endif /* !NO_APPLE_EXTENSIONS */
          PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
          PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
 #define PF_POOL_IDMASK         0x0f
@@ -286,7 +282,6 @@ struct pf_addr_wrap {
        u_int8_t                 iflags;        /* PFI_AFLAG_* */
 };
 
-#ifndef NO_APPLE_EXTENSIONS
 struct pf_port_range {
        u_int16_t                       port[2];
        u_int8_t                        op;
@@ -297,7 +292,6 @@ union pf_rule_xport {
        u_int16_t               call_id;
        u_int32_t               spi;
 };
-#endif /* !NO_APPLE_EXTENSIONS */
 
 #ifdef KERNEL
 struct pfi_dynaddr {
@@ -361,6 +355,13 @@ struct pfi_dynaddr {
        (a)->addr32[1] != (b)->addr32[1] || \
        (a)->addr32[0] != (b)->addr32[0])) \
 
+#define PF_ALEQ(a, b, c) \
+       ((c == AF_INET && (a)->addr32[0] <= (b)->addr32[0]) || \
+       ((a)->addr32[3] <= (b)->addr32[3] && \
+       (a)->addr32[2] <= (b)->addr32[2] && \
+       (a)->addr32[1] <= (b)->addr32[1] && \
+       (a)->addr32[0] <= (b)->addr32[0])) \
+
 #define PF_AZERO(a, c) \
        ((c == AF_INET && !(a)->addr32[0]) || \
        (!(a)->addr32[0] && !(a)->addr32[1] && \
@@ -396,6 +397,12 @@ struct pfi_dynaddr {
        (a)->addr32[1] != (b)->addr32[1] || \
        (a)->addr32[0] != (b)->addr32[0]) \
 
+#define PF_ALEQ(a, b, c) \
+       ((a)->addr32[3] <= (b)->addr32[3] && \
+       (a)->addr32[2] <= (b)->addr32[2] && \
+       (a)->addr32[1] <= (b)->addr32[1] && \
+       (a)->addr32[0] <= (b)->addr32[0]) \
+
 #define PF_AZERO(a, c) \
        (!(a)->addr32[0] && \
        !(a)->addr32[1] && \
@@ -425,6 +432,9 @@ struct pfi_dynaddr {
 #define PF_ANEQ(a, b, c) \
        ((a)->addr32[0] != (b)->addr32[0])
 
+#define PF_ALEQ(a, b, c) \
+       ((a)->addr32[0] <= (b)->addr32[0])
+
 #define PF_AZERO(a, c) \
        (!(a)->addr32[0])
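PF_ALEQ joins the PF_AEQ/PF_ANEQ family and is likewise defined three ways so the same spelling works in kernels built with both address families, with only INET6, or with only INET. Note that the IPv6 form tests every 32-bit word independently, a component-wise comparison rather than a single 128-bit ordering. A standalone re-creation of the dual-family variant, for illustration only:

    #include <sys/socket.h>
    #include <stdint.h>
    #include <stdio.h>

    struct addr { uint32_t addr32[4]; };

    /* re-creation of the dual-family PF_ALEQ shown above */
    #define MY_PF_ALEQ(a, b, c) \
            ((c == AF_INET && (a)->addr32[0] <= (b)->addr32[0]) || \
            ((a)->addr32[3] <= (b)->addr32[3] && \
            (a)->addr32[2] <= (b)->addr32[2] && \
            (a)->addr32[1] <= (b)->addr32[1] && \
            (a)->addr32[0] <= (b)->addr32[0]))

    int
    main(void)
    {
            struct addr a = { { 1, 2, 3, 4 } };
            struct addr b = { { 1, 2, 3, 5 } };

            /* every 32-bit word of a is <= the matching word of b */
            printf("%d\n", MY_PF_ALEQ(&a, &b, AF_INET6));   /* prints 1 */
            return (0);
    }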
 
@@ -487,14 +497,8 @@ struct pf_rule_gid {
 
 struct pf_rule_addr {
        struct pf_addr_wrap      addr;
-#ifndef NO_APPLE_EXTENSIONS
        union pf_rule_xport      xport;
        u_int8_t                 neg;
-#else /* NO_APPLE_EXTENSIONS */
-       u_int16_t                port[2];
-       u_int8_t                 neg;
-       u_int8_t                 port_op;
-#endif /* NO_APPLE_EXTENSIONS */
 };
 
 struct pf_pooladdr {
@@ -690,6 +694,11 @@ struct pf_rule {
        u_int64_t                packets[2];
        u_int64_t                bytes[2];
 
+       u_int32_t                ticket;
+#define PF_OWNER_NAME_SIZE      64
+       char                     owner[PF_OWNER_NAME_SIZE];
+       u_int32_t                priority;
+
 #ifdef KERNEL
        struct pfi_kif          *kif            __attribute__((aligned(8)));
 #else /* !KERNEL */
@@ -757,6 +766,38 @@ struct pf_rule {
        u_int8_t                 allow_opts;
        u_int8_t                 rt;
        u_int8_t                 return_ttl;
+
+/* service class categories */
+#define        SCIDX_MASK              0x0f
+#define        SC_BE                   0x10
+#define        SC_BK_SYS               0x11
+#define        SC_BK                   0x12
+#define        SC_RD                   0x13
+#define        SC_OAM                  0x14
+#define        SC_AV                   0x15
+#define        SC_RV                   0x16
+#define        SC_VI                   0x17
+#define        SC_VO                   0x18
+#define        SC_CTL                  0x19
+
+/* diffserve code points */
+#define        DSCP_MASK               0xfc
+#define        DSCP_CUMASK             0x03
+#define        DSCP_EF                 0xb8
+#define        DSCP_AF11               0x28
+#define        DSCP_AF12               0x30
+#define        DSCP_AF13               0x38
+#define        DSCP_AF21               0x48
+#define        DSCP_AF22               0x50
+#define        DSCP_AF23               0x58
+#define        DSCP_AF31               0x68
+#define        DSCP_AF32               0x70
+#define        DSCP_AF33               0x78
+#define        DSCP_AF41               0x88
+#define        DSCP_AF42               0x90
+#define        DSCP_AF43               0x98
+#define        AF_CLASSMASK            0xe0
+#define        AF_DROPPRECMASK         0x18
        u_int8_t                 tos;
        u_int8_t                 anchor_relative;
        u_int8_t                 anchor_wildcard;
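The DSCP_* constants above are six-bit codepoints pre-shifted into the upper bits of the ToS/traffic-class octet, which is why DSCP_MASK is 0xfc: AF11's codepoint is 10 (binary 001010), and 10 << 2 == 0x28 == DSCP_AF11. AF_CLASSMASK and AF_DROPPRECMASK then extract the assured-forwarding class (bits 7..5) and drop precedence (bits 4..3). A standalone check:

    #include <stdio.h>

    #define DSCP_AF11       0x28
    #define AF_CLASSMASK    0xe0
    #define AF_DROPPRECMASK 0x18

    int
    main(void)
    {
            unsigned tos = DSCP_AF11;

            /* codepoint 10 (AF11) shifted into bits 7..2 of the octet */
            printf("codepoint %u\n", tos >> 2);                     /* 10 */
            printf("class %u\n", (tos & AF_CLASSMASK) >> 5);        /* 1 */
            printf("drop prec %u\n", (tos & AF_DROPPRECMASK) >> 3); /* 1 */
            return (0);
    }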
@@ -765,13 +806,18 @@ struct pf_rule {
 #define PF_FLUSH_GLOBAL                0x02
        u_int8_t                 flush;
 
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t                proto_variant;
        u_int8_t                extfilter; /* Filter mode [PF_EXTFILTER_xxx] */
-       u_int8_t                extmap; /* Mapping mode [PF_EXTMAP_xxx] */
-#endif /* !NO_APPLE_EXTENSIONS */
+       u_int8_t                extmap;    /* Mapping mode [PF_EXTMAP_xxx] */
+       u_int32_t               dnpipe;
+       u_int32_t               dntype;
 };
 
+/* pf device identifiers */
+#define PFDEV_PF               0
+#define PFDEV_PFM              1
+#define PFDEV_MAX              2
+
 /* rule flags */
 #define        PFRULE_DROP             0x0000
 #define        PFRULE_RETURNRST        0x0001
@@ -789,28 +835,32 @@ struct pf_rule {
 #define PFRULE_RANDOMID                0x0800
 #define PFRULE_REASSEMBLE_TCP  0x1000
 
+/* rule flags for TOS/DSCP/service class differentiation */
+#define        PFRULE_TOS              0x2000
+#define        PFRULE_DSCP             0x4000
+#define        PFRULE_SC               0x8000
+
 /* rule flags again */
-#define PFRULE_IFBOUND         0x00010000      /* if-bound */
+#define        PFRULE_IFBOUND          0x00010000      /* if-bound */
+#define        PFRULE_PFM              0x00020000      /* created by pfm device */
 
-#define PFSTATE_HIWAT          10000   /* default state table size */
-#define PFSTATE_ADAPT_START    6000    /* default adaptive timeout start */
-#define PFSTATE_ADAPT_END      12000   /* default adaptive timeout end */
+#define        PFSTATE_HIWAT           10000   /* default state table size */
+#define        PFSTATE_ADAPT_START     6000    /* default adaptive timeout start */
+#define        PFSTATE_ADAPT_END       12000   /* default adaptive timeout end */
 
-#ifndef NO_APPLE_EXTENSIONS
-#define PFAPPSTATE_HIWAT       10000   /* default same as state table */
+#define        PFAPPSTATE_HIWAT        10000   /* default same as state table */
 
 enum pf_extmap {
        PF_EXTMAP_APD   = 1,    /* Address-port-dependent mapping */
-       PF_EXTMAP_AD,                   /* Address-dependent mapping */
-       PF_EXTMAP_EI                    /* Endpoint-independent mapping */
+       PF_EXTMAP_AD,           /* Address-dependent mapping */
+       PF_EXTMAP_EI            /* Endpoint-independent mapping */
 };
 
 enum pf_extfilter {
        PF_EXTFILTER_APD = 1,   /* Address-port-dependent filtering */
-       PF_EXTFILTER_AD,                /* Address-dependent filtering */
-       PF_EXTFILTER_EI                 /* Endpoint-independent filtering */
+       PF_EXTFILTER_AD,        /* Address-dependent filtering */
+       PF_EXTFILTER_EI         /* Endpoint-independent filtering */
 };
-#endif /* !NO_APPLE_EXTENSIONS */
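+/*
+ * In RFC 4787 terms, PF_EXTMAP_EI reuses one external mapping for all
+ * destinations (full-cone behavior), while PF_EXTMAP_APD allocates a
+ * mapping per destination address and port (symmetric behavior).
+ */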
 
 struct pf_threshold {
        u_int32_t       limit;
@@ -862,7 +912,6 @@ struct pf_state_scrub {
 };
 #endif /* KERNEL */
 
-#ifndef NO_APPLE_EXTENSIONS
 union pf_state_xport {
        u_int16_t       port;
        u_int16_t       call_id;
@@ -870,16 +919,9 @@ union pf_state_xport {
 };
 
 struct pf_state_host {
-       struct pf_addr                  addr;
+       struct pf_addr          addr;
        union pf_state_xport    xport;
 };
-#else /* NO_APPLE_EXTENSIONS */
-struct pf_state_host {
-       struct pf_addr  addr;
-       u_int16_t       port;
-       u_int16_t       pad;
-};
-#endif /* NO_APPLE_EXTENSIONS */
 
 #ifdef KERNEL
 struct pf_state_peer {
@@ -896,10 +938,7 @@ struct pf_state_peer {
 };
 
 TAILQ_HEAD(pf_state_queue, pf_state);
-#endif /* KERNEL */
 
-#ifndef NO_APPLE_EXTENSIONS
-#ifdef KERNEL
 struct pf_state;
 struct pf_pdesc;
 struct pf_app_state;
@@ -931,11 +970,7 @@ struct pf_app_state {
                struct pf_ike_state ike;
        } u;
 };
-#endif /* KERNEL */
-#define PF_GRE_PPTP_VARIANT    0x01
-#endif /* !NO_APPLE_EXTENSIONS */
 
-#ifdef KERNEL
 /* keep synced with struct pf_state, used in RB_FIND */
 struct pf_state_key_cmp {
        struct pf_state_host lan;
@@ -944,12 +979,8 @@ struct pf_state_key_cmp {
        sa_family_t      af;
        u_int8_t         proto;
        u_int8_t         direction;
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t         proto_variant;
        struct pf_app_state     *app_state;
-#else /* NO_APPLE_EXTENSIONS */
-       u_int8_t         pad;
-#endif /* NO_APPLE_EXTENSIONS */
 };
 
 TAILQ_HEAD(pf_statelist, pf_state);
@@ -961,17 +992,14 @@ struct pf_state_key {
        sa_family_t      af;
        u_int8_t         proto;
        u_int8_t         direction;
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t         proto_variant;
        struct pf_app_state     *app_state;
-#else /* NO_APPLE_EXTENSIONS */
-       u_int8_t         pad;
-#endif /* NO_APPLE_EXTENSIONS */
+       u_int32_t        flowhash;
 
        RB_ENTRY(pf_state_key)   entry_lan_ext;
        RB_ENTRY(pf_state_key)   entry_ext_gwy;
        struct pf_statelist      states;
-       u_short          refcnt;        /* same size as if_index */
+       u_int32_t        refcnt;
 };
 
 
@@ -981,6 +1009,14 @@ struct pf_state_cmp {
        u_int32_t        creatorid;
        u_int32_t        pad;
 };
+
+/* flowhash key (a multiple of 12 bytes, for hashing performance) */
+struct pf_flowhash_key {
+       struct pf_state_host    ap1;    /* address+port blob 1 */
+       struct pf_state_host    ap2;    /* address+port blob 2 */
+       u_int32_t               af;
+       u_int32_t               proto;
+};
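+/*
+ * Assuming the usual 16-byte pf_addr and 4-byte pf_state_xport, this
+ * key works out to 20 + 20 + 4 + 4 == 48 bytes, i.e. 4 x 12.
+ */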
 #endif /* KERNEL */
 
 struct hook_desc;
@@ -1001,9 +1037,7 @@ struct pf_state {
        union pf_rule_ptr        anchor;
        union pf_rule_ptr        nat_rule;
        struct pf_addr           rt_addr;
-#ifndef NO_APPLE_EXTENSIONS
-       struct hook_desc_head unlink_hooks;
-#endif /* !NO_APPLE_EXTENSIONS */
+       struct hook_desc_head    unlink_hooks;
        struct pf_state_key     *state_key;
        struct pfi_kif          *kif;
        struct pfi_kif          *rt_kif;
@@ -1041,14 +1075,9 @@ struct pfsync_state_scrub {
 } __packed;
 
 struct pfsync_state_host {
-       struct pf_addr  addr;
-#ifndef NO_APPLE_EXTENSIONS
+       struct pf_addr          addr;
        union pf_state_xport    xport;
-       u_int16_t       pad[2];
-#else /* NO_APPLE_EXTENSIONS */
-       u_int16_t       port;
-       u_int16_t       pad[3];
-#endif /* NO_APPLE_EXTENSIONS */
+       u_int16_t               pad[2];
 } __packed;
 
 struct pfsync_state_peer {
@@ -1072,12 +1101,10 @@ struct pfsync_state {
        struct pfsync_state_peer src;
        struct pfsync_state_peer dst;
        struct pf_addr   rt_addr;
-#ifndef NO_APPLE_EXTENSIONS
        struct hook_desc_head unlink_hooks;
 #if !defined(__LP64__)
        u_int32_t       _pad[2];
 #endif /* !__LP64__ */
-#endif /* !NO_APPLE_EXTENSIONS */
        u_int32_t        rule;
        u_int32_t        anchor;
        u_int32_t        nat_rule;
@@ -1086,9 +1113,7 @@ struct pfsync_state {
        u_int32_t        packets[2][2];
        u_int32_t        bytes[2][2];
        u_int32_t        creatorid;
-#ifndef NO_APPLE_EXTENSIONS
-    u_int16_t    tag;
-#endif /* !NO_APPLE_EXTENSIONS */
+       u_int16_t        tag;
        sa_family_t      af;
        u_int8_t         proto;
        u_int8_t         direction;
@@ -1097,9 +1122,9 @@ struct pfsync_state {
        u_int8_t         timeout;
        u_int8_t         sync_flags;
        u_int8_t         updates;
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t         proto_variant;
-#endif /* !NO_APPLE_EXTENSIONS */
+       u_int8_t         __pad;
+       u_int32_t        flowhash;
 } __packed;
 
 #define PFSYNC_FLAG_COMPRESS   0x01
@@ -1187,6 +1212,7 @@ struct pf_anchor {
        struct pf_ruleset        ruleset;
        int                      refcnt;        /* anchor rules */
        int                      match;
+       char                     owner[PF_OWNER_NAME_SIZE];
 };
 #ifdef KERNEL
 RB_PROTOTYPE_SC(__private_extern__, pf_anchor_global, pf_anchor, entry_global,
@@ -1382,10 +1408,8 @@ struct pf_pdesc {
 #if INET6
                struct icmp6_hdr        *icmp6;
 #endif /* INET6 */
-#ifndef NO_APPLE_EXTENSIONS
-               struct pf_grev1_hdr *grev1;
-               struct pf_esp_hdr *esp;
-#endif /* !NO_APPLE_EXTENSIONS */
+               struct pf_grev1_hdr     *grev1;
+               struct pf_esp_hdr       *esp;
                void                    *any;
        } hdr;
        struct pf_addr   baddr;         /* address before translation */
@@ -1395,10 +1419,8 @@ struct pf_pdesc {
        struct pf_addr  *dst;
        struct ether_header
                        *eh;
-#ifndef NO_APPLE_EXTENSIONS
        struct mbuf     *mp;
-       int                             lmw;    /* lazy writable offset */
-#endif /* !NO_APPLE_EXTENSIONS */
+       int             lmw;            /* lazy writable offset */
        struct pf_mtag  *pf_mtag;
        u_int16_t       *ip_sum;
        u_int32_t        p_len;         /* total length of payload */
@@ -1406,12 +1428,14 @@ struct pf_pdesc {
                                        /* state code. Easier than tags */
 #define PFDESC_TCP_NORM        0x0001          /* TCP shall be statefully scrubbed */
 #define PFDESC_IP_REAS 0x0002          /* IP frags would've been reassembled */
+#define        PFDESC_FLOW_ADV 0x0004          /* sender can use flow advisory */
+#define PFDESC_IP_FRAG 0x0008          /* This is a fragment */
        sa_family_t      af;
        u_int8_t         proto;
        u_int8_t         tos;
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t         proto_variant;
-#endif /* !NO_APPLE_EXTENSIONS */
+       mbuf_svc_class_t sc;
+       u_int32_t        flowhash;      /* flow hash to identify the sender */
 };
 #endif /* KERNEL */
 
@@ -1435,7 +1459,8 @@ struct pf_pdesc {
 #define PFRES_MAXSTATES        12              /* State limit */
 #define PFRES_SRCLIMIT 13              /* Source node/conn limit */
 #define PFRES_SYNPROXY 14              /* SYN proxy */
-#define PFRES_MAX      15              /* total+1 */
+#define PFRES_DUMMYNET 15              /* Dummynet */
+#define PFRES_MAX      16              /* total+1 */
 
 #define PFRES_NAMES { \
        "match", \
@@ -1453,6 +1478,7 @@ struct pf_pdesc {
        "state-limit", \
        "src-limit", \
        "synproxy", \
+       "dummynet", \
        NULL \
 }
 
@@ -1491,7 +1517,6 @@ struct pf_pdesc {
        NULL \
 }
 
-#ifndef NO_APPLE_EXTENSIONS
 /* GREv1 protocol state enumeration */
 #define PFGRE1S_NO_TRAFFIC             0
 #define PFGRE1S_INITIATING             1
@@ -1513,7 +1538,6 @@ struct pf_pdesc {
 #define PFESPS_NSTATES         3       /* number of state levels */
 
 #define PFESPS_NAMES { "NO_TRAFFIC", "INITIATING", "ESTABLISHED", NULL }
-#endif /* !NO_APPLE_EXTENSIONS */
 
 /* Other protocol state enumeration */
 #define PFOTHERS_NO_TRAFFIC    0
@@ -1574,66 +1598,111 @@ struct pf_status {
 };
 
 struct cbq_opts {
-       u_int           minburst;
-       u_int           maxburst;
-       u_int           pktsize;
-       u_int           maxpktsize;
-       u_int           ns_per_byte;
-       u_int           maxidle;
-       int             minidle;
-       u_int           offtime;
-       int             flags;
+       u_int32_t       minburst;
+       u_int32_t       maxburst;
+       u_int32_t       pktsize;
+       u_int32_t       maxpktsize;
+       u_int32_t       ns_per_byte;
+       u_int32_t       maxidle;
+       int32_t         minidle;
+       u_int32_t       offtime;
+       u_int32_t       flags;
 };
 
 struct priq_opts {
-       int             flags;
+       u_int32_t       flags;
+};
+
+struct qfq_opts {
+       u_int32_t       flags;
+       u_int32_t       lmax;
 };
 
 struct hfsc_opts {
        /* real-time service curve */
-       u_int           rtsc_m1;        /* slope of the 1st segment in bps */
-       u_int           rtsc_d;         /* the x-projection of m1 in msec */
-       u_int           rtsc_m2;        /* slope of the 2nd segment in bps */
+       u_int64_t       rtsc_m1;        /* slope of the 1st segment in bps */
+       u_int64_t       rtsc_d;         /* the x-projection of m1 in msec */
+       u_int64_t       rtsc_m2;        /* slope of the 2nd segment in bps */
+       u_int32_t       rtsc_fl;        /* service curve flags */
+#if !defined(__LP64__)
+       u_int32_t       _pad;
+#endif /* !__LP64__ */
        /* link-sharing service curve */
-       u_int           lssc_m1;
-       u_int           lssc_d;
-       u_int           lssc_m2;
+       u_int64_t       lssc_m1;
+       u_int64_t       lssc_d;
+       u_int64_t       lssc_m2;
+       u_int32_t       lssc_fl;
+#if !defined(__LP64__)
+       u_int32_t       __pad;
+#endif /* !__LP64__ */
        /* upper-limit service curve */
-       u_int           ulsc_m1;
-       u_int           ulsc_d;
-       u_int           ulsc_m2;
-       int             flags;
+       u_int64_t       ulsc_m1;
+       u_int64_t       ulsc_d;
+       u_int64_t       ulsc_m2;
+       u_int32_t       ulsc_fl;
+       u_int32_t       flags;          /* scheduler flags */
 };
 
+struct fairq_opts {
+       u_int32_t       nbuckets;       /* hash buckets */
+       u_int32_t       flags;
+       u_int64_t       hogs_m1;        /* hog detection bandwidth */
+
+       /* link-sharing service curve */
+       u_int64_t       lssc_m1;
+       u_int64_t       lssc_d;
+       u_int64_t       lssc_m2;
+};
+
+/* bandwidth types */
+#define PF_ALTQ_BW_ABSOLUTE    1       /* bw in absolute value (bps) */
+#define PF_ALTQ_BW_PERCENT     2       /* bandwidth in percentage */
+
+/* ALTQ rule flags */
+#define        PF_ALTQF_TBR            0x1     /* enable Token Bucket Regulator */
+
+/* queue rule flags */
+#define        PF_ALTQ_QRF_WEIGHT      0x1     /* weight instead of priority */
+
 struct pf_altq {
        char                     ifname[IFNAMSIZ];
 
-       void                    *altq_disc;     /* discipline-specific state */
+       /* discipline-specific state */
+       void                    *altq_disc __attribute__((aligned(8)));
+       TAILQ_ENTRY(pf_altq)     entries __attribute__((aligned(8)));
 #if !defined(__LP64__)
-       u_int32_t               _pad;
-#endif /* !__LP64__ */
-       TAILQ_ENTRY(pf_altq)     entries;
-#if !defined(__LP64__)
-       u_int32_t               __pad[2];
+       u_int32_t               _pad[2];
 #endif /* !__LP64__ */
 
+       u_int32_t                aflags;        /* ALTQ rule flags */
+       u_int32_t                bwtype;        /* bandwidth type */
+
        /* scheduler spec */
-       u_int8_t                 scheduler;     /* scheduler type */
-       u_int16_t                tbrsize;       /* tokenbucket regulator size */
-       u_int32_t                ifbandwidth;   /* interface bandwidth */
+       u_int32_t                scheduler;     /* scheduler type */
+       u_int32_t                tbrsize;       /* tokenbucket regulator size */
+       u_int64_t                ifbandwidth;   /* interface bandwidth */
 
        /* queue spec */
        char                     qname[PF_QNAME_SIZE];  /* queue name */
        char                     parent[PF_QNAME_SIZE]; /* parent name */
        u_int32_t                parent_qid;    /* parent queue id */
-       u_int32_t                bandwidth;     /* queue bandwidth */
-       u_int8_t                 priority;      /* priority */
-       u_int16_t                qlimit;        /* queue size limit */
-       u_int16_t                flags;         /* misc flags */
+       u_int32_t                qrflags;       /* queue rule flags */
+       union {
+               u_int32_t        priority;      /* priority */
+               u_int32_t        weight;        /* weight */
+       };
+       u_int32_t                qlimit;        /* queue size limit */
+       u_int32_t                flags;         /* misc flags */
+#if !defined(__LP64__)
+       u_int32_t               __pad;
+#endif /* !__LP64__ */
+       u_int64_t                bandwidth;     /* queue bandwidth */
        union {
                struct cbq_opts          cbq_opts;
                struct priq_opts         priq_opts;
                struct hfsc_opts         hfsc_opts;
+               struct fairq_opts        fairq_opts;
+               struct qfq_opts          qfq_opts;
        } pq_u;
 
        u_int32_t                qid;           /* return value */
@@ -1686,7 +1755,6 @@ struct pfioc_natlook {
        struct pf_addr   daddr;
        struct pf_addr   rsaddr;
        struct pf_addr   rdaddr;
-#ifndef NO_APPLE_EXTENSIONS
        union pf_state_xport    sxport;
        union pf_state_xport    dxport;
        union pf_state_xport    rsxport;
@@ -1695,15 +1763,6 @@ struct pfioc_natlook {
        u_int8_t         proto;
        u_int8_t         proto_variant;
        u_int8_t         direction;
-#else /* NO_APPLE_EXTENSIONS */
-       u_int16_t        sport;
-       u_int16_t        dport;
-       u_int16_t        rsport;
-       u_int16_t        rdport;
-       sa_family_t      af;
-       u_int8_t         proto;
-       u_int8_t         direction;
-#endif /* NO_APPLE_EXTENSIONS */
 };
 
 struct pfioc_state {
@@ -1717,29 +1776,21 @@ struct pfioc_src_node_kill {
        struct pf_rule_addr psnk_dst;
 };
 
-#ifndef NO_APPLE_EXTENSIONS
 struct pfioc_state_addr_kill {
        struct pf_addr_wrap             addr;
        u_int8_t                        reserved_[3];
        u_int8_t                        neg;
        union pf_rule_xport             xport;
 };
-#endif /* !NO_APPLE_EXTENSIONS */
 
 struct pfioc_state_kill {
        /* XXX returns the number of states killed in psk_af */
        sa_family_t             psk_af;
-#ifndef NO_APPLE_EXTENSIONS
        u_int8_t                psk_proto;
        u_int8_t                psk_proto_variant;
        u_int8_t                _pad;
        struct pfioc_state_addr_kill    psk_src;
        struct pfioc_state_addr_kill    psk_dst;
-#else /* NO_APPLE_EXTENSIONS */
-       int                     psk_proto;
-       struct pf_rule_addr     psk_src;
-       struct pf_rule_addr     psk_dst;
-#endif /* NO_APPLE_EXTENSIONS */
        char                    psk_ifname[IFNAMSIZ];
 };
 
@@ -1998,6 +2049,11 @@ struct pfioc_iface_64 {
 };
 #endif /* KERNEL */
 
+struct pf_ifspeed {
+       char                    ifname[IFNAMSIZ];
+       u_int64_t               baudrate;
+};
+
 /*
  * ioctl operations
  */
@@ -2020,7 +2076,8 @@ struct pfioc_iface_64 {
 #define DIOCSETDEBUG   _IOWR('D', 24, u_int32_t)
 #define DIOCGETSTATES  _IOWR('D', 25, struct pfioc_states)
 #define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule)
-/* XXX cut 26 - 28 */
+#define DIOCINSERTRULE _IOWR('D', 27, struct pfioc_rule)
+#define DIOCDELETERULE _IOWR('D', 28, struct pfioc_rule)
 #define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm)
 #define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm)
 #define DIOCADDSTATE   _IOWR('D', 37, struct pfioc_state)
@@ -2071,7 +2128,8 @@ struct pfioc_iface_64 {
 #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
 #define DIOCSETIFFLAG  _IOWR('D', 89, struct pfioc_iface)
 #define DIOCCLRIFFLAG  _IOWR('D', 90, struct pfioc_iface)
-#define DIOCKILLSRCNODES       _IOWR('D', 91, struct pfioc_src_node_kill)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+#define        DIOCGIFSPEED    _IOWR('D', 92, struct pf_ifspeed)
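+
+/*
+ * Sketch of a hypothetical userland caller (fd obtained from
+ * open("/dev/pf", O_RDONLY); "en0" is illustrative):
+ *
+ *     struct pf_ifspeed ifs;
+ *     bzero(&ifs, sizeof (ifs));
+ *     (void) strlcpy(ifs.ifname, "en0", sizeof (ifs.ifname));
+ *     if (ioctl(fd, DIOCGIFSPEED, &ifs) == 0)
+ *             printf("%llu bps\n", ifs.baudrate);
+ */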
 
 #ifdef KERNEL
 RB_HEAD(pf_src_tree, pf_src_node);
@@ -2089,7 +2147,7 @@ TAILQ_HEAD(pf_poolqueue, pf_pool);
 __private_extern__ struct pf_poolqueue pf_pools[2];
 __private_extern__ struct pf_palist    pf_pabuf;
 __private_extern__ u_int32_t           ticket_pabuf;
-#if ALTQ
+#if PF_ALTQ
 TAILQ_HEAD(pf_altqqueue, pf_altq);
 __private_extern__ struct pf_altqqueue pf_altqs[2];
 __private_extern__ u_int32_t           ticket_altqs_active;
@@ -2097,7 +2155,7 @@ __private_extern__ u_int32_t              ticket_altqs_inactive;
 __private_extern__ int                 altqs_inactive_open;
 __private_extern__ struct pf_altqqueue *pf_altqs_active;
 __private_extern__ struct pf_altqqueue *pf_altqs_inactive;
-#endif /* ALTQ */
+#endif /* PF_ALTQ */
 __private_extern__ struct pf_poolqueue *pf_pools_active;
 __private_extern__ struct pf_poolqueue *pf_pools_inactive;
 
@@ -2106,16 +2164,15 @@ __private_extern__ int pf_tbladdr_setup(struct pf_ruleset *,
 __private_extern__ void pf_tbladdr_remove(struct pf_addr_wrap *);
 __private_extern__ void pf_tbladdr_copyout(struct pf_addr_wrap *);
 __private_extern__ void pf_calc_skip_steps(struct pf_rulequeue *);
+__private_extern__ u_int32_t pf_calc_state_key_flowhash(struct pf_state_key *);
 
 __private_extern__ struct pool pf_src_tree_pl, pf_rule_pl;
 __private_extern__ struct pool pf_state_pl, pf_state_key_pl, pf_pooladdr_pl;
 __private_extern__ struct pool pf_state_scrub_pl;
-#if ALTQ
+#if PF_ALTQ
 __private_extern__ struct pool pf_altq_pl;
-#endif /* ALTQ */
-#ifndef NO_APPLE_EXTENSIONS
+#endif /* PF_ALTQ */
 __private_extern__ struct pool pf_app_state_pl;
-#endif /* !NO_APPLE_EXTENSIONS */
 
 __private_extern__ struct thread *pf_purge_thread;
 
@@ -2143,23 +2200,22 @@ __private_extern__ void pf_addrcpy(struct pf_addr *, struct pf_addr *,
     u_int8_t);
 __private_extern__ void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *);
 
+struct ip_fw_args;
 #if INET
 __private_extern__ int pf_test(int, struct ifnet *, struct mbuf **,
-    struct ether_header *);
+    struct ether_header *, struct ip_fw_args *);
 #endif /* INET */
 
 #if INET6
 __private_extern__ int pf_test6(int, struct ifnet *, struct mbuf **,
-    struct ether_header *);
+    struct ether_header *, struct ip_fw_args *);
 __private_extern__ void pf_poolmask(struct pf_addr *, struct pf_addr *,
     struct pf_addr *, struct pf_addr *, u_int8_t);
 __private_extern__ void pf_addr_inc(struct pf_addr *, sa_family_t);
 #endif /* INET6 */
 
-#ifndef NO_APPLE_EXTENSIONS
 __private_extern__ struct mbuf *pf_lazy_makewritable(struct pf_pdesc *,
     struct mbuf *, int);
-#endif /* !NO_APPLE_EXTENSIONS */
 __private_extern__ void *pf_pull_hdr(struct mbuf *, int, void *, int,
     u_short *, u_short *, sa_family_t);
 __private_extern__ void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
@@ -2172,10 +2228,8 @@ __private_extern__ int pf_match_addr_range(struct pf_addr *, struct pf_addr *,
     struct pf_addr *, sa_family_t);
 __private_extern__ int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t);
 __private_extern__ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
-#ifndef NO_APPLE_EXTENSIONS
 __private_extern__ int pf_match_xport(u_int8_t, u_int8_t, union pf_rule_xport *,
     union pf_state_xport *);
-#endif /* !NO_APPLE_EXTENSIONS */
 __private_extern__ int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t);
 __private_extern__ int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t);
 
@@ -2201,7 +2255,8 @@ __private_extern__ int pf_routable(struct pf_addr *addr, sa_family_t af,
 __private_extern__ int pf_rtlabel_match(struct pf_addr *, sa_family_t,
     struct pf_addr_wrap *);
 __private_extern__ int pf_socket_lookup(int, struct pf_pdesc *);
-__private_extern__ struct pf_state_key *pf_alloc_state_key(struct pf_state *);
+__private_extern__ struct pf_state_key *pf_alloc_state_key(struct pf_state *,
+    struct pf_state_key *);
 __private_extern__ void pfr_initialize(void);
 __private_extern__ int pfr_match_addr(struct pfr_ktable *, struct pf_addr *,
     sa_family_t);
@@ -2275,7 +2330,7 @@ __private_extern__ void pf_tag2tagname(u_int16_t, char *);
 __private_extern__ void pf_tag_ref(u_int16_t);
 __private_extern__ void pf_tag_unref(u_int16_t);
 __private_extern__ int pf_tag_packet(struct mbuf *, struct pf_mtag *,
-    int, unsigned int);
+    int, unsigned int, struct pf_pdesc *);
 __private_extern__ void pf_step_into_anchor(int *, struct pf_ruleset **, int,
     struct pf_rule **, struct pf_rule **,  int *);
 __private_extern__ int pf_step_out_of_anchor(int *, struct pf_ruleset **, int,
@@ -2294,7 +2349,7 @@ struct pf_pool_limit {
 __private_extern__ struct pf_pool_limit        pf_pool_limits[PF_LIMIT_MAX];
 
 __private_extern__ int pf_af_hook(struct ifnet *, struct mbuf **,
-    struct mbuf **, unsigned int, int);
+    struct mbuf **, unsigned int, int, struct ip_fw_args *);
 __private_extern__ int pf_ifaddr_hook(struct ifnet *, unsigned long);
 __private_extern__ void pf_ifnet_hook(struct ifnet *, int);
 
@@ -2308,6 +2363,11 @@ __private_extern__ struct pf_anchor pf_main_anchor;
 
 __private_extern__ int pf_is_enabled;
 #define PF_IS_ENABLED (pf_is_enabled != 0)
+__private_extern__ u_int32_t pf_hash_seed;
+
+#if PF_ALTQ
+__private_extern__ u_int32_t altq_allowed;
+#endif /* PF_ALTQ */
 
 /* these ruleset functions can be linked into userland programs (pfctl) */
 __private_extern__ int pf_get_ruleset_number(u_int8_t);
@@ -2320,6 +2380,8 @@ __private_extern__ void pf_anchor_remove(struct pf_rule *);
 __private_extern__ void pf_remove_if_empty_ruleset(struct pf_ruleset *);
 __private_extern__ struct pf_anchor *pf_find_anchor(const char *);
 __private_extern__ struct pf_ruleset *pf_find_ruleset(const char *);
+__private_extern__ struct pf_ruleset *pf_find_ruleset_with_owner(const char *,
+    const char *, int, int *);
 __private_extern__ struct pf_ruleset *pf_find_or_create_ruleset(const char *);
 __private_extern__ void pf_rs_initialize(void);
 
@@ -2333,6 +2395,8 @@ __private_extern__ int pf_osfp_get(struct pf_osfp_ioctl *);
 __private_extern__ void pf_osfp_initialize(void);
 __private_extern__ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
 __private_extern__ struct pf_os_fingerprint *pf_osfp_validate(void);
+__private_extern__ struct pf_mtag *pf_find_mtag(struct mbuf *);
+__private_extern__ struct pf_mtag *pf_get_mtag(struct mbuf *);
 #else /* !KERNEL */
 extern struct pf_anchor_global pf_anchors;
 extern struct pf_anchor pf_main_anchor;
@@ -2349,6 +2413,8 @@ extern void pf_anchor_remove(struct pf_rule *);
 extern void pf_remove_if_empty_ruleset(struct pf_ruleset *);
 extern struct pf_anchor *pf_find_anchor(const char *);
 extern struct pf_ruleset *pf_find_ruleset(const char *);
+extern struct pf_ruleset *pf_find_ruleset_with_owner(const char *,
+    const char *, int, int *);
 extern struct pf_ruleset *pf_find_or_create_ruleset(const char *);
 extern void pf_rs_initialize(void);
 #endif /* !KERNEL */
diff --git a/bsd/net/pktsched/Makefile b/bsd/net/pktsched/Makefile
new file mode 100644 (file)
index 0000000..ad82443
--- /dev/null
@@ -0,0 +1,44 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+INSTINC_SUBDIRS = \
+
+INSTINC_SUBDIRS_PPC = \
+
+INSTINC_SUBDIRS_I386 = \
+
+EXPINC_SUBDIRS = \
+
+EXPINC_SUBDIRS_PPC = \
+
+EXPINC_SUBDIRS_I386 = \
+
+DATAFILES= \
+
+KERNELFILES= \
+
+PRIVATE_DATAFILES = \
+       pktsched.h pktsched_cbq.h pktsched_fairq.h pktsched_hfsc.h \
+       pktsched_priq.h pktsched_tcq.h pktsched_rmclass.h pktsched_qfq.h
+
+PRIVATE_KERNELFILES = ${KERNELFILES}
+
+INSTALL_MI_LIST        = ${DATAFILES}
+
+INSTALL_MI_DIR = net/pktsched
+
+EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES}
+
+EXPORT_MI_DIR = ${INSTALL_MI_DIR}
+
+INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+
+INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/bsd/net/pktsched/pktsched.c b/bsd/net/pktsched/pktsched.c
new file mode 100644 (file)
index 0000000..eda1ae4
--- /dev/null
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/mcache.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/net_osdep.h>
+#include <net/pktsched/pktsched.h>
+#include <net/pktsched/pktsched_tcq.h>
+#include <net/pktsched/pktsched_qfq.h>
+#if PKTSCHED_PRIQ
+#include <net/pktsched/pktsched_priq.h>
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_FAIRQ
+#include <net/pktsched/pktsched_fairq.h>
+#endif /* PKTSCHED_FAIRQ */
+#if PKTSCHED_CBQ
+#include <net/pktsched/pktsched_cbq.h>
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_HFSC
+#include <net/pktsched/pktsched_hfsc.h>
+#endif /* PKTSCHED_HFSC */
+
+#include <pexpert/pexpert.h>
+
+u_int32_t machclk_freq = 0;
+u_int64_t machclk_per_sec = 0;
+u_int32_t pktsched_verbose;    /* more noise if greater than 1 */
+
+static void init_machclk(void);
+
+SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "pktsched");
+
+SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
+       &pktsched_verbose, 0, "Packet scheduler verbosity level");
+
+void
+pktsched_init(void)
+{
+       init_machclk();
+       if (machclk_freq == 0) {
+               panic("%s: no CPU clock available!\n", __func__);
+               /* NOTREACHED */
+       }
+
+       tcq_init();
+       qfq_init();
+#if PKTSCHED_PRIQ
+       priq_init();
+#endif /* PKTSCHED_PRIQ */
+#if PKTSCHED_FAIRQ
+       fairq_init();
+#endif /* PKTSCHED_FAIRQ */
+#if PKTSCHED_CBQ
+       cbq_init();
+#endif /* PKTSCHED_CBQ */
+#if PKTSCHED_HFSC
+       hfsc_init();
+#endif /* PKTSCHED_HFSC */
+}
+
+static void
+init_machclk(void)
+{
+       /*
+        * Initialize machclk_freq using the timebase frequency
+        * value from device-specific info.
+        */
+       machclk_freq = gPEClockFrequencyInfo.timebase_frequency_hz;
+
+       clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
+           &machclk_per_sec);
+}
+
+u_int64_t
+pktsched_abs_to_nsecs(u_int64_t abstime)
+{
+       u_int64_t nsecs;
+
+       absolutetime_to_nanoseconds(abstime, &nsecs);
+       return (nsecs);
+}
+
+u_int64_t
+pktsched_nsecs_to_abstime(u_int64_t nsecs)
+{
+       u_int64_t abstime;
+
+       nanoseconds_to_absolutetime(nsecs, &abstime);
+       return (abstime);
+}
+
+int
+pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags)
+{
+       int error = 0;
+       u_int32_t qflags = sflags;
+       u_int32_t rflags;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(machclk_freq != 0);
+
+       /* Nothing to do unless the scheduler type changes */
+       if (ifq->ifcq_type == scheduler)
+               return (0);
+
+       qflags &= (PKTSCHEDF_QALG_RED | PKTSCHEDF_QALG_RIO |
+           PKTSCHEDF_QALG_BLUE | PKTSCHEDF_QALG_SFB);
+
+       /* These are mutually exclusive */
+       if (qflags != 0 &&
+           qflags != PKTSCHEDF_QALG_RED && qflags != PKTSCHEDF_QALG_RIO &&
+           qflags != PKTSCHEDF_QALG_BLUE && qflags != PKTSCHEDF_QALG_SFB) {
+               panic("%s: RED|RIO|BLUE|SFB mutually exclusive\n", __func__);
+               /* NOTREACHED */
+       }
+
+       /*
+        * Remember the flags that need to be restored upon success, as
+        * they may be cleared when we tear down the existing scheduler.
+        */
+       rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
+
+       if (ifq->ifcq_type != PKTSCHEDT_NONE) {
+               (void) pktsched_teardown(ifq);
+
+               /* Teardown should have succeeded */
+               VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+               VERIFY(ifq->ifcq_disc == NULL);
+               VERIFY(ifq->ifcq_enqueue == NULL);
+               VERIFY(ifq->ifcq_dequeue == NULL);
+               VERIFY(ifq->ifcq_dequeue_sc == NULL);
+               VERIFY(ifq->ifcq_request == NULL);
+       }
+
+       switch (scheduler) {
+#if PKTSCHED_PRIQ
+       case PKTSCHEDT_PRIQ:
+               error = priq_setup_ifclassq(ifq, sflags);
+               break;
+#endif /* PKTSCHED_PRIQ */
+
+       case PKTSCHEDT_TCQ:
+               error = tcq_setup_ifclassq(ifq, sflags);
+               break;
+
+       case PKTSCHEDT_QFQ:
+               error = qfq_setup_ifclassq(ifq, sflags);
+               break;
+
+       default:
+               error = ENXIO;
+               break;
+       }
+
+       if (error == 0)
+               ifq->ifcq_flags |= rflags;
+
+       return (error);
+}
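+
+/*
+ * Minimal sketch of a caller, assuming the ifclassq lock discipline
+ * shown above (the scheduler and flag values are illustrative):
+ *
+ *     IFCQ_LOCK(ifq);
+ *     error = pktsched_setup(ifq, PKTSCHEDT_TCQ, PKTSCHEDF_QALG_SFB);
+ *     IFCQ_UNLOCK(ifq);
+ */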
+
+int
+pktsched_teardown(struct ifclassq *ifq)
+{
+       int error = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if_qflush(ifq->ifcq_ifp, 1);
+       VERIFY(IFCQ_IS_EMPTY(ifq));
+
+       ifq->ifcq_flags &= ~IFCQF_ENABLED;
+
+       switch (ifq->ifcq_type) {
+       case PKTSCHEDT_NONE:
+               break;
+
+#if PKTSCHED_PRIQ
+       case PKTSCHEDT_PRIQ:
+               error = priq_teardown_ifclassq(ifq);
+               break;
+#endif /* PKTSCHED_PRIQ */
+
+       case PKTSCHEDT_TCQ:
+               error = tcq_teardown_ifclassq(ifq);
+               break;
+
+       case PKTSCHEDT_QFQ:
+               error = qfq_teardown_ifclassq(ifq);
+               break;
+
+       default:
+               error = ENXIO;
+               break;
+       }
+
+       return (error);
+}
+
+int
+pktsched_getqstats(struct ifclassq *ifq, u_int32_t qid,
+    struct if_ifclassq_stats *ifqs)
+{
+       int error;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       switch (ifq->ifcq_type) {
+#if PKTSCHED_PRIQ
+       case PKTSCHEDT_PRIQ:
+               error = priq_getqstats_ifclassq(ifq, qid, ifqs);
+               break;
+#endif /* PKTSCHED_PRIQ */
+
+       case PKTSCHEDT_TCQ:
+               error = tcq_getqstats_ifclassq(ifq, qid, ifqs);
+               break;
+
+       case PKTSCHEDT_QFQ:
+               error = qfq_getqstats_ifclassq(ifq, qid, ifqs);
+               break;
+
+       default:
+               error = ENXIO;
+               break;
+       }
+
+       return (error);
+}
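+
+/*
+ * Illustrative call, for a hypothetical queue id qid, with the
+ * ifclassq lock held:
+ *
+ *     struct if_ifclassq_stats ifqs;
+ *     error = pktsched_getqstats(ifq, qid, &ifqs);
+ */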
diff --git a/bsd/net/pktsched/pktsched.h b/bsd/net/pktsched/pktsched.h
new file mode 100644 (file)
index 0000000..aa3361b
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _PKTSCHED_PKTSCHED_H_
+#define        _PKTSCHED_PKTSCHED_H_
+
+#ifdef PRIVATE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* packet scheduler type */
+#define        PKTSCHEDT_NONE          0       /* reserved */
+#define        PKTSCHEDT_CBQ           1       /* cbq */
+#define        PKTSCHEDT_HFSC          2       /* hfsc */
+#define        PKTSCHEDT_PRIQ          3       /* priority queue */
+#define        PKTSCHEDT_FAIRQ         4       /* fairq */
+#define        PKTSCHEDT_TCQ           5       /* traffic class queue */
+#define        PKTSCHEDT_QFQ           6       /* quick fair queueing */
+#define        PKTSCHEDT_MAX           7       /* should be max sched type + 1 */
+
+#ifdef BSD_KERNEL_PRIVATE
+#include <mach/mach_time.h>
+#include <sys/sysctl.h>
+#include <libkern/libkern.h>
+
+/* flags for pktsched_setup */
+#define        PKTSCHEDF_QALG_RED      0x1     /* use RED */
+#define        PKTSCHEDF_QALG_RIO      0x2     /* use RIO */
+#define        PKTSCHEDF_QALG_BLUE     0x4     /* use BLUE */
+#define        PKTSCHEDF_QALG_SFB      0x8     /* use SFB */
+#define        PKTSCHEDF_QALG_ECN      0x10    /* enable ECN */
+#define        PKTSCHEDF_QALG_FLOWCTL  0x20    /* enable flow control advisories */
+
+/* macro for timeout/untimeout */
+/* use old-style timeout/untimeout */
+/* dummy callout structure */
+struct callout {
+       void            *c_arg;                 /* function argument */
+       void            (*c_func)(void *);      /* function to call */
+};
+
+#define        CALLOUT_INIT(c) do {                                    \
+       (void) memset((c), 0, sizeof (*(c)));                   \
+} while (/*CONSTCOND*/ 0)
+
+#define        CALLOUT_RESET(c, t, f, a) do {                          \
+       (c)->c_arg = (a);                                       \
+       (c)->c_func = (f);                                      \
+       timeout((f), (a), (t));                                 \
+} while (/*CONSTCOND*/ 0)
+
+#define        CALLOUT_STOP(c)         untimeout((c)->c_func, (c)->c_arg)
+#define        CALLOUT_INITIALIZER     { NULL, NULL }
+
+typedef void (timeout_t)(void *);
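+
+/*
+ * Usage sketch; poll_timeout() and its argument sc are hypothetical:
+ *
+ *     static struct callout t = CALLOUT_INITIALIZER;
+ *
+ *     CALLOUT_RESET(&t, hz, poll_timeout, sc);   arm, fires in hz ticks
+ *     CALLOUT_STOP(&t);                          cancel a pending timeout
+ */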
+
+/*
+ * Bitmap operations
+ */
+typedef        u_int32_t pktsched_bitmap_t;
+
+static inline boolean_t
+pktsched_bit_tst(u_int32_t ix, pktsched_bitmap_t *pData)
+{
+       return (*pData & (1 << ix));
+}
+
+static inline void
+pktsched_bit_set(u_int32_t ix, pktsched_bitmap_t *pData)
+{
+       *pData |= (1 << ix);
+}
+
+static inline void
+pktsched_bit_clr(u_int32_t ix, pktsched_bitmap_t *pData)
+{
+       *pData &= ~(1 << ix);
+}
+
+static inline pktsched_bitmap_t
+pktsched_ffs(pktsched_bitmap_t pData)
+{
+       return (ffs(pData));
+}
+
+static inline pktsched_bitmap_t
+pktsched_fls(pktsched_bitmap_t pData)
+{
+       return ((sizeof (pktsched_bitmap_t) << 3) - clz(pData));
+}
+
+static inline pktsched_bitmap_t
+__fls(pktsched_bitmap_t word)
+{
+       VERIFY(word != 0);
+       return (pktsched_fls(word) - 1);
+}
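+
+/*
+ * For a bitmap value of 0x5 (bits 0 and 2 set): pktsched_ffs() == 1
+ * (1-based lowest set bit), pktsched_fls() == 3 (1-based highest set
+ * bit), and __fls() == 2 (the 0-based index of that highest bit).
+ */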
+
+/*
+ * We can use mach_absolute_time which returns a 64-bit value with
+ * granularity less than a microsecond even on the slowest processor.
+ */
+#define        read_machclk()          mach_absolute_time()
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ */
+extern u_int32_t machclk_freq;
+extern u_int64_t machclk_per_sec;
+extern u_int32_t pktsched_verbose;
+
+SYSCTL_DECL(_net_pktsched);
+
+struct if_ifclassq_stats;
+
+extern void pktsched_init(void);
+extern int pktsched_setup(struct ifclassq *, u_int32_t, u_int32_t);
+extern int pktsched_teardown(struct ifclassq *);
+extern int pktsched_getqstats(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+extern u_int64_t pktsched_abs_to_nsecs(u_int64_t);
+extern u_int64_t pktsched_nsecs_to_abstime(u_int64_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _PKTSCHED_PKTSCHED_H_ */
diff --git a/bsd/net/pktsched/pktsched_cbq.c b/bsd/net/pktsched/pktsched_cbq.c
new file mode 100644 (file)
index 0000000..a923f6c
--- /dev/null
@@ -0,0 +1,699 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_cbq.c,v 1.23 2007/09/13 20:40:02 chl Exp $       */
+/*     $KAME: altq_cbq.c,v 1.9 2000/12/14 08:12:45 thorpej Exp $       */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the SMCC Technology
+ *      Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of Sun Microsystems, Inc may not be used to endorse or
+ *      promote products derived from this software without specific prior
+ *      written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if PKTSCHED_CBQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_cbq.h>
+#include <netinet/in.h>
+
+/*
+ * Forward Declarations.
+ */
+#if 0
+static int cbq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *cbq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
+static int cbq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+#endif
+static int cbq_class_destroy(cbq_state_t *, struct rm_class *);
+static int cbq_destroy_locked(cbq_state_t *);
+static struct rm_class *cbq_clh_to_clp(cbq_state_t *, u_int32_t);
+static const char *cbq_style(cbq_state_t *);
+static int cbq_clear_interface(cbq_state_t *);
+static void cbqrestart(struct ifclassq *);
+
+#define        CBQ_ZONE_MAX    32              /* maximum elements in zone */
+#define        CBQ_ZONE_NAME   "pktsched_cbq"  /* zone name */
+
+static unsigned int cbq_size;          /* size of zone element */
+static struct zone *cbq_zone;          /* zone for cbq */
+
+void
+cbq_init(void)
+{
+       _CASSERT(CBQCLF_RED == RMCF_RED);
+       _CASSERT(CBQCLF_ECN == RMCF_ECN);
+       _CASSERT(CBQCLF_RIO == RMCF_RIO);
+       _CASSERT(CBQCLF_FLOWVALVE == RMCF_FLOWVALVE);
+       _CASSERT(CBQCLF_CLEARDSCP == RMCF_CLEARDSCP);
+       _CASSERT(CBQCLF_WRR == RMCF_WRR);
+       _CASSERT(CBQCLF_EFFICIENT == RMCF_EFFICIENT);
+       _CASSERT(CBQCLF_BLUE == RMCF_BLUE);
+       _CASSERT(CBQCLF_SFB == RMCF_SFB);
+       _CASSERT(CBQCLF_FLOWCTL == RMCF_FLOWCTL);
+       _CASSERT(CBQCLF_LAZY == RMCF_LAZY);
+
+       cbq_size = sizeof (cbq_state_t);
+       cbq_zone = zinit(cbq_size, CBQ_ZONE_MAX * cbq_size, 0, CBQ_ZONE_NAME);
+       if (cbq_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, CBQ_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(cbq_zone, Z_EXPAND, TRUE);
+       zone_change(cbq_zone, Z_CALLERACCT, TRUE);
+
+       rmclass_init();
+}
+
+cbq_state_t *
+cbq_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       cbq_state_t     *cbqp;
+
+       /* allocate and initialize cbq_state_t */
+       cbqp = (how == M_WAITOK) ? zalloc(cbq_zone) : zalloc_noblock(cbq_zone);
+       if (cbqp == NULL)
+               return (NULL);
+
+       bzero(cbqp, cbq_size);
+       CALLOUT_INIT(&cbqp->cbq_callout);
+       cbqp->cbq_qlen = 0;
+       cbqp->ifnp.ifq_ = &ifp->if_snd;         /* keep the ifclassq */
+       if (altq)
+               cbqp->cbq_flags |= CBQSF_ALTQ;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), cbq_style(cbqp));
+       }
+
+       return (cbqp);
+}
+
+int
+cbq_destroy(cbq_state_t *cbqp)
+{
+       struct ifclassq *ifq = cbqp->ifnp.ifq_;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = cbq_destroy_locked(cbqp);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+cbq_destroy_locked(cbq_state_t *cbqp)
+{
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       (void) cbq_clear_interface(cbqp);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+       }
+
+       if (cbqp->ifnp.default_)
+               cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+       if (cbqp->ifnp.root_)
+               cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+
+       /* deallocate cbq_state_t */
+       zfree(cbq_zone, cbqp);
+
+       return (0);
+}
+
+int
+cbq_add_queue(cbq_state_t *cbqp, u_int32_t qlimit, u_int32_t priority,
+    u_int32_t minburst, u_int32_t maxburst, u_int32_t pktsize,
+    u_int32_t maxpktsize, u_int32_t ns_per_byte, u_int32_t maxidle, int minidle,
+    u_int32_t offtime, u_int32_t flags, u_int32_t parent_qid, u_int32_t qid,
+    struct rm_class **clp)
+{
+#pragma unused(minburst, maxburst, maxpktsize)
+       struct rm_class *borrow, *parent;
+       struct rm_class *cl;
+       int i, error;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       /* Sanitize flags unless internally configured */
+       if (cbqp->cbq_flags & CBQSF_ALTQ)
+               flags &= CBQCLF_USERFLAGS;
+
+       /*
+        * find a free slot in the class table.  if the slot matching
+        * the lower bits of qid is free, use this slot.  otherwise,
+        * use the first free slot.
+        */
+       i = qid % CBQ_MAX_CLASSES;
+       if (cbqp->cbq_class_tbl[i] != NULL) {
+               for (i = 0; i < CBQ_MAX_CLASSES; i++)
+                       if (cbqp->cbq_class_tbl[i] == NULL)
+                               break;
+               if (i == CBQ_MAX_CLASSES)
+                       return (EINVAL);
+       }
+
+       /* check parameters */
+       if (priority >= CBQ_MAXPRI)
+               return (EINVAL);
+
+       if (ns_per_byte == 0) {
+               log(LOG_ERR, "%s: %s invalid inverse data rate\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+               return (EINVAL);
+       }
+
+       /* Get pointers to parent and borrow classes.  */
+       parent = cbq_clh_to_clp(cbqp, parent_qid);
+       if (flags & CBQCLF_BORROW)
+               borrow = parent;
+       else
+               borrow = NULL;
+
+       /*
+        * A class must borrow from its parent or it can not
+        * borrow at all.  Hence, borrow can be null.
+        */
+       if (parent == NULL && (flags & CBQCLF_ROOTCLASS) == 0) {
+               log(LOG_ERR, "%s: %s no parent class!\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+               return (EINVAL);
+       }
+
+       if ((borrow != parent) && (borrow != NULL)) {
+               log(LOG_ERR, "%s: %s borrow class != parent\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+               return (EINVAL);
+       }
+
+       /*
+        * check parameters
+        */
+       switch (flags & CBQCLF_CLASSMASK) {
+       case CBQCLF_ROOTCLASS:
+               if (parent != NULL) {
+                       log(LOG_ERR, "%s: %s parent exists\n",
+                           if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+                       return (EINVAL);
+               }
+               if (cbqp->ifnp.root_) {
+                       log(LOG_ERR, "%s: %s root class exists\n",
+                           if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+                       return (EINVAL);
+               }
+               break;
+       case CBQCLF_DEFCLASS:
+               if (cbqp->ifnp.default_) {
+                       log(LOG_ERR, "%s: %s default class exists\n",
+                           if_name(CBQS_IFP(cbqp)), cbq_style(cbqp));
+                       return (EINVAL);
+               }
+               break;
+       case 0:
+               break;
+       default:
+               /* more than two flags bits set */
+               log(LOG_ERR, "%s: %s invalid class flags 0x%x\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp),
+                   (flags & CBQCLF_CLASSMASK));
+               return (EINVAL);
+       }
+
+       /*
+        * create a class.  if this is a root class, initialize the
+        * interface.
+        */
+       if ((flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+               error = rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, ns_per_byte,
+                   cbqrestart, qid, qlimit, RM_MAXQUEUED, maxidle, minidle,
+                   offtime, flags);
+               if (error != 0)
+                       return (error);
+               cl = cbqp->ifnp.root_;
+       } else {
+               cl = rmc_newclass(priority, &cbqp->ifnp, ns_per_byte,
+                   rmc_delay_action, qid, qlimit, parent, borrow, maxidle,
+                   minidle, offtime, pktsize, flags);
+       }
+       if (cl == NULL)
+               return (ENOMEM);
+
+       /* return handle to user space. */
+       cl->stats_.handle = qid;
+       cl->stats_.depth = cl->depth_;
+
+       /* save the allocated class */
+       cbqp->cbq_class_tbl[i] = cl;
+
+       if ((flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+               cbqp->ifnp.default_ = cl;
+
+       if (clp != NULL)
+               *clp = cl;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
+                   "flags=%b\n", if_name(CBQS_IFP(cbqp)), cbq_style(cbqp),
+                   qid, priority, qlimit, flags, CBQCLF_BITS);
+       }
+
+       return (0);
+}
+
+int
+cbq_remove_queue(cbq_state_t *cbqp, u_int32_t qid)
+{
+       struct rm_class *cl;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       if ((cl = cbq_clh_to_clp(cbqp, qid)) == NULL)
+               return (EINVAL);
+
+       /* if we are a parent class, then return an error. */
+       if (RMC_IS_A_PARENT_CLASS(cl))
+               return (EINVAL);
+
+       /* delete the class */
+       rmc_delete_class(&cbqp->ifnp, cl);
+
+       /*
+        * free the class handle
+        */
+       for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+               if (cbqp->cbq_class_tbl[i] == cl) {
+                       cbqp->cbq_class_tbl[i] = NULL;
+                       if (cl == cbqp->ifnp.root_)
+                               cbqp->ifnp.root_ = NULL;
+                       if (cl == cbqp->ifnp.default_)
+                               cbqp->ifnp.default_ = NULL;
+                       break;
+               }
+       }
+       return (0);
+}
+
+/*
+ * int
+ * cbq_class_destroy(cbq_state_t *, struct rm_class *) - This
+ *     function destroys a given traffic class.  Before destroying
+ *     the class, all traffic for that class is released.
+ */
+static int
+cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
+{
+       int     i;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
+                   if_name(CBQS_IFP(cbqp)), cbq_style(cbqp),
+                   cl->stats_.handle, cl->pri_);
+       }
+
+       /* delete the class */
+       rmc_delete_class(&cbqp->ifnp, cl);
+
+       /*
+        * free the class handle
+        */
+       for (i = 0; i < CBQ_MAX_CLASSES; i++)
+               if (cbqp->cbq_class_tbl[i] == cl)
+                       cbqp->cbq_class_tbl[i] = NULL;
+
+       if (cl == cbqp->ifnp.root_)
+               cbqp->ifnp.root_ = NULL;
+       if (cl == cbqp->ifnp.default_)
+               cbqp->ifnp.default_ = NULL;
+
+       return (0);
+}
+
+/* convert class handle to class pointer */
+static struct rm_class *
+cbq_clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
+{
+       int i;
+       struct rm_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       /*
+        * first, optimistically try the slot matching the lower bits of
+        * the handle.  if it fails, do the linear table search.
+        */
+       i = chandle % CBQ_MAX_CLASSES;
+       if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+           cl->stats_.handle == chandle)
+               return (cl);
+       for (i = 0; i < CBQ_MAX_CLASSES; i++)
+               if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+                   cl->stats_.handle == chandle)
+                       return (cl);
+       return (NULL);
+}
+
+static const char *
+cbq_style(cbq_state_t *cbqp)
+{
+       return ((cbqp->cbq_flags & CBQSF_ALTQ) ? "ALTQ_CBQ" : "CBQ");
+}
+
+static int
+cbq_clear_interface(cbq_state_t *cbqp)
+{
+       int              again, i;
+       struct rm_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       /* clear out the classes now */
+       do {
+               again = 0;
+               for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+                       if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+                               if (RMC_IS_A_PARENT_CLASS(cl))
+                                       again++;
+                               else {
+                                       cbq_class_destroy(cbqp, cl);
+                                       cbqp->cbq_class_tbl[i] = NULL;
+                                       if (cl == cbqp->ifnp.root_)
+                                               cbqp->ifnp.root_ = NULL;
+                                       if (cl == cbqp->ifnp.default_)
+                                               cbqp->ifnp.default_ = NULL;
+                               }
+                       }
+               }
+       } while (again);
+
+       return (0);
+}
+
+/* copy the stats info in rm_class to class_states_t */
+int
+cbq_get_class_stats(cbq_state_t *cbqp, u_int32_t qid, class_stats_t *statsp)
+{
+       struct rm_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       if ((cl = cbq_clh_to_clp(cbqp, qid)) == NULL)
+               return (EINVAL);
+
+       statsp->xmit_cnt        = cl->stats_.xmit_cnt;
+       statsp->drop_cnt        = cl->stats_.drop_cnt;
+       statsp->over            = cl->stats_.over;
+       statsp->borrows         = cl->stats_.borrows;
+       statsp->overactions     = cl->stats_.overactions;
+       statsp->delays          = cl->stats_.delays;
+
+       statsp->depth           = cl->depth_;
+       statsp->priority        = cl->pri_;
+       statsp->maxidle         = cl->maxidle_;
+       statsp->minidle         = cl->minidle_;
+       statsp->offtime         = cl->offtime_;
+       statsp->qmax            = qlimit(&cl->q_);
+       statsp->ns_per_byte     = cl->ns_per_byte_;
+       statsp->wrr_allot       = cl->w_allotment_;
+       statsp->qcnt            = qlen(&cl->q_);
+       statsp->avgidle         = cl->avgidle_;
+
+       statsp->qtype           = qtype(&cl->q_);
+       statsp->qstate          = qstate(&cl->q_);
+#if CLASSQ_RED
+       if (q_is_red(&cl->q_))
+               red_getstats(cl->red_, &statsp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->q_))
+               rio_getstats(cl->rio_, &statsp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->q_))
+               blue_getstats(cl->blue_, &statsp->blue);
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
+               sfb_getstats(cl->sfb_, &statsp->sfb);
+
+       return (0);
+}
+
+int
+cbq_enqueue(cbq_state_t *cbqp, struct rm_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = cbqp->ifnp.ifq_;
+       int len, ret;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       /* grab class set by classifier */
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               log(LOG_ERR, "%s: packet does not have pkthdr\n",
+                   if_name(ifq->ifcq_ifp));
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       if (cl == NULL) {
+               cl = cbq_clh_to_clp(cbqp, t->pftag_qid);
+               if (cl == NULL) {
+                       cl = cbqp->ifnp.default_;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+
+       len = m_pktlen(m);
+
+       ret = rmc_queue_packet(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+                       /* packet has been freed in rmc_queue_packet */
+                       PKTCNTR_ADD(&cl->stats_.drop_cnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOT REACHED */
+               }
+       }
+
+       /* successfully queued. */
+       ++cbqp->cbq_qlen;
+       IFCQ_INC_LEN(ifq);
+
+       return (ret);
+}
+
+struct mbuf *
+cbq_dequeue(cbq_state_t *cbqp, cqdq_op_t op)
+{
+       struct ifclassq *ifq = cbqp->ifnp.ifq_;
+       struct mbuf *m;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       m = rmc_dequeue_next(&cbqp->ifnp, op);
+
+       if (m && op == CLASSQDQ_REMOVE) {
+               --cbqp->cbq_qlen;  /* decrement # of packets in cbq */
+               IFCQ_DEC_LEN(ifq);
+               IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
+
+               /* Update the class. */
+               rmc_update_class_util(&cbqp->ifnp);
+       }
+       return (m);
+}
+
+/*
+ * void
+ * cbqrestart(struct ifclassq *) - Restart sending of data.
+ * Called from rmc_restart via timeout after waking up
+ * a suspended class.
+ *     Returns:        NONE
+ */
+
+static void
+cbqrestart(struct ifclassq *ifq)
+{
+       u_int32_t qlen;
+
+       IFCQ_LOCK(ifq);
+       qlen = IFCQ_LEN(ifq);
+       IFCQ_UNLOCK(ifq);
+
+       if (qlen > 0)
+               ifnet_start(ifq->ifcq_ifp);
+}
+
+void
+cbq_purge(cbq_state_t *cbqp)
+{
+       struct rm_class *cl;
+       int              i;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+               if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+                       if (!qempty(&cl->q_) && pktsched_verbose) {
+                               log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
+                                   "qlen=%d\n", if_name(CBQS_IFP(cbqp)),
+                                   cbq_style(cbqp), cl->stats_.handle,
+                                   cl->pri_, qlen(&cl->q_));
+                       }
+                       rmc_dropall(cl);
+               }
+       }
+}
+
+void
+cbq_event(cbq_state_t *cbqp, cqev_t ev)
+{
+       struct rm_class *cl;
+       int              i;
+
+       IFCQ_LOCK_ASSERT_HELD(cbqp->ifnp.ifq_);
+
+       for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+               if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+                       if (pktsched_verbose) {
+                               log(LOG_DEBUG, "%s: %s update qid=%d pri=%d "
+                                   "event=%s\n", if_name(CBQS_IFP(cbqp)),
+                                   cbq_style(cbqp), cl->stats_.handle,
+                                   cl->pri_, ifclassq_ev2str(ev));
+                       }
+                       rmc_updateq(cl, ev);
+               }
+       }
+}
+
+int
+cqb_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+#pragma unused(ifq, flags)
+       return (ENXIO);         /* not yet */
+}
+
+int
+cbq_teardown_ifclassq(struct ifclassq *ifq)
+{
+       cbq_state_t *cbqp = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cbqp != NULL && ifq->ifcq_type == PKTSCHEDT_CBQ);
+
+       (void) cbq_destroy_locked(cbqp);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+cbq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       cbq_state_t *cbqp = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_CBQ);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (cbq_get_class_stats(cbqp, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_cbq_stats));
+}
+#endif /* PKTSCHED_CBQ */
diff --git a/bsd/net/pktsched/pktsched_cbq.h b/bsd/net/pktsched/pktsched_cbq.h
new file mode 100644 (file)
index 0000000..15fe1b0
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_cbq.h,v 1.8 2006/10/12 19:59:08 peter Exp $       */
+/*     $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $  */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the SMCC Technology
+ *      Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ *      promote products derived from this software without specific prior
+ *      written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_CBQ_H_
+#define        _NET_PKTSCHED_PKTSCHED_CBQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched_rmclass.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* class flags should be the same as the class flags in rm_class.h */
+#define        CBQCLF_RED              RMCF_RED        /* use RED */
+#define        CBQCLF_ECN              RMCF_ECN        /* use ECN with RED/BLUE/SFB */
+#define        CBQCLF_RIO              RMCF_RIO        /* use RIO */
+#define        CBQCLF_FLOWVALVE        RMCF_FLOWVALVE  /* use flowvalve/penalty-box */
+#define        CBQCLF_CLEARDSCP        RMCF_CLEARDSCP  /* clear diffserv codepoint */
+#define        CBQCLF_BORROW           0x0020          /* borrow from parent */
+
+/* class flags only for root class */
+#define        CBQCLF_WRR              RMCF_WRR        /* weighted-round robin */
+#define        CBQCLF_EFFICIENT        RMCF_EFFICIENT  /* work-conserving */
+
+/* class flags for special classes */
+#define        CBQCLF_ROOTCLASS        0x1000          /* root class */
+#define        CBQCLF_DEFCLASS         0x2000          /* default class */
+#define        CBQCLF_CLASSMASK        0xf000          /* class mask */
+
+#define        CBQCLF_BLUE             RMCF_BLUE       /* use BLUE */
+#define        CBQCLF_SFB              RMCF_SFB        /* use SFB */
+#define        CBQCLF_FLOWCTL          RMCF_FLOWCTL    /* enable flow ctl advisories */
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        CBQCLF_LAZY             0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        CBQCLF_USERFLAGS                                                   \
+       (CBQCLF_RED | CBQCLF_ECN | CBQCLF_RIO | CBQCLF_FLOWVALVE |         \
+       CBQCLF_CLEARDSCP | CBQCLF_BORROW | CBQCLF_WRR | CBQCLF_EFFICIENT | \
+       CBQCLF_ROOTCLASS | CBQCLF_DEFCLASS | CBQCLF_BLUE | CBQCLF_SFB |    \
+       CBQCLF_FLOWCTL)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        CBQCLF_BITS \
+       "\020\1RED\2ECN\3RIO\4FLOWVALVE\5CLEARDSCP\6BORROW" \
+       "\11WRR\12EFFICIENT\15ROOT\16DEFAULT\21BLUE\22SFB\23FLOWCTL\35LAZY"
+#else
+#define        CBQCLF_BITS \
+       "\020\1RED\2ECN\3RIO\4FLOWVALVE\5CLEARDSCP\6BORROW" \
+       "\11WRR\12EFFICIENT\15ROOT\16DEFAULT\21BLUE\22SFB\23FLOWCTL"
+#endif /* !BSD_KERNEL_PRIVATE */
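+
+/*
+ * Note: CBQCLF_BITS is a kernel "%b" bit-name string: the leading \020
+ * selects hexadecimal output and each \<bit>NAME entry names a 1-based
+ * bit position, so e.g. a flags value of 0x1001 prints as
+ * 0x1001<RED,ROOT>.
+ */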
+
+#define        CBQ_MAXQSIZE            200
+#define        CBQ_MAXPRI              RM_MAXPRIO
+
+typedef struct cbq_classstats {
+       u_int32_t       handle;
+       u_int32_t       depth;
+
+       struct pktcntr  xmit_cnt;       /* packets sent in this class */
+       struct pktcntr  drop_cnt;       /* dropped packets */
+       u_int32_t       over;           /* # times went over limit */
+       u_int32_t       borrows;        /* # times tried to borrow */
+       u_int32_t       overactions;    /* # times invoked overlimit action */
+       u_int32_t       delays;         /* # times invoked delay actions */
+
+       /* other static class parameters useful for debugging */
+       int             priority;
+       int             maxidle;
+       int             minidle;
+       int             offtime;
+       int             qmax;
+       int             ns_per_byte;
+       int             wrr_allot;
+
+       int             qcnt;           /* # packets in queue */
+       int             avgidle;
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t   qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t  qstate;
+} class_stats_t;
+
+#ifdef BSD_KERNEL_PRIVATE
+/*
+ * Define macros used only by kernel drivers and modules.
+ */
+#define        CBQ_WATCHDOG            (hz / 20)
+#define        CBQ_TIMEOUT             10
+#define        CBQ_LS_TIMEOUT          (20 * hz / 1000)
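+
+/*
+ * Note: the values above are tick counts derived from hz, so they are
+ * hz-independent intervals: CBQ_WATCHDOG is hz/20 ticks (50 ms) and
+ * CBQ_LS_TIMEOUT is 20*hz/1000 ticks (20 ms).
+ */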
+
+#define        CBQ_MAX_CLASSES 256
+
+/* cbqstate flags */
+#define        CBQSF_ALTQ              0x1     /* configured via PF/ALTQ */
+
+/*
+ * Define State structures.
+ */
+typedef struct cbqstate {
+       int                      cbq_qlen;      /* # of packets in cbq */
+       u_int32_t                cbq_flags;     /* flags */
+       struct rm_class         *cbq_class_tbl[CBQ_MAX_CLASSES];
+
+       struct rm_ifdat          ifnp;
+       struct callout           cbq_callout;   /* for timeouts */
+} cbq_state_t;
+
+#define        CBQS_IFP(_cs)           ((_cs)->ifnp.ifq_->ifcq_ifp)
+
+extern void cbq_init(void);
+extern cbq_state_t *cbq_alloc(struct ifnet *, int, boolean_t);
+extern int cbq_destroy(cbq_state_t *);
+extern void cbq_purge(cbq_state_t *);
+extern void cbq_event(cbq_state_t *, cqev_t);
+extern int cbq_add_queue(cbq_state_t *, u_int32_t, u_int32_t, u_int32_t,
+    u_int32_t, u_int32_t, u_int32_t, u_int32_t, u_int32_t, int, u_int32_t,
+    u_int32_t, u_int32_t, u_int32_t, struct rm_class **);
+extern int cbq_remove_queue(cbq_state_t *, u_int32_t);
+extern int cbq_get_class_stats(cbq_state_t *, u_int32_t, class_stats_t *);
+extern int cbq_enqueue(cbq_state_t *, struct rm_class *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *cbq_dequeue(cbq_state_t *, cqdq_op_t);
+extern int cqb_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int cbq_teardown_ifclassq(struct ifclassq *);
+extern int cbq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* !_NET_PKTSCHED_PKTSCHED_CBQ_H_ */
diff --git a/bsd/net/pktsched/pktsched_fairq.c b/bsd/net/pktsched/pktsched_fairq.c
new file mode 100644 (file)
index 0000000..f5fc735
--- /dev/null
@@ -0,0 +1,1290 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
+ */
+/*
+ * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
+ * fairq.  The fairq algorithm is completely different than priq, of course,
+ * but because I used priq's skeleton I believe I should include priq's
+ * copyright.
+ *
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FAIRQ - take traffic classified by keep state (hashed into
+ *        pf->pftag_flowhash) and bucketize it.  Fairly extract
+ *        the first packet from each bucket in a round-robin fashion.
+ *
+ * TODO - better overall qlimit support (right now it is per-bucket).
+ *     - NOTE: red etc is per bucket, not overall.
+ *     - better service curve support.
+ *
+ * EXAMPLE:
+ *
+ *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
+ *  queue std  priority 3 bandwidth 200Kb \
+ *     fairq (buckets 64, default, hogs 1Kb) qlimit 50
+ *  queue bulk priority 2 bandwidth 100Kb \
+ *     fairq (buckets 64, hogs 1Kb) qlimit 50
+ *
+ *     NOTE: When the aggregate bandwidth is less than the link bandwidth
+ *           any remaining bandwidth is dynamically assigned using the
+ *           existing bandwidth specs as weightings.
+ *
+ *  pass out on em0 from any to any keep state queue std
+ *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
+ */
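+
+/*
+ * Illustrative sketch (not part of the scheduler): because the bucket
+ * count is forced to a power of two at class creation, classifying a
+ * packet into a bucket reduces to masking the flow hash, roughly:
+ *
+ *     hindex = t->pftag_flowhash & cl->cl_nbucket_mask;
+ *     b = &cl->cl_buckets[hindex];
+ *
+ * and dequeue walks the circular list of in-use buckets, taking the
+ * head packet of each in turn (see fairq_selectq/fairq_getq below).
+ */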
+
+#if PKTSCHED_FAIRQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_fairq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+#if 0
+static int fairq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *fairq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
+static int fairq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+#endif
+static int fairq_clear_interface(struct fairq_if *);
+static inline int fairq_addq(struct fairq_class *, struct mbuf *,
+    struct pf_mtag *);
+static inline struct mbuf *fairq_getq(struct fairq_class *, u_int64_t);
+static inline struct mbuf *fairq_pollq(struct fairq_class *, u_int64_t, int *);
+static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
+static void fairq_purgeq(struct fairq_if *, struct fairq_class *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+static void fairq_updateq(struct fairq_if *, struct fairq_class *, cqev_t);
+static struct fairq_class *fairq_class_create(struct fairq_if *, int, u_int32_t,
+    u_int64_t, u_int32_t, int, u_int64_t, u_int64_t, u_int64_t, u_int64_t,
+    u_int32_t);
+static int fairq_class_destroy(struct fairq_if *, struct fairq_class *);
+static int fairq_destroy_locked(struct fairq_if *);
+static inline struct fairq_class *fairq_clh_to_clp(struct fairq_if *,
+    u_int32_t);
+static const char *fairq_style(struct fairq_if *);
+
+#define        FAIRQ_ZONE_MAX  32              /* maximum elements in zone */
+#define        FAIRQ_ZONE_NAME "pktsched_fairq" /* zone name */
+
+static unsigned int fairq_size;                /* size of zone element */
+static struct zone *fairq_zone;                /* zone for fairq */
+
+#define        FAIRQ_CL_ZONE_MAX       32      /* maximum elements in zone */
+#define        FAIRQ_CL_ZONE_NAME      "pktsched_fairq_cl" /* zone name */
+
+static unsigned int fairq_cl_size;     /* size of zone element */
+static struct zone *fairq_cl_zone;     /* zone for fairq classes */
+
+void
+fairq_init(void)
+{
+       fairq_size = sizeof (struct fairq_if);
+       fairq_zone = zinit(fairq_size, FAIRQ_ZONE_MAX * fairq_size,
+           0, FAIRQ_ZONE_NAME);
+       if (fairq_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, FAIRQ_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(fairq_zone, Z_EXPAND, TRUE);
+       zone_change(fairq_zone, Z_CALLERACCT, TRUE);
+
+       fairq_cl_size = sizeof (struct fairq_class);
+       fairq_cl_zone = zinit(fairq_cl_size, FAIRQ_CL_ZONE_MAX * fairq_cl_size,
+           0, FAIRQ_CL_ZONE_NAME);
+       if (fairq_cl_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, FAIRQ_CL_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(fairq_cl_zone, Z_EXPAND, TRUE);
+       zone_change(fairq_cl_zone, Z_CALLERACCT, TRUE);
+}
+
+struct fairq_if *
+fairq_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       struct fairq_if *fif;
+
+       fif = (how == M_WAITOK) ?
+           zalloc(fairq_zone) : zalloc_noblock(fairq_zone);
+       if (fif == NULL)
+               return (NULL);
+
+       bzero(fif, fairq_size);
+       fif->fif_maxpri = -1;
+       fif->fif_ifq = &ifp->if_snd;
+       if (altq)
+               fif->fif_flags |= FAIRQIFF_ALTQ;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), fairq_style(fif));
+       }
+
+       return (fif);
+}
+
+int
+fairq_destroy(struct fairq_if *fif)
+{
+       struct ifclassq *ifq = fif->fif_ifq;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = fairq_destroy_locked(fif);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+fairq_destroy_locked(struct fairq_if *fif)
+{
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       (void) fairq_clear_interface(fif);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif));
+       }
+
+       zfree(fairq_zone, fif);
+
+       return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+fairq_clear_interface(struct fairq_if *fif)
+{
+       struct fairq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       /* clear out the classes */
+       for (pri = 0; pri <= fif->fif_maxpri; pri++)
+               if ((cl = fif->fif_classes[pri]) != NULL)
+                       fairq_class_destroy(fif, cl);
+
+       return (0);
+}
+
+/* discard all the queued packets on the interface */
+void
+fairq_purge(struct fairq_if *fif)
+{
+       struct fairq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       for (pri = 0; pri <= fif->fif_maxpri; pri++) {
+               if ((cl = fif->fif_classes[pri]) != NULL && cl->cl_head)
+                       fairq_purgeq(fif, cl, 0, NULL, NULL);
+       }
+#if !PF_ALTQ
+       /*
+        * This assertion is safe to be made only when PF_ALTQ is not
+        * configured; otherwise, IFCQ_LEN represents the sum of the
+        * packets managed by ifcq_disc and altq_disc instances, which
+        * is possible when transitioning between the two.
+        */
+       VERIFY(IFCQ_LEN(fif->fif_ifq) == 0);
+#endif /* !PF_ALTQ */
+}
+
+void
+fairq_event(struct fairq_if *fif, cqev_t ev)
+{
+       struct fairq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       for (pri = 0; pri <= fif->fif_maxpri; pri++)
+               if ((cl = fif->fif_classes[pri]) != NULL)
+                       fairq_updateq(fif, cl, ev);
+}
+
+int
+fairq_add_queue(struct fairq_if *fif, int priority, u_int32_t qlimit,
+    u_int64_t bandwidth, u_int32_t nbuckets, int flags, u_int64_t hogs_m1,
+    u_int64_t lssc_m1, u_int64_t lssc_d, u_int64_t lssc_m2, u_int32_t qid,
+    struct fairq_class **clp)
+{
+       struct fairq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       /* check parameters */
+       if (priority >= FAIRQ_MAXPRI)
+               return (EINVAL);
+       if (bandwidth == 0 || (bandwidth / 8) == 0)
+               return (EINVAL);
+       if (fif->fif_classes[priority] != NULL)
+               return (EBUSY);
+       if (fairq_clh_to_clp(fif, qid) != NULL)
+               return (EBUSY);
+
+       cl = fairq_class_create(fif, priority, qlimit, bandwidth,
+           nbuckets, flags, hogs_m1, lssc_m1, lssc_d, lssc_m2, qid);
+       if (cl == NULL)
+               return (ENOMEM);
+
+       if (clp != NULL)
+               *clp = cl;
+
+       return (0);
+}
+
+static struct fairq_class *
+fairq_class_create(struct fairq_if *fif, int pri, u_int32_t qlimit,
+    u_int64_t bandwidth, u_int32_t nbuckets, int flags, u_int64_t hogs_m1,
+    u_int64_t lssc_m1, u_int64_t lssc_d, u_int64_t lssc_m2, u_int32_t qid)
+{
+#pragma unused(lssc_d, lssc_m2)
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct fairq_class *cl;
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       /* Sanitize flags unless internally configured */
+       if (fif->fif_flags & FAIRQIFF_ALTQ)
+               flags &= FARF_USERFLAGS;
+
+#if !CLASSQ_RED
+       if (flags & FARF_RED) {
+               log(LOG_ERR, "%s: %s RED not available!\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & FARF_RIO) {
+               log(LOG_ERR, "%s: %s RIO not available!\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & FARF_BLUE) {
+               log(LOG_ERR, "%s: %s BLUE not available!\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) &&
+           (flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) != FARF_RED &&
+           (flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) != FARF_RIO &&
+           (flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) != FARF_BLUE &&
+           (flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) != FARF_SFB) {
+               log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif));
+               return (NULL);
+       }
+
+       if (bandwidth == 0 || (bandwidth / 8) == 0) {
+               log(LOG_ERR, "%s: %s invalid data rate %llu\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif), bandwidth);
+               return (NULL);
+       }
+
+       if (nbuckets == 0)
+               nbuckets = 256;
+       if (nbuckets > FAIRQ_MAX_BUCKETS)
+               nbuckets = FAIRQ_MAX_BUCKETS;
+       /* enforce power-of-2 size */
+       while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
+               ++nbuckets;
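+       /*
+        * (For a power of two n, n ^ (n - 1) sets all bits up to and
+        * including the highest set bit, i.e. equals (n << 1) - 1; for
+        * any other n the result is smaller, so the loop walks nbuckets
+        * up to the next power of two and (nbuckets - 1) is then usable
+        * as a bucket-index mask.)
+        */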
+
+       ifq = fif->fif_ifq;
+       ifp = FAIRQIF_IFP(fif);
+
+       if ((cl = fif->fif_classes[pri]) != NULL) {
+               /* modify the class instead of creating a new one */
+               if (cl->cl_head)
+                       fairq_purgeq(fif, cl, 0, NULL, NULL);
+#if CLASSQ_RIO
+               if (cl->cl_qtype == Q_RIO)
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (cl->cl_qtype == Q_RED)
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (cl->cl_qtype == Q_BLUE)
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               cl->cl_qtype = Q_DROPTAIL;
+               cl->cl_qstate = QS_RUNNING;
+       } else {
+               cl = zalloc(fairq_cl_zone);
+               if (cl == NULL)
+                       goto err_ret;
+               bzero(cl, fairq_cl_size);
+               cl->cl_nbuckets = nbuckets;
+               cl->cl_nbucket_mask = nbuckets - 1;
+
+               cl->cl_buckets = _MALLOC(sizeof (struct fairq_bucket) *
+                   cl->cl_nbuckets, M_DEVBUF, M_WAITOK|M_ZERO);
+               if (cl->cl_buckets == NULL)
+                       goto err_buckets;
+               cl->cl_head = NULL;
+       }
+
+       fif->fif_classes[pri] = cl;
+       if (flags & FARF_DEFAULTCLASS)
+               fif->fif_default = cl;
+       if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
+               qlimit = IFCQ_MAXLEN(ifq);
+               if (qlimit == 0)
+                       qlimit = DEFAULT_QLIMIT;        /* use default */
+       }
+       cl->cl_qlimit = qlimit;
+       for (i = 0; i < cl->cl_nbuckets; ++i) {
+               _qinit(&cl->cl_buckets[i].queue, Q_DROPTAIL, qlimit);
+       }
+       cl->cl_bandwidth = bandwidth / 8;       /* cvt to bytes per second */
+       cl->cl_qtype = Q_DROPTAIL;
+       cl->cl_qstate = QS_RUNNING;
+       cl->cl_flags = flags;
+       cl->cl_pri = pri;
+       if (pri > fif->fif_maxpri)
+               fif->fif_maxpri = pri;
+       cl->cl_fif = fif;
+       cl->cl_handle = qid;
+       cl->cl_hogs_m1 = hogs_m1 / 8;
+       cl->cl_lssc_m1 = lssc_m1 / 8;   /* NOT YET USED */
+       cl->cl_bw_current = 0;
+
+       if (flags & (FARF_RED|FARF_RIO|FARF_BLUE|FARF_SFB)) {
+#if CLASSQ_RED || CLASSQ_RIO
+               u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
+               int pkttime;
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+
+               cl->cl_qflags = 0;
+               if (flags & FARF_ECN) {
+                       if (flags & FARF_BLUE)
+                               cl->cl_qflags |= BLUEF_ECN;
+                       else if (flags & FARF_SFB)
+                               cl->cl_qflags |= SFBF_ECN;
+                       else if (flags & FARF_RED)
+                               cl->cl_qflags |= REDF_ECN;
+                       else if (flags & FARF_RIO)
+                               cl->cl_qflags |= RIOF_ECN;
+               }
+               if (flags & FARF_FLOWCTL) {
+                       if (flags & FARF_SFB)
+                               cl->cl_qflags |= SFBF_FLOWCTL;
+               }
+               if (flags & FARF_CLEARDSCP) {
+                       if (flags & FARF_RIO)
+                               cl->cl_qflags |= RIOF_CLEARDSCP;
+               }
+#if CLASSQ_RED || CLASSQ_RIO
+               /*
+                * XXX: RED & RIO should be watching link speed and MTU
+                *      events and recomputing pkttime accordingly.
+                */
+               if (ifbandwidth < 8)
+                       pkttime = 1000 * 1000 * 1000; /* 1 sec */
+               else
+                       pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
+                           (ifbandwidth / 8);
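+
+               /*
+                * i.e. pkttime is the time, in nanoseconds, to transmit
+                * one MTU-sized packet at the link rate (ifbandwidth
+                * being a bit rate, ifbandwidth/8 is bytes per second).
+                */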
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RIO
+               if (flags & FARF_RIO) {
+                       cl->cl_rio =
+                           rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
+                       if (cl->cl_rio != NULL)
+                               cl->cl_qtype = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (flags & FARF_RED) {
+                       cl->cl_red = red_alloc(ifp, 0, 0,
+                           cl->cl_qlimit * 10/100,
+                           cl->cl_qlimit * 30/100,
+                           cl->cl_qflags, pkttime);
+                       if (cl->cl_red != NULL)
+                               cl->cl_qtype = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & FARF_BLUE) {
+                       cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
+                       if (cl->cl_blue != NULL)
+                               cl->cl_qtype = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & FARF_SFB) {
+                       if (!(cl->cl_flags & FARF_LAZY))
+                               cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                                   cl->cl_qlimit, cl->cl_qflags);
+                       if (cl->cl_sfb != NULL || (cl->cl_flags & FARF_LAZY))
+                               cl->cl_qtype = Q_SFB;
+               }
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
+                   "flags=%b\n", if_name(ifp), fairq_style(fif),
+                   cl->cl_handle, cl->cl_pri, cl->cl_qlimit, flags, FARF_BITS);
+       }
+
+       return (cl);
+
+err_buckets:
+       if (cl->cl_buckets != NULL)
+               _FREE(cl->cl_buckets, M_DEVBUF);
+err_ret:
+       if (cl != NULL) {
+               if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+                       if (cl->cl_qtype == Q_RIO)
+                               rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+                       if (cl->cl_qtype == Q_RED)
+                               red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+                       if (cl->cl_qtype == Q_BLUE)
+                               blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+                       if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+                               sfb_destroy(cl->cl_sfb);
+                       cl->cl_qalg.ptr = NULL;
+                       cl->cl_qtype = Q_DROPTAIL;
+                       cl->cl_qstate = QS_RUNNING;
+               }
+               zfree(fairq_cl_zone, cl);
+       }
+       return (NULL);
+}
+
+int
+fairq_remove_queue(struct fairq_if *fif, u_int32_t qid)
+{
+       struct fairq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       if ((cl = fairq_clh_to_clp(fif, qid)) == NULL)
+               return (EINVAL);
+
+       return (fairq_class_destroy(fif, cl));
+}
+
+static int
+fairq_class_destroy(struct fairq_if *fif, struct fairq_class *cl)
+{
+       struct ifclassq *ifq = fif->fif_ifq;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (cl->cl_head)
+               fairq_purgeq(fif, cl, 0, NULL, NULL);
+
+       fif->fif_classes[cl->cl_pri] = NULL;
+       if (fif->fif_poll_cache == cl)
+               fif->fif_poll_cache = NULL;
+       if (fif->fif_maxpri == cl->cl_pri) {
+               for (pri = cl->cl_pri; pri >= 0; pri--)
+                       if (fif->fif_classes[pri] != NULL) {
+                               fif->fif_maxpri = pri;
+                               break;
+                       }
+               if (pri < 0)
+                       fif->fif_maxpri = -1;
+       }
+
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (cl->cl_qtype == Q_RIO)
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (cl->cl_qtype == Q_RED)
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (cl->cl_qtype == Q_BLUE)
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               cl->cl_qtype = Q_DROPTAIL;
+               cl->cl_qstate = QS_RUNNING;
+       }
+
+       if (fif->fif_default == cl)
+               fif->fif_default = NULL;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif),
+                   cl->cl_handle, cl->cl_pri);
+       }
+
+       _FREE(cl->cl_buckets, M_DEVBUF);
+       cl->cl_head = NULL;     /* sanity */
+       cl->cl_polled = NULL;   /* sanity */
+       cl->cl_buckets = NULL;  /* sanity */
+
+       zfree(fairq_cl_zone, cl);
+
+       return (0);
+}
+
+int
+fairq_enqueue(struct fairq_if *fif, struct fairq_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = fif->fif_ifq;
+       int len, ret;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cl == NULL || cl->cl_fif == fif);
+
+       if (cl == NULL) {
+               cl = fairq_clh_to_clp(fif, t->pftag_qid);
+               if (cl == NULL) {
+                       cl = fif->fif_default;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+
+       cl->cl_flags |= FARF_HAS_PACKETS;
+       len = m_pktlen(m);
+
+       ret = fairq_addq(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+
+                       /* packet has been freed in fairq_addq */
+                       PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOT REACHED */
+               }
+       }
+       IFCQ_INC_LEN(ifq);
+
+       /* successfully queued. */
+       return (ret);
+}
+
+/*
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
+struct mbuf *
+fairq_dequeue(struct fairq_if *fif, cqdq_op_t op)
+{
+       struct ifclassq *ifq = fif->fif_ifq;
+       struct fairq_class *cl;
+       struct fairq_class *best_cl;
+       struct mbuf *best_m;
+       struct mbuf *m;
+       u_int64_t cur_time = read_machclk();
+       u_int32_t best_scale;
+       u_int32_t scale;
+       int pri;
+       int hit_limit;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (IFCQ_IS_EMPTY(ifq)) {
+               /* no packet in the queue */
+               return (NULL);
+       }
+
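+       /*
+        * A REMOVE immediately following a POLL must return the packet
+        * the POLL reported, so consume the cached poll selection first
+        * (cf. the CLASSQDQ_POLL contract above).
+        */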
+       if (fif->fif_poll_cache && op == CLASSQDQ_REMOVE) {
+               best_cl = fif->fif_poll_cache;
+               m = fairq_getq(best_cl, cur_time);
+               fif->fif_poll_cache = NULL;
+               if (m != NULL) {
+                       IFCQ_DEC_LEN(ifq);
+                       IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
+                       PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1, m_pktlen(m));
+               }
+       } else {
+               best_cl = NULL;
+               best_m = NULL;
+               best_scale = 0xFFFFFFFFU;
+
+               for (pri = fif->fif_maxpri;  pri >= 0; pri--) {
+                       if ((cl = fif->fif_classes[pri]) == NULL)
+                               continue;
+                       if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
+                               continue;
+                       m = fairq_pollq(cl, cur_time, &hit_limit);
+                       if (m == NULL) {
+                               cl->cl_flags &= ~FARF_HAS_PACKETS;
+                               continue;
+                       }
+
+                       /*
+                        * We can halt the search immediately if the queue
+                        * did not hit its bandwidth limit.
+                        */
+                       if (hit_limit == 0) {
+                               best_cl = cl;
+                               best_m = m;
+                               break;
+                       }
+
+                       /*
+                        * Otherwise calculate the scale factor and select
+                        * the queue with the lowest scale factor.  This
+                        * apportions any unused bandwidth weighted by
+                        * the relative bandwidth specification.
+                        */
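+                       /*
+                        * E.g. a class rated 25 KB/s currently moving
+                        * 20 KB/s scores 80, while one rated 12.5 KB/s
+                        * moving 5 KB/s scores 40 and is picked, so the
+                        * class furthest below its own rating gets the
+                        * spare bandwidth first.
+                        */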
+                       scale = cl->cl_bw_current * 100 / cl->cl_bandwidth;
+                       if (scale < best_scale) {
+                               best_cl = cl;
+                               best_m = m;
+                               best_scale = scale;
+                       }
+               }
+
+               if (op == CLASSQDQ_POLL) {
+                       fif->fif_poll_cache = best_cl;
+                       m = best_m;
+               } else if (best_cl != NULL) {
+                       m = fairq_getq(best_cl, cur_time);
+                       if (m != NULL) {
+                               IFCQ_DEC_LEN(ifq);
+                               IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
+                               PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1,
+                                   m_pktlen(m));
+                       }
+               } else {
+                       m = NULL;
+               }
+       }
+       return (m);
+}
+
+static inline int
+fairq_addq(struct fairq_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct ifclassq *ifq = cl->cl_fif->fif_ifq;
+       fairq_bucket_t *b;
+       u_int32_t hash = t->pftag_flowhash;
+       u_int32_t hindex;
+       u_int64_t bw;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       /*
+        * If the packet doesn't have any keep state, put it on the end of
+        * our queue.  XXX this can result in out-of-order delivery.
+        */
+       if (hash == 0) {
+               if (cl->cl_head)
+                       b = cl->cl_head->prev;
+               else
+                       b = &cl->cl_buckets[0];
+       } else {
+               hindex = (hash & cl->cl_nbucket_mask);
+               b = &cl->cl_buckets[hindex];
+       }
+
+       /*
+        * Add the bucket to the end of the circular list of active buckets.
+        *
+        * As a special case we add the bucket to the beginning of the list
+        * instead of the end if it was not previously on the list and if
+        * its traffic is less than the hog level.
+        */
+       if (b->in_use == 0) {
+               b->in_use = 1;
+               if (cl->cl_head == NULL) {
+                       cl->cl_head = b;
+                       b->next = b;
+                       b->prev = b;
+               } else {
+                       b->next = cl->cl_head;
+                       b->prev = cl->cl_head->prev;
+                       b->prev->next = b;
+                       b->next->prev = b;
+
+                       if (b->bw_delta && cl->cl_hogs_m1) {
+                               bw = b->bw_bytes * machclk_freq / b->bw_delta;
+                               if (bw < cl->cl_hogs_m1)
+                                       cl->cl_head = b;
+                       }
+               }
+       }
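+       /*
+        * (Placing a light, sub-hog flow at the head means it is served
+        * on the very next round-robin pass, ahead of buckets already
+        * running at or above the hog rate.)
+        */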
+
+#if CLASSQ_RIO
+       if (cl->cl_qtype == Q_RIO)
+               return (rio_addq(cl->cl_rio, &b->queue, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (cl->cl_qtype == Q_RED)
+               return (red_addq(cl->cl_red, &b->queue, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (cl->cl_qtype == Q_BLUE)
+               return (blue_addq(cl->cl_blue, &b->queue, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (cl->cl_qtype == Q_SFB) {
+               if (cl->cl_sfb == NULL) {
+                       struct ifnet *ifp = FAIRQIF_IFP(cl->cl_fif);
+
+                       VERIFY(cl->cl_flags & FARF_LAZY);
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                           cl->cl_qlimit, cl->cl_qflags);
+                       if (cl->cl_sfb == NULL) {
+                               /* fall back to droptail */
+                               cl->cl_qtype = Q_DROPTAIL;
+                               cl->cl_flags &= ~FARF_SFB;
+                               cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: %s SFB lazy allocation "
+                                   "failed for qid=%d pri=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   fairq_style(cl->cl_fif), cl->cl_handle,
+                                   cl->cl_pri);
+                       }
+               }
+               if (cl->cl_sfb != NULL)
+                       return (sfb_addq(cl->cl_sfb, &b->queue, m, t));
+       } else if (qlen(&b->queue) >= qlimit(&b->queue)) {
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       if (cl->cl_flags & FARF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       _addq(&b->queue, m);
+
+       return (0);
+}
+
+static inline struct mbuf *
+fairq_getq(struct fairq_class *cl, u_int64_t cur_time)
+{
+       fairq_bucket_t *b;
+       struct mbuf *m;
+
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_fif->fif_ifq);
+
+       b = fairq_selectq(cl, 0);
+       if (b == NULL)
+               m = NULL;
+#if CLASSQ_RIO
+       else if (cl->cl_qtype == Q_RIO)
+               m = rio_getq(cl->cl_rio, &b->queue);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       else if (cl->cl_qtype == Q_RED)
+               m = red_getq(cl->cl_red, &b->queue);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       else if (cl->cl_qtype == Q_BLUE)
+               m = blue_getq(cl->cl_blue, &b->queue);
+#endif /* CLASSQ_BLUE */
+       else if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+               m = sfb_getq(cl->cl_sfb, &b->queue);
+       else
+               m = _getq(&b->queue);
+
+       /*
+        * Calculate the BW change
+        */
+       if (m != NULL) {
+               u_int64_t delta;
+
+               /*
+                * Per-class bandwidth calculation
+                */
+               delta = (cur_time - cl->cl_last_time);
+               if (delta > machclk_freq * 8)
+                       delta = machclk_freq * 8;
+               cl->cl_bw_delta += delta;
+               cl->cl_bw_bytes += m->m_pkthdr.len;
+               cl->cl_last_time = cur_time;
+               if (cl->cl_bw_delta > machclk_freq) {
+                       cl->cl_bw_delta -= cl->cl_bw_delta >> 2;
+                       cl->cl_bw_bytes -= cl->cl_bw_bytes >> 2;
+               }
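+               /*
+                * Scaling both terms by 3/4 once more than a second of
+                * machclk ticks has accumulated turns bw_bytes/bw_delta
+                * into an exponentially decayed rate estimate rather
+                * than a lifetime average.
+                */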
+
+               /*
+                * Per-bucket bandwidth calculation
+                */
+               delta = (cur_time - b->last_time);
+               if (delta > machclk_freq * 8)
+                       delta = machclk_freq * 8;
+               b->bw_delta += delta;
+               b->bw_bytes += m->m_pkthdr.len;
+               b->last_time = cur_time;
+               if (b->bw_delta > machclk_freq) {
+                       b->bw_delta -= b->bw_delta >> 2;
+                       b->bw_bytes -= b->bw_bytes >> 2;
+               }
+       }
+       return (m);
+}
+
+/*
+ * Figure out what the next packet would be if there were no limits.  If
+ * this class hits its bandwidth limit *hit_limit is set to non-zero, otherwise
+ * it is set to 0.  A non-NULL mbuf is returned either way.
+ */
+static inline struct mbuf *
+fairq_pollq(struct fairq_class *cl, u_int64_t cur_time, int *hit_limit)
+{
+       fairq_bucket_t *b;
+       struct mbuf *m;
+       u_int64_t delta;
+       u_int64_t bw;
+
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_fif->fif_ifq);
+
+       *hit_limit = 0;
+       b = fairq_selectq(cl, 1);
+       if (b == NULL)
+               return (NULL);
+       m = qhead(&b->queue);
+
+       /*
+        * Did this packet exceed the class bandwidth?  Calculate the
+        * bandwidth component of the packet.
+        *
+        * - Calculate bytes per second
+        */
+       delta = cur_time - cl->cl_last_time;
+       if (delta > machclk_freq * 8)
+               delta = machclk_freq * 8;
+       cl->cl_bw_delta += delta;
+       cl->cl_last_time = cur_time;
+       if (cl->cl_bw_delta) {
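+               /*
+                * bw_bytes is in bytes and bw_delta in machclk ticks;
+                * multiplying by machclk_freq (ticks per second) yields
+                * bytes/sec, directly comparable to cl_bandwidth, which
+                * was converted to bytes/sec at class creation.
+                */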
+               bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
+
+               if (bw > cl->cl_bandwidth)
+                       *hit_limit = 1;
+               cl->cl_bw_current = bw;
+#if 0
+               printf("BW %6lld relative to %6u %d queue %p\n",
+                   bw, cl->cl_bandwidth, *hit_limit, b);
+#endif
+       }
+       return (m);
+}
+
+/*
+ * Locate the next queue we want to pull a packet out of.  This code
+ * is also responsible for removing empty buckets from the circular list.
+ */
+static fairq_bucket_t *
+fairq_selectq(struct fairq_class *cl, int ispoll)
+{
+       fairq_bucket_t *b;
+       u_int64_t bw;
+
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_fif->fif_ifq);
+
+       if (ispoll == 0 && cl->cl_polled) {
+               b = cl->cl_polled;
+               cl->cl_polled = NULL;
+               return (b);
+       }
+
+       while ((b = cl->cl_head) != NULL) {
+               /*
+                * Remove empty queues from consideration
+                */
+               if (qempty(&b->queue)) {
+                       b->in_use = 0;
+                       cl->cl_head = b->next;
+                       if (cl->cl_head == b) {
+                               cl->cl_head = NULL;
+                       } else {
+                               b->next->prev = b->prev;
+                               b->prev->next = b->next;
+                       }
+                       continue;
+               }
+
+               /*
+                * Advance the round robin.  Queues with bandwidths less
+                * than the hog bandwidth are allowed to burst.
+                */
+               if (cl->cl_hogs_m1 == 0) {
+                       cl->cl_head = b->next;
+               } else if (b->bw_delta) {
+                       bw = b->bw_bytes * machclk_freq / b->bw_delta;
+                       if (bw >= cl->cl_hogs_m1) {
+                               cl->cl_head = b->next;
+                       }
+                       /*
+                        * XXX TODO -
+                        */
+               }
+
+               /*
+                * Return bucket b.
+                */
+               break;
+       }
+       if (ispoll)
+               cl->cl_polled = b;
+       return (b);
+}
+
+static void
+fairq_purgeq(struct fairq_if *fif, struct fairq_class *cl, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       struct ifclassq *ifq = fif->fif_ifq;
+       u_int32_t _cnt = 0, _len = 0;
+       fairq_bucket_t *b;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       /* become regular mutex before freeing mbufs */
+       IFCQ_CONVERT_LOCK(ifq);
+
+       while ((b = fairq_selectq(cl, 0)) != NULL) {
+               u_int32_t cnt, len, qlen;
+
+               if ((qlen = qlen(&b->queue)) == 0)
+                       continue;
+
+#if CLASSQ_RIO
+               if (cl->cl_qtype == Q_RIO)
+                       rio_purgeq(cl->cl_rio, &b->queue, flow, &cnt, &len);
+               else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (cl->cl_qtype == Q_RED)
+                       red_purgeq(cl->cl_red, &b->queue, flow, &cnt, &len);
+               else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (cl->cl_qtype == Q_BLUE)
+                       blue_purgeq(cl->cl_blue, &b->queue, flow, &cnt, &len);
+               else
+#endif /* CLASSQ_BLUE */
+               if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+                       sfb_purgeq(cl->cl_sfb, &b->queue, flow, &cnt, &len);
+               else
+                       _flushq_flow(&b->queue, flow, &cnt, &len);
+
+               if (cnt == 0)
+                       continue;
+
+               VERIFY(qlen(&b->queue) == (qlen - cnt));
+
+               PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
+               IFCQ_DROP_ADD(ifq, cnt, len);
+
+               VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
+               IFCQ_LEN(ifq) -= cnt;
+
+               _cnt += cnt;
+               _len += len;
+
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
+                           "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
+                           if_name(FAIRQIF_IFP(fif)), fairq_style(fif),
+                           cl->cl_handle, cl->cl_pri, qlen, qlen(&b->queue),
+                           cnt, len, flow);
+               }
+       }
+
+       if (packets != NULL)
+               *packets = _cnt;
+       if (bytes != NULL)
+               *bytes = _len;
+}
+
+static void
+fairq_updateq(struct fairq_if *fif, struct fairq_class *cl, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
+                   if_name(FAIRQIF_IFP(fif)), fairq_style(fif),
+                   cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
+       }
+
+#if CLASSQ_RIO
+       if (cl->cl_qtype == Q_RIO)
+               return (rio_updateq(cl->cl_rio, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (cl->cl_qtype == Q_RED)
+               return (red_updateq(cl->cl_red, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (cl->cl_qtype == Q_BLUE)
+               return (blue_updateq(cl->cl_blue, ev));
+#endif /* CLASSQ_BLUE */
+       if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+               return (sfb_updateq(cl->cl_sfb, ev));
+}
+
+int
+fairq_get_class_stats(struct fairq_if *fif, u_int32_t qid,
+    struct fairq_classstats *sp)
+{
+       struct fairq_class *cl;
+       fairq_bucket_t *b;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       if ((cl = fairq_clh_to_clp(fif, qid)) == NULL)
+               return (EINVAL);
+
+       sp->class_handle = cl->cl_handle;
+       sp->priority = cl->cl_pri;
+       sp->qlimit = cl->cl_qlimit;
+       sp->xmit_cnt = cl->cl_xmitcnt;
+       sp->drop_cnt = cl->cl_dropcnt;
+       sp->qtype = cl->cl_qtype;
+       sp->qstate = cl->cl_qstate;
+       sp->qlength = 0;
+
+       if (cl->cl_head) {
+               b = cl->cl_head;
+               do {
+                       sp->qlength += qlen(&b->queue);
+                       b = b->next;
+               } while (b != cl->cl_head);
+       }
+
+#if CLASSQ_RED
+       if (cl->cl_qtype == Q_RED)
+               red_getstats(cl->cl_red, &sp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (cl->cl_qtype == Q_RIO)
+               rio_getstats(cl->cl_rio, &sp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (cl->cl_qtype == Q_BLUE)
+               blue_getstats(cl->cl_blue, &sp->blue);
+#endif /* CLASSQ_BLUE */
+       if (cl->cl_qtype == Q_SFB && cl->cl_sfb != NULL)
+               sfb_getstats(cl->cl_sfb, &sp->sfb);
+
+       return (0);
+}
+
+/* convert a class handle to the corresponding class pointer */
+static inline struct fairq_class *
+fairq_clh_to_clp(struct fairq_if *fif, u_int32_t chandle)
+{
+       struct fairq_class *cl;
+       int idx;
+
+       IFCQ_LOCK_ASSERT_HELD(fif->fif_ifq);
+
+       for (idx = fif->fif_maxpri; idx >= 0; idx--)
+               if ((cl = fif->fif_classes[idx]) != NULL &&
+                   cl->cl_handle == chandle)
+                       return (cl);
+
+       return (NULL);
+}
+
+static const char *
+fairq_style(struct fairq_if *fif)
+{
+       return ((fif->fif_flags & FAIRQIFF_ALTQ) ? "ALTQ_FAIRQ" : "FAIRQ");
+}
+
+int
+fairq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+#pragma unused(ifq, flags)
+       return (ENXIO);         /* not yet */
+}
+
+int
+fairq_teardown_ifclassq(struct ifclassq *ifq)
+{
+       struct fairq_if *fif = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(fif != NULL && ifq->ifcq_type == PKTSCHEDT_FAIRQ);
+
+       (void) fairq_destroy_locked(fif);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+fairq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       struct fairq_if *fif = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_FAIRQ);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (fairq_get_class_stats(fif, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_fairq_stats));
+}
+#endif /* PKTSCHED_FAIRQ */
diff --git a/bsd/net/pktsched/pktsched_fairq.h b/bsd/net/pktsched/pktsched_fairq.h
new file mode 100644 (file)
index 0000000..9101729
--- /dev/null
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_FAIRQ_H_
+#define        _NET_PKTSCHED_PKTSCHED_FAIRQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+#include <net/pktsched/pktsched_rmclass.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        FAIRQ_MAX_BUCKETS       2048    /* maximum number of sorting buckets */
+#define        FAIRQ_MAXPRI            RM_MAXPRIO
+#define        FAIRQ_BITMAP_WIDTH      (sizeof (fairq_bitmap_t) * 8)
+#define        FAIRQ_BITMAP_MASK       (FAIRQ_BITMAP_WIDTH - 1)
+
+/* fairq class flags */
+#define        FARF_RED                0x0001  /* use RED */
+#define        FARF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        FARF_RIO                0x0004  /* use RIO */
+#define        FARF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+#define        FARF_BLUE               0x0100  /* use BLUE */
+#define        FARF_SFB                0x0200  /* use SFB */
+#define        FARF_FLOWCTL            0x0400  /* enable flow control advisories */
+#define        FARF_DEFAULTCLASS       0x1000  /* default class */
+#ifdef BSD_KERNEL_PRIVATE
+#define        FARF_HAS_PACKETS        0x2000  /* might have queued packets */
+#define        FARF_LAZY               0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        FARF_USERFLAGS                                                  \
+       (FARF_RED | FARF_ECN | FARF_RIO | FARF_CLEARDSCP |              \
+       FARF_BLUE | FARF_SFB | FARF_FLOWCTL | FARF_DEFAULTCLASS)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        FARF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\16HASPKTS\35LAZY"
+#else
+#define        FARF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\16HASPKTS"
+#endif /* !BSD_KERNEL_PRIVATE */
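+
+/*
+ * FARF_BITS is a "%b" format string: the leading \020 (decimal 16)
+ * selects hexadecimal output, and each entry after it is a 1-origin
+ * bit number followed by that bit's name.  E.g. \15DEFAULT names bit
+ * 13 (0x1000, FARF_DEFAULTCLASS), so a flags value of 0x1003 prints
+ * as 0x1003<RED,ECN,DEFAULT>.
+ */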
+
+typedef u_int32_t      fairq_bitmap_t;
+
+struct fairq_classstats {
+       u_int32_t               class_handle;
+       u_int32_t               priority;
+
+       u_int32_t               qlength;
+       u_int32_t               qlimit;
+       struct pktcntr          xmit_cnt;  /* transmitted packet counter */
+       struct pktcntr          drop_cnt;  /* dropped packet counter */
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t           qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t          qstate;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+
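+/*
+ * Each FAIRQ class spreads its traffic over cl_nbuckets buckets (a
+ * power of 2, indexed through cl_nbucket_mask) and services the
+ * non-empty buckets round-robin via the circular list rooted at
+ * cl_head; bw_bytes and bw_delta record recent per-bucket throughput
+ * for the scheduler's bandwidth accounting.
+ */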
+typedef struct fairq_bucket {
+       struct fairq_bucket *next;      /* circular list */
+       struct fairq_bucket *prev;      /* circular list */
+       class_queue_t   queue;          /* the actual queue */
+       u_int64_t       bw_bytes;       /* statistics used to calculate bw */
+       u_int64_t       bw_delta;       /* statistics used to calculate bw */
+       u_int64_t       last_time;
+       int             in_use;
+} fairq_bucket_t;
+
+struct fairq_class {
+       u_int32_t       cl_handle;      /* class handle */
+       u_int32_t       cl_nbuckets;    /* (power of 2) */
+       u_int32_t       cl_nbucket_mask; /* bucket mask */
+       u_int32_t       cl_qflags;      /* class queue flags */
+       fairq_bucket_t  *cl_buckets;
+       fairq_bucket_t  *cl_head;       /* head of circular bucket list */
+       fairq_bucket_t  *cl_polled;
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } cl_qalg;
+       u_int64_t       cl_hogs_m1;
+       u_int64_t       cl_lssc_m1;
+       u_int64_t       cl_bandwidth;
+       u_int64_t       cl_bw_current;
+       u_int64_t       cl_bw_bytes;
+       u_int64_t       cl_bw_delta;
+       u_int64_t       cl_last_time;
+       classq_type_t   cl_qtype;       /* rollup */
+       classq_state_t  cl_qstate;      /* state */
+       int             cl_qlimit;
+       int             cl_pri;         /* priority */
+       int             cl_flags;       /* class flags */
+       struct fairq_if *cl_fif;        /* back pointer to fif */
+
+       /* statistics */
+       struct pktcntr  cl_xmitcnt;     /* transmitted packet counter */
+       struct pktcntr  cl_dropcnt;     /* dropped packet counter */
+};
+
+#define        cl_red  cl_qalg.red
+#define        cl_rio  cl_qalg.rio
+#define        cl_blue cl_qalg.blue
+#define        cl_sfb  cl_qalg.sfb
+
+/* fairq_if flags */
+#define        FAIRQIFF_ALTQ           0x1     /* configured via PF/ALTQ */
+
+/*
+ * fairq interface state
+ */
+struct fairq_if {
+       struct ifclassq         *fif_ifq;       /* backpointer to ifclassq */
+       int                     fif_maxpri;     /* max priority in use */
+       u_int32_t               fif_flags;      /* flags */
+       struct fairq_class      *fif_poll_cache; /* cached poll */
+       struct fairq_class      *fif_default;   /* default class */
+       struct fairq_class      *fif_classes[FAIRQ_MAXPRI]; /* classes */
+};
+
+#define        FAIRQIF_IFP(_fif)       ((_fif)->fif_ifq->ifcq_ifp)
+
+struct if_ifclassq_stats;
+
+extern void fairq_init(void);
+extern struct fairq_if *fairq_alloc(struct ifnet *, int, boolean_t);
+extern int fairq_destroy(struct fairq_if *);
+extern void fairq_purge(struct fairq_if *);
+extern void fairq_event(struct fairq_if *, cqev_t);
+extern int fairq_add_queue(struct fairq_if *, int, u_int32_t, u_int64_t,
+    u_int32_t, int, u_int64_t, u_int64_t, u_int64_t, u_int64_t, u_int32_t,
+    struct fairq_class **);
+extern int fairq_remove_queue(struct fairq_if *, u_int32_t);
+extern int fairq_get_class_stats(struct fairq_if *, u_int32_t,
+    struct fairq_classstats *);
+extern int fairq_enqueue(struct fairq_if *, struct fairq_class *,
+    struct mbuf *, struct pf_mtag *);
+extern struct mbuf *fairq_dequeue(struct fairq_if *, cqdq_op_t);
+extern int fairq_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int fairq_teardown_ifclassq(struct ifclassq *ifq);
+extern int fairq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_FAIRQ_H_ */
diff --git a/bsd/net/pktsched/pktsched_hfsc.c b/bsd/net/pktsched/pktsched_hfsc.c
new file mode 100644 (file)
index 0000000..c7b4053
--- /dev/null
@@ -0,0 +1,2057 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_hfsc.c,v 1.25 2007/09/13 20:40:02 chl Exp $      */
+/*     $KAME: altq_hfsc.c,v 1.17 2002/11/29 07:48:33 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * When a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve.  The link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
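+
+/*
+ * Each service curve is two-piece linear: slope m1 for the first d
+ * milliseconds of a backlog period, slope m2 afterwards.  As an
+ * illustration, {m1 = 2 Mbps, d = 10 ms, m2 = 1 Mbps} guarantees
+ * 2 Mbps for the first 10 ms after a class becomes backlogged and
+ * 1 Mbps from then on; m1 > m2 gives a concave (bursty) curve,
+ * m1 < m2 a convex one.
+ */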
+
+#if PKTSCHED_HFSC
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_hfsc.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+#if 0
+static int hfsc_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *hfsc_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
+static int hfsc_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+#endif
+static int hfsc_addq(struct hfsc_class *, struct mbuf *, struct pf_mtag *);
+static struct mbuf *hfsc_getq(struct hfsc_class *);
+static struct mbuf *hfsc_pollq(struct hfsc_class *);
+static void hfsc_purgeq(struct hfsc_if *, struct hfsc_class *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+static void hfsc_print_sc(struct hfsc_if *, u_int32_t, u_int64_t,
+    struct service_curve *, struct internal_sc *, const char *);
+static void hfsc_updateq_linkrate(struct hfsc_if *, struct hfsc_class *);
+static void hfsc_updateq(struct hfsc_if *, struct hfsc_class *, cqev_t);
+
+static int hfsc_clear_interface(struct hfsc_if *);
+static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
+    struct service_curve *, struct service_curve *, struct service_curve *,
+    struct hfsc_class *, u_int32_t, int, u_int32_t);
+static int hfsc_class_destroy(struct hfsc_if *, struct hfsc_class *);
+static int hfsc_destroy_locked(struct hfsc_if *);
+static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
+static struct hfsc_class *hfsc_clh_to_clp(struct hfsc_if *, u_int32_t);
+static const char *hfsc_style(struct hfsc_if *);
+
+static void set_active(struct hfsc_class *, u_int32_t);
+static void set_passive(struct hfsc_class *);
+
+static void init_ed(struct hfsc_class *, u_int32_t);
+static void update_ed(struct hfsc_class *, u_int32_t);
+static void update_d(struct hfsc_class *, u_int32_t);
+static void init_vf(struct hfsc_class *, u_int32_t);
+static void update_vf(struct hfsc_class *, u_int32_t, u_int64_t);
+static void update_cfmin(struct hfsc_class *);
+static void ellist_insert(struct hfsc_class *);
+static void ellist_remove(struct hfsc_class *);
+static void ellist_update(struct hfsc_class *);
+static struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
+static void actlist_insert(struct hfsc_class *);
+static void actlist_remove(struct hfsc_class *);
+static void actlist_update(struct hfsc_class *);
+static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t);
+
+static inline u_int64_t        seg_x2y(u_int64_t, u_int64_t);
+static inline u_int64_t        seg_y2x(u_int64_t, u_int64_t);
+static inline u_int64_t        m2sm(u_int64_t);
+static inline u_int64_t        m2ism(u_int64_t);
+static inline u_int64_t        d2dx(u_int64_t);
+static u_int64_t sm2m(u_int64_t);
+static u_int64_t dx2d(u_int64_t);
+
+static boolean_t sc2isc(struct hfsc_class *, struct service_curve *,
+    struct internal_sc *, u_int64_t);
+static void rtsc_init(struct runtime_sc *, struct internal_sc *,
+    u_int64_t, u_int64_t);
+static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
+static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
+static void rtsc_min(struct runtime_sc *, struct internal_sc *,
+    u_int64_t, u_int64_t);
+
+#define        HFSC_ZONE_MAX   32              /* maximum elements in zone */
+#define        HFSC_ZONE_NAME  "pktsched_hfsc" /* zone name */
+
+static unsigned int hfsc_size;         /* size of zone element */
+static struct zone *hfsc_zone;         /* zone for hfsc_if */
+
+#define        HFSC_CL_ZONE_MAX        32      /* maximum elements in zone */
+#define        HFSC_CL_ZONE_NAME       "pktsched_hfsc_cl" /* zone name */
+
+static unsigned int hfsc_cl_size;      /* size of zone element */
+static struct zone *hfsc_cl_zone;      /* zone for hfsc_class */
+
+/*
+ * macros
+ */
+#define        HFSC_IS_A_PARENT_CLASS(cl)      ((cl)->cl_children != NULL)
+
+#define        HT_INFINITY     0xffffffffffffffffLL    /* infinite time value */
+
+void
+hfsc_init(void)
+{
+       hfsc_size = sizeof (struct hfsc_if);
+       hfsc_zone = zinit(hfsc_size, HFSC_ZONE_MAX * hfsc_size,
+           0, HFSC_ZONE_NAME);
+       if (hfsc_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, HFSC_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(hfsc_zone, Z_EXPAND, TRUE);
+       zone_change(hfsc_zone, Z_CALLERACCT, TRUE);
+
+       hfsc_cl_size = sizeof (struct hfsc_class);
+       hfsc_cl_zone = zinit(hfsc_cl_size, HFSC_CL_ZONE_MAX * hfsc_cl_size,
+           0, HFSC_CL_ZONE_NAME);
+       if (hfsc_cl_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, HFSC_CL_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(hfsc_cl_zone, Z_EXPAND, TRUE);
+       zone_change(hfsc_cl_zone, Z_CALLERACCT, TRUE);
+}
+
+struct hfsc_if *
+hfsc_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       struct hfsc_if *hif;
+
+       hif = (how == M_WAITOK) ? zalloc(hfsc_zone) : zalloc_noblock(hfsc_zone);
+       if (hif == NULL)
+               return (NULL);
+
+       bzero(hif, hfsc_size);
+       TAILQ_INIT(&hif->hif_eligible);
+       hif->hif_ifq = &ifp->if_snd;
+       if (altq) {
+               hif->hif_maxclasses = HFSC_MAX_CLASSES;
+               hif->hif_flags |= HFSCIFF_ALTQ;
+       } else {
+               hif->hif_maxclasses = IFCQ_SC_MAX + 1;  /* incl. root class */
+       }
+
+       if ((hif->hif_class_tbl = _MALLOC(sizeof (struct hfsc_class *) *
+           hif->hif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
+               log(LOG_ERR, "%s: %s unable to allocate class table array\n",
+                   if_name(ifp), hfsc_style(hif));
+               goto error;
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), hfsc_style(hif));
+       }
+
+       return (hif);
+
+error:
+       if (hif->hif_class_tbl != NULL) {
+               _FREE(hif->hif_class_tbl, M_DEVBUF);
+               hif->hif_class_tbl = NULL;
+       }
+       zfree(hfsc_zone, hif);
+
+       return (NULL);
+}
+
+int
+hfsc_destroy(struct hfsc_if *hif)
+{
+       struct ifclassq *ifq = hif->hif_ifq;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = hfsc_destroy_locked(hif);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+hfsc_destroy_locked(struct hfsc_if *hif)
+{
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       (void) hfsc_clear_interface(hif);
+       (void) hfsc_class_destroy(hif, hif->hif_rootclass);
+
+       VERIFY(hif->hif_class_tbl != NULL);
+       _FREE(hif->hif_class_tbl, M_DEVBUF);
+       hif->hif_class_tbl = NULL;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
+       }
+
+       zfree(hfsc_zone, hif);
+
+       return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ */
+static int
+hfsc_clear_interface(struct hfsc_if *hif)
+{
+       struct hfsc_class       *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       /* clear out the classes */
+       while (hif->hif_rootclass != NULL &&
+           (cl = hif->hif_rootclass->cl_children) != NULL) {
+               /*
+                * remove the first leaf class found in the hierarchy
+                * then start over
+                */
+               for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+                       if (!HFSC_IS_A_PARENT_CLASS(cl)) {
+                               (void) hfsc_class_destroy(hif, cl);
+                               break;
+                       }
+               }
+       }
+
+       return (0);
+}
+
+/* discard all the queued packets on the interface */
+void
+hfsc_purge(struct hfsc_if *hif)
+{
+       struct hfsc_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) {
+               if (!qempty(&cl->cl_q))
+                       hfsc_purgeq(hif, cl, 0, NULL, NULL);
+       }
+#if !PF_ALTQ
+       /*
+        * This assertion is safe to be made only when PF_ALTQ is not
+        * configured; otherwise, IFCQ_LEN represents the sum of the
+        * packets managed by ifcq_disc and altq_disc instances, which
+        * is possible when transitioning between the two.
+        */
+       VERIFY(IFCQ_LEN(hif->hif_ifq) == 0);
+#endif /* !PF_ALTQ */
+}
+
+void
+hfsc_event(struct hfsc_if *hif, cqev_t ev)
+{
+       struct hfsc_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+               hfsc_updateq(hif, cl, ev);
+}
+
+int
+hfsc_add_queue(struct hfsc_if *hif, struct service_curve *rtsc,
+    struct service_curve *lssc, struct service_curve *ulsc,
+    u_int32_t qlimit, int flags, u_int32_t parent_qid, u_int32_t qid,
+    struct hfsc_class **clp)
+{
+       struct hfsc_class *cl = NULL, *parent;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       if (parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL)
+               parent = NULL;
+       else if ((parent = hfsc_clh_to_clp(hif, parent_qid)) == NULL)
+               return (EINVAL);
+
+       if (hfsc_clh_to_clp(hif, qid) != NULL)
+               return (EBUSY);
+
+       cl = hfsc_class_create(hif, rtsc, lssc, ulsc, parent,
+           qlimit, flags, qid);
+       if (cl == NULL)
+               return (ENOMEM);
+
+       if (clp != NULL)
+               *clp = cl;
+
+       return (0);
+}
+
+static struct hfsc_class *
+hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
+    struct service_curve *fsc, struct service_curve *usc,
+    struct hfsc_class *parent, u_int32_t qlimit, int flags, u_int32_t qid)
+{
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct hfsc_class *cl, *p;
+       u_int64_t eff_rate;
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       /* Sanitize flags unless internally configured */
+       if (hif->hif_flags & HFSCIFF_ALTQ)
+               flags &= HFCF_USERFLAGS;
+
+       if (hif->hif_classes >= hif->hif_maxclasses) {
+               log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
+                   hif->hif_maxclasses);
+               return (NULL);
+       }
+
+#if !CLASSQ_RED
+       if (flags & HFCF_RED) {
+               log(LOG_ERR, "%s: %s RED not available!\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & HFCF_RIO) {
+               log(LOG_ERR, "%s: %s RIO not available!\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & HFCF_BLUE) {
+               log(LOG_ERR, "%s: %s BLUE not available!\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) &&
+           (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_RED &&
+           (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_RIO &&
+           (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_BLUE &&
+           (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) != HFCF_SFB) {
+               log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif));
+               return (NULL);
+       }
+
+       cl = zalloc(hfsc_cl_zone);
+       if (cl == NULL)
+               return (NULL);
+
+       bzero(cl, hfsc_cl_size);
+       TAILQ_INIT(&cl->cl_actc);
+       ifq = hif->hif_ifq;
+       ifp = HFSCIF_IFP(hif);
+
+       if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
+               qlimit = IFCQ_MAXLEN(ifq);
+               if (qlimit == 0)
+                       qlimit = DEFAULT_QLIMIT;  /* use default */
+       }
+       _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
+
+       cl->cl_flags = flags;
+       if (flags & (HFCF_RED|HFCF_RIO|HFCF_BLUE|HFCF_SFB)) {
+#if CLASSQ_RED || CLASSQ_RIO
+               int pkttime;
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+               u_int64_t m2;
+
+               m2 = 0;
+               if (rsc != NULL && rsc->m2 > m2)
+                       m2 = rsc->m2;
+               if (fsc != NULL && fsc->m2 > m2)
+                       m2 = fsc->m2;
+               if (usc != NULL && usc->m2 > m2)
+                       m2 = usc->m2;
+
+               cl->cl_qflags = 0;
+               if (flags & HFCF_ECN) {
+                       if (flags & HFCF_BLUE)
+                               cl->cl_qflags |= BLUEF_ECN;
+                       else if (flags & HFCF_SFB)
+                               cl->cl_qflags |= SFBF_ECN;
+                       else if (flags & HFCF_RED)
+                               cl->cl_qflags |= REDF_ECN;
+                       else if (flags & HFCF_RIO)
+                               cl->cl_qflags |= RIOF_ECN;
+               }
+               if (flags & HFCF_FLOWCTL) {
+                       if (flags & HFCF_SFB)
+                               cl->cl_qflags |= SFBF_FLOWCTL;
+               }
+               if (flags & HFCF_CLEARDSCP) {
+                       if (flags & HFCF_RIO)
+                               cl->cl_qflags |= RIOF_CLEARDSCP;
+               }
+#if CLASSQ_RED || CLASSQ_RIO
+               /*
+                * XXX: RED & RIO should be watching link speed and MTU
+                *      events and recompute pkttime accordingly.
+                */
+               if (m2 < 8)
+                       pkttime = 1000 * 1000 * 1000; /* 1 sec */
+               else
+                       pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
+                           (m2 / 8);
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RED
+               if (flags & HFCF_RED) {
+                       cl->cl_red = red_alloc(ifp, 0, 0,
+                           qlimit(&cl->cl_q) * 10/100,
+                           qlimit(&cl->cl_q) * 30/100,
+                           cl->cl_qflags, pkttime);
+                       if (cl->cl_red != NULL)
+                               qtype(&cl->cl_q) = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+               if (flags & HFCF_RIO) {
+                       cl->cl_rio =
+                           rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
+                       if (cl->cl_rio != NULL)
+                               qtype(&cl->cl_q) = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & HFCF_BLUE) {
+                       cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
+                       if (cl->cl_blue != NULL)
+                               qtype(&cl->cl_q) = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & HFCF_SFB) {
+                       if (!(cl->cl_flags & HFCF_LAZY))
+                               cl->cl_sfb = sfb_alloc(ifp, qid,
+                                   qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb != NULL || (cl->cl_flags & HFCF_LAZY))
+                               qtype(&cl->cl_q) = Q_SFB;
+               }
+       }
+
+       cl->cl_id = hif->hif_classid++;
+       cl->cl_handle = qid;
+       cl->cl_hif = hif;
+       cl->cl_parent = parent;
+
+       eff_rate = ifnet_output_linkrate(HFSCIF_IFP(hif));
+       hif->hif_eff_rate = eff_rate;
+
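+       /*
+        * Install each supplied curve only if at least one slope is
+        * non-zero and any percentage-flagged slope (HFSCF_M1_PCT,
+        * HFSCF_M2_PCT) lies within 1..100 percent of the link rate.
+        */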
+       if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
+           (!(rsc->fl & HFSCF_M1_PCT) || (rsc->m1 > 0 && rsc->m1 <= 100)) &&
+           (!(rsc->fl & HFSCF_M2_PCT) || (rsc->m2 > 0 && rsc->m2 <= 100))) {
+               rsc->fl &= HFSCF_USERFLAGS;
+               cl->cl_flags |= HFCF_RSC;
+               cl->cl_rsc0 = *rsc;
+               (void) sc2isc(cl, &cl->cl_rsc0, &cl->cl_rsc, eff_rate);
+               rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
+               rtsc_init(&cl->cl_eligible, &cl->cl_rsc, 0, 0);
+       }
+       if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
+           (!(fsc->fl & HFSCF_M1_PCT) || (fsc->m1 > 0 && fsc->m1 <= 100)) &&
+           (!(fsc->fl & HFSCF_M2_PCT) || (fsc->m2 > 0 && fsc->m2 <= 100))) {
+               fsc->fl &= HFSCF_USERFLAGS;
+               cl->cl_flags |= HFCF_FSC;
+               cl->cl_fsc0 = *fsc;
+               (void) sc2isc(cl, &cl->cl_fsc0, &cl->cl_fsc, eff_rate);
+               rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
+       }
+       if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
+           (!(usc->fl & HFSCF_M1_PCT) || (usc->m1 > 0 && usc->m1 <= 100)) &&
+           (!(usc->fl & HFSCF_M2_PCT) || (usc->m2 > 0 && usc->m2 <= 100))) {
+               usc->fl &= HFSCF_USERFLAGS;
+               cl->cl_flags |= HFCF_USC;
+               cl->cl_usc0 = *usc;
+               (void) sc2isc(cl, &cl->cl_usc0, &cl->cl_usc, eff_rate);
+               rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
+       }
+
+       /*
+        * find a free slot in the class table.  if the slot matching
+        * the lower bits of qid is free, use this slot.  otherwise,
+        * use the first free slot.
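+        * (e.g. with hif_maxclasses == 8, qid 21 first tries slot
+        * 21 % 8 == 5 before falling back to a linear scan.)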
+        */
+       i = qid % hif->hif_maxclasses;
+       if (hif->hif_class_tbl[i] == NULL) {
+               hif->hif_class_tbl[i] = cl;
+       } else {
+               for (i = 0; i < hif->hif_maxclasses; i++)
+                       if (hif->hif_class_tbl[i] == NULL) {
+                               hif->hif_class_tbl[i] = cl;
+                               break;
+                       }
+               if (i == hif->hif_maxclasses) {
+                       goto err_ret;
+               }
+       }
+       hif->hif_classes++;
+
+       if (flags & HFCF_DEFAULTCLASS)
+               hif->hif_defaultclass = cl;
+
+       if (parent == NULL) {
+               /* this is root class */
+               hif->hif_rootclass = cl;
+       } else {
+               /* add this class to the children list of the parent */
+               if ((p = parent->cl_children) == NULL)
+                       parent->cl_children = cl;
+               else {
+                       while (p->cl_siblings != NULL)
+                               p = p->cl_siblings;
+                       p->cl_siblings = cl;
+               }
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d pqid=%d qlimit=%d "
+                   "flags=%b\n", if_name(ifp), hfsc_style(hif), cl->cl_handle,
+                   (cl->cl_parent != NULL) ? cl->cl_parent->cl_handle : 0,
+                   qlimit(&cl->cl_q), cl->cl_flags, HFCF_BITS);
+               if (cl->cl_flags & HFCF_RSC) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           &cl->cl_rsc0, &cl->cl_rsc, "rsc");
+               }
+               if (cl->cl_flags & HFCF_FSC) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           &cl->cl_fsc0, &cl->cl_fsc, "fsc");
+               }
+               if (cl->cl_flags & HFCF_USC) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           &cl->cl_usc0, &cl->cl_usc, "usc");
+               }
+       }
+
+       return (cl);
+
+err_ret:
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       }
+       zfree(hfsc_cl_zone, cl);
+       return (NULL);
+}
+
+int
+hfsc_remove_queue(struct hfsc_if *hif, u_int32_t qid)
+{
+       struct hfsc_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
+               return (EINVAL);
+
+       return (hfsc_class_destroy(hif, cl));
+}
+
+static int
+hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
+{
+       u_int32_t i;
+
+       if (cl == NULL)
+               return (0);
+
+       if (HFSC_IS_A_PARENT_CLASS(cl))
+               return (EBUSY);
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       if (!qempty(&cl->cl_q))
+               hfsc_purgeq(hif, cl, 0, NULL, NULL);
+
+       if (cl->cl_parent == NULL) {
+               /* this is root class */
+       } else {
+               struct hfsc_class *p = cl->cl_parent->cl_children;
+
+               if (p == cl)
+                       cl->cl_parent->cl_children = cl->cl_siblings;
+               else do {
+                       if (p->cl_siblings == cl) {
+                               p->cl_siblings = cl->cl_siblings;
+                               break;
+                       }
+               } while ((p = p->cl_siblings) != NULL);
+               VERIFY(p != NULL);
+       }
+
+       for (i = 0; i < hif->hif_maxclasses; i++)
+               if (hif->hif_class_tbl[i] == cl) {
+                       hif->hif_class_tbl[i] = NULL;
+                       break;
+               }
+
+       hif->hif_classes--;
+
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       }
+
+       if (cl == hif->hif_rootclass)
+               hif->hif_rootclass = NULL;
+       if (cl == hif->hif_defaultclass)
+               hif->hif_defaultclass = NULL;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d slot=%d\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
+                   cl->cl_handle, cl->cl_id);
+       }
+
+       zfree(hfsc_cl_zone, cl);
+
+       return (0);
+}
+
+/*
+ * hfsc_nextclass returns the next class in the tree.
+ *   usage:
+ *     for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ *             do_something;
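+ *
+ *     the walk is preorder: descend to children first, then to
+ *     siblings, then climb until an unvisited sibling is found
+ *     (returning NULL once the root has been exhausted).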
+ */
+static struct hfsc_class *
+hfsc_nextclass(struct hfsc_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);
+
+       if (cl->cl_children != NULL)
+               cl = cl->cl_children;
+       else if (cl->cl_siblings != NULL)
+               cl = cl->cl_siblings;
+       else {
+               while ((cl = cl->cl_parent) != NULL)
+                       if (cl->cl_siblings) {
+                               cl = cl->cl_siblings;
+                               break;
+                       }
+       }
+
+       return (cl);
+}
+
+int
+hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = hif->hif_ifq;
+       u_int32_t len;
+       int ret;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cl == NULL || cl->cl_hif == hif);
+
+       if (cl == NULL) {
+               cl = hfsc_clh_to_clp(hif, t->pftag_qid);
+               if (cl == NULL || HFSC_IS_A_PARENT_CLASS(cl)) {
+                       cl = hif->hif_defaultclass;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+
+       len = m_pktlen(m);
+
+       ret = hfsc_addq(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+                       /* packet has been freed in hfsc_addq */
+                       PKTCNTR_ADD(&cl->cl_stats.drop_cnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOTREACHED */
+               }
+       }
+       IFCQ_INC_LEN(ifq);
+       cl->cl_hif->hif_packets++;
+
+       /* successfully queued. */
+       if (qlen(&cl->cl_q) == 1)
+               set_active(cl, len);
+
+       return (ret);
+}
+
+/*
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
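+ *
+ *     e.g. a caller may peek and then commit; driver_can_take() is a
+ *     hypothetical predicate, shown for illustration only:
+ *
+ *             m = hfsc_dequeue(hif, CLASSQDQ_POLL);
+ *             if (m != NULL && driver_can_take(m))
+ *                     m = hfsc_dequeue(hif, CLASSQDQ_REMOVE);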
+ */
+struct mbuf *
+hfsc_dequeue(struct hfsc_if *hif, cqdq_op_t op)
+{
+       struct ifclassq *ifq = hif->hif_ifq;
+       struct hfsc_class *cl;
+       struct mbuf *m;
+       u_int32_t len, next_len;
+       int realtime = 0;
+       u_int64_t cur_time;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (hif->hif_packets == 0)
+               /* no packet in the tree */
+               return (NULL);
+
+       cur_time = read_machclk();
+
+       if (op == CLASSQDQ_REMOVE && hif->hif_pollcache != NULL) {
+               cl = hif->hif_pollcache;
+               hif->hif_pollcache = NULL;
+               /* check if the class was scheduled by real-time criteria */
+               if (cl->cl_flags & HFCF_RSC)
+                       realtime = (cl->cl_e <= cur_time);
+       } else {
+               /*
+                * if there are eligible classes, use real-time criteria.
+                * find the class with the minimum deadline among
+                * the eligible classes.
+                */
+               if ((cl = ellist_get_mindl(&hif->hif_eligible, cur_time))
+                   != NULL) {
+                       realtime = 1;
+               } else {
+                       int fits = 0;
+                       /*
+                        * use link-sharing criteria
+                        * get the class with the minimum vt in the hierarchy
+                        */
+                       cl = hif->hif_rootclass;
+                       while (HFSC_IS_A_PARENT_CLASS(cl)) {
+                               cl = actlist_firstfit(cl, cur_time);
+                               if (cl == NULL) {
+                                       if (fits > 0)
+                                               log(LOG_ERR, "%s: %s "
+                                                   "%d fit but none found\n",
+                                                   if_name(HFSCIF_IFP(hif)),
+                                                   hfsc_style(hif), fits);
+                                       return (NULL);
+                               }
+                               /*
+                                * update parent's cl_cvtmin.
+                                * don't update if the new vt is smaller.
+                                */
+                               if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+                                       cl->cl_parent->cl_cvtmin = cl->cl_vt;
+                               fits++;
+                       }
+               }
+
+               if (op == CLASSQDQ_POLL) {
+                       hif->hif_pollcache = cl;
+                       m = hfsc_pollq(cl);
+                       return (m);
+               }
+       }
+
+       m = hfsc_getq(cl);
+       VERIFY(m != NULL);
+       len = m_pktlen(m);
+       cl->cl_hif->hif_packets--;
+       IFCQ_DEC_LEN(ifq);
+       IFCQ_XMIT_ADD(ifq, 1, len);
+       PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, 1, len);
+
+       update_vf(cl, len, cur_time);
+       if (realtime)
+               cl->cl_cumul += len;
+
+       if (!qempty(&cl->cl_q)) {
+               if (cl->cl_flags & HFCF_RSC) {
+                       /* update ed */
+                       next_len = m_pktlen(qhead(&cl->cl_q));
+
+                       if (realtime)
+                               update_ed(cl, next_len);
+                       else
+                               update_d(cl, next_len);
+               }
+       } else {
+               /* the class becomes passive */
+               set_passive(cl);
+       }
+
+       return (m);
+}
+
+static int
+hfsc_addq(struct hfsc_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct ifclassq *ifq = cl->cl_hif->hif_ifq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_addq(cl->cl_red, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb == NULL) {
+                       struct ifnet *ifp = HFSCIF_IFP(cl->cl_hif);
+
+                       VERIFY(cl->cl_flags & HFCF_LAZY);
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                           qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb == NULL) {
+                               /* fall back to droptail */
+                               qtype(&cl->cl_q) = Q_DROPTAIL;
+                               cl->cl_flags &= ~HFCF_SFB;
+                               cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: %s SFB lazy allocation "
+                                   "failed for qid=%d slot=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   hfsc_style(cl->cl_hif), cl->cl_handle,
+                                   cl->cl_id);
+                       }
+               }
+               if (cl->cl_sfb != NULL)
+                       return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
+       } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       if (cl->cl_flags & HFCF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       _addq(&cl->cl_q, m);
+
+       return (0);
+}
+
+static struct mbuf *
+hfsc_getq(struct hfsc_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_getq(cl->cl_rio, &cl->cl_q));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_getq(cl->cl_red, &cl->cl_q));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_getq(cl->cl_blue, &cl->cl_q));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_getq(cl->cl_sfb, &cl->cl_q));
+
+       return (_getq(&cl->cl_q));
+}
+
+static struct mbuf *
+hfsc_pollq(struct hfsc_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_hif->hif_ifq);
+
+       return (qhead(&cl->cl_q));
+}
+
+static void
+hfsc_purgeq(struct hfsc_if *hif, struct hfsc_class *cl, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       struct ifclassq *ifq = hif->hif_ifq;
+       u_int32_t cnt = 0, len = 0, qlen;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if ((qlen = qlen(&cl->cl_q)) == 0) {
+               VERIFY(hif->hif_packets == 0);
+               goto done;
+       }
+
+       /* become regular mutex before freeing mbufs */
+       IFCQ_CONVERT_LOCK(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
+       else
+               _flushq_flow(&cl->cl_q, flow, &cnt, &len);
+
+       if (cnt > 0) {
+               VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
+
+               PKTCNTR_ADD(&cl->cl_stats.drop_cnt, cnt, len);
+               IFCQ_DROP_ADD(ifq, cnt, len);
+
+               VERIFY(hif->hif_packets >= cnt);
+               hif->hif_packets -= cnt;
+
+               VERIFY(((signed)IFCQ_LEN(ifq) - (signed)cnt) >= 0);
+               IFCQ_LEN(ifq) -= cnt;
+
+               if (qempty(&cl->cl_q)) {
+                       update_vf(cl, 0, 0);    /* remove cl from the actlist */
+                       set_passive(cl);
+               }
+
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s purge qid=%d slot=%d "
+                           "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
+                           if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
+                           cl->cl_handle, cl->cl_id, qlen, qlen(&cl->cl_q),
+                           cnt, len, flow);
+               }
+       }
+done:
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+static void
+hfsc_print_sc(struct hfsc_if *hif, u_int32_t qid, u_int64_t eff_rate,
+    struct service_curve *sc, struct internal_sc *isc, const char *which)
+{
+       struct ifnet *ifp = HFSCIF_IFP(hif);
+
+       log(LOG_DEBUG, "%s: %s   qid=%d {%s_m1=%llu%s [%llu], "
+           "%s_d=%u msec, %s_m2=%llu%s [%llu]} linkrate=%llu bps\n",
+           if_name(ifp), hfsc_style(hif), qid,
+           which, sc->m1, (sc->fl & HFSCF_M1_PCT) ? "%" : " bps", isc->sm1,
+           which, sc->d,
+           which, sc->m2, (sc->fl & HFSCF_M2_PCT) ? "%" : " bps", isc->sm2,
+           eff_rate);
+}
+
+static void
+hfsc_updateq_linkrate(struct hfsc_if *hif, struct hfsc_class *cl)
+{
+       u_int64_t eff_rate = ifnet_output_linkrate(HFSCIF_IFP(hif));
+       struct service_curve *sc;
+       struct internal_sc *isc;
+
+       /* Update parameters only if rate has changed */
+       if (eff_rate == hif->hif_eff_rate)
+               return;
+
+       sc = &cl->cl_rsc0;
+       isc = &cl->cl_rsc;
+       if ((cl->cl_flags & HFCF_RSC) && sc2isc(cl, sc, isc, eff_rate)) {
+               rtsc_init(&cl->cl_deadline, isc, 0, 0);
+               rtsc_init(&cl->cl_eligible, isc, 0, 0);
+               if (pktsched_verbose) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           sc, isc, "rsc");
+               }
+       }
+       sc = &cl->cl_fsc0;
+       isc = &cl->cl_fsc;
+       if ((cl->cl_flags & HFCF_FSC) && sc2isc(cl, sc, isc, eff_rate)) {
+               rtsc_init(&cl->cl_virtual, isc, 0, 0);
+               if (pktsched_verbose) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           sc, isc, "fsc");
+               }
+       }
+       sc = &cl->cl_usc0;
+       isc = &cl->cl_usc;
+       if ((cl->cl_flags & HFCF_USC) && sc2isc(cl, sc, isc, eff_rate)) {
+               rtsc_init(&cl->cl_ulimit, isc, 0, 0);
+               if (pktsched_verbose) {
+                       hfsc_print_sc(hif, cl->cl_handle, eff_rate,
+                           sc, isc, "usc");
+               }
+       }
+}
+
+static void
+hfsc_updateq(struct hfsc_if *hif, struct hfsc_class *cl, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s update qid=%d slot=%d event=%s\n",
+                   if_name(HFSCIF_IFP(hif)), hfsc_style(hif),
+                   cl->cl_handle, cl->cl_id, ifclassq_ev2str(ev));
+       }
+
+       if (ev == CLASSQ_EV_LINK_SPEED)
+               hfsc_updateq_linkrate(hif, cl);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_updateq(cl->cl_rio, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_updateq(cl->cl_red, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_updateq(cl->cl_blue, ev));
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_updateq(cl->cl_sfb, ev));
+}
+
+static void
+set_active(struct hfsc_class *cl, u_int32_t len)
+{
+       if (cl->cl_flags & HFCF_RSC)
+               init_ed(cl, len);
+       if (cl->cl_flags & HFCF_FSC)
+               init_vf(cl, len);
+
+       cl->cl_stats.period++;
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+       if (cl->cl_flags & HFCF_RSC)
+               ellist_remove(cl);
+
+       /*
+        * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
+        * needs to be called explicitly to remove a class from actlist
+        */
+}
+
+static void
+init_ed(struct hfsc_class *cl, u_int32_t next_len)
+{
+       u_int64_t cur_time;
+
+       cur_time = read_machclk();
+
+       /* update the deadline curve */
+       rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+
+       /*
+        * update the eligible curve.
+        * for concave, it is equal to the deadline curve.
+        * for convex, it is a linear curve with slope m2.
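+        * (concave means sm1 > sm2; the sm1 <= sm2 test below catches
+        * the convex and linear cases and zeroes dx/dy, collapsing the
+        * two-piece curve into its second, slope-sm2 segment.)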
+        */
+       cl->cl_eligible = cl->cl_deadline;
+       if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
+               cl->cl_eligible.dx = 0;
+               cl->cl_eligible.dy = 0;
+       }
+
+       /* compute e and d */
+       cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+       cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+       ellist_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, u_int32_t next_len)
+{
+       cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+       cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+       ellist_update(cl);
+}
+
+static void
+update_d(struct hfsc_class *cl, u_int32_t next_len)
+{
+       cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static void
+init_vf(struct hfsc_class *cl, u_int32_t len)
+{
+#pragma unused(len)
+       struct hfsc_class *max_cl, *p;
+       u_int64_t vt, f, cur_time;
+       int go_active;
+
+       cur_time = 0;
+       go_active = 1;
+       for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+               if (go_active && cl->cl_nactive++ == 0)
+                       go_active = 1;
+               else
+                       go_active = 0;
+
+               if (go_active) {
+                       max_cl = actlist_last(&cl->cl_parent->cl_actc);
+                       if (max_cl != NULL) {
+                               /*
+                                * set vt to the average of the min and max
+                                * classes.  if the parent's period didn't
+                                * change, don't decrease vt of the class.
+                                */
+                               vt = max_cl->cl_vt;
+                               if (cl->cl_parent->cl_cvtmin != 0)
+                                       vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+                               if (cl->cl_parent->cl_vtperiod !=
+                                   cl->cl_parentperiod || vt > cl->cl_vt)
+                                       cl->cl_vt = vt;
+                       } else {
+                               /*
+                                * first child for a new parent backlog period.
+                                * add parent's cvtmax to vtoff of children
+                                * to make a new vt (vtoff + vt) larger than
+                                * the vt in the last period for all children.
+                                */
+                               vt = cl->cl_parent->cl_cvtmax;
+                               for (p = cl->cl_parent->cl_children; p != NULL;
+                                   p = p->cl_siblings)
+                                       p->cl_vtoff += vt;
+                               cl->cl_vt = 0;
+                               cl->cl_parent->cl_cvtmax = 0;
+                               cl->cl_parent->cl_cvtmin = 0;
+                       }
+                       cl->cl_initvt = cl->cl_vt;
+
+                       /* update the virtual curve */
+                       vt = cl->cl_vt + cl->cl_vtoff;
+                       rtsc_min(&cl->cl_virtual, &cl->cl_fsc,
+                           vt, cl->cl_total);
+                       if (cl->cl_virtual.x == vt) {
+                               cl->cl_virtual.x -= cl->cl_vtoff;
+                               cl->cl_vtoff = 0;
+                       }
+                       cl->cl_vtadj = 0;
+
+                       cl->cl_vtperiod++;  /* increment vt period */
+                       cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+                       if (cl->cl_parent->cl_nactive == 0)
+                               cl->cl_parentperiod++;
+                       cl->cl_f = 0;
+
+                       actlist_insert(cl);
+
+                       if (cl->cl_flags & HFCF_USC) {
+                               /* class has upper limit curve */
+                               if (cur_time == 0)
+                                       cur_time = read_machclk();
+
+                               /* update the ulimit curve */
+                               rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
+                                   cl->cl_total);
+                               /* compute myf */
+                               cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+                                   cl->cl_total);
+                               cl->cl_myfadj = 0;
+                       }
+               }
+
+               if (cl->cl_myf > cl->cl_cfmin)
+                       f = cl->cl_myf;
+               else
+                       f = cl->cl_cfmin;
+               if (f != cl->cl_f) {
+                       cl->cl_f = f;
+                       update_cfmin(cl->cl_parent);
+               }
+       }
+}
+
+static void
+update_vf(struct hfsc_class *cl, u_int32_t len, u_int64_t cur_time)
+{
+#pragma unused(cur_time)
+#if 0
+       u_int64_t myf_bound, delta;
+#endif
+       u_int64_t f;
+       int go_passive;
+
+       go_passive = (qempty(&cl->cl_q) && (cl->cl_flags & HFCF_FSC));
+
+       for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+               cl->cl_total += len;
+
+               if (!(cl->cl_flags & HFCF_FSC) || cl->cl_nactive == 0)
+                       continue;
+
+               if (go_passive && --cl->cl_nactive == 0)
+                       go_passive = 1;
+               else
+                       go_passive = 0;
+
+               if (go_passive) {
+                       /* no more active child, going passive */
+
+                       /* update cvtmax of the parent class */
+                       if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+                               cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+                       /* remove this class from the vt list */
+                       actlist_remove(cl);
+
+                       update_cfmin(cl->cl_parent);
+
+                       continue;
+               }
+
+               /*
+                * update vt and f
+                */
+               cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+                   - cl->cl_vtoff + cl->cl_vtadj;
+
+               /*
+                * if vt of the class is smaller than cvtmin,
+                * the class was skipped in the past due to non-fit.
+                * if so, we need to adjust vtadj.
+                */
+               if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+                       cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+                       cl->cl_vt = cl->cl_parent->cl_cvtmin;
+               }
+
+               /* update the vt list */
+               actlist_update(cl);
+
+               if (cl->cl_flags & HFCF_USC) {
+                       cl->cl_myf = cl->cl_myfadj +
+                           rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+#if 0
+                       /*
+                        * if myf lags behind by more than one clock tick
+                        * from the current time, adjust myfadj to prevent
+                        * a rate-limited class from going greedy.
+                        * in a steady state under rate-limiting, myf
+                        * fluctuates within one clock tick.
+                        */
+                       myf_bound = cur_time - machclk_per_tick;
+                       if (cl->cl_myf < myf_bound) {
+                               delta = cur_time - cl->cl_myf;
+                               cl->cl_myfadj += delta;
+                               cl->cl_myf += delta;
+                       }
+#endif
+               }
+
+               /* cl_f is max(cl_myf, cl_cfmin) */
+               if (cl->cl_myf > cl->cl_cfmin)
+                       f = cl->cl_myf;
+               else
+                       f = cl->cl_cfmin;
+               if (f != cl->cl_f) {
+                       cl->cl_f = f;
+                       update_cfmin(cl->cl_parent);
+               }
+       }
+}
+
+static void
+update_cfmin(struct hfsc_class *cl)
+{
+       struct hfsc_class *p;
+       u_int64_t cfmin;
+
+       if (TAILQ_EMPTY(&cl->cl_actc)) {
+               cl->cl_cfmin = 0;
+               return;
+       }
+       cfmin = HT_INFINITY;
+       TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+               if (p->cl_f == 0) {
+                       cl->cl_cfmin = 0;
+                       return;
+               }
+               if (p->cl_f < cfmin)
+                       cfmin = p->cl_f;
+       }
+       cl->cl_cfmin = cfmin;
+}
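+/*
+ * editor's note: e.g. active children with fit-times {12, 7, 9} yield
+ * cfmin = 7; a child whose cl_f is 0 (it already fits) short-circuits
+ * cfmin to 0, so the children impose no constraint on the parent.
+ */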
+
+/*
+ * TAILQ-based ellist and actlist implementation
+ * (ion wanted to make a calendar-queue-based implementation)
+ */
+/*
+ * the eligible list holds the backlogged classes, sorted by their
+ * eligible times.  there is one eligible list per interface.
+ */
+
+static void
+ellist_insert(struct hfsc_class *cl)
+{
+       struct hfsc_if  *hif = cl->cl_hif;
+       struct hfsc_class *p;
+
+       /* check the last entry first */
+       if ((p = TAILQ_LAST(&hif->hif_eligible, _eligible)) == NULL ||
+           p->cl_e <= cl->cl_e) {
+               TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+               return;
+       }
+
+       TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
+               if (cl->cl_e < p->cl_e) {
+                       TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+                       return;
+               }
+       }
+       VERIFY(0); /* should not reach here */
+}
+
+static void
+ellist_remove(struct hfsc_class *cl)
+{
+       struct hfsc_if  *hif = cl->cl_hif;
+
+       TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+}
+
+static void
+ellist_update(struct hfsc_class *cl)
+{
+       struct hfsc_if  *hif = cl->cl_hif;
+       struct hfsc_class *p, *last;
+
+       /*
+        * the eligible time of a class increases monotonically.
+        * if the next entry has a larger eligible time, nothing to do.
+        */
+       p = TAILQ_NEXT(cl, cl_ellist);
+       if (p == NULL || cl->cl_e <= p->cl_e)
+               return;
+
+       /* check the last entry */
+       last = TAILQ_LAST(&hif->hif_eligible, _eligible);
+       VERIFY(last != NULL);
+       if (last->cl_e <= cl->cl_e) {
+               TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+               TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+               return;
+       }
+
+       /*
+        * the new position must be between the next entry
+        * and the last entry
+        */
+       while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+               if (cl->cl_e < p->cl_e) {
+                       TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+                       TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+                       return;
+               }
+       }
+       VERIFY(0); /* should not reach here */
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+static struct hfsc_class *
+ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
+{
+       struct hfsc_class *p, *cl = NULL;
+
+       TAILQ_FOREACH(p, head, cl_ellist) {
+               if (p->cl_e > cur_time)
+                       break;
+               if (cl == NULL || p->cl_d < cl->cl_d)
+                       cl = p;
+       }
+       return (cl);
+}
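+/*
+ * editor's note -- e.g. with cur_time = 10 and the list sorted by
+ * eligible time as A (e=5, d=30), B (e=8, d=20), C (e=15, d=10):
+ * A and B are eligible, B is returned for its smaller deadline, and
+ * C's even smaller deadline is ignored until C becomes eligible.
+ */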
+
+/*
+ * the active-children list holds the backlogged child classes, sorted
+ * by their virtual time.  each intermediate class has one such list.
+ */
+
+static void
+actlist_insert(struct hfsc_class *cl)
+{
+       struct hfsc_class *p;
+
+       /* check the last entry first */
+       if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, _active)) == NULL ||
+           p->cl_vt <= cl->cl_vt) {
+               TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+               return;
+       }
+
+       TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
+               if (cl->cl_vt < p->cl_vt) {
+                       TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+                       return;
+               }
+       }
+       VERIFY(0); /* should not reach here */
+}
+
+static void
+actlist_remove(struct hfsc_class *cl)
+{
+       TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+}
+
+static void
+actlist_update(struct hfsc_class *cl)
+{
+       struct hfsc_class *p, *last;
+
+       /*
+        * the virtual time of a class increases monotonically during its
+        * backlogged period.
+        * if the next entry has a larger virtual time, nothing to do.
+        */
+       p = TAILQ_NEXT(cl, cl_actlist);
+       if (p == NULL || cl->cl_vt < p->cl_vt)
+               return;
+
+       /* check the last entry */
+       last = TAILQ_LAST(&cl->cl_parent->cl_actc, _active);
+       VERIFY(last != NULL);
+       if (last->cl_vt <= cl->cl_vt) {
+               TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+               TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+               return;
+       }
+
+       /*
+        * the new position must be between the next entry
+        * and the last entry
+        */
+       while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+               if (cl->cl_vt < p->cl_vt) {
+                       TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+                       TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+                       return;
+               }
+       }
+       VERIFY(0); /* should not reach here */
+}
+
+static struct hfsc_class *
+actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
+{
+       struct hfsc_class *p;
+
+       TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+               if (p->cl_f <= cur_time)
+                       return (p);
+       }
+       return (NULL);
+}
+
+/*
+ * service curve support functions
+ *
+ *  external service curve parameters
+ *     m: bits/sec
+ *     d: msec
+ *  internal service curve parameters
+ *     sm: (bytes/tsc_interval) << SM_SHIFT
+ *     ism: (tsc_count/byte) << ISM_SHIFT
+ *     dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle a 100Kbps-1Gbps link speed with a
+ * 200MHz-1GHz CPU clock.  SM_SHIFT and ISM_SHIFT are selected to have
+ * at least 3 effective
+ * digits in decimal using the following table.
+ *
+ *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
+ *  ----------+-------------------------------------------------------
+ *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
+ *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
+ *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
+ *
+ *  nsec/byte   80000      8000       800        80         8
+ *  ism(500MHz) 40000      4000       400        40         4
+ *  ism(200MHz) 16000      1600       160        16         1.6
+ */
+#define        SM_SHIFT        24
+#define        ISM_SHIFT       10
+
+#define        SM_MASK         ((1LL << SM_SHIFT) - 1)
+#define        ISM_MASK        ((1LL << ISM_SHIFT) - 1)
+
+static inline u_int64_t
+seg_x2y(u_int64_t x, u_int64_t sm)
+{
+       u_int64_t y;
+
+       /*
+        * compute
+        *      y = x * sm >> SM_SHIFT
+        * but split x into its upper and lower bits to avoid overflow
+        */
+       y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+       return (y);
+}
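+/*
+ * editor's note: the split above is exact, not an approximation --
+ * writing x = a * 2^SM_SHIFT + b gives
+ *     (x * sm) >> SM_SHIFT == a * sm + ((b * sm) >> SM_SHIFT)
+ * because a * sm carries no fractional bits; the gain is that the
+ * intermediate products stay below 2^64 for much larger x.
+ */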
+
+static inline u_int64_t
+seg_y2x(u_int64_t y, u_int64_t ism)
+{
+       u_int64_t x;
+
+       if (y == 0)
+               x = 0;
+       else if (ism == HT_INFINITY)
+               x = HT_INFINITY;
+       else {
+               x = (y >> ISM_SHIFT) * ism
+                   + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+       }
+       return (x);
+}
+
+static inline u_int64_t
+m2sm(u_int64_t m)
+{
+       u_int64_t sm;
+
+       sm = (m << SM_SHIFT) / 8 / machclk_freq;
+       return (sm);
+}
+
+static inline u_int64_t
+m2ism(u_int64_t m)
+{
+       u_int64_t ism;
+
+       if (m == 0)
+               ism = HT_INFINITY;
+       else
+               ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
+       return (ism);
+}
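+/*
+ * editor's note -- a worked example tying m2sm()/m2ism() to the table
+ * above: with machclk_freq = 500MHz and m = 1Mbps,
+ *     m2sm(m)  = (1e6 << 24) / 8 / 5e8  = 4194
+ * so sm / 2^SM_SHIFT ~= 250e-6 bytes/count (the "sm(500MHz)" row), and
+ *     m2ism(m) = (5e8 << 10) * 8 / 1e6  = 4096000
+ * so ism / 2^ISM_SHIFT = 4000 counts/byte (the "ism(500MHz)" row).
+ */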
+
+static inline u_int64_t
+d2dx(u_int64_t d)
+{
+       u_int64_t dx;
+
+       dx = (d * machclk_freq) / 1000;
+       return (dx);
+}
+
+static u_int64_t
+sm2m(u_int64_t sm)
+{
+       u_int64_t m;
+
+       m = (sm * 8 * machclk_freq) >> SM_SHIFT;
+       return (m);
+}
+
+static u_int64_t
+dx2d(u_int64_t dx)
+{
+       u_int64_t d;
+
+       d = dx * 1000 / machclk_freq;
+       return (d);
+}
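+/*
+ * editor's note: e.g. at machclk_freq = 500MHz, d2dx(100) maps a
+ * 100msec x-projection to 50,000,000 clock counts, and dx2d() is its
+ * inverse up to integer truncation.
+ */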
+
+static boolean_t
+sc2isc(struct hfsc_class *cl, struct service_curve *sc, struct internal_sc *isc,
+    u_int64_t eff_rate)
+{
+       struct hfsc_if *hif = cl->cl_hif;
+       struct internal_sc oisc = *isc;
+       u_int64_t m1, m2;
+
+       if (eff_rate == 0 && (sc->fl & (HFSCF_M1_PCT | HFSCF_M2_PCT))) {
+               /*
+                * If service curve is configured with percentage and the
+                * effective uplink rate is not known, assume this is a
+                * transient case, and that the rate will be updated in
+                * the near future via CLASSQ_EV_LINK_SPEED.  Pick a
+                * reasonable number for now, e.g. 10 Mbps.
+                */
+               eff_rate = (10 * 1000 * 1000);
+
+               log(LOG_WARNING, "%s: %s qid=%d slot=%d eff_rate unknown; "
+                   "using temporary rate %llu bps\n", if_name(HFSCIF_IFP(hif)),
+                   hfsc_style(hif), cl->cl_handle, cl->cl_id, eff_rate);
+       }
+
+       m1 = sc->m1;
+       if (sc->fl & HFSCF_M1_PCT) {
+               VERIFY(m1 > 0 && m1 <= 100);
+               m1 = (eff_rate * m1) / 100;
+       }
+
+       m2 = sc->m2;
+       if (sc->fl & HFSCF_M2_PCT) {
+               VERIFY(m2 > 0 && m2 <= 100);
+               m2 = (eff_rate * m2) / 100;
+       }
+
+       isc->sm1 = m2sm(m1);
+       isc->ism1 = m2ism(m1);
+       isc->dx = d2dx(sc->d);
+       isc->dy = seg_x2y(isc->dx, isc->sm1);
+       isc->sm2 = m2sm(m2);
+       isc->ism2 = m2ism(m2);
+
+       /* return non-zero if there's any change */
+       return (bcmp(&oisc, isc, sizeof (*isc)));
+}
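+/*
+ * editor's note: e.g. a curve with HFSCF_M2_PCT and m2 = 25 on a link
+ * whose eff_rate is 100Mbps resolves to m2 = 25Mbps above, before the
+ * m2sm()/m2ism() conversion; when the link rate changes, the
+ * CLASSQ_EV_LINK_SPEED path (hfsc_updateq_linkrate) re-runs sc2isc()
+ * with the new eff_rate.
+ */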
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+    u_int64_t y)
+{
+       rtsc->x =       x;
+       rtsc->y =       y;
+       rtsc->sm1 =     isc->sm1;
+       rtsc->ism1 =    isc->ism1;
+       rtsc->dx =      isc->dx;
+       rtsc->dy =      isc->dy;
+       rtsc->sm2 =     isc->sm2;
+       rtsc->ism2 =    isc->ism2;
+}
+
+/*
+ * calculate the y-projection of the runtime service curve by the
+ * given x-projection value
+ */
+static u_int64_t
+rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
+{
+       u_int64_t       x;
+
+       if (y < rtsc->y)
+               x = rtsc->x;
+       else if (y <= rtsc->y + rtsc->dy) {
+               /* x belongs to the 1st segment */
+               if (rtsc->dy == 0)
+                       x = rtsc->x + rtsc->dx;
+               else
+                       x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+       } else {
+               /* x belongs to the 2nd segment */
+               x = rtsc->x + rtsc->dx
+                   + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+       }
+       return (x);
+}
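+/*
+ * editor's note -- numeric sketch, ignoring the SM_SHIFT/ISM_SHIFT
+ * scaling: for a curve anchored at (x=0, y=0) with a first segment of
+ * dx = 100 counts, dy = 200 bytes (2 bytes/count) and a second segment
+ * of 1 byte/count, rtsc_y2x(250) = 100 + (250 - 200) = 150 counts.
+ */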
+
+static u_int64_t
+rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
+{
+       u_int64_t       y;
+
+       if (x <= rtsc->x)
+               y = rtsc->y;
+       else if (x <= rtsc->x + rtsc->dx)
+               /* y belongs to the 1st segment */
+               y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+       else
+               /* y belongs to the 2nd segment */
+               y = rtsc->y + rtsc->dy
+                   + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+       return (y);
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+    u_int64_t y)
+{
+       u_int64_t       y1, y2, dx, dy;
+
+       if (isc->sm1 <= isc->sm2) {
+               /* service curve is convex */
+               y1 = rtsc_x2y(rtsc, x);
+               if (y1 < y)
+                       /* the current rtsc is smaller */
+                       return;
+               rtsc->x = x;
+               rtsc->y = y;
+               return;
+       }
+
+       /*
+        * service curve is concave
+        * compute the two y values of the current rtsc
+        *      y1: at x
+        *      y2: at (x + dx)
+        */
+       y1 = rtsc_x2y(rtsc, x);
+       if (y1 <= y) {
+               /* rtsc is below isc, no change to rtsc */
+               return;
+       }
+
+       y2 = rtsc_x2y(rtsc, x + isc->dx);
+       if (y2 >= y + isc->dy) {
+               /* rtsc is above isc, replace rtsc by isc */
+               rtsc->x = x;
+               rtsc->y = y;
+               rtsc->dx = isc->dx;
+               rtsc->dy = isc->dy;
+               return;
+       }
+
+       /*
+        * the two curves intersect
+        * compute the offsets (dx, dy) using the reverse
+        * function of seg_x2y()
+        *      seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+        *  =>  (dx * (sm1 - sm2)) >> SM_SHIFT == y1 - y
+        *  =>  dx == ((y1 - y) << SM_SHIFT) / (sm1 - sm2)
+        */
+       dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+       /*
+        * check if (x, y1) belongs to the 1st segment of rtsc.
+        * if so, add the offset.
+        */
+       if (rtsc->x + rtsc->dx > x)
+               dx += rtsc->x + rtsc->dx - x;
+       dy = seg_x2y(dx, isc->sm1);
+
+       rtsc->x = x;
+       rtsc->y = y;
+       rtsc->dx = dx;
+       rtsc->dy = dy;
+}
+
+int
+hfsc_get_class_stats(struct hfsc_if *hif, u_int32_t qid,
+    struct hfsc_classstats *sp)
+{
+       struct hfsc_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       if ((cl = hfsc_clh_to_clp(hif, qid)) == NULL)
+               return (EINVAL);
+
+       sp->class_id = cl->cl_id;
+       sp->class_handle = cl->cl_handle;
+
+       if (cl->cl_flags & HFCF_RSC) {
+               sp->rsc.m1 = sm2m(cl->cl_rsc.sm1);
+               sp->rsc.d = dx2d(cl->cl_rsc.dx);
+               sp->rsc.m2 = sm2m(cl->cl_rsc.sm2);
+       } else {
+               sp->rsc.m1 = 0;
+               sp->rsc.d = 0;
+               sp->rsc.m2 = 0;
+       }
+       if (cl->cl_flags & HFCF_FSC) {
+               sp->fsc.m1 = sm2m(cl->cl_fsc.sm1);
+               sp->fsc.d = dx2d(cl->cl_fsc.dx);
+               sp->fsc.m2 = sm2m(cl->cl_fsc.sm2);
+       } else {
+               sp->fsc.m1 = 0;
+               sp->fsc.d = 0;
+               sp->fsc.m2 = 0;
+       }
+       if (cl->cl_flags & HFCF_USC) {
+               sp->usc.m1 = sm2m(cl->cl_usc.sm1);
+               sp->usc.d = dx2d(cl->cl_usc.dx);
+               sp->usc.m2 = sm2m(cl->cl_usc.sm2);
+       } else {
+               sp->usc.m1 = 0;
+               sp->usc.d = 0;
+               sp->usc.m2 = 0;
+       }
+
+       sp->total = cl->cl_total;
+       sp->cumul = cl->cl_cumul;
+
+       sp->d = cl->cl_d;
+       sp->e = cl->cl_e;
+       sp->vt = cl->cl_vt;
+       sp->f = cl->cl_f;
+
+       sp->initvt = cl->cl_initvt;
+       sp->vtperiod = cl->cl_vtperiod;
+       sp->parentperiod = cl->cl_parentperiod;
+       sp->nactive = cl->cl_nactive;
+       sp->vtoff = cl->cl_vtoff;
+       sp->cvtmax = cl->cl_cvtmax;
+       sp->myf = cl->cl_myf;
+       sp->cfmin = cl->cl_cfmin;
+       sp->cvtmin = cl->cl_cvtmin;
+       sp->myfadj = cl->cl_myfadj;
+       sp->vtadj = cl->cl_vtadj;
+
+       sp->cur_time = read_machclk();
+       sp->machclk_freq = machclk_freq;
+
+       sp->qlength = qlen(&cl->cl_q);
+       sp->qlimit = qlimit(&cl->cl_q);
+       sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+       sp->drop_cnt = cl->cl_stats.drop_cnt;
+       sp->period = cl->cl_stats.period;
+
+       sp->qtype = qtype(&cl->cl_q);
+       sp->qstate = qstate(&cl->cl_q);
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_getstats(cl->cl_red, &sp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_getstats(cl->cl_rio, &sp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_getstats(cl->cl_blue, &sp->blue);
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_getstats(cl->cl_sfb, &sp->sfb);
+
+       return (0);
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct hfsc_class *
+hfsc_clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
+{
+       u_int32_t i;
+       struct hfsc_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(hif->hif_ifq);
+
+       /*
+        * first, optimistically try the slot derived from the handle
+        * (chandle % hif_maxclasses).  if that misses, fall back to a
+        * linear search of the table.
+        */
+       i = chandle % hif->hif_maxclasses;
+       if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
+               return (cl);
+       for (i = 0; i < hif->hif_maxclasses; i++)
+               if ((cl = hif->hif_class_tbl[i]) != NULL &&
+                   cl->cl_handle == chandle)
+                       return (cl);
+       return (NULL);
+}
+
+static const char *
+hfsc_style(struct hfsc_if *hif)
+{
+       return ((hif->hif_flags & HFSCIFF_ALTQ) ? "ALTQ_HFSC" : "HFSC");
+}
+
+int
+hfsc_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+#pragma unused(ifq, flags)
+       return (ENXIO);         /* not yet */
+}
+
+int
+hfsc_teardown_ifclassq(struct ifclassq *ifq)
+{
+       struct hfsc_if *hif = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(hif != NULL && ifq->ifcq_type == PKTSCHEDT_HFSC);
+
+       (void) hfsc_destroy_locked(hif);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+hfsc_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       struct hfsc_if *hif = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_HFSC);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (hfsc_get_class_stats(hif, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_hfsc_stats));
+}
+#endif /* PKTSCHED_HFSC */
diff --git a/bsd/net/pktsched/pktsched_hfsc.h b/bsd/net/pktsched/pktsched_hfsc.h
new file mode 100644 (file)
index 0000000..d22b953
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_hfsc.h,v 1.8 2006/10/12 19:59:08 peter Exp $      */
+/*     $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+#ifndef _NET_PKTSCHED_PKTSCHED_HFSC_H_
+#define        _NET_PKTSCHED_PKTSCHED_HFSC_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct service_curve {
+       u_int32_t fl;   /* service curve flags */
+       u_int64_t m1;   /* slope of the first segment in bits/sec */
+       u_int32_t d;    /* the x-projection of the first segment in msec */
+       u_int64_t m2;   /* slope of the second segment in bits/sec */
+};
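+/*
+ * editor's note: e.g. { fl = 0, m1 = 2000000, d = 100, m2 = 500000 }
+ * describes a class served at 2Mbps for the first 100msec of a
+ * backlogged period and at 500Kbps thereafter (concave, since m1 > m2).
+ */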
+
+/* valid values for service curve flags */
+#define        HFSCF_M1_PCT            0x1     /* m1 is in percentage */
+#define        HFSCF_M2_PCT            0x10    /* m2 is in percentage */
+
+#define        HFSCF_USERFLAGS         (HFSCF_M1_PCT | HFSCF_M2_PCT)
+
+/* special class handles */
+#define        HFSC_NULLCLASS_HANDLE   0
+#define        HFSC_MAX_CLASSES        64
+
+/* hfsc class flags */
+#define        HFCF_RED                0x0001  /* use RED */
+#define        HFCF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        HFCF_RIO                0x0004  /* use RIO */
+#define        HFCF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+#define        HFCF_BLUE               0x0100  /* use BLUE */
+#define        HFCF_SFB                0x0200  /* use SFB */
+#define        HFCF_FLOWCTL            0x0400  /* enable flow control advisories */
+#define        HFCF_DEFAULTCLASS       0x1000  /* default class */
+#ifdef BSD_KERNEL_PRIVATE
+#define        HFCF_RSC                0x10000 /* has realtime sc */
+#define        HFCF_FSC                0x20000 /* has linkshare sc */
+#define        HFCF_USC                0x40000 /* has upperlimit sc */
+#define        HFCF_LAZY               0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        HFCF_USERFLAGS                                                  \
+       (HFCF_RED | HFCF_ECN | HFCF_RIO | HFCF_CLEARDSCP | HFCF_BLUE |  \
+       HFCF_SFB | HFCF_FLOWCTL | HFCF_DEFAULTCLASS)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        HFCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\21RSC\22FSC\23USC\35LAZY"
+#else
+#define        HFCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
+#endif /* !BSD_KERNEL_PRIVATE */
+
+/* service curve types */
+#define        HFSC_REALTIMESC         1
+#define        HFSC_LINKSHARINGSC      2
+#define        HFSC_UPPERLIMITSC       4
+#define        HFSC_DEFAULTSC          (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_classstats {
+       u_int32_t               class_id;
+       u_int32_t               class_handle;
+       struct service_curve    rsc;
+       struct service_curve    fsc;
+       struct service_curve    usc;    /* upper limit service curve */
+
+       u_int64_t               total;  /* total work in bytes */
+       u_int64_t               cumul;  /* cumulative work in bytes */
+                                       /*   done by real-time criteria */
+       u_int64_t               d;              /* deadline */
+       u_int64_t               e;              /* eligible time */
+       u_int64_t               vt;             /* virtual time */
+       u_int64_t               f;              /* fit time for upper-limit */
+
+       /* info helpful for debugging */
+       u_int64_t               initvt;         /* init virtual time */
+       u_int64_t               vtoff;          /* cl_vt_ipoff */
+       u_int64_t               cvtmax;         /* cl_maxvt */
+       u_int64_t               myf;            /* cl_myf */
+       u_int64_t               cfmin;          /* cl_mincf */
+       u_int64_t               cvtmin;         /* cl_mincvt */
+       u_int64_t               myfadj;         /* cl_myfadj */
+       u_int64_t               vtadj;          /* cl_vtadj */
+       u_int64_t               cur_time;
+       u_int32_t               machclk_freq;
+
+       u_int32_t               qlength;
+       u_int32_t               qlimit;
+       struct pktcntr          xmit_cnt;
+       struct pktcntr          drop_cnt;
+       u_int32_t               period;
+
+       u_int32_t               vtperiod;       /* vt period sequence no */
+       u_int32_t               parentperiod;   /* parent's vt period seqno */
+       int                     nactive;        /* number of active children */
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t           qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t          qstate;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+#include <sys/queue.h>
+/*
+ * kernel internal service curve representation
+ *     coordinates are given by 64 bit unsigned integers.
+ *     x-axis: unit is clock count.  for the intel x86 architecture,
+ *             the raw Pentium TSC (Timestamp Counter) value is used.
+ *             virtual time is also calculated in this time scale.
+ *     y-axis: unit is byte.
+ *
+ *     the service curve parameters are converted to the internal
+ *     representation.
+ *     the slope values are scaled to avoid overflow.
+ *     the inverse slope values as well as the y-projection of the 1st
+ *     segment are kept in order to avoid 64-bit divide operations
+ *     that are expensive on 32-bit architectures.
+ *
+ *  note: the 64-bit TSC takes centuries to wrap around.
+ *     x-axis doesn't wrap around for about 585 years with a 1GHz clock.
+ *     y-axis doesn't wrap around for about 4680 years at 1Gbps (125MB/s).
+ */
+
+/* kernel internal representation of a service curve */
+struct internal_sc {
+       u_int64_t       sm1;    /* scaled slope of the 1st segment */
+       u_int64_t       ism1;   /* scaled inverse-slope of the 1st segment */
+       u_int64_t       dx;     /* the x-projection of the 1st segment */
+       u_int64_t       dy;     /* the y-projection of the 1st segment */
+       u_int64_t       sm2;    /* scaled slope of the 2nd segment */
+       u_int64_t       ism2;   /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc {
+       u_int64_t       x;      /* current starting position on x-axis */
+       u_int64_t       y;      /* current starting position on y-axis */
+       u_int64_t       sm1;    /* scaled slope of the 1st segment */
+       u_int64_t       ism1;   /* scaled inverse-slope of the 1st segment */
+       u_int64_t       dx;     /* the x-projection of the 1st segment */
+       u_int64_t       dy;     /* the y-projection of the 1st segment */
+       u_int64_t       sm2;    /* scaled slope of the 2nd segment */
+       u_int64_t       ism2;   /* scaled inverse-slope of the 2nd segment */
+};
+
+/* for TAILQ based ellist and actlist implementation */
+struct hfsc_class;
+typedef TAILQ_HEAD(_eligible, hfsc_class) ellist_t;
+typedef TAILQ_ENTRY(hfsc_class) elentry_t;
+typedef TAILQ_HEAD(_active, hfsc_class) actlist_t;
+typedef TAILQ_ENTRY(hfsc_class) actentry_t;
+#define        ellist_first(s)         TAILQ_FIRST(s)
+#define        actlist_first(s)        TAILQ_FIRST(s)
+#define        actlist_last(s)         TAILQ_LAST(s, _active)
+
+struct hfsc_class {
+       u_int32_t       cl_id;          /* class id (just for debug) */
+       u_int32_t       cl_handle;      /* class handle */
+       struct hfsc_if  *cl_hif;        /* back pointer to struct hfsc_if */
+       u_int32_t       cl_flags;       /* misc flags */
+
+       struct hfsc_class *cl_parent;   /* parent class */
+       struct hfsc_class *cl_siblings; /* sibling classes */
+       struct hfsc_class *cl_children; /* child classes */
+
+       class_queue_t   cl_q;           /* class queue structure */
+       u_int32_t       cl_qflags;      /* class queue flags */
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } cl_qalg;
+
+       u_int64_t       cl_total;       /* total work in bytes */
+       u_int64_t       cl_cumul;       /* cumulative work in bytes */
+                                       /*   done by real-time criteria */
+       u_int64_t       cl_d;           /* deadline */
+       u_int64_t       cl_e;           /* eligible time */
+       u_int64_t       cl_vt;          /* virtual time */
+       u_int64_t       cl_f;           /* time when this class will fit for */
+                                       /*   link-sharing, max(myf, cfmin) */
+       u_int64_t       cl_myf;         /* my fit-time (as calculated from */
+                                       /*   this class's own upperlimit */
+                                       /*   curve) */
+       u_int64_t       cl_myfadj;      /* my fit-time adjustment */
+                                       /*   (to cancel history dependence) */
+       u_int64_t       cl_cfmin;       /* earliest children's fit-time (used */
+                                       /*   with cl_myf to obtain cl_f) */
+       u_int64_t       cl_cvtmin;      /* minimal virtual time among the */
+                                       /*   children fit for link-sharing */
+                                       /*   (monotonic within a period) */
+       u_int64_t       cl_vtadj;       /* intra-period cumulative vt */
+                                       /*   adjustment */
+       u_int64_t       cl_vtoff;       /* inter-period cumulative vt offset */
+       u_int64_t       cl_cvtmax;      /* max child's vt in the last period */
+
+       u_int64_t       cl_initvt;      /* init virtual time (for debugging) */
+
+       struct service_curve cl_rsc0;   /* external real-time service curve */
+       struct service_curve cl_fsc0;   /* external fair service curve */
+       struct service_curve cl_usc0;   /* external upperlimit service curve */
+       struct internal_sc cl_rsc;      /* internal real-time service curve */
+       struct internal_sc cl_fsc;      /* internal fair service curve */
+       struct internal_sc cl_usc;      /* internal upperlimit service curve */
+       struct runtime_sc  cl_deadline; /* deadline curve */
+       struct runtime_sc  cl_eligible; /* eligible curve */
+       struct runtime_sc  cl_virtual;  /* virtual curve */
+       struct runtime_sc  cl_ulimit;   /* upperlimit curve */
+
+       u_int32_t       cl_vtperiod;    /* vt period sequence no */
+       u_int32_t       cl_parentperiod;  /* parent's vt period seqno */
+       u_int32_t       cl_nactive;     /* number of active children */
+       actlist_t       cl_actc;        /* active children list */
+
+       actentry_t      cl_actlist;     /* active children list entry */
+       elentry_t       cl_ellist;      /* eligible list entry */
+
+       struct {
+               struct pktcntr  xmit_cnt;
+               struct pktcntr  drop_cnt;
+               u_int32_t period;
+       } cl_stats;
+};
+
+#define        cl_red  cl_qalg.red
+#define        cl_rio  cl_qalg.rio
+#define        cl_blue cl_qalg.blue
+#define        cl_sfb  cl_qalg.sfb
+
+/* hfsc_if flags */
+#define        HFSCIFF_ALTQ            0x1     /* configured via PF/ALTQ */
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {
+       struct ifclassq         *hif_ifq;       /* backpointer to ifclassq */
+       struct hfsc_class       *hif_rootclass;         /* root class */
+       struct hfsc_class       *hif_defaultclass;      /* default class */
+       struct hfsc_class       **hif_class_tbl;
+       struct hfsc_class       *hif_pollcache; /* cache for poll operation */
+
+       u_int32_t               hif_flags;      /* flags */
+       u_int32_t               hif_maxclasses; /* max # of classes in table */
+       u_int32_t               hif_classes;    /* # of classes in the tree */
+       u_int32_t               hif_packets;    /* # of packets in the tree */
+       u_int32_t               hif_classid;    /* class id sequence number */
+       u_int64_t               hif_eff_rate;   /* last known effective rate */
+
+       ellist_t hif_eligible;                  /* eligible list */
+};
+
+#define        HFSCIF_IFP(_hif)        ((_hif)->hif_ifq->ifcq_ifp)
+
+extern void hfsc_init(void);
+extern struct hfsc_if *hfsc_alloc(struct ifnet *, int, boolean_t);
+extern int hfsc_destroy(struct hfsc_if *);
+extern void hfsc_purge(struct hfsc_if *);
+extern void hfsc_event(struct hfsc_if *, cqev_t);
+extern int hfsc_add_queue(struct hfsc_if *, struct service_curve *,
+    struct service_curve *, struct service_curve *, u_int32_t, int,
+    u_int32_t, u_int32_t, struct hfsc_class **);
+extern int hfsc_remove_queue(struct hfsc_if *, u_int32_t);
+extern int hfsc_get_class_stats(struct hfsc_if *, u_int32_t,
+    struct hfsc_classstats *);
+extern int hfsc_enqueue(struct hfsc_if *, struct hfsc_class *,
+    struct mbuf *, struct pf_mtag *);
+extern struct mbuf *hfsc_dequeue(struct hfsc_if *, cqdq_op_t);
+extern int hfsc_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int hfsc_teardown_ifclassq(struct ifclassq *);
+extern int hfsc_getqstats_ifclassq(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_HFSC_H_ */
diff --git a/bsd/net/pktsched/pktsched_priq.c b/bsd/net/pktsched/pktsched_priq.c
new file mode 100644 (file)
index 0000000..c3a6f5e
--- /dev/null
@@ -0,0 +1,1275 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $      */
+/*     $KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $  */
+
+/*
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * priority queue
+ */
+
+#if PKTSCHED_PRIQ
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_priq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int priq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *priq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
+static int priq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+static int priq_clear_interface(struct priq_if *);
+static struct priq_class *priq_class_create(struct priq_if *, int, u_int32_t,
+    int, u_int32_t);
+static int priq_class_destroy(struct priq_if *, struct priq_class *);
+static int priq_destroy_locked(struct priq_if *);
+static inline int priq_addq(struct priq_class *, struct mbuf *,
+    struct pf_mtag *);
+static inline struct mbuf *priq_getq(struct priq_class *);
+static inline struct mbuf *priq_pollq(struct priq_class *);
+static void priq_purgeq(struct priq_if *, struct priq_class *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+static void priq_purge_sc(struct priq_if *, cqrq_purge_sc_t *);
+static void priq_updateq(struct priq_if *, struct priq_class *, cqev_t);
+static int priq_throttle(struct priq_if *, cqrq_throttle_t *);
+static int priq_resumeq(struct priq_if *, struct priq_class *);
+static int priq_suspendq(struct priq_if *, struct priq_class *);
+static inline struct priq_class *priq_clh_to_clp(struct priq_if *, u_int32_t);
+static const char *priq_style(struct priq_if *);
+
+#define        PRIQ_ZONE_MAX   32              /* maximum elements in zone */
+#define        PRIQ_ZONE_NAME  "pktsched_priq" /* zone name */
+
+static unsigned int priq_size;         /* size of zone element */
+static struct zone *priq_zone;         /* zone for priq */
+
+#define        PRIQ_CL_ZONE_MAX        32      /* maximum elements in zone */
+#define        PRIQ_CL_ZONE_NAME       "pktsched_priq_cl" /* zone name */
+
+static unsigned int priq_cl_size;      /* size of zone element */
+static struct zone *priq_cl_zone;      /* zone for priq_class */
+
+void
+priq_init(void)
+{
+       priq_size = sizeof (struct priq_if);
+       priq_zone = zinit(priq_size, PRIQ_ZONE_MAX * priq_size,
+           0, PRIQ_ZONE_NAME);
+       if (priq_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, PRIQ_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(priq_zone, Z_EXPAND, TRUE);
+       zone_change(priq_zone, Z_CALLERACCT, TRUE);
+
+       priq_cl_size = sizeof (struct priq_class);
+       priq_cl_zone = zinit(priq_cl_size, PRIQ_CL_ZONE_MAX * priq_cl_size,
+           0, PRIQ_CL_ZONE_NAME);
+       if (priq_cl_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, PRIQ_CL_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(priq_cl_zone, Z_EXPAND, TRUE);
+       zone_change(priq_cl_zone, Z_CALLERACCT, TRUE);
+}
+
+struct priq_if *
+priq_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       struct priq_if  *pif;
+
+       pif = (how == M_WAITOK) ? zalloc(priq_zone) : zalloc_noblock(priq_zone);
+       if (pif == NULL)
+               return (NULL);
+
+       bzero(pif, priq_size);
+       pif->pif_maxpri = -1;
+       pif->pif_ifq = &ifp->if_snd;
+       if (altq)
+               pif->pif_flags |= PRIQIFF_ALTQ;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), priq_style(pif));
+       }
+
+       return (pif);
+}
+
+int
+priq_destroy(struct priq_if *pif)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = priq_destroy_locked(pif);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+priq_destroy_locked(struct priq_if *pif)
+{
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       (void) priq_clear_interface(pif);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif));
+       }
+
+       zfree(priq_zone, pif);
+
+       return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+priq_clear_interface(struct priq_if *pif)
+{
+       struct priq_class       *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       /* clear out the classes */
+       for (pri = 0; pri <= pif->pif_maxpri; pri++)
+               if ((cl = pif->pif_classes[pri]) != NULL)
+                       priq_class_destroy(pif, cl);
+
+       return (0);
+}
+
+/* discard all the queued packets on the interface */
+void
+priq_purge(struct priq_if *pif)
+{
+       struct priq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+               if ((cl = pif->pif_classes[pri]) != NULL && !qempty(&cl->cl_q))
+                       priq_purgeq(pif, cl, 0, NULL, NULL);
+       }
+#if !PF_ALTQ
+       /*
+        * This assertion is safe to be made only when PF_ALTQ is not
+        * configured; otherwise, IFCQ_LEN represents the sum of the
+        * packets managed by ifcq_disc and altq_disc instances, which
+        * is possible when transitioning between the two.
+        */
+       VERIFY(IFCQ_LEN(pif->pif_ifq) == 0);
+#endif /* !PF_ALTQ */
+}
+
+static void
+priq_purge_sc(struct priq_if *pif, cqrq_purge_sc_t *pr)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
+       VERIFY(pr->flow != 0);
+
+       if (pr->sc != MBUF_SC_UNSPEC) {
+               i = MBUF_SCIDX(pr->sc);
+               VERIFY(i < IFCQ_SC_MAX);
+
+               priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
+                   pr->flow, &pr->packets, &pr->bytes);
+       } else {
+               u_int32_t cnt, len;
+
+               pr->packets = 0;
+               pr->bytes = 0;
+
+               for (i = 0; i < IFCQ_SC_MAX; i++) {
+                       priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
+                           pr->flow, &cnt, &len);
+                       pr->packets += cnt;
+                       pr->bytes += len;
+               }
+       }
+}
+
+void
+priq_event(struct priq_if *pif, cqev_t ev)
+{
+       struct priq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       for (pri = 0; pri <= pif->pif_maxpri; pri++)
+               if ((cl = pif->pif_classes[pri]) != NULL)
+                       priq_updateq(pif, cl, ev);
+}
+
+int
+priq_add_queue(struct priq_if *pif, int priority, u_int32_t qlimit,
+    int flags, u_int32_t qid, struct priq_class **clp)
+{
+       struct priq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       /* check parameters */
+       if (priority >= PRIQ_MAXPRI)
+               return (EINVAL);
+       if (pif->pif_classes[priority] != NULL)
+               return (EBUSY);
+       if (priq_clh_to_clp(pif, qid) != NULL)
+               return (EBUSY);
+
+       cl = priq_class_create(pif, priority, qlimit, flags, qid);
+       if (cl == NULL)
+               return (ENOMEM);
+
+       if (clp != NULL)
+               *clp = cl;
+
+       return (0);
+}
+
+static struct priq_class *
+priq_class_create(struct priq_if *pif, int pri, u_int32_t qlimit,
+    int flags, u_int32_t qid)
+{
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct priq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       /* Sanitize flags unless internally configured */
+       if (pif->pif_flags & PRIQIFF_ALTQ)
+               flags &= PRCF_USERFLAGS;
+
+#if !CLASSQ_RED
+       if (flags & PRCF_RED) {
+               log(LOG_ERR, "%s: %s RED not available!\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & PRCF_RIO) {
+               log(LOG_ERR, "%s: %s RIO not available!\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif));
+               return (NULL);
+       }
+#endif /* CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & PRCF_BLUE) {
+               log(LOG_ERR, "%s: %s BLUE not available!\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif));
+               return (NULL);
+       }
+#endif /* CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) &&
+           (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RED &&
+           (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RIO &&
+           (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_BLUE &&
+           (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_SFB) {
+               log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif));
+               return (NULL);
+       }
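+       /*
+        * editor's note: the test above rejects any combination that
+        * sets two or more of RED/RIO/BLUE/SFB (e.g. PRCF_RED|PRCF_SFB),
+        * while permitting exactly one of them, or none at all.
+        */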
+
+       ifq = pif->pif_ifq;
+       ifp = PRIQIF_IFP(pif);
+
+       if ((cl = pif->pif_classes[pri]) != NULL) {
+               /* modify the class instead of creating a new one */
+               if (!qempty(&cl->cl_q))
+                       priq_purgeq(pif, cl, 0, NULL, NULL);
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       } else {
+               cl = zalloc(priq_cl_zone);
+               if (cl == NULL)
+                       return (NULL);
+
+               bzero(cl, priq_cl_size);
+       }
+
+       pif->pif_classes[pri] = cl;
+       if (flags & PRCF_DEFAULTCLASS)
+               pif->pif_default = cl;
+       if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
+               qlimit = IFCQ_MAXLEN(ifq);
+               if (qlimit == 0)
+                       qlimit = DEFAULT_QLIMIT;  /* use default */
+       }
+       _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
+       cl->cl_flags = flags;
+       cl->cl_pri = pri;
+       if (pri > pif->pif_maxpri)
+               pif->pif_maxpri = pri;
+       cl->cl_pif = pif;
+       cl->cl_handle = qid;
+
+       if (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) {
+#if CLASSQ_RED || CLASSQ_RIO
+               u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
+               int pkttime;
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+
+               cl->cl_qflags = 0;
+               if (flags & PRCF_ECN) {
+                       if (flags & PRCF_BLUE)
+                               cl->cl_qflags |= BLUEF_ECN;
+                       else if (flags & PRCF_SFB)
+                               cl->cl_qflags |= SFBF_ECN;
+                       else if (flags & PRCF_RED)
+                               cl->cl_qflags |= REDF_ECN;
+                       else if (flags & PRCF_RIO)
+                               cl->cl_qflags |= RIOF_ECN;
+               }
+               if (flags & PRCF_FLOWCTL) {
+                       if (flags & PRCF_SFB)
+                               cl->cl_qflags |= SFBF_FLOWCTL;
+               }
+               if (flags & PRCF_CLEARDSCP) {
+                       if (flags & PRCF_RIO)
+                               cl->cl_qflags |= RIOF_CLEARDSCP;
+               }
+#if CLASSQ_RED || CLASSQ_RIO
+               /*
+                * XXX: RED & RIO should be watching link speed and MTU
+                *      events and recompute pkttime accordingly.
+                */
+               if (ifbandwidth < 8)
+                       pkttime = 1000 * 1000 * 1000; /* 1 sec */
+               else
+                       pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
+                           (ifbandwidth / 8);
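+               /*
+                * Worked example with illustrative numbers: for if_mtu =
+                * 1500 bytes on a 100 Mbps link (ifbandwidth = 10^8 bits/s,
+                * i.e. 12.5 * 10^6 bytes/s), pkttime = 1500 * 10^9 /
+                * (12.5 * 10^6) = 120000 ns, the time needed to transmit
+                * one MTU-sized packet.
+                */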
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RED
+               if (flags & PRCF_RED) {
+                       cl->cl_red = red_alloc(ifp, 0, 0,
+                           qlimit(&cl->cl_q) * 10/100,
+                           qlimit(&cl->cl_q) * 30/100,
+                           cl->cl_qflags, pkttime);
+                       if (cl->cl_red != NULL)
+                               qtype(&cl->cl_q) = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+               if (flags & PRCF_RIO) {
+                       cl->cl_rio =
+                           rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
+                       if (cl->cl_rio != NULL)
+                               qtype(&cl->cl_q) = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & PRCF_BLUE) {
+                       cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
+                       if (cl->cl_blue != NULL)
+                               qtype(&cl->cl_q) = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & PRCF_SFB) {
+                       if (!(cl->cl_flags & PRCF_LAZY))
+                               cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                                   qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb != NULL || (cl->cl_flags & PRCF_LAZY))
+                               qtype(&cl->cl_q) = Q_SFB;
+               }
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
+                   "flags=%b\n", if_name(ifp), priq_style(pif),
+                   cl->cl_handle, cl->cl_pri, qlimit, flags, PRCF_BITS);
+       }
+
+       return (cl);
+}
+
+int
+priq_remove_queue(struct priq_if *pif, u_int32_t qid)
+{
+       struct priq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
+               return (EINVAL);
+
+       return (priq_class_destroy(pif, cl));
+}
+
+static int
+priq_class_destroy(struct priq_if *pif, struct priq_class *cl)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!qempty(&cl->cl_q))
+               priq_purgeq(pif, cl, 0, NULL, NULL);
+
+       VERIFY(cl->cl_pri < PRIQ_MAXPRI);
+       VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));
+
+       pif->pif_classes[cl->cl_pri] = NULL;
+       if (pif->pif_maxpri == cl->cl_pri) {
+               for (pri = cl->cl_pri; pri >= 0; pri--)
+                       if (pif->pif_classes[pri] != NULL) {
+                               pif->pif_maxpri = pri;
+                               break;
+                       }
+               if (pri < 0)
+                       pif->pif_maxpri = -1;
+       }
+
+       if (pif->pif_default == cl)
+               pif->pif_default = NULL;
+
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif),
+                   cl->cl_handle, cl->cl_pri);
+       }
+
+       zfree(priq_cl_zone, cl);
+
+       return (0);
+}
+
+int
+priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       u_int32_t pri;
+       int len, ret;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cl == NULL || cl->cl_pif == pif);
+
+       if (cl == NULL) {
+               cl = priq_clh_to_clp(pif, t->pftag_qid);
+               if (cl == NULL) {
+                       cl = pif->pif_default;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+       pri = cl->cl_pri;
+       VERIFY(pri < PRIQ_MAXPRI);
+
+       len = m_pktlen(m);
+
+       ret = priq_addq(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+                       /* packet has been freed in priq_addq */
+                       PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOTREACHED */
+               }
+       }
+       IFCQ_INC_LEN(ifq);
+
+       /* class is now active; indicate it as such */
+       if (!pktsched_bit_tst(pri, &pif->pif_bitmap))
+               pktsched_bit_set(pri, &pif->pif_bitmap);
+
+       /* successfully queued. */
+       return (ret);
+}
+
+/*
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
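+/*
+ * A hypothetical caller sketch (for illustration only; driver_can_send is
+ * not part of this file): a driver that wants to look at the head packet
+ * before committing to transmit it may do
+ *
+ *     m = priq_dequeue(pif, CLASSQDQ_POLL);
+ *     if (m != NULL && driver_can_send(m))
+ *             m = priq_dequeue(pif, CLASSQDQ_REMOVE);
+ *
+ * relying on the invariant above that REMOVE returns the packet just
+ * returned by POLL.
+ */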
+struct mbuf *
+priq_dequeue(struct priq_if *pif, cqdq_op_t op)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       struct priq_class *cl;
+       struct mbuf *m;
+       u_int32_t pri, len;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (pif->pif_bitmap == 0) {
+               /* no active class; nothing to dequeue */
+               return (NULL);
+       }
+       VERIFY(!IFCQ_IS_EMPTY(ifq));
+
+       pri = pktsched_fls(pif->pif_bitmap) - 1;        /* zero based */
+       VERIFY(pri < PRIQ_MAXPRI);
+       cl = pif->pif_classes[pri];
+       VERIFY(cl != NULL && !qempty(&cl->cl_q));
+
+       if (op == CLASSQDQ_POLL)
+               return (priq_pollq(cl));
+
+       m = priq_getq(cl);
+       VERIFY(m != NULL);      /* qalg must be work conserving */
+       len = m_pktlen(m);
+
+       IFCQ_DEC_LEN(ifq);
+       if (qempty(&cl->cl_q)) {
+               cl->cl_period++;
+               /* class is now inactive; indicate it as such */
+               pktsched_bit_clr(pri, &pif->pif_bitmap);
+       }
+       PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
+       IFCQ_XMIT_ADD(ifq, 1, len);
+
+       return (m);
+}
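+
+/*
+ * Note on the dequeue path above: pif_bitmap keeps one bit per backlogged
+ * priority, so the highest-priority active class is located in O(1) via
+ * find-last-set instead of scanning pif_classes[].  With made-up values:
+ * if the classes at priorities 0 and 4 are backlogged, pif_bitmap == 0x11,
+ * pktsched_fls() returns 5, and the class at priority 4 is served first.
+ */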
+
+static inline int
+priq_addq(struct priq_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct priq_if *pif = cl->cl_pif;
+       struct ifclassq *ifq = pif->pif_ifq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_addq(cl->cl_red, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb == NULL) {
+                       struct ifnet *ifp = PRIQIF_IFP(pif);
+
+                       VERIFY(cl->cl_flags & PRCF_LAZY);
+                       cl->cl_flags &= ~PRCF_LAZY;
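+                       /* need a full mutex; the allocation below may block */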
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                           qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb == NULL) {
+                               /* fall back to droptail */
+                               qtype(&cl->cl_q) = Q_DROPTAIL;
+                               cl->cl_flags &= ~PRCF_SFB;
+                               cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: %s SFB lazy allocation "
+                                   "failed for qid=%d pri=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   priq_style(pif), cl->cl_handle,
+                                   cl->cl_pri);
+                       } else if (pif->pif_throttle != IFNET_THROTTLE_OFF) {
+                               /* if there's pending throttling, set it */
+                               cqrq_throttle_t tr = { 1, pif->pif_throttle };
+                               int err = priq_throttle(pif, &tr);
+
+                               if (err == EALREADY)
+                                       err = 0;
+                               if (err != 0) {
+                                       tr.level = IFNET_THROTTLE_OFF;
+                                       (void) priq_throttle(pif, &tr);
+                               }
+                       }
+               }
+               if (cl->cl_sfb != NULL)
+                       return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
+       } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       if (cl->cl_flags & PRCF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       _addq(&cl->cl_q, m);
+
+       return (0);
+}
+
+static inline struct mbuf *
+priq_getq(struct priq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_getq(cl->cl_rio, &cl->cl_q));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_getq(cl->cl_red, &cl->cl_q));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_getq(cl->cl_blue, &cl->cl_q));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_getq(cl->cl_sfb, &cl->cl_q));
+
+       return (_getq(&cl->cl_q));
+}
+
+static inline struct mbuf *
+priq_pollq(struct priq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);
+
+       return (qhead(&cl->cl_q));
+}
+
+static void
+priq_purgeq(struct priq_if *pif, struct priq_class *cl, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       u_int32_t cnt = 0, len = 0, qlen;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if ((qlen = qlen(&cl->cl_q)) == 0) {
+               VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));
+               goto done;
+       }
+
+       /* become regular mutex before freeing mbufs */
+       IFCQ_CONVERT_LOCK(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
+       else
+               _flushq_flow(&cl->cl_q, flow, &cnt, &len);
+
+       if (cnt > 0) {
+               VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
+
+               PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
+               IFCQ_DROP_ADD(ifq, cnt, len);
+
+               VERIFY(((signed)IFCQ_LEN(ifq) - (signed)cnt) >= 0);
+               IFCQ_LEN(ifq) -= cnt;
+
+               if (qempty(&cl->cl_q))
+                       pktsched_bit_clr(cl->cl_pri, &pif->pif_bitmap);
+
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
+                           "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
+                           if_name(PRIQIF_IFP(pif)), priq_style(pif),
+                           cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
+                           cnt, len, flow);
+               }
+       }
+done:
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+static void
+priq_updateq(struct priq_if *pif, struct priq_class *cl, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
+                   if_name(PRIQIF_IFP(pif)), priq_style(pif),
+                   cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
+       }
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_updateq(cl->cl_rio, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_updateq(cl->cl_red, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_updateq(cl->cl_blue, ev));
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_updateq(cl->cl_sfb, ev));
+}
+
+int
+priq_get_class_stats(struct priq_if *pif, u_int32_t qid,
+    struct priq_classstats *sp)
+{
+       struct priq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
+               return (EINVAL);
+
+       sp->class_handle = cl->cl_handle;
+       sp->priority = cl->cl_pri;
+       sp->qlength = qlen(&cl->cl_q);
+       sp->qlimit = qlimit(&cl->cl_q);
+       sp->period = cl->cl_period;
+       sp->xmitcnt = cl->cl_xmitcnt;
+       sp->dropcnt = cl->cl_dropcnt;
+
+       sp->qtype = qtype(&cl->cl_q);
+       sp->qstate = qstate(&cl->cl_q);
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_getstats(cl->cl_red, &sp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_getstats(cl->cl_rio, &sp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_getstats(cl->cl_blue, &sp->blue);
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_getstats(cl->cl_sfb, &sp->sfb);
+
+       return (0);
+}
+
+/* convert a class handle to the corresponding class pointer */
+static inline struct priq_class *
+priq_clh_to_clp(struct priq_if *pif, u_int32_t chandle)
+{
+       struct priq_class *cl;
+       int idx;
+
+       IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
+
+       for (idx = pif->pif_maxpri; idx >= 0; idx--)
+               if ((cl = pif->pif_classes[idx]) != NULL &&
+                   cl->cl_handle == chandle)
+                       return (cl);
+
+       return (NULL);
+}
+
+static const char *
+priq_style(struct priq_if *pif)
+{
+       return ((pif->pif_flags & PRIQIFF_ALTQ) ? "ALTQ_PRIQ" : "PRIQ");
+}
+
+/*
+ * priq_enqueue_ifclassq is an enqueue function to be registered to
+ * (*ifcq_enqueue) in struct ifclassq.
+ */
+static int
+priq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
+{
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               log(LOG_ERR, "%s: packet does not have pkthdr\n",
+                   if_name(ifq->ifcq_ifp));
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       i = MBUF_SCIDX(mbuf_get_service_class(m));
+       VERIFY((u_int32_t)i < IFCQ_SC_MAX);
+
+       return (priq_enqueue(ifq->ifcq_disc,
+           ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
+}
+
+/*
+ * priq_dequeue_ifclassq is a dequeue function to be registered to
+ * (*ifcq_dequeue) in struct ifclassq.
+ *
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
+{
+       return (priq_dequeue(ifq->ifcq_disc, op));
+}
+
+static int
+priq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
+{
+       struct priq_if *pif = (struct priq_if *)ifq->ifcq_disc;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       switch (req) {
+       case CLASSQRQ_PURGE:
+               priq_purge(pif);
+               break;
+
+       case CLASSQRQ_PURGE_SC:
+               priq_purge_sc(pif, (cqrq_purge_sc_t *)arg);
+               break;
+
+       case CLASSQRQ_EVENT:
+               priq_event(pif, (cqev_t)arg);
+               break;
+
+       case CLASSQRQ_THROTTLE:
+               err = priq_throttle(pif, (cqrq_throttle_t *)arg);
+               break;
+       }
+       return (err);
+}
+
+int
+priq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       struct priq_class *cl0, *cl1, *cl2, *cl3, *cl4;
+       struct priq_class *cl5, *cl6, *cl7, *cl8, *cl9;
+       struct priq_if *pif;
+       u_int32_t maxlen = 0, qflags = 0;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+
+       if (flags & PKTSCHEDF_QALG_RED)
+               qflags |= PRCF_RED;
+       if (flags & PKTSCHEDF_QALG_RIO)
+               qflags |= PRCF_RIO;
+       if (flags & PKTSCHEDF_QALG_BLUE)
+               qflags |= PRCF_BLUE;
+       if (flags & PKTSCHEDF_QALG_SFB)
+               qflags |= PRCF_SFB;
+       if (flags & PKTSCHEDF_QALG_ECN)
+               qflags |= PRCF_ECN;
+       if (flags & PKTSCHEDF_QALG_FLOWCTL)
+               qflags |= PRCF_FLOWCTL;
+
+       pif = priq_alloc(ifp, M_WAITOK, FALSE);
+       if (pif == NULL)
+               return (ENOMEM);
+
+       if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
+               maxlen = if_sndq_maxlen;
+
+       if ((err = priq_add_queue(pif, 0, maxlen,
+           qflags | PRCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 1, maxlen,
+           qflags | PRCF_LAZY, SCIDX_BK, &cl1)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 2, maxlen,
+           qflags | PRCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 3, maxlen,
+           qflags | PRCF_LAZY, SCIDX_RD, &cl3)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 4, maxlen,
+           qflags | PRCF_LAZY, SCIDX_OAM, &cl4)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 5, maxlen,
+           qflags | PRCF_LAZY, SCIDX_AV, &cl5)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 6, maxlen,
+           qflags | PRCF_LAZY, SCIDX_RV, &cl6)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 7, maxlen,
+           qflags | PRCF_LAZY, SCIDX_VI, &cl7)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 8, maxlen,
+           qflags | PRCF_LAZY, SCIDX_VO, &cl8)) != 0)
+               goto cleanup;
+
+       if ((err = priq_add_queue(pif, 9, maxlen,
+           qflags, SCIDX_CTL, &cl9)) != 0)
+               goto cleanup;
+
+       err = ifclassq_attach(ifq, PKTSCHEDT_PRIQ, pif,
+           priq_enqueue_ifclassq, priq_dequeue_ifclassq, NULL,
+           priq_request_ifclassq);
+
+       /* cache these for faster lookup */
+       if (err == 0) {
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;
+
+               ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
+               ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;
+
+               ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
+               ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;
+
+               ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
+               ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;
+
+               ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
+               ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;
+
+               ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
+               ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;
+
+               ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
+               ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;
+
+               ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
+               ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;
+
+               ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
+               ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;
+
+               ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
+               ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
+       }
+
+cleanup:
+       if (err != 0)
+               (void) priq_destroy_locked(pif);
+
+       return (err);
+}
+
+int
+priq_teardown_ifclassq(struct ifclassq *ifq)
+{
+       struct priq_if *pif = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(pif != NULL && ifq->ifcq_type == PKTSCHEDT_PRIQ);
+
+       (void) priq_destroy_locked(pif);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+priq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       struct priq_if *pif = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_PRIQ);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (priq_get_class_stats(pif, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_priq_stats));
+}
+
+static int
+priq_throttle(struct priq_if *pif, cqrq_throttle_t *tr)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       struct priq_class *cl;
+       int err;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(!(pif->pif_flags & PRIQIFF_ALTQ));
+
+       if (!tr->set) {
+               tr->level = pif->pif_throttle;
+               return (0);
+       }
+
+       if (tr->level == pif->pif_throttle)
+               return (EALREADY);
+
+       /* Current throttling levels only involve BK_SYS class */
+       cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;
+
+       switch (tr->level) {
+       case IFNET_THROTTLE_OFF:
+               err = priq_resumeq(pif, cl);
+               break;
+
+       case IFNET_THROTTLE_OPPORTUNISTIC:
+               err = priq_suspendq(pif, cl);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
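+       /*
+        * ENXIO from priq_suspendq() means the class uses lazily-allocated
+        * SFB state that does not exist yet (PRCF_LAZY); in that case only
+        * record the new level here, and priq_addq() applies it once it
+        * allocates the SFB instance.
+        */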
+       if (err == 0 || err == ENXIO) {
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
+                           if_name(PRIQIF_IFP(pif)), priq_style(pif),
+                           (err == 0) ? "" : "lazy ", pif->pif_throttle,
+                           tr->level);
+               }
+               pif->pif_throttle = tr->level;
+               if (err != 0)
+                       err = 0;
+               else
+                       priq_purgeq(pif, cl, 0, NULL, NULL);
+       } else {
+               log(LOG_ERR, "%s: %s unable to set throttling level "
+                   "%d->%d [error=%d]\n", if_name(PRIQIF_IFP(pif)),
+                   priq_style(pif), pif->pif_throttle, tr->level, err);
+       }
+
+       return (err);
+}
+
+static int
+priq_resumeq(struct priq_if *pif, struct priq_class *cl)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
+
+       if (err == 0)
+               qstate(&cl->cl_q) = QS_RUNNING;
+
+       return (err);
+}
+
+static int
+priq_suspendq(struct priq_if *pif, struct priq_class *cl)
+{
+       struct ifclassq *ifq = pif->pif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb != NULL) {
+                       err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
+               } else {
+                       VERIFY(cl->cl_flags & PRCF_LAZY);
+                       err = ENXIO;    /* delayed throttling */
+               }
+       }
+
+       if (err == 0 || err == ENXIO)
+               qstate(&cl->cl_q) = QS_SUSPENDED;
+
+       return (err);
+}
+#endif /* PKTSCHED_PRIQ */
diff --git a/bsd/net/pktsched/pktsched_priq.h b/bsd/net/pktsched/pktsched_priq.h
new file mode 100644 (file)
index 0000000..4dc9b74
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $NetBSD: altq_priq.h,v 1.7 2006/10/12 19:59:08 peter Exp $      */
+/*     $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $  */
+/*
+ * Copyright (C) 2000-2003
+ *     Sony Computer Science Laboratories Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_PRIQ_H_
+#define        _NET_PKTSCHED_PKTSCHED_PRIQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        PRIQ_MAXPRI     16      /* upper limit of the number of priorities */
+
+/* priq class flags */
+#define        PRCF_RED                0x0001  /* use RED */
+#define        PRCF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        PRCF_RIO                0x0004  /* use RIO */
+#define        PRCF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+#define        PRCF_BLUE               0x0100  /* use BLUE */
+#define        PRCF_SFB                0x0200  /* use SFB */
+#define        PRCF_FLOWCTL            0x0400  /* enable flow control advisories */
+#define        PRCF_DEFAULTCLASS       0x1000  /* default class */
+#ifdef BSD_KERNEL_PRIVATE
+#define        PRCF_LAZY               0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        PRCF_USERFLAGS                                                  \
+       (PRCF_RED | PRCF_ECN | PRCF_RIO | PRCF_CLEARDSCP | PRCF_BLUE |  \
+       PRCF_SFB | PRCF_FLOWCTL | PRCF_DEFAULTCLASS)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        PRCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\35LAZY"
+#else
+#define        PRCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
+#endif /* !BSD_KERNEL_PRIVATE */
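+
+/*
+ * Note: PRCF_BITS is a "%b" conversion bit-name string.  The leading octal
+ * escape gives the output base (\020 == 16, i.e. hexadecimal); each
+ * following octal escape is a 1-indexed bit position followed by the bit's
+ * name.  For example \1RED names bit 0 (PRCF_RED, 0x0001), \5CLEARDSCP
+ * names bit 4 (0x0010), and \35LAZY names bit 28 (0x10000000), so a flags
+ * value of 0x0201 would print along the lines of "201<RED,SFB>".
+ */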
+
+struct priq_classstats {
+       u_int32_t               class_handle;
+       u_int32_t               priority;
+
+       u_int32_t               qlength;
+       u_int32_t               qlimit;
+       u_int32_t               period;
+       struct pktcntr          xmitcnt;  /* transmitted packet counter */
+       struct pktcntr          dropcnt;  /* dropped packet counter */
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t           qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t          qstate;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+struct priq_class {
+       u_int32_t       cl_handle;      /* class handle */
+       class_queue_t   cl_q;           /* class queue structure */
+       u_int32_t       cl_qflags;      /* class queue flags */
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } cl_qalg;
+       int32_t         cl_pri;         /* priority */
+       u_int32_t       cl_flags;       /* class flags */
+       struct priq_if  *cl_pif;        /* back pointer to pif */
+
+       /* statistics */
+       u_int32_t       cl_period;      /* backlog period */
+       struct pktcntr  cl_xmitcnt;     /* transmitted packet counter */
+       struct pktcntr  cl_dropcnt;     /* dropped packet counter */
+};
+
+#define        cl_red  cl_qalg.red
+#define        cl_rio  cl_qalg.rio
+#define        cl_blue cl_qalg.blue
+#define        cl_sfb  cl_qalg.sfb
+
+/* priq_if flags */
+#define        PRIQIFF_ALTQ            0x1     /* configured via PF/ALTQ */
+
+/*
+ * priq interface state
+ */
+struct priq_if {
+       struct ifclassq         *pif_ifq;       /* backpointer to ifclassq */
+       int                     pif_maxpri;     /* max priority in use */
+       u_int32_t               pif_flags;      /* flags */
+       u_int32_t               pif_throttle;   /* throttling level */
+       pktsched_bitmap_t       pif_bitmap;     /* active class bitmap */
+       struct priq_class       *pif_default;   /* default class */
+       struct priq_class       *pif_classes[PRIQ_MAXPRI]; /* classes */
+};
+
+#define        PRIQIF_IFP(_pif)        ((_pif)->pif_ifq->ifcq_ifp)
+
+struct if_ifclassq_stats;
+
+extern void priq_init(void);
+extern struct priq_if *priq_alloc(struct ifnet *, int, boolean_t);
+extern int priq_destroy(struct priq_if *);
+extern void priq_purge(struct priq_if *);
+extern void priq_event(struct priq_if *, cqev_t);
+extern int priq_add_queue(struct priq_if *, int, u_int32_t, int, u_int32_t,
+    struct priq_class **);
+extern int priq_remove_queue(struct priq_if *, u_int32_t);
+extern int priq_get_class_stats(struct priq_if *, u_int32_t,
+    struct priq_classstats *);
+extern int priq_enqueue(struct priq_if *, struct priq_class *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *priq_dequeue(struct priq_if *, cqdq_op_t);
+extern int priq_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int priq_teardown_ifclassq(struct ifclassq *ifq);
+extern int priq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+extern int priq_set_throttle(struct ifclassq *, u_int32_t);
+extern int priq_get_throttle(struct ifclassq *, u_int32_t *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_PRIQ_H_ */
diff --git a/bsd/net/pktsched/pktsched_qfq.c b/bsd/net/pktsched/pktsched_qfq.c
new file mode 100644 (file)
index 0000000..d7cca36
--- /dev/null
@@ -0,0 +1,2034 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Quick Fair Queueing is described in
+ * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
+ * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
+ *
+ * This code is ported from the dummynet(4) QFQ implementation.
+ * See also http://info.iet.unipi.it/~luigi/qfq/
+ */
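+
+/*
+ * Rough outline of the scheme: QFQ approximates weighted fair queueing in
+ * O(1) per packet by quantizing each class's (weight, maximum length) pair
+ * into an index, aggregating classes with equal indices into groups that
+ * share a slot granularity, and tracking per-group state (eligible/
+ * ineligible, ready/blocked) in small bitmaps, so the next group to serve
+ * is found with a find-first-set rather than a per-class scan.
+ */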
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_qfq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int qfq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *qfq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
+static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+static int qfq_clear_interface(struct qfq_if *);
+static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
+    u_int32_t, u_int32_t, u_int32_t, u_int32_t);
+static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
+static int qfq_destroy_locked(struct qfq_if *);
+static inline int qfq_addq(struct qfq_class *, struct mbuf *, struct pf_mtag *);
+static inline struct mbuf *qfq_getq(struct qfq_class *);
+static inline struct mbuf *qfq_pollq(struct qfq_class *);
+static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
+static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
+static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
+static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
+static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
+static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
+static const char *qfq_style(struct qfq_if *);
+
+static inline int qfq_gt(u_int64_t, u_int64_t);
+static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
+static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
+static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
+static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
+static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
+static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
+    int, int);
+static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
+static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
+static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
+    struct qfq_class *, u_int64_t);
+static inline void qfq_front_slot_remove(struct qfq_group *);
+static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
+    struct qfq_group *);
+static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
+    u_int64_t);
+static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
+static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
+    struct qfq_class *);
+static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
+static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
+    struct qfq_class *);
+static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
+static const char *qfq_state2str(int);
+#if QFQ_DEBUG
+static void qfq_dump_groups(struct qfq_if *, u_int32_t);
+static void qfq_dump_sched(struct qfq_if *, const char *);
+#endif /* QFQ_DEBUG */
+
+#define        QFQ_ZONE_MAX    32              /* maximum elements in zone */
+#define        QFQ_ZONE_NAME   "pktsched_qfq"  /* zone name */
+
+static unsigned int qfq_size;          /* size of zone element */
+static struct zone *qfq_zone;          /* zone for qfq */
+
+#define        QFQ_CL_ZONE_MAX 32      /* maximum elements in zone */
+#define        QFQ_CL_ZONE_NAME        "pktsched_qfq_cl" /* zone name */
+
+static unsigned int qfq_cl_size;       /* size of zone element */
+static struct zone *qfq_cl_zone;       /* zone for qfq_class */
+
+/*
+ * Maximum number of consecutive slots occupied by backlogged classes
+ * inside a group.  This is approx lmax/lmin + 5.  Used when ALTQ is
+ * available.
+ *
+ * XXX check because it poses constraints on MAX_INDEX
+ */
+#define        QFQ_MAX_SLOTS   32      /* default when ALTQ is available */
+
+void
+qfq_init(void)
+{
+       qfq_size = sizeof (struct qfq_if);
+       qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
+           0, QFQ_ZONE_NAME);
+       if (qfq_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(qfq_zone, Z_EXPAND, TRUE);
+       zone_change(qfq_zone, Z_CALLERACCT, TRUE);
+
+       qfq_cl_size = sizeof (struct qfq_class);
+       qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
+           0, QFQ_CL_ZONE_NAME);
+       if (qfq_cl_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
+       zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
+}
+
+struct qfq_if *
+qfq_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       struct qfq_if   *qif;
+
+       qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
+       if (qif == NULL)
+               return (NULL);
+
+       bzero(qif, qfq_size);
+       qif->qif_ifq = &ifp->if_snd;
+       if (altq) {
+               qif->qif_maxclasses = QFQ_MAX_CLASSES;
+               qif->qif_maxslots = QFQ_MAX_SLOTS;
+               qif->qif_flags |= QFQIFF_ALTQ;
+       } else {
+               qif->qif_maxclasses = IFCQ_SC_MAX;
+               /*
+                * TODO: adi@apple.com
+                *
+                * Ideally I would like to have the following
+                * but QFQ needs further modifications.
+                *
+                *      qif->qif_maxslots = IFCQ_SC_MAX;
+                */
+               qif->qif_maxslots = QFQ_MAX_SLOTS;
+       }
+
+       if ((qif->qif_class_tbl = _MALLOC(sizeof (struct qfq_class *) *
+           qif->qif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
+               log(LOG_ERR, "%s: %s unable to allocate class table array\n",
+                   if_name(ifp), qfq_style(qif));
+               goto error;
+       }
+
+       if ((qif->qif_groups = _MALLOC(sizeof (struct qfq_group *) *
+           (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
+               log(LOG_ERR, "%s: %s unable to allocate group array\n",
+                   if_name(ifp), qfq_style(qif));
+               goto error;
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), qfq_style(qif));
+       }
+
+       return (qif);
+
+error:
+       if (qif->qif_class_tbl != NULL) {
+               _FREE(qif->qif_class_tbl, M_DEVBUF);
+               qif->qif_class_tbl = NULL;
+       }
+       if (qif->qif_groups != NULL) {
+               _FREE(qif->qif_groups, M_DEVBUF);
+               qif->qif_groups = NULL;
+       }
+       zfree(qfq_zone, qif);
+
+       return (NULL);
+}
+
+int
+qfq_destroy(struct qfq_if *qif)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = qfq_destroy_locked(qif);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+qfq_destroy_locked(struct qfq_if *qif)
+{
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       (void) qfq_clear_interface(qif);
+
+       VERIFY(qif->qif_class_tbl != NULL);
+       _FREE(qif->qif_class_tbl, M_DEVBUF);
+       qif->qif_class_tbl = NULL;
+
+       VERIFY(qif->qif_groups != NULL);
+       for (i = 0; i <= QFQ_MAX_INDEX; i++) {
+               struct qfq_group *grp = qif->qif_groups[i];
+
+               if (grp != NULL) {
+                       VERIFY(grp->qfg_slots != NULL);
+                       _FREE(grp->qfg_slots, M_DEVBUF);
+                       grp->qfg_slots = NULL;
+                       _FREE(grp, M_DEVBUF);
+                       qif->qif_groups[i] = NULL;
+               }
+       }
+       _FREE(qif->qif_groups, M_DEVBUF);
+       qif->qif_groups = NULL;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif));
+       }
+
+       zfree(qfq_zone, qif);
+
+       return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+qfq_clear_interface(struct qfq_if *qif)
+{
+       struct qfq_class *cl;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       /* clear out the classes */
+       for (i = 0; i < qif->qif_maxclasses; i++)
+               if ((cl = qif->qif_class_tbl[i]) != NULL)
+                       qfq_class_destroy(qif, cl);
+
+       return (0);
+}
+
+/* discard all the queued packets on the interface */
+void
+qfq_purge(struct qfq_if *qif)
+{
+       struct qfq_class *cl;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       for (i = 0; i < qif->qif_maxclasses; i++) {
+               if ((cl = qif->qif_class_tbl[i]) != NULL)
+                       qfq_purgeq(qif, cl, 0, NULL, NULL);
+       }
+#if !PF_ALTQ
+       /*
+        * This assertion is safe to be made only when PF_ALTQ is not
+        * configured; otherwise, IFCQ_LEN represents the sum of the
+        * packets managed by ifcq_disc and altq_disc instances, which
+        * is possible when transitioning between the two.
+        */
+       VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
+#endif /* !PF_ALTQ */
+}
+
+static void
+qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
+       VERIFY(pr->flow != 0);
+
+       if (pr->sc != MBUF_SC_UNSPEC) {
+               i = MBUF_SCIDX(pr->sc);
+               VERIFY(i < IFCQ_SC_MAX);
+
+               qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
+                   pr->flow, &pr->packets, &pr->bytes);
+       } else {
+               u_int32_t cnt, len;
+
+               pr->packets = 0;
+               pr->bytes = 0;
+
+               for (i = 0; i < IFCQ_SC_MAX; i++) {
+                       qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
+                           pr->flow, &cnt, &len);
+                       pr->packets += cnt;
+                       pr->bytes += len;
+               }
+       }
+}
+
+void
+qfq_event(struct qfq_if *qif, cqev_t ev)
+{
+       struct qfq_class *cl;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       for (i = 0; i < qif->qif_maxclasses; i++)
+               if ((cl = qif->qif_class_tbl[i]) != NULL)
+                       qfq_updateq(qif, cl, ev);
+}
+
+int
+qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
+    u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp)
+{
+       struct qfq_class *cl;
+       u_int32_t w;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       if (qfq_clh_to_clp(qif, qid) != NULL)
+               return (EBUSY);
+
+       /* check parameters */
+       if (weight == 0 || weight > QFQ_MAX_WEIGHT)
+               return (EINVAL);
+
+       w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
+       if (qif->qif_wsum + w > QFQ_MAX_WSUM)
+               return (EINVAL);
+
+       if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT))
+               return (EINVAL);
+
+       cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid);
+       if (cl == NULL)
+               return (ENOMEM);
+
+       if (clp != NULL)
+               *clp = cl;
+
+       return (0);
+}
+
+static struct qfq_class *
+qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
+    u_int32_t flags, u_int32_t maxsz, u_int32_t qid)
+{
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct qfq_group *grp;
+       struct qfq_class *cl;
+       u_int32_t w;                    /* approximated weight */
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       /* Sanitize flags unless internally configured */
+       if (qif->qif_flags & QFQIFF_ALTQ)
+               flags &= QFCF_USERFLAGS;
+
+       if (qif->qif_classes >= qif->qif_maxclasses) {
+               log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif),
+                   qif->qif_maxclasses);
+               return (NULL);
+       }
+
+#if !CLASSQ_RED
+       if (flags & QFCF_RED) {
+               log(LOG_ERR, "%s: %s RED not available!\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & QFCF_RIO) {
+               log(LOG_ERR, "%s: %s RIO not available!\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & QFCF_BLUE) {
+               log(LOG_ERR, "%s: %s BLUE not available!\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) &&
+           (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RED &&
+           (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RIO &&
+           (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_BLUE &&
+           (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_SFB) {
+               log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif));
+               return (NULL);
+       }
+
+       ifq = qif->qif_ifq;
+       ifp = QFQIF_IFP(qif);
+
+       cl = zalloc(qfq_cl_zone);
+       if (cl == NULL)
+               return (NULL);
+
+       bzero(cl, qfq_cl_size);
+
+       if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
+               qlimit = IFCQ_MAXLEN(ifq);
+               if (qlimit == 0)
+                       qlimit = DEFAULT_QLIMIT;  /* use default */
+       }
+       _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
+       cl->cl_qif = qif;
+       cl->cl_flags = flags;
+       cl->cl_handle = qid;
+
+       /*
+        * Find a free slot in the class table.  If the slot at index
+        * (qid % qif_maxclasses) is free, use it.  Otherwise, use the
+        * first free slot.
+        */
+       i = qid % qif->qif_maxclasses;
+       if (qif->qif_class_tbl[i] == NULL) {
+               qif->qif_class_tbl[i] = cl;
+       } else {
+               for (i = 0; i < qif->qif_maxclasses; i++) {
+                       if (qif->qif_class_tbl[i] == NULL) {
+                               qif->qif_class_tbl[i] = cl;
+                               break;
+                       }
+               }
+               if (i == qif->qif_maxclasses) {
+                       zfree(qfq_cl_zone, cl);
+                       return (NULL);
+               }
+       }
+
+       w = weight;
+       VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
+       cl->cl_lmax = maxsz;
+       cl->cl_inv_w = (QFQ_ONE_FP / w);
+       w = (QFQ_ONE_FP / cl->cl_inv_w);
+       VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);
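+       /*
+        * Note: inv_w is the truncated fixed-point inverse of the requested
+        * weight, and recomputing w = QFQ_ONE_FP / inv_w yields the weight
+        * as actually representable; due to truncation this effective
+        * weight can be slightly larger than requested, and it is the
+        * value charged against qif_wsum (mirroring qfq_add_queue above).
+        */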
+
+       i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
+       VERIFY(i <= QFQ_MAX_INDEX);
+       grp = qif->qif_groups[i];
+       if (grp == NULL) {
+               grp = _MALLOC(sizeof (*grp), M_DEVBUF, M_WAITOK|M_ZERO);
+               if (grp != NULL) {
+                       grp->qfg_index = i;
+                       grp->qfg_slot_shift =
+                           QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
+                       grp->qfg_slots = _MALLOC(sizeof (struct qfq_class *) *
+                           qif->qif_maxslots, M_DEVBUF, M_WAITOK|M_ZERO);
+                       if (grp->qfg_slots == NULL) {
+                               log(LOG_ERR, "%s: %s unable to allocate group "
+                                   "slots for index %d\n", if_name(ifp),
+                                   qfq_style(qif), i);
+                       }
+               } else {
+                       log(LOG_ERR, "%s: %s unable to allocate group for "
+                           "qid=%d\n", if_name(ifp), qfq_style(qif),
+                           cl->cl_handle);
+               }
+               if (grp == NULL || grp->qfg_slots == NULL) {
+                       /* clear whichever slot actually holds cl */
+                       for (i = 0; i < qif->qif_maxclasses; i++)
+                               if (qif->qif_class_tbl[i] == cl)
+                                       qif->qif_class_tbl[i] = NULL;
+                       if (grp != NULL)
+                               _FREE(grp, M_DEVBUF);
+                       zfree(qfq_cl_zone, cl);
+                       return (NULL);
+               } else {
+                       qif->qif_groups[i] = grp;
+               }
+       }
+       cl->cl_grp = grp;
+       qif->qif_wsum += w;
+       /* XXX cl->cl_S = qif->qif_V; ? */
+       /* XXX compute qif->qif_i_wsum */
+
+       qif->qif_classes++;
+
+       if (flags & QFCF_DEFAULTCLASS)
+               qif->qif_default = cl;
+
+       if (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) {
+#if CLASSQ_RED || CLASSQ_RIO
+               u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
+               int pkttime;
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+
+               cl->cl_qflags = 0;
+               if (flags & QFCF_ECN) {
+                       if (flags & QFCF_BLUE)
+                               cl->cl_qflags |= BLUEF_ECN;
+                       else if (flags & QFCF_SFB)
+                               cl->cl_qflags |= SFBF_ECN;
+                       else if (flags & QFCF_RED)
+                               cl->cl_qflags |= REDF_ECN;
+                       else if (flags & QFCF_RIO)
+                               cl->cl_qflags |= RIOF_ECN;
+               }
+               if (flags & QFCF_FLOWCTL) {
+                       if (flags & QFCF_SFB)
+                               cl->cl_qflags |= SFBF_FLOWCTL;
+               }
+               if (flags & QFCF_CLEARDSCP) {
+                       if (flags & QFCF_RIO)
+                               cl->cl_qflags |= RIOF_CLEARDSCP;
+               }
+#if CLASSQ_RED || CLASSQ_RIO
+               /*
+                * XXX: RED & RIO should be watching link speed and MTU
+                *      events and recompute pkttime accordingly.
+                */
+               if (ifbandwidth < 8)
+                       pkttime = 1000 * 1000 * 1000; /* 1 sec */
+               else
+                       pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
+                           (ifbandwidth / 8);
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RED
+               if (flags & QFCF_RED) {
+                       cl->cl_red = red_alloc(ifp, 0, 0,
+                           qlimit(&cl->cl_q) * 10/100,
+                           qlimit(&cl->cl_q) * 30/100,
+                           cl->cl_qflags, pkttime);
+                       if (cl->cl_red != NULL)
+                               qtype(&cl->cl_q) = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+               if (flags & QFCF_RIO) {
+                       cl->cl_rio =
+                           rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
+                       if (cl->cl_rio != NULL)
+                               qtype(&cl->cl_q) = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & QFCF_BLUE) {
+                       cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
+                       if (cl->cl_blue != NULL)
+                               qtype(&cl->cl_q) = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & QFCF_SFB) {
+                       if (!(cl->cl_flags & QFCF_LAZY))
+                               cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                                   qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY))
+                               qtype(&cl->cl_q) = Q_SFB;
+               }
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
+                   "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
+                   cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
+                   flags, QFCF_BITS);
+       }
+
+       return (cl);
+}
+
+int
+qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
+{
+       struct qfq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
+               return (EINVAL);
+
+       return (qfq_class_destroy(qif, cl));
+}
+
+static int
+qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       qfq_purgeq(qif, cl, 0, NULL, NULL);
+
+       if (cl->cl_inv_w != 0) {
+               qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
+               cl->cl_inv_w = 0;       /* reset weight so it isn't subtracted twice */
+       }
+
+       for (i = 0; i < qif->qif_maxclasses; i++) {
+               if (qif->qif_class_tbl[i] == cl) {
+                       qif->qif_class_tbl[i] = NULL;
+                       break;
+               }
+       }
+       qif->qif_classes--;
+
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       }
+
+       if (qif->qif_default == cl)
+               qif->qif_default = NULL;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
+       }
+
+       zfree(qfq_cl_zone, cl);
+
+       return (0);
+}
+
+/*
+ * Calculate a mask so that a plain ffs() on the result behaves like
+ * an ffs_from(), i.e. finds the first bit set at position >= from
+ */
+static inline pktsched_bitmap_t
+mask_from(pktsched_bitmap_t bitmap, int from)
+{
+       return (bitmap & ~((1UL << from) - 1));
+}
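+
+/*
+ * Illustrative example (not part of the original code): with
+ * bitmap = 0x2c (0b101100) and from = 3, the low mask is
+ * (1UL << 3) - 1 = 0b111, so the result is 0b101100 & ~0b111 =
+ * 0b101000; only bits at positions >= 3 survive, which is exactly
+ * the range an ffs_from() would scan.
+ */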
+
+/*
+ * The state computation relies on ER=0, IR=1, EB=2, IB=3.
+ * First compute eligibility by comparing grp->qfg_S with qif->qif_V;
+ * then check whether someone is blocking us and, if so, add EB to
+ * the state
+ */
+static inline u_int32_t
+qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
+{
+       /* if S > V we are not eligible */
+       u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
+       pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
+           grp->qfg_index);
+       struct qfq_group *next;
+
+       if (mask) {
+               next = qfq_ffs(qif, mask);
+               if (qfq_gt(grp->qfg_F, next->qfg_F))
+                       state |= EB;
+       }
+
+       return (state);
+}
+
+/*
+ * In principle
+ *     qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
+ *     qif->qif_bitmaps[src] &= ~mask;
+ * but the caller must make sure that src != dst
+ */
+static inline void
+qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
+{
+       qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
+       qif->qif_bitmaps[src] &= ~mask;
+}
+
+static inline void
+qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
+{
+       pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
+       struct qfq_group *next;
+
+       if (mask) {
+               next = qfq_ffs(qif, mask);
+               if (!qfq_gt(next->qfg_F, old_finish))
+                       return;
+       }
+
+       mask = (1UL << index) - 1;
+       qfq_move_groups(qif, mask, EB, ER);
+       qfq_move_groups(qif, mask, IB, IR);
+}
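+
+/*
+ * Illustrative example: with index = 5, mask = (1UL << 5) - 1 = 0x1f,
+ * so all groups with an index below the one that just finished are
+ * moved from EB to ER and from IB to IR in one step.
+ */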
+
+/*
+ * perhaps
+ *
+ *     old_V ^= qif->qif_V;
+ *     old_V >>= QFQ_MIN_SLOT_SHIFT;
+ *     if (old_V) {
+ *             ...
+ *     }
+ */
+static inline void
+qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
+{
+       pktsched_bitmap_t mask, vslot, old_vslot;
+
+       vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
+       old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
+
+       if (vslot != old_vslot) {
+               mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
+               qfq_move_groups(qif, mask, IR, ER);
+               qfq_move_groups(qif, mask, IB, EB);
+       }
+}
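+
+/*
+ * Illustrative example: if old_vslot = 5 (0b101) and vslot = 6 (0b110),
+ * then vslot ^ old_vslot = 0b011, __fls() of that is 1, and
+ * mask = (2UL << 1) - 1 = 0b11, so groups 0 and 1 move from the
+ * ineligible sets (IR, IB) to the eligible ones (ER, EB).
+ */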
+
+/*
+ * XXX we need slot to be less than 32; this is guaranteed by the
+ * input values.
+ * roundedS is always cl->cl_S rounded to grp->qfg_slot_shift bits.
+ */
+static inline void
+qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
+    struct qfq_class *cl, u_int64_t roundedS)
+{
+       u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
+       u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;
+
+       cl->cl_next = grp->qfg_slots[i];
+       grp->qfg_slots[i] = cl;
+       pktsched_bit_set(slot, &grp->qfg_full_slots);
+}
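+
+/*
+ * Illustrative example: if roundedS - grp->qfg_S equals three slot
+ * units (3 << qfg_slot_shift), the class lands in logical slot 3,
+ * i.e. physical bucket (qfg_front + 3) % qif_maxslots, and bit 3 of
+ * qfg_full_slots is set.
+ */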
+
+/*
+ * Remove the first class from the group's front bucket
+ */
+static inline void
+qfq_front_slot_remove(struct qfq_group *grp)
+{
+       struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];
+
+       *h = (*h)->cl_next;
+       if (!*h)
+               pktsched_bit_clr(0, &grp->qfg_full_slots);
+}
+
+/*
+ * Returns the first full queue in a group.  As a side effect,
+ * adjusts the bucket list so that the first non-empty bucket is at
+ * position 0 in qfg_full_slots.
+ */
+static inline struct qfq_class *
+qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
+{
+       int i;
+
+       if (pktsched_verbose > 2) {
+               log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
+                   grp->qfg_full_slots);
+       }
+
+       if (grp->qfg_full_slots == 0)
+               return (NULL);
+
+       i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */
+       if (i > 0) {
+               grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
+               grp->qfg_full_slots >>= i;
+       }
+
+       return (grp->qfg_slots[grp->qfg_front]);
+}
+
+/*
+ * Adjust the bucket list.  When the start time of a group decreases,
+ * we move the index down (modulo qif->qif_maxslots) so we don't need
+ * to move the objects.  The mask of occupied slots must be shifted
+ * because we use ffs() to find the first non-empty slot.
+ * This covers decreases in the group's start time, but what about
+ * increases of the start time?
+ * Here too we should make sure that i is less than 32.
+ */
+static inline void
+qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
+{
+       u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;
+
+       grp->qfg_full_slots <<= i;
+       grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
+}
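+
+/*
+ * Illustrative example: if the new roundedS is two slot units below
+ * grp->qfg_S, then i = 2, the occupancy bitmap shifts left by 2 and
+ * qfg_front moves back by 2, so existing classes keep their physical
+ * buckets while their logical slot numbers grow by 2.
+ */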
+
+static inline void
+qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
+{
+       pktsched_bitmap_t ineligible;
+
+       ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
+       if (ineligible) {
+               if (!qif->qif_bitmaps[ER]) {
+                       struct qfq_group *grp;
+                       grp = qfq_ffs(qif, ineligible);
+                       if (qfq_gt(grp->qfg_S, qif->qif_V))
+                               qif->qif_V = grp->qfg_S;
+               }
+               qfq_make_eligible(qif, old_V);
+       }
+}
+
+/*
+ * Updates the class; returns nonzero if the group also needs to be
+ * updated.
+ */
+static inline int
+qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
+    struct qfq_class *cl)
+{
+       cl->cl_S = cl->cl_F;
+       if (qempty(&cl->cl_q))  {
+               qfq_front_slot_remove(grp);
+       } else {
+               u_int32_t len;
+               u_int64_t roundedS;
+
+               len = m_pktlen(qhead(&cl->cl_q));
+               cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
+               roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
+               if (roundedS == grp->qfg_S)
+                       return (0);
+
+               qfq_front_slot_remove(grp);
+               qfq_slot_insert(qif, grp, cl, roundedS);
+       }
+       return (1);
+}
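+
+/*
+ * Illustrative example (using the constants from pktsched_qfq.h):
+ * after a weight-100 flow sends a 1500-byte packet, cl_S jumps to the
+ * old cl_F and the new cl_F is cl_S plus len * cl_inv_w, roughly
+ * 15 << QFQ_FRAC_BITS; if the new cl_S rounds down to the group's
+ * current qfg_S, the group itself needs no update.
+ */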
+
+/*
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
+struct mbuf *
+qfq_dequeue(struct qfq_if *qif, cqdq_op_t op)
+{
+       pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
+       struct ifclassq *ifq = qif->qif_ifq;
+       struct qfq_group *grp;
+       struct qfq_class *cl;
+       struct mbuf *m;
+       u_int64_t old_V;
+       u_int32_t len;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       for (;;) {
+               if (er_bits == 0) {
+#if QFQ_DEBUG
+                       if (qif->qif_queued && pktsched_verbose > 1)
+                               qfq_dump_sched(qif, "start dequeue");
+#endif /* QFQ_DEBUG */
+                       /* no eligible and ready packet */
+                       return (NULL);
+               }
+               grp = qfq_ffs(qif, er_bits);
+               /* if group is non-empty, use it */
+               if (grp->qfg_full_slots != 0)
+                       break;
+               pktsched_bit_clr(grp->qfg_index, &er_bits);
+#if QFQ_DEBUG
+               qif->qif_emptygrp++;
+#endif /* QFQ_DEBUG */
+       }
+       VERIFY(!IFCQ_IS_EMPTY(ifq));
+
+       cl = grp->qfg_slots[grp->qfg_front];
+       VERIFY(cl != NULL && !qempty(&cl->cl_q));
+
+       if (op == CLASSQDQ_POLL)
+               return (qfq_pollq(cl));
+
+       m = qfq_getq(cl);
+       VERIFY(m != NULL);      /* qalg must be work conserving */
+       len = m_pktlen(m);
+
+#if QFQ_DEBUG
+       qif->qif_queued--;
+#endif /* QFQ_DEBUG */
+
+       IFCQ_DEC_LEN(ifq);
+       if (qempty(&cl->cl_q))
+               cl->cl_period++;
+       PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
+       IFCQ_XMIT_ADD(ifq, 1, len);
+
+       old_V = qif->qif_V;
+       qif->qif_V += (u_int64_t)len * QFQ_IWSUM;
+
+       if (pktsched_verbose > 2) {
+               log(LOG_DEBUG, "%s: %s qid=%d dequeue m=%p F=0x%llx V=0x%llx\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle,
+                   m, cl->cl_F, qif->qif_V);
+       }
+
+       if (qfq_update_class(qif, grp, cl)) {
+               u_int64_t old_F = grp->qfg_F;
+
+               cl = qfq_slot_scan(qif, grp);
+               if (!cl) { /* group gone, remove from ER */
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
+               } else {
+                       u_int32_t s;
+                       u_int64_t roundedS =
+                           qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
+
+                       if (grp->qfg_S == roundedS)
+                               goto skip_unblock;
+
+                       grp->qfg_S = roundedS;
+                       grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
+
+                       /* remove from ER and put in the new set */
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
+                       s = qfq_calc_state(qif, grp);
+                       pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
+               }
+               /* we need to unblock even if the group has gone away */
+               qfq_unblock_groups(qif, grp->qfg_index, old_F);
+       }
+
+skip_unblock:
+       qfq_update_eligible(qif, old_V);
+
+#if QFQ_DEBUG
+       if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1)
+               qfq_dump_sched(qif, "end dequeue");
+#endif /* QFQ_DEBUG */
+
+       return (m);
+}
+
+/*
+ * Assign a reasonable start time for a new flow k in group i.
+ * Admissible values for hat(F) are multiples of sigma_i
+ * no greater than V + sigma_i.  Larger values mean that
+ * we had a wraparound, so we consider the timestamp to be stale.
+ *
+ * If F is not stale and F >= V then we set S = F.
+ * Otherwise we should assign S = V, but this may violate
+ * the ordering in ER. So, if we have groups in ER, set S to
+ * the F_j of the first group j which would be blocking us.
+ * We are guaranteed not to move S backward because
+ * otherwise our group i would still be blocked.
+ */
+static inline void
+qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
+{
+       pktsched_bitmap_t mask;
+       u_int64_t limit, roundedF;
+       int slot_shift = cl->cl_grp->qfg_slot_shift;
+
+       roundedF = qfq_round_down(cl->cl_F, slot_shift);
+       limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);
+
+       if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
+               /* timestamp was stale */
+               mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
+               if (mask) {
+                       struct qfq_group *next = qfq_ffs(qif, mask);
+                       if (qfq_gt(roundedF, next->qfg_F)) {
+                               cl->cl_S = next->qfg_F;
+                               return;
+                       }
+               }
+               cl->cl_S = qif->qif_V;
+       } else { /* timestamp is not stale */
+               cl->cl_S = cl->cl_F;
+       }
+}
+
+int
+qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       struct qfq_group *grp;
+       u_int64_t roundedS;
+       int len, ret, s;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cl == NULL || cl->cl_qif == qif);
+
+       if (cl == NULL) {
+               cl = qfq_clh_to_clp(qif, t->pftag_qid);
+               if (cl == NULL) {
+                       cl = qif->qif_default;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+
+       len = m_pktlen(m);
+
+       ret = qfq_addq(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+                       /* packet has been freed in qfq_addq */
+                       PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOT REACHED */
+               }
+       }
+       IFCQ_INC_LEN(ifq);
+
+#if QFQ_DEBUG
+       qif->qif_queued++;
+#endif /* QFQ_DEBUG */
+
+       /* queue was not idle, we're done */
+       if (qlen(&cl->cl_q) > 1)
+               goto done;
+
+       /* queue was idle */
+       grp = cl->cl_grp;
+       qfq_update_start(qif, cl);      /* adjust start time */
+
+       /* compute new finish time and rounded start */
+       cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
+       roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
+
+       /*
+        * Insert cl in the correct bucket.
+        *
+        * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
+        * and simply go to the insertion phase.  Otherwise grp->qfg_S is
+        * decreasing, we must make room in the bucket list, and also
+        * recompute the group state.  Finally, if there were no flows
+        * in this group and nobody was in ER make sure to adjust V.
+        */
+       if (grp->qfg_full_slots != 0) {
+               if (!qfq_gt(grp->qfg_S, cl->cl_S))
+                       goto skip_update;
+
+               /* create a slot for this cl->cl_S */
+               qfq_slot_rotate(qif, grp, roundedS);
+
+               /* group was surely ineligible, remove */
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
+       } else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
+               qif->qif_V = roundedS;
+       }
+
+       grp->qfg_S = roundedS;
+       grp->qfg_F =
+           roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */
+       s = qfq_calc_state(qif, grp);
+       pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
+
+       if (pktsched_verbose > 2) {
+               log(LOG_DEBUG, "%s: %s qid=%d enqueue m=%p state=%s 0x%x "
+                   "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
+                   qfq_style(qif), cl->cl_handle, m, qfq_state2str(s),
+                   qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
+       }
+
+skip_update:
+       qfq_slot_insert(qif, grp, cl, roundedS);
+
+done:
+       /* successfully queued. */
+       return (ret);
+}
+
+static inline void
+qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
+    struct qfq_class *cl)
+{
+       struct qfq_class **pprev;
+       u_int32_t i, offset;
+       u_int64_t roundedS;
+
+       roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
+       offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
+       i = (grp->qfg_front + offset) % qif->qif_maxslots;
+
+       pprev = &grp->qfg_slots[i];
+       while (*pprev && *pprev != cl)
+               pprev = &(*pprev)->cl_next;
+
+       *pprev = cl->cl_next;
+       if (!grp->qfg_slots[i])
+               pktsched_bit_clr(offset, &grp->qfg_full_slots);
+}
+
+/*
+ * Called to forcibly destroy a queue.
+ * If the queue is not in the front bucket, or if it has
+ * other queues in the front bucket, we can simply remove
+ * the queue with no other side effects.
+ * Otherwise we must propagate the event up.
+ * XXX description to be completed.
+ */
+static void
+qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
+{
+       struct qfq_group *grp = cl->cl_grp;
+       pktsched_bitmap_t mask;
+       u_int64_t roundedS;
+       int s;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
+                   "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
+                   "IR=0x%x,IB=0x%x}\n",
+                   if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
+                   cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
+                   grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
+                   qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
+#if QFQ_DEBUG
+               if (pktsched_verbose > 1)
+                       qfq_dump_sched(qif, "start deactivate");
+#endif /* QFQ_DEBUG */
+       }
+
+       cl->cl_F = cl->cl_S;    /* not needed if the class goes away */
+       qfq_slot_remove(qif, grp, cl);
+
+       if (grp->qfg_full_slots == 0) {
+               /*
+                * Nothing left in the group, remove from all sets.
+                * Do ER last because if we were blocking other groups
+                * we must unblock them.
+                */
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
+
+               if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
+                   !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
+                       mask = qif->qif_bitmaps[ER] &
+                           ((1UL << grp->qfg_index) - 1);
+                       if (mask)
+                               mask = ~((1UL << __fls(mask)) - 1);
+                       else
+                               mask = (pktsched_bitmap_t)~0UL;
+                       qfq_move_groups(qif, mask, EB, ER);
+                       qfq_move_groups(qif, mask, IB, IR);
+               }
+               pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
+       } else if (!grp->qfg_slots[grp->qfg_front]) {
+               cl = qfq_slot_scan(qif, grp);
+               roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
+               if (grp->qfg_S != roundedS) {
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
+                       pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
+                       grp->qfg_S = roundedS;
+                       grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
+                       s = qfq_calc_state(qif, grp);
+                       pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
+               }
+       }
+       qfq_update_eligible(qif, qif->qif_V);
+
+#if QFQ_DEBUG
+       if (pktsched_verbose > 1)
+               qfq_dump_sched(qif, "end deactivate");
+#endif /* QFQ_DEBUG */
+}
+
+static const char *
+qfq_state2str(int s)
+{
+       const char *c;
+
+       switch (s) {
+       case ER:
+               c = "ER";
+               break;
+       case IR:
+               c = "IR";
+               break;
+       case EB:
+               c = "EB";
+               break;
+       case IB:
+               c = "IB";
+               break;
+       default:
+               c = "?";
+               break;
+       }
+       return (c);
+}
+
+static inline int
+qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct qfq_if   *qif = cl->cl_qif;
+       struct ifclassq *ifq = qif->qif_ifq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_addq(cl->cl_red, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb == NULL) {
+                       struct ifnet *ifp = QFQIF_IFP(qif);
+
+                       VERIFY(cl->cl_flags & QFCF_LAZY);
+                       cl->cl_flags &= ~QFCF_LAZY;
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                           qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb == NULL) {
+                               /* fall back to droptail */
+                               qtype(&cl->cl_q) = Q_DROPTAIL;
+                               cl->cl_flags &= ~QFCF_SFB;
+                               cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: %s SFB lazy allocation "
+                                   "failed for qid=%d grp=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   qfq_style(qif), cl->cl_handle,
+                                   cl->cl_grp->qfg_index);
+                       } else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
+                               /* if there's pending throttling, set it */
+                               cqrq_throttle_t tr = { 1, qif->qif_throttle };
+                               int err = qfq_throttle(qif, &tr);
+
+                               if (err == EALREADY)
+                                       err = 0;
+                               if (err != 0) {
+                                       tr.level = IFNET_THROTTLE_OFF;
+                                       (void) qfq_throttle(qif, &tr);
+                               }
+                       }
+               }
+               if (cl->cl_sfb != NULL)
+                       return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
+       } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       if (cl->cl_flags & QFCF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       _addq(&cl->cl_q, m);
+
+       return (0);
+}
+
+static inline struct mbuf *
+qfq_getq(struct qfq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_getq(cl->cl_rio, &cl->cl_q));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_getq(cl->cl_red, &cl->cl_q));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_getq(cl->cl_blue, &cl->cl_q));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_getq(cl->cl_sfb, &cl->cl_q));
+
+       return (_getq(&cl->cl_q));
+}
+
+static inline struct mbuf *
+qfq_pollq(struct qfq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);
+
+       return (qhead(&cl->cl_q));
+}
+
+static void
+qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       u_int32_t cnt = 0, len = 0, qlen;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if ((qlen = qlen(&cl->cl_q)) == 0)
+               goto done;
+
+       /* become regular mutex before freeing mbufs */
+       IFCQ_CONVERT_LOCK(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
+       else
+               _flushq_flow(&cl->cl_q, flow, &cnt, &len);
+
+       if (cnt > 0) {
+               VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
+#if QFQ_DEBUG
+               VERIFY(qif->qif_queued >= cnt);
+               qif->qif_queued -= cnt;
+#endif /* QFQ_DEBUG */
+
+               PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
+               IFCQ_DROP_ADD(ifq, cnt, len);
+
+               VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
+               IFCQ_LEN(ifq) -= cnt;
+
+               if (qempty(&cl->cl_q))
+                       qfq_deactivate_class(qif, cl);
+
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
+                           "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
+                           if_name(QFQIF_IFP(qif)),
+                           qfq_style(qif), cl->cl_handle,
+                           (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
+                           qlen(&cl->cl_q), cnt, len, flow);
+               }
+       }
+done:
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+static void
+qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif),
+                   cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
+                   ifclassq_ev2str(ev));
+       }
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_updateq(cl->cl_rio, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_updateq(cl->cl_red, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_updateq(cl->cl_blue, ev));
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_updateq(cl->cl_sfb, ev));
+}
+
+int
+qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
+    struct qfq_classstats *sp)
+{
+       struct qfq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
+               return (EINVAL);
+
+       sp->class_handle = cl->cl_handle;
+       sp->index = cl->cl_grp->qfg_index;
+       sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
+       sp->lmax = cl->cl_lmax;
+       sp->qlength = qlen(&cl->cl_q);
+       sp->qlimit = qlimit(&cl->cl_q);
+       sp->period = cl->cl_period;
+       sp->xmitcnt = cl->cl_xmitcnt;
+       sp->dropcnt = cl->cl_dropcnt;
+
+       sp->qtype = qtype(&cl->cl_q);
+       sp->qstate = qstate(&cl->cl_q);
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_getstats(cl->cl_red, &sp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_getstats(cl->cl_rio, &sp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_getstats(cl->cl_blue, &sp->blue);
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_getstats(cl->cl_sfb, &sp->sfb);
+
+       return (0);
+}
+
+/* convert a class handle to the corresponding class pointer */
+static inline struct qfq_class *
+qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
+{
+       struct qfq_class *cl;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);
+
+       /*
+        * First, optimistically try the slot matching the lower bits of
+        * the handle.  If that fails, fall back to a linear table search.
+        */
+       i = chandle % qif->qif_maxclasses;
+       if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
+               return (cl);
+       for (i = 0; i < qif->qif_maxclasses; i++)
+               if ((cl = qif->qif_class_tbl[i]) != NULL &&
+                   cl->cl_handle == chandle)
+                       return (cl);
+
+       return (NULL);
+}
+
+static const char *
+qfq_style(struct qfq_if *qif)
+{
+       return ((qif->qif_flags & QFQIFF_ALTQ) ? "ALTQ_QFQ" : "QFQ");
+}
+
+/*
+ * Generic comparison function, handling wraparound
+ */
+static inline int
+qfq_gt(u_int64_t a, u_int64_t b)
+{
+       return ((int64_t)(a - b) > 0);
+}
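+
+/*
+ * Illustrative example: timestamps may wrap around, so
+ * qfq_gt(1, ~0ULL) evaluates (int64_t)(1 - 0xffffffffffffffffULL) =
+ * 2 > 0 and correctly reports that 1 is "after" the just-wrapped
+ * ~0ULL, whereas a plain unsigned comparison would not.
+ */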
+
+/*
+ * Round a precise timestamp to its slotted value
+ */
+static inline u_int64_t
+qfq_round_down(u_int64_t ts, u_int32_t shift)
+{
+       return (ts & ~((1ULL << shift) - 1));
+}
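+
+/*
+ * Illustrative example: qfq_round_down(0x12345, 8) clears the low
+ * 8 bits and yields 0x12300, the start of the slot containing the
+ * timestamp.
+ */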
+
+/*
+ * Return the pointer to the group with lowest index in the bitmap
+ */
+static inline struct qfq_group *
+qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
+{
+       int index = pktsched_ffs(bitmap) - 1;   /* zero-based */
+       VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
+           qif->qif_groups[index] != NULL);
+       return (qif->qif_groups[index]);
+}
+
+/*
+ * Calculate a flow index, given its weight and maximum packet length.
+ * index = log_2(maxlen/weight) but we need to apply the scaling.
+ * This is used only once at flow creation.
+ */
+static int
+qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
+{
+       u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
+       pktsched_bitmap_t size_map;
+       int index = 0;
+
+       size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
+       if (!size_map)
+               goto out;
+
+       index = __fls(size_map) + 1;    /* basically a log_2() */
+       index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
+
+       if (index < 0)
+               index = 0;
+out:
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
+                   if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
+                   cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP/inv_w),
+                   maxlen, index);
+       }
+       return (index);
+}
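+
+/*
+ * Worked example (using the constants from pktsched_qfq.h): for
+ * weight 100 and maxlen 1500, inv_w is QFQ_ONE_FP / 100 and
+ * slot_size = 1500 * inv_w, roughly 15 << QFQ_FRAC_BITS; shifting by
+ * QFQ_MIN_SLOT_SHIFT (22) leaves a size_map of about 3840, whose
+ * __fls() is 11, so the flow gets index 12 -- the smallest group
+ * whose slot size covers len/weight = 15 units of virtual time.
+ */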
+
+#if QFQ_DEBUG
+static void
+qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
+{
+       int i, j;
+
+       for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
+               struct qfq_group *g = qif->qif_groups[i];
+
+               if (0 == (mask & (1 << i)))
+                       continue;
+               if (g == NULL)
+                       continue;
+
+               log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
+                   g->qfg_full_slots);
+               log(LOG_DEBUG, "%s: %s             S 0x%20llx F 0x%llx %c\n",
+                   if_name(QFQIF_IFP(qif)), qfq_style(qif),
+                   g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');
+
+               for (j = 0; j < qif->qif_maxslots; j++) {
+                       if (g->qfg_slots[j]) {
+                               log(LOG_DEBUG, "%s: %s      bucket %d %p "
+                                   "qid %d\n", if_name(QFQIF_IFP(qif)),
+                                   qfq_style(qif), j, g->qfg_slots[j],
+                                   g->qfg_slots[j]->cl_handle);
+                       }
+               }
+       }
+}
+
+static void
+qfq_dump_sched(struct qfq_if *qif, const char *msg)
+{
+       log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
+       log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
+           qif->qif_queued, qif->qif_V);
+       log(LOG_DEBUG, "%s: %s      ER 0x%08x\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
+       log(LOG_DEBUG, "%s: %s      EB 0x%08x\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
+       log(LOG_DEBUG, "%s: %s      IR 0x%08x\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
+       log(LOG_DEBUG, "%s: %s      IB 0x%08x\n",
+           if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
+       qfq_dump_groups(qif, 0xffffffff);
+}
+#endif /* QFQ_DEBUG */
+
+/*
+ * qfq_enqueue_ifclassq is an enqueue function to be registered to
+ * (*ifcq_enqueue) in struct ifclassq.
+ */
+static int
+qfq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
+{
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               log(LOG_ERR, "%s: packet does not have pkthdr\n",
+                   if_name(ifq->ifcq_ifp));
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       i = MBUF_SCIDX(mbuf_get_service_class(m));
+       VERIFY((u_int32_t)i < IFCQ_SC_MAX);
+
+       return (qfq_enqueue(ifq->ifcq_disc,
+           ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
+}
+
+/*
+ * qfq_dequeue_ifclassq is a dequeue function to be registered to
+ * (*ifcq_dequeue) in struct ifclassq.
+ *
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
+static struct mbuf *
+qfq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
+{
+       return (qfq_dequeue(ifq->ifcq_disc, op));
+}
+
+static int
+qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
+{
+       struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       switch (req) {
+       case CLASSQRQ_PURGE:
+               qfq_purge(qif);
+               break;
+
+       case CLASSQRQ_PURGE_SC:
+               qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
+               break;
+
+       case CLASSQRQ_EVENT:
+               qfq_event(qif, (cqev_t)arg);
+               break;
+
+       case CLASSQRQ_THROTTLE:
+               err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
+               break;
+       }
+       return (err);
+}
+
+int
+qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
+       struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
+       struct qfq_if *qif;
+       u_int32_t maxlen = 0, qflags = 0;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+
+       if (flags & PKTSCHEDF_QALG_RED)
+               qflags |= QFCF_RED;
+       if (flags & PKTSCHEDF_QALG_RIO)
+               qflags |= QFCF_RIO;
+       if (flags & PKTSCHEDF_QALG_BLUE)
+               qflags |= QFCF_BLUE;
+       if (flags & PKTSCHEDF_QALG_SFB)
+               qflags |= QFCF_SFB;
+       if (flags & PKTSCHEDF_QALG_ECN)
+               qflags |= QFCF_ECN;
+       if (flags & PKTSCHEDF_QALG_FLOWCTL)
+               qflags |= QFCF_FLOWCTL;
+
+       qif = qfq_alloc(ifp, M_WAITOK, FALSE);
+       if (qif == NULL)
+               return (ENOMEM);
+
+       if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
+               maxlen = if_sndq_maxlen;
+
+       if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
+           qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
+           qflags | QFCF_LAZY, SCIDX_BK, &cl1)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
+           qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
+           qflags | QFCF_LAZY, SCIDX_RD, &cl3)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
+           qflags | QFCF_LAZY, SCIDX_OAM, &cl4)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
+           qflags | QFCF_LAZY, SCIDX_AV, &cl5)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
+           qflags | QFCF_LAZY, SCIDX_RV, &cl6)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
+           qflags | QFCF_LAZY, SCIDX_VI, &cl7)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
+           qflags | QFCF_LAZY, SCIDX_VO, &cl8)) != 0)
+               goto cleanup;
+
+       if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
+           qflags, SCIDX_CTL, &cl9)) != 0)
+               goto cleanup;
+
+       err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
+           qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
+           qfq_request_ifclassq);
+
+       /* cache these for faster lookup */
+       if (err == 0) {
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;
+
+               ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
+               ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;
+
+               ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
+               ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;
+
+               ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
+               ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;
+
+               ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
+               ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;
+
+               ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
+               ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;
+
+               ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
+               ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;
+
+               ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
+               ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;
+
+               ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
+               ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;
+
+               ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
+               ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
+       }
+
+cleanup:
+       if (err != 0)
+               (void) qfq_destroy_locked(qif);
+
+       return (err);
+}
+
+int
+qfq_teardown_ifclassq(struct ifclassq *ifq)
+{
+       struct qfq_if *qif = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);
+
+       (void) qfq_destroy_locked(qif);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       struct qfq_if *qif = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_qfq_stats));
+}
+
+static int
+qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       struct qfq_class *cl;
+       int err;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(!(qif->qif_flags & QFQIFF_ALTQ));
+
+       if (!tr->set) {
+               tr->level = qif->qif_throttle;
+               return (0);
+       }
+
+       if (tr->level == qif->qif_throttle)
+               return (EALREADY);
+
+       /* Current throttling levels only involve BK_SYS class */
+       cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;
+
+       switch (tr->level) {
+       case IFNET_THROTTLE_OFF:
+               err = qfq_resumeq(qif, cl);
+               break;
+
+       case IFNET_THROTTLE_OPPORTUNISTIC:
+               err = qfq_suspendq(qif, cl);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       if (err == 0 || err == ENXIO) {
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
+                           if_name(QFQIF_IFP(qif)), qfq_style(qif),
+                           (err == 0) ? "" : "lazy ", qif->qif_throttle,
+                           tr->level);
+               }
+               qif->qif_throttle = tr->level;
+               if (err != 0)
+                       err = 0;
+               else
+                       qfq_purgeq(qif, cl, 0, NULL, NULL);
+       } else {
+               log(LOG_ERR, "%s: %s unable to set throttling level "
+                   "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
+                   qfq_style(qif), qif->qif_throttle, tr->level, err);
+       }
+
+       return (err);
+}
+
+static int
+qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
+
+       if (err == 0)
+               qstate(&cl->cl_q) = QS_RUNNING;
+
+       return (err);
+}
+
+static int
+qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
+{
+       struct ifclassq *ifq = qif->qif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb != NULL) {
+                       err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
+               } else {
+                       VERIFY(cl->cl_flags & QFCF_LAZY);
+                       err = ENXIO;    /* delayed throttling */
+               }
+       }
+
+       if (err == 0 || err == ENXIO)
+               qstate(&cl->cl_q) = QS_SUSPENDED;
+
+       return (err);
+}
diff --git a/bsd/net/pktsched/pktsched_qfq.h b/bsd/net/pktsched/pktsched_qfq.h
new file mode 100644 (file)
index 0000000..825cc92
--- /dev/null
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_QFQ_H_
+#define        _NET_PKTSCHED_PKTSCHED_QFQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* qfq class flags */
+#define        QFCF_RED                0x0001  /* use RED */
+#define        QFCF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        QFCF_RIO                0x0004  /* use RIO */
+#define        QFCF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+#define        QFCF_BLUE               0x0100  /* use BLUE */
+#define        QFCF_SFB                0x0200  /* use SFB */
+#define        QFCF_FLOWCTL            0x0400  /* enable flow control advisories */
+#define        QFCF_DEFAULTCLASS       0x1000  /* default class */
+#ifdef BSD_KERNEL_PRIVATE
+#define        QFCF_LAZY               0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        QFCF_USERFLAGS                                                  \
+       (QFCF_RED | QFCF_ECN | QFCF_RIO | QFCF_CLEARDSCP | QFCF_BLUE |  \
+       QFCF_SFB | QFCF_FLOWCTL | QFCF_DEFAULTCLASS)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        QFCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\35LAZY"
+#else
+#define        QFCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
+#endif /* !BSD_KERNEL_PRIVATE */
+
+#define        QFQ_MAX_CLASSES         32
+#define        QFQ_MAX_WSHIFT          16      /* log2(max_weight) */
+#define        QFQ_MAX_WEIGHT          (1 << QFQ_MAX_WSHIFT)
+
+struct qfq_classstats {
+       u_int32_t               class_handle;
+       u_int32_t               index;
+       u_int32_t               weight;
+       u_int32_t               lmax;
+
+       u_int32_t               qlength;
+       u_int32_t               qlimit;
+       u_int32_t               period;
+       struct pktcntr          xmitcnt;  /* transmitted packet counter */
+       struct pktcntr          dropcnt;  /* dropped packet counter */
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t           qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t          qstate;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        QFQ_DEBUG       1       /* enable extra debugging */
+
+/*
+ * Virtual time computations.
+ *
+ * S, F and V are all computed in fixed point arithmetic with
+ * FRAC_BITS decimal bits.
+ *
+ * QFQ_MAX_INDEX is the maximum index allowed for a group. We need
+ * one bit per index.
+ *
+ * QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
+ * The layout of the bits is as below:
+ *
+ *                 [ MTU_SHIFT ][      FRAC_BITS    ]
+ *                 [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
+ *                              ^.__grp->index = 0
+ *                              *.__grp->slot_shift
+ *
+ * where MIN_SLOT_SHIFT is derived by difference from the others.
+ *
+ * The max group index corresponds to Lmax/w_min, where
+ *     Lmax=1<<MTU_SHIFT, w_min = 1.
+ * From this, and knowing how many groups (MAX_INDEX) we want,
+ * we can derive the shift corresponding to each group.
+ *
+ * Because we often need to compute
+ *     F = S + len/w_i  and V = V + len/wsum
+ * instead of storing w_i we store the value
+ *     inv_w = (1<<FRAC_BITS)/w_i
+ * so we can do F = S + len * inv_w, and V = V + len * (ONE_FP/wsum).
+ * We use W_TOT in the formulas so we can easily move between
+ * static and adaptive weight sum.
+ *
+ * The per-scheduler-instance data contain all the data structures
+ * for the scheduler: bitmaps and bucket lists.
+ */
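+
+/*
+ * Worked example of the fixed point encoding: with FRAC_BITS = 30, a
+ * flow of weight 100 stores inv_w = (1 << 30) / 100; dequeueing a
+ * 1500-byte packet advances its finish time by 1500 * inv_w, about
+ * 15 << 30, i.e. len/weight = 15 units of virtual time, while the
+ * system virtual time V advances by 1500 * QFQ_IWSUM.
+ */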
+
+/*
+ * Shifts used for class<->group mapping.  Class weights are in the
+ * range [1, QFQ_MAX_WEIGHT], we need to map each class i to the
+ * group with the smallest index that can support the L_i / r_i
+ * configured for the class.
+ *
+ * grp->qfg_index is the index of the group; and grp->qfg_slot_shift
+ * is the shift for the corresponding (scaled) sigma_i.
+ *
+ * When computing the group index, we do (len<<FRAC_BITS)/weight,
+ * then compute an FLS (which is like a log2()), and if the result
+ * is below the MAX_INDEX region we use 0 (which is the same as
+ * using a larger len).
+ */
+#define        QFQ_MAX_INDEX           19
+#define        QFQ_MAX_WSUM            (2 * QFQ_MAX_WEIGHT)
+
+#define        QFQ_FRAC_BITS           30      /* fixed point arithmetic */
+#define        QFQ_ONE_FP              (1UL << QFQ_FRAC_BITS)
+#define        QFQ_IWSUM               (QFQ_ONE_FP / QFQ_MAX_WSUM)
+
+#define        QFQ_MTU_SHIFT           11      /* log2(max_len) */
+#define        QFQ_MIN_SLOT_SHIFT      (QFQ_FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
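+
+/*
+ * With the values above, QFQ_MIN_SLOT_SHIFT evaluates to
+ * 30 + 11 - 19 = 22 and QFQ_IWSUM to (1 << 30) / 131072 = 8192.
+ */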
+
+/*
+ * Possible group states, also indexes for the bitmaps array in
+ * struct qfq_if. We rely on ER, IR, EB, IB being numbered 0..3
+ */
+enum qfq_state {
+       ER = 0,                         /* eligible, ready */
+       IR = 1,                         /* ineligible, ready */
+       EB = 2,                         /* eligible, backlogged */
+       IB = 3,                         /* ineligible, backlogged */
+       QFQ_MAX_STATE
+};
+
+struct qfq_group;
+
+struct qfq_class {
+       u_int32_t       cl_handle;      /* class handle */
+       class_queue_t   cl_q;           /* class queue structure */
+       u_int32_t       cl_qflags;      /* class queue flags */
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } cl_qalg;
+       struct qfq_if   *cl_qif;        /* back pointer to qif */
+       u_int32_t       cl_flags;       /* class flags */
+
+       u_int64_t       cl_S, cl_F;     /* flow timestamps (exact) */
+       struct qfq_class *cl_next;      /* link for the slot list */
+       /*
+        * Group we belong to.  In principle we would need the index,
+        * which is log_2(lmax/weight), but we never reference it
+        * directly, only the group.
+        */
+       struct qfq_group *cl_grp;
+       u_int32_t       cl_inv_w;       /* QFQ_ONE_FP/weight */
+       u_int32_t       cl_lmax;        /* max packet size for this flow */
+
+       /* statistics */
+       u_int32_t       cl_period;      /* backlog period */
+       struct pktcntr  cl_xmitcnt;     /* transmitted packet counter */
+       struct pktcntr  cl_dropcnt;     /* dropped packet counter */
+};
+
+#define        cl_red  cl_qalg.red
+#define        cl_rio  cl_qalg.rio
+#define        cl_blue cl_qalg.blue
+#define        cl_sfb  cl_qalg.sfb
+
+/*
+ * Group descriptor, see the paper for details.
+ * Basically this contains the bucket lists.
+ */
+struct qfq_group {
+       u_int64_t       qfg_S, qfg_F;   /* group timestamps (approx) */
+       u_int8_t        qfg_slot_shift; /* slot shift */
+       u_int8_t        qfg_index;      /* group index */
+       u_int8_t        qfg_front;      /* index of the front slot */
+       pktsched_bitmap_t qfg_full_slots; /* non-empty slots */
+
+       /* array of lists of active classes */
+       struct qfq_class **qfg_slots;
+};
+
+/* qfq_if flags */
+#define        QFQIFF_ALTQ             0x1     /* configured via PF/ALTQ */
+
+/*
+ * qfq interface state
+ */
+struct qfq_if {
+       struct ifclassq         *qif_ifq;       /* backpointer to ifclassq */
+       u_int32_t               qif_flags;      /* flags */
+       u_int32_t               qif_throttle;   /* throttling level */
+       u_int8_t                qif_classes;    /* # of classes in table */
+       u_int8_t                qif_maxclasses; /* max # of classes in table */
+       u_int8_t                qif_maxslots;   /* max # of slots */
+       struct qfq_class        *qif_default;   /* default class */
+       struct qfq_class        **qif_class_tbl;
+
+       u_int64_t               qif_V;          /* precise virtual time */
+       u_int32_t               qif_wsum;       /* weight sum */
+#if QFQ_DEBUG
+       u_int32_t               qif_i_wsum;     /* QFQ_ONE_FP/w_sum */
+       u_int32_t               qif_queued;     /* debugging */
+       u_int32_t               qif_emptygrp;   /* debugging */
+#endif /* QFQ_DEBUG */
+       pktsched_bitmap_t       qif_bitmaps[QFQ_MAX_STATE]; /* group bitmaps */
+       struct qfq_group        **qif_groups;   /* the groups */
+};
+
+#define        QFQIF_IFP(_qif) ((_qif)->qif_ifq->ifcq_ifp)
+
+struct if_ifclassq_stats;
+
+extern void qfq_init(void);
+extern struct qfq_if *qfq_alloc(struct ifnet *, int, boolean_t);
+extern int qfq_destroy(struct qfq_if *);
+extern void qfq_purge(struct qfq_if *);
+extern void qfq_event(struct qfq_if *, cqev_t);
+extern int qfq_add_queue(struct qfq_if *, u_int32_t, u_int32_t, u_int32_t,
+    u_int32_t, u_int32_t, struct qfq_class **);
+extern int qfq_remove_queue(struct qfq_if *, u_int32_t);
+extern int qfq_get_class_stats(struct qfq_if *, u_int32_t,
+    struct qfq_classstats *);
+extern int qfq_enqueue(struct qfq_if *, struct qfq_class *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *qfq_dequeue(struct qfq_if *, cqdq_op_t);
+extern int qfq_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int qfq_teardown_ifclassq(struct ifclassq *ifq);
+extern int qfq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
+    struct if_ifclassq_stats *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_QFQ_H_ */
diff --git a/bsd/net/pktsched/pktsched_rmclass.c b/bsd/net/pktsched/pktsched_rmclass.c
new file mode 100644
index 0000000..a5f8e5a
--- /dev/null
+++ b/bsd/net/pktsched/pktsched_rmclass.c
@@ -0,0 +1,1849 @@
+/*
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*     $OpenBSD: altq_rmclass.c,v 1.13 2007/09/13 20:40:02 chl Exp $   */
+/*     $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $      */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the Network Research
+ *      Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1997.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ */
+
+#include <sys/cdefs.h>
+
+#ident "@(#)rm_class.c  1.48     97/12/05 SMI"
+
+#if PKTSCHED_CBQ
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/kernel_types.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+#include <net/pktsched/pktsched.h>
+#include <net/pktsched/pktsched_rmclass.h>
+#include <net/pktsched/pktsched_rmclass_debug.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+/*
+ * Local Macros
+ */
+
+#define        reset_cutoff(ifd)       { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int     rmc_satisfied(struct rm_class *, struct timeval *);
+static void    rmc_wrr_set_weights(struct rm_ifdat *);
+static void    rmc_depth_compute(struct rm_class *);
+static void    rmc_depth_recompute(rm_class_t *);
+
+static struct mbuf *_rmc_wrr_dequeue_next(struct rm_ifdat *, cqdq_op_t);
+static struct mbuf *_rmc_prr_dequeue_next(struct rm_ifdat *, cqdq_op_t);
+
+static int     _rmc_addq(rm_class_t *, struct mbuf *, struct pf_mtag *);
+static void    _rmc_dropq(rm_class_t *);
+static struct mbuf *_rmc_getq(rm_class_t *);
+static struct mbuf *_rmc_pollq(rm_class_t *);
+
+static int     rmc_under_limit(struct rm_class *, struct timeval *);
+static void    rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void    rmc_drop_action(struct rm_class *);
+static void    rmc_restart(struct rm_class *);
+static void    rmc_root_overlimit(rm_class_t *, rm_class_t *);
+
+#define        RMC_ZONE_MAX    32              /* maximum elements in zone */
+#define        RMC_ZONE_NAME   "pktsched_cbq_cl" /* zone name (CBQ for now) */
+
+static unsigned int rmc_size;          /* size of zone element */
+static struct zone *rmc_zone;          /* zone for rm_class */
+
+void
+rmclass_init(void)
+{
+       if (rmc_zone != NULL)
+               return;
+
+       rmc_size = sizeof (struct rm_class);
+       rmc_zone = zinit(rmc_size, RMC_ZONE_MAX * rmc_size, 0, RMC_ZONE_NAME);
+       if (rmc_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, RMC_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(rmc_zone, Z_EXPAND, TRUE);
+       zone_change(rmc_zone, Z_CALLERACCT, TRUE);
+}
+
+#define        BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class being borrowed from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (avgidle is close to minidle),
+ * use the borrowing class's own offtime to avoid overload.
+ */
+#define        ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now the heuristics for setting the top-level variable (cutoff_) become:
+ *     1. if a packet arrives for a not-overlimit class, set cutoff
+ *        to the depth of the class.
+ *     2. if cutoff is i, and a packet arrives for an overlimit class
+ *        with an underlimit ancestor at a lower level than i (say j),
+ *        then set cutoff to j.
+ *     3. at scheduling a packet, if there is no underlimit class
+ *        due to the current cutoff level, increase cutoff by 1 and
+ *        then try to schedule again.
+ */
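+
+/*
+ * A small worked example of the heuristics above (illustrative
+ * topology, not from the original): take root (depth 2) -> interior
+ * (depth 1) -> leaf (depth 0).  A packet arriving for the leaf while
+ * it is not overlimit sets cutoff_ to 0 (rule 1); if cutoff_ is 2 and
+ * a packet arrives for the overlimit leaf while the interior class is
+ * underlimit, cutoff_ drops to 1 (rule 2); and if the scheduler then
+ * finds no underlimit class at the current cutoff_, it increases
+ * cutoff_ by 1 and retries (rule 3).
+ */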
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
+ *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
+ *              than 100% of the bandwidth, this number should be the
+ *              'effective' rate for the class.  Let f be the
+ *              bandwidth fraction allocated to this class, and let
+ *              nsPerByte be the data rate of the output link in
+ *              nanoseconds/byte.  Then nsecPerByte is set to
+ *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
+ *              for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action       the routine to call when the class is over limit.
+ *
+ * maxq         max allowable queue size for class (in packets).
+ *
+ * parent       parent class pointer.
+ *
+ * borrow       class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle      max value allowed for class 'idle' time estimate (this
+ *              parameter determines how large an initial burst of packets
+ *              can be before overlimit action is invoked).
+ *
+ * offtime      how long 'delay' action will delay when class goes over
+ *              limit (this parameter determines the steady-state burst
+ *              size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following:  If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ *   ptime = s * nsPerByte * (1 - f) / f
+ *   maxidle = ptime * (1 - g^b) / g^b
+ *   minidle = -ptime * (1 / (f - 1))
+ *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers.  This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.)  Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value.  Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
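+ *
+ * For instance (illustrative numbers, assuming RM_FILTER_GAIN = 5 as
+ * in ALTQ): on a 10Mb/s link with f = 1 (nsecPerByte = 800), a desired
+ * maxidle of 1ms = 10^6 ns would be passed in as
+ *	maxidle = 10^6 * 2^5 * 8 / (1000 * 800) = 320,
+ * and the code below recovers (320 * 800) / 8 = 32000, i.e. 1000us
+ * scaled up by 2^RM_FILTER_GAIN.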
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ *	maxidle = maxidle * (8.0 / nsecPerByte);
+ *     offtime = offtime * (8.0 / nsecPerByte);
+ */
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
+    void (*action)(rm_class_t *, rm_class_t *), u_int32_t qid, u_int32_t maxq,
+    struct rm_class *parent, struct rm_class *borrow, u_int32_t maxidle,
+    int minidle, u_int32_t offtime, int pktsize, int flags)
+{
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct rm_class *cl;
+       struct rm_class *peer;
+
+       if (nsecPerByte == 0) {
+               log(LOG_ERR, "%s: invalid inverse data rate\n", __func__);
+               return (NULL);
+       }
+
+       if (pri >= RM_MAXPRIO) {
+               log(LOG_ERR, "%s: priority %d out of range! (max %d)\n",
+                   __func__, pri, RM_MAXPRIO - 1);
+               return (NULL);
+       }
+
+#if !CLASSQ_RED
+       if (flags & RMCF_RED) {
+               log(LOG_ERR, "%s: RED not configured for CBQ!\n", __func__);
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & RMCF_RIO) {
+               log(LOG_ERR, "%s: RIO not configured for CBQ!\n", __func__);
+               return (NULL);
+       }
+#endif /* !CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & RMCF_BLUE) {
+               log(LOG_ERR, "%s: BLUE not configured for CBQ!\n", __func__);
+               return (NULL);
+       }
+#endif /* !CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) &&
+           (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RED &&
+           (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_RIO &&
+           (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_BLUE &&
+           (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) != RMCF_SFB) {
+               log(LOG_ERR, "%s: RED|RIO|BLUE|SFB mutually exclusive\n",
+                   __func__);
+               return (NULL);
+       }
+
+       cl = zalloc(rmc_zone);
+       if (cl == NULL)
+               return (NULL);
+
+       bzero(cl, rmc_size);
+       CALLOUT_INIT(&cl->callout_);
+
+       /*
+        * Class initialization.
+        */
+       cl->children_ = NULL;
+       cl->parent_ = parent;
+       cl->borrow_ = borrow;
+       cl->leaf_ = 1;
+       cl->ifdat_ = ifd;
+       cl->pri_ = pri;
+       cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+       cl->depth_ = 0;
+       cl->qthresh_ = 0;
+       cl->ns_per_byte_ = nsecPerByte;
+
+       ifq = ifd->ifq_;
+       ifp = ifq->ifcq_ifp;
+
+       if (maxq == 0 || maxq > IFCQ_MAXLEN(ifq)) {
+               maxq = IFCQ_MAXLEN(ifq);
+               if (maxq == 0)
+                       maxq = DEFAULT_QLIMIT;  /* use default */
+       }
+       _qinit(&cl->q_, Q_DROPHEAD, maxq);
+
+       cl->flags_ = flags;
+
+       cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+       if (cl->minidle_ > 0)
+               cl->minidle_ = 0;
+
+       cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+       if (cl->maxidle_ == 0)
+               cl->maxidle_ = 1;
+
+       cl->avgidle_ = cl->maxidle_;
+       cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+       if (cl->offtime_ == 0)
+               cl->offtime_ = 1;
+
+       cl->overlimit = action;
+
+       if (flags & (RMCF_RED|RMCF_RIO|RMCF_BLUE|RMCF_SFB)) {
+               int pkttime;
+
+               cl->qflags_ = 0;
+               if (flags & RMCF_ECN) {
+                       if (flags & RMCF_BLUE)
+                               cl->qflags_ |= BLUEF_ECN;
+                       else if (flags & RMCF_SFB)
+                               cl->qflags_ |= SFBF_ECN;
+                       else if (flags & RMCF_RED)
+                               cl->qflags_ |= REDF_ECN;
+                       else if (flags & RMCF_RIO)
+                               cl->qflags_ |= RIOF_ECN;
+               }
+               if (flags & RMCF_FLOWCTL) {
+                       if (flags & RMCF_SFB)
+                               cl->qflags_ |= SFBF_FLOWCTL;
+               }
+               if (flags & RMCF_FLOWVALVE) {
+                       if (flags & RMCF_RED)
+                               cl->qflags_ |= REDF_FLOWVALVE;
+               }
+               if (flags & RMCF_CLEARDSCP) {
+                       if (flags & RMCF_RIO)
+                               cl->qflags_ |= RIOF_CLEARDSCP;
+               }
+               pkttime = nsecPerByte * pktsize  / 1000;
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RED
+               if (flags & RMCF_RED) {
+                       cl->red_ = red_alloc(ifp, 0, 0,
+                           qlimit(&cl->q_) * 10/100,
+                           qlimit(&cl->q_) * 30/100,
+                           cl->qflags_, pkttime);
+                       if (cl->red_ != NULL)
+                               qtype(&cl->q_) = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+               if (flags & RMCF_RIO) {
+                       cl->rio_ =
+                           rio_alloc(ifp, 0, NULL, cl->qflags_, pkttime);
+                       if (cl->rio_ != NULL)
+                               qtype(&cl->q_) = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & RMCF_BLUE) {
+                       cl->blue_ = blue_alloc(ifp, 0, 0, cl->qflags_);
+                       if (cl->blue_ != NULL)
+                               qtype(&cl->q_) = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & RMCF_SFB) {
+                       if (!(cl->flags_ & RMCF_LAZY))
+                               cl->sfb_ = sfb_alloc(ifp, qid,
+                                   qlimit(&cl->q_), cl->qflags_);
+                       if (cl->sfb_ != NULL || (cl->flags_ & RMCF_LAZY))
+                               qtype(&cl->q_) = Q_SFB;
+               }
+       }
+
+       /*
+        * put the class into the class tree
+        */
+       if ((peer = ifd->active_[pri]) != NULL) {
+               /* find the last class at this pri */
+               cl->peer_ = peer;
+               while (peer->peer_ != ifd->active_[pri])
+                       peer = peer->peer_;
+               peer->peer_ = cl;
+       } else {
+               ifd->active_[pri] = cl;
+               cl->peer_ = cl;
+       }
+
+       if (cl->parent_) {
+               cl->next_ = parent->children_;
+               parent->children_ = cl;
+               parent->leaf_ = 0;
+       }
+
+       /*
+        * Compute the depth of this class and its ancestors in the class
+        * hierarchy.
+        */
+       rmc_depth_compute(cl);
+
+       /*
+        * If CBQ's WRR is enabled, then initialize the class WRR state.
+        */
+       if (ifd->wrr_) {
+               ifd->num_[pri]++;
+               ifd->alloc_[pri] += cl->allotment_;
+               rmc_wrr_set_weights(ifd);
+       }
+       return (cl);
+}
+
+int
+rmc_modclass(struct rm_class *cl, u_int32_t nsecPerByte, int maxq,
+    u_int32_t maxidle, int minidle, u_int32_t offtime, int pktsize)
+{
+#pragma unused(pktsize)
+       struct rm_ifdat *ifd;
+       u_int32_t        old_allotment;
+
+       ifd = cl->ifdat_;
+       old_allotment = cl->allotment_;
+
+       cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+       cl->qthresh_ = 0;
+       cl->ns_per_byte_ = nsecPerByte;
+
+       qlimit(&cl->q_) = maxq;
+
+       cl->minidle_ = (minidle * nsecPerByte) / 8;
+       if (cl->minidle_ > 0)
+               cl->minidle_ = 0;
+
+       cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+       if (cl->maxidle_ == 0)
+               cl->maxidle_ = 1;
+
+       cl->avgidle_ = cl->maxidle_;
+       cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+       if (cl->offtime_ == 0)
+               cl->offtime_ = 1;
+
+       /*
+        * If CBQ's WRR is enabled, then initialize the class WRR state.
+        */
+       if (ifd->wrr_) {
+               ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+               rmc_wrr_set_weights(ifd);
+       }
+       return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ *	the appropriate round-robin weights for the CBQ weighted round-robin
+ *     algorithm.
+ *
+ *     Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+       int             i;
+       struct rm_class *cl, *clh;
+
+       for (i = 0; i < RM_MAXPRIO; i++) {
+               /*
+                * This is inverted from that of the simulator to
+                * maintain precision.
+                */
+               if (ifd->num_[i] == 0) {
+                       ifd->M_[i] = 0;
+               } else {
+                       ifd->M_[i] =
+                           ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_);
+               }
+               /*
+                * Compute the weighted allotment for each class.
+                * This takes the expensive div instruction out
+                * of the main loop for the wrr scheduling path.
+                * These only get recomputed when a class comes or
+                * goes.
+                */
+               if (ifd->active_[i] != NULL) {
+                       clh = cl = ifd->active_[i];
+                       do {
+                               /* safe-guard for slow link or alloc_ == 0 */
+                               if (ifd->M_[i] == 0) {
+                                       cl->w_allotment_ = 0;
+                               } else {
+                                       cl->w_allotment_ =
+                                           cl->allotment_ / ifd->M_[i];
+                               }
+                               cl = cl->peer_;
+                       } while ((cl != NULL) && (cl != clh));
+               }
+       }
+}
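+
+/*
+ * A worked illustration of the weight computation above (figures
+ * invented): if priority i has two classes sharing alloc_[i] =
+ * 1250000 bytes/sec and maxpkt_ = 1500, then M_[i] = 1250000 /
+ * (2 * 1500) = 416.  A class with allotment_ = 625000 then gets
+ * w_allotment_ = 625000 / 416 = 1502, roughly one max-size packet
+ * per round, as intended for an equal split.
+ */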
+
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+       if ((pri >= 0) && (pri < RM_MAXPRIO))
+               return (ifd->M_[pri]);
+       else
+               return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ *     appropriate depth of class 'cl' and its ancestors.
+ *
+ *     Returns:        NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+       rm_class_t      *t = cl, *p;
+
+       /*
+        * Recompute the depth for the branch of the tree.
+        */
+       while (t != NULL) {
+               p = t->parent_;
+               if (p && (t->depth_ >= p->depth_)) {
+                       p->depth_ = t->depth_ + 1;
+                       t = p;
+               } else
+                       t = NULL;
+       }
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ *     the depth of the tree after a class has been deleted.
+ *
+ *     Returns:        NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+       rm_class_t      *p, *t;
+
+       p = cl;
+       while (p != NULL) {
+               if ((t = p->children_) == NULL) {
+                       p->depth_ = 0;
+               } else {
+                       int cdepth = 0;
+
+                       while (t != NULL) {
+                               if (t->depth_ > cdepth)
+                                       cdepth = t->depth_;
+                               t = t->next_;
+                       }
+
+                       if (p->depth_ == cdepth + 1)
+                               /* no change to this parent */
+                               return;
+
+                       p->depth_ = cdepth + 1;
+               }
+
+               p = p->parent_;
+       }
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ *     function deletes a class from the link-sharing structure and frees
+ *     all resources associated with the class.
+ *
+ *     Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+       struct rm_class *p, *head, *previous;
+
+       VERIFY(cl->children_ == NULL);
+
+       if (cl->sleeping_)
+               CALLOUT_STOP(&cl->callout_);
+
+       /*
+        * Free packets in the packet queue.
+        * XXX - this may not be a desired behavior.  Packets should be
+	 * re-queued.
+        */
+       rmc_dropall(cl);
+
+       /*
+	 * If the class has a parent, then remove the class from the
+	 * parent's children chain.
+        */
+       if (cl->parent_ != NULL) {
+               head = cl->parent_->children_;
+               p = previous = head;
+               if (head->next_ == NULL) {
+                       VERIFY(head == cl);
+                       cl->parent_->children_ = NULL;
+                       cl->parent_->leaf_ = 1;
+               } else while (p != NULL) {
+                       if (p == cl) {
+                               if (cl == head)
+                                       cl->parent_->children_ = cl->next_;
+                               else
+                                       previous->next_ = cl->next_;
+                               cl->next_ = NULL;
+                               p = NULL;
+                       } else {
+                               previous = p;
+                               p = p->next_;
+                       }
+               }
+       }
+
+       /*
+        * Delete class from class priority peer list.
+        */
+       if ((p = ifd->active_[cl->pri_]) != NULL) {
+               /*
+                * If there is more than one member of this priority
+                * level, then look for class(cl) in the priority level.
+                */
+               if (p != p->peer_) {
+                       while (p->peer_ != cl)
+                               p = p->peer_;
+                       p->peer_ = cl->peer_;
+
+                       if (ifd->active_[cl->pri_] == cl)
+                               ifd->active_[cl->pri_] = cl->peer_;
+               } else {
+                       VERIFY(p == cl);
+                       ifd->active_[cl->pri_] = NULL;
+               }
+       }
+
+       /*
+        * Recompute the WRR weights.
+        */
+       if (ifd->wrr_) {
+               ifd->alloc_[cl->pri_] -= cl->allotment_;
+               ifd->num_[cl->pri_]--;
+               rmc_wrr_set_weights(ifd);
+       }
+
+       /*
+        * Re-compute the depth of the tree.
+        */
+       rmc_depth_recompute(cl->parent_);
+
+       /*
+        * Free the class structure.
+        */
+       if (cl->qalg_.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->q_))
+                       rio_destroy(cl->rio_);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->q_))
+                       red_destroy(cl->red_);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->q_))
+                       blue_destroy(cl->blue_);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
+                       sfb_destroy(cl->sfb_);
+               cl->qalg_.ptr = NULL;
+               qtype(&cl->q_) = Q_DROPTAIL;
+               qstate(&cl->q_) = QS_RUNNING;
+       }
+       zfree(rmc_zone, cl);
+}
+
+
+/*
+ * int
+ * rmc_init(...) - Initialize the resource management data structures
+ *     associated with the output portion of interface 'ifp'.  'ifd' is
+ *     where the structures will be built (for backwards compatibility, the
+ *     structures aren't kept in the ifnet struct).  'nsecPerByte'
+ *     gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ *     'restart' is the driver-specific routine that the generic 'delay
+ *     until under limit' action will call to restart output.  `maxq'
+ *     is the queue size of the 'link' & 'default' classes.  'maxqueued'
+ *     is the maximum number of packets that the resource management
+ *     code will allow to be queued 'downstream' (this is typically 1).
+ *
+ *     Returns:        0 on success
+ */
+
+int
+rmc_init(struct ifclassq *ifq, struct rm_ifdat *ifd, u_int32_t nsecPerByte,
+    void (*restart)(struct ifclassq *), u_int32_t qid, int maxq, int maxqueued,
+    u_int32_t maxidle, int minidle, u_int32_t offtime, int flags)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       int i, mtu;
+
+       /*
+        * Initialize the CBQ tracing/debug facility.
+        */
+       CBQTRACEINIT();
+
+       if (nsecPerByte == 0) {
+		log(LOG_ERR, "%s: %s: invalid inverse data rate\n",
+                   __func__, if_name(ifp));
+               return (EINVAL);
+       }
+
+       mtu = ifp->if_mtu;
+       if (mtu < 1) {
+               log(LOG_ERR, "%s: %s: invalid MTU (interface not "
+                   "initialized?)\n", __func__, if_name(ifp));
+               return (EINVAL);
+       }
+       bzero((char *)ifd, sizeof (*ifd));
+
+       ifd->ifq_ = ifq;
+       ifd->restart = restart;
+       ifd->maxqueued_ = maxqueued;
+       ifd->ns_per_byte_ = nsecPerByte;
+       ifd->maxpkt_ = mtu;
+       ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+       ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+       ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+       if (mtu * nsecPerByte > 10 * 1000000)
+               ifd->maxiftime_ /= 4;
+#endif
+
+       reset_cutoff(ifd);
+       CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+       /*
+        * Initialize the CBQ's WRR state.
+        */
+       for (i = 0; i < RM_MAXPRIO; i++) {
+               ifd->alloc_[i] = 0;
+               ifd->M_[i] = 0;
+               ifd->num_[i] = 0;
+               ifd->na_[i] = 0;
+               ifd->active_[i] = NULL;
+       }
+
+       /*
+        * Initialize current packet state.
+        */
+       ifd->qi_ = 0;
+       ifd->qo_ = 0;
+       for (i = 0; i < RM_MAXQUEUED; i++) {
+               ifd->class_[i] = NULL;
+               ifd->curlen_[i] = 0;
+               ifd->borrowed_[i] = NULL;
+       }
+
+       /*
+        * Create the root class of the link-sharing structure.
+        */
+       if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte,
+           rmc_root_overlimit, qid, maxq, 0, 0, maxidle, minidle, offtime,
+           0, 0)) == NULL) {
+               log(LOG_ERR, "rmc_init: root class not allocated\n");
+               return (ENOMEM);
+       }
+       ifd->root_->depth_ = 0;
+
+       return (0);
+}
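+
+/*
+ * A minimal usage sketch (illustrative only, not compiled): how a
+ * hypothetical CBQ setup might drive rmc_init() and rmc_newclass() for
+ * a 10Mb/s link (nsecPerByte = 800), with one leaf class at priority 1
+ * that is allowed to borrow from the root.  All parameter values and
+ * the example_restart callback are invented for illustration.
+ */
+#if 0
+static void
+example_restart(struct ifclassq *ifq)
+{
+	/* driver-specific: resume output on the interface */
+}
+
+static int
+example_cbq_setup(struct ifclassq *ifq, struct rm_ifdat *ifd)
+{
+	struct rm_class *cl;
+	int err;
+
+	/* root gets the full link; a maxqueued of 1 is typical */
+	err = rmc_init(ifq, ifd, 800, example_restart, 1 /* qid */,
+	    64 /* maxq */, 1 /* maxqueued */, 0 /* maxidle */,
+	    0 /* minidle */, 0 /* offtime */, RMCF_WRR);
+	if (err != 0)
+		return (err);
+
+	/* leaf with 50% of the link: nsecPerByte = 800 / 0.5 = 1600 */
+	cl = rmc_newclass(1, ifd, 1600, rmc_delay_action, 2 /* qid */,
+	    64, ifd->root_, ifd->root_, 0, 0, 0, 0, 0);
+	return ((cl != NULL) ? 0 : ENOMEM);
+}
+#endif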
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, struct mbuf *m) - Add packet given by
+ *     mbuf 'm' to queue for resource class 'cl'.  This routine is called
+ *     by a driver's if_output routine.  This routine must be called with
+ *     output packet completion interrupts locked out (to avoid racing with
+ *     rmc_dequeue_next).
+ *
+ *     Returns:        0 on successful queueing
+ *                     CLASSQEQ_DROPPED when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct timeval   now;
+       struct rm_ifdat *ifd = cl->ifdat_;
+       int              cpri = cl->pri_;
+       int              is_empty = qempty(&cl->q_);
+       int ret = 0;
+
+       RM_GETTIME(now);
+       if (ifd->cutoff_ > 0) {
+               if (TV_LT(&cl->undertime_, &now)) {
+                       if (ifd->cutoff_ > cl->depth_)
+                               ifd->cutoff_ = cl->depth_;
+                       CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+               } else {
+                       /*
+                        * the class is overlimit. if the class has
+                        * underlimit ancestors, set cutoff to the lowest
+                        * depth among them.
+                        */
+                       struct rm_class *borrow = cl->borrow_;
+
+                       while (borrow != NULL &&
+                           borrow->depth_ < ifd->cutoff_) {
+                               if (TV_LT(&borrow->undertime_, &now)) {
+                                       ifd->cutoff_ = borrow->depth_;
+                                       CBQTRACE(rmc_queue_packet, 'ffob',
+                                           ifd->cutoff_);
+                                       break;
+                               }
+                               borrow = borrow->borrow_;
+                       }
+               }
+       }
+
+       ret = _rmc_addq(cl, m, t);
+       if (ret != 0 &&
+           (ret == CLASSQEQ_DROPPED || ret == CLASSQEQ_DROPPED_FC ||
+           ret == CLASSQEQ_DROPPED_SP)) {
+               /* failed */
+               return (ret);
+       }
+       VERIFY(ret == 0 || ret == CLASSQEQ_SUCCESS_FC);
+       if (is_empty) {
+               CBQTRACE(rmc_queue_packet, 'type', cl->stats_.handle);
+               ifd->na_[cpri]++;
+       }
+
+       if (qlen(&cl->q_) > qlimit(&cl->q_)) {
+               /* note: qlimit can be set to 0 or 1 */
+               rmc_drop_action(cl);
+               return (CLASSQEQ_DROPPED);
+       }
+       return (ret);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ *	classes to see if they are satisfied.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+       int              i;
+       rm_class_t      *p, *bp;
+
+       for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+               if ((bp = ifd->active_[i]) != NULL) {
+                       p = bp;
+                       do {
+                               if (!rmc_satisfied(p, now)) {
+                                       ifd->cutoff_ = p->depth_;
+                                       return;
+                               }
+                               p = p->peer_;
+                       } while (p != bp);
+               }
+       }
+
+       reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+       rm_class_t      *p;
+
+       if (cl == NULL)
+               return (1);
+       if (TV_LT(now, &cl->undertime_))
+               return (1);
+       if (cl->depth_ == 0) {
+               if (!cl->sleeping_ && (qlen(&cl->q_) > cl->qthresh_))
+                       return (0);
+               else
+                       return (1);
+       }
+       if (cl->children_ != NULL) {
+               p = cl->children_;
+               while (p != NULL) {
+                       if (!rmc_satisfied(p, now))
+                               return (0);
+                       p = p->next_;
+               }
+       }
+
+       return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit.  As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+       rm_class_t      *p = cl;
+       rm_class_t      *top;
+       struct rm_ifdat *ifd = cl->ifdat_;
+
+       ifd->borrowed_[ifd->qi_] = NULL;
+       /*
+        * If cl is the root class, then always return that it is
+        * underlimit.  Otherwise, check to see if the class is underlimit.
+        */
+       if (cl->parent_ == NULL)
+               return (1);
+
+       if (cl->sleeping_) {
+               if (TV_LT(now, &cl->undertime_))
+                       return (0);
+
+               CALLOUT_STOP(&cl->callout_);
+               cl->sleeping_ = 0;
+               cl->undertime_.tv_sec = 0;
+               return (1);
+       }
+
+       top = NULL;
+       while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+               if (((cl = cl->borrow_) == NULL) ||
+                   (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+                       if (cl != NULL)
+                               /*
+                                * cutoff is taking effect, just
+                                * return false without calling
+                                * the delay action.
+                                */
+                               return (0);
+#endif
+#ifdef BORROW_OFFTIME
+                       /*
+                        * check if the class can borrow offtime too.
+                        * borrow offtime from the top of the borrow
+                        * chain if the top class is not overloaded.
+                        */
+                       if (cl != NULL) {
+                               /*
+                                * cutoff is taking effect, use this
+                                * class as top.
+                                */
+                               top = cl;
+                               CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+                       }
+                       if (top != NULL && top->avgidle_ == top->minidle_)
+                               top = NULL;
+                       p->overtime_ = *now;
+                       (p->overlimit)(p, top);
+#else
+                       p->overtime_ = *now;
+                       (p->overlimit)(p, NULL);
+#endif
+                       return (0);
+               }
+               top = cl;
+       }
+
+       if (cl != p)
+               ifd->borrowed_[ifd->qi_] = cl;
+       return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed
+ *	to packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet.  Highest priority first, then
+ * weighted round-robin within priorities.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted.  Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static struct mbuf *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
+{
+       struct rm_class *cl = NULL, *first = NULL;
+       u_int32_t        deficit;
+       int              cpri;
+       struct mbuf     *m;
+       struct timeval   now;
+
+       RM_GETTIME(now);
+
+       /*
+        * if the driver polls the top of the queue and then removes
+        * the polled packet, we must return the same packet.
+        */
+       if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
+               cl = ifd->pollcache_;
+               cpri = cl->pri_;
+               if (ifd->efficient_) {
+                       /* check if this class is overlimit */
+                       if (cl->undertime_.tv_sec != 0 &&
+                           rmc_under_limit(cl, &now) == 0)
+                               first = cl;
+               }
+               ifd->pollcache_ = NULL;
+               goto _wrr_out;
+       } else {
+               /* mode == CLASSQDQ_POLL || pollcache == NULL */
+               ifd->pollcache_ = NULL;
+               ifd->borrowed_[ifd->qi_] = NULL;
+       }
+#ifdef ADJUST_CUTOFF
+_again:
+#endif
+       for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+               if (ifd->na_[cpri] == 0)
+                       continue;
+               deficit = 0;
+               /*
+                * Loop through twice for a priority level, if some class
+                * was unable to send a packet the first round because
+                * of the weighted round-robin mechanism.
+                * During the second loop at this level, deficit==2.
+                * (This second loop is not needed if for every class,
+		 * "M[cl->pri_]" times "cl->allotment" is greater than
+                * the byte size for the largest packet in the class.)
+                */
+_wrr_loop:
+               cl = ifd->active_[cpri];
+               VERIFY(cl != NULL);
+               do {
+                       if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+                               cl->bytes_alloc_ += cl->w_allotment_;
+                       if (!qempty(&cl->q_)) {
+                               if ((cl->undertime_.tv_sec == 0) ||
+                                   rmc_under_limit(cl, &now)) {
+                                       if (cl->bytes_alloc_ > 0 || deficit > 1)
+                                               goto _wrr_out;
+
+                                       /* underlimit but no alloc */
+                                       deficit = 1;
+#if 1
+                                       ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+                               } else if (first == NULL && cl->borrow_ != NULL)
+                                       first = cl; /* borrowing candidate */
+                       }
+
+                       cl->bytes_alloc_ = 0;
+                       cl = cl->peer_;
+               } while (cl != ifd->active_[cpri]);
+
+               if (deficit == 1) {
+                       /* first loop found an underlimit class with deficit */
+                       /* Loop on same priority level, with new deficit.  */
+                       deficit = 2;
+                       goto _wrr_loop;
+               }
+       }
+
+#ifdef ADJUST_CUTOFF
+       /*
+        * no underlimit class found.  if cutoff is taking effect,
+        * increase cutoff and try again.
+        */
+       if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+               ifd->cutoff_++;
+               CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+               goto _again;
+       }
+#endif /* ADJUST_CUTOFF */
+       /*
+        * If LINK_EFFICIENCY is turned on, then the first overlimit
+        * class we encounter will send a packet if all the classes
+        * of the link-sharing structure are overlimit.
+        */
+       reset_cutoff(ifd);
+       CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+       if (!ifd->efficient_ || first == NULL)
+               return (NULL);
+
+       cl = first;
+       cpri = cl->pri_;
+#if 0  /* too time-consuming for nothing */
+       if (cl->sleeping_)
+               CALLOUT_STOP(&cl->callout_);
+       cl->sleeping_ = 0;
+       cl->undertime_.tv_sec = 0;
+#endif
+       ifd->borrowed_[ifd->qi_] = cl->borrow_;
+       ifd->cutoff_ = cl->borrow_->depth_;
+
+       /*
+	 * Dequeue the packet and do the bookkeeping...
+        */
+_wrr_out:
+       if (op == CLASSQDQ_REMOVE) {
+               m = _rmc_getq(cl);
+               if (m == NULL)
+                       return (NULL);
+
+               if (qempty(&cl->q_))
+                       ifd->na_[cpri]--;
+
+               /*
+                * Update class statistics and link data.
+                */
+               if (cl->bytes_alloc_ > 0)
+                       cl->bytes_alloc_ -= m_pktlen(m);
+
+               if ((cl->bytes_alloc_ <= 0) || first == cl)
+                       ifd->active_[cl->pri_] = cl->peer_;
+               else
+                       ifd->active_[cl->pri_] = cl;
+
+               ifd->class_[ifd->qi_] = cl;
+               ifd->curlen_[ifd->qi_] = m_pktlen(m);
+               ifd->now_[ifd->qi_] = now;
+               ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+               ifd->queued_++;
+       } else {
+		/* mode == CLASSQDQ_POLL */
+               m = _rmc_pollq(cl);
+               ifd->pollcache_ = cl;
+       }
+       return (m);
+}
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it.  This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static struct mbuf *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t op)
+{
+       struct mbuf     *m;
+       int              cpri;
+       struct rm_class *cl, *first = NULL;
+       struct timeval   now;
+
+       RM_GETTIME(now);
+
+       /*
+        * if the driver polls the top of the queue and then removes
+        * the polled packet, we must return the same packet.
+        */
+       if (op == CLASSQDQ_REMOVE && ifd->pollcache_) {
+               cl = ifd->pollcache_;
+               cpri = cl->pri_;
+               ifd->pollcache_ = NULL;
+               goto _prr_out;
+       } else {
+               /* mode == CLASSQDQ_POLL || pollcache == NULL */
+               ifd->pollcache_ = NULL;
+               ifd->borrowed_[ifd->qi_] = NULL;
+       }
+#ifdef ADJUST_CUTOFF
+_again:
+#endif
+       for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+               if (ifd->na_[cpri] == 0)
+                       continue;
+               cl = ifd->active_[cpri];
+               VERIFY(cl != NULL);
+               do {
+                       if (!qempty(&cl->q_)) {
+                               if ((cl->undertime_.tv_sec == 0) ||
+                                   rmc_under_limit(cl, &now))
+                                       goto _prr_out;
+                               if (first == NULL && cl->borrow_ != NULL)
+                                       first = cl;
+                       }
+                       cl = cl->peer_;
+               } while (cl != ifd->active_[cpri]);
+       }
+
+#ifdef ADJUST_CUTOFF
+       /*
+        * no underlimit class found.  if cutoff is taking effect, increase
+        * cutoff and try again.
+        */
+       if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+               ifd->cutoff_++;
+               goto _again;
+       }
+#endif /* ADJUST_CUTOFF */
+       /*
+        * If LINK_EFFICIENCY is turned on, then the first overlimit
+        * class we encounter will send a packet if all the classes
+        * of the link-sharing structure are overlimit.
+        */
+       reset_cutoff(ifd);
+       if (!ifd->efficient_ || first == NULL)
+               return (NULL);
+
+       cl = first;
+       cpri = cl->pri_;
+#if 0  /* too time-consuming for nothing */
+       if (cl->sleeping_)
+               CALLOUT_STOP(&cl->callout_);
+       cl->sleeping_ = 0;
+       cl->undertime_.tv_sec = 0;
+#endif
+       ifd->borrowed_[ifd->qi_] = cl->borrow_;
+       ifd->cutoff_ = cl->borrow_->depth_;
+
+       /*
+        * Deque the packet and do the book keeping...
+	 * Dequeue the packet and do the bookkeeping...
+_prr_out:
+       if (op == CLASSQDQ_REMOVE) {
+               m = _rmc_getq(cl);
+               if (m == NULL)
+                       return (NULL);
+
+               if (qempty(&cl->q_))
+                       ifd->na_[cpri]--;
+
+               ifd->active_[cpri] = cl->peer_;
+
+               ifd->class_[ifd->qi_] = cl;
+               ifd->curlen_[ifd->qi_] = m_pktlen(m);
+               ifd->now_[ifd->qi_] = now;
+               ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+               ifd->queued_++;
+       } else {
+               /* mode == CLASSQDQ_POLL */
+               m = _rmc_pollq(cl);
+               ifd->pollcache_ = cl;
+       }
+       return (m);
+}
+
+/*
+ * struct mbuf *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
+ *     is invoked by the packet driver to get the next packet to be
+ *     dequeued and output on the link.  If WRR is enabled, then the
+ *	WRR dequeue next routine will determine the next packet to send.
+ *     Otherwise, packet-by-packet round robin is invoked.
+ *
+ *     Returns:        NULL, if a packet is not available or if all
+ *                     classes are overlimit.
+ *
+ *			Otherwise, a pointer to the next packet.
+ */
+
+struct mbuf *
+rmc_dequeue_next(struct rm_ifdat *ifd, cqdq_op_t mode)
+{
+       if (ifd->queued_ >= ifd->maxqueued_)
+               return (NULL);
+       else if (ifd->wrr_)
+               return (_rmc_wrr_dequeue_next(ifd, mode));
+       else
+               return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated.  This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000"; it multiplies by roughly
+ * 0.000999 when the value has enough effective digits.
+ * (on Pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define        NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
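+/*
+ * (For example: 1/1024 + 1/65536 + 1/131072 = 0.00099945..., so for
+ * t = 1000000 ns the macro yields 976 + 15 + 7 = 998, against an
+ * exact value of 1000 usec.)
+ */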
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+       int              idle, avgidle, pktlen;
+       int              pkt_time, tidle;
+       rm_class_t      *cl, *borrowed;
+       rm_class_t      *borrows;
+       struct timeval  *nowp;
+
+       /*
+        * Get the most recent completed class.
+        */
+       if ((cl = ifd->class_[ifd->qo_]) == NULL)
+               return;
+
+       pktlen = ifd->curlen_[ifd->qo_];
+       borrowed = ifd->borrowed_[ifd->qo_];
+       borrows = borrowed;
+
+       PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);
+
+       /*
+        * Run estimator on class and its ancestors.
+        */
+       /*
+	 * rmc_update_class_util is designed to be called when the
+	 * transfer is completed from an xmit-complete interrupt,
+        * but most drivers don't implement an upcall for that.
+        * so, just use estimated completion time.
+        * as a result, ifd->qi_ and ifd->qo_ are always synced.
+        */
+       nowp = &ifd->now_[ifd->qo_];
+       /* get pkt_time (for link) in usec */
+#if 1  /* use approximation */
+       pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+       pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+       pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+       if (TV_LT(nowp, &ifd->ifnow_)) {
+               int iftime;
+
+               /*
+                * make sure the estimated completion time does not go
+                * too far.  it can happen when the link layer supports
+                * data compression or the interface speed is set to
+                * a much lower value.
+                */
+               TV_DELTA(&ifd->ifnow_, nowp, iftime);
+               if (iftime+pkt_time < ifd->maxiftime_) {
+                       TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+               } else {
+                       TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+               }
+       } else {
+               TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+       }
+#else
+       if (TV_LT(nowp, &ifd->ifnow_)) {
+               TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+       } else {
+               TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+       }
+#endif
+
+       while (cl != NULL) {
+               TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+               if (idle >= 2000000)
+                       /*
+                        * this class is idle enough, reset avgidle.
+                        * (TV_DELTA returns 2000000 us when delta is large.)
+                        */
+                       cl->avgidle_ = cl->maxidle_;
+
+               /* get pkt_time (for class) in usec */
+#if 1  /* use approximation */
+               pkt_time = pktlen * cl->ns_per_byte_;
+               pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+               pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+               idle -= pkt_time;
+
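+		/*
+		 * avgidle_ is kept scaled up by 2^RM_FILTER_GAIN (see the
+		 * comment above rmc_newclass()), so the update below is
+		 * equivalent to avg = (1 - g)*avg + g*idle on the unscaled
+		 * estimate, with gain g = 2^-RM_FILTER_GAIN.
+		 */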
+               avgidle = cl->avgidle_;
+               avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+               cl->avgidle_ = avgidle;
+
+               /* Are we overlimit ? */
+               if (avgidle <= 0) {
+                       CBQTRACE(rmc_update_class_util, 'milo',
+                           cl->stats_.handle);
+                       /*
+                        * need some lower bound for avgidle, otherwise
+                        * a borrowing class gets unbounded penalty.
+                        */
+                       if (avgidle < cl->minidle_)
+                               avgidle = cl->avgidle_ = cl->minidle_;
+
+                       /* set next idle to make avgidle 0 */
+                       tidle = pkt_time +
+                           (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+                       TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+                       ++cl->stats_.over;
+               } else {
+                       cl->avgidle_ =
+                           (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+                       cl->undertime_.tv_sec = 0;
+                       if (cl->sleeping_) {
+                               CALLOUT_STOP(&cl->callout_);
+                               cl->sleeping_ = 0;
+                       }
+               }
+
+               if (borrows != NULL) {
+                       if (borrows != cl)
+                               ++cl->stats_.borrows;
+                       else
+                               borrows = NULL;
+               }
+               cl->last_ = ifd->ifnow_;
+               cl->last_pkttime_ = pkt_time;
+
+#if 1
+               if (cl->parent_ == NULL) {
+                       /* take stats of root class */
+                       PKTCNTR_ADD(&cl->stats_.xmit_cnt, 1, pktlen);
+               }
+#endif
+
+               cl = cl->parent_;
+       }
+
+       /*
+	 * Check to see if cutoff needs to be set to a new level.
+        */
+       cl = ifd->class_[ifd->qo_];
+       if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+               if ((qlen(&cl->q_) <= 0) ||
+                   TV_LT(nowp, &borrowed->undertime_)) {
+                       rmc_tl_satisfied(ifd, nowp);
+                       CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+               } else {
+                       ifd->cutoff_ = borrowed->depth_;
+                       CBQTRACE(rmc_update_class_util, 'ffob',
+                           borrowed->depth_);
+               }
+       }
+
+       /*
+        * Release class slot
+        */
+       ifd->borrowed_[ifd->qo_] = NULL;
+       ifd->class_[ifd->qo_] = NULL;
+       ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+       ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ *     over-limit action routines.  These get invoked by rmc_under_limit()
+ *	if a class with packets to send is over its bandwidth limit & can't
+ *     borrow from a parent class.
+ *
+ *     Returns: NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+       struct rm_ifdat *ifd = cl->ifdat_;
+
+       VERIFY(qlen(&cl->q_) > 0);
+       IFCQ_CONVERT_LOCK(ifd->ifq_);
+       _rmc_dropq(cl);
+       if (qempty(&cl->q_))
+               ifd->na_[cl->pri_]--;
+}
+
+void
+rmc_drop(struct rm_class *cl, u_int32_t flow, u_int32_t *packets,
+    u_int32_t *bytes)
+{
+       struct rm_ifdat *ifd = cl->ifdat_;
+       struct ifclassq *ifq = ifd->ifq_;
+       u_int32_t pkt = 0, len = 0, qlen;
+
+       if ((qlen = qlen(&cl->q_)) != 0) {
+               IFCQ_CONVERT_LOCK(ifq);
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->q_))
+                       rio_purgeq(cl->rio_, &cl->q_, flow, &pkt, &len);
+               else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->q_))
+                       red_purgeq(cl->red_, &cl->q_, flow, &pkt, &len);
+               else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->q_))
+                       blue_purgeq(cl->blue_, &cl->q_, flow, &pkt, &len);
+               else
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
+                       sfb_purgeq(cl->sfb_, &cl->q_, flow, &pkt, &len);
+               else
+                       _flushq_flow(&cl->q_, flow, &pkt, &len);
+
+               if (pkt > 0) {
+                       VERIFY(qlen(&cl->q_) == (qlen - pkt));
+
+                       PKTCNTR_ADD(&cl->stats_.drop_cnt, pkt, len);
+                       IFCQ_DROP_ADD(ifq, pkt, len);
+
+                       VERIFY(((signed)IFCQ_LEN(ifq) - pkt) >= 0);
+                       IFCQ_LEN(ifq) -= pkt;
+
+                       if (qempty(&cl->q_))
+                               ifd->na_[cl->pri_]--;
+               }
+       }
+       if (packets != NULL)
+               *packets = pkt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+void
+rmc_dropall(struct rm_class *cl)
+{
+       rmc_drop(cl, 0, NULL, NULL);
+}
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ *     delay action routine.  It is invoked via rmc_under_limit when the
+ *     packet is discovered to be overlimit.
+ *
+ * If the delay action is the result of the borrow class being overlimit, then
+ *     delay for the offtime of the borrowing class that is overlimit.
+ *
+ *     Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+       int     ndelay, t, extradelay;
+
+       cl->stats_.overactions++;
+       TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
+#ifndef BORROW_OFFTIME
+       ndelay += cl->offtime_;
+#endif
+
+       if (!cl->sleeping_) {
+               CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+               if (borrow != NULL)
+                       extradelay = borrow->offtime_;
+               else
+#endif
+                       extradelay = cl->offtime_;
+
+               /*
+                * XXX recalculate suspend time:
+                * current undertime is (tidle + pkt_time) calculated
+                * from the last transmission.
+                *      tidle: time required to bring avgidle back to 0
+                *      pkt_time: target waiting time for this class
+                * we need to replace pkt_time by offtime
+                */
+               extradelay -= cl->last_pkttime_;
+               if (extradelay > 0) {
+                       TV_ADD_DELTA(&cl->undertime_, extradelay,
+                           &cl->undertime_);
+                       ndelay += extradelay;
+               }
+
+               cl->sleeping_ = 1;
+               cl->stats_.delays++;
+
+               /*
+                * Since packets are phased randomly with respect to the
+                * clock, 1 tick (the next clock tick) can be an arbitrarily
+                * short time so we have to wait for at least two ticks.
+                * NOTE:  If there's no other traffic, we need the timer as
+                * a 'backstop' to restart this class.
+                */
+               if (ndelay > tick * 2) {
+                       /*
+                        * FreeBSD rounds up the tick;
+                        * other BSDs round down the tick.
+                        */
+                       t = hzto(&cl->undertime_) + 1;
+               } else {
+                       t = 2;
+               }
+               CALLOUT_RESET(&cl->callout_, t,
+                   (timeout_t *)rmc_restart, (caddr_t)cl);
+       }
+}
+
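+/*
+ * A minimal sketch of the two-tick backstop above, with hypothetical
+ * numbers: at hz = 100 (tick = 10000 us), ndelay = 15000 us is not
+ * greater than 2 * tick, so the callout is armed with t = 2 ticks;
+ * only delays beyond two ticks use hzto(&cl->undertime_) + 1.
+ */
+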
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ *     called by the system timer code & is responsible for checking if the
+ *     class is still sleeping (it might have been restarted as a side
+ *     effect of the queue scan on a packet arrival) and, if so, restarting
+ *     output for the class.  Inspecting the class state & restarting output
+ *     require locking the class structure.  In general the driver is
+ *     responsible for locking but this is the only routine that is not
+ *     called directly or indirectly from the interface driver, so it has to
+ *     know about system locking conventions.
+ *
+ *     Returns:        NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+       struct rm_ifdat *ifd = cl->ifdat_;
+
+       if (cl->sleeping_) {
+               cl->sleeping_ = 0;
+               cl->undertime_.tv_sec = 0;
+
+               if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+                       CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+                       (ifd->restart)(ifd->ifq_);
+               }
+       }
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
+ *     handling routine for the root class of the link sharing structure.
+ *
+ *     Returns: NONE
+ */
+static void
+rmc_root_overlimit(struct rm_class *cl,
+    struct rm_class *borrow)
+{
+#pragma unused(cl, borrow)
+       panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet Queue handling routines.  These localize, in one place, the
+ *     effects on the rest of the code of whether queues are RED queues
+ *     or droptail queues.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, struct mbuf *m, struct pf_mtag *t)
+{
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->q_))
+               return (rio_addq(cl->rio_, &cl->q_, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->q_))
+               return (red_addq(cl->red_, &cl->q_, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->q_))
+               return (blue_addq(cl->blue_, &cl->q_, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->q_)) {
+               if (cl->sfb_ == NULL) {
+                       struct ifclassq *ifq = cl->ifdat_->ifq_;
+                       struct ifnet *ifp = ifq->ifcq_ifp;
+
+                       VERIFY(cl->flags_ & RMCF_LAZY);
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->sfb_ = sfb_alloc(ifp, cl->stats_.handle,
+                           qlimit(&cl->q_), cl->qflags_);
+                       if (cl->sfb_ == NULL) {
+                               /* fall back to droptail */
+                               qtype(&cl->q_) = Q_DROPTAIL;
+                               cl->flags_ &= ~RMCF_SFB;
+                               cl->qflags_ &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: CBQ SFB lazy allocation "
+                                   "failed for qid=%d pri=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   cl->stats_.handle, cl->pri_);
+                       }
+               }
+               if (cl->sfb_ != NULL)
+                       return (sfb_addq(cl->sfb_, &cl->q_, m, t));
+       } else if (cl->flags_ & RMCF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       /* test for qlen > qlimit is done by caller */
+       _addq(&cl->q_, m);
+       return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+       struct mbuf *m;
+
+       if ((m = _rmc_getq(cl)) != NULL)
+               m_freem(m);
+}
+
+static struct mbuf *
+_rmc_getq(rm_class_t *cl)
+{
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->q_))
+               return (rio_getq(cl->rio_, &cl->q_));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->q_))
+               return (red_getq(cl->red_, &cl->q_));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->q_))
+               return (blue_getq(cl->blue_, &cl->q_));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
+               return (sfb_getq(cl->sfb_, &cl->q_));
+
+       return (_getq(&cl->q_));
+}
+
+static struct mbuf *
+_rmc_pollq(rm_class_t *cl)
+{
+       return (qhead(&cl->q_));
+}
+
+void
+rmc_updateq(rm_class_t *cl, cqev_t ev)
+{
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->q_))
+               return (rio_updateq(cl->rio_, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->q_))
+               return (red_updateq(cl->red_, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->q_))
+               return (blue_updateq(cl->blue_, ev));
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->q_) && cl->sfb_ != NULL)
+               return (sfb_updateq(cl->sfb_, ev));
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace                 cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace                *cbqtrace_ptr = NULL;
+int                     cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ *  the last NCBQTRACE events are held in a circular buffer.
+ *  use "call cbqtrace_dump(N)" to display 20 events from the Nth event.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+       void    *func;
+       char    *name;
+} rmc_funcs[] =
+{
+       rmc_init,               "rmc_init",
+       rmc_queue_packet,       "rmc_queue_packet",
+       rmc_under_limit,        "rmc_under_limit",
+       rmc_update_class_util,  "rmc_update_class_util",
+       rmc_delay_action,       "rmc_delay_action",
+       rmc_restart,            "rmc_restart",
+       _rmc_wrr_dequeue_next,  "_rmc_wrr_dequeue_next",
+       NULL,                   NULL
+};
+
+static char *
+rmc_funcname(void *func)
+{
+       struct rmc_funcs *fp;
+
+       for (fp = rmc_funcs; fp->func != NULL; fp++)
+               if (fp->func == func)
+                       return (fp->name);
+       return ("unknown");
+}
+
+void
+cbqtrace_dump(int counter)
+{
+       int      i, *p;
+       char    *cp;
+
+       counter = counter % NCBQTRACE;
+       p = (int *)&cbqtrace_buffer[counter];
+
+       for (i = 0; i < 20; i++) {
+               log(LOG_DEBUG, "[0x%x] ", *p++);
+               log(LOG_DEBUG, "%s: ", rmc_funcname((void *)*p++));
+               cp = (char *)p++;
+               log(LOG_DEBUG, "%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+               log(LOG_DEBUG, "%d\n", *p++);
+
+               if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+                       p = (int *)cbqtrace_buffer;
+       }
+}
+#endif /* CBQ_TRACE */
+#endif /* PKTSCHED_CBQ */
diff --git a/bsd/net/pktsched/pktsched_rmclass.h b/bsd/net/pktsched/pktsched_rmclass.h
new file mode 100644 (file)
index 0000000..d5f6b13
--- /dev/null
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* $OpenBSD: altq_rmclass.h,v 1.10 2007/06/17 19:58:58 jasper Exp $    */
+/* $KAME: altq_rmclass.h,v 1.6 2000/12/09 09:22:44 kjc Exp $   */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the Network Research
+ *     Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_RMCLASS_H_
+#define        _NET_PKTSCHED_PKTSCHED_RMCLASS_H_
+
+#ifdef PRIVATE
+#include <net/classq/classq.h>
+#include <net/pktsched/pktsched.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        RM_MAXPRIO      8       /* Max priority */
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define        RMCF_RED                0x0001  /* use RED */
+#define        RMCF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        RMCF_RIO                0x0004  /* use RIO */
+#define        RMCF_FLOWVALVE          0x0008  /* use flowvalve (aka penalty-box) */
+#define        RMCF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+
+/* flags for rmc_init */
+#define        RMCF_WRR                0x0100
+#define        RMCF_EFFICIENT          0x0200
+
+#define        RMCF_BLUE               0x10000 /* use BLUE */
+#define        RMCF_SFB                0x20000 /* use SFB */
+#define        RMCF_FLOWCTL            0x40000 /* enable flow control advisories */
+#ifdef BSD_KERNEL_PRIVATE
+#define        RMCF_LAZY               0x10000000 /* on-demand resource allocation */
+
+typedef struct rm_ifdat                rm_ifdat_t;
+typedef struct rm_class                rm_class_t;
+
+struct red;
+struct rio;
+struct blue;
+struct sfb;
+
+/*
+ * Macros for dealing with time values.  We assume all times are
+ * 'timevals'.  `microuptime' is used to get the best available clock
+ * resolution.  If `microuptime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes.  To remove this bias an
+ * integrator needs to be added.  The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries.  This is
+ * straightforward to add, but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ */
+
+#define        RM_GETTIME(now) microuptime(&now)
+
+#define        TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) ||  \
+       (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define        TV_DELTA(a, b, delta) {                                         \
+       int     xxs;                                                    \
+                                                                       \
+       delta = (a)->tv_usec - (b)->tv_usec;                            \
+       if ((xxs = (a)->tv_sec - (b)->tv_sec)) {                        \
+               switch (xxs) {                                          \
+               default:                                                \
+                       /*                                              \
+                        * if (xxs < 0)                                 \
+                        *      printf("rm_class: bogus time values\n"); \
+                        */                                             \
+                       delta = 0;                                      \
+                       /* fall through */                              \
+               case 2:                                                 \
+                       delta += 1000000;                               \
+                       /* fall through */                              \
+               case 1:                                                 \
+                       delta += 1000000;                               \
+                       break;                                          \
+               }                                                       \
+       }                                                               \
+}
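+
+/*
+ * A minimal sketch of TV_DELTA semantics (hypothetical helper, not
+ * compiled): deltas of two seconds or more clamp to 2000000 us.
+ */
+#if 0
+static void
+tv_delta_example(void)
+{
+       struct timeval a = { 10, 500 }, b = { 9, 900 };
+       int delta;
+
+       TV_DELTA(&a, &b, delta);        /* delta == 999600 us */
+       a.tv_sec = 42;
+       TV_DELTA(&a, &b, delta);        /* clamps to 2000000 us */
+}
+#endif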
+
+#define        TV_ADD_DELTA(a, delta, res) {                                   \
+       int xxus = (a)->tv_usec + (delta);                              \
+                                                                       \
+       (res)->tv_sec = (a)->tv_sec;                                    \
+       while (xxus >= 1000000) {                                       \
+               ++((res)->tv_sec);                                      \
+               xxus -= 1000000;                                        \
+       }                                                               \
+       (res)->tv_usec = xxus;                                          \
+}
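+
+/*
+ * Likewise for TV_ADD_DELTA (hypothetical helper, not compiled): the
+ * macro normalizes tv_usec into [0, 1000000) by carrying into tv_sec.
+ */
+#if 0
+static void
+tv_add_delta_example(void)
+{
+       struct timeval t = { 3, 900000 }, res;
+
+       TV_ADD_DELTA(&t, 250000, &res); /* res == { 4, 150000 } */
+}
+#endif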
+
+#define        RM_TIMEOUT      2       /* 1 Clock tick. */
+
+#if 1
+#define        RM_MAXQUEUED    1       /* this isn't used in ALTQ/CBQ */
+#else
+#define        RM_MAXQUEUED    16      /* Max number of packets downstream of CBQ */
+#endif
+#define        RM_MAXQUEUE     64      /* Max queue length */
+#define        RM_FILTER_GAIN  5       /* log2 of gain, e.g., 5 => 31/32 */
+#define        RM_POWER        (1 << RM_FILTER_GAIN)
+#define        RM_MAXDEPTH     32
+#define        RM_NS_PER_SEC   (1000000000)
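+
+/*
+ * For reference (worked example with a hypothetical rate): ns_per_byte_
+ * for a link is RM_NS_PER_SEC / (bits-per-second / 8); a 10 Mbps link
+ * gives 800 ns/byte, so a 1500-byte packet has pkt_time ~= 1.2 ms.
+ */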
+
+typedef struct _rm_class_stats_ {
+       u_int32_t       handle;
+       u_int32_t       depth;
+
+       struct pktcntr  xmit_cnt;       /* packets sent in this class */
+       struct pktcntr  drop_cnt;       /* dropped packets */
+       u_int32_t       over;           /* # times went over limit */
+       u_int32_t       borrows;        /* # times tried to borrow */
+       u_int32_t       overactions;    /* # times invoked overlimit action */
+       u_int32_t       delays;         /* # times invoked delay actions */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ */
+struct rm_class {
+       class_queue_t   q_;             /* Queue of packets */
+       rm_ifdat_t      *ifdat_;
+       int             pri_;           /* Class priority. */
+       int             depth_;         /* Class depth */
+       u_int32_t       ns_per_byte_;   /* NanoSeconds per byte. */
+       u_int32_t       maxrate_;       /* Bytes per second for this class. */
+       u_int32_t       allotment_;     /* Fraction of link bandwidth. */
+       u_int32_t       w_allotment_;   /* Weighted allotment for WRR */
+       int             bytes_alloc_;   /* Allocation for round of WRR */
+
+       int             avgidle_;
+       int             maxidle_;
+       int             minidle_;
+       int             offtime_;
+       int             sleeping_;      /* != 0 if delaying */
+       u_int32_t       qthresh_;       /* Threshold for formal link sharing */
+       int             leaf_;          /* Note whether leaf class or not */
+
+       rm_class_t      *children_;     /* Children of this class */
+       rm_class_t      *next_;         /* Next pointer, used if child */
+
+       rm_class_t      *peer_;         /* Peer class */
+       rm_class_t      *borrow_;       /* Borrow class */
+       rm_class_t      *parent_;       /* Parent class */
+
+       void    (*overlimit)(struct rm_class *, struct rm_class *);
+       void    (*drop)(struct rm_class *); /* Class drop action. */
+
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } qalg_;
+       int             flags_;
+       u_int32_t       qflags_;
+
+       int             last_pkttime_;  /* saved pkt_time */
+       struct timeval  undertime_;     /* time can next send */
+       struct timeval  last_;          /* time last packet sent */
+       struct timeval  overtime_;
+       struct callout  callout_;       /* for timeout() calls */
+
+       rm_class_stats_t stats_;        /* Class Statistics */
+};
+
+#define        red_    qalg_.red
+#define        rio_    qalg_.rio
+#define        blue_   qalg_.blue
+#define        sfb_    qalg_.sfb
+
+/*
+ * CBQ Interface state
+ */
+struct rm_ifdat {
+       int             queued_;        /* # pkts queued downstream */
+       int             efficient_;     /* Link Efficiency bit */
+       int             wrr_;           /* Enable Weighted Round-Robin */
+       u_long          ns_per_byte_;   /* Link byte speed. */
+       int             maxqueued_;     /* Max packets to queue */
+       int             maxpkt_;        /* Max packet size. */
+       int             qi_;            /* In/out pointers for downstream */
+       int             qo_;            /* packets */
+
+       /*
+        * Active class state and WRR state.
+        */
+       rm_class_t      *active_[RM_MAXPRIO];   /* Active cl's in each pri */
+       int             na_[RM_MAXPRIO];        /* # of active cl's in a pri */
+       int             num_[RM_MAXPRIO];       /* # of cl's per pri */
+       int             alloc_[RM_MAXPRIO];     /* Byte Allocation */
+       u_long          M_[RM_MAXPRIO];         /* WRR weights. */
+
+       /*
+        * Network Interface/Solaris Queue state pointer.
+        */
+       struct ifclassq *ifq_;
+       rm_class_t      *default_;      /* Default Pkt class, BE */
+       rm_class_t      *root_;         /* Root Link class. */
+       rm_class_t      *ctl_;          /* Control Traffic class. */
+       void            (*restart)(struct ifclassq *);  /* Restart routine. */
+
+       /*
+        * Current downstream packet state and dynamic state.
+        */
+       rm_class_t      *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+       rm_class_t      *class_[RM_MAXQUEUED];  /* class sending */
+       int             curlen_[RM_MAXQUEUED];  /* Current pktlen */
+       struct timeval  now_[RM_MAXQUEUED];     /* Current packet time */
+       int             is_overlimit_[RM_MAXQUEUED]; /* Overlimit state */
+
+       int             cutoff_;        /* Cut-off depth for borrowing */
+
+       struct timeval  ifnow_;         /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+       int             maxiftime_;     /* max delay inside interface */
+#endif
+       rm_class_t      *pollcache_;    /* cached rm_class by poll operation */
+};
+
+#define        RMC_IS_A_PARENT_CLASS(cl)       ((cl)->children_ != NULL)
+
+extern void rmclass_init(void);
+extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int32_t,
+    void (*)(struct rm_class *, struct rm_class *), u_int32_t,
+    u_int32_t, struct rm_class *, struct rm_class *,
+    u_int32_t, int, u_int32_t, int, int);
+extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *);
+extern int rmc_modclass(struct rm_class *, u_int32_t, int, u_int32_t,
+    int, u_int32_t, int);
+extern int rmc_init(struct ifclassq *, struct rm_ifdat *, u_int32_t,
+    void (*)(struct ifclassq *), u_int32_t, int, int, u_int32_t,
+    int, u_int32_t, int);
+extern int rmc_queue_packet(struct rm_class *, struct mbuf *, struct pf_mtag *);
+extern struct mbuf *rmc_dequeue_next(struct rm_ifdat *, cqdq_op_t);
+extern void rmc_update_class_util(struct rm_ifdat *);
+extern void rmc_delay_action(struct rm_class *, struct rm_class *);
+extern void rmc_drop(struct rm_class *, u_int32_t, u_int32_t *, u_int32_t *);
+extern void rmc_dropall(struct rm_class *);
+extern int rmc_get_weight(struct rm_ifdat *, int);
+extern void rmc_updateq(struct rm_class *, cqev_t);
+
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_RMCLASS_H_ */
diff --git a/bsd/net/pktsched/pktsched_rmclass_debug.h b/bsd/net/pktsched/pktsched_rmclass_debug.h
new file mode 100644 (file)
index 0000000..dd3f364
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* $NetBSD: altq_rmclass_debug.h,v 1.7 2006/10/12 19:59:08 peter Exp $ */
+/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $     */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the SMCC Technology
+ *      Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of Sun Microsystems, Inc may not be used to endorse or
+ *      promote products derived from this software without specific prior
+ *      written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_RMCLASS_DEBUG_H_
+#define        _NET_PKTSCHED_PKTSCHED_RMCLASS_DEBUG_H_
+
+/* #pragma ident       "@(#)rm_class_debug.h   1.7     98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef BSD_KERNEL_PRIVATE
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define        NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output using adb, type:
+ *     adb -k /dev/ksyms /dev/mem <cr>, then type
+ *     cbqtrace_count/D to get the count, then type
+ *     cbqtrace_buffer,0tcount/Dp4C" "Xn
+ *     This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events
+ * from Nth event in the circular buffer.
+ */
+
+struct cbqtrace {
+       int count;
+       int function;           /* address of function */
+       int trace_action;       /* descriptive 4 characters */
+       int object;             /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define        CBQTRACEINIT() {                                                \
+       if (cbqtrace_ptr == NULL)                                       \
+               cbqtrace_ptr = cbqtrace_buffer;                         \
+       else {                                                          \
+               cbqtrace_ptr = cbqtrace_buffer;                         \
+               bzero((void *)cbqtrace_ptr, sizeof (cbqtrace_buffer));  \
+               cbqtrace_count = 0;                                     \
+       }                                                               \
+}
+
+#define        CBQTRACE(func, act, obj) {                                      \
+       int *_p = &cbqtrace_ptr->count;                                 \
+       *_p++ = ++cbqtrace_count;                                       \
+       *_p++ = (int)(func);                                            \
+       *_p++ = (int)(act);                                             \
+       *_p++ = (int)(obj);                                             \
+       if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE]) \
+               cbqtrace_ptr = cbqtrace_buffer;                         \
+       else                                                            \
+               cbqtrace_ptr = (struct cbqtrace *)(void *)_p;           \
+       }
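+
+/*
+ * A usage sketch, mirroring an actual call in pktsched_rmclass.c: the
+ * action is a descriptive four-character tag, decoded by cbqtrace_dump().
+ *
+ *     CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ */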
+#else
+
+/* If no tracing, define no-ops */
+#define        CBQTRACEINIT()
+#define        CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NET_PKTSCHED_PKTSCHED_RMCLASS_DEBUG_H_ */
diff --git a/bsd/net/pktsched/pktsched_tcq.c b/bsd/net/pktsched/pktsched_tcq.c
new file mode 100644 (file)
index 0000000..fb66ee7
--- /dev/null
@@ -0,0 +1,1215 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * traffic class queue
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/net_osdep.h>
+
+#include <net/pktsched/pktsched_tcq.h>
+#include <netinet/in.h>
+
+/*
+ * function prototypes
+ */
+static int tcq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
+static struct mbuf *tcq_dequeue_tc_ifclassq(struct ifclassq *,
+    mbuf_svc_class_t, cqdq_op_t);
+static int tcq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
+static int tcq_clear_interface(struct tcq_if *);
+static struct tcq_class *tcq_class_create(struct tcq_if *, int, u_int32_t,
+    int, u_int32_t);
+static int tcq_class_destroy(struct tcq_if *, struct tcq_class *);
+static int tcq_destroy_locked(struct tcq_if *);
+static inline int tcq_addq(struct tcq_class *, struct mbuf *,
+    struct pf_mtag *);
+static inline struct mbuf *tcq_getq(struct tcq_class *);
+static inline struct mbuf *tcq_pollq(struct tcq_class *);
+static void tcq_purgeq(struct tcq_if *, struct tcq_class *, u_int32_t,
+    u_int32_t *, u_int32_t *);
+static void tcq_purge_sc(struct tcq_if *, cqrq_purge_sc_t *);
+static void tcq_updateq(struct tcq_if *, struct tcq_class *, cqev_t);
+static int tcq_throttle(struct tcq_if *, cqrq_throttle_t *);
+static int tcq_resumeq(struct tcq_if *, struct tcq_class *);
+static int tcq_suspendq(struct tcq_if *, struct tcq_class *);
+static struct mbuf *tcq_dequeue_cl(struct tcq_if *, struct tcq_class *,
+    mbuf_svc_class_t, cqdq_op_t);
+static inline struct tcq_class *tcq_clh_to_clp(struct tcq_if *, u_int32_t);
+static const char *tcq_style(struct tcq_if *);
+
+#define        TCQ_ZONE_MAX    32              /* maximum elements in zone */
+#define        TCQ_ZONE_NAME   "pktsched_tcq"  /* zone name */
+
+static unsigned int tcq_size;          /* size of zone element */
+static struct zone *tcq_zone;          /* zone for tcq */
+
+#define        TCQ_CL_ZONE_MAX 32              /* maximum elements in zone */
+#define        TCQ_CL_ZONE_NAME "pktsched_tcq_cl" /* zone name */
+
+static unsigned int tcq_cl_size;       /* size of zone element */
+static struct zone *tcq_cl_zone;       /* zone for tcq_class */
+
+void
+tcq_init(void)
+{
+       tcq_size = sizeof (struct tcq_if);
+       tcq_zone = zinit(tcq_size, TCQ_ZONE_MAX * tcq_size,
+           0, TCQ_ZONE_NAME);
+       if (tcq_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, TCQ_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(tcq_zone, Z_EXPAND, TRUE);
+       zone_change(tcq_zone, Z_CALLERACCT, TRUE);
+
+       tcq_cl_size = sizeof (struct tcq_class);
+       tcq_cl_zone = zinit(tcq_cl_size, TCQ_CL_ZONE_MAX * tcq_cl_size,
+           0, TCQ_CL_ZONE_NAME);
+       if (tcq_cl_zone == NULL) {
+               panic("%s: failed allocating %s", __func__, TCQ_CL_ZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(tcq_cl_zone, Z_EXPAND, TRUE);
+       zone_change(tcq_cl_zone, Z_CALLERACCT, TRUE);
+}
+
+struct tcq_if *
+tcq_alloc(struct ifnet *ifp, int how, boolean_t altq)
+{
+       struct tcq_if   *tif;
+
+       tif = (how == M_WAITOK) ? zalloc(tcq_zone) : zalloc_noblock(tcq_zone);
+       if (tif == NULL)
+               return (NULL);
+
+       bzero(tif, tcq_size);
+       tif->tif_maxpri = -1;
+       tif->tif_ifq = &ifp->if_snd;
+       if (altq)
+               tif->tif_flags |= TCQIFF_ALTQ;
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler allocated\n",
+                   if_name(ifp), tcq_style(tif));
+       }
+
+       return (tif);
+}
+
+int
+tcq_destroy(struct tcq_if *tif)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       int err;
+
+       IFCQ_LOCK(ifq);
+       err = tcq_destroy_locked(tif);
+       IFCQ_UNLOCK(ifq);
+
+       return (err);
+}
+
+static int
+tcq_destroy_locked(struct tcq_if *tif)
+{
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       (void) tcq_clear_interface(tif);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif));
+       }
+
+       zfree(tcq_zone, tif);
+
+       return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+tcq_clear_interface(struct tcq_if *tif)
+{
+       struct tcq_class        *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       /* clear out the classes */
+       for (pri = 0; pri <= tif->tif_maxpri; pri++)
+               if ((cl = tif->tif_classes[pri]) != NULL)
+                       tcq_class_destroy(tif, cl);
+
+       return (0);
+}
+
+/* discard all the queued packets on the interface */
+void
+tcq_purge(struct tcq_if *tif)
+{
+       struct tcq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       for (pri = 0; pri <= tif->tif_maxpri; pri++) {
+               if ((cl = tif->tif_classes[pri]) != NULL && !qempty(&cl->cl_q))
+                       tcq_purgeq(tif, cl, 0, NULL, NULL);
+       }
+#if !PF_ALTQ
+       /*
+        * This assertion is safe to be made only when PF_ALTQ is not
+        * configured; otherwise, IFCQ_LEN represents the sum of the
+        * packets managed by ifcq_disc and altq_disc instances, which
+        * is possible when transitioning between the two.
+        */
+       VERIFY(IFCQ_LEN(tif->tif_ifq) == 0);
+#endif /* !PF_ALTQ */
+}
+
+static void
+tcq_purge_sc(struct tcq_if *tif, cqrq_purge_sc_t *pr)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
+       VERIFY(pr->flow != 0);
+
+       if (pr->sc != MBUF_SC_UNSPEC) {
+               i = MBUF_SCIDX(pr->sc);
+               VERIFY(i < IFCQ_SC_MAX);
+
+               tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
+                   pr->flow, &pr->packets, &pr->bytes);
+       } else {
+               u_int32_t cnt, len;
+
+               pr->packets = 0;
+               pr->bytes = 0;
+
+               for (i = 0; i < IFCQ_SC_MAX; i++) {
+                       tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
+                           pr->flow, &cnt, &len);
+                       pr->packets += cnt;
+                       pr->bytes += len;
+               }
+       }
+}
+
+void
+tcq_event(struct tcq_if *tif, cqev_t ev)
+{
+       struct tcq_class *cl;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       for (pri = 0; pri <= tif->tif_maxpri; pri++)
+               if ((cl = tif->tif_classes[pri]) != NULL)
+                       tcq_updateq(tif, cl, ev);
+}
+
+int
+tcq_add_queue(struct tcq_if *tif, int priority, u_int32_t qlimit,
+    int flags, u_int32_t qid, struct tcq_class **clp)
+{
+       struct tcq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       /* check parameters */
+       if (priority >= TCQ_MAXPRI)
+               return (EINVAL);
+       if (tif->tif_classes[priority] != NULL)
+               return (EBUSY);
+       if (tcq_clh_to_clp(tif, qid) != NULL)
+               return (EBUSY);
+
+       cl = tcq_class_create(tif, priority, qlimit, flags, qid);
+       if (cl == NULL)
+               return (ENOMEM);
+
+       if (clp != NULL)
+               *clp = cl;
+
+       return (0);
+}
+
+static struct tcq_class *
+tcq_class_create(struct tcq_if *tif, int pri, u_int32_t qlimit,
+    int flags, u_int32_t qid)
+{
+       struct ifnet *ifp;
+       struct ifclassq *ifq;
+       struct tcq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       /* Sanitize flags unless internally configured */
+       if (tif->tif_flags & TCQIFF_ALTQ)
+               flags &= TQCF_USERFLAGS;
+
+#if !CLASSQ_RED
+       if (flags & TQCF_RED) {
+               log(LOG_ERR, "%s: %s RED not available!\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RED */
+
+#if !CLASSQ_RIO
+       if (flags & TQCF_RIO) {
+               log(LOG_ERR, "%s: %s RIO not available!\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_RIO */
+
+#if !CLASSQ_BLUE
+       if (flags & TQCF_BLUE) {
+               log(LOG_ERR, "%s: %s BLUE not available!\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif));
+               return (NULL);
+       }
+#endif /* !CLASSQ_BLUE */
+
+       /* These are mutually exclusive */
+       if ((flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) &&
+           (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_RED &&
+           (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_RIO &&
+           (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_BLUE &&
+           (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_SFB) {
+               log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif));
+               return (NULL);
+       }
+
+       ifq = tif->tif_ifq;
+       ifp = TCQIF_IFP(tif);
+
+       if ((cl = tif->tif_classes[pri]) != NULL) {
+               /* modify the class instead of creating a new one */
+               if (!qempty(&cl->cl_q))
+                       tcq_purgeq(tif, cl, 0, NULL, NULL);
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       } else {
+               cl = zalloc(tcq_cl_zone);
+               if (cl == NULL)
+                       return (NULL);
+
+               bzero(cl, tcq_cl_size);
+       }
+
+       tif->tif_classes[pri] = cl;
+       if (flags & TQCF_DEFAULTCLASS)
+               tif->tif_default = cl;
+       if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
+               qlimit = IFCQ_MAXLEN(ifq);
+               if (qlimit == 0)
+                       qlimit = DEFAULT_QLIMIT;  /* use default */
+       }
+       _qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
+       cl->cl_flags = flags;
+       cl->cl_pri = pri;
+       if (pri > tif->tif_maxpri)
+               tif->tif_maxpri = pri;
+       cl->cl_tif = tif;
+       cl->cl_handle = qid;
+
+       if (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) {
+#if CLASSQ_RED || CLASSQ_RIO
+               u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
+               int pkttime;
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+
+               cl->cl_qflags = 0;
+               if (flags & TQCF_ECN) {
+                       if (flags & TQCF_BLUE)
+                               cl->cl_qflags |= BLUEF_ECN;
+                       else if (flags & TQCF_SFB)
+                               cl->cl_qflags |= SFBF_ECN;
+                       else if (flags & TQCF_RED)
+                               cl->cl_qflags |= REDF_ECN;
+                       else if (flags & TQCF_RIO)
+                               cl->cl_qflags |= RIOF_ECN;
+               }
+               if (flags & TQCF_FLOWCTL) {
+                       if (flags & TQCF_SFB)
+                               cl->cl_qflags |= SFBF_FLOWCTL;
+               }
+               if (flags & TQCF_CLEARDSCP) {
+                       if (flags & TQCF_RIO)
+                               cl->cl_qflags |= RIOF_CLEARDSCP;
+               }
+#if CLASSQ_RED || CLASSQ_RIO
+               /*
+                * XXX: RED & RIO should be watching link speed and MTU
+                *      events and recompute pkttime accordingly.
+                */
+               if (ifbandwidth < 8)
+                       pkttime = 1000 * 1000 * 1000; /* 1 sec */
+               else
+                       pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
+                           (ifbandwidth / 8);
+
+               /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
+#if CLASSQ_RED
+               if (flags & TQCF_RED) {
+                       cl->cl_red = red_alloc(ifp, 0, 0,
+                           qlimit(&cl->cl_q) * 10/100,
+                           qlimit(&cl->cl_q) * 30/100,
+                           cl->cl_qflags, pkttime);
+                       if (cl->cl_red != NULL)
+                               qtype(&cl->cl_q) = Q_RED;
+               }
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+               if (flags & TQCF_RIO) {
+                       cl->cl_rio =
+                           rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
+                       if (cl->cl_rio != NULL)
+                               qtype(&cl->cl_q) = Q_RIO;
+               }
+#endif /* CLASSQ_RIO */
+#endif /* CLASSQ_RED || CLASSQ_RIO */
+#if CLASSQ_BLUE
+               if (flags & TQCF_BLUE) {
+                       cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
+                       if (cl->cl_blue != NULL)
+                               qtype(&cl->cl_q) = Q_BLUE;
+               }
+#endif /* CLASSQ_BLUE */
+               if (flags & TQCF_SFB) {
+                       if (!(cl->cl_flags & TQCF_LAZY))
+                               cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                                   qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb != NULL || (cl->cl_flags & TQCF_LAZY))
+                               qtype(&cl->cl_q) = Q_SFB;
+               }
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
+                   "flags=%b\n", if_name(ifp), tcq_style(tif),
+                   cl->cl_handle, cl->cl_pri, qlimit, flags, TQCF_BITS);
+       }
+
+       return (cl);
+}
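+
+/*
+ * A note on the RED/RIO pkttime computation above (hypothetical rate):
+ * pkttime approximates the wire time of one MTU-sized packet in
+ * nanoseconds, e.g. a 1500-byte MTU at 100 Mbps gives
+ * 1500 * 10^9 / (10^8 / 8) = 120000 ns.
+ */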
+
+int
+tcq_remove_queue(struct tcq_if *tif, u_int32_t qid)
+{
+       struct tcq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       if ((cl = tcq_clh_to_clp(tif, qid)) == NULL)
+               return (EINVAL);
+
+       return (tcq_class_destroy(tif, cl));
+}
+
+static int
+tcq_class_destroy(struct tcq_if *tif, struct tcq_class *cl)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       int pri;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!qempty(&cl->cl_q))
+               tcq_purgeq(tif, cl, 0, NULL, NULL);
+
+       tif->tif_classes[cl->cl_pri] = NULL;
+       if (tif->tif_maxpri == cl->cl_pri) {
+               for (pri = cl->cl_pri; pri >= 0; pri--)
+                       if (tif->tif_classes[pri] != NULL) {
+                               tif->tif_maxpri = pri;
+                               break;
+                       }
+               if (pri < 0)
+                       tif->tif_maxpri = -1;
+       }
+
+       if (tif->tif_default == cl)
+               tif->tif_default = NULL;
+
+       if (cl->cl_qalg.ptr != NULL) {
+#if CLASSQ_RIO
+               if (q_is_rio(&cl->cl_q))
+                       rio_destroy(cl->cl_rio);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+               if (q_is_red(&cl->cl_q))
+                       red_destroy(cl->cl_red);
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+               if (q_is_blue(&cl->cl_q))
+                       blue_destroy(cl->cl_blue);
+#endif /* CLASSQ_BLUE */
+               if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+                       sfb_destroy(cl->cl_sfb);
+               cl->cl_qalg.ptr = NULL;
+               qtype(&cl->cl_q) = Q_DROPTAIL;
+               qstate(&cl->cl_q) = QS_RUNNING;
+       }
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif),
+                   cl->cl_handle, cl->cl_pri);
+       }
+
+       zfree(tcq_cl_zone, cl);
+       return (0);
+}
+
+int
+tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, struct mbuf *m,
+    struct pf_mtag *t)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       int len, ret;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(cl == NULL || cl->cl_tif == tif);
+
+       if (cl == NULL) {
+               cl = tcq_clh_to_clp(tif, t->pftag_qid);
+               if (cl == NULL) {
+                       cl = tif->tif_default;
+                       if (cl == NULL) {
+                               IFCQ_CONVERT_LOCK(ifq);
+                               m_freem(m);
+                               return (ENOBUFS);
+                       }
+               }
+       }
+
+       len = m_pktlen(m);
+
+       ret = tcq_addq(cl, m, t);
+       if (ret != 0) {
+               if (ret == CLASSQEQ_SUCCESS_FC) {
+                       /* packet enqueued, return advisory feedback */
+                       ret = EQFULL;
+               } else {
+                       VERIFY(ret == CLASSQEQ_DROPPED ||
+                           ret == CLASSQEQ_DROPPED_FC ||
+                           ret == CLASSQEQ_DROPPED_SP);
+                       /* packet has been freed in tcq_addq */
+                       PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
+                       IFCQ_DROP_ADD(ifq, 1, len);
+                       switch (ret) {
+                       case CLASSQEQ_DROPPED:
+                               return (ENOBUFS);
+                       case CLASSQEQ_DROPPED_FC:
+                               return (EQFULL);
+                       case CLASSQEQ_DROPPED_SP:
+                               return (EQSUSPENDED);
+                       }
+                       /* NOTREACHED */
+               }
+       }
+       IFCQ_INC_LEN(ifq);
+
+       /* successfully queued. */
+       return (ret);
+}
+
+/*
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
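+/*
+ * A minimal sketch of that contract (hypothetical caller, IFCQ lock
+ * held; MBUF_SC_BE chosen arbitrarily):
+ *
+ *     m = tcq_dequeue_tc(tif, MBUF_SC_BE, CLASSQDQ_POLL);
+ *     if (m != NULL)
+ *             VERIFY(tcq_dequeue_tc(tif, MBUF_SC_BE,
+ *                 CLASSQDQ_REMOVE) == m);
+ */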
+struct mbuf *
+tcq_dequeue_tc(struct tcq_if *tif, mbuf_svc_class_t sc, cqdq_op_t op)
+{
+       return (tcq_dequeue_cl(tif, NULL, sc, op));
+}
+
+static struct mbuf *
+tcq_dequeue_cl(struct tcq_if *tif, struct tcq_class *cl,
+    mbuf_svc_class_t sc, cqdq_op_t op)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       struct mbuf *m;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (cl == NULL) {
+               cl = tcq_clh_to_clp(tif, MBUF_SCIDX(sc));
+               if (cl == NULL)
+                       return (NULL);
+       }
+
+       if (qempty(&cl->cl_q))
+               return (NULL);
+
+       VERIFY(!IFCQ_IS_EMPTY(ifq));
+
+       if (op == CLASSQDQ_POLL)
+               return (tcq_pollq(cl));
+
+       m = tcq_getq(cl);
+       if (m != NULL) {
+               IFCQ_DEC_LEN(ifq);
+               if (qempty(&cl->cl_q))
+                       cl->cl_period++;
+               PKTCNTR_ADD(&cl->cl_xmitcnt, 1, m_pktlen(m));
+               IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
+       }
+       return (m);
+}
+
+static inline int
+tcq_addq(struct tcq_class *cl, struct mbuf *m, struct pf_mtag *t)
+{
+       struct tcq_if *tif = cl->cl_tif;
+       struct ifclassq *ifq = tif->tif_ifq;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_addq(cl->cl_red, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb == NULL) {
+                       struct ifnet *ifp = TCQIF_IFP(tif);
+
+                       VERIFY(cl->cl_flags & TQCF_LAZY);
+                       cl->cl_flags &= ~TQCF_LAZY;
+                       IFCQ_CONVERT_LOCK(ifq);
+
+                       cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
+                           qlimit(&cl->cl_q), cl->cl_qflags);
+                       if (cl->cl_sfb == NULL) {
+                               /* fall back to droptail */
+                               qtype(&cl->cl_q) = Q_DROPTAIL;
+                               cl->cl_flags &= ~TQCF_SFB;
+                               cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
+
+                               log(LOG_ERR, "%s: %s SFB lazy allocation "
+                                   "failed for qid=%d pri=%d, falling back "
+                                   "to DROPTAIL\n", if_name(ifp),
+                                   tcq_style(tif), cl->cl_handle,
+                                   cl->cl_pri);
+                       } else if (tif->tif_throttle != IFNET_THROTTLE_OFF) {
+                               /* if there's pending throttling, set it */
+                               cqrq_throttle_t tr = { 1, tif->tif_throttle };
+                               int err = tcq_throttle(tif, &tr);
+
+                               if (err == EALREADY)
+                                       err = 0;
+                               if (err != 0) {
+                                       tr.level = IFNET_THROTTLE_OFF;
+                                       (void) tcq_throttle(tif, &tr);
+                               }
+                       }
+               }
+               if (cl->cl_sfb != NULL)
+                       return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
+       } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (CLASSQEQ_DROPPED);
+       }
+
+       if (cl->cl_flags & TQCF_CLEARDSCP)
+               write_dsfield(m, t, 0);
+
+       _addq(&cl->cl_q, m);
+
+       return (0);
+}
+
+static inline struct mbuf *
+tcq_getq(struct tcq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_tif->tif_ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_getq(cl->cl_rio, &cl->cl_q));
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_getq(cl->cl_red, &cl->cl_q));
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_getq(cl->cl_blue, &cl->cl_q));
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_getq(cl->cl_sfb, &cl->cl_q));
+
+       return (_getq(&cl->cl_q));
+}
+
+static inline struct mbuf *
+tcq_pollq(struct tcq_class *cl)
+{
+       IFCQ_LOCK_ASSERT_HELD(cl->cl_tif->tif_ifq);
+
+       return (qhead(&cl->cl_q));
+}
+
+static void
+tcq_purgeq(struct tcq_if *tif, struct tcq_class *cl, u_int32_t flow,
+    u_int32_t *packets, u_int32_t *bytes)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       u_int32_t cnt = 0, len = 0, qlen;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if ((qlen = qlen(&cl->cl_q)) == 0)
+               goto done;
+
+       /* become regular mutex before freeing mbufs */
+       IFCQ_CONVERT_LOCK(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
+       else
+               _flushq_flow(&cl->cl_q, flow, &cnt, &len);
+
+       if (cnt > 0) {
+               VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
+
+               PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
+               IFCQ_DROP_ADD(ifq, cnt, len);
+
+               VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
+               IFCQ_LEN(ifq) -= cnt;
+
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
+                           "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
+                           if_name(TCQIF_IFP(tif)), tcq_style(tif),
+                           cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
+                           cnt, len, flow);
+               }
+       }
+done:
+       if (packets != NULL)
+               *packets = cnt;
+       if (bytes != NULL)
+               *bytes = len;
+}
+
+static void
+tcq_updateq(struct tcq_if *tif, struct tcq_class *cl, cqev_t ev)
+{
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       if (pktsched_verbose) {
+               log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
+                   if_name(TCQIF_IFP(tif)), tcq_style(tif),
+                   cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
+       }
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               return (rio_updateq(cl->cl_rio, ev));
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               return (red_updateq(cl->cl_red, ev));
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               return (blue_updateq(cl->cl_blue, ev));
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               return (sfb_updateq(cl->cl_sfb, ev));
+}
+
+int
+tcq_get_class_stats(struct tcq_if *tif, u_int32_t qid,
+    struct tcq_classstats *sp)
+{
+       struct tcq_class *cl;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       if ((cl = tcq_clh_to_clp(tif, qid)) == NULL)
+               return (EINVAL);
+
+       sp->class_handle = cl->cl_handle;
+       sp->priority = cl->cl_pri;
+       sp->qlength = qlen(&cl->cl_q);
+       sp->qlimit = qlimit(&cl->cl_q);
+       sp->period = cl->cl_period;
+       sp->xmitcnt = cl->cl_xmitcnt;
+       sp->dropcnt = cl->cl_dropcnt;
+
+       sp->qtype = qtype(&cl->cl_q);
+       sp->qstate = qstate(&cl->cl_q);
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               red_getstats(cl->cl_red, &sp->red[0]);
+#endif /* CLASSQ_RED */
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               rio_getstats(cl->cl_rio, &sp->red[0]);
+#endif /* CLASSQ_RIO */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               blue_getstats(cl->cl_blue, &sp->blue);
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               sfb_getstats(cl->cl_sfb, &sp->sfb);
+
+       return (0);
+}
+
+/* convert a class handle to the corresponding class pointer */
+static inline struct tcq_class *
+tcq_clh_to_clp(struct tcq_if *tif, u_int32_t chandle)
+{
+       struct tcq_class *cl;
+       int idx;
+
+       IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
+
+       for (idx = tif->tif_maxpri; idx >= 0; idx--)
+               if ((cl = tif->tif_classes[idx]) != NULL &&
+                   cl->cl_handle == chandle)
+                       return (cl);
+
+       return (NULL);
+}
+
+static const char *
+tcq_style(struct tcq_if *tif)
+{
+       return ((tif->tif_flags & TCQIFF_ALTQ) ? "ALTQ_TCQ" : "TCQ");
+}
+
+/*
+ * tcq_enqueue_ifclassq is an enqueue function to be registered to
+ * (*ifcq_enqueue) in struct ifclassq.
+ */
+static int
+tcq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
+{
+       u_int32_t i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       if (!(m->m_flags & M_PKTHDR)) {
+               /* should not happen */
+               log(LOG_ERR, "%s: packet does not have pkthdr\n",
+                   if_name(ifq->ifcq_ifp));
+               IFCQ_CONVERT_LOCK(ifq);
+               m_freem(m);
+               return (ENOBUFS);
+       }
+
+       i = MBUF_SCIDX(mbuf_get_service_class(m));
+       VERIFY((u_int32_t)i < IFCQ_SC_MAX);
+
+       return (tcq_enqueue(ifq->ifcq_disc,
+           ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
+}
+
+/*
+ * tcq_dequeue_tc_ifclassq is a dequeue function to be registered to
+ * (*ifcq_dequeue) in struct ifclassq.
+ *
+ * note: CLASSQDQ_POLL returns the next packet without removing the packet
+ *     from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
+ *     CLASSQDQ_REMOVE must return the same packet if called immediately
+ *     after CLASSQDQ_POLL.
+ */
+static struct mbuf *
+tcq_dequeue_tc_ifclassq(struct ifclassq *ifq, mbuf_svc_class_t sc,
+    cqdq_op_t op)
+{
+       u_int32_t i = MBUF_SCIDX(sc);
+
+       VERIFY((u_int32_t)i < IFCQ_SC_MAX);
+
+       return (tcq_dequeue_cl(ifq->ifcq_disc,
+           ifq->ifcq_disc_slots[i].cl, sc, op));
+}
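A minimal sketch of the poll/remove contract noted above, as a hypothetical caller might exercise it (the ifq variable and service class are assumed; illustrative only, not part of the patch):

        /* Peek at the head packet without dequeueing it... */
        struct mbuf *peeked = tcq_dequeue_tc_ifclassq(ifq, MBUF_SC_BE,
            CLASSQDQ_POLL);
        /* ...then an immediate REMOVE must return that same packet. */
        struct mbuf *taken = tcq_dequeue_tc_ifclassq(ifq, MBUF_SC_BE,
            CLASSQDQ_REMOVE);
        VERIFY(peeked == NULL || peeked == taken);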
+
+static int
+tcq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
+{
+       struct tcq_if   *tif = (struct tcq_if *)ifq->ifcq_disc;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+       switch (req) {
+       case CLASSQRQ_PURGE:
+               tcq_purge(tif);
+               break;
+
+       case CLASSQRQ_PURGE_SC:
+               tcq_purge_sc(tif, (cqrq_purge_sc_t *)arg);
+               break;
+
+       case CLASSQRQ_EVENT:
+               tcq_event(tif, (cqev_t)arg);
+               break;
+
+       case CLASSQRQ_THROTTLE:
+               err = tcq_throttle(tif, (cqrq_throttle_t *)arg);
+               break;
+       }
+       return (err);
+}
+
+int
+tcq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
+{
+       struct ifnet *ifp = ifq->ifcq_ifp;
+       struct tcq_class *cl0, *cl1, *cl2, *cl3;
+       struct tcq_if *tif;
+       u_int32_t maxlen = 0, qflags = 0;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_disc == NULL);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
+
+       if (flags & PKTSCHEDF_QALG_RED)
+               qflags |= TQCF_RED;
+       if (flags & PKTSCHEDF_QALG_RIO)
+               qflags |= TQCF_RIO;
+       if (flags & PKTSCHEDF_QALG_BLUE)
+               qflags |= TQCF_BLUE;
+       if (flags & PKTSCHEDF_QALG_SFB)
+               qflags |= TQCF_SFB;
+       if (flags & PKTSCHEDF_QALG_ECN)
+               qflags |= TQCF_ECN;
+       if (flags & PKTSCHEDF_QALG_FLOWCTL)
+               qflags |= TQCF_FLOWCTL;
+
+       tif = tcq_alloc(ifp, M_WAITOK, FALSE);
+       if (tif == NULL)
+               return (ENOMEM);
+
+       if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
+               maxlen = if_sndq_maxlen;
+
+       if ((err = tcq_add_queue(tif, 0, maxlen,
+           qflags | TQCF_LAZY, SCIDX_BK, &cl0)) != 0)
+               goto cleanup;
+
+       if ((err = tcq_add_queue(tif, 1, maxlen,
+           qflags | TQCF_DEFAULTCLASS, SCIDX_BE, &cl1)) != 0)
+               goto cleanup;
+
+       if ((err = tcq_add_queue(tif, 2, maxlen,
+           qflags | TQCF_LAZY, SCIDX_VI, &cl2)) != 0)
+               goto cleanup;
+
+       if ((err = tcq_add_queue(tif, 3, maxlen,
+           qflags, SCIDX_VO, &cl3)) != 0)
+               goto cleanup;
+
+       err = ifclassq_attach(ifq, PKTSCHEDT_TCQ, tif,
+           tcq_enqueue_ifclassq, NULL, tcq_dequeue_tc_ifclassq,
+           tcq_request_ifclassq);
+
+       /* cache these for faster lookup */
+       if (err == 0) {
+               /* Map {BK_SYS,BK} to TC_BK */
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK;
+               ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;
+
+               ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
+               ifq->ifcq_disc_slots[SCIDX_BK].cl = cl0;
+
+               /* Map {BE,RD,OAM} to TC_BE */
+               ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
+               ifq->ifcq_disc_slots[SCIDX_BE].cl = cl1;
+
+               ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_BE;
+               ifq->ifcq_disc_slots[SCIDX_RD].cl = cl1;
+
+               ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_BE;
+               ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl1;
+
+               /* Map {AV,RV,VI} to TC_VI */
+               ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_VI;
+               ifq->ifcq_disc_slots[SCIDX_AV].cl = cl2;
+
+               ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_VI;
+               ifq->ifcq_disc_slots[SCIDX_RV].cl = cl2;
+
+               ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
+               ifq->ifcq_disc_slots[SCIDX_VI].cl = cl2;
+
+               /* Map {VO,CTL} to TC_VO */
+               ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
+               ifq->ifcq_disc_slots[SCIDX_VO].cl = cl3;
+
+               ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_VO;
+               ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl3;
+       }
+
+cleanup:
+       if (err != 0)
+               (void) tcq_destroy_locked(tif);
+
+       return (err);
+}
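A sketch of how the slot table initialized above is consulted on the enqueue path (values shown follow the mapping comments; illustrative, not part of the patch):

        /*
         * A packet tagged MBUF_SC_CTL indexes slot SCIDX_CTL, which the
         * setup code mapped to qid SCIDX_VO / class cl3, i.e. control
         * traffic shares the voice (TC_VO) queue.
         */
        u_int32_t idx = MBUF_SCIDX(MBUF_SC_CTL);                /* SCIDX_CTL */
        struct tcq_class *cls = ifq->ifcq_disc_slots[idx].cl;   /* cl3 */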
+
+int
+tcq_teardown_ifclassq(struct ifclassq *ifq)
+{
+       struct tcq_if *tif = ifq->ifcq_disc;
+       int i;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(tif != NULL && ifq->ifcq_type == PKTSCHEDT_TCQ);
+
+       (void) tcq_destroy_locked(tif);
+
+       ifq->ifcq_disc = NULL;
+       for (i = 0; i < IFCQ_SC_MAX; i++) {
+               ifq->ifcq_disc_slots[i].qid = 0;
+               ifq->ifcq_disc_slots[i].cl = NULL;
+       }
+
+       return (ifclassq_detach(ifq));
+}
+
+int
+tcq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
+    struct if_ifclassq_stats *ifqs)
+{
+       struct tcq_if *tif = ifq->ifcq_disc;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(ifq->ifcq_type == PKTSCHEDT_TCQ);
+
+       if (slot >= IFCQ_SC_MAX)
+               return (EINVAL);
+
+       return (tcq_get_class_stats(tif, ifq->ifcq_disc_slots[slot].qid,
+           &ifqs->ifqs_tcq_stats));
+}
+
+static int
+tcq_throttle(struct tcq_if *tif, cqrq_throttle_t *tr)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       struct tcq_class *cl;
+       int err;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+       VERIFY(!(tif->tif_flags & TCQIFF_ALTQ));
+
+       if (!tr->set) {
+               tr->level = tif->tif_throttle;
+               return (0);
+       }
+
+       if (tr->level == tif->tif_throttle)
+               return (EALREADY);
+
+       /* Current throttling levels only involve BK_SYS class */
+       cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;
+
+       switch (tr->level) {
+       case IFNET_THROTTLE_OFF:
+               err = tcq_resumeq(tif, cl);
+               break;
+
+       case IFNET_THROTTLE_OPPORTUNISTIC:
+               err = tcq_suspendq(tif, cl);
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       if (err == 0 || err == ENXIO) {
+               if (pktsched_verbose) {
+                       log(LOG_DEBUG, "%s: %s throttling %slevel set %d->%d\n",
+                           if_name(TCQIF_IFP(tif)), tcq_style(tif),
+                           (err == 0) ? "" : "lazy ", tif->tif_throttle,
+                           tr->level);
+               }
+               tif->tif_throttle = tr->level;
+               if (err != 0)
+                       err = 0;
+               else
+                       tcq_purgeq(tif, cl, 0, NULL, NULL);
+       } else {
+               log(LOG_ERR, "%s: %s unable to set throttling level "
+                   "%d->%d [error=%d]\n", if_name(TCQIF_IFP(tif)),
+                   tcq_style(tif), tif->tif_throttle, tr->level, err);
+       }
+
+       return (err);
+}
+
+static int
+tcq_resumeq(struct tcq_if *tif, struct tcq_class *cl)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
+               err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
+
+       if (err == 0)
+               qstate(&cl->cl_q) = QS_RUNNING;
+
+       return (err);
+}
+
+static int
+tcq_suspendq(struct tcq_if *tif, struct tcq_class *cl)
+{
+       struct ifclassq *ifq = tif->tif_ifq;
+       int err = 0;
+
+       IFCQ_LOCK_ASSERT_HELD(ifq);
+
+#if CLASSQ_RIO
+       if (q_is_rio(&cl->cl_q))
+               err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RIO */
+#if CLASSQ_RED
+       if (q_is_red(&cl->cl_q))
+               err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_RED */
+#if CLASSQ_BLUE
+       if (q_is_blue(&cl->cl_q))
+               err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
+       else
+#endif /* CLASSQ_BLUE */
+       if (q_is_sfb(&cl->cl_q)) {
+               if (cl->cl_sfb != NULL) {
+                       err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
+               } else {
+                       VERIFY(cl->cl_flags & TQCF_LAZY);
+                       err = ENXIO;    /* delayed throttling */
+               }
+       }
+
+       if (err == 0 || err == ENXIO)
+               qstate(&cl->cl_q) = QS_SUSPENDED;
+
+       return (err);
+}
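An editorial gloss on the ENXIO path above and its counterpart in tcq_throttle() (not part of the patch):

        /*
         * For a TQCF_LAZY class whose SFB state has not been allocated yet,
         * there is no queue state to suspend, so tcq_suspendq() still marks
         * the queue QS_SUSPENDED but returns ENXIO ("delayed throttling").
         * tcq_throttle() records the new level anyway, treating the ENXIO
         * as a lazy success, and tcq_addq() re-issues the throttle request
         * once sfb_alloc() eventually succeeds.
         */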
diff --git a/bsd/net/pktsched/pktsched_tcq.h b/bsd/net/pktsched/pktsched_tcq.h
new file mode 100644 (file)
index 0000000..8b85caa
--- /dev/null
+++ b/bsd/net/pktsched/pktsched_tcq.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_PKTSCHED_PKTSCHED_TCQ_H_
+#define        _NET_PKTSCHED_PKTSCHED_TCQ_H_
+
+#ifdef PRIVATE
+#include <net/pktsched/pktsched.h>
+#include <net/classq/classq.h>
+#include <net/classq/classq_red.h>
+#include <net/classq/classq_rio.h>
+#include <net/classq/classq_blue.h>
+#include <net/classq/classq_sfb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        TCQ_MAXPRI      4       /* upper limit of the number of priorities */
+
+/* tcq class flags */
+#define        TQCF_RED                0x0001  /* use RED */
+#define        TQCF_ECN                0x0002  /* use ECN with RED/BLUE/SFB */
+#define        TQCF_RIO                0x0004  /* use RIO */
+#define        TQCF_CLEARDSCP          0x0010  /* clear diffserv codepoint */
+#define        TQCF_BLUE               0x0100  /* use BLUE */
+#define        TQCF_SFB                0x0200  /* use SFB */
+#define        TQCF_FLOWCTL            0x0400  /* enable flow control advisories */
+#define        TQCF_DEFAULTCLASS       0x1000  /* default class */
+#ifdef BSD_KERNEL_PRIVATE
+#define        TQCF_LAZY               0x10000000 /* on-demand resource allocation */
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        TQCF_USERFLAGS                                                  \
+       (TQCF_RED | TQCF_ECN | TQCF_RIO | TQCF_CLEARDSCP | TQCF_BLUE |  \
+       TQCF_SFB | TQCF_FLOWCTL | TQCF_DEFAULTCLASS)
+
+#ifdef BSD_KERNEL_PRIVATE
+#define        TQCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
+       "\35LAZY"
+#else
+#define        TQCF_BITS \
+       "\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL"
+#endif /* !BSD_KERNEL_PRIVATE */
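The TQCF_BITS strings use the kernel's "%b"-style bit-decoding format: the leading "\020" (octal for 16) selects hexadecimal output, and each following octal byte gives a 1-indexed bit position followed by its name. A worked example (the decoded form shown is approximate; not part of the patch):

        /*
         * flags = TQCF_RED | TQCF_SFB = 0x0001 | 0x0200 = 0x0201
         * "\1" names bit 1 (RED) and "\12" (octal) names bit 10 (SFB),
         * so the decoded output reads roughly:  201<RED,SFB>
         */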
+
+struct tcq_classstats {
+       u_int32_t               class_handle;
+       u_int32_t               priority;
+
+       u_int32_t               qlength;
+       u_int32_t               qlimit;
+       u_int32_t               period;
+       struct pktcntr          xmitcnt;  /* transmitted packet counter */
+       struct pktcntr          dropcnt;  /* dropped packet counter */
+
+       /* RED, RIO, BLUE, SFB related info */
+       classq_type_t           qtype;
+       union {
+               /* RIO has 3 red stats */
+               struct red_stats        red[RIO_NDROPPREC];
+               struct blue_stats       blue;
+               struct sfb_stats        sfb;
+       };
+       classq_state_t          qstate;
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+struct tcq_class {
+       u_int32_t       cl_handle;      /* class handle */
+       class_queue_t   cl_q;           /* class queue structure */
+       u_int32_t       cl_qflags;      /* class queue flags */
+       union {
+               void            *ptr;
+               struct red      *red;   /* RED state */
+               struct rio      *rio;   /* RIO state */
+               struct blue     *blue;  /* BLUE state */
+               struct sfb      *sfb;   /* SFB state */
+       } cl_qalg;
+       int32_t         cl_pri;         /* priority */
+       u_int32_t       cl_flags;       /* class flags */
+       struct tcq_if   *cl_tif;        /* back pointer to tif */
+
+       /* statistics */
+       u_int32_t       cl_period;      /* backlog period */
+       struct pktcntr  cl_xmitcnt;     /* transmitted packet counter */
+       struct pktcntr  cl_dropcnt;     /* dropped packet counter */
+};
+
+#define        cl_red  cl_qalg.red
+#define        cl_rio  cl_qalg.rio
+#define        cl_blue cl_qalg.blue
+#define        cl_sfb  cl_qalg.sfb
+
+/* tcq_if flags */
+#define        TCQIFF_ALTQ             0x1     /* configured via PF/ALTQ */
+
+/*
+ * tcq interface state
+ */
+struct tcq_if {
+       struct ifclassq         *tif_ifq;       /* backpointer to ifclassq */
+       int                     tif_maxpri;     /* max priority in use */
+       u_int32_t               tif_flags;      /* flags */
+       u_int32_t               tif_throttle;   /* throttling level */
+       struct tcq_class        *tif_default;   /* default class */
+       struct tcq_class        *tif_classes[TCQ_MAXPRI]; /* classes */
+};
+
+#define        TCQIF_IFP(_tif)         ((_tif)->tif_ifq->ifcq_ifp)
+
+struct if_ifclassq_stats;
+
+extern void tcq_init(void);
+extern struct tcq_if *tcq_alloc(struct ifnet *, int, boolean_t);
+extern int tcq_destroy(struct tcq_if *);
+extern void tcq_purge(struct tcq_if *);
+extern void tcq_event(struct tcq_if *, cqev_t);
+extern int tcq_add_queue(struct tcq_if *, int, u_int32_t, int, u_int32_t,
+    struct tcq_class **);
+extern int tcq_remove_queue(struct tcq_if *, u_int32_t);
+extern int tcq_get_class_stats(struct tcq_if *, u_int32_t,
+    struct tcq_classstats *);
+extern int tcq_enqueue(struct tcq_if *, struct tcq_class *, struct mbuf *,
+    struct pf_mtag *);
+extern struct mbuf *tcq_dequeue_tc(struct tcq_if *, mbuf_svc_class_t,
+    cqdq_op_t);
+extern int tcq_setup_ifclassq(struct ifclassq *, u_int32_t);
+extern int tcq_teardown_ifclassq(struct ifclassq *ifq);
+extern int tcq_getqstats_ifclassq(struct ifclassq *, u_int32_t qid,
+    struct if_ifclassq_stats *);
+#endif /* BSD_KERNEL_PRIVATE */
+#ifdef __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* _NET_PKTSCHED_PKTSCHED_TCQ_H_ */
diff --git a/bsd/net/radix.c b/bsd/net/radix.c
index 1213828f76af53c98a40644d20d2717644adf11d..51c90586ac98f36f8a1d3b74ad0e264eb12e9fd2 100644 (file)
@@ -101,7 +101,6 @@ static char *rn_zeros, *rn_ones;
 
 extern lck_grp_t       *domain_proto_mtx_grp;
 extern lck_attr_t      *domain_proto_mtx_attr;
-lck_mtx_t *rn_mutex;
 
 #define rn_masktop (mask_rnhead->rnh_treetop)
 #undef Bcmp
@@ -1173,6 +1172,4 @@ rn_init(void)
                *cp++ = -1;
        if (rn_inithead((void **)&mask_rnhead, 0) == 0)
                panic("rn_init 2");
-
-       rn_mutex = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr);
 }
diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c
index 035c50926f4982006706fb774bbb04cd2de200c2..1284ca8c775229956c1f7030fb458f0b224fa343 100644 (file)
@@ -73,7 +73,8 @@
 
 #include <net/raw_cb.h>
 
-lck_mtx_t      *raw_mtx;       /*### global raw cb mutex for now */
+decl_lck_mtx_data(,raw_mtx_data);      /*### global raw cb mutex for now */
+lck_mtx_t      *raw_mtx = &raw_mtx_data;
 lck_attr_t     *raw_mtx_attr;
 lck_grp_t      *raw_mtx_grp;
 lck_grp_attr_t         *raw_mtx_grp_attr;
@@ -89,10 +90,7 @@ raw_init(void)
 
        raw_mtx_attr = lck_attr_alloc_init();
 
-       if ((raw_mtx = lck_mtx_alloc_init(raw_mtx_grp, raw_mtx_attr)) == NULL) {
-               printf("raw_init: can't alloc raw_mtx\n");
-               return;
-       }
+       lck_mtx_init(raw_mtx, raw_mtx_grp, raw_mtx_attr);
        LIST_INIT(&rawcb_list);
 }
 
diff --git a/bsd/net/route.c b/bsd/net/route.c
index 5ed681a0fa196e03e5a91564bad90c26c115d3da..132768cfe8c03e5dc26ca1885afc30b60748e7d2 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
@@ -60,7 +60,7 @@
  *     @(#)route.c     8.2 (Berkeley) 11/15/93
  * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $
  */
+
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
@@ -204,7 +204,8 @@ struct route_cb route_cb;
 __private_extern__ struct rtstat rtstat  = { 0, 0, 0, 0, 0 };
 struct radix_node_head *rt_tables[AF_MAX+1];
 
-lck_mtx_t              *rnh_lock;      /* global routing tables mutex */
+decl_lck_mtx_data(,rnh_lock_data);     /* global routing tables mutex */
+lck_mtx_t              *rnh_lock = &rnh_lock_data;
 static lck_attr_t      *rnh_lock_attr;
 static lck_grp_t       *rnh_lock_grp;
 static lck_grp_attr_t  *rnh_lock_grp_attr;
@@ -214,7 +215,6 @@ static lck_attr_t   *rte_mtx_attr;
 static lck_grp_t       *rte_mtx_grp;
 static lck_grp_attr_t  *rte_mtx_grp_attr;
 
-lck_mtx_t      *route_domain_mtx;      /*### global routing tables mutex for now */
 int rttrash = 0;               /* routes not in table but not freed */
 
 unsigned int rte_debug;
@@ -337,9 +337,6 @@ struct sockaddr_inifscope {
 #define        sin_scope_id    un._in_index.ifscope
 };
 
-#define        SA(sa)          ((struct sockaddr *)(size_t)(sa))
-#define        SIN(sa)         ((struct sockaddr_in *)(size_t)(sa))
-#define        SIN6(sa)        ((struct sockaddr_in6 *)(size_t)(sa))
 #define        SINIFSCOPE(sa)  ((struct sockaddr_inifscope *)(size_t)(sa))
 #define        SIN6IFSCOPE(sa) SIN6(sa)
 
@@ -398,7 +395,7 @@ static unsigned int primary6_ifscope = IFSCOPE_NONE;
 SYSCTL_DECL(_net_idle_route);
 
 static int rt_if_idle_expire_timeout = RT_IF_IDLE_EXPIRE_TIMEOUT;
-SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW,
+SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
     &rt_if_idle_expire_timeout, 0, "Default expiration time on routes for "
     "interface idle reference counting");
 
@@ -749,11 +746,7 @@ route_init(void)
        rnh_lock_grp_attr = lck_grp_attr_alloc_init();
        rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr);
        rnh_lock_attr = lck_attr_alloc_init();
-       if ((rnh_lock = lck_mtx_alloc_init(rnh_lock_grp,
-           rnh_lock_attr)) == NULL) {
-               printf("route_init: can't alloc rnh_lock\n");
-               return;
-       }
+       lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr);
 
        rte_mtx_grp_attr = lck_grp_attr_alloc_init();
        rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr);
@@ -763,7 +756,6 @@ route_init(void)
        rn_init();      /* initialize all zeroes, all ones, mask table */
        lck_mtx_unlock(rnh_lock);
        rtable_init((void **)rt_tables);
-       route_domain_mtx = routedomain.dom_mtx;
 
        if (rte_debug & RTD_DEBUG)
                size = sizeof (struct rtentry_dbg);
@@ -1453,7 +1445,7 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 #else
        if (dst != NULL && dst->sa_family == AF_INET && ip_doscopedroute)
 #endif /* !INET6 */
-               dst = sa_copy(SA(dst), &dst_ss, NULL);
+               dst = sa_copy(SA((uintptr_t)dst), &dst_ss, NULL);
 
 #if INET6
        if (gw != NULL &&
@@ -1462,7 +1454,7 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 #else
        if (gw != NULL && gw->sa_family == AF_INET && ip_doscopedroute)
 #endif /* !INET6 */
-               gw = sa_copy(SA(gw), &gw_ss, NULL);
+               gw = sa_copy(SA((uintptr_t)gw), &gw_ss, NULL);
 
        if (!(flags & RTF_GATEWAY)) {
                /*
@@ -1708,6 +1700,14 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
                 */
                rt->rt_flags |= RTF_CONDEMNED;
 
+               /*
+                * Clear RTF_ROUTER if it's set.
+                */
+               if (rt->rt_flags & RTF_ROUTER) {
+                       VERIFY(rt->rt_flags & RTF_HOST);
+                       rt->rt_flags &= ~RTF_ROUTER;
+               }
+
                /*
                 * Now search what's left of the subtree for any cloned
                 * routes which might have been formed from this node.
@@ -1819,10 +1819,13 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
                 * When scoped routing is enabled, cloned entries are
                 * always scoped according to the interface portion of
                 * the parent route.  The exception to this are IPv4
-                * link local addresses.
+                * link local addresses, or those routes that are cloned
+                * from a RTF_PROXY route.  For the latter, the clone
+                * gets to keep the RTF_PROXY flag.
                 */
-               if (af == AF_INET &&
-                   IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
+               if ((af == AF_INET &&
+                   IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) ||
+                   (rt->rt_flags & RTF_PROXY)) {
                        ifscope = IFSCOPE_NONE;
                        flags &= ~RTF_IFSCOPE;
                } else {
@@ -1878,11 +1881,12 @@ makeroute:
                 * also add the rt_gwroute if possible.
                 */
                if ((error = rt_setgate(rt, dst, gateway)) != 0) {
+                       int tmp = error;
                        RT_UNLOCK(rt);
                        nstat_route_detach(rt);
                        rte_lock_destroy(rt);
                        rte_free(rt);
-                       senderr(error);
+                       senderr(tmp);
                }
 
                /*
@@ -1949,10 +1953,8 @@ makeroute:
                 * then un-make it (this should be a function)
                 */
                if (rn == NULL) {
-                       if (rt->rt_gwroute) {
-                               rtfree_locked(rt->rt_gwroute);
-                               rt->rt_gwroute = NULL;
-                       }
+                       /* Clear gateway route */
+                       rt_set_gwroute(rt, rt_key(rt), NULL);
                        if (rt->rt_ifa) {
                                IFA_REMREF(rt->rt_ifa);
                                rt->rt_ifa = NULL;
@@ -1978,8 +1980,10 @@ makeroute:
                 */
                if (req == RTM_RESOLVE) {
                        RT_LOCK_SPIN(*ret_nrt);
-                       VERIFY((*ret_nrt)->rt_expire == 0 || (*ret_nrt)->rt_rmx.rmx_expire != 0);
-                       VERIFY((*ret_nrt)->rt_expire != 0 || (*ret_nrt)->rt_rmx.rmx_expire == 0);
+                       VERIFY((*ret_nrt)->rt_expire == 0 ||
+                           (*ret_nrt)->rt_rmx.rmx_expire != 0);
+                       VERIFY((*ret_nrt)->rt_expire != 0 ||
+                           (*ret_nrt)->rt_rmx.rmx_expire == 0);
                        rt->rt_rmx = (*ret_nrt)->rt_rmx;
                        rt_setexpire(rt, (*ret_nrt)->rt_expire);
                        if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
@@ -2029,10 +2033,13 @@ makeroute:
                }
 
                /*
-                * We repeat the same procedure from rt_setgate() here because
-                * it doesn't fire when we call it there because the node
-                * hasn't been added to the tree yet.
+                * We repeat the same procedures from rt_setgate() here
+                * because they weren't completed when we called it earlier,
+                * since the node was embryonic.
                 */
+               if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL)
+                       rt_set_gwroute(rt, rt_key(rt), rt->rt_gwroute);
+
                if (req == RTM_ADD &&
                    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
                        struct rtfc_arg arg;
@@ -2044,7 +2051,7 @@ makeroute:
                } else {
                        RT_UNLOCK(rt);
                }
-               
+
                nstat_route_new_entry(rt);
                break;
        }
@@ -2053,6 +2060,7 @@ bad:
                IFA_REMREF(ifa);
        return (error);
 }
+#undef senderr
 
 int
 rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
@@ -2221,6 +2229,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 {
        int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len);
        struct radix_node_head *rnh = rt_tables[dst->sa_family];
+       boolean_t loop = FALSE;
 
        lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
        RT_LOCK_ASSERT_HELD(rt);
@@ -2235,14 +2244,39 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
        /* Add an extra ref for ourselves */
        RT_ADDREF_LOCKED(rt);
 
+       if (rt->rt_flags & RTF_GATEWAY) {
+               if ((dst->sa_len == gate->sa_len) &&
+                   (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) {
+                       struct sockaddr_storage dst_ss, gate_ss;
+
+                       (void) sa_copy(dst, &dst_ss, NULL);
+                       (void) sa_copy(gate, &gate_ss, NULL);
+
+                       loop = equal(SA(&dst_ss), SA(&gate_ss));
+               } else {
+                       loop = (dst->sa_len == gate->sa_len &&
+                           equal(dst, gate));
+               }
+       }
+
+       /*
+        * A (cloning) network route with the destination equal to the gateway
+        * will create an endless loop (see notes below), so disallow it.
+        */
+       if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
+           RTF_GATEWAY) && loop) {
+               /* Release extra ref */
+               RT_REMREF_LOCKED(rt);
+               return (EADDRNOTAVAIL);
+       }
+
        /*
         * A host route with the destination equal to the gateway
         * will interfere with keeping LLINFO in the routing
         * table, so disallow it.
         */
        if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
-           (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) &&
-           (bcmp(dst, gate, dst->sa_len) == 0)) {
+           (RTF_HOST|RTF_GATEWAY)) && loop) {
                /*
                 * The route might already exist if this is an RTM_CHANGE
                 * or a routing redirect, so try to delete it.
@@ -2279,8 +2313,12 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
                        ifscope = IFSCOPE_NONE;
 
                RT_UNLOCK(rt);
-               gwrt = rtalloc1_scoped_locked(gate, 1,
-                   RTF_CLONING | RTF_PRCLONING, ifscope);
+               /*
+                * Don't ignore RTF_CLONING, since we prefer that rt_gwroute
+                * points to a clone rather than a cloning route; see above
+                * check for cloning loop avoidance (dst == gate).
+                */
+               gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
                if (gwrt != NULL)
                        RT_LOCK_ASSERT_NOTHELD(gwrt);
                RT_LOCK(rt);
@@ -2330,9 +2368,8 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
                        return (EBUSY);
                }
 
-               if (rt->rt_gwroute != NULL)
-                       rtfree_locked(rt->rt_gwroute);
-               rt->rt_gwroute = gwrt;
+               /* Set gateway route; callee adds ref to gwrt if non-NULL */
+               rt_set_gwroute(rt, dst, gwrt);
 
                /*
                 * In case the (non-scoped) default route gets modified via
@@ -2356,8 +2393,14 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
                if ((dst->sa_family == AF_INET) &&
                    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
                    (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
-                   get_primary_ifscope(AF_INET) == IFSCOPE_NONE))
-                       kdp_set_gateway_mac(SDL(gwrt->rt_gateway)->sdl_data);
+                   get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) {
+                       kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)->
+                           sdl_data);
+               }
+
+               /* Release extra ref from rtalloc1() */
+               if (gwrt != NULL)
+                       RT_REMREF(gwrt);
        }
 
        /*
@@ -2373,9 +2416,8 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
                /* The underlying allocation is done with M_WAITOK set */
                R_Malloc(new, caddr_t, dlen + glen);
                if (new == NULL) {
-                       if (rt->rt_gwroute != NULL)
-                               rtfree_locked(rt->rt_gwroute);
-                       rt->rt_gwroute = NULL;
+                       /* Clear gateway route */
+                       rt_set_gwroute(rt, dst, NULL);
                        /* Release extra ref */
                        RT_REMREF_LOCKED(rt);
                        return (ENOBUFS);
@@ -2436,6 +2478,60 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 
 #undef SA_SIZE
 
+void
+rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
+{
+       boolean_t gwrt_isrouter;
+
+       lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+       RT_LOCK_ASSERT_HELD(rt);
+
+       if (gwrt != NULL)
+               RT_ADDREF(gwrt);        /* for this routine */
+
+       /*
+        * Get rid of existing gateway route; if rt_gwroute is already
+        * set to gwrt, this is slightly redundant (though safe since
+        * we held an extra ref above) but makes the code simpler.
+        */
+       if (rt->rt_gwroute != NULL) {
+               struct rtentry *ogwrt = rt->rt_gwroute;
+
+               VERIFY(rt != ogwrt);    /* sanity check */
+               rt->rt_gwroute = NULL;
+               RT_UNLOCK(rt);
+               rtfree_locked(ogwrt);
+               RT_LOCK(rt);
+               VERIFY(rt->rt_gwroute == NULL);
+       }
+
+       /*
+        * And associate the new gateway route.
+        */
+       if ((rt->rt_gwroute = gwrt) != NULL) {
+               RT_ADDREF(gwrt);        /* for rt */
+
+               if (rt->rt_flags & RTF_WASCLONED) {
+                       /* rt_parent might be NULL if rt is embryonic */
+                       gwrt_isrouter = (rt->rt_parent != NULL &&
+                           SA_DEFAULT(rt_key(rt->rt_parent)) &&
+                           !RT_HOST(rt->rt_parent));
+               } else {
+                       gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt));
+               }
+
+               /* If gwrt points to a default router, mark it accordingly */
+               if (gwrt_isrouter && RT_HOST(gwrt) &&
+                   !(gwrt->rt_flags & RTF_ROUTER)) {
+                       RT_LOCK(gwrt);
+                       gwrt->rt_flags |= RTF_ROUTER;
+                       RT_UNLOCK(gwrt);
+               }
+
+               RT_REMREF(gwrt);        /* for this routine */
+       }
+}
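A hedged sketch of the rt_set_gwroute() calling convention, mirroring the in-tree callers (identifiers assumed; not part of the patch):

        /*
         * Caller holds rnh_lock and rt's lock.  rt_set_gwroute() takes its
         * own reference on gwrt, so any reference the caller obtained (e.g.
         * from rtalloc1()) must still be released afterwards.
         */
        lck_mtx_lock(rnh_lock);
        RT_LOCK(rt);
        rt_set_gwroute(rt, rt_key(rt), gwrt);
        RT_UNLOCK(rt);
        lck_mtx_unlock(rnh_lock);
        if (gwrt != NULL)
                RT_REMREF(gwrt);        /* drop the caller's own reference */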
+
 static void
 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
              struct sockaddr *netmask)
@@ -2706,7 +2802,7 @@ rt_validate(struct rtentry *rt)
 {
        RT_LOCK_ASSERT_HELD(rt);
 
-       if (!(rt->rt_flags & RTF_CONDEMNED)) {
+       if ((rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) == RTF_UP) {
                int af = rt_key(rt)->sa_family;
 
                if (af == AF_INET)
@@ -2970,6 +3066,34 @@ rt_clear_idleref(struct rtentry *rt)
        }
 }
 
+void
+rt_set_proxy(struct rtentry *rt, boolean_t set)
+{
+       lck_mtx_lock(rnh_lock);
+       RT_LOCK(rt);
+       /*
+        * Search for any cloned routes which might have
+        * been formed from this node, and delete them.
+        */
+       if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
+               struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
+
+               if (set)
+                       rt->rt_flags |= RTF_PROXY;
+               else
+                       rt->rt_flags &= ~RTF_PROXY;
+
+               RT_UNLOCK(rt);
+               if (rnh != NULL && rt_mask(rt)) {
+                       rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
+                           rt_fixdelete, rt);
+               }
+       } else {
+               RT_UNLOCK(rt);
+       }
+       lck_mtx_unlock(rnh_lock);
+}
+
 static void
 rte_lock_init(struct rtentry *rt)
 {
@@ -3189,3 +3313,255 @@ route_copyin(
        /* This function consumes the reference */
        src->ro_rt = NULL;
 }
+
+/*
+ * route_to_gwroute will find the gateway route for a given route.
+ *
+ * If the route is down, look the route up again.
+ * If the route goes through a gateway, get the route to the gateway.
+ * If the gateway route is down, look it up again.
+ * If the route is set to reject, verify it hasn't expired.
+ *
+ * If the returned route is non-NULL, the caller is responsible for
+ * releasing the reference and unlocking the route.
+ */
+#define senderr(e) { error = (e); goto bad; }
+errno_t
+route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0,
+     struct rtentry **out_route)
+{
+       uint64_t timenow;
+       struct rtentry *rt = hint0, *hint = hint0;
+       errno_t error = 0;
+       unsigned int ifindex;
+       boolean_t gwroute;
+
+       *out_route = NULL;
+
+       if (rt == NULL)
+               return (0);
+
+       /*
+        * Next hop determination.  Because we may involve the gateway route
+        * in addition to the original route, locking is rather complicated.
+        * The general concept is that regardless of whether the route points
+        * to the original route or to the gateway route, this routine takes
+        * an extra reference on such a route.  This extra reference will be
+        * released at the end.
+        *
+        * Care must be taken to ensure that the "hint0" route never gets freed
+        * via rtfree(), since the caller may have stored it inside a struct
+        * route with a reference held for that placeholder.
+        */
+       RT_LOCK_SPIN(rt);
+       ifindex = rt->rt_ifp->if_index;
+       RT_ADDREF_LOCKED(rt);
+       if (!(rt->rt_flags & RTF_UP)) {
+               RT_REMREF_LOCKED(rt);
+               RT_UNLOCK(rt);
+               /* route is down, find a new one */
+               hint = rt = rtalloc1_scoped((struct sockaddr *)
+                   (size_t)net_dest, 1, 0, ifindex);
+               if (hint != NULL) {
+                       RT_LOCK_SPIN(rt);
+                       ifindex = rt->rt_ifp->if_index;
+               } else {
+                       senderr(EHOSTUNREACH);
+               }
+       }
+
+       /*
+        * We have a reference to "rt" by now; it will either
+        * be released or freed at the end of this routine.
+        */
+       RT_LOCK_ASSERT_HELD(rt);
+       if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) {
+               struct rtentry *gwrt = rt->rt_gwroute;
+               struct sockaddr_storage ss;
+               struct sockaddr *gw = (struct sockaddr *)&ss;
+
+               VERIFY(rt == hint);
+               RT_ADDREF_LOCKED(hint);
+
+               /* If there's no gateway rt, look it up */
+               if (gwrt == NULL) {
+                       bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
+                           rt->rt_gateway->sa_len));
+                       RT_UNLOCK(rt);
+                       goto lookup;
+               }
+               /* Become a regular mutex */
+               RT_CONVERT_LOCK(rt);
+
+               /*
+                * Take gwrt's lock while holding route's lock;
+                * this is okay since gwrt never points back
+                * to "rt", so no lock ordering issues.
+                */
+               RT_LOCK_SPIN(gwrt);
+               if (!(gwrt->rt_flags & RTF_UP)) {
+                       rt->rt_gwroute = NULL;
+                       RT_UNLOCK(gwrt);
+                       bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
+                           rt->rt_gateway->sa_len));
+                       RT_UNLOCK(rt);
+                       rtfree(gwrt);
+lookup:
+                       lck_mtx_lock(rnh_lock);
+                       gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex);
+
+                       RT_LOCK(rt);
+                       /*
+                        * Bail out if the route is down, no route
+                        * to gateway, circular route, or if the
+                        * gateway portion of "rt" has changed.
+                        */
+                       if (!(rt->rt_flags & RTF_UP) || gwrt == NULL ||
+                           gwrt == rt || !equal(gw, rt->rt_gateway)) {
+                               if (gwrt == rt) {
+                                       RT_REMREF_LOCKED(gwrt);
+                                       gwrt = NULL;
+                               }
+                               VERIFY(rt == hint);
+                               RT_REMREF_LOCKED(hint);
+                               hint = NULL;
+                               RT_UNLOCK(rt);
+                               if (gwrt != NULL)
+                                       rtfree_locked(gwrt);
+                               lck_mtx_unlock(rnh_lock);
+                               senderr(EHOSTUNREACH);
+                       }
+                       VERIFY(gwrt != NULL);
+                       /*
+                        * Set gateway route; callee adds ref to gwrt;
+                        * gwrt has an extra ref from rtalloc1() for
+                        * this routine.
+                        */
+                       rt_set_gwroute(rt, rt_key(rt), gwrt);
+                       VERIFY(rt == hint);
+                       RT_REMREF_LOCKED(rt);   /* hint still holds a refcnt */
+                       RT_UNLOCK(rt);
+                       lck_mtx_unlock(rnh_lock);
+                       rt = gwrt;
+               } else {
+                       RT_ADDREF_LOCKED(gwrt);
+                       RT_UNLOCK(gwrt);
+                       VERIFY(rt == hint);
+                       RT_REMREF_LOCKED(rt);   /* hint still holds a refcnt */
+                       RT_UNLOCK(rt);
+                       rt = gwrt;
+               }
+               VERIFY(rt == gwrt && rt != hint);
+
+               /*
+                * This is an opportunity to revalidate the parent route's
+                * rt_gwroute, in case it now points to a dead route entry.
+                * Parent route won't go away since the clone (hint) holds
+                * a reference to it.  rt == gwrt.
+                */
+               RT_LOCK_SPIN(hint);
+               if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
+                   (RTF_WASCLONED | RTF_UP)) {
+                       struct rtentry *prt = hint->rt_parent;
+                       VERIFY(prt != NULL);
+
+                       RT_CONVERT_LOCK(hint);
+                       RT_ADDREF(prt);
+                       RT_UNLOCK(hint);
+                       rt_revalidate_gwroute(prt, rt);
+                       RT_REMREF(prt);
+               } else {
+                       RT_UNLOCK(hint);
+               }
+
+               /* Clean up "hint" now; see notes above regarding hint0 */
+               if (hint == hint0)
+                       RT_REMREF(hint);
+               else
+                       rtfree(hint);
+               hint = NULL;
+
+               /* rt == gwrt; if it is now down, give up */
+               RT_LOCK_SPIN(rt);
+               if (!(rt->rt_flags & RTF_UP)) {
+                       RT_UNLOCK(rt);
+                       senderr(EHOSTUNREACH);
+               }
+       }
+
+       if (rt->rt_flags & RTF_REJECT) {
+               VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+               VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+               timenow = net_uptime();
+               if (rt->rt_expire == 0 || timenow < rt->rt_expire) {
+                       RT_UNLOCK(rt);
+                       senderr(!gwroute ? EHOSTDOWN : EHOSTUNREACH);
+               }
+       }
+
+       /* Become a regular mutex */
+       RT_CONVERT_LOCK(rt);
+
+       /* Caller is responsible for cleaning up "rt" */
+       *out_route = rt;
+       return (0);
+
+bad:
+       /* Clean up route (either it is "rt" or "gwrt") */
+       if (rt != NULL) {
+               RT_LOCK_SPIN(rt);
+               if (rt == hint0) {
+                       RT_REMREF_LOCKED(rt);
+                       RT_UNLOCK(rt);
+               } else {
+                       RT_UNLOCK(rt);
+                       rtfree(rt);
+               }
+       }
+       return (error);
+}
+#undef senderr
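A minimal sketch of a hypothetical caller honoring the contract described above (net_dest and hint0 are assumed; not part of the patch):

        struct rtentry *route = NULL;
        errno_t err = route_to_gwroute(net_dest, hint0, &route);

        if (err == 0 && route != NULL) {
                RT_LOCK_ASSERT_HELD(route);     /* returned locked, with a ref */
                /* ... transmit via route->rt_ifp / route->rt_gateway ... */
                RT_REMREF_LOCKED(route);        /* release the extra reference */
                RT_UNLOCK(route);
        }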
+
+void
+rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt)
+{
+       VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING));
+       VERIFY(gwrt != NULL);
+
+       RT_LOCK_SPIN(rt);
+       if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) &&
+           rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family ==
+           rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL ||
+           !(rt->rt_gwroute->rt_flags & RTF_UP))) {
+               boolean_t isequal;
+
+               if (rt->rt_gateway->sa_family == AF_INET ||
+                   rt->rt_gateway->sa_family == AF_INET6) {
+                       struct sockaddr_storage key_ss, gw_ss;
+                       /*
+                        * We need to compare rt_key and rt_gateway; create
+                        * local copies to get rid of any ifscope association.
+                        */
+                       (void) sa_copy(rt_key(gwrt), &key_ss, NULL);
+                       (void) sa_copy(rt->rt_gateway, &gw_ss, NULL);
+
+                       isequal = equal(SA(&key_ss), SA(&gw_ss));
+               } else {
+                       isequal = equal(rt_key(gwrt), rt->rt_gateway);
+               }
+
+               /* If they are the same, update gwrt */
+               if (isequal) {
+                       RT_UNLOCK(rt);
+                       lck_mtx_lock(rnh_lock);
+                       RT_LOCK(rt);
+                       rt_set_gwroute(rt, rt_key(rt), gwrt);
+                       RT_UNLOCK(rt);
+                       lck_mtx_unlock(rnh_lock);
+               } else {
+                       RT_UNLOCK(rt);
+               }
+       } else {
+               RT_UNLOCK(rt);
+       }
+}
diff --git a/bsd/net/route.h b/bsd/net/route.h
index 47aa3f9023359e459034e2114235cd4f0852dac9..c5fe155d6f01b15969f6d2a74998d7f8bd03a448 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -103,6 +103,9 @@ struct rt_reach_info {
        u_int32_t               ri_probes;      /* total # of probes */
        u_int64_t               ri_snd_expire;  /* transmit expiration (calendar) time */
        u_int64_t               ri_rcv_expire;  /* receive expiration (calendar) time */
+       int32_t                 ri_rssi;        /* received signal strength */
+       int32_t                 ri_lqm;         /* link quality metric */
+       int32_t                 ri_npm;         /* node proximity metric */
 };
 #else
 struct route;
@@ -154,13 +157,14 @@ struct rt_metrics {
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #endif
+struct ifnet_llreach_info;     /* forward declaration */
 /*
  * Kernel routing entry structure (private).
  */
 struct rtentry {
        struct  radix_node rt_nodes[2]; /* tree glue, and other values */
-#define        rt_key(r)       ((struct sockaddr *)((r)->rt_nodes->rn_key))
-#define        rt_mask(r)      ((struct sockaddr *)((r)->rt_nodes->rn_mask))
+#define        rt_key(r)       ((struct sockaddr *)(void *)((r)->rt_nodes->rn_key))
+#define        rt_mask(r)      ((struct sockaddr *)(void *)((r)->rt_nodes->rn_mask))
        struct  sockaddr *rt_gateway;   /* value */
        int32_t rt_refcnt;              /* # held references */
        uint32_t rt_flags;              /* up/down?, host/net */
@@ -170,6 +174,8 @@ struct rtentry {
        void    *rt_llinfo;             /* pointer to link level info cache */
        void    (*rt_llinfo_get_ri)     /* llinfo get reachability info fn */
            (struct rtentry *, struct rt_reach_info *);
+       void    (*rt_llinfo_get_iflri)  /* ifnet llinfo get reach. info fn */
+           (struct rtentry *, struct ifnet_llreach_info *);
        void    (*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */
        void    (*rt_llinfo_free)(void *); /* link level info free function */
        struct  rt_metrics rt_rmx;      /* metrics used by rx'ing protocols */
@@ -222,7 +228,9 @@ extern void rt_setexpire(struct rtentry *, uint64_t);
 #define RTF_IFSCOPE    0x1000000       /* has valid interface scope */
 #define RTF_CONDEMNED  0x2000000       /* defunct; no longer modifiable */
 #define RTF_IFREF      0x4000000       /* route holds a ref to interface */
-                                       /* 0x8000000 and up unassigned */
+#define        RTF_PROXY       0x8000000       /* proxying, no interface scope */
+#define        RTF_ROUTER      0x10000000      /* host is a router */
+                                       /* 0x20000000 and up unassigned */
 
 /*
  * Routing statistics.
@@ -535,6 +543,12 @@ extern void rt_set_idleref(struct rtentry *);
 extern void rt_clear_idleref(struct rtentry *);
 extern void rt_aggdrain(int);
 extern boolean_t rt_validate(struct rtentry *);
+extern void rt_set_proxy(struct rtentry *, boolean_t);
+extern void rt_set_gwroute(struct rtentry *, struct sockaddr *,
+    struct rtentry *);
+extern void rt_revalidate_gwroute(struct rtentry *, struct rtentry *);
+extern errno_t route_to_gwroute(const struct sockaddr *, struct rtentry *,
+    struct rtentry **);
 
 #ifdef XNU_KERNEL_PRIVATE
 extern void route_copyin(struct route *src, struct route *dst, size_t length);
diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c
index 42b20064a18dd375084c5b8eefc5e03fde7f3e7d..d8a1b60b26e58eca555f664ae50d2dacc41ecc91 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -118,14 +118,17 @@ static void       rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
                    struct sockaddr *, unsigned int);
 static void rt_drainall(void);
 
+#ifndef SIN
 #define        SIN(sa)         ((struct sockaddr_in *)(size_t)(sa))
+#endif
 
-
-SYSCTL_NODE(_net, OID_AUTO, idle, CTLFLAG_RW, 0, "idle network monitoring");
+SYSCTL_NODE(_net, OID_AUTO, idle, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+    "idle network monitoring");
 
 static struct timeval last_ts;
 
-SYSCTL_NODE(_net_idle, OID_AUTO, route, CTLFLAG_RW, 0, "idle route monitoring");
+SYSCTL_NODE(_net_idle, OID_AUTO, route, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+    "idle route monitoring");
 
 static int rt_if_idle_drain_interval = RT_IF_IDLE_DRAIN_INTERVAL;
 SYSCTL_INT(_net_idle_route, OID_AUTO, drain_interval, CTLFLAG_RW,
@@ -330,7 +333,7 @@ route_output(struct mbuf *m, struct socket *so)
        int sendonlytoself = 0;
        unsigned int ifscope = IFSCOPE_NONE;
 
-#define senderr(e) { error = e; goto flush;}
+#define senderr(e) { error = (e); goto flush;}
        if (m == NULL ||
            ((m->m_len < sizeof(intptr_t)) && (m = m_pullup(m, sizeof(intptr_t))) == 0))
                return (ENOBUFS);
@@ -433,6 +436,12 @@ route_output(struct mbuf *m, struct socket *so)
                ifscope = rtm->rtm_index;
        }
 
+       /*
+        * RTF_PROXY can only be set internally from within the kernel.
+        */
+       if (rtm->rtm_flags & RTF_PROXY)
+               senderr(EINVAL);
+
        /*
         * For AF_INET, always zero out the embedded scope ID.  If this is
         * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
@@ -464,7 +473,7 @@ route_output(struct mbuf *m, struct socket *so)
  * confusing the routing table with a wrong route to the previous default gateway
  */
 {
-#define satosinaddr(sa) (((struct sockaddr_in *)sa)->sin_addr.s_addr)
+#define satosinaddr(sa) (((struct sockaddr_in *)(void *)sa)->sin_addr.s_addr)
        
                        if (check_routeselfref && (info.rti_info[RTAX_DST] && info.rti_info[RTAX_DST]->sa_family == AF_INET) && 
                                (info.rti_info[RTAX_NETMASK] && satosinaddr(info.rti_info[RTAX_NETMASK]) == INADDR_BROADCAST) &&
@@ -620,8 +629,9 @@ route_output(struct mbuf *m, struct socket *so)
                                case RTM_CHANGE:
                                        if (info.rti_info[RTAX_GATEWAY] && (error = rt_setgate(rt,
                                            rt_key(rt), info.rti_info[RTAX_GATEWAY]))) {
+                                               int tmp = error;
                                                RT_UNLOCK(rt);
-                                               senderr(error);
+                                               senderr(tmp);
                                        }
                                        /*
                                         * If they tried to change things but didn't specify
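Copying error into a temporary first looks redundant, but senderr() expands its argument into an assignment to error, so senderr(error) would expand to the self-assignment error = (error), which newer compilers flag (clang's -Wself-assign). The temporary keeps the behavior and silences the warning:

	senderr(error); /* -> { error = (error); goto flush; }  self-assign */
	tmp = error;
	senderr(tmp);   /* -> { error = (tmp); goto flush; }    clean */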
@@ -1162,8 +1172,7 @@ again:
                        if (rw->w_tmemsize < len) {
                                if (rw->w_tmem)
                                        FREE(rw->w_tmem, M_RTABLE);
-                               rw->w_tmem = (caddr_t)
-                                       _MALLOC(len, M_RTABLE, M_WAITOK); /*###LD0412 was NOWAIT */
+                               rw->w_tmem = _MALLOC(len, M_RTABLE, M_WAITOK);
                                if (rw->w_tmem)
                                        rw->w_tmemsize = len;
                        }
@@ -1175,7 +1184,7 @@ again:
                }
        }
        if (cp) {
-               struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+               struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)cp0;
 
                rtm->rtm_version = RTM_VERSION;
                rtm->rtm_type = type;
@@ -1392,7 +1401,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
        if (w->w_op != NET_RT_DUMP2) {
                size = rt_msg2(RTM_GET, &info, 0, w);
                if (w->w_req && w->w_tmem) {
-                       struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
+                       struct rt_msghdr *rtm =
+                           (struct rt_msghdr *)(void *)w->w_tmem;
 
                        rtm->rtm_flags = rt->rt_flags;
                        rtm->rtm_use = rt->rt_use;
@@ -1409,7 +1419,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
        } else {
                size = rt_msg2(RTM_GET2, &info, 0, w);
                if (w->w_req && w->w_tmem) {
-                       struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)w->w_tmem;
+                       struct rt_msghdr2 *rtm =
+                           (struct rt_msghdr2 *)(void *)w->w_tmem;
 
                        rtm->rtm_flags = rt->rt_flags;
                        rtm->rtm_use = rt->rt_use;
@@ -1455,7 +1466,8 @@ sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
        
        size = rt_msg2(RTM_GET_EXT, &info, 0, w);
        if (w->w_req && w->w_tmem) {
-               struct rt_msghdr_ext *ertm = (struct rt_msghdr_ext *)w->w_tmem;
+               struct rt_msghdr_ext *ertm =
+                   (struct rt_msghdr_ext *)(void *)w->w_tmem;
 
                ertm->rtm_flags = rt->rt_flags;
                ertm->rtm_use = rt->rt_use;
@@ -1465,8 +1477,12 @@ sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
                ertm->rtm_seq = 0;
                ertm->rtm_errno = 0;
                ertm->rtm_addrs = info.rti_addrs;
-               if (rt->rt_llinfo_get_ri == NULL)
+               if (rt->rt_llinfo_get_ri == NULL) {
                        bzero(&ertm->rtm_ri, sizeof (ertm->rtm_ri));
+                       ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
+                       ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
+                       ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
+               }
                else
                        rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
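When a route's interface supplies no rt_llinfo_get_ri callback, the extended message now reports explicit "unknown" sentinels instead of all zeros, since zero could be mistaken for a real reading. Consumers of RTM_GET_EXT should therefore test the sentinels; a sketch, assuming rtm_ri is the rt_reach_info structure used elsewhere in this release:

	struct rt_reach_info *ri = &ertm->rtm_ri;

	if (ri->ri_rssi == IFNET_RSSI_UNKNOWN)
	        printf("no RSSI reading for this route\n");
	if (ri->ri_lqm == IFNET_LQM_THRESH_OFF)
	        printf("link quality monitoring is off\n");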
 
@@ -1538,7 +1554,7 @@ sysctl_iflist(int af, struct walkarg *w)
                                len = rt_msg2(RTM_IFINFO, &info, (caddr_t)cp, NULL);
                                info.rti_info[RTAX_IFP] = NULL;
        
-                               ifm = (struct if_msghdr *)cp;
+                               ifm = (struct if_msghdr *)(void *)cp;
                                ifm->ifm_index = ifp->if_index;
                                ifm->ifm_flags = (u_short)ifp->if_flags;
                                if_data_internal_to_if_data(ifp, &ifp->if_data,
@@ -1573,7 +1589,7 @@ sysctl_iflist(int af, struct walkarg *w)
                                        }
                                        len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, NULL);
        
-                                       ifam = (struct ifa_msghdr *)cp;
+                                       ifam = (struct ifa_msghdr *)(void *)cp;
                                        ifam->ifam_index = ifa->ifa_ifp->if_index;
                                        ifam->ifam_flags = ifa->ifa_flags;
                                        ifam->ifam_metric = ifa->ifa_metric;
@@ -1667,13 +1683,14 @@ sysctl_iflist2(int af, struct walkarg *w)
                                len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)cp, NULL);
                                info.rti_info[RTAX_IFP] = NULL;
                
-                               ifm = (struct if_msghdr2 *)cp;
+                               ifm = (struct if_msghdr2 *)(void *)cp;
                                ifm->ifm_addrs = info.rti_addrs;
                                ifm->ifm_flags = (u_short)ifp->if_flags;
                                ifm->ifm_index = ifp->if_index;
-                               ifm->ifm_snd_len = ifp->if_snd.ifq_len;
-                               ifm->ifm_snd_maxlen = ifp->if_snd.ifq_maxlen;
-                               ifm->ifm_snd_drops = ifp->if_snd.ifq_drops;
+                               ifm->ifm_snd_len = IFCQ_LEN(&ifp->if_snd);
+                               ifm->ifm_snd_maxlen = IFCQ_MAXLEN(&ifp->if_snd);
+                               ifm->ifm_snd_drops =
+                                   ifp->if_snd.ifcq_dropcnt.packets;
                                ifm->ifm_timer = ifp->if_timer;
                                if_data_internal_to_if_data64(ifp, &ifp->if_data,
                                        &ifm->ifm_data);
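Direct reads of the legacy struct ifqueue fields give way to the ifclassq accessors, with drop statistics moving into a packet/byte counter pair. Reading the same three values, as a sketch (types assumed from the accessors):

	u_int32_t qlen  = IFCQ_LEN(&ifp->if_snd);       /* current depth */
	u_int32_t qmax  = IFCQ_MAXLEN(&ifp->if_snd);    /* configured limit */
	u_int64_t drops = ifp->if_snd.ifcq_dropcnt.packets;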
@@ -1706,7 +1723,7 @@ sysctl_iflist2(int af, struct walkarg *w)
                                        }
                                        len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, 0);
 
-                                       ifam = (struct ifa_msghdr *)cp;
+                                       ifam = (struct ifa_msghdr *)(void *)cp;
                                        ifam->ifam_index = ifa->ifa_ifp->if_index;
                                        ifam->ifam_flags = ifa->ifa_flags;
                                        ifam->ifam_metric = ifa->ifa_metric;
@@ -1761,7 +1778,7 @@ sysctl_iflist2(int af, struct walkarg *w)
                                                }
                                                len = rt_msg2(RTM_NEWMADDR2, &info, (caddr_t)cp, 0);
 
-                                               ifmam = (struct ifma_msghdr2 *)cp;
+                                               ifmam = (struct ifma_msghdr2 *)(void *)cp;
                                                ifmam->ifmam_addrs = info.rti_addrs;
                                                ifmam->ifmam_flags = 0;
                                                ifmam->ifmam_index =
index 01ba010ca7065628af9374a6bd9ab40131a61714..6add8260f3ca8951486cdc0dd459b1c1308ab219 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.

  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -84,8 +84,6 @@ short appletalk_inited = 0;
 void atalk_load(void);
 void atalk_unload(void);
 
-extern lck_mtx_t *domain_proto_mtx;
-
 extern int pktsIn, pktsOut;
 
 
@@ -99,9 +97,9 @@ void atalk_load()
                for 2225395
                this happens in adsp_open and is undone on ADSP_UNLINK 
 */
-       lck_mtx_unlock(domain_proto_mtx);
+       domain_proto_mtx_unlock(TRUE);
        proto_register_input(PF_APPLETALK, at_input_packet, NULL, 0);
-       lck_mtx_lock(domain_proto_mtx);
+       domain_proto_mtx_lock();
 } /* atalk_load */
 
 /* Undo everything atalk_load() did. */
@@ -190,7 +188,7 @@ int pat_output(patp, mlist, dst_addr, type)
                                (m->m_next)->m_len);
 #endif
                atalk_unlock();
-               dlil_output(patp->aa_ifp, PF_APPLETALK, m, NULL, &dst, 0);
+               dlil_output(patp->aa_ifp, PF_APPLETALK, m, NULL, &dst, 0, NULL);
                atalk_lock();
 
                pktsOut++;
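dlil_output() grows a trailing parameter in this release, assumed to be an optional flow-advisory out-parameter; callers such as this one that do not participate in flow control pass NULL. The assumed shape:

	/* assumed signature for this release (last argument optional): */
	errno_t dlil_output(ifnet_t ifp, protocol_family_t family, mbuf_t m,
	    void *route, const struct sockaddr *dest, int raw,
	    struct flowadv *adv);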
index 91973125c52bf2a0bfff4920d6e4d6dcd67c88c4..3a98cd8d61f4a1d2c54ac2f46a32a4bdf515a11b 100644 (file)
@@ -19,7 +19,6 @@ DATAFILES = \
        bootp.h icmp6.h if_ether.h icmp_var.h \
        igmp.h igmp_var.h in.h in_pcb.h \
        in_systm.h in_var.h ip.h ip6.h \
-       ip_fw.h ip_fw2.h \
        ip_icmp.h ip_mroute.h ip_var.h tcp.h \
        tcp_fsm.h tcp_seq.h tcp_timer.h tcp_var.h \
        tcpip.h udp.h udp_var.h
@@ -29,8 +28,10 @@ KERNELFILES = \
 
 PRIVATE_DATAFILES = \
        ip_dummynet.h \
+       ip_flowid.h \
+       ip_fw.h ip_fw2.h \
        tcp_debug.h \
-       in_gif.h ip_compat.h
+       in_gif.h ip_compat.h 
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
        ip_ecn.h ip_encap.h
index aab7c4ffd52b7047dc79d49bee2710f3e8042a95..841d3b390d5d38c4e80e530439514573c0a11af7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000,2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define _NETINET_ICMP6_H_
 #include <sys/appleapiopts.h>
 
-#ifdef XNU_KERNEL_PRIVATE
-#include <sys/mcache.h>
-#endif
-
 #define ICMPV6_PLD_MAXLEN      1232    /* IPV6_MMTU - sizeof(struct ip6_hdr)
                                           - sizeof(struct icmp6_hdr) */
 
@@ -333,6 +329,7 @@ struct nd_opt_hdr {         /* Neighbor discovery option header */
 #define ND_OPT_REDIRECTED_HEADER       4
 #define ND_OPT_MTU                     5
 #define ND_OPT_RDNSS                   25      /* RFC 5006 */
+#define ND_OPT_DNSSL                   31      /* RFC 6106 */
 
 #define ND_OPT_ROUTE_INFO              200     /* draft-ietf-ipngwg-router-preference, not officially assigned yet */
 
@@ -382,6 +379,14 @@ struct nd_opt_rdnss {      /* recursive domain name system servers */
     struct in6_addr    nd_opt_rdnss_addr[1];
 } __attribute__((__packed__));
 
+struct nd_opt_dnssl {  /* domain name search list */
+    u_int8_t           nd_opt_dnssl_type;
+    u_int8_t           nd_opt_dnssl_len;
+    u_int16_t          nd_opt_dnssl_reserved;
+    u_int32_t          nd_opt_dnssl_lifetime;
+    u_int8_t           nd_opt_dnssl_domains[8];
+} __attribute__((__packed__));
+
 /*
  * icmp6 namelookup
  */
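As with other neighbor-discovery options, nd_opt_dnssl_len counts units of 8 octets, and nd_opt_dnssl_domains[8] is only the minimum-size placeholder for a variable-length, zero-padded list of encoded domain names. A sketch of recovering the list's byte length:

	#include <stddef.h>

	size_t
	dnssl_domains_len(const struct nd_opt_dnssl *p)
	{
	        /* option length is in 8-octet units; subtract the fixed
	         * 8-byte header to get the domain-list payload size */
	        return ((size_t)p->nd_opt_dnssl_len * 8 -
	            offsetof(struct nd_opt_dnssl, nd_opt_dnssl_domains));
	}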
@@ -648,26 +653,27 @@ struct icmp6stat {
 #if 0  /*obsoleted*/
 #define ICMPV6CTL_ERRRATELIMIT         5       /* ICMPv6 error rate limitation */
 #endif
-#define ICMPV6CTL_ND6_PRUNE                    6
-#define ICMPV6CTL_ND6_DELAY                    8
+#define ICMPV6CTL_ND6_PRUNE            6
+#define ICMPV6CTL_ND6_DELAY            8
 #define ICMPV6CTL_ND6_UMAXTRIES                9
 #define ICMPV6CTL_ND6_MMAXTRIES                10
 #define ICMPV6CTL_ND6_USELOOPBACK      11
 /*#define ICMPV6CTL_ND6_PROXYALL       12      obsoleted, do not reuse here */
-#define ICMPV6CTL_NODEINFO                     13
+#define ICMPV6CTL_NODEINFO             13
 #define ICMPV6CTL_ERRPPSLIMIT          14      /* ICMPv6 error pps limitation */
 #define ICMPV6CTL_ND6_MAXNUDHINT       15
 #define ICMPV6CTL_MTUDISC_HIWAT                16
 #define ICMPV6CTL_MTUDISC_LOWAT                17
-#define ICMPV6CTL_ND6_DEBUG                    18
+#define ICMPV6CTL_ND6_DEBUG            18
 #define ICMPV6CTL_ND6_DRLIST           19
 #define ICMPV6CTL_ND6_PRLIST           20
 #define ICMPV6CTL_MLD_MAXSRCFILTER     21
 #define ICMPV6CTL_MLD_SOMAXSRC         22
 #define ICMPV6CTL_MLD_VERSION          23
 #define ICMPV6CTL_ND6_MAXQLEN          24
-#define        ICMPV6CTL_ND6_ACCEPT_6TO4       25
-#define ICMPV6CTL_MAXID                                26
+#define ICMPV6CTL_ND6_ACCEPT_6TO4      25
+#define ICMPV6CTL_ND6_OPTIMISTIC_DAD   26      /* RFC 4429 */
+#define ICMPV6CTL_MAXID                        27
 
 #ifdef KERNEL_PRIVATE
 #define ICMPV6CTL_NAMES { \
@@ -697,6 +703,7 @@ struct icmp6stat {
        { 0, 0 }, \
        { 0, 0 }, \
        { "nd6_accept_6to4", CTLTYPE_INT }, \
+       { "nd6_optimistic_dad", CTLTYPE_INT }, \
 }
 
 #define RTF_PROBEMTU   RTF_PROTO1
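The ICMPV6CTL_NAMES table is indexed by sysctl id, so adding ICMPV6CTL_ND6_OPTIMISTIC_DAD (26) requires both the parallel table entry above and the ICMPV6CTL_MAXID bump to 27. A sketch of the pairing (element shape assumed from the initializer):

	static const struct { const char *name; int type; }
	    icmp6_names[] = ICMPV6CTL_NAMES;

	/* icmp6_names[ICMPV6CTL_ND6_OPTIMISTIC_DAD].name is
	 * "nd6_optimistic_dad" and its type is CTLTYPE_INT */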
@@ -722,14 +729,14 @@ void      icmp6_mtudisc_update(struct ip6ctlparam *, int);
 extern lck_rw_t icmp6_ifs_rwlock;
 /* XXX: is this the right place for these macros? */
 #define icmp6_ifstat_inc(ifp, tag) \
-do {                                                                   \
-       lck_rw_lock_shared(&icmp6_ifs_rwlock);                          \
-       if ((ifp) && (ifp)->if_index <= if_index                        \
-        && (ifp)->if_index < icmp6_ifstatmax                           \
-        && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) {            \
-               atomic_add_64(&icmp6_ifstat[(ifp)->if_index]->tag, 1);  \
-       }                                                               \
-       lck_rw_done(&icmp6_ifs_rwlock);                                 \
+do {                                                           \
+       lck_rw_lock_shared(&icmp6_ifs_rwlock);                  \
+       if ((ifp) && (ifp)->if_index <= if_index                \
+        && (ifp)->if_index < icmp6_ifstatmax                   \
+        && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) {    \
+               icmp6_ifstat[(ifp)->if_index]->tag++;           \
+       }                                                       \
+       lck_rw_done(&icmp6_ifs_rwlock);                         \
 } while (0)
 
 #define icmp6_ifoutstat_inc(ifp, type, code) \
index 1705a1413de6d1a6f44a9c15711ce084be47dd16..e796437dd7afd468562c60eca94bc25fa7da5954 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -132,7 +132,8 @@ struct sockaddr_inarp {
        struct  in_addr sin_srcaddr;
        u_short sin_tos;
        u_short sin_other;
-#define SIN_PROXY 1
+#define        SIN_PROXY       0x1
+#define        SIN_ROUTER      0x2
 };
 /*
  * IP and ethernet specific routing flags
index 1142c99e2a7fe729f58adbcc26a7e8dbe6d5806f..7e0cd82e92d01d7975f81e20e0180d4951b14c01 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -556,6 +556,9 @@ igmp_domifattach(struct ifnet *ifp, int how)
        IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
        IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
        IGI_UNLOCK(igi);
+       ifnet_lock_shared(ifp);
+       igmp_initsilent(ifp, igi);
+       ifnet_lock_done(ifp);
 
        LIST_INSERT_HEAD(&igi_head, igi, igi_link);
 
@@ -586,6 +589,9 @@ igmp_domifreattach(struct igmp_ifinfo *igi)
        igi->igi_debug |= IFD_ATTACHED;
        IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
        IGI_UNLOCK(igi);
+       ifnet_lock_shared(ifp);
+       igmp_initsilent(ifp, igi);
+       ifnet_lock_done(ifp);
 
        LIST_INSERT_HEAD(&igi_head, igi, igi_link);
 
@@ -651,6 +657,20 @@ igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
        panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
 }
 
+__private_extern__ void
+igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
+{
+       ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
+
+       IGI_LOCK_ASSERT_NOTHELD(igi);
+       IGI_LOCK(igi);
+       if (!(ifp->if_flags & IFF_MULTICAST))
+               igi->igi_flags |= IGIF_SILENT;
+       else
+               igi->igi_flags &= ~IGIF_SILENT;
+       IGI_UNLOCK(igi);
+}
+
 static void
 igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
 {
@@ -664,10 +684,6 @@ igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
        igi->igi_qri = IGMP_QRI_INIT;
        igi->igi_uri = IGMP_URI_INIT;
 
-       /* ifnet is not yet attached; no need to hold ifnet lock */
-       if (!(ifp->if_flags & IFF_MULTICAST))
-               igi->igi_flags |= IGIF_SILENT;
-
        if (!reattach)
                SLIST_INIT(&igi->igi_relinmhead);
 
@@ -1553,6 +1569,9 @@ igmp_input(struct mbuf *m, int off)
        IGMPSTAT_INC(igps_rcv_total);
        OIGMPSTAT_INC(igps_rcv_total);
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
        iphlen = off;
 
@@ -1578,12 +1597,14 @@ igmp_input(struct mbuf *m, int off)
        else
                minlen = IGMP_MINLEN;
 
-       M_STRUCT_GET(igmp, struct igmp *, m, off, minlen);
+       /* A bit more expensive than M_STRUCT_GET, but ensures alignment */
+       M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
        if (igmp == NULL) {
                IGMPSTAT_INC(igps_rcv_tooshort);
                OIGMPSTAT_INC(igps_rcv_tooshort);
                return;
        }
+       VERIFY(IS_P2ALIGNED(igmp, sizeof (u_int32_t)));
 
        /*
         * Validate checksum.
@@ -1669,13 +1690,19 @@ igmp_input(struct mbuf *m, int off)
                                        return;
                                }
                                igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
-                               M_STRUCT_GET(igmpv3, struct igmpv3 *, m,
+                               /*
+                                * A bit more expensive than M_STRUCT_GET,
+                                * but ensures alignment.
+                                */
+                               M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
                                    off, igmpv3len);
                                if (igmpv3 == NULL) {
                                        IGMPSTAT_INC(igps_rcv_tooshort);
                                        OIGMPSTAT_INC(igps_rcv_tooshort);
                                        return;
                                }
+                               VERIFY(IS_P2ALIGNED(igmpv3,
+                                   sizeof (u_int32_t)));
                                if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
                                        m_freem(m);
                                        return;
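Per the comments above, the *_GET0 variant is the alignment-safe (if slightly costlier) mbuf accessor, and the VERIFY asserts its guarantee on strict-alignment CPUs. IS_P2ALIGNED reduces to the usual power-of-two mask test, roughly:

	/* equivalent check, assuming align is a power of two */
	#define P2ALIGNED(ptr, align) \
	        ((((uintptr_t)(ptr)) & ((uintptr_t)(align) - 1)) == 0)

	VERIFY(P2ALIGNED(igmp, sizeof (u_int32_t)));    /* 4-byte aligned */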
@@ -2857,6 +2884,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
        int                      type;
        in_addr_t                naddr;
        uint8_t                  mode;
+       u_int16_t                ig_numsrc;
 
        INM_LOCK_ASSERT_HELD(inm);
        IGI_LOCK_ASSERT_HELD(inm->inm_igi);
@@ -3026,12 +3054,12 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
        if (record_has_sources) {
                if (m == m0) {
                        md = m_last(m);
-                       pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
-                           md->m_len - nbytes);
+                       pig = (struct igmp_grouprec *)(void *)
+                           (mtod(md, uint8_t *) + md->m_len - nbytes);
                } else {
                        md = m_getptr(m, 0, &off);
-                       pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
-                           off);
+                       pig = (struct igmp_grouprec *)(void *)
+                           (mtod(md, uint8_t *) + off);
                }
                msrcs = 0;
                RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
@@ -3065,7 +3093,8 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
                }
                IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
                    msrcs));
-               pig->ig_numsrc = htons(msrcs);
+               ig_numsrc = htons(msrcs);
+               bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
                nbytes += (msrcs * sizeof(in_addr_t));
        }
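pig points into mbuf data, so on strict-alignment architectures the 16-bit ig_numsrc field of this packed wire-format record may sit at an odd address, and storing through it directly could fault. Copying the htons()'d value with bcopy() is safe at any alignment; the same pattern as a self-contained sketch:

	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	/* store a 16-bit big-endian count at a possibly unaligned address */
	static void
	put_count16(void *dst, uint16_t count)
	{
	        uint16_t be = htons(count);

	        memcpy(dst, &be, sizeof (be));  /* byte-wise; no alignment needed */
	}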
 
@@ -3114,7 +3143,8 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
                if (m == NULL)
                        return (-ENOMEM);
                md = m_getptr(m, 0, &off);
-               pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
+               pig = (struct igmp_grouprec *)(void *)
+                   (mtod(md, uint8_t *) + off);
                IGMP_PRINTF(("%s: allocated next packet\n", __func__));
 
                if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
@@ -3157,7 +3187,8 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
                        if (msrcs == m0srcs)
                                break;
                }
-               pig->ig_numsrc = htons(msrcs);
+               ig_numsrc = htons(msrcs);
+               bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
                nbytes += (msrcs * sizeof(in_addr_t));
 
                IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
@@ -3216,6 +3247,7 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
        int                      nallow, nblock;
        uint8_t                  mode, now, then;
        rectype_t                crt, drt, nrt;
+       u_int16_t                ig_numsrc;
 
        INM_LOCK_ASSERT_HELD(inm);
 
@@ -3301,12 +3333,12 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
                                /* new packet; offset in chain */
                                md = m_getptr(m, npbytes -
                                    sizeof(struct igmp_grouprec), &off);
-                               pig = (struct igmp_grouprec *)(mtod(md,
+                               pig = (struct igmp_grouprec *)(void *)(mtod(md,
                                    uint8_t *) + off);
                        } else {
                                /* current packet; offset from last append */
                                md = m_last(m);
-                               pig = (struct igmp_grouprec *)(mtod(md,
+                               pig = (struct igmp_grouprec *)(void *)(mtod(md,
                                    uint8_t *) + md->m_len -
                                    sizeof(struct igmp_grouprec));
                        }
@@ -3384,7 +3416,8 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
                                pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
                        else if (crt == REC_BLOCK)
                                pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
-                       pig->ig_numsrc = htons(rsrcs);
+                       ig_numsrc = htons(rsrcs);
+                       bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
                        /*
                         * Count the new group record, and enqueue this
                         * packet if it wasn't already queued.
@@ -3658,6 +3691,13 @@ igmp_sendpkt(struct mbuf *m, struct ifnet *ifp)
 #ifdef MAC
        mac_netinet_igmp_send(ifp, m0);
 #endif
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               /* Use control service class if the interface supports
+                * transmit-start model.
+                */
+               (void) m_set_service_class(m0, MBUF_SC_CTL);
+       }
        bzero(&ro, sizeof (ro));
        error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
        if (ro.ro_rt != NULL) {
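On interfaces using the transmit-start model (IFEF_TXSTART), outbound packets are scheduled by mbuf service class, so IGMP reports are tagged MBUF_SC_CTL to keep protocol control traffic ahead of bulk data in the driver-managed queues. The tagging pattern:

	/* tag protocol control traffic before handing it to ip_output() */
	if (ifp->if_eflags & IFEF_TXSTART)
	        (void) m_set_service_class(m0, MBUF_SC_CTL);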
index 8fdaab868cc7b64e879c31ce0e76ead6077c5194..30a0cacb34cbcde29f42f633fb74a0f870d3f279 100644 (file)
@@ -310,6 +310,7 @@ extern void igmp_leavegroup(struct in_multi *);
 extern void igmp_slowtimo(void);
 extern void igi_addref(struct igmp_ifinfo *, int);
 extern void igi_remref(struct igmp_ifinfo *);
+__private_extern__ void igmp_initsilent(struct ifnet *, struct igmp_ifinfo *);
 
 SYSCTL_DECL(_net_inet_igmp);
 
index 85b9d38afa59c376a06a641ae9fe1cd709795682..1df980df6ba89101137506cd16fe91b810b5e3bc 100644 (file)
 
 static int in_mask2len(struct in_addr *);
 static void in_len2mask(struct in_addr *, int);
-static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
-       struct ifnet *, struct proc *);
+static int in_lifaddr_ioctl(struct socket *, u_long, struct if_laddrreq *,
+    struct ifnet *, struct proc *);
+static int in_setrouter(struct ifnet *, int);
 
 static void    in_socktrim(struct sockaddr_in *);
 static int     in_ifinit(struct ifnet *,
@@ -366,35 +367,37 @@ in_domifattach(struct ifnet *ifp)
  */
 /* ARGSUSED */
 int
-in_control(
-       struct socket *so,
-       u_long cmd,
-       caddr_t data,
-       struct ifnet *ifp,
-       struct proc *p)
+in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
+    struct proc *p)
 {
-       struct ifreq *ifr = (struct ifreq *)data;
-       struct in_ifaddr *ia = NULL, *iap;
+       struct in_ifaddr *ia = NULL;
        struct ifaddr *ifa;
-       struct in_aliasreq *ifra = (struct in_aliasreq *)data;
        struct sockaddr_in oldaddr;
        int error = 0;
        int hostIsNew, maskIsNew;
-       struct kev_msg        ev_msg;
-       struct kev_in_data    in_event_data;
+       struct kev_msg ev_msg;
+       struct kev_in_data in_event_data;
+
+       bzero(&in_event_data, sizeof (struct kev_in_data));
+       bzero(&ev_msg, sizeof (struct kev_msg));
 
-       bzero(&in_event_data, sizeof(struct kev_in_data));
-       bzero(&ev_msg, sizeof(struct kev_msg));
        switch (cmd) {
-       case SIOCALIFADDR:
-       case SIOCDLIFADDR:
+       case SIOCALIFADDR:              /* struct if_laddrreq */
+       case SIOCDLIFADDR:              /* struct if_laddrreq */
                if ((error = proc_suser(p)) != 0)
-                       return error;
-               /*fall through*/
-       case SIOCGLIFADDR:
-               if (!ifp)
-                       return EINVAL;
-               return in_lifaddr_ioctl(so, cmd, data, ifp, p);
+                       return (error);
+               /* FALLTHRU */
+       case SIOCGLIFADDR: {            /* struct if_laddrreq */
+               struct if_laddrreq iflr;
+
+               if (ifp == NULL)
+                       return (EINVAL);
+
+               bcopy(data, &iflr, sizeof (iflr));
+               error = in_lifaddr_ioctl(so, cmd, &iflr, ifp, p);
+               bcopy(&iflr, data, sizeof (iflr));
+               return (error);
+       }
        }
 
        /*
@@ -403,51 +406,75 @@ in_control(
         * If an alias address was specified, find that one instead of
         * the first one on the interface.
         */
-       if (ifp) {
+       if (ifp != NULL) {
+               struct in_ifaddr *iap;
+               struct sockaddr_in sin;
+
+               bcopy(&((struct ifreq *)(void *)data)->ifr_addr,
+                   &sin, sizeof (sin));
+
                lck_rw_lock_shared(in_ifaddr_rwlock);
-               for (iap = in_ifaddrhead.tqh_first; iap; 
-                    iap = iap->ia_link.tqe_next)
-                       if (iap->ia_ifp == ifp) {
-                               IFA_LOCK(&iap->ia_ifa);
-                               if (((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr ==
-                                   iap->ia_addr.sin_addr.s_addr) {
-                                       ia = iap;
+               for (iap = in_ifaddrhead.tqh_first; iap != NULL;
+                    iap = iap->ia_link.tqe_next) {
+                       if (iap->ia_ifp != ifp)
+                               continue;
+
+                       IFA_LOCK(&iap->ia_ifa);
+                       if (sin.sin_addr.s_addr ==
+                           iap->ia_addr.sin_addr.s_addr) {
+                               ia = iap;
+                               IFA_UNLOCK(&iap->ia_ifa);
+                               break;
+                       } else if (ia == NULL) {
+                               ia = iap;
+                               if (sin.sin_family != AF_INET) {
                                        IFA_UNLOCK(&iap->ia_ifa);
                                        break;
-                               } else if (ia == NULL) {
-                                       ia = iap;
-                                       if (ifr->ifr_addr.sa_family != AF_INET) {
-                                               IFA_UNLOCK(&iap->ia_ifa);
-                                               break;
-                                       }
                                }
-                               IFA_UNLOCK(&iap->ia_ifa);
                        }
+                       IFA_UNLOCK(&iap->ia_ifa);
+               }
                /* take a reference on ia before releasing lock */
-               if (ia != NULL) {
+               if (ia != NULL)
                        IFA_ADDREF(&ia->ia_ifa);
-               }
                lck_rw_done(in_ifaddr_rwlock);
        }
+
        switch (cmd) {
-       case SIOCAUTOADDR:
-       case SIOCARPIPLL:
+       case SIOCAUTOADDR:              /* struct ifreq */
+       case SIOCARPIPLL:               /* struct ifreq */
+       case SIOCSETROUTERMODE:         /* struct ifreq */
                if ((error = proc_suser(p)) != 0) {
                        goto done;
                }
-               if (ifp == 0) {
+               if (ifp == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
                break;
 
-       case SIOCAIFADDR:
-       case SIOCDIFADDR:
-               if (ifp == 0) {
+       case SIOCAIFADDR:               /* struct ifaliasreq */
+       case SIOCDIFADDR: {             /* struct ifreq */
+               struct sockaddr_in addr, dstaddr;
+
+               if (ifp == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
-               if (ifra->ifra_addr.sin_family == AF_INET) {
+
+               if (cmd == SIOCAIFADDR) {
+                       bcopy(&((struct in_aliasreq *)(void *)data)->
+                           ifra_addr, &addr, sizeof (addr));
+                       bcopy(&((struct in_aliasreq *)(void *)data)->
+                           ifra_dstaddr, &dstaddr, sizeof (dstaddr));
+               } else {
+                       VERIFY(cmd == SIOCDIFADDR);
+                       bcopy(&((struct ifreq *)(void *)data)->ifr_addr,
+                           &addr, sizeof (addr));
+                       bzero(&dstaddr, sizeof (dstaddr));
+               }
+
+               if (addr.sin_family == AF_INET) {
                        struct in_ifaddr *oia;
 
                        lck_rw_lock_shared(in_ifaddr_rwlock);
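Throughout in_control(), user-supplied sockaddrs are now snapshotted with bcopy() into properly aligned locals before use: data is a raw caddr_t whose payload need not be aligned for struct sockaddr_in, and the copy also decouples the handler from the caller's buffer. The recurring idiom:

	struct sockaddr_in sin;

	/* data is a caddr_t from the ioctl path; copy, don't dereference */
	bcopy(&((struct ifreq *)(void *)data)->ifr_addr, &sin, sizeof (sin));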
@@ -455,7 +482,7 @@ in_control(
                                IFA_LOCK(&ia->ia_ifa);
                                if (ia->ia_ifp == ifp  &&
                                    ia->ia_addr.sin_addr.s_addr ==
-                                   ifra->ifra_addr.sin_addr.s_addr) {
+                                   addr.sin_addr.s_addr) {
                                        IFA_ADDREF_LOCKED(&ia->ia_ifa);
                                        IFA_UNLOCK(&ia->ia_ifa);
                                        break;
@@ -465,26 +492,38 @@ in_control(
                        lck_rw_done(in_ifaddr_rwlock);
                        if (oia != NULL)
                                IFA_REMREF(&oia->ia_ifa);
-                       if ((ifp->if_flags & IFF_POINTOPOINT)
-                           && (cmd == SIOCAIFADDR)
-                           && (ifra->ifra_dstaddr.sin_addr.s_addr
-                               == INADDR_ANY)) {
+                       if ((ifp->if_flags & IFF_POINTOPOINT) &&
+                           (cmd == SIOCAIFADDR) &&
+                           (dstaddr.sin_addr.s_addr == INADDR_ANY)) {
                                error = EDESTADDRREQ;
                                goto done;
                        }
-               }
-               else if (cmd == SIOCAIFADDR) {
+               } else if (cmd == SIOCAIFADDR) {
                        error = EINVAL;
                        goto done;
                }
-               if (cmd == SIOCDIFADDR && ia == 0) {
+               if (cmd == SIOCDIFADDR && ia == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
                /* FALLTHROUGH */
-       case SIOCSIFADDR:
-       case SIOCSIFNETMASK:
-       case SIOCSIFDSTADDR:
+       }
+       case SIOCSIFADDR:               /* struct ifreq */
+       case SIOCSIFNETMASK:            /* struct ifreq */
+       case SIOCSIFDSTADDR: {          /* struct ifreq */
+               struct sockaddr_in addr;
+
+               if (cmd == SIOCAIFADDR) {
+                       /* fell thru from above; just repeat it */
+                       bcopy(&((struct in_aliasreq *)(void *)data)->
+                           ifra_addr, &addr, sizeof (addr));
+               } else {
+                       VERIFY(cmd == SIOCDIFADDR || cmd == SIOCSIFADDR ||
+                           cmd == SIOCSIFNETMASK || cmd == SIOCSIFDSTADDR);
+                       bcopy(&((struct ifreq *)(void *)data)->ifr_addr,
+                           &addr, sizeof (addr));
+               }
+
                /* socket is NULL if called from in_purgeaddrs() */
                if (so != NULL && (so->so_state & SS_PRIV) == 0) {
                        error = EPERM;
@@ -495,12 +534,11 @@ in_control(
                        error = EPERM;
                        goto done;
                }
-               if (ifp == 0) {
+               if (ifp == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
-               if (ifra->ifra_addr.sin_family != AF_INET 
-                   && cmd == SIOCSIFADDR) {
+               if (addr.sin_family != AF_INET && cmd == SIOCSIFADDR) {
                        error = EINVAL;
                        goto done;
                }
@@ -521,7 +559,7 @@ in_control(
                        ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
                        ia->ia_sockmask.sin_len = 8;
                        if (ifp->if_flags & IFF_BROADCAST) {
-                               ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+                               ia->ia_broadaddr.sin_len = sizeof (ia->ia_addr);
                                ia->ia_broadaddr.sin_family = AF_INET;
                        }
                        ia->ia_ifp = ifp;
@@ -544,104 +582,161 @@ in_control(
                        IFA_ADDREF(ifa);
                        TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link);
                        lck_rw_done(in_ifaddr_rwlock);
-                       error = in_domifattach(ifp);
-                       /* discard error,can be cold with unsupported interfaces */
-                       if (error)
-                               error = 0;
+                       /* discard error */
+                       (void) in_domifattach(ifp);
+                       error = 0;
                }
                break;
+       }
 
-       case SIOCPROTOATTACH:
-       case SIOCPROTODETACH:
+       case SIOCPROTOATTACH:           /* struct ifreq */
+       case SIOCPROTODETACH:           /* struct ifreq */
                if ((error = proc_suser(p)) != 0) {
                        goto done;
                }
-               if (ifp == 0) {
+               if (ifp == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
                break;
 
-       case SIOCSIFBRDADDR:
+       case SIOCSIFBRDADDR:            /* struct ifreq */
                if ((so->so_state & SS_PRIV) == 0) {
                        error = EPERM;
                        goto done;
                }
                /* FALLTHROUGH */
-
-       case SIOCGIFADDR:
-       case SIOCGIFNETMASK:
-       case SIOCGIFDSTADDR:
-       case SIOCGIFBRDADDR:
-               if (ia == (struct in_ifaddr *)0) {
+       case SIOCGIFADDR:               /* struct ifreq */
+       case SIOCGIFNETMASK:            /* struct ifreq */
+       case SIOCGIFDSTADDR:            /* struct ifreq */
+       case SIOCGIFBRDADDR:            /* struct ifreq */
+               if (ia == NULL) {
                        error = EADDRNOTAVAIL;
                        goto done;
                }
                break;
        }
+
        switch (cmd) {
-       case SIOCAUTOADDR:
+       case SIOCAUTOADDR: {            /* struct ifreq */
+               int intval;
+
+               VERIFY(ifp != NULL);
+               bcopy(&((struct ifreq *)(void *)data)->ifr_intval,
+                   &intval, sizeof (intval));
+
                ifnet_lock_exclusive(ifp);
-               if (ifr->ifr_intval)
-                       ifp->if_eflags |= IFEF_AUTOCONFIGURING;
-               else
+               if (intval) {
+                       /*
+                        * An interface in IPv4 router mode implies that it
+                        * is configured with a static IP address and should
+                        * not act as a DHCP client; prevent SIOCAUTOADDR from
+                        * being set in that mode.
+                        */
+                       if (ifp->if_eflags & IFEF_IPV4_ROUTER) {
+                               intval = 0;     /* be safe; clear flag if set */
+                               error = EBUSY;
+                       } else {
+                               ifp->if_eflags |= IFEF_AUTOCONFIGURING;
+                       }
+               }
+               if (!intval)
                        ifp->if_eflags &= ~IFEF_AUTOCONFIGURING;
                ifnet_lock_done(ifp);
                break;
-       
-       case SIOCARPIPLL:
+       }
+
+       case SIOCARPIPLL: {             /* struct ifreq */
+               int intval;
+
+               VERIFY(ifp != NULL);
+               bcopy(&((struct ifreq *)(void *)data)->ifr_intval,
+                   &intval, sizeof (intval));
                ipv4_ll_arp_aware = 1;
+
                ifnet_lock_exclusive(ifp);
-               if (ifr->ifr_data)
-                       ifp->if_eflags |= IFEF_ARPLL;
-               else
+               if (intval) {
+                       /*
+                        * An interface in IPv4 router mode implies that it
+                        * is configured with a static IP address and should
+                        * not have to deal with IPv4 Link-Local Address;
+                        * prevent SIOCARPIPLL from being set in that mode.
+                        */
+                       if (ifp->if_eflags & IFEF_IPV4_ROUTER) {
+                               intval = 0;     /* be safe; clear flag if set */
+                               error = EBUSY;
+                       } else {
+                               ifp->if_eflags |= IFEF_ARPLL;
+                       }
+               }
+               if (!intval)
                        ifp->if_eflags &= ~IFEF_ARPLL;
                ifnet_lock_done(ifp);
                break;
+       }
 
-       case SIOCGIFADDR:
+       case SIOCGIFADDR:               /* struct ifreq */
+               VERIFY(ia != NULL);
                IFA_LOCK(&ia->ia_ifa);
-               *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+               bcopy(&ia->ia_addr, &((struct ifreq *)(void *)data)->ifr_addr,
+                   sizeof (struct sockaddr_in));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
 
-       case SIOCGIFBRDADDR:
+       case SIOCGIFBRDADDR:            /* struct ifreq */
+               VERIFY(ia != NULL);
                if ((ifp->if_flags & IFF_BROADCAST) == 0) {
                        error = EINVAL;
                        break;
                }
                IFA_LOCK(&ia->ia_ifa);
-               *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+               bcopy(&ia->ia_broadaddr,
+                   &((struct ifreq *)(void *)data)->ifr_broadaddr,
+                   sizeof (struct sockaddr_in));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
 
-       case SIOCGIFDSTADDR:
+       case SIOCGIFDSTADDR:            /* struct ifreq */
+               VERIFY(ia != NULL);
                if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
                        error = EINVAL;
                        break;
                }
                IFA_LOCK(&ia->ia_ifa);
-               *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+               bcopy(&ia->ia_dstaddr,
+                   &((struct ifreq *)(void *)data)->ifr_dstaddr,
+                   sizeof (struct sockaddr_in));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
 
-       case SIOCGIFNETMASK:
+       case SIOCGIFNETMASK:            /* struct ifreq */
+               VERIFY(ia != NULL);
                IFA_LOCK(&ia->ia_ifa);
-               *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+               bcopy(&ia->ia_sockmask,
+                   &((struct ifreq *)(void *)data)->ifr_addr,
+                   sizeof (struct sockaddr_in));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
 
-       case SIOCSIFDSTADDR:
+       case SIOCSIFDSTADDR:            /* struct ifreq */
+               VERIFY(ifp != NULL && ia != NULL);
                if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
                        error = EINVAL;
                        break;
                }
                IFA_LOCK(&ia->ia_ifa);
                oldaddr = ia->ia_dstaddr;
-               ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+               bcopy(&((struct ifreq *)(void *)data)->ifr_dstaddr,
+                   &ia->ia_dstaddr, sizeof (struct sockaddr_in));
                if (ia->ia_dstaddr.sin_family == AF_INET)
                        ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in);
                IFA_UNLOCK(&ia->ia_ifa);
+               /*
+                * NOTE: SIOCSIFDSTADDR is defined with struct ifreq
+                * as parameter, but here we are sending it down
+                * to the interface with a pointer to struct ifaddr,
+                * for legacy reasons.
+                */
                error = ifnet_ioctl(ifp, PF_INET, SIOCSIFDSTADDR, ia);
                IFA_LOCK(&ia->ia_ifa);
                if (error == EOPNOTSUPP) {
@@ -660,11 +755,12 @@ in_control(
 
                ev_msg.event_code = KEV_INET_SIFDSTADDR;
 
-               if (ia->ia_ifa.ifa_dstaddr)
-                    in_event_data.ia_dstaddr = 
-                         ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
-               else
-                    in_event_data.ia_dstaddr.s_addr  = 0;
+               if (ia->ia_ifa.ifa_dstaddr) {
+                       in_event_data.ia_dstaddr = ((struct sockaddr_in *)
+                           (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
+               } else {
+                       in_event_data.ia_dstaddr.s_addr = INADDR_ANY;
+               }
 
                in_event_data.ia_addr         = ia->ia_addr.sin_addr;
                in_event_data.ia_net          = ia->ia_net;
@@ -673,12 +769,13 @@ in_control(
                in_event_data.ia_subnetmask   = ia->ia_subnetmask;
                in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
                IFA_UNLOCK(&ia->ia_ifa);
-               strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
+               (void) strncpy(&in_event_data.link_data.if_name[0],
+                   ifp->if_name, IFNAMSIZ);
                in_event_data.link_data.if_family = ifp->if_family;
                in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
 
                ev_msg.dv[0].data_ptr    = &in_event_data;
-               ev_msg.dv[0].data_length      = sizeof(struct kev_in_data);
+               ev_msg.dv[0].data_length = sizeof (struct kev_in_data);
                ev_msg.dv[1].data_length = 0;
 
                kev_post_msg(&ev_msg);
@@ -701,13 +798,15 @@ in_control(
                lck_mtx_unlock(rnh_lock);
                break;
 
-       case SIOCSIFBRDADDR:
+       case SIOCSIFBRDADDR:            /* struct ifreq */
+               VERIFY(ia != NULL);
                if ((ifp->if_flags & IFF_BROADCAST) == 0) {
                        error = EINVAL;
                        break;
                }
                IFA_LOCK(&ia->ia_ifa);
-               ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+               bcopy(&((struct ifreq *)(void *)data)->ifr_broadaddr,
+                   &ia->ia_broadaddr, sizeof (struct sockaddr_in));
 
                ev_msg.vendor_code    = KEV_VENDOR_APPLE;
                ev_msg.kev_class      = KEV_NETWORK_CLASS;
@@ -715,12 +814,12 @@ in_control(
 
                ev_msg.event_code = KEV_INET_SIFBRDADDR;
 
-               if (ia->ia_ifa.ifa_dstaddr)
-                    in_event_data.ia_dstaddr = 
-                         ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
-               else
-                    in_event_data.ia_dstaddr.s_addr  = 0;
-
+               if (ia->ia_ifa.ifa_dstaddr) {
+                       in_event_data.ia_dstaddr = ((struct sockaddr_in *)
+                           (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
+               } else {
+                       in_event_data.ia_dstaddr.s_addr = INADDR_ANY;
+               }
                in_event_data.ia_addr         = ia->ia_addr.sin_addr;
                in_event_data.ia_net          = ia->ia_net;
                in_event_data.ia_netmask      = ia->ia_netmask;
@@ -728,36 +827,43 @@ in_control(
                in_event_data.ia_subnetmask   = ia->ia_subnetmask;
                in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
                IFA_UNLOCK(&ia->ia_ifa);
-               strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
+               (void) strncpy(&in_event_data.link_data.if_name[0],
+                   ifp->if_name, IFNAMSIZ);
                in_event_data.link_data.if_family = ifp->if_family;
                in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
 
                ev_msg.dv[0].data_ptr    = &in_event_data;
-               ev_msg.dv[0].data_length      = sizeof(struct kev_in_data);
+               ev_msg.dv[0].data_length = sizeof (struct kev_in_data);
                ev_msg.dv[1].data_length = 0;
 
                kev_post_msg(&ev_msg);
-
                break;
 
-       case SIOCSIFADDR:
+       case SIOCSIFADDR: {             /* struct ifreq */
+               struct sockaddr_in addr;
+
+               VERIFY(ifp != NULL && ia != NULL);
+               bcopy(&((struct ifreq *)(void *)data)->ifr_addr,
+                   &addr, sizeof (addr));
                /*
                 * If this is a new address, the reference count for the
                 * hash table has been taken at creation time above.
                 */
-               error = in_ifinit(ifp, ia,
-                   (struct sockaddr_in *)&ifr->ifr_addr, 1);
+               error = in_ifinit(ifp, ia, &addr, 1);
 #if PF
                if (!error)
                        (void) pf_ifaddr_hook(ifp, cmd);
 #endif /* PF */
                break;
+       }
 
-       case SIOCPROTOATTACH:
+       case SIOCPROTOATTACH:           /* struct ifreq */
+               VERIFY(ifp != NULL);
                error = in_domifattach(ifp);
                break;
 
-       case SIOCPROTODETACH:
+       case SIOCPROTODETACH:           /* struct ifreq */
+               VERIFY(ifp != NULL);
                 /*
                 * If an IPv4 address is still present, refuse to detach.
                 */
@@ -779,10 +885,26 @@ in_control(
                error = proto_unplumb(PF_INET, ifp);
                break;
 
-       case SIOCSIFNETMASK: {
-               u_long i;
+       case SIOCSETROUTERMODE: {       /* struct ifreq */
+               int intval;
+
+               VERIFY(ifp != NULL);
+               bcopy(&((struct ifreq *)(void *)data)->ifr_intval,
+                   &intval, sizeof (intval));
+
+               error = in_setrouter(ifp, intval);
+               break;
+       }
+
+       case SIOCSIFNETMASK: {          /* struct ifreq */
+               struct sockaddr_in addr;
+               in_addr_t i;
+
+               VERIFY(ifp != NULL && ia != NULL);
+               bcopy(&((struct ifreq *)(void *)data)->ifr_addr,
+                   &addr, sizeof (addr));
+               i = addr.sin_addr.s_addr;
 
-               i = ifra->ifra_addr.sin_addr.s_addr;
                IFA_LOCK(&ia->ia_ifa);
                ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i);
                ev_msg.vendor_code    = KEV_VENDOR_APPLE;
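SIOCSETROUTERMODE is the new superuser ioctl behind in_setrouter(); an interface placed in IPv4 router mode is assumed to be statically configured, which is why SIOCAUTOADDR and SIOCARPIPLL above now answer EBUSY while IFEF_IPV4_ROUTER is set. A userland sketch of flipping the mode (private ioctl; its definition and the ifr_intval accessor are assumed visible):

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <string.h>

	int
	set_router_mode(int s, const char *ifname, int on)
	{
	        struct ifreq ifr;

	        memset(&ifr, 0, sizeof (ifr));
	        strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
	        ifr.ifr_intval = on;            /* 1 = router mode, 0 = off */
	        return (ioctl(s, SIOCSETROUTERMODE, &ifr));     /* root only */
	}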
@@ -791,12 +913,12 @@ in_control(
 
                ev_msg.event_code = KEV_INET_SIFNETMASK;
 
-               if (ia->ia_ifa.ifa_dstaddr)
-                    in_event_data.ia_dstaddr = 
-                         ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
-               else
-                    in_event_data.ia_dstaddr.s_addr  = 0;
-
+               if (ia->ia_ifa.ifa_dstaddr) {
+                    in_event_data.ia_dstaddr = ((struct sockaddr_in *)
+                        (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
+               } else {
+                       in_event_data.ia_dstaddr.s_addr = INADDR_ANY;
+               }
                in_event_data.ia_addr         = ia->ia_addr.sin_addr;
                in_event_data.ia_net          = ia->ia_net;
                in_event_data.ia_netmask      = ia->ia_netmask;
@@ -804,54 +926,65 @@ in_control(
                in_event_data.ia_subnetmask   = ia->ia_subnetmask;
                in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
                IFA_UNLOCK(&ia->ia_ifa);
-               strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
+               (void) strncpy(&in_event_data.link_data.if_name[0],
+                   ifp->if_name, IFNAMSIZ);
                in_event_data.link_data.if_family = ifp->if_family;
                in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
 
                ev_msg.dv[0].data_ptr    = &in_event_data;
-               ev_msg.dv[0].data_length      = sizeof(struct kev_in_data);
+               ev_msg.dv[0].data_length = sizeof (struct kev_in_data);
                ev_msg.dv[1].data_length = 0;
 
                kev_post_msg(&ev_msg);
-
                break;
        }
-       case SIOCAIFADDR:
+
+       case SIOCAIFADDR: {             /* struct ifaliasreq */
+               struct sockaddr_in addr, broadaddr, mask;
+
+               VERIFY(ifp != NULL && ia != NULL);
+               bcopy(&((struct ifaliasreq *)(void *)data)->ifra_addr,
+                   &addr, sizeof (addr));
+               bcopy(&((struct ifaliasreq *)(void *)data)->ifra_broadaddr,
+                   &broadaddr, sizeof (broadaddr));
+               bcopy(&((struct ifaliasreq *)(void *)data)->ifra_mask,
+                   &mask, sizeof (mask));
+
                maskIsNew = 0;
                hostIsNew = 1;
                error = 0;
 
                IFA_LOCK(&ia->ia_ifa);
                if (ia->ia_addr.sin_family == AF_INET) {
-                       if (ifra->ifra_addr.sin_len == 0) {
-                               ifra->ifra_addr = ia->ia_addr;
+                       if (addr.sin_len == 0) {
+                               addr = ia->ia_addr;
                                hostIsNew = 0;
-                       } else if (ifra->ifra_addr.sin_addr.s_addr ==
-                                              ia->ia_addr.sin_addr.s_addr)
+                       } else if (addr.sin_addr.s_addr ==
+                           ia->ia_addr.sin_addr.s_addr) {
                                hostIsNew = 0;
+                       }
                }
-               if (ifra->ifra_mask.sin_len) {
+               if (mask.sin_len) {
                        IFA_UNLOCK(&ia->ia_ifa);
                        in_ifscrub(ifp, ia, 0);
                        IFA_LOCK(&ia->ia_ifa);
-                       ia->ia_sockmask = ifra->ifra_mask;
+                       ia->ia_sockmask = mask;
                        ia->ia_subnetmask =
                             ntohl(ia->ia_sockmask.sin_addr.s_addr);
                        maskIsNew = 1;
                }
                if ((ifp->if_flags & IFF_POINTOPOINT) &&
-                   (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+                   (broadaddr.sin_family == AF_INET)) {
                        IFA_UNLOCK(&ia->ia_ifa);
                        in_ifscrub(ifp, ia, 0);
                        IFA_LOCK(&ia->ia_ifa);
-                       ia->ia_dstaddr = ifra->ifra_dstaddr;
+                       ia->ia_dstaddr = broadaddr;
                        ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in);
                        maskIsNew  = 1; /* We lie; but the effect's the same */
                }
-               if (ifra->ifra_addr.sin_family == AF_INET &&
-                   (hostIsNew || maskIsNew)) {
+               if (addr.sin_family == AF_INET && (hostIsNew || maskIsNew)) {
                        IFA_UNLOCK(&ia->ia_ifa);
-                       error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+                       error = in_ifinit(ifp, ia, &addr, 0);
                } else {
                        IFA_UNLOCK(&ia->ia_ifa);
                }
@@ -861,51 +994,54 @@ in_control(
 #endif /* PF */
                IFA_LOCK(&ia->ia_ifa);
                if ((ifp->if_flags & IFF_BROADCAST) &&
-                   (ifra->ifra_broadaddr.sin_family == AF_INET))
-                       ia->ia_broadaddr = ifra->ifra_broadaddr;
+                   (broadaddr.sin_family == AF_INET))
+                       ia->ia_broadaddr = broadaddr;
 
                /*
                 * Report event.
                 */
-
                if ((error == 0) || (error == EEXIST)) {
-                    ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-                    ev_msg.kev_class      = KEV_NETWORK_CLASS;
-                    ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
-
-                    if (hostIsNew)
-                         ev_msg.event_code = KEV_INET_NEW_ADDR;
-                    else
-                         ev_msg.event_code = KEV_INET_CHANGED_ADDR;
-
-                    if (ia->ia_ifa.ifa_dstaddr)
-                         in_event_data.ia_dstaddr = 
-                              ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
-                    else
-                         in_event_data.ia_dstaddr.s_addr  = 0;
-
-                    in_event_data.ia_addr         = ia->ia_addr.sin_addr;
-                    in_event_data.ia_net          = ia->ia_net;
-                    in_event_data.ia_netmask      = ia->ia_netmask;
-                    in_event_data.ia_subnet       = ia->ia_subnet;
-                    in_event_data.ia_subnetmask   = ia->ia_subnetmask;
-                    in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
-                    IFA_UNLOCK(&ia->ia_ifa);
-                    strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
-                    in_event_data.link_data.if_family = ifp->if_family;
-                    in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
-
-                    ev_msg.dv[0].data_ptr    = &in_event_data;
-                    ev_msg.dv[0].data_length      = sizeof(struct kev_in_data);
-                    ev_msg.dv[1].data_length = 0;
-
-                    kev_post_msg(&ev_msg);
+                       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+                       ev_msg.kev_class      = KEV_NETWORK_CLASS;
+                       ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
+
+                       if (hostIsNew)
+                               ev_msg.event_code = KEV_INET_NEW_ADDR;
+                       else
+                               ev_msg.event_code = KEV_INET_CHANGED_ADDR;
+
+                       if (ia->ia_ifa.ifa_dstaddr) {
+                               in_event_data.ia_dstaddr =
+                                   ((struct sockaddr_in *)(void *)ia->
+                                   ia_ifa.ifa_dstaddr)->sin_addr;
+                       } else {
+                               in_event_data.ia_dstaddr.s_addr = INADDR_ANY;
+                       }
+                       in_event_data.ia_addr         = ia->ia_addr.sin_addr;
+                       in_event_data.ia_net          = ia->ia_net;
+                       in_event_data.ia_netmask      = ia->ia_netmask;
+                       in_event_data.ia_subnet       = ia->ia_subnet;
+                       in_event_data.ia_subnetmask   = ia->ia_subnetmask;
+                       in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+                       IFA_UNLOCK(&ia->ia_ifa);
+                       (void) strncpy(&in_event_data.link_data.if_name[0],
+                           ifp->if_name, IFNAMSIZ);
+                       in_event_data.link_data.if_family = ifp->if_family;
+                       in_event_data.link_data.if_unit = ifp->if_unit;
+
+                       ev_msg.dv[0].data_ptr    = &in_event_data;
+                       ev_msg.dv[0].data_length = sizeof (struct kev_in_data);
+                       ev_msg.dv[1].data_length = 0;
+
+                       kev_post_msg(&ev_msg);
                } else {
-                    IFA_UNLOCK(&ia->ia_ifa);
+                       IFA_UNLOCK(&ia->ia_ifa);
                }
                break;
+       }
 
-       case SIOCDIFADDR:
+       case SIOCDIFADDR:               /* struct ifreq */
+               VERIFY(ifp != NULL && ia != NULL);
                error = ifnet_ioctl(ifp, PF_INET, SIOCDIFADDR, ia);
                if (error == EOPNOTSUPP)
                        error = 0;
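
The SIOCAIFADDR case above posts KEV_INET_NEW_ADDR or KEV_INET_CHANGED_ADDR through kev_post_msg(), and the SIOCDIFADDR path below does the same with KEV_INET_ADDR_DELETED. For context, here is a minimal user-space sketch of draining those KEV_INET_SUBCLASS events; it assumes the standard PF_SYSTEM event socket and the SIOCSKEVFILTER ioctl from <sys/kern_event.h>, neither of which is part of this diff.

/*
 * Sketch only: listen for the KEV_INET_SUBCLASS events posted above.
 * Assumes the PF_SYSTEM/SYSPROTO_EVENT socket interface.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/kern_event.h>

int
main(void)
{
	struct kev_request req = {
		.vendor_code  = KEV_VENDOR_APPLE,
		.kev_class    = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_INET_SUBCLASS,
	};
	char buf[1024];
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

	if (fd == -1 || ioctl(fd, SIOCSKEVFILTER, &req) == -1)
		return (1);
	for (;;) {
		struct kern_event_msg *msg;
		ssize_t n = read(fd, buf, sizeof (buf));

		if (n <= 0)
			break;
		msg = (struct kern_event_msg *)(void *)buf;
		printf("event code %u\n", msg->event_code);
	}
	close(fd);
	return (0);
}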
@@ -921,12 +1057,12 @@ in_control(
                ev_msg.event_code = KEV_INET_ADDR_DELETED;
 
                IFA_LOCK(&ia->ia_ifa);
-               if (ia->ia_ifa.ifa_dstaddr)
-                    in_event_data.ia_dstaddr = 
-                         ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
-               else
-                    in_event_data.ia_dstaddr.s_addr  = 0;
-
+               if (ia->ia_ifa.ifa_dstaddr) {
+                    in_event_data.ia_dstaddr = ((struct sockaddr_in *)
+                        (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
+               } else {
+                       in_event_data.ia_dstaddr.s_addr = INADDR_ANY;
+               }
                in_event_data.ia_addr         = ia->ia_addr.sin_addr;
                in_event_data.ia_net          = ia->ia_net;
                in_event_data.ia_netmask      = ia->ia_netmask;
@@ -934,7 +1070,8 @@ in_control(
                in_event_data.ia_subnetmask   = ia->ia_subnetmask;
                in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
                IFA_UNLOCK(&ia->ia_ifa);
-               strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
+               (void) strncpy(&in_event_data.link_data.if_name[0],
+                   ifp->if_name, IFNAMSIZ);
                in_event_data.link_data.if_family = ifp->if_family;
                in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
 
@@ -987,8 +1124,8 @@ in_control(
                                ifp->if_allhostsinm = NULL;
 
                                in_delmulti(inm);
-                               /* release the reference for allhostsinm pointer */
-                               INM_REMREF(inm); 
+                               /* release the reference for allhostsinm */
+                               INM_REMREF(inm);
                        }
                        lck_mtx_unlock(&ifp->if_addrconfig_lock);
                } else {
@@ -1004,6 +1141,12 @@ in_control(
                 */
                ifa = ifa_ifpgetprimary(ifp, AF_INET);
                if (ifa != NULL) {
+                       /*
+                        * NOTE: SIOCSIFADDR is defined with struct ifreq
+                        * as parameter, but here we are sending it down
+                        * to the interface with a pointer to struct ifaddr,
+                        * for legacy reasons.
+                        */
                        error = ifnet_ioctl(ifp, PF_INET, SIOCSIFADDR, ifa);
                        if (error == EOPNOTSUPP)
                                error = 0;
@@ -1017,71 +1160,59 @@ in_control(
                break;
 
 #ifdef __APPLE__
-    case SIOCSETOT: {
-        /*
-         * Inspiration from tcp_ctloutput() and ip_ctloutput()
-         * Special ioctl for OpenTransport sockets
-         */
-        struct inpcb   *inp, *cloned_inp;
-        int                    error2 = 0;
-        int                    cloned_fd = *(int *)data;
-
-        inp = sotoinpcb(so);
-        if (inp == NULL) {
-            break;
-        }
-
-        /* let's make sure it's either -1 or a valid file descriptor */
-        if (cloned_fd != -1) {
-            struct socket      *cloned_so;
-            error2 = file_socket(cloned_fd, &cloned_so);
-            if (error2){
-                break;
-            }
-            cloned_inp = sotoinpcb(cloned_so);
+       case SIOCSETOT: {               /* int */
+               /*
+                * Inspiration from tcp_ctloutput() and ip_ctloutput()
+                * Special ioctl for OpenTransport sockets
+                */
+               struct inpcb *inp, *cloned_inp;
+               int error2 = 0;
+               int cloned_fd;
+
+               bcopy(data, &cloned_fd, sizeof (cloned_fd));
+
+               inp = sotoinpcb(so);
+               if (inp == NULL) {
+                       break;
+               }
+
+               /* let's make sure it's either -1 or a valid file descriptor */
+               if (cloned_fd != -1) {
+                       struct socket   *cloned_so;
+                       error2 = file_socket(cloned_fd, &cloned_so);
+                       if (error2) {
+                               break;
+                       }
+                       cloned_inp = sotoinpcb(cloned_so);
                        file_drop(cloned_fd);
-        } else {
-            cloned_inp = NULL;
-        }
-
-        if (cloned_inp == NULL) {
-            /* OT always uses IP_PORTRANGE_HIGH */
-            inp->inp_flags &= ~(INP_LOWPORT);
-            inp->inp_flags |= INP_HIGHPORT;
-            /* For UDP, OT allows broadcast by default */
-            if (so->so_type == SOCK_DGRAM)
-                so->so_options |= SO_BROADCAST;
-            /* For TCP we want to see MSG_OOB when receive urgent data */
-            else if (so->so_type == SOCK_STREAM)
-                so->so_options |= SO_WANTOOBFLAG;
-        } else {
-            inp->inp_ip_tos = cloned_inp->inp_ip_tos;
-            inp->inp_ip_ttl = cloned_inp->inp_ip_ttl;
-            inp->inp_flags = cloned_inp->inp_flags;
-
-            /* Multicast options */
-            if (cloned_inp->inp_moptions != NULL) {
-                struct ip_moptions     *cloned_imo = cloned_inp->inp_moptions;
-                struct ip_moptions     *imo = inp->inp_moptions;
-
-                if (imo == NULL) {
-                    /*
-                     * No multicast option buffer attached to the pcb;
-                     * allocate one.
-                     */
-                    imo = ip_allocmoptions(M_WAITOK);
-                    if (imo == NULL) {
-                        error2 = ENOBUFS;
-                        break;
-                    }
-                    inp->inp_moptions = imo;
-                }
-
-               error2 = imo_clone(cloned_imo, imo);
-            }
-        }
-        break;
-    }
+               } else {
+                       cloned_inp = NULL;
+               }
+
+               if (cloned_inp == NULL) {
+                       /* OT always uses IP_PORTRANGE_HIGH */
+                       inp->inp_flags &= ~(INP_LOWPORT);
+                       inp->inp_flags |= INP_HIGHPORT;
+                       /*
+                        * For UDP, OT allows broadcast by default;
+                        * for TCP we want to see MSG_OOB when we
+                        * receive urgent data.
+                        */
+                       if (so->so_type == SOCK_DGRAM)
+                               so->so_options |= SO_BROADCAST;
+                       else if (so->so_type == SOCK_STREAM)
+                               so->so_options |= SO_WANTOOBFLAG;
+               } else {
+                       inp->inp_ip_tos = cloned_inp->inp_ip_tos;
+                       inp->inp_ip_ttl = cloned_inp->inp_ip_ttl;
+                       inp->inp_flags = cloned_inp->inp_flags;
+
+                       /* Multicast options */
+                       if (cloned_inp->inp_moptions != NULL)
+                               error2 = imo_clone(cloned_inp, inp);
+               }
+               break;
+       }
 #endif /* __APPLE__ */
 
        default:
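
One detail in the SIOCSETOT case above generalizes: the integer argument is now pulled out of the ioctl buffer with bcopy() instead of being dereferenced through *(int *)data, because the caller-supplied buffer carries no alignment guarantee. A generic sketch of the pattern (the helper name is illustrative, not from this diff):

/*
 * Sketch only: read a scalar from a possibly misaligned ioctl
 * argument buffer; a byte copy is safe for any alignment, while
 * a direct *(int *)data dereference is not.
 */
static int
ioctl_arg_int(caddr_t data)
{
	int value;

	bcopy(data, &value, sizeof (value));
	return (value);
}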
@@ -1111,21 +1242,12 @@ in_control(
  *     other values may be returned from in_ioctl()
  */
 static int
-in_lifaddr_ioctl(
-       struct socket *so,
-       u_long cmd,
-       caddr_t data,
-       struct ifnet *ifp,
-       struct proc *p)
+in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr,
+    struct ifnet *ifp, struct proc *p)
 {
-       struct if_laddrreq *iflr = (struct if_laddrreq *)data;
        struct ifaddr *ifa;
 
-       /* sanity checks */
-       if (!data || !ifp) {
-               panic("invalid argument to in_lifaddr_ioctl");
-               /*NOTREACHED*/
-       }
+       VERIFY(ifp != NULL);
 
        switch (cmd) {
        case SIOCGLIFADDR:
@@ -1288,6 +1410,35 @@ in_lifaddr_ioctl(
        return EOPNOTSUPP;      /*just for safety*/
 }
 
+/*
+ * Handle SIOCSETROUTERMODE to set or clear the IPv4 router mode flag on
+ * the interface.  When in this mode, IPv4 Link-Local Address support is
+ * disabled in ARP, and DHCP client support is disabled in IP input; turning
+ * any of them on would cause an error to be returned.  Entering or exiting
+ * this mode will result in the removal of IPv4 addresses currently configured
+ * on the interface.
+ */
+static int
+in_setrouter(struct ifnet *ifp, int enable)
+{
+       if (ifp->if_flags & IFF_LOOPBACK)
+               return (ENODEV);
+
+       ifnet_lock_exclusive(ifp);
+       if (enable) {
+               ifp->if_eflags |= IFEF_IPV4_ROUTER;
+               ifp->if_eflags &= ~(IFEF_ARPLL | IFEF_AUTOCONFIGURING);
+       } else {
+               ifp->if_eflags &= ~IFEF_IPV4_ROUTER;
+       }
+       ifnet_lock_done(ifp);
+
+       /* purge all IPv4 addresses configured on this interface */
+       in_purgeaddrs(ifp);
+
+       return (0);
+}
+
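
Because router mode, as described above, conflicts with ARP link-local addressing and the in-kernel DHCP client, the flag is expected to be flipped by a privileged configuration tool. A hedged sketch of such a call follows; it assumes SIOCSETROUTERMODE takes a struct ifreq with the value in ifr_intval, which this hunk does not show.

/*
 * Sketch only, not from this diff: toggle IPv4 router mode.
 * Assumes the private SIOCSETROUTERMODE ioctl and the ifr_intval
 * accessor are visible to the caller.
 */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

static int
set_router_mode(const char *ifname, int enable)
{
	struct ifreq ifr;
	int s = socket(AF_INET, SOCK_DGRAM, 0), err;

	if (s == -1)
		return (-1);
	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
	ifr.ifr_intval = enable;	/* 1 = enable, 0 = disable */
	err = ioctl(s, SIOCSETROUTERMODE, &ifr);
	close(s);
	return (err);
}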
 /*
  * Delete any existing route for an interface.
  */
@@ -1463,6 +1614,12 @@ in_ifinit(
         * be reconfigured with the current primary IPV4 address.
         */
        if (error == 0 && cmd == SIOCAIFADDR) {
+               /*
+                * NOTE: SIOCSIFADDR is defined with struct ifreq
+                * as parameter, but here we are sending it down
+                * to the interface with a pointer to struct ifaddr,
+                * for legacy reasons.
+                */
                error = ifnet_ioctl(ifp, PF_INET, SIOCSIFADDR, ifa0);
                if (error == EOPNOTSUPP)
                        error = 0;
@@ -1674,9 +1831,9 @@ in_purgeaddrs(struct ifnet *ifp)
 
                                IFA_LOCK(ifa);
                                s = &((struct sockaddr_in *)
-                                   ifa->ifa_addr)->sin_addr;
+                                   (void *)ifa->ifa_addr)->sin_addr;
                                d = &((struct sockaddr_in *)
-                                   ifa->ifa_dstaddr)->sin_addr;
+                                   (void *)ifa->ifa_dstaddr)->sin_addr;
                                (void) inet_ntop(AF_INET, &s->s_addr, s_addr,
                                    sizeof (s_addr));
                                (void) inet_ntop(AF_INET, &d->s_addr, s_dstaddr,
index 4e66c26c778c93627c7a40e8dfd3b63b1362b636..4e0d49b9e18970ee2a68a706555a4435e3578f1b 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -430,7 +430,7 @@ struct ip_opts {
 #define        IP_RECVIF               20   /* bool; receive reception if w/dgram */
 /* for IPSEC */
 #define        IP_IPSEC_POLICY         21   /* int; set/get security policy */
-#define        IP_FAITH                22   /* bool; accept FAITH'ed connections */
+#define        IP_FAITH                22   /* deprecated */
 #ifdef __APPLE__
 #define IP_STRIPHDR            23   /* bool: drop receive of raw IP header */
 #endif
@@ -601,13 +601,13 @@ struct sockaddr;
  * We use uint32_t here to be consistent.
  */
 int    setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t,
-           uint32_t, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+           uint32_t, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3);
 int    getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *,
-           uint32_t *, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+           uint32_t *, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3);
 int    setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
-           uint32_t, uint32_t, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+           uint32_t, uint32_t, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3);
 int    getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
-           uint32_t *, uint32_t *, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+           uint32_t *, uint32_t *, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3);
 #endif
 
 /*
@@ -737,7 +737,7 @@ struct in_pktinfo {
 #define        IPCTL_STATS             12      /* ipstat structure */
 #define        IPCTL_ACCEPTSOURCEROUTE 13      /* may accept source routed packets */
 #define        IPCTL_FASTFORWARDING    14      /* use fast IP forwarding code */
-#define        IPCTL_KEEPFAITH         15      /* FAITH IPv4->IPv6 translater ctl */
+#define        IPCTL_KEEPFAITH         15      /* deprecated */
 #define        IPCTL_GIF_TTL           16      /* default TTL for gif encap packet */
 #define        IPCTL_MAXID             17
 
@@ -796,6 +796,9 @@ extern int inaddr_local(struct in_addr);
 #define        in_nullhost(x)  ((x).s_addr == INADDR_ANY)
 #define        in_allhosts(x)  ((x).s_addr == htonl(INADDR_ALLHOSTS_GROUP))
 
+#define        SIN(s)          ((struct sockaddr_in *)(void *)s)
+#define        satosin(sa)     SIN(sa)
+#define        sintosa(sin)    ((struct sockaddr *)(void *)(sin))
 #endif /* KERNEL_PRIVATE */
 #define MAX_IPv4_STR_LEN       16
 #define MAX_IPv6_STR_LEN       64
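
The SIN()/satosin()/sintosa() helpers added above package the cast-through-(void *) idiom applied throughout these files: casting a struct sockaddr pointer directly to struct sockaddr_in * trips cast-alignment diagnostics, while the intermediate (void *) records that the caller vouches for alignment. A minimal sketch of what the macro expands to:

/* Sketch only: the idiom SIN() packages up. */
static struct sockaddr_in *
as_sin(struct sockaddr *sa)
{
	/*
	 * (struct sockaddr_in *)sa would warn under cast-alignment
	 * checking; the (void *) hop silences the warning and leaves
	 * alignment the caller's responsibility, exactly as SIN(sa)
	 * does.
	 */
	return ((struct sockaddr_in *)(void *)sa);
}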
index 8a4dfcd143b3e110407c4d84b3860f75ad29f65b..7dd09e904a7d41ea2a25dd61cd1e1b9a47649dab 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -83,8 +83,6 @@
 #include <netinet/in_var.h>
 #include <kern/zalloc.h>
 
-#define        SA(p) ((struct sockaddr *)(p))
-#define SIN(s) ((struct sockaddr_in *)s)
 #define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen))
 #define        equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
 
@@ -208,6 +206,7 @@ static struct llinfo_arp *arp_llinfo_alloc(void);
 static void arp_llinfo_free(void *);
 static void arp_llinfo_purge(struct rtentry *);
 static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
+static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
 
 static __inline void arp_llreach_use(struct llinfo_arp *);
 static __inline int arp_llreach_reachable(struct llinfo_arp *);
@@ -301,12 +300,38 @@ arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 
        if (lr == NULL) {
                bzero(ri, sizeof (*ri));
+               ri->ri_rssi = IFNET_RSSI_UNKNOWN;
+               ri->ri_lqm = IFNET_LQM_THRESH_OFF;
+               ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
        } else {
                IFLR_LOCK(lr);
                /* Export to rt_reach_info structure */
                ifnet_lr2ri(lr, ri);
-               /* Export ARP send expiration time */
-               ri->ri_snd_expire = ifnet_llreach_up2cal(lr, la->la_lastused);
+               /* Export ARP send expiration (calendar) time */
+               ri->ri_snd_expire =
+                   ifnet_llreach_up2calexp(lr, la->la_lastused);
+               IFLR_UNLOCK(lr);
+       }
+}
+
+static void
+arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
+{
+       struct llinfo_arp *la = rt->rt_llinfo;
+       struct if_llreach *lr = la->la_llreach;
+
+       if (lr == NULL) {
+               bzero(iflri, sizeof (*iflri));
+               iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
+               iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
+               iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
+       } else {
+               IFLR_LOCK(lr);
+               /* Export to ifnet_llreach_info structure */
+               ifnet_lr2iflri(lr, iflri);
+               /* Export ARP send expiration (uptime) time */
+               iflri->iflri_snd_expire =
+                   ifnet_llreach_up2upexp(lr, la->la_lastused);
                IFLR_UNLOCK(lr);
        }
 }
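
arp_llinfo_get_iflri() deliberately parallels arp_llinfo_get_ri() above: the same llinfo source, but it fills an ifnet_llreach_info and converts the last-used timestamp to the uptime base where the rt_reach_info path uses calendar time. In sketch form, the only substantive difference between the two exporters is:

/* Sketch only: same datum, two time bases. */
ri->ri_snd_expire =
    ifnet_llreach_up2calexp(lr, la->la_lastused);	/* calendar time */
iflri->iflri_snd_expire =
    ifnet_llreach_up2upexp(lr, la->la_lastused);	/* uptime */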
@@ -579,7 +604,7 @@ arp_rtrequest(
                 * such as older version of routed or gated might provide,
                 * restore cloning bit.
                 */
-               if ((rt->rt_flags & RTF_HOST) == 0 &&
+               if ((rt->rt_flags & RTF_HOST) == 0 && rt_mask(rt) != NULL &&
                    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
                        rt->rt_flags |= RTF_CLONING;
                if (rt->rt_flags & RTF_CLONING) {
@@ -605,7 +630,7 @@ arp_rtrequest(
                                arp_llreach_use(la); /* Mark use timestamp */
                        RT_UNLOCK(rt);
                        dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
-                           SDL(gate), rt_key(rt), NULL, rt_key(rt));
+                           SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
                        RT_LOCK(rt);
                }
                /*FALLTHROUGH*/
@@ -631,6 +656,7 @@ arp_rtrequest(
                        break;
                }
                rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
+               rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
                rt->rt_llinfo_purge = arp_llinfo_purge;
                rt->rt_llinfo_free = arp_llinfo_free;
 
@@ -859,192 +885,6 @@ arp_lookup_route(const struct in_addr *addr, int create, int proxy,
        return (0);
 }
 
-/*
- * arp_route_to_gateway_route will find the gateway route for a given route.
- *
- * If the route is down, look the route up again.
- * If the route goes through a gateway, get the route to the gateway.
- * If the gateway route is down, look it up again.
- * If the route is set to reject, verify it hasn't expired.
- *
- * If the returned route is non-NULL, the caller is responsible for
- * releasing the reference and unlocking the route.
- */
-#define senderr(e) { error = (e); goto bad; }
-__private_extern__ errno_t
-arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0,
-     route_t *out_route)
-{
-       uint64_t timenow;
-       route_t rt = hint0, hint = hint0;
-       errno_t error = 0;
-
-       *out_route = NULL;
-
-       /*
-        * Next hop determination.  Because we may involve the gateway route
-        * in addition to the original route, locking is rather complicated.
-        * The general concept is that regardless of whether the route points
-        * to the original route or to the gateway route, this routine takes
-        * an extra reference on such a route.  This extra reference will be
-        * released at the end.
-        *
-        * Care must be taken to ensure that the "hint0" route never gets freed
-        * via rtfree(), since the caller may have stored it inside a struct
-        * route with a reference held for that placeholder.
-        */
-       if (rt != NULL) {
-               unsigned int ifindex;
-
-               RT_LOCK_SPIN(rt);
-               ifindex = rt->rt_ifp->if_index;
-               RT_ADDREF_LOCKED(rt);
-               if (!(rt->rt_flags & RTF_UP)) {
-                       RT_REMREF_LOCKED(rt);
-                       RT_UNLOCK(rt);
-                       /* route is down, find a new one */
-                       hint = rt = rtalloc1_scoped((struct sockaddr *)
-                           (size_t)net_dest, 1, 0, ifindex);
-                       if (hint != NULL) {
-                               RT_LOCK_SPIN(rt);
-                               ifindex = rt->rt_ifp->if_index;
-                       } else {
-                               senderr(EHOSTUNREACH);
-                       }
-               }
-
-               /*
-                * We have a reference to "rt" by now; it will either
-                * be released or freed at the end of this routine.
-                */
-               RT_LOCK_ASSERT_HELD(rt);
-               if (rt->rt_flags & RTF_GATEWAY) {
-                       struct rtentry *gwrt = rt->rt_gwroute;
-                       struct sockaddr_in gw;
-
-                       /* If there's no gateway rt, look it up */
-                       if (gwrt == NULL) {
-                               gw = *((struct sockaddr_in *)rt->rt_gateway);
-                               RT_UNLOCK(rt);
-                               goto lookup;
-                       }
-                       /* Become a regular mutex */
-                       RT_CONVERT_LOCK(rt);
-
-                       /*
-                        * Take gwrt's lock while holding route's lock;
-                        * this is okay since gwrt never points back
-                        * to "rt", so no lock ordering issues.
-                        */
-                       RT_LOCK_SPIN(gwrt);
-                       if (!(gwrt->rt_flags & RTF_UP)) {
-                               struct rtentry *ogwrt;
-
-                               rt->rt_gwroute = NULL;
-                               RT_UNLOCK(gwrt);
-                               gw = *((struct sockaddr_in *)rt->rt_gateway);
-                               RT_UNLOCK(rt);
-                               rtfree(gwrt);
-lookup:
-                               gwrt = rtalloc1_scoped(
-                                   (struct sockaddr *)&gw, 1, 0, ifindex);
-
-                               RT_LOCK(rt);
-                               /*
-                                * Bail out if the route is down, no route
-                                * to gateway, circular route, or if the
-                                * gateway portion of "rt" has changed.
-                                */
-                               if (!(rt->rt_flags & RTF_UP) ||
-                                   gwrt == NULL || gwrt == rt ||
-                                   !equal(SA(&gw), rt->rt_gateway)) {
-                                       if (gwrt == rt) {
-                                               RT_REMREF_LOCKED(gwrt);
-                                               gwrt = NULL;
-                                       }
-                                       RT_UNLOCK(rt);
-                                       if (gwrt != NULL)
-                                               rtfree(gwrt);
-                                       senderr(EHOSTUNREACH);
-                               }
-
-                               /* Remove any existing gwrt */
-                               ogwrt = rt->rt_gwroute;
-                               if ((rt->rt_gwroute = gwrt) != NULL)
-                                       RT_ADDREF(gwrt);
-
-                               /* Clean up "rt" now while we can */
-                               if (rt == hint0) {
-                                       RT_REMREF_LOCKED(rt);
-                                       RT_UNLOCK(rt);
-                               } else {
-                                       RT_UNLOCK(rt);
-                                       rtfree(rt);
-                               }
-                               rt = gwrt;
-                               /* Now free the replaced gwrt */
-                               if (ogwrt != NULL)
-                                       rtfree(ogwrt);
-                               /* If still no route to gateway, bail out */
-                               if (rt == NULL)
-                                       senderr(EHOSTUNREACH);
-                       } else {
-                               RT_ADDREF_LOCKED(gwrt);
-                               RT_UNLOCK(gwrt);
-                               /* Clean up "rt" now while we can */
-                               if (rt == hint0) {
-                                       RT_REMREF_LOCKED(rt);
-                                       RT_UNLOCK(rt);
-                               } else {
-                                       RT_UNLOCK(rt);
-                                       rtfree(rt);
-                               }
-                               rt = gwrt;
-                       }
-
-                       /* rt == gwrt; if it is now down, give up */
-                       RT_LOCK_SPIN(rt);
-                       if (!(rt->rt_flags & RTF_UP)) {
-                               RT_UNLOCK(rt);
-                               senderr(EHOSTUNREACH);
-                       }
-               }
-
-               if (rt->rt_flags & RTF_REJECT) {
-                       VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
-                       VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
-                       timenow = net_uptime();
-                       if (rt->rt_expire == 0 ||
-                           timenow < rt->rt_expire) {
-                               RT_UNLOCK(rt);
-                               senderr(rt == hint ? EHOSTDOWN : EHOSTUNREACH);
-                       }
-               }
-
-               /* Become a regular mutex */
-               RT_CONVERT_LOCK(rt);
-
-               /* Caller is responsible for cleaning up "rt" */
-               *out_route = rt;
-       }
-       return (0);
-
-bad:
-       /* Clean up route (either it is "rt" or "gwrt") */
-       if (rt != NULL) {
-               RT_LOCK_SPIN(rt);
-               if (rt == hint0) {
-                       RT_REMREF_LOCKED(rt);
-                       RT_UNLOCK(rt);
-               } else {
-                       RT_UNLOCK(rt);
-                       rtfree(rt);
-               }
-       }
-       return (error);
-}
-#undef senderr
-
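
The gateway-resolution logic removed above survives under a new name: the arp_lookup_ip() hunk below now calls route_to_gwroute(), so the routine lives with the rest of the routing code instead of inside ARP. Its caller contract is unchanged from the deleted comment, per this sketch: on success the returned route is locked and carries an extra reference, and the caller must undo both.

/* Sketch only: the caller contract route_to_gwroute() preserves. */
route_t gwrt = NULL;
errno_t err = route_to_gwroute(net_dest, hint, &gwrt);

if (err == 0 && gwrt != NULL) {
	/* ... transmit using gwrt ... */
	RT_UNLOCK(gwrt);	/* returned locked */
	RT_REMREF(gwrt);	/* drop the extra reference */
}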
 /*
  * This is the ARP pre-output routine; care must be taken to ensure that
  * the "hint" route never gets freed via rtfree(), since the caller may
@@ -1077,7 +917,7 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
                 * Callee holds a reference on the route and returns
                 * with the route entry locked, upon success.
                 */
-               result = arp_route_to_gateway_route((const struct sockaddr*)
+               result = route_to_gwroute((const struct sockaddr *)
                    net_dest, hint, &route);
                if (result != 0)
                        return (result);
@@ -1194,6 +1034,7 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
                        if (llinfo->la_asked++ < arp_maxtries) {
                                struct ifaddr *rt_ifa = route->rt_ifa;
                                struct sockaddr *sa;
+                               u_int32_t rtflags;
 
                                /* Become a regular mutex, just in case */
                                RT_CONVERT_LOCK(route);
@@ -1208,9 +1049,11 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
                                sa = rt_ifa->ifa_addr;
                                IFA_UNLOCK(rt_ifa);
                                arp_llreach_use(llinfo); /* Mark use timestamp */
+                               rtflags = route->rt_flags;
                                RT_UNLOCK(route);
                                dlil_send_arp(ifp, ARPOP_REQUEST, NULL,
-                                   sa, NULL, (const struct sockaddr*)net_dest);
+                                   sa, NULL, (const struct sockaddr*)net_dest,
+                                   rtflags);
                                IFA_REMREF(rt_ifa);
                                RT_LOCK(route);
                                result = EJUSTRETURN;
@@ -1385,7 +1228,7 @@ match:
                u_char  storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
                bzero(&ev_msg, sizeof(struct kev_msg));
                bzero(storage, (sizeof(struct kev_in_collision) + MAX_HW_LEN));
-               in_collision = (struct kev_in_collision*)storage;
+               in_collision = (struct kev_in_collision*)(void *)storage;
                log(LOG_ERR, "%s%d duplicate IP address %s sent from address %s\n",
                        ifp->if_name, ifp->if_unit,
                        inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, sizeof(ipv4str)),
@@ -1646,7 +1489,7 @@ match:
        /* Update the expire time for the route and clear the reject flag */
        if (route->rt_expire) {
                uint64_t timenow;
-                                            
+
                timenow = net_uptime();
                rt_setexpire(route,
                    rt_expiry(route, timenow, arpt_keep));
@@ -1666,7 +1509,7 @@ match:
                llinfo->la_hold = NULL;
 
                RT_UNLOCK(route);
-               dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0);
+               dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0, NULL);
                RT_REMREF(route);
                route = NULL;
        }
@@ -1748,7 +1591,7 @@ respond:
 
        dlil_send_arp(ifp, ARPOP_REPLY,
            target_hw, (const struct sockaddr*)target_ip,
-           sender_hw, (const struct sockaddr*)sender_ip);
+           sender_hw, (const struct sockaddr*)sender_ip, 0);
 
 done:
        if (best_ia != NULL)
@@ -1766,5 +1609,5 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
        ifa->ifa_flags |= RTF_CLONING;
        sa = ifa->ifa_addr;
        IFA_UNLOCK(ifa);
-       dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa);
+       dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
 }
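
Every dlil_send_arp() call in this file gains a trailing argument: arp_lookup_ip() snapshots route->rt_flags under the route lock and passes it down with the request, while gratuitous announcements and replies pass 0. The widened call shape, in sketch form:

/* Sketch only: the widened dlil_send_arp() call, per the sites above. */
u_int32_t rtflags = route->rt_flags;	/* snapshot while RT_LOCK is held */

RT_UNLOCK(route);
dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL,
    (const struct sockaddr *)net_dest,
    rtflags);				/* new trailing argument */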
index 99a1065729b82000284291a04813cabe62a51638..56573aad96fafe7027e46486b22889e2e3174370 100644
@@ -76,8 +76,6 @@ extern void arp_llreach_set_reachable(struct ifnet *, void *, unsigned int);
 extern errno_t arp_lookup_ip(ifnet_t interface,
     const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest,
     size_t ll_dest_len, route_t hint, mbuf_t packet);
-__private_extern__ errno_t arp_route_to_gateway_route(const struct sockaddr *,
-    route_t, route_t *);
 #endif /* KERNEL_PRIVATE */
 
 /*!
index f32cef303be01d9d7ddbbc70286fcad2d069ae4b..594e275779eb4caec20cc26d7fef76eb2ae7cacf 100644
@@ -177,7 +177,7 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
                for (; skip && m; m = m->m_next) {
                        if (m->m_len > skip) {
                                mlen = m->m_len - skip;
-                               w = (u_short *)(m->m_data+skip);
+                               w = (u_short *)(void *)(m->m_data+skip);
                                goto skip_start;
                        } else {
                                skip -= m->m_len;
@@ -200,7 +200,7 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
                         */
                        s_util.c[1] = *(char *)w;
                        sum += s_util.s;
-                       w = (u_short *)((char *)w + 1);
+                       w = (u_short *)(void *)((char *)w + 1);
                        mlen = m->m_len - 1;
                        len--;
                } else {
@@ -218,7 +218,7 @@ skip_start:
                        REDUCE;
                        sum <<= 8;
                        s_util.c[0] = *(u_char *)w;
-                       w = (u_short *)((char *)w + 1);
+                       w = (u_short *)(void *)((char *)w + 1);
                        mlen--;
                        byte_swapped = 1;
                }
index 90fc06ae58cadecd4602bee84b2ed2ddb644ddce..b0bad24cbe8bcbcf353672f18d6a1eab3bb3d8f9 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1988-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1988-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,16 +105,16 @@ inet_aifaddr(struct socket * so, const char * name,
     bzero(&ifra, sizeof(ifra));
     strlcpy(ifra.ifra_name, name, sizeof(ifra.ifra_name));
     if (addr) {
-       *((struct sockaddr_in *)&ifra.ifra_addr) = blank_sin;
-       ((struct sockaddr_in *)&ifra.ifra_addr)->sin_addr = *addr;
+       *((struct sockaddr_in *)(void *)&ifra.ifra_addr) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_addr)->sin_addr = *addr;
     }
     if (mask) {
-       *((struct sockaddr_in *)&ifra.ifra_mask) = blank_sin;
-       ((struct sockaddr_in *)&ifra.ifra_mask)->sin_addr = *mask;
+       *((struct sockaddr_in *)(void *)&ifra.ifra_mask) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_mask)->sin_addr = *mask;
     }
     if (broadcast) {
-       *((struct sockaddr_in *)&ifra.ifra_broadaddr) = blank_sin;
-       ((struct sockaddr_in *)&ifra.ifra_broadaddr)->sin_addr = *broadcast;
+       *((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr)->sin_addr = *broadcast;
     }
     return (ifioctl(so, SIOCAIFADDR, (caddr_t)&ifra, current_proc()));
 }
@@ -140,13 +140,13 @@ struct dhcp_context {
 static __inline__ struct dhcp_packet *
 dhcp_context_request(struct dhcp_context * context)
 {
-    return ((struct dhcp_packet *)context->request);
+    return ((struct dhcp_packet *)(void *)context->request);
 }
 
 static __inline__ struct dhcp *
 dhcp_context_reply(struct dhcp_context * context)
 {
-    return ((struct dhcp *)context->reply);
+    return ((struct dhcp *)(void *)context->reply);
 }
 
 struct mbuf * ip_pkt_to_mbuf(caddr_t pkt, int pktsize);
@@ -291,7 +291,7 @@ link_print(struct sockaddr_dl * dl_p)
 static struct sockaddr_dl *
 link_from_ifnet(struct ifnet * ifp)
 {
-    return ((struct sockaddr_dl *)ifp->if_lladdr->ifa_addr);
+    return ((struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr);
 }
 
 /*
@@ -309,7 +309,7 @@ send_packet(struct ifnet * ifp, struct dhcp_packet * pkt, int pkt_size)
     dest.sin_port = htons(IPPORT_BOOTPS);
     dest.sin_addr.s_addr = INADDR_BROADCAST;
     m = ip_pkt_to_mbuf((caddr_t)pkt, pkt_size);
-    return dlil_output(ifp, PF_INET, m, 0, (struct sockaddr *)&dest, 0);
+    return dlil_output(ifp, PF_INET, m, 0, (struct sockaddr *)&dest, 0, NULL);
 }
 
 /*
index 9a6cb3db6ca5affbc62c45c1cd528b889292ce2b..c65eceb6818eff16ac854fd76522f79dd8b87640 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,13 +105,13 @@ in_gif_output(
        __unused struct rtentry *rt)
 {
        struct gif_softc *sc = ifnet_softc(ifp);
-       struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
-       struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
-       struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
+       struct sockaddr_in *dst = (struct sockaddr_in *)(void *)&sc->gif_ro.ro_dst;
+       struct sockaddr_in *sin_src = (struct sockaddr_in *)(void *)sc->gif_psrc;
+       struct sockaddr_in *sin_dst = (struct sockaddr_in *)(void *)sc->gif_pdst;
        struct ip iphdr;        /* capsule IP header, host byte ordered */
        int proto, error;
        u_int8_t tos;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
 
        if (sin_src == NULL || sin_dst == NULL ||
            sin_src->sin_family != AF_INET ||
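
The in_gif_output() initializer above reflects a wider struct ip_out_args: a flow-advisory member now sits between the bound-interface scope and a flags word, and IPOAF_SELECT_SRCIF asks ip_output() to choose the source interface itself. Read as a sketch; the layout is inferred from the initializer and is not shown in this hunk.

/* Sketch only: layout implied by the initializer above. */
struct ip_out_args ipoa = {
	IFSCOPE_NONE,		/* no bound interface scope */
	{ 0 },			/* flow-advisory state, zeroed */
	IPOAF_SELECT_SRCIF	/* let ip_output() pick the source if */
};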
@@ -343,8 +343,8 @@ gif_encapcheck4(
 
        /* sanity check done in caller */
        sc = (struct gif_softc *)arg;
-       src = (struct sockaddr_in *)sc->gif_psrc;
-       dst = (struct sockaddr_in *)sc->gif_pdst;
+       src = (struct sockaddr_in *)(void *)sc->gif_psrc;
+       dst = (struct sockaddr_in *)(void *)sc->gif_pdst;
 
        mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip), &ip);
 
index 1854fd26e734d4061cca24505ddefa5db3b67b6b..deded6a557428c3736b50902f3976acf8fffb659 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -347,7 +347,7 @@ imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
 
        IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo));
 
-       gsin = (const struct sockaddr_in *)group;
+       gsin = (struct sockaddr_in *)(uintptr_t)(size_t)group;
 
        /* The imo_membership array may be lazy allocated. */
        if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
@@ -399,7 +399,7 @@ imo_match_source(const struct ip_moptions *imo, const size_t gidx,
        imf = &imo->imo_mfilters[gidx];
 
        /* Source trees are keyed in host byte order. */
-       psa = (const sockunion_t *)src;
+       psa = (sockunion_t *)(uintptr_t)(size_t)src;
        find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
        ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
 
@@ -448,9 +448,21 @@ imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
 }
 
 int
-imo_clone(struct ip_moptions *from, struct ip_moptions *to)
+imo_clone(struct inpcb *from_inp, struct inpcb *to_inp)
 {
        int i, err = 0;
+       struct ip_moptions *from;
+       struct ip_moptions *to;
+
+       from = inp_findmoptions(from_inp);
+       if (from == NULL)
+               return (ENOMEM); 
+
+       to = inp_findmoptions(to_inp);
+       if (to == NULL) {
+               IMO_REMREF(from);
+               return (ENOMEM);
+       }
 
        IMO_LOCK(from);
        IMO_LOCK(to);
@@ -497,16 +509,21 @@ imo_clone(struct ip_moptions *from, struct ip_moptions *to)
         * Source filtering doesn't apply to OpenTransport socket,
         * so simply hold additional reference count per membership.
         */
-        for (i = 0; i < from->imo_num_memberships; i++) {
-               to->imo_membership[i] = from->imo_membership[i];
-               INM_ADDREF(from->imo_membership[i]);
+       for (i = 0; i < from->imo_num_memberships; i++) {
+               to->imo_membership[i] = 
+                       in_addmulti(&from->imo_membership[i]->inm_addr,
+                                               from->imo_membership[i]->inm_ifp);
+               if (to->imo_membership[i] == NULL)
+                       break;
                to->imo_num_memberships++;
         }
        VERIFY(to->imo_num_memberships == from->imo_num_memberships);
 
 done:
        IMO_UNLOCK(to);
+       IMO_REMREF(to);
        IMO_UNLOCK(from);
+       IMO_REMREF(from);
 
        return (err);
 }
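
The reworked imo_clone() takes the two inpcbs and obtains each side's ip_moptions through inp_findmoptions(), which returns holding an extra reference; both references are dropped with IMO_REMREF() on exit. Memberships are likewise cloned by re-joining through in_addmulti() rather than bumping the existing in_multi refcount, so each socket ends up owning an independent membership. The acquire/release pairing, in sketch form (error handling elided):

/* Sketch only: the reference discipline imo_clone() now follows. */
struct ip_moptions *from = inp_findmoptions(from_inp);	/* +1 ref */
struct ip_moptions *to;

if (from == NULL)
	return (ENOMEM);
to = inp_findmoptions(to_inp);				/* +1 ref */
if (to == NULL) {
	IMO_REMREF(from);
	return (ENOMEM);
}
/* ... copy options; re-join each group via in_addmulti() ... */
IMO_REMREF(to);
IMO_REMREF(from);
return (0);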
@@ -1710,7 +1727,11 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 
        if (ifp == NULL)
                return (EADDRNOTAVAIL);
-               
+
+       if ((size_t) msfr.msfr_nsrcs >
+           SIZE_MAX / sizeof(struct sockaddr_storage))
+               msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
+
        if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
                msfr.msfr_nsrcs = in_mcast_maxsocksrc;
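
The new msfr_nsrcs clamp above is a multiplication-overflow guard: bounding the count by SIZE_MAX / sizeof (struct sockaddr_storage) before it is multiplied into an allocation size means a huge user-supplied count can no longer wrap the _MALLOC() argument. The generic form of the pattern:

/* Sketch only: clamp n so n * sizeof (*buf) cannot overflow. */
if (n > SIZE_MAX / sizeof (*buf))
	n = SIZE_MAX / sizeof (*buf);
buf = _MALLOC(n * sizeof (*buf), M_TEMP, M_WAITOK | M_ZERO);
if (buf == NULL)
	return (ENOBUFS);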
 
@@ -1750,12 +1771,13 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 
        tss = NULL;
        if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
-               tss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+               tss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*tss),
                    M_TEMP, M_WAITOK | M_ZERO);
                if (tss == NULL) {
                        IMO_UNLOCK(imo);
                        return (ENOBUFS);
                }
+               bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
        }
 
        /*
@@ -1785,8 +1807,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
        IMO_UNLOCK(imo);
 
        if (tss != NULL) {
-               error = copyout(tss, tmp_ptr,
-                   sizeof(struct sockaddr_storage) * ncsrcs);
+               error = copyout(tss, tmp_ptr, ncsrcs * sizeof(*tss));
                FREE(tss, M_TEMP);
                if (error)
                        return (error);
@@ -1980,7 +2001,7 @@ inp_lookup_mcast_ifp(const struct inpcb *inp,
                unsigned int ifscope = IFSCOPE_NONE;
 
                if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
-                       ifscope = inp->inp_boundif;
+                       ifscope = inp->inp_boundifp->if_index;
 
                bzero(&ro, sizeof (ro));
                memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
@@ -2673,6 +2694,10 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
                memcpy(&msfr, &msfr32, sizeof(msfr));
        }
 
+       if ((size_t) msfr.msfr_nsrcs >
+           SIZE_MAX / sizeof(struct sockaddr_storage))
+               msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
+
        if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
                return (ENOBUFS);
 
@@ -2742,14 +2767,14 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 
                IGMP_PRINTF(("%s: loading %lu source list entries\n",
                    __func__, (unsigned long)msfr.msfr_nsrcs));
-               kss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+               kss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*kss),
                    M_TEMP, M_WAITOK);
                if (kss == NULL) {
                        error = ENOMEM;
                        goto out_imo_locked;
                }
                error = copyin(tmp_ptr, kss,
-                   sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+                   (size_t) msfr.msfr_nsrcs * sizeof(*kss));
                if (error) {
                        FREE(kss, M_TEMP);
                        goto out_imo_locked;
index 51eeca0b323477caa2074b9ef4d93d6d3c2eaf69..043057a59529fb39f9db0aef3860a9db048c2434 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,7 @@
 #include <sys/kauth.h>
 #include <sys/priv.h>
 #include <libkern/OSAtomic.h>
+#include <kern/locks.h>
 
 #include <machine/limits.h>
 
@@ -89,6 +90,8 @@
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/flowhash.h>
+#include <net/flowadv.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
-#include "faith.h"
-
 #if IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 
 #include <sys/kdebug.h>
 #include <sys/random.h>
+#include <dev/random/randomdev.h>
 
 #if IPSEC
 extern int ipsec_bypass;
@@ -175,14 +177,79 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | C
 extern int     udp_use_randomport;
 extern int     tcp_use_randomport;
 
+/* Structs used for flowhash computation */
+struct inp_flowhash_key_addr {
+       union {
+               struct in_addr  v4;
+               struct in6_addr v6;
+               u_int8_t        addr8[16];
+               u_int16_t       addr16[8];
+               u_int32_t       addr32[4];
+       } infha;
+};
+
+struct inp_flowhash_key {
+       struct inp_flowhash_key_addr    infh_laddr;
+       struct inp_flowhash_key_addr    infh_faddr;
+       u_int32_t                       infh_lport;
+       u_int32_t                       infh_fport;
+       u_int32_t                       infh_af;
+       u_int32_t                       infh_proto;
+       u_int32_t                       infh_rand1;
+       u_int32_t                       infh_rand2;
+};
+
+u_int32_t inp_hash_seed = 0;
+
+static __inline int infc_cmp(const struct inp_fc_entry *,
+    const struct inp_fc_entry *);
+lck_grp_t *inp_lck_grp;
+lck_grp_attr_t *inp_lck_grp_attr;
+lck_attr_t *inp_lck_attr;
+decl_lck_mtx_data(, inp_fc_lck);
+
+RB_HEAD(inp_fc_tree, inp_fc_entry) inp_fc_tree;
+RB_PROTOTYPE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
+
+RB_GENERATE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
+
+static unsigned int inp_fcezone_size;
+static struct zone *inp_fcezone;
+#define INP_FCEZONE_NAME "inp_fcezone"
+#define INP_FCEZONE_MAX 32
+
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
- *
- * NOTE: It is assumed that most of these functions will be called at
- * splnet(). XXX - There are, unfortunately, a few exceptions to this
- * rule that should be fixed.
  */
 
+/*
+ * Initialize data structures required to deliver
+ * flow advisories.
+ */
+void
+socket_flowadv_init(void)
+{
+       inp_lck_grp_attr = lck_grp_attr_alloc_init();
+       inp_lck_grp = lck_grp_alloc_init("inp_lck_grp", inp_lck_grp_attr);
+
+       inp_lck_attr = lck_attr_alloc_init();
+       lck_mtx_init(&inp_fc_lck, inp_lck_grp, inp_lck_attr);
+
+       RB_INIT(&inp_fc_tree);
+
+       inp_fcezone_size = P2ROUNDUP(sizeof (struct inp_fc_entry),
+           sizeof (u_int64_t));
+       inp_fcezone = zinit(inp_fcezone_size,
+           INP_FCEZONE_MAX * inp_fcezone_size, 0, INP_FCEZONE_NAME);
+       if (inp_fcezone == NULL) {
+               panic("%s: failed allocating %s", __func__,
+                   INP_FCEZONE_NAME);
+               /* NOTREACHED */
+       }
+       zone_change(inp_fcezone, Z_EXPAND, TRUE);
+       zone_change(inp_fcezone, Z_CALLERACCT, FALSE);
+}
+
 /*
  * Allocate a PCB and associate it with the socket.
  *
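
The inp_flowhash_key structures above give each PCB a fixed-size, family-independent identity to hash: local and foreign address and port, address family, protocol, and two random salts, with inp_hash_seed as the per-boot seed. A hedged sketch of how such a key would be filled and hashed follows; the net_flowhash call is an assumption based on the <net/flowhash.h> include added in this file, not code shown in this hunk.

/*
 * Sketch only: fill and hash a flowhash key for an IPv4 TCP PCB.
 * net_flowhash naming is assumed (see <net/flowhash.h> above);
 * RandomULong() comes from the newly included
 * <dev/random/randomdev.h>.
 */
struct inp_flowhash_key fh;
u_int32_t flowhash;

bzero(&fh, sizeof (fh));
fh.infh_laddr.infha.v4 = inp->inp_laddr;
fh.infh_faddr.infha.v4 = inp->inp_faddr;
fh.infh_lport = inp->inp_lport;
fh.infh_fport = inp->inp_fport;
fh.infh_af    = AF_INET;
fh.infh_proto = IPPROTO_TCP;
fh.infh_rand1 = RandomULong();
fh.infh_rand2 = RandomULong();

flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);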
@@ -218,7 +285,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *
 #if TEMPDEBUG
            printf("PCBALLOC reusing PCB for socket %x\n", so);
 #endif
-           inp = (struct inpcb *) so->so_saved_pcb;
+           inp = (struct inpcb *)(void *)so->so_saved_pcb;
            temp = inp->inp_saved_ppcb;
            bzero((caddr_t) inp, sizeof(*inp));
            inp->inp_saved_ppcb = temp;
@@ -375,9 +442,9 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        socket_unlock(so, 0); /* keep reference on socket */
        lck_rw_lock_exclusive(pcbinfo->mtx);
        if (nam) {
-               unsigned int outif = 0;
+               struct ifnet *outif = NULL;
 
-               sin = (struct sockaddr_in *)nam;
+               sin = (struct sockaddr_in *)(void *)nam;
                if (nam->sa_len != sizeof (*sin)) {
                        lck_rw_done(pcbinfo->mtx);
                        socket_lock(so, 0);
@@ -415,7 +482,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        }
                        else {
                                IFA_LOCK(ifa);
-                               outif = ifa->ifa_ifp->if_index;
+                               outif = ifa->ifa_ifp;
                                IFA_UNLOCK(ifa);
                                IFA_REMREF(ifa);
                        }
@@ -436,7 +503,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                }
                        }
 #endif
-                       if (so->so_uid &&
+                       if (kauth_cred_getuid(so->so_cred) &&
                            !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
                                t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo,
                                    sin->sin_addr, lport, INPLOOKUP_WILDCARD);
@@ -445,17 +512,12 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
                                     (t->inp_socket->so_options &
                                         SO_REUSEPORT) == 0) &&
-                                    (so->so_uid != t->inp_socket->so_uid) &&
-                                     ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
-#if INET6
-                                       if (ntohl(sin->sin_addr.s_addr) !=
-                                           INADDR_ANY ||
-                                           ntohl(t->inp_laddr.s_addr) !=
-                                           INADDR_ANY ||
-                                           INP_SOCKAF(so) ==
-                                           INP_SOCKAF(t->inp_socket))
-#endif /* INET6 */
-                                       {
+                                    (kauth_cred_getuid(so->so_cred) !=
+                                        kauth_cred_getuid(t->inp_socket->so_cred)) &&
+                                     ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0) &&
+                                       (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
+                                        ntohl(t->inp_laddr.s_addr) != INADDR_ANY))
+                               {
 #ifdef __APPLE_API_PRIVATE
 
                                                if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) 
@@ -471,7 +533,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 
                                                socket_lock(so, 0);
                                                return (EADDRINUSE);
-                                       }
                                }
                        }
                        t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr,
@@ -479,13 +540,12 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        if (t &&
                            (reuseport & t->inp_socket->so_options) == 0) {
 #if INET6
-                               if (ip6_mapped_addr_on == 0 ||
-                                   ntohl(sin->sin_addr.s_addr) !=
+                               if (ntohl(sin->sin_addr.s_addr) !=
                                    INADDR_ANY ||
                                    ntohl(t->inp_laddr.s_addr) !=
                                    INADDR_ANY ||
-                                   INP_SOCKAF(so) ==
-                                   INP_SOCKAF(t->inp_socket))
+                                   INP_SOCKAF(so) != AF_INET6 ||
+                                   INP_SOCKAF(t->inp_socket) != AF_INET6)
 #endif /* INET6 */
                                {
 #ifdef __APPLE_API_PRIVATE
@@ -506,7 +566,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        }
                }
                inp->inp_laddr = sin->sin_addr;
-               inp->inp_last_outif = outif;
+               inp->inp_last_outifp = outif;
        }
        if (lport == 0) {
                u_short first, last;
@@ -564,7 +624,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        lck_rw_done(pcbinfo->mtx);
                                        socket_lock(so, 0);
                                        inp->inp_laddr.s_addr = INADDR_ANY;
-                                       inp->inp_last_outif = 0;
+                                       inp->inp_last_outifp = NULL;
                                        return (EADDRNOTAVAIL);
                                }
                                --*lastport;
@@ -588,7 +648,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        lck_rw_done(pcbinfo->mtx);
                                        socket_lock(so, 0);
                                        inp->inp_laddr.s_addr = INADDR_ANY;
-                                       inp->inp_last_outif = 0;
+                                       inp->inp_last_outifp = NULL;
                                        return (EADDRNOTAVAIL);
                                }
                                ++*lastport;
@@ -604,7 +664,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        if (in_pcbinshash(inp, 1) != 0) {
                inp->inp_laddr.s_addr = INADDR_ANY;
                inp->inp_lport = 0;
-               inp->inp_last_outif = 0;
+               inp->inp_last_outifp = NULL;
                lck_rw_done(pcbinfo->mtx);
                return (EAGAIN);
        }
@@ -631,10 +691,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
  */
 int
 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
-    struct sockaddr_in *plocal_sin, unsigned int *out_ifscope)
+    struct sockaddr_in *plocal_sin, struct ifnet **outif)
 {
        struct in_ifaddr *ia;
-       struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+       struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
 
        if (nam->sa_len != sizeof (*sin))
                return (EINVAL);
@@ -653,9 +713,6 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                 * and the primary interface supports broadcast,
                 * choose the broadcast address for that interface.
                 */
-#define        satosin(sa)     ((struct sockaddr_in *)(sa))
-#define sintosa(sin)   ((struct sockaddr *)(sin))
-#define ifatoia(ifa)   ((struct in_ifaddr *)(ifa))
                IFA_LOCK_SPIN(&ia->ia_ifa);
                if (sin->sin_addr.s_addr == INADDR_ANY)
                        sin->sin_addr = IA_SIN(ia)->sin_addr;
@@ -678,10 +735,10 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                 */
                ia = (struct in_ifaddr *)0;
 
-               if (out_ifscope != NULL && *out_ifscope != IFSCOPE_NONE)
-                       ifscope = *out_ifscope;
+               if (outif != NULL && *outif != NULL)
+                       ifscope = (*outif)->if_index;
                else if (inp->inp_flags & INP_BOUND_IF)
-                       ifscope = inp->inp_boundif;
+                       ifscope = inp->inp_boundifp->if_index;
 
                nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
                /*
@@ -710,7 +767,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                        bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
                        ro->ro_dst.sa_family = AF_INET;
                        ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
-                       ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+                       ((struct sockaddr_in *)(void *)&ro->ro_dst)->sin_addr =
                                sin->sin_addr;
                        rtalloc_scoped(ro, ifscope);
                        if (ro->ro_rt != NULL)
@@ -727,6 +784,9 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                                RT_UNLOCK(ro->ro_rt);
                                rtfree(ro->ro_rt);
                                ro->ro_rt = NULL;
+                               soevent(inp->inp_socket,
+                                   (SO_FILT_HINT_LOCKED |
+                                   SO_FILT_HINT_IFDENIED));
                        }
                }
                /*
@@ -772,6 +832,9 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                            ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
                                IFA_REMREF(&ia->ia_ifa);
                                ia = NULL;
+                           soevent(inp->inp_socket,
+                               (SO_FILT_HINT_LOCKED |
+                               SO_FILT_HINT_IFDENIED));
                        }
                        if (ia == 0)
                                return (EADDRNOTAVAIL);
@@ -814,8 +877,8 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
                 */
                IFA_LOCK_SPIN(&ia->ia_ifa);
                *plocal_sin = ia->ia_addr;
-               if (out_ifscope != NULL)
-                       *out_ifscope = ia->ia_ifp->if_index;
+               if (outif != NULL)
+                       *outif = ia->ia_ifp;
                IFA_UNLOCK(&ia->ia_ifa);
                IFA_REMREF(&ia->ia_ifa);
        }
@@ -830,17 +893,18 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
  * then pick one.
  */
 int
-in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned int *ifscope)
+in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
+    struct ifnet **outif)
 {
        struct sockaddr_in ifaddr;
-       struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+       struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
        struct inpcb *pcb;
        int error;
 
        /*
         *   Call inner routine, to assign local interface address.
         */
-       if ((error = in_pcbladdr(inp, nam, &ifaddr, ifscope)) != 0)
+       if ((error = in_pcbladdr(inp, nam, &ifaddr, outif)) != 0)
                return(error);
 
        socket_unlock(inp->inp_socket, 0);
@@ -874,7 +938,7 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned
                        socket_lock(inp->inp_socket, 0);
                }
                inp->inp_laddr = ifaddr.sin_addr;
-               inp->inp_last_outif = ifscope ? *ifscope : IFSCOPE_NONE;
+               inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
                inp->inp_flags |= INP_INADDR_ANY;
        }
         else {
@@ -980,9 +1044,9 @@ in_pcbdispose(struct inpcb *inp)
        lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
 
        inp->inp_gencnt = ++ipi->ipi_gencnt;
-       /*### access ipi in in_pcbremlists */
+       /* access ipi in in_pcbremlists */
        in_pcbremlists(inp);
-       
+
        if (so) {
                if (so->so_proto->pr_flags & PR_PCBLOCK) {
                        sofreelastref(so, 0);
@@ -1300,7 +1364,7 @@ in_pcblookup_hash_exists(
        int wildcard,
        uid_t *uid,
        gid_t *gid,
-       __unused struct ifnet *ifp)
+       struct ifnet *ifp)
 {
        struct inpcbhead *head;
        struct inpcb *inp;
@@ -1309,7 +1373,7 @@ in_pcblookup_hash_exists(
 
        *uid = UID_MAX;
        *gid = GID_MAX;
-       
+
        /*
         * We may have found the pcb in the last lookup - check this first.
         */
@@ -1326,6 +1390,11 @@ in_pcblookup_hash_exists(
                if ((inp->inp_vflag & INP_IPV4) == 0)
                        continue;
 #endif
+               if (ip_restrictrecvif && ifp != NULL &&
+                   (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                   !(inp->inp_flags & INP_RECV_ANYIF))
+                       continue;
+
                if (inp->inp_faddr.s_addr == faddr.s_addr &&
                    inp->inp_laddr.s_addr == laddr.s_addr &&
                    inp->inp_fport == fport &&
@@ -1334,8 +1403,10 @@ in_pcblookup_hash_exists(
                                /*
                                 * Found.
                                 */
-                               *uid = inp->inp_socket->so_uid;
-                               *gid = inp->inp_socket->so_gid;
+                               *uid = kauth_cred_getuid(
+                                   inp->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   inp->inp_socket->so_cred);
                        }
                        lck_rw_done(pcbinfo->mtx);
                        return (found);
@@ -1354,17 +1425,19 @@ in_pcblookup_hash_exists(
                        if ((inp->inp_vflag & INP_IPV4) == 0)
                                continue;
 #endif
+                       if (ip_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(inp->inp_flags & INP_RECV_ANYIF))
+                               continue;
+
                        if (inp->inp_faddr.s_addr == INADDR_ANY &&
                            inp->inp_lport == lport) {
-#if defined(NFAITH) && NFAITH > 0
-                               if (ifp && ifp->if_type == IFT_FAITH &&
-                                   (inp->inp_flags & INP_FAITH) == 0)
-                                       continue;
-#endif
                                if (inp->inp_laddr.s_addr == laddr.s_addr) {
                                        if ((found = (inp->inp_socket != NULL))) {
-                                               *uid = inp->inp_socket->so_uid;
-                                               *gid = inp->inp_socket->so_gid;
+                                               *uid = kauth_cred_getuid(
+                                                   inp->inp_socket->so_cred);
+                                               *gid = kauth_cred_getgid(
+                                                   inp->inp_socket->so_cred);
                                        }
                                        lck_rw_done(pcbinfo->mtx);
                                        return (found);
@@ -1385,8 +1458,10 @@ in_pcblookup_hash_exists(
 #if INET6
                        if (local_wild_mapped != NULL) {
                                if ((found = (local_wild_mapped->inp_socket != NULL))) {
-                                       *uid = local_wild_mapped->inp_socket->so_uid;
-                                       *gid = local_wild_mapped->inp_socket->so_gid;
+                                       *uid = kauth_cred_getuid(
+                                           local_wild_mapped->inp_socket->so_cred);
+                                       *gid = kauth_cred_getgid(
+                                           local_wild_mapped->inp_socket->so_cred);
                                }
                                lck_rw_done(pcbinfo->mtx);
                                return (found);
@@ -1397,8 +1472,10 @@ in_pcblookup_hash_exists(
                }
                if (local_wild != NULL) {
                        if ((found = (local_wild->inp_socket != NULL))) {
-                               *uid = local_wild->inp_socket->so_uid;
-                               *gid = local_wild->inp_socket->so_gid;
+                               *uid = kauth_cred_getuid(
+                                   local_wild->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   local_wild->inp_socket->so_cred);
                        }
                        lck_rw_done(pcbinfo->mtx);
                        return (found);
@@ -1423,7 +1500,7 @@ in_pcblookup_hash(
        struct in_addr laddr,
        u_int lport_arg,
        int wildcard,
-       __unused struct ifnet *ifp)
+       struct ifnet *ifp)
 {
        struct inpcbhead *head;
        struct inpcb *inp;
@@ -1444,6 +1521,11 @@ in_pcblookup_hash(
                if ((inp->inp_vflag & INP_IPV4) == 0)
                        continue;
 #endif
+               if (ip_restrictrecvif && ifp != NULL &&
+                   (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                   !(inp->inp_flags & INP_RECV_ANYIF))
+                       continue;
+
                if (inp->inp_faddr.s_addr == faddr.s_addr &&
                    inp->inp_laddr.s_addr == laddr.s_addr &&
                    inp->inp_fport == fport &&
@@ -1456,8 +1538,8 @@ in_pcblookup_hash(
                                return (inp);
                        }
                        else {  /* it's there but dead, say it isn't found */
-                               lck_rw_done(pcbinfo->mtx);      
-                               return(NULL);
+                               lck_rw_done(pcbinfo->mtx);
+                               return (NULL);
                        }
                }
        }
@@ -1473,21 +1555,21 @@ in_pcblookup_hash(
                        if ((inp->inp_vflag & INP_IPV4) == 0)
                                continue;
 #endif
+                       if (ip_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(inp->inp_flags & INP_RECV_ANYIF))
+                               continue;
+
                        if (inp->inp_faddr.s_addr == INADDR_ANY &&
                            inp->inp_lport == lport) {
-#if defined(NFAITH) && NFAITH > 0
-                               if (ifp && ifp->if_type == IFT_FAITH &&
-                                   (inp->inp_flags & INP_FAITH) == 0)
-                                       continue;
-#endif
                                if (inp->inp_laddr.s_addr == laddr.s_addr) {
                                        if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
                                                lck_rw_done(pcbinfo->mtx);
                                                return (inp);
                                        }
                                        else {  /* it's there but dead, say it isn't found */
-                                               lck_rw_done(pcbinfo->mtx);      
-                                               return(NULL);
+                                               lck_rw_done(pcbinfo->mtx);
+                                               return (NULL);
                                        }
                                }
                                else if (inp->inp_laddr.s_addr == INADDR_ANY) {
@@ -1509,8 +1591,8 @@ in_pcblookup_hash(
                                        return (local_wild_mapped);
                                }
                                else {  /* it's there but dead, say it isn't found */
-                                       lck_rw_done(pcbinfo->mtx);      
-                                       return(NULL);
+                                       lck_rw_done(pcbinfo->mtx);
+                                       return (NULL);
                                }
                        }
 #endif /* INET6 */
@@ -1522,8 +1604,8 @@ in_pcblookup_hash(
                        return (local_wild);
                }
                else {  /* it's there but dead, say it isn't found */
-                       lck_rw_done(pcbinfo->mtx);      
-                       return(NULL);
+                       lck_rw_done(pcbinfo->mtx);
+                       return (NULL);
                }
        }
 
@@ -1581,6 +1663,9 @@ in_pcbinshash(struct inpcb *inp, int locked)
                if (phd->phd_port == inp->inp_lport)
                        break;
        }
+
+       VERIFY(inp->inp_state != INPCB_STATE_DEAD);
+
        /*
         * If none exists, malloc one and tack it on.
         */
@@ -1631,11 +1716,12 @@ in_pcbrehash(struct inpcb *inp)
 
 /*
  * Remove PCB from various lists.
+ * Must be called with the pcbinfo lock held in exclusive mode.
  */
-//###LOCK must be called with list lock held
 void
 in_pcbremlists(struct inpcb *inp)
 {
+       struct inp_fc_entry *infce;
        inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
 
        if (inp->inp_lport) {
@@ -1649,6 +1735,11 @@ in_pcbremlists(struct inpcb *inp)
                }
        }
        LIST_REMOVE(inp, inp_list);
+
+       infce = inp_fc_getinp(inp->inp_flowhash);
+       if (infce != NULL)
+               inp_fc_entry_free(infce);
+
        inp->inp_pcbinfo->ipi_count--;
 }
 
@@ -1763,7 +1854,7 @@ inpcb_to_compat(
        bzero(inp_compat, sizeof(*inp_compat));
        inp_compat->inp_fport = inp->inp_fport;
        inp_compat->inp_lport = inp->inp_lport;
-       inp_compat->nat_owner = inp->nat_owner;
+       inp_compat->nat_owner = 0;
        inp_compat->nat_cookie = inp->nat_cookie;
        inp_compat->inp_gencnt = inp->inp_gencnt;
        inp_compat->inp_flags = inp->inp_flags;
@@ -1848,7 +1939,7 @@ inp_route_copyout(struct inpcb *inp, struct route *dst)
                rtfree(src->ro_rt);
                src->ro_rt = NULL;
        }
-       
+
        route_copyout(dst, src, sizeof(*dst));
 }
 
@@ -1869,9 +1960,21 @@ inp_route_copyin(struct inpcb *inp, struct route *src)
 /*
  * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
  */
-void
+int
 inp_bindif(struct inpcb *inp, unsigned int ifscope)
 {
+       struct ifnet *ifp = NULL;
+
+       ifnet_head_lock_shared();
+       if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
+           (ifp = ifindex2ifnet[ifscope]) == NULL)) {
+               ifnet_head_done();
+               return (ENXIO);
+       }
+       ifnet_head_done();
+
+       VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
+
        /*
         * A zero interface scope value indicates an "unbind".
         * Otherwise, take in whatever value the app desires;
@@ -1881,8 +1984,8 @@ inp_bindif(struct inpcb *inp, unsigned int ifscope)
         * route lookup from this point on will require an
         * exact match for the embedded interface scope.
         */
-       inp->inp_boundif = ifscope;
-       if (inp->inp_boundif == IFSCOPE_NONE)
+       inp->inp_boundifp = ifp;
+       if (inp->inp_boundifp == NULL)
                inp->inp_flags &= ~INP_BOUND_IF;
        else
                inp->inp_flags |= INP_BOUND_IF;
@@ -1892,6 +1995,8 @@ inp_bindif(struct inpcb *inp, unsigned int ifscope)
                rtfree(inp->inp_route.ro_rt);
                inp->inp_route.ro_rt = NULL;
        }
+
+       return (0);
 }
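
For reference, this handler is reachable from userland through the IP_BOUND_IF socket option; a minimal sketch of a caller, assuming an SDK that exposes IP_BOUND_IF and an interface named "en0":

        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <net/if.h>

        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        unsigned int idx = if_nametoindex("en0");      /* 0 if not found */

        /* idx == 0 (IFSCOPE_NONE) unbinds; a stale index now fails with ENXIO */
        if (setsockopt(fd, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx)) == -1)
                perror("setsockopt(IP_BOUND_IF)");
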
 
 /*
@@ -1915,3 +2020,262 @@ inp_nocellular(struct inpcb *inp, unsigned int val)
 
        return (0);
 }
+
+/*
+ * Calculate the flow hash for an inp; an interface uses it to identify
+ * a flow. When an interface issues a flow-control advisory, this hash
+ * serves as the flow's identifier.
+ */
+u_int32_t
+inp_calc_flowhash(struct inpcb *inp)
+{
+       struct inp_flowhash_key fh __attribute__((aligned(8)));
+       u_int32_t flowhash = 0;
+
+       if (inp_hash_seed == 0)
+               inp_hash_seed = RandomULong();
+
+       bzero(&fh, sizeof (fh));
+
+       bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr));
+       bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr));
+
+       fh.infh_lport = inp->inp_lport;
+       fh.infh_fport = inp->inp_fport;
+       fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
+       fh.infh_proto = inp->inp_ip_p;
+       fh.infh_rand1 = RandomULong();
+       fh.infh_rand2 = RandomULong();
+
+try_again:
+       flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed);
+       if (flowhash == 0) {
+               /* try to get a non-zero flowhash */
+               inp_hash_seed = RandomULong();
+               goto try_again;
+       }
+
+       return (flowhash);
+}
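
A flow hash of zero is reserved to mean "no hash" (see the SO_FLUSH handler below), which is why the code reseeds and retries. A minimal userland illustration of the same seeded, retry-until-nonzero pattern, with a toy FNV-1a standing in for the kernel's net_flowhash():

        #include <stdint.h>
        #include <stddef.h>

        /* toy stand-in for net_flowhash(): seeded FNV-1a over the key */
        static uint32_t
        toy_flowhash(const void *key, size_t len, uint32_t seed)
        {
                const uint8_t *p = key;
                uint32_t h = 2166136261u ^ seed;

                while (len-- > 0) {
                        h ^= *p++;
                        h *= 16777619u;
                }
                return (h);
        }

        static uint32_t
        calc_flowhash(const void *key, size_t len, uint32_t *seedp)
        {
                uint32_t h;

                /* zero is the "no flow hash" sentinel; reseed until nonzero */
                while ((h = toy_flowhash(key, len, *seedp)) == 0)
                        (*seedp)++;     /* stands in for RandomULong() */
                return (h);
        }
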
+
+/*
+ * Function to compare inp_fc_entries in inp flow control tree
+ */
+static inline int
+infc_cmp(const struct inp_fc_entry *fc1, const struct inp_fc_entry *fc2)
+{
+       /* compare explicitly; unsigned subtraction can wrap */
+       if (fc1->infc_flowhash < fc2->infc_flowhash)
+               return (-1);
+       return (fc1->infc_flowhash > fc2->infc_flowhash);
+}
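
infc_cmp() is the comparator consumed by the BSD <sys/tree.h> red-black tree macros. The supporting declarations are not part of the hunks shown here; based on the RB_ENTRY field added to struct inp_fc_entry in the header below, they would look roughly like:

        RB_HEAD(inp_fc_tree, inp_fc_entry) inp_fc_tree;
        RB_PROTOTYPE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
        RB_GENERATE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
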
+
+int
+inp_fc_addinp(struct inpcb *inp)
+{
+       struct inp_fc_entry keyfc, *infc;
+       u_int32_t flowhash = inp->inp_flowhash;
+
+       keyfc.infc_flowhash = flowhash;
+
+       lck_mtx_lock_spin(&inp_fc_lck);
+       infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
+       if (infc != NULL && infc->infc_inp == inp) {
+               /* Entry is already in inp_fc_tree, return */
+               lck_mtx_unlock(&inp_fc_lck);
+               return (1);
+       }
+
+       if (infc != NULL) {
+               /*
+                * There is a different fc entry with the same
+                * flow hash but a different inp pointer. Flow
+                * hash collisions are possible but unlikely,
+                * so simply avoid adding a second entry when
+                * one occurs.
+                */
+               lck_mtx_unlock(&inp_fc_lck);
+               return (0);
+       }
+
+       /* become regular mutex */
+       lck_mtx_convert_spin(&inp_fc_lck);
+
+       infc = zalloc_noblock(inp_fcezone);
+       if (infc == NULL) {
+               /* memory allocation failed */
+               lck_mtx_unlock(&inp_fc_lck);
+               return (0);
+       }
+       bzero(infc, sizeof (*infc));
+
+       infc->infc_flowhash = flowhash;
+       infc->infc_inp = inp;
+
+       RB_INSERT(inp_fc_tree, &inp_fc_tree, infc);
+       lck_mtx_unlock(&inp_fc_lck);
+       return (1);
+}
+
+struct inp_fc_entry*
+inp_fc_getinp(u_int32_t flowhash)
+{
+       struct inp_fc_entry keyfc, *infc;
+
+       keyfc.infc_flowhash = flowhash;
+
+       lck_mtx_lock_spin(&inp_fc_lck);
+       infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
+       if (infc == NULL) {
+               /* inp is not present, return */
+               lck_mtx_unlock(&inp_fc_lck);
+               return (NULL);
+       }
+
+       RB_REMOVE(inp_fc_tree, &inp_fc_tree, infc);
+
+       if (in_pcb_checkstate(infc->infc_inp, WNT_ACQUIRE, 0) ==
+           WNT_STOPUSING) {
+               /* become regular mutex */
+               lck_mtx_convert_spin(&inp_fc_lck);
+
+               /*
+                * This inp is going away, just don't process it.
+                */
+               inp_fc_entry_free(infc);
+               infc = NULL;
+       }
+       lck_mtx_unlock(&inp_fc_lck);
+
+       return (infc);
+}
+
+void
+inp_fc_entry_free(struct inp_fc_entry *infc)
+{
+       zfree(inp_fcezone, infc);
+}
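
A successful inp_fc_getinp() both removes the entry from the tree and takes a want count on the inp, so a consumer must free the entry and then hand the inp to inp_fc_feedback(), which drops the count. A hedged sketch of the interface-side flow (the advisory callback context itself is hypothetical):

        /* hypothetical flow-advisory callback on the interface side */
        struct inp_fc_entry *infce = inp_fc_getinp(flowhash);
        if (infce != NULL) {
                struct inpcb *inp = infce->infc_inp;

                inp_fc_entry_free(infce);       /* already off the tree */
                inp_fc_feedback(inp);           /* unthrottles, drops want count */
        }
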
+
+void
+inp_fc_feedback(struct inpcb *inp)
+{
+       struct socket *so = inp->inp_socket;
+
+       /* we already hold a want_cnt on this inp; the socket can't be NULL */
+       VERIFY(so != NULL);
+       socket_lock(so, 1);
+
+       if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+               socket_unlock(so, 1);
+               return;
+       }
+
+       /*
+        * Return if the connection is not in a flow-controlled state.
+        * This can happen if the connection experienced loss while
+        * it was flow controlled.
+        */
+       if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
+               socket_unlock(so, 1);
+               return;
+       }
+       inp_reset_fc_state(inp);
+
+       if (so->so_proto->pr_type == SOCK_STREAM)
+               inp_fc_unthrottle_tcp(inp);
+
+       socket_unlock(so, 1);
+}
+
+void
+inp_reset_fc_state(struct inpcb *inp)
+{
+       struct socket *so = inp->inp_socket;
+       int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
+       int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
+
+       inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
+
+       if (suspended) {
+               so->so_flags &= ~(SOF_SUSPENDED);
+               soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
+       }
+
+       if (inp->inp_sndinprog_cnt > 0)
+               inp->inp_flags |= INP_FC_FEEDBACK;
+
+       /* Give a write wakeup to unblock the socket */
+       if (needwakeup)
+               sowwakeup(so);
+}
+
+int
+inp_set_fc_state(struct inpcb *inp, int advcode)
+{
+       /*
+        * If there was feedback from the interface while a send
+        * operation was in progress, ignore this flow advisory to
+        * avoid a race between setting the flow-controlled state
+        * and receiving feedback from the interface.
+        */
+       if (inp->inp_flags & INP_FC_FEEDBACK)
+               return (0);
+
+       inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
+       if (inp_fc_addinp(inp)) {
+               switch (advcode) {
+               case FADV_FLOW_CONTROLLED:
+                       inp->inp_flags |= INP_FLOW_CONTROLLED;
+                       break;
+               case FADV_SUSPENDED:
+                       inp->inp_flags |= INP_FLOW_SUSPENDED;
+                       soevent(inp->inp_socket,
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
+
+                       /* Record the fact that suspend event was sent */
+                       inp->inp_socket->so_flags |= SOF_SUSPENDED;
+                       break;
+               }
+       }
+       return (1);
+}
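
inp_set_fc_state(), inp_reset_fc_state() and inp_fc_feedback() together form a small per-flow state machine driven by interface flow advisories. A sketch of how a transmit path might feed it; the enqueue call is a hypothetical stand-in, while the FADV_* codes are the ones used above:

        int adv = ifnet_enqueue_pkt(ifp, pkt);  /* hypothetical; returns FADV_* */

        switch (adv) {
        case FADV_FLOW_CONTROLLED:
        case FADV_SUSPENDED:
                /* park the flow; inp_fc_feedback() will resume it later */
                (void) inp_set_fc_state(inp, adv);
                break;
        default:
                /* FADV_SUCCESS: keep sending */
                break;
        }
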
+
+/*
+ * Handler for SO_FLUSH socket option.
+ */
+int
+inp_flush(struct inpcb *inp, int optval)
+{
+       u_int32_t flowhash = inp->inp_flowhash;
+       struct rtentry *rt;
+
+       /* Either all classes or one of the valid ones */
+       if (optval != SO_TC_ALL && !SO_VALID_TC(optval))
+               return (EINVAL);
+
+       /* We need a flow hash for identification */
+       if (flowhash == 0)
+               return (0);
+
+       /* We need a cached route for the interface */
+       if ((rt = inp->inp_route.ro_rt) != NULL) {
+               struct ifnet *ifp = rt->rt_ifp;
+               if_qflush_sc(ifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
+       }
+
+       return (0);
+}
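
A minimal userland sketch of driving this handler, assuming private SDK headers that define SO_FLUSH and SO_TC_ALL:

        int tc = SO_TC_ALL;     /* flush pending packets of every class */

        if (setsockopt(fd, SOL_SOCKET, SO_FLUSH, &tc, sizeof (tc)) == -1)
                perror("setsockopt(SO_FLUSH)");
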
+
+/*
+ * Clear the INP_INADDR_ANY flag (special case for PPP only)
+ */
+void
+inp_clear_INP_INADDR_ANY(struct socket *so)
+{
+       struct inpcb *inp = NULL;
+
+       socket_lock(so, 1);
+       inp = sotoinpcb(so);
+       if (inp) {
+               inp->inp_flags &= ~INP_INADDR_ANY;
+       }
+       socket_unlock(so, 1);
+}
+
index 728b93e3353a90fac75616896d9d7aa12eefd570..63dddb8fd7a0dbb819b529d8cf9fe5d26c051236 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/types.h>
 #include <sys/queue.h>
 #ifdef KERNEL_PRIVATE
-#ifdef KERNEL
+#ifdef BSD_KERNEL_PRIVATE
+#include <sys/tree.h>
+#endif /* BSD_KERNEL_PRIVATE */
 #include <kern/locks.h>
-#endif
 #endif /* KERNEL_PRIVATE */
 
 #include <netinet6/ipsec.h> /* for IPSEC */
@@ -85,7 +86,9 @@
 
 #define        in6pcb          inpcb   /* for KAME src sync over BSD*'s */
 #define        in6p_sp         inp_sp  /* for KAME src sync over BSD*'s */
+#endif /* KERNEL_PRIVATE */
 
+#ifdef BSD_KERNEL_PRIVATE
 /*
  * Common structure pcb for internet protocol implementation.
  * Here are stored pointers to local and foreign host table
@@ -95,9 +98,9 @@
  */
 LIST_HEAD(inpcbhead, inpcb);
 LIST_HEAD(inpcbporthead, inpcbport);
-#endif /* KERNEL_PRIVATE */
-typedef        u_quad_t        inp_gen_t;
+#endif /* BSD_KERNEL_PRIVATE */
 
+typedef        u_quad_t        inp_gen_t;
 /*
  * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
  * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
@@ -108,7 +111,7 @@ struct in_addr_4in6 {
        struct  in_addr ia46_addr4;
 };
 
-#ifdef KERNEL_PRIVATE
+#ifdef KERNEL_PRIVATE 
 /*
  * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS
  * of the structure.  Therefore, it is important that the members in
@@ -119,34 +122,44 @@ struct    icmp6_filter;
 #if CONFIG_MACF_NET
 struct label;
 #endif
+struct ifnet;
+
+#ifdef BSD_KERNEL_PRIVATE
+/* Flow control entry per socket */
+struct inp_fc_entry {
+       RB_ENTRY(inp_fc_entry) infc_link;
+       u_int32_t infc_flowhash;
+       struct inpcb *infc_inp;
+};
+#endif /* BSD_KERNEL_PRIVATE */
 
-struct inp_stat
-{
+struct inp_stat {
        u_int64_t       rxpackets;
        u_int64_t       rxbytes;
        u_int64_t       txpackets;
        u_int64_t       txbytes;
 };
 
+
 struct inpcb {
        LIST_ENTRY(inpcb) inp_hash;     /* hash list */
-       int             inp_wantcnt;            /* pcb wanted count. protected by pcb list lock */
-       int             inp_state;              /* state of this pcb, in use, recycled, ready for recycling... */
+       int             inp_wantcnt;    /* pcb wanted count. protected by pcb list lock */
+       int             inp_state;      /* state of this pcb, in use, recycled, ready for recycling... */
        u_short inp_fport;              /* foreign port */
        u_short inp_lport;              /* local port */
        LIST_ENTRY(inpcb) inp_list;     /* list for all PCBs of this proto */
        void    *inp_ppcb;              /* pointer to per-protocol pcb */
        struct  inpcbinfo *inp_pcbinfo; /* PCB list info */
        struct  socket *inp_socket;     /* back pointer to socket */
-       u_char  nat_owner;              /* Used to NAT TCP/UDP traffic */
        u_int32_t nat_cookie;           /* Cookie stored and returned to NAT */
        LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
        struct  inpcbport *inp_phd;     /* head of this list */
        inp_gen_t inp_gencnt;           /* generation count of this instance */
-       int     inp_flags;              /* generic IP/datagram flags */
+       u_int32_t inp_flags;            /* generic IP/datagram flags */
        u_int32_t inp_flow;
 
-       u_char  inp_vflag;      /* INP_IPV4 or INP_IPV6 */
+       u_char  inp_sndinprog_cnt;      /* outstanding send operations */
+       u_char  inp_vflag;              /* INP_IPV4 or INP_IPV6 */
 
        u_char inp_ip_ttl;              /* time to live proto */
        u_char inp_ip_p;                /* protocol proto */
@@ -196,14 +209,16 @@ struct inpcb {
        caddr_t inp_saved_ppcb;         /* place to save pointer while cached */
        struct inpcbpolicy *inp_sp;
        decl_lck_mtx_data( ,inpcb_mtx); /* inpcb per-socket mutex */
-       unsigned int inp_boundif;       /* interface scope for INP_BOUND_IF */
-       unsigned int inp_last_outif;    /* last known outgoing interface */
+       struct ifnet *inp_boundifp;     /* interface for INP_BOUND_IF */
+       struct ifnet *inp_last_outifp;  /* last known outgoing interface */
        u_int32_t inp_reserved[2];      /* reserved for future use */
+       u_int32_t inp_flowhash;         /* flow hash */
+
 #if CONFIG_MACF_NET
        struct label *inp_label;        /* MAC label */
 #endif
        struct inp_stat *inp_stat;
-       u_int8_t                inp_stat_store[sizeof(struct inp_stat) + sizeof(u_int64_t)];
+       u_int8_t inp_stat_store[sizeof(struct inp_stat) + sizeof(u_int64_t)];
 };
 
 #endif /* KERNEL_PRIVATE */
@@ -422,6 +437,7 @@ struct      xinpcb_n {
                u_short         inp6_ifindex;
                short           inp6_hops;
        }                               inp_depend6;
+       u_int32_t               inp_flowhash;
 };
 
 #endif /* PRIVATE */
@@ -442,12 +458,14 @@ struct    xinpgen {
 #define INP_IPV6       0x2
 #define        inp_faddr       inp_dependfaddr.inp46_foreign.ia46_addr4
 #define        inp_laddr       inp_dependladdr.inp46_local.ia46_addr4
+#define        in6p_faddr      inp_dependfaddr.inp6_foreign
+#define        in6p_laddr      inp_dependladdr.inp6_local
+
+#ifdef BSD_KERNEL_PRIVATE
 #define        inp_route       inp_dependroute.inp4_route
 #define        inp_ip_tos      inp_depend4.inp4_ip_tos
 #define        inp_options     inp_depend4.inp4_options
 #define        inp_moptions    inp_depend4.inp4_moptions
-#define        in6p_faddr      inp_dependfaddr.inp6_foreign
-#define        in6p_laddr      inp_dependladdr.inp6_local
 #define        in6p_route      inp_dependroute.inp6_route
 #define        in6p_ip6_hlim   inp_depend6.inp6_hlim
 #define        in6p_hops       inp_depend6.inp6_hops   /* default hop limit */
@@ -462,14 +480,19 @@ struct    xinpgen {
 #define        in6p_ifindex    inp_depend6.inp6_ifindex
 #define        in6p_flags      inp_flags  /* for KAME src sync over BSD*'s */
 #define        in6p_socket     inp_socket  /* for KAME src sync over BSD*'s */
+#endif /* BSD_KERNEL_PRIVATE */
+
 #define        in6p_lport      inp_lport  /* for KAME src sync over BSD*'s */
 #define        in6p_fport      inp_fport  /* for KAME src sync over BSD*'s */
 #define        in6p_ppcb       inp_ppcb  /* for KAME src sync over BSD*'s */
+
+#ifdef BSD_KERNEL_PRIVATE
 #define        in6p_state      inp_state
 #define        in6p_wantcnt    inp_wantcnt
-#define        in6p_last_outif inp_last_outif
+#define        in6p_last_outifp inp_last_outifp
+#endif /* BSD_KERNEL_PRIVATE */
 
-#ifdef KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 struct inpcbport {
        LIST_ENTRY(inpcbport) phd_hash;
        struct inpcbhead phd_pcblist;
@@ -479,18 +502,18 @@ struct inpcbport {
 struct inpcbinfo {             /* XXX documentation, prefixes */
        struct  inpcbhead *hashbase;
 #ifdef __APPLE__
-       u_int32_t       hashsize; /* in elements */
+       u_int32_t hashsize;             /* in elements */
 #endif
-       u_long  hashmask;       /* needs to be u_long as expected by hash functions */
+       u_long  hashmask;               /* u_long as expected by hash functions */
        struct  inpcbporthead *porthashbase;
-       u_long  porthashmask;   /* needs to be u_long as expected by hash functions */
+       u_long  porthashmask;           /* u_long as expected by hash functions */
        struct  inpcbhead *listhead;
        u_short lastport;
        u_short lastlow;
        u_short lasthi;
-       void   *ipi_zone; /* zone to allocate pcbs from */
-       u_int   ipi_count;      /* number of pcbs in this list */
-       u_quad_t ipi_gencnt;    /* current generation count */
+       void   *ipi_zone;               /* zone to allocate pcbs from */
+       u_int   ipi_count;              /* number of pcbs in this list */
+       u_quad_t ipi_gencnt;            /* current generation count */
 #ifdef __APPLE__
 #ifdef _KERN_LOCKS_H_
        lck_attr_t      *mtx_attr;      /* mutex attributes */
@@ -511,29 +534,44 @@ struct inpcbinfo {                /* XXX documentation, prefixes */
 #define INP_PCBPORTHASH(lport, mask) \
        (ntohs((lport)) & (mask))
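
For a concrete example: with a 128-bucket port hash (porthashmask 0x7f), local port 80 maps to bucket 80 & 0x7f == 80, while port 8080 maps to bucket 8080 & 0x7f == 16; the ntohs() ensures the mask is applied to the host-order port value.
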
 
-#endif /* KERNEL_PRIVATE */
+#define INP_IS_FLOW_CONTROLLED(_inp_) ((_inp_)->inp_flags & INP_FLOW_CONTROLLED)
+#define INP_IS_FLOW_SUSPENDED(_inp_) \
+       (((_inp_)->inp_flags & INP_FLOW_SUSPENDED) || \
+       ((_inp_)->inp_socket->so_flags & SOF_SUSPENDED))
+#define INP_WAIT_FOR_IF_FEEDBACK(_inp_) \
+       (((_inp_)->inp_flags & (INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED)) != 0)
+
+#endif /* BSD_KERNEL_PRIVATE */
 
 /* flags in inp_flags: */
+#ifdef BSD_KERNEL_PRIVATE
 #define        INP_RECVOPTS            0x01    /* receive incoming IP options */
 #define        INP_RECVRETOPTS         0x02    /* receive IP options for reply */
 #define        INP_RECVDSTADDR         0x04    /* receive IP dst address */
 #define        INP_HDRINCL             0x08    /* user supplies entire IP header */
 #define        INP_HIGHPORT            0x10    /* user wants "high" port binding */
 #define        INP_LOWPORT             0x20    /* user wants "low" port binding */
+#endif /* BSD_KERNEL_PRIVATE */
 #define        INP_ANONPORT            0x40    /* port chosen for user */
+#ifdef BSD_KERNEL_PRIVATE
 #define        INP_RECVIF              0x80    /* receive incoming interface */
 #define        INP_MTUDISC             0x100   /* user can do MTU discovery */
 #ifdef __APPLE__
 #define INP_STRIPHDR           0x200   /* Strip headers in raw_ip, for OT support */
 #endif
-#define  INP_FAITH             0x400   /* accept FAITH'ed connections */
+#define  INP_RECV_ANYIF                0x400   /* don't restrict inbound interface */
+#endif /* BSD_KERNEL_PRIVATE */
 #define  INP_INADDR_ANY        0x800   /* local address wasn't specified */
 
+#ifdef BSD_KERNEL_PRIVATE
 #define INP_RECVTTL            0x1000
 #define        INP_UDP_NOCKSUM         0x2000  /* Turn off outbound UDP checksum */
 #define        INP_BOUND_IF            0x4000  /* bind socket to an ifindex */
+#endif /* BSD_KERNEL_PRIVATE */
 
 #define IN6P_IPV6_V6ONLY       0x8000 /* restrict AF_INET6 socket for v6 */
+
+#ifdef BSD_KERNEL_PRIVATE
 #define        IN6P_PKTINFO            0x10000 /* receive IP6 dst and I/F */
 #define        IN6P_HOPLIMIT           0x20000 /* receive hoplimit */
 #define        IN6P_HOPOPTS            0x40000 /* receive hop-by-hop options */
@@ -542,14 +580,19 @@ struct inpcbinfo {                /* XXX documentation, prefixes */
 #define        IN6P_RTHDRDSTOPTS       0x200000 /* receive dstoptions before rthdr */
 #define        IN6P_TCLASS             0x400000 /* receive traffic class value */
 #define        IN6P_AUTOFLOWLABEL      0x800000 /* attach flowlabel automatically */
+#endif /* BSD_KERNEL_PRIVATE */
+
 #define        IN6P_BINDV6ONLY         0x1000000 /* do not grab IPv4 traffic */
+
+#ifdef BSD_KERNEL_PRIVATE
 #define        IN6P_RFC2292            0x2000000 /* used RFC2292 API on the socket */
 #define        IN6P_MTU                0x4000000 /* receive path MTU */
 #define        INP_PKTINFO             0x8000000 /* receive and send PKTINFO for IPv4 */
-
+#define INP_FLOW_SUSPENDED     0x10000000 /* flow suspended */
 #define        INP_NO_IFT_CELLULAR     0x20000000 /* do not use IFT_CELLULAR route */
+#define INP_FLOW_CONTROLLED    0x40000000 /* flow controlled */
+#define INP_FC_FEEDBACK        0x80000000 /* got interface flow adv feedback */
 
-#ifdef KERNEL_PRIVATE
 #define        INP_CONTROLOPTS         (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
                                 INP_RECVIF|INP_RECVTTL|INP_PKTINFO|\
                                 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
@@ -565,7 +608,7 @@ struct inpcbinfo {          /* XXX documentation, prefixes */
 #define        IN6P_ANONPORT           INP_ANONPORT
 #define        IN6P_RECVIF             INP_RECVIF
 #define        IN6P_MTUDISC            INP_MTUDISC
-#define        IN6P_FAITH              INP_FAITH
+#define        IN6P_RECV_ANYIF         INP_RECV_ANYIF
 #define        IN6P_CONTROLOPTS INP_CONTROLOPTS
 #define        IN6P_NO_IFT_CELLULAR    INP_NO_IFT_CELLULAR
        /*
@@ -580,25 +623,31 @@ struct inpcbinfo {                /* XXX documentation, prefixes */
 #define INPCB_OWNED_BY_X       0x80
 #define INPCB_MAX_IDS          7
 #endif /* __APPLE__ */
+#endif /* BSD_KERNEL_PRIVATE */
 
 #define        sotoinpcb(so)   ((struct inpcb *)(so)->so_pcb)
 #define        sotoin6pcb(so)  sotoinpcb(so) /* for KAME src sync over BSD*'s */
 
+#ifdef BSD_KERNEL_PRIVATE
 #define        INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
 #define        INP_SOCKTYPE(so) so->so_proto->pr_type
 
 #define        INP_CHECK_SOCKAF(so, af)        (INP_SOCKAF(so) == af)
+#define        INP_CHECK_SOCKTYPE(so, type)    (INP_SOCKTYPE(so) == type)
 
-#ifdef KERNEL
 extern int     ipport_lowfirstauto;
 extern int     ipport_lowlastauto;
 extern int     ipport_firstauto;
 extern int     ipport_lastauto;
+#endif /* BSD_KERNEL_PRIVATE */
+
 extern int     ipport_hifirstauto;
 extern int     ipport_hilastauto;
 
 struct sysctl_req;
 
+#ifdef BSD_KERNEL_PRIVATE
+
 #define INPCB_STATE_INUSE      0x1     /* freshly allocated PCB, it's in use */
 #define INPCB_STATE_CACHED     0x2     /* this pcb is sitting in a cache */
 #define INPCB_STATE_DEAD       0x3     /* should treat as gone, will be garbage collected and freed */
@@ -611,13 +660,14 @@ extern void       in_losing(struct inpcb *);
 extern void    in_rtchange(struct inpcb *, int);
 extern int     in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *);
 extern int     in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
-extern int     in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *, unsigned int *);
+extern int     in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *,
+                   struct ifnet **);
 extern void    in_pcbdetach(struct inpcb *);
 extern void    in_pcbdispose (struct inpcb *);
 extern void    in_pcbdisconnect(struct inpcb *);
 extern int     in_pcbinshash(struct inpcb *, int);
 extern int     in_pcbladdr(struct inpcb *, struct sockaddr *,
-                   struct sockaddr_in *, unsigned int *);
+                   struct sockaddr_in *, struct ifnet **);
 extern struct inpcb *in_pcblookup_local(struct inpcbinfo *, struct in_addr,
                    u_int, int);
 extern struct inpcb *in_pcblookup_local_and_cleanup(struct inpcbinfo *,
@@ -641,12 +691,29 @@ extern void       inpcb_to_xinpcb64(struct inpcb *inp,
                        struct xinpcb64 *xinp);
 #endif
 extern int get_pcblist_n(short , struct sysctl_req *, struct inpcbinfo *);
+extern void inpcb_get_ports_used(unsigned int, uint8_t *, struct inpcbinfo *);
+
+#define INPCB_OPPORTUNISTIC_THROTTLEON 0x0001
+#define INPCB_OPPORTUNISTIC_SETCMD     0x0002
+extern uint32_t inpcb_count_opportunistic(unsigned int, struct inpcbinfo *,
+                   u_int32_t);
 extern void    inp_route_copyout(struct inpcb *, struct route *);
 extern void    inp_route_copyin(struct inpcb *, struct route *);
-extern void    inp_bindif(struct inpcb *, unsigned int);
+extern int     inp_bindif(struct inpcb *, unsigned int);
 extern int     inp_nocellular(struct inpcb *, unsigned int);
+extern u_int32_t inp_calc_flowhash(struct inpcb *);
+extern void    socket_flowadv_init(void);
+extern int     inp_fc_addinp(struct inpcb *);
+extern struct inp_fc_entry *inp_fc_getinp(u_int32_t);
+extern void    inp_fc_entry_free(struct inp_fc_entry *);
+extern void    inp_fc_feedback(struct inpcb *);
+extern void    inp_reset_fc_state(struct inpcb *);
+extern int     inp_set_fc_state(struct inpcb *, int advcode);
+extern void    inp_fc_unthrottle_tcp(struct inpcb *);
+extern int     inp_flush(struct inpcb *, int);
+#endif /* BSD_KERNEL_PRIVATE */
 
-#endif /* KERNEL */
+#ifdef KERNEL_PRIVATE
+extern void    inp_clear_INP_INADDR_ANY(struct socket *so);
 #endif /* KERNEL_PRIVATE */
 
 #endif /* !_NETINET_IN_PCB_H_ */
index 9ff8839b5dc9a1d882079c8e491ff9c8e9b7f59b..00ef003241a1153e46218dbf51c2cbc65fdf5f4c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/dtrace.h>
+#include <sys/kauth.h>
 
 #include <net/route.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
@@ -124,7 +126,7 @@ sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
                xso->so_error = so->so_error;
                xso->so_pgid = so->so_pgid;
                xso->so_oobmark = so->so_oobmark;
-               xso->so_uid = so->so_uid;
+               xso->so_uid = kauth_cred_getuid(so->so_cred);
        }
 }
 
@@ -186,6 +188,7 @@ inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp)
        xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
        xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
        xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
+       xinp->inp_flowhash = inp->inp_flowhash;
 }
 
 __private_extern__ void
@@ -381,3 +384,69 @@ done:
        return error;
 }
 
+__private_extern__ void
+inpcb_get_ports_used(unsigned int ifindex, uint8_t *bitfield,
+    struct inpcbinfo *pcbinfo)
+{
+       struct inpcb *inp;
+       inp_gen_t gencnt;
+
+       lck_rw_lock_shared(pcbinfo->mtx);
+
+       gencnt = pcbinfo->ipi_gencnt;
+       for (inp = LIST_FIRST(pcbinfo->listhead); inp != NULL;
+           inp = LIST_NEXT(inp, inp_list)) {
+               if (inp->inp_gencnt <= gencnt &&
+                   inp->inp_state != INPCB_STATE_DEAD &&
+                   (ifindex == 0 || inp->inp_last_outifp == NULL ||
+                   ifindex == inp->inp_last_outifp->if_index)) {
+                       uint16_t port = ntohs(inp->inp_lport);
+                       bitfield[port / 8] |= 1 << (port & 0x7);
+               }
+       }
+
+       lck_rw_done(pcbinfo->mtx);
+}
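
The bitfield is a 65536-bit (8 KB) map with one bit per local port. A hedged sketch of a kernel-side caller, assuming the kernel-internal tcbinfo list (in practice the buffer would be heap-allocated rather than stack-resident):

        uint8_t bitfield[65536 / 8];    /* one bit per port */

        bzero(bitfield, sizeof (bitfield));
        inpcb_get_ports_used(0, bitfield, &tcbinfo);    /* 0 == any interface */

        uint16_t port = 49152;
        int in_use = (bitfield[port / 8] >> (port & 0x7)) & 1;
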
+
+__private_extern__ uint32_t
+inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo,
+    u_int32_t flags)
+{
+       uint32_t opportunistic = 0;
+
+       lck_rw_lock_shared(pcbinfo->mtx);
+
+       struct inpcb *inp;
+       inp_gen_t       gencnt = pcbinfo->ipi_gencnt;
+       for (inp = LIST_FIRST(pcbinfo->listhead);
+               inp; inp = LIST_NEXT(inp, inp_list)) {
+               if (inp->inp_gencnt <= gencnt &&
+                   inp->inp_state != INPCB_STATE_DEAD &&
+                   inp->inp_socket != NULL &&
+                   so_get_opportunistic(inp->inp_socket) &&
+                   inp->inp_last_outifp != NULL &&
+                   ifindex == inp->inp_last_outifp->if_index) {
+                       opportunistic++;
+                       struct socket *so = inp->inp_socket;
+                       if ((flags & INPCB_OPPORTUNISTIC_SETCMD) &&
+                           (so->so_state & SS_ISCONNECTED)) {
+                               socket_lock(so, 1);
+                               if (flags & INPCB_OPPORTUNISTIC_THROTTLEON) {
+                                       so->so_flags |= SOF_SUSPENDED;
+                                       soevent(so,
+                                           (SO_FILT_HINT_LOCKED |
+                                           SO_FILT_HINT_SUSPEND));
+                               } else {
+                                       so->so_flags &= ~(SOF_SUSPENDED);
+                                       soevent(so,
+                                           (SO_FILT_HINT_LOCKED |
+                                           SO_FILT_HINT_RESUME));
+                               }
+                               SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
+                                   "%s\n", so->last_pid, so, INP_SOCKAF(so),
+                                   INP_SOCKTYPE(so),
+                                   (so->so_flags & SOF_SUSPENDED) ?
+                                   "SUSPENDED" : "RESUMED"));
+                               socket_unlock(so, 1);
+                       }
+               }
+       }
+
+       lck_rw_done(pcbinfo->mtx);
+
+       return (opportunistic);
+}
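
The flags argument forms a two-bit command: INPCB_OPPORTUNISTIC_SETCMD selects set-versus-count, and INPCB_OPPORTUNISTIC_THROTTLEON selects suspend-versus-resume. A sketch of the three meaningful calls, again assuming the kernel-internal tcbinfo:

        uint32_t n;

        /* count opportunistic flows on the interface */
        n = inpcb_count_opportunistic(ifp->if_index, &tcbinfo, 0);

        /* suspend them (sends SO_FILT_HINT_SUSPEND to each socket) */
        n = inpcb_count_opportunistic(ifp->if_index, &tcbinfo,
            INPCB_OPPORTUNISTIC_SETCMD | INPCB_OPPORTUNISTIC_THROTTLEON);

        /* resume them (sends SO_FILT_HINT_RESUME) */
        n = inpcb_count_opportunistic(ifp->if_index, &tcbinfo,
            INPCB_OPPORTUNISTIC_SETCMD);
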
index 7c979683af30b92b432d76e7237f6fd9aa873668..f3f3be8e7b355601fd2eb3275fe51a2e2d4b2d68 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -213,7 +213,7 @@ struct protosw inetsw[] = {
   encap_init,          0,              0,              0,
   0,
   &rip_usrreqs,
-  0,           0,              0,      { 0, 0 },       0,      { 0 }
+  0,           rip_unlock,             0,      { 0, 0 },       0,      { 0 }
 },
 # if INET6
 { SOCK_RAW,    &inetdomain,    IPPROTO_IPV6,   PR_ATOMIC|PR_ADDR|PR_LASTHDR,
@@ -222,7 +222,7 @@ struct protosw inetsw[] = {
   encap_init,  0,              0,              0,
   0,
   &rip_usrreqs,
-  0,           0,              0,      { 0, 0 },       0,      { 0 }
+  0,           rip_unlock,             0,      { 0, 0 },       0,      { 0 }
 },
 #endif
 #if IPDIVERT
@@ -242,7 +242,7 @@ struct protosw inetsw[] = {
   0,           0,              0,              0,
   0,
   &rip_usrreqs,
-  0,           0,              0,      { 0, 0 },       0,      { 0 }
+  0,           rip_unlock,             0,      { 0, 0 },       0,      { 0 }
 },
 #endif
 #if NSIP
@@ -252,7 +252,7 @@ struct protosw inetsw[] = {
   0,           0,              0,              0,
   0,
   &rip_usrreqs,
-  0,           0,              0,      { 0, 0 },       0,      { 0 }
+  0,           rip_unlock,             0,      { 0, 0 },       0,      { 0 }
 },
 #endif
        /* raw wildcard */
index 2d0c2735d10d7fc46b8a703161e6c09c0e43e72c..ca9a4247ebbc97a567f12781370ad14e1253e03a 100644 (file)
@@ -106,7 +106,7 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
            struct radix_node *treenodes)
 {
        struct rtentry *rt = (struct rtentry *)treenodes;
-       struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
+       struct sockaddr_in *sin = (struct sockaddr_in *)(void *)rt_key(rt);
        struct radix_node *ret;
 
        lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
@@ -145,11 +145,9 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
                        /* Become a regular mutex */
                        RT_CONVERT_LOCK(rt);
                        IFA_LOCK_SPIN(rt->rt_ifa);
-#define satosin(sa) ((struct sockaddr_in *)sa)
                        if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
                            == sin->sin_addr.s_addr)
                                rt->rt_flags |= RTF_LOCAL;
-#undef satosin
                        IFA_UNLOCK(rt->rt_ifa);
                }
        }
@@ -211,8 +209,9 @@ in_validate(struct radix_node *rn)
                        /* It's one of ours; unexpire it */
                        rt->rt_flags &= ~RTPRF_OURS;
                        rt_setexpire(rt, 0);
-               } else if ((rt->rt_flags & RTF_LLINFO) &&
-                   (rt->rt_flags & RTF_HOST) && rt->rt_gateway != NULL &&
+               } else if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) ==
+                   (RTF_LLINFO | RTF_HOST) && rt->rt_llinfo != NULL &&
+                   rt->rt_gateway != NULL &&
                    rt->rt_gateway->sa_family == AF_LINK) {
                        /* It's ARP; let it be handled there */
                        arp_validate(rt);
index 54b5fcc1df7d9a9fd90dd7ef87f65e513f4d268c..02d9ccc86bf13b7bbcd999f613a4871825224756 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -60,32 +60,32 @@ extern char *proc_name_address(void *p);
 
 static int tfp_count = 0;
 
-static TAILQ_HEAD(, tclass_for_proc) tfp_head = TAILQ_HEAD_INITIALIZER(tfp_head);
+static TAILQ_HEAD(, tclass_for_proc) tfp_head =
+    TAILQ_HEAD_INITIALIZER(tfp_head);
 
 struct tclass_for_proc {
        TAILQ_ENTRY(tclass_for_proc)    tfp_link;
-       int                                                     tfp_class;
-       pid_t                                                   tfp_pid;
-       char                                                    tfp_pname[MAXCOMLEN + 1];
+       int     tfp_class;
+       pid_t   tfp_pid;
+       char    tfp_pname[MAXCOMLEN + 1];
 };
 
-extern void tcp_set_background_cc(struct socket *);
-extern void tcp_set_foreground_cc(struct socket *);
-
-int dscp_code_from_mbuf_tclass(int );
-
-static int get_pid_tclass(pid_t , int *);
-static int get_pname_tclass(const char * , int *);
-static int set_pid_tclass(pid_t , int );
-static int set_pname_tclass(const char * , int );
+static int dscp_code_from_mbuf_tclass(mbuf_traffic_class_t);
+static int get_pid_tclass(struct so_tcdbg *);
+static int get_pname_tclass(struct so_tcdbg *);
+static int set_pid_tclass(struct so_tcdbg *);
+static int set_pname_tclass(struct so_tcdbg *);
+static int flush_pid_tclass(struct so_tcdbg *);
 static int purge_tclass_for_proc(void);
 static int flush_tclass_for_proc(void);
+static void so_set_lro(struct socket *, int);
+int get_tclass_for_curr_proc(int *);
 
-
-static lck_grp_attr_t *tclass_lck_grp_attr = NULL;  /* mutex group attributes */
-static lck_grp_t *tclass_lck_grp = NULL;            /* mutex group definition */
-static lck_attr_t *tclass_lck_attr = NULL;          /* mutex attributes */
-static lck_mtx_t *tclass_lock = NULL;
+static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
+static lck_grp_t *tclass_lck_grp = NULL;       /* mutex group definition */
+static lck_attr_t *tclass_lck_attr = NULL;     /* mutex attributes */
+decl_lck_mtx_data(static, tclass_lock_data);
+static lck_mtx_t *tclass_lock = &tclass_lock_data;
 
 /*
  * Must be called with tclass_lock held
@@ -94,12 +94,12 @@ static struct tclass_for_proc *
 find_tfp_by_pid(pid_t pid)
 {
        struct tclass_for_proc *tfp;
-       
+
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
                if (tfp->tfp_pid == pid)
                        break;
        }
-       return tfp;
+       return (tfp);
 }
 
 /*
@@ -109,36 +109,39 @@ static struct tclass_for_proc *
 find_tfp_by_pname(const char *pname)
 {
        struct tclass_for_proc *tfp;
-       
+
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
-               if (strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)
+               if (strncmp(pname, tfp->tfp_pname,
+                   sizeof (tfp->tfp_pname)) == 0)
                        break;
        }
-       return tfp;
+       return (tfp);
 }
 
-static int
-get_tclass_for_curr_proc(void)
+__private_extern__ int
+get_tclass_for_curr_proc(int *sotc)
 {
-       struct tclass_for_proc *tfp;
-       int sotc = SO_TC_BE;
+       struct tclass_for_proc *tfp = NULL;
        proc_t p = current_proc();      /* Not ref counted */
        pid_t pid = proc_pid(p);
        char *pname = proc_name_address(p);
-       
+
+       *sotc = -1;
+
        lck_mtx_lock(tclass_lock);
-       
+
        TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
-               if ((tfp->tfp_pid == pid) ||
-                       (tfp->tfp_pid == -1 && strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)) {
-                       sotc = tfp->tfp_class;
+               if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
+                   strncmp(pname, tfp->tfp_pname,
+                   sizeof (tfp->tfp_pname)) == 0)) {
+                       *sotc = tfp->tfp_class;
                        break;
-               } 
+               }
        }
 
        lck_mtx_unlock(tclass_lock);
 
-       return sotc;
+       return ((tfp == NULL) ? 0 : 1);
 }
 
 /*
@@ -154,13 +157,13 @@ purge_tclass_for_proc(void)
 
        TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
                proc_t p;
-               
+
                if (tfp->tfp_pid == -1)
                        continue;
                if ((p = proc_find(tfp->tfp_pid)) == NULL) {
                        tfp_count--;
                        TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
-                                       
+
                        _FREE(tfp, M_TEMP);
                } else {
                        proc_rele(p);
@@ -168,8 +171,8 @@ purge_tclass_for_proc(void)
        }
 
        lck_mtx_unlock(tclass_lock);
-       
-       return error;
+
+       return (error);
 }
 
 /*
@@ -200,10 +203,10 @@ flush_tclass_for_proc(void)
        TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
                free_tclass_for_proc(tfp);
        }
-       
+
        lck_mtx_unlock(tclass_lock);
-               
-       return error;
+
+       return (error);
 
 }
 
@@ -211,40 +214,39 @@ flush_tclass_for_proc(void)
  * Must be called with tclass_lock held
  */
 static struct tclass_for_proc *
-alloc_tclass_for_proc(pid_t pid, const char *pname, int tclass)
+alloc_tclass_for_proc(pid_t pid, const char *pname)
 {
        struct tclass_for_proc *tfp;
-       
+
        if (pid == -1 && pname == NULL)
-               return NULL;
+               return (NULL);
 
-       tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
+       tfp = _MALLOC(sizeof (struct tclass_for_proc), M_TEMP, M_NOWAIT|M_ZERO);
        if (tfp == NULL)
-               return NULL;
-       
+               return (NULL);
+
        tfp->tfp_pid = pid;
-       tfp->tfp_class = tclass;
        /*
-        * Add per pid entries before per proc name so we can find 
+        * Add per pid entries before per proc name so we can find
         * a specific instance of a process before the general name base entry.
         */
        if (pid != -1) {
                TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
        } else {
-               strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
+               strlcpy(tfp->tfp_pname, pname, sizeof (tfp->tfp_pname));
                TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
        }
-       
+
        tfp_count++;
 
-       return tfp;
+       return (tfp);
 }
 
 /*
  * -1 for tclass means to remove the entry
  */
-int 
-set_pid_tclass(pid_t pid, int tclass)
+int
+set_pid_tclass(struct so_tcdbg *so_tcdbg)
 {
        int error = EINVAL;
        proc_t p = NULL;
@@ -252,205 +254,279 @@ set_pid_tclass(pid_t pid, int tclass)
        struct fileproc *fp;
        struct tclass_for_proc *tfp;
        int i;
+       pid_t pid = so_tcdbg->so_tcdbg_pid;
+       int tclass = so_tcdbg->so_tcdbg_tclass;
 
        p = proc_find(pid);
        if (p == NULL) {
-               printf("set_pid_tclass proc_find(%d) \n", pid);
+               printf("%s proc_find(%d) failed\n", __func__, pid);
                goto done;
        }
-       
+
        /* Need a tfp */
        lck_mtx_lock(tclass_lock);
-       
+
        tfp = find_tfp_by_pid(pid);
-       if (tclass == -1) {
-               if (tfp != NULL) {
-                       free_tclass_for_proc(tfp);
-                       error = 0;
-               }
-               lck_mtx_unlock(tclass_lock);
-               goto done;
-       } else {
+       if (tfp == NULL) {
+               tfp = alloc_tclass_for_proc(pid, NULL);
                if (tfp == NULL) {
-                       tfp = alloc_tclass_for_proc(pid, NULL, tclass);
-                       if (tfp == NULL) {
-                               lck_mtx_unlock(tclass_lock);
-                               error = ENOBUFS;
-                               goto done;
-                       }
-               } else {
-                       tfp->tfp_class = tclass;
+                       lck_mtx_unlock(tclass_lock);
+                       error = ENOBUFS;
+                       goto done;
                }
        }
+       tfp->tfp_class = tclass;
+
        lck_mtx_unlock(tclass_lock);
 
        if (tfp != NULL) {
                proc_fdlock(p);
-               
+
                fdp = p->p_fd;
                for (i = 0; i < fdp->fd_nfiles; i++) {
                        struct socket *so;
-                       
+
                        fp = fdp->fd_ofiles[i];
-                       if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
-                               fp->f_fglob->fg_type != DTYPE_SOCKET)
+                       if (fp == NULL ||
+                           (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                           fp->f_fglob->fg_type != DTYPE_SOCKET)
                                continue;
-                       
+
                        so = (struct socket *)fp->f_fglob->fg_data;
-                       if (so->so_proto->pr_domain->dom_family != AF_INET && 
-                               so->so_proto->pr_domain->dom_family != AF_INET6)
+                       if (so->so_proto->pr_domain->dom_family != AF_INET &&
+                           so->so_proto->pr_domain->dom_family != AF_INET6)
                                continue;
                        socket_lock(so, 1);
-                       error = so_set_traffic_class(so, tclass != -1 ? tclass : SO_TC_BE);
-                       socket_unlock(so, 1);
-                       if (error != 0) {
-                               printf("set_pid_tclass so_set_traffic_class(%p, %d) failed %d\n", so, tclass, error);
-                               error = 0;
+                       if (tclass != -1) {
+                               error = so_set_traffic_class(so, tclass);
+                               if (error != 0) {
+                                       printf("%s: so_set_traffic_class"
+                                           "(so=%p, fd=%d, tclass=%d) "
+                                           "failed %d\n", __func__,
+                                           so, i, tclass, error);
+                                       error = 0;
+                               }
                        }
+                       socket_unlock(so, 1);
                }
-               
+
                proc_fdunlock(p);
        }
-       
-       error = 0;      
+
+       error = 0;
 done:
        if (p != NULL)
                proc_rele(p);
-       
-       return error;
+
+       return (error);
 }
 
-int 
-set_pname_tclass(const char *pname, int tclass)
+int
+set_pname_tclass(struct so_tcdbg *so_tcdbg)
 {
        int error = EINVAL;
        struct tclass_for_proc *tfp;
 
        lck_mtx_lock(tclass_lock);
-       
-       tfp = find_tfp_by_pname(pname);
-       if (tclass == -1) {
-               if (tfp != NULL)
-                       free_tclass_for_proc(tfp);
-       } else {
+
+       tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
+       if (tfp == NULL) {
+               tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname);
                if (tfp == NULL) {
-                       tfp = alloc_tclass_for_proc(-1, pname, tclass);
-                       if (tfp == NULL) {
-                               lck_mtx_unlock(tclass_lock);
-                               error = ENOBUFS;
-                               goto done;
-                       }
-               } else {
-                       tfp->tfp_class = tclass;
+                       lck_mtx_unlock(tclass_lock);
+                       error = ENOBUFS;
+                       goto done;
                }
        }
+       tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
+
        lck_mtx_unlock(tclass_lock);
-       
-       error = 0;      
+
+       error = 0;
 done:
-       
-       return error;
+
+       return (error);
 }
 
-int 
-get_pid_tclass(pid_t pid, int *tclass)
+static int
+flush_pid_tclass(struct so_tcdbg *so_tcdbg)
+{
+       pid_t pid = so_tcdbg->so_tcdbg_pid;
+       int tclass = so_tcdbg->so_tcdbg_tclass;
+       struct filedesc *fdp;
+       int error = EINVAL;
+       proc_t p;
+       int i;
+
+       p = proc_find(pid);
+       if (p == PROC_NULL) {
+               printf("%s proc_find(%d) failed\n", __func__, pid);
+               goto done;
+       }
+
+       proc_fdlock(p);
+       fdp = p->p_fd;
+       for (i = 0; i < fdp->fd_nfiles; i++) {
+               struct socket *so;
+               struct fileproc *fp;
+
+               fp = fdp->fd_ofiles[i];
+               if (fp == NULL ||
+                   (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                   fp->f_fglob->fg_type != DTYPE_SOCKET)
+                       continue;
+
+               so = (struct socket *)fp->f_fglob->fg_data;
+               error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
+                   sizeof (tclass));
+               if (error != 0) {
+                       printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, "
+                           "tclass=%d) failed %d\n", __func__, so, i, tclass,
+                           error);
+                       error = 0;
+               }
+       }
+       proc_fdunlock(p);
+
+       error = 0;
+done:
+       if (p != PROC_NULL)
+               proc_rele(p);
+
+       return (error);
+}
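
The commands above (set_pid_tclass, flush_pid_tclass) are driven from userland through a SOL_SOCKET option on a privileged socket. A hypothetical sketch, assuming the private SO_TRAFFIC_CLASS_DBG option and the struct so_tcdbg field names used in this file; none of this is public API:

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/types.h>

    /* Tag (SO_TCDBG_PID) or flush (SO_TCDBG_TCFLUSH_PID) a process's sockets. */
    static int
    pid_tclass_cmd(int s, int cmd, pid_t pid, int tclass)
    {
            struct so_tcdbg tcd;

            memset(&tcd, 0, sizeof (tcd));
            tcd.so_tcdbg_cmd = cmd;
            tcd.so_tcdbg_pid = pid;
            tcd.so_tcdbg_tclass = tclass;           /* e.g. SO_TC_BK */

            return (setsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS_DBG,
                &tcd, sizeof (tcd)));
    }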
+
+int
+get_pid_tclass(struct so_tcdbg *so_tcdbg)
 {
        int error = EINVAL;
        proc_t p = NULL;
        struct tclass_for_proc *tfp;
-       
-       *tclass = -1; /* Means not set */
+       pid_t pid = so_tcdbg->so_tcdbg_pid;
+
+       so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
+       so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
 
        p = proc_find(pid);
        if (p == NULL) {
-               printf("get_pid_tclass proc_find(%d) \n", pid);
+               printf("%s proc_find(%d) failed\n", __func__, pid);
                goto done;
        }
-       
+
        /* Need a tfp */
        lck_mtx_lock(tclass_lock);
-       
+
        tfp = find_tfp_by_pid(pid);
        if (tfp != NULL) {
-               *tclass = tfp->tfp_class ;
+               so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
                error = 0;
        }
        lck_mtx_unlock(tclass_lock);
 done:
        if (p != NULL)
                proc_rele(p);
-       
-       return error;
+
+       return (error);
 }
 
-int 
-get_pname_tclass(const char *pname, int *tclass)
+int
+get_pname_tclass(struct so_tcdbg *so_tcdbg)
 {
        int error = EINVAL;
        struct tclass_for_proc *tfp;
-       
-       *tclass = -1; /* Means not set */
+
+       so_tcdbg->so_tcdbg_tclass = -1; /* Means not set */
+       so_tcdbg->so_tcdbg_opportunistic = -1; /* Means not set */
 
        /* Need a tfp */
        lck_mtx_lock(tclass_lock);
-       
-       tfp = find_tfp_by_pname(pname);
+
+       tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
        if (tfp != NULL) {
-               *tclass = tfp->tfp_class ;
+               so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
                error = 0;
        }
        lck_mtx_unlock(tclass_lock);
-       
-       return error;
+
+       return (error);
 }
 
+static int
+delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
+{
+       int error = EINVAL;
+       pid_t pid = so_tcdbg->so_tcdbg_pid;
+       struct tclass_for_proc *tfp = NULL;
+
+       lck_mtx_lock(tclass_lock);
 
+       if (pid != -1)
+               tfp = find_tfp_by_pid(pid);
+       else
+               tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
+
+       if (tfp != NULL) {
+               free_tclass_for_proc(tfp);
+               error = 0;
+       }
+
+       lck_mtx_unlock(tclass_lock);
+
+       return (error);
+}
 
 /*
  * Setting options requires privileges
  */
-__private_extern__ int 
+__private_extern__ int
 so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
 {
        int error = 0;
-       
+
        if ((so->so_state & SS_PRIV) == 0)
-               return EPERM;
+               return (EPERM);
 
        socket_unlock(so, 0);
 
        switch (so_tcdbg->so_tcdbg_cmd) {
                case SO_TCDBG_PID:
-                       error = set_pid_tclass(so_tcdbg->so_tcdbg_pid, so_tcdbg->so_tcdbg_tclass);
+                       error = set_pid_tclass(so_tcdbg);
                        break;
-               
+
                case SO_TCDBG_PNAME:
-                       error = set_pname_tclass(so_tcdbg->so_tcdbg_pname, so_tcdbg->so_tcdbg_tclass);
+                       error = set_pname_tclass(so_tcdbg);
                        break;
-               
+
                case SO_TCDBG_PURGE:
                        error = purge_tclass_for_proc();
                        break;
-               
+
                case SO_TCDBG_FLUSH:
                        error = flush_tclass_for_proc();
                        break;
-               
+
+               case SO_TCDBG_DELETE:
+                       error = delete_tclass_for_pid_pname(so_tcdbg);
+                       break;
+
+               case SO_TCDBG_TCFLUSH_PID:
+                       error = flush_pid_tclass(so_tcdbg);
+                       break;
+
                default:
                        error = EINVAL;
                        break;
-               
        }
 
        socket_lock(so, 0);
 
-       return error;
+       return (error);
 }
 
 /*
  * Not required to be privileged to get
  */
-__private_extern__ int 
+__private_extern__ int
 sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
 {
        int error = 0;
@@ -458,23 +534,24 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
        void *buf = NULL;
        size_t len = sopt->sopt_valsize;
 
-       error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
+       error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
+           sizeof (struct so_tcdbg));
        if (error != 0)
-               return error;
-       
+               return (error);
+
        sopt->sopt_valsize = len;
-       
+
        socket_unlock(so, 0);
 
        switch (so_tcdbg.so_tcdbg_cmd) {
                case SO_TCDBG_PID:
-                       error = get_pid_tclass(so_tcdbg.so_tcdbg_pid, &so_tcdbg.so_tcdbg_tclass);
+                       error = get_pid_tclass(&so_tcdbg);
                        break;
-               
+
                case SO_TCDBG_PNAME:
-                       error = get_pname_tclass(so_tcdbg.so_tcdbg_pname, &so_tcdbg.so_tcdbg_tclass);
+                       error = get_pname_tclass(&so_tcdbg);
                        break;
-               
+
                case SO_TCDBG_COUNT:
                        lck_mtx_lock(tclass_lock);
                        so_tcdbg.so_tcdbg_count = tfp_count;
@@ -492,7 +569,7 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
                                error = EINVAL;
                                break;
                        }
-                       len = alloc_count * sizeof(struct so_tcdbg);
+                       len = alloc_count * sizeof (struct so_tcdbg);
                        lck_mtx_unlock(tclass_lock);
 
                        buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
@@ -513,33 +590,35 @@ sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
                                } else {
                                        ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
                                        ptr->so_tcdbg_pid = -1;
-                                       strlcpy(ptr->so_tcdbg_pname, tfp->tfp_pname, sizeof(ptr->so_tcdbg_pname));
+                                       strlcpy(ptr->so_tcdbg_pname,
+                                           tfp->tfp_pname,
+                                           sizeof (ptr->so_tcdbg_pname));
                                }
                                ptr->so_tcdbg_tclass = tfp->tfp_class;
                                ptr++;
                        }
-                       
+
                        lck_mtx_unlock(tclass_lock);
                        }
                        break;
-               
+
                default:
                        error = EINVAL;
                        break;
-               
        }
 
        socket_lock(so, 0);
 
        if (error == 0) {
                if (buf == NULL) {
-                       error = sooptcopyout(sopt, &so_tcdbg, sizeof(struct so_tcdbg));
+                       error = sooptcopyout(sopt, &so_tcdbg,
+                           sizeof (struct so_tcdbg));
                } else {
                        error = sooptcopyout(sopt, buf, len);
                        _FREE(buf, M_TEMP);
                }
        }
-       return error;
+       return (error);
 }
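
The read path in sogetopt_tcdbg() above is two-step: SO_TCDBG_COUNT reports how many entries exist, then SO_TCDBG_LIST fills a caller-sized buffer. A hypothetical sketch under the same assumptions (private SO_TRAFFIC_CLASS_DBG option, struct so_tcdbg layout from this tree); the detail that the LIST command rides in the first struct of the buffer is inferred from the sooptcopyin() call above:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Fetch the per-process traffic class table; caller frees the result. */
    static struct so_tcdbg *
    list_tclass_entries(int s, unsigned int *countp)
    {
            struct so_tcdbg tcd, *buf;
            socklen_t len = sizeof (tcd);

            memset(&tcd, 0, sizeof (tcd));
            tcd.so_tcdbg_cmd = SO_TCDBG_COUNT;
            if (getsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS_DBG,
                &tcd, &len) != 0)
                    return (NULL);

            *countp = tcd.so_tcdbg_count;
            if (*countp == 0 ||
                (buf = calloc(*countp, sizeof (*buf))) == NULL)
                    return (NULL);

            buf->so_tcdbg_cmd = SO_TCDBG_LIST;
            len = *countp * sizeof (*buf);
            if (getsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS_DBG,
                buf, &len) != 0) {
                    free(buf);
                    return (NULL);
            }
            return (buf);
    }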
 
 
@@ -547,78 +626,121 @@ __private_extern__ int
 so_set_traffic_class(struct socket *so, int optval)
 {
        int error = 0;
-       
-       if (optval < SO_TC_BE || optval > SO_TC_VO) {
+
+       if (optval < SO_TC_BE || optval > SO_TC_CTL) {
                error = EINVAL;
        } else {
-               so->so_traffic_class = optval;
-       
-               if ((INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) && 
-                       INP_SOCKTYPE(so) == SOCK_STREAM) {
-                       set_tcp_stream_priority(so);
+               switch (optval) {
+               case _SO_TC_BK:
+                       optval = SO_TC_BK;
+                       break;
+               case _SO_TC_VI:
+                       optval = SO_TC_VI;
+                       break;
+               case _SO_TC_VO:
+                       optval = SO_TC_VO;
+                       break;
+               default:
+                       if (!SO_VALID_TC(optval))
+                               error = EINVAL;
+                       break;
+               }
+
+               if (error == 0) {
+                       int oldval = so->so_traffic_class;
+
+                       VERIFY(SO_VALID_TC(optval));
+                       so->so_traffic_class = optval;
+
+                       if ((INP_SOCKAF(so) == AF_INET ||
+                           INP_SOCKAF(so) == AF_INET6) &&
+                           INP_SOCKTYPE(so) == SOCK_STREAM) {
+                               set_tcp_stream_priority(so);
+
+                               /* Set/unset use of Large Receive Offload */
+                               so_set_lro(so, optval);
+                       }
+
+                       if ((INP_SOCKAF(so) == AF_INET ||
+                           INP_SOCKAF(so) == AF_INET6) &&
+                           optval != oldval && (optval == SO_TC_BK_SYS ||
+                           oldval == SO_TC_BK_SYS)) {
+                               /*
+                                * If the app switches from BK_SYS to something
+                                * else, resume the socket if it was suspended.
+                                */
+                               if (oldval == SO_TC_BK_SYS)
+                                       inp_reset_fc_state(so->so_pcb);
+
+                               SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
+                                   "opportunistic %s\n", so->last_pid,
+                                   so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+                                   (optval == SO_TC_BK_SYS) ? "ON" : "OFF"));
+                       }
                }
        }
-       return error;
+       return (error);
 }
 
 __private_extern__ void
 so_set_default_traffic_class(struct socket *so)
 {
-       int sotc = SO_TC_BE;
+       int sotc = -1;
 
-       if (tfp_count > 0 && (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
-               sotc = get_tclass_for_curr_proc();
+       if (tfp_count > 0 &&
+           (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
+               get_tclass_for_curr_proc(&sotc);
        }
-       
-       so->so_traffic_class = sotc;
-       
-       return;
+
+       so->so_traffic_class = (sotc != -1) ? sotc : SO_TC_BE;
 }
 
+__private_extern__ int
+so_set_opportunistic(struct socket *so, int optval)
+{
+       return (so_set_traffic_class(so, (optval == 0) ?
+           SO_TC_BE : SO_TC_BK_SYS));
+}
 
 __private_extern__ int
-mbuf_traffic_class_from_control(struct mbuf *control)
+so_get_opportunistic(struct socket *so)
+{
+       return (so->so_traffic_class == SO_TC_BK_SYS);
+}
+
+__private_extern__ mbuf_svc_class_t
+mbuf_service_class_from_control(struct mbuf *control)
 {
        struct cmsghdr *cm;
-       
-       for (cm = M_FIRST_CMSGHDR(control); 
-                cm != NULL; 
-                cm = M_NXT_CMSGHDR(control, cm)) {
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+
+       for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
+           cm = M_NXT_CMSGHDR(control, cm)) {
                int tc;
 
-               if (cm->cmsg_len < sizeof(struct cmsghdr))
+               if (cm->cmsg_len < sizeof (struct cmsghdr))
                        break;
-               
+
                if (cm->cmsg_level != SOL_SOCKET ||
-                       cm->cmsg_type != SO_TRAFFIC_CLASS)
+                   cm->cmsg_type != SO_TRAFFIC_CLASS)
                        continue;
-               if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
+               if (cm->cmsg_len != CMSG_LEN(sizeof (int)))
                        continue;
-               
-               tc = *(int *)CMSG_DATA(cm);
-               
-               switch (tc) {
-                       case SO_TC_BE:
-                               return MBUF_TC_BE;
-                       case SO_TC_BK:
-                               return MBUF_TC_BK;
-                       case SO_TC_VI:
-                               return MBUF_TC_VI;
-                       case SO_TC_VO:
-                               return MBUF_TC_VO;
-                       default:
-                               break;
-               }
+
+               tc = *(int *)(void *)CMSG_DATA(cm);
+               msc = so_tc2msc(tc);
+               if (MBUF_VALID_SC(msc))
+                       break;
        }
-       
-       return MBUF_TC_UNSPEC;
+
+       return (msc);
 }
 
 __private_extern__  int
-dscp_code_from_mbuf_tclass(int mtc)
+dscp_code_from_mbuf_tclass(mbuf_traffic_class_t mtc)
 {
        int dscp_code;
-       
+
        switch (mtc) {
                default:
                case MBUF_TC_BE:
@@ -634,56 +756,65 @@ dscp_code_from_mbuf_tclass(int mtc)
                        dscp_code = 0x30;
                        break;
        }
-       
-       return dscp_code;
+
+       return (dscp_code);
 }
 
 __private_extern__ void
 so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
 {
-       uint32_t sotc = m->m_pkthdr.prio;
+       uint32_t sotc = m_get_traffic_class(m);
 
        if (sotc >= SO_TC_STATS_MAX)
                sotc = SO_TC_BE;
-       
-       so->so_tc_stats[sotc].rxpackets += 1;
-       so->so_tc_stats[sotc].rxbytes += ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
 
-       return;
+       so->so_tc_stats[sotc].rxpackets += 1;
+       so->so_tc_stats[sotc].rxbytes +=
+           ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
 }
 
 __private_extern__ void
 set_tcp_stream_priority(struct socket *so)
 {
        struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+       int old_cc = tp->tcp_cc_index;
+       int recvbg = IS_TCP_RECV_BG(so);
 
-       /* If the socket was marked as a background socket or if the
-        * traffic class is set to background with traffic class socket 
-        * option then make both send and recv side of the stream to be 
-        * background. The variable sotcdb which can be set with sysctl 
+       /*
+        * If the socket was marked as a background socket or if the
+        * traffic class is set to background with the traffic class socket
+        * option, then make both the send and receive sides of the stream
+        * background.  The variable sotcdb, which can be set with sysctl,
         * is used to disable these settings for testing.
         */
-       if (soisbackground(so) || so->so_traffic_class == SO_TC_BK) {
+       if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) {
                if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) {
-                       if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+                       if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
                                tcp_set_foreground_cc(so);
                } else {
-                       if (tp->tcp_cc_index != TCP_CC_ALGO_BACKGROUND_INDEX)
+                       if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX)
                                tcp_set_background_cc(so);
                }
-               
+
                /* Set receive side background flags */
-               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0) {
-                       so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
-               } else {
-                       so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG;
-               }
+               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0)
+                       tcp_clear_recv_bg(so);
+               else
+                       tcp_set_recv_bg(so);
        } else {
-               so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
-               if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+               tcp_clear_recv_bg(so);
+               if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
                        tcp_set_foreground_cc(so);
        }
-       return;
+
+       if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
+               SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; "
+                  "%s recv\n", so->last_pid, so, INP_SOCKAF(so),
+                  INP_SOCKTYPE(so),
+                  (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
+                  "background" : "foreground",
+                  IS_TCP_RECV_BG(so) ? "background" : "foreground"));
+       }
 }
 
 /*
@@ -692,119 +823,126 @@ set_tcp_stream_priority(struct socket *so)
  * - set the DSCP code following the WMM mapping
  */
 __private_extern__ void
-set_packet_tclass(struct mbuf *m, struct socket *so, int in_mtc, int isipv6)
+set_packet_service_class(struct mbuf *m, struct socket *so,
+    mbuf_svc_class_t in_msc, u_int32_t flags)
 {
-       int mtc = MBUF_TC_BE; /* Best effort by default */
-       struct inpcb *inp = sotoinpcb(so);       /* in6pcb and inpcb are the same */
+       mbuf_svc_class_t msc = MBUF_SC_BE;         /* Best effort by default */
+       struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
        struct ip *ip = mtod(m, struct ip *);
 #if INET6
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 #endif /* INET6 */
-       
+       int isipv6 = ((flags & PKT_SCF_IPV6) != 0) ? 1 : 0; 
+
        if (!(m->m_flags & M_PKTHDR))
                return;
-       
-       /* 
+
+       /*
         * Here is the precedence:
         * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
         * 2) Traffic class passed via ancillary data to sendmsg(2)
         * 3) Traffic class socket option last
         */
-       if (soisbackground(so)) {
-               mtc = MBUF_TC_BK;
-       } else if (in_mtc != MBUF_TC_UNSPEC) {
-               if (in_mtc >= MBUF_TC_BE && in_mtc <= MBUF_TC_VO)
-                       mtc = in_mtc;
+       if (in_msc != MBUF_SC_UNSPEC) {
+               if (in_msc >= MBUF_SC_BE && in_msc <= MBUF_SC_CTL)
+                       msc = in_msc;
        } else {
-               switch (so->so_traffic_class) {
-                       case SO_TC_BE:
-                               mtc = MBUF_TC_BE;
-                               break;
-                       case SO_TC_BK:
-                               mtc = MBUF_TC_BK;
-                               break;
-                       case SO_TC_VI:
-                               mtc = MBUF_TC_VI;
-                               break;
-                       case SO_TC_VO:
-                               mtc = MBUF_TC_VO;
-                               break;
-                       default:
-                               break;
-               }
+               VERIFY(SO_VALID_TC(so->so_traffic_class));
+               msc = so_tc2msc(so->so_traffic_class);
+               /* Assert because tc must have been valid */
+               VERIFY(MBUF_VALID_SC(msc));
        }
-       
+
+       /*
+        * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
+        */
+       if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc))
+               msc = MBUF_SC_BK;
+
        /*
-        * Set the traffic class in the mbuf packet header prio field
+        * Set the traffic class in the mbuf packet header svc field
         */
-       if ((sotcdb & SOTCDB_NO_MTC))
+       if (sotcdb & SOTCDB_NO_MTC)
                goto no_mbtc;
-       m->m_pkthdr.prio = mtc;
-       
+
+       /*
+        * Elevate the service class if the packet is a pure TCP ACK.
+        * We can do this only when the flow is not a background flow
+        * and the outgoing interface supports the transmit-start model.
+        */
+       if (!IS_MBUF_SC_BACKGROUND(msc) && (flags & PKT_SCF_TCP_ACK))
+               msc = MBUF_SC_CTL;
+
+       (void) m_set_service_class(m, msc);
+
+       /*
+        * Set the privileged traffic auxiliary flag if applicable, or clear it.
+        */
+       if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
+           msc != MBUF_SC_UNSPEC)
+               m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED;
+       else
+               m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED;
+
 no_mbtc:
        /*
-         * Quick exit when best effort
+        * Quick exit when best effort
         */
-       if (mtc == MBUF_TC_BE)
+       if (msc == MBUF_SC_BE)
                goto no_dscp;
+
        /*
-        * Now let set the DSCP code in IPv4 or IPv6 header
-        * By default do this only for local traffic if a code is not already set
+        * The default behavior is for the networking stack to not set the
+        * DSCP code, based on SOTCDB_NO_DSCP being set.  If the flag is
+        * cleared, set the DSCP code in IPv4 or IPv6 header only for local
+        * traffic, if it is not already set.  <rdar://problem/11277343>
         */
-       if ((sotcdb & SOTCDB_NO_DSCP))
+       if (sotcdb & SOTCDB_NO_DSCP)
                goto no_dscp;
-               
+
        /*
-        * Test if a IP TOS or IPV6 TCLASS has already been set on the socket or the raw packet
+        * Test if an IP TOS or IPv6 TCLASS has already been set
+        * on the socket or the raw packet.
         */
-       if ((sotcdb & SOTCDB_NO_DSCPTST) == 0) {
+       if (!(sotcdb & SOTCDB_NO_DSCPTST)) {
 #if INET6
-               if (isipv6) 
-               {
-                       if ((so->so_type == SOCK_RAW && (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
-                           (inp->in6p_outputopts && inp->in6p_outputopts->ip6po_tclass != -1))
+               if (isipv6) {
+                       if ((so->so_type == SOCK_RAW &&
+                           (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
+                           (inp->in6p_outputopts &&
+                           inp->in6p_outputopts->ip6po_tclass != -1))
                                goto no_dscp;
-               } 
-               else 
+               } else
 #endif /* INET6 */
-               {
-                       if ((so->so_type == SOCK_RAW && (inp->inp_flags & INP_HDRINCL)) ||
-                               inp->inp_ip_tos != 0)
-                               goto no_dscp;
-               }
+               if ((so->so_type == SOCK_RAW &&
+                   (inp->inp_flags & INP_HDRINCL)) ||
+                   inp->inp_ip_tos != 0)
+                       goto no_dscp;
        }
-       
+
        /*
         * Test if destination is local
         */
-       if ((sotcdb & SOTCDB_NO_LCLTST) == 0) {
+       if (!(sotcdb & SOTCDB_NO_LCLTST)) {
                int islocal = 0;
-               struct route *ro = &inp->inp_route;
+               struct rtentry *rt = inp->inp_route.ro_rt;
 
                if (so->so_type == SOCK_STREAM) {
-                       struct tcpcb *tp = intotcpcb(inp);
-                       
-                       if ((tp->t_flags & TF_LOCAL))
+                       if (intotcpcb(inp)->t_flags & TF_LOCAL)
                                islocal = 1;
-               }
-               else
-#if INET6
-               if (isipv6) 
-               {
-                       if ((ro != NULL && ro->ro_rt != NULL &&
-                                (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
-                                 (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
-                                in6addr_local(&ip6->ip6_dst))
+               } else if (rt != NULL &&
+                   (rt->rt_gateway->sa_family == AF_LINK ||
+                   (rt->rt_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))) {
+                       if (!(rt->rt_ifp->if_flags & IFF_POINTOPOINT))
                                islocal = 1;
-               } 
-               else
+               } else
+#if INET6
+               if (isipv6 && in6addr_local(&ip6->ip6_dst)) {
+                       islocal = 1;
+               } else
 #endif /* INET6 */
-               {
-                       if ((ro != NULL && ro->ro_rt != NULL && 
-                                (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
-                                 (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
-                                inaddr_local(ip->ip_dst))
-                               islocal = 1;
+               if (inaddr_local(ip->ip_dst)) {
+                       islocal = 1;
                }
                if (islocal == 0)
                        goto no_dscp;
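
Precedence rule 2 above arrives as ancillary data, and mbuf_service_class_from_control() is its kernel-side parser. A userland sketch of the matching sender, assuming the SO_TRAFFIC_CLASS cmsg type and SO_TC_* constants from this tree's headers:

    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* Send one datagram with a per-message traffic class. */
    static ssize_t
    send_with_tc(int s, const void *p, size_t len, int tc)
    {
            char cbuf[CMSG_SPACE(sizeof (int))];
            struct iovec iov;
            struct msghdr msg;
            struct cmsghdr *cm;

            iov.iov_base = (void *)(uintptr_t)p;    /* drop const for iovec */
            iov.iov_len = len;
            memset(&msg, 0, sizeof (msg));
            memset(cbuf, 0, sizeof (cbuf));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = cbuf;
            msg.msg_controllen = sizeof (cbuf);

            cm = CMSG_FIRSTHDR(&msg);
            cm->cmsg_level = SOL_SOCKET;
            cm->cmsg_type = SO_TRAFFIC_CLASS;
            cm->cmsg_len = CMSG_LEN(sizeof (int));
            *(int *)(void *)CMSG_DATA(cm) = tc;     /* e.g. SO_TC_VI */

            return (sendmsg(s, &msg, 0));
    }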
@@ -812,28 +950,38 @@ no_mbtc:
 
 #if INET6
        if (isipv6)
-               ip6->ip6_flow |=
-                       htonl(dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 20);
+               ip6->ip6_flow |= htonl(dscp_code_from_mbuf_tclass(
+                   m_get_traffic_class(m)) << 20);
        else
 #endif /* INET6 */
-               ip->ip_tos |= dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 2;
-       
+               ip->ip_tos |= dscp_code_from_mbuf_tclass(
+                   m_get_traffic_class(m)) << 2;
+
 no_dscp:
        /*
         * For TCP with background traffic class switch CC algo based on sysctl
         */
-       if (so->so_type == SOCK_STREAM) {
+       if (so->so_type == SOCK_STREAM)
                set_tcp_stream_priority(so);
-       }
-       
+
+       so_tc_update_stats(m, so, msc);
+}
+
+__private_extern__ void
+so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
+{
+       mbuf_traffic_class_t mtc;
+
        /*
         * Assume socket and mbuf traffic class values are the same
-        * Also assume the socket lock is held
+        * Also assume the socket lock is held.  Note that the stats
+        * at the socket layer are reduced down to the legacy traffic
+        * classes; we could/should potentially expand so_tc_stats[].
         */
+       mtc = MBUF_SC2TC(msc);
+       VERIFY(mtc < SO_TC_STATS_MAX);
        so->so_tc_stats[mtc].txpackets += 1;
        so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
-       
-       return;
 }
 
 __private_extern__ void
@@ -842,9 +990,100 @@ socket_tclass_init(void)
        tclass_lck_grp_attr = lck_grp_attr_alloc_init();
        tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
        tclass_lck_attr = lck_attr_alloc_init();
-       if ((tclass_lock = lck_mtx_alloc_init(tclass_lck_grp, tclass_lck_attr)) == NULL) {
-                       panic("failed to allocate memory for tclass\n");
+       lck_mtx_init(tclass_lock, tclass_lck_grp, tclass_lck_attr);
+}
+
+__private_extern__ mbuf_svc_class_t
+so_tc2msc(int tc)
+{
+       mbuf_svc_class_t msc;
+
+       switch (tc) {
+       case SO_TC_BK_SYS:
+               msc = MBUF_SC_BK_SYS;
+               break;
+       case SO_TC_BK:
+       case _SO_TC_BK:
+               msc = MBUF_SC_BK;
+               break;
+       case SO_TC_BE:
+               msc = MBUF_SC_BE;
+               break;
+       case SO_TC_RD:
+               msc = MBUF_SC_RD;
+               break;
+       case SO_TC_OAM:
+               msc = MBUF_SC_OAM;
+               break;
+       case SO_TC_AV:
+               msc = MBUF_SC_AV;
+               break;
+       case SO_TC_RV:
+               msc = MBUF_SC_RV;
+               break;
+       case SO_TC_VI:
+       case _SO_TC_VI:
+               msc = MBUF_SC_VI;
+               break;
+       case SO_TC_VO:
+       case _SO_TC_VO:
+               msc = MBUF_SC_VO;
+               break;
+       case SO_TC_CTL:
+               msc = MBUF_SC_CTL;
+               break;
+       case SO_TC_ALL:
+       default:
+               msc = MBUF_SC_UNSPEC;
+               break;
        }
+
+       return (msc);
 }
 
+__private_extern__ int
+so_svc2tc(mbuf_svc_class_t svc)
+{
+       switch (svc) {
+       case MBUF_SC_UNSPEC:
+               return SO_TC_BE;
+       case MBUF_SC_BK_SYS:
+               return SO_TC_BK_SYS;
+       case MBUF_SC_BK:
+               return SO_TC_BK;
+       case MBUF_SC_BE:
+               return SO_TC_BE;
+       case MBUF_SC_RD:
+               return SO_TC_RD;
+       case MBUF_SC_OAM:
+               return SO_TC_OAM;
+       case MBUF_SC_AV:
+               return SO_TC_AV;
+       case MBUF_SC_RV:
+               return SO_TC_RV;
+       case MBUF_SC_VI:
+               return SO_TC_VI;
+       case MBUF_SC_VO:
+               return SO_TC_VO;
+       case MBUF_SC_CTL:
+               return SO_TC_CTL;
+       default:
+               return SO_TC_BE;
+       }
+}
+
+/*
+ * LRO is turned on for AV streaming and background classes.
+ */
+static void
+so_set_lro(struct socket *so, int optval)
+{
+       if ((optval == SO_TC_BK) ||
+           (optval == SO_TC_BK_SYS) ||
+           (optval == SO_TC_AV)) {
+               so->so_flags |= SOF_USELRO;
+       } else {
+               so->so_flags &= ~SOF_USELRO;
+       }
+}
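
so_set_traffic_class() above is also the backend for the plain socket option (precedence rule 3): values outside SO_TC_BE..SO_TC_CTL are rejected, the legacy _SO_TC_* values are remapped, and the BK/BK_SYS/AV classes additionally toggle LRO through so_set_lro(). A minimal sketch, assuming SO_TRAFFIC_CLASS and SO_TC_BK_SYS from this tree's headers:

    #include <sys/socket.h>

    /*
     * Mark a socket's flow background-system; per the BK_SYS handling
     * above, such a flow may be suspended while the link is throttled
     * and resumed when the class is changed back.
     */
    static int
    mark_background_sys(int s)
    {
            int tc = SO_TC_BK_SYS;

            return (setsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS,
                &tc, sizeof (tc)));
    }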
 
index 70a0370edd5f548201121c4f17d851cbe513e90d..74100b4a37baacefa8aef1e7b00d3297d5b9b384 100644
@@ -96,6 +96,8 @@ struct in_ifaddr {
        struct sockaddr_in      ia_sockmask;    /* reserve space for general netmask */
        TAILQ_ENTRY(in_ifaddr)  ia_hash;        /* hash bucket entry */
 };
+
+#define        ifatoia(ifa)    ((struct in_ifaddr *)(void *)(ifa))
 #endif /* XNU_KERNEL_PRIVATE */
 
 struct in_aliasreq {
@@ -458,6 +460,7 @@ do {                                                                        \
 
 struct route;
 struct ip_moptions;
+struct inpcb;
 
 /*
  * Return values for imo_multi_filter().
@@ -470,7 +473,7 @@ struct      ip_moptions;
 extern void in_ifaddr_init(void);
 extern int imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
     const struct sockaddr *, const struct sockaddr *);
-extern int imo_clone(struct ip_moptions *, struct ip_moptions *);
+extern int imo_clone(struct inpcb *, struct inpcb *);
 extern void inm_commit(struct in_multi *);
 extern void inm_clear_recorded(struct in_multi *);
 extern void inm_print(const struct in_multi *);
@@ -498,8 +501,6 @@ extern void in_purgeaddrs(struct ifnet *);
 extern void    imf_leave(struct in_mfilter *);
 extern void    imf_purge(struct in_mfilter *);
 
-struct inpcb;
-
 __private_extern__ int inp_join_group(struct inpcb *, struct sockopt *);
 __private_extern__ int inp_leave_group(struct inpcb *, struct sockopt *);
 __private_extern__ void in_multihead_lock_exclusive(void);
index 600a796d1133632311d1c6b16ca900ffe42f9ccb..1af24caecc1e79be52ba72a02df81e4161e8b694 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -242,7 +242,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule)
                                continue;
                        }
                        divsrc.sin_addr =
-                           ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
+                           ((struct sockaddr_in *)(void *) ifa->ifa_addr)->sin_addr;
                        IFA_UNLOCK(ifa);
                        break;
                }
@@ -314,12 +314,12 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 {
        struct inpcb *const inp = sotoinpcb(so);
        struct ip *const ip = mtod(m, struct ip *);
-       struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+       struct sockaddr_in *sin = (struct sockaddr_in *)(void *)addr;
        int error = 0;
-       mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
 
        if (control != NULL) {
-               mtc = mbuf_traffic_class_from_control(control);
+               msc = mbuf_service_class_from_control(control);
 
                m_freem(control);               /* XXX */
        }
@@ -357,7 +357,8 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 
        /* Reinject packet into the system as incoming or outgoing */
        if (!sin || sin->sin_addr.s_addr == 0) {
-               struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+               struct ip_out_args ipoa =
+                   { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
                struct route ro;
                struct ip_moptions *imo;
 
@@ -381,7 +382,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
                /* Copy the cached route and take an extra reference */
                inp_route_copyout(inp, &ro);
 
-               set_packet_tclass(m, so, mtc, 0);
+               set_packet_service_class(m, so, msc, 0);
 
                imo = inp->inp_moptions;
                if (imo != NULL)
@@ -518,7 +519,7 @@ div_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
        if (nam->sa_family != AF_INET) {
                error = EAFNOSUPPORT;
        } else {
-               ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+               ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr = INADDR_ANY;
                error = in_pcbbind(inp, nam, p);
        }
        return error;
index 048cff0044275a233be89aeeb96ed1f491ec7c11..5ebcb2a51cbba04fe7f2b2755604dca6c7628d9b 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/socketvar.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
-//#include <sys/mcache.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <net/kpi_protocol.h>
+#if DUMMYNET
+#include <net/kpi_protocol.h>
+#endif /* DUMMYNET */
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_var.h>
 
+#include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+
+static struct ip_fw default_rule;
+
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timer.c)
@@ -211,7 +218,8 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
 static lck_grp_t         *dn_mutex_grp;
 static lck_grp_attr_t    *dn_mutex_grp_attr;
 static lck_attr_t        *dn_mutex_attr;
-static lck_mtx_t         *dn_mutex;
+decl_lck_mtx_data(static, dn_mutex_data);
+static lck_mtx_t         *dn_mutex = &dn_mutex_data;
 
 static int config_pipe(struct dn_pipe *p);
 static int ip_dn_ctl(struct sockopt *sopt);
@@ -220,7 +228,6 @@ static void dummynet(void *);
 static void dummynet_flush(void);
 void dummynet_drain(void);
 static ip_dn_io_t dummynet_io;
-static void dn_rule_delete(void *);
 
 int if_tx_rdy(struct ifnet *ifp);
 
@@ -687,11 +694,12 @@ static struct dn_pkt_tag *
 dn_tag_get(struct mbuf *m)
 {
     struct m_tag *mtag = m_tag_first(m);
-/*     KASSERT(mtag != NULL &&
-           mtag->m_tag_id == KERNEL_MODULE_TAG_ID &&
-           mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET,
-           ("packet on dummynet queue w/o dummynet tag!"));
-*/
+
+    if (!(mtag != NULL &&
+          mtag->m_tag_id == KERNEL_MODULE_TAG_ID &&
+          mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET))
+       panic("packet on dummynet queue w/o dummynet tag: %p", m);
+
     return (struct dn_pkt_tag *)(mtag+1);
 }
 
@@ -716,16 +724,16 @@ dn_tag_get(struct mbuf *m)
 static void
 transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 {
-    struct mbuf *m ;
-    struct dn_pkt_tag *pkt ;
-    u_int64_t schedule_time;
+       struct mbuf *m ;
+       struct dn_pkt_tag *pkt = NULL;
+       u_int64_t schedule_time;
 
        lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);
-       ASSERT(serialize >= 0);
+        ASSERT(serialize >= 0);
        if (serialize == 0) {
                while ((m = pipe->head) != NULL) {
                        pkt = dn_tag_get(m);
-                       if (!DN_KEY_LEQ(pkt->output_time, curr_time))
+                       if (!DN_KEY_LEQ(pkt->dn_output_time, curr_time))
                                break;
 
                        pipe->head = m->m_nextpkt;
@@ -738,19 +746,19 @@ transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
                
                if (*tail != NULL)
                        (*tail)->m_nextpkt = NULL;
-               }
+       }
 
-               schedule_time = DN_KEY_LEQ(pkt->output_time, curr_time) ?
-                   curr_time+1 : pkt->output_time;
+       schedule_time = pkt == NULL || DN_KEY_LEQ(pkt->dn_output_time, curr_time) ?
+               curr_time + 1 : pkt->dn_output_time;
 
-    /* if there are leftover packets, put the pipe into the heap for next ready event */
-    if ((m = pipe->head) != NULL) {
+       /* if there are leftover packets, put the pipe into the heap for next ready event */
+       if ((m = pipe->head) != NULL) {
                pkt = dn_tag_get(m);
                /* XXX should check errors on heap_insert, by draining the
                 * whole pipe p and hoping in the future we are more successful
                 */
                heap_insert(&extract_heap, schedule_time, pipe);
-    }
+       }
 }
 
 /*
@@ -783,7 +791,7 @@ move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
     q->len-- ;
     q->len_bytes -= len ;
 
-    dt->output_time = curr_time + p->delay ;
+    dt->dn_output_time = curr_time + p->delay ;
 
     if (p->head == NULL)
        p->head = pkt;
@@ -875,11 +883,11 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
        int64_t p_numbytes = p->numbytes;
 
        lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);
-       
+
     if (p->if_name[0] == 0) /* tx clock is simulated */
        p_numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
     else { /* tx clock is for real, the ifq must be empty or this is a NOP */
-       if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
+       if (p->ifp && !IFCQ_IS_EMPTY(&p->ifp->if_snd))
            return ;
        else {
            DPRINTF(("dummynet: pipe %d ready from %s --\n",
@@ -968,7 +976,7 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
 
        if (p->bandwidth > 0)
            t = ( p->bandwidth -1 - p_numbytes) / p->bandwidth ;
-       dn_tag_get(p->tail)->output_time += t ;
+       dn_tag_get(p->tail)->dn_output_time += t ;
        p->sched_time = curr_time ;
        heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
        /* XXX should check errors on heap_insert, and drain the whole
@@ -1055,7 +1063,7 @@ dummynet(__unused void * unused)
            q->S = q->F + 1 ; /* mark timestamp as invalid */
            pe->sum -= q->fs->weight ;
        }
-       
+
        /* check the heaps to see if there's still stuff in there, and 
         * only set the timer if there are packets to process 
         */
@@ -1097,10 +1105,15 @@ dummynet_send(struct mbuf *m)
                m->m_nextpkt = NULL;
                pkt = dn_tag_get(m);
                
+               DPRINTF(("dummynet_send m: %p dn_dir: %d dn_flags: 0x%x\n",
+                               m, pkt->dn_dir, pkt->dn_flags));
+               
        switch (pkt->dn_dir) {
                case DN_TO_IP_OUT: {
-                       struct route tmp_rt = pkt->ro;
-                       (void)ip_output(m, NULL, &tmp_rt, pkt->flags, NULL, NULL);
+                       struct route tmp_rt = pkt->dn_ro;
+                       /* Force IP_RAWOUTPUT as the IP header is fully formed */
+                       pkt->dn_flags |= IP_RAWOUTPUT | IP_FORWARDING;
+                       (void)ip_output(m, NULL, &tmp_rt, pkt->dn_flags, NULL, NULL);
                        if (tmp_rt.ro_rt) {
                                rtfree(tmp_rt.ro_rt);
                                tmp_rt.ro_rt = NULL;
@@ -1110,7 +1123,22 @@ dummynet_send(struct mbuf *m)
                case DN_TO_IP_IN :
                        proto_inject(PF_INET, m);
                        break ;
-       
+#ifdef INET6
+               case DN_TO_IP6_OUT: {
+                       struct route_in6 ro6;
+
+                       ro6 = pkt->dn_ro6;
+
+                       ip6_output(m, NULL, &ro6, IPV6_FORWARDING, NULL, NULL, NULL);
+
+                       if (ro6.ro_rt)
+                               rtfree(ro6.ro_rt);      
+                       break;
+               }
+               case DN_TO_IP6_IN:
+                       proto_inject(PF_INET6, m);
+                       break;
+#endif /* INET6 */     
                default:
                        printf("dummynet: bad switch %d!\n", pkt->dn_dir);
                        m_freem(m);
@@ -1150,7 +1178,7 @@ if_tx_rdy(struct ifnet *ifp)
     }
     if (p != NULL) {
        DPRINTF(("dummynet: ++ tx rdy from %s%d - qlen %d\n", ifp->if_name,
-               ifp->if_unit, ifp->if_snd.ifq_len));
+               ifp->if_unit, IFCQ_LEN(&ifp->if_snd)));
        p->numbytes = 0 ; /* mark ready for I/O */
        ready_event_wfq(p, &head, &tail);
     }
@@ -1161,11 +1189,12 @@ if_tx_rdy(struct ifnet *ifp)
        
        lck_mtx_unlock(dn_mutex);
 
-       
        /* Send out the de-queued list of ready-to-send packets */
        if (head != NULL) {
                dummynet_send(head);
+               lck_mtx_lock(dn_mutex);
                serialize--;
+               lck_mtx_unlock(dn_mutex);
        }
     return 0;
 }
@@ -1243,41 +1272,84 @@ create_queue(struct dn_flow_set *fs, int i)
  * so that further searches take less time.
  */
 static struct dn_flow_queue *
-find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
+find_queue(struct dn_flow_set *fs, struct ip_flow_id *id)
 {
     int i = 0 ; /* we need i and q for new allocations */
     struct dn_flow_queue *q, *prev;
+    int is_v6 = IS_IP6_FLOW_ID(id);
 
     if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
        q = fs->rq[0] ;
     else {
-       /* first, do the masking */
-       id->dst_ip &= fs->flow_mask.dst_ip ;
-       id->src_ip &= fs->flow_mask.src_ip ;
+       /* first, do the masking, then hash */
        id->dst_port &= fs->flow_mask.dst_port ;
        id->src_port &= fs->flow_mask.src_port ;
        id->proto &= fs->flow_mask.proto ;
        id->flags = 0 ; /* we don't care about this one */
-       /* then, hash function */
-       i = ( (id->dst_ip) & 0xffff ) ^
-           ( (id->dst_ip >> 15) & 0xffff ) ^
-           ( (id->src_ip << 1) & 0xffff ) ^
-           ( (id->src_ip >> 16 ) & 0xffff ) ^
-           (id->dst_port << 1) ^ (id->src_port) ^
-           (id->proto );
+        if (is_v6) {
+            APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6);
+            APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6);
+            id->flow_id6 &= fs->flow_mask.flow_id6;
+
+            i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
+
+                ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
+                ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
+
+                ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
+
+                ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
+
+                (id->dst_port << 1) ^ (id->src_port) ^
+                (id->proto ) ^
+                (id->flow_id6);
+        } else {
+            id->dst_ip &= fs->flow_mask.dst_ip ;
+            id->src_ip &= fs->flow_mask.src_ip ;
+
+            i = ( (id->dst_ip) & 0xffff ) ^
+                ( (id->dst_ip >> 15) & 0xffff ) ^
+                ( (id->src_ip << 1) & 0xffff ) ^
+                ( (id->src_ip >> 16 ) & 0xffff ) ^
+                (id->dst_port << 1) ^ (id->src_port) ^
+                (id->proto );
+        }
        i = i % fs->rq_size ;
        /* finally, scan the current list for a match */
        searches++ ;
        for (prev=NULL, q = fs->rq[i] ; q ; ) {
            search_steps++;
-           if (id->dst_ip == q->id.dst_ip &&
-                   id->src_ip == q->id.src_ip &&
-                   id->dst_port == q->id.dst_port &&
-                   id->src_port == q->id.src_port &&
-                   id->proto == q->id.proto &&
-                   id->flags == q->id.flags)
-               break ; /* found */
-           else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
+            if (is_v6 &&
+                    IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) &&
+                    IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) &&
+                    id->dst_port == q->id.dst_port &&
+                    id->src_port == q->id.src_port &&
+                    id->proto == q->id.proto &&
+                    id->flags == q->id.flags &&
+                    id->flow_id6 == q->id.flow_id6)
+                break ; /* found */
+
+            if (!is_v6 && id->dst_ip == q->id.dst_ip &&
+                    id->src_ip == q->id.src_ip &&
+                    id->dst_port == q->id.dst_port &&
+                    id->src_port == q->id.src_port &&
+                    id->proto == q->id.proto &&
+                    id->flags == q->id.flags)
+                break ; /* found */
+
+            /* No match. Check if we can expire the entry */
+           if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
                /* entry is idle and not in any heap, expire it */
                struct dn_flow_queue *old_q = q ;
 
@@ -1451,9 +1523,9 @@ locate_pipe(int pipe_nr)
  *
  */
 static int
-dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
+dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa, int client)
 {
-       struct mbuf *head = NULL, *tail = NULL;
+    struct mbuf *head = NULL, *tail = NULL;
     struct dn_pkt_tag *pkt;
     struct m_tag *mtag;
     struct dn_flow_set *fs = NULL;
@@ -1464,15 +1536,28 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
     struct timespec ts;
     struct timeval     tv;
     
+    DPRINTF(("dummynet_io m: %p pipe: %d dir: %d client: %d\n",
+               m, pipe_nr, dir, client));
+
+#if IPFIREWALL
 #if IPFW2
-    ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs;
+    if (client == DN_CLIENT_IPFW) {
+        ipfw_insn *cmd = fwa->fwa_ipfw_rule->cmd + fwa->fwa_ipfw_rule->act_ofs;
 
-    if (cmd->opcode == O_LOG)
-       cmd += F_LEN(cmd);
-    is_pipe = (cmd->opcode == O_PIPE);
+        if (cmd->opcode == O_LOG)
+           cmd += F_LEN(cmd);
+        is_pipe = (cmd->opcode == O_PIPE);
+    }
 #else
-    is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE;
+    if (client == DN_CLIENT_IPFW)
+        is_pipe = (fwa->fwa_ipfw_rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE;
 #endif
+#endif /* IPFIREWALL */
+
+#if DUMMYNET
+    if (client == DN_CLIENT_PF)
+       is_pipe = fwa->fwa_flags == DN_IS_PIPE ? 1 : 0;
+#endif /* DUMMYNET */
 
     pipe_nr &= 0xffff ;
 
@@ -1482,7 +1567,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
          * here we convert secs and usecs to msecs (just divide the 
          * usecs and take the closest whole number).
         */
-        microuptime(&tv);
+    microuptime(&tv);
        curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
        
    /*
@@ -1511,7 +1596,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
            goto dropit ;
        }
     }
-    q = find_queue(fs, &(fwa->f_id));
+    q = find_queue(fs, &(fwa->fwa_id));
     if ( q == NULL )
        goto dropit ;           /* cannot allocate queue                */
     /*
@@ -1542,28 +1627,70 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
     bzero(pkt, sizeof(struct dn_pkt_tag));
     /* ok, i can handle the pkt now... */
     /* build and enqueue packet + parameters */
-    pkt->rule = fwa->rule ;
+    /*
+     * PF is checked before ipfw, so remember the ipfw rule only when
+     * the caller is ipfw.  When the caller is PF, fwa_ipfw_rule is a
+     * fake rule used just for convenience.
+     */
+    if (client == DN_CLIENT_IPFW)
+       pkt->dn_ipfw_rule = fwa->fwa_ipfw_rule;
+    pkt->dn_pf_rule = fwa->fwa_pf_rule;
     pkt->dn_dir = dir ;
+    pkt->dn_client = client;
 
-    pkt->ifp = fwa->oif;
+    pkt->dn_ifp = fwa->fwa_oif;
     if (dir == DN_TO_IP_OUT) {
-       /*
-        * We need to copy *ro because for ICMP pkts (and maybe others)
-        * the caller passed a pointer into the stack; dst might also be
-        * a pointer into *ro so it needs to be updated.
-        */
-       pkt->ro = *(fwa->ro);
-       if (fwa->ro->ro_rt)
-               RT_ADDREF(fwa->ro->ro_rt);
-
-       if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */
-           fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;
-
-       bcopy (fwa->dst, &pkt->dn_dst, sizeof(pkt->dn_dst));
-       pkt->flags = fwa->flags;
-       if (fwa->ipoa != NULL)
-               pkt->ipoa = *(fwa->ipoa);
-       }
+               /*
+                * We need to copy *ro because for ICMP pkts (and maybe others)
+                * the caller passed a pointer into the stack; dst might also be
+                * a pointer into *ro so it needs to be updated.
+                */
+               if (fwa->fwa_ro) {
+                       pkt->dn_ro = *(fwa->fwa_ro);
+                       if (fwa->fwa_ro->ro_rt)
+                               RT_ADDREF(fwa->fwa_ro->ro_rt);
+               }
+               if (fwa->fwa_dst) {
+                       if (fwa->fwa_dst == (struct sockaddr_in *)&fwa->fwa_ro->ro_dst) /* dst points into ro */
+                               fwa->fwa_dst = (struct sockaddr_in *)&(pkt->dn_ro.ro_dst) ;
+       
+                       bcopy (fwa->fwa_dst, &pkt->dn_dst, sizeof(pkt->dn_dst));
+               }
+    } else if (dir == DN_TO_IP6_OUT) {
+               if (fwa->fwa_ro6) {
+                       pkt->dn_ro6 = *(fwa->fwa_ro6);
+                       if (fwa->fwa_ro6->ro_rt)
+                               RT_ADDREF(fwa->fwa_ro6->ro_rt);
+               }
+               if (fwa->fwa_ro6_pmtu) {
+                       pkt->dn_ro6_pmtu = *(fwa->fwa_ro6_pmtu);
+                       if (fwa->fwa_ro6_pmtu->ro_rt)
+                               RT_ADDREF(fwa->fwa_ro6_pmtu->ro_rt);
+               }
+               if (fwa->fwa_dst6) {
+                       if (fwa->fwa_dst6 == (struct sockaddr_in6 *)&fwa->fwa_ro6->ro_dst) /* dst points into ro */
+                               fwa->fwa_dst6 = (struct sockaddr_in6 *)&(pkt->dn_ro6.ro_dst) ;
+       
+                       bcopy (fwa->fwa_dst6, &pkt->dn_dst6, sizeof(pkt->dn_dst6));
+               }
+               pkt->dn_origifp = fwa->fwa_origifp;
+               pkt->dn_mtu = fwa->fwa_mtu;
+               pkt->dn_alwaysfrag = fwa->fwa_alwaysfrag;
+               pkt->dn_unfragpartlen = fwa->fwa_unfragpartlen;
+               if (fwa->fwa_exthdrs) {
+                       bcopy (fwa->fwa_exthdrs, &pkt->dn_exthdrs, sizeof(pkt->dn_exthdrs));
+                       /* 
+                        * Need to zero out the source structure so the mbufs
+                        * won't be freed by ip6_output()
+                        */ 
+                       bzero(fwa->fwa_exthdrs, sizeof(struct ip6_exthdrs));
+               }
+    }
+    if (dir == DN_TO_IP_OUT || dir == DN_TO_IP6_OUT) {
+               pkt->dn_flags = fwa->fwa_oflags;
+               if (fwa->fwa_ipoa != NULL)
+                       pkt->dn_ipoa = *(fwa->fwa_ipoa);
+    }
     if (q->head == NULL)
        q->head = m;
     else
@@ -1587,7 +1714,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
        if (pipe->bandwidth)
            t = SET_TICKS(m, q, pipe);
        q->sched_time = curr_time ;
-       if (t == 0)     /* must process it now */
+       if (t == 0)     /* must process it now */
            ready_event( q , &head, &tail );
        else
            heap_insert(&ready_heap, curr_time + t , q );
@@ -1653,9 +1780,10 @@ done:
        }
 
        lck_mtx_unlock(dn_mutex);
-
-       if (head != NULL)
+       
+       if (head != NULL) {
                dummynet_send(head);
+       }
 
     return 0;
 
@@ -1675,9 +1803,9 @@ dropit:
        struct m_tag *tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); \
        if (tag) {                                              \
                struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1);    \
-               if (n->ro.ro_rt != NULL) {                      \
-                       rtfree(n->ro.ro_rt);                    \
-                       n->ro.ro_rt = NULL;                     \
+               if (n->dn_ro.ro_rt != NULL) {                   \
+                       rtfree(n->dn_ro.ro_rt);                 \
+                       n->dn_ro.ro_rt = NULL;                  \
                }                                               \
        }                                                       \
        m_tag_delete(_m, tag);                                  \
@@ -1761,9 +1889,11 @@ dummynet_flush(void)
 
        lck_mtx_lock(dn_mutex);
 
-    /* remove all references to pipes ...*/
-    flush_pipe_ptrs(NULL);
+#if IPFW2
+       /* remove all references to pipes ...*/
+       flush_pipe_ptrs(NULL);
+#endif /* IPFW2 */
+
        /* Free heaps so we don't have unwanted events. */
        heap_free(&ready_heap);
        heap_free(&wfq_ready_heap);
@@ -1789,9 +1919,8 @@ dummynet_flush(void)
 }
 
 
-extern struct ip_fw *ip_fw_default_rule ;
 static void
-dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
+dn_ipfw_rule_delete_fs(struct dn_flow_set *fs, void *r)
 {
     int i ;
     struct dn_flow_queue *q ;
@@ -1801,8 +1930,8 @@ dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
        for (q = fs->rq[i] ; q ; q = q->next )
            for (m = q->head ; m ; m = m->m_nextpkt ) {
                struct dn_pkt_tag *pkt = dn_tag_get(m) ;
-               if (pkt->rule == r)
-                   pkt->rule = ip_fw_default_rule ;
+               if (pkt->dn_ipfw_rule == r)
+                   pkt->dn_ipfw_rule = &default_rule ;
            }
 }
 /*
@@ -1810,7 +1939,7 @@ dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
  * from packets matching this rule.
  */
 void
-dn_rule_delete(void *r)
+dn_ipfw_rule_delete(void *r)
 {
     struct dn_pipe *p ;
     struct dn_flow_set *fs ;
@@ -1827,16 +1956,16 @@ dn_rule_delete(void *r)
      */
     for (i = 0; i < HASHSIZE; i++)
        SLIST_FOREACH(fs, &flowsethash[i], next)
-               dn_rule_delete_fs(fs, r);
+               dn_ipfw_rule_delete_fs(fs, r);
 
     for (i = 0; i < HASHSIZE; i++)
        SLIST_FOREACH(p, &pipehash[i], next) {
                fs = &(p->fs);
-               dn_rule_delete_fs(fs, r);
+               dn_ipfw_rule_delete_fs(fs, r);
                for (m = p->head ; m ; m = m->m_nextpkt ) {
                        pkt = dn_tag_get(m);
-                       if (pkt->rule == r)
-                               pkt->rule = ip_fw_default_rule;
+                       if (pkt->dn_ipfw_rule == r)
+                               pkt->dn_ipfw_rule = &default_rule;
                }
        }
        lck_mtx_unlock(dn_mutex);
@@ -1933,9 +2062,9 @@ set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
            x->qsize = 1024*1024 ;
     } else {
        if (x->qsize == 0)
-           x->qsize = 50;
+           x->qsize = 50 ;
        if (x->qsize > 100)
-           x->qsize = 50;
+           x->qsize = 50 ;
     }
     /* configuring RED */
     if ( x->flags_fs & DN_IS_RED )
@@ -2161,8 +2290,10 @@ delete_pipe(struct dn_pipe *p)
        /* Unlink from list of pipes. */
        SLIST_REMOVE(&pipehash[HASH(b->pipe_nr)], b, dn_pipe, next);
 
+#if IPFW2
        /* remove references to this pipe from the ip_fw rules. */
        flush_pipe_ptrs(&(b->fs));
+#endif /* IPFW2 */
 
        /* Remove all references to this pipe from flow_sets. */
        for (i = 0; i < HASHSIZE; i++)
@@ -2193,8 +2324,10 @@ delete_pipe(struct dn_pipe *p)
            return EINVAL ; /* not found */
        }
 
+#if IPFW2
        /* remove references to this flow_set from the ip_fw rules. */
        flush_pipe_ptrs(b);
+#endif /* IPFW2 */
 
        /* Unlink from list of flowsets. */
        SLIST_REMOVE( &flowsethash[HASH(b->fs_nr)], b, dn_flow_set, next);
@@ -2443,21 +2576,30 @@ ip_dn_init(void)
        dn_mutex_grp_attr = lck_grp_attr_alloc_init();
        dn_mutex_grp = lck_grp_alloc_init("dn", dn_mutex_grp_attr);
        dn_mutex_attr = lck_attr_alloc_init();
-
-       if ((dn_mutex = lck_mtx_alloc_init(dn_mutex_grp, dn_mutex_attr)) == NULL) {
-               printf("ip_dn_init: can't alloc dn_mutex\n");
-               return;
-       }
+       lck_mtx_init(dn_mutex, dn_mutex_grp, dn_mutex_attr);
 
        ready_heap.size = ready_heap.elements = 0 ;
-    ready_heap.offset = 0 ;
+       ready_heap.offset = 0 ;
+
+       wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
+       wfq_ready_heap.offset = 0 ;
 
-    wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
-    wfq_ready_heap.offset = 0 ;
+       extract_heap.size = extract_heap.elements = 0 ;
+       extract_heap.offset = 0 ;
+       ip_dn_ctl_ptr = ip_dn_ctl;
+       ip_dn_io_ptr = dummynet_io;
 
-    extract_heap.size = extract_heap.elements = 0 ;
-    extract_heap.offset = 0 ;
-    ip_dn_ctl_ptr = ip_dn_ctl;
-    ip_dn_io_ptr = dummynet_io;
-    ip_dn_ruledel_ptr = dn_rule_delete;
+       bzero(&default_rule, sizeof default_rule);
+       
+       default_rule.act_ofs = 0;
+       default_rule.rulenum = IPFW_DEFAULT_RULE;
+       default_rule.cmd_len = 1;
+       default_rule.set = RESVD_SET;
+
+       default_rule.cmd[0].len = 1;
+       default_rule.cmd[0].opcode = 
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+                                1 ? O_ACCEPT :
+#endif
+                                O_DENY;
 }
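
A note on the default_rule.cmd[0].opcode initializer just above: the #ifdef splices an extra arm into a single conditional expression. A minimal user-space sketch of what the compiler sees under each configuration (the O_ACCEPT/O_DENY values below are placeholders, not the real ipfw opcodes):

#include <stdio.h>

#define O_DENY   0          /* placeholder values; the real ipfw */
#define O_ACCEPT 1          /* opcodes live in ip_fw2.h          */
/* #define IPFIREWALL_DEFAULT_TO_ACCEPT */

int main(void)
{
	int opcode =
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
	             1 ? O_ACCEPT :     /* expands to "1 ? O_ACCEPT : O_DENY" */
#endif
	             O_DENY;            /* otherwise just "O_DENY" */
	printf("default opcode = %d\n", opcode);
	return 0;
}
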
index e5dd1f337358232bc3cfb38e996eb647410bad74..b55a36b93817c016a2b9183095acd64b34b41e8a 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/appleapiopts.h>
 
 #ifdef PRIVATE
+
+#include <netinet/ip_flowid.h>
+
+/* Apply ipv6 mask on ipv6 addr */
+#define APPLY_MASK(addr,mask)                          \
+    (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
+    (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
+    (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
+    (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3];
+
 /*
  * Definition of dummynet data structures. In the structures, I decided
  * not to use the macros in <sys/queue.h> in the hope of making the code
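
A user-space sketch of the APPLY_MASK macro added above, assuming the BSD/Darwin in6_addr layout (the __u6_addr union); it masks an address down to its /64 prefix. Note the macro expands to four separate statements, so in kernel code it is only safe where a full block is expected:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

#define APPLY_MASK(addr,mask)                                             \
    (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
    (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
    (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
    (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3];

int main(void)
{
	struct in6_addr a, m;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "2001:db8:1:2:3:4:5:6", &a);
	memset(&m, 0, sizeof (m));     /* build a /64 mask: ffff:ffff:ffff:ffff:: */
	memset(&m, 0xff, 8);
	APPLY_MASK(&a, &m);            /* a is now 2001:db8:1:2:: */
	printf("%s\n", inet_ntop(AF_INET6, &a, buf, sizeof (buf)));
	return 0;
}
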
@@ -147,20 +157,48 @@ struct dn_heap {
  */
 #ifdef KERNEL
 #include <netinet/ip_var.h>    /* for ip_out_args */
+#include <netinet/ip6.h>       /* for ip6_out_args */
+#include <netinet6/ip6_var.h>  /* for ip6_out_args */
 
 struct dn_pkt_tag {
-    struct ip_fw *rule;                /* matching rule */
-    int dn_dir;                        /* action when packet comes out. */
+    struct ip_fw       *dn_ipfw_rule;          /* matching IPFW rule */
+    void               *dn_pf_rule;            /* matching PF rule */
+    int                        dn_dir;                 /* action when packet comes out. */
 #define DN_TO_IP_OUT   1
 #define DN_TO_IP_IN    2
 #define DN_TO_BDG_FWD  3
-
-    dn_key output_time;                /* when the pkt is due for delivery     */
-    struct ifnet *ifp;         /* interface, for ip_output             */
-    struct sockaddr_in dn_dst ;
-    struct route ro;           /* route, for ip_output. MUST COPY      */
-    int flags ;                        /* flags, for ip_output (IPv6 ?)        */
-    struct ip_out_args ipoa;   /* output args, for ip_output. MUST COPY */
+#define DN_TO_IP6_IN    4
+#define DN_TO_IP6_OUT   5
+    dn_key             dn_output_time;         /* when the pkt is due for delivery     */
+    struct ifnet       *dn_ifp;                /* interface, for ip[6]_output          */
+    union {
+       struct sockaddr_in      _dn_dst;
+       struct sockaddr_in6     _dn_dst6 ;
+    }                  dn_dst_;
+#define dn_dst dn_dst_._dn_dst
+#define dn_dst6 dn_dst_._dn_dst6
+    union {
+       struct route            _dn_ro;         /* route, for ip_output. MUST COPY      */
+       struct route_in6        _dn_ro6;        /* route, for ip6_output. MUST COPY     */
+       }               dn_ro_;
+#define dn_ro dn_ro_._dn_ro
+#define dn_ro6 dn_ro_._dn_ro6
+    struct route_in6   dn_ro6_pmtu;            /* for ip6_output */
+    struct ifnet       *dn_origifp;            /* for ip6_output */
+    u_int32_t          dn_mtu;                 /* for ip6_output */
+    int                        dn_alwaysfrag;          /* for ip6_output */
+    u_int32_t          dn_unfragpartlen;       /* for ip6_output */
+    struct ip6_exthdrs         dn_exthdrs;             /* for ip6_output */
+    int                        dn_flags ;              /* flags, for ip[6]_output */
+    int                        dn_client;
+#define DN_CLIENT_IPFW 1
+#define DN_CLIENT_PF   2
+    union {
+       struct ip_out_args      _dn_ipoa;       /* output args, for ip_output. MUST COPY */
+       struct ip6_out_args     _dn_ip6oa;      /* output args, for ip6_output. MUST COPY */
+    }                  dn_ipoa_;
+#define dn_ipoa dn_ipoa_._dn_ipoa
+#define dn_ip6oa dn_ipoa_._dn_ip6oa
 };
 #else
 struct dn_pkt;
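
The struct dn_pkt_tag rework above stores either an IPv4 or an IPv6 route (and either flavor of output args) in overlapping union storage, with #define shorthands so existing code can keep writing pkt->dn_ro. A minimal sketch of that pattern with stand-in types:

#include <stdio.h>

struct route4 { int r4; };     /* stand-ins for struct route / struct route_in6 */
struct route6 { int r6; };

struct tag {
	int client;            /* records which union member is live */
	union {
		struct route4 _ro4;
		struct route6 _ro6;
	} ro_;
};
#define t_ro4 ro_._ro4         /* same trick as dn_ro / dn_ro6 above */
#define t_ro6 ro_._ro6

int main(void)
{
	struct tag t = { .client = 4 };
	t.t_ro4.r4 = 1;        /* expands to t.ro_._ro4.r4 = 1 */
	printf("%d\n", t.t_ro4.r4);
	return 0;
}
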
@@ -236,7 +274,7 @@ flow using a number of heaps defined into the pipe itself.
  */
 struct dn_flow_queue {
     struct dn_flow_queue *next ;
-    struct ipfw_flow_id id ;
+    struct ip_flow_id id ;
 
     struct mbuf *head, *tail ; /* queue of packets */
     u_int len ;
@@ -299,7 +337,7 @@ struct dn_flow_set {
     int qsize ;                        /* queue size in slots or bytes */
     int plr ;                  /* pkt loss rate (2^31-1 means 100%) */
 
-    struct ipfw_flow_id flow_mask ;
+    struct ip_flow_id flow_mask ;
 
     /* hash table of queues onto this flow_set */
     int rq_size ;              /* number of slots */
@@ -384,12 +422,11 @@ SLIST_HEAD(dn_pipe_head, dn_pipe);
 void ip_dn_init(void); /* called from raw_ip.c:load_ipfw() */
 
 typedef        int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */
-typedef        void ip_dn_ruledel_t(void *); /* ip_fw.c */
 typedef        int ip_dn_io_t(struct mbuf *m, int pipe_nr, int dir,
-       struct ip_fw_args *fwa);
+       struct ip_fw_args *fwa, int );
 extern ip_dn_ctl_t *ip_dn_ctl_ptr;
-extern ip_dn_ruledel_t *ip_dn_ruledel_ptr;
 extern ip_dn_io_t *ip_dn_io_ptr;
+void dn_ipfw_rule_delete(void *);
 #define        DUMMYNET_LOADED (ip_dn_io_ptr != NULL)
 
 #pragma pack(4)
@@ -403,7 +440,7 @@ struct dn_heap_32 {
 
 struct dn_flow_queue_32 {
     user32_addr_t next ;
-    struct ipfw_flow_id id ;
+    struct ip_flow_id id ;
 
     user32_addr_t head, tail ; /* queue of packets */
     u_int len ;
@@ -454,7 +491,7 @@ struct dn_flow_set_32 {
     int qsize ;                        /* queue size in slots or bytes */
     int plr ;                  /* pkt loss rate (2^31-1 means 100%) */
        
-    struct ipfw_flow_id flow_mask ;
+    struct ip_flow_id flow_mask ;
        
     /* hash table of queues onto this flow_set */
     int rq_size ;              /* number of slots */
@@ -528,7 +565,7 @@ struct dn_heap_64 {
 
 struct dn_flow_queue_64 {
     user64_addr_t next ;
-    struct ipfw_flow_id id ;
+    struct ip_flow_id id ;
 
     user64_addr_t head, tail ; /* queue of packets */
     u_int len ;
@@ -579,7 +616,7 @@ struct dn_flow_set_64 {
     int qsize ;                        /* queue size in slots or bytes */
     int plr ;                  /* pkt loss rate (2^31-1 means 100%) */
        
-    struct ipfw_flow_id flow_mask ;
+    struct ip_flow_id flow_mask ;
        
     /* hash table of queues onto this flow_set */
     int rq_size ;              /* number of slots */
@@ -654,7 +691,7 @@ ip_dn_claim_rule(struct mbuf *m)
                                                                          KERNEL_TAG_TYPE_DUMMYNET, NULL);
        if (mtag != NULL) {
                mtag->m_tag_type = KERNEL_TAG_TYPE_NONE;
-               return (((struct dn_pkt_tag *)(mtag+1))->rule);
+               return (((struct dn_pkt_tag *)(mtag+1))->dn_ipfw_rule);
        } else
                return (NULL);
 }
index 0d487326a28b0c1d3b3908bf2dd608e0ada99894..db393938ff605240536e6a357bec5dee4a2d860f 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -88,6 +88,7 @@
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/errno.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
@@ -171,6 +172,9 @@ encap4_input(m, off)
        va_end(ap);
 #endif
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
 #ifdef __APPLE__
        proto = ip->ip_p;
@@ -268,8 +272,10 @@ encap6_input(struct mbuf **mp, int *offp, int proto)
        struct encaptab *ep, *match;
        int prio, matchprio;
 
-       ip6 = mtod(m, struct ip6_hdr *);
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
+       ip6 = mtod(m, struct ip6_hdr *);
        bzero(&s, sizeof(s));
        s.sin6_family = AF_INET6;
        s.sin6_len = sizeof(struct sockaddr_in6);
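
The MBUF_STRICT_DATA_ALIGNMENT_CHECK_32 calls added above and throughout this commit verify that the mbuf data pointer is 32-bit aligned before it is cast to a header structure; on strict-alignment CPUs a misaligned word load can trap. A user-space sketch of the underlying test (the predicate name below is made up; only the alignment math matters):

#include <stdint.h>
#include <stdio.h>

/* True when p sits on a 4-byte boundary, the property the
 * strict-align checks enforce before mtod(m, struct ip *). */
#define IS_32BIT_ALIGNED(p)	((((uintptr_t)(p)) & 3) == 0)

int main(void)
{
	char buf[8] __attribute__((aligned(4)));

	printf("buf+0: %d\n", IS_32BIT_ALIGNED(buf));      /* 1 */
	printf("buf+1: %d\n", IS_32BIT_ALIGNED(buf + 1));  /* 0 */
	return 0;
}
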
diff --git a/bsd/netinet/ip_flowid.h b/bsd/netinet/ip_flowid.h
new file mode 100644
index 0000000..1fe2103
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef __IP_FLOWID_H__
+#define __IP_FLOWID_H__
+
+#include <sys/types.h>
+#include <netinet/in.h>
+
+/*
+ * This structure is used as a flow mask and a flow id for various
+ * parts of the code.
+ */
+struct ip_flow_id {
+       u_int32_t       dst_ip;
+       u_int32_t       src_ip;
+       u_int16_t       dst_port;
+       u_int16_t       src_port;
+       u_int8_t        proto;
+       u_int8_t        flags;  /* protocol-specific flags */
+       u_int8_t        addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */
+       struct in6_addr dst_ip6;        /* could also store MAC addr! */
+       struct in6_addr src_ip6;
+       u_int32_t       flow_id6;
+       u_int32_t       frag_id6;
+};
+
+#define IS_IP6_FLOW_ID(id)      ((id)->addr_type == 6)
+
+#ifdef KERNEL
+struct route_in6;
+struct sockaddr_in6;
+struct pf_rule;
+struct ip_fw;
+
+/*
+ * Arguments for calling ipfw_chk() and dummynet_io(). We put them
+ * all into a structure because this way it is easier and more
+ * efficient to pass variables around and extend the interface.
+ */
+struct ip_fw_args {
+       struct mbuf             *fwa_m;         /* the mbuf chain               */
+       struct ifnet            *fwa_oif;       /* output interface             */
+       struct sockaddr_in      *fwa_next_hop;  /* forward address              */
+       struct ip_fw            *fwa_ipfw_rule; /* matching IPFW rule           */
+       struct pf_rule          *fwa_pf_rule;   /* matching PF rule             */
+       struct ether_header     *fwa_eh;        /* for bridged packets          */
+       int                     fwa_flags;      /* for dummynet                 */
+       int                     fwa_oflags;     /* for dummynet         */
+       union {
+               struct ip_out_args  *_fwa_ipoa;     /* for dummynet                */
+               struct ip6_out_args *_fwa_ip6oa;    /* for dummynet               */
+       } fwa_ipoa_;
+       union {
+               struct route        *_fwa_ro;       /* for dummynet         */
+               struct route_in6    *_fwa_ro6;      /* for dummynet         */
+       } fwa_ro_;
+       union {
+               struct sockaddr_in  *_fwa_dst;      /* for dummynet         */
+               struct sockaddr_in6 *_fwa_dst6;     /* for IPv6 dummynet         */
+       } fwa_dst_;
+       struct route_in6        *fwa_ro6_pmtu;  /* for IPv6 output */
+       struct ifnet            *fwa_origifp;   /* for IPv6 output */
+       u_int32_t               fwa_mtu;        /* for IPv6 output */
+       int                     fwa_alwaysfrag; /* for IPv6 output */
+       u_int32_t               fwa_unfragpartlen;  /* for IPv6 output */
+       struct ip6_exthdrs      *fwa_exthdrs;   /* for IPv6 output */
+       struct ip_flow_id       fwa_id;         /* grabbed from IP header       */
+       u_int16_t               fwa_divert_rule;/* divert cookie                */
+       u_int32_t               fwa_cookie;
+};
+#define fwa_ipoa fwa_ipoa_._fwa_ipoa
+#define fwa_ip6oa fwa_ipoa_._fwa_ip6oa
+#define fwa_ro fwa_ro_._fwa_ro
+#define fwa_ro6 fwa_ro_._fwa_ro6
+#define fwa_dst fwa_dst_._fwa_dst
+#define fwa_dst6 fwa_dst_._fwa_dst6
+
+#endif /* KERNEL */
+
+#endif /* __IP_FLOWID_H__ */
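
For readers of the new header: struct ip_flow_id at the top of this file is the protocol-neutral flow tuple (plus IPv6 extras) that replaces the ipfw-private ipfw_flow_id, and ipfw_chk() later in this commit fills it with host-order values. A trimmed user-space sketch of that population step:

#include <stdio.h>
#include <sys/types.h>
#include <arpa/inet.h>

/* IPv4-relevant subset of struct ip_flow_id above */
struct flow_id {
	u_int32_t dst_ip, src_ip;       /* host byte order */
	u_int16_t dst_port, src_port;   /* host byte order */
	u_int8_t  proto;
	u_int8_t  addr_type;            /* 4 = ipv4, 6 = ipv6 */
};

int main(void)
{
	/* wire-order values, as read out of the IP/TCP headers */
	u_int32_t src = inet_addr("10.0.0.1"), dst = inet_addr("10.0.0.2");
	u_int16_t sport = htons(49152), dport = htons(80);

	struct flow_id id = {
		.src_ip = ntohl(src),      .dst_ip = ntohl(dst),
		.src_port = ntohs(sport),  .dst_port = ntohs(dport),
		.proto = 6,                .addr_type = 4,  /* TCP over IPv4 */
	};
	printf("%u:%u -> %u:%u proto %u\n", id.src_ip, id.src_port,
	    id.dst_ip, id.dst_port, id.proto);
	return 0;
}
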
index bb66be5d7b64b5197be9afae9fa9913802846b4e..a8422fa31feb1040923006531cfc8e340715f3db 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -71,6 +71,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
@@ -79,6 +80,7 @@
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <sys/kern_event.h>
+#include <sys/kauth.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -140,8 +142,6 @@ int fw_verbose;
 static int verbose_limit;
 extern int fw_bypass;
 
-#define        IPFW_DEFAULT_RULE       65535
-
 #define IPFW_RULE_INACTIVE 1
 
 /*
@@ -301,14 +301,11 @@ static ip_fw_chk_t        ipfw_chk;
 lck_grp_t         *ipfw_mutex_grp;
 lck_grp_attr_t    *ipfw_mutex_grp_attr;
 lck_attr_t        *ipfw_mutex_attr;
-lck_mtx_t         *ipfw_mutex;
+decl_lck_mtx_data(,ipfw_mutex_data);
+lck_mtx_t         *ipfw_mutex = &ipfw_mutex_data;
 
 extern  void    ipfwsyslog( int level, const char *format,...);
 
-#if DUMMYNET
-ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL;     /* hook into dummynet */
-#endif /* DUMMYNET */
-
 #define KEV_LOG_SUBCLASS 10
 #define IPFWLOGEVENT    0
 
@@ -350,7 +347,10 @@ void    ipfwsyslog( int level, const char *format,...)
         ev_msg.event_code         = IPFWLOGEVENT;
 
        /* get rid of the trailing \n */
-       dptr[loglen-1] = 0;
+       if (loglen < msgsize)
+               dptr[loglen-1] = 0;
+       else
+               dptr[msgsize-1] = 0;
 
         pri = LOG_PRI(level);
 
@@ -693,6 +693,19 @@ copyfrom64fw( struct ip_fw_64 *fw64, struct ip_fw *user_ip_fw, size_t copysize)
        return( sizeof(struct ip_fw) + cmdsize - 4);
 }
 
+void
+externalize_flow_id(struct ipfw_flow_id *dst, struct ip_flow_id *src);
+void
+externalize_flow_id(struct ipfw_flow_id *dst, struct ip_flow_id *src)
+{
+       dst->dst_ip = src->dst_ip;
+       dst->src_ip = src->src_ip;
+       dst->dst_port = src->dst_port;
+       dst->src_port = src->src_port;
+       dst->proto = src->proto;
+       dst->flags = src->flags;
+}
+
 static
 void cp_dyn_to_comp_32( struct ipfw_dyn_rule_compat_32 *dyn_rule_vers1, int *len)
 {
@@ -704,8 +717,8 @@ void cp_dyn_to_comp_32( struct ipfw_dyn_rule_compat_32 *dyn_rule_vers1, int *len
                for (i = 0; i < curr_dyn_buckets; i++) {
                        for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) {
                                dyn_rule_vers1->chain = (user32_addr_t)(p->rule->rulenum);
-                               dyn_rule_vers1->id = p->id;
-                               dyn_rule_vers1->mask = p->id;
+                               externalize_flow_id(&dyn_rule_vers1->id, &p->id);
+                               externalize_flow_id(&dyn_rule_vers1->mask, &p->id);
                                dyn_rule_vers1->type = p->dyn_type;
                                dyn_rule_vers1->expire = p->expire;
                                dyn_rule_vers1->pcnt = p->pcnt;
@@ -739,8 +752,8 @@ void cp_dyn_to_comp_64( struct ipfw_dyn_rule_compat_64 *dyn_rule_vers1, int *len
                for (i = 0; i < curr_dyn_buckets; i++) {
                        for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) {
                                dyn_rule_vers1->chain = (user64_addr_t) p->rule->rulenum;
-                               dyn_rule_vers1->id = p->id;
-                               dyn_rule_vers1->mask = p->id;
+                               externalize_flow_id(&dyn_rule_vers1->id, &p->id);
+                               externalize_flow_id(&dyn_rule_vers1->mask, &p->id);
                                dyn_rule_vers1->type = p->dyn_type;
                                dyn_rule_vers1->expire = p->expire;
                                dyn_rule_vers1->pcnt = p->pcnt;
@@ -1239,7 +1252,7 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh,
  * and we want to find both in the same bucket.
  */
 static __inline int
-hash_packet(struct ipfw_flow_id *id)
+hash_packet(struct ip_flow_id *id)
 {
        u_int32_t i;
 
@@ -1355,7 +1368,7 @@ next:
  * lookup a dynamic rule.
  */
 static ipfw_dyn_rule *
-lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
+lookup_dyn_rule(struct ip_flow_id *pkt, int *match_direction,
        struct tcphdr *tcp)
 {
        /*
@@ -1527,7 +1540,7 @@ realloc_dynamic_table(void)
  * - "parent" rules for the above (O_LIMIT_PARENT).
  */
 static ipfw_dyn_rule *
-add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
+add_dyn_rule(struct ip_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
 {
        ipfw_dyn_rule *r;
        int i;
@@ -1585,7 +1598,7 @@ add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
  * If the lookup fails, then install one.
  */
 static ipfw_dyn_rule *
-lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
+lookup_dyn_parent(struct ip_flow_id *pkt, struct ip_fw *rule)
 {
        ipfw_dyn_rule *q;
        int i;
@@ -1629,10 +1642,10 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 
        DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n",
            cmd->o.opcode,
-           (args->f_id.src_ip), (args->f_id.src_port),
-           (args->f_id.dst_ip), (args->f_id.dst_port) );)
+           (args->fwa_id.src_ip), (args->fwa_id.src_port),
+           (args->fwa_id.dst_ip), (args->fwa_id.dst_port) );)
 
-       q = lookup_dyn_rule(&args->f_id, NULL, NULL);
+       q = lookup_dyn_rule(&args->fwa_id, NULL, NULL);
 
        if (q != NULL) { /* should never occur */
                if (last_log != timenow.tv_sec) {
@@ -1658,13 +1671,13 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 
        switch (cmd->o.opcode) {
        case O_KEEP_STATE: /* bidir rule */
-               add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
+               add_dyn_rule(&args->fwa_id, O_KEEP_STATE, rule);
                break;
 
        case O_LIMIT: /* limit number of sessions */
            {
                u_int16_t limit_mask = cmd->limit_mask;
-               struct ipfw_flow_id id;
+               struct ip_flow_id id;
                ipfw_dyn_rule *parent;
 
                DEB(printf("ipfw: installing dyn-limit rule %d\n",
@@ -1672,16 +1685,16 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 
                id.dst_ip = id.src_ip = 0;
                id.dst_port = id.src_port = 0;
-               id.proto = args->f_id.proto;
+               id.proto = args->fwa_id.proto;
 
                if (limit_mask & DYN_SRC_ADDR)
-                       id.src_ip = args->f_id.src_ip;
+                       id.src_ip = args->fwa_id.src_ip;
                if (limit_mask & DYN_DST_ADDR)
-                       id.dst_ip = args->f_id.dst_ip;
+                       id.dst_ip = args->fwa_id.dst_ip;
                if (limit_mask & DYN_SRC_PORT)
-                       id.src_port = args->f_id.src_port;
+                       id.src_port = args->fwa_id.src_port;
                if (limit_mask & DYN_DST_PORT)
-                       id.dst_port = args->f_id.dst_port;
+                       id.dst_port = args->fwa_id.dst_port;
                parent = lookup_dyn_parent(&id, rule);
                if (parent == NULL) {
                        printf("ipfw: add parent failed\n");
@@ -1701,14 +1714,14 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
                                return 1;
                        }
                }
-               add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
+               add_dyn_rule(&args->fwa_id, O_LIMIT, (struct ip_fw *)parent);
            }
                break;
        default:
                printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode);
                return 1;
        }
-       lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */
+       lookup_dyn_rule(&args->fwa_id, NULL, NULL); /* XXX just set lifetime */
        return 0;
 }
 
@@ -1719,7 +1732,7 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
  * Otherwise we are sending a keepalive, and flags & TH_
  */
 static struct mbuf *
-send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
+send_pkt(struct ip_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
 {
        struct mbuf *m;
        struct ip *ip;
@@ -1803,35 +1816,35 @@ send_reject(struct ip_fw_args *args, int code, int offset, __unused int ip_len)
 
        if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
                /* We need the IP header in host order for icmp_error(). */
-               if (args->eh != NULL) {
-                       struct ip *ip = mtod(args->m, struct ip *);
+               if (args->fwa_eh != NULL) {
+                       struct ip *ip = mtod(args->fwa_m, struct ip *);
                        ip->ip_len = ntohs(ip->ip_len);
                        ip->ip_off = ntohs(ip->ip_off);
                }
-               args->m->m_flags |= M_SKIP_FIREWALL;
-               icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
-       } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
+               args->fwa_m->m_flags |= M_SKIP_FIREWALL;
+               icmp_error(args->fwa_m, ICMP_UNREACH, code, 0L, 0);
+       } else if (offset == 0 && args->fwa_id.proto == IPPROTO_TCP) {
                struct tcphdr *const tcp =
-                   L3HDR(struct tcphdr, mtod(args->m, struct ip *));
+                   L3HDR(struct tcphdr, mtod(args->fwa_m, struct ip *));
                if ( (tcp->th_flags & TH_RST) == 0) {
                        struct mbuf *m;
-                       
-                       m = send_pkt(&(args->f_id), ntohl(tcp->th_seq),
+
+                       m = send_pkt(&(args->fwa_id), ntohl(tcp->th_seq),
                                ntohl(tcp->th_ack),
                                tcp->th_flags | TH_RST);
                        if (m != NULL) {
                                struct route sro;       /* fake route */
-                               
+
                                bzero (&sro, sizeof (sro));
                                ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL);
                                if (sro.ro_rt)
                                        RTFREE(sro.ro_rt);
                        }
                }
-               m_freem(args->m);
+               m_freem(args->fwa_m);
        } else
-               m_freem(args->m);
-       args->m = NULL;
+               m_freem(args->fwa_m);
+       args->fwa_m = NULL;
 }
 
 /**
@@ -1877,18 +1890,18 @@ lookup_next_rule(struct ip_fw *me)
  *
  * Parameters:
  *
- *     args->m (in/out) The packet; we set to NULL when/if we nuke it.
+ *     args->fwa_m     (in/out) The packet; we set to NULL when/if we nuke it.
  *             Starts with the IP header.
- *     args->eh (in)   Mac header if present, or NULL for layer3 packet.
- *     args->oif       Outgoing interface, or NULL if packet is incoming.
+ *     args->fwa_eh (in)       Mac header if present, or NULL for layer3 packet.
+ *     args->fwa_oif   Outgoing interface, or NULL if packet is incoming.
  *             The incoming interface is in the mbuf. (in)
- *     args->divert_rule (in/out)
+ *     args->fwa_divert_rule (in/out)
  *             Skip up to the first rule past this rule number;
  *             upon return, non-zero port number for divert or tee.
  *
- *     args->rule      Pointer to the last matching rule (in/out)
- *     args->next_hop  Socket we are forwarding to (out).
- *     args->f_id      Addresses grabbed from the packet (out)
+ *     args->fwa_ipfw_rule     Pointer to the last matching rule (in/out)
+ *     args->fwa_next_hop      Socket we are forwarding to (out).
+ *     args->fwa_id    Addresses grabbed from the packet (out)
  *
  * Return value:
  *
@@ -1917,10 +1930,10 @@ ipfw_chk(struct ip_fw_args *args)
         * the implementation of the various instructions to make sure
         * that they still work.
         *
-        * args->eh     The MAC header. It is non-null for a layer2
+        * args->fwa_eh The MAC header. It is non-null for a layer2
         *      packet, it is NULL for a layer-3 packet.
         *
-        * m | args->m  Pointer to the mbuf, as received from the caller.
+        * m | args->fwa_m      Pointer to the mbuf, as received from the caller.
         *      It may change if ipfw_chk() does an m_pullup, or if it
         *      consumes the packet because it calls send_reject().
         *      XXX This has to change, so that ipfw_chk() never modifies
@@ -1929,16 +1942,16 @@ ipfw_chk(struct ip_fw_args *args)
         *      in sync with it (the packet is  supposed to start with
         *      the ip header).
         */
-       struct mbuf *m = args->m;
+       struct mbuf *m = args->fwa_m;
        struct ip *ip = mtod(m, struct ip *);
 
        /*
-        * oif | args->oif      If NULL, ipfw_chk has been called on the
+        * oif | args->fwa_oif  If NULL, ipfw_chk has been called on the
         *      inbound path (ether_input, bdg_forward, ip_input).
         *      If non-NULL, ipfw_chk has been called on the outbound path
         *      (ether_output, ip_output).
         */
-       struct ifnet *oif = args->oif;
+       struct ifnet *oif = args->fwa_oif;
 
        struct ip_fw *f = NULL;         /* matching rule */
        int retval = 0;
@@ -2003,23 +2016,23 @@ ipfw_chk(struct ip_fw_args *args)
         */
 
        pktlen = m->m_pkthdr.len;
-       if (args->eh == NULL ||         /* layer 3 packet */
+       if (args->fwa_eh == NULL ||             /* layer 3 packet */
                ( m->m_pkthdr.len >= sizeof(struct ip) &&
-                   ntohs(args->eh->ether_type) == ETHERTYPE_IP))
+                   ntohs(args->fwa_eh->ether_type) == ETHERTYPE_IP))
                        hlen = ip->ip_hl << 2;
 
        /*
         * Collect parameters into local variables for faster matching.
         */
        if (hlen == 0) {        /* do not grab addresses for non-ip pkts */
-               proto = args->f_id.proto = 0;   /* mark f_id invalid */
+               proto = args->fwa_id.proto = 0; /* mark f_id invalid */
                goto after_ip_checks;
        }
 
-       proto = args->f_id.proto = ip->ip_p;
+       proto = args->fwa_id.proto = ip->ip_p;
        src_ip = ip->ip_src;
        dst_ip = ip->ip_dst;
-       if (args->eh != NULL) { /* layer 2 packets are as on the wire */
+       if (args->fwa_eh != NULL) { /* layer 2 packets are as on the wire */
                offset = ntohs(ip->ip_off) & IP_OFFMASK;
                ip_len = ntohs(ip->ip_len);
        } else {
@@ -2031,7 +2044,7 @@ ipfw_chk(struct ip_fw_args *args)
 #define PULLUP_TO(len)                                         \
                do {                                            \
                        if ((m)->m_len < (len)) {               \
-                           args->m = m = m_pullup(m, (len));   \
+                           args->fwa_m = m = m_pullup(m, (len));       \
                            if (m == 0)                         \
                                goto pullup_failed;             \
                            ip = mtod(m, struct ip *);          \
@@ -2048,7 +2061,7 @@ ipfw_chk(struct ip_fw_args *args)
                        tcp = L3HDR(struct tcphdr, ip);
                        dst_port = tcp->th_dport;
                        src_port = tcp->th_sport;
-                       args->f_id.flags = tcp->th_flags;
+                       args->fwa_id.flags = tcp->th_flags;
                        }
                        break;
 
@@ -2065,7 +2078,7 @@ ipfw_chk(struct ip_fw_args *args)
 
                case IPPROTO_ICMP:
                        PULLUP_TO(hlen + 4);    /* type, code and checksum. */
-                       args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
+                       args->fwa_id.flags = L3HDR(struct icmp, ip)->icmp_type;
                        break;
 
                default:
@@ -2074,13 +2087,13 @@ ipfw_chk(struct ip_fw_args *args)
 #undef PULLUP_TO
        }
 
-       args->f_id.src_ip = ntohl(src_ip.s_addr);
-       args->f_id.dst_ip = ntohl(dst_ip.s_addr);
-       args->f_id.src_port = src_port = ntohs(src_port);
-       args->f_id.dst_port = dst_port = ntohs(dst_port);
+       args->fwa_id.src_ip = ntohl(src_ip.s_addr);
+       args->fwa_id.dst_ip = ntohl(dst_ip.s_addr);
+       args->fwa_id.src_port = src_port = ntohs(src_port);
+       args->fwa_id.dst_port = dst_port = ntohs(dst_port);
 
 after_ip_checks:
-       if (args->rule) {
+       if (args->fwa_ipfw_rule) {
                /*
                 * Packet has already been tagged. Look for the next rule
                 * to restart processing.
@@ -2094,18 +2107,18 @@ after_ip_checks:
                        return 0;
                }
 
-               f = args->rule->next_rule;
+               f = args->fwa_ipfw_rule->next_rule;
                if (f == NULL)
-                       f = lookup_next_rule(args->rule);
+                       f = lookup_next_rule(args->fwa_ipfw_rule);
        } else {
                /*
                 * Find the starting rule. It can be either the first
                 * one, or the one after divert_rule if asked so.
                 */
-               int skipto = args->divert_rule;
+               int skipto = args->fwa_divert_rule;
 
                f = layer3_chain;
-               if (args->eh == NULL && skipto != 0) {
+               if (args->fwa_eh == NULL && skipto != 0) {
                        if (skipto >= IPFW_DEFAULT_RULE) {
                                lck_mtx_unlock(ipfw_mutex);
                                return(IP_FW_PORT_DENY_FLAG); /* invalid */
@@ -2118,7 +2131,7 @@ after_ip_checks:
                        }
                }
        }
-       args->divert_rule = 0;  /* reset to avoid confusion later */
+       args->fwa_divert_rule = 0;      /* reset to avoid confusion later */
 
        /*
         * Now scan the rules, and parse microinstructions for each rule.
@@ -2224,7 +2237,7 @@ check_body:
                                if (cmd->opcode == O_UID) {
                                        match = 
 #ifdef __APPLE__
-                                               (pcb->inp_socket->so_uid == (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
+                                               (kauth_cred_getuid(pcb->inp_socket->so_cred) == (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
 #else
                                                !socheckuid(pcb->inp_socket,
                                                   (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
@@ -2258,12 +2271,12 @@ check_body:
                                break;
 
                        case O_MACADDR2:
-                               if (args->eh != NULL) { /* have MAC header */
+                               if (args->fwa_eh != NULL) {     /* have MAC header */
                                        u_int32_t *want = (u_int32_t *)
                                                ((ipfw_insn_mac *)cmd)->addr;
                                        u_int32_t *mask = (u_int32_t *)
                                                ((ipfw_insn_mac *)cmd)->mask;
-                                       u_int32_t *hdr = (u_int32_t *)args->eh;
+                                       u_int32_t *hdr = (u_int32_t *)args->fwa_eh;
 
                                        match =
                                            ( want[0] == (hdr[0] & mask[0]) &&
@@ -2273,9 +2286,9 @@ check_body:
                                break;
 
                        case O_MAC_TYPE:
-                               if (args->eh != NULL) {
+                               if (args->fwa_eh != NULL) {
                                        u_int16_t t =
-                                           ntohs(args->eh->ether_type);
+                                           ntohs(args->fwa_eh->ether_type);
                                        u_int16_t *p =
                                            ((ipfw_insn_u16 *)cmd)->ports;
                                        int i;
@@ -2295,7 +2308,7 @@ check_body:
                                break;
 
                        case O_LAYER2:
-                               match = (args->eh != NULL);
+                               match = (args->fwa_eh != NULL);
                                break;
 
                        case O_PROTO:
@@ -2341,8 +2354,8 @@ check_body:
                                        u_int32_t *d = (u_int32_t *)(cmd+1);
                                        u_int32_t addr =
                                            cmd->opcode == O_IP_DST_SET ?
-                                               args->f_id.dst_ip :
-                                               args->f_id.src_ip;
+                                               args->fwa_id.dst_ip :
+                                               args->fwa_id.src_ip;
 
                                            if (addr < d[0])
                                                    break;
@@ -2478,7 +2491,7 @@ check_body:
 
                        case O_LOG:
                                if (fw_verbose)
-                                       ipfw_log(f, hlen, args->eh, m, oif);
+                                       ipfw_log(f, hlen, args->fwa_eh, m, oif);
                                match = 1;
                                break;
 
@@ -2564,7 +2577,7 @@ check_body:
                                 * to be run first).
                                 */
                                if (dyn_dir == MATCH_UNKNOWN &&
-                                   (q = lookup_dyn_rule(&args->f_id,
+                                   (q = lookup_dyn_rule(&args->fwa_id,
                                     &dyn_dir, proto == IPPROTO_TCP ?
                                        L3HDR(struct tcphdr, ip) : NULL))
                                        != NULL) {
@@ -2596,15 +2609,15 @@ check_body:
 
                        case O_PIPE:
                        case O_QUEUE:
-                               args->rule = f; /* report matching rule */
+                               args->fwa_ipfw_rule = f; /* report matching rule */
                                retval = cmd->arg1 | IP_FW_PORT_DYNT_FLAG;
                                goto done;
 
                        case O_DIVERT:
                        case O_TEE:
-                               if (args->eh) /* not on layer 2 */
+                               if (args->fwa_eh) /* not on layer 2 */
                                        break;
-                               args->divert_rule = f->rulenum;
+                               args->fwa_divert_rule = f->rulenum;
                                retval = (cmd->opcode == O_DIVERT) ?
                                    cmd->arg1 :
                                    cmd->arg1 | IP_FW_PORT_TEE_FLAG;
@@ -2636,7 +2649,7 @@ check_body:
                                    !IN_MULTICAST(dst_ip.s_addr)) {
                                        send_reject(args, cmd->arg1,
                                            offset,ip_len);
-                                       m = args->m;
+                                       m = args->fwa_m;
                                }
                                /* FALLTHROUGH */
                        case O_DENY:
@@ -2644,10 +2657,10 @@ check_body:
                                goto done;
 
                        case O_FORWARD_IP:
-                               if (args->eh)   /* not valid on layer2 pkts */
+                               if (args->fwa_eh)       /* not valid on layer2 pkts */
                                        break;
                                if (!q || dyn_dir == MATCH_FORWARD)
-                                       args->next_hop =
+                                       args->fwa_next_hop =
                                            &((ipfw_insn_sa *)cmd)->sa;
                                retval = 0;
                                goto done;
@@ -2843,7 +2856,7 @@ delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule)
 
 #if DUMMYNET
        if (DUMMYNET_LOADED)
-               ip_dn_ruledel_ptr(rule);
+               dn_ipfw_rule_delete(rule);
 #endif /* DUMMYNET */
        _FREE(rule, M_IPFW);
        return n;
@@ -3505,7 +3518,7 @@ ipfw_ctl(struct sockopt *sopt)
                                                ipfw_dyn_dst->parent = CAST_DOWN(user64_addr_t, p->parent);
                                                ipfw_dyn_dst->pcnt = p->pcnt;
                                                ipfw_dyn_dst->bcnt = p->bcnt;
-                                               ipfw_dyn_dst->id = p->id;
+                                               externalize_flow_id(&ipfw_dyn_dst->id, &p->id);
                                                ipfw_dyn_dst->expire =
                                                        TIME_LEQ(p->expire, timenow.tv_sec) ?
                                                        0 : p->expire - timenow.tv_sec;
@@ -3531,7 +3544,7 @@ ipfw_ctl(struct sockopt *sopt)
                                                ipfw_dyn_dst->parent = CAST_DOWN_EXPLICIT(user32_addr_t, p->parent);
                                                ipfw_dyn_dst->pcnt = p->pcnt;
                                                ipfw_dyn_dst->bcnt = p->bcnt;
-                                               ipfw_dyn_dst->id = p->id;
+                                               externalize_flow_id(&ipfw_dyn_dst->id, &p->id);
                                                ipfw_dyn_dst->expire =
                                                        TIME_LEQ(p->expire, timenow.tv_sec) ?
                                                        0 : p->expire - timenow.tv_sec;
@@ -3915,7 +3928,7 @@ ipfw_tick(__unused void * unused)
                }
        }
        lck_mtx_unlock(ipfw_mutex);
-       
+
        for (m = mnext = m0; m != NULL; m = mnext) {
                struct route sro;       /* fake route */
 
@@ -3939,11 +3952,7 @@ ipfw_init(void)
        ipfw_mutex_grp_attr = lck_grp_attr_alloc_init();
        ipfw_mutex_grp = lck_grp_alloc_init("ipfw", ipfw_mutex_grp_attr);
        ipfw_mutex_attr = lck_attr_alloc_init();
-
-       if ((ipfw_mutex = lck_mtx_alloc_init(ipfw_mutex_grp, ipfw_mutex_attr)) == NULL) {
-               printf("ipfw_init: can't alloc ipfw_mutex\n");
-               return;
-       }
+       lck_mtx_init(ipfw_mutex, ipfw_mutex_grp, ipfw_mutex_attr);
 
        layer3_chain = NULL;
 
index 5e093b170ef63dcba2b842618baeec008bafab76..10566531d13b6a098c8f540c9caabccae34cd03f 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -415,6 +415,40 @@ struct ipfw_flow_id {
  */
 typedef struct _ipfw_dyn_rule ipfw_dyn_rule;
 
+#ifdef XNU_KERNEL_PRIVATE
+
+#include <netinet/ip_flowid.h>
+
+/*
+ * Note:
+ * The internal version of "struct _ipfw_dyn_rule" differs from
+ * its external version because the field "id" is of type
+ * "struct ip_flow_id" in the internal version, while the external
+ * version keeps "id" as "struct ipfw_flow_id" for backwards
+ * compatibility reasons.
+ */
+
+struct _ipfw_dyn_rule {
+       ipfw_dyn_rule   *next;          /* linked list of rules.        */
+       struct ip_fw *rule;             /* pointer to rule              */
+       /* 'rule' is used to pass up the rule number (from the parent)  */
+
+       ipfw_dyn_rule *parent;          /* pointer to parent rule       */
+       u_int64_t       pcnt;           /* packet match counter         */
+       u_int64_t       bcnt;           /* byte match counter           */
+       struct ip_flow_id id;           /* (masked) flow id             */
+       u_int32_t       expire;         /* expire time                  */
+       u_int32_t       bucket;         /* which bucket in hash table   */
+       u_int32_t       state;          /* state of this rule (typically a
+                                        * combination of TCP flags)
+                                        */
+       u_int32_t       ack_fwd;        /* most recent ACKs in forward  */
+       u_int32_t       ack_rev;        /* and reverse directions (used */
+                                       /* to generate keepalives)      */
+       u_int16_t       dyn_type;       /* rule type                    */
+       u_int16_t       count;          /* refcount                     */
+};
+#else /* XNU_KERNEL_PRIVATE */
 struct _ipfw_dyn_rule {
        ipfw_dyn_rule   *next;          /* linked list of rules.        */
        struct ip_fw *rule;             /* pointer to rule              */
@@ -435,6 +469,7 @@ struct _ipfw_dyn_rule {
        u_int16_t       dyn_type;       /* rule type                    */
        u_int16_t       count;          /* refcount                     */
 };
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*
  * Definitions for IP option names.
@@ -585,35 +620,20 @@ typedef struct  _ipfw_insn_pipe_32{
 #endif /* KERNEL */
 
 #ifdef KERNEL
+
+#define IPFW_DEFAULT_RULE       65535
+
 #if IPFIREWALL
 
 #define        IP_FW_PORT_DYNT_FLAG    0x10000
 #define        IP_FW_PORT_TEE_FLAG     0x20000
 #define        IP_FW_PORT_DENY_FLAG    0x40000
 
-/*
- * Arguments for calling ipfw_chk() and dummynet_io(). We put them
- * all into a structure because this way it is easier and more
- * efficient to pass variables around and extend the interface.
- */
-struct ip_fw_args {
-       struct mbuf     *m;             /* the mbuf chain               */
-       struct ifnet    *oif;           /* output interface             */
-       struct sockaddr_in *next_hop;   /* forward address              */
-       struct ip_fw    *rule;          /* matching rule                */
-       struct ether_header *eh;        /* for bridged packets          */
-
-       struct route    *ro;            /* for dummynet                 */
-       struct sockaddr_in *dst;        /* for dummynet                 */
-       int flags;                      /* for dummynet                 */
-       struct ip_out_args *ipoa;       /* for dummynet                 */
-
-       struct ipfw_flow_id f_id;       /* grabbed from IP header       */
-       u_int16_t       divert_rule;    /* divert cookie                */
-       u_int32_t       retval;
-};
-//struct ip_fw_args;
-
+#ifdef PRIVATE
+#include <netinet/ip_flowid.h>
+#else
+struct ip_fw_args;
+#endif
 /*
  * Function definitions.
  */
index 712f4924102db8e349857f5f9971463375233dcd..1022e03f1921c4fb453ad9338c586f8d547767f7 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1515,7 +1515,7 @@ ipfw_version_latest_to_one_32(struct ip_fw_32 *curr_rule, struct ip_fw_compat_32
        if (!rule_vers1)
                return;
                
-       bzero(rule_vers1, sizeof(struct ip_fw_compat));
+       bzero(rule_vers1, sizeof(struct ip_fw_compat_32));
        
        rule_vers1->version = IP_FW_VERSION_1;
        rule_vers1->context = CAST_DOWN_EXPLICIT(user32_addr_t,curr_rule->context);
@@ -1541,7 +1541,7 @@ ipfw_version_latest_to_one_64(struct ip_fw_64 *curr_rule, struct ip_fw_compat_64
        if (!rule_vers1)
                return;
                
-       bzero(rule_vers1, sizeof(struct ip_fw_compat));
+       bzero(rule_vers1, sizeof(struct ip_fw_compat_64));
        
        rule_vers1->version = IP_FW_VERSION_1;
        rule_vers1->context = CAST_DOWN_EXPLICIT(__uint64_t, curr_rule->context);
index 48ea2f0f56829d31692c85ed73d8a6ab194ae1a5..5760672478de6c0500a3dfd5ecca2368468a2172 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -69,6 +69,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #if IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
-#endif
-
-#if defined(NFAITH) && NFAITH > 0
-#include "faith.h"
-#include <net/if_types.h>
 #endif
 
  /* XXX This one should go in sys/mbuf.h. It is used to avoid that
@@ -144,22 +140,32 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
        &log_redirect, 0, "");
 
 #if ICMP_BANDLIM 
+
+/* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */
+#ifndef CONFIG_ICMP_BANDLIM
+#if !CONFIG_EMBEDDED
+#define CONFIG_ICMP_BANDLIM 250
+#else /* CONFIG_EMBEDDED */
+#define CONFIG_ICMP_BANDLIM 50
+#endif /* CONFIG_EMBEDDED */
+#endif /* CONFIG_ICMP_BANDLIM */
+
 /*    
  * ICMP error-response bandwidth limiting sysctl.  If not enabled, sysctl
  *      variable content is -1 and read-only.
  */     
     
-static int      icmplim = 250;
+static int      icmplim = CONFIG_ICMP_BANDLIM;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED,
        &icmplim, 0, "");
-#else
+
+#else /* ICMP_BANDLIM */
 
 static int      icmplim = -1;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED,
        &icmplim, 0, "");
        
-#endif 
+#endif /* ICMP_BANDLIM */
 
 /*
  * ICMP broadcast echo sysctl
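
The CONFIG_ICMP_BANDLIM block above is the standard "build-time tunable with a compiled-in fallback" shape: if the MASTER config did not define the knob, a platform-appropriate default is chosen, and the sysctl is seeded from it. Minimal sketch:

#include <stdio.h>

/* Fallback when the build system supplies no value, mirroring
 * the CONFIG_ICMP_BANDLIM block above (250 on desktop builds). */
#ifndef CONFIG_ICMP_BANDLIM
#define CONFIG_ICMP_BANDLIM 250
#endif

static int icmplim = CONFIG_ICMP_BANDLIM;  /* exported via sysctl in the kernel */

int main(void)
{
	printf("icmplim default = %d\n", icmplim);
	return 0;
}
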
@@ -192,11 +198,16 @@ icmp_error(
        u_int32_t nextmtu)
 {
        struct ip *oip = mtod(n, struct ip *), *nip;
-       unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+       unsigned oiplen;
        struct icmp *icp;
        struct mbuf *m;
        unsigned icmplen;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(n);
+
+       oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+
 #if ICMPPRINTFS
        if (icmpprintfs)
                printf("icmp_error(%p, %x, %d)\n", oip, type, code);
@@ -212,7 +223,8 @@ icmp_error(
                goto freeit;
        if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
          n->m_len >= oiplen + ICMP_MINLEN &&
-         !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+         !ICMP_INFOTYPE(((struct icmp *)(void *)((caddr_t)oip + oiplen))->
+         icmp_type)) {
                icmpstat.icps_oldicmp++;
                goto freeit;
        }
@@ -312,12 +324,17 @@ icmp_input(struct mbuf *m, int hlen)
 {
        struct icmp *icp;
        struct ip *ip = mtod(m, struct ip *);
-       int icmplen = ip->ip_len;
+       int icmplen;
        int i;
        struct in_ifaddr *ia;
        void (*ctlfunc)(int, struct sockaddr *, void *);
        int code;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
+       icmplen = ip->ip_len;
+
        /*
         * Locate icmp structure in mbuf, and check
         * that not corrupted and of at least minimum length.
@@ -353,21 +370,6 @@ icmp_input(struct mbuf *m, int hlen)
        m->m_len += hlen;
        m->m_data -= hlen;
 
-#if defined(NFAITH) && 0 < NFAITH
-       if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
-               /*
-                * Deliver very specific ICMP type only.
-                */
-               switch (icp->icmp_type) {
-               case ICMP_UNREACH:
-               case ICMP_TIMXCEED:
-                       break;
-               default:
-                       goto freeit;
-               }
-       }
-#endif
-
 #if ICMPPRINTFS
        if (icmpprintfs)
                printf("icmp_input, type %d code %d\n", icp->icmp_type,
@@ -514,7 +516,6 @@ icmp_input(struct mbuf *m, int hlen)
                        goto reflect;
 
        case ICMP_MASKREQ:
-#define        satosin(sa)     ((struct sockaddr_in *)(sa))
                if (icmpmaskrepl == 0)
                        break;
                /*
@@ -810,10 +811,13 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
        int hlen;
        struct icmp *icp;
        struct route ro;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 },
+           IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR };
 
-       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) {
                ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index;
+               ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+       }
 
        hlen = IP_VHL_HL(ip->ip_vhl) << 2;
        m->m_data += hlen;
@@ -1059,9 +1063,6 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
                case IP_PORTRANGE:
                case IP_RECVIF:
                case IP_IPSEC_POLICY:
-#if defined(NFAITH) && NFAITH > 0
-               case IP_FAITH:
-#endif
                case IP_STRIPHDR:
                case IP_RECVTTL:
                case IP_BOUND_IF:
@@ -1092,6 +1093,8 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n
        int icmplen;
 
        if ((inp->inp_flags & INP_HDRINCL) != 0) {
+               /* Expect 32-bit aligned data pointer on strict-align platforms */
+               MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
                /*
                 * This is not raw IP, we liberal only for fields TOS, id and TTL 
                 */
@@ -1141,8 +1144,8 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n
 ours:
                /* Do not trust we got a valid checksum */
                ip->ip_sum = 0;
-               
-               icp = (struct icmp *)(((char *)m->m_data) + hlen);
+
+               icp = (struct icmp *)(void *)(((char *)m->m_data) + hlen);
                icmplen = m->m_pkthdr.len - hlen;
        } else {
                if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {
index 761b4b40c303d74f56ca4bfa58019ab6b794d0c1..6953044bd753bbe4bd4a05bde07d85da5e84ad20 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netkey/key.h>
 #endif
 
-#include "faith.h"
-#if defined(NFAITH) && NFAITH > 0
-#include <net/if_types.h>
-#endif
-
 #if DUMMYNET
 #include <netinet/ip_dummynet.h>
 #endif
 #include <net/pfvar.h>
 #endif /* PF */
 
+#include <netinet/lro_ext.h>
+
 #if IPSEC
 extern int ipsec_bypass;
 extern lck_mtx_t *sadb_mutex;
@@ -155,7 +152,8 @@ extern lck_mtx_t *sadb_mutex;
 lck_grp_t         *sadb_stat_mutex_grp;
 lck_grp_attr_t    *sadb_stat_mutex_grp_attr;
 lck_attr_t        *sadb_stat_mutex_attr;
-lck_mtx_t         *sadb_stat_mutex;
+decl_lck_mtx_data(, sadb_stat_mutex_data);
+lck_mtx_t         *sadb_stat_mutex = &sadb_stat_mutex_data;
 
 #endif
 
@@ -188,9 +186,8 @@ SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
     "Enable accepting source routed IP packets");
 
 static int     ip_keepfaith = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &ip_keepfaith,  0,
-       "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
+SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RD | CTLFLAG_LOCKED,
+       &ip_keepfaith,  0, "");
 
 static int     nipq = 0;       /* total # of reass queues */
 static int     maxnipq;
@@ -213,6 +210,10 @@ int        ip_doscopedroute = 1;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED,
      &ip_doscopedroute, 0, "Enable IPv4 scoped routing");
 
+int    ip_restrictrecvif = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, restrictrecvif, CTLFLAG_RW | CTLFLAG_LOCKED,
+     &ip_restrictrecvif, 0, "Enable inbound interface restrictions");
+
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
@@ -230,7 +231,6 @@ static int  ip_checkinterface = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
-
 #if DIAGNOSTIC
 static int     ipprintfs = 0;
 #endif
@@ -244,7 +244,8 @@ static int  ipqmaxlen = IFQ_MAXLEN;
 static lck_grp_attr_t  *in_ifaddr_rwlock_grp_attr;
 static lck_grp_t       *in_ifaddr_rwlock_grp;
 static lck_attr_t      *in_ifaddr_rwlock_attr;
-lck_rw_t               *in_ifaddr_rwlock;
+decl_lck_rw_data(, in_ifaddr_rwlock_data);
+lck_rw_t               *in_ifaddr_rwlock = &in_ifaddr_rwlock_data;
 
 /* Protected by in_ifaddr_rwlock */
 struct in_ifaddrhead in_ifaddrhead;            /* first inet address */
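
This hunk, like the sadb_stat_mutex one above it, switches from a heap-allocated lock to statically declared lock storage. A sketch of the pattern using only the calls visible in this diff; the example_ names (and the "static" storage class passed to decl_lck_rw_data) are illustrative assumptions:

    /* Lock storage now lives in the data segment... */
    decl_lck_rw_data(static, example_rwlock_data);
    static lck_rw_t *example_rwlock = &example_rwlock_data;

    static void
    example_locks_init(lck_grp_t *grp, lck_attr_t *attr)
    {
            /*
             * ...so initialization cannot fail, unlike the old
             * lck_rw_alloc_init()/lck_mtx_alloc_init() calls whose
             * NULL returns had to be checked (see the ip_init hunk
             * below).
             */
            lck_rw_init(example_rwlock, grp, attr);
    }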
@@ -280,7 +281,6 @@ lck_attr_t          *ip_mutex_attr;
 lck_grp_t              *ip_mutex_grp;
 lck_grp_attr_t         *ip_mutex_grp_attr;
 lck_mtx_t              *inet_domain_mutex;
-extern lck_mtx_t       *domain_proto_mtx;
 
 #if IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW | CTLFLAG_LOCKED,
@@ -300,14 +300,12 @@ ip_fw_chk_t *ip_fw_chk_ptr;
 int fw_enable = 1;
 int fw_bypass = 1;
 int fw_one_pass = 0;
+#endif /* IPFIREWALL */
 
 #if DUMMYNET
 ip_dn_io_t *ip_dn_io_ptr;
 #endif
 
-int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
-#endif /* IPFIREWALL */
-
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local");
 
 struct ip_linklocal_stat ip_linklocal_stat;
@@ -358,6 +356,7 @@ static void ip_fwd_route_copyout(struct ifnet *, struct route *);
 static void ip_fwd_route_copyin(struct ifnet *, struct route *);
 void   ipintr(void);
 void   in_dinit(void);
+static inline u_short ip_cksum(struct mbuf *, int);
 
 #if RANDOM_IP_ID
 extern u_short ip_id;
@@ -367,8 +366,37 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_use_randomid, 0, "Randomize IP packets IDs");
 #endif
 
-#define        satosin(sa)     ((struct sockaddr_in *)(sa))
-#define        ifatoia(ifa)    ((struct in_ifaddr *)(ifa))
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not,
+ * copy the contents of the mbuf chain into a new chain, and free the original
+ * one.  Create some head room in the first mbuf of the new chain, in case
+ * it's needed later on.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define        IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do {                  \
+       if (!IP_HDR_ALIGNED_P(mtod(_m, caddr_t))) {                     \
+               struct mbuf *_n;                                        \
+               struct ifnet *__ifp = (_ifp);                           \
+               atomic_add_64(&(__ifp)->if_alignerrs, 1);               \
+               if (((_m)->m_flags & M_PKTHDR) &&                       \
+                   (_m)->m_pkthdr.header != NULL)                      \
+                       (_m)->m_pkthdr.header = NULL;                   \
+               _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT);        \
+               if (_n == NULL) {                                       \
+                       atomic_add_32(&ipstat.ips_toosmall, 1);         \
+                       m_freem(_m);                                    \
+                       (_m) = NULL;                                    \
+                       _action                                         \
+               } else {                                                \
+                       VERIFY(_n != (_m));                             \
+                       (_m) = _n;                                      \
+               }                                                       \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
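
IP_HDR_ALIGNED_P itself is defined elsewhere in the tree; as a rough user-space approximation of what such a predicate checks (the 4-byte boundary is an assumption inferred from the 32-bit wording above):

    #include <stdbool.h>
    #include <stdint.h>

    static bool
    ip_hdr_aligned_p_sketch(const void *p)
    {
            /* True when p sits on a 32-bit boundary, i.e. a struct ip
             * can safely be read through it on strict-alignment CPUs. */
            return (((uintptr_t)p & 3) == 0);
    }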
 
 /*
  * IP initialization: fill in IP protocol switch table.
@@ -392,7 +420,7 @@ ip_init(void)
                in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock",
                    in_ifaddr_rwlock_grp_attr);
                in_ifaddr_rwlock_attr = lck_attr_alloc_init();
-               in_ifaddr_rwlock = lck_rw_alloc_init(in_ifaddr_rwlock_grp,
+               lck_rw_init(in_ifaddr_rwlock, in_ifaddr_rwlock_grp,
                    in_ifaddr_rwlock_attr);
 
                TAILQ_INIT(&in_ifaddrhead);
@@ -447,10 +475,7 @@ ip_init(void)
                sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", sadb_stat_mutex_grp_attr);
                sadb_stat_mutex_attr = lck_attr_alloc_init();
 
-               if ((sadb_stat_mutex = lck_mtx_alloc_init(sadb_stat_mutex_grp, sadb_stat_mutex_attr)) == NULL) {
-                       printf("ip_init: can't alloc sadb_stat_mutex\n");
-                       return;
-               }
+               lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, sadb_stat_mutex_attr);
 
 #endif
                arp_init();
@@ -540,7 +565,6 @@ in_dinit(void)
 
        if (!inetdomain_initted)
        {
-               /* kprintf("Initing %d protosw entries\n", in_proto_count); */
                dp = &inetdomain;
                dp->dom_flags = DOM_REENTRANT;
 
@@ -549,18 +573,21 @@ in_dinit(void)
                inet_domain_mutex = dp->dom_mtx;
                inetdomain_initted = 1;
        
-               lck_mtx_unlock(domain_proto_mtx);       
+               domain_proto_mtx_unlock(TRUE);
                proto_register_input(PF_INET, ip_proto_input, NULL, 1);
-               lck_mtx_lock(domain_proto_mtx); 
+               domain_proto_mtx_lock();
        }
 }
 
+void
+ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto)
+{
+       ip_proto_dispatch_in(m, hlen, proto, 0); 
+}
+
 __private_extern__ void
-ip_proto_dispatch_in(
-                                       struct mbuf     *m,
-                                       int                     hlen,
-                                       u_int8_t        proto,
-                                       ipfilter_t      inject_ipfref)
+ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto,
+    ipfilter_t inject_ipfref)
 {
        struct ipfilter *filter;
        int seen = (inject_ipfref == 0);
@@ -568,7 +595,7 @@ ip_proto_dispatch_in(
        struct ip *ip;
        void (*pr_input)(struct mbuf *, int len);
 
-       if (!TAILQ_EMPTY(&ipv4_filters)) {      
+       if (!TAILQ_EMPTY(&ipv4_filters)) {
                ipf_ref();
                TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
                        if (seen == 0) {
@@ -576,8 +603,17 @@ ip_proto_dispatch_in(
                                        seen = 1;
                        } else if (filter->ipf_filter.ipf_input) {
                                errno_t result;
-               
+
                                if (changed_header == 0) {
+                                       /*
+                                        * Perform IP header alignment fixup,
+                                        * if needed, before passing packet
+                                        * into filter(s).
+                                        */
+                                       IP_HDR_ALIGNMENT_FIXUP(m,
+                                           m->m_pkthdr.rcvif,
+                                           ipf_unref(); return;);
+
                                        changed_header = 1;
                                        ip = mtod(m, struct ip *);
                                        ip->ip_len = htons(ip->ip_len + hlen);
@@ -586,7 +622,8 @@ ip_proto_dispatch_in(
                                        ip->ip_sum = in_cksum(m, hlen);
                                }
                                result = filter->ipf_filter.ipf_input(
-                                       filter->ipf_filter.cookie, (mbuf_t*)&m, hlen, proto);
+                                   filter->ipf_filter.cookie, (mbuf_t*)&m,
+                                   hlen, proto);
                                if (result == EJUSTRETURN) {
                                        ipf_unref();
                                        return;
@@ -596,10 +633,14 @@ ip_proto_dispatch_in(
                                        m_freem(m);
                                        return;
                                }
-       }
+                       }
                }
                ipf_unref();
        }
+
+       /* Perform IP header alignment fixup (post-filters), if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;);
+
        /*
         * If there isn't a specific lock for the protocol
         * we're about to call, use the generic lock for AF_INET.
@@ -633,12 +674,14 @@ ip_input(struct mbuf *m)
        struct ip *ip;
        struct ipq *fp;
        struct in_ifaddr *ia = NULL;
-       int    hlen, checkif;
-       u_short sum;
+       unsigned int    hlen, checkif;
+       u_short sum = 0;
        struct in_addr pkt_dst;
 #if IPFIREWALL
        int i;
        u_int32_t div_info = 0;         /* packet divert/tee info */
+#endif
+#if IPFIREWALL || DUMMYNET
        struct ip_fw_args args;
        struct m_tag    *tag;
 #endif
@@ -647,12 +690,11 @@ ip_input(struct mbuf *m)
        /* Check if the mbuf is still valid after interface filter processing */
        MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
 
-#if IPFIREWALL
-       args.eh = NULL;
-       args.oif = NULL;
-       args.rule = NULL;
-       args.divert_rule = 0;                   /* divert cookie */
-       args.next_hop = NULL;
+       /* Perform IP header alignment fixup, if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, goto bad;);
+
+#if IPFIREWALL || DUMMYNET
+       bzero(&args, sizeof(struct ip_fw_args));
 
        /*
         * Don't bother searching for tag(s) if there's none.
@@ -667,7 +709,8 @@ ip_input(struct mbuf *m)
                struct dn_pkt_tag       *dn_tag;
 
                dn_tag = (struct dn_pkt_tag *)(tag+1);
-               args.rule = dn_tag->rule;
+               args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
+               args.fwa_pf_rule = dn_tag->dn_pf_rule;
 
                m_tag_delete(m, tag);
        }
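
Throughout this change, args.* accesses migrate to fwa_-prefixed names, and the single dummynet rule pointer splits into ipfw and pf variants. A hypothetical fragment of struct ip_fw_args assembled only from the fields this diff touches; the authoritative declaration (and the member types marked below) live in the ipfw headers:

    #include <sys/types.h>

    struct mbuf;
    struct ifnet;
    struct sockaddr_in;
    struct ip_fw;

    struct ip_fw_args_sketch {
            struct mbuf             *fwa_m;          /* packet chain */
            struct ifnet            *fwa_oif;        /* output interface */
            struct sockaddr_in      *fwa_next_hop;   /* 'ipfw fwd' next hop */
            struct ip_fw            *fwa_ipfw_rule;  /* dummynet: ipfw rule (type assumed) */
            void                    *fwa_pf_rule;    /* dummynet: pf rule (type assumed) */
            u_int16_t                fwa_divert_rule; /* divert cookie */
    };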
@@ -679,7 +722,7 @@ ip_input(struct mbuf *m)
                struct divert_tag       *div_tag;
 
                div_tag = (struct divert_tag *)(tag+1);
-               args.divert_rule = div_tag->cookie;
+               args.fwa_divert_rule = div_tag->cookie;
 
                m_tag_delete(m, tag);
        }
@@ -690,7 +733,7 @@ ip_input(struct mbuf *m)
                struct ip_fwd_tag       *ipfwd_tag;
 
                ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
-               args.next_hop = ipfwd_tag->next_hop;
+               args.fwa_next_hop = ipfwd_tag->next_hop;
 
                m_tag_delete(m, tag);
        }
@@ -700,17 +743,24 @@ ip_input(struct mbuf *m)
                panic("ip_input no HDR");
 #endif
 
-       if (args.rule) {        /* dummynet already filtered us */
+#if DUMMYNET
+       if (args.fwa_ipfw_rule || args.fwa_pf_rule) {   /* dummynet already filtered us */
                ip = mtod(m, struct ip *);
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
                inject_filter_ref = ipf_get_inject_filter(m);
-               goto iphack ;
+#if IPFIREWALL
+               if (args.fwa_ipfw_rule)
+                       goto iphack;
+#endif /* IPFIREWALL */
+               if (args.fwa_pf_rule)
+                       goto check_with_pf;
        }
+#endif /* DUMMYNET */
 ipfw_tags_done:
-#endif /* IPFIREWALL */
+#endif /* IPFIREWALL || DUMMYNET */
 
        /*
-        * No need to proccess packet twice if we've already seen it.
+        * No need to process packet twice if we've already seen it.
         */
        if (!SLIST_EMPTY(&m->m_pkthdr.tags))
                inject_filter_ref = ipf_get_inject_filter(m);
@@ -729,7 +779,6 @@ ipfw_tags_done:
        }
 
        OSAddAtomic(1, &ipstat.ips_total);
-
        if (m->m_pkthdr.len < sizeof(struct ip))
                goto tooshort;
 
@@ -781,38 +830,9 @@ ipfw_tags_done:
                                goto bad;
                }
        }
-       if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0) 
-           || (apple_hwcksum_rx == 0) ||
-          ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) {
-                       m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */
-       }
 
-       if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
-               sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
-       } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
-           apple_hwcksum_tx == 0) {
-               /*
-                * Either this is not loopback packet coming from an interface
-                * that does not support checksum offloading, or it is loopback
-                * packet that has undergone software checksumming at the send
-                * side because apple_hwcksum_tx was set to 0.  In this case,
-                * calculate the checksum in software to validate the packet.
-                */
-               sum = in_cksum(m, hlen);
-       } else {
-               /*
-                * This is a loopback packet without any valid checksum since
-                * the send side has bypassed it (apple_hwcksum_tx set to 1).
-                * We get here because apple_hwcksum_rx was set to 0, and so
-                * we pretend that all is well.
-                */
-               sum = 0;
-               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
-                   CSUM_IP_CHECKED | CSUM_IP_VALID;
-                m->m_pkthdr.csum_data = 0xffff;
-       }
+       sum = ip_cksum(m, hlen);
        if (sum) {
-               OSAddAtomic(1, &ipstat.ips_badsum);
                goto bad;
        }
 
@@ -878,19 +898,27 @@ tooshort:
                        m_adj(m, ip->ip_len - m->m_pkthdr.len);
        }
 
+
+#if DUMMYNET
+check_with_pf:
+#endif
 #if PF
        /* Invoke inbound packet filter */
-       if (PF_IS_ENABLED) { 
+       if (PF_IS_ENABLED) {
                int error;
-               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE);
-               if (error != 0) {
+#if DUMMYNET
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, &args);
+#else
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, NULL);
+#endif /* DUMMYNET */
+               if (error != 0 || m == NULL) {
                        if (m != NULL) {
                                panic("%s: unexpected packet %p\n", __func__, m);
                                /* NOTREACHED */
                        }
                        /* Already freed by callee */
                        return;
-               } 
+               }
                ip = mtod(m, struct ip *);
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
        }
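
The expression hlen = IP_VHL_HL(ip->ip_vhl) << 2 recurs in nearly every hunk here; in isolation, with IP_VHL_HL re-defined locally to match its <netinet/ip.h> definition:

    #include <stdio.h>

    #define IP_VHL_HL(vhl)  ((vhl) & 0x0f)  /* low nibble = header length in words */

    int
    main(void)
    {
            unsigned char vhl = 0x45;                /* version 4, 5 x 32-bit words */
            unsigned int hlen = IP_VHL_HL(vhl) << 2; /* words -> bytes */

            printf("hlen = %u\n", hlen);             /* prints 20 */
            return (0);
    }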
@@ -909,28 +937,20 @@ iphack:
         * Check if we want to allow this packet to be processed.
         * Consider it to be bad if not.
         */
-       if (fr_checkp) {
-               struct  mbuf    *m1 = m;
-
-               if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) {
-                       return;
-               }
-               ip = mtod(m = m1, struct ip *);
-       }
        if (fw_enable && IPFW_LOADED) {
 #if IPFIREWALL_FORWARD
                /*
                 * If we've been forwarded from the output side, then
                 * skip the firewall a second time
                 */
-               if (args.next_hop)
+               if (args.fwa_next_hop)
                        goto ours;
 #endif /* IPFIREWALL_FORWARD */
 
-               args.m = m;
+               args.fwa_m = m;
 
                i = ip_fw_chk_ptr(&args);
-               m = args.m;
+               m = args.fwa_m;
 
                if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
                        if (m)
@@ -939,13 +959,13 @@ iphack:
                }
                ip = mtod(m, struct ip *); /* just in case m changed */
                
-               if (i == 0 && args.next_hop == NULL) {  /* common case */
+               if (i == 0 && args.fwa_next_hop == NULL) {      /* common case */
                        goto pass;
                }
 #if DUMMYNET
                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
                        /* Send packet to the appropriate pipe */
-                       ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
+                       ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args, DN_CLIENT_IPFW);
                        return;
                }
 #endif /* DUMMYNET */
@@ -957,7 +977,7 @@ iphack:
                }
 #endif
 #if IPFIREWALL_FORWARD
-               if (i == 0 && args.next_hop != NULL) {
+               if (i == 0 && args.fwa_next_hop != NULL) {
                        goto pass;
                }
 #endif
@@ -978,7 +998,7 @@ pass:
         */
        ip_nhops = 0;           /* for source routed packets */
 #if IPFIREWALL
-       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop)) {
+       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.fwa_next_hop)) {
 #else
        if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) {
 #endif
@@ -1009,8 +1029,8 @@ pass:
         * changed by use of 'ipfw fwd'.
         */
 #if IPFIREWALL
-       pkt_dst = args.next_hop == NULL ?
-           ip->ip_dst : args.next_hop->sin_addr;
+       pkt_dst = args.fwa_next_hop == NULL ?
+           ip->ip_dst : args.fwa_next_hop->sin_addr;
 #else
        pkt_dst = ip->ip_dst;
 #endif
@@ -1032,7 +1052,7 @@ pass:
        checkif = ip_checkinterface && (ipforwarding == 0) && 
            ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0)
 #if IPFIREWALL
-           && (args.next_hop == NULL);
+           && (args.fwa_next_hop == NULL);
 #else
                ;
 #endif
@@ -1069,7 +1089,6 @@ pass:
        if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
                struct ifaddr *ifa;
                struct ifnet *ifp = m->m_pkthdr.rcvif;
-
                ifnet_lock_shared(ifp);
                TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                        IFA_LOCK_SPIN(ifa);
@@ -1159,19 +1178,6 @@ pass:
                ip = mtod(m, struct ip *); /* in case it changed */
        }
 
-#if defined(NFAITH) && 0 < NFAITH
-       /*
-        * FAITH(Firewall Aided Internet Translator)
-        */
-       if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
-               if (ip_keepfaith) {
-                       if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
-                               goto ours;
-               }
-               m_freem(m);
-               return;
-       }
-#endif
        /*
         * Not for us; forward if possible and desirable.
         */
@@ -1180,7 +1186,7 @@ pass:
                m_freem(m);
        } else {
 #if IPFIREWALL
-               ip_forward(m, 0, args.next_hop);
+               ip_forward(m, 0, args.fwa_next_hop);
 #else
                ip_forward(m, 0, NULL);
 #endif
@@ -1288,13 +1294,13 @@ found:
                /*
                 * Attempt reassembly; if it succeeds, proceed.
                 * ip_reass() will return a different mbuf, and update
-                * the divert info in div_info and args.divert_rule.
+                * the divert info in div_info and args.fwa_divert_rule.
                 */
                        OSAddAtomic(1, &ipstat.ips_fragments);
                        m->m_pkthdr.header = ip;
 #if IPDIVERT
                        m = ip_reass(m, fp, &ipq[sum],
-                           (u_int16_t *)&div_info, &args.divert_rule);
+                           (u_int16_t *)&div_info, &args.fwa_divert_rule);
 #else
                        m = ip_reass(m, fp, &ipq[sum]);
 #endif
@@ -1355,7 +1361,7 @@ found:
 #endif
                /* Deliver packet to divert input routine */
                OSAddAtomic(1, &ipstat.ips_delivered);
-               divert_packet(m, 1, div_info & 0xffff, args.divert_rule);
+               divert_packet(m, 1, div_info & 0xffff, args.fwa_divert_rule);
 
                /* If 'tee', continue with original packet */
                if (clone == NULL) {
@@ -1386,7 +1392,7 @@ found:
        OSAddAtomic(1, &ipstat.ips_delivered);
        {
 #if IPFIREWALL
-               if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
+               if (args.fwa_next_hop && ip->ip_p == IPPROTO_TCP) {
                        /* TCP needs IPFORWARD info if available */
                        struct m_tag *fwd_tag;
                        struct ip_fwd_tag       *ipfwd_tag;
@@ -1399,14 +1405,18 @@ found:
                        }
                        
                        ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
-                       ipfwd_tag->next_hop = args.next_hop;
+                       ipfwd_tag->next_hop = args.fwa_next_hop;
 
                        m_tag_prepend(m, fwd_tag);
        
                        KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, 
                             ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
        
-       
+                       if (sw_lro) {
+                               m = tcp_lro(m, hlen);
+                               if (m == NULL)
+                                       return;
+                       }               
                        /* TCP deals with its own locking */
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                } else {
@@ -1416,6 +1426,11 @@ found:
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                }
 #else
+               if ((sw_lro) && (ip->ip_p == IPPROTO_TCP)) {
+                       m = tcp_lro(m, hlen);
+                       if (m == NULL)
+                               return;
+               }
                ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
 #endif
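
The sw_lro hand-off added in the two hunks above, restated as a kernel-context sketch (not standalone code). The reading that a NULL return means tcp_lro() coalesced the mbuf into a pending LRO flow is an interpretation of these hunks, not a documented contract:

    static void
    lro_dispatch_sketch(struct mbuf *m, int hlen, u_int8_t proto)
    {
            if (sw_lro && proto == IPPROTO_TCP) {
                    m = tcp_lro(m, hlen);
                    if (m == NULL)
                            return; /* consumed by LRO; nothing left to dispatch */
            }
            ip_proto_dispatch_in(m, hlen, proto, 0);
    }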
                
@@ -1819,6 +1834,9 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop)
        struct sockaddr_in ipaddr = {
            sizeof (ipaddr), AF_INET , 0 , { 0 }, { 0, } };
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        dst = ip->ip_dst;
        cp = (u_char *)(ip + 1);
        cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
@@ -1928,9 +1946,10 @@ nosourcerouting:
 
                        if (opt == IPOPT_SSRR) {
 #define        INA     struct in_ifaddr *
-#define        SA      struct sockaddr *
-                           if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) {
-                                       ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+                               if ((ia = (INA)ifa_ifwithdstaddr(
+                                   (struct sockaddr *)&ipaddr)) == 0) {
+                                       ia = (INA)ifa_ifwithnet(
+                                           (struct sockaddr *)&ipaddr);
                                }
                        } else {
                                ia = ip_rtaddr(ipaddr.sin_addr);
@@ -1975,7 +1994,8 @@ nosourcerouting:
                         * locate outgoing interface; if we're the destination,
                         * use the incoming interface (should be same).
                         */
-                       if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0) {
+                       if ((ia = (INA)ifa_ifwithaddr((struct sockaddr *)
+                           &ipaddr)) == 0) {
                                if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
                                        type = ICMP_UNREACH;
                                        code = ICMP_UNREACH_HOST;
@@ -1993,7 +2013,7 @@ nosourcerouting:
 
                case IPOPT_TS:
                        code = cp - (u_char *)ip;
-                       ipt = (struct ip_timestamp *)cp;
+                       ipt = (struct ip_timestamp *)(void *)cp;
                        if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
                                code = (u_char *)&ipt->ipt_len - (u_char *)ip;
                                goto bad;
@@ -2011,7 +2031,7 @@ nosourcerouting:
                                }
                                break;
                        }
-                       sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+                       sin = (struct in_addr *)(void *)(cp + ipt->ipt_ptr - 1);
                        switch (ipt->ipt_flg) {
 
                        case IPOPT_TS_TSONLY:
@@ -2025,8 +2045,8 @@ nosourcerouting:
                                        goto bad;
                                }
                                ipaddr.sin_addr = dst;
-                               ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
-                                                           m->m_pkthdr.rcvif);
+                               ia = (INA)ifaof_ifpforaddr((struct sockaddr *)
+                                   &ipaddr, m->m_pkthdr.rcvif);
                                if (ia == 0)
                                        continue;
                                IFA_LOCK(&ia->ia_ifa);
@@ -2047,7 +2067,8 @@ nosourcerouting:
                                }
                                (void)memcpy(&ipaddr.sin_addr, sin,
                                    sizeof(struct in_addr));
-                               if ((ia = (struct in_ifaddr*)ifa_ifwithaddr((SA)&ipaddr)) == 0)
+                               if ((ia = (struct in_ifaddr*)ifa_ifwithaddr(
+                                   (struct sockaddr *)&ipaddr)) == 0)
                                        continue;
                                IFA_REMREF(&ia->ia_ifa);
                                ia = NULL;
@@ -2090,7 +2111,7 @@ ip_rtaddr(struct in_addr dst)
        struct route ro;
 
        bzero(&ro, sizeof (ro));
-       sin = (struct sockaddr_in *)&ro.ro_dst;
+       sin = (struct sockaddr_in *)(void *)&ro.ro_dst;
        sin->sin_family = AF_INET;
        sin->sin_len = sizeof (*sin);
        sin->sin_addr = dst;
@@ -2173,7 +2194,7 @@ ip_srcroute(void)
        ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
        (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
            &ip_srcrt.nop, OPTSIZ);
-       q = (struct in_addr *)(mtod(m, caddr_t) +
+       q = (struct in_addr *)(void *)(mtod(m, caddr_t) +
            sizeof(struct in_addr) + OPTSIZ);
 #undef OPTSIZ
        /*
@@ -2213,6 +2234,9 @@ ip_stripoptions(struct mbuf *m, __unused struct mbuf *mopt)
        caddr_t opts;
        int olen;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
        opts = (caddr_t)(ip + 1);
        i = m->m_len - (sizeof (struct ip) + olen);
@@ -2331,7 +2355,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
        n_long dest;
        struct in_addr pkt_dst;
        u_int32_t nextmtu = 0;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 };
        struct ifnet *ifp = m->m_pkthdr.rcvif;
 #if PF
        struct pf_mtag *pf_mtag;
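
The ipoa initializer gains a braced middle member, and later hunks read ipoa->ipoa_flags and ipoa->ipoa_flowadv. A hypothetical reconstruction of the updated struct ip_out_args inferred from those uses; the real declaration is in the headers and may differ:

    #include <sys/types.h>

    struct flowadv_sketch {
            u_int32_t       code;   /* only member this diff touches (adv->code) */
    };

    struct ip_out_args_sketch {
            unsigned int            ipoa_boundif;   /* matches the IFSCOPE_NONE initializer */
            struct flowadv_sketch   ipoa_flowadv;   /* matches the { 0 } initializer */
            u_int32_t               ipoa_flags;     /* IPOAF_* flags */
    };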
@@ -2374,13 +2398,15 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 
 #if PF
        pf_mtag = pf_find_mtag(m);
-       if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE)
-               ipoa.ipoa_boundif = pf_mtag->rtableid;
+       if (pf_mtag != NULL && pf_mtag->pftag_rtableid != IFSCOPE_NONE) {
+               ipoa.ipoa_boundif = pf_mtag->pftag_rtableid;
+               ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+       }
 #endif /* PF */
 
        ip_fwd_route_copyout(ifp, &fwd_rt);
 
-       sin = (struct sockaddr_in *)&fwd_rt.ro_dst;
+       sin = (struct sockaddr_in *)(void *)&fwd_rt.ro_dst;
        if (fwd_rt.ro_rt == NULL ||
            fwd_rt.ro_rt->generation_id != route_generation ||
            pkt_dst.s_addr != sin->sin_addr.s_addr) {
@@ -2715,7 +2741,7 @@ ip_savecontrol(
                                goto makedummy;
 
                        IFA_LOCK_SPIN(ifa);
-                       sdp = (struct sockaddr_dl *)ifa->ifa_addr;
+                       sdp = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
                        /*
                         * Change our mind and don't try copy.
                         */
@@ -2749,8 +2775,8 @@ makedummy:
                }
        }
        if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) {
-               int tc = m->m_pkthdr.prio;
-               
+               int tc = m_get_traffic_class(m);
+
                mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc),
                        SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
                if (*mp == NULL) {
@@ -2814,3 +2840,51 @@ ip_rsvp_done(void)
        }
        return 0;
 }
+
+static inline u_short
+ip_cksum(struct mbuf *m, int hlen)
+{
+
+       u_short sum;
+       struct ip *ip;
+
+       ip = mtod(m, struct ip *);
+
+       if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0)
+                   || (apple_hwcksum_rx == 0) ||
+                  ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) {
+               m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */
+
+       }
+
+       if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+               sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+       } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+                   apple_hwcksum_tx == 0) {
+               /*
+                * Either this is not a loopback packet coming from an interface
+                * that does not support checksum offloading, or it is a loopback
+                * packet that has undergone software checksumming at the send
+                * side because apple_hwcksum_tx was set to 0.  In this case,
+                * calculate the checksum in software to validate the packet.
+                */
+               sum = in_cksum(m, hlen);
+       } else {
+               /*
+                * This is a loopback packet without any valid checksum since
+                * the send side has bypassed it (apple_hwcksum_tx set to 1).
+                * We get here because apple_hwcksum_rx was set to 0, and so
+                * we pretend that all is well.
+                */
+               sum = 0;
+               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
+                           CSUM_IP_CHECKED | CSUM_IP_VALID;
+           m->m_pkthdr.csum_data = 0xffff;
+       }
+
+       if (sum) {
+               OSAddAtomic(1, &ipstat.ips_badsum);
+       }
+
+       return sum;
+}
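
in_cksum(), which the new ip_cksum() falls back to, computes the standard Internet checksum (RFC 1071) across the mbuf chain. For reference, a contiguous-buffer user-space version; it assumes a 16-bit-aligned input, which the kernel routine does not require:

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t
    rfc1071_cksum(const void *data, size_t len)
    {
            const uint16_t *p = data;
            uint32_t sum = 0;

            while (len > 1) {               /* sum 16-bit words */
                    sum += *p++;
                    len -= 2;
            }
            if (len == 1)                   /* odd trailing byte */
                    sum += *(const uint8_t *)p;
            while (sum >> 16)               /* fold carries back in */
                    sum = (sum & 0xffff) + (sum >> 16);
            return ((uint16_t)~sum);        /* one's complement of the sum */
    }

Run over a header whose ip_sum field is intact, this yields 0, which is exactly the "if (sum) goto bad" test above.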
index c4530e9947830770c844f6b3b0ecd7557cd7ea8f..aece80368a5a3c04112e6909586f980334e904a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <security/mac_framework.h>
 #endif
 
-#include "faith.h"
-
 #include <net/dlil.h>
 #include <sys/kdebug.h>
 #include <libkern/OSAtomic.h>
                                                  (ntohl(a.s_addr))&0xFF);
 #endif
 
-
 u_short ip_id;
 
 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
@@ -162,8 +159,6 @@ int ip_optcopy(struct ip *, struct ip *);
 void in_delayed_cksum_offset(struct mbuf *, int );
 void in_cksum_offset(struct mbuf* , size_t );
 
-extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
-
 extern struct protosw inetsw[];
 
 extern struct ip_linklocal_stat ip_linklocal_stat;
@@ -252,7 +247,6 @@ ip_output(
  *     ipsec4_getpolicybysock:???      [IPSEC 4th argument, contents modified]
  *     key_spdacquire:???              [IPSEC]
  *     ipsec4_output:???               [IPSEC]
- *     <fr_checkp>:???                 [firewall]
  *     ip_dn_io_ptr:???                [dummynet]
  *     dlil_output:???                 [DLIL]
  *     dlil_output_list:???            [DLIL]
@@ -269,12 +263,11 @@ ip_output_list(
        struct route *ro,
        int flags,
        struct ip_moptions *imo,
-       struct ip_out_args *ipoa
-       )
+       struct ip_out_args *ipoa)
 {
        struct ip *ip;
        struct ifnet *ifp = NULL;
-       struct mbuf *m = m0, **mppn = NULL;
+       struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt;
        int hlen = sizeof (struct ip);
        int len = 0, error = 0;
        struct sockaddr_in *dst = NULL;
@@ -293,9 +286,11 @@ ip_output_list(
 #endif
 #if IPFIREWALL
        int off;
+       struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
+#endif
+#if IPFIREWALL || DUMMYNET
        struct ip_fw_args args;
        struct m_tag    *tag;
-       struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
 #endif
        int didfilter = 0;
        ipfilter_t inject_filter_ref = 0;
@@ -307,9 +302,11 @@ ip_output_list(
        struct mbuf * packetlist;
        int pktcnt = 0, tso = 0;
        u_int32_t       bytecnt = 0;
-       unsigned int ifscope;
-       unsigned int nocell;
-       boolean_t select_srcif;
+       unsigned int ifscope = IFSCOPE_NONE;
+       unsigned int nocell = 0;
+       boolean_t select_srcif, srcbound;
+       struct flowadv *adv = NULL;
+
        KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
 #if IPSEC
@@ -317,12 +314,8 @@ ip_output_list(
 #endif /* IPSEC */
 
        packetlist = m0;
-#if IPFIREWALL
-       args.next_hop = NULL;
-       args.eh = NULL;
-       args.rule = NULL;
-       args.divert_rule = 0;                   /* divert cookie */
-       args.ipoa = NULL;
+#if IPFIREWALL || DUMMYNET
+       bzero(&args, sizeof(struct ip_fw_args));
 
        if (SLIST_EMPTY(&m0->m_pkthdr.tags))
                goto ipfw_tags_done;
@@ -334,18 +327,21 @@ ip_output_list(
                struct dn_pkt_tag       *dn_tag;
 
                dn_tag = (struct dn_pkt_tag *)(tag+1);
-               args.rule = dn_tag->rule;
+               args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
+               args.fwa_pf_rule = dn_tag->dn_pf_rule;
                opt = NULL;
-               saved_route = dn_tag->ro;
+               saved_route = dn_tag->dn_ro;
                ro = &saved_route;
 
                imo = NULL;
                bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf));
                dst = &dst_buf;
-               ifp = dn_tag->ifp;
-               flags = dn_tag->flags;
-               saved_ipoa = dn_tag->ipoa;
-               ipoa = &saved_ipoa;
+               ifp = dn_tag->dn_ifp;
+               flags = dn_tag->dn_flags;
+               if ((dn_tag->dn_flags & IP_OUTARGS)) {
+                       saved_ipoa = dn_tag->dn_ipoa;
+                       ipoa = &saved_ipoa;
+               }
 
                m_tag_delete(m0, tag);
        }
@@ -357,24 +353,27 @@ ip_output_list(
                struct divert_tag       *div_tag;
 
                div_tag = (struct divert_tag *)(tag+1);
-               args.divert_rule = div_tag->cookie;
+               args.fwa_divert_rule = div_tag->cookie;
 
                m_tag_delete(m0, tag);
        }
 #endif /* IPDIVERT */
 
+#if IPFIREWALL
        if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
            KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
                struct ip_fwd_tag       *ipfwd_tag;
 
                ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
                next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
-               
+
                m_tag_delete(m0, tag);
        }
-ipfw_tags_done:
 #endif /* IPFIREWALL */
 
+ipfw_tags_done:
+#endif /* IPFIREWALL || DUMMYNET */
+
        m = m0;
 
 #if    DIAGNOSTIC
@@ -388,34 +387,47 @@ ipfw_tags_done:
        bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
        ippo = &ipf_pktopts;
 
-       /*
-        * At present the IP_OUTARGS flag implies a request for IP to
-        * perform source interface selection.  In the forwarding case,
-        * only the ifscope value is used, as source interface selection
-        * doesn't take place.
-        */
        if (ip_doscopedroute && (flags & IP_OUTARGS)) {
-               select_srcif = !(flags & IP_FORWARDING);
-               ifscope = ipoa->ipoa_boundif;
-               ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
-               ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
+               /*
+                * In the forwarding case, only the ifscope value is used,
+                * as source interface selection doesn't take place.
+                */
+               if ((select_srcif = (!(flags & IP_FORWARDING) &&
+                   (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) {
+                       ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
+               }
+
+               if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
+                   ipoa->ipoa_boundif != IFSCOPE_NONE) {
+                       ifscope = ipoa->ipoa_boundif;
+                       ipf_pktopts.ippo_flags |=
+                           (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE));
+               }
+
+               if ((srcbound = (ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR)))
+                       ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
        } else {
                select_srcif = FALSE;
+               srcbound = FALSE;
                ifscope = IFSCOPE_NONE;
        }
 
+       if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) {
+               nocell = 1;
+               ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+       }
+
        if (flags & IP_OUTARGS) {
-               nocell = ipoa->ipoa_nocell;
-               if (nocell)
-                       ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
-       } else {
-               nocell = 0;
+               adv = &ipoa->ipoa_flowadv;
+               adv->code = FADV_SUCCESS;
        }
 
-#if IPFIREWALL
-       if (args.rule != NULL) {        /* dummynet already saw us */
+#if DUMMYNET
+       if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) {
+               /* dummynet already saw us */
                ip = mtod(m, struct ip *);
-               hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+               pkt_dst = ip->ip_dst;
                if (ro->ro_rt != NULL) {
                        RT_LOCK_SPIN(ro->ro_rt);
                        ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
@@ -431,10 +443,15 @@ ipfw_tags_done:
                        so = ipsec_getsocket(m);
                        (void)ipsec_setsocket(m, NULL);
                }
-#endif
-               goto sendit;
+#endif /* IPSEC */
+#if IPFIREWALL 
+               if (args.fwa_ipfw_rule != NULL)
+                       goto skip_ipsec;
+#endif /* IPFIREWALL */
+               if (args.fwa_pf_rule != NULL)
+                       goto sendit;
        }
-#endif /* IPFIREWALL */
+#endif /* DUMMYNET */
 
 #if IPSEC
        if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
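
A hypothetical caller-side fragment showing how the flag-driven ip_out_args decoded above would be populated; the leading parameters of ip_output_list() are not visible in this hunk, so the argument order here is an assumption:

    struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 };

    ipoa.ipoa_boundif = scope;      /* scope: caller's bound ifindex (hypothetical) */
    ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_IF;
    error = ip_output_list(m, 0, opt, &ro, flags | IP_OUTARGS, imo, &ipoa);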
@@ -455,6 +472,12 @@ loopit:
        if (opt) {
                m = ip_insertoptions(m, opt, &len);
                hlen = len;
+               /* Update the chain */
+               if (m != m0) {
+                       if (m0 == packetlist)
+                               packetlist = m;
+                       m0 = m;
+               }
        }
        ip = mtod(m, struct ip *);
 #if IPFIREWALL
@@ -466,8 +489,8 @@ loopit:
         * packet of the chain. This could cause the route to be inadvertently changed
         * to the route to the gateway address (instead of the route to the destination).
         */
-       args.next_hop = next_hop_from_ipfwd_tag;
-       pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
+       args.fwa_next_hop = next_hop_from_ipfwd_tag;
+       pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst;
 #else
        pkt_dst = ip->ip_dst;
 #endif
@@ -496,7 +519,7 @@ loopit:
        } else {
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
        }
-       
+
 #if DEBUG
        /* For debugging, we let the stack forge congestion */
        if (forge_ce != 0 &&
@@ -509,8 +532,8 @@ loopit:
 
        KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, 
                     ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
-       
-       dst = (struct sockaddr_in *)&ro->ro_dst;
+
+       dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
 
        /*
         * If there is a cached route,
@@ -562,8 +585,6 @@ loopit:
         * If routing to interface only,
         * short circuit routing lookup.
         */
-#define ifatoia(ifa)   ((struct in_ifaddr *)(ifa))
-#define sintosa(sin)   ((struct sockaddr *)(sin))
        if (flags & IP_ROUTETOIF) {
                if (ia)
                        IFA_REMREF(&ia->ia_ifa);
@@ -621,14 +642,15 @@ loopit:
                        }
 
                        /*
-                        * If the source address is spoofed (in the case
-                        * of IP_RAWOUTPUT), or if this is destined for
-                        * local/loopback, just let it go out using the
-                        * interface of the route.  Otherwise, there's no
-                        * interface having such an address, so bail out.
+                        * If the source address is spoofed (in the case of
+                        * IP_RAWOUTPUT on an unbound socket), or if this
+                        * is destined for local/loopback, just let it go out
+                        * using the interface of the route.  Otherwise,
+                        * there's no interface having such an address,
+                        * so bail out.
                         */
-                       if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
-                           ifscope != lo_ifp->if_index) {
+                       if (ifa == NULL && (!(flags & IP_RAWOUTPUT) ||
+                           srcbound) && ifscope != lo_ifp->if_index) {
                                error = EADDRNOTAVAIL;
                                goto bad;
                        }
@@ -737,8 +759,10 @@ loopit:
                }
                ifp = ro->ro_rt->rt_ifp;
                ro->ro_rt->rt_use++;
-               if (ro->ro_rt->rt_flags & RTF_GATEWAY)
-                       dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+               if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
+                       dst = (struct sockaddr_in *)(void *)
+                           ro->ro_rt->rt_gateway;
+               }
                if (ro->ro_rt->rt_flags & RTF_HOST) {
                        isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
                } else {
@@ -761,7 +785,7 @@ loopit:
                 * still points to the address in "ro".  (It may have been
                 * changed to point to a gateway address, above.)
                 */
-               dst = (struct sockaddr_in *)&ro->ro_dst;
+               dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
                /*
                 * See if the caller provided any multicast options
                 */
@@ -906,6 +930,7 @@ loopit:
                                        m_freem(m);
                                        if (inm != NULL)
                                                INM_REMREF(inm);
+                                       OSAddAtomic(1, &ipstat.ips_cantforward);
                                        goto done;
                                }
                        }
@@ -928,7 +953,6 @@ loopit:
 
                goto sendit;
        }
-#ifndef notdef
        /*
         * If source address not specified yet, use address
         * of outgoing interface.
@@ -946,7 +970,6 @@ loopit:
                fwd_rewrite_src++;
 #endif /* IPFIREWALL_FORWARD */
        }
-#endif /* notdef */
 
        /*
         * Look for broadcast address and
@@ -975,14 +998,31 @@ loopit:
 sendit:
 #if PF
        /* Invoke outbound packet filter */
-       if ( PF_IS_ENABLED) {
+       if (PF_IS_ENABLED) {
                int rc;
-               rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE);
-               if (rc != 0) {
-                       if (packetlist == m0) {
+
+               m0 = m; /* Save for later */    
+#if DUMMYNET
+               args.fwa_m = m;
+               args.fwa_next_hop = dst;
+               args.fwa_oif = ifp;
+               args.fwa_ro = ro;
+               args.fwa_dst = dst;
+               args.fwa_oflags = flags;
+               if (flags & IP_OUTARGS)
+                       args.fwa_ipoa = ipoa;
+               rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args);
+#else /* DUMMYNET */
+               rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL);
+#endif /* DUMMYNET */
+               if (rc != 0 || m == NULL) {
+                       /* Move to the next packet */
+                       m = *mppn;
+
+                       /* Skip ahead if first packet in list got dropped */
+                       if (packetlist == m0)
                                packetlist = m;
-                               mppn = NULL;
-                       }
+
                        if (m != NULL) {
                                m0 = m;
                                /* Next packet in the chain */
@@ -1007,7 +1047,7 @@ sendit:
                ip_linklocal_stat.iplls_out_total++;
                if (ip->ip_ttl != MAXTTL) {
                        ip_linklocal_stat.iplls_out_badttl++;
-                       ip->ip_ttl = MAXTTL;
+                       ip->ip_ttl = MAXTTL;
                }
         }
 
@@ -1026,7 +1066,7 @@ sendit:
                }
 
                ipf_ref();
-               
+
                /* 4135317 - always pass network byte order to filter */
 
 #if BYTE_ORDER != BIG_ENDIAN
@@ -1051,7 +1091,7 @@ sendit:
                                }
                        }
                }
-               
+
                /* set back to host byte order */
                ip = mtod(m, struct ip *);
 
@@ -1079,7 +1119,7 @@ sendit:
                sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
 
        if (sp == NULL) {
-               IPSEC_STAT_INCREMENT(ipsecstat.out_inval);              
+               IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
                KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                goto bad;
        }
@@ -1102,7 +1142,7 @@ sendit:
                /* no need to do IPsec. */
                KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
                goto skip_ipsec;
-       
+
        case IPSEC_POLICY_IPSEC:
                if (sp->req == NULL) {
                        /* acquire a policy */
@@ -1146,9 +1186,9 @@ sendit:
                struct ip *, ip, struct ip6_hdr *, NULL);
 
        error = ipsec4_output(&ipsec_state, sp, flags);
-    
+
        m0 = m = ipsec_state.m;
-       
+
        if (flags & IP_ROUTETOIF) {
                /*
                 * if we have tunnel mode SA, we may need to ignore
@@ -1163,7 +1203,7 @@ sendit:
                ipsec_saved_route = ro;
                ro = &ipsec_state.ro;
        }
-       dst = (struct sockaddr_in *)ipsec_state.dst;
+       dst = (struct sockaddr_in *)(void *)ipsec_state.dst;
        if (error) {
                /* mbuf is already reclaimed in ipsec4_output. */
                m0 = NULL;
@@ -1189,7 +1229,7 @@ sendit:
 
        /* be sure to update variables that are affected by ipsec4_output() */
        ip = mtod(m, struct ip *);
-       
+
 #ifdef _IP_VHL
        hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 #else
@@ -1239,13 +1279,13 @@ sendit:
        NTOHS(ip->ip_len);
        NTOHS(ip->ip_off);
 #endif
-       
+
        KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
-       
+
        /* Pass to filters again */
        if (!TAILQ_EMPTY(&ipv4_filters)) {
                struct ipfilter *filter;
-               
+
                ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
 
                /* Check that a TSO frame isn't passed to a filter.
@@ -1258,7 +1298,7 @@ sendit:
                }
 
                ipf_ref();
-               
+
                /* 4135317 - always pass network byte order to filter */
 
 #if BYTE_ORDER != BIG_ENDIAN
@@ -1280,7 +1320,7 @@ sendit:
                                }
                        }
                }
-               
+
                /* set back to host byte order */
                ip = mtod(m, struct ip *);
 
@@ -1295,35 +1335,19 @@ skip_ipsec:
 #endif /*IPSEC*/
 
 #if IPFIREWALL
-       /*
-        * IpHack's section.
-        * - Xlate: translate packet's addr/port (NAT).
-        * - Firewall: deny/allow/etc.
-        * - Wrap: fake packet's addr/port <unimpl.>
-        * - Encapsulate: put it in another IP and send out. <unimp.>
-        */ 
-       if (fr_checkp) {
-               struct  mbuf    *m1 = m;
-
-               if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
-                       goto done;
-               }
-               ip = mtod(m0 = m = m1, struct ip *);
-       }
-
        /*
         * Check with the firewall...
         * but not if we are already being fwd'd from a firewall.
         */
-       if (fw_enable && IPFW_LOADED && !args.next_hop) {
+       if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) {
                struct sockaddr_in *old = dst;
 
-               args.m = m;
-               args.next_hop = dst;
-               args.oif = ifp;
+               args.fwa_m = m;
+               args.fwa_next_hop = dst;
+               args.fwa_oif = ifp;
                off = ip_fw_chk_ptr(&args);
-               m = args.m;
-               dst = args.next_hop;
+               m = args.fwa_m;
+               dst = args.fwa_next_hop;
 
                 /*
                  * On return we must do the following:
@@ -1347,12 +1371,12 @@ skip_ipsec:
                        goto done ;
                }
                ip = mtod(m, struct ip *);
-               
+
                if (off == 0 && dst == old) {/* common case */
                        goto pass ;
                }
 #if DUMMYNET
-                if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
+               if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
                        /*
                         * pass the pkt to dummynet. Need to include
                         * pipe number, m, ifp, ro, dst because these are
@@ -1362,14 +1386,14 @@ skip_ipsec:
                         * XXX note: if the ifp or ro entry are deleted
                         * while a pkt is in dummynet, we are in trouble!
                         */
-                       args.ro = ro;
-                       args.dst = dst;
-                       args.flags = flags;
+                       args.fwa_ro = ro;
+                       args.fwa_dst = dst;
+                       args.fwa_oflags = flags;
                        if (flags & IP_OUTARGS)
-                               args.ipoa = ipoa;
+                               args.fwa_ipoa = ipoa;
 
                        error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
-                           &args);
+                           &args, DN_CLIENT_IPFW);
                        goto done;
                }
 #endif /* DUMMYNET */
@@ -1398,7 +1422,7 @@ skip_ipsec:
 #endif
 
                        /* Deliver packet to divert input routine */
-                       divert_packet(m, 0, off & 0xffff, args.divert_rule);
+                       divert_packet(m, 0, off & 0xffff, args.fwa_divert_rule);
 
                        /* If 'tee', continue with original packet */
                        if (clone != NULL) {
@@ -1474,7 +1498,7 @@ skip_ipsec:
                                }
 
                                ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
-                               ipfwd_tag->next_hop = args.next_hop;
+                               ipfwd_tag->next_hop = args.fwa_next_hop;
 
                                m_tag_prepend(m, fwd_tag);
 
@@ -1500,13 +1524,14 @@ skip_ipsec:
 #if BYTE_ORDER != BIG_ENDIAN
                                HTONS(ip->ip_len);
                                HTONS(ip->ip_off);
-#endif                         
-                               
+#endif
+
                                /*  we need to call dlil_output to run filters
                                 *      and resync to avoid recursion loops.
                                 */
                                if (lo_ifp) {
-                                       dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
+                                       dlil_output(lo_ifp, PF_INET, m, 0,
+                                           (struct sockaddr *)dst, 0, adv);
                                }
                                else {
                                        printf("ip_output: no loopback ifp for forwarding!!!\n");
@@ -1540,7 +1565,7 @@ skip_ipsec:
                        ifp = ro_fwd->ro_rt->rt_ifp;
                        ro_fwd->ro_rt->rt_use++;
                        if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
-                               dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
+                               dst = (struct sockaddr_in *)(void *)ro_fwd->ro_rt->rt_gateway;
                        if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
                                isbroadcast =
                                    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
@@ -1552,7 +1577,7 @@ skip_ipsec:
                        RT_UNLOCK(ro_fwd->ro_rt);
                        rtfree(ro->ro_rt);
                        ro->ro_rt = ro_fwd->ro_rt;
-                       dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
+                       dst = (struct sockaddr_in *)(void *)&ro_fwd->ro_dst;
 
                        /*
                         * If we added a default src ip earlier,
@@ -1601,7 +1626,7 @@ pass:
 #endif
        m->m_pkthdr.csum_flags |= CSUM_IP;
        tso =  (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
-                    
+
        sw_csum = m->m_pkthdr.csum_flags 
                & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
 
@@ -1616,12 +1641,11 @@ pass:
                        /* Apple GMAC HW, expects STUFF_OFFSET << 16  | START_OFFSET */
                        u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
                        u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
-                       m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
+                       m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
                        m->m_pkthdr.csum_data = (csumprev + offset)  << 16 ;
                        m->m_pkthdr.csum_data += offset; 
-                       sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
-               }
-               else {
+                       sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
+               } else {
                        /* let the software handle any UDP or TCP checksums */
                        sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
                }
@@ -1629,7 +1653,7 @@ pass:
                sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
                    m->m_pkthdr.csum_flags;
        }
-       
+
        if (sw_csum & CSUM_DELAY_DATA) {
                in_delayed_cksum(m);
                sw_csum &= ~CSUM_DELAY_DATA;
@@ -1649,20 +1673,20 @@ pass:
         */
        if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
            ifp->if_hwassist & CSUM_FRAGMENT) {
-               if (tso) 
+               if (tso)
                        m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
-                       
+
 
 #if BYTE_ORDER != BIG_ENDIAN
                HTONS(ip->ip_len);
                HTONS(ip->ip_off);
 #endif
-               
+
                ip->ip_sum = 0;
                if (sw_csum & CSUM_DELAY_IP) {
                        ip->ip_sum = in_cksum(m, hlen);
                }
-               
+
 #ifndef __APPLE__
                /* Record statistics for this interface address. */
                if (!(flags & IP_FORWARDING) && ia != NULL) {
@@ -1679,8 +1703,8 @@ pass:
                if (packetchain == 0) {
                        if (ro->ro_rt && nstat_collect)
                                nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
-                       error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
-                           (struct sockaddr *)dst);
+                       error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
+                           (struct sockaddr *)dst, 0, adv);
                        goto done;
                }
                else { /* packet chaining allows us to reuse the route for all packets */
@@ -1696,12 +1720,12 @@ sendchain:
                                if (ro->ro_rt && nstat_collect)
                                        nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0);
                                //send
-                               error = ifnet_output(ifp, PF_INET, packetlist,
-                                   ro->ro_rt, (struct sockaddr *)dst);
+                               error = dlil_output(ifp, PF_INET, packetlist,
+                                   ro->ro_rt, (struct sockaddr *)dst, 0, adv);
                                pktcnt = 0;
                                bytecnt = 0;
                                goto done;
-       
+
                        }
                        m0 = m;
                        pktcnt++;
@@ -1768,8 +1792,8 @@ sendchain:
                                panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
                        if (ro->ro_rt && nstat_collect)
                                nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
-                       error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
-                           (struct sockaddr *)dst);
+                       error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
+                           (struct sockaddr *)dst, 0, adv);
                } else
                        m_freem(m);
        }
@@ -1873,6 +1897,10 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
                m->m_pkthdr.rcvif = 0;
                m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
                m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
+
+               M_COPY_PFTAG(m, m0);
+               m_set_service_class(m, m0->m_pkthdr.svc);
+
 #if CONFIG_MACF_NET
                mac_netinet_fragment(m0, m);
 #endif
@@ -1909,7 +1937,7 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
 #if BYTE_ORDER != BIG_ENDIAN
        HTONS(ip->ip_off);
 #endif
-       
+
        ip->ip_sum = 0;
        if (sw_csum & CSUM_DELAY_IP) {
                ip->ip_sum = in_cksum(m, hlen);
@@ -1952,30 +1980,35 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
                ip_offset -= m->m_len;
                m = m->m_next;
                if (m == NULL) {
-                       printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
+                       printf("in_delayed_cksum_withoffset failed - "
+                           "ip_offset wasn't in the packet\n");
                        return;
                }
        }
-       
-       /* Sometimes the IP header is not contiguous, yes this can happen! */
-       if (ip_offset + sizeof(struct ip) > m->m_len) {
-#if DEBUG              
+
+       /*
+        * In case the IP header is not contiguous, or not 32-bit
+        * aligned, copy it to a local buffer.
+        */
+       if ((ip_offset + sizeof(struct ip) > m->m_len) ||
+           !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+#if DEBUG
                printf("delayed m_pullup, m->len: %d  off: %d\n",
                        m->m_len, ip_offset);
 #endif
                m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
-               
-               ip = (struct ip *)buf;
+
+               ip = (struct ip *)(void *)buf;
        } else {
-               ip = (struct ip*)(m->m_data + ip_offset);
+               ip = (struct ip*)(void *)(m->m_data + ip_offset);
        }
-       
+
        /* Gross */
        if (ip_offset) {
                m->m_len -= ip_offset;
                m->m_data += ip_offset;
        }
-       
+
        offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
 
        /*
@@ -2021,15 +2054,18 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
        /* Insert the checksum in the existing chain */
        if (offset + ip_offset + sizeof(u_short) > m->m_len) {
                char tmp[2];
-               
+
 #if DEBUG
                printf("delayed m_copyback, m->len: %d  off: %d  p: %d\n",
                    m->m_len, offset + ip_offset, ip->ip_p);
 #endif
-               *(u_short *)tmp = csum;
+               *(u_short *)(void *)tmp = csum;
                m_copyback(m, offset + ip_offset, 2, tmp);
-       } else
-               *(u_short *)(m->m_data + offset + ip_offset) = csum;
+       } else if (IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+               *(u_short *)(void *)(m->m_data + offset + ip_offset) = csum;
+       } else {
+               bcopy(&csum, (m->m_data + offset + ip_offset), sizeof (csum));
+       }
 }
 
 void
@@ -2049,33 +2085,38 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset)
         /* Save copy of first mbuf pointer and the ip_offset before modifying */
         struct mbuf* m0 = m;
         size_t ip_offset_copy = ip_offset;
-       
+
        while (ip_offset >= m->m_len) {
                ip_offset -= m->m_len;
                m = m->m_next;
                if (m == NULL) {
-                       printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
+                       printf("in_cksum_offset failed - ip_offset wasn't "
+                           "in the packet\n");
                        return;
                }
        }
-       
-       /* Sometimes the IP header is not contiguous, yes this can happen! */
-       if (ip_offset + sizeof(struct ip) > m->m_len) {
 
+       /*
+        * In case the IP header is not contiguous, or not 32-bit
+        * aligned, copy it to a local buffer.
+        */
+       if ((ip_offset + sizeof(struct ip) > m->m_len) ||
+           !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
 #if DEBUG
-               printf("in_cksum_offset - delayed m_pullup, m->len: %d  off: %lu\n",
-                       m->m_len, ip_offset);
-#endif 
+               printf("in_cksum_offset - delayed m_pullup, m->len: %d "
+                   "off: %lu\n", m->m_len, ip_offset);
+#endif
                m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
 
-               ip = (struct ip *)buf;
+               ip = (struct ip *)(void *)buf;
                ip->ip_sum = 0;
-               m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
+               m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2,
+                   (caddr_t)&ip->ip_sum);
        } else {
-               ip = (struct ip*)(m->m_data + ip_offset);
+               ip = (struct ip*)(void *)(m->m_data + ip_offset);
                ip->ip_sum = 0;
        }
-       
+
        /* Gross */
        if (ip_offset) {
                m->m_len -= ip_offset;
@@ -2122,16 +2163,25 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset)
                m->m_data -= ip_offset;
        }
 
-       /* Insert the checksum in the existing chain if IP header not contiguous */
+       /*
+        * Insert the checksum in the existing chain if IP header not
+        * contiguous, or if it's not 32-bit aligned, i.e. all the cases
+        * where it was copied to a local buffer.
+        */
        if (ip_offset + sizeof(struct ip) > m->m_len) {
                char tmp[2];
 
 #if DEBUG
-               printf("in_cksum_offset m_copyback, m->len: %u  off: %lu  p: %d\n",
-                   m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
+               printf("in_cksum_offset m_copyback, m->len: %u off: %lu "
+                   "p: %d\n", m->m_len,
+                   ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
 #endif
-               *(u_short *)tmp = ip->ip_sum;
+               *(u_short *)(void *)tmp = ip->ip_sum;
                m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
+       } else if (!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+               bcopy(&ip->ip_sum,
+                   (m->m_data + ip_offset + offsetof(struct ip, ip_sum)),
+                   sizeof (u_short));
        }
 }
 
@@ -2286,9 +2336,6 @@ ip_ctloutput(so, sopt)
                case IP_RECVDSTADDR:
                case IP_RECVIF:
                case IP_RECVTTL:
-#if defined(NFAITH) && NFAITH > 0
-               case IP_FAITH:
-#endif
                case IP_RECVPKTINFO:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                                            sizeof optval);
@@ -2329,11 +2376,6 @@ ip_ctloutput(so, sopt)
                                OPTSET(INP_RECVTTL);
                                break;
 
-#if defined(NFAITH) && NFAITH > 0
-                       case IP_FAITH:
-                               OPTSET(INP_FAITH);
-                               break;
-#endif
                        case IP_RECVPKTINFO:
                                OPTSET(INP_PKTINFO);
                                break;
@@ -2400,7 +2442,7 @@ ip_ctloutput(so, sopt)
                                 */
                                ifnet_release(ifp);
                        }
-                       inp_bindif(inp, ifscope);
+                       error = inp_bindif(inp, ifscope);
                }
                break;
 #endif
@@ -2533,7 +2575,7 @@ ip_ctloutput(so, sopt)
                        if (error)
                                break;
 
-                       inp_bindif(inp, optval);
+                       error = inp_bindif(inp, optval);
                        break;
 
                case IP_NO_IFT_CELLULAR:
@@ -2584,9 +2626,6 @@ ip_ctloutput(so, sopt)
                case IP_RECVIF:
                case IP_RECVTTL:
                case IP_PORTRANGE:
-#if defined(NFAITH) && NFAITH > 0
-               case IP_FAITH:
-#endif
                case IP_RECVPKTINFO:
                        switch (sopt->sopt_name) {
 
@@ -2629,11 +2668,6 @@ ip_ctloutput(so, sopt)
                                        optval = 0;
                                break;
 
-#if defined(NFAITH) && NFAITH > 0
-                       case IP_FAITH:
-                               optval = OPTBIT(INP_FAITH);
-                               break;
-#endif
                        case IP_RECVPKTINFO:
                                optval = OPTBIT(INP_PKTINFO);
                                break;
@@ -2681,7 +2715,7 @@ ip_ctloutput(so, sopt)
 
                case IP_BOUND_IF:
                        if (inp->inp_flags & INP_BOUND_IF)
-                               optval = inp->inp_boundif;
+                               optval = inp->inp_boundifp->if_index;
                        error = sooptcopyout(sopt, &optval, sizeof (optval));
                        break;
 
@@ -2691,7 +2725,8 @@ ip_ctloutput(so, sopt)
                        break;
 
                case IP_OUT_IF:
-                       optval = inp->inp_last_outif;
+                       optval = (inp->inp_last_outifp != NULL) ?
+                           inp->inp_last_outifp->if_index : 0;
                        error = sooptcopyout(sopt, &optval, sizeof (optval));
                        break;
 
@@ -3045,7 +3080,7 @@ ip_mloopback(ifp, m, dst, hlen)
        if (lo_ifp) {
                copym->m_pkthdr.rcvif = ifp;
                dlil_output(lo_ifp, PF_INET, copym, 0,
-                   (struct sockaddr *) dst, 0);
+                   (struct sockaddr *) dst, 0, NULL);
        } else {
                printf("Warning: ip_output call to dlil_find_dltag failed!\n");
                m_freem(copym);
index 971a881266fcb0f33f14ee67750f0b1657e034ad..763c1e91940fefe6761d442be70066aece786ba1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -236,17 +236,44 @@ struct ip_linklocal_stat {
 #define        IP_ALLOWBROADCAST       SO_BROADCAST    /* can send broadcast packets (0x0020) */
 #define        IP_OUTARGS              0x100           /* has ancillary output info */
 
+#ifdef XNU_KERNEL_PRIVATE
+#define IP_HDR_ALIGNED_P(_ip)  ((((uintptr_t)(_ip)) & ((uintptr_t)3)) == 0)
+
+/*
+ * On platforms that require strict alignment (currently anything but
+ * i386 or x86_64), this macro checks whether the pointer to the IP header
+ * is 32-bit aligned, and panics otherwise.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        IP_HDR_STRICT_ALIGNMENT_CHECK(_ip) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define        IP_HDR_STRICT_ALIGNMENT_CHECK(_ip) do {                         \
+       if (!IP_HDR_ALIGNED_P(_ip)) {                                   \
+               panic_plain("\n%s: Unaligned IP header %p\n",           \
+                   __func__, _ip);                                     \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
+#endif /* XNU_KERNEL_PRIVATE */
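
IP_HDR_ALIGNED_P treats a pointer as header-aligned when its low two bits are clear, so a header that begins at, say, a 2-byte offset inside an mbuf fails the check and must first be copied to a local buffer, as the in_delayed_cksum_offset and in_cksum_offset hunks above do. A minimal sketch of the predicate's behavior, with a hypothetical test harness:

#include <stdint.h>
#include <stdio.h>

#define IP_HDR_ALIGNED_P(_ip)  ((((uintptr_t)(_ip)) & ((uintptr_t)3)) == 0)

int
main(void)
{
	/* force 32-bit alignment of the buffer for this illustration */
	char buf[8] __attribute__((aligned(4)));

	printf("%d\n", IP_HDR_ALIGNED_P(buf));      /* 1: offset 0 */
	printf("%d\n", IP_HDR_ALIGNED_P(buf + 2));  /* 0: low bits are 10b */
	return (0);
}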
+
 struct ip;
 struct inpcb;
 struct route;
 struct sockopt;
 
+#include <net/flowadv.h>
+
 /*
  * Extra information passed to ip_output when IP_OUTARGS is set.
  */
 struct ip_out_args {
-       unsigned int    ipoa_boundif;   /* bound outgoing interface */
-       unsigned int    ipoa_nocell;    /* don't use IFT_CELLULAR */
+       unsigned int    ipoa_boundif;   /* boundif interface index */
+       struct flowadv  ipoa_flowadv;   /* flow advisory code */
+       u_int32_t       ipoa_flags;     /* IPOAF flags (see below) */
+#define        IPOAF_SELECT_SRCIF      0x00000001      /* src interface selection */
+#define        IPOAF_BOUND_IF          0x00000002      /* boundif value is valid */
+#define        IPOAF_BOUND_SRCADDR     0x00000004      /* bound to src address */
+#define        IPOAF_NO_CELLULAR       0x00000010      /* skip IFT_CELLULAR */
 };
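
The ipoa_flags word replaces the old ipoa_nocell field, so callers now set IPOAF_* bits instead of separate booleans. The rip_output hunk later in this commit fills the structure in essentially this shape (a paraphrase of that hunk, not new API):

	struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };

	if (inp->inp_flags & INP_BOUND_IF) {
		ipoa.ipoa_boundif = inp->inp_boundifp->if_index;
		ipoa.ipoa_flags |= IPOAF_BOUND_IF;
	}
	if (inp->inp_flags & INP_NO_IFT_CELLULAR)
		ipoa.ipoa_flags |= IPOAF_NO_CELLULAR;
	if (inp->inp_laddr.s_addr != INADDR_ANY)
		ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;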
 
 extern struct  ipstat  ipstat;
@@ -263,6 +290,7 @@ extern u_int32_t    (*ip_mcast_src)(int);
 extern int rsvp_on;
 extern struct  pr_usrreqs rip_usrreqs;
 extern int     ip_doscopedroute;
+extern int     ip_restrictrecvif;
 
 extern void ip_moptions_init(void);
 extern struct ip_moptions *ip_allocmoptions(int);
@@ -304,7 +332,7 @@ int ip_rsvp_done(void);
 int    ip_rsvp_vif_init(struct socket *, struct sockopt *);
 int    ip_rsvp_vif_done(struct socket *, struct sockopt *);
 void   ip_rsvp_force_done(struct socket *);
-
+void   ip_proto_dispatch_in_wrapper(struct mbuf *, int, u_int8_t);
 void   in_delayed_cksum(struct mbuf *m);
 
 extern void tcp_in_cksum_stats(u_int32_t);
index b03f56cd15942a5a8098dc25f957fd22f6523fa7..ecab6700a78cf0f01d41899144c7d58d8fd08ff5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -59,7 +59,8 @@
  * the IP filter is marked and kipf_delayed_remove is set so that when 
  * kipf_ref eventually goes down to zero, the IP filter is removed
  */
-static lck_mtx_t *kipf_lock = 0;
+decl_lck_mtx_data(static, kipf_lock_data);
+static lck_mtx_t *kipf_lock = &kipf_lock_data;
 static u_int32_t kipf_ref = 0;
 static u_int32_t kipf_delayed_remove = 0;
 u_int32_t kipf_count = 0;
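
This converts kipf_lock from a mutex allocated at init time to one embedded in the data segment, which is why the ipf_init() hunk further down can drop the ENOMEM cleanup path entirely. The pattern, sketched with hypothetical names:

decl_lck_mtx_data(static, foo_lock_data);	/* storage lives in the data segment */
static lck_mtx_t *foo_lock = &foo_lock_data;

static void
foo_init(lck_grp_t *grp, lck_attr_t *attr)
{
	/* lck_mtx_init() cannot fail the way lck_mtx_alloc_init() could */
	lck_mtx_init(foo_lock, grp, attr);
}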
@@ -270,7 +271,7 @@ ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
        errno_t error = 0;
        struct m_tag *mtag = NULL;
        struct ip_moptions *imo = NULL;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 };
 
        /* Make the IP header contiguous in the mbuf */
        if ((size_t)m->m_len < sizeof (struct ip)) {
@@ -298,14 +299,18 @@ ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
                imo->imo_multicast_loop = options->ippo_mcast_loop;
        }
 
-       if (options != NULL &&
-           (options->ippo_flags & (IPPOF_BOUND_IF | IPPOF_NO_IFT_CELLULAR))) {
+       if (options != NULL) {
+               if (options->ippo_flags & IPPOF_SELECT_SRCIF)
+                       ipoa.ipoa_flags |= IPOAF_SELECT_SRCIF;
                if (options->ippo_flags & IPPOF_BOUND_IF) {
+                       ipoa.ipoa_flags |= IPOAF_BOUND_IF;
                        ipoa.ipoa_boundif = options->ippo_flags >>
                            IPPOF_SHIFT_IFSCOPE;
                }
                if (options->ippo_flags & IPPOF_NO_IFT_CELLULAR)
-                       ipoa.ipoa_nocell = 1;
+                       ipoa.ipoa_flags |= IPOAF_NO_CELLULAR;
+               if (options->ippo_flags & IPPOF_BOUND_SRCADDR)
+                       ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;
        }
 
        bzero(&ro, sizeof(struct route));
@@ -341,7 +346,7 @@ ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
        errno_t error = 0;
        struct m_tag *mtag = NULL;
        struct ip6_moptions *im6o = NULL;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 };
 
        /* Make the IP header contiguous in the mbuf */
        if ((size_t)m->m_len < sizeof(struct ip6_hdr)) {
@@ -369,14 +374,18 @@ ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
                im6o->im6o_multicast_loop = options->ippo_mcast_loop;
        }
 
-       if (options != NULL &&
-           (options->ippo_flags & (IPPOF_BOUND_IF | IPPOF_NO_IFT_CELLULAR))) {
+       if (options != NULL) {
+               if (options->ippo_flags & IPPOF_SELECT_SRCIF)
+                       ip6oa.ip6oa_flags |= IP6OAF_SELECT_SRCIF;
                if (options->ippo_flags & IPPOF_BOUND_IF) {
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
                        ip6oa.ip6oa_boundif = options->ippo_flags >>
                            IPPOF_SHIFT_IFSCOPE;
                }
                if (options->ippo_flags & IPPOF_NO_IFT_CELLULAR)
-                       ip6oa.ip6oa_nocell = 1;
+                       ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
+               if (options->ippo_flags & IPPOF_BOUND_SRCADDR)
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_SRCADDR;
        }
 
        bzero(&ro, sizeof(struct route_in6));
@@ -481,19 +490,9 @@ ipf_init(void)
                goto done;
        }
        
-       kipf_lock = lck_mtx_alloc_init(lck_grp, lck_attributes);
-       if (kipf_lock == 0) {
-               printf("ipf_init: lck_mtx_alloc_init failed\n");
-               error = ENOMEM;
-               goto done;
-       }
+       lck_mtx_init(kipf_lock, lck_grp, lck_attributes);
+
        done:
-       if (error != 0) {
-               if (kipf_lock) {
-                       lck_mtx_free(kipf_lock, lck_grp);
-                       kipf_lock = 0;
-               }
-       }
        if (lck_grp) {
                lck_grp_free(lck_grp);
                lck_grp = 0;
index 1f7fae6f07afbeb6558ca8894fbd3e6b43f91182..6fe3727fee3daef9a54cc936a3d9c2b68cd8fea2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -54,6 +54,8 @@ struct ipf_pktopts {
 #ifdef PRIVATE
 #define IPPOF_BOUND_IF         0x2
 #define IPPOF_NO_IFT_CELLULAR  0x4
+#define IPPOF_SELECT_SRCIF     0x8
+#define IPPOF_BOUND_SRCADDR    0x10
 #define IPPOF_SHIFT_IFSCOPE    16
 #endif /* PRIVATE */
 
diff --git a/bsd/netinet/lro_ext.h b/bsd/netinet/lro_ext.h
new file mode 100644 (file)
index 0000000..db2a432
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef LRO_EXT_H_
+#define LRO_EXT_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+
+/* All definitions exported from LRO go into this file */
+
+extern int sw_lro;
+extern int lrodebug;
+
+/* flow return values */
+#define TCP_LRO_NAN            0x00    /* No flow exists */
+#define TCP_LRO_CONSUMED       0x01    /* LRO consumed the packet */   
+#define TCP_LRO_EJECT_FLOW     0x02    /* LRO ejected the flow */
+#define TCP_LRO_COALESCE       0x03    /* LRO to coalesce the packet */
+#define TCP_LRO_COLLISION      0x04    /* Two flows map to the same slot */
+
+void tcp_lro_init(void);
+
+/* When doing LRO in IP call this function */
+struct mbuf* tcp_lro(struct mbuf *m, unsigned int hlen);
+
+/* TCP calls this to start coalescing a flow */
+int tcp_start_coalescing(struct ip *, struct tcphdr *, int tlen);
+
+/* TCP calls this to stop coalescing a flow */
+int tcp_lro_remove_state(struct in_addr, struct in_addr, unsigned short, 
+       unsigned short);
+
+/* TCP calls this to keep the seq number updated */
+void tcp_update_lro_seq(__uint32_t, struct in_addr, struct in_addr,
+                unsigned short, unsigned short);
+
+#endif
+
+#endif /* LRO_EXT_H_ */
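
Per the comments above, tcp_lro() is meant to be called from the IP input path and hands back either the (possibly coalesced) mbuf or nothing. A hypothetical call site, assuming a NULL return means the packet was consumed:

	/* hypothetical caller in the IP input path */
	if (sw_lro) {
		m = tcp_lro(m, hlen);
		if (m == NULL)
			return;	/* assumed: LRO consumed or queued the packet */
	}
	/* fall through to normal protocol input on m */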
index 0b63a3c0d9225c43916be95c79e30515d8269d2b..f06517a26748f28f8ef99df3b2d0ca57a1fec3bf 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -71,6 +71,7 @@
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
@@ -130,10 +131,10 @@ struct    inpcbinfo ripcbinfo;
 /* control hooks for ipfw and dummynet */
 #if IPFIREWALL
 ip_fw_ctl_t *ip_fw_ctl_ptr;
+#endif /* IPFIREWALL */
 #if DUMMYNET
 ip_dn_ctl_t *ip_dn_ctl_ptr;
 #endif /* DUMMYNET */
-#endif /* IPFIREWALL */
 
 /*
  * Nominal space allocated to a raw ip socket.
@@ -202,6 +203,9 @@ rip_input(m, iphlen)
        struct mbuf *opts = 0;
        int skipit = 0, ret = 0;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ripsrc.sin_addr = ip->ip_src;
        lck_rw_lock_shared(ripcbinfo.mtx);
        LIST_FOREACH(inp, &ripcb, inp_list) {
@@ -349,21 +353,28 @@ rip_output(
        register struct ip *ip;
        register struct inpcb *inp = sotoinpcb(so);
        int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
-       struct ip_out_args ipoa;
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
        struct ip_moptions *imo;
        int error = 0;
-       mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
 
        if (control != NULL) {
-               mtc = mbuf_traffic_class_from_control(control);
+               msc = mbuf_service_class_from_control(control);
 
                m_freem(control);
        }
-       /* If socket was bound to an ifindex, tell ip_output about it */
-       ipoa.ipoa_boundif = (inp->inp_flags & INP_BOUND_IF) ?
-           inp->inp_boundif : IFSCOPE_NONE;
-       ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+
        flags |= IP_OUTARGS;
+       /* If socket was bound to an ifindex, tell ip_output about it */
+       if (inp->inp_flags & INP_BOUND_IF) {
+               ipoa.ipoa_boundif = inp->inp_boundifp->if_index;
+               ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+       }
+       if (inp->inp_flags & INP_NO_IFT_CELLULAR)
+               ipoa.ipoa_flags |=  IPOAF_NO_CELLULAR;
+
+       if (inp->inp_flowhash == 0)
+               inp->inp_flowhash = inp_calc_flowhash(inp);
 
        /*
         * If the user handed us a complete IP packet, use it.
@@ -411,6 +422,9 @@ rip_output(
                OSAddAtomic(1, &ipstat.ips_rawout);
        }
 
+       if (inp->inp_laddr.s_addr != INADDR_ANY)
+               ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;
+
 #if IPSEC
        if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) {
                m_freem(m);
@@ -424,7 +438,9 @@ rip_output(
                inp->inp_route.ro_rt = NULL;
        }
 
-       set_packet_tclass(m, so, mtc, 0);
+       set_packet_service_class(m, so, msc, 0);
+       m->m_pkthdr.m_flowhash = inp->inp_flowhash;
+       m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH;
 
 #if CONFIG_MACF_NET
        mac_mbuf_label_associate_inpcb(inp, m);
@@ -446,7 +462,7 @@ rip_output(
 
        if (inp->inp_route.ro_rt != NULL) {
                struct rtentry *rt = inp->inp_route.ro_rt;
-               unsigned int outif;
+               struct ifnet *outif;
 
                if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
                    inp->inp_socket == NULL ||
@@ -463,12 +479,11 @@ rip_output(
                }
                /*
                 * If this is a connected socket and the destination
-                * route is unicast, update outif with that of the route
-                * interface index used by IP.
+                * route is unicast, update outif with that of the
+                * route interface used by IP.
                 */
-               if (rt != NULL &&
-                   (outif = rt->rt_ifp->if_index) != inp->inp_last_outif)
-                       inp->inp_last_outif = outif;
+               if (rt != NULL && (outif = rt->rt_ifp) != inp->inp_last_outifp)
+                       inp->inp_last_outifp = outif;
        }
 
        return (error);
@@ -503,7 +518,9 @@ rip_ctloutput(so, sopt)
        struct  inpcb *inp = sotoinpcb(so);
        int     error, optval;
 
-       if (sopt->sopt_level != IPPROTO_IP)
+       /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
+       if (sopt->sopt_level != IPPROTO_IP &&
+           !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
                return (EINVAL);
 
        error = 0;
@@ -516,10 +533,10 @@ rip_ctloutput(so, sopt)
                        error = sooptcopyout(sopt, &optval, sizeof optval);
                        break;
 
-        case IP_STRIPHDR:
-            optval = inp->inp_flags & INP_STRIPHDR;
-            error = sooptcopyout(sopt, &optval, sizeof optval);
-            break;
+               case IP_STRIPHDR:
+                       optval = inp->inp_flags & INP_STRIPHDR;
+                       error = sooptcopyout(sopt, &optval, sizeof optval);
+                       break;
 
 #if IPFIREWALL
                case IP_FW_ADD:
@@ -537,6 +554,8 @@ rip_ctloutput(so, sopt)
 
 #if DUMMYNET
                case IP_DUMMYNET_GET:
+                       if (!DUMMYNET_LOADED)
+                               ip_dn_init();
                        if (DUMMYNET_LOADED)
                                error = ip_dn_ctl_ptr(sopt);
                        else
@@ -576,17 +595,16 @@ rip_ctloutput(so, sopt)
                                inp->inp_flags &= ~INP_HDRINCL;
                        break;
 
-        case IP_STRIPHDR:
-            error = sooptcopyin(sopt, &optval, sizeof optval,
-                        sizeof optval);
-            if (error)
-                break;
-            if (optval)
-                inp->inp_flags |= INP_STRIPHDR;
-            else
-                inp->inp_flags &= ~INP_STRIPHDR;
-            break;
-
+               case IP_STRIPHDR:
+                       error = sooptcopyin(sopt, &optval, sizeof optval,
+                           sizeof optval);
+                       if (error)
+                               break;
+                       if (optval)
+                               inp->inp_flags |= INP_STRIPHDR;
+                       else
+                               inp->inp_flags &= ~INP_STRIPHDR;
+                       break;
 
 #if IPFIREWALL
                case IP_FW_ADD:
@@ -612,6 +630,8 @@ rip_ctloutput(so, sopt)
                case IP_DUMMYNET_CONFIGURE:
                case IP_DUMMYNET_DEL:
                case IP_DUMMYNET_FLUSH:
+                       if (!DUMMYNET_LOADED)
+                               ip_dn_init();
                        if (DUMMYNET_LOADED)
                                error = ip_dn_ctl_ptr(sopt);
                        else
@@ -632,11 +652,11 @@ rip_ctloutput(so, sopt)
                case IP_RSVP_VIF_ON:
                        error = ip_rsvp_vif_init(so, sopt);
                        break;
-                       
+
                case IP_RSVP_VIF_OFF:
                        error = ip_rsvp_vif_done(so, sopt);
                        break;
-               
+
                case MRT_INIT:
                case MRT_DONE:
                case MRT_ADD_VIF:
@@ -649,6 +669,14 @@ rip_ctloutput(so, sopt)
                        break;
 #endif /* MROUTING */
 
+               case SO_FLUSH:
+                       if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval))) != 0)
+                               break;
+
+                       error = inp_flush(inp, optval);
+                       break;
+
                default:
                        error = ip_ctloutput(so, sopt);
                        break;
@@ -822,9 +850,9 @@ __private_extern__ int
 rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
        struct inpcb *inp = sotoinpcb(so);
-       struct sockaddr_in *addr = (struct sockaddr_in *)nam;
+       struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam;
        struct ifaddr *ifa = NULL;
-       unsigned int outif = 0;
+       struct ifnet *outif = NULL;
 
        if (nam->sa_len != sizeof(*addr))
                return EINVAL;
@@ -837,12 +865,12 @@ rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
        }
        else if (ifa) {
                IFA_LOCK(ifa);
-               outif = ifa->ifa_ifp->if_index;
+               outif = ifa->ifa_ifp;
                IFA_UNLOCK(ifa);
                IFA_REMREF(ifa);
        }
        inp->inp_laddr = addr->sin_addr;
-       inp->inp_last_outif = outif;
+       inp->inp_last_outifp = outif;
        return 0;
 }
 
@@ -850,7 +878,7 @@ __private_extern__ int
 rip_connect(struct socket *so, struct sockaddr *nam, __unused  struct proc *p)
 {
        struct inpcb *inp = sotoinpcb(so);
-       struct sockaddr_in *addr = (struct sockaddr_in *)nam;
+       struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam;
 
        if (nam->sa_len != sizeof(*addr))
                return EINVAL;
@@ -861,6 +889,7 @@ rip_connect(struct socket *so, struct sockaddr *nam, __unused  struct proc *p)
                return EAFNOSUPPORT;
        inp->inp_faddr = addr->sin_addr;
        soisconnected(so);
+
        return 0;
 }
 
@@ -889,7 +918,7 @@ rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr
                        m_freem(m);
                        return ENOTCONN;
                }
-               dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
+               dst = ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr;
        }
        return rip_output(m, so, dst, control);
 }
index a3a183bfe15034336639ea980bbd6093ec886c68..99264dcbe717314ebe8e510beda4fcedecc265fa 100644 (file)
@@ -213,19 +213,13 @@ struct tcphdr {
                                         * buffer queues.
                                         */
 #ifdef PRIVATE
-#define        TCP_INFO                                0x200   /* retrieve tcp_info structure */
-
+#define        TCP_INFO                0x200   /* retrieve tcp_info structure */
+#define TCP_NOTSENT_LOWAT      0x201   /* Low water mark for TCP unsent data */
+#define TCP_MEASURE_SND_BW     0x202   /* Measure sender's bandwidth for this connection */
+#define TCP_MEASURE_BW_BURST   0x203   /* Burst size to use for bandwidth measurement */
+#define TCP_PEER_PID           0x204   /* Lookup pid of the process we're connected to */
 /*
- * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
- * the caller to query certain information about the state of a TCP
- * connection.  We provide an overlapping set of fields with the Linux
- * implementation, but since this is a fixed size structure, room has been
- * left for growth.  In order to maximize potential future compatibility with
- * the Linux API, the same variable names and order have been adopted, and
- * padding left to make room for omitted fields in case they are added later.
- *
- * XXX: This is currently an unstable ABI/API, in that it is expected to
- * change.
+ * The TCP_INFO socket option is a private API and is subject to change
  */
 #pragma pack(4)
 
@@ -234,26 +228,52 @@ struct tcphdr {
 #define        TCPI_OPT_WSCALE         0x04
 #define        TCPI_OPT_ECN            0x08
 
+#define TCPI_FLAG_LOSSRECOVERY 0x01    /* Currently in loss recovery */
+
 struct tcp_info {
        u_int8_t        tcpi_state;                     /* TCP FSM state. */
        u_int8_t        tcpi_options;           /* Options enabled on conn. */
        u_int8_t        tcpi_snd_wscale;        /* RFC1323 send shift value. */
        u_int8_t        tcpi_rcv_wscale;        /* RFC1323 recv shift value. */
 
+       u_int32_t       tcpi_flags;                     /* extra flags (TCPI_FLAG_xxx) */
+
+       u_int32_t       tcpi_rto;                       /* Retransmission timeout in milliseconds */
        u_int32_t       tcpi_snd_mss;           /* Max segment size for send. */
        u_int32_t       tcpi_rcv_mss;           /* Max segment size for receive. */
 
+       u_int32_t       tcpi_rttcur;            /* Most recent value of RTT */
+       u_int32_t       tcpi_srtt;                      /* Smoothed RTT */
+       u_int32_t       tcpi_rttvar;            /* RTT variance */
+
        u_int32_t       tcpi_snd_ssthresh;      /* Slow start threshold. */
        u_int32_t       tcpi_snd_cwnd;          /* Send congestion window. */
 
        u_int32_t       tcpi_rcv_space;         /* Advertised recv window. */
 
        u_int32_t       tcpi_snd_wnd;           /* Advertised send window. */
-       u_int32_t       tcpi_snd_bwnd;          /* Bandwidth send window. */
        u_int32_t       tcpi_snd_nxt;           /* Next egress seqno */
        u_int32_t       tcpi_rcv_nxt;           /* Next ingress seqno */
        
        int32_t         tcpi_last_outif;        /* if_index of interface used to send last */
+       u_int32_t       tcpi_snd_sbbytes;       /* bytes in snd buffer including data inflight */
+       
+       u_int64_t       tcpi_txbytes __attribute__((aligned(8)));
+                                                                       /* total bytes sent */  
+       u_int64_t       tcpi_txretransmitbytes __attribute__((aligned(8)));
+                                                                       /* total bytes retransmitted */ 
+       u_int64_t       tcpi_txunacked __attribute__((aligned(8)));
+                                                                       /* current number of bytes not acknowledged */  
+       u_int64_t       tcpi_rxbytes __attribute__((aligned(8)));
+                                                                       /* total bytes received */
+       u_int64_t       tcpi_rxduplicatebytes __attribute__((aligned(8)));
+                                                                       /* total duplicate bytes received */
+       u_int64_t       tcpi_snd_bw __attribute__((aligned(8)));                /* measured send bandwidth in bits/sec */
+};
+
+struct tcp_measure_bw_burst {
+       u_int32_t       min_burst_size; /* Minimum number of packets to use */
+       u_int32_t       max_burst_size; /* Maximum number of packets to use */
 };
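
Since these are socket options (private and subject to change, per the comment above), a userland caller would reach them through getsockopt()/setsockopt() in roughly this shape; a minimal sketch, assuming a connected TCP socket fd:

#include <sys/socket.h>
#include <netinet/tcp.h>
#include <string.h>

static int
query_tcp_info(int fd, struct tcp_info *ti)
{
	socklen_t len = sizeof (*ti);

	memset(ti, 0, sizeof (*ti));
	/* TCP_INFO is a private option; availability may vary */
	return (getsockopt(fd, IPPROTO_TCP, TCP_INFO, ti, &len));
	/* on success, inspect e.g. ti->tcpi_srtt, ti->tcpi_snd_cwnd */
}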
 
 /*
index c78ba35318953e3eda247bef05b26a2b232d00ff..cf1f0fb8006a58d0b71766f7f40883e5866b60e5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -97,7 +97,7 @@ struct tcp_cc_algo {
        void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
 
        /* called before entering FR */
-       void (*pre_fr) (struct tcpcb *tp, struct tcphdr *th);
+       void (*pre_fr) (struct tcpcb *tp);
 
        /*  after exiting FR */
        void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
@@ -120,5 +120,8 @@ extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
 
 #define CC_ALGO(tp) (tcp_cc_algo_list[tp->tcp_cc_index])
 
+extern void tcp_cc_resize_sndbuf(struct tcpcb *tp);
+extern void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp);
+
 #endif /* KERNEL */
 #endif /* _NETINET_CC_H_ */
index 6f06b2b1478871a4edecb4c1f92ec1f92b643f7c..7d29a31c24a74e5750ee8563d3508f167e893cab 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -78,6 +78,7 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 
 #include <kern/cpu_number.h>   /* before tcp_seq.h, for tcp_random18() */
 
@@ -134,6 +135,7 @@ struct tcphdr tcp_savetcp;
 #endif /* CONFIG_MACF_NET || CONFIG_MACF_SOCKET */
 
 #include <sys/kdebug.h>
+#include <netinet/lro_ext.h>
 
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETTCP, 0)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETTCP, 2)
@@ -147,6 +149,8 @@ tcp_cc      tcp_ccgen;
 extern int ipsec_bypass;
 #endif
 
+extern int32_t total_sbmb_cnt;
+
 struct tcpstat tcpstat;
 
 static int log_in_vain = 0;
@@ -225,11 +229,49 @@ int       tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
         &tcp_acc_iaj_high_thresh, 0, "Used in calculating maximum accumulated IAJ");
 
+u_int32_t tcp_do_autorcvbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautorcvbuf, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_do_autorcvbuf, 0, "Enable automatic socket buffer tuning");
+
+u_int32_t tcp_autorcvbuf_inc_shift = 3;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufincshift, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_autorcvbuf_inc_shift, 0, "Shift for increment in receive socket buffer size");
+
+u_int32_t tcp_autorcvbuf_max = 512 * 1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_autorcvbuf_max, 0, "Maximum receive socket buffer size");
+
+int sw_lro = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &sw_lro, 0, "Used to coalesce TCP packets");
+
+int lrodebug = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &lrodebug, 0, "Used to debug SW LRO");
+
+int lro_start = 3;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &lro_start, 0, "Segments for starting LRO computed as power of 2");
+
+extern int tcp_do_autosendbuf;
+
 #if CONFIG_IFEF_NOWINDOWSCALE
 int tcp_obey_ifef_nowindowscale = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLAG_LOCKED,
        &tcp_obey_ifef_nowindowscale, 0, "");
 #endif
+/* This limit will determine when the receive socket buffer tuning will
+ * kick in. Currently it will start when the bw*delay measured in 
+ * last RTT is more than half of the current hiwat on the buffer.
+ */
+uint32_t tcp_rbuf_hiwat_shift = 1;
+
+/* This limit will determine when the socket buffer will be increased
+ * to accommodate an application reading slowly: when the amount of
+ * space left in the buffer is less than one fourth of the bw*delay
+ * measured in the last RTT.
+ */
+uint32_t tcp_rbuf_win_shift = 2;
 
 extern int tcp_TCPTV_MIN;
 extern int tcp_acc_iaj_high;
@@ -258,13 +300,23 @@ static inline int tcp_stretch_ack_enable(struct tcpcb *tp);
 #if TRAFFIC_MGT
 static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, int reset_size);
 void compute_iaj(struct tcpcb *tp);
-static inline void clear_iaj_state(struct tcpcb *tp);
 #endif /* TRAFFIC_MGT */
 
 #if INET6
 static inline unsigned int tcp_maxmtu6(struct rtentry *);
 #endif
 
+static void tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sb, 
+       struct tcpopt *to, u_int32_t tlen);
+
+void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
+static void tcp_sbsnd_trim(struct sockbuf *sbsnd);
+static inline void tcp_sbrcv_tstmp_check(struct tcpcb *tp);
+static inline void tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sb,
+       u_int32_t newsize, u_int32_t idealsize);
+
+#define TCPTV_RCVNOTS_QUANTUM 100
+#define TCP_RCVNOTS_BYTELEVEL 204800
 /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
 #if INET6
 #define ND6_HINT(tp) \
@@ -284,8 +336,6 @@ extern void postevent(struct socket *, struct sockbuf *, int);
 extern  void    ipfwsyslog( int level, const char *format,...);
 extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr );
 extern int fw_verbose;
-__private_extern__ int tcp_sockthreshold;
-__private_extern__ int tcp_win_scale;
 
 #if IPFIREWALL
 #define log_in_vain_log( a ) {            \
@@ -301,6 +351,8 @@ __private_extern__ int tcp_win_scale;
 int tcp_rcvunackwin = TCPTV_UNACKWIN;
 int tcp_maxrcvidle = TCPTV_MAXRCVIDLE;
 int tcp_rcvsspktcnt = TCP_RCV_SS_PKTCOUNT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rcvsspktcnt, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &tcp_rcvsspktcnt, 0, "packets to be seen before receiver stretches acks");
 
 #define DELAY_ACK(tp, th) (CC_ALGO(tp)->delay_ack != NULL && CC_ALGO(tp)->delay_ack(tp, th))
 
@@ -312,6 +364,7 @@ uint32_t get_base_rtt(struct tcpcb *tp);
 void tcp_set_background_cc(struct socket *so);
 void tcp_set_foreground_cc(struct socket *so);
 static void tcp_set_new_cc(struct socket *so, uint16_t cc_index);
+static void tcp_bwmeas_check(struct tcpcb *tp);
 
 #if TRAFFIC_MGT
 void
@@ -319,7 +372,7 @@ reset_acc_iaj(struct tcpcb *tp)
 {
        tp->acc_iaj = 0;
        tp->iaj_rwintop = 0;
-       clear_iaj_state(tp);
+       CLEAR_IAJ_STATE(tp);
 }
 
 static inline void
@@ -334,12 +387,6 @@ update_iaj_state(struct tcpcb *tp, uint32_t size, int rst_size)
        }
 }
 
-static inline void
-clear_iaj_state(struct tcpcb *tp)
-{
-       tp->iaj_rcv_ts = 0;
-}
-
 /* For every 32-bit unsigned integer (v), this function will find the 
  * largest integer n such that (n*n <= v). This takes at most 16 iterations 
  * irrespective of the value of v and does not involve multiplications. 
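
The routine this comment describes is not shown in the hunk; a standard multiplication-free integer square root (not necessarily the variant in the tree) does one shift/compare step per pair of bits, i.e. 16 steps for a 32-bit input:

static uint32_t
isqrt32(uint32_t v)
{
	uint32_t n = 0, bit = 1u << 30;	/* highest power of 4 <= 2^31 */

	while (bit != 0) {
		if (v >= n + bit) {
			v -= n + bit;
			n = (n >> 1) + bit;
		} else {
			n >>= 1;
		}
		bit >>= 2;		/* 16 iterations total */
	}
	return (n);			/* largest n with n*n <= original v */
}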
@@ -433,6 +480,34 @@ compute_iaj(struct tcpcb *tp)
 }
 #endif /* TRAFFIC_MGT */
 
+/* Check if enough data has been acknowledged since 
+ * bw measurement was started
+ */
+static void
+tcp_bwmeas_check(struct tcpcb *tp)
+{
+       int32_t bw_meas_bytes;
+       uint32_t bw, bytes, elapsed_time;
+       bw_meas_bytes = tp->snd_una - tp->t_bwmeas->bw_start;
+       if ((tp->t_flagsext & TF_BWMEAS_INPROGRESS) != 0 &&
+           bw_meas_bytes >= (int32_t)(tp->t_bwmeas->bw_size)) {
+               bytes = bw_meas_bytes;
+               elapsed_time = tcp_now - tp->t_bwmeas->bw_ts;
+               if (elapsed_time > 0) {
+                       bw = bytes / elapsed_time;
+                       if ( bw > 0) {
+                               if (tp->t_bwmeas->bw_sndbw > 0) {
+                                       tp->t_bwmeas->bw_sndbw = 
+                                           (((tp->t_bwmeas->bw_sndbw << 3) - tp->t_bwmeas->bw_sndbw) + bw) >> 3;
+                               } else {
+                                       tp->t_bwmeas->bw_sndbw = bw;
+                               }
+                       }
+               }
+               tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
+       }
+}
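
The smoothing step above is a shift-only exponentially weighted moving average: (((old << 3) - old) + bw) >> 3 equals (7*old + bw) / 8. A tiny sketch with a worked value (hypothetical helper name):

static uint32_t
ewma_7_8(uint32_t old, uint32_t sample)
{
	/* (7 * old + sample) / 8, without a multiply */
	return ((((old << 3) - old) + sample) >> 3);
}
/* ewma_7_8(800, 1600) == (5600 + 1600) >> 3 == 900 */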
+
 static int
 tcp_reass(tp, th, tlenp, m)
        register struct tcpcb *tp;
@@ -596,8 +671,16 @@ present:
        if (!TCPS_HAVEESTABLISHED(tp->t_state))
                return (0);
        q = LIST_FIRST(&tp->t_segq);
-       if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+       if (!q || q->tqe_th->th_seq != tp->rcv_nxt) {
+               /* Stop using LRO once out of order packets arrive */
+               if (tp->t_flagsext & TF_LRO_OFFLOADED) {
+                       tcp_lro_remove_state(tp->t_inpcb->inp_laddr,
+                               tp->t_inpcb->inp_faddr,
+                               th->th_dport, th->th_sport);
+                       tp->t_flagsext &= ~TF_LRO_OFFLOADED;    
+               }
                return (0);
+       }       
        do {
                tp->rcv_nxt += q->tqe_len;
                flags = q->tqe_th->th_flags & TH_FIN;
@@ -609,6 +692,11 @@ present:
                        so_recv_data_stat(so, q->tqe_m, 0); /* XXXX */
                        if (sbappendstream(&so->so_rcv, q->tqe_m))
                                dowakeup = 1;
+                       if (tp->t_flagsext & TF_LRO_OFFLOADED) {        
+                               tcp_update_lro_seq(tp->rcv_nxt, 
+                                tp->t_inpcb->inp_laddr,
+                                tp->t_inpcb->inp_faddr, th->th_dport, th->th_sport);
+                       }
                }
                zfree(tcp_reass_zone, q);
                tcp_reass_qsize--;
@@ -645,7 +733,7 @@ present:
  */
 static void
 tcp_reduce_congestion_window(
-       struct tcpcb    *tp, struct tcphdr *th)
+       struct tcpcb    *tp)
 {
        /*
         * If the current tcp cc module has
@@ -653,7 +741,7 @@ tcp_reduce_congestion_window(
         * before entering FR, call it
         */
        if (CC_ALGO(tp)->pre_fr != NULL)
-               CC_ALGO(tp)->pre_fr(tp, th);
+               CC_ALGO(tp)->pre_fr(tp);
        ENTER_FASTRECOVERY(tp);
        tp->snd_recover = tp->snd_max;
        tp->t_timer[TCPT_REXMT] = 0;
@@ -675,9 +763,13 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
 #pragma unused(proto)
        register struct mbuf *m = *mp;
        struct in6_ifaddr *ia6;
+       struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif: NULL;
 
        IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), return IPPROTO_DONE);
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        /*
         * draft-itojun-ipv6-tcp-to-anycast
         * better place to put this in?
@@ -694,6 +786,10 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
                        icmp6_error(m, ICMP6_DST_UNREACH,
                            ICMP6_DST_UNREACH_ADDR,
                            (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
+                               
+                       if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->icmp6unreach, 1);
+                               
                        return (IPPROTO_DONE);
                }
                IFA_UNLOCK(&ia6->ia_ifa);
@@ -705,6 +801,238 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
 }
 #endif
 
+/* Depending on the usage of mbuf space in the system, this function
+ * will return true or false. It is used to determine whether a socket
+ * buffer can take more memory from the system for auto-tuning.
+ */
+u_int8_t
+tcp_cansbgrow(struct sockbuf *sb)
+{
+       /* Calculate the host level space limit in terms of MSIZE buffers.
+        * We can use a maximum of half of the available mbuf space for
+        * socket buffers. 
+        */
+       u_int32_t mblim = ((nmbclusters >> 1) << (MCLSHIFT - MSIZESHIFT));
+
+       /* Calculate per sb limit in terms of bytes. We optimize this limit
+        * for up to 16 socket buffers.
+        */
+
+       u_int32_t sbspacelim = ((nmbclusters >> 4) << MCLSHIFT);
+
+       if ((total_sbmb_cnt < mblim) &&
+               (sb->sb_hiwat < sbspacelim)) {
+               return(1);
+       }
+       return(0);
+}
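
To make the two limits concrete, here is the arithmetic under assumed (typical, not quoted from this commit) constants:

/*
 * Assumed for illustration: nmbclusters = 32768,
 * MCLSHIFT = 11 (2 KB clusters), MSIZESHIFT = 8 (256-byte mbufs).
 *
 * mblim      = (32768 >> 1) << (11 - 8) = 131072 MSIZE buffers
 *              (half of the cluster pool, expressed in 256-byte units)
 * sbspacelim = (32768 >> 4) << 11       = 4 MB per socket buffer
 *              (1/16 of the pool, matching "up to 16 socket buffers")
 */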
+
+void
+tcp_sbrcv_reserve(struct tcpcb *tp,
+       struct sockbuf *sbrcv,
+       u_int32_t newsize,
+       u_int32_t idealsize) {
+
+       /* newsize should not exceed max */
+       newsize = min(newsize, tcp_autorcvbuf_max);
+
+       /* The receive window scale negotiated at the 
+        * beginning of the connection will also set a 
+        * limit on the socket buffer size
+        */
+       newsize = min(newsize, TCP_MAXWIN << tp->rcv_scale);
+
+       /* Set new socket buffer size */
+       if (newsize > sbrcv->sb_hiwat &&
+               (sbreserve(sbrcv, newsize) == 1)) {
+               sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize, 
+                       (idealsize != 0) ? idealsize : newsize), 
+                       tcp_autorcvbuf_max);
+
+               /* Again check the limit set by the advertised 
+                * window scale 
+                */
+               sbrcv->sb_idealsize = min(sbrcv->sb_idealsize, 
+                       TCP_MAXWIN << tp->rcv_scale);
+       }
+}
+
+/* 
+ * This function is used to grow a receive socket buffer. It
+ * will take into account system-level memory usage and the
+ * bandwidth available on the link to make a decision.
+ */
+static void
+tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, 
+       struct tcpopt *to, u_int32_t pktlen) {
+       
+       if (tcp_do_autorcvbuf == 0 ||
+               (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
+               tcp_cansbgrow(sbrcv) == 0 ||
+               sbrcv->sb_hiwat >= tcp_autorcvbuf_max) {
+               /* Can not resize the socket buffer, just return */
+               goto out;
+       }
+
+       if (TSTMP_GT(tcp_now,
+               tp->rfbuf_ts + TCPTV_RCVBUFIDLE)) {
+               /* If there has been an idle period in the
+                * connection, just restart the measurement
+                */
+               goto out;
+       }
+
+       if ((tp->t_flags & (TF_REQ_TSTMP | TF_RCVD_TSTMP)) !=
+               (TF_REQ_TSTMP | TF_RCVD_TSTMP)) {
+               /*
+                * Timestamp option is not supported on this connection.
+                * If the connection reached a state to indicate that
+                * the receive socket buffer needs to grow, increase
+                * the high water mark. 
+                */ 
+               if (TSTMP_GEQ(tcp_now, 
+                       tp->rfbuf_ts + TCPTV_RCVNOTS_QUANTUM)) {
+                       if (tp->rfbuf_cnt >= TCP_RCVNOTS_BYTELEVEL) {
+                               tcp_sbrcv_reserve(tp, sbrcv,
+                                       tcp_autorcvbuf_max, 0);
+                       }
+                       goto out;
+               } else {
+                       tp->rfbuf_cnt += pktlen;
+                       return;
+               }       
+       } else if (to->to_tsecr != 0) {
+               /* If the timestamp shows that one RTT has
+                * completed, we can stop counting the
+                * bytes. Here we consider increasing
+                * the socket buffer if it fits the following 
+                * criteria: 
+                * 1. the bandwidth measured in the last RTT is more
+                * than half of sb_hiwat; this helps scale the
+                * buffer according to the bandwidth on the link.
+                * 2. the space left in sbrcv is less than
+                * one fourth of the bandwidth measured in the last RTT;
+                * this helps accommodate an application reading slowly.
+                */
+               if (TSTMP_GEQ(to->to_tsecr, tp->rfbuf_ts)) {
+                       if ((tp->rfbuf_cnt > (sbrcv->sb_hiwat -
+                               (sbrcv->sb_hiwat >> tcp_rbuf_hiwat_shift)) ||
+                               (sbrcv->sb_hiwat - sbrcv->sb_cc) <
+                               (tp->rfbuf_cnt >> tcp_rbuf_win_shift))) {
+                               u_int32_t rcvbuf_inc; 
+                               /*
+                                * Increment the receive window by a multiple of
+                                * maximum sized segments. This will prevent a 
+                                * connection from sending smaller segments on 
+                                * wire if it is limited by the receive window.
+                                *
+                                * Set the ideal size based on current bandwidth 
+                                * measurements. We set the ideal size on receive 
+                                * socket buffer to be twice the bandwidth delay 
+                                * product.
+                                */
+                               rcvbuf_inc = tp->t_maxseg << tcp_autorcvbuf_inc_shift;
+                               tcp_sbrcv_reserve(tp, sbrcv,
+                                       sbrcv->sb_hiwat + rcvbuf_inc, 
+                                       (tp->rfbuf_cnt * 2));
+                       }
+                       goto out;
+               } else {
+                       tp->rfbuf_cnt += pktlen;
+                       return;
+               }
+       }
+out:
+       /* Restart the measurement */
+       tp->rfbuf_ts = 0;
+       tp->rfbuf_cnt = 0;
+       return;
+}
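
With the default shifts declared above (tcp_rbuf_hiwat_shift = 1, tcp_rbuf_win_shift = 2, tcp_autorcvbuf_inc_shift = 3), the growth test works out as in this illustrative trace:

/*
 * Example (values assumed): sb_hiwat = 131072, and rfbuf_cnt = 80000
 * bytes acked in the last RTT.
 *
 * criterion 1: 80000 > 131072 - (131072 >> 1) = 65536   -> grow
 * criterion 2: free space < (80000 >> 2) = 20000        -> alternate trigger
 *
 * rcvbuf_inc = t_maxseg << 3 (eight max-sized segments), and the
 * ideal size becomes 2 * 80000 = 160000, twice the measured BDP.
 */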
+
+/* This function will trim the excess space added to the socket buffer
+ * to help a slow-reading app. The ideal size of a socket buffer depends
+ * on the link bandwidth or is set by the application, and we aim to
+ * reach that size.
+ */
+void
+tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sbrcv) {
+       if (tcp_do_autorcvbuf == 1 && sbrcv->sb_idealsize > 0 &&
+               sbrcv->sb_hiwat > sbrcv->sb_idealsize) {
+               int32_t trim;
+               /* compute the difference between ideal and current sizes */
+               u_int32_t diff = sbrcv->sb_hiwat - sbrcv->sb_idealsize;
+
+               /* Compute the maximum advertised window for
+                * this connection.
+                */
+               u_int32_t advwin = tp->rcv_adv - tp->rcv_nxt;
+               
+               /* How much can we trim the receive socket buffer?
+                * 1. it can not be trimmed beyond the max rcv win advertised
+                * 2. if possible, leave 1/16 of bandwidth*delay to 
+                * avoid closing the win completely
+                */
+               u_int32_t leave = max(advwin, (sbrcv->sb_idealsize >> 4));
+
+               /* Sometimes leave can be zero, in that case leave at least
+                * a few segments worth of space.
+                */
+               if (leave == 0)
+                       leave = tp->t_maxseg << tcp_autorcvbuf_inc_shift;
+               
+               trim = sbrcv->sb_hiwat - (sbrcv->sb_cc + leave);
+               trim = imin(trim, (int32_t)diff);
+
+               if (trim > 0)
+                       sbreserve(sbrcv, (sbrcv->sb_hiwat - trim));
+       }
+}
+
+/* We may need to trim the send socket buffer size for two reasons:
+ * 1. if the rtt seen on the connection is climbing up, we do not
+ * want to fill the buffers any more.
+ * 2. if the congestion win on the socket backed off, there is no need
+ * to hold more mbufs for that connection than what the cwnd will allow.
+ */
+void
+tcp_sbsnd_trim(struct sockbuf *sbsnd) {
+       if (tcp_do_autosendbuf == 1 && 
+               ((sbsnd->sb_flags & (SB_AUTOSIZE | SB_TRIM)) == 
+                       (SB_AUTOSIZE | SB_TRIM)) &&
+               (sbsnd->sb_idealsize > 0) &&
+               (sbsnd->sb_hiwat > sbsnd->sb_idealsize)) {
+               u_int32_t trim = 0;
+               if (sbsnd->sb_cc <= sbsnd->sb_idealsize) {
+                       trim = sbsnd->sb_hiwat - sbsnd->sb_idealsize;
+               } else {
+                       trim = sbsnd->sb_hiwat - sbsnd->sb_cc;
+               }
+               sbreserve(sbsnd, (sbsnd->sb_hiwat - trim));
+       }
+       if (sbsnd->sb_hiwat <= sbsnd->sb_idealsize)
+               sbsnd->sb_flags &= ~(SB_TRIM);
+}
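
The send-side trim reduces to two cases; with hypothetical numbers:

    /* hiwat = 192 KB, ideal = 128 KB. If only 64 KB is queued (sb_cc <=
     * idealsize), trim all the way down to the ideal size; if more than
     * the ideal size is queued, trim only the unused space so queued
     * data is never cut. */
    u_int32_t sb_hiwat = 196608, sb_idealsize = 131072, sb_cc = 65536;
    u_int32_t trim = (sb_cc <= sb_idealsize) ?
        (sb_hiwat - sb_idealsize) :        /* 65536: down to the ideal */
        (sb_hiwat - sb_cc);                /* keep what is queued */
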
+
+/*
+ * If the timestamp option was not negotiated on this connection
+ * and this connection is on the receiving side of a stream,
+ * then we cannot measure the delay on the link accurately.
+ * Instead of enabling automatic receive socket buffer
+ * resizing, just give more space to the receive socket buffer.
+ */
+static inline void 
+tcp_sbrcv_tstmp_check(struct tcpcb *tp) {
+       struct socket *so = tp->t_inpcb->inp_socket;
+       u_int32_t newsize = 2 * tcp_recvspace;
+       struct sockbuf *sbrcv = &so->so_rcv;
+
+       if ((tp->t_flags & (TF_REQ_TSTMP | TF_RCVD_TSTMP)) !=
+               (TF_REQ_TSTMP | TF_RCVD_TSTMP) &&
+               (sbrcv->sb_flags & SB_AUTOSIZE) != 0) {
+               tcp_sbrcv_reserve(tp, sbrcv, newsize, 0);
+       }
+}
+
 /* A receiver will evaluate the flow of packets on a connection 
  * to see if it can reduce ack traffic. The receiver will start 
  * stretching acks if all of the following conditions are met:
@@ -732,6 +1060,7 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
                (tp->rcv_waitforss >= tcp_rcvsspktcnt))) {
                return(1);
        }
+                
        return(0);
 }
 
@@ -770,8 +1099,7 @@ tcp_input(m, off0)
        struct in6_addr laddr6;
 #endif
        int dropsocket = 0;
-       int iss = 0;
-       int nosock = 0;
+       int iss = 0, nosock = 0; 
        u_int32_t tiwin;
        struct tcpopt to;               /* options in this segment */
        struct sockaddr_in *next_hop = NULL;
@@ -782,24 +1110,20 @@ tcp_input(m, off0)
        u_char ip_ecn = IPTOS_ECN_NOTECT;
        unsigned int ifscope, nocell = 0;
        uint8_t isconnected, isdisconnected;
+       struct ifnet *ifp = ((m->m_flags & M_PKTHDR) &&
+           m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif : NULL;
+       int nlropkts = m->m_pkthdr.lro_npkts;
+       int mauxf_sw_lro_pkt = (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) ? 1 : 0;
+       int turnoff_lro = 0;
+#define TCP_INC_VAR(stat, npkts) do {                  \
+       if (mauxf_sw_lro_pkt) { \
+               stat += npkts;                                  \
+       } else {                                                \
+               stat++;                                         \
+       }                                                       \
+} while (0)
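
The macro's effect, as a minimal sketch (hypothetical values; the macro
reads mauxf_sw_lro_pkt from the enclosing scope, as declared above):

    int mauxf_sw_lro_pkt = 1;    /* mbuf was coalesced by software LRO */
    int nlropkts = 4;            /* and carries four TCP segments */
    u_int32_t rcvtotal = 0;
    TCP_INC_VAR(rcvtotal, nlropkts); /* adds 4; a plain mbuf adds 1 */
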
 
-       /*
-        * Record the interface where this segment arrived on; this does not
-        * affect normal data output (for non-detached TCP) as it provides a
-        * hint about which route and interface to use for sending in the
-        * absence of a PCB, when scoped routing (and thus source interface
-        * selection) are enabled.
-        */
-       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
-               ifscope = m->m_pkthdr.rcvif->if_index;
-       else
-               ifscope = IFSCOPE_NONE;
-
-        /* Since this is an entry point for input processing of tcp packets, we
-         * can update the tcp clock here.
-         */
-        calculate_tcp_clock();
-
+       TCP_INC_VAR(tcpstat.tcps_rcvtotal, nlropkts);
+       
        /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
        if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
                fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
@@ -828,34 +1152,51 @@ tcp_input(m, off0)
 #endif
        bzero((char *)&to, sizeof(to));
 
-       tcpstat.tcps_rcvtotal++;
-
-
-
 #if INET6
        if (isipv6) {
+               /* Expect 32-bit aligned data pointer on strict-align platforms */
+               MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
                /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
                ip6 = mtod(m, struct ip6_hdr *);
                tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
-               th = (struct tcphdr *)((caddr_t)ip6 + off0);
+               th = (struct tcphdr *)(void *)((caddr_t)ip6 + off0);
 
                if ((apple_hwcksum_rx != 0) && (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
                                th->th_sum = m->m_pkthdr.csum_data;
-                       else
-                               th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
-                                       &ip6->ip6_dst, htonl(sizeof(struct tcphdr)),
-                                       htonl(IPPROTO_TCP));
+                       else {
+                               /*
+                                * There is no established protocol for the case
+                                * where the IPv6 pseudo-header checksum was not
+                                * computed; our current drivers all set
+                                * CSUM_PSEUDO_HDR. So if we do get here,
+                                * recalculate the checksum.
+                                */
+                               if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+                                       th->th_sum = 0;
+                               } else {
+                                       th->th_sum = 0xffff;
+                               }
+                       }
 
                        th->th_sum ^= 0xffff;
                        if (th->th_sum) {
                                tcpstat.tcps_rcvbadsum++;
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
+                               
                                goto dropnosock;
                        }
                }
                else {
                        if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
                                tcpstat.tcps_rcvbadsum++;
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
+                                       
                                goto dropnosock;
                        }
                }
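
The sentinel values written above let the shared "th_sum ^= 0xffff" test
handle both the offloaded and the recomputed paths; a compact standalone
illustration (cksum_ok is a hypothetical stand-in for the in6_cksum()
result):

    int cksum_ok = 1;                        /* in6_cksum() returned 0 */
    u_int16_t th_sum = cksum_ok ? 0xffff : 0x0000;
    th_sum ^= 0xffff;                        /* good -> 0, bad -> 0xffff */
    if (th_sum != 0) {
        /* tcps_rcvbadsum++ and drop, as in the code above */
    }
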
@@ -873,6 +1214,10 @@ tcp_input(m, off0)
                 */
                if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
                        /* XXX stat */
+                       
+                       if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                               atomic_add_64(&ifp->if_tcp_stat->unspecv6, 1);
+                       
                        goto dropnosock;
                }
                DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL,
@@ -900,39 +1245,32 @@ tcp_input(m, off0)
                        return;
                }
        }
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
        ipov = (struct ipovly *)ip;
-       th = (struct tcphdr *)((caddr_t)ip + off0);
+       th = (struct tcphdr *)(void *)((caddr_t)ip + off0);
        tlen = ip->ip_len;
 
-       DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL,
-               struct ip *, ip, struct tcpcb *, NULL, struct tcphdr *, th);
-
-       KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
-                    (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
-                    th->th_seq, th->th_ack, th->th_win);
-
+       if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_DID_CSUM) {
+               goto skip_checksum;
+       }
        if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
                        u_short pseudo;
                        char b[9];
-                       *(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
-                       *(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
-                       *(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];
-                       
-                       bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
-                       ipov->ih_len = (u_short)tlen;
 
+                       bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
+                       bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
+                       ipov->ih_len = (u_short)tlen;
 #if BYTE_ORDER != BIG_ENDIAN
                        HTONS(ipov->ih_len);
 #endif
-
                        pseudo = in_cksum(m, sizeof (struct ip));
-                       
-                       *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
-                       *(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
-                       *(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];
-                       
+                       bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
+
                        th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
                } else {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
@@ -948,35 +1286,40 @@ tcp_input(m, off0)
                /*
                 * Checksum extended TCP header and data.
                 */
-               *(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
-               *(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
-               *(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];
-               
-               len = sizeof (struct ip) + tlen;
-               bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
+               bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
+               bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
                ipov->ih_len = (u_short)tlen;
-
 #if BYTE_ORDER != BIG_ENDIAN
                HTONS(ipov->ih_len);
 #endif
-
+               len = sizeof (struct ip) + tlen;
                th->th_sum = in_cksum(m, len);
-               
-               *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
-               *(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
-               *(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];
+               bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
 
                tcp_in_cksum_stats(len);
        }
        if (th->th_sum) {
                tcpstat.tcps_rcvbadsum++;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
+               if (lrodebug)
+                       printf("tcp_input: bad xsum len = %d, tlen = %d, "
+                           "flags = %x, csum_flags = %x.\n",
+                           len, tlen, m->m_flags, m->m_pkthdr.csum_flags);
                goto dropnosock;
        }
+skip_checksum:
 #if INET6
        /* Re-initialization for later version check */
        ip->ip_v = IPVERSION;
 #endif
        ip_ecn = (ip->ip_tos & IPTOS_ECN_MASK);
+
+       DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL,
+               struct ip *, ip, struct tcpcb *, NULL, struct tcphdr *, th);
+       
+       KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
+               (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
+                 th->th_seq, th->th_ack, th->th_win);
+
        }
 
        /*
@@ -986,6 +1329,10 @@ tcp_input(m, off0)
        off = th->th_off << 2;
        if (off < sizeof (struct tcphdr) || off > tlen) {
                tcpstat.tcps_rcvbadoff++;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
+               
                goto dropnosock;
        }
        tlen -= off;    /* tlen is used instead of ti->ti_len */
@@ -994,7 +1341,7 @@ tcp_input(m, off0)
                if (isipv6) {
                        IP6_EXTHDR_CHECK(m, off0, off, return);
                        ip6 = mtod(m, struct ip6_hdr *);
-                       th = (struct tcphdr *)((caddr_t)ip6 + off0);
+                       th = (struct tcphdr *)(void *)((caddr_t)ip6 + off0);
                } else
 #endif /* INET6 */
                {
@@ -1005,7 +1352,7 @@ tcp_input(m, off0)
                                }
                                ip = mtod(m, struct ip *);
                                ipov = (struct ipovly *)ip;
-                               th = (struct tcphdr *)((caddr_t)ip + off0);
+                               th = (struct tcphdr *)(void *)((caddr_t)ip + off0);
                        }
                }
                optlen = off - sizeof (struct tcphdr);
@@ -1020,11 +1367,11 @@ tcp_input(m, off0)
                if ((optlen == TCPOLEN_TSTAMP_APPA ||
                        (optlen > TCPOLEN_TSTAMP_APPA &&
                        optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
-                       *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+                       *(u_int32_t *)(void *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
                        (th->th_flags & TH_SYN) == 0) {
                        to.to_flags |= TOF_TS;
-                       to.to_tsval = ntohl(*(u_int32_t *)(optp + 4));
-                       to.to_tsecr = ntohl(*(u_int32_t *)(optp + 8));
+                       to.to_tsval = ntohl(*(u_int32_t *)(void *)(optp + 4));
+                       to.to_tsecr = ntohl(*(u_int32_t *)(void *)(optp + 8));
                        optp = NULL;    /* we've parsed the options */
                }
        }
@@ -1038,19 +1385,13 @@ tcp_input(m, off0)
         *
         * This is a violation of the TCP specification.
         */
-       if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
+       if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN)) {
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->synfin, 1);
+                       
                goto dropnosock;
-#endif
-
-       /*
-        * Convert TCP protocol specific fields to host format.
-        */
-
-#if BYTE_ORDER != BIG_ENDIAN
-       NTOHL(th->th_seq);
-       NTOHL(th->th_ack);
-       NTOHS(th->th_win);
-       NTOHS(th->th_urp);
+       }
 #endif
 
        /*
@@ -1062,6 +1403,34 @@ tcp_input(m, off0)
         * parameters to be unchanged.
         */
        drop_hdrlen = off0 + off;
+       
+       /* Since this is an entry point for input processing of tcp packets, we
+        * can update the tcp clock here.
+        */
+       calculate_tcp_clock();
+
+       /*
+        * Record the interface where this segment arrived on; this does not
+        * affect normal data output (for non-detached TCP) as it provides a
+        * hint about which route and interface to use for sending in the
+        * absence of a PCB, when scoped routing (and thus source interface
+        * selection) are enabled.
+        */
+       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+               ifscope = m->m_pkthdr.rcvif->if_index;
+       else
+               ifscope = IFSCOPE_NONE;
+
+       /*
+        * Convert TCP protocol specific fields to host format.
+        */
+
+#if BYTE_ORDER != BIG_ENDIAN
+       NTOHL(th->th_seq);
+       NTOHL(th->th_ack);
+       NTOHS(th->th_win);
+       NTOHS(th->th_urp);
+#endif
 
        /*
         * Locate pcb for segment.
@@ -1120,7 +1489,8 @@ findpcb:
         * the segment arrived on.
         */
        if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
-               ifscope = inp->inp_boundif;
+               ifscope = inp->inp_boundifp->if_index;
+
        /*
         * If the PCB is present and the socket isn't allowed to use
         * the cellular interface, indicate it as such for tcp_respond.
@@ -1136,6 +1506,10 @@ findpcb:
                                IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
                                if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) 
                                        inp = NULL;     // pretend we didn't find it 
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->badformatipsec, 1);    
+                                       
                                goto dropnosock;
                        }
                } else
@@ -1144,6 +1518,10 @@ findpcb:
                                IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
                                if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) 
                                        inp = NULL;     // pretend we didn't find it 
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->badformatipsec, 1);    
+                                       
                                goto dropnosock;
                        }
        }
@@ -1189,7 +1567,7 @@ findpcb:
                                        ntohs(th->th_sport), thflags);
                                break;
                        case 3:
-                               if ((thflags & TH_SYN) &&
+                               if ((thflags & TH_SYN) && !(thflags & TH_ACK) &&
                                        !(m->m_flags & (M_BCAST | M_MCAST)) &&
 #if INET6
                                        ((isipv6 && !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) ||
@@ -1210,6 +1588,7 @@ findpcb:
                }
                if (blackhole) { 
                        if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type != IFT_LOOP)
+                               
                                switch (blackhole) {
                                case 1:
                                        if (thflags & TH_SYN)
@@ -1222,6 +1601,10 @@ findpcb:
                                }
                }
                rstreason = BANDLIM_RST_CLOSEDPORT;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->noconnnolist, 1);
+
                goto dropwithresetnosock;
        }
        so = inp->inp_socket;
@@ -1248,6 +1631,10 @@ findpcb:
        tp = intotcpcb(inp);
        if (tp == 0) {
                rstreason = BANDLIM_RST_CLOSEDPORT;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->noconnlist, 1);
+
                goto dropwithreset;
        }
        if (tp->t_state == TCPS_CLOSED)
@@ -1290,14 +1677,16 @@ findpcb:
 #if INET6
                        struct inpcb *oinp = sotoinpcb(so);
 #endif /* INET6 */
-                       unsigned int head_ifscope;
-                       unsigned int head_nocell;
+                       struct ifnet *head_ifscope;
+                       unsigned int head_nocell, head_recvanyif;
 
                        /* Get listener's bound-to-interface, if any */
                        head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-                           inp->inp_boundif : IFSCOPE_NONE;
+                           inp->inp_boundifp : NULL;
                        /* Get listener's no-cellular information, if any */
                        head_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+                       /* Get listener's recv-any-interface, if any */
+                       head_recvanyif = (inp->inp_flags & INP_RECV_ANYIF);
 
                        /*
                         * If the state is LISTEN then ignore segment if it contains an RST.
@@ -1306,6 +1695,10 @@ findpcb:
                         * If it is from this socket, drop it, it must be forged.
                         */
                        if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->listbadsyn, 1);
+                       
                                if (thflags & TH_RST) {
                                        goto drop;
                                }
@@ -1398,6 +1791,10 @@ findpcb:
                                                IFA_REMREF(&ia6->ia_ifa);
                                                tp = NULL;
                                                rstreason = BANDLIM_RST_OPENPORT;
+                                               
+                                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                                       atomic_add_64(&ifp->if_tcp_stat->deprecate6, 1);
+                                                       
                                                goto dropwithreset;
                                        }
                                        IFA_UNLOCK(&ia6->ia_ifa);
@@ -1466,13 +1863,15 @@ findpcb:
                        dropsocket++;
                        /*
                         * Inherit INP_BOUND_IF from listener; testing if
-                        * head_ifscope is non-zero is sufficient, since it
+                        * head_ifscope is non-NULL is sufficient, since it
                         * can only be set to a non-zero value earlier if
                         * the listener has such a flag set.
                         */
-                       if (head_ifscope != IFSCOPE_NONE) {
+                       if (head_ifscope != NULL) {
                                inp->inp_flags |= INP_BOUND_IF;
-                               inp->inp_boundif = head_ifscope;
+                               inp->inp_boundifp = head_ifscope;
+                       } else {
+                               inp->inp_flags &= ~INP_BOUND_IF;
                        }
                        /*
                         * Inherit INP_NO_IFT_CELLULAR from listener.
@@ -1480,6 +1879,13 @@ findpcb:
                        if (head_nocell) {
                                inp->inp_flags |= INP_NO_IFT_CELLULAR;
                        }
+                       /*
+                        * Inherit {IN,IN6}_RECV_ANYIF from listener.
+                        */
+                       if (head_recvanyif)
+                               inp->inp_flags |= INP_RECV_ANYIF;
+                       else
+                               inp->inp_flags &= ~INP_RECV_ANYIF;
 #if INET6
                        if (isipv6)
                                inp->in6p_laddr = ip6->ip6_dst;
@@ -1502,7 +1908,7 @@ findpcb:
                                        inp->in6p_laddr = in6addr_any;
                                else
 #endif /* INET6 */
-                               inp->inp_laddr.s_addr = INADDR_ANY;
+                                       inp->inp_laddr.s_addr = INADDR_ANY;
                                inp->inp_lport = 0;
                                tcp_lock(oso, 0, 0);    /* release ref on parent */
                                tcp_unlock(oso, 1, 0);
@@ -1531,7 +1937,7 @@ findpcb:
                                                                M_NOWAIT);
                        } else
 #endif /* INET6 */
-                       inp->inp_options = ip_srcroute();
+                               inp->inp_options = ip_srcroute();
                        tcp_lock(oso, 0, 0);
 #if IPSEC
                        /* copy old policy into new socket's */
@@ -1553,21 +1959,13 @@ findpcb:
                        tp->t_flagsext |= (tp0->t_flagsext & TF_RXTFINDROP);
                        tp->t_keepinit = tp0->t_keepinit;
                        tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl;
+                       if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0)
+                               tp->t_notsent_lowat = tp0->t_notsent_lowat;
 
                        /* now drop the reference on the listener */
                        tcp_unlock(oso, 1, 0);
 
-                       /* Compute proper scaling value from buffer space */
-                       if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold) {
-                               tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
-                               so->so_rcv.sb_hiwat = imin(TCP_MAXWIN << tp->request_r_scale, (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES);  
-                       }
-                       else {
-                               while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
-                               TCP_MAXWIN << tp->request_r_scale <
-                               so->so_rcv.sb_hiwat)
-                                       tp->request_r_scale++;
-                       }
+                       tcp_set_max_rwinscale(tp, so);
 
                        KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
                }
@@ -1643,7 +2041,7 @@ findpcb:
                 * certain criteria defined in tcp_stretch_ack_enable function.
                 */
                if ((tp->t_flagsext & TF_RCVUNACK_WAITSS) != 0) {
-                       tp->rcv_waitforss++;
+                       TCP_INC_VAR(tp->rcv_waitforss, nlropkts);
                }
                if (tcp_stretch_ack_enable(tp)) {
                        tp->t_flags |= TF_STRETCHACK;
@@ -1659,7 +2057,13 @@ findpcb:
                        tp->rcv_by_unackwin = tlen + off;
                }
        }
-       
+
+       /* 
+        * Keep track of how many bytes were received in the LRO packet
+        */
+       if ((mauxf_sw_lro_pkt) && (nlropkts > 2)) {
+               tp->t_lropktlen += tlen;
+       }
        /*
           Explicit Congestion Notification - Flag that we need to send ECT if
                + The IP Congestion experienced flag was set.
@@ -1671,8 +2075,7 @@ findpcb:
                TE_SENDECE will be cleared when we receive a packet with TH_CWR set.
         */
        if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED &&
-               (tp->ecn_flags & (TE_SETUPSENT | TE_SETUPRECEIVED)) ==
-                (TE_SETUPSENT | TE_SETUPRECEIVED) && tlen > 0 &&
+               ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)) && tlen > 0 &&
                SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
                SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
                tp->ecn_flags |= TE_SENDECE;
@@ -1693,7 +2096,19 @@ findpcb:
        if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_STRETCHACK) != 0 &&
                ((ip_ecn == IPTOS_ECN_CE) || ((thflags & TH_CWR) == TH_CWR)))
                tcp_reset_stretch_ack(tp);
-       
+
+       /*
+        * Try to determine if we are receiving a packet after a long time.
+        * Use our own approximation of idle time to roughly measure the
+        * remote end's idle time. Since slow start is used after an idle
+        * period, we want to avoid doing LRO if the remote end is not up
+        * to date on initial window support and starts with 1 or 2 packets
+        * as its IW.
+        */
+       if (sw_lro && (tp->t_flagsext & TF_LRO_OFFLOADED) &&
+               ((tcp_now - tp->t_rcvtime) >= (TCP_IDLETIMEOUT(tp)))) {
+               turnoff_lro = 1;
+       }
+
        /*
         * Segment received on connection.
         * Reset idle time and keep-alive timer.
@@ -1748,25 +2163,29 @@ findpcb:
             TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
            th->th_seq == tp->rcv_nxt &&
            LIST_EMPTY(&tp->t_segq)) {
+               int seg_size = tlen;
                if (tp->iaj_pktcnt <= IAJ_IGNORE_PKTCNT) {
-                       tp->iaj_pktcnt++;
+                       TCP_INC_VAR(tp->iaj_pktcnt, nlropkts);
                }
 
-               if ( tp->iaj_size == 0 || tlen > tp->iaj_size ||
-                       (tlen == tp->iaj_size && tp->iaj_rcv_ts == 0)) {
+               if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) {
+                       seg_size = m->m_pkthdr.lro_pktlen;
+               }
+               if (tp->iaj_size == 0 || seg_size > tp->iaj_size ||
+                       (seg_size == tp->iaj_size && tp->iaj_rcv_ts == 0)) {
                        /* State related to inter-arrival jitter is uninitialized 
                         * or we are trying to find a good first packet to start 
                         * computing the metric
                         */
-                       update_iaj_state(tp, tlen, 0);
+                       update_iaj_state(tp, seg_size, 0);
                } else {
-                       if (tlen == tp->iaj_size) {
+                       if (seg_size == tp->iaj_size) {
                                /* Compute inter-arrival jitter taking this packet 
                                 * as the second packet
                                 */
                                compute_iaj(tp);
                        } 
-                       if (tlen  < tp->iaj_size) {
+                       if (seg_size < tp->iaj_size) {
                                /* There is a smaller packet in the stream.
                                 * Sometimes the maximum size supported on a path can
                                 * change if there is a new link with smaller MTU. 
@@ -1776,16 +2195,16 @@ findpcb:
                                 */
                                tp->iaj_small_pkt++;
                                if (tp->iaj_small_pkt > RESET_IAJ_SIZE_THRESH) {
-                                       update_iaj_state(tp, tlen, 1);
+                                       update_iaj_state(tp, seg_size, 1);
                                } else {
-                                       clear_iaj_state(tp);
+                                       CLEAR_IAJ_STATE(tp);
                                }
                        } else {
-                               update_iaj_state(tp, tlen, 0);
+                               update_iaj_state(tp, seg_size, 0);
                        }
                }
        } else {
-               clear_iaj_state(tp);
+               CLEAR_IAJ_STATE(tp);
        }
 #endif /* TRAFFIC_MGT */
 
@@ -1860,6 +2279,7 @@ findpcb:
                                        tp->t_badrxtwin = 0;
                                        tp->t_rxtshift = 0;
                                        tp->rxt_start = 0;
+                                       tcp_bad_rexmt_fix_sndbuf(tp);
                                        DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
                                                struct tcpcb *, tp, struct tcphdr *, th,
                                                int32_t, TCP_CC_BAD_REXMT_RECOVERY);
@@ -1875,9 +2295,9 @@ findpcb:
                                if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0) &&
                                        TSTMP_GEQ(tcp_now, to.to_tsecr)) { 
                                        tcp_xmit_timer(tp,
-                                           tcp_now - to.to_tsecr);
+                                               tcp_now - to.to_tsecr);
                                } else if (tp->t_rtttime &&
-                                           SEQ_GT(th->th_ack, tp->t_rtseq)) {
+                                       SEQ_GT(th->th_ack, tp->t_rtseq)) {
                                        tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
                                }
                                acked = th->th_ack - tp->snd_una;
@@ -1896,10 +2316,13 @@ findpcb:
                                        int32_t, TCP_CC_INSEQ_ACK_RCVD);
 
                                sbdrop(&so->so_snd, acked);
+                               tcp_sbsnd_trim(&so->so_snd);
+
                                if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
                                    SEQ_LEQ(th->th_ack, tp->snd_recover))
                                        tp->snd_recover = th->th_ack - 1;
                                tp->snd_una = th->th_ack;
+
                                /*
                                 * pull snd_wl2 up to prevent seq wrap relative
                                 * to th_ack.
@@ -1923,6 +2346,9 @@ findpcb:
                                else if (tp->t_timer[TCPT_PERSIST] == 0)
                                        tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
+                               if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
+                                       tp->t_bwmeas != NULL)
+                                       tcp_bwmeas_check(tp);
                                sowwakeup(so); /* has to be done with socket lock held */
                                if ((so->so_snd.sb_cc) || (tp->t_flags & TF_ACKNOW)) {
                                        (void) tcp_output(tp);
@@ -1941,6 +2367,31 @@ findpcb:
                         * with nothing on the reassembly queue and
                         * we have enough buffer space to take it.
                         */
+
+                       /*
+                        * If this is a connection in steady state, start
+                        * coalescing packets belonging to this flow.
+                        */
+                       if (turnoff_lro) {
+                               tcp_lro_remove_state(tp->t_inpcb->inp_laddr,
+                                tp->t_inpcb->inp_faddr,
+                                tp->t_inpcb->inp_lport, 
+                                tp->t_inpcb->inp_fport);
+                               tp->t_flagsext &= ~TF_LRO_OFFLOADED;
+                               tp->t_idleat = tp->rcv_nxt;
+                       } else if (sw_lro && !mauxf_sw_lro_pkt && !isipv6 &&
+                           (so->so_flags & SOF_USELRO) &&      
+                           (m->m_pkthdr.rcvif->if_type != IFT_CELLULAR) &&
+                           (m->m_pkthdr.rcvif->if_type != IFT_LOOP) &&
+                           ((th->th_seq - tp->irs) > 
+                               (tp->t_maxseg << lro_start)) &&
+                           ((tp->t_idleat == 0) || ((th->th_seq - 
+                            tp->t_idleat) > (tp->t_maxseg << lro_start)))) {
+                               tp->t_flagsext |= TF_LRO_OFFLOADED;
+                               tcp_start_coalescing(ip, th, tlen);
+                               tp->t_idleat = 0;
+                       }
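
Condensed, the start-coalescing test above amounts to the following
(a standalone illustration; the flags and the shift stand in for the
kernel state and the lro_start sysctl tested above):

    /* Coalesce only an IPv4 flow that opted in via SOF_USELRO, is not
     * already coalesced, arrives on neither a cellular nor a loopback
     * interface, and has advanced well past slow start. */
    int sw_lro_on = 1, already_lro = 0, is_ipv6 = 0, uses_lro = 1;
    int iface_ok = 1;                        /* not cellular/loopback */
    u_int32_t seq_progress = 90000, maxseg = 1448, start_shift = 4;
    int start_lro = sw_lro_on && !already_lro && !is_ipv6 && uses_lro &&
        iface_ok && (seq_progress > (maxseg << start_shift));
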
+
                        /* Clean receiver SACK report if present */
                        if (tp->sack_enable && tp->rcv_numsacks)
                                tcp_clean_sackreport(tp);
@@ -1956,13 +2407,21 @@ findpcb:
                         * rcv_nxt.
                         */
                        tp->rcv_up = tp->rcv_nxt;
-                       tcpstat.tcps_rcvpack++;
+                       TCP_INC_VAR(tcpstat.tcps_rcvpack, nlropkts);
                        tcpstat.tcps_rcvbyte += tlen;
                        if (nstat_collect) {
-                               locked_add_64(&inp->inp_stat->rxpackets, 1);
+                               if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) {
+                                       locked_add_64(&inp->inp_stat->rxpackets,
+                                           m->m_pkthdr.lro_npkts);
+                               } else {
+                                       locked_add_64(&inp->inp_stat->rxpackets, 1);
+                               }
                                locked_add_64(&inp->inp_stat->rxbytes, tlen);
                        }
                        ND6_HINT(tp);   /* some progress has been done */
+
+                       tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen);
+                       
                        /*
                         * Add data to socket buffer.
                         */
@@ -1983,12 +2442,12 @@ findpcb:
                                        (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
                                        th->th_seq, th->th_ack, th->th_win); 
                        }
+                       TCP_INC_VAR(tp->t_unacksegs, nlropkts);
                        if (DELAY_ACK(tp, th))  {
                                if ((tp->t_flags & TF_DELACK) == 0) {
                                        tp->t_flags |= TF_DELACK;
                                        tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
                                }
-                               tp->t_unacksegs++;
                        } else {
                                tp->t_flags |= TF_ACKNOW;
                                tcp_output(tp);
@@ -2117,6 +2576,9 @@ findpcb:
                        tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
                dropsocket = 0;         /* committed to socket */
 
+               if (inp->inp_flowhash == 0)
+                       inp->inp_flowhash = inp_calc_flowhash(inp);
+
                /* reset the incomp processing flag */
                so->so_flags &= ~(SOF_INCOMP_INPROGRESS);
                tcpstat.tcps_accepts++;
@@ -2124,6 +2586,7 @@ findpcb:
                        /* ECN-setup SYN */
                        tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT);
                }
+
 #if CONFIG_IFEF_NOWINDOWSCALE
                if (tcp_obey_ifef_nowindowscale && m->m_pkthdr.rcvif != NULL &&
                    (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE)) {
@@ -2143,6 +2606,10 @@ findpcb:
                    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
                     SEQ_GT(th->th_ack, tp->snd_max))) {
                                rstreason = BANDLIM_RST_OPENPORT;
+                               
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->ooopacket, 1);
+                                       
                                goto dropwithreset;
                }
                break;
@@ -2164,10 +2631,17 @@ findpcb:
                    (SEQ_LEQ(th->th_ack, tp->iss) ||
                     SEQ_GT(th->th_ack, tp->snd_max))) {
                        rstreason = BANDLIM_UNLIMITED;
+                       
+                       if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                               atomic_add_64(&ifp->if_tcp_stat->ooopacket, 1);
+                               
                        goto dropwithreset;
                }
                if (thflags & TH_RST) {
                        if ((thflags & TH_ACK) != 0) {
+                               soevent(so, 
+                                   (SO_FILT_HINT_LOCKED |
+                                   SO_FILT_HINT_CONNRESET));
                                tp = tcp_drop(tp, ECONNREFUSED);
                                postevent(so, 0, EV_RESET);
                        }
@@ -2202,18 +2676,18 @@ findpcb:
                                tp->snd_scale = tp->requested_s_scale;
                                tp->rcv_scale = tp->request_r_scale;
                        }
-                       tp->rcv_adv += tp->rcv_wnd;
+                       tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale);
                        tp->snd_una++;          /* SYN is acked */
                        /*
                         * If there's data, delay ACK; if there's also a FIN
                         * ACKNOW will be turned on later.
                         */
-                       if (DELAY_ACK(tp, th) && tlen != 0) {
+                       TCP_INC_VAR(tp->t_unacksegs, nlropkts);
+                       if (DELAY_ACK(tp, th) && tlen != 0) {
                                if ((tp->t_flags & TF_DELACK) == 0) {
                                        tp->t_flags |= TF_DELACK;
                                        tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
                                }
-                               tp->t_unacksegs++;
                        }
                        else {
                                tp->t_flags |= TF_ACKNOW;
@@ -2225,6 +2699,7 @@ findpcb:
                         *      SYN_SENT* --> FIN_WAIT_1
                         */
                        tp->t_starttime = tcp_now;
+                       tcp_sbrcv_tstmp_check(tp);
                        if (tp->t_flags & TF_NEEDFIN) {
                                DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
                                        struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
@@ -2382,6 +2857,8 @@ trimthenstep6:
                        switch (tp->t_state) {
 
                        case TCPS_SYN_RECEIVED:
+                               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                                       atomic_add_64(&ifp->if_tcp_stat->rstinsynrcv, 1);
                                so->so_error = ECONNREFUSED;
                                goto close;
 
@@ -2401,6 +2878,11 @@ trimthenstep6:
                                postevent(so, 0, EV_RESET);
                                DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
                                        struct tcpcb *, tp, int32_t, TCPS_CLOSED);
+
+                               soevent(so,
+                                   (SO_FILT_HINT_LOCKED |
+                                   SO_FILT_HINT_CONNRESET));
+
                                tp->t_state = TCPS_CLOSED;
                                tcpstat.tcps_drops++;
                                tp = tcp_close(tp);
@@ -2464,6 +2946,10 @@ trimthenstep6:
         */
        if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
                rstreason = BANDLIM_RST_OPENPORT;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->dospacket, 1);
+                       
                goto dropwithreset;
        }
 
@@ -2495,9 +2981,14 @@ trimthenstep6:
                         * But keep on processing for RST or ACK.
                         */
                        tp->t_flags |= TF_ACKNOW;
+                       if (todrop == 1) {
+                               /* This could be a keepalive */
+                               soevent(so, SO_FILT_HINT_LOCKED |
+                                       SO_FILT_HINT_KEEPALIVE);
+                       }
                        todrop = tlen;
                        tcpstat.tcps_rcvduppack++;
-                       tcpstat.tcps_rcvdupbyte += todrop;
+                       tcpstat.tcps_rcvdupbyte += todrop; 
                } else {
                        tcpstat.tcps_rcvpartduppack++;
                        tcpstat.tcps_rcvpartdupbyte += todrop;
@@ -2528,6 +3019,10 @@ trimthenstep6:
                tp = tcp_close(tp);
                tcpstat.tcps_rcvafterclose++;
                rstreason = BANDLIM_UNLIMITED;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->cleanup, 1);
+                       
                goto dropwithreset;
        }
 
@@ -2607,6 +3102,10 @@ trimthenstep6:
                tp = tcp_drop(tp, ECONNRESET);
                rstreason = BANDLIM_UNLIMITED;
                postevent(so, 0, EV_RESET);
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->synwindow, 1);
+                       
                goto dropwithreset;
        }
 
@@ -2653,6 +3152,7 @@ trimthenstep6:
                 *      SYN-RECEIVED* -> FIN-WAIT-1
                 */
                tp->t_starttime = tcp_now;
+               tcp_sbrcv_tstmp_check(tp);
                if (tp->t_flags & TF_NEEDFIN) {
                        DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
                                struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
@@ -2787,12 +3287,14 @@ trimthenstep6:
                                         * before entering FR, call it
                                         */
                                        if (CC_ALGO(tp)->pre_fr != NULL)
-                                               CC_ALGO(tp)->pre_fr(tp, th);
+                                               CC_ALGO(tp)->pre_fr(tp);
                                        ENTER_FASTRECOVERY(tp);
                                        tp->snd_recover = tp->snd_max;
                                        tp->t_timer[TCPT_REXMT] = 0;
                                        tp->t_rtttime = 0;
-                                       tp->ecn_flags |= TE_SENDCWR;
+                                       if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) {
+                                               tp->ecn_flags |= TE_SENDCWR;
+                                       }
                                        if (tp->sack_enable) {
                                                tcpstat.tcps_sack_recovery_episode++;
                                                tp->sack_newdata = tp->snd_nxt;
@@ -2900,6 +3402,7 @@ process_ACK:
                        tp->t_badrxtwin = 0;    /* XXX probably not required */ 
                        tp->t_rxtshift = 0;
                        tp->rxt_start = 0;
+                       tcp_bad_rexmt_fix_sndbuf(tp);
 
                        DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
                                struct tcpcb *, tp, struct tcphdr *, th,
@@ -2948,13 +3451,13 @@ process_ACK:
                        goto step6;
 
                if ((thflags & TH_ECE) != 0 &&
-                       (tp->ecn_flags & TE_SETUPSENT) != 0) {
+                       ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON)) {
                        /*
                         * Reduce the congestion window if we haven't done so.
                         */
                        if (!tp->sack_enable && !IN_FASTRECOVERY(tp) &&
                                SEQ_GEQ(th->th_ack, tp->snd_recover)) {
-                               tcp_reduce_congestion_window(tp, th);
+                               tcp_reduce_congestion_window(tp);
                                DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
                                        struct tcpcb *, tp, struct tcphdr *, th, 
                                        int32_t, TCP_CC_ECN_RCVD);
@@ -2983,6 +3486,7 @@ process_ACK:
                        ourfinisacked = 1;
                } else {
                        sbdrop(&so->so_snd, acked);
+                       tcp_sbsnd_trim(&so->so_snd);
                        tp->snd_wnd -= acked;
                        ourfinisacked = 0;
                }
@@ -3003,7 +3507,10 @@ process_ACK:
                }
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
-                       
+               if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
+                       tp->t_bwmeas != NULL)
+                       tcp_bwmeas_check(tp);
+
                /*
                 * sowwakeup must happen after snd_una, et al. are updated so that
                 * the sequence numbers are in sync with so_snd
@@ -3219,25 +3726,32 @@ dodata:
                if (th->th_seq == tp->rcv_nxt &&
                    LIST_EMPTY(&tp->t_segq) &&
                    TCPS_HAVEESTABLISHED(tp->t_state)) {
-                       if (DELAY_ACK(tp, th) && ((tp->t_flags & TF_ACKNOW) == 0)) {
+                       TCP_INC_VAR(tp->t_unacksegs, nlropkts);
+                       if (DELAY_ACK(tp, th) &&
+                               ((tp->t_flags & TF_ACKNOW) == 0)) {
                                if ((tp->t_flags & TF_DELACK) == 0) {
                                        tp->t_flags |= TF_DELACK;
                                        tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
                                }
-                               tp->t_unacksegs++;
                        }         
                        else {
                                tp->t_flags |= TF_ACKNOW;
                        }
                        tp->rcv_nxt += tlen;
                        thflags = th->th_flags & TH_FIN;
-                       tcpstat.tcps_rcvpack++;
+                       TCP_INC_VAR(tcpstat.tcps_rcvpack, nlropkts);
                        tcpstat.tcps_rcvbyte += tlen;
                        if (nstat_collect) {
-                               locked_add_64(&inp->inp_stat->rxpackets, 1);
+                               if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) {
+                                       locked_add_64(&inp->inp_stat->rxpackets,
+                                           m->m_pkthdr.lro_npkts);
+                               } else {
+                                       locked_add_64(&inp->inp_stat->rxpackets, 1);
+                               }
                                locked_add_64(&inp->inp_stat->rxbytes, tlen);
                        }
                        ND6_HINT(tp);
+                       
+                       tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen);
                        so_recv_data_stat(so, m, drop_hdrlen);
                        if (sbappendstream(&so->so_rcv, m))
                                sorwakeup(so);
@@ -3266,14 +3780,6 @@ dodata:
                        }
                                
                }
-               /*
-                * Note the amount of data that peer has sent into
-                * our window, in order to estimate the sender's
-                * buffer size.
-                */
-               len = (u_int)(so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt));
-               if (len > so->so_rcv.sb_maxused)
-                       so->so_rcv.sb_maxused = len;
        } else {
                m_freem(m);
                thflags &= ~TH_FIN;
@@ -3288,20 +3794,18 @@ dodata:
                        socantrcvmore(so);
                        postevent(so, 0, EV_FIN);
                        /*
-                        *  If connection is half-synchronized
-                        *  (ie NEEDSYN flag on) then delay ACK,
                         * If connection is half-synchronized
                         * (ie NEEDSYN flag on) then delay ACK,
                         * so it may be piggybacked when SYN is sent.
                         * Otherwise, since we received a FIN then no
                         * more input can be expected, send ACK now.
                         */
+                       TCP_INC_VAR(tp->t_unacksegs, nlropkts);
                        if (DELAY_ACK(tp, th) && (tp->t_flags & TF_NEEDSYN)) {
                                if ((tp->t_flags & TF_DELACK) == 0) {
                                        tp->t_flags |= TF_DELACK;
                                        tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
                                }
-                               tp->t_unacksegs++;
                        }
                        else {
                                tp->t_flags |= TF_ACKNOW;
@@ -3403,6 +3907,10 @@ dropafterack:
            (SEQ_GT(tp->snd_una, th->th_ack) ||
             SEQ_GT(th->th_ack, tp->snd_max)) ) {
                rstreason = BANDLIM_RST_OPENPORT;
+               
+               if (ifp != NULL && ifp->if_tcp_stat != NULL)
+                       atomic_add_64(&ifp->if_tcp_stat->dospacket, 1);
+                       
                goto dropwithreset;
        }
 #if TCPDEBUG
@@ -3723,8 +4231,9 @@ tcp_xmit_timer(tp, rtt)
                delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
                if ((tp->t_rttvar += delta) <= 0)
                        tp->t_rttvar = 1;
-               if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
-                   tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+               if (tp->t_rttbest == 0  || 
+                       tp->t_rttbest > (tp->t_srtt + tp->t_rttvar))
+                       tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
        } else {
                /*
                 * No rtt measurement yet - use the unsmoothed rtt.
@@ -3733,7 +4242,6 @@ tcp_xmit_timer(tp, rtt)
                 */
                tp->t_srtt = rtt << TCP_RTT_SHIFT;
                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
-               tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
        }
        nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, tp->t_rttvar);
        tp->t_rtttime = 0;
@@ -3784,13 +4292,20 @@ static inline unsigned int
 tcp_maxmtu6(struct rtentry *rt)
 {
        unsigned int maxmtu;
+       struct nd_ifinfo *ndi;
 
        RT_LOCK_ASSERT_HELD(rt);
        lck_rw_lock_shared(nd_if_rwlock);
+       if ((ndi = ND_IFINFO(rt->rt_ifp)) != NULL && !ndi->initialized)
+               ndi = NULL;
+       if (ndi != NULL)
+               lck_mtx_lock(&ndi->lock);
        if (rt->rt_rmx.rmx_mtu == 0)
                maxmtu = IN6_LINKMTU(rt->rt_ifp);
        else
                maxmtu = MIN(rt->rt_rmx.rmx_mtu, IN6_LINKMTU(rt->rt_ifp));
+       if (ndi != NULL)
+               lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
 
        return (maxmtu);
@@ -3935,33 +4450,11 @@ tcp_mss(tp, offer, input_ifscope)
         * or rttvar.  Convert from the route-table units
         * to scaled multiples of the slow timeout timer.
         */
-       if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
-               /*
-                * XXX the lock bit for RTT indicates that the value
-                * is also a minimum value; this is subject to time.
-                */
-               if (rt->rt_rmx.rmx_locks & RTV_RTT)
-                       tp->t_rttmin = rtt / (RTM_RTTUNIT / TCP_RETRANSHZ);
-               else
-                       tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCPTV_REXMTMIN;
-               tp->t_srtt = rtt / (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE));
-               tcpstat.tcps_usedrtt++;
-               if (rt->rt_rmx.rmx_rttvar) {
-                       tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
-                           (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTTVAR_SCALE));
-                       tcpstat.tcps_usedrttvar++;
-               } else {
-                       /* default variation is +- 1 rtt */
-                       tp->t_rttvar =
-                           tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
-               }
-               TCPT_RANGESET(tp->t_rxtcur,
-                             ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-                             tp->t_rttmin, TCPTV_REXMTMAX, 
-                             TCP_ADD_REXMTSLOP(tp));
-       }
-       else
+       if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt) != 0) {
+               tcp_getrt_rtt(tp, rt);
+       } else {
                tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCPTV_REXMTMIN;
+       }
 
 #if INET6
        mss = (isipv6 ? tcp_maxmtu6(rt) : tcp_maxmtu(rt));
@@ -4289,7 +4782,6 @@ tcp_dropdropablreq(struct socket *head)
                lck_rw_lock_exclusive(tcbinfo.mtx);
 
                tcp_lock(so, 0, 0);
-
                /* Release the reference held for so_incomp queue */
                so->so_usecount--;
 
@@ -4362,6 +4854,54 @@ tcp_set_new_cc(struct socket *so, uint16_t cc_index)
        }
 }
 
+void
+tcp_set_recv_bg(struct socket *so)
+{
+       if (!IS_TCP_RECV_BG(so))
+               so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG;
+}
+
+void
+tcp_clear_recv_bg(struct socket *so)
+{
+       if (IS_TCP_RECV_BG(so))
+               so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
+}
+
+void
+inp_fc_unthrottle_tcp(struct inpcb *inp)
+{
+       struct tcpcb *tp = inp->inp_ppcb;
+       /*
+        * Back off the slow-start threshold and enter
+        * congestion avoidance phase
+        */
+       if (CC_ALGO(tp)->pre_fr != NULL)
+               CC_ALGO(tp)->pre_fr(tp);
+
+       tp->snd_cwnd = tp->snd_ssthresh;
+
+       /*
+        * Restart counting for ABC as we changed the
+        * congestion window just now.
+        */
+       tp->t_bytes_acked = 0;
+
+       /* Reset the retransmit shift: we know the delay in sending a
+        * packet is due to flow control on the outgoing interface,
+        * so there is no need to back off the retransmit timer.
+        */
+       tp->t_rxtshift = 0;
+
+       /*
+        * Start the output stream again. Since we are
+        * not retransmitting data, do not reset the
+        * retransmit timer or rtt calculation.
+        */
+       tcp_output(tp);
+}
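
For context, a sketch of the intended effect with hypothetical numbers
(the caller would be the flow-advisory path that runs once a throttled
interface queue drains):

    /* Suppose pre_fr() backs the slow-start threshold off to 32 segments
     * while snd_cwnd sat at 64. Setting snd_cwnd = snd_ssthresh resumes
     * transmission in congestion avoidance instead of re-entering slow
     * start, and clearing t_rxtshift forgets backoff that was caused by
     * interface flow control rather than loss. */
    u_int32_t maxseg = 1448;
    u_int32_t snd_cwnd = 64 * maxseg;        /* before throttling */
    u_int32_t snd_ssthresh = 32 * maxseg;    /* after pre_fr() backoff */
    snd_cwnd = snd_ssthresh;                 /* resume in cong. avoidance */
    u_int32_t t_rxtshift = 0;                /* forget rexmt backoff */
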
+
 static int
 tcp_getstat SYSCTL_HANDLER_ARGS
 {
index 5baf28bea3ddfd78277350f13198e1053748dde1..1d1d5e5e7ab28f8d870efed24058b7600a58cf68 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -60,7 +60,7 @@ int tcp_ledbat_cleanup(struct tcpcb *tp);
 void tcp_ledbat_cwnd_init(struct tcpcb *tp);
 void tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
 void tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
-void tcp_ledbat_pre_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_pre_fr(struct tcpcb *tp);
 void tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th);
 void tcp_ledbat_after_idle(struct tcpcb *tp);
 void tcp_ledbat_after_timeout(struct tcpcb *tp);
@@ -290,9 +290,7 @@ tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
 }
 
 void
-tcp_ledbat_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
-#pragma unused(th)
-
+tcp_ledbat_pre_fr(struct tcpcb *tp) {
        uint32_t win;
 
        win = min(tp->snd_wnd, tp->snd_cwnd) / 
@@ -302,6 +300,8 @@ tcp_ledbat_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
        tp->snd_ssthresh = win * tp->t_maxseg; 
        if (tp->bg_ssthresh > tp->snd_ssthresh)
                tp->bg_ssthresh = tp->snd_ssthresh;
+
+       tcp_cc_resize_sndbuf(tp);
 }
 
 void
@@ -380,6 +380,8 @@ tcp_ledbat_after_timeout(struct tcpcb *tp) {
 
                if (tp->bg_ssthresh > tp->snd_ssthresh)
                        tp->bg_ssthresh = tp->snd_ssthresh;
+
+               tcp_cc_resize_sndbuf(tp);
        }
 }
 
@@ -401,7 +403,7 @@ int
 tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th) {
        if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
                (th->th_flags & TH_PUSH) == 0 &&
-               (tp->t_flags & TF_DELACK) == 0)
+               (tp->t_unacksegs == 1))
                return(1);
        return(0);
 }
diff --git a/bsd/netinet/tcp_lro.c b/bsd/netinet/tcp_lro.c
new file mode 100644 (file)
index 0000000..55ebb0e
--- /dev/null
@@ -0,0 +1,997 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/mcache.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/lro_ext.h>
+#include <kern/locks.h>
+
+unsigned int lrocount = 0; /* A counter used for debugging only */
+unsigned int lro_seq_outoforder = 0; /* Counter for debugging */
+unsigned int lro_seq_mismatch = 0; /* Counter for debugging */
+unsigned int lro_eject_req = 0; /* Counter for tracking flow ejections */
+unsigned int lro_flushes = 0; /* Counter for tracking number of flushes */
+unsigned int lro_single_flushes = 0;
+unsigned int lro_double_flushes = 0;
+unsigned int lro_good_flushes = 0;
+
+unsigned int coalesc_sz = LRO_MX_COALESCE_PKTS;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_sz, CTLFLAG_RW | CTLFLAG_LOCKED,
+               &coalesc_sz, 0, "Max coalescing size");
+
+unsigned int coalesc_time = LRO_MX_TIME_TO_BUFFER;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_time, CTLFLAG_RW | CTLFLAG_LOCKED,
+               &coalesc_time, 0, "Max coalescing time");
+
+struct lro_flow lro_flow_list[TCP_LRO_NUM_FLOWS]; 
+
+char lro_flow_map[TCP_LRO_FLOW_MAP]; 
+
+static lck_attr_t *tcp_lro_mtx_attr = NULL;            /* mutex attributes */
+static lck_grp_t *tcp_lro_mtx_grp = NULL;              /* mutex group */
+static lck_grp_attr_t *tcp_lro_mtx_grp_attr = NULL;    /* mutex group attrs */
+decl_lck_mtx_data( ,tcp_lro_lock);     /* Used to synchronize updates */
+
+unsigned int lro_byte_count = 0;
+
+uint64_t lro_deadline = 0; /* LRO's sense of time - protected by tcp_lro_lock */
+uint32_t lro_timer_set = 0;
+
+/* Some LRO stats */
+u_int32_t lro_pkt_count = 0; /* Number of packets encountered in an LRO period */
+thread_call_t tcp_lro_timer;
+
+extern u_int32_t kipf_count;
+
+static void    tcp_lro_timer_proc(void*, void*);
+static void    lro_update_stats(struct mbuf*);
+static void    lro_update_flush_stats(struct mbuf *);
+static void    tcp_lro_flush_flows(void);
+static void    tcp_lro_sched_timer(uint64_t);
+static void    lro_proto_input(struct mbuf *);
+
+static struct mbuf *lro_tcp_xsum_validate(struct mbuf*,  struct ipovly *,
+                               struct tcphdr*);
+static struct mbuf *tcp_lro_process_pkt(struct mbuf*, struct ip*, struct tcphdr*,
+                               int);
+
+void
+tcp_lro_init(void)
+{
+       int i;
+
+       bzero(lro_flow_list, sizeof (struct lro_flow) * TCP_LRO_NUM_FLOWS);
+       for (i = 0; i < TCP_LRO_FLOW_MAP; i++) {
+               lro_flow_map[i] = TCP_LRO_FLOW_UNINIT;
+       }
+
+       /*
+        * allocate lock group attribute, group and attribute for tcp_lro_lock
+        */
+       tcp_lro_mtx_grp_attr = lck_grp_attr_alloc_init();
+       tcp_lro_mtx_grp = lck_grp_alloc_init("tcplro", tcp_lro_mtx_grp_attr);
+       tcp_lro_mtx_attr = lck_attr_alloc_init();
+       lck_mtx_init(&tcp_lro_lock, tcp_lro_mtx_grp, tcp_lro_mtx_attr);
+
+       tcp_lro_timer = thread_call_allocate(tcp_lro_timer_proc, NULL);
+       if (tcp_lro_timer == NULL) {
+               panic_plain("%s: unable to allocate lro timer", __func__);
+       }
+
+       return;
+}
+
+static int
+tcp_lro_matching_tuple(struct ip* ip_hdr, struct tcphdr *tcp_hdr, int *hash, 
+                       int *flow_id )
+{
+       struct lro_flow *flow;
+       tcp_seq seqnum;
+       unsigned int off = 0;
+       int payload_len = 0;
+
+       *hash = LRO_HASH(ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr, 
+               tcp_hdr->th_sport, tcp_hdr->th_dport, (TCP_LRO_FLOW_MAP - 1));
+
+       *flow_id = lro_flow_map[*hash];
+       if (*flow_id == TCP_LRO_FLOW_NOTFOUND) {
+               return TCP_LRO_NAN;
+       }
+
+       seqnum = tcp_hdr->th_seq;
+       off = tcp_hdr->th_off << 2;
+       payload_len = ip_hdr->ip_len - off;
+
+       flow = &lro_flow_list[*flow_id];
+
+       if ((flow->lr_faddr.s_addr == ip_hdr->ip_src.s_addr) &&
+                       (flow->lr_laddr.s_addr == ip_hdr->ip_dst.s_addr) &&
+                       (flow->lr_fport == tcp_hdr->th_sport) &&
+                       (flow->lr_lport == tcp_hdr->th_dport)) {
+               if (flow->lr_tcphdr == NULL) {
+                       if (ntohl(seqnum) == flow->lr_seq) {
+                               return TCP_LRO_COALESCE;
+                       }
+                       if (lrodebug >= 4) {
+                               printf("%s: seqnum = %x, lr_seq = %x\n",
+                                       __func__, ntohl(seqnum), flow->lr_seq);
+                       }
+                       lro_seq_mismatch++;
+                       if (SEQ_GT(ntohl(seqnum), flow->lr_seq)) {
+                               lro_seq_outoforder++;
+                               /*
+                                * Out-of-order packets signal loss and
+                                * recovery, and LRO keeps flows from
+                                * recovering quickly, so eject the flow.
+                                */
+                                flow->lr_flags |= LRO_EJECT_REQ;
+
+                       }
+                       return TCP_LRO_NAN;
+               }
+
+               if (flow->lr_flags & LRO_EJECT_REQ) {
+                       if (lrodebug)
+                               printf("%s: eject. \n", __func__);
+                       return TCP_LRO_EJECT_FLOW;
+               }
+               if (SEQ_GT(tcp_hdr->th_ack, flow->lr_tcphdr->th_ack)) { 
+                       if (lrodebug) {
+                               printf("%s: th_ack = %x flow_ack = %x \n", 
+                                       __func__, tcp_hdr->th_ack, 
+                                       flow->lr_tcphdr->th_ack);
+                       }
+                       return TCP_LRO_EJECT_FLOW;
+               }
+
+               if (ntohl(seqnum) == (ntohl(lro_flow_list[*flow_id].lr_tcphdr->th_seq) + lro_flow_list[*flow_id].lr_len)) { 
+                       return TCP_LRO_COALESCE;
+               } else {
+                       /* LRO does not handle loss recovery well, eject */
+                       flow->lr_flags |= LRO_EJECT_REQ;
+                       return TCP_LRO_EJECT_FLOW;
+               }
+       }
+       if (lrodebug) printf("tcp_lro_matching_tuple: collision \n");
+       return TCP_LRO_COLLISION;
+}
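+
+/*
+ * Summary of the results above, as consumed by tcp_lro_process_pkt():
+ *   TCP_LRO_COALESCE    - in-sequence segment of a tracked flow; append it.
+ *   TCP_LRO_NAN         - no usable flow state; pass the packet through.
+ *   TCP_LRO_EJECT_FLOW  - flush the coalesced chain, then pass through.
+ *   TCP_LRO_COLLISION   - the hash slot is owned by a different 4-tuple.
+ */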
+
+static void
+tcp_lro_init_flow(int flow_id, struct ip* ip_hdr, struct tcphdr *tcp_hdr, 
+                       int hash, u_int32_t timestamp, int payload_len)
+{
+       struct lro_flow *flow = NULL;
+
+       flow = &lro_flow_list[flow_id];
+
+       flow->lr_hash_map = hash;
+       flow->lr_faddr.s_addr = ip_hdr->ip_src.s_addr;
+       flow->lr_laddr.s_addr = ip_hdr->ip_dst.s_addr;
+       flow->lr_fport = tcp_hdr->th_sport;
+       flow->lr_lport = tcp_hdr->th_dport;
+       lro_flow_map[hash] = flow_id;
+       flow->lr_timestamp = timestamp;
+       flow->lr_seq = ntohl(tcp_hdr->th_seq) + payload_len;
+       flow->lr_flags = 0;
+       return;
+}
+
+static void
+tcp_lro_coalesce(int flow_id, struct mbuf *lro_mb, struct tcphdr *tcphdr, 
+                       int payload_len, int drop_hdrlen, struct tcpopt *topt, 
+                       u_int32_t* tsval, u_int32_t* tsecr, int thflags)
+{
+       struct lro_flow *flow = NULL;
+       struct mbuf *last;
+       struct ip *ip = NULL;
+
+       flow =  &lro_flow_list[flow_id];
+       if (flow->lr_mhead) {
+               if (lrodebug) 
+                       printf("%s: lr_mhead %x %d \n", __func__, flow->lr_seq,
+                               payload_len);
+               m_adj(lro_mb, drop_hdrlen);
+
+               last = flow->lr_mtail;
+               while (last->m_next != NULL) {
+                       last = last->m_next;
+               }
+               last->m_next = lro_mb;
+
+               flow->lr_mtail = lro_mb;
+
+               ip = mtod(flow->lr_mhead, struct ip *);
+               ip->ip_len += lro_mb->m_pkthdr.len;
+               flow->lr_mhead->m_pkthdr.len += lro_mb->m_pkthdr.len;
+
+               if (flow->lr_len == 0) {
+                       panic_plain("%s: Inconsistent LRO flow state", __func__);
+               }
+               flow->lr_len += payload_len;
+               flow->lr_seq += payload_len;
+               /* 
+                * This bit is re-OR'd each time a packet is added to the 
+                * large coalesced packet.
+                */
+               flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT;
+               flow->lr_mhead->m_pkthdr.lro_npkts++; /* for tcpstat.tcps_rcvpack */
+               if (flow->lr_mhead->m_pkthdr.lro_pktlen < 
+                               lro_mb->m_pkthdr.lro_pktlen) {
+                       /*
+                        * For the TCP inter-arrival jitter calculation,
+                        * return the max packet size encountered while
+                        * coalescing a stream of packets.
+                        */
+                       flow->lr_mhead->m_pkthdr.lro_pktlen = 
+                                               lro_mb->m_pkthdr.lro_pktlen;
+               }
+               /* Update the timestamp value */
+               if (topt->to_flags & TOF_TS) {
+                       if ((flow->lr_tsval) && 
+                               (TSTMP_GT(topt->to_tsval, ntohl(*(flow->lr_tsval))))) {
+                               *(flow->lr_tsval) = htonl(topt->to_tsval);
+                       }
+                       if ((flow->lr_tsecr) &&
+                               (topt->to_tsecr != 0) &&
+                               (TSTMP_GT(topt->to_tsecr, ntohl(*(flow->lr_tsecr))))) {
+                               if (lrodebug >= 2) {
+                                       printf("%s: instantaneous RTT = %d \n", __func__, 
+                                               topt->to_tsecr - ntohl(*(flow->lr_tsecr)));
+                               }
+                               *(flow->lr_tsecr) = htonl(topt->to_tsecr);
+                       }
+               }
+               /* Coalesce the flags */
+               if (thflags) {
+                       flow->lr_tcphdr->th_flags |= thflags;
+               }
+               /* Update receive window */
+               flow->lr_tcphdr->th_win = tcphdr->th_win;
+       } else {
+               if (lro_mb) {
+                       flow->lr_mhead = flow->lr_mtail = lro_mb;
+                       flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT;
+                       flow->lr_tcphdr = tcphdr;
+                       if ((topt) && (topt->to_flags & TOF_TS)) {
+                               ASSERT(tsval != NULL);
+                               ASSERT(tsecr != NULL);
+                               flow->lr_tsval = tsval; 
+                               flow->lr_tsecr = tsecr;
+                       }        
+                       flow->lr_len = payload_len;
+                       flow->lr_timestamp = tcp_now;
+                       tcp_lro_sched_timer(0);
+               }       
+               flow->lr_seq = ntohl(tcphdr->th_seq) + payload_len;
+       }
+       if (lro_mb) { 
+               tcpstat.tcps_coalesced_pack++;
+       }       
+       return;
+}
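+
+/*
+ * Worked example (hypothetical numbers): three 1448-byte segments with
+ * sequence numbers 1000, 2448 and 3896 arrive back to back.  The first
+ * becomes the coalesced head (lr_seq advances to 2448); each later
+ * segment has its headers trimmed with m_adj(), is chained at lr_mtail,
+ * and grows ip_len/lr_len by 1448, so TCP eventually sees one segment
+ * carrying a 4344-byte payload.
+ */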
+
+static struct mbuf *
+tcp_lro_eject_flow(int flow_id)
+{
+       struct mbuf *mb = NULL;
+
+       mb = lro_flow_list[flow_id].lr_mhead;
+       ASSERT(lro_flow_map[lro_flow_list[flow_id].lr_hash_map] == flow_id);
+       lro_flow_map[lro_flow_list[flow_id].lr_hash_map] = TCP_LRO_FLOW_UNINIT;
+       bzero(&lro_flow_list[flow_id], sizeof(struct lro_flow));
+       
+       return mb;
+}
+
+static struct mbuf*
+tcp_lro_eject_coalesced_pkt(int flow_id)
+{
+       struct mbuf *mb = NULL;
+       mb = lro_flow_list[flow_id].lr_mhead;
+       lro_flow_list[flow_id].lr_mhead = 
+               lro_flow_list[flow_id].lr_mtail = NULL;
+       lro_flow_list[flow_id].lr_tcphdr = NULL;
+       return mb;
+}
+
+static struct mbuf*
+tcp_lro_insert_flow(struct mbuf *lro_mb, struct ip *ip_hdr, 
+                       struct tcphdr *tcp_hdr, int payload_len, 
+                       int drop_hdrlen, int hash, struct tcpopt *topt, 
+                       u_int32_t *tsval, u_int32_t *tsecr)
+{
+       int i;
+       int slot_available = 0;
+       int candidate_flow = 0; 
+       u_int32_t oldest_timestamp;
+       struct mbuf *mb = NULL;
+       int collision = 0;
+
+       oldest_timestamp = tcp_now;
+       
+       /* handle collision */
+       if (lro_flow_map[hash] != TCP_LRO_FLOW_UNINIT) {
+               if (lrodebug) {
+                       collision = 1;
+               }
+               candidate_flow = lro_flow_map[hash];
+               tcpstat.tcps_flowtbl_collision++;
+               goto kick_flow;
+       }
+
+       for (i = 0; i < TCP_LRO_NUM_FLOWS; i++) {
+               if (lro_flow_list[i].lr_mhead == NULL) {
+                       candidate_flow = i;
+                       slot_available = 1;
+                       break;
+               }
+               if (oldest_timestamp >= lro_flow_list[i].lr_timestamp) {
+                       candidate_flow = i;
+                       oldest_timestamp = lro_flow_list[i].lr_timestamp;
+               }
+       }
+
+       if (!slot_available) {
+               tcpstat.tcps_flowtbl_full++;
+kick_flow:
+               /* kick the oldest flow */
+               mb = tcp_lro_eject_flow(candidate_flow);
+
+               if (lrodebug) {
+                       if (!slot_available) {
+                               printf("%s: slot unavailable.\n",__func__);
+                       }
+                       if (collision) {
+                               printf("%s: collision.\n",__func__);
+                       }
+               }
+       } else {
+               candidate_flow = i; /* this is now the flow to be used */
+       }
+
+       tcp_lro_init_flow(candidate_flow, ip_hdr, tcp_hdr, hash, 
+                               tcp_now, payload_len);
+       tcp_lro_coalesce(candidate_flow, lro_mb, tcp_hdr, payload_len, 
+                               drop_hdrlen, topt, tsval, tsecr, 0);
+       return mb;
+}
+
+struct mbuf*
+tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr, 
+                               struct tcphdr *tcp_hdr, int drop_hdrlen)
+{
+       int flow_id = TCP_LRO_FLOW_UNINIT;
+       int hash;
+       unsigned int off = 0;
+       int eject_flow = 0;
+       int optlen;
+       int retval = 0;
+       struct mbuf *mb = NULL;
+       int payload_len = 0;
+       u_char *optp = NULL;
+       int thflags = 0;
+       struct tcpopt to;
+       int ret_response = TCP_LRO_CONSUMED;
+       int coalesced = 0, tcpflags = 0, unknown_tcpopts = 0;
+       u_int8_t ecn;
+       
+       if (lro_mb->m_len < (int32_t)sizeof (struct tcpiphdr)) {
+               if ((lro_mb = m_pullup(lro_mb, sizeof(struct tcpiphdr))) == 0) {
+                       tcpstat.tcps_rcvshort++;
+                       m_freem(lro_mb); 
+                       if (lrodebug) {
+                               printf("tcp_lro_process_pkt:mbuf too short.\n");
+                       }
+                       return NULL;
+               }
+       }
+
+       if ((lro_mb = lro_tcp_xsum_validate(lro_mb, 
+                               (struct ipovly*)ip_hdr, tcp_hdr)) == NULL) {
+               if (lrodebug) {
+                       printf("tcp_lro_process_pkt: TCP xsum failed.\n");
+               }
+               return NULL; 
+       }
+
+       /* Update stats */
+       lro_pkt_count++;
+
+       /* Avoids checksumming in tcp_input */
+       lro_mb->m_pkthdr.aux_flags |= MAUXF_SW_LRO_DID_CSUM;    
+       
+       off = tcp_hdr->th_off << 2;
+       optlen = off - sizeof (struct tcphdr);
+       payload_len = ip_hdr->ip_len - off;
+       optp = (u_char *)(tcp_hdr + 1);
+       /*
+        * Do quick retrieval of timestamp options ("options
+        * prediction?").  If timestamp is the only option and it's
+        * formatted as recommended in RFC 1323 appendix A, we
+        * quickly get the values now and not bother calling
+        * tcp_dooptions(), etc.
+        */
+       if ((optlen == TCPOLEN_TSTAMP_APPA ||
+                       (optlen > TCPOLEN_TSTAMP_APPA &&
+                       optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+                       *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+                       (tcp_hdr->th_flags & TH_SYN) == 0) {
+                       to.to_flags = TOF_TS;
+                       to.to_tsval = ntohl(*(u_int32_t *)(void *)(optp + 4));
+                       to.to_tsecr = ntohl(*(u_int32_t *)(void *)(optp + 8));
+       } else {
+               /*
+                * If TCP timestamps are not in use, or are not the first
+                * option, skip the LRO path: timestamps are used to keep
+                * LRO from adding latency to retransmissions and other
+                * slow-paced transmissions.
+                */
+               to.to_flags = to.to_tsecr = 0;
+               eject_flow = 1;
+       }
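+       /*
+        * Illustration: the RFC 1323 appendix A layout tested above is
+        * <NOP><NOP><TIMESTAMP(8)><len 10><tsval:4><tsecr:4>, 12 bytes in
+        * all (TCPOLEN_TSTAMP_APPA), so optp + 4 points at tsval and
+        * optp + 8 at tsecr.
+        */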
+
+       /* list all the conditions that can trigger a flow ejection here */
+       
+       thflags = tcp_hdr->th_flags;
+       if (thflags & (TH_SYN | TH_URG | TH_ECE | TH_CWR | TH_PUSH | TH_RST | TH_FIN)) { 
+               eject_flow = tcpflags = 1;
+       } 
+       
+       if (optlen && !((optlen == TCPOLEN_TSTAMP_APPA) && 
+                       (to.to_flags & TOF_TS))) {
+               eject_flow = unknown_tcpopts = 1;
+       } 
+       
+       if (payload_len <= LRO_MIN_COALESC_SZ) { /* too small to coalesce, incl. pure ACKs */
+               eject_flow = 1;
+       }
+
+       /* Can't coalesce ECN marked packets. */
+       ecn = ip_hdr->ip_tos & IPTOS_ECN_MASK;
+       if (ecn == IPTOS_ECN_CE) {
+               /*
+                * ECN needs quick notification
+                */
+               if (lrodebug) {
+                       printf("%s: ECE bits set.\n", __func__);
+               }
+               eject_flow = 1;
+       }
+
+       lck_mtx_lock_spin(&tcp_lro_lock);
+
+       retval = tcp_lro_matching_tuple(ip_hdr, tcp_hdr, &hash, &flow_id);
+
+       switch (retval) {
+       case TCP_LRO_NAN:
+               lck_mtx_unlock(&tcp_lro_lock);
+               ret_response = TCP_LRO_FLOW_NOTFOUND;
+               break;
+
+       case TCP_LRO_COALESCE:
+               if ((payload_len != 0) && (unknown_tcpopts == 0) && 
+                       (tcpflags == 0) && (ecn == 0) && (to.to_flags & TOF_TS)) { 
+                       tcp_lro_coalesce(flow_id, lro_mb, tcp_hdr, payload_len,
+                               drop_hdrlen, &to, 
+                               (to.to_flags & TOF_TS) ? (u_int32_t *)(void *)(optp + 4) : NULL,
+                               (to.to_flags & TOF_TS) ? (u_int32_t *)(void *)(optp + 8) : NULL,
+                               thflags);
+                       if (lrodebug >= 2) { 
+                               printf("tcp_lro_process_pkt: coalesce len = %d. flow_id = %d payload_len = %d drop_hdrlen = %d optlen = %d lport = %d seqnum = %x.\n",
+                                       lro_flow_list[flow_id].lr_len, flow_id, 
+                                       payload_len, drop_hdrlen, optlen,
+                                       ntohs(lro_flow_list[flow_id].lr_lport),
+                                       ntohl(tcp_hdr->th_seq));
+                       }
+                       if (lro_flow_list[flow_id].lr_mhead->m_pkthdr.lro_npkts >= coalesc_sz) {
+                               eject_flow = 1;
+                       }
+                       coalesced = 1;
+               }
+               if (eject_flow) {
+                       mb = tcp_lro_eject_coalesced_pkt(flow_id);
+                       lro_flow_list[flow_id].lr_seq = ntohl(tcp_hdr->th_seq) +
+                                                               payload_len;
+                       lck_mtx_unlock(&tcp_lro_lock);
+                       if (mb) {
+                               lro_proto_input(mb);
+                       }
+                       if (!coalesced) {
+                               if (lrodebug >= 2) {
+                                       printf("%s: pkt payload_len = %d \n", __func__, payload_len);
+                               }
+                               lro_proto_input(lro_mb);
+                       }
+               } else {
+                       lck_mtx_unlock(&tcp_lro_lock);
+               }
+               break;
+
+       case TCP_LRO_EJECT_FLOW:
+               mb = tcp_lro_eject_coalesced_pkt(flow_id);
+               lck_mtx_unlock(&tcp_lro_lock);
+               if (mb) {
+                       if (lrodebug) 
+                               printf("tcp_lro_process_pkt eject_flow, len = %d\n", mb->m_pkthdr.len);
+                       lro_proto_input(mb);
+               }
+
+               lro_proto_input(lro_mb);
+               break;
+
+       case TCP_LRO_COLLISION:
+               lck_mtx_unlock(&tcp_lro_lock);
+               ret_response = TCP_LRO_FLOW_NOTFOUND;
+               break;
+
+       default:
+               lck_mtx_unlock(&tcp_lro_lock);
+               panic_plain("%s: unrecognized type %d", __func__, retval);
+               break; 
+       }
+
+       if (ret_response == TCP_LRO_FLOW_NOTFOUND) {
+               lro_proto_input(lro_mb);
+       }
+       return NULL;
+}
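+
+/*
+ * Note that tcp_lro_process_pkt() always returns NULL: the segment is
+ * either held in a flow for further coalescing or already handed up via
+ * lro_proto_input() above.
+ */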
+
+static void
+tcp_lro_timer_proc(void *arg1, void *arg2)
+{
+#pragma unused(arg1, arg2)
+
+       lck_mtx_lock_spin(&tcp_lro_lock);
+       lro_timer_set = 0;
+       lck_mtx_unlock(&tcp_lro_lock);
+       tcp_lro_flush_flows();
+}
+
+static void
+tcp_lro_flush_flows(void)
+{
+       int i = 0;
+       struct mbuf *mb;
+       struct lro_flow *flow;
+       int active_flows = 0;
+       int outstanding_flows = 0;
+       int tcpclock_updated = 0;
+
+       lck_mtx_lock(&tcp_lro_lock);
+
+       while (i < TCP_LRO_NUM_FLOWS) {
+               flow = &lro_flow_list[i];
+               if (flow->lr_mhead != NULL) {
+                       active_flows++;
+                       if (!tcpclock_updated) {
+                               calculate_tcp_clock();
+                               tcpclock_updated = 1;
+                       }
+                       if (((tcp_now - flow->lr_timestamp) >= coalesc_time) || 
+                               (flow->lr_mhead->m_pkthdr.lro_npkts >= 
+                                       coalesc_sz)) {
+
+                               if (lrodebug >= 2) 
+                                       printf("tcp_lro_flush_flows: len =%d n_pkts = %d %d %d \n",
+                                       flow->lr_len, 
+                                       flow->lr_mhead->m_pkthdr.lro_npkts, 
+                                       flow->lr_timestamp, tcp_now);
+
+                               mb = tcp_lro_eject_flow(i);
+
+                               if (mb) {
+                                       lck_mtx_unlock(&tcp_lro_lock);
+                                       lro_update_flush_stats(mb);
+                                       lro_proto_input(mb);
+                                       lck_mtx_lock(&tcp_lro_lock);
+                               }
+
+                       } else {
+                               tcp_lro_sched_timer(0);
+                               outstanding_flows++;
+                               if (lrodebug >= 2) {
+                                       printf("tcp_lro_flush_flows: did not flush flow of len =%d deadline = %x timestamp = %x \n", 
+                                               flow->lr_len, tcp_now, flow->lr_timestamp);
+                               }
+                       }
+               }
+               if (flow->lr_flags & LRO_EJECT_REQ) {
+                       mb = tcp_lro_eject_flow(i);
+                       if (mb) {
+                               lck_mtx_unlock(&tcp_lro_lock);
+                               lro_proto_input(mb);
+                               lro_eject_req++;
+                               lck_mtx_lock(&tcp_lro_lock);
+                       }
+               }
+               i++;
+       }
+       lck_mtx_unlock(&tcp_lro_lock);
+#if 0
+       if (lrocount == 900) {
+               printf("%s: %d %d %d %d oo: %d mismatch: %d ej_req: %d coll: %d \n", 
+                       __func__,
+                       tcpstat.tcps_coalesced_pack,
+                       tcpstat.tcps_lro_twopack,
+                       tcpstat.tcps_lro_multpack, 
+                       tcpstat.tcps_lro_largepack,
+                       lro_seq_outoforder,
+                       lro_seq_mismatch,
+                       lro_eject_req,
+                       tcpstat.tcps_flowtbl_collision);
+               printf("%s: all: %d single: %d double: %d good: %d \n",
+                       __func__, lro_flushes, lro_single_flushes, 
+                       lro_double_flushes, lro_good_flushes);
+               lrocount = 0;   
+       } else {
+               lrocount++;
+       }
+       if ((lrodebug >= 2) && (active_flows > 1)) {
+               printf("lro_flush_flows: active_flows = %d \n", active_flows);
+       }
+#endif 
+}
+
+/*
+ * Must be called with tcp_lro_lock held.
+ * The hint is non-zero for longer waits; the wait time dictated by
+ * coalesc_time takes precedence, so lro_timer_set is not set for the
+ * hint case.
+ */
+static void
+tcp_lro_sched_timer(uint64_t hint)
+{
+       if (lro_timer_set) {
+               return;
+       }
+
+       lro_timer_set = 1;
+       if (!hint) {
+               /* the intent is to wake up every coalesc_time msecs */
+               clock_interval_to_deadline(coalesc_time, 
+                       (NSEC_PER_SEC / TCP_RETRANSHZ), &lro_deadline);
+       } else {
+               clock_interval_to_deadline(hint, NSEC_PER_SEC / TCP_RETRANSHZ,
+                        &lro_deadline);
+       }
+       thread_call_enter_delayed(tcp_lro_timer, lro_deadline);
+}
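+
+/*
+ * Example: TCP_RETRANSHZ is 1000 in this tree, so the interval scale is
+ * NSEC_PER_SEC / 1000 = 1 msec per unit; the default coalesc_time of
+ * LRO_MX_TIME_TO_BUFFER (10) therefore arms the thread call 10 msecs out.
+ */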
+
+struct mbuf*
+tcp_lro(struct mbuf *m, unsigned int hlen)
+{
+       struct ip *ip_hdr;
+       unsigned int tlen;
+       struct tcphdr * tcp_hdr = NULL;
+       unsigned int off = 0;
+
+       if (kipf_count != 0) 
+               return m;
+
+       /*
+        * Experiments on cellular show that the RTT is much higher
+        * than the coalescing time of 5 msecs, causing LRO to flush
+        * on a single packet 80% of the time. Increasing the
+        * coalescing time for cellular shows no marked improvement
+        * in throughput either. Loopback perf is hurt by the 5 msec
+        * latency, and loopback already sends large packets.
+        */
+       if ((m->m_pkthdr.rcvif->if_type == IFT_CELLULAR) ||
+               (m->m_pkthdr.rcvif->if_type == IFT_LOOP)) {
+               return m;
+       }
+
+       ip_hdr = mtod(m, struct ip*);
+
+       /* only TCP is coalesced */
+       if (ip_hdr->ip_p != IPPROTO_TCP) {
+               return m;
+       }
+
+       if (m->m_len < (int32_t) sizeof (struct tcpiphdr)) {
+               if (lrodebug) printf("tcp_lro m_pullup \n");
+               if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+                       tcpstat.tcps_rcvshort++; 
+                       if (lrodebug) {
+                               printf("ip_lro: rcvshort.\n");
+                       }
+                       return NULL;
+               }
+       }
+
+       tcp_hdr = (struct tcphdr *)((caddr_t)ip_hdr + hlen);
+       tlen = ip_hdr->ip_len; /* ip_len here excludes the IP header bytes */
+       m->m_pkthdr.lro_pktlen = tlen; /* used to return max pkt size seen to tcp */
+       m->m_pkthdr.lro_npkts = 1; /* counts the packets coalesced into this one */
+       off = tcp_hdr->th_off << 2;
+       if (off < sizeof (struct tcphdr) || off > tlen) {
+               tcpstat.tcps_rcvbadoff++; 
+               if (lrodebug) {
+                       printf("ip_lro: TCP off greater than TCP header.\n");
+               }
+               return m;
+       }
+
+       return (tcp_lro_process_pkt(m, ip_hdr, tcp_hdr, hlen + off));
+}
+
+static void
+lro_proto_input(struct mbuf *m)
+{
+       struct ip* ip_hdr = mtod(m, struct ip*);
+
+       if (lrodebug >= 3) {
+               printf("lro_proto_input: ip_len = %d \n", 
+                       ip_hdr->ip_len);
+       }
+       lro_update_stats(m);
+       ip_proto_dispatch_in_wrapper(m, ip_hdr->ip_hl << 2, ip_hdr->ip_p);
+}
+
+static struct mbuf *
+lro_tcp_xsum_validate(struct mbuf *m,  struct ipovly *ipov, struct tcphdr * th)
+{
+
+       struct ip* ip = (struct ip*)ipov;
+       int tlen = ip->ip_len;
+       int len;
+       struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ? 
+                               m->m_pkthdr.rcvif: NULL;
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
+       if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+               if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
+                       u_short pseudo;
+                       char b[9];
+
+                       bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
+                       bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
+                       ipov->ih_len = (u_short)tlen;
+#if BYTE_ORDER != BIG_ENDIAN
+                       HTONS(ipov->ih_len);
+#endif
+                       pseudo = in_cksum(m, sizeof (struct ip));
+                       bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
+
+                       th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
+               } else {
+                       if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+                               th->th_sum = m->m_pkthdr.csum_data;
+                       else
+                               th->th_sum = in_pseudo(ip->ip_src.s_addr,
+                                       ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
+                                       ip->ip_len + IPPROTO_TCP));
+               }
+               th->th_sum ^= 0xffff;
+       } else {
+               char b[9];
+               /*
+                * Checksum extended TCP header and data.
+                */
+               bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
+               bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
+               ipov->ih_len = (u_short)tlen;
+#if BYTE_ORDER != BIG_ENDIAN
+               HTONS(ipov->ih_len);
+#endif
+               len = sizeof (struct ip) + tlen;
+               th->th_sum = in_cksum(m, len);
+               bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));
+
+               tcp_in_cksum_stats(len);
+       }
+       if (th->th_sum) {
+               tcpstat.tcps_rcvbadsum++;
+               if (ifp != NULL && ifp->if_tcp_stat != NULL) {
+                       atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
+               }
+               if (lrodebug) 
+                       printf("lro_tcp_xsum_validate: bad xsum and drop m = %p.\n",m);
+               m_freem(m);
+               return NULL;
+       }
+       /* revert back the order as IP will look into this again. */
+#if BYTE_ORDER != BIG_ENDIAN
+       NTOHS(ipov->ih_len);
+#endif
+       return m;
+}
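+
+/*
+ * Note on the paths above: for a valid segment the one's-complement sum
+ * over the pseudo-header, TCP header and data is 0xffff, so after the
+ * folding (and the ^= 0xffff in the hardware-assist case) th_sum ends up
+ * 0; any non-zero remainder is counted in tcps_rcvbadsum.
+ */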
+
+/*
+ * When TCP detects a stable, steady flow without reordering,
+ * with a sufficiently high cwnd, it invokes LRO.
+ */
+int
+tcp_start_coalescing(struct ip *ip_hdr, struct tcphdr *tcp_hdr, int tlen) 
+{
+       int hash;
+       int flow_id;
+       struct mbuf *eject_mb;
+       struct lro_flow *lf;
+
+       hash = LRO_HASH(ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr, 
+               tcp_hdr->th_sport, tcp_hdr->th_dport,
+               (TCP_LRO_FLOW_MAP - 1));
+
+       
+       lck_mtx_lock_spin(&tcp_lro_lock);
+       flow_id = lro_flow_map[hash];
+       if (flow_id != TCP_LRO_FLOW_NOTFOUND) {
+               lf = &lro_flow_list[flow_id];
+               if ((lf->lr_faddr.s_addr == ip_hdr->ip_src.s_addr) &&
+                   (lf->lr_laddr.s_addr == ip_hdr->ip_dst.s_addr) &&
+                   (lf->lr_fport == tcp_hdr->th_sport) &&
+                   (lf->lr_lport == tcp_hdr->th_dport)) {
+                       if ((lf->lr_tcphdr == NULL) &&
+                               (lf->lr_seq != (tcp_hdr->th_seq + tlen))) {
+                               lf->lr_seq = tcp_hdr->th_seq + tlen;
+                       }       
+                       lf->lr_flags &= ~LRO_EJECT_REQ;
+               }
+               lck_mtx_unlock(&tcp_lro_lock); 
+               return 0;
+       }
+
+       HTONL(tcp_hdr->th_seq);
+       HTONL(tcp_hdr->th_ack);
+       eject_mb = 
+               tcp_lro_insert_flow(NULL, ip_hdr, tcp_hdr, tlen, 0, hash, 
+               NULL, NULL, NULL);
+
+       lck_mtx_unlock(&tcp_lro_lock);
+
+       NTOHL(tcp_hdr->th_seq);
+       NTOHL(tcp_hdr->th_ack);
+       if (lrodebug >= 3) {
+               printf("%s: src = %x dst = %x sport = %d dport = %d seq %x \n",
+                       __func__, ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr,
+                       tcp_hdr->th_sport, tcp_hdr->th_dport, tcp_hdr->th_seq);
+       }
+       ASSERT(eject_mb == NULL);
+       return 0;
+}
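+
+/*
+ * The HTONL/NTOHL dance above exists because the caller (tcp_input,
+ * presumably) holds th_seq/th_ack in host byte order, while the flow
+ * table code expects wire order; the header is restored before return.
+ */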
+
+/*
+ * When TCP detects loss or idle condition, it stops offloading
+ * to LRO. 
+ */
+int
+tcp_lro_remove_state(struct in_addr saddr, struct in_addr daddr, 
+               unsigned short sport, unsigned short dport)
+{
+       int hash, flow_id;
+       struct lro_flow *lf;
+
+       hash = LRO_HASH(daddr.s_addr, saddr.s_addr, dport, sport,
+               (TCP_LRO_FLOW_MAP - 1));
+       lck_mtx_lock_spin(&tcp_lro_lock);
+       flow_id = lro_flow_map[hash];
+       if (flow_id == TCP_LRO_FLOW_UNINIT) {
+               lck_mtx_unlock(&tcp_lro_lock);
+               return 0;
+       }
+       lf = &lro_flow_list[flow_id];
+       if ((lf->lr_faddr.s_addr == daddr.s_addr) && 
+           (lf->lr_laddr.s_addr == saddr.s_addr) &&
+           (lf->lr_fport == dport) &&
+           (lf->lr_lport == sport)) {
+               if (lrodebug) {
+                       printf("%s: %x %x\n", __func__, 
+                               lf->lr_flags, lf->lr_seq);
+               }
+               lf->lr_flags |= LRO_EJECT_REQ;
+       }
+       lck_mtx_unlock(&tcp_lro_lock);
+       return 0;
+}
+
+void
+tcp_update_lro_seq(__uint32_t rcv_nxt, struct in_addr saddr, struct in_addr daddr,
+               unsigned short sport, unsigned short dport)
+{
+       int hash, flow_id;
+       struct lro_flow *lf;
+
+       hash = LRO_HASH(daddr.s_addr, saddr.s_addr, dport, sport, 
+               (TCP_LRO_FLOW_MAP - 1));
+       lck_mtx_lock_spin(&tcp_lro_lock);
+       flow_id = lro_flow_map[hash];
+       if (flow_id == TCP_LRO_FLOW_UNINIT) {
+               lck_mtx_unlock(&tcp_lro_lock);
+               return;
+       }
+       lf = &lro_flow_list[flow_id];
+       if ((lf->lr_faddr.s_addr == daddr.s_addr) &&
+           (lf->lr_laddr.s_addr == saddr.s_addr) &&
+           (lf->lr_fport == dport) &&
+           (lf->lr_lport == sport) &&
+           (lf->lr_tcphdr == NULL)) {
+               lf->lr_seq = (tcp_seq)rcv_nxt;
+       }
+       lck_mtx_unlock(&tcp_lro_lock);
+       return;
+}
+
+static void
+lro_update_stats(struct mbuf *m)
+{
+       switch(m->m_pkthdr.lro_npkts) {
+       case 0: /* fall through */
+       case 1: 
+               break;
+       
+       case 2: 
+               tcpstat.tcps_lro_twopack++;
+               break;
+       
+       case 3: /* fall through */
+       case 4:
+               tcpstat.tcps_lro_multpack++;
+               break;
+       
+       default: 
+               tcpstat.tcps_lro_largepack++;
+               break;
+       }
+       return;
+}
+
+static void
+lro_update_flush_stats(struct mbuf *m)
+{
+       lro_flushes++;
+       switch(m->m_pkthdr.lro_npkts) {
+       case 0: ASSERT(0);
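+               /* FALLTHROUGH */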
+       case 1: lro_single_flushes++;
+               break;
+       case 2: lro_double_flushes++;
+               break;
+       default: lro_good_flushes++;
+               break;
+       }
+       return;
+}
diff --git a/bsd/netinet/tcp_lro.h b/bsd/netinet/tcp_lro.h
new file mode 100644 (file)
index 0000000..9f1fe01
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef TCP_LRO_H_
+#define TCP_LRO_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+
+#define TCP_LRO_NUM_FLOWS (16) /* must be <= 255 for char lro_flow_map */
+#define TCP_LRO_FLOW_MAP  (1024)
+
+struct lro_flow {
+       struct mbuf             *lr_mhead;      /* coalesced mbuf chain head */
+       struct mbuf             *lr_mtail;      /* coalesced mbuf chain tail */
+       struct tcphdr           *lr_tcphdr;     /* ptr to TCP hdr in frame */
+       u_int32_t               *lr_tsval;      /* address of tsval in frame */
+       u_int32_t               *lr_tsecr;      /* tsecr field in TCP header */
+       tcp_seq                 lr_seq;         /* next expected seq num */
+       unsigned int            lr_len;         /* length of LRO frame */
+       struct in_addr          lr_faddr;       /* foreign address */
+       struct in_addr          lr_laddr;       /* local address */
+       unsigned short int      lr_fport;       /* foreign port */
+       unsigned short int      lr_lport;       /* local port */
+       u_int32_t               lr_timestamp;   /* for ejecting the flow */
+       unsigned short int      lr_hash_map;    /* back pointer to hash map */
+       unsigned short int      lr_flags;       /* flow flags; also pads */
+} __attribute__((aligned(8)));
+
+/* lr_flags - only 16 bits available */
+#define LRO_EJECT_REQ  0x1 
+
+
+#define TCP_LRO_FLOW_UNINIT (TCP_LRO_NUM_FLOWS+1)
+#define TCP_LRO_FLOW_NOTFOUND TCP_LRO_FLOW_UNINIT
+
+/* Max packets to be coalesced before pushing to app */
+#define LRO_MX_COALESCE_PKTS (8)
+
+/*
+ * Min num of bytes in a packet to trigger coalescing
+ */
+#define LRO_MIN_COALESC_SZ  (1300)
+
+/*
+ * Max amount of time, in msecs, to wait before flushing flows.
+ * This number has been carefully chosen and should be altered with care.
+ */
+#define LRO_MX_TIME_TO_BUFFER 10
+
+/* similar to INP_PCBHASH */
+#define LRO_HASH(faddr, laddr, fport, lport, mask) \
+       (((faddr) ^ ((laddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
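+
+/*
+ * The hash mixes the foreign address, the high 16 bits of the local
+ * address and both ports, then masks with TCP_LRO_FLOW_MAP - 1 (1023)
+ * to index lro_flow_map[]; a colliding 4-tuple is resolved by ejecting
+ * the current occupant (see tcp_lro_insert_flow()).
+ */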
+#endif
+
+#endif /* TCP_LRO_H_ */
index 5c9db2de9c4b336b5987c0bdb5d76340b7525379..8d256db71ea6be68fe20fd13e6416dc831461b81 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.16 2001/08/22 00:59:12 silby Exp $
+ */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
+#include <sys/socketvar.h>
 
 #include <net/route.h>
 #include <netinet/in.h>
@@ -52,7 +88,7 @@ int tcp_newreno_cleanup(struct tcpcb *tp);
 void tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp);
 void tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
 void tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
-void tcp_newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_pre_fr(struct tcpcb *tp);
 void tcp_newreno_post_fr(struct tcpcb *tp, struct tcphdr *th);
 void tcp_newreno_after_idle(struct tcpcb *tp);
 void tcp_newreno_after_timeout(struct tcpcb *tp);
@@ -77,6 +113,43 @@ struct tcp_cc_algo tcp_cc_newreno = {
 extern int tcp_do_rfc3465;
 extern int tcp_do_rfc3465_lim2;
 extern int maxseg_unacked;
+extern u_int32_t tcp_autosndbuf_max;
+
+#define SET_SNDSB_IDEAL_SIZE(sndsb, size) \
+       (sndsb)->sb_idealsize = min(max(tcp_sendspace, (size)), \
+               tcp_autosndbuf_max);
+
+void tcp_cc_resize_sndbuf(struct tcpcb *tp) {
+       struct sockbuf *sb;
+       /*
+        * If the send socket buffer size is bigger than ssthresh,
+        * it is time to trim it, because we do not want to hold
+        * too many mbufs in the socket buffer.
+        */
+       sb = &(tp->t_inpcb->inp_socket->so_snd);
+       if (sb->sb_hiwat > tp->snd_ssthresh &&
+               (sb->sb_flags & SB_AUTOSIZE) != 0) {
+               if (sb->sb_idealsize > tp->snd_ssthresh) {
+                       SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh);
+               }
+               sb->sb_flags |= SB_TRIM;
+       }
+}
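+
+/*
+ * Illustrative numbers: with sb_hiwat at 512KB and snd_ssthresh cut to
+ * 128KB after a loss, sb_idealsize drops to max(tcp_sendspace, 128KB)
+ * (clamped to tcp_autosndbuf_max) and SB_TRIM marks the buffer to be
+ * shrunk toward sb_idealsize as acknowledged data is released.
+ */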
+
+void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp) {
+       struct sockbuf *sb;
+       sb = &(tp->t_inpcb->inp_socket->so_snd);
+       if ((sb->sb_flags & (SB_TRIM|SB_AUTOSIZE)) == (SB_TRIM|SB_AUTOSIZE)) {
+               /*
+                * If a retransmission turned out to be unnecessary,
+                * the socket buffer size can be restored to what it
+                * was before.
+                */
+               SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh);
+               if (sb->sb_hiwat <= sb->sb_idealsize) {
+                       sbreserve(sb, sb->sb_idealsize);
+                       sb->sb_flags &= ~SB_TRIM;
+               }
+       }
+}
 
 int tcp_newreno_init(struct tcpcb *tp) {
 #pragma unused(tp)
@@ -202,8 +275,7 @@ tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
 }
 
 void
-tcp_newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
-#pragma unused(th)
+tcp_newreno_pre_fr(struct tcpcb *tp) {
 
        uint32_t win;
 
@@ -212,6 +284,8 @@ tcp_newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
        if ( win < 2 )
                win = 2;
        tp->snd_ssthresh = win * tp->t_maxseg; 
+       tcp_cc_resize_sndbuf(tp);
+
 }
 
 void
@@ -273,6 +347,8 @@ tcp_newreno_after_timeout(struct tcpcb *tp) {
                tp->snd_ssthresh = win * tp->t_maxseg;
                tp->t_bytes_acked = 0;
                tp->t_dupacks = 0;
+
+               tcp_cc_resize_sndbuf(tp);
        }
 }
 
@@ -302,15 +378,15 @@ tcp_newreno_delay_ack(struct tcpcb *tp, struct tcphdr *th) {
        case 2:
                if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
                        (th->th_flags & TH_PUSH) == 0 &&
-                       (tp->t_flags & TF_DELACK) == 0)
+                       (tp->t_unacksegs == 1))
                        return(1);
                break;
        case 3:
                if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
                        (th->th_flags & TH_PUSH) == 0 &&
-                       ((tp->t_unacksegs == 0) ||
+                       ((tp->t_unacksegs == 1) ||
                        ((tp->t_flags & TF_STRETCHACK) != 0 &&
-                       tp->t_unacksegs < (maxseg_unacked - 1))))
+                       tp->t_unacksegs < (maxseg_unacked))))
                        return(1);
                break;
        }
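+       /*
+        * Note: these thresholds shifted by one because t_unacksegs now
+        * appears to count the segment being processed as well, so
+        * "delay every other ACK" compares against 1 (the LEDBAT variant
+        * above makes the same change).
+        */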
index 5c310770d023a26f3122bc0e90fd72c15f2478f3..8a9eeb9cf07bf4a5ca08dc405f66ce1dcde8a745 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -83,6 +83,8 @@
 #include <net/route.h>
 #include <net/ntstat.h>
 #include <net/if_var.h>
+#include <net/if.h>
+#include <net/if_types.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <security/mac_framework.h>
 #endif /* MAC_SOCKET */
 
+#include <netinet/lro_ext.h>
+
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETTCP, 1)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETTCP, 3)
 #define DBG_FNC_TCP_OUTPUT     NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1)
@@ -171,6 +175,22 @@ int tcp_acc_iaj_react_limit = ACC_IAJ_REACT_LIMIT;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_react_limit, CTLFLAG_RW | CTLFLAG_LOCKED,
         &tcp_acc_iaj_react_limit, 1, "Accumulated IAJ when receiver starts to react");
 
+uint32_t tcp_do_autosendbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautosndbuf, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_do_autosendbuf, 1, "Enable send socket buffer auto-tuning");
+
+uint32_t tcp_autosndbuf_inc = 8 * 1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufinc, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_autosndbuf_inc, 1, "Increment in send socket buffer size");
+
+uint32_t tcp_autosndbuf_max = 512 * 1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufmax, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_autosndbuf_max, 1, "Maximum send socket buffer size");
+
+uint32_t tcp_prioritize_acks = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ack_prioritize, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_prioritize_acks, 1, "Prioritize pure acks");
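+
+/*
+ * These knobs surface as net.inet.tcp.doautosndbuf, autosndbufinc,
+ * autosndbufmax and ack_prioritize; for example,
+ * "sysctl -w net.inet.tcp.autosndbufmax=1048576" raises the auto-tuning
+ * ceiling to 1MB.
+ */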
+
 static int32_t packchain_newlist = 0;
 static int32_t packchain_looped = 0;
 static int32_t packchain_sent = 0;
@@ -193,11 +213,13 @@ extern int                ip_use_randomid;
 extern u_int32_t dlil_filter_count;
 extern u_int32_t kipf_count;
 extern int tcp_recv_bg;
+extern int maxseg_unacked;
 
 static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int,
-    struct mbuf *, int, int, int32_t);
+    struct mbuf *, int, int, int32_t, boolean_t);
 
-static inline int is_tcp_recv_bg(struct socket *so);
+extern uint32_t get_base_rtt(struct tcpcb *tp);
+static struct mbuf* tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th);
 
 static __inline__ u_int16_t
 get_socket_id(struct socket * s)
@@ -214,12 +236,6 @@ get_socket_id(struct socket * s)
        return (val);
 }
 
-static inline int
-is_tcp_recv_bg(struct socket *so)
-{
-       return (so->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG);
-}
-
 /*
  * Tcp output routine: figure out what should be sent and send it.
  *
@@ -237,7 +253,14 @@ is_tcp_recv_bg(struct socket *so)
  *     ip_output_list:EMSGSIZE
  *     ip_output_list:ENOBUFS
  *     ip_output_list:???              [ignorable: mostly IPSEC/firewall/DLIL]
- *     ip6_output:???                  [IPV6 only]
+ *     ip6_output_list:EINVAL
+ *     ip6_output_list:EOPNOTSUPP
+ *     ip6_output_list:EHOSTUNREACH
+ *     ip6_output_list:EADDRNOTAVAIL
+ *     ip6_output_list:ENETUNREACH
+ *     ip6_output_list:EMSGSIZE
+ *     ip6_output_list:ENOBUFS
+ *     ip6_output_list:???             [ignorable: mostly IPSEC/firewall/DLIL]
  */
 int
 tcp_output(struct tcpcb *tp)
@@ -271,12 +294,15 @@ tcp_output(struct tcpcb *tp)
        struct mbuf *tp_inp_options = tp->t_inpcb->inp_depend4.inp4_options;
 #if INET6
        int isipv6 = tp->t_inpcb->inp_vflag & INP_IPV6 ;
-       struct ip6_pktopts *inp6_pktopts = tp->t_inpcb->inp_depend6.inp6_outputopts;
 #endif
        short packchain_listadd = 0;
        u_int16_t       socket_id = get_socket_id(so);
        int so_options = so->so_options;
        struct rtentry *rt;
+       u_int32_t basertt, svc_flags = 0;
+       u_int32_t lro_ackmore = (tp->t_lropktlen != 0) ? 1 : 0;
+       struct mbuf *mnext = NULL;
+       int sackoptlen = 0;
 
        /*
         * Determine length of data that should be transmitted,
@@ -290,7 +316,7 @@ tcp_output(struct tcpcb *tp)
         * will take care of wrap around of tcp_now
         */
        idle_time = tcp_now - tp->t_rcvtime;
-       if (idle && idle_time >= tp->t_rxtcur) {
+       if (idle && idle_time >= TCP_IDLETIMEOUT(tp)) {
                if (CC_ALGO(tp)->after_idle != NULL) 
                        CC_ALGO(tp)->after_idle(tp);
                DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
@@ -309,14 +335,12 @@ again:
 
 #if INET6
        if (isipv6) {
-       
                KERNEL_DEBUG(DBG_LAYER_BEG,
                     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
                     (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
                      (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
                     sendalot,0,0);
-       }
-       else
+       } else
 #endif
 
        {
@@ -325,6 +349,7 @@ again:
                     (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
                      (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
                     sendalot,0,0);
+       }
        /*
         * If the route generation id changed, we need to check that our
         * local (source) IP address is still valid. If it isn't either
@@ -335,7 +360,9 @@ again:
        if (rt != NULL && (!(rt->rt_flags & RTF_UP) ||
            rt->generation_id != route_generation)) {
                struct ifnet *ifp;
-               struct in_ifaddr *ia;
+               struct in_ifaddr *ia = NULL;
+               struct in6_ifaddr *ia6 = NULL;
+               int found_srcaddr = 0;
 
                /* disable multipages at the socket */
                somultipages(so, FALSE);
@@ -343,8 +370,21 @@ again:
                /* Disable TSO for the socket until we know more */
                tp->t_flags &= ~TF_TSO;
 
+               if (isipv6) {
+                       ia6 = ifa_foraddr6(&tp->t_inpcb->in6p_laddr);
+                       if (ia6 != NULL)
+                               found_srcaddr = 1;
+               } else {
+                       ia = ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr);
+                       if (ia != NULL)
+                               found_srcaddr = 1;
+               }
+
                /* check that the source address is still valid */
-               if ((ia = ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr)) == NULL) {
+               if (found_srcaddr == 0) {
+
+                       soevent(so,
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR));
 
                        if (tp->t_state >= TCPS_CLOSE_WAIT) {
                                tcp_drop(tp, EADDRNOTAVAIL);
@@ -380,7 +420,11 @@ again:
                                return(0); /* silently ignore, keep data in socket: address may be back */
                        }
                }
-               IFA_REMREF(&ia->ia_ifa);
+               if (ia != NULL)
+                       IFA_REMREF(&ia->ia_ifa);
+
+               if (ia6 != NULL)
+                       IFA_REMREF(&ia6->ia_ifa);
 
                /*
                 * Address is still valid; check for multipages capability
@@ -408,7 +452,6 @@ again:
                        tp->t_flags |= TF_PMTUD;
 
                RT_UNLOCK(rt);
-        }
        }
 
        /*
@@ -467,8 +510,9 @@ again:
                                /* Can rexmit part of the current hole */
                                len = ((int32_t)min(cwin,
                                                   tp->snd_recover - p->rxmit));
-               } else
+               } else {
                        len = ((int32_t)min(cwin, p->end - p->rxmit));
+               }
                if (len > 0) {
                        off = p->rxmit - tp->snd_una; /* update off only if we really transmit SACK data */
                        sack_rxmit = 1;
@@ -477,14 +521,16 @@ again:
                        tcpstat.tcps_sack_rexmit_bytes +=
                            min(len, tp->t_maxseg);
                        if (nstat_collect) {
-                               nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, min(len, tp->t_maxseg), NSTAT_TX_FLAG_RETRANSMIT);
+                               nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, 
+                                       min(len, tp->t_maxseg), NSTAT_TX_FLAG_RETRANSMIT);
                                locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1);
-                               locked_add_64(&tp->t_inpcb->inp_stat->txbytes, min(len, tp->t_maxseg));
+                               locked_add_64(&tp->t_inpcb->inp_stat->txbytes, 
+                                       min(len, tp->t_maxseg));
                                tp->t_stat.txretransmitbytes += min(len, tp->t_maxseg);
                        }
-               }
-               else 
+               } else {
                        len = 0;
+               }
        }
 after_sack_rexmit:
        /*
@@ -589,23 +635,31 @@ after_sack_rexmit:
                flags &= ~TH_SYN;
                off--, len++;
                if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
-                       while (!(tp->t_flags & TF_SENDINPROG) &&
-                           tp->t_pktlist_head != NULL) {
+                       while (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+                               tp->t_pktlist_head != NULL) {
                                packetlist = tp->t_pktlist_head;
                                packchain_listadd = tp->t_lastchain;
                                packchain_sent++;
                                TCP_PKTLIST_CLEAR(tp);
-                               tp->t_flags |= TF_SENDINPROG;
 
                                error = tcp_ip_output(so, tp, packetlist,
                                    packchain_listadd, tp_inp_options,
-                                   (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), 0);
+                                   (so_options & SO_DONTROUTE),
+                                   (sack_rxmit | (sack_bytes_rxmt != 0)), 0,
+#ifdef INET6
+                                   isipv6);
+#else
+                                   0);
+#endif
+
 
-                               tp->t_flags &= ~TF_SENDINPROG;
                        }
-                       /* tcp was closed while we were in ip; resume close */
-                       if ((tp->t_flags &
-                           (TF_CLOSING|TF_SENDINPROG)) == TF_CLOSING) {
+                       /*
+                        * tcp was closed while we were in ip;
+                        * resume close.
+                        */
+                       if (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+                               (tp->t_flags & TF_CLOSING)) {
                                tp->t_flags &= ~TF_CLOSING;
                                (void) tcp_close(tp);
                        } else {
@@ -613,7 +667,7 @@ after_sack_rexmit:
                        }
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
                            0,0,0,0,0);
-                       return 0;
+                       return(0);
                }
        }
 
@@ -654,6 +708,46 @@ after_sack_rexmit:
                }
        }
 
+       /* Automatic sizing of the send socket buffer. Increase the send
+        * socket buffer size if all of the following criteria are met:
+        *      1. the receiver has enough buffer space for this data,
+        *      2. the send buffer is filled to 7/8th with data (so we
+        *         actually have data to make use of it), and
+        *      3. our send window (slow start and congestion controlled)
+        *         is larger than the sent but unacknowledged data in
+        *         the send buffer.
+        */
+       basertt = get_base_rtt(tp);
+       if (tcp_do_autosendbuf == 1 &&
+           !INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb) && !IN_FASTRECOVERY(tp) &&
+           (so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
+           tcp_cansbgrow(&so->so_snd)) {
+               if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
+                       so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
+                       sendwin >= (so->so_snd.sb_cc - 
+                               (tp->snd_nxt - tp->snd_una))) {
+                       /* Also, increase the send buffer only if the
+                        * round-trip time is not increasing, because we
+                        * do not want to contribute to latency by filling
+                        * buffers. We also do not want to hold onto the
+                        * application's old data for too long; interactive
+                        * applications would rather discard old data.
+                        */
+                       if (tp->t_rttcur <= 
+                               (basertt + 25)) {
+                               if (sbreserve(&so->so_snd, 
+                                       min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
+                                       tcp_autosndbuf_max)) == 1) {
+                                       so->so_snd.sb_idealsize = so->so_snd.sb_hiwat;
+                               }
+                       } else {
+                               so->so_snd.sb_idealsize = 
+                                   max(tcp_sendspace, so->so_snd.sb_hiwat -
+                                       (2 * tcp_autosndbuf_inc));
+                               so->so_snd.sb_flags |= SB_TRIM;
+                       }
+               }
+       }
+
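
The growth test above can be read as a single predicate; restated stand-alone for
clarity (field names follow the hunk, values are plain integers):

	static int
	sendbuf_should_grow(u_int32_t snd_wnd, u_int32_t sb_hiwat,
	    u_int32_t sb_cc, u_int32_t sendwin, u_int32_t unacked)
	{
		return ((snd_wnd / 4 * 5) >= sb_hiwat &&	/* peer window >= 80% of hiwat */
		    sb_cc >= (sb_hiwat / 8 * 7) &&		/* buffer at least 7/8 full */
		    sendwin >= (sb_cc - unacked));		/* window covers the backlog */
	}
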
        /*
         * Truncate to the maximum segment length or enable TCP Segmentation
         * Offloading (if supported by hardware) and ensure that FIN is removed
@@ -717,47 +811,57 @@ after_sack_rexmit:
         * Sender silly window avoidance.   We transmit under the following
         * conditions when len is non-zero:
         *
+        *      - we've timed out (e.g. persist timer)
+        *      - we need to retransmit
         *      - We have a full segment (or more with TSO)
         *      - This is the last buffer in a write()/send() and we are
         *        either idle or running NODELAY
-        *      - we've timed out (e.g. persist timer)
        *      - we have more than 1/2 the maximum send window's worth of
        *        data (the receiver may be limiting the window size)
-        *      - we need to retransmit
         */
        if (len) {
-               if (len >= tp->t_maxseg) {
-                       tp->t_flags |= TF_MAXSEGSNT;
-                       goto send;
-               }
-               if (!(tp->t_flags & TF_MORETOCOME) &&
-                   (idle || tp->t_flags & TF_NODELAY || tp->t_flags & TF_MAXSEGSNT) &&
-                   (tp->t_flags & TF_NOPUSH) == 0 &&
-                   len + off >= so->so_snd.sb_cc) {
-                       tp->t_flags &= ~TF_MAXSEGSNT;
-                       goto send;
-               }
                if (tp->t_force) {
                        tp->t_flags &= ~TF_MAXSEGSNT;
                        goto send;
                }
-               if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) {
-                       tp->t_flags &= ~TF_MAXSEGSNT;
-                       goto send;
-               }
                if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
                        tp->t_flags &= ~TF_MAXSEGSNT;
                        goto send;
                }
                if (sack_rxmit)
                        goto send;
+
+               /*
+                * Send new data on the connection only if it is
+                * not flow controlled
+                */
+               if (!INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb) ||
+                   tp->t_state != TCPS_ESTABLISHED) {
+                       if (len >= tp->t_maxseg) {
+                               tp->t_flags |= TF_MAXSEGSNT;
+                               goto send;
+                       }
+                       if (!(tp->t_flags & TF_MORETOCOME) &&
+                           (idle || tp->t_flags & TF_NODELAY || tp->t_flags & TF_MAXSEGSNT) &&
+                           (tp->t_flags & TF_NOPUSH) == 0 &&
+                           len + off >= so->so_snd.sb_cc) {
+                               tp->t_flags &= ~TF_MAXSEGSNT;
+                               goto send;
+                       }
+                       if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) {
+                               tp->t_flags &= ~TF_MAXSEGSNT;
+                               goto send;
+                       }
+               } else {
+                       tcpstat.tcps_fcholdpacket++;
+               }
        }
 
        /*
         * Compare available window to amount of window
         * known to peer (as advertised window less
         * next expected input).  If the difference is at least two
-        * max size segments, or at least 50% of the maximum possible
+        * max size segments, or at least 25% of the maximum possible
         * window, then want to send a window update to peer.
         * Skip this if the connection is in T/TCP half-open state.
         */
@@ -771,17 +875,31 @@ after_sack_rexmit:
                        (tp->rcv_adv - tp->rcv_nxt);
 
                if (adv >= (int32_t) (2 * tp->t_maxseg)) {
-                       
-                       /* 
-                        * Update only if the resulting scaled value of the window changed, or
+                       /* Update only if the resulting scaled value of the window changed, or
                         * if there is a change in the sequence since the last ack.
                         * This avoids what appears as dupe ACKS (see rdar://5640997)
+                        *
+                        * If streaming is detected, avoid sending too many
+                        * window updates. We will depend on the delack timer
+                        * to send a window update when needed.
                         */
-
-                       if ((tp->last_ack_sent != tp->rcv_nxt) || (((recwin + adv) >> tp->rcv_scale) > recwin)) 
+                       if ((tp->t_flags & TF_STRETCHACK) == 0 &&
+                               (tp->last_ack_sent != tp->rcv_nxt || 
+                               ((recwin + adv) >> tp->rcv_scale) > recwin)) {
                                goto send;
+                       }
+
+                       /* Make sure that the delayed ack timer is set if we
+                        * delayed sending a window update because of streaming
+                        * detection.
+                        */
+                       if ((tp->t_flags & TF_STRETCHACK) != 0 &&
+                               (tp->t_flags & TF_DELACK) == 0) { 
+                               tp->t_flags |= TF_DELACK;
+                               tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
+                       }
                }
-               if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) 
+               if (4 * adv >= (int32_t) so->so_rcv.sb_hiwat) 
                                goto send;
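
The threshold change above (2 * adv to 4 * adv) lowers the forced-update point from
50% to 25% of the receive buffer. A quick stand-alone check of the arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		int32_t sb_hiwat = 65536;	/* receive buffer high-water mark */
		int32_t adv = 16384;		/* newly opened window: exactly 25% */

		/* 4 * 16384 = 65536 >= 65536, so the update is sent */
		printf("send update: %s\n", (4 * adv >= sb_hiwat) ? "yes" : "no");
		return (0);
	}
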
        }
 
@@ -808,8 +926,9 @@ after_sack_rexmit:
         * after the retransmission timer has been turned off.  Make sure
         * that the retransmission timer is set.
         */
-       if (tp->sack_enable && (tp->t_state >= TCPS_ESTABLISHED) && SEQ_GT(tp->snd_max, tp->snd_una) &&
-               tp->t_timer[TCPT_REXMT] == 0 &&
+       if (tp->sack_enable && (tp->t_state >= TCPS_ESTABLISHED) && 
+           SEQ_GT(tp->snd_max, tp->snd_una) &&
+           tp->t_timer[TCPT_REXMT] == 0 &&
            tp->t_timer[TCPT_PERSIST] == 0) {
                        tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
                        goto just_return;
@@ -847,20 +966,25 @@ just_return:
         * If there is no reason to send a segment, just return,
         * but if there are some packets left in the packet list, send them now.
         */
-       while (!(tp->t_flags & TF_SENDINPROG) && tp->t_pktlist_head != NULL) {
+       while (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+               tp->t_pktlist_head != NULL) {
                packetlist = tp->t_pktlist_head;
                packchain_listadd = tp->t_lastchain;
                packchain_sent++;
                TCP_PKTLIST_CLEAR(tp);
-               tp->t_flags |= TF_SENDINPROG;
 
                error = tcp_ip_output(so, tp, packetlist, packchain_listadd,
-                   tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), recwin);
-
-               tp->t_flags &= ~TF_SENDINPROG;
+                   tp_inp_options, (so_options & SO_DONTROUTE),
+                   (sack_rxmit | (sack_bytes_rxmt != 0)), recwin,
+#ifdef INET6
+                   isipv6);
+#else
+                   0);
+#endif
        }
        /* tcp was closed while we were in ip; resume close */
-       if ((tp->t_flags & (TF_CLOSING|TF_SENDINPROG)) == TF_CLOSING) {
+       if (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+               (tp->t_flags & TF_CLOSING)) {
                tp->t_flags &= ~TF_CLOSING;
                (void) tcp_close(tp);
        } else {
@@ -884,7 +1008,7 @@ send:
                hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
        else
 #endif
-       hdrlen = sizeof (struct tcpiphdr);
+               hdrlen = sizeof (struct tcpiphdr);
        if (flags & TH_SYN) {
                tp->snd_nxt = tp->iss;
                if ((tp->t_flags & TF_NOOPT) == 0) {
@@ -899,7 +1023,7 @@ send:
                        if ((tp->t_flags & TF_REQ_SCALE) &&
                            ((flags & TH_ACK) == 0 ||
                            (tp->t_flags & TF_RCVD_SCALE))) {
-                               *((u_int32_t *)(opt + optlen)) = htonl(
+                               *((u_int32_t *)(void *)(opt + optlen)) = htonl(
                                        TCPOPT_NOP << 24 |
                                        TCPOPT_WINDOW << 16 |
                                        TCPOLEN_WINDOW << 8 |
@@ -980,7 +1104,7 @@ send:
         * the CWR flag on data packets. Pure acks don't have this set.
         */
        if ((tp->ecn_flags & TE_SENDCWR) != 0 && len != 0 &&
-               !SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+               !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
                flags |= TH_CWR;
                tp->ecn_flags &= ~TE_SENDCWR;
        }
@@ -1001,7 +1125,7 @@ send:
            (flags & TH_RST) == 0 &&
            ((flags & TH_ACK) == 0 ||
             (tp->t_flags & TF_RCVD_TSTMP))) {
-               u_int32_t *lp = (u_int32_t *)(opt + optlen);
+               u_int32_t *lp = (u_int32_t *)(void *)(opt + optlen);
 
                /* Form timestamp option as shown in appendix A of RFC 1323. */
                *lp++ = htonl(TCPOPT_TSTAMP_HDR);
@@ -1010,6 +1134,10 @@ send:
                optlen += TCPOLEN_TSTAMP_APPA;
        }
 
+       /* Note the timestamp for receive buffer autosizing */
+       if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
+               tp->rfbuf_ts = tcp_now;
+
        if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
                /* 
                 * Tack on the SACK permitted option *last*.
@@ -1051,7 +1179,7 @@ send:
                if (TCPS_HAVEESTABLISHED(tp->t_state) &&
                    (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
                    MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
-                       int nsack, sackoptlen, padlen;
+                       int nsack, padlen;
                        u_char *bp = (u_char *)opt + optlen;
                        u_int32_t *lp;
 
@@ -1072,7 +1200,7 @@ send:
                        tcpstat.tcps_sack_send_blocks++;
                        *bp++ = TCPOPT_SACK;
                        *bp++ = sackoptlen;
-                       lp = (u_int32_t *)bp;
+                       lp = (u_int32_t *)(void *)bp;
                        for (i = 0; i < nsack; i++) {
                                struct sackblk sack = tp->sackblks[i];
                                *lp++ = htonl(sack.start);
@@ -1157,6 +1285,23 @@ send:
 #endif
 /*#endif*/
 
+       /* Check if there is enough data in the send socket
+        * buffer to start measuring bandwidth.
+        */
+       if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
+               (tp->t_bwmeas != NULL) &&
+               (tp->t_flagsext & TF_BWMEAS_INPROGRESS) == 0 &&
+               (so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)) >= 
+                       tp->t_bwmeas->bw_minsize) {
+               tp->t_bwmeas->bw_size = min((so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)),
+                       tp->t_bwmeas->bw_maxsize);
+               tp->t_flagsext |= TF_BWMEAS_INPROGRESS;
+               tp->t_bwmeas->bw_start = tp->snd_max;
+               tp->t_bwmeas->bw_ts = tcp_now;
+       }
+
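
The block above only arms the measurement (bw_start, bw_ts, bw_size); the completing
computation happens later on the ACK path. A hypothetical completion step, assuming
tcp_now advances at TCP_RETRANSHZ ticks per second (struct field names as in the hunk):

	static u_int32_t
	example_bw_estimate(const struct bwmeas *bwm, u_int32_t now)
	{
		u_int32_t elapsed = now - bwm->bw_ts;	/* ticks since measurement start */

		if (elapsed == 0)
			return (0);
		/* bytes per second = bytes / (ticks / TCP_RETRANSHZ) */
		return ((bwm->bw_size / elapsed) * TCP_RETRANSHZ);
	}
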
+       VERIFY(tp->t_inpcb->inp_flowhash != 0);
+       
        /*
         * Grab a header mbuf, attaching a copy of data to
         * be transmitted, and initialize the header from
@@ -1315,7 +1460,7 @@ send:
                        MH_ALIGN(m, hdrlen);
                } else
 #endif
-               m->m_data += max_linkhdr;
+                       m->m_data += max_linkhdr;
                m->m_len = hdrlen;
        }
        m->m_pkthdr.rcvif = 0;
@@ -1325,22 +1470,23 @@ send:
 #if INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
-               th = (struct tcphdr *)(ip6 + 1);
+               th = (struct tcphdr *)(void *)(ip6 + 1);
                tcp_fillheaders(tp, ip6, th);
                if ((tp->ecn_flags & TE_SENDIPECT) != 0 && len &&
-                       !SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+                       !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
                        ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
                }
+               svc_flags |= PKT_SCF_IPV6;
        } else
 #endif /* INET6 */
        {
                ip = mtod(m, struct ip *);
                ipov = (struct ipovly *)ip;
-               th = (struct tcphdr *)(ip + 1);
+               th = (struct tcphdr *)(void *)(ip + 1);
                /* this picks up the pseudo header (w/o the length) */
                tcp_fillheaders(tp, ip, th);
                if ((tp->ecn_flags & TE_SENDIPECT) != 0 && len &&
-                       !SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+                       !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
                        ip->ip_tos = IPTOS_ECN_ECT0;
                }
        }
@@ -1350,7 +1496,7 @@ send:
         * window for use in delaying messages about window sizes.
         * If resending a FIN, be sure not to use a new sequence number.
         */
-       if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+       if (flags & TH_FIN && (tp->t_flags & TF_SENTFIN) &&
            tp->snd_nxt == tp->snd_max)
                tp->snd_nxt--;
        /*
@@ -1398,7 +1544,7 @@ send:
        }
 
 #if TRAFFIC_MGT
-       if (tcp_recv_bg == 1  || is_tcp_recv_bg(so)) {
+       if (tcp_recv_bg == 1  || IS_TCP_RECV_BG(so)) {
                if (tp->acc_iaj > tcp_acc_iaj_react_limit) {
                        uint32_t min_iaj_win = tcp_min_iaj_win * tp->t_maxseg;
                        if (tp->iaj_rwintop == 0 ||
@@ -1567,21 +1713,12 @@ timer:
         * to handle ttl and tos; we could keep them in
         * the template, but need a way to checksum without them.
         */
+#ifdef INET6
        /*
         * m->m_pkthdr.len should have been set before cksum calculation,
         * because in6_cksum() needs it.
         */
-#if INET6
        if (isipv6) {
-               struct rtentry *rt6;
-               struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
-               unsigned int outif;
-
-               KERNEL_DEBUG(DBG_LAYER_BEG,
-                    ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
-                    (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
-                     (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
-                    0,0,0);
                /*
                 * we separately set hoplimit for every segment, since the
                 * user might want to change the value via setsockopt.
@@ -1594,49 +1731,23 @@ timer:
                                               : NULL);
 
                /* TODO: IPv6 IP6TOS_ECT bit on */
-#if IPSEC
-               if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) {
-                       m_freem(m);
-                       error = ENOBUFS;
-                       goto out;
-               }
-#endif /*IPSEC*/
-               m->m_pkthdr.socket_id = socket_id;
-
-               rt6 = tp->t_inpcb->in6p_route.ro_rt;
-               if (rt6 != NULL && rt6->rt_ifp != NULL 
-                       && rt6->rt_ifp != lo_ifp)
-                       set_packet_tclass(m, so, MBUF_TC_UNSPEC, 1);
-
-               DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb, struct ip6_hdr *, ip6,
-                       struct tcpcb *, tp, struct tcphdr *, th);
-
-               if (tp->t_inpcb->inp_flags & INP_BOUND_IF)
-                       ip6oa.ip6oa_boundif = tp->t_inpcb->inp_boundif;
-
-               ip6oa.ip6oa_nocell = (tp->t_inpcb->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
-
-               error = ip6_output(m, inp6_pktopts, &tp->t_inpcb->in6p_route,
-                   (so_options & SO_DONTROUTE) | IPV6_OUTARGS, NULL, NULL,
-                   &ip6oa);
-
-               /* Refresh rt6 as we may have lost the route while in ip6_output() */
-               if ((rt6 = tp->t_inpcb->in6p_route.ro_rt) != NULL && 
-                   (outif = rt6->rt_ifp->if_index) != tp->t_inpcb->in6p_last_outif)
-                       tp->t_inpcb->in6p_last_outif = outif;
+               KERNEL_DEBUG(DBG_LAYER_BEG,
+                   ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
+                   (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
+                   (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
+                   sendalot,0,0);
        } else
 #endif /* INET6 */
-    {
-       ip->ip_len = m->m_pkthdr.len;
-       ip->ip_ttl = tp->t_inpcb->inp_ip_ttl;   /* XXX */
-       ip->ip_tos |= (tp->t_inpcb->inp_ip_tos & ~IPTOS_ECN_MASK);      /* XXX */
-
-
-       KERNEL_DEBUG(DBG_LAYER_BEG,
-            ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
-            (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
-             (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
-            0,0,0);
+       {
+               ip->ip_len = m->m_pkthdr.len;
+               ip->ip_ttl = tp->t_inpcb->inp_ip_ttl;   /* XXX */
+               ip->ip_tos |= (tp->t_inpcb->inp_ip_tos & ~IPTOS_ECN_MASK);/* XXX */
+               KERNEL_DEBUG(DBG_LAYER_BEG,
+                   ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
+                   (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
+                   (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
+                   0,0,0);
+       }
 
        /*
         * See if we should do MTU discovery.
@@ -1647,9 +1758,11 @@ timer:
         *      4) the MTU is not locked (if it is, then discovery has been
         *         disabled for that route)
         */
-
-       if (path_mtu_discovery && (tp->t_flags & TF_PMTUD))
-               ip->ip_off |= IP_DF;
+#ifdef INET6
+       if (!isipv6)
+#endif
+               if (path_mtu_discovery && (tp->t_flags & TF_PMTUD))
+                       ip->ip_off |= IP_DF;
 
 #if IPSEC
        if (ipsec_bypass == 0)
@@ -1661,18 +1774,50 @@ timer:
         */
        lost = 0;
        m->m_pkthdr.socket_id = socket_id;
+
+       /*
+        * Embed the flow hash in pkt hdr and mark the packet as
+        * capable of flow controlling
+        */
+       m->m_pkthdr.m_flowhash = tp->t_inpcb->inp_flowhash;
+       m->m_pkthdr.m_fhflags |=
+           (PF_TAG_TCP | PF_TAG_FLOWHASH | PF_TAG_FLOWADV);
+
        m->m_nextpkt = NULL;
 
-       if (tp->t_inpcb->inp_route.ro_rt != NULL && 
-               tp->t_inpcb->inp_route.ro_rt->rt_ifp != NULL &&
-               tp->t_inpcb->inp_route.ro_rt->rt_ifp != lo_ifp)
-               set_packet_tclass(m, so, MBUF_TC_UNSPEC, 0);
+       if (tp->t_inpcb->inp_last_outifp != NULL &&
+           tp->t_inpcb->inp_last_outifp != lo_ifp) {
+               /* Hint to prioritize this packet if:
+                * 1. the packet has no data,
+                * 2. the interface supports the transmit-start model and
+                *    did not disable ACK prioritization,
+                * 3. only the ACK flag is set, and
+                * 4. there is no outstanding data on this connection.
+                */
+               if (tcp_prioritize_acks != 0 && len == 0 &&
+                   (tp->t_inpcb->inp_last_outifp->if_eflags & 
+                       (IFEF_TXSTART | IFEF_NOACKPRI)) == IFEF_TXSTART &&
+                   th->th_flags == TH_ACK && tp->snd_una == tp->snd_max &&
+                   tp->t_timer[TCPT_REXMT] == 0) {
+                       svc_flags |= PKT_SCF_TCP_ACK;
+               }
+               set_packet_service_class(m, so, MBUF_SC_UNSPEC, svc_flags);
+       }
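
Restated as a predicate, the hint above fires only for a pure ACK with no
outstanding data, sent over a transmit-start interface (names as in the hunk):

	static int
	should_prioritize_ack(struct tcpcb *tp, struct tcphdr *th, int len)
	{
		struct ifnet *ifp = tp->t_inpcb->inp_last_outifp;

		return (tcp_prioritize_acks != 0 && len == 0 &&
		    (ifp->if_eflags & (IFEF_TXSTART | IFEF_NOACKPRI)) ==
		    IFEF_TXSTART &&
		    th->th_flags == TH_ACK && tp->snd_una == tp->snd_max &&
		    tp->t_timer[TCPT_REXMT] == 0);
	}
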
 
        tp->t_pktlist_sentlen += len;
        tp->t_lastchain++;
 
-       DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb,
-               struct ip *, ip, struct tcpcb *, tp, struct tcphdr *, th);
+#ifdef INET6
+       if (isipv6) {
+               DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb,
+                       struct ip6 *, ip6, struct tcpcb *, tp, struct tcphdr *,
+                       th);
+       } else
+#endif
+       {
+               DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb,
+                       struct ip *, ip, struct tcpcb *, tp, struct tcphdr *, th);
+       }
 
        if (tp->t_pktlist_head != NULL) {
                tp->t_pktlist_tail->m_nextpkt = m;
@@ -1682,25 +1827,53 @@ timer:
                tp->t_pktlist_head = tp->t_pktlist_tail = m;
        }
 
+       if ((lro_ackmore) && (!sackoptlen) && (!tp->t_timer[TCPT_PERSIST]) &&
+                       ((th->th_flags & TH_ACK) == TH_ACK) && (!len) &&
+                       (tp->t_state == TCPS_ESTABLISHED)) {
+               /* For a pure ACK, see if we need to send more of them */
+               mnext = tcp_send_lroacks(tp, m, th);
+               if (mnext) {
+                       tp->t_pktlist_tail->m_nextpkt = mnext;
+                       if (mnext->m_nextpkt == NULL) {
+                               tp->t_pktlist_tail = mnext;
+                               tp->t_lastchain++;
+                       } else {
+                               struct mbuf *tail, *next;
+                               next = mnext->m_nextpkt;
+                               tail = next->m_nextpkt;
+                               while (tail) {
+                                       next = tail;
+                                       tail = tail->m_nextpkt;
+                                       tp->t_lastchain++;
+                               }
+                               tp->t_pktlist_tail = next;
+                       }
+               }
+       }
+
        if (sendalot == 0 || (tp->t_state != TCPS_ESTABLISHED) ||
              (tp->snd_cwnd <= (tp->snd_wnd / 8)) ||
              (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || tp->t_force != 0 ||
              tp->t_lastchain >= tcp_packet_chaining) {
                error = 0;
-               while (!(tp->t_flags & TF_SENDINPROG) &&
-                   tp->t_pktlist_head != NULL) {
+               while (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+                       tp->t_pktlist_head != NULL) {
                        packetlist = tp->t_pktlist_head;
                        packchain_listadd = tp->t_lastchain;
                        packchain_sent++;
                        lost = tp->t_pktlist_sentlen;
                        TCP_PKTLIST_CLEAR(tp);
-                       tp->t_flags |= TF_SENDINPROG;
 
                        error = tcp_ip_output(so, tp, packetlist,
                            packchain_listadd, tp_inp_options,
-                           (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), recwin);
+                           (so_options & SO_DONTROUTE),
+                           (sack_rxmit | (sack_bytes_rxmt != 0)), recwin,
+#ifdef INET6
+                           isipv6);
+#else
+                           0);
+#endif
 
-                       tp->t_flags &= ~TF_SENDINPROG;
                        if (error) {
                                /*
                                 * Take into account the rest of unsent
@@ -1715,20 +1888,19 @@ timer:
                        }
                }
                /* tcp was closed while we were in ip; resume close */
-               if ((tp->t_flags & (TF_CLOSING|TF_SENDINPROG)) == TF_CLOSING) {
+               if (tp->t_inpcb->inp_sndinprog_cnt == 0 &&
+                       (tp->t_flags & TF_CLOSING)) {
                        tp->t_flags &= ~TF_CLOSING;
                        (void) tcp_close(tp);
                        return (0);
                }
-       }
-       else {
+       } else {
                error = 0;
                packchain_looped++;
                tcpstat.tcps_sndtotal++;
 
                goto again;
        }
-   }
        if (error) {
                /*
                 * Assume that the packets were lost, so back out the
@@ -1757,8 +1929,9 @@ out:
 
                if (error == ENOBUFS) {
                        if (!tp->t_timer[TCPT_REXMT] &&
-                                !tp->t_timer[TCPT_PERSIST])
-                                       tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
+                               !tp->t_timer[TCPT_PERSIST])
+                               tp->t_timer[TCPT_REXMT] = 
+                                       OFFSET_FROM_START(tp, tp->t_rxtcur);
 
                        tp->snd_cwnd = tp->t_maxseg;
                        tp->t_bytes_acked = 0;
@@ -1807,25 +1980,6 @@ out:
 
        tcpstat.tcps_sndtotal++;
 
-#if INET6
-       /*
-        * Data sent (as far as we can tell).
-        * If this advertises a larger window than any other segment,
-        * then remember the size of the advertised window.
-        * Make sure ACK/DELACK conditions are cleared before
-        * we unlock the socket.
-        *  NOTE: for now, this is done in tcp_ip_output for IPv4
-        */
-       if (isipv6) {
-               if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
-                       tp->rcv_adv = tp->rcv_nxt + recwin;
-               tp->last_ack_sent = tp->rcv_nxt;
-               tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
-               tp->t_timer[TCPT_DELACK] = 0;
-               tp->t_unacksegs = 0;
-       }
-#endif
-
        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
        if (sendalot)
                goto again;
@@ -1836,24 +1990,63 @@ out:
 
 static int
 tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
-    int cnt, struct mbuf *opt, int flags, int sack_in_progress, int recwin)
+    int cnt, struct mbuf *opt, int flags, int sack_in_progress, int recwin,
+    boolean_t isipv6)
 {
        int error = 0;
        boolean_t chain;
        boolean_t unlocked = FALSE;
        struct inpcb *inp = tp->t_inpcb;
-       struct ip_out_args ipoa;
+       struct ip_out_args ipoa =
+           { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR };
        struct route ro;
-       unsigned int outif;
+       struct ifnet *outif = NULL;
+#ifdef INET6
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
+       struct route_in6 ro6;
+       struct flowadv *adv =
+           (isipv6 ? &ip6oa.ip6oa_flowadv : &ipoa.ipoa_flowadv);
+#else
+       struct flowadv *adv = &ipoa.ipoa_flowadv;
+#endif /* !INET6 */
 
        /* If socket was bound to an ifindex, tell ip_output about it */
-       ipoa.ipoa_boundif = (inp->inp_flags & INP_BOUND_IF) ?
-           inp->inp_boundif : IFSCOPE_NONE;
-       ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
-       flags |= IP_OUTARGS;
+       if (inp->inp_flags & INP_BOUND_IF) {
+#ifdef INET6
+               if (isipv6) {
+                       ip6oa.ip6oa_boundif = inp->inp_boundifp->if_index;
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+               } else
+#endif
+               {
+                       ipoa.ipoa_boundif = inp->inp_boundifp->if_index;
+                       ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+               }
+       }
+
+       if (inp->inp_flags & INP_NO_IFT_CELLULAR) {
+#ifdef INET6
+               if (isipv6)
+                       ip6oa.ip6oa_flags |=  IP6OAF_NO_CELLULAR;
+               else
+#endif
+                       ipoa.ipoa_flags |=  IPOAF_NO_CELLULAR;
+       }
+#ifdef INET6
+       if (isipv6)
+               flags |= IPV6_OUTARGS;
+       else
+#endif
+               flags |= IP_OUTARGS;
 
        /* Copy the cached route and take an extra reference */
-       inp_route_copyout(inp, &ro);
+#ifdef INET6
+       if (isipv6)
+               in6p_route_copyout(inp, &ro6);
+       else
+#endif
+               inp_route_copyout(inp, &ro);
 
        /*
         * Data sent (as far as we can tell).
@@ -1869,6 +2062,9 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
        tp->t_timer[TCPT_DELACK] = 0;
        tp->t_unacksegs = 0;
 
+       /* Increment the count of outstanding send operations */
+       inp->inp_sndinprog_cnt++;
+
        /*
         * If allowed, unlock TCP socket while in IP
         * but only if the connection is established and
@@ -1878,9 +2074,10 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
         * - we're not in Fast Recovery mode
         * - if we're not sending from an upcall.
         */
-       if (tcp_output_unlocked && ((so->so_flags & SOF_UPCALLINUSE) == 0) &&
+       if (tcp_output_unlocked && !so->so_upcallusecount &&
            (tp->t_state == TCPS_ESTABLISHED) && (sack_in_progress == 0) &&
            ((tp->t_flags & TF_FASTRECOVERY) == 0)) {
+
                unlocked = TRUE;
                socket_unlock(so, 0);
        }
@@ -1920,7 +2117,16 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
                         */
                        cnt = 0;
                }
-               error = ip_output_list(pkt, cnt, opt, &ro, flags, 0, &ipoa);
+#ifdef INET6
+               if (isipv6)
+                       error = ip6_output_list(pkt, cnt,
+                           inp->in6p_outputopts, &ro6, flags, NULL, NULL,
+                           &ip6oa);
+               else
+#endif
+                       error = ip_output_list(pkt, cnt, opt, &ro, flags, NULL,
+                           &ipoa);
+
                if (chain || error) {
                        /*
                         * If we sent down a chain then we are done since
@@ -1937,13 +2143,71 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
        if (unlocked)
                socket_lock(so, 0);
 
-       if (ro.ro_rt != NULL &&
-           (outif = ro.ro_rt->rt_ifp->if_index) != inp->inp_last_outif)
-               inp->inp_last_outif = outif;
+       /*
+        * Enter the flow-controlled state if the connection is
+        * established and not in recovery.
+        *
+        * A connection will enter the suspended state even if it
+        * is in recovery.
+        */
+       if (((adv->code == FADV_FLOW_CONTROLLED && !IN_FASTRECOVERY(tp)) ||
+           adv->code == FADV_SUSPENDED) && 
+           !(tp->t_flags & TF_CLOSING) &&
+           tp->t_state == TCPS_ESTABLISHED) {
+               int rc;
+               rc = inp_set_fc_state(inp, adv->code);
+
+               if (rc == 1) 
+                       DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+                           struct tcpcb *, tp, struct tcphdr *, NULL,
+                           int32_t, ((adv->code == FADV_FLOW_CONTROLLED) ?
+                           TCP_CC_FLOW_CONTROL : TCP_CC_SUSPEND));
+       }
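
The gating logic above, condensed: FADV_FLOW_CONTROLLED is honored only outside
fast recovery, FADV_SUSPENDED unconditionally, and both require an established,
non-closing connection. As a stand-alone predicate:

	static int
	should_set_fc_state(const struct tcpcb *tp, int code, int in_recovery)
	{
		return (((code == FADV_FLOW_CONTROLLED && !in_recovery) ||
		    code == FADV_SUSPENDED) &&
		    !(tp->t_flags & TF_CLOSING) &&
		    tp->t_state == TCPS_ESTABLISHED);
	}
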
 
-       /* Synchronize cached PCB route */
-       inp_route_copyin(inp, &ro);
+       /*
+        * When an interface queue gets suspended, some of the
+        * packets are dropped. Return ENOBUFS to update the
+        * pcb state.
+        */
+       if (adv->code == FADV_SUSPENDED)
+               error = ENOBUFS;
+
+       VERIFY(inp->inp_sndinprog_cnt > 0);
+       if (--inp->inp_sndinprog_cnt == 0)
+               inp->inp_flags &= ~(INP_FC_FEEDBACK);
 
+#ifdef INET6
+       if (isipv6) {
+               if (ro6.ro_rt != NULL && (outif = ro6.ro_rt->rt_ifp) !=
+                   inp->in6p_last_outifp)
+                       inp->in6p_last_outifp = outif;
+       } else
+#endif
+               if (ro.ro_rt != NULL && (outif = ro.ro_rt->rt_ifp) !=
+                   inp->inp_last_outifp)
+                       inp->inp_last_outifp = outif;
+
+       if ((inp->inp_flags & INP_NO_IFT_CELLULAR) && outif != NULL &&
+           outif->if_type == IFT_CELLULAR)
+               soevent(inp->inp_socket,
+                   (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED));
+
+       /* Synchronize cached PCB route & options */
+#ifdef INET6
+       if (isipv6)
+               in6p_route_copyin(inp, &ro6);
+       else
+#endif
+               inp_route_copyin(inp, &ro);
+
+       if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift == 0 && 
+               tp->t_inpcb->inp_route.ro_rt != NULL) {
+               /* If we found the route and there is an RTT on it,
+                * reset the retransmit timer.
+                */
+               tcp_getrt_rtt(tp, tp->t_inpcb->in6p_route.ro_rt);
+               tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
+       }
        return (error);
 }
 
@@ -1970,10 +2234,87 @@ tcp_setpersist(tp)
         */
        TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
            t * tcp_backoff[tp->t_rxtshift],
-           TCPTV_PERSMIN, TCPTV_PERSMAX,
-           TCP_ADD_REXMTSLOP(tp));
+           TCPTV_PERSMIN, TCPTV_PERSMAX, 0);
        tp->t_timer[TCPT_PERSIST] = OFFSET_FROM_START(tp, tp->t_timer[TCPT_PERSIST]);
 
        if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
                tp->t_rxtshift++;
 }
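
For reference, the persist timeout set above follows the classic 4.4BSD backoff
table; a sketch of the clamping, assuming tcp_backoff[] = { 1, 2, 4, 8, 16, 32,
64, ... } (note the final TCPT_RANGESET argument is now 0, so the retransmit slop
no longer applies here):

	static u_int32_t
	example_persist_timeout(u_int32_t t, int rxtshift)
	{
		static const int backoff[] =
		    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
		u_int32_t tv = t * backoff[rxtshift];

		if (tv < TCPTV_PERSMIN)		/* clamp into [PERSMIN, PERSMAX] */
			tv = TCPTV_PERSMIN;
		else if (tv > TCPTV_PERSMAX)
			tv = TCPTV_PERSMAX;
		return (tv);
	}
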
+
+/*
+ * Send as many ACKs as the coalesced data warrants: one for every
+ * other packet when stretch ACK is not enabled, and one for every
+ * 8 packets when it is.
+ */
+static struct mbuf*
+tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
+{
+       struct mbuf *mnext = NULL, *ack_chain = NULL, *tail = NULL;
+       int count = 0;
+       tcp_seq org_ack = ntohl(th->th_ack);
+       tcp_seq prev_ack = 0;
+       int tack_offset = 28; /* XXX IPv6 not supported */
+       int ack_size = (tp->t_flags & TF_STRETCHACK) ?
+                       (maxseg_unacked * tp->t_maxseg) : (tp->t_maxseg << 1);
+       int segs_acked = (tp->t_flags & TF_STRETCHACK) ? maxseg_unacked : 2;
+       struct mbuf *prev_ack_pkt = NULL;
+       struct socket *so = tp->t_inpcb->inp_socket;
+
+       count = tp->t_lropktlen/tp->t_maxseg;
+
+       prev_ack = (org_ack - tp->t_lropktlen) + ack_size;
+       if (prev_ack < org_ack) {
+               ack_chain = m_dup(m, M_DONTWAIT);
+               if (ack_chain) {
+                       th->th_ack = htonl(prev_ack);
+                       tail = ack_chain;
+                       count -= segs_acked; /* accounts for prev_ack packet */
+                       count = (count <= segs_acked) ? 0 : count - segs_acked;
+                       tcpstat.tcps_sndacks++;
+                       so_tc_update_stats(m, so, m_get_service_class(m));
+               } else {
+                       return NULL;
+               }
+       } else {
+               tp->t_lropktlen = 0;
+               return NULL;
+       }
+
+       prev_ack_pkt = ack_chain;
+       
+       while (count > 0) {
+               if ((prev_ack + ack_size) < org_ack) {
+                       prev_ack += ack_size;
+               } else {
+                       /*
+                        * The last ACK sent must have the ACK number that TCP
+                        * thinks is the last sent ACK number.
+                        */
+                       prev_ack = org_ack;
+               }
+               mnext = m_dup(prev_ack_pkt, M_DONTWAIT);
+               if (mnext) {
+                       HTONL(prev_ack);
+                       bcopy(&prev_ack, mtod(prev_ack_pkt, caddr_t) + tack_offset, 4);
+                       NTOHL(prev_ack);
+                       tail->m_nextpkt = mnext;
+                       tail = mnext;
+                       count -= segs_acked;
+                       tcpstat.tcps_sndacks++;
+                       so_tc_update_stats(m, so, m_get_service_class(m));
+                       if (lrodebug == 5) { 
+                               printf("%s: lropktlen = %d count = %d, th_ack = %x \n", 
+                                       __func__, tp->t_lropktlen, count, 
+                                       th->th_ack);
+                       }
+               } else {
+                       if (lrodebug == 5) {
+                               printf("%s: failed to alloc mbuf.\n", __func__);
+                       }
+                       break;
+               }
+               prev_ack_pkt = mnext;
+       } 
+       tp->t_lropktlen = 0;
+       return ack_chain;
+}
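
A worked example of the spacing above, under assumed values (t_maxseg = 1448,
stretch ACK off, 8 segments coalesced by LRO, so ack_size = 2 * 1448 = 2896):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t maxseg = 1448, lropktlen = 8 * 1448;
		uint32_t org_ack = 100000;		/* arbitrary cumulative ACK */
		uint32_t ack_size = 2 * maxseg;		/* stretch ACK disabled */
		uint32_t ack = (org_ack - lropktlen) + ack_size;

		while (ack < org_ack) {			/* prints 91312, 94208, 97104 */
			printf("ack %u\n", ack);
			ack += ack_size;
		}
		printf("ack %u\n", org_ack);		/* final ACK matches org_ack */
		return (0);
	}

So eight coalesced segments yield four ACKs at two-segment spacing, the last one
carrying the ACK number TCP believes it last sent.
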
index df7bfa4e98bc5cc47edc0c26edba46955328b08b..5eb5c3b936b87421e738f0d5a4303f22474f74a4 100644 (file)
 
 #define        tcp_sendseqinit(tp) \
        (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
-       (tp)->snd_recover = (tp)->snd_high = (tp)->iss  
+       (tp)->snd_recover = (tp)->iss  
 
 #define TCP_PAWS_IDLE  (24 * 24 * 60 * 60 * TCP_RETRANSHZ)
                                        /* timestamp wrap-around time */
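
Sanity check of the constant, assuming TCP_RETRANSHZ is 1000 ticks per second:

	/*
	 * 24 * 24 * 60 * 60 = 2,073,600 seconds (24 days)
	 * 2,073,600 s * 1000 ticks/s = 2,073,600,000 ticks, just
	 * under the 2^31 - 1 signed wrap horizon.
	 */
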
index 8cf6584826e32a5ba5fe92b27257aea281b4a90a..355d4f0d5b8d3a7b35f75784dcd3e915717f39c2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/kdebug.h>
 #include <mach/sdt.h>
 
+#include <netinet/lro_ext.h>
+
 #define DBG_FNC_TCP_CLOSE      NETDBG_CODE(DBG_NETTCP, ((5 << 8) | 2))
 
 extern int tcp_lq_overflow;
@@ -162,6 +164,8 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
        "Default TCP Maximum Segment Size for IPv6");
 #endif
 
+extern int tcp_do_autorcvbuf;
+
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculously low MSS like 20 and send hundreds
@@ -242,23 +246,32 @@ extern struct tcp_cc_algo tcp_cc_ledbat;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_sockets, CTLFLAG_RD | CTLFLAG_LOCKED,
        &tcp_cc_ledbat.num_sockets, 0, "Number of sockets using background transport");
 
+__private_extern__ int tcp_win_scale = 3;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_win_scale, 0, "Window scaling factor");
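
With the default factor of 3 set above, a 16-bit on-the-wire window can advertise:

	/* 65535 << 3 = 524,280 bytes (~512 KB) of receive window */
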
+
 static void    tcp_cleartaocache(void);
 static void    tcp_notify(struct inpcb *, int);
 static void    tcp_cc_init(void);
 
 struct zone    *sack_hole_zone;
 struct zone    *tcp_reass_zone;
+struct zone    *tcp_bwmeas_zone;
 
 /* The array containing pointers to currently implemented TCP CC algorithms */
 struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
 
-extern unsigned int total_mb_cnt;
-extern unsigned int total_cl_cnt;
-extern int sbspace_factor;
-extern int tcp_sockthreshold;
 extern int slowlink_wsize;     /* window correction for slow links */
 extern int path_mtu_discovery;
 
+extern u_int32_t tcp_autorcvbuf_max;
+extern u_int32_t tcp_autorcvbuf_inc_shift;
+static void tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb);
+
+#define TCP_BWMEAS_BURST_MINSIZE 6
+#define TCP_BWMEAS_BURST_MAXSIZE 25
+
+static uint32_t bwmeas_elm_size;
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
@@ -300,6 +313,7 @@ static void tcpcb_to_otcpcb(struct tcpcb *, struct otcpcb *);
 static lck_attr_t *tcp_uptime_mtx_attr = NULL;         /* mutex attributes */
 static lck_grp_t *tcp_uptime_mtx_grp = NULL;           /* mutex group definition */
 static lck_grp_attr_t *tcp_uptime_mtx_grp_attr = NULL; /* mutex group attributes */
+int tcp_notsent_lowat_check(struct socket *so);
 
 
 int  get_inpcb_str_size(void)
@@ -382,14 +396,25 @@ tcp_init()
        zone_change(tcp_reass_zone, Z_CALLERACCT, FALSE);
        zone_change(tcp_reass_zone, Z_EXPAND, TRUE);
 
+       bwmeas_elm_size = P2ROUNDUP(sizeof(struct bwmeas), sizeof(u_int64_t));
+       tcp_bwmeas_zone = zinit(bwmeas_elm_size, (100 * bwmeas_elm_size), 0, "tcp_bwmeas_zone");
+       if (tcp_bwmeas_zone == NULL) {
+               panic("%s: failed allocating tcp_bwmeas_zone", __func__);
+               /* NOTREACHED */
+       }
+       zone_change(tcp_bwmeas_zone, Z_CALLERACCT, FALSE);
+       zone_change(tcp_bwmeas_zone, Z_EXPAND, TRUE);
+
 #if INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
-       if (max_protohdr < TCP_MINPROTOHDR)
-               max_protohdr = TCP_MINPROTOHDR;
-       if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
+       if (max_protohdr < TCP_MINPROTOHDR) {
+               _max_protohdr = TCP_MINPROTOHDR;
+               _max_protohdr = max_protohdr;   /* round it up */
+       }
+       if (max_linkhdr + max_protohdr > MHLEN)
                panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
@@ -440,6 +465,9 @@ tcp_init()
 
        /* Initialize TCP congestion control algorithms list */
        tcp_cc_init();
+
+       /* Initialize TCP LRO data structures */
+       tcp_lro_init();
 }
 
 /*
@@ -563,7 +591,7 @@ tcp_respond(
        struct ip6_hdr *ip6;
        int isipv6;
 #endif /* INET6 */
-       unsigned int outif;
+       struct ifnet *outif;
 
 #if INET6
        isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
@@ -606,13 +634,13 @@ tcp_respond(
                        bcopy((caddr_t)ip6, mtod(m, caddr_t), 
                              sizeof(struct ip6_hdr));
                        ip6 = mtod(m, struct ip6_hdr *);
-                       nth = (struct tcphdr *)(ip6 + 1);
+                       nth = (struct tcphdr *)(void *)(ip6 + 1);
                } else
 #endif /* INET6 */
                {
                        bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
                        ip = mtod(m, struct ip *);
-                       nth = (struct tcphdr *)(ip + 1);
+                       nth = (struct tcphdr *)(void *)(ip + 1);
                }
                bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
                flags = TH_ACK;
@@ -625,13 +653,17 @@ tcp_respond(
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #if INET6
                if (isipv6) {
+                       /* Expect 32-bit aligned IP on strict-align platforms */
+                       IP6_HDR_STRICT_ALIGNMENT_CHECK(ip6);
                        xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
-                       nth = (struct tcphdr *)(ip6 + 1);
+                       nth = (struct tcphdr *)(void *)(ip6 + 1);
                } else
 #endif /* INET6 */
              {
+               /* Expect 32-bit aligned IP on strict-align platforms */
+               IP_HDR_STRICT_ALIGNMENT_CHECK(ip);
                xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
-               nth = (struct tcphdr *)(ip + 1);
+               nth = (struct tcphdr *)(void *)(ip + 1);
              }
                if (th != nth) {
                        /*
@@ -717,12 +749,29 @@ tcp_respond(
        }
 #endif
 
-       if (tp != NULL)
-               set_packet_tclass(m, tp->t_inpcb->inp_socket, MBUF_TC_UNSPEC, isipv6);
+       if (tp != NULL) {
+               u_int32_t svc_flags = 0;
+               if (isipv6) {
+                       svc_flags |= PKT_SCF_IPV6;
+               }
+               set_packet_service_class(m, tp->t_inpcb->inp_socket,
+                   MBUF_SC_UNSPEC, svc_flags);
+
+               /* Embed flowhash and flow control flags */
+               m->m_pkthdr.m_flowhash = tp->t_inpcb->inp_flowhash;
+               m->m_pkthdr.m_fhflags |=
+                   (PF_TAG_TCP | PF_TAG_FLOWHASH | PF_TAG_FLOWADV);
+       }
 
 #if INET6
        if (isipv6) {
-               struct ip6_out_args ip6oa = { ifscope, nocell };
+               struct ip6_out_args ip6oa = { ifscope, { 0 },
+                   IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
+
+               if (ifscope != IFSCOPE_NONE)
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+               if (nocell)
+                       ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
 
                (void) ip6_output(m, NULL, ro6, IPV6_OUTARGS, NULL,
                    NULL, &ip6oa);
@@ -730,15 +779,21 @@ tcp_respond(
                        if (ro6 == &sro6) {
                                rtfree(ro6->ro_rt);
                                ro6->ro_rt = NULL;
-                       } else if ((outif = ro6->ro_rt->rt_ifp->if_index) !=
-                           tp->t_inpcb->in6p_last_outif) {
-                               tp->t_inpcb->in6p_last_outif = outif;
+                       } else if ((outif = ro6->ro_rt->rt_ifp) !=
+                           tp->t_inpcb->in6p_last_outifp) {
+                               tp->t_inpcb->in6p_last_outifp = outif;
                        }
                }
        } else
 #endif /* INET6 */
        {
-               struct ip_out_args ipoa = { ifscope, nocell };
+               struct ip_out_args ipoa = { ifscope, { 0 },
+                   IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR };
+
+               if (ifscope != IFSCOPE_NONE)
+                       ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+               if (nocell)
+                       ipoa.ipoa_flags |= IPOAF_NO_CELLULAR;
 
                if (ro != &sro) {
                        /* Copy the cached route and take an extra reference */
@@ -751,9 +806,9 @@ tcp_respond(
 
                if (ro != &sro) {
                        if (sro.ro_rt != NULL &&
-                           (outif = sro.ro_rt->rt_ifp->if_index) !=
-                           tp->t_inpcb->inp_last_outif)
-                               tp->t_inpcb->inp_last_outif = outif;
+                           (outif = sro.ro_rt->rt_ifp) !=
+                           tp->t_inpcb->inp_last_outifp)
+                               tp->t_inpcb->inp_last_outifp = outif;
                        /* Synchronize cached PCB route */
                        inp_route_copyin(tp->t_inpcb, &sro);
                } else if (sro.ro_rt != NULL) {
@@ -782,11 +837,11 @@ tcp_newtcpcb(inp)
        calculate_tcp_clock();
 
        if (so->cached_in_sock_layer == 0) {
-            it = (struct inp_tp *)inp;
+            it = (struct inp_tp *)(void *)inp;
             tp = &it->tcb;
        }
        else
-            tp = (struct tcpcb *) inp->inp_saved_ppcb;
+            tp = (struct tcpcb *)(void *)inp->inp_saved_ppcb;
        
        bzero((char *) tp, sizeof(struct tcpcb));
        LIST_INIT(&tp->t_segq);
@@ -820,11 +875,9 @@ tcp_newtcpcb(inp)
        }
 
        tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
-       tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        tp->snd_ssthresh_prev = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        tp->t_rcvtime = tcp_now;
-       tp->t_bw_rtttime = 0;
        tp->tentry.timer_start = tcp_now;
        tp->t_persist_timeout = tcp_max_persist_timeout;
        tp->t_persist_stop = 0;
@@ -852,7 +905,7 @@ tcp_drop(tp, errno)
        struct socket *so = tp->t_inpcb->inp_socket;
 #if CONFIG_DTRACE
        struct inpcb *inp = tp->t_inpcb;
-#endif /* CONFIG_DTRACE */
+#endif
 
        if (TCPS_HAVERCVDSYN(tp->t_state)) {
                DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
@@ -868,6 +921,39 @@ tcp_drop(tp, errno)
        return (tcp_close(tp));
 }
 
+void
+tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt) 
+{
+       u_int32_t rtt = rt->rt_rmx.rmx_rtt;
+       int isnetlocal = (tp->t_flags & TF_LOCAL);
+
+       if (rtt != 0) {
+               /*
+                * XXX the lock bit for RTT indicates that the value
+                * is also a minimum value; this is subject to time.
+                */
+               if (rt->rt_rmx.rmx_locks & RTV_RTT)
+                       tp->t_rttmin = rtt / (RTM_RTTUNIT / TCP_RETRANSHZ);
+               else
+                       tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCPTV_REXMTMIN;
+               tp->t_srtt = rtt / (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE));
+               tcpstat.tcps_usedrtt++;
+               if (rt->rt_rmx.rmx_rttvar) {
+                       tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+                               (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTTVAR_SCALE));
+                       tcpstat.tcps_usedrttvar++;
+               } else {
+                       /* default variation is +- 1 rtt */
+                       tp->t_rttvar =
+                               tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+               }
+               TCPT_RANGESET(tp->t_rxtcur,
+                       ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+                       tp->t_rttmin, TCPTV_REXMTMAX,
+                       TCP_ADD_REXMTSLOP(tp));
+       }
+}
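
Worked conversion for the seeding above, under the usual constants (assumed here:
RTM_RTTUNIT = 1,000,000 units/s, TCP_RETRANSHZ = 1000, TCP_RTT_SCALE = 32):

	/*
	 * t_rttmin = rmx_rtt / (RTM_RTTUNIT / TCP_RETRANSHZ)
	 *          = rmx_rtt / 1000                  (usec -> msec ticks)
	 * t_srtt   = rmx_rtt / (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE))
	 *          ~ rmx_rtt * 32 / 1000             (msec ticks, 1/32 scaled)
	 * e.g. rmx_rtt = 50000 (50 ms) -> t_rttmin = 50, t_srtt ~ 1600
	 * (exact values differ slightly due to integer truncation).
	 */
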
+
 /*
  * Close a TCP control block:
  *     discard all space held by the tcp
@@ -904,7 +990,8 @@ tcp_close(tp)
         * point both flags should be cleared and we can proceed further
         * with the cleanup.
         */
-       if (tp->t_flags & (TF_CLOSING|TF_SENDINPROG)) {
+       if ((tp->t_flags & TF_CLOSING) ||
+               inp->inp_sndinprog_cnt > 0) {
                tp->t_flags |= TF_CLOSING;
                return (NULL);
        }
@@ -941,14 +1028,14 @@ tcp_close(tp)
 
                        if (rt == NULL)
                                goto no_valid_rt;
-                       sin6 = (struct sockaddr_in6 *)rt_key(rt);
+                       sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt);
                        if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
                                goto no_valid_rt;
                }
                else
 #endif /* INET6 */
                if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
-                   ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr ==
+                   ((struct sockaddr_in *)(void *)rt_key(rt))->sin_addr.s_addr ==
                    INADDR_ANY || rt->generation_id != route_generation) {
                        if (tp->t_state >= TCPS_CLOSE_WAIT) {
                                DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
@@ -1051,6 +1138,9 @@ no_valid_rt:
        (void) tcp_freeq(tp);
 
        tcp_free_sackholes(tp);
+       if (tp->t_bwmeas != NULL) {
+               tcp_bwmeas_free(tp);
+       }
 
        /* Free the packet list */
        if (tp->t_pktlist_head != NULL)
@@ -1066,6 +1156,16 @@ no_valid_rt:
         */
        sodisconnectwakeup(so);
 
+       /* 
+        * Clean up any LRO state 
+        */
+       if (tp->t_flagsext & TF_LRO_OFFLOADED) {
+               tcp_lro_remove_state(inp->inp_laddr, inp->inp_faddr, 
+                       inp->inp_lport,
+                       inp->inp_fport);
+               tp->t_flagsext &= ~TF_LRO_OFFLOADED;
+       }
+
 #if INET6
        if (INP_CHECK_SOCKAF(so, AF_INET6))
                in6_pcbdetach(inp);
@@ -1178,6 +1278,30 @@ tcp_notify(inp, error)
 #endif
 }
 
+struct bwmeas*
+tcp_bwmeas_alloc(struct tcpcb *tp)
+{
+       struct bwmeas *elm;
+       elm = zalloc(tcp_bwmeas_zone);
+       if (elm == NULL)
+               return(elm);
+
+       bzero(elm, bwmeas_elm_size);
+       elm->bw_minsizepkts = TCP_BWMEAS_BURST_MINSIZE;
+       elm->bw_maxsizepkts = TCP_BWMEAS_BURST_MAXSIZE;
+       elm->bw_minsize = elm->bw_minsizepkts * tp->t_maxseg;
+       elm->bw_maxsize = elm->bw_maxsizepkts * tp->t_maxseg;
+       return(elm);
+}
+
+void
+tcp_bwmeas_free(struct tcpcb* tp)
+{
+       zfree(tcp_bwmeas_zone, tp->t_bwmeas);
+       tp->t_bwmeas = NULL;
+       tp->t_flagsext &= ~(TF_MEASURESNDBW);
+}
+
 /*
  * tcpcb_to_otcpcb copies specific bits of a tcpcb to a otcpcb format.
  * The otcpcb data structure is passed to user space and must not change.
@@ -1334,7 +1458,7 @@ tcp_pcblist SYSCTL_HANDLER_ARGS
                        inpcb_to_compat(inp, &xt.xt_inp);
                        inp_ppcb = inp->inp_ppcb;
                        if (inp_ppcb != NULL) {
-                               tcpcb_to_otcpcb((struct tcpcb *)inp_ppcb,
+                               tcpcb_to_otcpcb((struct tcpcb *)(void *)inp_ppcb,
                                    &xt.xt_tp);
                        } else {
                                bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
@@ -1566,6 +1690,18 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
             tcp_pcblist_n, "S,xtcpcb_n", "List of active TCP connections");
 
 
+__private_extern__ void
+tcp_get_ports_used(unsigned int ifindex, uint8_t *bitfield)
+{
+       inpcb_get_ports_used(ifindex, bitfield, &tcbinfo);
+}
+
+__private_extern__ uint32_t
+tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags)
+{
+       return inpcb_count_opportunistic(ifindex, &tcbinfo, flags);
+}
+
 void
 tcp_ctlinput(cmd, sa, vip)
        int cmd;
@@ -1574,16 +1710,13 @@ tcp_ctlinput(cmd, sa, vip)
 {
        tcp_seq icmp_tcp_seq;
        struct ip *ip = vip;
-       struct tcphdr *th;
        struct in_addr faddr;
        struct inpcb *inp;
        struct tcpcb *tp;
-       
-       void (*notify)(struct inpcb *, int) = tcp_notify;
 
-       struct icmp *icp;
+       void (*notify)(struct inpcb *, int) = tcp_notify;
 
-       faddr = ((struct sockaddr_in *)sa)->sin_addr;
+       faddr = ((struct sockaddr_in *)(void *)sa)->sin_addr;
        if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
                return;
 
@@ -1603,19 +1736,22 @@ tcp_ctlinput(cmd, sa, vip)
        else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)
                return;
        if (ip) {
-               icp = (struct icmp *)((caddr_t)ip
-                               - offsetof(struct icmp, icmp_ip));
-               th = (struct tcphdr *)((caddr_t)ip 
-                              + (IP_VHL_HL(ip->ip_vhl) << 2));
-               inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport,
-                   ip->ip_src, th->th_sport, 0, NULL);
+               struct tcphdr th;
+               struct icmp *icp;
+
+               icp = (struct icmp *)(void *)
+                   ((caddr_t)ip - offsetof(struct icmp, icmp_ip));
+               bcopy(((caddr_t)ip + (IP_VHL_HL(ip->ip_vhl) << 2)),
+                   &th, sizeof (th));
+               inp = in_pcblookup_hash(&tcbinfo, faddr, th.th_dport,
+                   ip->ip_src, th.th_sport, 0, NULL);
                if (inp != NULL && inp->inp_socket != NULL) {
                        tcp_lock(inp->inp_socket, 1, 0);
                        if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
                                tcp_unlock(inp->inp_socket, 1, 0);
                                return;
                        }
-                       icmp_tcp_seq = htonl(th->th_seq);
+                       icmp_tcp_seq = htonl(th.th_seq);
                        tp = intotcpcb(inp);
                        if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
                            SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
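A recurring theme in this commit: instead of pointing a struct tcphdr at an arbitrary offset inside the ICMP payload, the header is bcopy'd into an aligned local so strict-alignment platforms never fault (the same motivation behind the many (void *) cast changes). A small standalone illustration of the pattern:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct hdr_model {		/* stand-in for struct tcphdr */
	uint16_t sport, dport;
	uint32_t seq;
};

int
main(void)
{
	unsigned char pkt[64] = { 0 };
	size_t off = 1;		/* deliberately misaligned offset */
	struct hdr_model h;

	/* Unsafe on strict-alignment CPUs:
	 *	struct hdr_model *hp = (struct hdr_model *)(pkt + off);
	 * Safe everywhere: copy into an aligned local first. */
	memcpy(&h, pkt + off, sizeof(h));
	printf("seq = %u\n", (unsigned)h.seq);
	return 0;
}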
@@ -2001,7 +2137,7 @@ tcp_rtlookup(inp, input_ifscope)
 
                        ro->ro_dst.sa_family = AF_INET;
                        ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
-                       ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+                       ((struct sockaddr_in *)(void *)&ro->ro_dst)->sin_addr =
                                inp->inp_faddr;
 
                        /*
@@ -2012,7 +2148,7 @@ tcp_rtlookup(inp, input_ifscope)
                         * input_ifscope is IFSCOPE_NONE).
                         */
                        ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-                           inp->inp_boundif : input_ifscope;
+                           inp->inp_boundifp->if_index : input_ifscope;
 
                        if (rt != NULL)
                                RT_UNLOCK(rt);
@@ -2095,7 +2231,7 @@ tcp_rtlookup6(inp, input_ifscope)
                         * input_ifscope is IFSCOPE_NONE).
                         */
                        ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-                           inp->inp_boundif : input_ifscope;
+                           inp->inp_boundifp->if_index : input_ifscope;
 
                        if (rt != NULL)
                                RT_UNLOCK(rt);
@@ -2175,7 +2311,7 @@ ipsec_hdrsiz_tcp(tp)
 #if INET6
        if ((inp->inp_vflag & INP_IPV6) != 0) {
                ip6 = mtod(m, struct ip6_hdr *);
-               th = (struct tcphdr *)(ip6 + 1);
+               th = (struct tcphdr *)(void *)(ip6 + 1);
                m->m_pkthdr.len = m->m_len =
                        sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
                tcp_fillheaders(tp, ip6, th);
@@ -2328,11 +2464,31 @@ tcp_getlock(
        }
 }
 
+/* Determine if we can grow the receive socket buffer to avoid sending
+ * a zero window update to the peer. We allow even socket buffers that 
+ * have fixed size (set by the application) to grow if the resource
+ * constraints are met. They will also be trimmed after the application
+ * reads data.
+ */
+static void
+tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) {
+       u_int32_t rcvbufinc = tp->t_maxseg << tcp_autorcvbuf_inc_shift;
+       if (tcp_do_autorcvbuf == 1 &&
+               tcp_cansbgrow(sb) &&
+               (tp->t_flags & TF_SLOWLINK) == 0 &&
+               (sb->sb_hiwat - sb->sb_cc) < rcvbufinc &&
+               (sb->sb_hiwat < tcp_autorcvbuf_max)) {
+               sbreserve(sb, (sb->sb_hiwat + rcvbufinc));
+       }
+}
+
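A worked instance of the growth test, with illustrative tunables (tcp_autorcvbuf_inc_shift and tcp_autorcvbuf_max are sysctls whose defaults are not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Illustrative tunables; the kernel reads these from sysctls. */
	uint32_t maxseg = 1448, inc_shift = 3;
	uint32_t autorcvbuf_max = 512 * 1024;
	uint32_t sb_hiwat = 128 * 1024, sb_cc = 126 * 1024;

	uint32_t rcvbufinc = maxseg << inc_shift;	/* 11584 bytes */

	/* Nearly full buffer and still below the cap: grow instead of
	 * advertising a (nearly) zero window to the peer. */
	if ((sb_hiwat - sb_cc) < rcvbufinc && sb_hiwat < autorcvbuf_max)
		sb_hiwat += rcvbufinc;
	printf("new hiwat = %u\n", sb_hiwat);
	return 0;
}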
 int32_t
 tcp_sbspace(struct tcpcb *tp)
 {
        struct sockbuf *sb = &tp->t_inpcb->inp_socket->so_rcv;
-       int32_t space, newspace;
+       int32_t space;
+
+       tcp_sbrcv_grow_rwin(tp, sb);
 
        space =  ((int32_t) imin((sb->sb_hiwat - sb->sb_cc), 
                (sb->sb_mbmax - sb->sb_mbcnt)));
@@ -2352,21 +2508,6 @@ tcp_sbspace(struct tcpcb *tp)
        if (((tp->t_flags & TF_SLOWLINK) != 0) && slowlink_wsize > 0 )  
                return imin(space, slowlink_wsize);
 
-       /*
-        * Check for ressources constraints before over-ajusting the amount of space we can
-        * advertise in the TCP window size updates.
-        */
-
-       if (sbspace_factor && (tp->t_inpcb->inp_pcbinfo->ipi_count < tcp_sockthreshold) &&
-                   (total_mb_cnt / 8) < (mbstat.m_clusters / sbspace_factor)) {
-               if (space < (int32_t)(sb->sb_maxused - sb->sb_cc)) {/* make sure we don't constrain the window if we have enough ressources */
-                       space = (int32_t) imax((sb->sb_maxused - sb->sb_cc), tp->rcv_maxbyps);
-               }
-               newspace = (int32_t) imax(((int32_t)sb->sb_maxused - sb->sb_cc), (int32_t)tp->rcv_maxbyps);
-
-               if (newspace > space)
-                       space = newspace;
-       }
        return space;
 }
 /*
@@ -2451,4 +2592,57 @@ calculate_tcp_clock()
         return;
 }
 
+/* Compute the receive window scaling that we are going to request 
+ * for this connection based on sb_hiwat. Try to leave some 
+ * room to potentially increase the window size up to a maximum 
+ * defined by the constant tcp_autorcvbuf_max.
+ */
+void
+tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so) {
+       u_int32_t maxsockbufsize;
+
+       tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
+       maxsockbufsize = ((so->so_rcv.sb_flags & SB_USRSIZE) != 0) ?
+               so->so_rcv.sb_hiwat : tcp_autorcvbuf_max;
+
+       while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+               (TCP_MAXWIN << tp->request_r_scale) < maxsockbufsize)
+               tp->request_r_scale++;
+       tp->request_r_scale = min(tp->request_r_scale, TCP_MAX_WINSHIFT);
+
+}
+
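With the standard TCP_MAXWIN of 65535 and TCP_MAX_WINSHIFT of 14, the loop rounds the shift up until the scaled window covers the whole buffer bound; for a 512 KB bound it settles on 4:

#include <stdio.h>

#define TCP_MAXWIN		65535	/* largest unscaled window */
#define TCP_MAX_WINSHIFT	14	/* RFC 1323 limit */

int
main(void)
{
	unsigned int maxsockbufsize = 512 * 1024; /* illustrative bound */
	unsigned int scale = 0;	/* the kernel starts from tcp_win_scale */

	while (scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << scale) < maxsockbufsize)
		scale++;
	printf("request_r_scale = %u\n", scale);	/* prints 4 */
	return 0;
}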
+int
+tcp_notsent_lowat_check(struct socket *so) {
+       struct inpcb *inp = sotoinpcb(so);
+       struct tcpcb *tp = NULL;
+       int notsent = 0;
+       if (inp != NULL) {
+               tp = intotcpcb(inp);
+       }
+
+       notsent = so->so_snd.sb_cc - 
+               (tp->snd_nxt - tp->snd_una);
+
+       /* When we send a FIN or SYN, notsent can be negative.
+        * In that case we also need to send a write event to the 
+        * process if it is waiting. In the FIN case, it will
+        * get an error from send because cantsendmore will be set.
+        */
+       if (notsent <= tp->t_notsent_lowat) {
+               return(1);
+       }
+
+       /* When Nagle's algorithm is not disabled, it is better
+        * to wake up the client until there is at least one
+        * maxseg of data to write.
+        */
+       if ((tp->t_flags & TF_NODELAY) == 0 && 
+               notsent > 0 && notsent < tp->t_maxseg) {
+               return(1);
+       }
+       return(0);
+}
+
+
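The check compares the not-yet-sent bytes in the send buffer (buffered minus in-flight) against the configured threshold; for example, with 12000 bytes buffered and 8000 in flight, notsent is 4000, so a low-water mark of 8192 reports the socket writable:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int32_t sb_cc = 12000;		/* bytes sitting in so_snd */
	int32_t inflight = 8000;	/* snd_nxt - snd_una */
	int32_t lowat = 8192;		/* t_notsent_lowat */

	int32_t notsent = sb_cc - inflight;
	printf("notsent = %d, writable = %d\n", notsent, notsent <= lowat);
	return 0;
}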
 /* DSEP Review Done pl-20051213-v02 @3253,@3391,@3400 */
index a8369ca71a164ccd805b78b92a2cd9c6348d4df8..a8df389472f99eb08bf47c24b5ff73bf3b3fb24d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -78,6 +78,7 @@
 #include <kern/cpu_number.h>   /* before tcp_seq.h, for tcp_random18() */
 
 #include <net/route.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -536,7 +537,7 @@ tcp_timers(tp, timer)
        int timer;
 {
        register int rexmt;
-       struct socket *so_tmp;
+       struct socket *so;
        struct tcptemp *t_template;
        int optlen = 0;
        int idle_time = 0;
@@ -549,7 +550,7 @@ tcp_timers(tp, timer)
        int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0;
 #endif /* INET6 */
 
-       so_tmp = tp->t_inpcb->inp_socket;
+       so = tp->t_inpcb->inp_socket;
        idle_time = tcp_now - tp->t_rcvtime;
 
        switch (timer) {
@@ -581,7 +582,6 @@ tcp_timers(tp, timer)
         * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
-               tcp_free_sackholes(tp);
                /* Drop a connection in the retransmit timer
                 * 1. If we have retransmitted more than TCP_MAXRXTSHIFT times
                 * 2. If the time spent in this retransmission episode is more than
@@ -602,9 +602,12 @@ tcp_timers(tp, timer)
                                tcpstat.tcps_timeoutdrop++;
                        }
                        tp->t_rxtshift = TCP_MAXRXTSHIFT;
+                       postevent(so, 0, EV_TIMEOUT);                   
+                       soevent(so, 
+                           (SO_FILT_HINT_LOCKED|SO_FILT_HINT_TIMEOUT));
                        tp = tcp_drop(tp, tp->t_softerror ?
                            tp->t_softerror : ETIMEDOUT);
-                       postevent(so_tmp, 0, EV_TIMEOUT);                       
+
                        break;
                }
 
@@ -633,6 +636,7 @@ tcp_timers(tp, timer)
                        tp->rxt_start = tcp_now;
                }
                tcpstat.tcps_rexmttimeo++;
+
                if (tp->t_state == TCPS_SYN_SENT)
                        rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
                else
@@ -642,12 +646,17 @@ tcp_timers(tp, timer)
                        TCP_ADD_REXMTSLOP(tp));
                tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
+               if (INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb))
+                       goto fc_output;
+
+               tcp_free_sackholes(tp);
                /*
                 * Check for potential Path MTU Discovery Black Hole 
                 */
 
                if (tcp_pmtud_black_hole_detect && (tp->t_state == TCPS_ESTABLISHED)) {
-                       if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) == (TF_PMTUD|TF_MAXSEGSNT)) && (tp->t_rxtshift == 2)) {
+                       if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) == (TF_PMTUD|TF_MAXSEGSNT)) &&
+                                (tp->t_rxtshift == 2)) {
                                /* 
                                 * Enter Path MTU Black-hole Detection mechanism:
                                 * - Disable Path MTU Discovery (IP "DF" bit).
@@ -708,6 +717,7 @@ tcp_timers(tp, timer)
                if ((tp->t_state == TCPS_SYN_SENT) &&
                    (tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres))
                        tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
+
                /*
                 * If losing, let the lower level know and try for
                 * a better route.  Also, if we backed off this far,
@@ -747,6 +757,13 @@ tcp_timers(tp, timer)
                tp->t_dupacks = 0;
                EXIT_FASTRECOVERY(tp);
 
+               /* CWR notifications are to be sent on new data right after
+                * RTOs, Fast Retransmits and ECE notification receipts.
+                */
+               if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) {
+                       tp->ecn_flags |= TE_SENDCWR;
+               }
+fc_output:
                DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
                        struct tcpcb *, tp, struct tcphdr *, NULL,
                        int32_t, TCP_CC_REXMT_TIMEOUT);
@@ -774,11 +791,13 @@ tcp_timers(tp, timer)
                if ((tp->t_rxtshift == TCP_MAXRXTSHIFT &&
                    (idle_time >= tcp_maxpersistidle ||
                    idle_time >= TCP_REXMTVAL(tp) * tcp_totbackoff)) || 
-                   ((tp->t_persist_stop != 0) && (tp->t_persist_stop <= tcp_now))) {
+                   ((tp->t_persist_stop != 0) && 
+                       TSTMP_LEQ(tp->t_persist_stop, tcp_now))) {
                        tcpstat.tcps_persistdrop++;
-                       so_tmp = tp->t_inpcb->inp_socket;
+                       postevent(so, 0, EV_TIMEOUT);
+                       soevent(so,
+                           (SO_FILT_HINT_LOCKED|SO_FILT_HINT_TIMEOUT));
                        tp = tcp_drop(tp, ETIMEDOUT);
-                       postevent(so_tmp, 0, EV_TIMEOUT);
                        break;
                }
                tcp_setpersist(tp);
@@ -818,7 +837,7 @@ tcp_timers(tp, timer)
                                unsigned int ifscope, nocell = 0;
 
                                if (tp->t_inpcb->inp_flags & INP_BOUND_IF)
-                                       ifscope = tp->t_inpcb->inp_boundif;
+                                       ifscope = tp->t_inpcb->inp_boundifp->if_index;
                                else
                                        ifscope = IFSCOPE_NONE;
 
@@ -851,6 +870,13 @@ tcp_timers(tp, timer)
                        if ((tp->t_flags & TF_STRETCHACK) != 0)
                                tcp_reset_stretch_ack(tp);
 
+                       /* If we are measuring inter-packet arrival jitter for 
+                        * throttling a connection, this delayed ack might be 
+                        * the reason for accumulating some jitter. So let's
+                        * restart the measurement.
+                        */
+                       CLEAR_IAJ_STATE(tp);
+
                        tcpstat.tcps_delack++;
                        (void) tcp_output(tp);
                }
@@ -863,8 +889,10 @@ tcp_timers(tp, timer)
 #endif
        dropit:
                tcpstat.tcps_keepdrops++;
+               postevent(so, 0, EV_TIMEOUT);
+               soevent(so,
+                   (SO_FILT_HINT_LOCKED|SO_FILT_HINT_TIMEOUT));
                tp = tcp_drop(tp, ETIMEDOUT);
-               postevent(so_tmp, 0, EV_TIMEOUT);
                break;
        }
        return (tp);
index df1162053f2e7ef1db26a6e501feedc1d1bf0815..213e87a395f241ca9960ee70324097ff4def978b 100644 (file)
 #define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 )    /* Window for counting rcv bytes to see if 
                                                   ack-stretching can start (default 100 ms) */
 #define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 )    /* Receiver idle time, avoid ack-stretching after that*/
+#define TCPTV_RCVBUFIDLE (TCP_RETRANSHZ/2)     /* Receiver idle time, for rcv socket buffer resizing */
 
 /* No ack stretching during slow-start, until we see some packets.
  * By the time the receiver gets 512 packets, the senders cwnd 
- * should open by a few hundred packets considering the progression
- * during slow-start.
+ * should open by a few hundred packets considering the 
+ * slow-start progression.
  */
 #define TCP_RCV_SS_PKTCOUNT     512
 
@@ -232,7 +233,7 @@ struct tcptimerlist {
 #define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 )    /* rexmt slop allowed (200 ms) */
 
 /* macro to decide when retransmit slop (described above) should be added */
-#define TCP_ADD_REXMTSLOP(tp) ((tp->t_flags & TF_LOCAL) != 0 || tp->t_state >= TCPS_ESTABLISHED) 
+#define TCP_ADD_REXMTSLOP(tp) (tp->t_state >= TCPS_ESTABLISHED) 
 
 #define        TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \
        (tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \
@@ -246,6 +247,13 @@ struct tcptimerlist {
        (tp->t_keepidle && (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? \
                tp->t_keepidle : tcp_keepidle)
 
+/* Since we did not add rexmt slop for local connections, we should add
+ * it to the idle timeout. Otherwise local connections will reach the
+ * idle state too quickly.
+ */
+#define TCP_IDLETIMEOUT(tp) \
+       (((TCP_ADD_REXMTSLOP(tp)) ? 0 : tcp_rexmt_slop) + tp->t_rxtcur)
+
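Numerically, with the 200 ms slop implied by the TCPTV_REXMTSLOP comment above (assuming the millisecond-granularity TCP_RETRANSHZ that comment suggests), a connection whose t_rxtcur is 600 ms gets a 600 ms idle timeout when the slop was already folded into rexmt, and 800 ms otherwise:

#include <stdio.h>

#define TCP_RETRANSHZ	1000			/* assumed ms-based clock */
#define TCPTV_REXMTSLOP	(TCP_RETRANSHZ / 5)	/* 200 ms, per the comment */

/* Sketch of TCP_IDLETIMEOUT: add the slop only when the retransmit
 * timer did not already include it. */
static unsigned int
idle_timeout(unsigned int rxtcur, int slop_already_added)
{
	return (slop_already_added ? 0 : TCPTV_REXMTSLOP) + rxtcur;
}

int
main(void)
{
	printf("%u ms vs %u ms\n",
	    idle_timeout(600, 1), idle_timeout(600, 0));	/* 600 vs 800 */
	return 0;
}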
 extern int tcp_keepinit;               /* time to establish connection */
 extern int tcp_keepidle;               /* time before keepalive probes begin */
 extern int tcp_keepintvl;              /* time between keepalive probes */
index d4fddb517f371072233020fcce105ff492f20f32..19405c584e28b18a91630d49a31c99a9faa1e55c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -126,10 +126,6 @@ static struct tcpcb *
 static struct tcpcb *
                tcp_usrclosed(struct tcpcb *);
 
-__private_extern__ int tcp_win_scale = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_win_scale, 0, "Window scaling factor");
-
 static u_int32_t tcps_in_sw_cksum;
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcps_in_sw_cksum, 0,
@@ -150,6 +146,10 @@ SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LO
     &tcps_out_sw_cksum_bytes,
     "Amount of transmitted data checksummed in software");
 
+extern uint32_t tcp_autorcvbuf_max;
+
+extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
+
 #if TCPDEBUG
 #define        TCPDEBUG0       int ostate = 0
 #define        TCPDEBUG1()     ostate = tp ? tp->t_state : 0
@@ -161,15 +161,6 @@ SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LO
 #define        TCPDEBUG2(req)
 #endif
 
-#if CONFIG_USESOCKTHRESHOLD
-__private_extern__ unsigned int        tcp_sockthreshold = 64;
-#else
-__private_extern__ unsigned int        tcp_sockthreshold = 0;
-#endif
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
-
-
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
     0 , 0, tcp_sysctl_info, "S", "TCP info per tuple");
 
@@ -288,7 +279,7 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
         * Must check for multicast addresses and disallow binding
         * to them.
         */
-       sinp = (struct sockaddr_in *)nam;
+       sinp = (struct sockaddr_in *)(void *)nam;
        if (sinp->sin_family == AF_INET &&
            IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
                error = EAFNOSUPPORT;
@@ -321,7 +312,7 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
         * Must check for multicast addresses and disallow binding
         * to them.
         */
-       sin6p = (struct sockaddr_in6 *)nam;
+       sin6p = (struct sockaddr_in6 *)(void *)nam;
        if (sin6p->sin6_family == AF_INET6 &&
            IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
                error = EAFNOSUPPORT;
@@ -436,7 +427,7 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        /*
         * Must disallow TCP ``connections'' to multicast addresses.
         */
-       sinp = (struct sockaddr_in *)nam;
+       sinp = (struct sockaddr_in *)(void *)nam;
        if (sinp->sin_family == AF_INET
            && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
                error = EAFNOSUPPORT;
@@ -469,7 +460,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        /*
         * Must disallow TCP ``connections'' to multicast addresses.
         */
-       sin6p = (struct sockaddr_in6 *)nam;
+       sin6p = (struct sockaddr_in6 *)(void *)nam;
        if (sin6p->sin6_family == AF_INET6
            && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
                error = EAFNOSUPPORT;
@@ -633,6 +624,8 @@ tcp_usr_rcvd(struct socket *so, __unused int flags)
         /* In case we got disconnected from the peer */
         if (tp == 0)
             goto out;
+       tcp_sbrcv_trim(tp, &so->so_rcv);
+
        tcp_output(tp);
        COMMON_END(PRU_RCVD);
 }
@@ -728,7 +721,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
                                error = tcp6_connect(tp, nam, p);
                        else
 #endif /* INET6 */
-                       error = tcp_connect(tp, nam, p);
+                               error = tcp_connect(tp, nam, p);
                        if (error)
                                goto out;
                        tp->snd_wnd = TTCP_CLIENT_SND_WND;
@@ -899,12 +892,12 @@ tcp_connect(tp, nam, p)
        struct inpcb *inp = tp->t_inpcb, *oinp;
        struct socket *so = inp->inp_socket;
        struct tcpcb *otp;
-       struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+       struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
        struct sockaddr_in ifaddr;
        struct rmxp_tao *taop;
        struct rmxp_tao tao_noncached;
        int error;
-       unsigned int outif = 0;
+       struct ifnet *outif = NULL;
 
        if (inp->inp_lport == 0) {
                error = in_pcbbind(inp, (struct sockaddr *)0, p);
@@ -965,30 +958,17 @@ skip_oinp:
        }
        if (inp->inp_laddr.s_addr == INADDR_ANY) {
                inp->inp_laddr = ifaddr.sin_addr;
-               inp->inp_last_outif = outif;
+               inp->inp_last_outifp = outif;
        }
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        in_pcbrehash(inp);
        lck_rw_done(inp->inp_pcbinfo->mtx);
 
-       /* Compute window scaling to requesti according to sb_hiwat
-        * or leave us some room to increase potentially increase the window size depending
-        * on the default win scale
-        */
-       while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
-        (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
-               tp->request_r_scale++;
+       if (inp->inp_flowhash == 0)
+               inp->inp_flowhash = inp_calc_flowhash(inp);
 
-       /*
-        * Inflate window size only if no setsockopt was performed on the recv sockbuf and
-        * if we're not over our number of active pcbs.
-        */
-
-       if (((so->so_rcv.sb_flags & SB_USRSIZE) == 0) && (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold)) {
-               tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
-               so->so_rcv.sb_hiwat = min(TCP_MAXWIN << tp->request_r_scale, (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES);  
-       }
+       tcp_set_max_rwinscale(tp, so);
 
        soisconnecting(so);
        tcpstat.tcps_connattempt++;
@@ -1031,27 +1011,31 @@ tcp6_connect(tp, nam, p)
        struct inpcb *inp = tp->t_inpcb, *oinp;
        struct socket *so = inp->inp_socket;
        struct tcpcb *otp;
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
        struct in6_addr addr6;
        struct rmxp_tao *taop;
        struct rmxp_tao tao_noncached;
-       int error;
-       unsigned int outif = 0;
+       int error = 0;
+       struct ifnet *outif = NULL;
 
        if (inp->inp_lport == 0) {
                error = in6_pcbbind(inp, (struct sockaddr *)0, p);
                if (error)
-                       return error;
+                       goto done;
        }
 
        /*
         * Cannot simply call in_pcbconnect, because there might be an
         * earlier incarnation of this same connection still in
         * TIME_WAIT state, creating an ADDRINUSE error.
+        *
+        * in6_pcbladdr() might return an ifp with its reference held
+        * even in the error case, so make sure that it's released
+        * whenever it's non-NULL.
         */
        error = in6_pcbladdr(inp, nam, &addr6, &outif);
        if (error)
-               return error;
+               goto done;
        tcp_unlock(inp->inp_socket, 0, 0);
        oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
                                  &sin6->sin6_addr, sin6->sin6_port,
@@ -1064,10 +1048,12 @@ tcp6_connect(tp, nam, p)
                if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
                    otp->t_state == TCPS_TIME_WAIT &&
                    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
-                   (otp->t_flags & TF_RCVD_CC))
+                   (otp->t_flags & TF_RCVD_CC)) {
                        otp = tcp_close(otp);
-               else
-                       return EADDRINUSE;
+               } else {
+                       error = EADDRINUSE;
+                       goto done;
+               }
        }
        if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
                /*lock inversion issue, mostly with udp multicast packets */
@@ -1077,7 +1063,7 @@ tcp6_connect(tp, nam, p)
        }
        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                inp->in6p_laddr = addr6;
-               inp->in6p_last_outif = outif;
+               inp->in6p_last_outifp = outif;  /* no reference needed */
        }
        inp->in6p_faddr = sin6->sin6_addr;
        inp->inp_fport = sin6->sin6_port;
@@ -1086,10 +1072,10 @@ tcp6_connect(tp, nam, p)
        in_pcbrehash(inp);
        lck_rw_done(inp->inp_pcbinfo->mtx);
 
-       /* Compute window scaling to request.  */
-       while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
-           (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
-               tp->request_r_scale++;
+       if (inp->inp_flowhash == 0)
+               inp->inp_flowhash = inp_calc_flowhash(inp);
+
+       tcp_set_max_rwinscale(tp, so);
 
        soisconnecting(so);
        tcpstat.tcps_connattempt++;
@@ -1119,7 +1105,11 @@ tcp6_connect(tp, nam, p)
                tp->t_flags |= TF_SENDCCNEW;
        }
 
-       return 0;
+done:
+       if (outif != NULL)
+               ifnet_release(outif);
+
+       return (error);
 }
 #endif /* INET6 */
 
@@ -1129,11 +1119,13 @@ tcp6_connect(tp, nam, p)
 __private_extern__ void
 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 {
+       struct inpcb *inp = tp->t_inpcb;
+       
        bzero(ti, sizeof(*ti));
 
        ti->tcpi_state = tp->t_state;
        
-    if (tp->t_state > TCPS_LISTEN) {
+       if (tp->t_state > TCPS_LISTEN) {
                if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
                        ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
                if (tp->t_flags & TF_SACK_PERMIT)
@@ -1143,21 +1135,48 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
                        ti->tcpi_snd_wscale = tp->snd_scale;
                        ti->tcpi_rcv_wscale = tp->rcv_scale;
                }
-               
+
+               /* Are we in a retransmission episode? */
+               if (tp->snd_max != tp->snd_nxt)
+                       ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
+               else
+                       ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY;
+
+               ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
                ti->tcpi_snd_mss = tp->t_maxseg;
                ti->tcpi_rcv_mss = tp->t_maxseg;
 
+               ti->tcpi_rttcur = tp->t_rttcur;
+               ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
+               ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
+
                ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
                ti->tcpi_snd_cwnd = tp->snd_cwnd;
+               ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc;
        
                ti->tcpi_rcv_space = tp->rcv_wnd;
 
                ti->tcpi_snd_wnd = tp->snd_wnd;
-               ti->tcpi_snd_bwnd = tp->snd_bwnd;
                ti->tcpi_snd_nxt = tp->snd_nxt;
                ti->tcpi_rcv_nxt = tp->rcv_nxt;
+
+               /* convert bytes/msec to bits/sec */
+               if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
+                       tp->t_bwmeas != NULL) {
+                       ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
+               }
                
-               ti->tcpi_last_outif = tp->t_inpcb->inp_last_outif;
+               ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
+                   tp->t_inpcb->inp_last_outifp->if_index;
+
+               //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
+               ti->tcpi_txbytes = inp->inp_stat->txbytes;
+               ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
+               ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
+               
+               //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
+               ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
+               ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
        }
 }
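Since bw_sndbw is kept in bytes per millisecond, the ×8000 above is 8 bits/byte × 1000 ms/s. A hedged sketch of reading these fields back; TCP_INFO and the new struct members may require the private SDK headers on a stock system:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <stdio.h>

/* Assumes fd is a connected TCP socket and the headers expose the
 * tcp_info fields added in this commit. */
static void
print_snd_bw(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	memset(&ti, 0, sizeof(ti));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("send bw: %llu bits/sec\n",
		    (unsigned long long)ti.tcpi_snd_bw);
}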
 
@@ -1249,6 +1268,41 @@ tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused
        return 0;
 }
 
+static int
+tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
+{
+       int error = EHOSTUNREACH;
+       *out_pid = -1;
+       if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN;
+       
+       struct inpcb    *inp = (struct inpcb*)so->so_pcb;
+       uint16_t                lport = inp->inp_lport;
+       uint16_t                fport = inp->inp_fport;
+       struct inpcb    *finp = NULL;
+       
+       if (inp->inp_vflag & INP_IPV6) {
+               struct  in6_addr        laddr6 = inp->in6p_laddr;
+               struct  in6_addr        faddr6 = inp->in6p_faddr;
+               socket_unlock(so, 0);
+               finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
+               socket_lock(so, 0);
+       } else if (inp->inp_vflag & INP_IPV4) {
+               struct  in_addr laddr4 = inp->inp_laddr;
+               struct  in_addr faddr4 = inp->inp_faddr;
+               socket_unlock(so, 0);
+               finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
+               socket_lock(so, 0);
+       }
+       
+       if (finp) {
+               *out_pid = finp->inp_socket->last_pid;
+               error = 0;
+               in_pcb_checkstate(finp, WNT_RELEASE, 0);
+       }
+       
+       return error;
+}
+
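Because the helper looks up the reversed four-tuple in the local PCB hash, it can only resolve a peer whose endpoint lives on the same host (e.g. a loopback connection). A hedged userspace sketch of the TCP_PEER_PID getsockopt this backs (see the switch below; the option name may only be visible in private headers):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

/* Assumes fd is a connected TCP socket whose peer is local. */
static void
print_peer_pid(int fd)
{
	pid_t pid;
	socklen_t len = sizeof(pid);

	if (getsockopt(fd, IPPROTO_TCP, TCP_PEER_PID, &pid, &len) == 0)
		printf("peer pid: %d\n", (int)pid);
	else
		perror("getsockopt(TCP_PEER_PID)");
}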
 /*
  * The new sockopt interface makes it possible for us to block in the
  * copyin/out step (if we take a page fault).  Taking a page fault at
@@ -1270,7 +1324,9 @@ tcp_ctloutput(so, sopt)
        if (inp == NULL) {
                return (ECONNRESET);
        }
-       if (sopt->sopt_level != IPPROTO_TCP) {
+       /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
+       if (sopt->sopt_level != IPPROTO_TCP &&
+           !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH)) {
 #if INET6
                if (INP_CHECK_SOCKAF(so, AF_INET6))
                        error = ip6_ctloutput(so, sopt);
@@ -1328,6 +1384,58 @@ tcp_ctloutput(so, sopt)
                        else
                                tp->t_flagsext &= ~opt;
                        break;
+               case TCP_MEASURE_SND_BW:
+                       error = sooptcopyin(sopt, &optval, sizeof optval,
+                               sizeof optval);
+                       if (error)
+                               break;
+                       opt = TF_MEASURESNDBW;
+                       if (optval) {
+                               if (tp->t_bwmeas == NULL) {
+                                       tp->t_bwmeas = tcp_bwmeas_alloc(tp);
+                                       if (tp->t_bwmeas == NULL) {
+                                               error = ENOMEM;
+                                               break;
+                                       }
+                               }
+                               tp->t_flagsext |= opt;
+                       } else {
+                               tp->t_flagsext &= ~opt;
+                               /* Reset snd bw measurement state */
+                               tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
+                               if (tp->t_bwmeas != NULL) {
+                                       tcp_bwmeas_free(tp);
+                               }
+                       }
+                       break;
+               case TCP_MEASURE_BW_BURST: {
+                       struct tcp_measure_bw_burst in;
+                       uint32_t minpkts, maxpkts;
+                       bzero(&in, sizeof(in));
+
+                       error = sooptcopyin(sopt, &in, sizeof(in),
+                               sizeof(in));
+                       if (error)
+                               break;
+                       if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
+                               tp->t_bwmeas == NULL) {
+                               error = EINVAL;
+                               break;
+                       }
+                       minpkts = (in.min_burst_size != 0) ? in.min_burst_size : 
+                               tp->t_bwmeas->bw_minsizepkts;
+                       maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
+                               tp->t_bwmeas->bw_maxsizepkts;
+                       if (minpkts > maxpkts) {
+                               error = EINVAL;
+                               break;
+                       }
+                       tp->t_bwmeas->bw_minsizepkts = minpkts;
+                       tp->t_bwmeas->bw_maxsizepkts = maxpkts;
+                       tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
+                       tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
+                       break;
+               }
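A hedged sketch of driving these two options from userspace: enable measurement first (which allocates the bwmeas state), then optionally tighten the burst bounds. Per the handler, a zero field keeps the current value and min > max is rejected; the struct layout mirrors what the copyin above expects and would normally come from a private header:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>

/* Local mirror of the private struct consumed by the handler above. */
struct tcp_measure_bw_burst {
	u_int32_t min_burst_size;	/* in segments; 0 = keep default */
	u_int32_t max_burst_size;	/* in segments; 0 = keep default */
};

static int
enable_sndbw_measurement(int fd)
{
	int on = 1;
	struct tcp_measure_bw_burst burst;

	if (setsockopt(fd, IPPROTO_TCP, TCP_MEASURE_SND_BW,
	    &on, sizeof(on)) != 0)
		return (-1);

	memset(&burst, 0, sizeof(burst));
	burst.min_burst_size = 8;
	burst.max_burst_size = 32;	/* must be >= min, or EINVAL */
	return setsockopt(fd, IPPROTO_TCP, TCP_MEASURE_BW_BURST,
	    &burst, sizeof(burst));
}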
                case TCP_MAXSEG:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                                            sizeof optval);
@@ -1366,7 +1474,7 @@ tcp_ctloutput(so, sopt)
                        else 
                                tp->t_keepinit = optval * TCP_RETRANSHZ;
                        break;
-               
+
                case PERSIST_TIMEOUT:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                                                sizeof optval);
@@ -1387,6 +1495,33 @@ tcp_ctloutput(so, sopt)
                        else
                                tp->rxt_conndroptime = optval * TCP_RETRANSHZ;
                        break;
+               case TCP_NOTSENT_LOWAT:
+                       error = sooptcopyin(sopt, &optval, sizeof(optval),
+                               sizeof(optval));
+                       if (error)
+                               break;
+                       if (optval < 0) {
+                               error = EINVAL;
+                               break;
+                       } else {
+                               if (optval == 0) {
+                                       so->so_flags &= ~(SOF_NOTSENT_LOWAT);
+                                       tp->t_notsent_lowat = 0;
+                               } else { 
+                                       so->so_flags |= SOF_NOTSENT_LOWAT;
+                                       tp->t_notsent_lowat = optval;
+                               }
+                       }
+                       break;
+
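Usage is a single integer option; per the handler above, a positive value arms SOF_NOTSENT_LOWAT and zero disarms it (negative is EINVAL). A minimal sketch:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Arm a not-sent low-water mark of `bytes`; pass 0 to clear it. */
static int
set_notsent_lowat(int fd, int bytes)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT,
	    &bytes, sizeof(bytes));
}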
+               case SO_FLUSH:
+                       if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval))) != 0)
+                               break;
+
+                       error = inp_flush(inp, optval);
+                       break;
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -1422,12 +1557,42 @@ tcp_ctloutput(so, sopt)
                case TCP_RXT_FINDROP:
                        optval = tp->t_flagsext & TF_RXTFINDROP;
                        break; 
+               case TCP_MEASURE_SND_BW:
+                       optval = tp->t_flagsext & TF_MEASURESNDBW;
+                       break;
                case TCP_INFO: {
                        struct tcp_info ti;
 
                        tcp_fill_info(tp, &ti);
                        error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
                        goto done;
+                       /* NOT REACHED */
+               }
+               case TCP_MEASURE_BW_BURST: {
+                       struct tcp_measure_bw_burst out;
+                       if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
+                               tp->t_bwmeas == NULL) {
+                               error = EINVAL;
+                               break;
+                       }
+                       out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
+                       out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
+                       error = sooptcopyout(sopt, &out, sizeof(out));
+                       goto done;
+               }
+               case TCP_NOTSENT_LOWAT:
+                       if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
+                               optval = tp->t_notsent_lowat;
+                       } else {
+                               optval = 0;
+                       }
+                       break;
+               case TCP_PEER_PID: {
+                       pid_t   pid;
+                       error = tcp_lookup_peer_pid_locked(so, &pid);
+                       if (error == 0)
+                               error = sooptcopyout(sopt, &pid, sizeof(pid));
+                       goto done;
                }
                default:
                        error = ENOPROTOOPT;
@@ -1508,7 +1673,6 @@ tcp_attach(so, p)
        register struct tcpcb *tp;
        struct inpcb *inp;
        int error;
-       u_long sb_effective_max;
 #if INET6
        int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
 #endif
@@ -1520,27 +1684,14 @@ tcp_attach(so, p)
        inp = sotoinpcb(so);
 
        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
-               /*
-                * The goal is to let clients machines use large send/rcv default windows to compensate for link
-                * latency and make sure the receiver is not constraining the sender window.
-                * But we doon't want to have a few connections use all our mbuf space for servers.
-                * This is done by watching a threshold of tcpcbs in use and bumping the default send and rcvspace
-                * only if that threshold isn't reached.
-                * We're also advertising a much bigger window size (tuneable by sysctl) in correlation with                             * the max socket buffer size if 
-                * we consider that we have enough ressources for it. This window will be adjusted depending on the
-                * global socket layer buffer use with the use of tcp_sbpace
-                */
-
-               if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold) {
-                       sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES;  
-                       error = soreserve(so, max(min((TCP_MAXWIN << tcp_win_scale)/4, sb_effective_max), tcp_sendspace),
-                                       max(min((TCP_MAXWIN << tcp_win_scale)/2, sb_effective_max), tcp_recvspace));
-               }
-               else    
-                       error = soreserve(so, tcp_sendspace, tcp_recvspace);
+               error = soreserve(so, tcp_sendspace, tcp_recvspace);
                if (error)
                        return (error);
        }
+       if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0)
+               so->so_rcv.sb_flags |= SB_AUTOSIZE;
+       if ((so->so_snd.sb_flags & SB_USRSIZE) == 0)
+               so->so_snd.sb_flags |= SB_AUTOSIZE;
 
 #if INET6
        if (isipv6) {
index 4066829cb29e8a6514c4b7d5fd35318677e41d29..efffc7bad97311114ffcd4cfe4e79de1c887e8e0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -170,6 +170,17 @@ struct tcptemp {
        struct  tcphdr tt_t;
 };
 
+struct bwmeas {
+       tcp_seq bw_start;               /* start of bw measurement */
+       uint32_t bw_ts;         /* timestamp when bw measurement started */
+       uint32_t bw_size;               /* burst size in bytes for this bw measurement */
+       uint32_t bw_minsizepkts;        /* Min burst size as segments */
+       uint32_t bw_maxsizepkts;        /* Max burst size as segments */
+       uint32_t bw_minsize;    /* Min size in bytes */
+       uint32_t bw_maxsize;    /* Max size in bytes */
+       uint32_t bw_sndbw;              /* Measured send bw */
+};
+
 #define tcp6cb         tcpcb  /* for KAME src sync over BSD*'s */
 
 /*
@@ -210,7 +221,6 @@ struct tcpcb {
 #define        TF_WASFRECOVERY 0x400000        /* was in NewReno Fast Recovery */
 #define        TF_SIGNATURE    0x800000        /* require MD5 digests (RFC2385) */
 #define        TF_MAXSEGSNT    0x1000000       /* last segment sent was a full segment */
-#define        TF_SENDINPROG   0x2000000       /* send is in progress */
 #define TF_PMTUD       0x4000000       /* Perform Path MTU Discovery for this connection */
 #define        TF_CLOSING      0x8000000       /* pending tcp close */
 #define TF_TSO         0x10000000      /* TCP Segment Offloading is enable on this connection */
@@ -239,12 +249,10 @@ struct tcpcb {
 
        u_int32_t       snd_wnd;                /* send window */
        u_int32_t       snd_cwnd;               /* congestion-controlled window */
-       u_int32_t       snd_bwnd;               /* bandwidth-controlled window */
        u_int32_t       snd_ssthresh;           /* snd_cwnd size threshold for
                                         * for slow start exponential to
                                         * linear switch
                                         */
-       u_int32_t       snd_bandwidth;  /* calculated bandwidth or 0 */
        tcp_seq snd_recover;            /* for use in NewReno Fast Recovery */
 
        u_int   t_maxopd;               /* mss plus options */
@@ -254,8 +262,8 @@ struct tcpcb {
        int     t_rtttime;              /* tcp clock when rtt calculation was started */
        tcp_seq t_rtseq;                /* sequence number being timed */
 
-       int     t_bw_rtttime;           /* used for bandwidth calculation */
-       tcp_seq t_bw_rtseq;             /* used for bandwidth calculation */
+       u_int32_t rfbuf_ts;             /* recv buffer autoscaling timestamp */
+       u_int32_t rfbuf_cnt;            /* recv buffer autoscaling byte count */
 
        int     t_rxtcur;               /* current retransmit value (ticks) */
        u_int   t_maxseg;               /* maximum segment size */
@@ -302,7 +310,7 @@ struct tcpcb {
        int     t_unacksegs;            /* received but unacked segments: used for delaying acks */
        u_int32_t       t_persist_timeout;      /* ZWP persistence limit as set by PERSIST_TIMEOUT */
        u_int32_t       t_persist_stop;         /* persistence limit deadline if triggered by ZWP */
-
+       u_int32_t       t_notsent_lowat;        /* Low water for not sent data */
 
 /* 3529618 MSS overload prevention */
        u_int32_t       rcv_reset;
@@ -320,10 +328,9 @@ struct tcpcb {
 #define TE_SENDIPECT           0x04    /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
 #define TE_SENDCWR             0x08    /* Indicate that the next non-retransmit should have the TCP CWR flag set */
 #define TE_SENDECE             0x10    /* Indicate that the next packet should have the TCP ECE flag set */
-       tcp_seq snd_high;               /* for use in NewReno Fast Recovery */
-       tcp_seq snd_high_prev;  /* snd_high prior to retransmit */
+#define TE_ECN_ON              (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection */
+
        tcp_seq snd_recover_prev;       /* snd_recover prior to retransmit */
-       u_char  snd_limited;            /* segments limited transmitted */
 /* anti DoS counters */
        u_int32_t       rcv_second;             /* start of interval second */
 
@@ -362,7 +369,9 @@ struct tcpcb {
        uint32_t        t_flagsext;             /* Another field to accommodate more flags */
 #define TF_RXTFINDROP  0x1                     /* Drop conn after retransmitting FIN 3 times */
 #define TF_RCVUNACK_WAITSS     0x2             /* set when the receiver should not stretch acks */
-
+#define TF_BWMEAS_INPROGRESS   0x4             /* Indicate BW meas is happening */
+#define TF_MEASURESNDBW                0x8             /* Measure send bw on this connection */
+#define TF_LRO_OFFLOADED       0x10            /* Connection LRO offloaded */
 #if TRAFFIC_MGT
        /* Inter-arrival jitter related state */
        uint32_t        iaj_rcv_ts;             /* tcp clock when the first packet was received */
@@ -374,10 +383,26 @@ struct tcpcb {
        uint32_t        avg_iaj;                /* Mean */
        uint32_t        std_dev_iaj;            /* Standard deviation */
 #endif /* TRAFFIC_MGT */
+       struct bwmeas   *t_bwmeas;              /* State for bandwidth measurement */ 
+       uint32_t        t_lropktlen;            /* Bytes in a LRO frame */
+       tcp_seq         t_idleat;               /* rcv_nxt at idle time */
 };
 
 #define IN_FASTRECOVERY(tp)    (tp->t_flags & TF_FASTRECOVERY)
-#define ENTER_FASTRECOVERY(tp) tp->t_flags |= TF_FASTRECOVERY
+
+/*
+ * If the connection is in a throttled state due to advisory feedback from 
+ * the interface output queue, reset that state upon entering recovery;
+ * the data transfer during recovery should be just a trickle, and
+ * resetting the state will help to improve performance.
+ * We also do not want to back off twice in the same RTT.
+ */
+#define ENTER_FASTRECOVERY(_tp_) do {                          \
+       (_tp_)->t_flags |= TF_FASTRECOVERY;                     \
+       if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb))            \
+               inp_reset_fc_state((_tp_)->t_inpcb);            \
+} while(0)
+
 #define EXIT_FASTRECOVERY(tp)  tp->t_flags &= ~TF_FASTRECOVERY
 
 #if CONFIG_DTRACE
@@ -394,7 +419,9 @@ enum tcp_cc_event {
        TCP_CC_ECN_RCVD,
        TCP_CC_BAD_REXMT_RECOVERY,
        TCP_CC_OUTPUT_ERROR,
-       TCP_CC_CHANGE_ALGO
+       TCP_CC_CHANGE_ALGO,
+       TCP_CC_FLOW_CONTROL,
+       TCP_CC_SUSPEND
 };
 #endif /* CONFIG_DTRACE */
 
@@ -685,14 +712,40 @@ struct    tcpstat {
 
        /* SACK related stats */
        u_int32_t       tcps_sack_recovery_episode; /* SACK recovery episodes */
-       u_int32_t  tcps_sack_rexmits;       /* SACK rexmit segments   */
-       u_int32_t  tcps_sack_rexmit_bytes;          /* SACK rexmit bytes      */
-       u_int32_t  tcps_sack_rcv_blocks;            /* SACK blocks (options) received */
-       u_int32_t  tcps_sack_send_blocks;           /* SACK blocks (options) sent     */
-       u_int32_t  tcps_sack_sboverflow;            /* SACK sendblock overflow   */
+       u_int32_t       tcps_sack_rexmits;          /* SACK rexmit segments   */
+       u_int32_t       tcps_sack_rexmit_bytes;     /* SACK rexmit bytes      */
+       u_int32_t       tcps_sack_rcv_blocks;       /* SACK blocks (options) received */
+       u_int32_t       tcps_sack_send_blocks;      /* SACK blocks (options) sent     */
+       u_int32_t       tcps_sack_sboverflow;       /* SACK sendblock overflow   */
 
        u_int32_t       tcps_bg_rcvtotal;       /* total background packets received */
        u_int32_t       tcps_rxtfindrop;        /* drop conn after retransmitting FIN */
+       u_int32_t       tcps_fcholdpacket;      /* packets withheld because of flow control */
+
+       /* LRO related stats */
+       u_int32_t       tcps_coalesced_pack;    /* number of coalesced packets */
+       u_int32_t       tcps_flowtbl_full;      /* times flow table was full */
+       u_int32_t       tcps_flowtbl_collision; /* collisions in flow tbl */
+       u_int32_t       tcps_lro_twopack;       /* 2 packets coalesced */
+       u_int32_t       tcps_lro_multpack;      /* 3 or 4 pkts coalesced */
+       u_int32_t       tcps_lro_largepack;     /* 5 or more pkts coalesced */
+};
+
+struct tcpstat_local {
+       u_int64_t badformat;
+       u_int64_t unspecv6;
+       u_int64_t synfin;
+       u_int64_t badformatipsec;
+       u_int64_t noconnnolist;
+       u_int64_t noconnlist;
+       u_int64_t listbadsyn;
+       u_int64_t icmp6unreach;
+       u_int64_t deprecate6;
+       u_int64_t ooopacket;
+       u_int64_t rstinsynrcv;
+       u_int64_t dospacket;
+       u_int64_t cleanup;
+       u_int64_t synwindow;
 };
 
 #pragma pack(4)
@@ -961,6 +1014,7 @@ int         tcp_ctloutput(struct socket *, struct sockopt *);
 struct tcpcb *
         tcp_drop(struct tcpcb *, int);
 void    tcp_drain(void);
+void    tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt);
 struct rmxp_tao *
         tcp_gettaocache(struct inpcb *);
 void    tcp_init(void) __attribute__((section("__TEXT, initcode")));
@@ -998,8 +1052,22 @@ void       tcp_free_sackholes(struct tcpcb *tp);
 int32_t         tcp_sbspace(struct tcpcb *tp);
 void    tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
 void    tcp_reset_stretch_ack(struct tcpcb *tp);
+void    tcp_get_ports_used(unsigned int , uint8_t *);
+uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags);
+void    tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so);
+u_int8_t tcp_cansbgrow(struct sockbuf *sb);
+struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp);
+void tcp_bwmeas_free(struct tcpcb *tp);
+
+extern void tcp_set_background_cc(struct socket *);
+extern void tcp_set_foreground_cc(struct socket *);
+extern void tcp_set_recv_bg(struct socket *);
+extern void tcp_clear_recv_bg(struct socket *);
+#define        IS_TCP_RECV_BG(_so)     \
+       ((_so)->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG)
 
 #if TRAFFIC_MGT
+#define CLEAR_IAJ_STATE(_tp_) (_tp_)->iaj_rcv_ts = 0
 void    reset_acc_iaj(struct tcpcb *tp);
 #endif /* TRAFFIC_MGT */
 
index 37cc4153cca8b9a2cbc1c6f27a74b7a912bcbb95..e5462393d0df2948afe2af3d2bbd5e46c3e042c0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -273,6 +273,7 @@ udp_input(m, iphlen)
        };
        struct udp_ip6 udp_ip6;
 #endif /* INET6 */
+       struct ifnet *ifp = (m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif: NULL;
 
        udpstat.udps_ipackets++;
 
@@ -280,6 +281,9 @@ udp_input(m, iphlen)
        if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16)
                m->m_pkthdr.csum_flags = 0; /* invalidate hwcksum for UDP */
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        /*
         * Strip IP options, if any; should skip this,
         * make available to user, and use on returned packets,
@@ -303,11 +307,16 @@ udp_input(m, iphlen)
                }
                ip = mtod(m, struct ip *);
        }
-       uh = (struct udphdr *)((caddr_t)ip + iphlen);
+       uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
 
        /* destination port of 0 is illegal, based on RFC768. */
-       if (uh->uh_dport == 0)
+       if (uh->uh_dport == 0) {
+       
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->port0, 1);
+                               
                goto bad;
+       }
 
        KERNEL_DEBUG(DBG_LAYER_IN_BEG, uh->uh_dport, uh->uh_sport,
                     ip->ip_src.s_addr, ip->ip_dst.s_addr, uh->uh_ulen);
@@ -320,6 +329,10 @@ udp_input(m, iphlen)
        if (ip->ip_len != len) {
                if (len > ip->ip_len || len < sizeof(struct udphdr)) {
                        udpstat.udps_badlen++;
+                       
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->badlength, 1);
+                       
                        goto bad;
                }
                m_adj(m, len - ip->ip_len);
@@ -344,21 +357,23 @@ udp_input(m, iphlen)
                } else {
                        char b[9];
 doudpcksum:
-                       *(uint32_t*)&b[0] = *(uint32_t*)&((struct ipovly *)ip)->ih_x1[0];
-                       *(uint32_t*)&b[4] = *(uint32_t*)&((struct ipovly *)ip)->ih_x1[4];
-                       *(uint8_t*)&b[8] = *(uint8_t*)&((struct ipovly *)ip)->ih_x1[8];
-                       
-                       bzero(((struct ipovly *)ip)->ih_x1, 9);
+                       bcopy(((struct ipovly *)ip)->ih_x1, b,
+                           sizeof (((struct ipovly *)ip)->ih_x1));
+                       bzero(((struct ipovly *)ip)->ih_x1,
+                           sizeof (((struct ipovly *)ip)->ih_x1));
                        ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
                        uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
-                       
-                       *(uint32_t*)&((struct ipovly *)ip)->ih_x1[0] = *(uint32_t*)&b[0];
-                       *(uint32_t*)&((struct ipovly *)ip)->ih_x1[4] = *(uint32_t*)&b[4];
-                       *(uint8_t*)&((struct ipovly *)ip)->ih_x1[8] = *(uint8_t*)&b[8];
+                       bcopy(b, ((struct ipovly *)ip)->ih_x1,
+                           sizeof (((struct ipovly *)ip)->ih_x1));
+
                        udp_in_cksum_stats(len);
                }
                if (uh->uh_sum) {
                        udpstat.udps_badsum++;
+                       
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->badchksum, 1);
+                       
                        m_freem(m);
                        KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return;
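The checksum block above uses the classic overlay trick: the nine ih_x1 scratch bytes of struct ipovly are saved, zeroed so the IP header doubles as the UDP pseudo-header, and restored after in_cksum(); the commit's change is to do the save/restore with bcopy() instead of word-sized loads and stores, removing the alignment assumption. A condensed model of the pattern:

#include <string.h>

/* Illustrative model of struct ipovly: 9 scratch bytes followed by
 * the length field the pseudo-header needs. */
struct ipovly_model {
	unsigned char	ih_x1[9];
	unsigned short	ih_len;
};

unsigned short
cksum_with_scratch(struct ipovly_model *ipov, unsigned short ulen,
    unsigned short (*cksum)(const void *, int), const void *pkt, int len)
{
	unsigned char saved[sizeof(ipov->ih_x1)];
	unsigned short sum;

	memcpy(saved, ipov->ih_x1, sizeof(saved));	/* save */
	memset(ipov->ih_x1, 0, sizeof(ipov->ih_x1));	/* zero for cksum */
	ipov->ih_len = ulen;
	sum = cksum(pkt, len);		/* stands in for in_cksum(m, ...) */
	memcpy(ipov->ih_x1, saved, sizeof(saved));	/* restore */
	return (sum);
}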
@@ -369,7 +384,7 @@ doudpcksum:
                udpstat.udps_nosum++;
 #endif
 
-       isbroadcast = in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif);
+       isbroadcast = in_broadcast(ip->ip_dst, ifp);
 
        if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || isbroadcast) {
 
@@ -414,6 +429,11 @@ doudpcksum:
                         if ((inp->inp_vflag & INP_IPV4) == 0)
                                 continue;
 #endif
+                       if (ip_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(inp->inp_flags & INP_RECV_ANYIF))
+                               continue;
+
                        if ((inp->inp_moptions == NULL) && 
                                (ntohl(ip->ip_dst.s_addr) != INADDR_ALLHOSTS_GROUP) && 
                                (isbroadcast == 0) )
@@ -466,7 +486,7 @@ doudpcksum:
                                        group.sin_family = AF_INET;
                                        group.sin_addr = ip->ip_dst;
 
-                                       blocked = imo_multi_filter(imo, m->m_pkthdr.rcvif,
+                                       blocked = imo_multi_filter(imo, ifp,
                                                (struct sockaddr *)&group,
                                                (struct sockaddr *)&udp_in);
                                        if (blocked == MCAST_PASS) 
@@ -524,11 +544,17 @@ doudpcksum:
                         */
                        if (reuse_sock == 0 || m == NULL)
                                break;
+
+                       /*
+                        * Expect 32-bit aligned data pointer on strict-align
+                        * platforms.
+                        */
+                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
                        /*
                         * Recompute IP and UDP header pointers for new mbuf
                         */
                        ip = mtod(m, struct ip *);
-                       uh = (struct udphdr *)((caddr_t)ip + iphlen);
+                       uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
                }
                lck_rw_done(pcbinfo->mtx);
 
@@ -539,6 +565,10 @@ doudpcksum:
                         * for a broadcast or multicast datagram.)
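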
                         */
                        udpstat.udps_noportbcast++;
+                       
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->port_unreach, 1);
+                       
                        goto bad;
                }
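
Both recoveries of uh in this hunk now cast through (void *), and MBUF_STRICT_DATA_ALIGNMENT_CHECK_32() asserts the invariant that makes the cast legitimate: mbuf payload pointers are kept 32-bit aligned (and iphlen is a multiple of 4), so the struct access cannot fault on strict-alignment targets. A hedged approximation of the pair; the real macro lives in the mbuf headers and panics rather than assert()ing:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative stand-in: the kernel macro is a no-op unless the
     * target CPU traps on misaligned loads, and it panics, not asserts. */
    #define DATA_ALIGNMENT_CHECK_32(p) \
            assert(((uintptr_t)(p) & 3) == 0)

    struct udphdr_like { uint16_t sport, dport, len, sum; };

    static struct udphdr_like *
    recover_udp_header(char *pkt, int iphlen)  /* iphlen is 4-byte granular */
    {
            DATA_ALIGNMENT_CHECK_32(pkt);
            /* the (void *) hop silences -Wcast-align; the check above is
             * what actually makes the cast safe */
            return (struct udphdr_like *)(void *)(pkt + iphlen);
    }
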
 
@@ -565,8 +595,14 @@ doudpcksum:
                                KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
                                return;
                        }
+                       /*
+                        * Expect 32-bit aligned data pointer on strict-align
+                        * platforms.
+                        */
+                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
                        ip = mtod(m, struct ip *);
-                       uh = (struct udphdr *)((caddr_t)ip + iphlen);
+                       uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
                }
                /* Check for NAT keepalive packet */
                if (payload_len == 1 && *(u_int8_t*)((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
@@ -574,7 +610,7 @@ doudpcksum:
                        KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return;
                }
-               else if (payload_len == 4 && *(u_int32_t*)((caddr_t)uh + sizeof(struct udphdr)) != 0) {
+               else if (payload_len == 4 && *(u_int32_t*)(void *)((caddr_t)uh + sizeof(struct udphdr)) != 0) {
                        /* UDP encapsulated IPSec packet to pass through NAT */
                        KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
                        /* preserve the udp header */
@@ -588,8 +624,12 @@ doudpcksum:
         * Locate pcb for datagram.
         */
        inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
-           ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+           ip->ip_dst, uh->uh_dport, 1, ifp);
        if (inp == NULL) {
+               
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->port_unreach, 1);
+       
                if (log_in_vain) {
                        char buf[MAX_IPv4_STR_LEN];
                        char buf2[MAX_IPv4_STR_LEN];
@@ -621,7 +661,7 @@ doudpcksum:
                        goto bad;
 #endif
                if (blackhole)
-                       if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type != IFT_LOOP)
+                       if (ifp && ifp->if_type != IFT_LOOP)
                                goto bad;
                *ip = save_ip;
                ip->ip_len += iphlen;
@@ -633,6 +673,10 @@ doudpcksum:
 
        if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
                udp_unlock(inp->inp_socket, 1, 0);
+               
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->cleanup, 1);
+               
                goto bad;
        }
 #if IPSEC
@@ -640,8 +684,12 @@ doudpcksum:
                if (ipsec4_in_reject_so(m, inp->inp_socket)) {
                        IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
                        udp_unlock(inp->inp_socket, 1, 0);
+                       
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->badipsec, 1);
+                       
                        goto bad;
-               }
+               }
        }
 #endif /*IPSEC*/
 
@@ -718,10 +766,14 @@ ip_2_ip6_hdr(ip6, ip)
        ip6->ip6_plen = ip->ip_len;
        ip6->ip6_nxt = ip->ip_p;
        ip6->ip6_hlim = ip->ip_ttl;
-       ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] =
-               IPV6_ADDR_INT32_SMP;
-       ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
-       ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
+       if (ip->ip_src.s_addr) {
+               ip6->ip6_src.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
+               ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
+       }
+       if (ip->ip_dst.s_addr) {
+               ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
+               ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
+       }
 }
 #endif
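
ip_2_ip6_hdr() now writes the IPv4-mapped prefix (IPV6_ADDR_INT32_SMP, i.e. the ::ffff:0:0/96 well-known prefix) only when the corresponding IPv4 address is non-zero, so 0.0.0.0 translates to the IPv6 unspecified address :: rather than ::ffff:0.0.0.0. A small sketch of that rule with a simplified address struct; byte-order handling is elided here, where the kernel works on network-order words:

    #include <stdint.h>
    #include <string.h>

    struct in6_like { uint32_t s6_addr32[4]; };     /* simplified in6_addr */

    #define MAPPED_PREFIX 0x0000ffffU  /* stand-in for IPV6_ADDR_INT32_SMP */

    static void
    map_v4_to_v6(struct in6_like *six, uint32_t v4addr)
    {
            memset(six, 0, sizeof (*six));
            if (v4addr != 0) {              /* INADDR_ANY stays plain :: */
                    six->s6_addr32[2] = MAPPED_PREFIX;
                    six->s6_addr32[3] = v4addr;
            }
    }
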
 
@@ -825,14 +877,13 @@ udp_ctlinput(cmd, sa, vip)
        void *vip;
 {
        struct ip *ip = vip;
-       struct udphdr *uh;
        void (*notify)(struct inpcb *, int) = udp_notify;
         struct in_addr faddr;
        struct inpcb *inp;
 
-       faddr = ((struct sockaddr_in *)sa)->sin_addr;
+       faddr = ((struct sockaddr_in *)(void *)sa)->sin_addr;
        if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
-               return;
+               return;
 
        if (PRC_IS_REDIRECT(cmd)) {
                ip = 0;
@@ -842,12 +893,15 @@ udp_ctlinput(cmd, sa, vip)
        else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
                return;
        if (ip) {
-               uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
-               inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
-                    ip->ip_src, uh->uh_sport, 0, NULL);
+               struct udphdr uh;
+
+               bcopy(((caddr_t)ip + (ip->ip_hl << 2)), &uh, sizeof (uh));
+               inp = in_pcblookup_hash(&udbinfo, faddr, uh.uh_dport,
+                    ip->ip_src, uh.uh_sport, 0, NULL);
                if (inp != NULL && inp->inp_socket != NULL) {
                        udp_lock(inp->inp_socket, 1, 0);
-                       if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)  {
+                       if (in_pcb_checkstate(inp, WNT_RELEASE, 1) ==
+                           WNT_STOPUSING)  {
                                udp_unlock(inp->inp_socket, 1, 0);
                                return;
                        }
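
udp_ctlinput() previously formed a struct udphdr * directly into the ICMP payload at ip_hl << 2 bytes in, a pointer that may be only 16-bit aligned; the diff instead bcopy()s the header into a stack copy and reads the ports from that. The same memcpy-into-a-local idiom, reduced to essentials:

    #include <stdint.h>
    #include <string.h>

    struct udp_ports { uint16_t sport, dport; };    /* head of a udphdr */

    /* Read the ports at an arbitrary (possibly odd) offset without ever
     * forming a misaligned struct pointer; memcpy handles alignment. */
    static struct udp_ports
    read_ports(const uint8_t *pkt, size_t off)
    {
            struct udp_ports p;

            memcpy(&p, pkt + off, sizeof (p));
            return p;
    }
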
@@ -864,7 +918,9 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
        int     error, optval;
        struct  inpcb *inp;
 
-       if (sopt->sopt_level != IPPROTO_UDP)
+       /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
+       if (sopt->sopt_level != IPPROTO_UDP &&
+           !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
                return (ip_ctloutput(so, sopt));
 
        error = 0;
@@ -890,6 +946,14 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
                                inp->inp_flags &= ~INP_UDP_NOCKSUM;
                        break;
 
+               case SO_FLUSH:
+                       if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval))) != 0)
+                               break;
+
+                       error = inp_flush(inp, optval);
+                       break;
+
                default:
                        error = ENOPROTOOPT;
                        break;
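
SO_FLUSH is a private socket-level option that asks the stack to drop this flow's packets still sitting in the interface output queue; letting it through udp_ctloutput() means UDP handles it by calling inp_flush() with the caller's flags. Assuming a build against this tree's private sys/socket.h (the option is not in the public SDK), user-space usage would look like:

    #include <sys/socket.h>

    /* Assumes SO_FLUSH is visible, e.g. building against this tree's
     * private sys/socket.h; the option is not in the public SDK. */
    static int
    flush_socket(int fd, int flags)
    {
            /* flags choose which of the flow's queued packets to drop */
            return (setsockopt(fd, SOL_SOCKET, SO_FLUSH, &flags,
                sizeof (flags)));
    }
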
@@ -1131,11 +1195,22 @@ udp_pcblist_n SYSCTL_HANDLER_ARGS
        return error;
 }
 
-
 SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
             udp_pcblist_n, "S,xinpcb_n", "List of active UDP sockets");
 
 
+__private_extern__ void
+udp_get_ports_used(unsigned int ifindex, uint8_t *bitfield)
+{
+       inpcb_get_ports_used(ifindex, bitfield, &udbinfo);
+}
+
+__private_extern__ uint32_t
+udp_count_opportunistic(unsigned int ifindex, u_int32_t flags)
+{
+       return inpcb_count_opportunistic(ifindex, &udbinfo, flags);
+}
+
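
udp_get_ports_used() fills a caller-supplied bitfield with one bit per UDP port bound on the given interface, i.e. a 65536-bit (8 KB) map over the whole port space. Helper routines for such a map, as a sketch; the bit-within-byte ordering is an assumption, not taken from the kernel:

    #include <stdint.h>

    #define PORT_BITMAP_BYTES (65536 / 8)   /* one bit per 16-bit port */

    static void
    port_set(uint8_t *bitfield, uint16_t port)
    {
            bitfield[port / 8] |= (uint8_t)(1 << (port % 8));
    }

    static int
    port_used(const uint8_t *bitfield, uint16_t port)
    {
            return (bitfield[port / 8] >> (port % 8)) & 1;
    }

    /* Usage: uint8_t map[PORT_BITMAP_BYTES] = { 0 }; port_set(map, 53); */
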
 static __inline__ u_int16_t
 get_socket_id(struct socket * s)
 {
@@ -1152,10 +1227,10 @@ get_socket_id(struct socket * s)
 }
 
 static int
-udp_check_pktinfo(struct mbuf *control, unsigned int *ifindex, struct in_addr *laddr)
+udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, struct in_addr *laddr)
 {
        struct cmsghdr *cm = 0;
-       struct in_pktinfo *pktinfo;     
+       struct in_pktinfo *pktinfo;
        struct ifnet *ifp;
 
        /*
@@ -1171,14 +1246,14 @@ udp_check_pktinfo(struct mbuf *control, unsigned int *ifindex, struct in_addr *l
        for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
                if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
                        return (EINVAL);
-       
+
                if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_PKTINFO)
                        continue;
 
-               if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) 
+               if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
                        return (EINVAL);
 
-               pktinfo =  (struct in_pktinfo *)CMSG_DATA(cm);
+               pktinfo =  (struct in_pktinfo *)(void *)CMSG_DATA(cm);
 
                /* Check for a valid ifindex in pktinfo */
                ifnet_head_lock_shared();
@@ -1199,7 +1274,8 @@ udp_check_pktinfo(struct mbuf *control, unsigned int *ifindex, struct in_addr *l
 
                        ifnet_head_done();
 
-                       *ifindex = pktinfo->ipi_ifindex;
+                       if (outif != NULL)
+                               *outif = ifp;
                        laddr->s_addr = INADDR_ANY;
                        break;
                }
@@ -1207,7 +1283,8 @@ udp_check_pktinfo(struct mbuf *control, unsigned int *ifindex, struct in_addr *l
                ifnet_head_done();
 
                /* Use the provided ipi_spec_dst address for temp source address */
-               *ifindex = 0;
+               if (outif != NULL)
+                       *outif = NULL;
                *laddr = pktinfo->ipi_spec_dst;
                break;
        }
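
udp_check_pktinfo() walks the control mbuf's cmsghdrs looking for an IPPROTO_IP/IP_PKTINFO record, then either resolves ipi_ifindex to an ifnet (now returned as *outif and used as the send scope) or falls back to ipi_spec_dst as a one-shot source address. The matching user-space side, sending one datagram with a per-packet interface choice:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <string.h>

    /* Send one datagram with IP_PKTINFO ancillary data choosing the
     * outgoing interface for this packet only. */
    static ssize_t
    send_with_pktinfo(int fd, struct sockaddr_in *dst,
        void *buf, size_t len, unsigned int ifindex)
    {
            char cbuf[CMSG_SPACE(sizeof (struct in_pktinfo))];
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct in_pktinfo pi;
            struct msghdr msg;
            struct cmsghdr *cm;

            memset(cbuf, 0, sizeof (cbuf));
            memset(&pi, 0, sizeof (pi));
            memset(&msg, 0, sizeof (msg));
            pi.ipi_ifindex = ifindex;       /* 0 means "no preference" */

            msg.msg_name = (void *)dst;
            msg.msg_namelen = sizeof (*dst);
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = cbuf;
            msg.msg_controllen = sizeof (cbuf);

            cm = CMSG_FIRSTHDR(&msg);
            cm->cmsg_level = IPPROTO_IP;
            cm->cmsg_type = IP_PKTINFO;
            cm->cmsg_len = CMSG_LEN(sizeof (pi));
            memcpy(CMSG_DATA(cm), &pi, sizeof (pi));

            return (sendmsg(fd, &msg, 0));
    }
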
@@ -1234,23 +1311,32 @@ udp_output(inp, m, addr, control, p)
        struct mbuf *inpopts;
        struct ip_moptions *mopts;
        struct route ro;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
-       mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
-       unsigned int origoutif;
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
+       struct ifnet *outif = NULL;
+       struct flowadv *adv = &ipoa.ipoa_flowadv;
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+       struct ifnet *origoutifp;
+       int flowadv = 0;
+
+       /* Enable flow advisory only when connected */
+       flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0;
 
        pi_laddr.s_addr = INADDR_ANY;
 
        KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
+       lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
        if (control != NULL) {
-               mtc = mbuf_traffic_class_from_control(control);
+               msc = mbuf_service_class_from_control(control);
 
-               error = udp_check_pktinfo(control, &ipoa.ipoa_boundif, &pi_laddr);
+               error = udp_check_pktinfo(control, &outif, &pi_laddr);
 
                m_freem(control);
                if (error)
                        goto release;
                pktinfo++;
+               if (outif != NULL)
+                       ipoa.ipoa_boundif = outif->if_index;
        }
 
        KERNEL_DEBUG(DBG_LAYER_OUT_BEG, inp->inp_fport, inp->inp_lport,
@@ -1262,16 +1348,26 @@ udp_output(inp, m, addr, control, p)
                goto release;
        }
 
-        lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-
+       if (flowadv && INP_WAIT_FOR_IF_FEEDBACK(inp)) {
+               /*
+                * The socket is flow-controlled, drop the packets
+                * until the inp is not flow controlled
+                */
+               error = ENOBUFS;
+               goto release;
+       }
        /*
         * If socket was bound to an ifindex, tell ip_output about it.
         * If the ancillary IP_PKTINFO option contains an interface index,
         * it takes precedence over the one specified by IP_BOUND_IF.
         */
-       if (ipoa.ipoa_boundif == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
-               ipoa.ipoa_boundif = inp->inp_boundif;
-       ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+       if (ipoa.ipoa_boundif == IFSCOPE_NONE && 
+               (inp->inp_flags & INP_BOUND_IF)) {
+               outif = inp->inp_boundifp;
+               ipoa.ipoa_boundif = outif->if_index;
+       }
+       if (inp->inp_flags & INP_NO_IFT_CELLULAR)
+               ipoa.ipoa_flags |=  IPOAF_NO_CELLULAR;
        soopts |= IP_OUTARGS;
 
        /* If there was a routing change, discard cached route and check
@@ -1284,18 +1380,22 @@ udp_output(inp, m, addr, control, p)
 
                /* src address is gone? */
                if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
-                       if (((inp->inp_flags & INP_INADDR_ANY) == 0) || (so->so_state & SS_ISCONNECTED)) {
+                       if (((inp->inp_flags & INP_INADDR_ANY) == 0) || 
+                           (so->so_state & SS_ISCONNECTED)) {
                                /* Rdar://5448998
                                 * If the source address is gone, return an error if:
                                 * - the source was specified
                                 * - the socket was already connected
                                 */
+                               soevent(so,
+                                   (SO_FILT_HINT_LOCKED | 
+                                   SO_FILT_HINT_NOSRCADDR));
                                error = EADDRNOTAVAIL;
                                goto release;
                        } else {
                                /* new src will be set later */
                                inp->inp_laddr.s_addr = INADDR_ANY;
-                               inp->inp_last_outif = 0;
+                               inp->inp_last_outifp = NULL;
                        }
                }
                if (ia != NULL)
@@ -1305,7 +1405,7 @@ udp_output(inp, m, addr, control, p)
                inp->inp_route.ro_rt = NULL;
        }
 
-       origoutif = inp->inp_last_outif;
+       origoutifp = inp->inp_last_outifp;
 
        /* IP_PKTINFO option check.
         * If a temporary scope or src address is provided, use it for this packet only
@@ -1322,13 +1422,13 @@ udp_output(inp, m, addr, control, p)
                origladdr = laddr = inp->inp_laddr;
        }
 
-       origoutif = inp->inp_last_outif;
+       origoutifp = inp->inp_last_outifp;
        faddr = inp->inp_faddr;
        lport = inp->inp_lport;
        fport = inp->inp_fport;
 
        if (addr) {
-               sin = (struct sockaddr_in *)addr;
+               sin = (struct sockaddr_in *)(void *)addr;
                if (faddr.s_addr != INADDR_ANY) {
                        error = EISCONN;
                        goto release;
@@ -1342,7 +1442,7 @@ udp_output(inp, m, addr, control, p)
 
                        if (pi_laddr.s_addr != INADDR_ANY)      /* if we have a source address specified, use that */
                                inp->inp_laddr = pi_laddr;
-                       error = in_pcbconnect(inp, addr, p, &ipoa.ipoa_boundif); /* if a scope is specified, use it */
+                       error = in_pcbconnect(inp, addr, p, &outif); /* if a scope is specified, use it */
                        if (error) {
                                goto release;
                        }
@@ -1351,6 +1451,8 @@ udp_output(inp, m, addr, control, p)
                        faddr = inp->inp_faddr;
                        fport = inp->inp_fport;
                        udp_dodisconnect = 1;
+                       ipoa.ipoa_boundif = (outif != NULL) ?
+                           outif->if_index : IFSCOPE_NONE;
                }
                else {
                        /* Fast path case
@@ -1361,10 +1463,12 @@ udp_output(inp, m, addr, control, p)
                         * priority is always given to the scope provided by INP_BOUND_IF.
                         */
                        if (laddr.s_addr == INADDR_ANY) {
-                          if ((error = in_pcbladdr(inp, addr, &ifaddr, &ipoa.ipoa_boundif)) != 0)
-                                  goto release;
-                          laddr = ifaddr.sin_addr;
-                          inp->inp_flags |= INP_INADDR_ANY; /* from pcbconnect: remember we don't care about src addr.*/
+                               if ((error = in_pcbladdr(inp, addr, &ifaddr, &outif)) != 0)
+                                       goto release;
+                               laddr = ifaddr.sin_addr;
+                               inp->inp_flags |= INP_INADDR_ANY; /* from pcbconnect: remember we don't care about src addr.*/
+                               ipoa.ipoa_boundif = (outif != NULL) ?
+                                   outif->if_index : IFSCOPE_NONE;
                        }
 
                        faddr = sin->sin_addr;
@@ -1380,6 +1484,8 @@ udp_output(inp, m, addr, control, p)
 #if CONFIG_MACF_NET
        mac_mbuf_label_associate_inpcb(inp, m);
 #endif
+       if (inp->inp_flowhash == 0)
+               inp->inp_flowhash = inp_calc_flowhash(inp);
 
        /*
         * Calculate data length and get a mbuf
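
inp_flowhash is computed at most once per PCB, the first time it is needed (here on output, and below in udp_connect()), and is then stamped into every mbuf's m_pkthdr.m_flowhash so PF and the interface queues can treat a socket's packets as one flow. The lazy-init pattern in miniature; the hash itself is a stand-in, since inp_calc_flowhash() mixes the tuple with a random seed:

    #include <stdint.h>

    /* Stand-in for inp_calc_flowhash(): an FNV-1a-style mix of the
     * 4-tuple; the kernel also folds in a random seed. */
    static uint32_t
    calc_flowhash(uint32_t laddr, uint16_t lport,
        uint32_t faddr, uint16_t fport)
    {
            uint32_t w[3] = { laddr, faddr, ((uint32_t)lport << 16) | fport };
            uint32_t h = 2166136261u;

            for (int i = 0; i < 3; i++) {
                    h ^= w[i];
                    h *= 16777619u;
            }
            return h ? h : 1;     /* 0 is reserved for "not yet computed" */
    }

    struct pcb_like { uint32_t flowhash; };

    /* Compute once, reuse for every subsequent packet on the socket. */
    static uint32_t
    pcb_flowhash(struct pcb_like *pcb)
    {
            if (pcb->flowhash == 0)
                    pcb->flowhash = calc_flowhash(0x0a000001, 5353,
                        0x0a000002, 53);    /* example 4-tuple */
            return pcb->flowhash;
    }
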
@@ -1429,18 +1535,37 @@ udp_output(inp, m, addr, control, p)
                goto abort;
        }
 #endif /*IPSEC*/
-       m->m_pkthdr.socket_id = get_socket_id(inp->inp_socket);
 
        inpopts = inp->inp_options;
        soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
        mopts = inp->inp_moptions;
-       if (mopts != NULL)
-               IMO_ADDREF(mopts);
+       if (mopts != NULL) {
+               IMO_LOCK(mopts);
+               IMO_ADDREF_LOCKED(mopts);
+               if (IN_MULTICAST(ntohl(ui->ui_dst.s_addr)) &&
+                               mopts->imo_multicast_ifp != NULL) {
+                       inp->inp_last_outifp = mopts->imo_multicast_ifp;
+               }
+               IMO_UNLOCK(mopts);
+       }
 
        /* Copy the cached route and take an extra reference */
        inp_route_copyout(inp, &ro);
 
-       set_packet_tclass(m, so, mtc, 0);
+       set_packet_service_class(m, so, msc, 0);
+       m->m_pkthdr.socket_id = get_socket_id(inp->inp_socket);
+       m->m_pkthdr.m_flowhash = inp->inp_flowhash;
+       m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH;
+       if (flowadv)
+               m->m_pkthdr.m_fhflags |= PF_TAG_FLOWADV;
+
+       if (ipoa.ipoa_boundif != IFSCOPE_NONE)
+               ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+
+       if (laddr.s_addr != INADDR_ANY)
+               ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;
+
+       inp->inp_sndinprog_cnt++;
 
        socket_unlock(so, 0);
        error = ip_output_list(m, 0, inpopts, &ro, soopts, mopts, &ipoa);
@@ -1453,6 +1578,20 @@ udp_output(inp, m, addr, control, p)
                locked_add_64(&inp->inp_stat->txpackets, 1);
                locked_add_64(&inp->inp_stat->txbytes, len);
        }
+
+       if (flowadv && (adv->code == FADV_FLOW_CONTROLLED ||
+           adv->code == FADV_SUSPENDED)) {
+               /* return a hint to the application that 
+                * the packet has been dropped
+                */
+               error = ENOBUFS;
+               inp_set_fc_state(inp, adv->code);
+       }
+
+       VERIFY(inp->inp_sndinprog_cnt > 0);
+       if ( --inp->inp_sndinprog_cnt == 0)
+               inp->inp_flags &= ~(INP_FC_FEEDBACK);
+
        /* Synchronize PCB cached route */
        inp_route_copyin(inp, &ro);
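
This is the other half of the flow-advisory changes: ip_output_list() reports back through ipoa.ipoa_flowadv, and a FADV_FLOW_CONTROLLED or FADV_SUSPENDED verdict turns into ENOBUFS for the caller plus inp_set_fc_state(), which the INP_WAIT_FOR_IF_FEEDBACK check near the top of udp_output() then uses to refuse further sends until feedback clears the state. The control flow, reduced to a sketch with stand-in types:

    #include <errno.h>

    enum fadv_code { FADV_SUCCESS, FADV_FLOW_CONTROLLED, FADV_SUSPENDED };

    struct flow_state { int suspended; };  /* stand-in for the inp fc bits */

    /* Post-send half: latch the flow-controlled state and report the
     * drop to the application, mirroring the hunk above. */
    static int
    handle_advisory(struct flow_state *fs, enum fadv_code code)
    {
            if (code == FADV_FLOW_CONTROLLED || code == FADV_SUSPENDED) {
                    fs->suspended = 1;      /* inp_set_fc_state() analogue */
                    return ENOBUFS;         /* hint: the packet was dropped */
            }
            return 0;
    }

    /* Pre-send half: the INP_WAIT_FOR_IF_FEEDBACK analogue. */
    static int
    pre_send_check(const struct flow_state *fs)
    {
            return fs->suspended ? ENOBUFS : 0;
    }
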
 
@@ -1465,10 +1604,10 @@ abort:
                }
                in_pcbdisconnect(inp);
                inp->inp_laddr = origladdr;     /* XXX rehash? */
-               inp->inp_last_outif = origoutif;
+               inp->inp_last_outifp = origoutifp;
        } else if (inp->inp_route.ro_rt != NULL) {
                struct rtentry *rt = inp->inp_route.ro_rt;
-               unsigned int outif;
+               struct ifnet *outifp;
 
                if (rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST))
                        rt = NULL;      /* unusable */
@@ -1480,12 +1619,11 @@ abort:
                        inp->inp_route.ro_rt = NULL;
                }
                /*
-                * If the destination route is unicast, update outif with
-                * that of the route interface index used by IP.
+                * If the destination route is unicast, update outifp with
+                * that of the route interface used by IP.
                 */
-               if (rt != NULL &&
-                   (outif = rt->rt_ifp->if_index) != inp->inp_last_outif)
-                       inp->inp_last_outif = outif;
+               if (rt != NULL && (outifp = rt->rt_ifp) != inp->inp_last_outifp)
+                       inp->inp_last_outifp = outifp;
        }
 
 release:
@@ -1496,8 +1634,8 @@ release:
 }
 
 u_int32_t      udp_sendspace = 9216;           /* really max datagram size */
-/* 40 1K datagrams */
-u_int32_t      udp_recvspace = 40 * (1024 +
+/* 187 1K datagrams (approx 192 KB) */
+u_int32_t      udp_recvspace = 187 * (1024 +
 #if INET6
                                      sizeof(struct sockaddr_in6)
 #else
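
For scale: with sizeof (struct sockaddr_in6) == 28, the new default is 187 * (1024 + 28) = 196,724 bytes, just over the advertised 192 KB (196,608 bytes); the old default of 40 * 1,052 = 42,080 bytes was roughly 41 KB.
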
@@ -1573,7 +1711,8 @@ udp_attach(struct socket *so, __unused int proto, struct proc *p)
        inp = (struct inpcb *)so->so_pcb;
        inp->inp_vflag |= INP_IPV4;
        inp->inp_ip_ttl = ip_defttl;
-       nstat_udp_new_pcb(inp);
+       if (nstat_collect)
+               nstat_udp_new_pcb(inp);
        return 0;
 }
 
@@ -1606,8 +1745,11 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        if (inp->inp_faddr.s_addr != INADDR_ANY)
                return EISCONN;
        error = in_pcbconnect(inp, nam, p, NULL);
-       if (error == 0) 
+       if (error == 0) {
                soisconnected(so);
+               if (inp->inp_flowhash == 0)
+                       inp->inp_flowhash = inp_calc_flowhash(inp);
+       }
        return error;
 }
 
@@ -1636,9 +1778,13 @@ udp_disconnect(struct socket *so)
                return ENOTCONN;
 
        in_pcbdisconnect(inp);
+       
+       /* reset flow controlled state, just in case */
+       inp_reset_fc_state(inp);
+
        inp->inp_laddr.s_addr = INADDR_ANY;
        so->so_state &= ~SS_ISCONNECTED;                /* XXX */
-       inp->inp_last_outif = 0;
+       inp->inp_last_outifp = NULL;
        return 0;
 }
 
index 3a75d1faf263dbbf834e33870becb9532ebc67c5..776109a592aa19e3715c91fb0713912dc464b16a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -128,6 +128,17 @@ struct     udpstat {
 
 SYSCTL_DECL(_net_inet_udp);
 
+struct udpstat_local {
+       u_int64_t       port_unreach;
+       u_int64_t       faithprefix;    /* deprecated */
+       u_int64_t       port0;
+       u_int64_t       badlength;
+       u_int64_t       badchksum;
+       u_int64_t       badmcast;
+       u_int64_t       cleanup;
+       u_int64_t       badipsec;
+}; 
+
 extern struct  pr_usrreqs udp_usrreqs;
 extern struct  inpcbhead udb;
 extern struct  inpcbinfo udbinfo;
@@ -152,6 +163,8 @@ lck_mtx_t * udp_getlock (struct socket *, int);
 #else
 void * udp_getlock (struct socket *, int);
 #endif
+void udp_get_ports_used(unsigned int, uint8_t *);
+uint32_t udp_count_opportunistic(unsigned int, u_int32_t);
 
 #endif /* KERNEL_PRIVATE */
 #endif /* _NETINET_UDP_VAR_H_ */
index f765bace425d06ef53929b66b0e6bc191370fb2e..141f5086092f5a41819db512eafd4dd24b6c8d65 100644 (file)
@@ -18,14 +18,14 @@ EXPINC_SUBDIRS_I386 = \
 DATAFILES = \
        ah.h ipsec.h pim6.h  \
        esp.h in6.h ipcomp.h raw_ip6.h \
-       in6_var.h ip6_mroute.h nd6.h ip6_fw.h
+       in6_var.h ip6_mroute.h nd6.h
 
 PRIVATE_DATAFILES = \
-       in6_pcb.h ip6_var.h pim6_var.h mld6_var.h
+       in6_pcb.h ip6_var.h pim6_var.h mld6_var.h ip6_fw.h
 
 PRIVATE_KERNELFILES = \
        ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \
-        in6_prefix.h ip6_ecn.h ip6_fw.h  \
+        in6_prefix.h ip6_ecn.h \
        ip6protosw.h ipcomp6.h ipsec6.h \
        raw_ip6.h scope6_var.h tcp6_var.h udp6_var.h
 
index 27098a76f7380a113858569d0068db7dd1df38e4..a471825e90d89b83cc50003d15a2502b1c628e9d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netkey/keydb.h>
 #include <libkern/crypto/md5.h>
 #include <libkern/crypto/sha1.h>
-#include <crypto/sha2/sha2.h>
+#include <libkern/crypto/sha2.h>
 
 #include <net/net_osdep.h>
 
@@ -300,7 +300,7 @@ ah_keyed_md5_init(state, sav)
 {
        size_t padlen;
        size_t keybitlen;
-       u_int8_t buf[32];
+       u_int8_t buf[32] __attribute__((aligned(4)));
 
        if (!state)
                panic("ah_keyed_md5_init: what?");
@@ -369,7 +369,7 @@ ah_keyed_md5_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[16];
+       u_char digest[16] __attribute__((aligned(4)));
 
        if (!state)
                panic("ah_keyed_md5_result: what?");
@@ -420,7 +420,7 @@ ah_keyed_sha1_init(state, sav)
        SHA1_CTX *ctxt;
        size_t padlen;
        size_t keybitlen;
-       u_int8_t buf[32];
+       u_int8_t buf[32] __attribute__((aligned(4)));
 
        if (!state)
                panic("ah_keyed_sha1_init: what?");
@@ -491,7 +491,7 @@ ah_keyed_sha1_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[SHA1_RESULTLEN];  /* SHA-1 generates 160 bits */
+       u_char digest[SHA1_RESULTLEN] __attribute__((aligned(4)));      /* SHA-1 generates 160 bits */
        SHA1_CTX *ctxt;
 
        if (!state || !state->foo)
@@ -543,7 +543,7 @@ ah_hmac_md5_init(state, sav)
 {
        u_char *ipad;
        u_char *opad;
-       u_char tk[16];
+       u_char tk[16] __attribute__((aligned(4)));
        u_char *key;
        size_t keylen;
        size_t i;
@@ -559,7 +559,7 @@ ah_hmac_md5_init(state, sav)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (MD5_CTX *)(opad + 64);
+       ctxt = (MD5_CTX *)(void *)(opad + 64);
 
        /* compress the key if necessary */
        if (64 < _KEYLEN(state->sav->key_auth)) {
@@ -599,7 +599,7 @@ ah_hmac_md5_loop(state, addr, len)
 
        if (!state || !state->foo)
                panic("ah_hmac_md5_loop: what?");
-       ctxt = (MD5_CTX *)(((caddr_t)state->foo) + 128);
+       ctxt = (MD5_CTX *)(void *)(((caddr_t)state->foo) + 128);
        MD5Update(ctxt, addr, len);
 }
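
All the (void *) hops in ah_core.c share one cause: state->foo is a single heap block laid out as ipad, then opad, then the hash context (64 + 64 bytes for the 512-bit-block hashes, 128 + 128 for SHA-384/512), so the context pointer comes from byte arithmetic and the void * cast acknowledges the type pun. The stack buffers gain __attribute__((aligned(4))) for the same strict-alignment reasons as the mbuf checks earlier. The layout, sketched as an explicit struct (field sizes illustrative):

    #include <stdint.h>

    #define HMAC_BLOCK 64   /* MD5/SHA-1/SHA-256 block size */

    struct md5_ctx_like { uint32_t st[4]; uint64_t count; uint8_t buf[64]; };

    struct hmac_state {                     /* what state->foo points at */
            uint8_t ipad[HMAC_BLOCK];       /* offset 0 */
            uint8_t opad[HMAC_BLOCK];       /* offset 64 */
            struct md5_ctx_like ctxt;       /* offset 128: the "+ 64 + 64" */
    };

    /* Equivalent of ctxt = (MD5_CTX *)(void *)((caddr_t)foo + 128);
     * a real struct removes both the arithmetic and the cast. */
    static struct md5_ctx_like *
    hmac_ctxt(struct hmac_state *st)
    {
            return &st->ctxt;
    }
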
 
@@ -609,7 +609,7 @@ ah_hmac_md5_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[16];
+       u_char digest[16] __attribute__((aligned(4)));
        u_char *ipad;
        u_char *opad;
        MD5_CTX *ctxt;
@@ -619,7 +619,7 @@ ah_hmac_md5_result(state, addr, l)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (MD5_CTX *)(opad + 64);
+       ctxt = (MD5_CTX *)(void *)(opad + 64);
 
        MD5Final(&digest[0], ctxt);
 
@@ -669,7 +669,7 @@ ah_hmac_sha1_init(state, sav)
        u_char *ipad;
        u_char *opad;
        SHA1_CTX *ctxt;
-       u_char tk[SHA1_RESULTLEN];      /* SHA-1 generates 160 bits */
+       u_char tk[SHA1_RESULTLEN] __attribute__((aligned(4)));  /* SHA-1 generates 160 bits */
        u_char *key;
        size_t keylen;
        size_t i;
@@ -685,7 +685,7 @@ ah_hmac_sha1_init(state, sav)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (SHA1_CTX *)(opad + 64);
+       ctxt = (SHA1_CTX *)(void *)(opad + 64);
 
        /* compress the key if necessary */
        if (64 < _KEYLEN(state->sav->key_auth)) {
@@ -726,7 +726,7 @@ ah_hmac_sha1_loop(state, addr, len)
        if (!state || !state->foo)
                panic("ah_hmac_sha1_loop: what?");
 
-       ctxt = (SHA1_CTX *)(((u_char *)state->foo) + 128);
+       ctxt = (SHA1_CTX *)(void *)(((u_char *)state->foo) + 128);
        SHA1Update(ctxt, (caddr_t)addr, (size_t)len);
 }
 
@@ -736,7 +736,7 @@ ah_hmac_sha1_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[SHA1_RESULTLEN];  /* SHA-1 generates 160 bits */
+       u_char digest[SHA1_RESULTLEN] __attribute__((aligned(4)));      /* SHA-1 generates 160 bits */
        u_char *ipad;
        u_char *opad;
        SHA1_CTX *ctxt;
@@ -746,7 +746,7 @@ ah_hmac_sha1_result(state, addr, l)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (SHA1_CTX *)(opad + 64);
+       ctxt = (SHA1_CTX *)(void *)(opad + 64);
 
        SHA1Final((caddr_t)&digest[0], ctxt);
 
@@ -809,7 +809,7 @@ ah_hmac_sha2_256_init(state, sav)
        u_char *ipad;
        u_char *opad;
        SHA256_CTX *ctxt;
-       u_char tk[SHA256_DIGEST_LENGTH];
+       u_char tk[SHA256_DIGEST_LENGTH] __attribute__((aligned(4)));
        u_char *key;
        size_t keylen;
        size_t i;
@@ -825,7 +825,7 @@ ah_hmac_sha2_256_init(state, sav)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (SHA256_CTX *)(opad + 64);
+       ctxt = (SHA256_CTX *)(void *)(opad + 64);
 
        /* compress the key if necessary */
        if (64 < _KEYLEN(state->sav->key_auth)) {
@@ -869,7 +869,7 @@ ah_hmac_sha2_256_loop(state, addr, len)
        if (!state || !state->foo)
                panic("ah_hmac_sha2_256_loop: what?");
 
-       ctxt = (SHA256_CTX *)(((u_char *)state->foo) + 128);
+       ctxt = (SHA256_CTX *)(void *)(((u_char *)state->foo) + 128);
        SHA256_Update(ctxt, (const u_int8_t *)addr, (size_t)len);
 }
 
@@ -879,7 +879,7 @@ ah_hmac_sha2_256_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[SHA256_DIGEST_LENGTH];
+       u_char digest[SHA256_DIGEST_LENGTH] __attribute__((aligned(4)));
        u_char *ipad;
        u_char *opad;
        SHA256_CTX *ctxt;
@@ -889,7 +889,7 @@ ah_hmac_sha2_256_result(state, addr, l)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 64);
-       ctxt = (SHA256_CTX *)(opad + 64);
+       ctxt = (SHA256_CTX *)(void *)(opad + 64);
 
        SHA256_Final((u_int8_t *)digest, ctxt);
 
@@ -951,7 +951,7 @@ ah_hmac_sha2_384_init(state, sav)
        u_char *ipad;
        u_char *opad;
        SHA384_CTX *ctxt;
-       u_char tk[SHA384_DIGEST_LENGTH];
+       u_char tk[SHA384_DIGEST_LENGTH] __attribute__((aligned(4)));
        u_char *key;
        size_t keylen;
        size_t i;
@@ -968,7 +968,7 @@ ah_hmac_sha2_384_init(state, sav)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
-       ctxt = (SHA384_CTX *)(opad + 128);
+       ctxt = (SHA384_CTX *)(void *)(opad + 128);
 
        /* compress the key if necessary */
        if (128 < _KEYLEN(state->sav->key_auth)) {
@@ -1012,7 +1012,7 @@ ah_hmac_sha2_384_loop(state, addr, len)
        if (!state || !state->foo)
                panic("ah_hmac_sha2_384_loop: what?");
 
-       ctxt = (SHA384_CTX *)(((u_char *)state->foo) + 256);
+       ctxt = (SHA384_CTX *)(void *)(((u_char *)state->foo) + 256);
        SHA384_Update(ctxt, (const u_int8_t *)addr, (size_t)len);
 }
 
@@ -1032,7 +1032,7 @@ ah_hmac_sha2_384_result(state, addr, l)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
-       ctxt = (SHA384_CTX *)(opad + 128);
+       ctxt = (SHA384_CTX *)(void *)(opad + 128);
 
        SHA384_Final((u_int8_t *)digest, ctxt);
 
@@ -1094,7 +1094,7 @@ ah_hmac_sha2_512_init(state, sav)
        u_char *ipad;
        u_char *opad;
        SHA512_CTX *ctxt;
-       u_char tk[SHA512_DIGEST_LENGTH];
+       u_char tk[SHA512_DIGEST_LENGTH] __attribute__((aligned(4)));
        u_char *key;
        size_t keylen;
        size_t i;
@@ -1111,7 +1111,7 @@ ah_hmac_sha2_512_init(state, sav)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
-       ctxt = (SHA512_CTX *)(opad + 128);
+       ctxt = (SHA512_CTX *)(void *)(opad + 128);
 
        /* compress the key if necessary */
        if (128 < _KEYLEN(state->sav->key_auth)) {
@@ -1155,7 +1155,7 @@ ah_hmac_sha2_512_loop(state, addr, len)
        if (!state || !state->foo)
                panic("ah_hmac_sha2_512_loop: what?");
 
-       ctxt = (SHA512_CTX *)(((u_char *)state->foo) + 256);
+       ctxt = (SHA512_CTX *)(void *)(((u_char *)state->foo) + 256);
        SHA512_Update(ctxt, (const u_int8_t *) addr, (size_t)len);
 }
 
@@ -1165,7 +1165,7 @@ ah_hmac_sha2_512_result(state, addr, l)
        caddr_t addr;
        size_t l;
 {
-       u_char digest[SHA512_DIGEST_LENGTH];
+       u_char digest[SHA512_DIGEST_LENGTH] __attribute__((aligned(4)));
        u_char *ipad;
        u_char *opad;
        SHA512_CTX *ctxt;
@@ -1175,7 +1175,7 @@ ah_hmac_sha2_512_result(state, addr, l)
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
-       ctxt = (SHA512_CTX *)(opad + 128);
+       ctxt = (SHA512_CTX *)(void *)(opad + 128);
 
        SHA512_Final((u_int8_t *)digest, ctxt);
 
@@ -1257,7 +1257,7 @@ ah4_calccksum(m, ahdat, len, algo, sav)
        int hdrtype;
        size_t advancewidth;
        struct ah_algorithm_state algos;
-       u_char sumbuf[AH_MAXSUMSIZE];
+       u_char sumbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        int error = 0;
        int ahseen;
        struct mbuf *n = NULL;
@@ -1503,7 +1503,7 @@ ah6_calccksum(m, ahdat, len, algo, sav)
        int error;
        int ahseen;
        struct ah_algorithm_state algos;
-       u_char sumbuf[AH_MAXSUMSIZE];
+       u_char sumbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
 
        if ((m->m_flags & M_PKTHDR) == 0)
                return EINVAL;
index a448295b7bb8afa0cf185014589d1b3aa9aa1dbf..05d575b5ae23fdbd93186a1cbae67fb82dfea90c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,6 +66,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
@@ -152,9 +153,15 @@ ah4_input(struct mbuf *m, int off)
                }
        }
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
-       ah = (struct ah *)(((caddr_t)ip) + off);
+       ah = (struct ah *)(void *)(((caddr_t)ip) + off);
 #else
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
        IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct newah));
        if (ah == NULL) {
@@ -260,9 +267,11 @@ ah4_input(struct mbuf *m, int off)
                        IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                        goto fail;
                }
+               /* Expect 32-bit aligned data ptr on strict-align platforms */
+               MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
                ip = mtod(m, struct ip *);
-               ah = (struct ah *)(((caddr_t)ip) + off);
+               ah = (struct ah *)(void *)(((caddr_t)ip) + off);
        }
 #else
        IP6_EXTHDR_GET(ah, struct ah *, m, off,
@@ -628,7 +637,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
 
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), {return IPPROTO_DONE;});
-       ah = (struct ah *)(mtod(m, caddr_t) + off);
+       ah = (struct ah *)(void *)(mtod(m, caddr_t) + off);
 #else
        IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct newah));
        if (ah == NULL) {
@@ -637,6 +646,9 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                return IPPROTO_DONE;
        }
 #endif
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
        nxt = ah->ah_nxt;
 
@@ -1059,7 +1071,7 @@ ah6_ctlinput(cmd, sa, d)
                        m_copydata(m, off, sizeof(ah), (caddr_t)&ah);
                        ahp = &ah;
                } else
-                       ahp = (struct newah *)(mtod(m, caddr_t) + off);
+                       ahp = (struct newah *)(void *)(mtod(m, caddr_t) + off);
 
                if (cmd == PRC_MSGSIZE) {
                        int valid = 0;
@@ -1069,7 +1081,7 @@ ah6_ctlinput(cmd, sa, d)
                         * the address in the ICMP message payload.
                         */
                        sa6_src = ip6cp->ip6c_src;
-                       sa6_dst = (struct sockaddr_in6 *)sa;
+                       sa6_dst = (struct sockaddr_in6 *)(void *)sa;
                        sav = key_allocsa(AF_INET6,
                                          (caddr_t)&sa6_src->sin6_addr,
                                          (caddr_t)&sa6_dst->sin6_addr,
index 918196d8e32dec7652d4892e98531e250e60711e..18391b1873d1de6d9622c374f9f67eb9b0b953ad 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -277,7 +277,7 @@ ah4_output(m, sav)
        if (sav->flags & SADB_X_EXT_OLD) {
                struct ah *ahdr;
 
-               ahdr = (struct ah *)ahdrpos;
+               ahdr = (struct ah *)(void *)ahdrpos;
                ahsumpos = (u_char *)(ahdr + 1);
                ahdr->ah_len = plen >> 2;
                ahdr->ah_nxt = ip->ip_p;
@@ -287,7 +287,7 @@ ah4_output(m, sav)
        } else {
                struct newah *ahdr;
 
-               ahdr = (struct newah *)ahdrpos;
+               ahdr = (struct newah *)(void *)ahdrpos;
                ahsumpos = (u_char *)(ahdr + 1);
                ahdr->ah_len = (plen >> 2) + 1; /* plus one for seq# */
                ahdr->ah_nxt = ip->ip_p;
@@ -617,7 +617,7 @@ ah4_finaldst(m)
                                return NULL;
                        }
                        i += q[i + IPOPT_OLEN] - sizeof(struct in_addr);
-                       return (struct in_addr *)(q + i);
+                       return (struct in_addr *)(void *)(q + i);
                default:
                        if (q[i + IPOPT_OLEN] < 2 ||
                            optlen - i < q[i + IPOPT_OLEN]) {
index 905de9ba235eec69d6824b419c43be439e965e19..8a1d06c0a077dd1fdc18fa541b232e99499fc169 100644 (file)
 #include <net/pfkeyv2.h>
 #include <netkey/keydb.h>
 #include <netkey/key.h>
-#include <crypto/des/des.h>
-#include <crypto/blowfish/blowfish.h>
-#include <crypto/cast128/cast128.h>
+#include <libkern/crypto/des.h>
 
 #include <net/net_osdep.h>
 
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETIPSEC, 1)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETIPSEC, 3)
 #define DBG_FNC_ESPAUTH                NETDBG_CODE(DBG_NETIPSEC, (8 << 8))
+#define MAX_SBUF_LEN            2000
 
 extern lck_mtx_t *sadb_mutex;
 
@@ -130,22 +129,6 @@ static int esp_des_blockdecrypt(const struct esp_algorithm *,
 static int esp_des_blockencrypt(const struct esp_algorithm *,
        struct secasvar *, u_int8_t *, u_int8_t *);
 static int esp_cbc_mature(struct secasvar *);
-#if ALLCRYPTO
-static int esp_blowfish_schedule(const struct esp_algorithm *,
-       struct secasvar *);
-static int esp_blowfish_schedlen(const struct esp_algorithm *);
-static int esp_blowfish_blockdecrypt(const struct esp_algorithm *,
-       struct secasvar *, u_int8_t *, u_int8_t *);
-static int esp_blowfish_blockencrypt(const struct esp_algorithm *,
-       struct secasvar *, u_int8_t *, u_int8_t *);
-static int esp_cast128_schedule(const struct esp_algorithm *,
-       struct secasvar *);
-static int esp_cast128_schedlen(const struct esp_algorithm *);
-static int esp_cast128_blockdecrypt(const struct esp_algorithm *,
-       struct secasvar *, u_int8_t *, u_int8_t *);
-static int esp_cast128_blockencrypt(const struct esp_algorithm *,
-       struct secasvar *, u_int8_t *, u_int8_t *);
-#endif /* ALLCRYPTO */
 static int esp_3des_schedule(const struct esp_algorithm *,
        struct secasvar *);
 static int esp_3des_schedlen(const struct esp_algorithm *);
@@ -178,19 +161,6 @@ static const struct esp_algorithm null_esp =
        { 1, 0, esp_null_mature, 0, 2048, 0, "null",
                esp_common_ivlen, esp_null_decrypt,
                esp_null_encrypt, NULL, NULL, NULL };
-#if ALLCRYPTO
-static const struct esp_algorithm blowfish_cbc =
-       { 8, 8, esp_cbc_mature, 40, 448, esp_blowfish_schedlen, "blowfish-cbc",
-               esp_common_ivlen, esp_cbc_decrypt,
-               esp_cbc_encrypt, esp_blowfish_schedule,
-               esp_blowfish_blockdecrypt, esp_blowfish_blockencrypt, };
-static const struct esp_algorithm cast128_cbc =
-       { 8, 8, esp_cbc_mature, 40, 128, esp_cast128_schedlen,
-               "cast128-cbc",
-               esp_common_ivlen, esp_cbc_decrypt,
-               esp_cbc_encrypt, esp_cast128_schedule,
-               esp_cast128_blockdecrypt, esp_cast128_blockencrypt, };
-#endif /* ALLCRYPTO */
 static const struct esp_algorithm aes_cbc =
        { 16, 16, esp_cbc_mature, 128, 256, esp_aes_schedlen,
                "aes-cbc",
@@ -202,10 +172,6 @@ static const struct esp_algorithm *esp_algorithms[] = {
        &des_cbc,
        &des3_cbc,
        &null_esp,
-#if ALLCRYPTO
-       &blowfish_cbc,
-       &cast128_cbc,
-#endif /* ALLCRYPTO */
        &aes_cbc
 };
 
@@ -213,7 +179,6 @@ const struct esp_algorithm *
 esp_algorithm_lookup(idx)
        int idx;
 {
-
        switch (idx) {
        case SADB_EALG_DESCBC:
                return &des_cbc;
@@ -221,12 +186,6 @@ esp_algorithm_lookup(idx)
                return &des3_cbc;
        case SADB_EALG_NULL:
                return &null_esp;
-#if ALLCRYPTO
-       case SADB_X_EALG_BLOWFISHCBC:
-               return &blowfish_cbc;
-       case SADB_X_EALG_CAST128CBC:
-               return &cast128_cbc;
-#endif /* ALLCRYPTO */
        case SADB_X_EALG_RIJNDAELCBC:
                return &aes_cbc;
        default:
@@ -401,8 +360,7 @@ static int
 esp_des_schedlen(
        __unused const struct esp_algorithm *algo)
 {
-
-       return sizeof(des_key_schedule);
+       return sizeof(des_ecb_key_schedule);
 }
 
 static int
@@ -412,8 +370,8 @@ esp_des_schedule(
 {
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
-       if (des_key_sched((des_cblock *)_KEYBUF(sav->key_enc),
-           *(des_key_schedule *)sav->sched))
+       if (des_ecb_key_sched((des_cblock *)_KEYBUF(sav->key_enc),
+           (des_ecb_key_schedule *)sav->sched))
                return EINVAL;
        else
                return 0;
@@ -426,11 +384,10 @@ esp_des_blockdecrypt(
        u_int8_t *s,
        u_int8_t *d)
 {
-
        /* assumption: d has a good alignment */
        bcopy(s, d, sizeof(DES_LONG) * 2);
        des_ecb_encrypt((des_cblock *)d, (des_cblock *)d,
-           *(des_key_schedule *)sav->sched, DES_DECRYPT);
+           (des_ecb_key_schedule *)sav->sched, DES_DECRYPT);
        return 0;
 }
 
@@ -441,11 +398,10 @@ esp_des_blockencrypt(
        u_int8_t *s,
        u_int8_t *d)
 {
-
        /* assumption: d has a good alignment */
        bcopy(s, d, sizeof(DES_LONG) * 2);
        des_ecb_encrypt((des_cblock *)d, (des_cblock *)d,
-           *(des_key_schedule *)sav->sched, DES_ENCRYPT);
+           (des_ecb_key_schedule *)sav->sched, DES_ENCRYPT);
        return 0;
 }
 
@@ -498,9 +454,6 @@ esp_cbc_mature(sav)
                        return 1;
                }
                break;
-       case SADB_X_EALG_BLOWFISHCBC:
-       case SADB_X_EALG_CAST128CBC:
-               break;
        case SADB_X_EALG_RIJNDAELCBC:
                /* allows specific key sizes only */
                if (!(keylen == 128 || keylen == 192 || keylen == 256)) {
@@ -515,123 +468,12 @@ esp_cbc_mature(sav)
        return 0;
 }
 
-#if ALLCRYPTO
-static int
-esp_blowfish_schedlen(
-       __unused const struct esp_algorithm *algo)
-{
-
-       return sizeof(BF_KEY);
-}
-
-static int
-esp_blowfish_schedule(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav)
-{
-
-       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
-       BF_set_key((BF_KEY *)sav->sched, _KEYLEN(sav->key_enc),
-           (u_int8_t *) _KEYBUF(sav->key_enc));
-       return 0;
-}
-
-static int
-esp_blowfish_blockdecrypt(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav,
-       u_int8_t *s,
-       u_int8_t *d)
-{
-       /* HOLY COW!  BF_decrypt() takes values in host byteorder */
-       BF_LONG t[2];
-
-       bcopy(s, t, sizeof(t));
-       t[0] = ntohl(t[0]);
-       t[1] = ntohl(t[1]);
-       BF_decrypt(t, (BF_KEY *)sav->sched);
-       t[0] = htonl(t[0]);
-       t[1] = htonl(t[1]);
-       bcopy(t, d, sizeof(t));
-       return 0;
-}
-
-static int
-esp_blowfish_blockencrypt(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav,
-       u_int8_t *s,
-       u_int8_t *d)
-{
-       /* HOLY COW!  BF_encrypt() takes values in host byteorder */
-       BF_LONG t[2];
-
-       bcopy(s, t, sizeof(t));
-       t[0] = ntohl(t[0]);
-       t[1] = ntohl(t[1]);
-       BF_encrypt(t, (BF_KEY *)sav->sched);
-       t[0] = htonl(t[0]);
-       t[1] = htonl(t[1]);
-       bcopy(t, d, sizeof(t));
-       return 0;
-}
-
-static int
-esp_cast128_schedlen(
-       __unused const struct esp_algorithm *algo)
-{
-
-       return sizeof(u_int32_t) * 32;
-}
-
-static int
-esp_cast128_schedule(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav)
-{
-       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
-       set_cast128_subkey((u_int32_t *)sav->sched, (u_int8_t *) _KEYBUF(sav->key_enc),
-               _KEYLEN(sav->key_enc));
-       return 0;
-}
-
-static int
-esp_cast128_blockdecrypt(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav,
-       u_int8_t *s,
-       u_int8_t *d)
-{
-
-       if (_KEYLEN(sav->key_enc) <= 80 / 8)
-               cast128_decrypt_round12(d, s, (u_int32_t *)sav->sched);
-       else
-               cast128_decrypt_round16(d, s, (u_int32_t *)sav->sched);
-       return 0;
-}
-
-static int
-esp_cast128_blockencrypt(
-       __unused const struct esp_algorithm *algo,
-       struct secasvar *sav,
-       u_int8_t *s,
-       u_int8_t *d)
-{
-
-       if (_KEYLEN(sav->key_enc) <= 80 / 8)
-               cast128_encrypt_round12(d, s, (u_int32_t *)sav->sched);
-       else
-               cast128_encrypt_round16(d, s, (u_int32_t *)sav->sched);
-       return 0;
-}
-#endif /* ALLCRYPTO */
-
 static int
 esp_3des_schedlen(
        __unused const struct esp_algorithm *algo)
 {
 
-       return sizeof(des_key_schedule) * 3;
+       return sizeof(des3_ecb_key_schedule);
 }
 
 static int
@@ -639,20 +481,13 @@ esp_3des_schedule(
        __unused const struct esp_algorithm *algo,
        struct secasvar *sav)
 {
-       int error;
-       des_key_schedule *p;
-       int i;
-       char *k;
-
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
-       p = (des_key_schedule *)sav->sched;
-       k = _KEYBUF(sav->key_enc);
-       for (i = 0; i < 3; i++) {
-               error = des_key_sched((des_cblock *)(k + 8 * i), p[i]);
-               if (error)
-                       return EINVAL;
-       }
-       return 0;
+
+       if (des3_ecb_key_sched((des_cblock *)_KEYBUF(sav->key_enc),
+           (des3_ecb_key_schedule *)sav->sched))
+               return EINVAL;
+       else
+               return 0;
 }
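
With the switch from <crypto/des/des.h> to <libkern/crypto/des.h>, triple-DES keying becomes one call that schedules the whole 24-byte key into an opaque des3_ecb_key_schedule, replacing the loop that ran des_key_sched() over three 8-byte subkeys. The two shapes side by side, with stub types standing in for the real (opaque) ones:

    #include <stdint.h>

    typedef uint8_t des_cblock_like[8];
    typedef struct { uint32_t ks[32]; } des_ks_like;    /* one DES schedule */
    typedef struct { des_ks_like k[3]; } des3_ks_like;  /* all three keys */

    /* Old shape: schedule each 8-byte subkey with its own call. */
    static int
    sched_3des_old(const uint8_t key[24], des_ks_like p[3],
        int (*des_sched)(const des_cblock_like *, des_ks_like *))
    {
            for (int i = 0; i < 3; i++)
                    if (des_sched((const des_cblock_like *)(key + 8 * i),
                        &p[i]))
                            return -1;      /* EINVAL in the kernel */
            return 0;
    }

    /* New shape: one call consumes the whole 24-byte key. */
    static int
    sched_3des_new(const uint8_t key[24], des3_ks_like *ks,
        int (*des3_sched)(const uint8_t *, des3_ks_like *))
    {
            return des3_sched(key, ks) ? -1 : 0;
    }
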
 
 static int
@@ -662,13 +497,10 @@ esp_3des_blockdecrypt(
        u_int8_t *s,
        u_int8_t *d)
 {
-       des_key_schedule *p;
-
        /* assumption: d has a good alignment */
-       p = (des_key_schedule *)sav->sched;
        bcopy(s, d, sizeof(DES_LONG) * 2);
-       des_ecb3_encrypt((des_cblock *)d, (des_cblock *)d, 
-                        p[0], p[1], p[2], DES_DECRYPT);
+       des3_ecb_encrypt((des_cblock *)d, (des_cblock *)d,
+                        (des3_ecb_key_schedule *)sav->sched, DES_DECRYPT);
        return 0;
 }
 
@@ -679,13 +511,10 @@ esp_3des_blockencrypt(
        u_int8_t *s,
        u_int8_t *d)
 {
-       des_key_schedule *p;
-
        /* assumption: d has a good alignment */
-       p = (des_key_schedule *)sav->sched;
        bcopy(s, d, sizeof(DES_LONG) * 2);
-       des_ecb3_encrypt((des_cblock *)d, (des_cblock *)d, 
-                        p[0], p[1], p[2], DES_ENCRYPT);
+       des3_ecb_encrypt((des_cblock *)d, (des_cblock *)d,
+                        (des3_ecb_key_schedule *)sav->sched, DES_ENCRYPT);
        return 0;
 }
 
@@ -713,12 +542,12 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen)
        int soff, doff; /* offset from the head of chain, to head of this mbuf */
        int sn, dn;     /* offset from the head of the mbuf, to meat */
        size_t ivoff, bodyoff;
-       u_int8_t iv[MAXIVLEN], *ivp;
-       u_int8_t sbuf[MAXIVLEN], *sp;
+       u_int8_t iv[MAXIVLEN] __attribute__((aligned(4))), *ivp;
+       u_int8_t *sbuf = NULL, *sp, *sp_unaligned;
        u_int8_t *p, *q;
        struct mbuf *scut;
        int scutoff;
-       int i;
+       int i, result = 0;
        int blocklen;
        int derived;
 
@@ -820,6 +649,10 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen)
        while (s && s->m_len == 0)
                s = s->m_next;
 
+       // Allocate blocksized buffer for unaligned or non-contiguous access
+       sbuf = (u_int8_t *)_MALLOC(blocklen, M_SECA, M_DONTWAIT);
+       if (sbuf == NULL)
+               return ENOBUFS;
        while (soff < m->m_pkthdr.len) {
                /* source */
                if (sn + blocklen <= s->m_len) {
@@ -848,12 +681,19 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen)
                                m_freem(m);
                                if (d0)
                                        m_freem(d0);
-                               return ENOBUFS;
+                               result = ENOBUFS;
+                               goto end;
                        }
                        if (!d0)
                                d0 = d;
                        if (dp)
                                dp->m_next = d;
+
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
                        d->m_len = 0;
                        d->m_len = (M_TRAILINGSPACE(d) / blocklen) * blocklen;
                        if (d->m_len > i)
@@ -862,8 +702,22 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen)
                }
 
                /* decrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is unaligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       sp = sbuf;
+                       memcpy(sp, sp_unaligned, blocklen);
+               }
+               // no need to check output pointer alignment
                (*algo->blockdecrypt)(algo, sav, sp, mtod(d, u_int8_t *) + dn);
 
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+
                /* xor */
                p = ivp ? ivp : iv;
                q = mtod(d, u_int8_t *) + dn;
@@ -895,8 +749,10 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen)
        /* just in case */
        bzero(iv, sizeof(iv));
        bzero(sbuf, sizeof(sbuf));
-
-       return 0;
+end:
+       if (sbuf != NULL)
+               FREE(sbuf, M_SECA);
+       return result;
 }
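
esp_cbc_decrypt() used to read each cipher block straight through sp; it now keeps that fast path only when sp is 4-byte aligned and otherwise stages the block in the heap-allocated sbuf before calling the block cipher (esp_cbc_encrypt() below gets the identical treatment). Note that since sbuf is now a pointer, the surviving bzero(sbuf, sizeof(sbuf)) clears only pointer-size bytes, not the blocklen-byte scratch area. The align-or-copy pattern on its own:

    #include <stdint.h>
    #include <string.h>

    #define IS_P2ALIGNED_4(p)   ((((uintptr_t)(p)) & 3) == 0)

    /* Return a 4-byte-aligned view of one cipher block: sp itself when
     * already aligned, else a copy staged in the caller's scratch buffer
     * (the heap sbuf in the hunk above). */
    static const uint8_t *
    align_block(const uint8_t *sp, uint8_t *scratch, size_t blocklen)
    {
            if (IS_P2ALIGNED_4(sp))
                    return sp;              /* fast path: no copy */
            memcpy(scratch, sp, blocklen);
            return scratch;                 /* aligned staging buffer */
    }

    /* Usage: in = align_block(sp, sbuf, blocklen);
     *        (*algo->blockdecrypt)(algo, sav, in, out); */
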
 
 static int
@@ -913,12 +769,12 @@ esp_cbc_encrypt(
        int soff, doff; /* offset from the head of chain, to head of this mbuf */
        int sn, dn;     /* offset from the head of the mbuf, to meat */
        size_t ivoff, bodyoff;
-       u_int8_t iv[MAXIVLEN], *ivp;
-       u_int8_t sbuf[MAXIVLEN], *sp;
+       u_int8_t iv[MAXIVLEN] __attribute__((aligned(4))), *ivp;
+       u_int8_t *sbuf = NULL, *sp, *sp_unaligned;
        u_int8_t *p, *q;
        struct mbuf *scut;
        int scutoff;
-       int i;
+       int i, result = 0;
        int blocklen;
        int derived;
 
@@ -1026,6 +882,10 @@ esp_cbc_encrypt(
        while (s && s->m_len == 0)
                s = s->m_next;
 
+       // Allocate blocksized buffer for unaligned or non-contiguous access
+        sbuf = (u_int8_t *)_MALLOC(blocklen, M_SECA, M_DONTWAIT);
+        if (sbuf == NULL)
+                return ENOBUFS;
        while (soff < m->m_pkthdr.len) {
                /* source */
                if (sn + blocklen <= s->m_len) {
@@ -1054,12 +914,19 @@ esp_cbc_encrypt(
                                m_freem(m);
                                if (d0)
                                        m_freem(d0);
-                               return ENOBUFS;
+                               result = ENOBUFS;
+                               goto end;
                        }
                        if (!d0)
                                d0 = d;
                        if (dp)
                                dp->m_next = d;
+
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
                        d->m_len = 0;
                        d->m_len = (M_TRAILINGSPACE(d) / blocklen) * blocklen;
                        if (d->m_len > i)
@@ -1074,8 +941,22 @@ esp_cbc_encrypt(
                        q[i] ^= p[i];
 
                /* encrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is not aligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       sp = sbuf;
+                       memcpy(sp, sp_unaligned, blocklen);
+               }
+               // no need to check output pointer alignment
                (*algo->blockencrypt)(algo, sav, sp, mtod(d, u_int8_t *) + dn);
 
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+
                /* next iv */
                ivp = mtod(d, u_int8_t *) + dn;
 
@@ -1099,8 +980,10 @@ esp_cbc_encrypt(
        bzero(sbuf, sizeof(sbuf));
 
        key_sa_stir_iv(sav);
-
-       return 0;
+end:
+       if (sbuf != NULL)
+               FREE(sbuf, M_SECA);
+       return result;
 }
 
 /*------------------------------------------------------------*/
@@ -1117,7 +1000,7 @@ esp_auth(m0, skip, length, sav, sum)
        struct mbuf *m;
        size_t off;
        struct ah_algorithm_state s;
-       u_char sumbuf[AH_MAXSUMSIZE];
+       u_char sumbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        const struct ah_algorithm *algo;
        size_t siz;
        int error;
index 2052493abeba91c5253c09a2b3a1a512f1fc1b04..7470d023efee29fa37812e5af25346990098cada 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,6 +66,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
@@ -196,6 +197,9 @@ esp4_input(m, off)
                }
        }
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
        // expect udp-encap and esp packets only
        if (ip->ip_p != IPPROTO_ESP &&
@@ -205,7 +209,7 @@ esp4_input(m, off)
                IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
                goto bad;
        }
-       esp = (struct esp *)(((u_int8_t *)ip) + off);
+       esp = (struct esp *)(void *)(((u_int8_t *)ip) + off);
 #ifdef _IP_VHL
        hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 #else
@@ -276,8 +280,8 @@ esp4_input(m, off)
 
        /* check ICV */
     {
-       u_char sum0[AH_MAXSUMSIZE];
-       u_char sum[AH_MAXSUMSIZE];
+       u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4)));
+       u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        const struct ah_algorithm *sumalgo;
        size_t siz;
 
@@ -438,14 +442,25 @@ noreplaycheck:
                    (sav->flags & SADB_X_EXT_OLD) == 0 &&
                    seq && sav->replay &&
                    seq >= sav->replay->lastseq)  {
-                       struct udphdr *encap_uh = (__typeof__(encap_uh))((caddr_t)ip + off);
+                       struct udphdr *encap_uh = (__typeof__(encap_uh))(void *)((caddr_t)ip + off);
                        if (encap_uh->uh_sport &&
                            ntohs(encap_uh->uh_sport) != sav->remote_ike_port) {
                                sav->remote_ike_port = ntohs(encap_uh->uh_sport);
                        }
                }
                ip = esp4_input_strip_UDP_encap(m, off);
-               esp = (struct esp *)(((u_int8_t *)ip) + off);
+               esp = (struct esp *)(void *)(((u_int8_t *)ip) + off);
+       }
+
+       if (sav->utun_is_keepalive_fn) {
+               if (sav->utun_is_keepalive_fn(sav->utun_pcb, &m, nxt, sav->flags, (off + esplen + ivlen))) {
+                       if (m) {
+                               // not really bad; we just want to exit
+                               IPSEC_STAT_INCREMENT(ipsecstat.in_success);
+                               m = NULL;
+                       }
+                       goto bad;
+               }
        }
 
        /* was it transmitted over the IPsec tunnel SA? */
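
The two sav callbacks consulted in this hunk follow a convention that can be read off the call sites: utun_is_keepalive_fn returns nonzero when the decrypted payload was a keepalive (the hook disposes of the mbuf, so the caller counts a success and bails out), and utun_in_fn returns zero when it has fully consumed the packet. The shapes below are inferred from these calls, not taken from a header, and the type names are hypothetical:

struct mbuf;

/* Inferred signatures of the utun hooks (sketch; the real typedefs live
 * elsewhere in xnu and may differ). */
typedef int (*utun_is_keepalive_func)(void *pcb, struct mbuf **m,
    int nxt_proto, unsigned int sav_flags, unsigned long payload_off);
typedef int (*utun_in_func)(void *pcb, struct mbuf **m, int proto_family);

/*
 * Caller-side contract, mirroring the diff:
 *   if (keepalive(pcb, &m, ...)) { count success if m; m = NULL; goto bad; }
 *   if (!utun_in(pcb, &m, PF_INET))  { m = NULL; goto bad; }
 */
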
@@ -513,6 +528,12 @@ noreplaycheck:
                                }
                        }
 
+                       /*
+                        * Expect 32-bit aligned data pointer on strict-align
+                        * platforms.
+                        */
+                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
                        ip6 = mtod(m, struct ip6_hdr *);
 
                        /* ECN consideration. */
@@ -560,6 +581,15 @@ noreplaycheck:
 
                /* Clear the csum flags, they can't be valid for the inner headers */
                m->m_pkthdr.csum_flags = 0;
+
+               if (sav->utun_in_fn) {
+                       if (!(sav->utun_in_fn(sav->utun_pcb, &m, ifamily == AF_INET ? PF_INET : PF_INET6))) {
+                               m = NULL;
+                               // we just want to exit since the packet has been completely processed
+                               goto bad;
+                       }
+               }
+
                if (proto_input(ifamily == AF_INET ? PF_INET : PF_INET6, m) != 0)
                        goto bad;
 
@@ -633,7 +663,7 @@ noreplaycheck:
                                        }
                                        ip = mtod(m, struct ip *);
                                }
-                               udp = (struct udphdr *)(((u_int8_t *)ip) + off);
+                               udp = (struct udphdr *)(void *)(((u_int8_t *)ip) + off);
                        
                                lck_mtx_lock(sadb_mutex);
                                if (sav->natt_encapsulated_src_port == 0) {     
@@ -652,6 +682,14 @@ noreplaycheck:
                                struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
                                struct ip *, ip, struct ip6_hdr *, NULL);
 
+                       if (sav->utun_in_fn) {
+                               if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET))) {
+                                       m = NULL;
+                                       // we just want to exit since the packet has been completely processed
+                                       goto bad;
+                               }
+                       }
+
                        ip_proto_dispatch_in(m, off, nxt, 0);
                } else
                        m_freem(m);
@@ -708,7 +746,7 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
 
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, off, ESPMAXLEN, {return IPPROTO_DONE;});
-       esp = (struct esp *)(mtod(m, caddr_t) + off);
+       esp = (struct esp *)(void *)(mtod(m, caddr_t) + off);
 #else
        IP6_EXTHDR_GET(esp, struct esp *, m, off, ESPMAXLEN);
        if (esp == NULL) {
@@ -716,6 +754,9 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
                return IPPROTO_DONE;
        }
 #endif
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
 
        if (ntohs(ip6->ip6_plen) == 0) {
@@ -790,8 +831,8 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
 
        /* check ICV */
     {
-       u_char sum0[AH_MAXSUMSIZE];
-       u_char sum[AH_MAXSUMSIZE];
+       u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4)));
+       u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        const struct ah_algorithm *sumalgo;
        size_t siz;
 
@@ -926,6 +967,17 @@ noreplaycheck:
        ip6 = mtod(m, struct ip6_hdr *);
        ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - taillen);
 
+       if (sav->utun_is_keepalive_fn) {
+               if (sav->utun_is_keepalive_fn(sav->utun_pcb, &m, nxt, sav->flags, (off + esplen + ivlen))) {
+                       if (m) {
+                               // not really bad; we just want to exit
+                               IPSEC_STAT_INCREMENT(ipsec6stat.in_success);
+                               m = NULL;
+                       }
+                       goto bad;
+               }
+       }
+
        /* was it transmitted over the IPsec tunnel SA? */
        if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) {
                ifaddr_t ifa;
@@ -993,6 +1045,14 @@ noreplaycheck:
                        }
                }
 
+               if (sav->utun_in_fn) {
+                       if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET6))) {
+                               m = NULL;
+                               // we just want to exit since the packet has been completely processed
+                               goto bad;
+                       }
+               }
+
                if (proto_input(PF_INET6, m) != 0)
                        goto bad;
                nxt = IPPROTO_DONE;
@@ -1091,6 +1151,14 @@ noreplaycheck:
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_nomem);
                        goto bad;
                }
+
+               if (sav->utun_in_fn) {
+                       if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET6))) {
+                               m = NULL;
+                               // we just want to exit since the packet has been completely processed
+                               goto bad;
+                       }
+               }
        }
 
        *offp = off;
@@ -1183,7 +1251,7 @@ esp6_ctlinput(cmd, sa, d)
                        m_copydata(m, off, sizeof(esp), (caddr_t)&esp);
                        espp = &esp;
                } else
-                       espp = (struct newesp*)(mtod(m, caddr_t) + off);
+                       espp = (struct newesp*)(void *)(mtod(m, caddr_t) + off);
 
                if (cmd == PRC_MSGSIZE) {
                        int valid = 0;
@@ -1193,7 +1261,7 @@ esp6_ctlinput(cmd, sa, d)
                         * the address in the ICMP message payload.
                         */
                        sa6_src = ip6cp->ip6c_src;
-                       sa6_dst = (struct sockaddr_in6 *)sa;
+                       sa6_dst = (struct sockaddr_in6 *)(void *)sa;
                        sav = key_allocsa(AF_INET6,
                                          (caddr_t)&sa6_src->sin6_addr,
                                          (caddr_t)&sa6_dst->sin6_addr,
index 8d16d2c623b0a024c8c6a79de57992ba933b6d0c..9f6c0e0f0cbe0dbee378120e718a99d8c1f9997c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -457,7 +457,7 @@ esp_output(m, nexthdrp, md, af, sav)
                m->m_pkthdr.len += esphlen;
                if (udp_encapsulate) {
                        udp = mtod(n, struct udphdr *);
-                       esp = (struct esp *)((caddr_t)udp + sizeof(struct udphdr));
+                       esp = (struct esp *)(void *)((caddr_t)udp + sizeof(struct udphdr));
                } else {
                        esp = mtod(n, struct esp *);
                }
@@ -468,7 +468,7 @@ esp_output(m, nexthdrp, md, af, sav)
                esp = mtod(md, struct esp *);
                if (udp_encapsulate) {
                        udp = mtod(md, struct udphdr *);
-                       esp = (struct esp *)((caddr_t)udp + sizeof(struct udphdr));
+                       esp = (struct esp *)(void *)((caddr_t)udp + sizeof(struct udphdr));
                } else {
                        esp = mtod(md, struct esp *);
                }
@@ -726,7 +726,7 @@ esp_output(m, nexthdrp, md, af, sav)
 
     {
                const struct ah_algorithm *aalgo;
-               u_char authbuf[AH_MAXSUMSIZE];
+               u_char authbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
                u_char *p;
                size_t siz;
        #if INET
index 0f3ce7c27353b8360c1f7edd567c5f9bbc5e3112..af5ddff1f8d26f60cd2a606af1d7f42742ea41e3 100644 (file)
@@ -64,6 +64,7 @@
 #include <sys/queue.h>
 #include <sys/syslog.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 
 #include <kern/locks.h>
 
 #include <netinet6/esp.h>
 #include <netinet6/esp_rijndael.h>
 
-#include <crypto/aes/aes.h>
+#include <libkern/crypto/aes.h>
 
 #include <netkey/key.h>
 
 #include <net/net_osdep.h>
 
 #define AES_BLOCKLEN 16
+#define MAX_SBUF_LEN 2000
 
 extern lck_mtx_t *sadb_mutex;
 
@@ -149,8 +151,8 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen)
        int soff;       /* offset from the head of chain, to head of this mbuf */
        int sn, dn;     /* offset from the head of the mbuf, to meat */
        size_t ivoff, bodyoff;
-       u_int8_t iv[AES_BLOCKLEN], *dptr;
-       u_int8_t sbuf[AES_BLOCKLEN], *sp;
+       u_int8_t iv[AES_BLOCKLEN] __attribute__((aligned(4))), *dptr;
+       u_int8_t sbuf[MAX_SBUF_LEN] __attribute__((aligned(4))), *sp, *sp_unaligned;
        struct mbuf *scut;
        int scutoff;
        int     i, len;
@@ -251,6 +253,12 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen)
                                d0 = d;
                        if (dp)
                                dp->m_next = d;
+
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
                        d->m_len = M_TRAILINGSPACE(d);
                        d->m_len -= d->m_len % AES_BLOCKLEN;
                        if (d->m_len > i)
@@ -264,9 +272,23 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen)
                        len = d->m_len - dn;
 
                /* decrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is not aligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       sp = sbuf;
+                       memcpy(sp, sp_unaligned, len);
+               }
+               // no need to check output pointer alignment
                aes_decrypt_cbc(sp, iv, len >> 4, dptr + dn, 
                                (aes_decrypt_ctx*)(&(((aes_ctx*)sav->sched)->decrypt)));
                
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+
                /* update offsets */
                sn += len;
                dn += len;
@@ -309,8 +331,9 @@ esp_cbc_encrypt_aes(
        int soff;       /* offset from the head of chain, to head of this mbuf */
        int sn, dn;     /* offset from the head of the mbuf, to meat */
        size_t ivoff, bodyoff;
-       u_int8_t *ivp, *dptr;
-       u_int8_t sbuf[AES_BLOCKLEN], *sp;
+       u_int8_t *ivp, *dptr, *ivp_unaligned;
+       u_int8_t sbuf[MAX_SBUF_LEN] __attribute__((aligned(4))), *sp, *sp_unaligned;
+       u_int8_t ivp_aligned_buf[AES_BLOCKLEN] __attribute__((aligned(4)));
        struct mbuf *scut;
        int scutoff;
        int i, len;
@@ -412,6 +435,11 @@ esp_cbc_encrypt_aes(
                        if (dp)
                                dp->m_next = d;
 
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
                        d->m_len = M_TRAILINGSPACE(d);
                        d->m_len -= d->m_len % AES_BLOCKLEN;
                        if (d->m_len > i)
@@ -425,9 +453,34 @@ esp_cbc_encrypt_aes(
                        len = d->m_len - dn;
                
                /* encrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is not aligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       sp = sbuf;
+                       memcpy(sp, sp_unaligned, len);
+               }
+               // check ivp pointer alignment and use a separate aligned buffer (if ivp is not aligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(ivp)) {
+                       ivp_unaligned = NULL;
+               } else {
+                       ivp_unaligned = ivp;
+                       ivp = ivp_aligned_buf;
+                       memcpy(ivp, ivp_unaligned, AES_BLOCKLEN);
+               }
+               // no need to check output pointer alignment
                aes_encrypt_cbc(sp, ivp, len >> 4, dptr + dn, 
                        (aes_encrypt_ctx*)(&(((aes_ctx*)sav->sched)->encrypt)));
 
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+               if (!IPSEC_IS_P2ALIGNED(ivp_unaligned)) {
+                       ivp = ivp_unaligned;
+               }
+
                /* update offsets */
                sn += len;
                dn += len;
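
Note that the IV bounce differs from the data bounce: a CBC IV is exactly one cipher block, so the copy into ivp_aligned_buf is AES_BLOCKLEN bytes, never the run length len (copying len would overrun the 16-byte buffer). In isolation:

#include <stdint.h>
#include <string.h>

#define AES_BLOCKLEN 16

/* Return an aligned pointer to the IV, bouncing through 'buf' if needed. */
static const uint8_t *
aligned_iv(const uint8_t *ivp, uint8_t buf[AES_BLOCKLEN])
{
	if (((uintptr_t)ivp & 3) == 0)
		return ivp;			/* already aligned */
	memcpy(buf, ivp, AES_BLOCKLEN);		/* one block, no more */
	return buf;
}
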
index b6b68b920f95be055cf40ed16d71942f4128e9f4..00174a628b581014fd67060af8e6300df2ee478e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -61,6 +61,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
@@ -180,6 +181,9 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
        struct sockaddr_in6 *dst;
 #endif
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE);
index 43a61a6d22f4cff125b0af5feae2a99695a87b76..cdf92a564c9939b260a1b8d21a31c2300266be4d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -98,6 +98,7 @@
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/domain.h>
+#include <sys/kauth.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -434,7 +436,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
        struct icmp6_hdr *icmp6, *nicmp6;
        int off = *offp;
        int icmp6len = m->m_pkthdr.len - *offp;
-       int code, sum, noff;
+       int code, sum, noff, proxy = 0;
 
        ifp = m->m_pkthdr.rcvif;
 
@@ -443,11 +445,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
        /* m might change if M_LOOP.  So, call mtod after this */
 #endif
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        /*
         * Locate icmp6 structure in mbuf, and check
         * that not corrupted and of at least minimum length
         */
-
        ip6 = mtod(m, struct ip6_hdr *);
        if (icmp6len < sizeof(struct icmp6_hdr)) {
                icmp6stat.icp6s_tooshort++;
@@ -466,9 +470,16 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
                in6_multihead_lock_done();
 
                if (inm == NULL) {
-                       ip6stat.ip6s_notmember++;
-                       in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
-                       goto freeit;
+                       /*
+                        * Don't discard if this is a Neighbor Solicitation
+                        * that needs to be proxied (see check down below.)
+                        */
+                       if (!(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST)) {
+                               ip6stat.ip6s_notmember++;
+                               in6_ifstat_inc(m->m_pkthdr.rcvif,
+                                   ifs6_in_discard);
+                               goto freeit;
+                       }
                } else {
                        IN6M_REMREF(inm);
                }
@@ -496,23 +507,22 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
                goto freeit;
        }
 
-#if defined(NFAITH) && 0 < NFAITH
-       if (faithprefix(&ip6->ip6_dst)) {
+       if (m->m_pkthdr.aux_flags & MAUXF_PROXY_DST) {
                /*
-                * Deliver very specific ICMP6 type only.
-                * This is important to deliver TOOBIG.  Otherwise PMTUD
-                * will not work.
+                * This is the special case of proxying NS (dst is either
+                * solicited-node multicast or unicast); process it locally
+                * but don't deliver it to sockets.  It practically lets us
+                * steer the packet to nd6_prproxy_ns_input, where more
+                * specific tests and actions will be taken.
                 */
                switch (icmp6->icmp6_type) {
-               case ICMP6_DST_UNREACH:
-               case ICMP6_PACKET_TOO_BIG:
-               case ICMP6_TIME_EXCEEDED:
+               case ND_NEIGHBOR_SOLICIT:
+                       proxy = 1;
                        break;
                default:
                        goto freeit;
                }
        }
-#endif
 
        icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
        icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
@@ -658,8 +668,12 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
                                sizeof(*nicmp6));
                        noff = off;
                }
-               nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
-               nicmp6->icmp6_code = 0;
+               if (nicmp6 == NULL)
+                       panic("nicmp6 is NULL in %s, which isn't good!\n", __FUNCTION__);
+               else {
+                       nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
+                       nicmp6->icmp6_code = 0;
+               }
                if (n) {
                        icmp6stat.icp6s_reflect++;
                        icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
@@ -919,12 +933,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
                goto freeit;
        }
 rate_limit_checked:
-       /* deliver the packet to appropriate sockets */
-       icmp6_rip6_input(&m, *offp);
-
-       return IPPROTO_DONE;
+       /* deliver the packet to appropriate sockets (unless proxying) */
+       if (!proxy) {
+               icmp6_rip6_input(&m, *offp);
+               return IPPROTO_DONE;
+       }
 
- freeit:
+freeit:
        m_freem(m);
        return IPPROTO_DONE;
 }
@@ -1051,7 +1066,7 @@ icmp6_notify_error(m, off, icmp6len, code)
                                        /* just ignore a bogus header */
                                        if ((rth0->ip6r0_len % 2) == 0 &&
                                            (hops = rth0->ip6r0_len/2))
-                                               finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
+                                               finaldst = (struct in6_addr *)(void *)(rth0 + 1) + (hops - 1);
                                }
                                eoff += rthlen;
                                nxt = rth->ip6r_nxt;
@@ -1764,7 +1779,7 @@ ni6_addrs(ni6, ifpp, subj)
                case ICMP6_NI_SUBJ_IPV6:
                        if (subj == NULL) /* must be impossible... */
                                return(0);
-                       subj_ip6 = (struct sockaddr_in6 *)subj;
+                       subj_ip6 = (struct sockaddr_in6 *)(void *)subj;
                        break;
                default:
                        /*
@@ -2007,7 +2022,7 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
                              sizeof(struct in6_addr));
                        /* XXX: KAME link-local hack; remove ifindex */
                        if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
-                               ((struct in6_addr *)cp)->s6_addr16[1] = 0;
+                               ((struct in6_addr *)(void *)cp)->s6_addr16[1] = 0;
                        cp += sizeof(struct in6_addr);
 
                        resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
@@ -2067,9 +2082,9 @@ icmp6_rip6_input(mp, off)
        rip6src.sin6_family = AF_INET6;
        rip6src.sin6_len = sizeof(struct sockaddr_in6);
        rip6src.sin6_addr = ip6->ip6_src;
-       if (sa6_recoverscope(&rip6src)) 
+       if (sa6_recoverscope(&rip6src, TRUE)) 
                return (IPPROTO_DONE);
-       
+
        lck_rw_lock_shared(ripcbinfo.mtx);
        LIST_FOREACH(in6p, &ripcb, inp_list)
        {
@@ -2141,8 +2156,7 @@ error:
        m_freem(m);
        m_freem(opts);
        ip6stat.ip6s_delivered--;
-       return IPPROTO_DONE;            
-       
+       return IPPROTO_DONE;
 }
 
 /*
@@ -2162,11 +2176,15 @@ icmp6_reflect(m, off)
        int type, code;
        struct ifnet *outif = NULL;
        struct sockaddr_in6 sa6_src, sa6_dst;
+       struct nd_ifinfo *ndi;
        u_int32_t oflow;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
 
-       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) {
                ip6oa.ip6oa_boundif = m->m_pkthdr.rcvif->if_index;
+               ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+       }
 
        /* too short to reflect */
        if (off < sizeof(struct ip6_hdr)) {
@@ -2284,6 +2302,11 @@ icmp6_reflect(m, off)
                sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
 
                bzero(&ro, sizeof(ro));
+               /*
+                * in6_selectsrc() might return outif with its reference held
+                * even in the error case, so we always need to release it
+                * if non-NULL.
+                */
                src = in6_selectsrc(&sin6, NULL, NULL, &ro, &outif,
                    &src_storage, ip6oa.ip6oa_boundif, &e);
                if (ro.ro_rt)
@@ -2306,11 +2329,19 @@ icmp6_reflect(m, off)
        }
        ip6->ip6_nxt = IPPROTO_ICMPV6;
        lck_rw_lock_shared(nd_if_rwlock);
-       if (outif)
-               ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
-       if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim) {
+       if (outif != NULL && (ndi = ND_IFINFO(outif)) != NULL &&
+           ndi->initialized) {
+               lck_mtx_lock(&ndi->lock);
+               ip6->ip6_hlim = ndi->chlim;
+               lck_mtx_unlock(&ndi->lock);
+       }
+       if (m->m_pkthdr.rcvif != NULL &&
+           (ndi = ND_IFINFO(m->m_pkthdr.rcvif)) != NULL &&
+           ndi->initialized) {
                /* XXX: This may not be the outgoing interface */
-               ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
+               lck_mtx_lock(&ndi->lock);
+               ip6->ip6_hlim = ndi->chlim;
+               lck_mtx_unlock(&ndi->lock);
        } else {
                ip6->ip6_hlim = ip6_defhlim;
        }
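
The rewritten hop-limit selection replaces unlocked indexing into the nd_ifinfo array with a validate-then-lock read of the per-interface state. Reduced to its essentials (kernel-context sketch; field and lock names as in the hunk):

	/* Copy one field out of per-ifp ND state under its mutex, falling
	 * back to the global default when the state is absent. */
	u_int8_t hlim = ip6_defhlim;
	struct nd_ifinfo *ndi;

	if ((ndi = ND_IFINFO(ifp)) != NULL && ndi->initialized) {
		lck_mtx_lock(&ndi->lock);
		hlim = ndi->chlim;
		lck_mtx_unlock(&ndi->lock);
	}
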
@@ -2335,6 +2366,9 @@ icmp6_reflect(m, off)
                ifnet_release(outif);
                outif = NULL;
        }
+       m->m_pkthdr.rcvif = NULL;
+       m->m_pkthdr.csum_data = 0;
+       m->m_pkthdr.csum_flags = 0;
        ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa);
        if (outif != NULL) {
                icmp6_ifoutstat_inc(outif, type, code);
@@ -2385,8 +2419,11 @@ icmp6_redirect_input(m, off)
        if (!m || !ifp)
                return;
 
-       /* XXX if we are router, we don't update route by icmp6 redirect */
-       if (ip6_forwarding)
+       /*
+        * If we are an advertising router on this interface,
+        * don't update route by icmp6 redirect.
+        */
+       if (ifp->if_eflags & IFEF_IPV6_ROUTER)
                goto freeit;
        if (!icmp6_rediraccept)
                goto freeit;
@@ -2446,7 +2483,8 @@ icmp6_redirect_input(m, off)
                        goto bad;
                }
 
-               gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
+               gw6 = &(((struct sockaddr_in6 *)(void *)
+                   rt->rt_gateway)->sin6_addr);
                if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
                        nd6log((LOG_ERR,
                                "ICMP6 redirect rejected; "
@@ -2551,7 +2589,7 @@ icmp6_redirect_input(m, off)
        sdst.sin6_family = AF_INET6;
        sdst.sin6_len = sizeof(struct sockaddr_in6);
        bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
-       
+
        /*
          * Radar 6843900
         * Release the IPv6 domain lock because we are going to take domain_proto_mtx
@@ -2591,7 +2629,8 @@ icmp6_redirect_output(m0, rt)
        u_char *p;
        struct ifnet *outif = NULL;
        struct sockaddr_in6 src_sa;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
 
        icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
 
@@ -2602,8 +2641,11 @@ icmp6_redirect_output(m0, rt)
        if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
                goto fail;
 
-       /* if we are not router, we don't send icmp6 redirect */
-       if (!ip6_forwarding || ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))
+       /*
+        * If we are not a router to begin with, or not an advertising
+        * router on this interface, don't send icmp6 redirect.
+        */
+       if (!ip6_forwarding || !(ifp->if_eflags & IFEF_IPV6_ROUTER))
                goto fail;
 
        /*
@@ -2672,7 +2714,7 @@ icmp6_redirect_output(m0, rt)
        /* get ip6 linklocal address for the router. */
        if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
                struct sockaddr_in6 *sin6;
-               sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
+               sin6 = (struct sockaddr_in6 *)(void *)rt->rt_gateway;
                router_ll6 = &sin6->sin6_addr;
                if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
                        router_ll6 = (struct in6_addr *)NULL;
@@ -2747,8 +2789,8 @@ icmp6_redirect_output(m0, rt)
                if (!(rt_router->rt_flags & RTF_GATEWAY) &&
                        (rt_router->rt_flags & RTF_LLINFO) &&
                        (rt_router->rt_gateway->sa_family == AF_LINK) &&
-                       (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
-                       sdl->sdl_alen) {
+                       (sdl = (struct sockaddr_dl *)(void *)
+                       rt_router->rt_gateway) && sdl->sdl_alen) {
                                nd_opt = (struct nd_opt_hdr *)p;
                                nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
                                nd_opt->nd_opt_len = len >> 3;
@@ -2758,7 +2800,7 @@ icmp6_redirect_output(m0, rt)
                }
                RT_REMREF_LOCKED(rt_router);
                RT_UNLOCK(rt_router);
-       }       
+       }
 
 nolladdropt:;
 
@@ -2863,6 +2905,7 @@ noredhdropt:;
 #endif /*IPSEC*/
 
        ip6oa.ip6oa_boundif = ifp->if_index;
+       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
 
        ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa);
        if (outif) {
@@ -2972,7 +3015,7 @@ icmp6_ctloutput(so, sopt)
 int
 icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 {
-       if (so->so_uid == 0)
+       if (kauth_cred_issuser(so->so_cred))
                return icmp6_ctloutput(so, sopt);
 
        if (sopt->sopt_level == IPPROTO_ICMPV6) {
@@ -2983,14 +3026,13 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
                                return EPERM;
                }
        }
-       
+
        if (sopt->sopt_level != IPPROTO_IPV6)
                return EINVAL;
-               
+
        switch (sopt->sopt_name) {
                case IPV6_UNICAST_HOPS:
                case IPV6_CHECKSUM:
-               case IPV6_FAITH:
                case IPV6_V6ONLY:
                case IPV6_USE_MIN_MTU:
                case IPV6_RECVRTHDR:
@@ -3020,11 +3062,9 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
                case IPV6_NO_IFT_CELLULAR:
 
                        return ip6_ctloutput(so, sopt);
-               
+
                default:
                        return EPERM;
-                       
-               
        }
 }
 
@@ -3036,11 +3076,12 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m,
        int error = 0;
        struct inpcb *inp = sotoinpcb(so);
        struct sockaddr_in6 tmp;
-       struct sockaddr_in6 *dst = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam;
        struct icmp6_hdr *icmp6;
 
-       if (so->so_uid == 0)
-               return rip6_output(m, so, (struct sockaddr_in6 *) nam, control, 0);
+       if (kauth_cred_issuser(so->so_cred))
+               return rip6_output(m, so, (struct sockaddr_in6 *)(void *)nam,
+                   control, 0);
 
        /* always copy sockaddr to avoid overwrites */
        if (so->so_state & SS_ISCONNECTED) {
@@ -3060,7 +3101,7 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m,
                        m_freem(m);
                        return ENOTCONN;
                }
-               tmp = *(struct sockaddr_in6 *)nam;
+               tmp = *(struct sockaddr_in6 *)(void *)nam;
                dst = &tmp;
        }
 
@@ -3074,7 +3115,7 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m,
                                goto bad;
                }
                icmp6 = mtod(m, struct icmp6_hdr *);
-       
+
                /*
                 * Allow only to send echo request and node information request
                 * See RFC 2463 for Echo Request Message format
@@ -3097,7 +3138,8 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m,
        }
 #endif
 
-       return rip6_output(m, so, (struct sockaddr_in6 *) nam, control, 0);
+       return rip6_output(m, so, (struct sockaddr_in6 *)(void *)nam,
+          control, 0);
 bad:
        m_freem(m);
        return error;
@@ -3113,10 +3155,10 @@ icmp6_dgram_attach(struct socket *so, int proto, struct proc *p)
         inp = sotoinpcb(so);
         if (inp)
                 panic("icmp6_dgram_attach");
-               
+
                if (proto != IPPROTO_ICMPV6)
                        return EINVAL;
-                       
+
         error = soreserve(so, rip_sendspace, rip_recvspace);
         if (error)
                 return error;
index f11a990414e5ad1a4b3e436ad263f8c9cd75ee31..8a39cc647b8655ba991e261e9ffb45f3b39f9115 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -170,8 +170,10 @@ const struct in6_addr in6mask128 = IN6MASK128;
 const struct sockaddr_in6 sa6_any = {sizeof(sa6_any), AF_INET6,
                                     0, 0, IN6ADDR_ANY_INIT, 0};
 
-static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t,
+static int in6_lifaddr_ioctl(struct socket *, u_long, struct if_laddrreq *,
        struct ifnet *, struct proc *);
+static int in6_autoconf(struct ifnet *, int);
+static int in6_setrouter(struct ifnet *, int);
 static int in6_ifinit(struct ifnet *, struct in6_ifaddr *,
                           struct sockaddr_in6 *, int);
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
@@ -183,6 +185,8 @@ static void in6_ifaddr_trace(struct ifaddr *, int);
 static struct in6_aliasreq *in6_aliasreq_to_native(void *, int,
     struct in6_aliasreq *);
 
+static void in6_ifaddr_set_dadprogress(struct in6_ifaddr *);
+
 extern lck_mtx_t *nd6_mutex;
 extern int in6_init2done;
 
@@ -483,7 +487,7 @@ in6_aliasreq_to_native(void *data, int data_is_64, struct in6_aliasreq *dst)
 {
 #if defined(__LP64__)
        if (data_is_64)
-               dst = data;
+               bcopy(data, dst, sizeof (*dst));
        else
                in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)data,
                    (struct in6_aliasreq_64 *)dst);
@@ -492,147 +496,157 @@ in6_aliasreq_to_native(void *data, int data_is_64, struct in6_aliasreq *dst)
                in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)data,
                    (struct in6_aliasreq_32 *)dst);
        else
-               dst = data;
+               bcopy(data, dst, sizeof (*dst));
 #endif /* __LP64__ */
        return (dst);
 }
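
The two bcopy changes above fix a subtle bug: dst is a by-value pointer parameter, so "dst = data" only rebound the callee's local and returned the caller's raw data pointer, bypassing the kernel-stack copy the caller set up; later code then dereferenced a possibly misaligned, user-reachable buffer. Reduced to a toy (memcpy standing in for bcopy):

#include <string.h>

struct req { int v; };

/* Broken: rebinding the parameter; *dst never receives the data, and the
 * returned pointer is just 'data' again. */
static struct req *
to_native_broken(void *data, struct req *dst)
{
	dst = data;
	return dst;
}

/* Fixed: the caller's buffer is actually filled. */
static struct req *
to_native_fixed(void *data, struct req *dst)
{
	memcpy(dst, data, sizeof (*dst));
	return dst;
}
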
 
-#define ifa2ia6(ifa)   ((struct in6_ifaddr *)(ifa))
-#define ia62ifa(ia6)   (&((ia6)->ia_ifa))
+#define ifa2ia6(ifa)   ((struct in6_ifaddr *)(void *)(ifa))
 
 int
 in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct proc *p)
 {
-       struct  in6_ifreq *ifr = (struct in6_ifreq *)data;
+       struct in6_aliasreq sifra, *ifra = NULL;
        struct  in6_ifaddr *ia = NULL;
-       struct  in6_aliasreq sifra;
-       struct  in6_aliasreq *ifra = NULL;
-       struct sockaddr_in6 *sa6;
+       struct sockaddr_in6 sin6, *sa6 = NULL;
        int index, privileged, error = 0;
+       u_int32_t ifru_scope_id[16];
        struct timeval timenow;
        int p64 = proc_is64bit(p);
 
        getmicrotime(&timenow);
 
        privileged = (proc_suser(p) == 0);
-#if MROUTING
        switch (cmd) {
-       case SIOCGETSGCNT_IN6:
-       case SIOCGETMIFCNT_IN6_32:
-       case SIOCGETMIFCNT_IN6_64:
+#if MROUTING
+       case SIOCGETSGCNT_IN6:          /* struct sioc_sg_req6 */
+       case SIOCGETMIFCNT_IN6_32:      /* struct sioc_mif_req6_32 */
+       case SIOCGETMIFCNT_IN6_64:      /* struct sioc_mif_req6_64 */
                return (mrt6_ioctl(cmd, data));
-       }
+               /* NOTREACHED */
 #endif
 
-       switch(cmd) {
-       case SIOCAADDRCTL_POLICY:
-       case SIOCDADDRCTL_POLICY:
-        if (!privileged)
+       case SIOCAADDRCTL_POLICY:       /* struct in6_addrpolicy */
+       case SIOCDADDRCTL_POLICY:       /* struct in6_addrpolicy */
+               if (!privileged)
                        return (EPERM);
                return (in6_src_ioctl(cmd, data));
-       }
+               /* NOTREACHED */
 
-       switch (cmd) {
-       case SIOCDRADD_IN6_32:
-       case SIOCDRADD_IN6_64:
-       case SIOCDRDEL_IN6_32:
-       case SIOCDRDEL_IN6_64:
+       case SIOCDRADD_IN6_32:          /* struct in6_defrouter_32 */
+       case SIOCDRADD_IN6_64:          /* struct in6_defrouter_64 */
+       case SIOCDRDEL_IN6_32:          /* struct in6_defrouter_32 */
+       case SIOCDRDEL_IN6_64:          /* struct in6_defrouter_64 */
                 if (!privileged)
                         return (EPERM);
                return (defrtrlist_ioctl(cmd, data));
+               /* NOTREACHED */
        }
 
        if (ifp == NULL)
                return (EOPNOTSUPP);
 
        switch (cmd) {
-       case SIOCAUTOCONF_START:
-       case SIOCAUTOCONF_STOP:
-       case SIOCLL_START_32:
-       case SIOCLL_START_64:
-       case SIOCLL_STOP:
-       case SIOCPROTOATTACH_IN6_32:
-       case SIOCPROTOATTACH_IN6_64:
-       case SIOCPROTODETACH_IN6:
+       case SIOCAUTOCONF_START:        /* struct in6_ifreq */
+       case SIOCAUTOCONF_STOP:         /* struct in6_ifreq */
+       case SIOCLL_START_32:           /* struct in6_aliasreq_32 */
+       case SIOCLL_START_64:           /* struct in6_aliasreq_64 */
+       case SIOCLL_STOP:               /* struct in6_ifreq */
+       case SIOCSETROUTERMODE_IN6:     /* struct in6_ifreq */
+       case SIOCPROTOATTACH_IN6_32:    /* struct in6_aliasreq_32 */
+       case SIOCPROTOATTACH_IN6_64:    /* struct in6_aliasreq_64 */
+       case SIOCPROTODETACH_IN6:       /* struct in6_ifreq */
                 if (!privileged)
                         return (EPERM);
                break;
-       case SIOCSNDFLUSH_IN6:
-       case SIOCSPFXFLUSH_IN6:
-       case SIOCSRTRFLUSH_IN6:
-       case SIOCSDEFIFACE_IN6_32:
-       case SIOCSDEFIFACE_IN6_64:
-       case SIOCSIFINFO_FLAGS:
+
+       case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
+       case SIOCSPFXFLUSH_IN6:         /* struct in6_ifreq */
+       case SIOCSRTRFLUSH_IN6:         /* struct in6_ifreq */
+       case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
+       case SIOCSDEFIFACE_IN6_64:      /* struct in6_ndifreq_64 */
+       case SIOCSIFINFO_FLAGS:         /* struct in6_ndireq */
                if (!privileged)
                        return (EPERM);
-               /* fall through */
-       case OSIOCGIFINFO_IN6:
-       case SIOCGIFINFO_IN6:
-       case SIOCGDRLST_IN6_32:
-       case SIOCGDRLST_IN6_64:
-       case SIOCGPRLST_IN6_32:
-       case SIOCGPRLST_IN6_64:
-       case SIOCGNBRINFO_IN6_32:
-       case SIOCGNBRINFO_IN6_64:
-       case SIOCGDEFIFACE_IN6_32:
-       case SIOCGDEFIFACE_IN6_64:
+               /* FALLTHRU */
+       case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
+       case SIOCGIFINFO_IN6:           /* struct in6_ondireq */
+       case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
+       case SIOCGDRLST_IN6_64:         /* struct in6_drlist_64 */
+       case SIOCGPRLST_IN6_32:         /* struct in6_prlist_32 */
+       case SIOCGPRLST_IN6_64:         /* struct in6_prlist_64 */
+       case SIOCGNBRINFO_IN6_32:       /* struct in6_nbrinfo_32 */
+       case SIOCGNBRINFO_IN6_64:       /* struct in6_nbrinfo_64 */
+       case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
+       case SIOCGDEFIFACE_IN6_64:      /* struct in6_ndifreq_64 */
                return (nd6_ioctl(cmd, data, ifp));
-       }
+               /* NOTREACHED */
 
-       switch (cmd) {
-       case SIOCSIFPREFIX_IN6:
-       case SIOCDIFPREFIX_IN6:
-       case SIOCAIFPREFIX_IN6:
-       case SIOCCIFPREFIX_IN6:
-       case SIOCSGIFPREFIX_IN6:
-       case SIOCGIFPREFIX_IN6:
+       case SIOCSIFPREFIX_IN6:         /* struct in6_prefixreq */
+       case SIOCDIFPREFIX_IN6:         /* struct in6_prefixreq */
+       case SIOCAIFPREFIX_IN6:         /* struct in6_rrenumreq */
+       case SIOCCIFPREFIX_IN6:         /* struct in6_rrenumreq */
+       case SIOCSGIFPREFIX_IN6:        /* struct in6_rrenumreq */
+       case SIOCGIFPREFIX_IN6:         /* struct in6_prefixreq */
                log(LOG_NOTICE,
                    "prefix ioctls are now invalidated. "
                    "please use ifconfig.\n");
                return (EOPNOTSUPP);
-       }
+               /* NOTREACHED */
+
+       case SIOCSSCOPE6: {             /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       switch (cmd) {
-       case SIOCSSCOPE6:
                if (!privileged)
                        return (EPERM);
-               return (scope6_set(ifp, ifr->ifr_ifru.ifru_scope_id));
+
+               bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id,
+                   sizeof (ifru_scope_id));
+
+               return (scope6_set(ifp, ifru_scope_id));
                /* NOTREACHED */
+       }
+
+       case SIOCGSCOPE6: {             /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGSCOPE6:
-               return (scope6_get(ifp, ifr->ifr_ifru.ifru_scope_id));
+               bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id,
+                   sizeof (ifru_scope_id));
+
+               return (scope6_get(ifp, ifru_scope_id));
                /* NOTREACHED */
+       }
 
-       case SIOCGSCOPE6DEF:
-               return (scope6_get_default(ifr->ifr_ifru.ifru_scope_id));
+       case SIOCGSCOPE6DEF: {          /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
+
+               bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id,
+                   sizeof (ifru_scope_id));
+
+               return (scope6_get_default(ifru_scope_id));
+               /* NOTREACHED */
        }
 
-       switch (cmd) {
-       case SIOCALIFADDR:
-       case SIOCDLIFADDR:
+       case SIOCALIFADDR:              /* struct if_laddrreq */
+       case SIOCDLIFADDR:              /* struct if_laddrreq */
                if (!privileged)
                        return(EPERM);
-               /* fall through */
-       case SIOCGLIFADDR:
-               return (in6_lifaddr_ioctl(so, cmd, data, ifp, p));
+               /* FALLTHRU */
+       case SIOCGLIFADDR: {            /* struct if_laddrreq */
+               struct if_laddrreq iflr;
+
+               bcopy(data, &iflr, sizeof (iflr));
+               error = in6_lifaddr_ioctl(so, cmd, &iflr, ifp, p);
+               bcopy(&iflr, data, sizeof (iflr));
+               return (error);
+               /* NOTREACHED */
+       }
        }
 
-       /*
-        * Find address for this interface, if it exists.
-        *
-        * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
-        * only, and used the first interface address as the target of other
-        * operations (without checking ifra_addr).  This was because netinet
-        * code/API assumed at most 1 interface address per interface.
-        * Since IPv6 allows a node to assign multiple addresses
-        * on a single interface, we almost always look and check the
-        * presence of ifra_addr, and reject invalid ones here.
-        * It also decreases duplicated code among SIOC*_IN6 operations.
-        */
        switch (cmd) {
-       case SIOCLL_START_32:
-       case SIOCAIFADDR_IN6_32:
+       case SIOCLL_START_32:           /* struct in6_aliasreq_32 */
+       case SIOCAIFADDR_IN6_32: {      /* struct in6_aliasreq_32 */
                /*
                 * Convert user ifra to the kernel form, when appropriate.
                 * This allows the conversion between different data models
@@ -640,88 +654,65 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                 * routines that are expecting the kernel form.
                 */
                ifra = in6_aliasreq_to_native(data, 0, &sifra);
-               sa6 = (struct sockaddr_in6 *)&ifra->ifra_addr;
+               bcopy(&ifra->ifra_addr, &sin6, sizeof (sin6));
+               sa6 = &sin6;
                break;
+       }
 
-       case SIOCLL_START_64:
-       case SIOCAIFADDR_IN6_64:
+       case SIOCLL_START_64:           /* struct in6_aliasreq_64 */
+       case SIOCAIFADDR_IN6_64: {      /* struct in6_aliasreq_64 */
+               /*
+                * Convert user ifra to the kernel form, when appropriate.
+                * This allows the conversion between different data models
+                * to be centralized, so that it can be passed around to other
+                * routines that are expecting the kernel form.
+                */
                ifra = in6_aliasreq_to_native(data, 1, &sifra);
-               sa6 = (struct sockaddr_in6 *)&ifra->ifra_addr;
+               bcopy(&ifra->ifra_addr, &sin6, sizeof (sin6));
+               sa6 = &sin6;
                break;
+       }
 
-       case SIOCSIFADDR_IN6:           /* deprecated */
-       case SIOCGIFADDR_IN6:
-       case SIOCSIFDSTADDR_IN6:        /* deprecated */
-       case SIOCSIFNETMASK_IN6:        /* deprecated */
-       case SIOCGIFDSTADDR_IN6:
-       case SIOCGIFNETMASK_IN6:
-       case SIOCDIFADDR_IN6:
-       case SIOCGIFPSRCADDR_IN6:
-       case SIOCGIFPDSTADDR_IN6:
-       case SIOCGIFAFLAG_IN6:
-       case SIOCSNDFLUSH_IN6:
-       case SIOCSPFXFLUSH_IN6:
-       case SIOCSRTRFLUSH_IN6:
-       case SIOCGIFALIFETIME_IN6:
-       case SIOCSIFALIFETIME_IN6:
-       case SIOCGIFSTAT_IN6:
-       case SIOCGIFSTAT_ICMP6:
-               sa6 = &ifr->ifr_addr;
+       case SIOCSIFADDR_IN6:           /* struct in6_ifreq (deprecated) */
+       case SIOCGIFADDR_IN6:           /* struct in6_ifreq */
+       case SIOCSIFDSTADDR_IN6:        /* struct in6_ifreq (deprecated) */
+       case SIOCSIFNETMASK_IN6:        /* struct in6_ifreq (deprecated) */
+       case SIOCGIFDSTADDR_IN6:        /* struct in6_ifreq */
+       case SIOCGIFNETMASK_IN6:        /* struct in6_ifreq */
+       case SIOCDIFADDR_IN6:           /* struct in6_ifreq */
+       case SIOCGIFPSRCADDR_IN6:       /* struct in6_ifreq */
+       case SIOCGIFPDSTADDR_IN6:       /* struct in6_ifreq */
+       case SIOCGIFAFLAG_IN6:          /* struct in6_ifreq */
+       case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
+       case SIOCSPFXFLUSH_IN6:         /* struct in6_ifreq */
+       case SIOCSRTRFLUSH_IN6:         /* struct in6_ifreq */
+       case SIOCGIFALIFETIME_IN6:      /* struct in6_ifreq */
+       case SIOCSIFALIFETIME_IN6:      /* struct in6_ifreq */
+       case SIOCGIFSTAT_IN6:           /* struct in6_ifreq */
+       case SIOCGIFSTAT_ICMP6: {       /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
+
+               bcopy(&ifr->ifr_addr, &sin6, sizeof (sin6));
+               sa6 = &sin6;
                break;
+       }
 
        default:
-               sa6 = NULL;
                break;
        }
 
        switch (cmd) {
-
        case SIOCAUTOCONF_START:
-               ifnet_lock_exclusive(ifp);
-               ifp->if_eflags |= IFEF_ACCEPT_RTADVD;
-               ifnet_lock_done(ifp);
-               return (0);
+               return (in6_autoconf(ifp, TRUE));
                /* NOTREACHED */
 
-       case SIOCAUTOCONF_STOP: {
-               ifnet_lock_exclusive(ifp);
-               ifp->if_eflags &= ~IFEF_ACCEPT_RTADVD;
-               ifnet_lock_done(ifp);
-
-               /* Remove autoconfigured address from interface */
-               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
-               ia = in6_ifaddrs;
-               while (ia != NULL) {
-                       if (ia->ia_ifa.ifa_ifp != ifp) {
-                               ia = ia->ia_next;
-                               continue;
-                       }
-                       IFA_LOCK(&ia->ia_ifa);
-                       if (ia->ia6_flags & IN6_IFF_AUTOCONF) {
-                               IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for us */
-                               IFA_UNLOCK(&ia->ia_ifa);
-                               lck_rw_done(&in6_ifaddr_rwlock);
-                               in6_purgeaddr(&ia->ia_ifa);
-                               IFA_REMREF(&ia->ia_ifa);        /* for us */
-                               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
-                               /*
-                                * Purging the address caused in6_ifaddr_rwlock
-                                * to be dropped and reacquired;
-                                * therefore search again from the beginning
-                                * of in6_ifaddrs list.
-                                */
-                               ia = in6_ifaddrs;
-                               continue;
-                       }
-                       IFA_UNLOCK(&ia->ia_ifa);
-                       ia = ia->ia_next;
-               }
-               lck_rw_done(&in6_ifaddr_rwlock);
-               return (0);
-       }
+       case SIOCAUTOCONF_STOP:
+               return (in6_autoconf(ifp, FALSE));
+               /* NOTREACHED */
 
        case SIOCLL_START_32:
        case SIOCLL_START_64:
+               VERIFY(ifra != NULL);
                /*
                 * NOTE: All the interface specific DLIL attachements should
                 * be done here.  They are currently done in in6_ifattach()
@@ -733,12 +724,12 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        /* some interfaces may provide LinkLocal addresses */
                        error = in6_if_up(ifp, ifra);
                } else {
-                       error = in6_if_up(ifp, 0);
+                       error = in6_if_up(ifp, NULL);
                }
                return (error);
                /* NOTREACHED */
 
-       case SIOCLL_STOP: {
+       case SIOCLL_STOP:
                /* Remove link local addresses from interface */
                lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
                ia = in6_ifaddrs;
@@ -769,14 +760,25 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                }
                lck_rw_done(&in6_ifaddr_rwlock);
                return (0);
+               /* NOTREACHED */
+
+       case SIOCSETROUTERMODE_IN6: {   /* struct in6_ifreq */
+               int intval;
+
+               VERIFY(ifp != NULL);
+               bcopy(&((struct in6_ifreq *)(void *)data)->ifr_intval,
+                   &intval, sizeof (intval));
+
+               return (in6_setrouter(ifp, intval));
+               /* NOTREACHED */
        }
 
-       case SIOCPROTOATTACH_IN6_32:
-       case SIOCPROTOATTACH_IN6_64:
+       case SIOCPROTOATTACH_IN6_32:    /* struct in6_aliasreq_32 */
+       case SIOCPROTOATTACH_IN6_64:    /* struct in6_aliasreq_64 */
                return (in6_domifattach(ifp));
                /* NOTREACHED */
 
-       case SIOCPROTODETACH_IN6:
+       case SIOCPROTODETACH_IN6:       /* struct in6_ifreq */
                /* Cleanup interface routes and addresses */
                in6_purgeif(ifp);
 
@@ -784,11 +786,20 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        printf("SIOCPROTODETACH_IN6: %s error=%d\n",
                            if_name(ifp), error);
                return (error);
+               /* NOTREACHED */
        }
 
        /*
-        * Find address for this interface, if it exists; depending
-        * on the ioctl command, sa6 points to the address in ifra/ifr.
+        * Find address for this interface, if it exists.
+        *
+        * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
+        * only, and used the first interface address as the target of other
+        * operations (without checking ifra_addr).  This was because netinet
+        * code/API assumed at most 1 interface address per interface.
+        * Since IPv6 allows a node to assign multiple addresses
+        * on a single interface, we almost always look and check the
+        * presence of ifra_addr, and reject invalid ones here.
+        * It also decreases duplicated code among SIOC*_IN6 operations.
         */
        if (sa6 != NULL && sa6->sin6_family == AF_INET6) {
                if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) {
@@ -813,9 +824,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        }
 
        switch (cmd) {
-       case SIOCSIFADDR_IN6:
-       case SIOCSIFDSTADDR_IN6:
-       case SIOCSIFNETMASK_IN6:
+       case SIOCSIFADDR_IN6:           /* struct in6_ifreq */
+       case SIOCSIFDSTADDR_IN6:        /* struct in6_ifreq */
+       case SIOCSIFNETMASK_IN6:        /* struct in6_ifreq */
                /*
                 * Since IPv6 allows a node to assign multiple addresses
                 * on a single interface, SIOCSIFxxx ioctls are deprecated.
@@ -824,7 +835,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                error = EINVAL;
                goto ioctl_cleanup;
 
-       case SIOCDIFADDR_IN6:
+       case SIOCDIFADDR_IN6:           /* struct in6_ifreq */
                /*
                 * for IPv4, we look for existing in_ifaddr here to allow
                 * "ifconfig if0 delete" to remove the first IPv4 address on
@@ -837,8 +848,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        goto ioctl_cleanup;
                }
                /* FALLTHROUGH */
-       case SIOCAIFADDR_IN6_32:
-       case SIOCAIFADDR_IN6_64:
+       case SIOCAIFADDR_IN6_32:        /* struct in6_aliasreq_32 */
+       case SIOCAIFADDR_IN6_64:        /* struct in6_aliasreq_64 */
+               VERIFY(sa6 != NULL);
                /*
                 * We always require users to specify a valid IPv6 address for
                 * the corresponding operation.  Use "sa6" instead of "ifra"
@@ -853,16 +865,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        error = EPERM;
                        goto ioctl_cleanup;
                }
-
                break;
 
-       case SIOCGIFADDR_IN6:
+       case SIOCGIFADDR_IN6:           /* struct in6_ifreq */
                /* This interface is basically deprecated. use SIOCGIFCONF. */
-               /* fall through */
-       case SIOCGIFAFLAG_IN6:
-       case SIOCGIFNETMASK_IN6:
-       case SIOCGIFDSTADDR_IN6:
-       case SIOCGIFALIFETIME_IN6:
+               /* FALLTHRU */
+       case SIOCGIFAFLAG_IN6:          /* struct in6_ifreq */
+       case SIOCGIFNETMASK_IN6:        /* struct in6_ifreq */
+       case SIOCGIFDSTADDR_IN6:        /* struct in6_ifreq */
+       case SIOCGIFALIFETIME_IN6:      /* struct in6_ifreq */
                /* must think again about its semantics */
                if (ia == NULL) {
                        error = EADDRNOTAVAIL;
@@ -870,7 +881,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                }
                break;
 
-       case SIOCSIFALIFETIME_IN6:
+       case SIOCSIFALIFETIME_IN6: {    /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
+
                if (!privileged) {
                        error = EPERM;
                        goto ioctl_cleanup;
@@ -881,39 +894,37 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                }
                /* sanity for overflow - beware unsigned */
                if (p64) {
-                       struct in6_addrlifetime_64 *lt;
+                       struct in6_addrlifetime_64 lt;
 
-                       lt = (struct in6_addrlifetime_64 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
+                       bcopy(&ifr->ifr_ifru.ifru_lifetime, &lt, sizeof (lt));
                        if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
-                             || lt->ia6t_vltime != ND6_INFINITE_LIFETIME)
-                           && lt->ia6t_vltime + timenow.tv_sec <
+                             || lt.ia6t_vltime != ND6_INFINITE_LIFETIME)
+                           && lt.ia6t_vltime + timenow.tv_sec <
                               timenow.tv_sec) {
                                error = EINVAL;
                                goto ioctl_cleanup;
                        }
                        if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
-                             || lt->ia6t_pltime != ND6_INFINITE_LIFETIME)
-                           && lt->ia6t_pltime + timenow.tv_sec <
+                             || lt.ia6t_pltime != ND6_INFINITE_LIFETIME)
+                           && lt.ia6t_pltime + timenow.tv_sec <
                               timenow.tv_sec) {
                                error = EINVAL;
                                goto ioctl_cleanup;
                        }
                } else {
-                       struct in6_addrlifetime_32 *lt;
+                       struct in6_addrlifetime_32 lt;
 
-                       lt = (struct in6_addrlifetime_32 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
+                       bcopy(&ifr->ifr_ifru.ifru_lifetime, &lt, sizeof (lt));
                        if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
-                             || lt->ia6t_vltime != ND6_INFINITE_LIFETIME)
-                           && lt->ia6t_vltime + timenow.tv_sec <
+                             || lt.ia6t_vltime != ND6_INFINITE_LIFETIME)
+                           && lt.ia6t_vltime + timenow.tv_sec <
                               timenow.tv_sec) {
                                error = EINVAL;
                                goto ioctl_cleanup;
                        }
                        if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
-                             || lt->ia6t_pltime != ND6_INFINITE_LIFETIME)
-                           && lt->ia6t_pltime + timenow.tv_sec <
+                             || lt.ia6t_pltime != ND6_INFINITE_LIFETIME)
+                           && lt.ia6t_pltime + timenow.tv_sec <
                               timenow.tv_sec) {
                                error = EINVAL;
                                goto ioctl_cleanup;
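
A recurring pattern in the hunks above: direct pointer overlays on the
ioctl payload (e.g. lt = (struct in6_addrlifetime_64 *)
&ifr->ifr_ifru.ifru_lifetime) are replaced by bcopy() into a stack-local
copy. The issue is alignment: "data" is a flat caddr_t buffer with no
guarantee it satisfies the alignment of a 64-bit overlay, so dereferencing
the cast pointer is undefined behavior on strict-alignment targets. A
minimal user-space sketch of the safe idiom, with illustrative stand-in
names rather than the kernel's types:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct lifetime64 {                     /* stand-in for in6_addrlifetime_64 */
        uint64_t vltime;
        uint64_t pltime;
};

/* Read a lifetime out of a byte buffer at an arbitrary offset. */
static struct lifetime64
read_lifetime(const char *buf, size_t off)
{
        struct lifetime64 lt;

        /* Safe on any alignment: copy into an aligned local first,
         * exactly as the kernel change does with bcopy(). */
        memcpy(&lt, buf + off, sizeof (lt));
        return (lt);
}

int
main(void)
{
        char raw[32] = { 0 };
        struct lifetime64 src = { 600, 300 }, lt;

        memcpy(raw + 1, &src, sizeof (src));    /* deliberately misaligned */
        lt = read_lifetime(raw, 1);
        printf("vltime=%llu pltime=%llu\n",
            (unsigned long long)lt.vltime, (unsigned long long)lt.pltime);
        return (0);
}
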
@@ -921,20 +932,28 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                }
                break;
        }
+       }
 
        switch (cmd) {
+       case SIOCGIFADDR_IN6: {         /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
+               struct sockaddr_in6 addr;
 
-       case SIOCGIFADDR_IN6:
                IFA_LOCK(&ia->ia_ifa);
-               ifr->ifr_addr = ia->ia_addr;
+               bcopy(&ia->ia_addr, &addr, sizeof (addr));
                IFA_UNLOCK(&ia->ia_ifa);
-               if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) {
+               if ((error = sa6_recoverscope(&addr, TRUE)) != 0) {
                        IFA_REMREF(&ia->ia_ifa);
                        return (error);
                }
+               bcopy(&addr, &ifr->ifr_addr, sizeof (addr));
                break;
+       }
+
+       case SIOCGIFDSTADDR_IN6: {      /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
+               struct sockaddr_in6 dstaddr;
 
-       case SIOCGIFDSTADDR_IN6:
                if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
                        error = EINVAL;
                        goto ioctl_cleanup;
@@ -944,45 +963,61 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                 * an error?
                 */
                IFA_LOCK(&ia->ia_ifa);
-               ifr->ifr_dstaddr = ia->ia_dstaddr;
+               bcopy(&ia->ia_dstaddr, &dstaddr, sizeof (dstaddr));
                IFA_UNLOCK(&ia->ia_ifa);
-               if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) {
+               if ((error = sa6_recoverscope(&dstaddr, TRUE)) != 0) {
                        IFA_REMREF(&ia->ia_ifa);
                        return (error);
                }
+               bcopy(&dstaddr, &ifr->ifr_dstaddr, sizeof (dstaddr));
                break;
+       }
+
+       case SIOCGIFNETMASK_IN6: {      /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGIFNETMASK_IN6:
                IFA_LOCK(&ia->ia_ifa);
-               ifr->ifr_addr = ia->ia_prefixmask;
+               bcopy(&ia->ia_prefixmask, &ifr->ifr_addr,
+                   sizeof (struct sockaddr_in6));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
+       }
+
+       case SIOCGIFAFLAG_IN6: {        /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGIFAFLAG_IN6:
                IFA_LOCK(&ia->ia_ifa);
-               ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
+               bcopy(&ia->ia6_flags, &ifr->ifr_ifru.ifru_flags6,
+                   sizeof (ifr->ifr_ifru.ifru_flags6));
                IFA_UNLOCK(&ia->ia_ifa);
                break;
+       }
+
+       case SIOCGIFSTAT_IN6: {         /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGIFSTAT_IN6:
                if (ifp == NULL) {
                        error = EINVAL;
                        goto ioctl_cleanup;
                }
                index = ifp->if_index;
                lck_rw_lock_shared(&in6_ifs_rwlock);
-               if (in6_ifstat == NULL || index >= in6_ifstatmax
-                || in6_ifstat[index] == NULL) {
+               if (in6_ifstat == NULL || index >= in6_ifstatmax ||
+                   in6_ifstat[index] == NULL) {
                        /* return EAFNOSUPPORT? */
                        bzero(&ifr->ifr_ifru.ifru_stat,
                            sizeof (ifr->ifr_ifru.ifru_stat));
                } else {
-                       ifr->ifr_ifru.ifru_stat = *in6_ifstat[index];
+                       bcopy(in6_ifstat[index], &ifr->ifr_ifru.ifru_stat,
+                           sizeof (ifr->ifr_ifru.ifru_stat));
                }
                lck_rw_done(&in6_ifs_rwlock);
                break;
+       }
+
+       case SIOCGIFSTAT_ICMP6: {       /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGIFSTAT_ICMP6:
                if (ifp == NULL) {
                        error = EINVAL;
                        goto ioctl_cleanup;
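
Both getter cases above now stage the result in a stack copy, recover the
scope there, and only then bcopy() it back out to the request buffer; note
that sa6_recoverscope() also takes an extra argument (TRUE) in this
version. The underlying KAME convention: inside the kernel, the zone ID of
a link-local address is embedded in bytes 2-3 of the address itself and
must be moved into sin6_scope_id before the sockaddr is handed to user
space. A simplified user-space rendition (a sketch; real code also
validates the recovered index against the interface table):

#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>

/* Move a KAME-style embedded zone ID into sin6_scope_id. */
static void
recoverscope(struct sockaddr_in6 *sin6)
{
        if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) ||
            IN6_IS_ADDR_MC_LINKLOCAL(&sin6->sin6_addr)) {
                if (sin6->sin6_scope_id == 0) {
                        sin6->sin6_scope_id =
                            ((uint16_t)sin6->sin6_addr.s6_addr[2] << 8) |
                            sin6->sin6_addr.s6_addr[3];
                }
                /* clear the embedded form before it leaves the kernel */
                sin6->sin6_addr.s6_addr[2] = 0;
                sin6->sin6_addr.s6_addr[3] = 0;
        }
}

int
main(void)
{
        struct sockaddr_in6 sin6 = { 0 };

        sin6.sin6_addr.s6_addr[0] = 0xfe;       /* fe80::1 with zone 4 */
        sin6.sin6_addr.s6_addr[1] = 0x80;
        sin6.sin6_addr.s6_addr[3] = 4;
        sin6.sin6_addr.s6_addr[15] = 1;
        recoverscope(&sin6);
        printf("scope_id=%u\n", sin6.sin6_scope_id);    /* prints 4 */
        return (0);
}
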
@@ -995,61 +1030,64 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        bzero(&ifr->ifr_ifru.ifru_stat,
                            sizeof (ifr->ifr_ifru.ifru_icmp6stat));
                } else {
-                       ifr->ifr_ifru.ifru_icmp6stat = *icmp6_ifstat[index];
+                       bcopy(icmp6_ifstat[index],
+                           &ifr->ifr_ifru.ifru_icmp6stat,
+                           sizeof (ifr->ifr_ifru.ifru_icmp6stat));
                }
                lck_rw_done(&icmp6_ifs_rwlock);
                break;
+       }
+
+       case SIOCGIFALIFETIME_IN6: {    /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCGIFALIFETIME_IN6:
                IFA_LOCK(&ia->ia_ifa);
                if (p64) {
-                       struct in6_addrlifetime_64 *lt;
-
-                       lt = (struct in6_addrlifetime_64 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
-                       lt->ia6t_expire = ia->ia6_lifetime.ia6t_expire;
-                       lt->ia6t_preferred = ia->ia6_lifetime.ia6t_preferred;
-                       lt->ia6t_vltime = ia->ia6_lifetime.ia6t_vltime;
-                       lt->ia6t_pltime = ia->ia6_lifetime.ia6t_pltime;
+                       struct in6_addrlifetime_64 lt;
+
+                       bzero(&lt, sizeof (lt));
+                       lt.ia6t_expire = ia->ia6_lifetime.ia6t_expire;
+                       lt.ia6t_preferred = ia->ia6_lifetime.ia6t_preferred;
+                       lt.ia6t_vltime = ia->ia6_lifetime.ia6t_vltime;
+                       lt.ia6t_pltime = ia->ia6_lifetime.ia6t_pltime;
+                       bcopy(&lt, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt));
                } else {
-                       struct in6_addrlifetime_32 *lt;
+                       struct in6_addrlifetime_32 lt;
 
-                       lt = (struct in6_addrlifetime_32 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
-                       lt->ia6t_expire =
-                           (uint32_t)ia->ia6_lifetime.ia6t_expire;
-                       lt->ia6t_preferred =
+                       bzero(&lt, sizeof (lt));
+                       lt.ia6t_expire = (uint32_t)ia->ia6_lifetime.ia6t_expire;
+                       lt.ia6t_preferred =
                            (uint32_t)ia->ia6_lifetime.ia6t_preferred;
-                       lt->ia6t_vltime =
-                           (uint32_t)ia->ia6_lifetime.ia6t_vltime;
-                       lt->ia6t_pltime =
-                           (uint32_t)ia->ia6_lifetime.ia6t_pltime;
+                       lt.ia6t_vltime = (uint32_t)ia->ia6_lifetime.ia6t_vltime;
+                       lt.ia6t_pltime = (uint32_t)ia->ia6_lifetime.ia6t_pltime;
+                       bcopy(&lt, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt));
                }
                IFA_UNLOCK(&ia->ia_ifa);
                break;
+       }
+
+       case SIOCSIFALIFETIME_IN6: {    /* struct in6_ifreq */
+               struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data;
 
-       case SIOCSIFALIFETIME_IN6:
                IFA_LOCK(&ia->ia_ifa);
                if (p64) {
-                       struct in6_addrlifetime_64 *lt;
-
-                       lt = (struct in6_addrlifetime_64 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
-                       ia->ia6_lifetime.ia6t_expire = lt->ia6t_expire;
-                       ia->ia6_lifetime.ia6t_preferred = lt->ia6t_preferred;
-                       ia->ia6_lifetime.ia6t_vltime = lt->ia6t_vltime;
-                       ia->ia6_lifetime.ia6t_pltime = lt->ia6t_pltime;
+                       struct in6_addrlifetime_64 lt;
+
+                       bcopy(&ifr->ifr_ifru.ifru_lifetime, &lt, sizeof (lt));
+                       ia->ia6_lifetime.ia6t_expire = lt.ia6t_expire;
+                       ia->ia6_lifetime.ia6t_preferred = lt.ia6t_preferred;
+                       ia->ia6_lifetime.ia6t_vltime = lt.ia6t_vltime;
+                       ia->ia6_lifetime.ia6t_pltime = lt.ia6t_pltime;
                } else {
-                       struct in6_addrlifetime_32 *lt;
+                       struct in6_addrlifetime_32 lt;
 
-                       lt = (struct in6_addrlifetime_32 *)
-                           &ifr->ifr_ifru.ifru_lifetime;
+                       bcopy(&ifr->ifr_ifru.ifru_lifetime, &lt, sizeof (lt));
                        ia->ia6_lifetime.ia6t_expire =
-                           (uint32_t)lt->ia6t_expire;
+                           (uint32_t)lt.ia6t_expire;
                        ia->ia6_lifetime.ia6t_preferred =
-                           (uint32_t)lt->ia6t_preferred;
-                       ia->ia6_lifetime.ia6t_vltime = lt->ia6t_vltime;
-                       ia->ia6_lifetime.ia6t_pltime = lt->ia6t_pltime;
+                           (uint32_t)lt.ia6t_preferred;
+                       ia->ia6_lifetime.ia6t_vltime = lt.ia6t_vltime;
+                       ia->ia6_lifetime.ia6t_pltime = lt.ia6t_pltime;
                }
                /* for sanity */
                if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME ||
@@ -1066,12 +1104,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                        ia->ia6_lifetime.ia6t_preferred = 0;
                IFA_UNLOCK(&ia->ia_ifa);
                break;
+       }
 
-       case SIOCAIFADDR_IN6_32:
-       case SIOCAIFADDR_IN6_64: {
+       case SIOCAIFADDR_IN6_32:        /* struct in6_aliasreq_32 */
+       case SIOCAIFADDR_IN6_64: {      /* struct in6_aliasreq_64 */
                int i;
                struct nd_prefix pr0, *pr;
 
+               VERIFY(ifra != NULL);
+
                /* Attempt to attach the protocol, in case it isn't attached */
                error = in6_domifattach(ifp);
                if (error) {
@@ -1104,6 +1145,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                 * been validated in in6_update_ifa().
                 */
                bzero(&pr0, sizeof(pr0));
+               lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
                pr0.ndpr_ifp = ifp;
                pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
                                             NULL);
@@ -1136,7 +1178,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                         * nd6_prelist_add will install the corresponding
                         * interface route.
                         */
-                       if ((error = nd6_prelist_add(&pr0, NULL, &pr, FALSE)) != 0)
+                       if ((error = nd6_prelist_add(&pr0, NULL, &pr,
+                           FALSE)) != 0)
                                goto ioctl_cleanup;
                        if (pr == NULL) {
                                log(LOG_ERR, "nd6_prelist_add succedded but "
@@ -1147,8 +1190,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                }
                if (ia != NULL)
                        IFA_REMREF(&ia->ia_ifa);
-               if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
-                   == NULL) {
+               if ((ia = in6ifa_ifpwithaddr(ifp,
+                   &ifra->ifra_addr.sin6_addr)) == NULL) {
                        /* XXX: this should not happen! */
                        log(LOG_ERR, "in6_control: addition succeeded, but"
                            " no ifaddr\n");
@@ -1207,7 +1250,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                break;
        }
 
-       case SIOCDIFADDR_IN6: {
+       case SIOCDIFADDR_IN6: {         /* struct in6_ifreq */
                int i = 0;
                struct nd_prefix pr0, *pr;
 
@@ -1252,7 +1295,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                            pr->ndpr_addrcnt == 1) ||
                            ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 &&
                            pr->ndpr_addrcnt == 0)) {
-                               pr->ndpr_expire = 1; /* XXX: just for expiration */
+                               /* XXX: just for expiration */
+                               pr->ndpr_expire = 1;
                        }
                        NDPR_UNLOCK(pr);
                        IFA_UNLOCK(&ia->ia_ifa);
@@ -1273,12 +1317,126 @@ purgeaddr:
                error = ifnet_ioctl(ifp, PF_INET6, cmd, data);
                goto ioctl_cleanup;
        }
+
 ioctl_cleanup:
        if (ia != NULL)
                IFA_REMREF(&ia->ia_ifa);
        return (error);
 }
 
+static int
+in6_autoconf(struct ifnet *ifp, int enable)
+{
+       int error = 0;
+
+       if (ifp->if_flags & IFF_LOOPBACK)
+               return (EINVAL);
+
+       if (enable) {
+               /*
+                * An interface in IPv6 router mode implies that it
+                * is either configured with a static IP address or
+                * autoconfigured via a locally-generated RA.  Prevent
+                * SIOCAUTOCONF_START from being set in that mode.
+                */
+               ifnet_lock_exclusive(ifp);
+               if (ifp->if_eflags & IFEF_IPV6_ROUTER) {
+                       ifp->if_eflags &= ~IFEF_ACCEPT_RTADV;
+                       error = EBUSY;
+               } else {
+                       ifp->if_eflags |= IFEF_ACCEPT_RTADV;
+               }
+               ifnet_lock_done(ifp);
+       } else {
+               struct in6_ifaddr *ia = NULL;
+
+               ifnet_lock_exclusive(ifp);
+               ifp->if_eflags &= ~IFEF_ACCEPT_RTADV;
+               ifnet_lock_done(ifp);
+
+               /* Remove autoconfigured address from interface */
+               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+               ia = in6_ifaddrs;
+               while (ia != NULL) {
+                       if (ia->ia_ifa.ifa_ifp != ifp) {
+                               ia = ia->ia_next;
+                               continue;
+                       }
+                       IFA_LOCK(&ia->ia_ifa);
+                       if (ia->ia6_flags & IN6_IFF_AUTOCONF) {
+                               IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for us */
+                               IFA_UNLOCK(&ia->ia_ifa);
+                               lck_rw_done(&in6_ifaddr_rwlock);
+                               in6_purgeaddr(&ia->ia_ifa);
+                               IFA_REMREF(&ia->ia_ifa);        /* for us */
+                               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+                               /*
+                                * Purging the address caused in6_ifaddr_rwlock
+                                * to be dropped and reacquired;
+                                * therefore search again from the beginning
+                                * of the in6_ifaddrs list.
+                                */
+                               ia = in6_ifaddrs;
+                               continue;
+                       }
+                       IFA_UNLOCK(&ia->ia_ifa);
+                       ia = ia->ia_next;
+               }
+               lck_rw_done(&in6_ifaddr_rwlock);
+       }
+       return (error);
+}
+
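
The disable path of in6_autoconf() above uses a recurring XNU idiom:
in6_purgeaddr() needs in6_ifaddr_rwlock itself, so the walker takes a
reference, drops the lock, purges, reacquires, and restarts from the list
head, since any node (including the saved next pointer) may have been
unlinked while the lock was down. Progress is guaranteed because each pass
leaves one fewer matching entry. A generic sketch of the restart-from-head
pattern (list and lock names are not the kernel's):

#include <pthread.h>

struct node {
        struct node *next;
        int matches;            /* stand-in for IN6_IFF_AUTOCONF */
};

static struct node *list_head;
static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Unlinks 'n' and clears its match; may take list_lock internally.
 * (The kernel also holds a refcount on 'n' across the unlocked call.) */
extern void purge(struct node *n);

static void
purge_matching(void)
{
        struct node *n;

        pthread_rwlock_wrlock(&list_lock);
        n = list_head;
        while (n != NULL) {
                if (!n->matches) {
                        n = n->next;
                        continue;
                }
                pthread_rwlock_unlock(&list_lock); /* drop for side effects */
                purge(n);
                pthread_rwlock_wrlock(&list_lock);
                n = list_head;          /* restart: 'n->next' is stale */
        }
        pthread_rwlock_unlock(&list_lock);
}
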
+/*
+ * Handle SIOCSETROUTERMODE_IN6 to set or clear the IPv6 router mode flag on
+ * the interface.  Entering or exiting this mode will result in the removal of
+ * autoconfigured IPv6 addresses on the interface.
+ */
+static int
+in6_setrouter(struct ifnet *ifp, int enable)
+{
+       if (ifp->if_flags & IFF_LOOPBACK)
+               return (ENODEV);
+
+       if (enable) {
+               struct nd_ifinfo *ndi;
+
+               lck_rw_lock_shared(nd_if_rwlock);
+               ndi = ND_IFINFO(ifp);
+               if (ndi != NULL && ndi->initialized) {
+                       lck_mtx_lock(&ndi->lock);
+                       if (ndi->flags & ND6_IFF_PROXY_PREFIXES) {
+                               /* No proxy if we are an advertising router */
+                               ndi->flags &= ~ND6_IFF_PROXY_PREFIXES;
+                               lck_mtx_unlock(&ndi->lock);
+                               lck_rw_done(nd_if_rwlock);
+                               (void) nd6_if_prproxy(ifp, FALSE);
+                       } else {
+                               lck_mtx_unlock(&ndi->lock);
+                               lck_rw_done(nd_if_rwlock);
+                       }
+               } else {
+                       lck_rw_done(nd_if_rwlock);
+               }
+       }
+
+       ifnet_lock_exclusive(ifp);
+       if (enable) {
+               ifp->if_eflags |= IFEF_IPV6_ROUTER;
+       } else {
+               ifp->if_eflags &= ~IFEF_IPV6_ROUTER;
+       }
+       ifnet_lock_done(ifp);
+
+       lck_mtx_lock(nd6_mutex);
+       defrouter_select(ifp);
+       lck_mtx_unlock(nd6_mutex);
+
+       if_allmulti(ifp, enable);
+
+       return (in6_autoconf(ifp, FALSE));
+}
+
 /*
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
@@ -1570,13 +1728,20 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
                ia->ia6_lifetime.ia6t_preferred = timenow.tv_sec;
        }
        /*
-        * Make the address tentative before joining multicast addresses,
+        * Mark the address as tentative before joining multicast addresses,
         * so that corresponding MLD responses would not have a tentative
         * source address.
         */
        ia->ia6_flags &= ~IN6_IFF_DUPLICATED;   /* safety */
        if (hostIsNew && in6if_do_dad(ifp))
-               ia->ia6_flags |= IN6_IFF_TENTATIVE;
+               in6_ifaddr_set_dadprogress(ia);
+
+       /*
+        * Do not delay sending neighbor solicitations when using optimistic
+        * duplicate address detection, cf. RFC 4429.
+        */
+       if (ia->ia6_flags & IN6_IFF_OPTIMISTIC)
+               flags &= ~IN6_IFAUPDATE_DADDELAY;
 
        /*
         * We are done if we have simply modified an existing address.
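
The tests elsewhere in this change move from IN6_IFF_TENTATIVE to
IN6_IFF_DADPROGRESS, and in6_ifaddr_set_dadprogress() (added at the end of
this file) sets either IN6_IFF_TENTATIVE or IN6_IFF_OPTIMISTIC, which
suggests DADPROGRESS is simply the union of the two in-progress states. A
sketch of the presumed relationship -- the mask composition is inferred
from usage here, and the bit values are illustrative, not the header's:

#include <stdint.h>

#define IN6_IFF_TENTATIVE       0x0002  /* values illustrative only */
#define IN6_IFF_OPTIMISTIC      0x0100
/* assumed: DAD is "in progress" in either the classic (tentative)
 * or the optimistic (RFC 4429) state */
#define IN6_IFF_DADPROGRESS     (IN6_IFF_TENTATIVE | IN6_IFF_OPTIMISTIC)

static int
addr_is_doing_dad(uint32_t ia6_flags)
{
        return ((ia6_flags & IN6_IFF_DADPROGRESS) != 0);
}
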
@@ -1669,9 +1834,8 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
                rt = rtalloc1_scoped((struct sockaddr *)&mltaddr, 0, 0UL,
                    ia->ia_ifp->if_index);
                if (rt) {
-                       if (memcmp(&mltaddr.sin6_addr,
-                           &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
-                           MLTMASK_LEN)) {
+                       if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)
+                           (void *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) {
                                rtfree(rt);
                                rt = NULL;
                        }
@@ -1746,9 +1910,8 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
                rt = rtalloc1_scoped((struct sockaddr *)&mltaddr, 0, 0UL,
                    ia->ia_ifp->if_index);
                if (rt) {
-                       if (memcmp(&mltaddr.sin6_addr,
-                           &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
-                           MLTMASK_LEN)) {
+                       if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)
+                           (void *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) {
                                rtfree(rt);
                                rt = NULL;
                        }
@@ -1795,12 +1958,13 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
         */
        if (hostIsNew && in6if_do_dad(ifp) &&
            ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
-           (ia->ia6_flags & IN6_IFF_TENTATIVE))
+           (ia->ia6_flags & IN6_IFF_DADPROGRESS))
        {
                int mindelay, maxdelay;
+               int *delayptr, delayval;
 
                IFA_UNLOCK(ifa);
-               delay = 0;
+               delayptr = NULL;
                if ((flags & IN6_IFAUPDATE_DADDELAY)) {
                        /*
                         * We need to impose a delay before sending an NS
@@ -1819,14 +1983,15 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
                        }
                        maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
                        if (maxdelay - mindelay == 0)
-                               delay = 0;
+                               delayval = 0;
                        else {
-                               delay =
+                               delayval =
                                    (random() % (maxdelay - mindelay)) +
                                    mindelay;
                        }
+                       delayptr = &delayval;
                }
-               nd6_dad_start((struct ifaddr *)ia, &delay);
+               nd6_dad_start((struct ifaddr *)ia, delayptr);
        } else {
                IFA_UNLOCK(ifa);
        }
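
Turning the plain integer "delay" into "delayptr" lets one argument
express three cases: NULL means no delay was requested at all, while a
non-NULL pointer carries an exact tick count that may legitimately be
zero; the old integer could not distinguish "no request" from "zero
ticks". A small sketch of the convention (the function name here is
hypothetical, not nd6_dad_start's real contract):

#include <stddef.h>
#include <stdio.h>

/* delayticks == NULL: no constraint from the caller;
 * otherwise *delayticks (possibly 0) is the exact delay to use. */
static void
dad_start(const int *delayticks)
{
        if (delayticks == NULL)
                printf("start DAD on the default schedule\n");
        else
                printf("start DAD after exactly %d ticks\n", *delayticks);
}

int
main(void)
{
        int zero = 0;

        dad_start(NULL);        /* default behavior */
        dad_start(&zero);       /* an explicit zero is meaningful */
        return (0);
}
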
@@ -2013,8 +2178,8 @@ in6_purgeif(struct ifnet *ifp)
                IFA_ADDREF(&ia->ia_ifa);        /* for us */
                lck_rw_done(&in6_ifaddr_rwlock);
                in6_purgeaddr(&ia->ia_ifa);
-               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
                IFA_REMREF(&ia->ia_ifa);        /* for us */
+               lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
                /*
                 * Purging the address would have caused
                 * in6_ifaddr_rwlock to be dropped and reacquired;
@@ -2052,19 +2217,14 @@ in6_purgeif(struct ifnet *ifp)
  * address encoding scheme. (see figure on page 8)
  */
 static int
-in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr,
     struct ifnet *ifp, struct proc *p)
 {
-       struct if_laddrreq *iflr = (struct if_laddrreq *)data;
        struct ifaddr *ifa = NULL;
        struct sockaddr *sa;
        int p64 = proc_is64bit(p);
 
-       /* sanity checks */
-       if (!data || !ifp) {
-               panic("invalid argument to in6_lifaddr_ioctl");
-               /*NOTREACHED*/
-       }
+       VERIFY(ifp != NULL);
 
        switch (cmd) {
        case SIOCGLIFADDR:
@@ -2414,10 +2574,15 @@ in6_ifinit(ifp, ia, sin6, newhost)
        ia->ia_addr = *sin6;
        IFA_UNLOCK(ifa);
 
-       if (ifacount <= 1 && 
+       /*
+        * NOTE: SIOCSIFADDR is defined with struct ifreq as parameter,
+        * but here we are sending it down to the interface with a pointer
+        * to struct ifaddr, for legacy reasons.
+        */
+       if (ifacount <= 1 &&
            (error = ifnet_ioctl(ifp, PF_INET6, SIOCSIFADDR, ia))) {
-               if (error == EOPNOTSUPP)
-                       error = 0;
+               if (error == EOPNOTSUPP)
+                       error = 0;
                else if (error)
                        return(error);
        }
@@ -2534,6 +2699,31 @@ in6ifa_ifpwithaddr(ifp, addr)
        return((struct in6_ifaddr *)ifa);
 }
 
+struct in6_ifaddr *
+in6ifa_prproxyaddr(struct in6_addr *addr)
+{
+       struct in6_ifaddr *ia;
+
+       lck_rw_lock_shared(&in6_ifaddr_rwlock);
+       for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+               IFA_LOCK(&ia->ia_ifa);
+               if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(&ia->ia_ifa))) {
+                       IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for caller */
+                       IFA_UNLOCK(&ia->ia_ifa);
+                       break;
+               }
+               IFA_UNLOCK(&ia->ia_ifa);
+       }
+       lck_rw_done(&in6_ifaddr_rwlock);
+
+       if (ia != NULL && !nd6_prproxy_ifaddr(ia)) {
+               IFA_REMREF(&ia->ia_ifa);
+               ia = NULL;
+       }
+
+       return (ia);
+}
+
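
in6ifa_prproxyaddr() returns the matching in6_ifaddr with an extra
reference (the IFA_ADDREF_LOCKED marked "for caller"), dropping it itself
only when the prefix-proxy check fails; a non-NULL result therefore
belongs to the caller. A hypothetical caller, kernel context only (not
compilable stand-alone):

/* Does 'addr' belong to a prefix-proxying address? (sketch) */
static boolean_t
addr_is_proxied(struct in6_addr *addr)
{
        struct in6_ifaddr *ia;

        if ((ia = in6ifa_prproxyaddr(addr)) == NULL)
                return (FALSE);
        /* ... inspect ia as needed ... */
        IFA_REMREF(&ia->ia_ifa);        /* drop the reference we were given */
        return (TRUE);
}
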
 /*
  * Convert IP6 address to printable (loggable) representation.
  */
@@ -2752,7 +2942,6 @@ in6_ifawithscope(
        struct in6_ifaddr *ifa_best = NULL;
        
        if (oifp == NULL) {
-               /* output interface is not specified */
                return(NULL);
        }
 
@@ -3199,9 +3388,6 @@ in6_if_up(
        int dad_delay;          /* delay ticks before DAD output */
        int error;
 
-       if (!in6_init2done)
-               return ENXIO;
-
        /*
         * special cases, like 6to4, are handled in in6_ifattach
         */
@@ -3220,7 +3406,7 @@ in6_if_up(
                        continue;
                }
                ia = (struct in6_ifaddr *)ifa;
-               if (ia->ia6_flags & IN6_IFF_TENTATIVE) {
+               if (ia->ia6_flags & IN6_IFF_DADPROGRESS) {
                        IFA_UNLOCK(ifa);
                        nd6_dad_start(ifa, &dad_delay);
                } else {
@@ -3240,12 +3426,14 @@ in6if_do_dad(
                return(0);
 
        /*
-        * Skip DAD on service triggered interfaces, for now,
-        * until we have support for Opportunistic Duplicate
-        * Address Detection [RFC 4429] and we can then back
-        * this out.
+        * If we are using the alternative neighbor discovery
+        * interface on this interface, then skip DAD.
+        *
+        * Also, skip it for interfaces marked "local private"
+        * for now, even when not marked as using the alternative
+        * interface.  This is for historical reasons.
         */
-       if (ifp->if_eflags & IFEF_SERVICE_TRIGGERED)
+       if (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE))
                return (0);
 
        switch (ifp->if_type) {
@@ -3283,17 +3471,25 @@ in6if_do_dad(
  * to in6_maxmtu.
  */
 void
-in6_setmaxmtu()
+in6_setmaxmtu(void)
 {
        u_int32_t maxmtu = 0;
        struct ifnet *ifp;
 
        ifnet_head_lock_shared();
        TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
+               struct nd_ifinfo *ndi;
+
                lck_rw_lock_shared(nd_if_rwlock);
+               if ((ndi = ND_IFINFO(ifp)) != NULL && !ndi->initialized)
+                       ndi = NULL;
+               if (ndi != NULL)
+                       lck_mtx_lock(&ndi->lock);
                if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
                    IN6_LINKMTU(ifp) > maxmtu)
                        maxmtu = IN6_LINKMTU(ifp);
+               if (ndi != NULL)
+                       lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
        }
        ifnet_head_done();
@@ -3347,6 +3543,8 @@ in6_if2idlen(struct ifnet *ifp)
                return (64);    /* for utun interfaces */
        case IFT_CELLULAR:
                return (64);    /* Packet Data over Cellular */
+       case IFT_BRIDGE:
+               return (64);    /* Transparent bridge interface */
        default:
                /*
                 * Unknown link type:
@@ -3387,8 +3585,14 @@ in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
        sin6->sin6_port = sin->sin_port;
        sin6->sin6_addr.s6_addr32[0] = 0;
        sin6->sin6_addr.s6_addr32[1] = 0;
-       sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
-       sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
+       if (sin->sin_addr.s_addr) {
+               sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
+               sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
+       }
+       else {
+               sin6->sin6_addr.s6_addr32[2] = 0;
+               sin6->sin6_addr.s6_addr32[3] = 0;
+       }
 }
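
The new branch above makes in6_sin_2_v4mapsin6() translate INADDR_ANY into
the IPv6 unspecified address (::) rather than the v4-mapped
::ffff:0.0.0.0, so a v4 wildcard keeps its wildcard meaning after
conversion. A user-space rendition of the mapping for comparison:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static void
v4_to_mapped(const struct in_addr *v4, struct in6_addr *v6)
{
        memset(v6, 0, sizeof (*v6));
        if (v4->s_addr != INADDR_ANY) {         /* INADDR_ANY maps to :: */
                v6->s6_addr[10] = 0xff;         /* ::ffff:a.b.c.d */
                v6->s6_addr[11] = 0xff;
                memcpy(&v6->s6_addr[12], &v4->s_addr, 4);
        }
}

int
main(void)
{
        struct in_addr v4;
        struct in6_addr v6;
        char buf[INET6_ADDRSTRLEN];

        inet_pton(AF_INET, "192.0.2.1", &v4);
        v4_to_mapped(&v4, &v6);
        printf("%s\n", inet_ntop(AF_INET6, &v6, buf, sizeof (buf)));
        v4.s_addr = INADDR_ANY;
        v4_to_mapped(&v4, &v6);
        printf("%s\n", inet_ntop(AF_INET6, &v6, buf, sizeof (buf)));
        return (0);
}
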
 
 /* Convert sockaddr_in6 into sockaddr_in. */
@@ -3402,8 +3606,8 @@ in6_sin6_2_sin_in_sock(struct sockaddr *nam)
         * Save original sockaddr_in6 addr and convert it
         * to sockaddr_in.
         */
-       sin6 = *(struct sockaddr_in6 *)nam;
-       sin_p = (struct sockaddr_in *)nam;
+       sin6 = *(struct sockaddr_in6 *)(void *)nam;
+       sin_p = (struct sockaddr_in *)(void *)nam;
        in6_sin6_2_sin(sin_p, &sin6);
 }
 
@@ -3418,7 +3622,7 @@ in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
               M_WAITOK);
        if (sin6_p == NULL)
                return ENOBUFS;
-       sin_p = (struct sockaddr_in *)*nam;
+       sin_p = (struct sockaddr_in *)(void *)*nam;
        in6_sin_2_v4mapsin6(sin_p, sin6_p);
        FREE(*nam, M_SONAME);
        *nam = (struct sockaddr *)sin6_p;
@@ -3466,7 +3670,7 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa)
        IFA_UNLOCK(&ifa->ia_ifa);
 
        if (ifp != NULL) {
-               strncpy(&in6_event_data.link_data.if_name[0],
+               (void) strncpy(&in6_event_data.link_data.if_name[0],
                    ifp->if_name, IFNAMSIZ);
                in6_event_data.link_data.if_family = ifp->if_family;
                in6_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -3630,3 +3834,37 @@ in6_ifaddr_trace(struct ifaddr *ifa, int refhold)
        idx = atomic_add_16_ov(cnt, 1) % IN6IFA_TRACE_HIST_SIZE;
        ctrace_record(&tr[idx]);
 }
+
+static void
+in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
+{
+       uint32_t flags = IN6_IFF_TENTATIVE;
+       uint32_t optdad = nd6_optimistic_dad;
+       
+       if (optdad && (ia->ia_ifp->if_eflags & IFEF_IPV6_ROUTER) == 0) {
+               if ((optdad & ND6_OPTIMISTIC_DAD_LINKLOCAL) &&
+                   IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))
+                       flags = IN6_IFF_OPTIMISTIC;
+               else if ((optdad & ND6_OPTIMISTIC_DAD_AUTOCONF) &&
+                        (ia->ia6_flags & IN6_IFF_AUTOCONF)) {
+                       if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
+                               if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY)
+                                       flags = IN6_IFF_OPTIMISTIC;
+                       } else {
+                               flags = IN6_IFF_OPTIMISTIC;
+                       }
+               } else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) &&
+                        (ia->ia6_flags & IN6_IFF_DYNAMIC)) {
+                       if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
+                               if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY)
+                                       flags = IN6_IFF_OPTIMISTIC;
+                       } else {
+                               flags = IN6_IFF_OPTIMISTIC;
+                       }
+               }
+       }
+       
+       ia->ia6_flags &= ~(IN6_IFF_DUPLICATED | IN6_IFF_DADPROGRESS);
+       ia->ia6_flags |= flags;
+}
+
index c0838ec4300f5ee4cc539172746e9db17967d157..875af39a60dcf08bc5b17f5f003ea0c1e3ec9059 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -216,6 +216,10 @@ extern const struct in6_addr in6mask32;
 extern const struct in6_addr in6mask64;
 extern const struct in6_addr in6mask96;
 extern const struct in6_addr in6mask128;
+
+#define        SIN6(s)         ((struct sockaddr_in6 *)(void *)s)
+#define        satosin6(sa)    SIN6(sa)
+#define        sin6tosa(sin6)  ((struct sockaddr *)(void *)(sin6))
 #endif /* KERNEL_PRIVATE */
 
 #ifdef KERNEL  /*XXX nonstandard*/
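
These macros centralize the double cast through (void *) that this change
applies at every sockaddr conversion; the intermediate (void *) silences
cast-alignment diagnostics while keeping the unchecked assumption in one
greppable place. Illustrative use (the helper function is hypothetical):

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define SIN6(s)         ((struct sockaddr_in6 *)(void *)s)
#define satosin6(sa)    SIN6(sa)
#define sin6tosa(sin6)  ((struct sockaddr *)(void *)(sin6))

static int
sockaddr_port6(struct sockaddr *sa)
{
        /* The macro only quiets the compiler; the caller still has to
         * guarantee 'sa' really addresses a sockaddr_in6. */
        if (sa->sa_family != AF_INET6)
                return (-1);
        return (ntohs(SIN6(sa)->sin6_port));
}
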
@@ -556,7 +560,7 @@ struct route_in6 {
 #if 1 /*IPSEC*/
 #define IPV6_IPSEC_POLICY      28 /* struct; get/set security policy */
 #endif /* 1 */
-#define IPV6_FAITH             29 /* bool; accept FAITH'ed connections */
+#define IPV6_FAITH             29 /* deprecated */
 
 #if 1 /*IPV6FIREWALL*/
 #define IPV6_FW_ADD            30 /* add a firewall rule to chain */
@@ -725,7 +729,7 @@ struct ip6_mtuinfo {
 #define IPV6CTL_SOURCECHECK    10      /* verify source route and intf */
 #define IPV6CTL_SOURCECHECK_LOGINT 11  /* minimum logging interval */
 #define IPV6CTL_ACCEPT_RTADV   12
-#define IPV6CTL_KEEPFAITH      13
+#define IPV6CTL_KEEPFAITH      13      /* deprecated */
 #define IPV6CTL_LOG_INTERVAL   14
 #define IPV6CTL_HDRNESTLIMIT   15
 #define IPV6CTL_DAD_COUNT      16
@@ -842,11 +846,6 @@ extern void in6_sin_2_v4mapsin6(struct sockaddr_in *sin,
 extern void in6_sin6_2_sin_in_sock(struct sockaddr *nam);
 extern int in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam);
 extern void in6_delayed_cksum(struct mbuf *, u_int16_t);
-
-#define        satosin6(sa)    ((struct sockaddr_in6 *)(sa))
-#define        sin6tosa(sin6)  ((struct sockaddr *)(sin6))
-#define        ifatoia6(ifa)   ((struct in6_ifaddr *)(ifa))
-
 extern int in6addr_local(struct in6_addr *);
 
 #define DEBUG_HWCKSUM 1 /* IPv6 Hardware checksum on/off */
index 77dd7e1afb2e7e5ab55b767d42f656bb09213ab1..bf8fe680d39d84ff6db3acf8b74440453f1715fa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -219,7 +219,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off,
                        break;
                m = m->m_next;
        }
-       w = (u_int16_t *)(mtod(m, u_char *) + off);
+       w = (u_int16_t *)(void *)(mtod(m, u_char *) + off);
        mlen = m->m_len - off;
        if (len < mlen)
                mlen = len;
@@ -231,7 +231,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off,
                REDUCE;
                sum <<= 8;
                s_util.c[0] = *(u_char *)w;
-               w = (u_int16_t *)((char *)w + 1);
+               w = (u_int16_t *)(void *)((char *)w + 1);
                mlen--;
                byte_swapped = 1;
        }
@@ -292,7 +292,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off,
                         */
                        s_util.c[1] = *(char *)w;
                        sum += s_util.s;
-                       w = (u_int16_t *)((char *)w + 1);
+                       w = (u_int16_t *)(void *)((char *)w + 1);
                        mlen = m->m_len - 1;
                        len--;
                } else
@@ -307,7 +307,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off,
                        REDUCE;
                        sum <<= 8;
                        s_util.c[0] = *(u_char *)w;
-                       w = (u_int16_t *)((char *)w + 1);
+                       w = (u_int16_t *)(void *)((char *)w + 1);
                        mlen--;
                        byte_swapped = 1;
                }
index d620db95efbcdbfb6018b2ed2e05f5315f449647..c977e9a7a64cf4295e86ef9e731787ee2db3f086 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -115,8 +115,8 @@ in6_gif_output(
 {
        struct gif_softc *sc = ifnet_softc(ifp);
        struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
-       struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc;
-       struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst;
+       struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)(void *)sc->gif_psrc;
+       struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)(void *)sc->gif_pdst;
        struct ip6_hdr *ip6;
        int proto;
        u_int8_t itos, otos;
@@ -331,8 +331,8 @@ gif_validate6(
 {
        struct sockaddr_in6 *src, *dst;
 
-       src = (struct sockaddr_in6 *)sc->gif_psrc;
-       dst = (struct sockaddr_in6 *)sc->gif_pdst;
+       src = (struct sockaddr_in6 *)(void *)sc->gif_psrc;
+       dst = (struct sockaddr_in6 *)(void *)sc->gif_pdst;
 
        /*
         * Check for address match.  Note that the check is for an incoming
index 10bf295f4795c678c3bf5b0379d35bc0d6677c49..31f707c81a1b39e1dfd01cebb268d41870e7548d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,7 +65,7 @@
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
-#include <libkern/crypto/md5.h>
+#include <libkern/crypto/sha1.h>
 #include <libkern/OSAtomic.h>
 #include <kern/lock.h>
 
 
 #include <net/net_osdep.h>
 
+#define        IN6_IFSTAT_ALLOC_SIZE   \
+    sizeof(void *) + sizeof(struct in6_ifstat) + sizeof(uint64_t) 
+#define        ICMP6_IFSTAT_ALLOC_SIZE \
+    sizeof(void *) + sizeof(struct icmp6_ifstat) + sizeof(uint64_t)
+
 struct in6_ifstat **in6_ifstat = NULL;
 struct icmp6_ifstat **icmp6_ifstat = NULL;
 size_t in6_ifstatmax = 0;
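
Each per-interface stats block is now over-allocated by a pointer-sized
slot plus one uint64_t of slack, and the structure is then placed at the
next 8-byte boundary past that slot with P2ROUNDUP (see the in6_ifattach()
hunks below). That guarantees natural alignment for the 64-bit counters
even when the allocator only promises pointer alignment. A user-space
sketch of the same arithmetic, with a local power-of-two round-up assumed
equivalent to XNU's P2ROUNDUP:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Round x up to the next multiple of align (a power of two). */
#define P2ROUNDUP(x, align) (-(-(uintptr_t)(x) & -(uintptr_t)(align)))

struct stats { uint64_t counters[32]; };

#define STATS_ALLOC_SIZE \
    (sizeof (void *) + sizeof (struct stats) + sizeof (uint64_t))

int
main(void)
{
        void *buf = malloc(STATS_ALLOC_SIZE);
        struct stats *sp;

        assert(buf != NULL);
        sp = (struct stats *)
            P2ROUNDUP((uintptr_t)buf + sizeof (void *), sizeof (uint64_t));
        assert(((uintptr_t)sp & (sizeof (uint64_t) - 1)) == 0);
        /* 'buf', not 'sp', is what must eventually be freed. */
        free(buf);
        return (0);
}
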
@@ -117,23 +122,12 @@ static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
 static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *, struct in6_aliasreq *);
 static int in6_ifattach_loopback(struct ifnet *);
 
-#define EUI64_GBIT     0x01
-#define EUI64_UBIT     0x02
-#define EUI64_TO_IFID(in6)     do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0)
-#define EUI64_GROUP(in6)       ((in6)->s6_addr[8] & EUI64_GBIT)
-#define EUI64_INDIVIDUAL(in6)  (!EUI64_GROUP(in6))
-#define EUI64_LOCAL(in6)       ((in6)->s6_addr[8] & EUI64_UBIT)
-#define EUI64_UNIVERSAL(in6)   (!EUI64_LOCAL(in6))
-
-#define IFID_LOCAL(in6)                (!EUI64_LOCAL(in6))
-#define IFID_UNIVERSAL(in6)    (!EUI64_UNIVERSAL(in6))
-
 /*
  * Generate a last-resort interface identifier, when the machine has no
  * IEEE802/EUI64 address sources.
  * The goal here is to get an interface identifier that (1) is random
  * enough and (2) does not change across reboots.
- * We currently use MD5(hostname) for it.
+ * We currently use SHA1(hostname) for it.
  *
  * in6 - upper 64bits are preserved
  */
@@ -142,8 +136,8 @@ get_rand_ifid(
        __unused struct ifnet *ifp,
        struct in6_addr *in6)   /* upper 64bits are preserved */
 {
-       MD5_CTX ctxt;
-       u_int8_t digest[16];
+       SHA1_CTX ctxt;
+       u_int8_t digest[SHA1_RESULTLEN];
        int hostnlen    = strlen(hostname);
 
 #if 0
@@ -154,19 +148,19 @@ get_rand_ifid(
 
        /* generate 8 bytes of pseudo-random value. */
        bzero(&ctxt, sizeof(ctxt));
-       MD5Init(&ctxt);
-       MD5Update(&ctxt, hostname, hostnlen);
-       MD5Final(digest, &ctxt);
+       SHA1Init(&ctxt);
+       SHA1Update(&ctxt, hostname, hostnlen);
+       SHA1Final(digest, &ctxt);
 
        /* assumes sizeof(digest) > sizeof(ifid) */
        bcopy(digest, &in6->s6_addr[8], 8);
 
        /* make sure to set "u" bit to local, and "g" bit to individual. */
-       in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */
-       in6->s6_addr[8] |= EUI64_UBIT;  /* u bit to "local" */
+       in6->s6_addr[8] &= ~ND6_EUI64_GBIT;     /* g bit to "individual" */
+       in6->s6_addr[8] |= ND6_EUI64_UBIT;      /* u bit to "local" */
 
        /* convert EUI64 into IPv6 interface identifier */
-       EUI64_TO_IFID(in6);
+       ND6_EUI64_TO_IFID(in6);
 
        return 0;
 }
@@ -177,8 +171,8 @@ generate_tmp_ifid(
        const u_int8_t *seed1,
        u_int8_t *ret)
 {
-       MD5_CTX ctxt;
-       u_int8_t seed[16], digest[16], nullbuf[8];
+       SHA1_CTX ctxt;
+       u_int8_t seed[16], nullbuf[8], digest[SHA1_RESULTLEN];
        u_int32_t val32;
        struct timeval tv;
 
@@ -211,17 +205,17 @@ generate_tmp_ifid(
 
        /* generate 16 bytes of pseudo-random value. */
        bzero(&ctxt, sizeof(ctxt));
-       MD5Init(&ctxt);
-       MD5Update(&ctxt, seed, sizeof(seed));
-       MD5Final(digest, &ctxt);
+       SHA1Init(&ctxt);
+       SHA1Update(&ctxt, seed, sizeof(seed));
+       SHA1Final(digest, &ctxt);
 
        /*
         * RFC 4941 3.2.1. (3)
-        * Take the left-most 64-bits of the MD5 digest and set bit 6 (the
+        * Take the left-most 64-bits of the SHA1 digest and set bit 6 (the
         * left-most bit is numbered 0) to zero.
         */
        bcopy(digest, ret, 8);
-       ret[0] &= ~EUI64_UBIT;
+       ret[0] &= ~ND6_EUI64_UBIT;
 
        /*
         * XXX: we'd like to ensure that the generated value is not zero
@@ -230,7 +224,7 @@ generate_tmp_ifid(
         */
        if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) {
                nd6log((LOG_INFO,
-                   "generate_tmp_ifid: computed MD5 value is zero.\n"));
+                   "generate_tmp_ifid: computed SHA1 value is zero.\n"));
 
                microtime(&tv);
                val32 = random() ^ tv.tv_usec;
@@ -239,7 +233,7 @@ generate_tmp_ifid(
 
        /*
         * RFC 4941 3.2.1. (4)
-        * Take the rightmost 64-bits of the MD5 digest and save them in
+        * Take the next 64-bits of the SHA1 digest and save them in
         * stable storage as the history value to be used in the next
         * iteration of the algorithm.
         */
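
The comment edits track the digest change. RFC 4941 3.2.1 consumes one
digest per iteration: the left 64 bits become the temporary interface
identifier (with the u-bit forced to "local"), and the 64 bits after them
are stored as the seed for the next iteration. "Rightmost" was accurate
for MD5's 16-byte output but not for SHA-1's 20 bytes, hence "next". A
schematic of the split (hashing elided; the constant name is illustrative):

#include <stdint.h>
#include <string.h>

#define EUI64_UBIT      0x02    /* universal/local bit */

/* Split one 20-byte digest into (iid, next_seed), RFC 4941 style. */
static void
split_digest(const uint8_t digest[20], uint8_t iid[8], uint8_t next_seed[8])
{
        memcpy(iid, digest, 8);           /* leftmost 64 bits -> IID */
        iid[0] &= ~EUI64_UBIT;            /* force "local" scope */
        memcpy(next_seed, digest + 8, 8); /* following 64 bits -> history */
}
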
@@ -280,7 +274,7 @@ in6_get_hw_ifid(
        /* Why doesn't this code use ifnet_addrs? */
        ifnet_lock_shared(ifp);
        ifa = ifp->if_lladdr;
-       sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+       sdl = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
        if (sdl->sdl_alen == 0) {
                ifnet_lock_done(ifp);
                return (-1);
@@ -351,8 +345,8 @@ in6_get_hw_ifid(
                /*
                 * due to insufficient bitwidth, we mark it local.
                 */
-               in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */
-               in6->s6_addr[8] |= EUI64_UBIT;  /* u bit to "local" */
+               in6->s6_addr[8] &= ~ND6_EUI64_GBIT;     /* g bit to "individual" */
+               in6->s6_addr[8] |= ND6_EUI64_UBIT;      /* u bit to "local" */
                break;
 
        case IFT_GIF:
@@ -375,17 +369,17 @@ in6_get_hw_ifid(
        }
 
        /* sanity check: g bit must not indicate "group" */
-       if (EUI64_GROUP(in6))
+       if (ND6_EUI64_GROUP(in6))
                goto done;
 
        /* convert EUI64 into IPv6 interface identifier */
-       EUI64_TO_IFID(in6);
+       ND6_EUI64_TO_IFID(in6);
 
        /*
         * sanity check: ifid must not be all zero, avoid conflict with
         * subnet router anycast
         */
-       if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
+       if ((in6->s6_addr[8] & ~(ND6_EUI64_GBIT | ND6_EUI64_UBIT)) == 0x00 &&
            bcmp(&in6->s6_addr[9], allzero, 7) == 0) {
                goto done;
        }
@@ -443,7 +437,7 @@ get_ifid(
                 * to borrow ifid from other interface, ifid needs to be
                 * globally unique
                 */
-               if (IFID_UNIVERSAL(in6)) {
+               if (ND6_IFID_UNIVERSAL(in6)) {
                        nd6log((LOG_DEBUG,
                            "%s: borrow interface identifier from %s\n",
                            if_name(ifp0), if_name(ifp)));
@@ -685,8 +679,8 @@ in6_nigroup(
 {
        const char *p;
        u_char *q;
-       MD5_CTX ctxt;
-       u_int8_t digest[16];
+       SHA1_CTX ctxt;
+       u_int8_t digest[SHA1_RESULTLEN];
        char l;
        char n[64];     /* a single label must not exceed 63 chars */
 
@@ -708,10 +702,10 @@ in6_nigroup(
 
        /* generate 8 bytes of pseudo-random value. */
        bzero(&ctxt, sizeof(ctxt));
-       MD5Init(&ctxt);
-       MD5Update(&ctxt, &l, sizeof(l));
-       MD5Update(&ctxt, n, l);
-       MD5Final(digest, &ctxt);
+       SHA1Init(&ctxt);
+       SHA1Update(&ctxt, &l, sizeof(l));
+       SHA1Update(&ctxt, n, l);
+       SHA1Final(digest, &ctxt);
 
        bzero(in6, sizeof(*in6));
        in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
@@ -756,6 +750,7 @@ in6_ifattach(
        struct in6_ifaddr *ia;
        struct in6_addr in6;
        int error;
+       void *buf;
 
        lck_rw_lock_exclusive(&in6_ifs_rwlock);
        /*
@@ -776,29 +771,30 @@ in6_ifattach(
                caddr_t q;
         
                n = if_indexlim * sizeof(struct in6_ifstat *);
-               q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK);
+               q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK|M_ZERO);
                if (q == NULL) {
                        lck_rw_done(&in6_ifs_rwlock);
                        return ENOBUFS;
                }
-               bzero(q, n);
                if (in6_ifstat) {
                        bcopy((caddr_t)in6_ifstat, q,
                                in6_ifstatmax * sizeof(struct in6_ifstat *));
                        FREE((caddr_t)in6_ifstat, M_IFADDR);
                }
-               in6_ifstat = (struct in6_ifstat **)q;
+               in6_ifstat = (struct in6_ifstat **)(void *)q;
                in6_ifstatmax = if_indexlim;
        }
     
        if (in6_ifstat[ifp->if_index] == NULL) {
-               in6_ifstat[ifp->if_index] = (struct in6_ifstat *)
-                       _MALLOC(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK);
-               if (in6_ifstat[ifp->if_index] == NULL) {
+               buf = _MALLOC(IN6_IFSTAT_ALLOC_SIZE, M_IFADDR, M_WAITOK);
+               if (buf == NULL) {
                        lck_rw_done(&in6_ifs_rwlock);
                        return ENOBUFS;
                }
-               bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat));
+               bzero(buf, IN6_IFSTAT_ALLOC_SIZE);
+               in6_ifstat[ifp->if_index] = (struct in6_ifstat *)
+                   P2ROUNDUP((intptr_t)buf + sizeof(void *), sizeof(uint64_t));
+               VERIFY(IS_P2ALIGNED(in6_ifstat[ifp->if_index], sizeof(uint64_t)));
        }
        lck_rw_done(&in6_ifs_rwlock);
 
@@ -808,29 +804,30 @@ in6_ifattach(
                caddr_t q;
         
                n = if_indexlim * sizeof(struct icmp6_ifstat *);
-               q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK);
+               q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK|M_ZERO);
                if (q == NULL) {
                        lck_rw_done(&icmp6_ifs_rwlock);
                        return ENOBUFS;
                }
-               bzero(q, n);
                if (icmp6_ifstat) {
                        bcopy((caddr_t)icmp6_ifstat, q,
                                icmp6_ifstatmax * sizeof(struct icmp6_ifstat *));
                        FREE((caddr_t)icmp6_ifstat, M_IFADDR);
                }
-               icmp6_ifstat = (struct icmp6_ifstat **)q;
+               icmp6_ifstat = (struct icmp6_ifstat **)(void *)q;
                icmp6_ifstatmax = if_indexlim;
        }
 
        if (icmp6_ifstat[ifp->if_index] == NULL) {
-               icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *)
-                       _MALLOC(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK);
-               if (icmp6_ifstat[ifp->if_index] == NULL) {
+               buf = _MALLOC(ICMP6_IFSTAT_ALLOC_SIZE, M_IFADDR, M_WAITOK);
+               if (buf == NULL) {
                        lck_rw_done(&icmp6_ifs_rwlock);
                        return ENOBUFS;
                }
-               bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat));
+               bzero(buf, ICMP6_IFSTAT_ALLOC_SIZE);
+               icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *)
+                   P2ROUNDUP((intptr_t)buf + sizeof(void *), sizeof(uint64_t));
+               VERIFY(IS_P2ALIGNED(icmp6_ifstat[ifp->if_index], sizeof(uint64_t)));
        }
        lck_rw_done(&icmp6_ifs_rwlock);
 
@@ -1113,7 +1110,9 @@ in6_get_tmpifid(
        struct nd_ifinfo *ndi;
 
        lck_rw_lock_shared(nd_if_rwlock);
-       ndi = &nd_ifinfo[ifp->if_index];
+       ndi = ND_IFINFO(ifp);
+       VERIFY(ndi != NULL && ndi->initialized);
+       lck_mtx_lock(&ndi->lock);
        bzero(nullbuf, sizeof(nullbuf));
        if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) {
                /* we've never created a random ID.  Create a new one. */
@@ -1128,13 +1127,14 @@ in6_get_tmpifid(
                                        ndi->randomid);
        }
        bcopy(ndi->randomid, retbuf, 8);
+       lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
 }
 
 void
-in6_tmpaddrtimer(
-       __unused void *ignored_arg)
+in6_tmpaddrtimer(void *arg)
 {
+#pragma unused(arg)
        int i;
        struct nd_ifinfo *ndi;
        u_int8_t nullbuf[8];
@@ -1143,23 +1143,25 @@ in6_tmpaddrtimer(
                      (ip6_temp_preferred_lifetime - ip6_desync_factor -
                       ip6_temp_regen_advance) * hz);
 
-       if (ip6_use_tempaddr) {
-               lck_rw_lock_shared(nd_if_rwlock);
-               bzero(nullbuf, sizeof(nullbuf));
-               for (i = 1; i < nd_ifinfo_indexlim + 1; i++) {
-                       ndi = &nd_ifinfo[i];
-                       if ((ndi->flags | ND6_IFF_PERFORMNUD) != ND6_IFF_PERFORMNUD)
-                               continue;
-                       if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
-                               /*
-                                * We've been generating a random ID on this interface.
-                                * Create a new one.
-                                */
-                               (void)generate_tmp_ifid(ndi->randomseed0,
-                                                       ndi->randomseed1,
-                                                       ndi->randomid);
-                       }
+       lck_rw_lock_shared(nd_if_rwlock);
+       bzero(nullbuf, sizeof(nullbuf));
+       for (i = 1; i < if_index + 1; i++) {
+               if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
+                       break;
+               ndi = &nd_ifinfo[i];
+               if (!ndi->initialized)
+                       continue;
+               lck_mtx_lock(&ndi->lock);
+               if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
+                       /*
+                        * We've been generating a random ID on this interface.
+                        * Create a new one.
+                        */
+                       (void)generate_tmp_ifid(ndi->randomseed0,
+                                               ndi->randomseed1,
+                                               ndi->randomid);
                }
-               lck_rw_done(nd_if_rwlock);
+               lck_mtx_unlock(&ndi->lock);
        }
+       lck_rw_done(nd_if_rwlock);
 }
index 05670d211420ac2d139ed6d7af0317164801989a..715403cf82fd7100389d0ced5c98c3cc9af2ec0c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -351,7 +351,7 @@ im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
 
        IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
 
-       gsin6 = (const struct sockaddr_in6 *)group;
+       gsin6 = (struct sockaddr_in6 *)(uintptr_t)(size_t)group;
 
        /* The im6o_membership array may be lazy allocated. */
        if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0)
@@ -407,7 +407,7 @@ im6o_match_source(const struct ip6_moptions *imo, const size_t gidx,
                return (NULL);
        imf = &imo->im6o_mfilters[gidx];
 
-       psa = (const sockunion_t *)src;
+       psa = (sockunion_t *)(uintptr_t)(size_t)src;
        find.im6s_addr = psa->sin6.sin6_addr;
        in6_clearscope(&find.im6s_addr);                /* XXX */
        ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
@@ -1662,7 +1662,11 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 
        if (ifp == NULL)
                return (EADDRNOTAVAIL);
-               
+
+       if ((size_t) msfr.msfr_nsrcs >
+           SIZE_MAX / sizeof(struct sockaddr_storage))
+               msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
+
        if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
                msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
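
The clamp above (repeated in the in6p_set_source_filters() hunk further
below) bounds the user-supplied msfr_nsrcs before it is multiplied by
sizeof (struct sockaddr_storage); without it, the multiplication could
wrap and _MALLOC would return an undersized buffer that the subsequent
fill or copyin would overrun. The guard in isolation:

#include <stdint.h>
#include <stdlib.h>

/* Allocate room for n fixed-size records, refusing counts whose byte
 * size would overflow size_t (mirrors the msfr_nsrcs clamp above). */
static void *
alloc_records(size_t n, size_t recsize)
{
        if (recsize != 0 && n > SIZE_MAX / recsize)
                return (NULL);  /* the kernel clamps the count instead */
        return (calloc(n, recsize));
}
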
 
@@ -1703,12 +1707,13 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
 
        if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
-               tss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+               tss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*tss),
                    M_TEMP, M_WAITOK | M_ZERO);
                if (tss == NULL) {
                        IM6O_UNLOCK(imo);
                        return (ENOBUFS);
                }
+               bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
        }
 
        /*
@@ -1738,8 +1743,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
        IM6O_UNLOCK(imo);
 
        if (tss != NULL) {
-               error = copyout(tss, tmp_ptr,
-                   sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+               error = copyout(tss, tmp_ptr, ncsrcs * sizeof(*tss));
                FREE(tss, M_TEMP);
                if (error)
                        return (error);
@@ -1870,7 +1874,7 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p,
                return NULL;
 
        if (in6p != NULL && (in6p->inp_flags & INP_BOUND_IF))
-               ifscope = in6p->inp_boundif;
+               ifscope = in6p->inp_boundifp->if_index;
 
        ifp = NULL;
        memset(&ro6, 0, sizeof(struct route_in6));
@@ -1911,7 +1915,7 @@ in6p_lookup_v4addr(struct ipv6_mreq *mreq, struct ip_mreq *v4mreq)
        ifa = ifa_ifpgetprimary(ifp, AF_INET);
        if (ifa == NULL)
                return (EADDRNOTAVAIL);
-       sin = (struct sockaddr_in *)ifa->ifa_addr;
+       sin = (struct sockaddr_in *)(uintptr_t)(size_t)ifa->ifa_addr;
        v4mreq->imr_interface.s_addr = sin->sin_addr.s_addr;
        IFA_REMREF(ifa);
 
@@ -2083,7 +2087,8 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
         * a VERIFY() in in6_mc_join().
         */
        if ((IN6_IS_ADDR_MC_LINKLOCAL(&gsa->sin6.sin6_addr) ||
-           IN6_IS_ADDR_MC_INTFACELOCAL(&gsa->sin6.sin6_addr)) && scopeid == 0)
+           IN6_IS_ADDR_MC_INTFACELOCAL(&gsa->sin6.sin6_addr)) &&
+           (scopeid == 0 || gsa->sin6.sin6_addr.s6_addr16[1] == 0))
                return (EINVAL);
 
        imo = in6p_findmoptions(inp);
@@ -2626,6 +2631,10 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
                memcpy(&msfr, &msfr32, sizeof(msfr));
        }
 
+       if ((size_t) msfr.msfr_nsrcs >
+           SIZE_MAX / sizeof(struct sockaddr_storage))
+               msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
+
        if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
                return (ENOBUFS);
 
@@ -2697,7 +2706,7 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 
                MLD_PRINTF(("%s: loading %lu source list entries\n",
                    __func__, (unsigned long)msfr.msfr_nsrcs));
-               kss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+               kss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*kss),
                    M_TEMP, M_WAITOK);
                if (kss == NULL) {
                        error = ENOMEM;
@@ -2705,7 +2714,7 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
                }
 
                error = copyin(tmp_ptr, kss,
-                   sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+                   (size_t) msfr.msfr_nsrcs * sizeof(*kss));
                if (error) {
                        FREE(kss, M_TEMP);
                        goto out_imo_locked;
index 2ea4d7a5d5d656b2c98c1bb8c664f505e80342c3..4c59255289d6189ea536c12a71ab03f28a9ac112 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_pcb.h>
 #include <net/if_types.h>
+#include <net/if_var.h>
 
 #include <kern/kern_types.h>
 #include <kern/zalloc.h>
@@ -186,8 +187,10 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
        u_short lport = 0;
        int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+#if !CONFIG_EMBEDDED
        int error;
        kauth_cred_t cred;
+#endif
 
        if (!in6_ifaddrs) /* XXX broken! */
                return (EADDRNOTAVAIL);
@@ -198,9 +201,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        socket_unlock(so, 0); /* keep reference */
        lck_rw_lock_exclusive(pcbinfo->mtx);
        if (nam) {
-               unsigned int outif = 0;
+               struct ifnet *outif = NULL;
 
-               sin6 = (struct sockaddr_in6 *)nam;
+               sin6 = (struct sockaddr_in6 *)(void *)nam;
                if (nam->sa_len != sizeof(*sin6)) {
                        lck_rw_done(pcbinfo->mtx);
                        socket_lock(so, 0);
@@ -262,7 +265,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        socket_lock(so, 0);
                                        return(EADDRNOTAVAIL);
                                }
-                               outif = ifa->ifa_ifp->if_index;
+                               outif = ifa->ifa_ifp;
                                IFA_UNLOCK(ifa);
                                IFA_REMREF(ifa);
                        }
@@ -271,6 +274,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        struct inpcb *t;
 
                        /* GROSS */
+#if !CONFIG_EMBEDDED
                        if (ntohs(lport) < IPV6PORT_RESERVED) {
                                cred = kauth_cred_proc_ref(p);
                                error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
@@ -281,8 +285,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        return(EACCES);
                                }
                        }
+#endif
 
-                       if (so->so_uid &&
+                       if (kauth_cred_getuid(so->so_cred) &&
                            !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
                                t = in6_pcblookup_local_and_cleanup(pcbinfo,
                                    &sin6->sin6_addr, lport,
@@ -292,7 +297,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
                                     (t->inp_socket->so_options &
                                      SO_REUSEPORT) == 0) &&
-                                    (so->so_uid != t->inp_socket->so_uid) &&
+                                    (kauth_cred_getuid(so->so_cred) !=
+                                        kauth_cred_getuid(t->inp_socket->so_cred)) &&
                                     ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
                                        lck_rw_done(pcbinfo->mtx);
                                        socket_lock(so, 0);
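
Throughout this file, reads of the cached so->so_uid field become
kauth_cred_getuid() on so->so_cred, so ownership checks always consult
the socket's live credential. The bind-conflict rule those checks feed,
reduced to a sketch with hypothetical fields:

    /* A second bind to a busy addr/port is refused only when the
     * existing socket did not set SO_REUSEPORT, the owners differ,
     * and the holder did not opt into SOF_REUSESHAREUID. */
    struct sk { int reuseport; int reuseshareuid; unsigned int uid; };

    static int
    bind_conflicts(const struct sk *holder, unsigned int new_uid)
    {
            return (!holder->reuseport && holder->uid != new_uid &&
                !holder->reuseshareuid);
    }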
@@ -307,8 +313,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                                sin.sin_addr, lport,
                                                INPLOOKUP_WILDCARD);
                                        if (t && (t->inp_socket->so_options & SO_REUSEPORT) == 0 &&
-                                           (so->so_uid !=
-                                            t->inp_socket->so_uid) &&
+                                           (kauth_cred_getuid(so->so_cred) !=
+                                               kauth_cred_getuid(t->inp_socket->so_cred)) &&
                                            (ntohl(t->inp_laddr.s_addr) !=
                                             INADDR_ANY ||
                                             INP_SOCKAF(so) ==
@@ -348,7 +354,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        }
                }
                inp->in6p_laddr = sin6->sin6_addr;
-               inp->in6p_last_outif = outif;
+               inp->in6p_last_outifp = outif;
        }
        socket_lock(so, 0);
        if (lport == 0) {
@@ -363,7 +369,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                if (in_pcbinshash(inp, 1) != 0) {
                        inp->in6p_laddr = in6addr_any;
                        inp->inp_lport = 0;
-                       inp->in6p_last_outif = 0;
+                       inp->in6p_last_outifp = NULL;
                        lck_rw_done(pcbinfo->mtx);
                        return (EAGAIN);
                }
@@ -374,27 +380,32 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 }
 
 /*
- *   Transform old in6_pcbconnect() into an inner subroutine for new
- *   in6_pcbconnect(): Do some validity-checking on the remote
- *   address (in mbuf 'nam') and then determine local host address
- *   (i.e., which interface) to use to access that remote host.
+ * Transform old in6_pcbconnect() into an inner subroutine for new
+ * in6_pcbconnect(): Do some validity-checking on the remote
+ * address (in mbuf 'nam') and then determine local host address
+ * (i.e., which interface) to use to access that remote host.
  *
- *   This preserves definition of in6_pcbconnect(), while supporting a
- *   slightly different version for T/TCP.  (This is more than
- *   a bit of a kludge, but cleaning up the internal interfaces would
- *   have forced minor changes in every protocol).
+ * This preserves definition of in6_pcbconnect(), while supporting a
+ * slightly different version for T/TCP.  (This is more than
+ * a bit of a kludge, but cleaning up the internal interfaces would
+ * have forced minor changes in every protocol).
+ *
+ * This routine might return an ifp with a reference held if the caller
+ * provides a non-NULL outif, even in the error case.  The caller is
+ * responsible for releasing its reference.
  */
-
 int
 in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
-    struct in6_addr *plocal_addr6, unsigned int *poutif)
+    struct in6_addr *plocal_addr6, struct ifnet **outif)
 {
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
        struct in6_addr *addr6 = NULL;
        struct in6_addr src_storage;
        int error = 0;
        unsigned int ifscope;
 
+       if (outif != NULL)
+               *outif = NULL;
        if (nam->sa_len != sizeof (*sin6))
                return (EINVAL);
        if (sin6->sin6_family != AF_INET6)
@@ -404,7 +415,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 
        /* KAME hack: embed scopeid */
        if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL, NULL) != 0)
-               return EINVAL;
+               return (EINVAL);
 
        if (in6_ifaddrs) {
                /*
@@ -416,36 +427,54 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
        }
 
        ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-          inp->inp_boundif : IFSCOPE_NONE;
+          inp->inp_boundifp->if_index : IFSCOPE_NONE;
 
        /*
         * XXX: in6_selectsrc might replace the bound local address
         * with the address specified by setsockopt(IPV6_PKTINFO).
         * Is it the intended behavior?
+        *
+        * in6_selectsrc() might return outif with its reference held
+        * even in the error case; caller always needs to release it
+        * if non-NULL.
         */
        addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp,
-           &inp->in6p_route, NULL, &src_storage, ifscope, &error);
-       if (addr6 == 0) {
-               if (error == 0)
-                       error = EADDRNOTAVAIL;
-               return(error);
+           &inp->in6p_route, outif, &src_storage, ifscope, &error);
+
+       if (outif != NULL) {
+               struct rtentry *rt = inp->in6p_route.ro_rt;
+               /*
+                * If in6_selectsrc() returns a route, it should be one
+                * which points to the same ifp as outif.  Just in case
+                * it isn't, use the one from the route for consistency.
+                * Otherwise if there is no route, leave outif alone as
+                * it could still be useful to the caller.
+                */
+               if (rt != NULL && rt->rt_ifp != *outif) {
+                       ifnet_reference(rt->rt_ifp);    /* for caller */
+                       if (*outif != NULL)
+                               ifnet_release(*outif);
+                       *outif = rt->rt_ifp;
+               }
        }
 
-       if (poutif != NULL) {
-               struct rtentry *rt;
-               if ((rt = inp->in6p_route.ro_rt) != NULL)
-                       *poutif = rt->rt_ifp->if_index;
-               else
-                       *poutif = 0;
+       if (addr6 == NULL) {
+               if (outif != NULL && (*outif) != NULL &&
+                       (inp->inp_flags & INP_NO_IFT_CELLULAR) &&
+                       (*outif)->if_type == IFT_CELLULAR)
+                       soevent(inp->inp_socket,
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
+               if (error == 0)
+                       error = EADDRNOTAVAIL;
+               return (error);
        }
 
        *plocal_addr6 = *addr6;
        /*
         * Don't do pcblookup call here; return interface in
-        * plocal_addr6
-        * and exit to caller, that will do the lookup.
+        * plocal_addr6 and exit to the caller, which will do the lookup.
         */
-       return(0);
+       return (0);
 }
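
in6_pcbladdr() now hands back a struct ifnet * instead of a bare
interface index, and its header comment spells out the contract: the
reference may be held even when the call fails. The calling pattern,
sketched (do_lookup is hypothetical; ifnet_release() is the real
ifnet KPI):

    struct ifnet;                           /* opaque here */
    extern int do_lookup(struct ifnet **);  /* may set *ifpp on error too */
    extern int ifnet_release(struct ifnet *);

    static int
    call_lookup(void)
    {
            struct ifnet *ifp = NULL;
            int error = do_lookup(&ifp);

            /* ... use ifp on success ... */

            if (ifp != NULL)                /* release on every path */
                    ifnet_release(ifp);
            return (error);
    }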
 
 /*
@@ -462,17 +491,27 @@ in6_pcbconnect(
        struct proc *p)
 {
        struct in6_addr addr6;
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
        struct inpcb *pcb;
-       int error;
-       unsigned int outif = 0;
+       int error = 0;
+       struct ifnet *outif = NULL;
 
        /*
         * Call inner routine, to assign local interface address.
         * in6_pcbladdr() may automatically fill in sin6_scope_id.
+        *
+        * in6_pcbladdr() might return an ifp with its reference held
+        * even in the error case, so make sure that it's released
+        * whenever it's non-NULL.
         */
-       if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0)
-               return(error);
+       if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0) {
+               if ((inp->inp_flags & INP_NO_IFT_CELLULAR) &&
+                       outif != NULL &&
+                       outif->if_type == IFT_CELLULAR)
+                       soevent(inp->inp_socket, 
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
+               goto done;
+       }
        socket_unlock(inp->inp_socket, 0);
        pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
                               sin6->sin6_port,
@@ -482,16 +521,17 @@ in6_pcbconnect(
        socket_lock(inp->inp_socket, 0);
        if (pcb != NULL) {
                in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
-               return (EADDRINUSE);
+               error = EADDRINUSE;
+               goto done;
        }
        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                if (inp->inp_lport == 0) {
                        error = in6_pcbbind(inp, (struct sockaddr *)0, p);
                        if (error)
-                               return (error);
+                               goto done;
                }
                inp->in6p_laddr = addr6;
-               inp->in6p_last_outif = outif;
+               inp->in6p_last_outifp = outif;  /* no reference needed */
        }
        if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
                /*lock inversion issue, mostly with udp multicast packets */
@@ -509,7 +549,12 @@ in6_pcbconnect(
 
        in_pcbrehash(inp);
        lck_rw_done(inp->inp_pcbinfo->mtx);
-       return (0);
+
+done:
+       if (outif != NULL)
+               ifnet_release(outif);
+
+       return (error);
 }
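
in6_pcbconnect() is restructured around a single done: label so the
outif reference is dropped on every exit, including the new EADDRINUSE
and bind-failure paths. The idiom, with hypothetical acquire/release
stand-ins:

    extern int acquire(void **);    /* may set *res even on failure */
    extern void release(void *);

    static int
    connectish(void)
    {
            void *res = NULL;
            int error;

            if ((error = acquire(&res)) != 0)
                    goto done;
            /* ... further failure points, each 'goto done' ... */
            error = 0;
    done:
            if (res != NULL)
                    release(res);   /* exactly one cleanup site */
            return (error);
    }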
 
 void
@@ -559,7 +604,7 @@ in6_pcbdetach(
                inp->inp_gencnt = ++ipi->ipi_gencnt;
                if (inp->in6p_options)
                        m_freem(inp->in6p_options);
-               ip6_freepcbopts(inp->in6p_outputopts);
+               ip6_freepcbopts(inp->in6p_outputopts);
                if (inp->in6p_route.ro_rt) {
                        rtfree(inp->in6p_route.ro_rt);
                        inp->in6p_route.ro_rt = NULL;
@@ -749,14 +794,15 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
        if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6)
                return;
 
-       sa6_dst = (struct sockaddr_in6 *)dst;
+       sa6_dst = (struct sockaddr_in6 *)(void *)dst;
        if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
                return;
 
        /*
         * note that src can be NULL when we get notify by local fragmentation.
         */
-       sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
+       sa6_src = (src == NULL) ?
+           sa6_any : *(struct sockaddr_in6 *)(uintptr_t)(size_t)src;
        flowinfo = sa6_src.sin6_flowinfo;
 
        /*
@@ -795,8 +841,8 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
                if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
                    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
                     IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
-                       ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
-                                       (u_int32_t *)cmdarg);
+                       ip6_notify_pmtu(inp, (struct sockaddr_in6 *)(void *)dst,
+                           (u_int32_t *)cmdarg);
                }
 
                /*
@@ -1000,20 +1046,13 @@ in6_pcblookup_hash_exists(
        int wildcard,
        uid_t *uid,
        gid_t *gid,
-       __unused struct ifnet *ifp)
+       struct ifnet *ifp)
 {
        struct inpcbhead *head;
        struct inpcb *inp;
        u_short fport = fport_arg, lport = lport_arg;
-       int faith;
        int found;
 
-#if defined(NFAITH) && NFAITH > 0
-       faith = faithprefix(laddr);
-#else
-       faith = 0;
-#endif
-
        *uid = UID_MAX;
        *gid = GID_MAX;
 
@@ -1028,6 +1067,12 @@ in6_pcblookup_hash_exists(
        LIST_FOREACH(inp, head, inp_hash) {
                if ((inp->inp_vflag & INP_IPV6) == 0)
                        continue;
+
+               if (ip6_restrictrecvif && ifp != NULL &&
+                   (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                   !(inp->in6p_flags & IN6P_RECV_ANYIF))
+                       continue;
+
                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
                    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
                    inp->inp_fport == fport &&
@@ -1036,8 +1081,10 @@ in6_pcblookup_hash_exists(
                                /*
                                 * Found. Check if pcb is still valid
                                 */
-                               *uid = inp->inp_socket->so_uid;
-                               *gid = inp->inp_socket->so_gid;
+                               *uid = kauth_cred_getuid(
+                                   inp->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   inp->inp_socket->so_cred);
                        }
                        lck_rw_done(pcbinfo->mtx);
                        return (found);
@@ -1051,15 +1098,21 @@ in6_pcblookup_hash_exists(
                LIST_FOREACH(inp, head, inp_hash) {
                        if ((inp->inp_vflag & INP_IPV6) == 0)
                                continue;
+
+                       if (ip6_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(inp->in6p_flags & IN6P_RECV_ANYIF))
+                               continue;
+
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
                            inp->inp_lport == lport) {
-                               if (faith && (inp->inp_flags & INP_FAITH) == 0)
-                                       continue;
                                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
                                                       laddr)) {
                                        if ((found = (inp->inp_socket != NULL))) {
-                                               *uid = inp->inp_socket->so_uid;
-                                               *gid = inp->inp_socket->so_gid;
+                                               *uid = kauth_cred_getuid(
+                                                   inp->inp_socket->so_cred);
+                                               *gid = kauth_cred_getgid(
+                                                   inp->inp_socket->so_cred);
                                        }
                                        lck_rw_done(pcbinfo->mtx);
                                        return (found);
@@ -1070,8 +1123,10 @@ in6_pcblookup_hash_exists(
                }
                if (local_wild) {
                        if ((found = (local_wild->inp_socket != NULL))) {
-                               *uid = local_wild->inp_socket->so_uid;
-                               *gid = local_wild->inp_socket->so_gid;
+                               *uid = kauth_cred_getuid(
+                                   local_wild->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   local_wild->inp_socket->so_cred);
                        }
                        lck_rw_done(pcbinfo->mtx);
                        return (found);
@@ -1101,13 +1156,6 @@ in6_pcblookup_hash(
        struct inpcbhead *head;
        struct inpcb *inp;
        u_short fport = fport_arg, lport = lport_arg;
-       int faith;
-
-#if defined(NFAITH) && NFAITH > 0
-       faith = faithprefix(laddr);
-#else
-       faith = 0;
-#endif
 
        lck_rw_lock_shared(pcbinfo->mtx);
 
@@ -1120,20 +1168,26 @@ in6_pcblookup_hash(
        LIST_FOREACH(inp, head, inp_hash) {
                if ((inp->inp_vflag & INP_IPV6) == 0)
                        continue;
+
+               if (ip6_restrictrecvif && ifp != NULL &&
+                   (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                   !(inp->in6p_flags & IN6P_RECV_ANYIF))
+                       continue;
+
                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
                    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
                    inp->inp_fport == fport &&
                    inp->inp_lport == lport) {
                        /*
-                       * Found. Check if pcb is still valid
-                       */
+                       * Found. Check if pcb is still valid
+                       */
                        if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
                                lck_rw_done(pcbinfo->mtx);
                                return (inp);
                        }
                        else {  /* it's there but dead, say it isn't found */
-                               lck_rw_done(pcbinfo->mtx);      
-                               return(NULL);
+                               lck_rw_done(pcbinfo->mtx);
+                               return (NULL);
                        }
                }
        }
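
Each of the PCB-lookup loops in this file gains the same early continue:
when the ip6_restrictrecvif sysctl is enabled and the packet arrived on
an interface flagged IFEF_RESTRICTED_RECV, only sockets that opted in
via IN6P_RECV_ANYIF may match. As a predicate (flag values here are
illustrative, not xnu's):

    #define IFEF_RESTRICTED_RECV    0x1     /* illustrative */
    #define IN6P_RECV_ANYIF         0x2     /* illustrative */

    static int
    skip_pcb(int restrict_on, unsigned int if_eflags,
        unsigned int pcb_flags)
    {
            return (restrict_on &&
                (if_eflags & IFEF_RESTRICTED_RECV) &&
                !(pcb_flags & IN6P_RECV_ANYIF));
    }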
@@ -1145,10 +1199,14 @@ in6_pcblookup_hash(
                LIST_FOREACH(inp, head, inp_hash) {
                        if ((inp->inp_vflag & INP_IPV6) == 0)
                                continue;
+
+                       if (ip6_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(inp->in6p_flags & IN6P_RECV_ANYIF))
+                               continue;
+
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
                            inp->inp_lport == lport) {
-                               if (faith && (inp->inp_flags & INP_FAITH) == 0)
-                                       continue;
                                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
                                                       laddr)) {
                                        if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
@@ -1156,8 +1214,8 @@ in6_pcblookup_hash(
                                                return (inp);
                                        }
                                        else {  /* it's there but dead, say it isn't found */
-                                               lck_rw_done(pcbinfo->mtx);      
-                                               return(NULL);
+                                               lck_rw_done(pcbinfo->mtx);
+                                               return (NULL);
                                        }
                                }
                                else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
@@ -1199,3 +1257,32 @@ init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
 
        return;
 }
+
+void
+in6p_route_copyout(struct inpcb *inp, struct route_in6 *dst)
+{
+       struct route_in6 *src = &inp->in6p_route;
+
+       lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+       /* Minor sanity check */
+       if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
+               panic("%s: wrong or corrupted route: %p", __func__, src);
+       
+       route_copyout((struct route *)dst, (struct route *)src, sizeof(*dst));
+}
+
+void
+in6p_route_copyin(struct inpcb *inp, struct route_in6 *src)
+{
+       struct route_in6 *dst = &inp->in6p_route;
+
+       lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+       /* Minor sanity check */
+       if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
+               panic("%s: wrong or corrupted route: %p", __func__, src);
+
+       route_copyin((struct route *)src, (struct route *)dst, sizeof(*src));
+}
+
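
The new in6p_route_copyout()/in6p_route_copyin() pair lets callers
borrow the PCB's cached route while the PCB mutex is held; both sides
first verify that the cached route's key really is AF_INET6 and panic
on corruption. The sanity check as a standalone assertion (struct shape
hypothetical; the kernel panics instead of asserting):

    #include <assert.h>

    struct rkey { unsigned char sa_len, sa_family; };

    static void
    check_route_key(const struct rkey *key)
    {
            if (key != NULL)
                    assert(key->sa_family == 30);   /* AF_INET6 on Darwin */
    }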
index d83836bbcab73e1e18b2ba2ffecb6b02ae2e2fdd..1ad240007228cdb5e45a9e22dd6341f88c18fde9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/appleapiopts.h>
 
 #ifdef KERNEL_PRIVATE
-#define        satosin6(sa)    ((struct sockaddr_in6 *)(sa))
-#define        sin6tosa(sin6)  ((struct sockaddr *)(sin6))
-#define        ifatoia6(ifa)   ((struct in6_ifaddr *)(ifa))
-
 extern void in6_losing(struct inpcb *);
 extern int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
 extern int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *);
 extern void in6_pcbdetach(struct inpcb *);
 extern void in6_pcbdisconnect(struct inpcb *);
 extern int in6_pcbladdr(struct inpcb *, struct sockaddr *,
-    struct in6_addr *, unsigned int *);
+    struct in6_addr *, struct ifnet **);
 extern struct inpcb *in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *,
     u_int, int);
 extern struct inpcb *in6_pcblookup_hash(struct inpcbinfo *, struct in6_addr *,
@@ -126,6 +122,8 @@ extern int in6_selecthlim(struct in6pcb *, struct ifnet *);
 extern int in6_pcbsetport(struct in6_addr *, struct inpcb *,
     struct proc *, int);
 extern void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m);
+extern void in6p_route_copyout(struct inpcb *, struct route_in6 *);
+extern void in6p_route_copyin(struct inpcb *, struct route_in6 *);
 #endif /* KERNEL_PRIVATE */
 
 #endif /* !_NETINET6_IN6_PCB_H_ */
index c0228feebf40de7a41a110fab5416088435d3f74..cb44f9b41f729680d28039c3e6595de2bcee1a4b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -395,7 +395,7 @@ int ip6_forwarding = IPV6FORWARDING;        /* act as router? */
 int    ip6_sendredirects = IPV6_SENDREDIRECTS;
 int    ip6_defhlim = IPV6_DEFHLIM;
 int    ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
-int    ip6_accept_rtadv = 0;   /* "IPV6FORWARDING ? 0 : 1" is dangerous */
+int    ip6_accept_rtadv = 1;   /* deprecated */
 int    ip6_maxfragpackets;     /* initialized in frag6.c:frag6_init() */
 int    ip6_maxfrags;
 int    ip6_log_interval = 5;
@@ -417,7 +417,7 @@ int ip6_maxdynroutes = 1024;        /* Max # of routes created via redirect */
 int    ip6_only_allow_rfc4193_prefix = 0;      /* Only allow RFC4193 style Unique Local IPv6 Unicast prefixes */
 
 u_int32_t ip6_id = 0UL;
-int    ip6_keepfaith = 0;
+static int ip6_keepfaith = 0;
 time_t ip6_log_time = (time_t)0L;
 int    nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (as in RFC 4861) */
 
@@ -521,10 +521,10 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS,
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS,
         maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED,           &ip6_maxfrags,  0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV,
-       accept_rtadv, CTLFLAG_RW | CTLFLAG_LOCKED,
+       accept_rtadv, CTLFLAG_RD | CTLFLAG_LOCKED,
        &ip6_accept_rtadv,      0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH,
-       keepfaith, CTLFLAG_RW | CTLFLAG_LOCKED,         &ip6_keepfaith, 0, "");
+       keepfaith, CTLFLAG_RD | CTLFLAG_LOCKED,         &ip6_keepfaith, 0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL,
        log_interval, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_log_interval,   0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT,
@@ -609,3 +609,5 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG,
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
        nd6_onlink_ns_rfc4861, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_onlink_ns_rfc4861, 0,
        "Accept 'on-link' nd6 NS in compliance with RFC 4861.");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_OPTIMISTIC_DAD,
+       nd6_optimistic_dad, CTLFLAG_RW | CTLFLAG_LOCKED,        &nd6_optimistic_dad,            0, "");
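
accept_rtadv and keepfaith flip from CTLFLAG_RW to CTLFLAG_RD, so user
space can still read them but no longer set them, and a new read-write
knob for nd6_optimistic_dad is exported under net.inet6.icmp6. Reading
one of the now read-only OIDs from user space, for instance:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int val = 0;
            size_t len = sizeof (val);

            /* a write to this OID would now fail: it is read-only */
            if (sysctlbyname("net.inet6.ip6.keepfaith", &val, &len,
                NULL, 0) == 0)
                    printf("keepfaith=%d\n", val);
            return (0);
    }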
index 63a66121d2bb56670b460d4d5f28cac0aa13e6d6..509bba0939a3c99d9f62ea55a51e56400a13aa18 100644 (file)
@@ -154,7 +154,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
            struct radix_node *treenodes)
 {
        struct rtentry *rt = (struct rtentry *)treenodes;
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt);
        struct radix_node *ret;
 
        lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
index 1eb5cd60fb98f2e23afdbd5cee60c0a4a56f8d6b..40bad0948c0377cf270c86367302ba3a2d398949 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -151,11 +151,11 @@ extern lck_mtx_t *addrsel_mutex;
 
 static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
        struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
-       struct ifnet **, struct rtentry **, int, int, unsigned int,
-       unsigned int);
+       struct ifnet **, struct rtentry **, int, int,
+       const struct ip6_out_args *ip6oa);
 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
-       struct ip6_moptions *, struct route_in6 *ro, unsigned int,
-       unsigned int, struct ifnet **);
+       struct ip6_moptions *, struct route_in6 *ro,
+       const struct ip6_out_args *, struct ifnet **);
 static void init_policy_queue(void);
 static int add_addrsel_policyent(const struct in6_addrpolicy *);
 #ifdef ENABLE_ADDRSEL
@@ -192,6 +192,11 @@ void addrsel_policy_init(void);
        goto out;               /* XXX: we can't use 'break' here */ \
 } while(0)
 
+/*
+ * Regardless of error, it will return an ifp with a reference held if the
+ * caller provides a non-NULL ifpp.  The caller is responsible for
+ * checking that the returned ifp is valid and for releasing its
+ * reference in all cases.
+ */
 struct in6_addr *
 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct inpcb *inp, struct route_in6 *ro,
@@ -208,7 +213,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
        int prefer_tempaddr;
        struct ip6_moptions *mopts;
        struct timeval timenow;
-       unsigned int nocell;
+       struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF };
        boolean_t islocal = FALSE;
 
        getmicrotime(&timenow);
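
in6_selectsrc() now builds a struct ip6_out_args up front: the bound
interface and the cellular opt-out travel as one argument rather than
as separate ifscope/nocell scalars threaded through selectroute() and
in6_selectif(). Approximately (the middle initializer zeroes a nested
member; field shapes and flag values below are assumptions, not taken
from ip6_var.h):

    struct ip6_out_args_sketch {
            unsigned int ip6oa_boundif;     /* scoped-routing ifindex */
            struct { unsigned int code; } ip6oa_flowadv; /* assumed */
            unsigned int ip6oa_flags;       /* IP6OAF_* bits */
    };

    #define IP6OAF_SELECT_SRCIF     0x1     /* illustrative values */
    #define IP6OAF_BOUND_IF         0x2
    #define IP6OAF_NO_CELLULAR      0x10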
@@ -220,12 +225,15 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
 
        if (inp != NULL) {
                mopts = inp->in6p_moptions;
-               nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+               if (inp->inp_flags & INP_NO_IFT_CELLULAR)
+                       ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
        } else {
                mopts = NULL;
-               nocell = 0;
        }
 
+       if (ip6oa.ip6oa_boundif != IFSCOPE_NONE)
+               ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+
        /*
         * If the source address is explicitly specified by the caller,
         * check if the requested source address is indeed a unicast address
@@ -238,9 +246,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                struct in6_ifaddr *ia6;
 
                /* get the outgoing interface */
-               if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope,
-                   nocell, &ifp)) != 0) {
-                       return (NULL);
+               if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
+                   &ifp)) != 0) {
+                       src_storage = NULL;
+                       goto done;
                }
 
                /*
@@ -254,48 +263,44 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                srcsock.sin6_family = AF_INET6;
                srcsock.sin6_len = sizeof(srcsock);
                srcsock.sin6_addr = pi->ipi6_addr;
-               if (ifp) {
+               if (ifp != NULL) {
                        *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
                        if (*errorp != 0) {
-                               ifnet_release(ifp);
-                               return (NULL);
+                               src_storage = NULL;
+                               goto done;
                        }
                }
-               ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
+               ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)
+                   (&srcsock));
                if (ia6 == NULL) {
                        *errorp = EADDRNOTAVAIL;
-                       if (ifp != NULL)
-                               ifnet_release(ifp);
-                       return (NULL);
+                       src_storage = NULL;
+                       goto done;
                }
                IFA_LOCK_SPIN(&ia6->ia_ifa);
                if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
-                   (nocell && (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
+                   ((ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
+                    (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
                        IFA_UNLOCK(&ia6->ia_ifa);
                        IFA_REMREF(&ia6->ia_ifa);
                        *errorp = EADDRNOTAVAIL;
-                       if (ifp != NULL)
-                               ifnet_release(ifp);
-                       return (NULL);
+                       src_storage = NULL;
+                       goto done;
                }
 
                *src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
                IFA_UNLOCK(&ia6->ia_ifa);
                IFA_REMREF(&ia6->ia_ifa);
-               if (ifpp != NULL) {
-                       /* if ifp is non-NULL, refcnt held in in6_selectif() */
-                       *ifpp = ifp;
-               } else if (ifp != NULL) {
-                       ifnet_release(ifp);
-               }
-               return (src_storage);
+               goto done;
        }
 
        /*
         * Otherwise, if the socket has already bound the source, just use it.
         */
-       if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 
-               return (&inp->in6p_laddr);
+       if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+               src_storage = &inp->in6p_laddr;
+               goto done;
+       }
 
        /*
         * If the address is not specified, choose the best one based on
@@ -303,19 +308,16 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
         */
 
        /* get the outgoing interface */
-       if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope, nocell,
-           &ifp)) != 0)
-               return (NULL);
+       if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
+           &ifp)) != 0) {
+               src_storage = NULL;
+               goto done;
+       }
 
-#ifdef DIAGNOSTIC
-       if (ifp == NULL)        /* this should not happen */
-               panic("in6_selectsrc: NULL ifp");
-#endif
        *errorp = in6_setscope(&dst, ifp, &odstzone);
        if (*errorp != 0) {
-               if (ifp != NULL)
-                       ifnet_release(ifp);
-               return (NULL);
+               src_storage = NULL;
+               goto done;
        }
        lck_rw_lock_shared(&in6_ifaddr_rwlock);
 
@@ -351,6 +353,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
                        goto next;
 
+               if (!nd6_optimistic_dad &&
+                    (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
+                       goto next;
+
                /* Rule 1: Prefer same address */
                if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
                        BREAK(1); /* there should be no better candidate */
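
Two optimistic-DAD rules join the candidate walk: with
nd6_optimistic_dad off, IN6_IFF_OPTIMISTIC addresses are skipped
outright (above), and in the Rule 3 hunk that follows, an optimistic
address is ranked like a deprecated one, per RFC 4429. The Rule 3
tiebreak as a sketch:

    /* <0: keep current best, >0: replace with candidate, 0: no opinion */
    static int
    rule3_optimistic(int best_is_opt, int cand_is_opt)
    {
            if (!best_is_opt && cand_is_opt)
                    return (-1);
            if (best_is_opt && !cand_is_opt)
                    return (1);
            return (0);
    }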
@@ -381,6 +387,17 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
                        REPLACE(3);
 
+               /*
+                * RFC 4429 says that optimistic addresses are equivalent to
+                * deprecated addresses, so avoid them here.
+                */
+               if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 &&
+                   (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
+                       NEXTSRC(3);
+               if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 &&
+                   (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0)
+                       REPLACE(3);
+
                /* Rule 4: Prefer home addresses */
                /*
                 * XXX: This is a TODO.  We should probably merge the MIP6
@@ -507,23 +524,24 @@ out:
 
        lck_rw_done(&in6_ifaddr_rwlock);
 
-       if (nocell && ia_best != NULL &&
-           (ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR)) {
+       if (ia_best != NULL &&
+           (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
+           ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
                IFA_REMREF(&ia_best->ia_ifa);
                ia_best = NULL;
        }
 
-       if ( (ia = ia_best) == NULL) {
+       if ((ia = ia_best) == NULL) {
                *errorp = EADDRNOTAVAIL;
-               if (ifp != NULL)
-                       ifnet_release(ifp);
-               return (NULL);
+               src_storage = NULL;
+               goto done;
        }
 
        IFA_LOCK_SPIN(&ia->ia_ifa);
        *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
        IFA_UNLOCK(&ia->ia_ifa);
        IFA_REMREF(&ia->ia_ifa);
+done:
        if (ifpp != NULL) {
                /* if ifp is non-NULL, refcnt held in in6_selectif() */
                *ifpp = ifp;
@@ -542,7 +560,10 @@ out:
  * i.e. for any given pcb there can only be one thread performing output at
  * the IPv6 layer.
  *
- * This routine is analogous to in_selectsrcif() for IPv4.
+ * This routine is analogous to in_selectsrcif() for IPv4.  Regardless of
+ * error, it will return an ifp with a reference held if the caller provides
+ * a non-NULL retifp.  The caller is responsible for checking that the
+ * returned ifp is valid and for releasing its reference in all cases.
  *
  * clone - meaningful only for bsdi and freebsd
  */
@@ -550,17 +571,18 @@ static int
 selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
     struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct rtentry **retrt, int clone,
-    int norouteok, unsigned int ifscope, unsigned int nocell)
+    int norouteok, const struct ip6_out_args *ip6oa)
 {
        int error = 0;
-       struct ifnet *ifp = NULL;
+       struct ifnet *ifp = NULL, *ifp0 = NULL;
        struct route_in6 *route = NULL;
        struct sockaddr_in6 *sin6_next;
        struct in6_pktinfo *pi = NULL;
        struct in6_addr *dst = &dstsock->sin6_addr;
        struct ifaddr *ifa = NULL;
        char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
-       boolean_t select_srcif;
+       boolean_t select_srcif, proxied_ifa = FALSE;
+       unsigned int ifscope = ip6oa->ip6oa_boundif;
 
 #if 0
        char ip6buf[INET6_ADDRSTRLEN];
@@ -622,7 +644,8 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
                ifscope = pi->ipi6_ifindex;
                ifnet_head_lock_shared();
                /* ifp may be NULL if detached or out of range */
-               ifp = (ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL;
+               ifp = ifp0 =
+                   ((ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL);
                ifnet_head_done();
                if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
                        /*
@@ -645,7 +668,7 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
         */
        if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
                IM6O_LOCK(mopts);
-               if ((ifp = mopts->im6o_multicast_ifp) != NULL) {
+               if ((ifp = ifp0 = mopts->im6o_multicast_ifp) != NULL) {
                        IM6O_UNLOCK(mopts);
                        goto done; /* we do not need a route for multicast. */
                }
@@ -711,6 +734,21 @@ getsrcif:
                ifa = (struct ifaddr *)
                    ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
 
+               /*
+                * If we are forwarding and proxying prefix(es), see if the
+                * source address is one of ours and is a proxied address;
+                * if so, use it.
+                */
+               if (ifa == NULL && ip6_forwarding && nd6_prproxy) {
+                       ifa = (struct ifaddr *)
+                           ifa_foraddr6(&srcsock->sin6_addr);
+                       if (ifa != NULL && !(proxied_ifa =
+                           nd6_prproxy_ifaddr((struct in6_ifaddr *)ifa))) {
+                               IFA_REMREF(ifa);
+                               ifa = NULL;
+                       }
+               }
+
                if (ip6_select_srcif_debug && ifa != NULL) {
                        if (ro->ro_rt != NULL) {
                                printf("%s->%s ifscope %d->%d ifa_if %s "
@@ -746,7 +784,7 @@ getsrcif:
        }
 
 getroute:
-       if (ifa != NULL)
+       if (ifa != NULL && !proxied_ifa)
                ifscope = ifa->ifa_ifp->if_index;
 
        /*
@@ -776,7 +814,7 @@ getroute:
                    (RTF_UP | RTF_LLINFO) ||
                    ron->ro_rt->generation_id != route_generation ||
                    (select_srcif && (ifa == NULL ||
-                   ifa->ifa_ifp != ron->ro_rt->rt_ifp)))) ||
+                   (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) ||
                    !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
                    &sin6_next->sin6_addr)) {
                        if (ron->ro_rt != NULL) {
@@ -804,7 +842,7 @@ getroute:
                        }
                }
                route = ron;
-               ifp = ron->ro_rt->rt_ifp;
+               ifp = ifp0 = ron->ro_rt->rt_ifp;
 
                /*
                 * When cloning is required, try to allocate a route to the
@@ -836,7 +874,7 @@ getroute:
            ro->ro_rt->generation_id != route_generation ||
            !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
            (select_srcif && (ifa == NULL ||
-           ifa->ifa_ifp != ro->ro_rt->rt_ifp)))) {
+           (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa))))) {
                RT_UNLOCK(ro->ro_rt);
                rtfree(ro->ro_rt);
                ro->ro_rt = NULL;
@@ -874,7 +912,7 @@ getroute:
 
        if (ro->ro_rt != NULL) {
                RT_LOCK_ASSERT_HELD(ro->ro_rt);
-               ifp = ro->ro_rt->rt_ifp;
+               ifp = ifp0 = ro->ro_rt->rt_ifp;
        } else {
                error = EHOSTUNREACH;
        }
@@ -883,6 +921,7 @@ getroute:
 validateroute:
        if (select_srcif) {
                boolean_t has_route = (route != NULL && route->ro_rt != NULL);
+               boolean_t srcif_selected = FALSE;
 
                if (has_route)
                        RT_LOCK_ASSERT_HELD(route->ro_rt);
@@ -895,28 +934,48 @@ validateroute:
                if (has_route && (ifa == NULL ||
                    (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
                    !(route->ro_rt->rt_flags & RTF_UP))) {
-                       if (ip6_select_srcif_debug) {
-                               if (ifa != NULL) {
-                                       printf("%s->%s ifscope %d ro_if %s "
-                                           "!= ifa_if %s (cached route "
-                                           "cleared)\n", s_src, s_dst,
-                                           ifscope, if_name(ifp),
-                                           if_name(ifa->ifa_ifp));
-                               } else {
-                                       printf("%s->%s ifscope %d ro_if %s "
-                                           "(no ifa_if found)\n", s_src,
-                                           s_dst, ifscope, if_name(ifp));
+                       /*
+                        * If the destination address belongs to a proxied
+                        * prefix, relax the requirement and allow the packet
+                        * to come out of the proxy interface with the source
+                        * address of the real interface.
+                        */
+                       if (ifa != NULL && proxied_ifa &&
+                           (route->ro_rt->rt_flags & (RTF_UP|RTF_PROXY)) ==
+                           (RTF_UP|RTF_PROXY)) {
+                               srcif_selected = TRUE;
+                       } else {
+                               if (ip6_select_srcif_debug) {
+                                       if (ifa != NULL) {
+                                               printf("%s->%s ifscope %d "
+                                                   "ro_if %s != ifa_if %s "
+                                                   "(cached route cleared)\n",
+                                                   s_src, s_dst,
+                                                   ifscope, if_name(ifp),
+                                                   if_name(ifa->ifa_ifp));
+                                       } else {
+                                               printf("%s->%s ifscope %d "
+                                                   "ro_if %s (no ifa_if "
+                                                   "found)\n", s_src, s_dst,
+                                                   ifscope, if_name(ifp));
+                                       }
                                }
+                               RT_UNLOCK(route->ro_rt);
+                               rtfree(route->ro_rt);
+                               route->ro_rt = NULL;
+                               route->ro_flags &= ~ROF_SRCIF_SELECTED;
+                               error = EHOSTUNREACH;
+                               /* Undo the settings done above */
+                               route = NULL;
+                               ifp = NULL;     /* ditch ifp; keep ifp0 */
+                               has_route = FALSE;
                        }
-                       RT_UNLOCK(route->ro_rt);
-                       rtfree(route->ro_rt);
-                       route->ro_rt = NULL;
-                       route->ro_flags &= ~ROF_SRCIF_SELECTED;
-                       error = EHOSTUNREACH;
-                       /* Undo the settings done above */
-                       route = NULL;
-                       ifp = NULL;
                } else if (has_route) {
+                       srcif_selected = TRUE;
+               }
+
+               if (srcif_selected) {
+                       VERIFY(has_route);
                        route->ro_flags |= ROF_SRCIF_SELECTED;
                        route->ro_rt->generation_id = route_generation;
                        RT_UNLOCK(route->ro_rt);
@@ -943,17 +1002,18 @@ validateroute:
        }
 
 done:
-       if (nocell && error == 0) {
-               if ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
+       if (error == 0) {
+               if ((ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) &&
+                   ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
                    (route != NULL && route->ro_rt != NULL &&
-                   route->ro_rt->rt_ifp->if_type == IFT_CELLULAR)) {
+                   route->ro_rt->rt_ifp->if_type == IFT_CELLULAR))) {
                        if (route != NULL && route->ro_rt != NULL) {
                                rtfree(route->ro_rt);
                                route->ro_rt = NULL;
                                route->ro_flags &= ~ROF_SRCIF_SELECTED;
                                route = NULL;
                        }
-                       ifp = NULL;
+                       ifp = NULL;     /* ditch ifp; keep ifp0 */
                        error = EHOSTUNREACH;
                }
        }
@@ -968,12 +1028,19 @@ done:
        if (error == EHOSTUNREACH)
                ip6stat.ip6s_noroute++;
 
+       /*
+        * We'll return ifp regardless of error, so pick it up from ifp0
+        * in case it was nullified above.  Caller is responsible for
+        * releasing the ifp if it is non-NULL.
+        */
+       ifp = ifp0;
+       if (retifp != NULL) {
+               if (ifp != NULL)
+                       ifnet_reference(ifp);   /* for caller */
+               *retifp = ifp;
+       }
+
        if (error == 0) {
-               if (retifp != NULL) {
-                       if (ifp != NULL)
-                               ifnet_reference(ifp);   /* for caller */
-                       *retifp = ifp;
-               }
                if (retrt != NULL && route != NULL)
                        *retrt = route->ro_rt;  /* ro_rt may be NULL */
        } else if (select_srcif && ip6_select_srcif_debug) {
@@ -989,12 +1056,17 @@ done:
        return (error);
 }
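
selectroute() now keeps a shadow pointer, ifp0, next to ifp: the error
paths nullify ifp, but ifp0 retains the last interface that was
resolved, and the rewritten epilogue returns it, referenced, whether or
not the call succeeded. The epilogue's shape (epilogue is hypothetical;
ifnet_reference() is the real KPI):

    struct ifnet;
    extern int ifnet_reference(struct ifnet *);

    static int
    epilogue(struct ifnet **retifp, struct ifnet *ifp0, int error)
    {
            if (retifp != NULL) {
                    if (ifp0 != NULL)
                            ifnet_reference(ifp0);  /* for caller */
                    *retifp = ifp0;                 /* even on error */
            }
            return (error);
    }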
 
+/*
+ * Regardless of error, it will return an ifp with a reference held if the
+ * caller provides a non-NULL retifp.  The caller is responsible for
+ * checking that the returned ifp is valid and for releasing its
+ * reference in all cases.
+ */
 static int
 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
-    struct ip6_moptions *mopts, struct route_in6 *ro, unsigned int ifscope,
-    unsigned int nocell, struct ifnet **retifp)
+    struct ip6_moptions *mopts, struct route_in6 *ro,
+    const struct ip6_out_args *ip6oa, struct ifnet **retifp)
 {
-       int error;
+       int err = 0;
        struct route_in6 sro;
        struct rtentry *rt = NULL;
 
@@ -1003,12 +1075,9 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                ro = &sro;
        }
 
-       if ((error = selectroute(NULL, dstsock, opts, mopts, ro, retifp,
-           &rt, 0, 1, ifscope, nocell)) != 0) {
-               if (ro == &sro && rt && rt == sro.ro_rt)
-                       rtfree(rt);
-               return (error);
-       }
+       if ((err = selectroute(NULL, dstsock, opts, mopts, ro, retifp,
+           &rt, 0, 1, ip6oa)) != 0)
+               goto done;
 
        /*
         * do not use a rejected or black hole route.
@@ -1028,11 +1097,8 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
         * We thus reject the case here.
         */
        if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
-               int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
-
-               if (ro == &sro && rt && rt == sro.ro_rt)
-                       rtfree(rt);
-               return (flags);
+               err = ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH);
+               goto done;
        }
 
        /*
@@ -1042,30 +1108,41 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
         * destination address (which should probably be one of our own
         * addresses.)
         */
-       if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) {
+       if (rt != NULL && rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp != NULL &&
+           retifp != NULL) {
+               ifnet_reference(rt->rt_ifa->ifa_ifp);
                if (*retifp != NULL)
                        ifnet_release(*retifp);
                *retifp = rt->rt_ifa->ifa_ifp;
-               ifnet_reference(*retifp);
        }
 
+done:
        if (ro == &sro && rt && rt == sro.ro_rt)
                rtfree(rt);
-       return (0);
+
+       /*
+        * retifp might point to a valid ifp with a reference held;
+        * caller is responsible for releasing it if non-NULL.
+        */
+       return (err);
 }
 
 /*
+ * Regardless of error, it will return an ifp with a reference held if the
+ * caller provides a non-NULL retifp.  The caller is responsible for
+ * checking that the returned ifp is valid and for releasing its
+ * reference in all cases.
+ *
  * clone - meaningful only for bsdi and freebsd
  */
 int
 in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
     struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct rtentry **retrt, int clone,
-    unsigned int ifscope, unsigned int nocell)
+    const struct ip6_out_args *ip6oa)
 {
 
        return (selectroute(srcsock, dstsock, opts, mopts, ro, retifp,
-           retrt, clone, 0, ifscope, nocell));
+           retrt, clone, 0, ip6oa));
 }
 
 /*
@@ -1085,7 +1162,16 @@ in6_selecthlim(
        } else {
                lck_rw_lock_shared(nd_if_rwlock);
                if (ifp && ifp->if_index < nd_ifinfo_indexlim) {
-                       u_int8_t chlim = nd_ifinfo[ifp->if_index].chlim;
+                       u_int8_t chlim;
+                       struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
+
+                       if (ndi->initialized) {
+                               lck_mtx_lock(&ndi->lock);
+                               chlim = ndi->chlim;
+                               lck_mtx_unlock(&ndi->lock);
+                       } else {
+                               chlim = ip6_defhlim;
+                       }
                        lck_rw_done(nd_if_rwlock);
                        return (chlim);
                } else {
@@ -1166,7 +1252,7 @@ in6_pcbsetport(
                                 * occurred above.
                                 */
                                inp->in6p_laddr = in6addr_any;
-                               inp->in6p_last_outif = 0;
+                               inp->in6p_last_outifp = NULL;
                                if (!locked)
                                        lck_rw_done(pcbinfo->mtx);
                                return (EAGAIN);
@@ -1190,7 +1276,7 @@ in6_pcbsetport(
                                 * occurred above.
                                 */
                                inp->in6p_laddr = in6addr_any;
-                               inp->in6p_last_outif = 0;
+                               inp->in6p_last_outifp = NULL;
                                if (!locked)
                                        lck_rw_done(pcbinfo->mtx);
                                return (EAGAIN);
@@ -1207,7 +1293,7 @@ in6_pcbsetport(
        if (in_pcbinshash(inp, 1) != 0) {
                inp->in6p_laddr = in6addr_any;
                inp->inp_lport = 0;
-               inp->in6p_last_outif = 0;
+               inp->in6p_last_outifp = NULL;
                if (!locked)
                        lck_rw_done(pcbinfo->mtx);
                return (EAGAIN);
@@ -1530,7 +1616,7 @@ in6_src_ioctl(u_long cmd, caddr_t data)
        if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
                return (EOPNOTSUPP); /* check for safety */
 
-       ent0 = *(struct in6_addrpolicy *)data;
+       bcopy(data, &ent0, sizeof (ent0));
 
        if (ent0.label == ADDR_LABEL_NOTAPP)
                return (EINVAL);
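
The struct assignment through a caddr_t becomes a bcopy(): the ioctl
argument buffer carries no alignment guarantee for struct
in6_addrpolicy, and dereferencing it as a struct could fault on
strict-alignment CPUs. The safe pattern, with a hypothetical struct
(memcpy standing in for bcopy):

    #include <string.h>

    struct policy { long label; /* ... */ };

    static void
    read_ioctl_arg(struct policy *out, const char *data) /* caddr_t */
    {
            memcpy(out, data, sizeof (*out));
    }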
index 97025049465d5093b86834bba1e35df734e9e094..ca7528f2d1e0debb97b3f5727f69d02e478ab5e3 100644 (file)
@@ -168,6 +168,8 @@ struct      in6_ifaddr {
        /* multicast addresses joined from the kernel */
        LIST_HEAD(, in6_multi_mship) ia6_memberships;
 };
+
+#define        ifatoia6(ifa)   ((struct in6_ifaddr *)(void *)(ifa))
 #endif /* XNU_KERNEL_PRIVATE */
 
 /* control structure to manage address selection policy */
@@ -306,6 +308,7 @@ struct      in6_ifreq {
                int     ifru_flags;
                int     ifru_flags6;
                int     ifru_metric;
+               int     ifru_intval;
                caddr_t ifru_data;
                struct in6_addrlifetime ifru_lifetime;
                struct in6_ifstat ifru_stat;
@@ -435,10 +438,12 @@ struct    in6_rrenumreq {
 #define IA6_MASKIN6(ia)        (&((ia)->ia_prefixmask.sin6_addr))
 #define IA6_SIN6(ia)   (&((ia)->ia_addr))
 #define IA6_DSTSIN6(ia)        (&((ia)->ia_dstaddr))
-#define IFA_IN6(x)     (&((struct sockaddr_in6 *)((x)->ifa_addr))->sin6_addr)
-#define IFA_DSTIN6(x)  (&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr)
-
-#define IFPR_IN6(x)    (&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr)
+#define IFA_IN6(x)     \
+       (&((struct sockaddr_in6 *)(void *)((x)->ifa_addr))->sin6_addr)
+#define IFA_DSTIN6(x)  \
+       (&((struct sockaddr_in6 *)(void *)((x)->ifa_dstaddr))->sin6_addr)
+#define IFPR_IN6(x)    \
+       (&((struct sockaddr_in6 *)(void *)((x)->ifpr_prefix))->sin6_addr)
 #endif /* XNU_KERNEL_PRIVATE */
 
 /*
@@ -607,6 +612,7 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
 #define SIOCDRDEL_IN6_32 _IOWR('u', 135, struct in6_defrouter_32)
 #define SIOCDRDEL_IN6_64 _IOWR('u', 135, struct in6_defrouter_64)
 #endif /* XNU_KERNEL_PRIVATE */
+#define        SIOCSETROUTERMODE_IN6   _IOWR('i', 136, struct in6_ifreq) /* enable/disable IPv6 router mode on interface */
 #endif /* PRIVATE */
 
 #define IN6_IFF_ANYCAST                0x01    /* anycast address */
@@ -619,10 +625,15 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
                                         */
 #define IN6_IFF_AUTOCONF       0x40    /* autoconfigurable address. */
 #define IN6_IFF_TEMPORARY      0x80    /* temporary (anonymous) address. */
+#define IN6_IFF_DYNAMIC                0x100   /* assigned by DHCPv6 service */
+#define IN6_IFF_OPTIMISTIC     0x200   /* optimistic DAD, i.e. RFC 4429 */
 #define IN6_IFF_NOPFX          0x8000  /* skip kernel prefix management.
                                         * XXX: this should be temporary.
                                         */
 
+/* Duplicate Address Detection [DAD] in progress. */
+#define IN6_IFF_DADPROGRESS    (IN6_IFF_TENTATIVE|IN6_IFF_OPTIMISTIC)
+
 /* do not input/output */
 #define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)
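
Editor's note: with optimistic DAD (RFC 4429) an address may be used while Duplicate Address Detection is still running, so "DAD in progress" is no longer synonymous with IN6_IFF_TENTATIVE; the new IN6_IFF_DADPROGRESS mask covers both states. A sketch of how the masks combine, assuming ia6 points at a valid in6_ifaddr:

    /* Sketch: classifying an IPv6 interface address by its DAD state. */
    if (ia6->ia6_flags & IN6_IFF_DADPROGRESS) {
            /* DAD unfinished: tentative, or optimistic per RFC 4429
             * (usable for output, but must not be defended yet) */
    } else if (ia6->ia6_flags & IN6_IFF_NOTREADY) {
            /* DAD finished and failed: IN6_IFF_DUPLICATED is set */
    }
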
 
@@ -653,6 +664,7 @@ do {                                                                \
 } while (0)
 
 __private_extern__ lck_rw_t in6_ifaddr_rwlock;
+__private_extern__ lck_mtx_t proxy6_lock;
 
 extern struct ifqueue ip6intrq;                /* IP6 packet input queue */
 extern struct in6_addr zeroin6_addr;
@@ -931,6 +943,7 @@ extern void in6_restoremkludge(struct in6_ifaddr *, struct ifnet *);
 extern void in6_purgemkludge(struct ifnet *);
 extern struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
 extern struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+extern struct in6_ifaddr *in6ifa_prproxyaddr(struct in6_addr *);
 extern char *ip6_sprintf(const struct in6_addr *);
 extern int in6_addr2scopeid(struct ifnet *, struct in6_addr *);
 extern int in6_matchlen(struct in6_addr *, struct in6_addr *);
index f0d56fd7b6d61897020aaa9d99057a5a6da585cd..42156858ef82deff0a40b3cdfa33c546fccaac1e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -118,7 +118,7 @@ extern int ipsec_bypass;
  *
  */
 
-void
+struct mbuf *
 ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
     int srcrt)
 {
@@ -126,6 +126,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
        struct sockaddr_in6 *dst;
        struct rtentry *rt;
        int error, type = 0, code = 0;
+       boolean_t proxy = FALSE;
        struct mbuf *mcopy = NULL;
        struct ifnet *ifp, *origifp;    /* maybe unnecessary */
        u_int32_t inzone, outzone;
@@ -142,8 +143,24 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
        getmicrotime(&timenow);
 #if PF
        pf_mtag = pf_find_mtag(m);
-       if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE)
-               ifscope = pf_mtag->rtableid;
+       if (pf_mtag != NULL && pf_mtag->pftag_rtableid != IFSCOPE_NONE)
+               ifscope = pf_mtag->pftag_rtableid;
+
+       /*
+        * If the caller provides a route which is on a different interface
+        * than the one specified for scoped forwarding, discard the route
+        * and do a lookup below.
+        */
+       if (ifscope != IFSCOPE_NONE && (rt = ip6forward_rt->ro_rt) != NULL) {
+               RT_LOCK(rt);
+               if (rt->rt_ifp->if_index != ifscope) {
+                       RT_UNLOCK(rt);
+                       rtfree(rt);
+                       rt = ip6forward_rt->ro_rt = NULL;
+               } else {
+                       RT_UNLOCK(rt);
+               }
+       }
 #endif /* PF */
 
 #if IPSEC
@@ -158,7 +175,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                if (ipsec6_in_reject(m, NULL)) {
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
                        m_freem(m);
-                       return;
+                       return (NULL);
                }
        }
 #endif /*IPSEC*/
@@ -185,15 +202,34 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                            if_name(m->m_pkthdr.rcvif));
                }
                m_freem(m);
-               return;
+               return (NULL);
        }
 
        if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
                /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
                icmp6_error(m, ICMP6_TIME_EXCEEDED,
                                ICMP6_TIME_EXCEED_TRANSIT, 0);
-               return;
+               return (NULL);
+       }
+
+       /*
+        * See if the destination is a proxied address, and if so pretend
+        * that it's for us.  This is mostly to handle NUD probes against
+        * the proxied addresses.  We filter for ICMPv6 here and will let
+        * icmp6_input handle the rest.
+        */
+       if (!srcrt && nd6_prproxy) {
+               VERIFY(!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst));
+               proxy = nd6_prproxy_isours(m, ip6, ip6forward_rt, ifscope);
+               /*
+                * Don't update hop limit while proxying; RFC 4389 4.1.
+                * Also skip IPsec forwarding path processing as this
+                * packet is not to be forwarded.
+                */
+               if (proxy)
+                       goto skip_ipsec;
        }
+
        ip6->ip6_hlim -= IPV6_HLIMDEC;
 
        /*
@@ -224,7 +260,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 #endif
                }
                m_freem(m);
-               return;
+               return (NULL);
        }
 
        error = 0;
@@ -247,7 +283,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 #endif
                }
                m_freem(m);
-               return;
+               return (NULL);
 
        case IPSEC_POLICY_BYPASS:
        case IPSEC_POLICY_NONE:
@@ -269,7 +305,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 #endif
                        }
                        m_freem(m);
-                       return;
+                       return (NULL);
                }
                /* do IPsec */
                break;
@@ -300,7 +336,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
        error = ipsec6_output_tunnel(&state, sp, 0);
        key_freesp(sp, KEY_SADB_UNLOCKED);
        if (state.tunneled == 4)
-               return;  /* packet is gone - sent over IPv4 */
+               return (NULL);  /* packet is gone - sent over IPv4 */
                
        m = state.m;
        if (state.ro.ro_rt) {
@@ -332,7 +368,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 #endif
                }
                m_freem(m);
-               return;
+               return (NULL);
        }
     }
     skip_ipsec:
@@ -375,7 +411,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                                icmp6_error(mcopy, ICMP6_DST_UNREACH,
                                            ICMP6_DST_UNREACH_NOROUTE, 0);
                        m_freem(m);
-                       return;
+                       return (NULL);
                }
                RT_LOCK_ASSERT_HELD(rt);
        } else if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
@@ -402,7 +438,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                                icmp6_error(mcopy, ICMP6_DST_UNREACH,
                                    ICMP6_DST_UNREACH_NOROUTE, 0);
                        m_freem(m);
-                       return;
+                       return (NULL);
                }
                RT_LOCK(rt);
                /* Take an extra ref for ourselves */
@@ -414,7 +450,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
         * destination for the reason that the destination is beyond the scope
         * of the source address, discard the packet and return an icmp6
         * destination unreachable error with Code 2 (beyond scope of source
-        * address).  We use a local copy of ip6_src, since in6_setscope()
+        * address), unless we are proxying (the source address is link-local
+        * for NUD probes).  We use a local copy of ip6_src, since in6_setscope()
         * will possibly modify its first argument.
         * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1]
         */
@@ -424,15 +461,16 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                ip6stat.ip6s_cantforward++;
                ip6stat.ip6s_badscope++;
                m_freem(m);
-               return;
+               return (NULL);
        }
        if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
                ip6stat.ip6s_cantforward++;
                ip6stat.ip6s_badscope++;
                m_freem(m);
-               return;
+               return (NULL);
        }
-       if (inzone != outzone) {
+
+       if (inzone != outzone && !proxy) {
                ip6stat.ip6s_cantforward++;
                ip6stat.ip6s_badscope++;
                in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
@@ -455,7 +493,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                                    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
                }
                m_freem(m);
-               return;
+               return (NULL);
        }
 
        /*
@@ -472,7 +510,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                ip6stat.ip6s_cantforward++;
                ip6stat.ip6s_badscope++;
                m_freem(m);
-               return;
+               return (NULL);
        }
 
        if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
@@ -520,11 +558,11 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                        RT_UNLOCK(rt);
                }
                m_freem(m);
-               return;
+               return (NULL);
        }
 
        if (rt->rt_flags & RTF_GATEWAY)
-               dst = (struct sockaddr_in6 *)rt->rt_gateway;
+               dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway;
 
        /*
         * If we are to forward the packet using the same interface
@@ -535,7 +573,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
         * Also, don't send redirect if forwarding using a route
         * modified by a redirect.
         */
-       if (ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
+       if (!proxy &&
+           ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
            (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
                if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
                        /*
@@ -553,7 +592,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
                        icmp6_error(mcopy, ICMP6_DST_UNREACH,
                                    ICMP6_DST_UNREACH_ADDR, 0);
                        m_freem(m);
-                       return;
+                       return (NULL);
                }
                type = ND_REDIRECT;
        }
@@ -628,11 +667,23 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
        /* Drop the lock but retain the extra ref */
        RT_UNLOCK(rt);
 
+       /*
+        * If this is to be processed locally, let ip6_input have it.
+        */
+       if (proxy) {
+               VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST);
+               /* Release extra ref */
+               RT_REMREF(rt);
+               if (mcopy != NULL)
+                       m_freem(mcopy);
+               return (m);
+       }
+
 #if PF
        /* Invoke outbound packet filter */
-       error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
+       error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
 
-       if (error) {
+       if (error != 0 || m == NULL) {
                if (m != NULL) {
                        panic("%s: unexpected packet %p\n", __func__, m);
                        /* NOTREACHED */
@@ -643,7 +694,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
        ip6 = mtod(m, struct ip6_hdr *);
 #endif /* PF */
 
-       error = nd6_output(ifp, origifp, m, dst, rt);
+       error = nd6_output(ifp, origifp, m, dst, rt, NULL);
        if (error) {
                in6_ifstat_inc(ifp, ifs6_out_discard);
                ip6stat.ip6s_cantforward++;
@@ -664,7 +715,7 @@ senderr:
        if (mcopy == NULL) {
                /* Release extra ref */
                RT_REMREF(rt);
-               return;
+               return (NULL);
        }
        switch (error) {
        case 0:
@@ -673,7 +724,7 @@ senderr:
                        icmp6_redirect_output(mcopy, rt);
                        /* Release extra ref */
                        RT_REMREF(rt);
-                       return;
+                       return (NULL);
                }
 #endif
                goto freecopy;
@@ -698,11 +749,11 @@ senderr:
        icmp6_error(mcopy, type, code, 0);
        /* Release extra ref */
        RT_REMREF(rt);
-       return;
+       return (NULL);
 
  freecopy:
        m_freem(mcopy);
        /* Release extra ref */
        RT_REMREF(rt);
-       return;
+       return (NULL);
 }
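
Editor's note: with the signature change from void to struct mbuf *, ip6_forward() now has a three-way contract: it returns NULL when the packet was forwarded, dropped, or otherwise consumed, and returns the mbuf itself when the destination turns out to be a proxied address that must be delivered locally. Condensed from the ip6_input() hunk later in this commit, the caller side looks like:

    /* Caller contract, condensed from the ip6_input() change below. */
    if ((m = ip6_forward(m, &ip6_forward_rt, 0)) == NULL)
            goto done;              /* forwarded, dropped, or consumed */
    /* non-NULL: destination is proxied; claim the packet as ours */
    VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST);
    deliverifp = ip6_forward_rt.ro_rt->rt_ifp;
    ours = 1;
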
index ae221caad4e4fd2891b130acc2be081107468704..3f0e4b23fe3a89a4a7c362a215672ec7a5796733 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
index ae8fecd689a080106caa62721d80281c2b655e04..9fcc992cdab38469d443f5221a55cfc59fc2c950 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <net/route.h>
 #include <net/kpi_protocol.h>
 #include <net/ntstat.h>
+#include <net/init.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -147,13 +148,17 @@ extern int ipsec_bypass;
 
 #include <netinet6/ip6_fw.h>
 
+#if DUMMYNET
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#endif /* DUMMYNET */
+
 #include <netinet/kpi_ipfilter_var.h>
 
 #include <netinet6/ip6protosw.h>
 
 /* we need it for NLOOP. */
 #include "loop.h"
-#include "faith.h"
 
 #include <net/net_osdep.h>
 
@@ -181,13 +186,10 @@ int ip6_sourcecheck_interval;             /* XXX */
 const int int6intrq_present = 1;
 
 int ip6_ours_check_algorithm;
-int in6_init2done = 0;
-int in6_init_done = 0;
 
-#define _CASSERT(x)    \
-       switch (0) { case 0: case (x): ; }
 #define IN6_IFSTAT_REQUIRE_ALIGNED_64(f)       \
        _CASSERT(!(offsetof(struct in6_ifstat, f) % sizeof (uint64_t)))
+
 #define ICMP6_IFSTAT_REQUIRE_ALIGNED_64(f)     \
        _CASSERT(!(offsetof(struct icmp6_ifstat, f) % sizeof (uint64_t)))
 
@@ -203,12 +205,18 @@ struct ip6stat ip6stat;
 #ifdef __APPLE__
 struct ifqueue ip6intrq;
 decl_lck_mtx_data(, ip6_init_mutex);
-lck_mtx_t              *dad6_mutex;
-lck_mtx_t              *nd6_mutex;
-lck_mtx_t              *prefix6_mutex;
-lck_mtx_t              *scope6_mutex;
+decl_lck_mtx_data(, proxy6_lock);
+decl_lck_mtx_data(, dad6_mutex_data);
+decl_lck_mtx_data(, nd6_mutex_data);
+decl_lck_mtx_data(, prefix6_mutex_data);
+decl_lck_mtx_data(, scope6_mutex_data);
+lck_mtx_t              *dad6_mutex = &dad6_mutex_data;
+lck_mtx_t              *nd6_mutex = &nd6_mutex_data;
+lck_mtx_t              *prefix6_mutex = &prefix6_mutex_data;
+lck_mtx_t              *scope6_mutex = &scope6_mutex_data;
 #ifdef ENABLE_ADDRSEL
-lck_mtx_t              *addrsel_mutex;
+decl_lck_mtx_data(, addrsel_mutex_data);
+lck_mtx_t              *addrsel_mutex = &addrsel_mutex_data;
 #endif
 decl_lck_rw_data(, in6_ifs_rwlock);
 decl_lck_rw_data(, icmp6_ifs_rwlock);
@@ -220,7 +228,7 @@ extern lck_mtx_t    *inet6_domain_mutex;
 extern int loopattach_done;
 extern void addrsel_policy_init(void);
 
-static void ip6_init2(void *);
+static void ip6_init_delayed(void);
 static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
 
 static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -230,18 +238,56 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
 
 #ifdef __APPLE__
 void gifattach(void);
-void faithattach(void);
 void stfattach(void);
 #endif
 
-extern lck_mtx_t *domain_proto_mtx;
-
 SYSCTL_DECL(_net_inet6_ip6);
 
 int    ip6_doscopedroute = 1;
 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED,
      &ip6_doscopedroute, 0, "Enable IPv6 scoped routing");
 
+int    ip6_restrictrecvif = 1;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, restrictrecvif,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_restrictrecvif, 0,
+    "Enable inbound interface restrictions");
+
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not,
+ * copy the contents of the mbuf chain into a new chain, and free the original
+ * one.  Create some head room in the first mbuf of the new chain, in case
+ * it's needed later on.
+ *
+ * RFC 2460 says that IPv6 headers are 64-bit aligned, but network interfaces
+ * mostly align to 32-bit boundaries.  Care should be taken never to use 64-bit
+ * load/store operations on the fields in IPv6 headers.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        IP6_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define        IP6_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do {                 \
+       if (!IP6_HDR_ALIGNED_P(mtod(_m, caddr_t))) {                    \
+               struct mbuf *_n;                                        \
+               struct ifnet *__ifp = (_ifp);                           \
+               atomic_add_64(&(__ifp)->if_alignerrs, 1);               \
+               if (((_m)->m_flags & M_PKTHDR) &&                       \
+                   (_m)->m_pkthdr.header != NULL)                      \
+                       (_m)->m_pkthdr.header = NULL;                   \
+               _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT);        \
+               if (_n == NULL) {                                       \
+                       ip6stat.ip6s_toosmall++;                        \
+                       m_freem(_m);                                    \
+                       (_m) = NULL;                                    \
+                       _action                                         \
+               } else {                                                \
+                       VERIFY(_n != (_m));                             \
+                       (_m) = _n;                                      \
+               }                                                       \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
+
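
Editor's note: the macro is a no-op on i386/x86_64; elsewhere it realigns the mbuf with m_defrag_offset(), reserving max_linkhdr bytes of head room, and `_action` is the statement executed when realignment fails (at which point the mbuf has already been freed and nulled). It is invoked further down in this file as:

    /* Usage as it appears later in ip6_input(): */
    IP6_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;);
    /* from here on, mtod(m, struct ip6_hdr *) yields an aligned pointer */
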
 static void
 ip6_proto_input(
        __unused protocol_family_t      protocol,
@@ -286,30 +332,20 @@ ip6_init()
        ip6_mutex_grp = lck_grp_alloc_init("ip6", ip6_mutex_grp_attr);
        ip6_mutex_attr = lck_attr_alloc_init();
 
-       if ((dad6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-               panic("ip6_init: can't alloc dad6_mutex\n");
-       }
-       if ((nd6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-               panic("ip6_init: can't alloc nd6_mutex\n");
-       }
-
-       if ((prefix6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-               panic("ip6_init: can't alloc prefix6_mutex\n");
-       }
-
-       if ((scope6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-               panic("ip6_init: can't alloc scope6_mutex\n");
-       }
+       lck_mtx_init(dad6_mutex, ip6_mutex_grp, ip6_mutex_attr);
+       lck_mtx_init(nd6_mutex, ip6_mutex_grp, ip6_mutex_attr);
+       lck_mtx_init(prefix6_mutex, ip6_mutex_grp, ip6_mutex_attr);
+       lck_mtx_init(scope6_mutex, ip6_mutex_grp, ip6_mutex_attr);
 
 #ifdef ENABLE_ADDRSEL
-       if ((addrsel_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-               panic("ip6_init: can't alloc addrsel_mutex\n");
-       }
+       lck_mtx_init(addrsel_mutex, ip6_mutex_grp, ip6_mutex_attr);
 #endif
 
+       lck_mtx_init(&proxy6_lock, ip6_mutex_grp, ip6_mutex_attr);
+       lck_mtx_init(&ip6_init_mutex, ip6_mutex_grp, ip6_mutex_attr);
+
        lck_rw_init(&in6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr);
        lck_rw_init(&icmp6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr);
-       lck_mtx_init(&ip6_init_mutex, ip6_mutex_grp, ip6_mutex_attr);
 
        inet6domain.dom_flags = DOM_REENTRANT;  
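
Editor's note: the block above also changes the allocation strategy; the mutexes are now statically backed via decl_lck_mtx_data() and set up with lck_mtx_init(), replacing the old lck_mtx_alloc_init() calls and their panic-on-NULL error paths. In isolation, the pattern is:

    /* Sketch: statically backed lock; initialization cannot fail. */
    decl_lck_mtx_data(static, example_mtx_data);    /* the storage itself */
    static lck_mtx_t *example_mtx = &example_mtx_data;

    static void
    example_lock_init(lck_grp_t *grp, lck_attr_t *attr)
    {
            lck_mtx_init(example_mtx, grp, attr);   /* no NULL to check */
    }
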
 
@@ -393,25 +429,22 @@ ip6_init()
        ip6_flow_seq = random() ^ tv.tv_usec;
        microtime(&tv);
        ip6_desync_factor = (random() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR;
-       timeout(ip6_init2, (caddr_t)0, 1 * hz);
 
-       lck_mtx_unlock(domain_proto_mtx);       
+       /*
+        * P2P interfaces often route the local address to the loopback
+        * interface. At this point, lo0 hasn't been initialized yet, which
+        * means that we need to delay the IPv6 configuration of lo0.
+        */
+       net_init_add(ip6_init_delayed);
+
+       domain_proto_mtx_unlock(TRUE);
        proto_register_input(PF_INET6, ip6_proto_input, NULL, 0);
-       lck_mtx_lock(domain_proto_mtx); 
+       domain_proto_mtx_lock();
 }
 
 static void
-ip6_init2(
-       __unused void *dummy)
+ip6_init_delayed(void)
 {
-       /*
-        * to route local address of p2p link to loopback,
-        * assign loopback address first.
-        */
-       if (loopattach_done == 0) {
-               timeout(ip6_init2, (caddr_t)0, 1 * hz);
-               return;
-       }
        (void) in6_ifattach(lo_ifp, NULL, NULL);
 
 #ifdef __APPLE__
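
Editor's note: the old ip6_init2() re-armed a timeout until loopattach_done indicated that lo0 existed; the replacement registers ip6_init_delayed() with net_init_add(), which runs the callback once network-stack initialization has completed. The idiom, sketched with a hypothetical callback name:

    /* Sketch: defer work until the stack (including lo0) is fully up. */
    static void
    example_delayed_init(void)
    {
            (void) in6_ifattach(lo_ifp, NULL, NULL);    /* lo0 exists now */
    }

    /* ... from the subsystem's init routine ... */
    net_init_add(example_delayed_init);
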
@@ -426,29 +459,10 @@ ip6_init2(
 #if NGIF
        gifattach();
 #endif
-#if NFAITH
-       faithattach();
-#endif
 #if NSTF
        stfattach();
 #endif
-#endif
-       in6_init2done = 1;
-
-       lck_mtx_lock(&ip6_init_mutex);
-       in6_init_done = 1;
-       wakeup(&in6_init_done);
-       lck_mtx_unlock(&ip6_init_mutex);
-}
-
-void
-ip6_fin()
-{
-       lck_mtx_lock(&ip6_init_mutex);
-       while (in6_init_done == 0) {
-               (void) msleep(&in6_init_done, &ip6_init_mutex, 0, "ip6_fin()", NULL);
-       }
-       lck_mtx_unlock(&ip6_init_mutex);
+#endif /* __APPLE__ */
 }
 
 void
@@ -465,6 +479,12 @@ ip6_input(struct mbuf *m)
        struct in6_ifaddr *ia6 = NULL;
        struct route_in6 ip6_forward_rt;
        struct sockaddr_in6 *dst6;
+#if DUMMYNET
+       struct m_tag    *tag;
+       struct ip_fw_args args;
+               
+       bzero(&args, sizeof(struct ip_fw_args));
+#endif /* DUMMYNET */
 
        bzero(&ip6_forward_rt, sizeof(ip6_forward_rt));
 
@@ -473,6 +493,28 @@ ip6_input(struct mbuf *m)
         */
        MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
 
+       /* Perform IP header alignment fixup, if needed */
+       IP6_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;);
+
+#if DUMMYNET
+       if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
+           KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
+               struct dn_pkt_tag       *dn_tag;
+
+               dn_tag = (struct dn_pkt_tag *)(tag+1);
+               
+               args.fwa_pf_rule = dn_tag->dn_pf_rule;
+               
+               m_tag_delete(m, tag);
+       }
+       
+       if (args.fwa_pf_rule) {
+               ip6 = mtod(m, struct ip6_hdr *); /* In case PF got disabled */
+
+               goto check_with_pf;
+       }
+#endif /* DUMMYNET */
+
        /*
         * No need to process the packet twice if we've
         * already seen it
@@ -485,7 +527,7 @@ ip6_input(struct mbuf *m)
                goto injectit;
        } else
                seen = 1;
-       
+
 #if IPSEC
        /*
         * should the inner packet be considered authentic?
@@ -524,7 +566,11 @@ ip6_input(struct mbuf *m)
 #undef M2MMAX
        }
 
-       /* drop the packet if IPv6 operation is disabled on the IF */
+       /*
+        * Drop the packet if IPv6 operation is disabled on the IF;
+        * accessing the flag is done without acquiring nd_ifinfo lock
+        * for performance reasons.
+        */
        lck_rw_lock_shared(nd_if_rwlock);
        if (m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim &&
            (nd_ifinfo[m->m_pkthdr.rcvif->if_index].flags & ND6_IFF_IFDISABLED)) {
@@ -684,12 +730,19 @@ ip6_input(struct mbuf *m)
                }
        }
 
+#if DUMMYNET
+check_with_pf:
+#endif
 #if PF
        /* Invoke inbound packet filter */
        if (PF_IS_ENABLED) {
                int error;
-               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE);
-               if (error != 0) {
+#if DUMMYNET
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE, &args);
+#else
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE, NULL);
+#endif
+               if (error != 0 || m == NULL) {
                        if (m != NULL) {
                                panic("%s: unexpected packet %p\n", __func__, m);
                                /* NOTREACHED */
@@ -740,12 +793,11 @@ ip6_input(struct mbuf *m)
                if (in6m != NULL) {
                        IN6M_REMREF(in6m);
                        ours = 1;
-               }
-               else 
+               } else if (!nd6_prproxy
 #if MROUTING
-               if (!ip6_mrouter)
+                   && !ip6_mrouter
 #endif
-               {
+                   ) {
                        ip6stat.ip6s_notmember++;
                        ip6stat.ip6s_cantforward++;
                        in6_ifstat_inc(ifp, ifs6_in_discard);
@@ -841,21 +893,6 @@ ip6_input(struct mbuf *m)
                goto bad;
        }
 
-       /*
-        * FAITH (Firewall Aided Internet Translator)
-        */
-#if defined(NFAITH) && 0 < NFAITH
-       if (ip6_keepfaith) {
-               if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp
-                && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
-                       /* XXX do we need more sanity checks? */
-                       ours = 1;
-                       deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */
-                       RT_UNLOCK(ip6_forward_rt.ro_rt);
-                       goto hbhcheck;
-               }
-       }
-#endif
        if (ip6_forward_rt.ro_rt != NULL)
                RT_UNLOCK(ip6_forward_rt.ro_rt);
 
@@ -873,8 +910,7 @@ ip6_input(struct mbuf *m)
        /*
         * record address information into m_aux, if we don't have one yet.
         * note that we are unable to record it, if the address is not listed
-        * as our interface address (e.g. multicast addresses, addresses
-        * within FAITH prefixes and such).
+        * as our interface address (e.g. multicast addresses, etc.)
         */
        if (deliverifp && (ia6 = ip6_getdstifaddr(m)) == NULL) {
                ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
@@ -1007,12 +1043,40 @@ ip6_input(struct mbuf *m)
                        goto bad;
                }
 #endif
+               if (!ours && nd6_prproxy) {
+                       /*
+                        * If this isn't for us, this might be a Neighbor
+                        * Solicitation (dst is solicited-node multicast)
+                        * against an address in one of the proxied prefixes;
+                        * if so, claim the packet and let icmp6_input()
+                        * handle the rest.
+                        */
+                       ours = nd6_prproxy_isours(m, ip6, NULL, IFSCOPE_NONE);
+                       VERIFY(!ours ||
+                           (m->m_pkthdr.aux_flags & MAUXF_PROXY_DST));
+               }
                if (!ours)
                        goto bad;
        } else if (!ours) {
-               ip6_forward(m, &ip6_forward_rt, 0);
-               goto done;
-       }       
+               /*
+                * The unicast forwarding function might return the packet
+                * if we are proxying prefix(es), and if the packet is an
+                * ICMPv6 packet that has failed the zone checks, but is
+                * targeted towards a proxied address (this is optimized by
+                * way of the RTF_PROXY test).  If so, claim the packet as ours
+                * and let icmp6_input() handle the rest.  The packet's hop
+                * limit value is kept intact (it's not decremented).  This
+                * is for supporting Neighbor Unreachability Detection between
+                * proxied nodes on different links (src is link-local, dst
+                * is target address.)
+                */
+               if ((m = ip6_forward(m, &ip6_forward_rt, 0)) == NULL)
+                       goto done;
+               VERIFY(ip6_forward_rt.ro_rt != NULL);
+               VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST);
+               deliverifp = ip6_forward_rt.ro_rt->rt_ifp;
+               ours = 1;
+       }
 
        ip6 = mtod(m, struct ip6_hdr *);
 
@@ -1041,6 +1105,13 @@ ip6_input(struct mbuf *m)
 injectit:
        nest = 0;
 
+       /*
+        * Perform IP header alignment fixup again, if needed.  Note that
+        * we do it once for the outermost protocol, and we assume each
+        * protocol handler wouldn't mess with the alignment afterwards.
+        */
+       IP6_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;);
+
        while (nxt != IPPROTO_DONE) {
                struct ipfilter *filter;
                int (*pr_input)(struct mbuf **, int *, int);
@@ -1067,7 +1138,8 @@ injectit:
                 * note that we do not visit this with protocols with pcb layer
                 * code - like udp/tcp/raw ip.
                 */
-               if ((ipsec_bypass == 0) && (ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0) {
+               if ((ipsec_bypass == 0) &&
+                   (ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0) {
                        if (ipsec6_in_reject(m, NULL)) {
                                IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
                                goto bad;
@@ -1082,13 +1154,15 @@ injectit:
                        ipf_ref();
                        TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
                                if (seen == 0) {
-                                       if ((struct ipfilter *)inject_ipfref == filter)
+                                       if ((struct ipfilter *)inject_ipfref ==
+                                           filter)
                                                seen = 1;
                                } else if (filter->ipf_filter.ipf_input) {
                                        errno_t result;
-                                       
+
                                        result = filter->ipf_filter.ipf_input(
-                                               filter->ipf_filter.cookie, (mbuf_t*)&m, off, nxt);
+                                               filter->ipf_filter.cookie,
+                                               (mbuf_t *)&m, off, nxt);
                                        if (result == EJUSTRETURN) {
                                                ipf_unref();
                                                goto done;
@@ -1431,12 +1505,12 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
                                return NULL;
         }
        if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) {
-               int tc = m->m_pkthdr.prio;
-               
+               int tc = m_get_traffic_class(m);
+
                mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc),
                        SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
-               if (*mp == NULL) 
-                       return NULL;
+               if (*mp == NULL)
+                       return (NULL);
        }
 
        if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
@@ -1716,7 +1790,7 @@ ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
        bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */
        mtuctl.ip6m_mtu = *mtu;
        mtuctl.ip6m_addr = *dst;
-       if (sa6_recoverscope(&mtuctl.ip6m_addr))
+       if (sa6_recoverscope(&mtuctl.ip6m_addr, TRUE))
                return;
 
        if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
@@ -1950,13 +2024,13 @@ ip6_lasthdr(m, off, proto, nxtp)
 }
 
 struct ip6aux *
-ip6_addaux(
-       struct mbuf *m)
+ip6_addaux(struct mbuf *m)
 {
        struct m_tag            *tag;
-       
+
        /* Check if one is already allocated */
-       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
+       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
+           KERNEL_TAG_TYPE_INET6, NULL);
        if (tag == NULL) {
                /* Allocate a tag */
                tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6,
@@ -1967,28 +2041,28 @@ ip6_addaux(
                        m_tag_prepend(m, tag);
                }
        }
-       
-       return tag ? (struct ip6aux*)(tag + 1) : NULL;
+
+       return (tag ? (struct ip6aux *)(tag + 1) : NULL);
 }
 
 struct ip6aux *
-ip6_findaux(
-       struct mbuf *m)
+ip6_findaux(struct mbuf *m)
 {
        struct m_tag    *tag;
-       
-       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
-       
-       return tag ? (struct ip6aux*)(tag + 1) : NULL;
+
+       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
+           KERNEL_TAG_TYPE_INET6, NULL);
+
+       return (tag ? (struct ip6aux *)(tag + 1) : NULL);
 }
 
 void
-ip6_delaux(
-       struct mbuf *m)
+ip6_delaux(struct mbuf *m)
 {
        struct m_tag    *tag;
 
-       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
+       tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
+           KERNEL_TAG_TYPE_INET6, NULL);
        if (tag) {
                m_tag_delete(m, tag);
        }
index 39f14628426d8d84842995a02e51a2a55a31c077..f6504807beb3b8e3143e12b0eb93709d9e43b505 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -358,12 +358,17 @@ mrt6_ioctl(u_long cmd, caddr_t data)
        int error = 0;
 
        switch (cmd) {
-       case SIOCGETSGCNT_IN6:
-               return (get_sg_cnt((struct sioc_sg_req6 *)data));
-               /* NOTREACHED */
+       case SIOCGETSGCNT_IN6: {        /* struct sioc_sg_req6 */
+               struct sioc_sg_req6 req;
+
+               bcopy(data, &req, sizeof (req));
+               error = get_sg_cnt(&req);
+               bcopy(&req, data, sizeof (req));
+               break;
+       }
 
-       case SIOCGETMIFCNT_IN6_32:
-       case SIOCGETMIFCNT_IN6_64:
+       case SIOCGETMIFCNT_IN6_32:      /* struct sioc_mif_req6_32 */
+       case SIOCGETMIFCNT_IN6_64:      /* struct sioc_mif_req6_64 */
                return (get_mif6_cnt(data, cmd == SIOCGETMIFCNT_IN6_64));
                /* NOTREACHED */
 
@@ -405,28 +410,36 @@ get_mif6_cnt(void *data, int p64)
 {
        if (p64) {
                struct sioc_mif_req6_64 *req = data;
+               mifi_t mifi;
 
-               mifi_t mifi = req->mifi;
-
+               bcopy(&req->mifi, &mifi, sizeof (mifi));
                if (mifi >= nummifs)
                        return (EINVAL);
 
-               req->icount = mif6table[mifi].m6_pkt_in;
-               req->ocount = mif6table[mifi].m6_pkt_out;
-               req->ibytes = mif6table[mifi].m6_bytes_in;
-               req->obytes = mif6table[mifi].m6_bytes_out;
+               bcopy(&mif6table[mifi].m6_pkt_in, &req->icount,
+                   sizeof (req->icount));
+               bcopy(&mif6table[mifi].m6_pkt_out, &req->ocount,
+                   sizeof (req->ocount));
+               bcopy(&mif6table[mifi].m6_bytes_in, &req->ibytes,
+                   sizeof (req->ibytes));
+               bcopy(&mif6table[mifi].m6_bytes_out, &req->obytes,
+                   sizeof (req->obytes));
        } else {
                struct sioc_mif_req6_32 *req = data;
+               mifi_t mifi;
 
-               mifi_t mifi = req->mifi;
-
+               bcopy(&req->mifi, &mifi, sizeof (mifi));
                if (mifi >= nummifs)
                        return (EINVAL);
 
-               req->icount = mif6table[mifi].m6_pkt_in;
-               req->ocount = mif6table[mifi].m6_pkt_out;
-               req->ibytes = mif6table[mifi].m6_bytes_in;
-               req->obytes = mif6table[mifi].m6_bytes_out;
+               bcopy(&mif6table[mifi].m6_pkt_in, &req->icount,
+                   sizeof (req->icount));
+               bcopy(&mif6table[mifi].m6_pkt_out, &req->ocount,
+                   sizeof (req->ocount));
+               bcopy(&mif6table[mifi].m6_bytes_in, &req->ibytes,
+                   sizeof (req->ibytes));
+               bcopy(&mif6table[mifi].m6_bytes_out, &req->obytes,
+                   sizeof (req->obytes));
        }
        return (0);
 }
@@ -1547,7 +1560,7 @@ phyint_send(ip6, mifp, m)
                mb_copy->m_pkthdr.csum_flags = 0;
 
                error = dlil_output(ifp, PF_INET6, mb_copy,
-                               NULL, (struct sockaddr *)&ro.ro_dst, 0);
+                               NULL, (struct sockaddr *)&ro.ro_dst, 0, NULL);
 #else
                error = (*ifp->if_output)(ifp, mb_copy,
                                          (struct sockaddr *)&ro.ro_dst,
@@ -1676,6 +1689,9 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
 
        ++pim6stat.pim6s_rcv_total;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
        pimlen = m->m_pkthdr.len - *offp;
 
@@ -1876,7 +1892,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
 #ifdef __APPLE__
 
                 if (lo_ifp) {
-                    dlil_output(lo_ifp, PF_INET6, m, 0, (struct sockaddr *)&dst, 0);
+                    dlil_output(lo_ifp, PF_INET6, m, 0, (struct sockaddr *)&dst, 0, NULL);
                }
                 else {
                     printf("Warning: pim6_input call to dlil_find_dltag failed!\n");
index 9b58e7ad7bf396999652e6c1170b3ab43adb1fd2..d96db58f7c632bbc3b81ec8ec851dbb713269f4e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -148,6 +148,11 @@ extern int ipsec_bypass;
 
 #include <netinet6/ip6_fw.h>
 
+#if DUMMYNET
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#endif /* DUMMYNET */
+
 #include <net/net_osdep.h>
 
 #include <netinet/kpi_ipfilter_var.h>
@@ -160,14 +165,6 @@ extern int ipsec_bypass;
 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
 #endif
 
-struct ip6_exthdrs {
-       struct mbuf *ip6e_ip6;
-       struct mbuf *ip6e_hbh;
-       struct mbuf *ip6e_dest1;
-       struct mbuf *ip6e_rthdr;
-       struct mbuf *ip6e_dest2;
-};
-
 int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
                            struct socket *, struct sockopt *sopt);
@@ -216,6 +213,49 @@ static struct zone *im6o_zone;             /* zone for ip6_moptions */
 #define        IM6O_ZONE_MAX           64              /* maximum elements in zone */
 #define        IM6O_ZONE_NAME          "ip6_moptions"  /* zone name */
 
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int     ip6_maxchainsent = 0;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip6_maxchainsent, 0, "use dlil_output_list");
+
+/*
+ * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
+ * walks through the packet chain and sends each mbuf separately.
+ */
+int
+ip6_output_list(
+       struct mbuf *m0,
+       int packetlist,
+       struct ip6_pktopts *opt,
+       struct route_in6 *ro,
+       int flags,
+       struct ip6_moptions *im6o,
+       struct ifnet **ifpp,    /* XXX: just for statistics */
+       struct ip6_out_args *ip6oap)
+{
+#pragma unused(packetlist)
+       struct mbuf *m = m0, *nextpkt;
+       int error = 0;
+
+       while (m) {
+               /*
+                * Break the chain before calling ip6_output() and free the
+                * mbufs if there was an error.
+                */
+               nextpkt = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+               error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oap);
+               if (error) {
+                       if (nextpkt)
+                               m_freem_list(nextpkt);
+                       return (error);
+               }
+               m = nextpkt;
+       }
+
+       return (error);
+}
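
Editor's note: as the XXX comment says, nd6_output() cannot take chains yet, so ip6_output_list() simply unthreads m_nextpkt and submits each packet through ip6_output(), freeing the unsent remainder with m_freem_list() on the first error. A hedged call-site sketch, assuming chain, ro and ip6oa were set up by the caller:

    /* Sketch: emitting a chain of packets linked via m_nextpkt. */
    error = ip6_output_list(chain, 0, NULL, &ro, IPV6_OUTARGS, NULL,
        NULL, &ip6oa);
    /* on failure, the unsent tail of the chain has already been freed */
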
 
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
@@ -236,7 +276,7 @@ ip6_output(
        int flags,
        struct ip6_moptions *im6o,
        struct ifnet **ifpp,    /* XXX: just for statistics */
-       struct ip6_out_args *ip6oa)
+       struct ip6_out_args *ip6oap)
 {
        struct ip6_hdr *ip6, *mhip6;
        struct ifnet *ifp = NULL, *origifp = NULL;
@@ -258,11 +298,26 @@ ip6_output(
        int needipsec = 0;
        ipfilter_t inject_filter_ref;
        int tso;
-       unsigned int ifscope;
-       unsigned int nocell;
        boolean_t select_srcif;
        struct ipf_pktopts *ippo = NULL, ipf_pktopts;
+       struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 };
+       struct flowadv *adv = NULL;
        u_int32_t ifmtu;
+#if DUMMYNET
+       struct m_tag *tag;
+       struct route_in6 saved_route;
+       struct route_in6 saved_ro_pmtu;
+       struct ip_fw_args args;
+       struct sockaddr_in6 dst_buf;
+
+       bzero(&args, sizeof(struct ip_fw_args));
+#endif /* DUMMYNET */
+
+       if ((flags & IPV6_OUTARGS) && ip6oap != NULL) {
+               ip6oa = *ip6oap;
+               adv = &ip6oap->ip6oa_flowadv;
+               adv->code = FADV_SUCCESS;
+       }
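
Editor's note: ip6_output() now snapshots the caller's ip6_out_args into a local and primes a flow advisory that the lower layers (nd6_output(), via the new adv argument) can update. A caller passing IPV6_OUTARGS can inspect the advisory afterwards; FADV_SUCCESS appears above, while treating any other code as a flow-control indication is an assumption here:

    /* Sketch: reading the flow advisory back after transmit. */
    struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 };

    error = ip6_output(m, NULL, &ro, IPV6_OUTARGS, NULL, NULL, &ip6oa);
    if (ip6oa.ip6oa_flowadv.code != FADV_SUCCESS) {
            /* assumed: interface advised flow control; back off this flow */
    }
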
 
 #if IPSEC
        int needipsectun = 0;
@@ -272,7 +327,7 @@ ip6_output(
        struct ipsec_output_state ipsec_state;
 
        bzero(&ipsec_state, sizeof(ipsec_state));
-               
+
        /* for AH processing. stupid to have "socket" variable in IP layer... */
        if (ipsec_bypass == 0)
        {
@@ -286,26 +341,75 @@ ip6_output(
 
        ip6 = mtod(m, struct ip6_hdr *);
        inject_filter_ref = ipf_get_inject_filter(m);
-       
+
+       /* Grab info from mtags prepended to the chain */
+#if DUMMYNET
+       if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
+           KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
+               struct dn_pkt_tag       *dn_tag;
+
+               dn_tag = (struct dn_pkt_tag *)(tag+1);
+               args.fwa_pf_rule = dn_tag->dn_pf_rule;
+
+               bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
+               dst = &dst_buf;
+               ifp = dn_tag->dn_ifp;
+               if (ifp)
+                       ifnet_reference(ifp);
+               flags = dn_tag->dn_flags;
+               if (dn_tag->dn_flags & IPV6_OUTARGS)
+                       ip6oa = dn_tag->dn_ip6oa;
+
+               saved_route = dn_tag->dn_ro6;
+               ro = &saved_route;
+               saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
+               ro_pmtu = &saved_ro_pmtu;
+               origifp = dn_tag->dn_origifp;
+               if (origifp)
+                       ifnet_reference(origifp);
+               mtu = dn_tag->dn_mtu;
+               alwaysfrag = dn_tag->dn_alwaysfrag;
+               unfragpartlen = dn_tag->dn_unfragpartlen;
+
+               bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
+
+               m_tag_delete(m0, tag);
+       }
+#endif /* DUMMYNET */
+
        finaldst = ip6->ip6_dst;
 
        if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
-               select_srcif = !(flags & (IPV6_FORWARDING | IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL));
-               ifscope = ip6oa->ip6oa_boundif;
-               ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
-               ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
+               if ((select_srcif = (!(flags & (IPV6_FORWARDING |
+                   IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
+                   (ip6oa.ip6oa_flags & IP6OAF_SELECT_SRCIF))))
+                       ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
+
+               if ((ip6oa.ip6oa_flags & IP6OAF_BOUND_IF) &&
+                   ip6oa.ip6oa_boundif != IFSCOPE_NONE) {
+                       ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
+                           (ip6oa.ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
+               }
+
+               if (ip6oa.ip6oa_flags & IP6OAF_BOUND_SRCADDR)
+                       ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
        } else {
                select_srcif = FALSE;
-               ifscope = IFSCOPE_NONE;
+               ip6oa.ip6oa_boundif = IFSCOPE_NONE;
+               ip6oa.ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_IF |
+                   IP6OAF_BOUND_SRCADDR);
        }
 
-       if (flags & IPV6_OUTARGS) {
-               nocell = ip6oa->ip6oa_nocell;
-               if (nocell)
-                       ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
-       } else {
-               nocell = 0;
+       if ((flags & IPV6_OUTARGS) && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR))
+               ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+
+#if DUMMYNET
+       if (args.fwa_pf_rule) {
+               ip6 = mtod(m, struct ip6_hdr *);
+
+               goto check_with_pf;
        }
+#endif /* DUMMYNET */
 
 #define MAKE_EXTHDR(hp, mp)                                            \
     do {                                                               \
@@ -317,9 +421,9 @@ ip6_output(
                        goto freehdrs;                                  \
        }                                                               \
     } while (0)
-       
+
        bzero(&exthdrs, sizeof(exthdrs));
-       
+
        if (opt) {
                /* Hop-by-Hop options header */
                MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
@@ -346,7 +450,7 @@ ip6_output(
 #if IPSEC
        if (ipsec_bypass != 0)
                goto skip_ipsec;
-       
+
        /* get a security policy for this packet */
        if (so == NULL)
                sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
@@ -375,7 +479,7 @@ ip6_output(
                /* no need to do IPsec. */
                needipsec = 0;
                break;
-       
+
        case IPSEC_POLICY_IPSEC:
                if (sp->req == NULL) {
                        /* acquire a policy */
@@ -542,7 +646,7 @@ ip6_output(
                                                        seen = 1;
                                        } else if (filter->ipf_filter.ipf_output) {
                                                errno_t result;
-                                               
+
                                                result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
                                                if (result == EJUSTRETURN) {
                                                        ipf_unref();
@@ -631,7 +735,7 @@ skip_ipsec2:
                switch (rh->ip6r_type) {
                case IPV6_RTHDR_TYPE_0:
                         rh0 = (struct ip6_rthdr0 *)rh;
-                        addr = (struct in6_addr *)(rh0 + 1);
+                        addr = (struct in6_addr *)(void *)(rh0 + 1);
 
                         /*
                          * construct a sockaddr_in6 form of
@@ -743,12 +847,11 @@ skip_ipsec2:
                dst->sin6_len = sizeof(struct sockaddr_in6);
                dst->sin6_addr = ip6->ip6_dst;
        }
-
 #if IPSEC
        if (needipsec && needipsectun) {
 #if CONFIG_DTRACE
                struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
-#endif /* CONFIG_DTRACE */
+#endif
                /*
                 * All the extension headers will become inaccessible
                 * (since they can be encrypted).
@@ -775,7 +878,7 @@ skip_ipsec2:
                m = ipsec_state.m;
                ipsec_saved_route = ro;
                ro = (struct route_in6 *)&ipsec_state.ro;
-               dst = (struct sockaddr_in6 *)ipsec_state.dst;
+               dst = (struct sockaddr_in6 *)(void *)ipsec_state.dst;
                if (error) {
                        /* mbuf is already reclaimed in ipsec6_output_tunnel. */
                        m0 = m = NULL;
@@ -797,11 +900,12 @@ skip_ipsec2:
                        }
                        goto bad;
                }
-               /* 
+               /*
                 * The packet has been encapsulated so the ifscope is no longer valid
                 * since it does not apply to the outer address: ignore the ifscope.
                 */
-               ifscope = IFSCOPE_NONE;
+               ip6oa.ip6oa_boundif = IFSCOPE_NONE;
+               ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_IF;
                if (opt != NULL && opt->ip6po_pktinfo != NULL) {
                        if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
                                opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
@@ -830,8 +934,12 @@ skip_ipsec2:
        dst_sa.sin6_len = sizeof(dst_sa);
        dst_sa.sin6_addr = ip6->ip6_dst;
 
+       /*
+        * in6_selectroute() might return an ifp with its reference held
+        * even in the error case, so make sure to release its reference.
+        */
        if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
-           &dst_sa, opt, im6o, ro, &ifp, &rt, 0, ifscope, nocell)) != 0) {
+           &dst_sa, opt, im6o, ro, &ifp, &rt, 0, &ip6oa)) != 0) {
                switch (error) {
                case EHOSTUNREACH:
                        ip6stat.ip6s_noroute++;
@@ -842,6 +950,7 @@ skip_ipsec2:
                }
                if (ifp != NULL)
                        in6_ifstat_inc(ifp, ifs6_out_discard);
+               /* ifp (if non-NULL) will be released at the end */
                goto bad;
        }
        if (rt == NULL) {
@@ -874,13 +983,13 @@ skip_ipsec2:
         * case of sending packets to an address of our own.
         */
        if (ia != NULL && ia->ia_ifp) {
-               ifnet_reference(ia->ia_ifp);
+               ifnet_reference(ia->ia_ifp);    /* for origifp */
                if (origifp != NULL)
                        ifnet_release(origifp);
                origifp = ia->ia_ifp;
        } else {
                if (ifp != NULL)
-                       ifnet_reference(ifp);
+                       ifnet_reference(ifp);   /* for origifp */
                if (origifp != NULL)
                        ifnet_release(origifp);
                origifp = ifp;
@@ -892,7 +1001,7 @@ skip_ipsec2:
        src_sa.sin6_family = AF_INET6;
        src_sa.sin6_len = sizeof(src_sa);
        src_sa.sin6_addr = ip6->ip6_src;
-       if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
+       if (sa6_recoverscope(&src_sa, TRUE) || zone != src_sa.sin6_scope_id)
                goto badscope;
 
        dst0 = ip6->ip6_dst;
@@ -903,7 +1012,7 @@ skip_ipsec2:
        dst_sa.sin6_family = AF_INET6;
        dst_sa.sin6_len = sizeof(dst_sa);
        dst_sa.sin6_addr = ip6->ip6_dst;
-       if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
+       if (sa6_recoverscope(&dst_sa, TRUE) || zone != dst_sa.sin6_scope_id) {
                goto badscope;
        }
 
@@ -925,10 +1034,10 @@ skip_ipsec2:
                         * application.  We assume the next hop is an IPv6
                         * address.
                         */
-                       dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
+                       dst = (struct sockaddr_in6 *)(void *)opt->ip6po_nexthop;
                }
                else if ((rt->rt_flags & RTF_GATEWAY))
-                       dst = (struct sockaddr_in6 *)rt->rt_gateway;
+                       dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway;
        }
 
        if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
@@ -1113,12 +1222,40 @@ skip_ipsec2:
                m->m_pkthdr.rcvif = NULL;
        }
 
+#if DUMMYNET
+check_with_pf:
+#endif
 #if PF
        if (PF_IS_ENABLED) {
+#if DUMMYNET
+               /*
+                * TBD: Need to save opt->ip6po_flags for reinjection rdar://10434993
+                */
+               args.fwa_m = m;
+               args.fwa_oif = ifp;
+               args.fwa_oflags = flags;
+               if ((flags & IPV6_OUTARGS))
+                       args.fwa_ip6oa = &ip6oa;
+               args.fwa_ro6 = ro;
+               args.fwa_dst6 = dst;
+               args.fwa_ro6_pmtu = ro_pmtu;
+               args.fwa_origifp = origifp;
+               args.fwa_mtu = mtu;
+               args.fwa_alwaysfrag = alwaysfrag;
+               args.fwa_unfragpartlen = unfragpartlen;
+               args.fwa_exthdrs = &exthdrs;
                /* Invoke outbound packet filter */
-               error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
+               error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
+#else
+               error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
+#endif /* DUMMYNET */
 
-               if (error) {
+               if (error != 0 || m == NULL) {
+                       /*
+                        * Note that if we ever handle packet chains, we will
+                        * have to restore the linkage from the previous
+                        * packet to the next, like in ip_output_list()
+                        */
                        if (m != NULL) {
                                panic("%s: unexpected packet %p\n", __func__, m);
                                /* NOTREACHED */
@@ -1162,6 +1299,7 @@ skip_ipsec2:
        }
 
        lck_rw_lock_shared(nd_if_rwlock);
+       /* Access without acquiring nd_ifinfo lock for performance */
        ifmtu = IN6_LINKMTU(ifp);
        lck_rw_done(nd_if_rwlock);
 
@@ -1214,7 +1352,7 @@ skip_ipsec2:
                }
                if (ro->ro_rt)
                        RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
-               error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+               error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
                goto done;
        }
 
@@ -1321,6 +1459,10 @@ skip_ipsec2:
                        m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
                        m->m_pkthdr.rcvif = 0;
                        m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
+
+                       M_COPY_PFTAG(m, m0);
+                       m_set_service_class(m, m0->m_pkthdr.svc);
+
 #ifdef __darwin8_notyet
 #if CONFIG_MACF_NET
                        mac_create_fragment(m0, m);
@@ -1358,7 +1500,8 @@ sendorfree:
                        /* clean ipsec history once it goes out of the node */
                        ipsec_delaux(m);
 #endif
-                       error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+                       error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
+                           adv);
 
                } else
                        m_freem(m);
@@ -1452,8 +1595,10 @@ in6_delayed_cksum(struct mbuf *m, uint16_t offset)
        offset += (m->m_pkthdr.csum_data & 0xffff);
        if ((offset + sizeof(csum)) > m->m_len) {
                m_copyback(m, offset, sizeof(csum), &csum);
+       } else if (IP6_HDR_ALIGNED_P(mtod(m, char *))) {
+               *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
        } else {
-               *(uint16_t *)(mtod(m, char *) + offset) = csum;
+               bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
        }
 }
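
Editor's note: the rewritten store in in6_delayed_cksum() applies the strict-alignment discipline one more time: m_copyback() when the field would run past the first mbuf, a direct 16-bit store only when the base pointer is known aligned (checksum offsets being even), and bcopy() otherwise. Reduced to its essence:

    /* Sketch: storing u_int16_t csum at a possibly unaligned offset. */
    if (IP6_HDR_ALIGNED_P(mtod(m, char *)))
            *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
    else
            bcopy(&csum, mtod(m, char *) + offset, sizeof (csum));
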
 /*
@@ -1636,6 +1781,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
                u_int32_t ifmtu;
 
                lck_rw_lock_shared(nd_if_rwlock);
+               /* Access without acquiring nd_ifinfo lock for performance */
                ifmtu = IN6_LINKMTU(ifp);
                lck_rw_done(nd_if_rwlock);
 
@@ -1673,6 +1819,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
        } else {
                if (ifp) {
                        lck_rw_lock_shared(nd_if_rwlock);
+                       /* Don't hold nd_ifinfo lock for performance */
                        mtu = IN6_LINKMTU(ifp);
                        lck_rw_done(nd_if_rwlock);
                } else
@@ -1757,7 +1904,6 @@ ip6_ctloutput(so, sopt)
                                /* FALLTHROUGH */
                        case IPV6_UNICAST_HOPS:
                        case IPV6_HOPLIMIT:
-                       case IPV6_FAITH:
 
                        case IPV6_RECVPKTINFO:
                        case IPV6_RECVHOPLIMIT:
@@ -1874,10 +2020,6 @@ do { \
                                        OPTSET(IN6P_RTHDR);
                                        break;
 
-                               case IPV6_FAITH:
-                                       OPTSET(INP_FAITH);
-                                       break;
-
                                case IPV6_RECVPATHMTU:
                                        /*
                                         * We ignore this option for TCP
@@ -1985,6 +2127,7 @@ do { \
                        case IPV6_RTHDRDSTOPTS:
                        case IPV6_3542NEXTHOP:
                        {
+                               struct ip6_pktopts **optp;
                                /* new advanced API (RFC3542) */
                                struct mbuf *m;
 
@@ -2001,8 +2144,9 @@ do { \
                                        m_freem(m);
                                        break;
                                }
+                               optp = &in6p->in6p_outputopts;
                                error = ip6_pcbopt(optname, mtod(m, u_char *), 
-                                       m->m_len, &in6p->in6p_outputopts, uproto);
+                                       m->m_len, optp, uproto);
                                m_freem(m);
                                break;
                        }
@@ -2106,7 +2250,7 @@ do { \
                                if (error)
                                        break;
 
-                               inp_bindif(in6p, optval);
+                               error = inp_bindif(in6p, optval);
                                break;
 
                        case IPV6_NO_IFT_CELLULAR:
@@ -2161,7 +2305,6 @@ do { \
                        case IPV6_RECVRTHDR:
                        case IPV6_RECVPATHMTU:
 
-                       case IPV6_FAITH:
                        case IPV6_V6ONLY:
                        case IPV6_PORTRANGE:
                        case IPV6_RECVTCLASS:
@@ -2200,10 +2343,6 @@ do { \
                                        optval = OPTBIT(IN6P_MTU);
                                        break;
 
-                               case IPV6_FAITH:
-                                       optval = OPTBIT(INP_FAITH);
-                                       break;
-
                                case IPV6_V6ONLY:
                                        optval = OPTBIT(IN6P_IPV6_V6ONLY);
                                        break;
@@ -2355,7 +2494,7 @@ do { \
 
                        case IPV6_BOUND_IF:
                                if (in6p->inp_flags & INP_BOUND_IF)
-                                       optval = in6p->inp_boundif;
+                                       optval = in6p->inp_boundifp->if_index;
                                error = sooptcopyout(sopt, &optval,
                                    sizeof (optval));
                                break;
@@ -2368,7 +2507,8 @@ do { \
                                break;
 
                        case IPV6_OUT_IF:
-                               optval = in6p->in6p_last_outif;
+                               optval = (in6p->in6p_last_outifp != NULL) ?
+                                   in6p->in6p_last_outifp->if_index : 0;
                                error = sooptcopyout(sopt, &optval,
                                    sizeof (optval));
                                break;
@@ -2522,7 +2662,8 @@ ip6_initpktopts(struct ip6_pktopts *opt)
 }
 
 static int
-ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
+ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
+    int uproto)
 {
        struct ip6_pktopts *opt;
 
@@ -2550,6 +2691,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
        int defminmtu = IP6PO_MINMTU_MCASTONLY;
        int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
 
+
        switch (optname) {
        case IPV6_PKTINFO:
                if (pktopt && pktopt->ip6po_pktinfo)
@@ -2637,9 +2779,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
 }
 
 void
-ip6_clearpktopts(pktopt, optname)
-       struct ip6_pktopts *pktopt;
-       int optname;
+ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
 {
        if (pktopt == NULL)
                return;
@@ -2704,7 +2844,7 @@ static int
 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
 {
        if (dst == NULL || src == NULL)  {
-               printf("ip6_clearpktopts: invalid argument\n");
+               printf("copypktopts: invalid argument\n");
                return (EINVAL);
        }
 
@@ -2758,8 +2898,7 @@ ip6_copypktopts(struct ip6_pktopts *src, int canwait)
 }
 
 void
-ip6_freepcbopts(pktopt)
-       struct ip6_pktopts *pktopt;
+ip6_freepcbopts(struct ip6_pktopts *pktopt)
 {
        if (pktopt == NULL)
                return;
@@ -2982,6 +3121,12 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                return (EINVAL);
        }
 
+       /*
+        * Caller must have ensured that the buffer is aligned on
+        * at least a 32-bit boundary.
+        */
+       VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
+
        /*
         * IPV6_2292xxx is for backward compatibility to RFC2292, and should
         * not be specified in the context of RFC3542.  Conversely,
@@ -3026,7 +3171,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                if (len != sizeof(struct in6_pktinfo))
                        return (EINVAL);
 
-               pktinfo = (struct in6_pktinfo *)buf;
+               pktinfo = (struct in6_pktinfo *)(void *)buf;
 
                /*
                 * An application can clear any sticky IPV6_PKTINFO option by
@@ -3098,7 +3243,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
 
                if (len != sizeof(int))
                        return (EINVAL);
-               hlimp = (int *)buf;
+               hlimp = (int *)(void *)buf;
                if (*hlimp < -1 || *hlimp > 255)
                        return (EINVAL);
 
@@ -3112,7 +3257,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
 
                if (len != sizeof(int))
                        return (EINVAL);
-               tclass = *(int *)buf;
+               tclass = *(int *)(void *)buf;
                if (tclass < -1 || tclass > 255)
                        return (EINVAL);
 
@@ -3138,7 +3283,8 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                switch (((struct sockaddr *)buf)->sa_family) {
                case AF_INET6:
                {
-                       struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
+                       struct sockaddr_in6 *sa6 =
+                           (struct sockaddr_in6 *)(void *)buf;
 
                        if (sa6->sin6_len != sizeof(struct sockaddr_in6))
                                return (EINVAL);
@@ -3189,7 +3335,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                /* message length validation */
                if (len < sizeof(struct ip6_hbh))
                        return (EINVAL);
-               hbh = (struct ip6_hbh *)buf;
+               hbh = (struct ip6_hbh *)(void *)buf;
                hbhlen = (hbh->ip6h_len + 1) << 3;
                if (len != hbhlen)
                        return (EINVAL);
@@ -3223,7 +3369,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                /* message length validation */
                if (len < sizeof(struct ip6_dest))
                        return (EINVAL);
-               dest = (struct ip6_dest *)buf;
+               dest = (struct ip6_dest *)(void *)buf;
                destlen = (dest->ip6d_len + 1) << 3;
                if (len != destlen)
                        return (EINVAL);
@@ -3283,7 +3429,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                /* message length validation */
                if (len < sizeof(struct ip6_rthdr))
                        return (EINVAL);
-               rth = (struct ip6_rthdr *)buf;
+               rth = (struct ip6_rthdr *)(void *)buf;
                rthlen = (rth->ip6r_len + 1) << 3;
                if (len != rthlen)
                        return (EINVAL);
@@ -3314,7 +3460,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
        case IPV6_USE_MIN_MTU:
                if (len != sizeof(int))
                        return (EINVAL);
-               minmtupolicy = *(int *)buf;
+               minmtupolicy = *(int *)(void *)buf;
                if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
                    minmtupolicy != IP6PO_MINMTU_DISABLE &&
                    minmtupolicy != IP6PO_MINMTU_ALL) {
@@ -3327,7 +3473,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                if (len != sizeof(int))
                        return (EINVAL);
 
-               if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
+               if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
                        /*
                         * we ignore this option for TCP sockets.
                         * (RFC3542 leaves this case unspecified.)
@@ -3340,7 +3486,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
        case IPV6_PREFER_TEMPADDR:
                if (len != sizeof(int))
                        return (EINVAL);
-               preftemp = *(int *)buf;
+               preftemp = *(int *)(void *)buf;
                if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
                    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
                    preftemp != IP6PO_TEMPADDR_PREFER) {
@@ -3416,7 +3562,8 @@ ip6_mloopback(
 
        if (lo_ifp) {
                copym->m_pkthdr.rcvif = ifp;
-               dlil_output(lo_ifp, PF_INET6, copym, 0, (struct sockaddr *)dst, 0);
+               dlil_output(lo_ifp, PF_INET6, copym, 0, 
+                   (struct sockaddr *)dst, 0, NULL);
        } else
                m_free(copym);
 #else
index acb9c3857309956929105aafc644db74da801f4e..9507c904b6ee5813592b27aac4dfd271a3a3543e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -175,6 +175,14 @@ struct     ip6_moptions {
 #define        IM6O_REMREF(_im6o)                                              \
        im6o_remref(_im6o)
 
+struct ip6_exthdrs {
+       struct mbuf *ip6e_ip6;
+       struct mbuf *ip6e_hbh;
+       struct mbuf *ip6e_dest1;
+       struct mbuf *ip6e_rthdr;
+       struct mbuf *ip6e_dest2;
+};
+
 /*
  * Control options for outgoing packets
  */
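The new ip6_exthdrs block gathers the extension-header mbufs that the
output path builds for one packet (hop-by-hop, destination options,
routing header).  A hedged sketch of a cleanup pass over such a struct
(hypothetical helper, not part of this commit):

static void
example_exthdrs_free(struct ip6_exthdrs *exthdrs)
{
        /* each member is an independently allocated mbuf, or NULL */
        if (exthdrs->ip6e_ip6 != NULL)
                m_freem(exthdrs->ip6e_ip6);
        if (exthdrs->ip6e_hbh != NULL)
                m_freem(exthdrs->ip6e_hbh);
        if (exthdrs->ip6e_dest1 != NULL)
                m_freem(exthdrs->ip6e_dest1);
        if (exthdrs->ip6e_rthdr != NULL)
                m_freem(exthdrs->ip6e_rthdr);
        if (exthdrs->ip6e_dest2 != NULL)
                m_freem(exthdrs->ip6e_dest2);
}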
@@ -348,18 +356,39 @@ struct ip6aux {
 #define        IPV6_FLAG_NOSRCIFSEL    0x80    /* bypass source address selection */
 #define        IPV6_OUTARGS            0x100   /* has ancillary output info */
 
-#ifdef __NO_STRICT_ALIGNMENT
-#define IP6_HDR_ALIGNED_P(ip)  1
-#else
-#define IP6_HDR_ALIGNED_P(ip)  ((((intptr_t) (ip)) & 3) == 0)
-#endif
+#ifdef XNU_KERNEL_PRIVATE
+#define IP6_HDR_ALIGNED_P(_ip6)        ((((uintptr_t)(_ip6)) & ((uintptr_t)3)) == 0)
+
+/*
+ * On platforms that require strict alignment (currently anything other than
+ * i386 or x86_64), this macro checks whether the pointer to the IPv6 header
+ * is 32-bit aligned, and panics otherwise.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        IP6_HDR_STRICT_ALIGNMENT_CHECK(_ip6) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define        IP6_HDR_STRICT_ALIGNMENT_CHECK(_ip6) do {                       \
+       if (!IP6_HDR_ALIGNED_P(_ip6)) {                                 \
+               panic_plain("\n%s: Unaligned IPv6 header %p\n",         \
+                   __func__, _ip6);                                    \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
+#endif /* XNU_KERNEL_PRIVATE */
+
+#include <net/flowadv.h>
 
 /*
- * Extra information passed to ip6_output when IP6_OUTARGS is set.
+ * Extra information passed to ip6_output when IPV6_OUTARGS is set.
  */
 struct ip6_out_args {
        unsigned int    ip6oa_boundif;  /* bound outgoing interface */
-       unsigned int    ip6oa_nocell;   /* don't use IFT_CELLULAR */
+       struct flowadv  ip6oa_flowadv;  /* flow advisory code */
+       u_int32_t       ip6oa_flags;    /* IP6OAF flags (see below) */
+#define        IP6OAF_SELECT_SRCIF     0x00000001      /* src interface selection */
+#define        IP6OAF_BOUND_IF         0x00000002      /* boundif value is valid */
+#define        IP6OAF_BOUND_SRCADDR    0x00000004      /* bound to src address */
+#define        IP6OAF_NO_CELLULAR      0x00000010      /* skip IFT_CELLULAR */
 };
 
 extern struct  ip6stat ip6stat;        /* statistics */
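The reworked ip6_out_args replaces the old ip6oa_nocell field with a
flow-advisory slot and an IP6OAF flags word.  A hedged usage sketch (the
wrapper function is hypothetical; the field, flag, and function names
follow the declarations in this header):

/* Send m with source-interface selection, optionally pinned to an
 * outgoing interface scope. */
static int
example_ip6_send(struct mbuf *m, struct route_in6 *ro, unsigned int boundif)
{
        struct ip6_out_args ip6oa =
            { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF };

        if (boundif != IFSCOPE_NONE) {
                ip6oa.ip6oa_boundif = boundif;
                ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
        }
        /* IPV6_OUTARGS tells ip6_output() that ip6oa is valid */
        return (ip6_output(m, NULL, ro, IPV6_OUTARGS, NULL, NULL, &ip6oa));
}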
@@ -388,8 +417,7 @@ extern int  ip6_maxfragpackets; /* Maximum packets in reassembly queue */
 extern int     ip6_maxfrags;   /* Maximum fragments in reassembly queue */
 extern int     ip6_sourcecheck;        /* Verify source interface */
 extern int     ip6_sourcecheck_interval; /* Interval between log messages */
-extern int     ip6_accept_rtadv;       /* Acts as a host not a router */
-extern int     ip6_keepfaith;          /* Firewall Aided Internet Translator */
+extern int     ip6_accept_rtadv;       /* deprecated */
 extern int     ip6_log_interval;
 extern time_t  ip6_log_time;
 extern int     ip6_hdrnestlimit; /* upper limit of # of extension headers */
@@ -415,6 +443,7 @@ extern struct       pr_usrreqs rip6_usrreqs;
 extern struct   pr_usrreqs icmp6_dgram_usrreqs;
 
 extern int     ip6_doscopedroute;
+extern int     ip6_restrictrecvif;
 
 struct sockopt;
 
@@ -428,7 +457,6 @@ int         icmp6_dgram_attach(struct socket *, int , struct proc *);
 
 struct in6_ifaddr;
 void   ip6_init(void);
-void ip6_fin(void);
 void   ip6_input(struct mbuf *);
 struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *);
 void   ip6_freepcbopts(struct ip6_pktopts *);
@@ -454,12 +482,14 @@ int       ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *,
 struct mbuf    **ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
            struct mbuf **, int *);
 int    ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
-void   ip6_forward(struct mbuf *, struct route_in6 *, int);
+struct mbuf *ip6_forward(struct mbuf *, struct route_in6 *, int);
 void   ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *,
                             u_int32_t *));
 void   ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
 int    ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *,
-           int, struct ip6_moptions *, struct ifnet **,
+           int, struct ip6_moptions *, struct ifnet **, struct ip6_out_args *);
+int    ip6_output_list(struct mbuf *, int, struct ip6_pktopts *,
+           struct route_in6 *, int, struct ip6_moptions *, struct ifnet **,
            struct ip6_out_args *);
 int    ip6_ctloutput(struct socket *, struct sockopt *sopt);
 void   ip6_initpktopts(struct ip6_pktopts *);
@@ -488,7 +518,7 @@ extern struct in6_addrpolicy *
        in6_addrsel_lookup_policy(struct sockaddr_in6 *);
 int in6_selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
        struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
-       struct ifnet **, struct rtentry **, int, unsigned int, unsigned int);
+       struct ifnet **, struct rtentry **, int, const struct ip6_out_args *);
 int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, int uproto);
 u_int32_t ip6_randomid(void);
 u_int32_t ip6_randomflowlabel(void);
index 3c6a9a43df5d8e9ee3b8fc10b97b98a834b9bf67..c3b33030576d09d633828465d9e72e3a76011b8e 100644 (file)
@@ -39,6 +39,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
@@ -96,7 +97,6 @@ ipcomp4_input(struct mbuf *m, int off)
        size_t newlen, olen;
        struct secasvar *sav = NULL;
 
-
        if (m->m_pkthdr.len < off + sizeof(struct ipcomp)) {
                ipseclog((LOG_DEBUG, "IPv4 IPComp input: assumption failed "
                    "(packet too short)\n"));
@@ -113,6 +113,10 @@ ipcomp4_input(struct mbuf *m, int off)
                goto fail;
        }
        ipcomp = mtod(md, struct ipcomp *);
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip = mtod(m, struct ip *);
        nxt = ipcomp->comp_nxt;
 #ifdef _IP_VHL
@@ -266,6 +270,10 @@ ipcomp6_input(struct mbuf **mp, int *offp, int proto)
                goto fail;
        }
        ipcomp = mtod(md, struct ipcomp *);
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
        nxt = ipcomp->comp_nxt;
 
index 6de4b97dc9689aafe81fc8c385470e0d5f427cba..271dbe1ab495012e042eae5bc1fcd655984e79f3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,6 +66,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mcache.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
@@ -257,17 +258,7 @@ static int ipsec_set_policy(struct secpolicy **pcb_sp,
 static int ipsec_get_policy(struct secpolicy *pcb_sp, struct mbuf **mp);
 static void vshiftl(unsigned char *, int, int);
 static int ipsec_in_reject(struct secpolicy *, struct mbuf *);
-#if INET
-static struct mbuf *ipsec4_splithdr(struct mbuf *);
-#endif
-#if INET6
-static struct mbuf *ipsec6_splithdr(struct mbuf *);
-#endif
-#if INET
-static int ipsec4_encapsulate(struct mbuf *, struct secasvar *);
-#endif
 #if INET6
-static int ipsec6_encapsulate(struct mbuf *, struct secasvar *);
 static int ipsec64_encapsulate(struct mbuf *, struct secasvar *);
 #endif
 static struct ipsec_tag *ipsec_addaux(struct mbuf *);
@@ -1264,7 +1255,7 @@ ipsec_init_policy(so, pcb_sp)
        bzero(new, sizeof(*new));
 
 #ifdef __APPLE__
-       if (so->so_uid == 0)
+       if (kauth_cred_issuser(so->so_cred))
 #else
        if (so->so_cred != 0 && !suser(so->so_cred->pc_ucred, NULL))
 #endif
@@ -1399,7 +1390,7 @@ ipsec_set_policy(
                return EINVAL;
        if (len < sizeof(*xpl))
                return EINVAL;
-       xpl = (struct sadb_x_policy *)request;
+       xpl = (struct sadb_x_policy *)(void *)request;
 
        KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
                printf("ipsec_set_policy: passed policy\n");
@@ -1467,13 +1458,24 @@ ipsec4_set_policy(inp, optname, request, len, priv)
        struct sadb_x_policy *xpl;
        struct secpolicy **pcb_sp;
        int     error = 0;
+       struct sadb_x_policy xpl_aligned_buf;
+       u_int8_t             *xpl_unaligned;
 
        /* sanity check. */
        if (inp == NULL || request == NULL)
                return EINVAL;
        if (len < sizeof(*xpl))
                return EINVAL;
-       xpl = (struct sadb_x_policy *)request;
+       xpl = (struct sadb_x_policy *)(void *)request;
+
+       /* This is a new mbuf allocated by soopt_getm() */
+       if (IPSEC_IS_P2ALIGNED(xpl)) {
+               xpl_unaligned = NULL;
+       } else {
+               xpl_unaligned = (__typeof__(xpl_unaligned))xpl;
+               memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf));
+               xpl = (__typeof__(xpl))&xpl_aligned_buf;
+       }
 
        if (inp->inp_sp == NULL) {
                error = ipsec_init_policy(inp->inp_socket, &inp->inp_sp);
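The same copy-to-aligned-scratch idiom recurs in ipsec4_get_policy(),
ipsec6_set_policy() and ipsec6_get_policy() below.  Condensed into a
hedged standalone helper (hypothetical name; IPSEC_IS_P2ALIGNED is
defined in ipsec.h later in this commit):

/* Return an aligned view of a possibly unaligned sadb_x_policy request,
 * staging a copy in the caller-supplied scratch buffer when needed. */
static struct sadb_x_policy *
xpl_aligned_view(u_int8_t *request, struct sadb_x_policy *scratch)
{
        struct sadb_x_policy *xpl =
            (struct sadb_x_policy *)(void *)request;

        if (!IPSEC_IS_P2ALIGNED(xpl)) {
                memcpy(scratch, xpl, sizeof (*scratch));
                xpl = scratch;
        }
        return (xpl);
}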
@@ -1512,6 +1514,8 @@ ipsec4_get_policy(inp, request, len, mp)
        struct sadb_x_policy *xpl;
        struct secpolicy *pcb_sp;
        int     error = 0;
+       struct sadb_x_policy xpl_aligned_buf;
+       u_int8_t *xpl_unaligned;
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
@@ -1520,8 +1524,17 @@ ipsec4_get_policy(inp, request, len, mp)
                return EINVAL;
        if (len < sizeof(*xpl))
                return EINVAL;
-       xpl = (struct sadb_x_policy *)request;
-       
+       xpl = (struct sadb_x_policy *)(void *)request;
+
+       /* This is a new mbuf allocated by soopt_getm() */
+       if (IPSEC_IS_P2ALIGNED(xpl)) {
+               xpl_unaligned = NULL;
+       } else {
+               xpl_unaligned = (__typeof__(xpl_unaligned))xpl;
+               memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf));
+               xpl = (__typeof__(xpl))&xpl_aligned_buf;
+       }
+
        if (inp->inp_sp == NULL) {
                error = ipsec_init_policy(inp->inp_socket, &inp->inp_sp);
                if (error)
@@ -1586,14 +1599,25 @@ ipsec6_set_policy(in6p, optname, request, len, priv)
        struct sadb_x_policy *xpl;
        struct secpolicy **pcb_sp;
        int error = 0;
+       struct sadb_x_policy xpl_aligned_buf;
+       u_int8_t *xpl_unaligned;
 
        /* sanity check. */
        if (in6p == NULL || request == NULL)
                return EINVAL;
        if (len < sizeof(*xpl))
                return EINVAL;
-       xpl = (struct sadb_x_policy *)request;
-       
+       xpl = (struct sadb_x_policy *)(void *)request;
+
+       /* This is a new mbuf allocated by soopt_getm() */
+       if (IPSEC_IS_P2ALIGNED(xpl)) {
+               xpl_unaligned = NULL;
+       } else {
+               xpl_unaligned = (__typeof__(xpl_unaligned))xpl;
+               memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf));
+               xpl = (__typeof__(xpl))&xpl_aligned_buf;
+       }
+
        if (in6p->in6p_sp == NULL) {
                error = ipsec_init_policy(in6p->inp_socket, &in6p->in6p_sp);
                if (error)
@@ -1631,14 +1655,25 @@ ipsec6_get_policy(in6p, request, len, mp)
        struct sadb_x_policy *xpl;
        struct secpolicy *pcb_sp;
        int error = 0;
+       struct sadb_x_policy xpl_aligned_buf;
+       u_int8_t *xpl_unaligned;
 
        /* sanity check. */
        if (in6p == NULL || request == NULL || mp == NULL)
                return EINVAL;
        if (len < sizeof(*xpl))
                return EINVAL;
-       xpl = (struct sadb_x_policy *)request;
-       
+       xpl = (struct sadb_x_policy *)(void *)request;
+
+       /* This is a new mbuf allocated by soopt_getm() */
+       if (IPSEC_IS_P2ALIGNED(xpl)) {
+               xpl_unaligned = NULL;
+       } else {
+               xpl_unaligned = (__typeof__(xpl_unaligned))xpl;
+               memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf));
+               xpl = (__typeof__(xpl))&xpl_aligned_buf;
+       }
+
        if (in6p->in6p_sp == NULL) {
                error = ipsec_init_policy(in6p->inp_socket, &in6p->in6p_sp);
                if (error)
@@ -2171,7 +2206,7 @@ ipsec6_hdrsiz(m, dir, in6p)
  * encapsulate for ipsec tunnel.
  * ip->ip_src must be fixed later on.
  */
-static int
+int
 ipsec4_encapsulate(m, sav)
        struct mbuf *m;
        struct secasvar *sav;
@@ -2288,10 +2323,103 @@ ipsec4_encapsulate(m, sav)
 
        return 0;
 }
+
+/*
+ * encapsulate for ipsec tunnel.
+ * ip->ip_src must be fixed later on.
+ */
+int
+ipsec4_encapsulate_utun_esp_keepalive(m_ptr, sav)
+       struct mbuf **m_ptr;
+       struct secasvar *sav;
+{
+       struct ip *ip;
+       size_t plen;
+       struct mbuf *m = *m_ptr;
+
+       /* can't tunnel between different AFs */
+       if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
+               != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family
+        || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) {
+               m_freem(m);
+               *m_ptr = NULL;
+               return EINVAL;
+       }
+
+       plen = m->m_pkthdr.len;
+
+       /*
+        * grow the mbuf to accommodate the new IPv4 header.
+        * NOTE: IPv4 options will never be copied.
+        */
+       {
+               struct mbuf *n;
+               MGETHDR(n, M_DONTWAIT, MT_HEADER);     /* MAC-OK */
+               if (!n) {
+                       m_freem(m);
+                       *m_ptr = NULL;
+                       return ENOBUFS;
+               }
+               if (m->m_flags & M_PKTHDR) {
+                       M_COPY_PKTHDR(n, m);
+                       m->m_flags &= ~M_PKTHDR;
+               }
+               MH_ALIGN(n, sizeof(*ip));
+               n->m_len = sizeof(*ip);
+               n->m_next = m;
+               n->m_pkthdr.len = (plen + n->m_len);
+               m_fixhdr(m);
+               m = n;
+               *m_ptr = m;
+               plen = m->m_pkthdr.len;
+       }
+       ip = mtod(m, __typeof__(ip));
+
+       /* construct new IPv4 header. see RFC 2401 5.1.2.1 */
+       // ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos);
+#ifdef _IP_VHL
+       ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(*ip) >> 2);
+#else
+       ip->ip_hl = sizeof(*ip) >> 2;
+#endif
+       ip->ip_off &= htons(~IP_OFFMASK);
+       ip->ip_off &= htons(~IP_MF);
+       switch (ip4_ipsec_dfbit) {
+       case 0: /* clear DF bit */
+               ip->ip_off &= htons(~IP_DF);
+               break;
+       case 1: /* set DF bit */
+               ip->ip_off |= htons(IP_DF);
+               break;
+       default:        /* copy DF bit */
+               break;
+       }
+       ip->ip_p = IPPROTO_IPIP;
+       if (plen < IP_MAXPACKET)
+               ip->ip_len = htons(plen);
+       else {
+               ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: "
+                       "leave ip_len as is (invalid packet)\n"));
+       }
+#ifdef RANDOM_IP_ID
+       ip->ip_id = ip_randomid();
+#else
+       ip->ip_id = htons(ip_id++);
+#endif
+       bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr,
+               &ip->ip_src, sizeof(ip->ip_src));
+       bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr,
+               &ip->ip_dst, sizeof(ip->ip_dst));
+       ip->ip_ttl = IPDEFTTL;
+
+       /* XXX Should ip_src be updated later ? */
+
+       return 0;
+}
 #endif /*INET*/
 
 #if INET6
-static int
+int
 ipsec6_encapsulate(m, sav)
        struct mbuf *m;
        struct secasvar *sav;
@@ -2454,6 +2582,70 @@ ipsec64_encapsulate(m, sav)
 
        return 0;
 }
+
+int
+ipsec6_encapsulate_utun_esp_keepalive(m_ptr, sav)
+       struct mbuf **m_ptr;
+       struct secasvar *sav;
+{
+       struct ip6_hdr *ip6;
+       size_t plen;
+       struct mbuf *m = *m_ptr;
+
+       /* can't tunnel between different AFs */
+       if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
+               != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family
+        || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) {
+               m_freem(m);
+               *m_ptr = NULL;
+               return EINVAL;
+       }
+
+       plen = m->m_pkthdr.len;
+
+       /*
+        * grow the mbuf to accommodate the new IPv6 header.
+        */
+       {
+               struct mbuf *n;
+               MGETHDR(n, M_DONTWAIT, MT_HEADER);     /* MAC-OK */
+               if (!n) {
+                       m_freem(m);
+                       *m_ptr = NULL;
+                       return ENOBUFS;
+               }
+               if (m->m_flags & M_PKTHDR) {
+                       M_COPY_PKTHDR(n, m);
+                       m->m_flags &= ~M_PKTHDR;
+               }
+               MH_ALIGN(n, sizeof(*ip6));
+               n->m_len = sizeof(*ip6);
+               n->m_next = m;
+               n->m_pkthdr.len = (plen + n->m_len);
+               m_fixhdr(m);
+               m = n;
+               *m_ptr = m;
+               plen = m->m_pkthdr.len;
+       }
+       ip6 = mtod(m, __typeof__(ip6));
+
+       /* construct new IPv6 header. see RFC 2401 5.1.2.2 */
+       if (plen < IPV6_MAXPACKET)
+               ip6->ip6_plen = htons(plen);
+       else {
+               /* ip6->ip6_plen will be updated in ip6_output() */
+       }
+       ip6->ip6_nxt = IPPROTO_IPV6;
+       bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr,
+               &ip6->ip6_src, sizeof(ip6->ip6_src));
+       bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr,
+               &ip6->ip6_dst, sizeof(ip6->ip6_dst));
+       ip6->ip6_hlim = IPV6_DEFHLIM;
+
+       /* XXX Should ip6_src be updated later ? */
+
+       return 0;
+}
 #endif /*INET6*/
 
 /*
@@ -2672,7 +2864,7 @@ ipsec4_logpacketstr(ip, spi)
        struct ip *ip;
        u_int32_t spi;
 {
-       static char buf[256];
+       static char buf[256] __attribute__((aligned(4)));
        char *p;
        u_int8_t *s, *d;
 
@@ -2702,7 +2894,7 @@ ipsec6_logpacketstr(ip6, spi)
        struct ip6_hdr *ip6;
        u_int32_t spi;
 {
-       static char buf[256];
+       static char buf[256] __attribute__((aligned(4)));
        char *p;
 
        p = buf;
@@ -2727,7 +2919,7 @@ const char *
 ipsec_logsastr(sav)
        struct secasvar *sav;
 {
-       static char buf[256];
+       static char buf[256] __attribute__((aligned(4)));
        char *p;
        struct secasindex *saidx = &sav->sah->saidx;
 
@@ -2883,7 +3075,7 @@ ipsec4_output(
                                                }
                                                ip = mtod(state->m, struct ip *);
                                        }
-                                       udp = (struct udphdr *)(((u_int8_t *)ip) + hlen);
+                                       udp = (struct udphdr *)(void *)(((u_int8_t *)ip) + hlen);
                                        sin->sin_port = udp->uh_dport;
                                }
                        }
@@ -2962,7 +3154,7 @@ ipsec4_output(
                        // grab sadb_mutex, before updating sah's route cache
                        lck_mtx_lock(sadb_mutex);
                        ro4= &sav->sah->sa_route;
-                       dst4 = (struct sockaddr_in *)&ro4->ro_dst;
+                       dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst;
                        if (ro4->ro_rt != NULL) {
                                RT_LOCK(ro4->ro_rt);
                        }
@@ -2999,7 +3191,7 @@ ipsec4_output(
                         * addressed by SA_SIZE roundup in that routine.
                         */
                        if (ro4->ro_rt->rt_flags & RTF_GATEWAY)
-                               dst4 = (struct sockaddr_in *)ro4->ro_rt->rt_gateway;
+                               dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway;
                        RT_UNLOCK(ro4->ro_rt);
                        if (state->ro.ro_rt != NULL) {
                                rtfree(state->ro.ro_rt);
@@ -3397,7 +3589,8 @@ ipsec6_output_tunnel(
                                struct sockaddr_in* dst4;
                                struct route *ro4 = NULL;
                                struct route  ro4_copy;
-                               struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+                               struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 },
+                                   IPOAF_SELECT_SRCIF };
 
                                /*
                                 * must be last isr because encapsulated IPv6 packet
@@ -3422,7 +3615,7 @@ ipsec6_output_tunnel(
                                // grab sadb_mutex, to update sah's route cache and get a local copy of it
                                lck_mtx_lock(sadb_mutex);
                                ro4 = &sav->sah->sa_route;
-                               dst4 = (struct sockaddr_in *)&ro4->ro_dst;
+                               dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst;
                                if (ro4->ro_rt) {
                                        RT_LOCK(ro4->ro_rt);
                                }
@@ -3534,7 +3727,7 @@ ipsec6_output_tunnel(
                        // grab sadb_mutex, before updating sah's route cache
                        lck_mtx_lock(sadb_mutex);
                        ro6 = &sav->sah->sa_route;
-                       dst6 = (struct sockaddr_in6 *)&ro6->ro_dst;
+                       dst6 = (struct sockaddr_in6 *)(void *)&ro6->ro_dst;
                        if (ro6->ro_rt) {
                                RT_LOCK(ro6->ro_rt);
                        }
@@ -3575,7 +3768,7 @@ ipsec6_output_tunnel(
                         * addressed by SA_SIZE roundup in that routine.
                         */
                        if (ro6->ro_rt->rt_flags & RTF_GATEWAY)
-                               dst6 = (struct sockaddr_in6 *)ro6->ro_rt->rt_gateway;
+                               dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway;
                        RT_UNLOCK(ro6->ro_rt);
                        if (state->ro.ro_rt != NULL) {
                                rtfree(state->ro.ro_rt);
@@ -3652,7 +3845,7 @@ bad:
 /*
  * Chop IP header and option off from the payload.
  */
-static struct mbuf *
+struct mbuf *
 ipsec4_splithdr(m)
        struct mbuf *m;
 {
@@ -3661,7 +3854,7 @@ ipsec4_splithdr(m)
        int hlen;
 
        if (m->m_len < sizeof(struct ip))
-               panic("ipsec4_splithdr: first mbuf too short");
+               panic("ipsec4_splithdr: first mbuf too short, m_len %d, pkt_len %d, m_flag %x", m->m_len, m->m_pkthdr.len, m->m_flags);
        ip = mtod(m, struct ip *);
 #ifdef _IP_VHL
        hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
@@ -3694,7 +3887,7 @@ ipsec4_splithdr(m)
 #endif
 
 #if INET6
-static struct mbuf *
+struct mbuf *
 ipsec6_splithdr(m)
        struct mbuf *m;
 {
@@ -3777,6 +3970,18 @@ ipsec4_tunnel_validate(m, off, nxt0, sav, ifamily)
        if (bcmp(&oip->ip_dst, &sin->sin_addr, sizeof(oip->ip_dst)) != 0)
                return 0;
 
+       if (sav->utun_in_fn) {
+               // the utun SAs don't have a policy (yet).
+               if (nxt == IPPROTO_IPV4) {
+                       *ifamily = AF_INET;
+               } else if (nxt == IPPROTO_IPV6) {
+                       *ifamily = AF_INET6;
+               } else {
+                       return 0;
+               }
+               return 1;
+       }
+
        /* XXX slow */
        bzero(&osrc, sizeof(osrc));
        bzero(&odst, sizeof(odst));
@@ -3874,6 +4079,11 @@ ipsec6_tunnel_validate(m, off, nxt0, sav)
        if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr))
                return 0;
 
+       if (sav->utun_in_fn) {
+               // the utun SAs don't have a policy (yet).
+               return 1;
+       }
+       
        /* XXX slow */
        bzero(&osrc, sizeof(osrc));
        bzero(&odst, sizeof(odst));
@@ -4191,51 +4401,58 @@ __private_extern__ int
 ipsec_send_natt_keepalive(
        struct secasvar *sav)
 {
-       struct mbuf     *m;
-       struct udphdr *uh;
-       struct ip *ip;
-       int error;
-       struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
-       struct route ro;
+       struct mbuf            *m;
+       struct ip          *ip;
+       int                 error;
+       struct ip_out_args  ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
+       struct route        ro;
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
-       
+
        if ((esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0) return FALSE;
 
        // natt timestamp may have changed... reverify
        if ((natt_now - sav->natt_last_activity) < natt_keepalive_interval) return FALSE;
 
+       if (sav->flags & SADB_X_EXT_ESP_KEEPALIVE) return FALSE; // don't send these from the kernel
+
        m = m_gethdr(M_NOWAIT, MT_DATA);
        if (m == NULL) return FALSE;
-       
-       /*
-        * Create a UDP packet complete with IP header.
-        * We must do this because UDP output requires
-        * an inpcb which we don't have. UDP packet
-        * contains one byte payload. The byte is set
-        * to 0xFF.
-        */
-       ip = (struct ip*)m_mtod(m);
-       uh = (struct udphdr*)((char*)m_mtod(m) + sizeof(struct ip));
-       m->m_len = sizeof(struct udpiphdr) + 1;
-       bzero(m_mtod(m), m->m_len);
-       m->m_pkthdr.len = m->m_len;
-
-       ip->ip_len = m->m_len;
-       ip->ip_ttl = ip_defttl;
-       ip->ip_p = IPPROTO_UDP;
-       if (sav->sah->dir != IPSEC_DIR_INBOUND) {
-               ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr;
-               ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr;
-       } else {
-               ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr;
-               ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr;
+
+       ip = (__typeof__(ip))m_mtod(m);
+
+       // this sends one type of NAT-T keepalive (Type 1 ESP keepalives aren't sent by the kernel)
+       if ((sav->flags & SADB_X_EXT_ESP_KEEPALIVE) == 0) {
+               struct udphdr      *uh;
+               
+               /*
+                * Type 2: a UDP packet complete with IP header.
+                * We must do this because UDP output requires
+                * an inpcb, which we don't have.  The UDP packet
+                * contains a one-byte payload; the byte is set
+                * to 0xFF.
+                */
+               uh = (__typeof__(uh))(void *)((char *)m_mtod(m) + sizeof(*ip));
+               m->m_len = sizeof(struct udpiphdr) + 1;
+               bzero(m_mtod(m), m->m_len);
+               m->m_pkthdr.len = m->m_len;
+
+               ip->ip_len = m->m_len;
+               ip->ip_ttl = ip_defttl;
+               ip->ip_p = IPPROTO_UDP;
+               if (sav->sah->dir != IPSEC_DIR_INBOUND) {
+                       ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr;
+                       ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr;
+               } else {
+                       ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr;
+                       ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr;
+               }
+               uh->uh_sport = htons((u_short)esp_udp_encap_port);
+               uh->uh_dport = htons(sav->remote_ike_port);
+               uh->uh_ulen = htons(1 + sizeof(*uh));
+               uh->uh_sum = 0;
+               *(u_int8_t*)((char*)m_mtod(m) + sizeof(*ip) + sizeof(*uh)) = 0xFF;
        }
-       uh->uh_sport = htons((u_short)esp_udp_encap_port);
-       uh->uh_dport = htons(sav->remote_ike_port);
-       uh->uh_ulen = htons(1 + sizeof(struct udphdr));
-       uh->uh_sum = 0;
-       *(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF;
 
        // grab sadb_mutex, to get a local copy of sah's route cache
        lck_mtx_lock(sadb_mutex);
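For reference, the Type 2 keepalive assembled above is the RFC 3948
NAT-keepalive: a UDP-encapsulated datagram whose entire payload is a
single 0xFF octet.  A sketch of the resulting on-wire layout (the struct
is illustrative, not from the source):

struct natt_keepalive_pkt {
        struct ip      nk_ip;   /* IPv4 header, ip_p = IPPROTO_UDP */
        struct udphdr  nk_udp;  /* sport = esp_udp_encap_port,
                                 * dport = sav->remote_ike_port */
        u_int8_t       nk_mark; /* the single 0xFF keepalive octet */
} __attribute__((packed));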
index c3115557644b560fdd47b9c459ab03ca37dac97e..ead5dc68cc9d7d14b4a86a0ff0639eccf5867ce3 100644 (file)
@@ -278,6 +278,10 @@ struct ipsecstat {
 }
 
 #ifdef KERNEL
+
+#define IPSEC_IS_P2ALIGNED(p)        1
+#define IPSEC_GET_P2UNALIGNED_OFS(p) 0
+
 struct ipsec_output_state {
     int tunneled;
        struct mbuf *m;
@@ -340,6 +344,16 @@ extern const char *ipsec_logsastr(struct secasvar *);
 extern void ipsec_dumpmbuf(struct mbuf *);
 
 extern int ipsec4_output(struct ipsec_output_state *, struct secpolicy *, int);
+#if INET
+extern struct mbuf * ipsec4_splithdr(struct mbuf *);
+extern int ipsec4_encapsulate(struct mbuf *, struct secasvar *);
+extern int ipsec4_encapsulate_utun_esp_keepalive(struct mbuf **, struct secasvar *);
+#endif
+#if INET6
+extern struct mbuf * ipsec6_splithdr(struct mbuf *);
+extern int ipsec6_encapsulate(struct mbuf *, struct secasvar *);
+extern int ipsec6_encapsulate_utun_esp_keepalive(struct mbuf **, struct secasvar *);
+#endif
 extern int ipsec4_tunnel_validate(struct mbuf *, int, u_int, struct secasvar *, sa_family_t *);
 extern struct mbuf *ipsec_copypkt(struct mbuf *);
 extern void ipsec_delaux(struct mbuf *);
index 11f02db1e2209a8598e75ec4ee1c4b6673450d7b..7cedd2e22163f08207acadec834d3a448c2c7f30 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet6/mld6.h>
 #include <netinet6/mld6_var.h>
 
-/* Lock group and attribute for mld6_mtx */
+/* Lock group and attribute for mld_mtx */
 static lck_attr_t       *mld_mtx_attr;
 static lck_grp_t        *mld_mtx_grp;
 static lck_grp_attr_t   *mld_mtx_grp_attr;
@@ -236,16 +236,14 @@ static int interface_timers_running6;
 static int state_change_timers_running6;
 static int current_state_timers_running6;
 
-static decl_lck_mtx_data(, mld6_mtx);
-
 #define        MLD_LOCK()                      \
-       lck_mtx_lock(&mld6_mtx)
+       lck_mtx_lock(&mld_mtx)
 #define        MLD_LOCK_ASSERT_HELD()          \
-       lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_OWNED)
+       lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
 #define        MLD_LOCK_ASSERT_NOTHELD()       \
-       lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_NOTOWNED)
+       lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
 #define        MLD_UNLOCK()                    \
-       lck_mtx_unlock(&mld6_mtx)
+       lck_mtx_unlock(&mld_mtx)
 
 #define        MLD_ADD_DETACHED_IN6M(_head, _in6m) {                           \
        SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);                     \
@@ -498,6 +496,9 @@ mld_domifattach(struct ifnet *ifp, int how)
        MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
        MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
        MLI_UNLOCK(mli);
+       ifnet_lock_shared(ifp);
+       mld6_initsilent(ifp, mli);
+       ifnet_lock_done(ifp);
 
        LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 
@@ -528,6 +529,9 @@ mld_domifreattach(struct mld_ifinfo *mli)
        mli->mli_debug |= IFD_ATTACHED;
        MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
        MLI_UNLOCK(mli);
+       ifnet_lock_shared(ifp);
+       mld6_initsilent(ifp, mli);
+       ifnet_lock_done(ifp);
 
        LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 
@@ -593,6 +597,21 @@ mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
        panic("%s: mld_ifinfo not found for ifp %p\n", __func__,  ifp);
 }
 
+__private_extern__ void
+mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
+{
+       ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
+
+       MLI_LOCK_ASSERT_NOTHELD(mli);
+       MLI_LOCK(mli);
+       if (!(ifp->if_flags & IFF_MULTICAST) &&
+           (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)))
+               mli->mli_flags |= MLIF_SILENT;
+       else
+               mli->mli_flags &= ~MLIF_SILENT;
+       MLI_UNLOCK(mli);
+}
+
 static void
 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
 {
@@ -606,9 +625,6 @@ mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
        mli->mli_qri = MLD_QRI_INIT;
        mli->mli_uri = MLD_URI_INIT;
 
-       /* ifnet is not yet attached; no need to hold ifnet lock */
-       if (!(ifp->if_flags & IFF_MULTICAST))
-               mli->mli_flags |= MLIF_SILENT;
        if (mld_use_allow)
                mli->mli_flags |= MLIF_USEALLOW;
        if (!reattach)
@@ -1164,7 +1180,7 @@ mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
                for (i = 0; i < nsrc; i++) {
                        sp = mtod(m, uint8_t *) + soff;
                        retval = in6m_record_source(inm,
-                           (const struct in6_addr *)sp);
+                           (const struct in6_addr *)(void *)sp);
                        if (retval < 0)
                                break;
                        nrecorded += retval;
@@ -1989,7 +2005,6 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
 
        mh->m_flags |= M_MLDV1;
 
-       
        /*
         * Due to the fact that at this point we are possibly holding
         * in6_multihead_lock in shared or exclusive mode, we can't call
@@ -3286,6 +3301,13 @@ mld_dispatch_packet(struct mbuf *m)
        mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
        type = mld->mld_type;
 
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               /* Use the control service class if the outgoing
+                * interface supports the transmit-start model.
+                */
+               (void) m_set_service_class(m0, MBUF_SC_CTL);
+       }
+
        error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
            &oifp, NULL);
 
@@ -3424,7 +3446,7 @@ mld_init(void)
 
        MLD_PRINTF(("%s: initializing\n", __func__));
 
-        /* Setup lock group and attribute for mld6_mtx */
+        /* Setup lock group and attribute for mld_mtx */
         mld_mtx_grp_attr = lck_grp_attr_alloc_init();
         mld_mtx_grp = lck_grp_alloc_init("mld_mtx\n", mld_mtx_grp_attr);
         mld_mtx_attr = lck_attr_alloc_init();
index 7652cdca97cda52cf2adb38953962ae79e4208ea..7ccbcbf0b30635b7c0f871750a639cc145fbb20f 100644 (file)
@@ -237,6 +237,7 @@ extern void mld_slowtimo(void);
 extern void mld_init(void);
 extern void mli_addref(struct mld_ifinfo *, int);
 extern void mli_remref(struct mld_ifinfo *);
+__private_extern__ void mld6_initsilent(struct ifnet *, struct mld_ifinfo *);
 
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet6_mld);
index 77ab7630a7def41d18eb286ca2a6a0b13dc089e8..7cae1d7e7255af7edc204ec3eaa5802a4fbb5db1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
-#define        SA(p) ((struct sockaddr *)(p))
-#define SIN6(s) ((struct sockaddr_in6 *)s)
-#define SDL(s) ((struct sockaddr_dl *)s)
 #define        equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
 
 /* timer values */
@@ -135,6 +132,10 @@ int nd6_debug = 1;
 int nd6_debug = 0;
 #endif
 
+int nd6_optimistic_dad =
+       (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF|
+       ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC);
+
 static int nd6_is_new_addr_neighbor (struct sockaddr_in6 *, struct ifnet *);
 
 /* for debugging? */
@@ -174,10 +175,11 @@ struct llinfo_nd6 llinfo_nd6 = {
 size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
 struct nd_ifinfo *nd_ifinfo = NULL;
 
-static lck_grp_attr_t  *nd_if_rwlock_grp_attr;
-static lck_grp_t       *nd_if_rwlock_grp;
-static lck_attr_t      *nd_if_rwlock_attr;
-lck_rw_t               *nd_if_rwlock;
+static lck_grp_attr_t  *nd_if_lock_grp_attr;
+static lck_grp_t       *nd_if_lock_grp;
+static lck_attr_t      *nd_if_lock_attr;
+decl_lck_rw_data(, nd_if_rwlock_data);
+lck_rw_t               *nd_if_rwlock = &nd_if_rwlock_data;
 
 /* Protected by nd6_mutex */
 struct nd_drhead nd_defrouter;
@@ -199,9 +201,10 @@ static struct llinfo_nd6 *nd6_llinfo_alloc(void);
 static void nd6_llinfo_free(void *);
 static void nd6_llinfo_purge(struct rtentry *);
 static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
+static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
 
-static void nd6_siocgdrlst(void *, int);
-static void nd6_siocgprlst(void *, int);
+static int nd6_siocgdrlst(void *, int);
+static int nd6_siocgprlst(void *, int);
 
 /*
  * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
@@ -248,11 +251,10 @@ nd6_init()
        /* initialization of the default router list */
        TAILQ_INIT(&nd_defrouter);
 
-       nd_if_rwlock_grp_attr = lck_grp_attr_alloc_init();
-       nd_if_rwlock_grp = lck_grp_alloc_init("nd_if_rwlock",
-           nd_if_rwlock_grp_attr);
-       nd_if_rwlock_attr = lck_attr_alloc_init();
-       nd_if_rwlock = lck_rw_alloc_init(nd_if_rwlock_grp, nd_if_rwlock_attr);
+       nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
+       nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
+       nd_if_lock_attr = lck_attr_alloc_init();
+       lck_rw_init(nd_if_rwlock, nd_if_lock_grp, nd_if_lock_attr);
 
        llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
            LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
@@ -265,6 +267,7 @@ nd6_init()
 
        nd6_nbr_init();
        nd6_rtr_init();
+       nd6_prproxy_init();
 
        nd6_init_done = 1;
 
@@ -326,12 +329,38 @@ nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 
        if (lr == NULL) {
                bzero(ri, sizeof (*ri));
+               ri->ri_rssi = IFNET_RSSI_UNKNOWN;
+               ri->ri_lqm = IFNET_LQM_THRESH_OFF;
+               ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
        } else {
                IFLR_LOCK(lr);
                /* Export to rt_reach_info structure */
                ifnet_lr2ri(lr, ri);
-               /* Export ND6 send expiration time */
-               ri->ri_snd_expire = ifnet_llreach_up2cal(lr, ln->ln_lastused);
+               /* Export ND6 send expiration (calendar) time */
+               ri->ri_snd_expire =
+                   ifnet_llreach_up2calexp(lr, ln->ln_lastused);
+               IFLR_UNLOCK(lr);
+       }
+}
+
+static void
+nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
+{
+       struct llinfo_nd6 *ln = rt->rt_llinfo;
+       struct if_llreach *lr = ln->ln_llreach;
+
+       if (lr == NULL) {
+               bzero(iflri, sizeof (*iflri));
+               iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
+               iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
+               iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
+       } else {
+               IFLR_LOCK(lr);
+               /* Export to ifnet_llreach_info structure */
+               ifnet_lr2iflri(lr, iflri);
+               /* Export ND6 send expiration (uptime) time */
+               iflri->iflri_snd_expire =
+                   ifnet_llreach_up2upexp(lr, ln->ln_lastused);
                IFLR_UNLOCK(lr);
        }
 }
@@ -339,7 +368,6 @@ nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 int
 nd6_ifattach(struct ifnet *ifp)
 {
-
        /*
         * We have some arrays that should be indexed by if_index.
         * since if_index will grow dynamically, they should grow too.
@@ -358,7 +386,7 @@ nd6_ifattach(struct ifnet *ifp)
                q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
                if (q == NULL) {
                        lck_rw_done(nd_if_rwlock);
-                       return ENOBUFS;
+                       return (ENOBUFS);
                }
                bzero(q, n);
                nd_ifinfo_indexlim = newlim;
@@ -370,24 +398,19 @@ nd6_ifattach(struct ifnet *ifp)
                         */
                        FREE((caddr_t)nd_ifinfo, M_IP6NDP);
                }
-               nd_ifinfo = (struct nd_ifinfo *)q;
+               nd_ifinfo = (struct nd_ifinfo *)(void *)q;
        }
-       lck_rw_done(nd_if_rwlock);
 
 #define ND nd_ifinfo[ifp->if_index]
-
        /*
         * Don't initialize if called twice.
-        * XXX: to detect this, we should choose a member that is never set
-        * before initialization of the ND structure itself.  We formaly used
-        * the linkmtu member, which was not suitable because it could be 
-        * initialized via "ifconfig mtu".
         */
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ND.basereachable) {
+       if (ND.initialized) {
                lck_rw_done(nd_if_rwlock);
-               return 0;
+               return (0);
        }
+       lck_mtx_init(&ND.lock, nd_if_lock_grp, nd_if_lock_attr);
+       ND.initialized = TRUE;
        ND.linkmtu = ifp->if_mtu;
        ND.chlim = IPV6_DEFHLIM;
        ND.basereachable = REACHABLE_TIME;
@@ -395,10 +418,11 @@ nd6_ifattach(struct ifnet *ifp)
        ND.retrans = RETRANS_TIMER;
        ND.flags = ND6_IFF_PERFORMNUD;
        lck_rw_done(nd_if_rwlock);
-       nd6_setmtu(ifp);
 #undef ND
-       
-       return 0;
+
+       nd6_setmtu(ifp);
+
+       return (0);
 }
 
 /*
@@ -416,12 +440,15 @@ nd6_setmtu(struct ifnet *ifp)
         * because this can be called directly from SIOCSIFMTU for IPv4
         */
        lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index >= nd_ifinfo_indexlim) {
+       if (ifp->if_index >= nd_ifinfo_indexlim ||
+           !nd_ifinfo[ifp->if_index].initialized) {
                lck_rw_done(nd_if_rwlock);
-               return; /* we're  out of bound for nd_ifinfo */
+               return; /* nd_ifinfo out of bound, or not yet initialized */
        }
 
        ndi = &nd_ifinfo[ifp->if_index];
+       VERIFY(ndi->initialized);
+       lck_mtx_lock(&ndi->lock);
        oldmaxmtu = ndi->maxmtu;
 
        /*
@@ -449,6 +476,7 @@ nd6_setmtu(struct ifnet *ifp)
                    ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu);
        }
        ndi->linkmtu = ifp->if_mtu;
+       lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
 
        /* also adjust in6_maxmtu if necessary. */
@@ -639,6 +667,8 @@ again:
                struct rtentry *rt;
                struct sockaddr_in6 *dst;
                struct llinfo_nd6 *next;
+               struct nd_ifinfo *ndi;
+               u_int32_t retrans, flags;
 
                /* ln_next/prev/rt is protected by rnh_lock */
                next = ln->ln_next;
@@ -667,7 +697,7 @@ again:
                }
 
                /* rt_key should never be NULL */
-               dst = (struct sockaddr_in6 *)rt_key(rt);
+               dst = (struct sockaddr_in6 *)(void *)rt_key(rt);
                if (dst == NULL) {
                        panic("%s: rt(%p) key is NULL ln(%p)", __func__,
                            rt, ln);
@@ -683,7 +713,6 @@ again:
                        continue;
                }
 
-               /* Make a copy (we're using it read-only anyway) */
                lck_rw_lock_shared(nd_if_rwlock);
                if (ifp->if_index >= nd_ifinfo_indexlim) {
                        lck_rw_done(nd_if_rwlock);
@@ -691,6 +720,12 @@ again:
                        ln = next;
                        continue;
                }
+               ndi = ND_IFINFO(ifp);
+               VERIFY(ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               retrans = ndi->retrans;
+               flags = ndi->flags;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
 
                RT_LOCK_ASSERT_HELD(rt);
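The timer loop above now snapshots the per-interface ND parameters while
nd_if_rwlock and the new per-ndi mutex are held, so the later
state-machine steps can use the values without holding any lock.  The
pattern, as a hedged standalone helper (hypothetical name):

static void
ndi_snapshot(struct nd_ifinfo *ndi, u_int32_t *retrans, u_int32_t *flags)
{
        lck_mtx_lock(&ndi->lock);
        *retrans = ndi->retrans;        /* NS retransmit interval, ms */
        *flags = ndi->flags;            /* e.g. ND6_IFF_PERFORMNUD */
        lck_mtx_unlock(&ndi->lock);
}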
@@ -699,15 +734,17 @@ again:
                case ND6_LLINFO_INCOMPLETE:
                        if (ln->ln_asked < nd6_mmaxtries) {
                                ln->ln_asked++;
-                               lck_rw_lock_shared(nd_if_rwlock);
-                               ln->ln_expire = timenow.tv_sec +
-                                   nd_ifinfo[ifp->if_index].retrans / 1000;
-                               lck_rw_done(nd_if_rwlock);
+                               ln->ln_expire = timenow.tv_sec + retrans / 1000;
                                RT_ADDREF_LOCKED(rt);
                                RT_UNLOCK(rt);
                                lck_mtx_unlock(rnh_lock);
-                               nd6_ns_output(ifp, NULL, &dst->sin6_addr,
-                                       ln, 0);
+                               if (ip6_forwarding) {
+                                       nd6_prproxy_ns_output(ifp, NULL,
+                                           &dst->sin6_addr, ln);
+                               } else {
+                                       nd6_ns_output(ifp, NULL,
+                                           &dst->sin6_addr, ln, 0);
+                               }
                                RT_REMREF(rt);
                        } else {
                                struct mbuf *m = ln->ln_hold;
@@ -759,15 +796,11 @@ again:
                        break;
 
                case ND6_LLINFO_DELAY:
-                       lck_rw_lock_shared(nd_if_rwlock);
-                       if ((nd_ifinfo[ifp->if_index].flags &
-                           ND6_IFF_PERFORMNUD) != 0) {
+                       if ((flags & ND6_IFF_PERFORMNUD) != 0) {
                                /* We need NUD */
                                ln->ln_asked = 1;
                                ln->ln_state = ND6_LLINFO_PROBE;
-                               ln->ln_expire = timenow.tv_sec +
-                                   nd_ifinfo[ifp->if_index].retrans / 1000;
-                               lck_rw_done(nd_if_rwlock);
+                               ln->ln_expire = timenow.tv_sec + retrans / 1000;
                                RT_ADDREF_LOCKED(rt);
                                RT_UNLOCK(rt);
                                lck_mtx_unlock(rnh_lock);
@@ -778,7 +811,6 @@ again:
                                RT_REMREF(rt);
                                goto again;
                        }
-                       lck_rw_done(nd_if_rwlock);
                        ln->ln_state = ND6_LLINFO_STALE; /* XXX */
                        ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
                            nd6_gctimer);
@@ -788,10 +820,7 @@ again:
                case ND6_LLINFO_PROBE:
                        if (ln->ln_asked < nd6_umaxtries) {
                                ln->ln_asked++;
-                               lck_rw_lock_shared(nd_if_rwlock);
-                               ln->ln_expire = timenow.tv_sec +
-                                   nd_ifinfo[ifp->if_index].retrans / 1000;
-                               lck_rw_done(nd_if_rwlock);
+                               ln->ln_expire = timenow.tv_sec + retrans / 1000;
                                RT_ADDREF_LOCKED(rt);
                                RT_UNLOCK(rt);
                                lck_mtx_unlock(rnh_lock);
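[Editor's note: the INCOMPLETE/DELAY/PROBE cases above are the retransmit half of RFC 4861 Neighbor Unreachability Detection. As a reading aid, a sketch of the llinfo states this timer walks; values as in the KAME-derived nd6.h, with transition notes that paraphrase RFC 4861 rather than quote the kernel.]

    enum nd6_llinfo_state_sketch {
            ND6_LLINFO_INCOMPLETE = 0, /* NS sent, no NA yet; retry up to
                                          nd6_mmaxtries, then drop entry  */
            ND6_LLINFO_REACHABLE  = 1, /* confirmed; decays to STALE once
                                          the reachable timer runs out    */
            ND6_LLINFO_STALE      = 2, /* usable but unconfirmed; traffic
                                          moves it to DELAY               */
            ND6_LLINFO_DELAY      = 3, /* grace period; on timeout, probe
                                          if ND6_IFF_PERFORMNUD is set    */
            ND6_LLINFO_PROBE      = 4  /* unicast NS every retrans ms, up
                                          to nd6_umaxtries, then free     */
    };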
@@ -872,15 +901,17 @@ addrloop:
                         * addresses.  Although we may have to restart the
                         * loop (see below), it must be after purging the
                         * address.  Otherwise, we'd see an infinite loop of
-                        * regeneration. 
+                        * regeneration.
                         */
                        if (ip6_use_tempaddr &&
                            (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
-                               /* NOTE: We have to drop the lock here because 
-                                * regen_tmpaddr() eventually calls in6_update_ifa(),  
-                                * which must take the lock and would otherwise cause a 
-                                * hang. This is safe because the goto addrloop 
-                                * leads to a reevaluation of the in6_ifaddrs list
+                               /*
+                                * NOTE: We have to drop the lock here
+                                * because regen_tmpaddr() eventually calls
+                                * in6_update_ifa(), which must take the lock
+                                * and would otherwise cause a hang.  This is
+                                * safe because the goto addrloop leads to a
+                                * re-evaluation of the in6_ifaddrs list
                                 */
                                IFA_UNLOCK(&ia6->ia_ifa);
                                lck_rw_done(&in6_ifaddr_rwlock);
@@ -929,7 +960,7 @@ addrloop:
                                         * would not cause disaster (because
                                         * it's not a deletion, but an
                                         * addition,) we'd rather restart the
-                                        * loop just for safety.  Or does this 
+                                        * loop just for safety.  Or does this
                                         * significantly reduce performance??
                                         */
                                        /* Release extra reference */
@@ -1011,6 +1042,64 @@ addrloop:
        lck_mtx_unlock(nd6_mutex);
 }
 
+/*
+ * ND6 router advertisement kernel notification
+ */
+void
+nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
+    u_int32_t list_length, u_int32_t mtu, char *dl_addr, u_int32_t dl_addr_len)
+{
+       struct kev_msg ev_msg;
+       struct kev_nd6_ra_data nd6_ra_msg_data;
+       struct nd_prefix_list *itr = prefix_list;
+
+       bzero(&ev_msg, sizeof(struct kev_msg));
+       ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+       ev_msg.kev_class      = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass   = KEV_ND6_SUBCLASS;
+       ev_msg.event_code     = code;
+
+       bzero(&nd6_ra_msg_data, sizeof(nd6_ra_msg_data));
+       nd6_ra_msg_data.lladdrlen = (dl_addr_len <= ND6_ROUTER_LL_SIZE) ?
+           dl_addr_len : ND6_ROUTER_LL_SIZE;
+       bcopy(dl_addr, &nd6_ra_msg_data.lladdr, nd6_ra_msg_data.lladdrlen);
+
+       if (mtu > 0 && mtu >= IPV6_MMTU) {
+               nd6_ra_msg_data.mtu = mtu;
+               nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_MTU;
+       }
+
+       if (list_length > 0 && prefix_list != NULL) {
+               nd6_ra_msg_data.list_length = list_length;
+               nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_PREFIX;
+       }
+
+       while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
+               bcopy(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
+                   sizeof (nd6_ra_msg_data.prefix.prefix));
+               nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
+               nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
+               nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
+               nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
+               nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
+               nd6_ra_msg_data.prefix.expire = itr->pr.ndpr_expire;
+               nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
+               nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
+               nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;
+
+               /* send the message up */
+               ev_msg.dv[0].data_ptr       = &nd6_ra_msg_data;
+               ev_msg.dv[0].data_length    = sizeof(nd6_ra_msg_data);
+               ev_msg.dv[1].data_length    = 0;
+               kev_post_msg(&ev_msg);
+
+               /* clean up for the next prefix */
+               bzero(&nd6_ra_msg_data.prefix, sizeof(nd6_ra_msg_data.prefix));
+               itr = itr->next;
+               nd6_ra_msg_data.list_index++;
+       }
+}
+
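[Editor's note: nd6_post_msg() publishes each RA through Darwin's kernel-event mechanism. A minimal user-space listener sketch, assuming the standard PF_SYSTEM/SYSPROTO_EVENT socket and SIOCSKEVFILT filter from sys/kern_event.h; error handling trimmed, buffer sizing is an assumption.]

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/kern_event.h>

    #define KEV_ND6_SUBCLASS 7      /* as defined in the nd6.h hunk below */
    #define KEV_ND6_RA       1

    int
    main(void)
    {
            uint32_t buf[256];      /* aligned receive buffer */
            struct kev_request req;
            int s = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

            memset(&req, 0, sizeof (req));
            req.vendor_code = KEV_VENDOR_APPLE;
            req.kev_class = KEV_NETWORK_CLASS;
            req.kev_subclass = KEV_ND6_SUBCLASS;
            ioctl(s, SIOCSKEVFILT, &req);           /* ND6 events only */

            for (;;) {
                    struct kern_event_msg *ev = (struct kern_event_msg *)buf;
                    if (recv(s, buf, sizeof (buf), 0) <= 0)
                            break;
                    if (ev->event_code == KEV_ND6_RA)
                            printf("RA event received\n");
            }
            close(s);
            return (0);
    }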
 /*
  * ND6 timer routine to expire default route list and prefix list
  */
@@ -1182,10 +1271,13 @@ nd6_purge(
                nd6_setdefaultiface(0);
        }
 
-       if (!ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { 
+       /*
+        * Perform default router selection even when we are a router,
+        * if Scoped Routing is enabled.
+        */
+       if (ip6_doscopedroute || !ip6_forwarding) {
                lck_mtx_lock(nd6_mutex);
                /* refresh default router list */
-               defrouter_reset();
                defrouter_select(ifp);
                lck_mtx_unlock(nd6_mutex);
        }
@@ -1352,11 +1444,16 @@ nd6_lookup(
         * interfaces to a same link, install a link prefix to an interface,
         * and try to install a neighbor cache on an interface that does not
         * have a route to the prefix.
+        *
+        * If the address is from a proxied prefix, the ifa_ifp and ifp might
+        * not match, because nd6_na_input() could have modified the ifp
+        * of the route to point to the interface where the NA arrived on,
+        * hence the test for RTF_PROXY.
         */
-       if (ifp == NULL || 
-           (rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
+       if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
            rt->rt_gateway->sa_family != AF_LINK ||  rt->rt_llinfo == NULL ||
-           (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
+           (ifp && rt->rt_ifa->ifa_ifp != ifp &&
+           !(rt->rt_flags & RTF_PROXY))) {
                RT_REMREF_LOCKED(rt);
                RT_UNLOCK(rt);
                if (create) {
@@ -1401,7 +1498,7 @@ nd6_is_new_addr_neighbor(
                 * content (XXX).
                 */
                sin6_copy = *addr;
-               if (sa6_recoverscope(&sin6_copy))
+               if (sa6_recoverscope(&sin6_copy, FALSE))
                        return (0); /* XXX: should be impossible */
                if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
                        return (0);
@@ -1454,8 +1551,12 @@ nd6_is_new_addr_neighbor(
         * as on-link, and thus, as a neighbor.
         * XXX: we restrict the condition to hosts, because routers usually do
         * not have the "default router list".
+        * XXX: this block should eventually be removed (it is disabled when
+        * Scoped Routing is in effect); treating all destinations as on-link
+        * in the absence of a router is rather harmful.
         */
-       if (!ip6_forwarding && TAILQ_FIRST(&nd_defrouter) == NULL &&
+       if (!ip6_doscopedroute && !ip6_forwarding &&
+           TAILQ_FIRST(&nd_defrouter) == NULL &&
            nd6_defifindex == ifp->if_index) {
                return (1);
        }
@@ -1516,7 +1617,7 @@ nd6_free(
        RT_LOCK(rt);
        RT_ADDREF_LOCKED(rt);   /* Extra ref */
        ln = rt->rt_llinfo;
-       in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
+       in6 = ((struct sockaddr_in6 *)(void *)rt_key(rt))->sin6_addr;
 
        /*
         * Prevent another thread from modifying rt_key, rt_gateway
@@ -1526,14 +1627,13 @@ nd6_free(
        rt->rt_flags |= RTF_CONDEMNED;
 
        /*
-        * we used to have pfctlinput(PRC_HOSTDEAD) here. 
-        * even though it is not harmful, it was not really necessary.
+        * We used to have pfctlinput(PRC_HOSTDEAD) here.  Even though it is
+        * not harmful, it was not really necessary.  Perform default router
+        * selection even when we are a router, if Scoped Routing is enabled.
         */
-
-       if (!ip6_forwarding && (ip6_accept_rtadv ||
-           (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
-               dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->
-                   sin6_addr, rt->rt_ifp);
+       if (ip6_doscopedroute || !ip6_forwarding) {
+               dr = defrouter_lookup(&((struct sockaddr_in6 *)(void *)
+                   rt_key(rt))->sin6_addr, rt->rt_ifp);
 
                if ((ln && ln->ln_router) || dr) {
                        /*
@@ -1655,9 +1755,14 @@ nd6_nud_hint(
 
        ln->ln_state = ND6_LLINFO_REACHABLE;
        if (ln->ln_expire) {
+               struct nd_ifinfo *ndi;
+
                lck_rw_lock_shared(nd_if_rwlock);
-               ln->ln_expire = timenow.tv_sec +
-                       nd_ifinfo[rt->rt_ifp->if_index].reachable;
+               ndi = ND_IFINFO(rt->rt_ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               ln->ln_expire = timenow.tv_sec + ndi->reachable;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
        }
 done:
@@ -1820,6 +1925,7 @@ nd6_rtrequest(
                        break;
                }
                rt->rt_llinfo_get_ri = nd6_llinfo_get_ri;
+               rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
                rt->rt_llinfo_purge = nd6_llinfo_purge;
                rt->rt_llinfo_free = nd6_llinfo_free;
 
@@ -2005,20 +2111,28 @@ nd6_rtrequest(
        }
 }
 
-static void
+static int
 nd6_siocgdrlst(void *data, int data_is_64)
 {
-       struct in6_drlist_64 *drl_64 = (struct in6_drlist_64 *)data;
-       struct in6_drlist_32 *drl_32 = (struct in6_drlist_32 *)data;
+       struct in6_drlist_32 *drl_32;
        struct nd_defrouter *dr;
        int i = 0;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
-       bzero(data, data_is_64 ? sizeof (*drl_64) : sizeof (*drl_32));
        dr = TAILQ_FIRST(&nd_defrouter);
+
+       /* For 64-bit process */
        if (data_is_64) {
-               /* For 64-bit process */
+               struct in6_drlist_64 *drl_64;
+
+               drl_64 = _MALLOC(sizeof (*drl_64), M_TEMP, M_WAITOK|M_ZERO);
+               if (drl_64 == NULL)
+                       return (ENOMEM);
+
+               /* preserve the interface name */
+               bcopy(data, drl_64, sizeof (drl_64->ifname));
+
                while (dr && i < DRLSTSIZ) {
                        drl_64->defrouter[i].rtaddr = dr->rtaddr;
                        if (IN6_IS_ADDR_LINKLOCAL(&drl_64->defrouter[i].rtaddr)) {
@@ -2037,9 +2151,19 @@ nd6_siocgdrlst(void *data, int data_is_64)
                        i++;
                        dr = TAILQ_NEXT(dr, dr_entry);
                }
-               return;
+               bcopy(drl_64, data, sizeof (*drl_64));
+               _FREE(drl_64, M_TEMP);
+               return (0);
        }
+
        /* For 32-bit process */
+       drl_32 = _MALLOC(sizeof (*drl_32), M_TEMP, M_WAITOK|M_ZERO);
+       if (drl_32 == NULL)
+               return (ENOMEM);
+
+       /* preserve the interface name */
+       bcopy(data, drl_32, sizeof (drl_32->ifname));
+
        while (dr && i < DRLSTSIZ) {
                drl_32->defrouter[i].rtaddr = dr->rtaddr;
                if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
@@ -2058,26 +2182,38 @@ nd6_siocgdrlst(void *data, int data_is_64)
                i++;
                dr = TAILQ_NEXT(dr, dr_entry);
        }
+       bcopy(drl_32, data, sizeof (*drl_32));
+       _FREE(drl_32, M_TEMP);
+       return (0);
 }
 
-static void
+/*
+ * XXX meaning of fields, especially "raflags", is very
+ * different between RA prefix list and RR/static prefix list.
+ * How about separating the ioctls into two?
+ */
+static int
 nd6_siocgprlst(void *data, int data_is_64)
 {
-       struct in6_prlist_64 *prl_64 = (struct in6_prlist_64 *)data;
-       struct in6_prlist_32 *prl_32 = (struct in6_prlist_32 *)data;
+       struct in6_prlist_32 *prl_32;
        struct nd_prefix *pr;
        int i = 0;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
-       /*
-        * XXX meaning of fields, especialy "raflags", is very
-        * differnet between RA prefix list and RR/static prefix list.
-        * how about separating ioctls into two?
-        */
-       bzero(data, data_is_64 ? sizeof (*prl_64) : sizeof (*prl_32));
+
        pr = nd_prefix.lh_first;
+
+       /* For 64-bit process */
        if (data_is_64) {
-               /* For 64-bit process */
+               struct in6_prlist_64 *prl_64;
+
+               prl_64 = _MALLOC(sizeof (*prl_64), M_TEMP, M_WAITOK|M_ZERO);
+               if (prl_64 == NULL)
+                       return (ENOMEM);
+
+               /* preserve the interface name */
+               bcopy(data, prl_64, sizeof (prl_64->ifname));
+
                while (pr && i < PRLSTSIZ) {
                        struct nd_pfxrouter *pfr;
                        int j;
@@ -2120,10 +2256,19 @@ nd6_siocgprlst(void *data, int data_is_64)
                        i++;
                        pr = pr->ndpr_next;
                }
-
-               return;
+               bcopy(prl_64, data, sizeof (*prl_64));
+               _FREE(prl_64, M_TEMP);
+               return (0);
        }
+
        /* For 32-bit process */
+       prl_32 = _MALLOC(sizeof (*prl_32), M_TEMP, M_WAITOK|M_ZERO);
+       if (prl_32 == NULL)
+               return (ENOMEM);
+
+       /* preserve the interface name */
+       bcopy(data, prl_32, sizeof (prl_32->ifname));
+
        while (pr && i < PRLSTSIZ) {
                struct nd_pfxrouter *pfr;
                int j;
@@ -2166,75 +2311,108 @@ nd6_siocgprlst(void *data, int data_is_64)
                i++;
                pr = pr->ndpr_next;
        }
+       bcopy(prl_32, data, sizeof (*prl_32));
+       _FREE(prl_32, M_TEMP);
+       return (0);
 }
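[Editor's note: both list handlers above now stage their results through _MALLOCed kernel buffers and move bytes with bcopy() instead of casting and dereferencing the caller-supplied data pointer. The point is alignment: an ioctl payload address need not be suitably aligned for direct word access. A user-space sketch of the same defensive pattern:]

    #include <stdint.h>
    #include <string.h>

    /* `data` may point anywhere inside a byte buffer */
    static uint64_t
    read_u64(const void *data)
    {
            uint64_t v;

            /* NOT: return *(const uint64_t *)data;  that is undefined
             * behavior, and faults on strict-alignment CPUs if data
             * happens to be misaligned */
            memcpy(&v, data, sizeof (v));   /* byte-wise copy is always safe */
            return (v);
    }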
 
 int
 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 {
-       struct in6_ndireq *ndi = (struct in6_ndireq *)data;
-       struct in6_ondireq *ondi = (struct in6_ondireq *)data;
        struct nd_defrouter *dr;
        struct nd_prefix *pr;
        struct rtentry *rt;
        int i = ifp->if_index, error = 0;
 
        switch (cmd) {
-       case SIOCGDRLST_IN6_32:
-       case SIOCGDRLST_IN6_64:
+       case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
+       case SIOCGDRLST_IN6_64:         /* struct in6_drlist_64 */
                /*
                 * obsolete API, use sysctl under net.inet6.icmp6
                 */
                lck_mtx_lock(nd6_mutex);
-               nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
+               error = nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
                lck_mtx_unlock(nd6_mutex);
                break;
 
-       case SIOCGPRLST_IN6_32:
-       case SIOCGPRLST_IN6_64:
+       case SIOCGPRLST_IN6_32:         /* struct in6_prlist_32 */
+       case SIOCGPRLST_IN6_64:         /* struct in6_prlist_64 */
                /*
                 * obsolete API, use sysctl under net.inet6.icmp6
                 */
                lck_mtx_lock(nd6_mutex);
-               nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
+               error = nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
                lck_mtx_unlock(nd6_mutex);
                break;
 
-       case OSIOCGIFINFO_IN6:
-       case SIOCGIFINFO_IN6:
+       case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
+       case SIOCGIFINFO_IN6: {         /* struct in6_ondireq */
+               u_int32_t linkmtu;
+               struct in6_ondireq *ondi = (struct in6_ondireq *)(void *)data;
+               struct nd_ifinfo *ndi;
                /*
                 * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
                 * instead of in6_ndireq, so we treat it as such.
                 */
                lck_rw_lock_shared(nd_if_rwlock);
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
+               ndi = ND_IFINFO(ifp);
+               if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
+                   !ndi->initialized) {
                        lck_rw_done(nd_if_rwlock);
                        error = EINVAL;
                        break;
                }
-               ondi->ndi.linkmtu = IN6_LINKMTU(ifp);
-               ondi->ndi.maxmtu = nd_ifinfo[i].maxmtu;
-               ondi->ndi.basereachable = nd_ifinfo[i].basereachable;
-               ondi->ndi.reachable = nd_ifinfo[i].reachable;
-               ondi->ndi.retrans = nd_ifinfo[i].retrans;
-               ondi->ndi.flags = nd_ifinfo[i].flags;
-               ondi->ndi.recalctm = nd_ifinfo[i].recalctm;
+               lck_mtx_lock(&ndi->lock);
+               linkmtu = IN6_LINKMTU(ifp);
+               bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
+               bcopy(&nd_ifinfo[i].maxmtu, &ondi->ndi.maxmtu,
+                   sizeof (u_int32_t));
+               bcopy(&nd_ifinfo[i].basereachable, &ondi->ndi.basereachable,
+                   sizeof (u_int32_t));
+               bcopy(&nd_ifinfo[i].reachable, &ondi->ndi.reachable,
+                   sizeof (u_int32_t));
+               bcopy(&nd_ifinfo[i].retrans, &ondi->ndi.retrans,
+                   sizeof (u_int32_t));
+               bcopy(&nd_ifinfo[i].flags, &ondi->ndi.flags,
+                   sizeof (u_int32_t));
+               bcopy(&nd_ifinfo[i].recalctm, &ondi->ndi.recalctm,
+                   sizeof (int));
                ondi->ndi.chlim = nd_ifinfo[i].chlim;
+               ondi->ndi.receivedra = 0;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
                break;
+       }
 
-       case SIOCSIFINFO_FLAGS:
-               /* XXX: almost all other fields of ndi->ndi is unused */
+       case SIOCSIFINFO_FLAGS: {       /* struct in6_ndireq */
+               struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
+               u_int32_t oflags, flags;
+               struct nd_ifinfo *ndi;
+
+               /* XXX: almost all other fields of cndi->ndi are unused */
                lck_rw_lock_shared(nd_if_rwlock);
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
+               ndi = ND_IFINFO(ifp);
+               if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
+                   !ndi->initialized) {
                        lck_rw_done(nd_if_rwlock);
                        error = EINVAL;
                        break;
                }
-               nd_ifinfo[i].flags = ndi->ndi.flags;
+               lck_mtx_lock(&ndi->lock);
+               oflags = nd_ifinfo[i].flags;
+               bcopy(&cndi->ndi.flags, &nd_ifinfo[i].flags, sizeof (flags));
+               flags = nd_ifinfo[i].flags;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
+
+               if (oflags == flags)
+                       break;
+
+               error = nd6_setifinfo(ifp, oflags, flags);
                break;
+       }
 
-       case SIOCSNDFLUSH_IN6:  /* XXX: the ioctl name is confusing... */
+       case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
                /* flush default router list */
                /*
                 * xxx sumikawa: should not delete route if default
@@ -2247,7 +2425,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                /* xxx sumikawa: flush prefix list */
                break;
 
-       case SIOCSPFXFLUSH_IN6: {
+       case SIOCSPFXFLUSH_IN6: {       /* struct in6_ifreq */
                /* flush all the prefix advertised by routers */
                struct nd_prefix *next;
 
@@ -2285,9 +2463,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                                        lck_rw_done(&in6_ifaddr_rwlock);
                                        lck_mtx_unlock(nd6_mutex);
                                        in6_purgeaddr(&ia->ia_ifa);
+                                       IFA_REMREF(&ia->ia_ifa);
                                        lck_mtx_lock(nd6_mutex);
                                        lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
-                                       IFA_REMREF(&ia->ia_ifa);
                                        /*
                                         * Purging the address caused
                                         * in6_ifaddr_rwlock to be
@@ -2324,7 +2502,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                break;
        }
 
-       case SIOCSRTRFLUSH_IN6: {
+       case SIOCSRTRFLUSH_IN6: {       /* struct in6_ifreq */
                /* flush all the default routers */
                struct nd_defrouter *next;
 
@@ -2347,19 +2525,21 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                break;
        }
 
-       case SIOCGNBRINFO_IN6_32: {
+       case SIOCGNBRINFO_IN6_32: {     /* struct in6_nbrinfo_32 */
                struct llinfo_nd6 *ln;
-               struct in6_nbrinfo_32 *nbi_32 = (struct in6_nbrinfo_32 *)data;
-               /* make local for safety */
-               struct in6_addr nb_addr = nbi_32->addr;
+               struct in6_nbrinfo_32 nbi_32;
+               struct in6_addr nb_addr; /* make local for safety */
 
+               bcopy(data, &nbi_32, sizeof (nbi_32));
+               nb_addr = nbi_32.addr;
                /*
                 * XXX: KAME specific hack for scoped addresses
                 *      XXXX: for other scopes than link-local?
                 */
-               if (IN6_IS_ADDR_LINKLOCAL(&nbi_32->addr) ||
-                   IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32->addr)) {
-                       u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
+               if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) ||
+                   IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) {
+                       u_int16_t *idp =
+                           (u_int16_t *)(void *)&nb_addr.s6_addr[2];
 
                        if (*idp == 0)
                                *idp = htons(ifp->if_index);
@@ -2372,28 +2552,31 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                }
                RT_LOCK_ASSERT_HELD(rt);
                ln = rt->rt_llinfo;
-               nbi_32->state = ln->ln_state;
-               nbi_32->asked = ln->ln_asked;
-               nbi_32->isrouter = ln->ln_router;
-               nbi_32->expire = ln->ln_expire;
+               nbi_32.state = ln->ln_state;
+               nbi_32.asked = ln->ln_asked;
+               nbi_32.isrouter = ln->ln_router;
+               nbi_32.expire = ln->ln_expire;
                RT_REMREF_LOCKED(rt);
                RT_UNLOCK(rt);
+               bcopy(&nbi_32, data, sizeof (nbi_32));
                break;
        }
 
-       case SIOCGNBRINFO_IN6_64: {
+       case SIOCGNBRINFO_IN6_64: {     /* struct in6_nbrinfo_64 */
                struct llinfo_nd6 *ln;
-               struct in6_nbrinfo_64 *nbi_64 = (struct in6_nbrinfo_64 *)data;
-               /* make local for safety */
-               struct in6_addr nb_addr = nbi_64->addr;
+               struct in6_nbrinfo_64 nbi_64;
+               struct in6_addr nb_addr; /* make local for safety */
 
+               bcopy(data, &nbi_64, sizeof (nbi_64));
+               nb_addr = nbi_64.addr;
                /*
                 * XXX: KAME specific hack for scoped addresses
                 *      XXXX: for other scopes than link-local?
                 */
-               if (IN6_IS_ADDR_LINKLOCAL(&nbi_64->addr) ||
-                   IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64->addr)) {
-                       u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
+               if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) ||
+                   IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) {
+                       u_int16_t *idp =
+                           (u_int16_t *)(void *)&nb_addr.s6_addr[2];
 
                        if (*idp == 0)
                                *idp = htons(ifp->if_index);
@@ -2406,34 +2589,50 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                }
                RT_LOCK_ASSERT_HELD(rt);
                ln = rt->rt_llinfo;
-               nbi_64->state = ln->ln_state;
-               nbi_64->asked = ln->ln_asked;
-               nbi_64->isrouter = ln->ln_router;
-               nbi_64->expire = ln->ln_expire;
+               nbi_64.state = ln->ln_state;
+               nbi_64.asked = ln->ln_asked;
+               nbi_64.isrouter = ln->ln_router;
+               nbi_64.expire = ln->ln_expire;
                RT_REMREF_LOCKED(rt);
                RT_UNLOCK(rt);
+               bcopy(&nbi_64, data, sizeof (nbi_64));
                break;
        }
 
-       case SIOCGDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
-       case SIOCGDEFIFACE_IN6_64: {
-               struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
-               struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;
+       case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
+       case SIOCGDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
+               struct in6_ndifreq_64 *ndif_64 =
+                   (struct in6_ndifreq_64 *)(void *)data;
+               struct in6_ndifreq_32 *ndif_32 =
+                   (struct in6_ndifreq_32 *)(void *)data;
 
-               if (cmd == SIOCGDEFIFACE_IN6_64)
-                       ndif_64->ifindex = nd6_defifindex;
-               else
-                       ndif_32->ifindex = nd6_defifindex;
+               if (cmd == SIOCGDEFIFACE_IN6_64) {
+                       u_int64_t j = nd6_defifindex;
+                       bcopy(&j, &ndif_64->ifindex, sizeof (j));
+               } else {
+                       bcopy(&nd6_defifindex, &ndif_32->ifindex,
+                           sizeof (u_int32_t));
+               }
                break;
        }
 
-       case SIOCSDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
-       case SIOCSDEFIFACE_IN6_64: {
-               struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
-               struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;
+       case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
+       case SIOCSDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
+               struct in6_ndifreq_64 *ndif_64 =
+                   (struct in6_ndifreq_64 *)(void *)data;
+               struct in6_ndifreq_32 *ndif_32 =
+                   (struct in6_ndifreq_32 *)(void *)data;
+               u_int32_t idx;
 
-               error = nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ?
-                   ndif_64->ifindex : ndif_32->ifindex);
+               if (cmd == SIOCSDEFIFACE_IN6_64) {
+                       u_int64_t j;
+                       bcopy(&ndif_64->ifindex, &j, sizeof (j));
+                       idx = (u_int32_t)j;
+               } else {
+                       bcopy(&ndif_32->ifindex, &idx, sizeof (idx));
+               }
+
+               error = nd6_setdefaultiface(idx);
                return (error);
                /* NOTREACHED */
        }
@@ -2592,7 +2791,7 @@ fail:
                                 * set the 2nd argument as the 1st one.
                                 */
                                RT_UNLOCK(rt);
-                               nd6_output(ifp, ifp, m, &sin6, rt);
+                               nd6_output(ifp, ifp, m, &sin6, rt, NULL);
                                RT_LOCK(rt);
                        }
                } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
@@ -2672,17 +2871,12 @@ fail:
         * created, it might affect the selection policy.
         * Question: can we restrict the first condition to the "is_newentry"
         * case?
-        * XXX: when we hear an RA from a new router with the link-layer
-        * address option, defrouter_select() is called twice, since
-        * defrtrlist_update called the function as well.  However, I believe
-        * we can compromise the overhead, since it only happens the first
-        * time.
-        * XXX: although defrouter_select() should not have a bad effect
-        * for those are not autoconfigured hosts, we explicitly avoid such
-        * cases for safety.
+        *
+        * Note: Perform default router selection even when we are a router,
+        * if Scoped Routing is enabled.
         */
-       if (do_update && ln->ln_router && !ip6_forwarding &&
-           (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
+       if (do_update && ln->ln_router &&
+           (ip6_doscopedroute || !ip6_forwarding)) {
                RT_REMREF_LOCKED(rt);
                RT_UNLOCK(rt);
                lck_mtx_lock(nd6_mutex);
@@ -2706,6 +2900,9 @@ nd6_slowtimo(
                if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
                        break;
                nd6if = &nd_ifinfo[i];
+               if (!nd6if->initialized)
+                       break;
+               lck_mtx_lock(&nd6if->lock);
                if (nd6if->basereachable && /* already initialized */
                    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
                        /*
@@ -2717,6 +2914,7 @@ nd6_slowtimo(
                        nd6if->recalctm = nd6_recalc_reachtm_interval;
                        nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
                }
+               lck_mtx_unlock(&nd6if->lock);
        }
        lck_rw_done(nd_if_rwlock);
        timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz);
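[Editor's note: nd6_slowtimo() periodically re-randomizes each interface's reachable time, now under the per-ifinfo mutex. Per RFC 4861 section 6.3.4, ReachableTime is a uniform random value in [MIN_RANDOM_FACTOR, MAX_RANDOM_FACTOR] x BaseReachableTime, with factors 0.5 and 1.5; the re-randomization keeps hosts on a link from synchronizing their probes. A sketch of the effect of ND_COMPUTE_RTIME; the scaling constants here are illustrative, not the macro's exact arithmetic.]

    #include <stdint.h>
    #include <stdlib.h>

    #define MIN_FACTOR 512                  /* 0.5, scaled by 1024 */
    #define MAX_FACTOR 1536                 /* 1.5, scaled by 1024 */

    static uint32_t
    compute_reachable(uint32_t base_ms)
    {
            uint32_t f = MIN_FACTOR +
                arc4random_uniform(MAX_FACTOR - MIN_FACTOR);
            return ((uint32_t)(((uint64_t)base_ms * f) / 1024));
    }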
@@ -2725,7 +2923,7 @@ nd6_slowtimo(
 #define senderr(e) { error = (e); goto bad;}
 int
 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
-    struct sockaddr_in6 *dst, struct rtentry *hint0)
+    struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
 {
        struct mbuf *m = m0;
        struct rtentry *rt = hint0, *hint = hint0;
@@ -2733,6 +2931,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
        int error = 0;
        struct timeval timenow;
        struct rtentry *rtrele = NULL;
+       struct nd_ifinfo *ndi;
 
        if (rt != NULL) {
                RT_LOCK_SPIN(rt);
@@ -2758,7 +2957,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
         * route with a reference held for that placeholder.
         *
         * This logic is similar to, though not exactly the same as the one
-        * used by arp_route_to_gateway_route().
+        * used by route_to_gwroute().
         */
        if (rt != NULL) {
                /*
@@ -2777,7 +2976,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
                                        /* XXX: loop care? */
                                        RT_UNLOCK(rt);
                                        error = nd6_output(ifp, origifp, m0,
-                                           dst, rt);
+                                           dst, rt, adv);
                                        rtfree(rt);
                                        return (error);
                                }
@@ -2821,7 +3020,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
                        }
 
                        RT_LOCK_SPIN(rt);
-                       gw6 = *((struct sockaddr_in6 *)rt->rt_gateway);
+                       gw6 = *((struct sockaddr_in6 *)(void *)rt->rt_gateway);
 
                        /* If hint is now down, give up */
                        if (!(rt->rt_flags & RTF_UP)) {
@@ -2844,15 +3043,15 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
                         */
                        RT_LOCK_SPIN(gwrt);
                        if (!(gwrt->rt_flags & RTF_UP)) {
-                               struct rtentry *ogwrt;
-
                                rt->rt_gwroute = NULL;
                                RT_UNLOCK(gwrt);
                                RT_UNLOCK(rt);
                                rtfree(gwrt);
 lookup:
-                               gwrt = rtalloc1_scoped((struct sockaddr *)&gw6,
-                                   1, 0, ifp->if_index);
+                               lck_mtx_lock(rnh_lock);
+                               gwrt = rtalloc1_scoped_locked(
+                                   (struct sockaddr *)&gw6, 1, 0,
+                                   ifp->if_index);
 
                                RT_LOCK(rt);
                                /*
@@ -2869,57 +3068,68 @@ lookup:
                                        }
                                        RT_UNLOCK(rt);
                                        if (gwrt != NULL)
-                                               rtfree(gwrt);
+                                               rtfree_locked(gwrt);
+                                       lck_mtx_unlock(rnh_lock);
                                        senderr(EHOSTUNREACH);
                                }
-
-                               /* Remove any existing gwrt */
-                               ogwrt = rt->rt_gwroute;
-                               if ((rt->rt_gwroute = gwrt) != NULL)
-                                       RT_ADDREF(gwrt);
-
+                               VERIFY(gwrt != NULL);
+                               /*
+                                * Set gateway route; callee adds ref to gwrt;
+                                * gwrt has an extra ref from rtalloc1() for
+                                * this routine.
+                                */
+                               rt_set_gwroute(rt, rt_key(rt), gwrt);
                                RT_UNLOCK(rt);
-                               /* Now free the replaced gwrt */
-                               if (ogwrt != NULL)
-                                       rtfree(ogwrt);
-                               /* If still no route to gateway, bail out */
-                               if (gwrt == NULL)
-                                       senderr(EHOSTUNREACH);
+                               lck_mtx_unlock(rnh_lock);
                                /* Remember to release/free "rt" at the end */
                                rtrele = rt;
                                rt = gwrt;
-                               RT_LOCK_SPIN(rt);
-                               /* If gwrt is now down, give up */
-                               if (!(rt->rt_flags & RTF_UP)) {
-                                       RT_UNLOCK(rt);
-                                       rtfree(rt);
-                                       rt = NULL;
-                                       /* "rtrele" == original "rt" */
-                                       senderr(EHOSTUNREACH);
-                               }
                        } else {
                                RT_ADDREF_LOCKED(gwrt);
                                RT_UNLOCK(gwrt);
                                RT_UNLOCK(rt);
-                               RT_LOCK_SPIN(gwrt);
-                               /* If gwrt is now down, give up */
-                               if (!(gwrt->rt_flags & RTF_UP)) {
-                                       RT_UNLOCK(gwrt);
-                                       rtfree(gwrt);
-                                       senderr(EHOSTUNREACH);
-                               }
                                /* Remember to release/free "rt" at the end */
                                rtrele = rt;
                                rt = gwrt;
                        }
+                       VERIFY(rt == gwrt);
+
+                       /*
+                        * This is an opportunity to revalidate the parent
+                        * route's gwroute, in case it now points to a dead
+                        * route entry.  Parent route won't go away since the
+                        * clone (hint) holds a reference to it.  rt == gwrt.
+                        */
+                       RT_LOCK_SPIN(hint);
+                       if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
+                           (RTF_WASCLONED | RTF_UP)) {
+                               struct rtentry *prt = hint->rt_parent;
+                               VERIFY(prt != NULL);
+
+                               RT_CONVERT_LOCK(hint);
+                               RT_ADDREF(prt);
+                               RT_UNLOCK(hint);
+                               rt_revalidate_gwroute(prt, rt);
+                               RT_REMREF(prt);
+                       } else {
+                               RT_UNLOCK(hint);
+                       }
+
+                       RT_LOCK_SPIN(rt);
+                       /* rt == gwrt; if it is now down, give up */
+                       if (!(rt->rt_flags & RTF_UP)) {
+                               RT_UNLOCK(rt);
+                               rtfree(rt);
+                               rt = NULL;
+                               /* "rtrele" == original "rt" */
+                               senderr(EHOSTUNREACH);
+                       }
                }
+
                /* Become a regular mutex */
                RT_CONVERT_LOCK(rt);
        }
 
-       if (rt != NULL)
-               RT_LOCK_ASSERT_HELD(rt);
-
        /*
         * Address resolution or Neighbor Unreachability Detection
         * for the next hop.
@@ -2970,8 +3180,12 @@ lookup:
                if (rt != NULL)
                        RT_UNLOCK(rt);
                lck_rw_lock_shared(nd_if_rwlock);
+               ndi = ND_IFINFO(ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
                if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
-                   !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
+                   !(ndi->flags & ND6_IFF_PERFORMNUD)) {
+                       lck_mtx_unlock(&ndi->lock);
                        lck_rw_done(nd_if_rwlock);
                        log(LOG_DEBUG,
                            "nd6_output: can't allocate llinfo for %s "
@@ -2979,6 +3193,7 @@ lookup:
                            ip6_sprintf(&dst->sin6_addr), ln, rt);
                        senderr(EIO);   /* XXX: good error? */
                }
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
 
                goto sendpkt;   /* send anyway */
@@ -3047,12 +3262,18 @@ lookup:
            ln->ln_expire < timenow.tv_sec) {
                ln->ln_asked++;
                lck_rw_lock_shared(nd_if_rwlock);
-               ln->ln_expire = timenow.tv_sec +
-                       nd_ifinfo[ifp->if_index].retrans / 1000;
+               ndi = ND_IFINFO(ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               ln->ln_expire = timenow.tv_sec + ndi->retrans / 1000;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
                RT_UNLOCK(rt);
                /* We still have a reference on rt (for ln) */
-               nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
+               if (ip6_forwarding)
+                       nd6_prproxy_ns_output(ifp, NULL, &dst->sin6_addr, ln);
+               else
+                       nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
        } else {
                RT_UNLOCK(rt);
        }
@@ -3087,7 +3308,10 @@ sendpkt:
 
        /* discard the packet if IPv6 operation is disabled on the interface */
        lck_rw_lock_shared(nd_if_rwlock);
-       if ((nd_ifinfo[ifp->if_index].flags & ND6_IFF_IFDISABLED)) {
+       ndi = ND_IFINFO(ifp);
+       VERIFY(ndi != NULL && ndi->initialized);
+       /* test is done here without holding ndi lock, for performance */
+       if (ndi->flags & ND6_IFF_IFDISABLED) {
                lck_rw_done(nd_if_rwlock);
                error = ENETDOWN; /* better error? */
                goto bad;
@@ -3098,7 +3322,7 @@ sendpkt:
                /* forwarding rules require the original scope_id */
                m->m_pkthdr.rcvif = origifp;
                error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
-                   (struct sockaddr *)dst, 0);
+                   (struct sockaddr *)dst, 0, adv);
                goto release;
        } else {
                /* Do not allow loopback address to wind up on a wire */
@@ -3133,7 +3357,7 @@ sendpkt:
 
        m->m_pkthdr.rcvif = NULL;
        error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt,
-           (struct sockaddr *)dst, 0);
+           (struct sockaddr *)dst, 0, adv);
        goto release;
 
 bad:
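[Editor's note: the new struct flowadv out-parameter threaded through nd6_output() into dlil_output() lets the interface queue report transmit flow control back up the stack. A hedged fragment of how a caller might consume the advisory; the FADV_* codes are per net/flowadv.h, while the suspend helper and surrounding variables are hypothetical.]

    struct flowadv adv = { FADV_SUCCESS };

    error = nd6_output(ifp, origifp, m, dst, rt, &adv);
    if (error == 0 && adv.code == FADV_FLOW_CONTROLLED) {
            /* the interface queue pushed back; pause this flow
             * until a flow-control resume event arrives */
            flow_suspend(flow);             /* hypothetical helper */
    }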
@@ -3287,8 +3511,8 @@ nd6_lookup_ipv6(ifnet_t    ifp, const struct sockaddr_in6 *ip6_dest,
                 * Callee holds a reference on the route and returns
                 * with the route entry locked, upon success.
                 */
-               result = arp_route_to_gateway_route(
-                   (const struct sockaddr*)ip6_dest, hint, &route);
+               result = route_to_gwroute((const struct sockaddr *)ip6_dest,
+                   hint, &route);
                if (result != 0)
                        return (result);
                if (route != NULL)
@@ -3309,7 +3533,7 @@ nd6_lookup_ipv6(ifnet_t    ifp, const struct sockaddr_in6 *ip6_dest,
        if (route == NULL) {
                /*
                 * This could happen, if we could not allocate memory or
-                * if arp_route_to_gateway_route() didn't return a route.
+                * if route_to_gwroute() didn't return a route.
                 */
                result = ENOBUFS;
                goto release;
@@ -3345,6 +3569,21 @@ release:
        return (result);
 }
 
+int
+nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
+{
+       /*
+        * We only care about ND6_IFF_PROXY_PREFIXES for now.
+        */
+       before &= ND6_IFF_PROXY_PREFIXES;
+       after &= ND6_IFF_PROXY_PREFIXES;
+
+       if (before == after)
+               return (0);
+
+       return (nd6_if_prproxy(ifp, ((int32_t)(after - before) > 0)));
+}
+
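[Editor's note: nd6_setifinfo() only reacts to changes in ND6_IFF_PROXY_PREFIXES, enabling or disabling prefix proxying on the interface via nd6_if_prproxy(). From user space the switch would be thrown through the private SIOCSIFINFO_FLAGS ioctl handled earlier; a hedged sketch, noting that the ioctl overwrites the whole flags word, so a real caller would first read the current flags via SIOCGIFINFO_IN6.]

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <netinet6/nd6.h>       /* private header; fields as above */

    static int
    enable_prproxy(int s6 /* AF_INET6 socket */, const char *ifname)
    {
            struct in6_ndireq ndi;

            memset(&ndi, 0, sizeof (ndi));
            strlcpy(ndi.ifname, ifname, sizeof (ndi.ifname));
            ndi.ndi.flags = ND6_IFF_PERFORMNUD | ND6_IFF_PROXY_PREFIXES;
            return (ioctl(s6, SIOCSIFINFO_FLAGS, &ndi));
    }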
 SYSCTL_DECL(_net_inet6_icmp6);
 
 static int
@@ -3366,8 +3605,9 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
                for (dr = TAILQ_FIRST(&nd_defrouter);
                     dr;
                     dr = TAILQ_NEXT(dr, dr_entry)) {
-                       d = (struct in6_defrouter_64 *)buf;
-                       de = (struct in6_defrouter_64 *)(buf + sizeof (buf));
+                       d = (struct in6_defrouter_64 *)(void *)buf;
+                       de = (struct in6_defrouter_64 *)
+                           (void *)(buf + sizeof (buf));
 
                        if (d + 1 <= de) {
                                bzero(d, sizeof (*d));
@@ -3398,8 +3638,9 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
                for (dr = TAILQ_FIRST(&nd_defrouter);
                     dr;
                     dr = TAILQ_NEXT(dr, dr_entry)) {
-                       d_32 = (struct in6_defrouter_32 *)buf;
-                       de_32 = (struct in6_defrouter_32 *)(buf + sizeof (buf));
+                       d_32 = (struct in6_defrouter_32 *)(void *)buf;
+                       de_32 = (struct in6_defrouter_32 *)
+                           (void *)(buf + sizeof (buf));
 
                        if (d_32 + 1 <= de_32) {
                                bzero(d_32, sizeof (*d_32));
@@ -3451,8 +3692,9 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
                        struct sockaddr_in6 *sin6, *s6;
                        struct nd_pfxrouter *pfr;
 
-                       p = (struct in6_prefix_64 *)buf;
-                       pe = (struct in6_prefix_64 *)(buf + sizeof (buf));
+                       p = (struct in6_prefix_64 *)(void *)buf;
+                       pe = (struct in6_prefix_64 *)
+                           (void *)(buf + sizeof (buf));
 
                        if (p + 1 <= pe) {
                                bzero(p, sizeof (*p));
@@ -3515,8 +3757,9 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
                        struct sockaddr_in6 *sin6, *s6;
                        struct nd_pfxrouter *pfr;
 
-                       p_32 = (struct in6_prefix_32 *)buf;
-                       pe_32 = (struct in6_prefix_32 *)(buf + sizeof (buf));
+                       p_32 = (struct in6_prefix_32 *)(void *)buf;
+                       pe_32 = (struct in6_prefix_32 *)
+                           (void *)(buf + sizeof (buf));
 
                        if (p_32 + 1 <= pe_32) {
                                bzero(p_32, sizeof (*p_32));
index 601e075aaddf2e5b795a396e60e877b2b0a1c7cf..a831fb5eef86a8ad6d8f84e4b38be462b5e331f0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,9 @@
 #include <sys/queue.h>
 
 #ifdef XNU_KERNEL_PRIVATE
+#include <net/flowadv.h>
 #include <kern/locks.h>
+#include <sys/tree.h>
 
 struct llinfo_nd6 {
        /*
@@ -117,16 +119,17 @@ struct    llinfo_nd6 {
 #ifdef XNU_KERNEL_PRIVATE
 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
 #define ND6_LLINFO_PERMANENT(n) (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))
-#define ND6_IFF_PERFORMNUD     0x1
-#define ND6_IFF_ACCEPT_RTADV   0x2 /* APPLE: not used. Innterface specific router advertisments are
-                                    * handled with a specific ifnet flag: IFEF_ACCEPT_RTADVD
-                                    */
-#define ND6_IFF_PREFER_SOURCE  0x4 /* APPLE: NOT USED not related to ND. */
-#define ND6_IFF_IFDISABLED     0x8 /* IPv6 operation is disabled due to
-                                    * DAD failure.  (XXX: not ND-specific)
-                                    */
-#define ND6_IFF_DONT_SET_IFROUTE       0x10 /* NOT USED */
 
+#define ND6_EUI64_GBIT 0x01
+#define ND6_EUI64_UBIT 0x02
+
+#define ND6_EUI64_TO_IFID(in6)         do {(in6)->s6_addr[8] ^= ND6_EUI64_UBIT; } while (0)
+#define ND6_EUI64_GROUP(in6)           ((in6)->s6_addr[8] & ND6_EUI64_GBIT)
+#define ND6_EUI64_INDIVIDUAL(in6)      (!ND6_EUI64_GROUP(in6))
+#define ND6_EUI64_LOCAL(in6)           ((in6)->s6_addr[8] & ND6_EUI64_UBIT)
+#define ND6_EUI64_UNIVERSAL(in6)       (!ND6_EUI64_LOCAL(in6))
+#define ND6_IFID_LOCAL(in6)            (!ND6_EUI64_LOCAL(in6))
+#define ND6_IFID_UNIVERSAL(in6)                (!ND6_EUI64_UNIVERSAL(in6))
 #endif /* XNU_KERNEL_PRIVATE */
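[Editor's note: the new ND6_EUI64_* macros implement the RFC 4291 modified EUI-64 interface-identifier rules; the universal/local bit (0x02 of the first IFID byte) is inverted when forming an IFID, which is exactly the XOR that ND6_EUI64_TO_IFID performs. A sketch deriving an IFID from a 48-bit MAC using the standard 0xff/0xfe expansion; the helper name is mine.]

    #include <stdint.h>

    static void
    mac_to_ifid(const uint8_t mac[6], uint8_t ifid[8])
    {
            ifid[0] = mac[0] ^ 0x02;        /* flip ND6_EUI64_UBIT */
            ifid[1] = mac[1];
            ifid[2] = mac[2];
            ifid[3] = 0xff;                 /* EUI-48 -> EUI-64 filler */
            ifid[4] = 0xfe;
            ifid[5] = mac[3];
            ifid[6] = mac[4];
            ifid[7] = mac[5];
    }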
 
 #if !defined(XNU_KERNEL_PRIVATE)
@@ -145,13 +148,15 @@ struct nd_ifinfo_compat {
        u_int8_t chlim;                 /* CurHopLimit */
        u_int8_t receivedra;
        /* the following 3 members are for privacy extension for addrconf */
-       u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
+       u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */
        u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
        u_int8_t randomid[8];   /* current random ID */
 };
 
 #if defined(XNU_KERNEL_PRIVATE)
 struct nd_ifinfo {
+       decl_lck_mtx_data(, lock);
+       boolean_t initialized; /* Flag to see the entry is initialized */
        u_int32_t linkmtu;              /* LinkMTU */
        u_int32_t maxmtu;               /* Upper bound of LinkMTU */
        u_int32_t basereachable;        /* BaseReachableTime */
@@ -160,9 +165,9 @@ struct nd_ifinfo {
        u_int32_t flags;                /* Flags */
        int recalctm;                   /* BaseReachable re-calculation timer */
        u_int8_t chlim;                 /* CurHopLimit */
-       u_int8_t initialized; /* Flag to see the entry is initialized */
+       u_int8_t _pad[3];
        /* the following 3 members are for privacy extension for addrconf */
-       u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
+       u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */
        u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
        u_int8_t randomid[8];   /* current random ID */
        /* keep track of routers and prefixes on this link */
@@ -171,7 +176,20 @@ struct nd_ifinfo {
 };
 #endif /* XNU_KERNEL_PRIVATE */
 
-#define ND6_IFF_PERFORMNUD     0x1
+#define ND6_IFF_PERFORMNUD             0x1
+#if defined(PRIVATE)
+#define ND6_IFF_ACCEPT_RTADV           0x2 /* APPLE: not used. Interface-specific router
+                                            * advertisements are handled with a specific ifnet
+                                            * flag: IFEF_ACCEPT_RTADVD
+                                            */
+#define ND6_IFF_PREFER_SOURCE          0x4 /* APPLE: NOT USED; not related to ND. */
+#define ND6_IFF_IFDISABLED             0x8 /* IPv6 operation is disabled due to
+                                            * DAD failure.  (XXX: not ND-specific)
+                                            */
+#define ND6_IFF_DONT_SET_IFROUTE       0x10 /* NOT USED */
+#endif /* PRIVATE */
+#define ND6_IFF_PROXY_PREFIXES         0x20
+#define ND6_IFF_IGNORE_NA              0x40
 
 struct in6_nbrinfo {
        char ifname[IFNAMSIZ];  /* if name, e.g. "en0" */
@@ -425,8 +443,9 @@ struct      in6_ndifreq_64 {
 /* Prefix status */
 #define NDPRF_ONLINK           0x1
 #define NDPRF_DETACHED         0x2
-#define        NDPRF_STATIC            0x100
-#define        NDPRF_IFSCOPE           0x1000
+#define NDPRF_STATIC           0x100
+#define NDPRF_IFSCOPE          0x1000
+#define NDPRF_PRPROXY          0x2000
 #ifdef XNU_KERNEL_PRIVATE
 #define        NDPRF_PROCESSED         0x08000
 #endif
@@ -451,7 +470,9 @@ __private_extern__ lck_rw_t *nd_if_rwlock;
 /*
  * In a more readable form, we derive linkmtu based on:
  *
- * if (ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < ifp->if_mtu)
+ * if (ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized)
+ *         linkmtu = ifp->if_mtu;
+ * else if (ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < ifp->if_mtu)
  *         linkmtu = ND_IFINFO(ifp)->linkmtu;
  * else if ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < ifp->if_mtu))
  *         linkmtu = ND_IFINFO(ifp)->maxmtu;
@@ -459,8 +480,8 @@ __private_extern__ lck_rw_t *nd_if_rwlock;
  *         linkmtu = ifp->if_mtu;
  */
 #define IN6_LINKMTU(ifp)                                                     \
-       (ND_IFINFO(ifp) == NULL ? (ifp)->if_mtu :                             \
-       ((ND_IFINFO(ifp)->linkmtu &&                                          \
+       ((ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) ?           \
+       (ifp)->if_mtu : ((ND_IFINFO(ifp)->linkmtu &&                          \
        ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) ? ND_IFINFO(ifp)->linkmtu :  \
        ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) ? \
        ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu)))
@@ -530,11 +551,15 @@ struct    nd_defrouter {
 #define        NDDR_REMREF_LOCKED(_nddr)                                       \
        nddr_remref(_nddr, 1)
 
+/* define struct prproxy_sols_tree */
+RB_HEAD(prproxy_sols_tree, nd6_prproxy_soltgt);
+
 struct nd_prefix {
        decl_lck_mtx_data(, ndpr_lock);
        u_int32_t       ndpr_refcount;  /* reference count */
        u_int32_t       ndpr_debug;     /* see ifa_debug flags */
        struct ifnet *ndpr_ifp;
+       struct rtentry *ndpr_rt;
        LIST_ENTRY(nd_prefix) ndpr_entry;
        struct sockaddr_in6 ndpr_prefix;        /* prefix */
        struct in6_addr ndpr_mask; /* netmask derived from the prefix */
@@ -550,6 +575,9 @@ struct nd_prefix {
        LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
        u_char  ndpr_plen;
        int     ndpr_addrcnt;   /* reference counter from addresses */
+       u_int32_t ndpr_allmulti_cnt;            /* total all-multi reqs */
+       u_int32_t ndpr_prproxy_sols_cnt;        /* total # of proxied NS */
+       struct prproxy_sols_tree ndpr_prproxy_sols; /* tree of proxied NS */
        void (*ndpr_trace)              /* callback fn for tracing refs */
            (struct nd_prefix *, int);
 };
@@ -636,6 +664,51 @@ struct nd_pfxrouter {
 
 LIST_HEAD(nd_prhead, nd_prefix);
 
+struct nd_prefix_list {
+       struct nd_prefix_list *next;
+       struct nd_prefix pr;
+};
+#endif /* XNU_KERNEL_PRIVATE */
+
+#if defined(PRIVATE)
+/* ND6 kernel event subclass value */
+#define KEV_ND6_SUBCLASS               7
+/* ND6 kernel event action type */
+#define KEV_ND6_RA                     1
+/* ND6 RA L2 source address length */
+#define ND6_ROUTER_LL_SIZE             64
+
+struct nd6_ra_prefix {
+       struct sockaddr_in6 prefix;
+       struct prf_ra raflags;
+       u_int32_t prefixlen;
+       u_int32_t origin;
+       u_int64_t vltime;
+       u_int64_t pltime;
+       u_int64_t expire;
+       u_int32_t flags;
+       u_int32_t refcnt;
+       u_int32_t if_index;
+       u_int32_t pad;
+};
+
+/* ND6 router advertisement valid bits */
+#define KEV_ND6_DATA_VALID_MTU         (0x1 << 0)
+#define KEV_ND6_DATA_VALID_PREFIX      (0x1 << 1)
+
+struct kev_nd6_ra_data {
+       u_int8_t lladdr[ND6_ROUTER_LL_SIZE];
+       u_int32_t lladdrlen;
+       u_int32_t mtu;
+       u_int32_t list_index;
+       u_int32_t list_length;
+       u_int32_t flags;
+       struct nd6_ra_prefix prefix;
+       u_int32_t pad;
+};
+#endif /* PRIVATE */
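[Editor's note: one kev_nd6_ra_data message is posted per prefix (see nd6_post_msg in nd6.c above); list_index and list_length let a listener reassemble the set, and the flags word gates which fields are valid. A hedged decode sketch for the payload of one received event; printf stands in for real handling.]

    #include <stdio.h>

    static void
    decode_ra(const struct kev_nd6_ra_data *ra)
    {
            if (ra->flags & KEV_ND6_DATA_VALID_MTU)
                    printf("router-advertised MTU: %u\n", ra->mtu);
            if (ra->flags & KEV_ND6_DATA_VALID_PREFIX)
                    printf("prefix %u of %u: plen %u, ifindex %u\n",
                        ra->list_index + 1, ra->list_length,
                        ra->prefix.prefixlen, ra->prefix.if_index);
    }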
+
+#if defined(XNU_KERNEL_PRIVATE)
 /* nd6.c */
 extern int nd6_prune;
 extern int nd6_delay;
@@ -652,10 +725,16 @@ extern struct nd_prhead nd_prefix;
 extern int nd6_debug;
 extern size_t nd_ifinfo_indexlim;
 extern int nd6_onlink_ns_rfc4861;
+extern int nd6_optimistic_dad;
 
 #define nd6log(x)      do { if (nd6_debug >= 1) log x; } while (0)
 #define nd6log2(x)     do { if (nd6_debug >= 2) log x; } while (0)
 
+#define ND6_OPTIMISTIC_DAD_LINKLOCAL   (1 << 0)
+#define ND6_OPTIMISTIC_DAD_AUTOCONF    (1 << 1)
+#define ND6_OPTIMISTIC_DAD_TEMPORARY   (1 << 2)
+#define ND6_OPTIMISTIC_DAD_DYNAMIC     (1 << 3)
+
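[Editor's note: nd6_optimistic_dad is a new bitmask selecting which classes of addresses may use Optimistic DAD (RFC 4429), where an address is marked usable-but-optimistic while duplicate detection is still in flight. How the bits are consumed is an assumption inferred from the flag names; a sketch fragment:]

    /* assumption: address-configuration code checks the bit matching
     * the address type before marking it optimistic; IN6_IFF_OPTIMISTIC
     * flags an address as usable while DAD is still pending */
    if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
            if (nd6_optimistic_dad & ND6_OPTIMISTIC_DAD_LINKLOCAL)
                    ia->ia6_flags |= IN6_IFF_OPTIMISTIC;
    } else if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
            if (nd6_optimistic_dad & ND6_OPTIMISTIC_DAD_TEMPORARY)
                    ia->ia6_flags |= IN6_IFF_OPTIMISTIC;
    }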
 /* nd6_rtr.c */
 extern int nd6_defifindex;
 extern int ip6_desync_factor;  /* seconds */
@@ -710,11 +789,13 @@ extern int nd6_ioctl(u_long, caddr_t, struct ifnet *);
 extern void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
     char *, int, int, int);
 extern int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
-    struct sockaddr_in6 *, struct rtentry *);
+    struct sockaddr_in6 *, struct rtentry *, struct flowadv *);
 extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *,
     struct sockaddr *, u_char *);
 extern int nd6_need_cache(struct ifnet *);
 extern void nd6_drain(void *);
+extern void nd6_post_msg(u_int32_t, struct nd_prefix_list *, u_int32_t, u_int32_t, char *, u_int32_t);
+extern int nd6_setifinfo(struct ifnet *, u_int32_t, u_int32_t);
 
 /* nd6_nbr.c */
 extern void nd6_nbr_init(void);
@@ -732,14 +813,17 @@ extern void nd6_llreach_alloc(struct rtentry *, struct ifnet *, void *,
     unsigned int, boolean_t);
 extern void nd6_llreach_set_reachable(struct ifnet *, void *, unsigned int);
 extern void nd6_llreach_use(struct llinfo_nd6 *);
+extern void nd6_alt_node_addr_decompose(struct ifnet *, struct sockaddr *,
+    struct sockaddr_dl *, struct sockaddr_in6 *);
+extern void nd6_alt_node_present(struct ifnet *, struct sockaddr_in6 *,
+    struct sockaddr_dl *, int32_t, int, int);
+extern void nd6_alt_node_absent(struct ifnet *, struct sockaddr_in6 *);
 
 /* nd6_rtr.c */
 extern void nd6_rtr_init(void);
 extern void nd6_rs_input(struct mbuf *, int, int);
 extern void nd6_ra_input(struct mbuf *, int, int);
 extern void prelist_del(struct nd_prefix *);
-extern void defrouter_addreq(struct nd_defrouter *, boolean_t);
-extern void defrouter_delreq(struct nd_defrouter *);
 extern void defrouter_select(struct ifnet *);
 extern void defrouter_reset(void);
 extern int defrtrlist_ioctl(u_long, caddr_t);
@@ -765,6 +849,24 @@ extern void nddr_addref(struct nd_defrouter *, int);
 extern struct nd_defrouter *nddr_remref(struct nd_defrouter *, int);
 extern void ndpr_addref(struct nd_prefix *, int);
 extern struct nd_prefix *ndpr_remref(struct nd_prefix *, int);
+
+/* nd6_prproxy.c */
+struct ip6_hdr;
+extern u_int32_t nd6_prproxy;
+extern void nd6_prproxy_init(void);
+extern int nd6_if_prproxy(struct ifnet *, boolean_t);
+extern void nd6_prproxy_prelist_update(struct nd_prefix *, struct nd_prefix *);
+extern boolean_t nd6_prproxy_ifaddr(struct in6_ifaddr *);
+extern boolean_t nd6_prproxy_isours(struct mbuf *, struct ip6_hdr *,
+    struct route_in6 *, unsigned int);
+extern void nd6_prproxy_ns_output(struct ifnet *, struct in6_addr *,
+    struct in6_addr *, struct llinfo_nd6 *);
+extern void nd6_prproxy_ns_input(struct ifnet *, struct in6_addr *,
+    char *, int, struct in6_addr *, struct in6_addr *);
+extern void nd6_prproxy_na_input(struct ifnet *, struct in6_addr *,
+    struct in6_addr *, struct in6_addr *, int);
+extern void nd6_prproxy_sols_reap(struct nd_prefix *);
+extern void nd6_prproxy_sols_prune(struct nd_prefix *, u_int32_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL
index b2abd816960fd4f336e5efcdca3a843df8288d29..be58f8dac3ba2392cee37d897192d110c5019097 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -101,8 +101,6 @@ extern int ipsec_bypass;
 
 #include <net/net_osdep.h>
 
-#define SDL(s) ((struct sockaddr_dl *)s)
-
 struct dadq;
 static struct dadq *nd6_dad_find(struct ifaddr *);
 void nd6_dad_stoptimer(struct ifaddr *);
@@ -159,14 +157,14 @@ extern int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
 
 static int nd6_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */
 
+static struct sockaddr_in6 hostrtmask;
+
 SYSCTL_DECL(_net_inet6_icmp6);
 
 SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_llreach_base,
     CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_llreach_base, LL_BASE_REACHABLE,
     "default ND6 link-layer reachability max lifetime (in seconds)");
 
-#define SIN6(s)        ((struct sockaddr_in6 *)s)
-
 /*
  * Obtain a link-layer source cache entry for the sender.
  *
@@ -263,10 +261,14 @@ nd6_ns_input(
        char *lladdr = NULL;
        struct ifaddr *ifa = NULL;
        int lladdrlen = 0;
-       int anycast = 0, proxy = 0, tentative = 0;
+       int anycast = 0, proxy = 0, dadprogress = 0;
        int tlladdr;
        union nd_opts ndopts;
        struct sockaddr_dl proxydl;
+       boolean_t advrouter;
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, off, icmp6len, return);
@@ -283,7 +285,7 @@ nd6_ns_input(
        if (in6_setscope(&taddr6, ifp, NULL) != 0)
                goto bad;
 
-       if (ip6->ip6_hlim != 255) {
+       if (ip6->ip6_hlim != IPV6_MAXHLIM) {
                nd6log((LOG_ERR,
                    "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
                    ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
@@ -368,7 +370,7 @@ nd6_ns_input(
         * Target address (taddr6) must be either:
         * (1) Valid unicast/anycast address for my receiving interface,
         * (2) Unicast address for which I'm offering proxy service, or
-        * (3) "tentative" address on which DAD is being performed.
+        * (3) "tentative" or "optimistic" address [DAD is in progress].
         */
        /* (1) and (3) check. */
        ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
@@ -404,18 +406,31 @@ nd6_ns_input(
                        rtfree(rt);
                }
        }
+       if (ifa == NULL && ip6_forwarding && nd6_prproxy) {
+               /*
+                * Is the target address part of the prefix that is being
+                * proxied and installed on another interface?
+                */
+               ifa = (struct ifaddr *)in6ifa_prproxyaddr(&taddr6);
+       }
        if (ifa == NULL) {
                /*
-                * We've got an NS packet, and we don't have that adddress
-                * assigned for us.  We MUST silently ignore it.
-                * See RFC2461 7.2.3.
+                * We've got an NS packet, and we don't have that address
+                * assigned for us.  We MUST silently ignore it on this
+                * interface, cf. RFC 4861 7.2.3.
+                *
+                * Forwarding associated with NDPRF_PRPROXY may apply.
                 */
+               if (ip6_forwarding && nd6_prproxy)
+                       nd6_prproxy_ns_input(ifp, &saddr6, lladdr,
+                           lladdrlen, &daddr6, &taddr6);
                goto freeit;
        }
        IFA_LOCK(ifa);
        myaddr6 = *IFA_IN6(ifa);
        anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
-       tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
+       dadprogress =
+           ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DADPROGRESS;
        if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) {
                IFA_UNLOCK(ifa);
                goto freeit;
@@ -439,7 +454,7 @@ nd6_ns_input(
 
        /*
         * We have neighbor solicitation packet, with target address equals to
-        * one of my tentative address.
+        * one of my DAD in-progress addresses.
         *
         * src addr     how to process?
         * ---          ---
@@ -447,9 +462,9 @@ nd6_ns_input(
         * unicast      somebody is doing address resolution -> ignore
         * unspec       dup address detection
         *
-        * The processing is defined in RFC 2462.
+        * The processing is defined in RFC 2462 (and updated by RFC 4429)
         */
-       if (tentative) {
+       if (dadprogress) {
                /*
                 * If source address is unspecified address, it is for
                 * duplicate address detection.
@@ -463,11 +478,15 @@ nd6_ns_input(
                goto freeit;
        }
 
+       /* Are we an advertising router on this interface? */
+       advrouter = (ifp->if_eflags & IFEF_IPV6_ROUTER);
+
        /*
         * If the source address is unspecified address, entries must not
         * be created or updated.
-        * It looks that sender is performing DAD.  Output NA toward
-        * all-node multicast address, to tell the sender that I'm using
+        * It looks like the sender is performing DAD.  If I'm using the address,
+        * and it's a "preferred" address, i.e. not optimistic, then output NA
+        * toward the all-nodes multicast address, to tell the sender that I'm using
         * the address.
         * S bit ("solicited") must be zero.
         */
@@ -475,21 +494,22 @@ nd6_ns_input(
                saddr6 = in6addr_linklocal_allnodes;
                if (in6_setscope(&saddr6, ifp, NULL) != 0)
                        goto bad;
-               nd6_na_output(ifp, &saddr6, &taddr6,
-                             ((anycast || proxy || !tlladdr)
-                                     ? 0 : ND_NA_FLAG_OVERRIDE)
-                               | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0),
-                     tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
+               if ((dadprogress & IN6_IFF_OPTIMISTIC) == 0)
+                       nd6_na_output(ifp, &saddr6, &taddr6,
+                           ((anycast || proxy || !tlladdr) ? 0 :
+                           ND_NA_FLAG_OVERRIDE) | (advrouter ?
+                           ND_NA_FLAG_ROUTER : 0), tlladdr, proxy ?
+                           (struct sockaddr *)&proxydl : NULL);
                goto freeit;
        }
 
-       nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0);
+       nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
+           ND_NEIGHBOR_SOLICIT, 0);
 
        nd6_na_output(ifp, &saddr6, &taddr6,
-                     ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE)
-                       | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0)
-                       | ND_NA_FLAG_SOLICITED,
-                     tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
+           ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
+           (advrouter ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED,
+           tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
  freeit:
        m_freem(m);
        if (ifa != NULL)
@@ -514,6 +534,7 @@ nd6_ns_input(
  *
  * Based on RFC 2461
  * Based on RFC 2462 (duplicate address detection)
+ * Updated by RFC 4429 (optimistic duplicate address detection)
  *
  * Caller must bump up ln->ln_rt refcnt to make sure 'ln' doesn't go
  * away if there is a llinfo_nd6 passed in.
@@ -538,14 +559,17 @@ nd6_ns_output(
        int flags;
        caddr_t mac;
        struct route_in6 ro;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
+       u_int32_t rtflags = 0;
 
-       bzero(&ro, sizeof(ro));
-
-       if (IN6_IS_ADDR_MULTICAST(taddr6))
+       if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) || IN6_IS_ADDR_MULTICAST(taddr6))
                return;
 
+       bzero(&ro, sizeof(ro));
+
        ip6oa.ip6oa_boundif = ifp->if_index;
+       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
 
        /* estimate the size of message */
        maxlen = sizeof(*ip6) + sizeof(*nd_ns);
@@ -580,7 +604,7 @@ nd6_ns_output(
                }
 
                im6o->im6o_multicast_ifp = ifp;
-               im6o->im6o_multicast_hlim = 255;
+               im6o->im6o_multicast_hlim = IPV6_MAXHLIM;
                im6o->im6o_multicast_loop = 0;
        }
 
@@ -595,7 +619,7 @@ nd6_ns_output(
        ip6->ip6_vfc |= IPV6_VERSION;
        /* ip6->ip6_plen will be set later */
        ip6->ip6_nxt = IPPROTO_ICMPV6;
-       ip6->ip6_hlim = 255;
+       ip6->ip6_hlim = IPV6_MAXHLIM;
        if (daddr6)
                ip6->ip6_dst = *daddr6;
        else {
@@ -648,17 +672,16 @@ nd6_ns_output(
                                ln->ln_llreach->lr_probes++;
                                IFLR_UNLOCK(ln->ln_llreach);
                        }
+                       rtflags = ln->ln_rt->rt_flags;
                        RT_UNLOCK(ln->ln_rt);
-
                }
                if (ia != NULL) {
                        IFA_REMREF(&ia->ia_ifa);
                        ia = NULL;
                }
-               if (hsrc != NULL && (ia = in6ifa_ifpwithaddr(ifp, hsrc))) {
+               if (hsrc != NULL && (ia = in6ifa_ifpwithaddr(ifp, hsrc)) &&
+                   (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0) {
                        src = hsrc;
-                       IFA_REMREF(&ia->ia_ifa);
-                       ia = NULL;
                } else {
                        int error;
                        struct sockaddr_in6 dst_sa;
@@ -679,6 +702,15 @@ nd6_ns_output(
                                    error));
                                goto bad;
                        }
+
+                       ia = in6ifa_ifpwithaddr(ifp, src);
+                       if (!ia || (ia->ia6_flags & IN6_IFF_OPTIMISTIC)) {
+                               nd6log((LOG_DEBUG,
+                                   "nd6_ns_output: no preferred source "
+                                   "available: dst=%s\n",
+                                   ip6_sprintf(&dst_sa.sin6_addr)));
+                               goto bad;
+                       }
                }
        } else {
                /*
@@ -690,6 +722,7 @@ nd6_ns_output(
                 */
                bzero(&src_in, sizeof(src_in));
                src = &src_in;
+               ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_SRCADDR;
        }
        ip6->ip6_src = *src;
        nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
@@ -716,7 +749,7 @@ nd6_ns_output(
                struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
                /* 8 byte alignments... */
                optlen = (optlen + 7) & ~7;
-               
+
                m->m_pkthdr.len += optlen;
                m->m_len += optlen;
                icmp6len += optlen;
@@ -739,6 +772,23 @@ nd6_ns_output(
        flags = dad ? IPV6_UNSPECSRC : 0;
        flags |= IPV6_OUTARGS;
 
+       /*
+        * If this is an NS for resolving the (default) router, mark
+        * the packet accordingly so that the driver can find out,
+        * in case it needs to perform driver-specific action(s).
+        */
+       if (rtflags & RTF_ROUTER) {
+               m->m_pkthdr.aux_flags |= MAUXF_INET6_RESOLVE_RTR;
+               VERIFY(!(m->m_pkthdr.aux_flags & MAUXF_INET_RESOLVE_RTR));
+       }
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               /* Use control service class if the interface
+                * supports the transmit-start model.
+                */
+               (void) m_set_service_class(m, MBUF_SC_CTL);
+       }
+
        ip6_output(m, NULL, NULL, flags, im6o, &outif, &ip6oa);
        if (outif) {
                icmp6_ifstat_inc(outif, ifs6_out_msg);
@@ -787,6 +837,7 @@ nd6_na_input(
        struct ifnet *ifp = m->m_pkthdr.rcvif;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct nd_neighbor_advert *nd_na;
+       struct in6_addr saddr6 = ip6->ip6_src;
        struct in6_addr daddr6 = ip6->ip6_dst;
        struct in6_addr taddr6;
        int flags;
@@ -802,7 +853,10 @@ nd6_na_input(
        union nd_opts ndopts;
        struct timeval timenow;
 
-       if (ip6->ip6_hlim != 255) {
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
+       if (ip6->ip6_hlim != IPV6_MAXHLIM) {
                nd6log((LOG_ERR,
                    "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
                    ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
@@ -862,17 +916,34 @@ nd6_na_input(
        /*
         * Target address matches one of my interface address.
         *
-        * If my address is tentative, this means that there's somebody
-        * already using the same address as mine.  This indicates DAD failure.
-        * This is defined in RFC 2462.
+        * If my address is tentative or optimistic, this means that there's
+        * somebody already using the same address as mine.  This indicates DAD
+        * failure.  This is defined in RFC 2462 and updated by RFC 4429.
         *
         * Otherwise, process as defined in RFC 2461.
         */
        if (ifa != NULL) {
                IFA_LOCK(ifa);
-               if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE) {
+               if (((struct in6_ifaddr *)ifa)->ia6_flags &
+                   IN6_IFF_DADPROGRESS) {
+                       struct nd_ifinfo *ndi;
+                       boolean_t ignorena = FALSE;
+
                        IFA_UNLOCK(ifa);
-                       nd6_dad_na_input(ifa, lladdr, lladdrlen);
+                       lck_rw_lock_shared(nd_if_rwlock);
+                       ndi = ND_IFINFO(ifp);
+                       if (ndi != NULL && ndi->initialized) {
+                               lck_mtx_lock(&ndi->lock);
+                               ignorena = ndi->flags & ND6_IFF_IGNORE_NA;
+                               lck_mtx_unlock(&ndi->lock);
+                       }
+                       lck_rw_done(nd_if_rwlock);
+                       if (ignorena)
+                               log(LOG_ERR, "%s: ignoring duplicate DAD due "
+                                   "to sleep proxy (%s)\n", __func__,
+                                   if_name(ifp));
+                       else 
+                               nd6_dad_na_input(ifa, lladdr, lladdrlen);
                        goto freeit;
                }
                IFA_UNLOCK(ifa);
@@ -894,12 +965,41 @@ nd6_na_input(
                goto bad;
        }
 
+       /* Forwarding associated with NDPRF_PRPROXY may apply. */
+       if (ip6_forwarding && nd6_prproxy)
+               nd6_prproxy_na_input(ifp, &saddr6, &daddr6, &taddr6, flags);
+
        /*
         * If no neighbor cache entry is found, NA SHOULD silently be
-        * discarded.
+        * discarded.  If we are forwarding (and Scoped Routing is in
+        * effect), try to see if there is a neighbor cache entry on
+        * another interface (in case we are doing prefix proxying.)
         */
-       if ((rt = nd6_lookup(&taddr6, 0, ifp, 0)) == NULL)
-               goto freeit;
+       if ((rt = nd6_lookup(&taddr6, 0, ifp, 0)) == NULL) {
+               if (!ip6_forwarding || !ip6_doscopedroute || !nd6_prproxy)
+                       goto freeit;
+
+               if ((rt = nd6_lookup(&taddr6, 0, NULL, 0)) == NULL)
+                       goto freeit;
+
+               RT_LOCK_ASSERT_HELD(rt);
+               if (rt->rt_ifp != ifp) {
+                       /*
+                        * Purge any link-layer info caching.
+                        */
+                       if (rt->rt_llinfo_purge != NULL)
+                               rt->rt_llinfo_purge(rt);
+
+                       /* Adjust route ref count for the interfaces */
+                       if (rt->rt_if_ref_fn != NULL) {
+                               rt->rt_if_ref_fn(ifp, 1);
+                               rt->rt_if_ref_fn(rt->rt_ifp, -1);
+                       }
+
+                       /* Change the interface that the existing route is on */
+                       rt->rt_ifp = ifp;
+               }
+       }
 
        RT_LOCK_ASSERT_HELD(rt);
        if ((ln = rt->rt_llinfo) == NULL ||
@@ -930,9 +1030,15 @@ nd6_na_input(
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        ln->ln_byhint = 0;
                        if (ln->ln_expire) {
+                               struct nd_ifinfo *ndi;
+
                                lck_rw_lock_shared(nd_if_rwlock);
+                               ndi = ND_IFINFO(rt->rt_ifp);
+                               VERIFY(ndi != NULL && ndi->initialized);
+                               lck_mtx_lock(&ndi->lock);
                                ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
-                                   nd_ifinfo[rt->rt_ifp->if_index].reachable);
+                                   ndi->reachable);
+                               lck_mtx_unlock(&ndi->lock);
                                lck_rw_done(nd_if_rwlock);
                        }
                } else {
@@ -1022,10 +1128,16 @@ nd6_na_input(
                                ln->ln_state = ND6_LLINFO_REACHABLE;
                                ln->ln_byhint = 0;
                                if (ln->ln_expire) {
+                                       struct nd_ifinfo *ndi;
+
                                        lck_rw_lock_shared(nd_if_rwlock);
+                                       ndi = ND_IFINFO(ifp);
+                                       VERIFY(ndi != NULL && ndi->initialized);
+                                       lck_mtx_lock(&ndi->lock);
                                        ln->ln_expire =
                                            rt_expiry(rt, timenow.tv_sec,
-                                           nd_ifinfo[ifp->if_index].reachable);
+                                               ndi->reachable);
+                                       lck_mtx_unlock(&ndi->lock);
                                        lck_rw_done(nd_if_rwlock);
                                }
                        } else {
@@ -1047,14 +1159,9 @@ nd6_na_input(
                        struct in6_addr *in6;
                        struct ifnet *rt_ifp = rt->rt_ifp;
 
-                       in6 = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
+                       in6 = &((struct sockaddr_in6 *)
+                           (void *)rt_key(rt))->sin6_addr;
 
-                       /*
-                        * Lock to protect the default router list.
-                        * XXX: this might be unnecessary, since this function
-                        * is only called under the network software interrupt
-                        * context.  However, we keep it just for safety.
-                        */
                        RT_UNLOCK(rt);
                        lck_mtx_lock(nd6_mutex);
                        dr = defrouter_lookup(in6, rt_ifp);
@@ -1062,16 +1169,15 @@ nd6_na_input(
                                defrtrlist_del(dr);
                                NDDR_REMREF(dr);
                                lck_mtx_unlock(nd6_mutex);
-                       }
-                       else {
+                       } else {
                                lck_mtx_unlock(nd6_mutex);
-                               if (!ip6_forwarding && (ip6_accept_rtadv || (rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
+                               if (ip6_doscopedroute || !ip6_forwarding) {
                                        /*
-                                        * Even if the neighbor is not in the default
-                                        * router list, the neighbor may be used
-                                        * as a next hop for some destinations
-                                        * (e.g. redirect case). So we must
-                                        * call rt6_flush explicitly.
+                                        * Even if the neighbor is not in the
+                                        * default router list, the neighbor
+                                        * may be used as a next hop for some
+                                        * destinations (e.g. redirect case).
+                                        * So we must call rt6_flush explicitly.
                                         */
                                        rt6_flush(&ip6->ip6_src, rt_ifp);
                                }
@@ -1107,7 +1213,7 @@ nd6_na_input(
                         * the 2nd argument as the 1st one.
                         */
                        RT_UNLOCK(rt);
-                       nd6_output(ifp, ifp, m_hold, &sin6, rt);
+                       nd6_output(ifp, ifp, m_hold, &sin6, rt, NULL);
                        RT_LOCK_SPIN(rt);
                }
                ln->ln_hold = NULL;
@@ -1157,16 +1263,19 @@ nd6_na_output(
        caddr_t mac = NULL;
        struct route_in6 ro;
        struct in6_addr *src, src_storage, daddr6;
+       struct in6_ifaddr *ia;
        struct sockaddr_in6 dst_sa;
        int icmp6len, maxlen, error;
         struct ifnet *outif = NULL;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR };
 
        bzero(&ro, sizeof(ro));
 
        daddr6 = *daddr6_0;     /* make a local copy for modification */
 
        ip6oa.ip6oa_boundif = ifp->if_index;
+       ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
 
        /* estimate the size of message */
        maxlen = sizeof(*ip6) + sizeof(*nd_na);
@@ -1201,7 +1310,7 @@ nd6_na_output(
                }
 
                im6o->im6o_multicast_ifp = ifp;
-               im6o->im6o_multicast_hlim = 255;
+               im6o->im6o_multicast_hlim = IPV6_MAXHLIM;
                im6o->im6o_multicast_loop = 0;
        }
 
@@ -1215,7 +1324,7 @@ nd6_na_output(
        ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
        ip6->ip6_vfc |= IPV6_VERSION;
        ip6->ip6_nxt = IPPROTO_ICMPV6;
-       ip6->ip6_hlim = 255;
+       ip6->ip6_hlim = IPV6_MAXHLIM;
        if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
                /* reply to DAD */
                daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
@@ -1249,6 +1358,17 @@ nd6_na_output(
        }
        ip6->ip6_src = *src;
 
+       /*
+        * RFC 4429 forbids setting the "override" flag on NA packets sent
+        * from optimistic addresses.
+        */
+       ia = in6ifa_ifpwithaddr(ifp, src);
+       if (ia != NULL) {
+               if (ia->ia6_flags & IN6_IFF_OPTIMISTIC)
+                       flags &= ~ND_NA_FLAG_OVERRIDE;
+               IFA_REMREF(&ia->ia_ifa);
+       }
+
        nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
        nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
        nd_na->nd_na_code = 0;
@@ -1271,7 +1391,7 @@ nd6_na_output(
                        mac = nd6_ifptomac(ifp);
                else if (sdl0->sa_family == AF_LINK) {
                        struct sockaddr_dl *sdl;
-                       sdl = (struct sockaddr_dl *)sdl0;
+                       sdl = (struct sockaddr_dl *)(void *)sdl0;
                        if (sdl->sdl_alen == ifp->if_addrlen)
                                mac = LLADDR(sdl);
                }
@@ -1304,6 +1424,14 @@ nd6_na_output(
        if (ipsec_bypass == 0)
                (void)ipsec_setsocket(m, NULL);
 #endif
+
+       if (ifp->if_eflags & IFEF_TXSTART) {
+               /* Use control service class if the interface supports
+                * the transmit-start model.
+                */
+               (void) m_set_service_class(m, MBUF_SC_CTL);
+       }
+
        ip6_output(m, NULL, NULL, IPV6_OUTARGS, im6o, &outif, &ip6oa);
        if (outif) {
                icmp6_ifstat_inc(outif, ifs6_out_msg);
@@ -1376,6 +1504,8 @@ static struct dadq_head dadq;
 void
 nd6_nbr_init(void)
 {
+       int i;
+       
        TAILQ_INIT(&dadq);
 
        dad_size = sizeof (struct dadq);
@@ -1386,6 +1516,12 @@ nd6_nbr_init(void)
        }
        zone_change(dad_zone, Z_EXPAND, TRUE);
        zone_change(dad_zone, Z_CALLERACCT, FALSE);
+
+       bzero(&hostrtmask, sizeof hostrtmask);
+       hostrtmask.sin6_family = AF_INET6;
+       hostrtmask.sin6_len = sizeof hostrtmask;
+       for (i = 0; i < sizeof hostrtmask.sin6_addr; ++i)
+               hostrtmask.sin6_addr.s6_addr[i] = 0xff;
 }
 
 static struct dadq *
@@ -1434,29 +1570,25 @@ nd6_dad_start(
         * - the interface address is anycast
         */
        IFA_LOCK(&ia->ia_ifa);
-       if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) {
+       if (!(ia->ia6_flags & IN6_IFF_DADPROGRESS)) {
                log(LOG_DEBUG,
-                       "nd6_dad_start: called with non-tentative address "
+                       "nd6_dad_start: not a tentative or optimistic address "
                        "%s(%s)\n",
                        ip6_sprintf(&ia->ia_addr.sin6_addr),
                        ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
                IFA_UNLOCK(&ia->ia_ifa);
                return;
        }
-       if (ia->ia6_flags & IN6_IFF_ANYCAST) {
-               ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
-               IFA_UNLOCK(&ia->ia_ifa);
-               return;
-       }
-       if (!ip6_dad_count) {
-               ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+       if (!ip6_dad_count || (ia->ia6_flags & IN6_IFF_ANYCAST) != 0) {
+               ia->ia6_flags &= ~IN6_IFF_DADPROGRESS;
                IFA_UNLOCK(&ia->ia_ifa);
                return;
        }
        IFA_UNLOCK(&ia->ia_ifa);
        if (ifa->ifa_ifp == NULL)
                panic("nd6_dad_start: ifa->ifa_ifp == NULL");
-       if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
+       if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
+           (ifa->ifa_ifp->if_eflags & IFEF_IPV6_ND6ALT)) {
                return;
        }
        if ((dp = nd6_dad_find(ifa)) != NULL) {
@@ -1479,7 +1611,9 @@ nd6_dad_start(
        /* Callee adds one reference for us */
        dp = nd6_dad_attach(dp, ifa);
 
-       nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
+       nd6log((LOG_DEBUG, "%s: starting %sDAD for %s\n",
+           if_name(ifa->ifa_ifp),
+           (ia->ia6_flags & IN6_IFF_OPTIMISTIC) ? "optimistic " : "",
            ip6_sprintf(&ia->ia_addr.sin6_addr)));
 
        /*
@@ -1490,9 +1624,15 @@ nd6_dad_start(
         */
        if (tick_delay == NULL) {
                u_int32_t retrans;
+               struct nd_ifinfo *ndi;
+
                nd6_dad_ns_output(dp, ifa);
                lck_rw_lock_shared(nd_if_rwlock);
-               retrans = nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000;
+               ndi = ND_IFINFO(ifa->ifa_ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               retrans = ndi->retrans * hz / 1000;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
                timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans);
        } else {
@@ -1582,7 +1722,8 @@ nd6_unsol_na_output(struct ifaddr *ifa)
        struct in6_addr saddr6, taddr6;
 
        if ((ifp->if_flags & IFF_UP) == 0 ||
-           (ifp->if_flags & IFF_RUNNING) == 0)
+           (ifp->if_flags & IFF_RUNNING) == 0 ||
+           (ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0)
                return;
 
        IFA_LOCK_SPIN(&ia->ia_ifa);
@@ -1625,9 +1766,9 @@ nd6_dad_timer(struct ifaddr *ifa)
                IFA_UNLOCK(&ia->ia_ifa);
                goto done;
        }
-       if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
-               log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
-                       "%s(%s)\n",
+       if ((ia->ia6_flags & IN6_IFF_DADPROGRESS) == 0) {
+               log(LOG_ERR, "nd6_dad_timer: not a tentative or optimistic "
+                       "address %s(%s)\n",
                        ip6_sprintf(&ia->ia_addr.sin6_addr),
                        ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
                IFA_UNLOCK(&ia->ia_ifa);
@@ -1649,13 +1790,19 @@ nd6_dad_timer(struct ifaddr *ifa)
        /* Need more checks? */
        if (dp->dad_ns_ocount < dp->dad_count) {
                u_int32_t retrans;
+               struct nd_ifinfo *ndi;
+
                DAD_UNLOCK(dp);
                /*
                 * We have more NS to go.  Send NS packet for DAD.
                 */
                nd6_dad_ns_output(dp, ifa);
                lck_rw_lock_shared(nd_if_rwlock);
-               retrans = nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000;
+               ndi = ND_IFINFO(ifa->ifa_ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               retrans = ndi->retrans * hz / 1000;
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
                timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans);
        } else {
@@ -1690,7 +1837,7 @@ nd6_dad_timer(struct ifaddr *ifa)
                         * No duplicate address found.
                         */
                        IFA_LOCK_SPIN(&ia->ia_ifa);
-                       ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+                       ia->ia6_flags &= ~IN6_IFF_DADPROGRESS;
                        IFA_UNLOCK(&ia->ia_ifa);
 
                        nd6log((LOG_DEBUG,
@@ -1726,7 +1873,6 @@ nd6_dad_duplicated(struct ifaddr *ifa, boolean_t dontignhwdup)
                log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
                return;
        }
-
        hwdupposs = 0;
        IFA_LOCK(&ia->ia_ifa);
        DAD_LOCK(dp);
@@ -1737,7 +1883,7 @@ nd6_dad_duplicated(struct ifaddr *ifa, boolean_t dontignhwdup)
            dp->dad_na_ixcount);
        hwdupposs = dp->dad_na_ixcount;
        DAD_UNLOCK(dp);
-       ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+       ia->ia6_flags &= ~IN6_IFF_DADPROGRESS;
        ia->ia6_flags |= IN6_IFF_DUPLICATED;
        IFA_UNLOCK(&ia->ia_ifa);
 
@@ -1761,7 +1907,7 @@ nd6_dad_duplicated(struct ifaddr *ifa, boolean_t dontignhwdup)
                    ND6_IFF_IFDISABLED;
                lck_rw_done(nd_if_rwlock);
        }
-       
+
        /* Send an event to the configuration agent so that the
         * duplicate address will be notified to the user and will
         * be removed.
@@ -1906,7 +2052,8 @@ nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen)
                                
                                llifa = ifp->if_lladdr;
                                IFA_LOCK(llifa);
-                               sdl = (struct sockaddr_dl *)llifa->ifa_addr;
+                               sdl = (struct sockaddr_dl *)(void *)
+                                   llifa->ifa_addr;
                                if (lladdrlen == sdl->sdl_alen ||
                                    bcmp(lladdr, LLADDR(sdl), lladdrlen) == 0)
                                        hwdupposs = 1;
@@ -2008,3 +2155,166 @@ nd6_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
 
        ifnet_llreach_set_reachable(ifp, ETHERTYPE_IPV6, addr, alen);
 }
+
+void
+nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa,
+    struct sockaddr_dl* sdl, struct sockaddr_in6 *sin6)
+{
+       static const size_t EUI64_LENGTH = 8;
+       
+       VERIFY(nd6_need_cache(ifp));
+       VERIFY(sa);
+       VERIFY(sdl && (void *)sa != (void *)sdl);
+       VERIFY(sin6 && (void *)sa != (void *)sin6);
+       
+       bzero(sin6, sizeof *sin6);
+       sin6->sin6_len = sizeof *sin6;
+       sin6->sin6_family = AF_INET6;
+       
+       bzero(sdl, sizeof *sdl);
+       sdl->sdl_len = sizeof *sdl;
+       sdl->sdl_family = AF_LINK;
+       sdl->sdl_type = ifp->if_type;
+       sdl->sdl_index = ifp->if_index;
+       
+       switch (sa->sa_family) {
+       case AF_INET6: {
+               struct sockaddr_in6 *sin6a = (struct sockaddr_in6 *)(void *)sa;
+               struct in6_addr *in6 = &sin6a->sin6_addr;
+               
+               VERIFY(sa->sa_len == sizeof *sin6);
+               
+               sdl->sdl_nlen = strlen(ifp->if_name);
+               bcopy(ifp->if_name, sdl->sdl_data, sdl->sdl_nlen);
+               if (in6->s6_addr[11] == 0xff && in6->s6_addr[12] == 0xfe) {
+                       sdl->sdl_alen = ETHER_ADDR_LEN;
+                       LLADDR(sdl)[0] = (in6->s6_addr[8] ^ ND6_EUI64_UBIT);
+                       LLADDR(sdl)[1] = in6->s6_addr[9];
+                       LLADDR(sdl)[2] = in6->s6_addr[10];
+                       LLADDR(sdl)[3] = in6->s6_addr[13];
+                       LLADDR(sdl)[4] = in6->s6_addr[14];
+                       LLADDR(sdl)[5] = in6->s6_addr[15];
+               } else {
+                       sdl->sdl_alen = EUI64_LENGTH;
+                       bcopy(&in6->s6_addr[8], LLADDR(sdl), EUI64_LENGTH);
+               }
+               
+               sdl->sdl_slen = 0;
+               break;
+       }
+       case AF_LINK: {
+               struct sockaddr_dl *sdla = (struct sockaddr_dl *)(void *)sa;
+               struct in6_addr *in6 = &sin6->sin6_addr;
+               caddr_t lla = LLADDR(sdla);
+               
+               VERIFY(sa->sa_len <= sizeof *sdl);
+               bcopy(sa, sdl, sa->sa_len);
+               
+               sin6->sin6_scope_id = sdla->sdl_index;
+               if (sin6->sin6_scope_id == 0)
+                       sin6->sin6_scope_id = ifp->if_index;
+               in6->s6_addr[0] = 0xfe;
+               in6->s6_addr[1] = 0x80;
+               if (sdla->sdl_alen == EUI64_LENGTH)
+                       bcopy(lla, &in6->s6_addr[8], EUI64_LENGTH);
+               else {
+                       VERIFY(sdla->sdl_alen == ETHER_ADDR_LEN);
+                       
+                       in6->s6_addr[8] = ((uint8_t) lla[0] ^ ND6_EUI64_UBIT);
+                       in6->s6_addr[9] = (uint8_t) lla[1];
+                       in6->s6_addr[10] = (uint8_t) lla[2];
+                       in6->s6_addr[11] = 0xff;
+                       in6->s6_addr[12] = 0xfe;
+                       in6->s6_addr[13] = (uint8_t) lla[3];
+                       in6->s6_addr[14] = (uint8_t) lla[4];
+                       in6->s6_addr[15] = (uint8_t) lla[5];
+               }
+               
+               break;
+       }
+       default:
+               VERIFY(false);
+               break;
+       }
+}
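+
+/*
+ * Worked example for the AF_LINK case above: an Ethernet address of
+ * 00:11:22:33:44:55 maps to the modified EUI-64 interface identifier
+ * 02:11:22:ff:fe:33:44:55 (universal/local bit toggled via
+ * ND6_EUI64_UBIT, with ff:fe inserted in the middle), so *sin6 gets
+ * the link-local address fe80::211:22ff:fe33:4455.
+ */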
+
+void
+nd6_alt_node_present(struct ifnet *ifp, struct sockaddr_in6 *sin6,
+    struct sockaddr_dl *sdl, int32_t rssi, int lqm, int npm)
+{
+       struct rtentry *rt;
+       struct llinfo_nd6 *ln;
+       struct  if_llreach *lr;
+
+       nd6_cache_lladdr(ifp, &sin6->sin6_addr, LLADDR(sdl),
+           sdl->sdl_alen, ND_NEIGHBOR_ADVERT, 0);
+
+       lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
+       lck_mtx_lock(rnh_lock);
+
+       rt = rtalloc1_scoped_locked((struct sockaddr *)sin6, 1, 0,
+           ifp->if_index);
+       if (rt != NULL) {
+               RT_LOCK(rt);
+               VERIFY(rt->rt_flags & RTF_LLINFO);
+               VERIFY(rt->rt_llinfo);
+
+               ln = rt->rt_llinfo;
+               ln->ln_state = ND6_LLINFO_REACHABLE;
+               ln->ln_expire = 0;
+
+               lr = ln->ln_llreach;
+               if (lr) {
+                       IFLR_LOCK(lr);
+                       lr->lr_rssi = rssi;
+                       lr->lr_lqm = (int32_t) lqm;
+                       lr->lr_npm = (int32_t) npm;
+                       IFLR_UNLOCK(lr);
+               }
+
+               RT_UNLOCK(rt);
+               RT_REMREF(rt);
+       }
+
+       lck_mtx_unlock(rnh_lock);
+
+       if (rt == NULL) {
+               log(LOG_ERR, "%s: failed to add/update host route to %s.\n",
+                   __func__, ip6_sprintf(&sin6->sin6_addr));
+       }
+}
+
+void
+nd6_alt_node_absent(struct ifnet *ifp, struct sockaddr_in6 *sin6)
+{
+       struct rtentry *rt;
+
+       lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
+       lck_mtx_lock(rnh_lock);
+
+       rt = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0, 0,
+           ifp->if_index);
+       if (rt != NULL) {
+               RT_LOCK(rt);
+
+               if (!(rt->rt_flags & (RTF_PINNED|RTF_CLONING|RTF_PRCLONING)) &&
+                   (rt->rt_flags & (RTF_HOST|RTF_LLINFO|RTF_WASCLONED)) ==
+                     (RTF_HOST|RTF_LLINFO|RTF_WASCLONED)) {
+                       rt->rt_flags |= RTF_CONDEMNED;
+                       RT_UNLOCK(rt);
+
+                       (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
+                           (struct sockaddr *)NULL, rt_mask(rt), 0,
+                           (struct rtentry **)NULL);
+
+                       rtfree_locked(rt);
+               } else {
+                       RT_REMREF_LOCKED(rt);
+                       RT_UNLOCK(rt);
+               }
+       }
+
+       lck_mtx_unlock(rnh_lock);
+}
diff --git a/bsd/netinet6/nd6_prproxy.c b/bsd/netinet6/nd6_prproxy.c
new file mode 100644
index 0000000..3bac47e
--- /dev/null
+++ b/bsd/netinet6/nd6_prproxy.c
@@ -0,0 +1,1357 @@
+/*
+ * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Prefix-based Neighbor Discovery Proxy
+ *
+ * When an interface is marked with the ND6_IFF_PROXY_PREFIXES flag, all
+ * current and future non-scoped on-link prefixes configured on the
+ * interface will be shared with the scoped variant of such prefixes on
+ * other interfaces.  This allows for one or more prefixes to be shared
+ * across multiple links, with full support for Duplicate Address Detection,
+ * Address Resolution and Neighbor Unreachability Detection.
+ *
+ * A non-scoped prefix may be configured statically, or dynamically via
+ * Router Advertisement.  An interface is said to be an "upstream" interface
+ * when it is marked with ND6_IFF_PROXY_PREFIXES and has at least one prefix
+ * that is non-scoped (i.e. global.)  Such prefixes are marked with
+ * the NDPRF_PRPROXY flag.
+ *
+ * A scoped prefix typically gets configured by way of adding an address
+ * to a "downstream" interface, when the added address is part of an existing
+ * prefix that is allowed to be shared (i.e. NDPRF_PRPROXY prefixes.)  Unlike
+ * non-scoped prefixes, however, scoped prefixes will never be marked with
+ * the NDPRF_PRPROXY flag.
+ *
+ * The setting of NDPRF_PRPROXY depends on whether the prefix is on-link;
+ * an off-link prefix on an interface marked with ND6_IFF_PROXY_PREFIXES
+ * will not cause NDPRF_PRPROXY to be set (it will only happen when that
+ * prefix goes on-link.)  Likewise, a previously on-link prefix that has
+ * transitioned to off-link will cause its NDPRF_PRPROXY flag to be cleared.
+ *
+ * Prefix proxying relies on IPv6 Scoped Routing to be in effect, as it would
+ * otherwise be impossible to install scoped prefix route entries in the
+ * routing table.  By default, such cloning prefix routes will generate cloned
+ * routes that are scoped according to their interfaces.  Because prefix
+ * proxying is essentially creating a larger network comprised of multiple
+ * links sharing a prefix, we need to treat the cloned routes as if they
+ * weren't scoped route entries.  This requires marking such cloning prefix
+ * routes with the RTF_PROXY flag, which serves as an indication that the
+ * route entry (and its clones) are part of a proxied prefix, and that the
+ * entries are non-scoped.
+ *
+ * In order to handle solicited-node destined ND packets (Address Resolution,
+ * Neighbor Unreachability Detection), prefix proxying also requires that the
+ * "upstream" and "downstream" interfaces be configured for all-multicast mode.
+ *
+ * The setting and clearing of the RTF_PROXY flag, as well as the entering
+ * and exiting of all-multicast mode on those interfaces, happen when a
+ * prefix transitions between on-link and off-link, and vice versa.
+ *
+ * Note that this is not a strict implementation of RFC 4389, but rather a
+ * derivative based on a similar concept.  In particular, we only proxy NS and
+ * NA packets; RA packets are never proxied.  Care should be taken to enable
+ * prefix proxying only on non-looping network topology.
+ */
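+
+/*
+ * Usage sketch (illustrative only, not a verbatim excerpt): an
+ * in-kernel caller toggles proxying for all eligible prefixes of an
+ * upstream interface through nd6_if_prproxy():
+ *
+ *	if ((error = nd6_if_prproxy(ifp, TRUE)) != 0)
+ *		return (error);	   /* EBUSY if IFEF_IPV6_ROUTER is set */
+ *	...
+ *	(void) nd6_if_prproxy(ifp, FALSE);	/* undo on teardown */
+ *
+ * Normally this is reached from the SIOCSIFINFO_FLAGS ioctl path
+ * rather than being called directly.
+ */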
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
+
+#include <kern/queue.h>
+#include <kern/zalloc.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+
+struct nd6_prproxy_prelist {
+       SLIST_ENTRY(nd6_prproxy_prelist) ndprl_le;
+       struct nd_prefix *ndprl_pr;             /* prefix */
+       struct nd_prefix *ndprl_up;             /* non-NULL for upstream */
+       struct ifnet    *ndprl_fwd_ifp;         /* outgoing interface */
+       boolean_t       ndprl_sol;              /* unicast solicitor? */
+       struct in6_addr ndprl_sol_saddr;        /* solicitor's address */
+};
+
+/*
+ * Soliciting node (source) record.
+ */
+struct nd6_prproxy_solsrc {
+       TAILQ_ENTRY(nd6_prproxy_solsrc) solsrc_tqe;
+       struct in6_addr solsrc_saddr;           /* soliciting (src) address */
+       struct ifnet    *solsrc_ifp;            /* interface on which NS arrived */
+};
+
+/*
+ * Solicited node (target) record.
+ */
+struct nd6_prproxy_soltgt {
+       RB_ENTRY(nd6_prproxy_soltgt) soltgt_link; /* RB tree links */
+       struct soltgt_key_s {
+               struct in6_addr taddr;          /* solicited (tgt) address */
+       } soltgt_key;
+       u_int64_t       soltgt_expire;          /* expiration time */
+       u_int32_t       soltgt_cnt;             /* total # of solicitors */
+       TAILQ_HEAD(, nd6_prproxy_solsrc) soltgt_q;
+};
+
+SLIST_HEAD(nd6_prproxy_prelist_head, nd6_prproxy_prelist);
+
+static void nd6_prproxy_prelist_setroute(boolean_t enable,
+    struct nd6_prproxy_prelist_head *, struct nd6_prproxy_prelist_head *);
+static struct nd6_prproxy_prelist *nd6_ndprl_alloc(int);
+static void nd6_ndprl_free(struct nd6_prproxy_prelist *);
+static struct nd6_prproxy_solsrc *nd6_solsrc_alloc(int);
+static void nd6_solsrc_free(struct nd6_prproxy_solsrc *);
+static boolean_t nd6_solsrc_enq(struct nd_prefix *, struct ifnet *,
+    struct in6_addr *, struct in6_addr *);
+static boolean_t nd6_solsrc_deq(struct nd_prefix *, struct in6_addr *,
+    struct in6_addr *, struct ifnet **);
+static struct nd6_prproxy_soltgt *nd6_soltgt_alloc(int);
+static void nd6_soltgt_free(struct nd6_prproxy_soltgt *);
+static void nd6_soltgt_prune(struct nd6_prproxy_soltgt *, u_int32_t);
+static __inline int soltgt_cmp(const struct nd6_prproxy_soltgt *,
+    const struct nd6_prproxy_soltgt *);
+static void nd6_prproxy_sols_purge(struct nd_prefix *, u_int64_t);
+
+RB_PROTOTYPE_SC_PREV(__private_extern__, prproxy_sols_tree, nd6_prproxy_soltgt,
+    soltgt_link, soltgt_cmp);
+
+/*
+ * Time (in seconds) before an idle target record expires.
+ */
+#define        ND6_TGT_SOLS_EXPIRE                     5
+
+/*
+ * Maximum number of queued soliciting (source) records per target.
+ */
+#define        ND6_MAX_SRC_SOLS_DEFAULT                4
+
+/*
+ * Maximum number of queued solicited (target) records per prefix.
+ */
+#define        ND6_MAX_TGT_SOLS_DEFAULT                8
+
+static u_int32_t nd6_max_tgt_sols = ND6_MAX_TGT_SOLS_DEFAULT;
+static u_int32_t nd6_max_src_sols = ND6_MAX_SRC_SOLS_DEFAULT;
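+
+/*
+ * Both limits are run-time tunable via the net.inet6.icmp6 sysctls
+ * nd6_maxsolstgt and nd6_maxproxiedsol declared further below.
+ */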
+
+static unsigned int ndprl_size;                        /* size of zone element */
+static struct zone *ndprl_zone;                        /* nd6_prproxy_prelist zone */
+
+#define        NDPRL_ZONE_MAX  256                     /* maximum elements in zone */
+#define        NDPRL_ZONE_NAME "nd6_prproxy_prelist"   /* name for zone */
+
+static unsigned int solsrc_size;               /* size of zone element */
+static struct zone *solsrc_zone;               /* nd6_prproxy_solsrc zone */
+
+#define        SOLSRC_ZONE_MAX  256                    /* maximum elements in zone */
+#define        SOLSRC_ZONE_NAME "nd6_prproxy_solsrc"   /* name for zone */
+
+static unsigned int soltgt_size;               /* size of zone element */
+static struct zone *soltgt_zone;               /* nd6_prproxy_soltgt zone */
+
+#define        SOLTGT_ZONE_MAX  256                    /* maximum elements in zone */
+#define        SOLTGT_ZONE_NAME "nd6_prproxy_soltgt"   /* name for zone */
+
+/* The following is protected by ndpr_lock */
+RB_GENERATE_PREV(prproxy_sols_tree, nd6_prproxy_soltgt,
+    soltgt_link, soltgt_cmp);
+
+/* The following is protected by proxy6_lock (for updates) */
+u_int32_t nd6_prproxy;
+
+extern lck_mtx_t *nd6_mutex;
+
+SYSCTL_DECL(_net_inet6_icmp6);
+
+SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxsolstgt,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_tgt_sols, ND6_MAX_TGT_SOLS_DEFAULT,
+    "maximum number of outstanding solicited targets per prefix");
+
+SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, nd6_maxproxiedsol,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_max_src_sols, ND6_MAX_SRC_SOLS_DEFAULT,
+    "maximum number of outstanding solicitations per target");
+
+SYSCTL_UINT(_net_inet6_icmp6, OID_AUTO, prproxy_cnt,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &nd6_prproxy, 0,
+    "total number of proxied prefixes");
+
+/*
+ * Called by nd6_init() during initialization time.
+ */
+void
+nd6_prproxy_init(void)
+{
+       ndprl_size = sizeof (struct nd6_prproxy_prelist);
+       ndprl_zone = zinit(ndprl_size, NDPRL_ZONE_MAX * ndprl_size, 0,
+           NDPRL_ZONE_NAME);
+       if (ndprl_zone == NULL)
+               panic("%s: failed allocating ndprl_zone", __func__);
+
+       zone_change(ndprl_zone, Z_EXPAND, TRUE);
+       zone_change(ndprl_zone, Z_CALLERACCT, FALSE);
+
+       solsrc_size = sizeof (struct nd6_prproxy_solsrc);
+       solsrc_zone = zinit(solsrc_size, SOLSRC_ZONE_MAX * solsrc_size, 0,
+           SOLSRC_ZONE_NAME);
+       if (solsrc_zone == NULL)
+               panic("%s: failed allocating solsrc_zone", __func__);
+
+       zone_change(solsrc_zone, Z_EXPAND, TRUE);
+       zone_change(solsrc_zone, Z_CALLERACCT, FALSE);
+
+       soltgt_size = sizeof (struct nd6_prproxy_soltgt);
+       soltgt_zone = zinit(soltgt_size, SOLTGT_ZONE_MAX * soltgt_size, 0,
+           SOLTGT_ZONE_NAME);
+       if (soltgt_zone == NULL)
+               panic("%s: failed allocating soltgt_zone", __func__);
+
+       zone_change(soltgt_zone, Z_EXPAND, TRUE);
+       zone_change(soltgt_zone, Z_CALLERACCT, FALSE);
+}
+
+static struct nd6_prproxy_prelist *
+nd6_ndprl_alloc(int how)
+{
+       struct nd6_prproxy_prelist *ndprl;
+
+       ndprl = (how == M_WAITOK) ? zalloc(ndprl_zone) :
+           zalloc_noblock(ndprl_zone);
+       if (ndprl != NULL)
+               bzero(ndprl, ndprl_size);
+
+       return (ndprl);
+}
+
+static void
+nd6_ndprl_free(struct nd6_prproxy_prelist *ndprl)
+{
+       zfree(ndprl_zone, ndprl);
+}
+
+/*
+ * Apply routing function on the affected upstream and downstream prefixes,
+ * i.e. either set or clear RTF_PROXY on the cloning prefix route; all route
+ * entries that were cloned off these prefixes will be blown away.  Caller
+ * must have acquired proxy6_lock and must not be holding nd6_mutex.
+ */
+static void
+nd6_prproxy_prelist_setroute(boolean_t enable,
+    struct nd6_prproxy_prelist_head *up_head,
+    struct nd6_prproxy_prelist_head *down_head)
+{
+       struct nd6_prproxy_prelist *up, *down, *ndprl_tmp;
+       struct nd_prefix *pr;
+
+       lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
+       lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+       SLIST_FOREACH_SAFE(up, up_head, ndprl_le, ndprl_tmp) {
+               struct rtentry *rt;
+               boolean_t prproxy;
+
+               SLIST_REMOVE(up_head, up, nd6_prproxy_prelist, ndprl_le);
+               pr = up->ndprl_pr;
+               VERIFY(up->ndprl_up == NULL);
+
+               NDPR_LOCK(pr);
+               prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY);
+               VERIFY(!prproxy || ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
+                   !(pr->ndpr_stateflags & NDPRF_IFSCOPE)));
+
+               nd6_prproxy_sols_reap(pr);
+               VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
+               VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));
+
+               if (enable && pr->ndpr_allmulti_cnt == 0) {
+                       nd6_prproxy++;
+                       pr->ndpr_allmulti_cnt++;
+                       if_allmulti(pr->ndpr_ifp, TRUE);
+               } else if (!enable && pr->ndpr_allmulti_cnt > 0) {
+                       nd6_prproxy--;
+                       pr->ndpr_allmulti_cnt--;
+                       if_allmulti(pr->ndpr_ifp, FALSE);
+               }
+
+               if ((rt = pr->ndpr_rt) != NULL) {
+                       if ((enable && prproxy) || (!enable && !prproxy))
+                               RT_ADDREF(rt);
+                       else
+                               rt = NULL;
+                       NDPR_UNLOCK(pr);
+               } else {
+                       NDPR_UNLOCK(pr);
+               }
+               NDPR_REMREF(pr);
+               if (rt != NULL) {
+                       rt_set_proxy(rt, enable);
+                       rtfree(rt);
+               }
+               nd6_ndprl_free(up);
+       }
+
+       SLIST_FOREACH_SAFE(down, down_head, ndprl_le, ndprl_tmp) {
+               struct nd_prefix *pr_up;
+               struct rtentry *rt;
+               boolean_t prproxy;
+
+               SLIST_REMOVE(down_head, down, nd6_prproxy_prelist, ndprl_le);
+               pr = down->ndprl_pr;
+               pr_up = down->ndprl_up;
+               VERIFY(pr_up != NULL);
+
+               NDPR_LOCK(pr_up);
+               prproxy = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
+               VERIFY(!prproxy || ((pr_up->ndpr_stateflags & NDPRF_ONLINK) &&
+                   !(pr_up->ndpr_stateflags & NDPRF_IFSCOPE)));
+               NDPR_UNLOCK(pr_up);
+
+               NDPR_LOCK(pr);
+               if (enable && pr->ndpr_allmulti_cnt == 0) {
+                       pr->ndpr_allmulti_cnt++;
+                       if_allmulti(pr->ndpr_ifp, TRUE);
+               } else if (!enable && pr->ndpr_allmulti_cnt > 0) {
+                       pr->ndpr_allmulti_cnt--;
+                       if_allmulti(pr->ndpr_ifp, FALSE);
+               }
+
+               if ((rt = pr->ndpr_rt) != NULL) {
+                       if ((enable && prproxy) || (!enable && !prproxy))
+                               RT_ADDREF(rt);
+                       else
+                               rt = NULL;
+                       NDPR_UNLOCK(pr);
+               } else {
+                       NDPR_UNLOCK(pr);
+               }
+               NDPR_REMREF(pr);
+               NDPR_REMREF(pr_up);
+               if (rt != NULL) {
+                       rt_set_proxy(rt, enable);
+                       rtfree(rt);
+               }
+               nd6_ndprl_free(down);
+       }
+}
+
+/*
+ * Enable/disable prefix proxying on an interface; typically called
+ * as part of handling SIOCSIFINFO_FLAGS[IFEF_IPV6_ROUTER].
+ */
+int
+nd6_if_prproxy(struct ifnet *ifp, boolean_t enable)
+{
+       SLIST_HEAD(, nd6_prproxy_prelist) up_head;
+       SLIST_HEAD(, nd6_prproxy_prelist) down_head;
+       struct nd6_prproxy_prelist *up, *down;
+       struct nd_prefix *pr;
+
+       /* Can't be enabled if we are an advertising router on the interface */
+       ifnet_lock_shared(ifp);
+       if (enable && (ifp->if_eflags & IFEF_IPV6_ROUTER)) {
+               ifnet_lock_done(ifp);
+               return (EBUSY);
+       }
+       ifnet_lock_done(ifp);
+
+       SLIST_INIT(&up_head);
+       SLIST_INIT(&down_head);
+
+       /*
+        * Serialize the clearing/setting of NDPRF_PRPROXY.
+        */
+       lck_mtx_lock(&proxy6_lock);
+
+       /*
+        * First build a list of upstream prefixes on this interface for
+        * which we need to enable/disable prefix proxy functionality.
+        */
+       lck_mtx_lock(nd6_mutex);
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
+                   (!enable && !(pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
+                   (enable && (pr->ndpr_stateflags & NDPRF_PRPROXY)) ||
+                   (pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
+                   pr->ndpr_ifp != ifp) {
+                       NDPR_UNLOCK(pr);
+                       continue;
+               }
+
+               /*
+                * At present, in order for the prefix to be eligible
+                * as a proxying/proxied prefix, we require that the
+                * prefix route entry be marked as a cloning route with
+                * RTF_PROXY; i.e. nd6_need_cache() needs to return
+                * true for the interface type.
+                */
+               if (enable && (pr->ndpr_stateflags & NDPRF_ONLINK) &&
+                   nd6_need_cache(ifp)) {
+                       pr->ndpr_stateflags |= NDPRF_PRPROXY;
+                       NDPR_ADDREF_LOCKED(pr);
+                       NDPR_UNLOCK(pr);
+               } else if (!enable) {
+                       pr->ndpr_stateflags &= ~NDPRF_PRPROXY;
+                       NDPR_ADDREF_LOCKED(pr);
+                       NDPR_UNLOCK(pr);
+               } else {
+                       NDPR_UNLOCK(pr);
+                       pr = NULL;      /* don't go further */
+               }
+
+               if (pr == NULL)
+                       continue;
+
+               up = nd6_ndprl_alloc(M_WAITOK);
+               if (up == NULL) {
+                       NDPR_REMREF(pr);
+                       continue;
+               }
+
+               up->ndprl_pr = pr;      /* keep reference from above */
+               SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
+       }
+
+       /*
+        * Now build a list of matching (scoped) downstream prefixes on other
+        * interfaces which need to be enabled/disabled accordingly.  Note that
+        * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
+        */
+       SLIST_FOREACH(up, &up_head, ndprl_le) {
+               struct nd_prefix *fwd;
+               struct in6_addr pr_addr;
+               u_char pr_len;
+
+               pr = up->ndprl_pr;
+
+               NDPR_LOCK(pr);
+               bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
+               pr_len = pr->ndpr_plen;
+               NDPR_UNLOCK(pr);
+
+               for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
+                       NDPR_LOCK(fwd);
+                       if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
+                           !(fwd->ndpr_stateflags & NDPRF_IFSCOPE) ||
+                           fwd->ndpr_plen != pr_len ||
+                           !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
+                           &pr_addr, pr_len)) {
+                               NDPR_UNLOCK(fwd);
+                               continue;
+                       }
+                       NDPR_UNLOCK(fwd);
+
+                       down = nd6_ndprl_alloc(M_WAITOK);
+                       if (down == NULL)
+                               continue;
+
+                       NDPR_ADDREF(fwd);
+                       down->ndprl_pr = fwd;
+                       NDPR_ADDREF(pr);
+                       down->ndprl_up = pr;
+                       SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
+               }
+       }
+       lck_mtx_unlock(nd6_mutex);
+
+       /*
+        * Apply routing function on prefixes; callee will free resources.
+        */
+       nd6_prproxy_prelist_setroute(enable,
+           (struct nd6_prproxy_prelist_head *)&up_head,
+           (struct nd6_prproxy_prelist_head *)&down_head);
+
+       VERIFY(SLIST_EMPTY(&up_head));
+       VERIFY(SLIST_EMPTY(&down_head));
+
+       lck_mtx_unlock(&proxy6_lock);
+
+       return (0);
+}
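
nd6_if_prproxy() follows a pattern that recurs throughout this file: matching entries are collected onto a private SLIST while nd6_mutex is held, and the heavyweight work (route updates, if_allmulti()) happens only after the lock is dropped. A self-contained userland sketch of that two-phase shape, assuming a hypothetical table, lock, and match predicate:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct work {
        SLIST_ENTRY(work) link;
        int item;
};
SLIST_HEAD(work_head, work);

/* Hypothetical shared table, guarded by a mutex as nd_prefix is by nd6_mutex. */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int table[] = { 1, 2, 3, 4 };

int
main(void)
{
        struct work_head head = SLIST_HEAD_INITIALIZER(head);
        struct work *w;
        size_t i;

        /* Phase 1: collect matching entries while the lock is held. */
        pthread_mutex_lock(&table_lock);
        for (i = 0; i < sizeof (table) / sizeof (table[0]); i++) {
                if (table[i] % 2)       /* "match" predicate */
                        continue;
                w = malloc(sizeof (*w));
                if (w == NULL)
                        continue;
                w->item = table[i];
                SLIST_INSERT_HEAD(&head, w, link);
        }
        pthread_mutex_unlock(&table_lock);

        /* Phase 2: act on the private list with no lock held. */
        while ((w = SLIST_FIRST(&head)) != NULL) {
                SLIST_REMOVE_HEAD(&head, link);
                printf("acting on %d\n", w->item);
                free(w);
        }
        return (0);
}
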
+
+/*
+ * Called from the input path to determine whether the packet is destined
+ * to a proxied node; if so, mark the mbuf with MAUXF_PROXY_DST so that
+ * icmp6_input() knows that this is not to be delivered to socket(s).
+ */
+boolean_t
+nd6_prproxy_isours(struct mbuf *m, struct ip6_hdr *ip6, struct route_in6 *ro6,
+    unsigned int ifscope)
+{
+       struct rtentry *rt;
+       boolean_t ours = FALSE;
+
+       if (ip6->ip6_hlim != IPV6_MAXHLIM || ip6->ip6_nxt != IPPROTO_ICMPV6)
+               goto done;
+
+       if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst) ||
+           IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) {
+               VERIFY(ro6 == NULL);
+               ours = TRUE;
+               goto done;
+       } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+               goto done;
+       }
+
+       if (ro6 == NULL)
+               goto done;
+
+       if ((rt = ro6->ro_rt) != NULL)
+               RT_LOCK(rt);
+
+       if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
+           rt->generation_id != route_generation) {
+               if (rt != NULL) {
+                       RT_UNLOCK(rt);
+                       rtfree(rt);
+                       rt = ro6->ro_rt = NULL;
+               }
+
+               /* Caller must have ensured this condition (not srcrt) */
+               VERIFY(IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
+                   &ro6->ro_dst.sin6_addr));
+
+               rtalloc_scoped_ign((struct route *)ro6, RTF_PRCLONING, ifscope);
+               if ((rt = ro6->ro_rt) == NULL)
+                       goto done;
+
+               RT_LOCK(rt);
+       }
+
+       ours = (rt->rt_flags & RTF_PROXY) ? TRUE : FALSE;
+       RT_UNLOCK(rt);
+
+done:
+       if (ours)
+               m->m_pkthdr.aux_flags |= MAUXF_PROXY_DST;
+
+       return (ours);
+}
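
To summarize the checks above: only undecremented ICMPv6 (hop limit 255) qualifies; node-local and link-local multicast destinations are claimed outright, other multicast is refused, and unicast destinations are decided by RTF_PROXY on the (re)validated route. A compact sketch of that decision table, with hypothetical enums standing in for the kernel's address tests:

#include <stdbool.h>

/* Hypothetical stand-ins for the kernel's address/route predicates. */
enum dst_kind {
        DST_MC_NODELOCAL,       /* ff01::/16 */
        DST_MC_LINKLOCAL,       /* ff02::/16 */
        DST_MC_OTHER,           /* any other multicast scope */
        DST_UNICAST
};

static bool
proxy_is_ours(int hlim, bool is_icmp6, enum dst_kind kind, bool rt_proxy)
{
        if (hlim != 255 || !is_icmp6)
                return (false);         /* only undecremented NDP qualifies */
        switch (kind) {
        case DST_MC_NODELOCAL:
        case DST_MC_LINKLOCAL:
                return (true);          /* claimed outright */
        case DST_MC_OTHER:
                return (false);         /* never proxied */
        case DST_UNICAST:
                return (rt_proxy);      /* decided by RTF_PROXY on the route */
        }
        return (false);
}

int
main(void)
{
        /* A link-local multicast NS with hop limit 255 is ours. */
        return (proxy_is_ours(255, true, DST_MC_LINKLOCAL, false) ? 0 : 1);
}
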
+
+/*
+ * Called when a prefix transitions between on-link and off-link.  Perform
+ * routing (RTF_PROXY) and interface (all-multicast) related operations on
+ * the affected prefixes.
+ */
+void
+nd6_prproxy_prelist_update(struct nd_prefix *pr_cur, struct nd_prefix *pr_up)
+{
+       SLIST_HEAD(, nd6_prproxy_prelist) up_head;
+       SLIST_HEAD(, nd6_prproxy_prelist) down_head;
+       struct nd6_prproxy_prelist *up, *down;
+       struct nd_prefix *pr;
+       struct in6_addr pr_addr;
+       boolean_t enable;
+       u_char pr_len;
+
+       SLIST_INIT(&up_head);
+       SLIST_INIT(&down_head);
+       VERIFY(pr_cur != NULL);
+
+       lck_mtx_assert(&proxy6_lock, LCK_MTX_ASSERT_OWNED);
+
+       /*
+        * Upstream prefix.  If caller did not specify one, search for one
+        * based on the information in current prefix.  Caller is expected
+        * to have held an extra reference for the passed-in prefixes.
+        */
+       lck_mtx_lock(nd6_mutex);
+       if (pr_up == NULL) {
+               NDPR_LOCK(pr_cur);
+               bcopy(&pr_cur->ndpr_prefix.sin6_addr, &pr_addr,
+                   sizeof (pr_addr));
+               pr_len = pr_cur->ndpr_plen;
+               NDPR_UNLOCK(pr_cur);
+
+               for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+                       NDPR_LOCK(pr);
+                       if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
+                           !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
+                           pr->ndpr_plen != pr_len ||
+                           !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
+                           &pr_addr, pr_len)) {
+                               NDPR_UNLOCK(pr);
+                               continue;
+                       }
+                       NDPR_UNLOCK(pr);
+                       break;
+               }
+
+               if ((pr_up = pr) == NULL) {
+                       lck_mtx_unlock(nd6_mutex);
+                       goto done;
+               }
+               NDPR_LOCK(pr_up);
+       } else {
+               NDPR_LOCK(pr_up);
+               bcopy(&pr_up->ndpr_prefix.sin6_addr, &pr_addr,
+                   sizeof (pr_addr));
+               pr_len = pr_up->ndpr_plen;
+       }
+       NDPR_LOCK_ASSERT_HELD(pr_up);
+       /*
+        * Upstream prefix could be offlink by now; therefore we cannot
+        * assert that NDPRF_PRPROXY is set; however, we can insist that
+        * it must not be a scoped prefix.
+        */
+       VERIFY(!(pr_up->ndpr_stateflags & NDPRF_IFSCOPE));
+       enable = (pr_up->ndpr_stateflags & NDPRF_PRPROXY);
+       NDPR_UNLOCK(pr_up);
+
+       up = nd6_ndprl_alloc(M_WAITOK);
+       if (up == NULL) {
+               lck_mtx_unlock(nd6_mutex);
+               goto done;
+       }
+
+       NDPR_ADDREF(pr_up);
+       up->ndprl_pr = pr_up;
+       SLIST_INSERT_HEAD(&up_head, up, ndprl_le);
+
+       /*
+        * Now build a list of matching (scoped) downstream prefixes on other
+        * interfaces which need to be enabled/disabled accordingly.  Note that
+        * the NDPRF_PRPROXY is never set/cleared on the downstream prefixes.
+        */
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
+                   !(pr->ndpr_stateflags & NDPRF_IFSCOPE) ||
+                   pr->ndpr_plen != pr_len ||
+                   !in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
+                   &pr_addr, pr_len)) {
+                       NDPR_UNLOCK(pr);
+                       continue;
+               }
+               NDPR_UNLOCK(pr);
+
+               down = nd6_ndprl_alloc(M_WAITOK);
+               if (down == NULL)
+                       continue;
+
+               NDPR_ADDREF(pr);
+               down->ndprl_pr = pr;
+               NDPR_ADDREF(pr_up);
+               down->ndprl_up = pr_up;
+               SLIST_INSERT_HEAD(&down_head, down, ndprl_le);
+       }
+       lck_mtx_unlock(nd6_mutex);
+
+       /*
+        * Apply routing function on prefixes; callee will free resources.
+        */
+       nd6_prproxy_prelist_setroute(enable,
+           (struct nd6_prproxy_prelist_head *)&up_head,
+           (struct nd6_prproxy_prelist_head *)&down_head);
+
+done:
+       VERIFY(SLIST_EMPTY(&up_head));
+       VERIFY(SLIST_EMPTY(&down_head));
+}
+
+/*
+ * Given an interface address, determine whether the address is part
+ * of a proxied prefix.
+ */
+boolean_t
+nd6_prproxy_ifaddr(struct in6_ifaddr *ia)
+{
+       struct nd_prefix *pr;
+       struct in6_addr addr, pr_mask;
+       u_int32_t pr_len;
+       boolean_t proxied = FALSE;
+
+       lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+       IFA_LOCK(&ia->ia_ifa);
+       bcopy(&ia->ia_addr.sin6_addr, &addr, sizeof (addr));
+       bcopy(&ia->ia_prefixmask.sin6_addr, &pr_mask, sizeof (pr_mask));
+       pr_len = ia->ia_plen;
+       IFA_UNLOCK(&ia->ia_ifa);
+
+       lck_mtx_lock(nd6_mutex);
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
+                   (pr->ndpr_stateflags & NDPRF_PRPROXY) &&
+                   in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
+                   &addr, pr_len)) {
+                       NDPR_UNLOCK(pr);
+                       proxied = TRUE;
+                       break;
+               }
+               NDPR_UNLOCK(pr);
+       }
+       lck_mtx_unlock(nd6_mutex);
+
+       return (proxied);
+}
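
The match above hinges on in6_are_prefix_equal(), which compares only the leading pr_len bits of the two addresses. A minimal sketch of such a bitwise prefix comparison; prefix_equal() is a hypothetical helper, not the kernel routine:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Compare the leading plen bits of two 128-bit addresses (16 bytes each). */
static bool
prefix_equal(const uint8_t a[16], const uint8_t b[16], unsigned int plen)
{
        unsigned int bytes = plen / 8, bits = plen % 8;

        if (plen > 128)
                return (false);
        if (bytes > 0 && memcmp(a, b, bytes) != 0)
                return (false);
        if (bits > 0) {
                uint8_t mask = (uint8_t)(0xff << (8 - bits));
                if ((a[bytes] & mask) != (b[bytes] & mask))
                        return (false);
        }
        return (true);
}

int
main(void)
{
        uint8_t p1[16] = { 0x20, 0x01, 0x0d, 0xb8 };    /* 2001:db8::/32 */
        uint8_t p2[16] = { 0x20, 0x01, 0x0d, 0xb8, 0xff };

        printf("%d\n", prefix_equal(p1, p2, 32));       /* 1: same /32 */
        printf("%d\n", prefix_equal(p1, p2, 40));       /* 0: differ past bit 32 */
        return (0);
}
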
+
+/*
+ * Perform automatic proxy function with NS output.
+ *
+ * If the target address matches a global prefix obtained from a router
+ * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
+ * flag set, then we send solicitations for the target address to all other
+ * interfaces where a matching prefix is currently on-link, in addition to
+ * the original interface.
+ */
+void
+nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr,
+    struct in6_addr *taddr, struct llinfo_nd6 *ln)
+{
+       SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
+       struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
+       struct nd_prefix *pr, *fwd;
+       struct ifnet *fwd_ifp;
+       struct in6_addr pr_addr;
+       u_char pr_len;
+
+       SLIST_INIT(&ndprl_head);
+
+       lck_mtx_lock(nd6_mutex);
+
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
+                   !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
+                   !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
+                   taddr, &pr->ndpr_mask)) {
+                       NDPR_UNLOCK(pr);
+                       continue;
+               }
+
+               VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+               bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
+               pr_len = pr->ndpr_plen;
+               NDPR_UNLOCK(pr);
+
+               for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
+                       NDPR_LOCK(fwd);
+                       if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
+                           fwd->ndpr_ifp == ifp ||
+                           fwd->ndpr_plen != pr_len ||
+                           !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
+                           &pr_addr, pr_len)) {
+                               NDPR_UNLOCK(fwd);
+                               continue;
+                       }
+
+                       fwd_ifp = fwd->ndpr_ifp;
+                       NDPR_UNLOCK(fwd);
+
+                       ndprl = nd6_ndprl_alloc(M_WAITOK);
+                       if (ndprl == NULL)
+                               continue;
+
+                       NDPR_ADDREF(fwd);
+                       ndprl->ndprl_pr = fwd;
+                       ndprl->ndprl_fwd_ifp = fwd_ifp;
+
+                       SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
+               }
+               break;
+       }
+
+       lck_mtx_unlock(nd6_mutex);
+
+       SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
+               SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
+
+               pr = ndprl->ndprl_pr;
+               fwd_ifp = ndprl->ndprl_fwd_ifp;
+
+               if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
+                       NDPR_REMREF(pr);
+                       nd6_ndprl_free(ndprl);
+                       continue;
+               }
+
+               NDPR_LOCK(pr);
+               if (pr->ndpr_stateflags & NDPRF_ONLINK) {
+                       NDPR_UNLOCK(pr);
+                       nd6log2((LOG_DEBUG,
+                           "%s%d: Sending cloned NS who has %s on %s%d\n",
+                           fwd_ifp->if_name, fwd_ifp->if_unit,
+                           ip6_sprintf(taddr), ifp->if_name,
+                           ifp->if_unit));
+
+                       nd6_ns_output(fwd_ifp, daddr, taddr, NULL, 0);
+               } else {
+                       NDPR_UNLOCK(pr);
+               }
+               NDPR_REMREF(pr);
+
+               nd6_ndprl_free(ndprl);
+       }
+       VERIFY(SLIST_EMPTY(&ndprl_head));
+
+       nd6_ns_output(ifp, daddr, taddr, ln, 0);
+}
+
+/*
+ * Perform automatic proxy function with NS input.
+ *
+ * If the target address matches a global prefix obtained from a router
+ * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES
+ * flag set, then we send solicitations for the target address to all other
+ * interfaces where a matching prefix is currently on-link.
+ */
+void
+nd6_prproxy_ns_input(struct ifnet *ifp, struct in6_addr *saddr,
+    char *lladdr, int lladdrlen, struct in6_addr *daddr, struct in6_addr *taddr)
+{
+       SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
+       struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
+       struct nd_prefix *pr, *fwd;
+       struct ifnet *fwd_ifp;
+       struct in6_addr pr_addr;
+       u_char pr_len;
+       boolean_t solrec = FALSE;
+
+       SLIST_INIT(&ndprl_head);
+
+       lck_mtx_lock(nd6_mutex);
+
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
+                   !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
+                   !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
+                   taddr, &pr->ndpr_mask)) {
+                       NDPR_UNLOCK(pr);
+                       continue;
+               }
+
+               VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+               bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr));
+               pr_len = pr->ndpr_plen;
+
+               /*
+                * If this is an NS for NUD/AR, record it so that we know
+                * how to forward the NA reply later on (if/when it arrives).
+                * Give up if we fail to save the NS info.
+                */
+               if ((solrec = !IN6_IS_ADDR_UNSPECIFIED(saddr)) &&
+                   !nd6_solsrc_enq(pr, ifp, saddr, taddr)) {
+                       NDPR_UNLOCK(pr);
+                       solrec = FALSE;
+                       break;                  /* bail out */
+               } else {
+                       NDPR_UNLOCK(pr);
+               }
+
+               for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) {
+                       NDPR_LOCK(fwd);
+                       if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
+                           fwd->ndpr_ifp == ifp ||
+                           fwd->ndpr_plen != pr_len ||
+                           !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr,
+                           &pr_addr, pr_len)) {
+                               NDPR_UNLOCK(fwd);
+                               continue;
+                       }
+
+                       fwd_ifp = fwd->ndpr_ifp;
+                       NDPR_UNLOCK(fwd);
+
+                       ndprl = nd6_ndprl_alloc(M_WAITOK);
+                       if (ndprl == NULL)
+                               continue;
+
+                       NDPR_ADDREF(fwd);
+                       ndprl->ndprl_pr = fwd;
+                       ndprl->ndprl_fwd_ifp = fwd_ifp;
+                       ndprl->ndprl_sol = solrec;
+
+                       SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
+               }
+               break;
+       }
+
+       lck_mtx_unlock(nd6_mutex);
+
+       /*
+        * If this is a recorded solicitation (NS for NUD/AR), create
+        * or update the neighbor cache entry for the soliciting node.
+        * Later on, when the NA reply arrives, we will need this cache
+        * entry in order to send the NA back to the original solicitor.
+        * Without a neighbor cache entry, we'd end up with an endless
+        * cycle of NS ping-pong between us (the proxy) and the node
+        * soliciting for the address.
+        */
+       if (solrec) {
+               VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
+               nd6_cache_lladdr(ifp, saddr, lladdr, lladdrlen,
+                   ND_NEIGHBOR_SOLICIT, 0);
+       }
+
+       SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
+               SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
+
+               pr = ndprl->ndprl_pr;
+               fwd_ifp = ndprl->ndprl_fwd_ifp;
+
+               if ((fwd_ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
+                       NDPR_REMREF(pr);
+                       nd6_ndprl_free(ndprl);
+                       continue;
+               }
+
+               NDPR_LOCK(pr);
+               if (pr->ndpr_stateflags & NDPRF_ONLINK) {
+                       NDPR_UNLOCK(pr);
+                       nd6log2((LOG_DEBUG,
+                           "%s%d: Forwarding NS (%s) from %s to %s who has %s "
+                           "on %s%d\n", fwd_ifp->if_name, fwd_ifp->if_unit,
+                           ndprl->ndprl_sol ? "NUD/AR" : "DAD",
+                           ip6_sprintf(saddr), ip6_sprintf(daddr),
+                           ip6_sprintf(taddr), ifp->if_name, ifp->if_unit));
+
+                       nd6_ns_output(fwd_ifp, ndprl->ndprl_sol ? taddr : NULL,
+                           taddr, NULL, !ndprl->ndprl_sol);
+               } else {
+                       NDPR_UNLOCK(pr);
+               }
+               NDPR_REMREF(pr);
+
+               nd6_ndprl_free(ndprl);
+       }
+       VERIFY(SLIST_EMPTY(&ndprl_head));
+}
+
+/*
+ * Perform automatic proxy function with NA input.
+ *
+ * If the target address matches a global prefix obtained from a router
+ * advertisement received on an interface with the ND6_IFF_PROXY_PREFIXES flag
+ * set, then we send neighbor advertisements for the target address on all
+ * other interfaces where a matching prefix is currently on link.
+ */
+void
+nd6_prproxy_na_input(struct ifnet *ifp, struct in6_addr *saddr,
+    struct in6_addr *daddr0, struct in6_addr *taddr, int flags)
+{
+       SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head;
+       struct nd6_prproxy_prelist *ndprl, *ndprl_tmp;
+       struct nd_prefix *pr;
+       struct ifnet *fwd_ifp;
+       struct in6_addr daddr;
+
+       SLIST_INIT(&ndprl_head);
+
+       lck_mtx_lock(nd6_mutex);
+
+       for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+               NDPR_LOCK(pr);
+               if (!(pr->ndpr_stateflags & NDPRF_ONLINK) ||
+                   !(pr->ndpr_stateflags & NDPRF_PRPROXY) ||
+                   !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
+                   taddr, &pr->ndpr_mask)) {
+                       NDPR_UNLOCK(pr);
+                       continue;
+               }
+
+               VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+               /*
+                * If this is an NA for NUD, see if there is a record created
+                * for the corresponding NS; upon success, we get back the
+                * interface on which the NS originally arrived, as well as
+                * the soliciting node's address.  Give up if we can't find it.
+                */
+               if (!IN6_IS_ADDR_MULTICAST(daddr0)) {
+                       fwd_ifp = NULL;
+                       bzero(&daddr, sizeof (daddr));
+                       if (!nd6_solsrc_deq(pr, taddr, &daddr, &fwd_ifp)) {
+                               NDPR_UNLOCK(pr);
+                               break;          /* bail out */
+                       }
+                       VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr) && fwd_ifp);
+                       NDPR_UNLOCK(pr);
+
+                       ndprl = nd6_ndprl_alloc(M_WAITOK);
+                       if (ndprl == NULL)
+                               break;          /* bail out */
+
+                       ndprl->ndprl_fwd_ifp = fwd_ifp;
+                       ndprl->ndprl_sol = TRUE;
+                       ndprl->ndprl_sol_saddr = *(&daddr);
+
+                       SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
+               } else {
+                       struct nd_prefix *fwd;
+                       struct in6_addr pr_addr;
+                       u_char pr_len;
+
+                       bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr,
+                           sizeof (pr_addr));
+                       pr_len = pr->ndpr_plen;
+                       NDPR_UNLOCK(pr);
+
+                       for (fwd = nd_prefix.lh_first; fwd;
+                           fwd = fwd->ndpr_next) {
+                               NDPR_LOCK(fwd);
+                               if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) ||
+                                   fwd->ndpr_ifp == ifp ||
+                                   fwd->ndpr_plen != pr_len ||
+                                   !in6_are_prefix_equal(
+                                   &fwd->ndpr_prefix.sin6_addr,
+                                   &pr_addr, pr_len)) {
+                                       NDPR_UNLOCK(fwd);
+                                       continue;
+                               }
+
+                               fwd_ifp = fwd->ndpr_ifp;
+                               NDPR_UNLOCK(fwd);
+
+                               ndprl = nd6_ndprl_alloc(M_WAITOK);
+                               if (ndprl == NULL)
+                                       continue;
+
+                               NDPR_ADDREF(fwd);
+                               ndprl->ndprl_pr = fwd;
+                               ndprl->ndprl_fwd_ifp = fwd_ifp;
+
+                               SLIST_INSERT_HEAD(&ndprl_head, ndprl, ndprl_le);
+                       }
+               }
+               break;
+       }
+
+       lck_mtx_unlock(nd6_mutex);
+
+       SLIST_FOREACH_SAFE(ndprl, &ndprl_head, ndprl_le, ndprl_tmp) {
+               boolean_t send_na;
+
+               SLIST_REMOVE(&ndprl_head, ndprl, nd6_prproxy_prelist, ndprl_le);
+
+               pr = ndprl->ndprl_pr;
+               fwd_ifp = ndprl->ndprl_fwd_ifp;
+
+               if (ndprl->ndprl_sol) {
+                       VERIFY(pr == NULL);
+                       daddr = *(&ndprl->ndprl_sol_saddr);
+                       VERIFY(!IN6_IS_ADDR_UNSPECIFIED(&daddr));
+                       send_na = (in6_setscope(&daddr, fwd_ifp, NULL) == 0);
+               } else {
+                       VERIFY(pr != NULL);
+                       daddr = *daddr0;
+                       NDPR_LOCK(pr);
+                       send_na = ((pr->ndpr_stateflags & NDPRF_ONLINK) &&
+                           in6_setscope(&daddr, fwd_ifp, NULL) == 0);
+                       NDPR_UNLOCK(pr);
+               }
+
+               if (send_na) {
+                       if (!ndprl->ndprl_sol) {
+                               nd6log2((LOG_DEBUG,
+                                   "%s%d: Forwarding NA (DAD) from %s to %s "
+                                   "tgt is %s on %s%d\n",
+                                   fwd_ifp->if_name, fwd_ifp->if_unit,
+                                   ip6_sprintf(saddr), ip6_sprintf(&daddr),
+                                   ip6_sprintf(taddr), ifp->if_name,
+                                   ifp->if_unit));
+                       } else {
+                               nd6log2((LOG_DEBUG,
+                                   "%s%d: Forwarding NA (NUD/AR) from %s to "
+                                   "%s (was %s) tgt is %s on %s%d\n",
+                                   fwd_ifp->if_name, fwd_ifp->if_unit,
+                                   ip6_sprintf(saddr), ip6_sprintf(&daddr),
+                                   ip6_sprintf(daddr0), ip6_sprintf(taddr),
+                                   ifp->if_name, ifp->if_unit));
+                       }
+
+                       nd6_na_output(fwd_ifp, &daddr, taddr, flags, 1, NULL);
+               }
+
+               if (pr != NULL)
+                       NDPR_REMREF(pr);
+
+               nd6_ndprl_free(ndprl);
+       }
+       VERIFY(SLIST_EMPTY(&ndprl_head));
+}
+
+static struct nd6_prproxy_solsrc *
+nd6_solsrc_alloc(int how)
+{
+       struct nd6_prproxy_solsrc *ssrc;
+
+       ssrc = (how == M_WAITOK) ? zalloc(solsrc_zone) :
+           zalloc_noblock(solsrc_zone);
+       if (ssrc != NULL)
+               bzero(ssrc, solsrc_size);
+
+       return (ssrc);
+}
+
+static void
+nd6_solsrc_free(struct nd6_prproxy_solsrc *ssrc)
+{
+       zfree(solsrc_zone, ssrc);
+}
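
These wrappers draw elements from a private zone; the zone itself is presumably created once at initialization time, outside the lines shown here. A hedged sketch of what such setup typically looks like in xnu; the function name, element cap, and zone name below are assumptions, not taken from this change:

/*
 * Illustrative only: solsrc_zone/solsrc_size are this file's own
 * globals, but the 256-element cap and the zone name are assumed.
 */
static void
nd6_solsrc_zone_init(void)
{
        solsrc_size = sizeof (struct nd6_prproxy_solsrc);
        solsrc_zone = zinit(solsrc_size, 256 * solsrc_size, 0,
            "nd6_prproxy_solsrc");
        if (solsrc_zone == NULL)
                panic("%s: failed allocating solsrc_zone", __func__);
        zone_change(solsrc_zone, Z_EXPAND, TRUE);
}
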
+
+static void
+nd6_prproxy_sols_purge(struct nd_prefix *pr, u_int64_t max_stgt)
+{
+       struct nd6_prproxy_soltgt *soltgt, *tmp;
+       u_int64_t expire = (max_stgt > 0) ? net_uptime() : 0;
+
+       NDPR_LOCK_ASSERT_HELD(pr);
+
+       /* Either trim all, or only those that have expired or are idle */
+       RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
+           &pr->ndpr_prproxy_sols, tmp) {
+               VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
+               if (expire == 0 || soltgt->soltgt_expire <= expire ||
+                   soltgt->soltgt_cnt == 0) {
+                       pr->ndpr_prproxy_sols_cnt--;
+                       RB_REMOVE(prproxy_sols_tree,
+                           &pr->ndpr_prproxy_sols, soltgt);
+                       nd6_soltgt_free(soltgt);
+               }
+       }
+
+       if (max_stgt == 0 || pr->ndpr_prproxy_sols_cnt < max_stgt) {
+               VERIFY(max_stgt != 0 || (pr->ndpr_prproxy_sols_cnt == 0 &&
+                   RB_EMPTY(&pr->ndpr_prproxy_sols)));
+               return;
+       }
+
+       /* Brute force; mercilessly evict entries until we are under limit */
+       RB_FOREACH_SAFE(soltgt, prproxy_sols_tree,
+           &pr->ndpr_prproxy_sols, tmp) {
+               VERIFY(pr->ndpr_prproxy_sols_cnt > 0);
+               pr->ndpr_prproxy_sols_cnt--;
+               RB_REMOVE(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
+               nd6_soltgt_free(soltgt);
+               if (pr->ndpr_prproxy_sols_cnt < max_stgt)
+                       break;
+       }
+}
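
The purge is deliberately two-pass: the first sweep drops only records that are expired or idle, and the second sweep evicts unconditionally until the count falls below the cap. The same policy expressed over a plain TAILQ, with a hypothetical record type in place of the kernel's RB tree:

#include <stdint.h>
#include <stdlib.h>
#include <sys/queue.h>

struct rec {
        TAILQ_ENTRY(rec) link;
        uint64_t expire;        /* uptime at which the record goes stale */
        unsigned int cnt;       /* outstanding users; 0 means idle */
};
TAILQ_HEAD(rec_head, rec);

static void
purge(struct rec_head *q, unsigned int *count, unsigned int limit,
    uint64_t now)
{
        struct rec *r, *tmp;

        /* Pass 1: trim expired or idle records (all of them if limit == 0). */
        TAILQ_FOREACH_SAFE(r, q, link, tmp) {
                if (limit == 0 || r->expire <= now || r->cnt == 0) {
                        TAILQ_REMOVE(q, r, link);
                        (*count)--;
                        free(r);
                }
        }

        /* Pass 2: still at/over the cap?  Evict until under it. */
        TAILQ_FOREACH_SAFE(r, q, link, tmp) {
                if (limit == 0 || *count < limit)
                        break;
                TAILQ_REMOVE(q, r, link);
                (*count)--;
                free(r);
        }
}

int
main(void)
{
        struct rec_head q = TAILQ_HEAD_INITIALIZER(q);
        unsigned int count = 0;

        /* (Populate q here.)  An empty queue purges to an empty queue. */
        purge(&q, &count, 4, 100);
        return (count);
}
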
+
+/*
+ * Purges all solicitation records on a given prefix.
+ * Caller is responsible for holding prefix lock.
+ */
+void
+nd6_prproxy_sols_reap(struct nd_prefix *pr)
+{
+       nd6_prproxy_sols_purge(pr, 0);
+}
+
+/*
+ * Purges expired or idle solicitation records on a given prefix.
+ * Caller is responsible for holding prefix lock.
+ */
+void
+nd6_prproxy_sols_prune(struct nd_prefix *pr, u_int32_t max_stgt)
+{
+       nd6_prproxy_sols_purge(pr, max_stgt);
+}
+
+/*
+ * Enqueue a solicitation record in the target record of a prefix.
+ */
+static boolean_t
+nd6_solsrc_enq(struct nd_prefix *pr, struct ifnet *ifp,
+    struct in6_addr *saddr, struct in6_addr *taddr)
+{
+       struct nd6_prproxy_soltgt find, *soltgt;
+       struct nd6_prproxy_solsrc *ssrc;
+       u_int32_t max_stgt = nd6_max_tgt_sols;
+       u_int32_t max_ssrc = nd6_max_src_sols;
+
+       NDPR_LOCK_ASSERT_HELD(pr);
+       VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+       VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
+           (NDPRF_ONLINK|NDPRF_PRPROXY));
+       VERIFY(!IN6_IS_ADDR_UNSPECIFIED(saddr));
+
+       ssrc = nd6_solsrc_alloc(M_WAITOK);
+       if (ssrc == NULL)
+               return (FALSE);
+
+       ssrc->solsrc_saddr = *saddr;
+       ssrc->solsrc_ifp = ifp;
+
+       find.soltgt_key.taddr = *taddr;         /* search key */
+
+       soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
+       if (soltgt == NULL) {
+               if (max_stgt != 0 && pr->ndpr_prproxy_sols_cnt >= max_stgt) {
+                       VERIFY(!RB_EMPTY(&pr->ndpr_prproxy_sols));
+                       nd6_prproxy_sols_prune(pr, max_stgt);
+                       VERIFY(pr->ndpr_prproxy_sols_cnt < max_stgt);
+               }
+
+               soltgt = nd6_soltgt_alloc(M_WAITOK);
+               if (soltgt == NULL) {
+                       nd6_solsrc_free(ssrc);
+                       return (FALSE);
+               }
+
+               soltgt->soltgt_key.taddr = *taddr;
+               VERIFY(soltgt->soltgt_cnt == 0);
+               VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
+
+               pr->ndpr_prproxy_sols_cnt++;
+               VERIFY(pr->ndpr_prproxy_sols_cnt != 0);
+               RB_INSERT(prproxy_sols_tree, &pr->ndpr_prproxy_sols, soltgt);
+       }
+
+       if (max_ssrc != 0 && soltgt->soltgt_cnt >= max_ssrc) {
+               VERIFY(!TAILQ_EMPTY(&soltgt->soltgt_q));
+               nd6_soltgt_prune(soltgt, max_ssrc);
+               VERIFY(soltgt->soltgt_cnt < max_ssrc);
+       }
+
+       soltgt->soltgt_cnt++;
+       VERIFY(soltgt->soltgt_cnt != 0);
+       TAILQ_INSERT_TAIL(&soltgt->soltgt_q, ssrc, solsrc_tqe);
+       if (soltgt->soltgt_cnt == 1)
+               soltgt->soltgt_expire = net_uptime() + ND6_TGT_SOLS_EXPIRE;
+
+       return (TRUE);
+}
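
The enqueue path keys per-target state in a red-black tree using the <sys/tree.h> macros (RB_FIND, RB_INSERT), creating a node on first use. A stripped-down sketch of that find-or-create idiom with an integer key; the node type, comparator, and allocator here are hypothetical:

#include <stdlib.h>
#include <sys/tree.h>

struct node {
        RB_ENTRY(node) entry;
        int key;
        unsigned int cnt;       /* per-key payload, as with soltgt_cnt */
};

RB_HEAD(node_tree, node);

static int
node_cmp(const struct node *a, const struct node *b)
{
        return ((a->key > b->key) - (a->key < b->key));
}

RB_GENERATE(node_tree, node, entry, node_cmp)

/* Return the node for key, creating it on first use; NULL on ENOMEM. */
static struct node *
find_or_create(struct node_tree *t, int key)
{
        struct node find, *n;

        find.key = key;                 /* search key */
        n = RB_FIND(node_tree, t, &find);
        if (n == NULL) {
                n = calloc(1, sizeof (*n));
                if (n != NULL) {
                        n->key = key;
                        RB_INSERT(node_tree, t, n);
                }
        }
        return (n);
}

int
main(void)
{
        struct node_tree t = RB_INITIALIZER(&t);
        struct node *n = find_or_create(&t, 7);

        /* A second lookup must return the same node, not a duplicate. */
        return (n == NULL || find_or_create(&t, 7) != n);
}
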
+
+/*
+ * Dequeue a solicitation record from a target record of a prefix.
+ */
+static boolean_t
+nd6_solsrc_deq(struct nd_prefix *pr, struct in6_addr *taddr,
+    struct in6_addr *daddr, struct ifnet **ifp)
+{
+       struct nd6_prproxy_soltgt find, *soltgt;
+       struct nd6_prproxy_solsrc *ssrc;
+
+       NDPR_LOCK_ASSERT_HELD(pr);
+       VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+       VERIFY((pr->ndpr_stateflags & (NDPRF_ONLINK|NDPRF_PRPROXY)) ==
+           (NDPRF_ONLINK|NDPRF_PRPROXY));
+
+       bzero(daddr, sizeof (*daddr));
+       *ifp = NULL;
+
+       find.soltgt_key.taddr = *taddr;         /* search key */
+
+       soltgt = RB_FIND(prproxy_sols_tree, &pr->ndpr_prproxy_sols, &find);
+       if (soltgt == NULL || soltgt->soltgt_cnt == 0) {
+               VERIFY(soltgt == NULL || TAILQ_EMPTY(&soltgt->soltgt_q));
+               return (FALSE);
+       }
+
+       VERIFY(soltgt->soltgt_cnt != 0);
+       --soltgt->soltgt_cnt;
+       ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
+       VERIFY(ssrc != NULL);
+       TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
+       *daddr = *(&ssrc->solsrc_saddr);
+       *ifp = ssrc->solsrc_ifp;
+       nd6_solsrc_free(ssrc);
+
+       return (TRUE);
+}
+
+static struct nd6_prproxy_soltgt *
+nd6_soltgt_alloc(int how)
+{
+       struct nd6_prproxy_soltgt *soltgt;
+
+       soltgt = (how == M_WAITOK) ? zalloc(soltgt_zone) :
+           zalloc_noblock(soltgt_zone);
+       if (soltgt != NULL) {
+               bzero(soltgt, soltgt_size);
+               TAILQ_INIT(&soltgt->soltgt_q);
+       }
+       return (soltgt);
+}
+
+static void
+nd6_soltgt_free(struct nd6_prproxy_soltgt *soltgt)
+{
+       struct nd6_prproxy_solsrc *ssrc, *tssrc;
+
+       TAILQ_FOREACH_SAFE(ssrc, &soltgt->soltgt_q, solsrc_tqe, tssrc) {
+               VERIFY(soltgt->soltgt_cnt > 0);
+               soltgt->soltgt_cnt--;
+               TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
+               nd6_solsrc_free(ssrc);
+       }
+
+       VERIFY(soltgt->soltgt_cnt == 0);
+       VERIFY(TAILQ_EMPTY(&soltgt->soltgt_q));
+
+       zfree(soltgt_zone, soltgt);
+}
+
+static void
+nd6_soltgt_prune(struct nd6_prproxy_soltgt *soltgt, u_int32_t max_ssrc)
+{
+       while (soltgt->soltgt_cnt >= max_ssrc) {
+               struct nd6_prproxy_solsrc *ssrc;
+
+               VERIFY(soltgt->soltgt_cnt != 0);
+               --soltgt->soltgt_cnt;
+               ssrc = TAILQ_FIRST(&soltgt->soltgt_q);
+               VERIFY(ssrc != NULL);
+               TAILQ_REMOVE(&soltgt->soltgt_q, ssrc, solsrc_tqe);
+               nd6_solsrc_free(ssrc);
+       }
+}
+
+/*
+ * Solicited target tree comparison function.
+ *
+ * An ordered predicate is necessary; bcmp() is not documented to return
+ * an indication of order, whereas memcmp() is guaranteed to do so by
+ * ISO C99.
+ */
+static __inline int
+soltgt_cmp(const struct nd6_prproxy_soltgt *a,
+    const struct nd6_prproxy_soltgt *b)
+{
+       return (memcmp(&a->soltgt_key, &b->soltgt_key, sizeof (a->soltgt_key)));
+}
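
As the comment says, only memcmp() is guaranteed to report ordering; the red-black tree needs nothing more than the sign of the result. One subtlety worth noting: memcmp() over a key struct also compares any padding bytes, which is one reason nd6_soltgt_alloc() bzero()s the whole structure before use. A tiny sketch normalizing a memcmp()-based comparator to exactly -1/0/1 (key_cmp() is a hypothetical helper):

#include <string.h>

/*
 * memcmp() already returns <0, 0 or >0 per ISO C99; normalizing to
 * exactly -1/0/1, as some comparators prefer, costs one expression.
 */
static int
key_cmp(const void *a, const void *b, size_t len)
{
        int d = memcmp(a, b, len);

        return ((d > 0) - (d < 0));
}

int
main(void)
{
        char x[16] = { 1 }, y[16] = { 2 };

        return (key_cmp(x, y, sizeof (x)) == -1) ? 0 : 1;
}
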
index 2e5c5eae549948a3d65cd41907fe18344ad50612..34bfb18a6cc3630092758bd2daada438a1e3faf1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -93,8 +93,8 @@
 
 #include <net/net_osdep.h>
 
-#define SDL(s) ((struct sockaddr_dl *)s)
-
+static void defrouter_addreq(struct nd_defrouter *, boolean_t);
+static void defrouter_delreq(struct nd_defrouter *);
 static struct nd_defrouter *defrtrlist_update_common(struct nd_defrouter *,
     boolean_t);
 static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
@@ -102,8 +102,6 @@ static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
 static struct in6_ifaddr *in6_ifadd(struct nd_prefix *, int);
 static void defrtrlist_sync(struct ifnet *);
 
-static void defrouter_select_common(struct ifnet *, int);
-
 static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
        struct nd_defrouter *);
 static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
@@ -280,8 +278,11 @@ nd6_rs_input(
        int lladdrlen = 0;
        union nd_opts ndopts;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        /* If I'm not a router, ignore it. */
-       if (ip6_accept_rtadv != 0 || (ifp->if_eflags & IFEF_ACCEPT_RTADVD) || ip6_forwarding != 1)
+       if (!ip6_forwarding || !(ifp->if_eflags & IFEF_IPV6_ROUTER))
                goto freeit;
 
        /* Sanity checks */
@@ -297,10 +298,10 @@ nd6_rs_input(
         * Don't update the neighbor cache, if src = :: or a non-neighbor.
         * The former case indicates that the src has no IP address assigned
         * yet.  See nd6_ns_input() for the latter case.
-        */
-       if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src))
+        */
+       if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
                goto freeit;
-       else {
+       } else {
                struct sockaddr_in6 src_sa6;
 
                bzero(&src_sa6, sizeof(src_sa6));
@@ -368,7 +369,7 @@ nd6_rs_input(
 void
 nd6_ra_input(
        struct  mbuf *m,
-       int off, 
+       int off,
        int icmp6len)
 {
        struct ifnet *ifp = m->m_pkthdr.rcvif;
@@ -380,12 +381,38 @@ nd6_ra_input(
        union nd_opts ndopts;
        struct nd_defrouter *dr = NULL;
        struct timeval timenow;
+       u_int32_t mtu = 0;
+       char *lladdr = NULL;
+       u_int32_t lladdrlen = 0;
+       struct nd_prefix_list *nd_prefix_list_head = NULL;
+       u_int32_t nd_prefix_list_length = 0;
+       struct in6_ifaddr *ia6 = NULL;
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
        getmicrotime(&timenow);
 
-       if (ip6_accept_rtadv == 0 && ((ifp->if_eflags & IFEF_ACCEPT_RTADVD) == 0))
+       /*
+        * Discard RA unless IFEF_ACCEPT_RTADV is set (as host), or when
+        * IFEF_IPV6_ROUTER is set (as router) but the RA is not locally
+        * generated.  For convenience, we allow locally generated (rtadvd)
+        * RAs to be processed on the advertising interface, as a router.
+        *
+        * Note that we don't test against ip6_forwarding as we could be
+        * both a host and a router on different interfaces, hence the
+        * check against the per-interface flags.
+        */
+       if (!(ifp->if_eflags & (IFEF_ACCEPT_RTADV | IFEF_IPV6_ROUTER)) ||
+           ((ifp->if_eflags & IFEF_IPV6_ROUTER) &&
+           (ia6 = ifa_foraddr6(&saddr6)) == NULL))
                goto freeit;
 
+       if (ia6 != NULL) {
+               IFA_REMREF(&ia6->ia_ifa);
+               ia6 = NULL;
+       }
+
        if (ip6->ip6_hlim != 255) {
                nd6log((LOG_ERR,
                    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
@@ -435,6 +462,8 @@ nd6_ra_input(
                goto freeit;
        }
        ndi = &nd_ifinfo[ifp->if_index];
+       VERIFY(ndi->initialized);
+       lck_mtx_lock(&ndi->lock);
        bzero(&dr0, sizeof (dr0));
        dr0.rtaddr = saddr6;
        dr0.flags  = nd_ra->nd_ra_flags_reserved;
@@ -455,6 +484,7 @@ nd6_ra_input(
                ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
        if (nd_ra->nd_ra_curhoplimit)
                ndi->chlim = nd_ra->nd_ra_curhoplimit;
+       lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
        ndi = NULL;
        lck_mtx_lock(nd6_mutex);
@@ -523,19 +553,22 @@ nd6_ra_input(
                        /*
                         * Exceptions to stateless autoconfiguration processing:
                         * + nd6_accept_6to4 == 0 && address has 6to4 prefix
-                        * + ip6_only_allow_rfc4193_prefix != 0 && address not RFC 4193
+                        * + ip6_only_allow_rfc4193_prefix != 0 &&
+                        * address not RFC 4193
                         */
                        if (ip6_only_allow_rfc4193_prefix &&
                            !IN6_IS_ADDR_UNIQUE_LOCAL(&pi->nd_opt_pi_prefix)) {
                                nd6log((LOG_INFO,
-                                   "nd6_ra_input: no SLAAC on prefix %s [not RFC 4193]\n",
+                                   "nd6_ra_input: no SLAAC on prefix %s "
+                                   "[not RFC 4193]\n",
                                    ip6_sprintf(&pi->nd_opt_pi_prefix)));
                                pr.ndpr_raf_auto = 0;
                        }
                        else if (!nd6_accept_6to4 &&
                                     IN6_IS_ADDR_6TO4(&pi->nd_opt_pi_prefix)) {
                                nd6log((LOG_INFO,
-                                   "nd6_ra_input: no SLAAC on prefix %s [6to4]\n",
+                                   "nd6_ra_input: no SLAAC on prefix %s "
+                                   "[6to4]\n",
                                    ip6_sprintf(&pi->nd_opt_pi_prefix)));
                                pr.ndpr_raf_auto = 0;
                        }
@@ -549,6 +582,37 @@ nd6_ra_input(
                        }
                        (void)prelist_update(&pr, dr, m, mcast);
                        lck_mtx_destroy(&pr.ndpr_lock, ifa_mtx_grp);
+
+                       /*
+                        * We have to copy the values out after the
+                        * prelist_update call, since some of them are not
+                        * properly set until the router advertisement
+                        * processing has vetted them.
+                        */
+                       struct nd_prefix_list *prfl = NULL;
+                       MALLOC(prfl, struct nd_prefix_list *, sizeof (*prfl),
+                           M_TEMP, M_WAITOK | M_ZERO);
+
+                       if (prfl == NULL) {
+                               log(LOG_DEBUG, "%s: unable to MALLOC RA prefix "
+                                   "structure\n", __func__);
+                               continue;
+                       }
+
+                       bcopy(&pr.ndpr_prefix, &prfl->pr.ndpr_prefix,
+                           sizeof (prfl->pr.ndpr_prefix));
+                       prfl->pr.ndpr_raf = pr.ndpr_raf;
+                       prfl->pr.ndpr_plen = pr.ndpr_plen;
+                       prfl->pr.ndpr_vltime = pr.ndpr_vltime;
+                       prfl->pr.ndpr_pltime = pr.ndpr_pltime;
+                       prfl->pr.ndpr_expire = pr.ndpr_expire;
+                       prfl->pr.ndpr_stateflags = pr.ndpr_stateflags;
+                       prfl->pr.ndpr_addrcnt = pr.ndpr_addrcnt;
+                       prfl->pr.ndpr_ifp = pr.ndpr_ifp;
+
+                       prfl->next = nd_prefix_list_head;
+                       nd_prefix_list_head = prfl;
+                       nd_prefix_list_length++;
                }
        }
 
@@ -556,7 +620,7 @@ nd6_ra_input(
         * MTU
         */
        if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
-               u_int32_t mtu = ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
+               mtu = ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
 
                /* lower bound */
                if (mtu < IPV6_MMTU) {
@@ -572,12 +636,15 @@ nd6_ra_input(
                        goto freeit;
                }
                ndi = &nd_ifinfo[ifp->if_index];
+               VERIFY(ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
                /* upper bound */
                if (ndi->maxmtu) {
                        if (mtu <= ndi->maxmtu) {
                                int change = (ndi->linkmtu != mtu);
 
                                ndi->linkmtu = mtu;
+                               lck_mtx_unlock(&ndi->lock);
                                lck_rw_done(nd_if_rwlock);
                                if (change) /* in6_maxmtu may change */
                                        in6_setmaxmtu();
@@ -587,9 +654,11 @@ nd6_ra_input(
                                    "exceeds maxmtu %d, ignoring\n",
                                    mtu, ip6_sprintf(&ip6->ip6_src),
                                    ndi->maxmtu));
+                               lck_mtx_unlock(&ndi->lock);
                                lck_rw_done(nd_if_rwlock);
                        }
                } else {
+                       lck_mtx_unlock(&ndi->lock);
                        lck_rw_done(nd_if_rwlock);
                        nd6log((LOG_INFO, "nd6_ra_input: mtu option "
                            "mtu=%d sent from %s; maxmtu unknown, "
@@ -600,14 +669,10 @@ nd6_ra_input(
        }
 
  skip:
-       
+
        /*
         * Source link layer address
         */
-    {
-       char *lladdr = NULL;
-       int lladdrlen = 0;
-       
        if (ndopts.nd_opts_src_lladdr) {
                lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
                lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
@@ -621,7 +686,12 @@ nd6_ra_input(
                goto bad;
        }
 
-       nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0);
+       nd6_cache_lladdr(ifp, &saddr6, lladdr, (int)lladdrlen,
+           ND_ROUTER_ADVERT, 0);
+
+       /* Post message */
+       nd6_post_msg(KEV_ND6_RA, nd_prefix_list_head, nd_prefix_list_length,
+           mtu, lladdr, lladdrlen);
 
        /*
         * Installing a link-layer address might change the state of the
@@ -631,12 +701,20 @@ nd6_ra_input(
        lck_mtx_lock(nd6_mutex);
        pfxlist_onlink_check();
        lck_mtx_unlock(nd6_mutex);
-    }
 
  freeit:
        m_freem(m);
        if (dr)
                NDDR_REMREF(dr);
+
+    {
+       struct nd_prefix_list *prfl = NULL;
+       while ((prfl = nd_prefix_list_head) != NULL) {
+               nd_prefix_list_head = prfl->next;
+               FREE(prfl, M_TEMP);
+       }
+    }
+
        return;
 
  bad:
@@ -676,7 +754,7 @@ nd6_rtmsg(cmd, rt)
        ifnet_lock_done(ifp);
 }
 
-void
+static void
 defrouter_addreq(struct nd_defrouter *new, boolean_t scoped)
 {
        struct sockaddr_in6 def, mask, gate;
@@ -689,6 +767,14 @@ defrouter_addreq(struct nd_defrouter *new, boolean_t scoped)
        if (new->stateflags & NDDRF_INSTALLED)
                return;
 
+       if (new->ifp->if_eflags & IFEF_IPV6_ROUTER) {
+               nd6log2((LOG_INFO, "%s: ignoring router %s, scoped=%d, "
+                   "static=%d on advertising interface\n", if_name(new->ifp),
+                   ip6_sprintf(&new->rtaddr), scoped,
+                   (new->stateflags & NDDRF_STATIC) ? 1 : 0));
+               return;
+       }
+
        nd6log2((LOG_INFO, "%s: adding default router %s, scoped=%d, "
            "static=%d\n", if_name(new->ifp), ip6_sprintf(&new->rtaddr),
            scoped, (new->stateflags & NDDRF_STATIC) ? 1 : 0));
@@ -754,7 +840,7 @@ defrouter_lookup(
  * This is just a subroutine function for defrouter_select(), and should
  * not be called from anywhere else.
  */
-void
+static void
 defrouter_delreq(struct nd_defrouter *dr)
 {
        struct sockaddr_in6 def, mask, gate;
@@ -861,29 +947,39 @@ defrouter_reset(void)
 int
 defrtrlist_ioctl(u_long cmd, caddr_t data)
 {
-       struct in6_defrouter_32 *r_32 = (struct in6_defrouter_32 *)data;
-       struct in6_defrouter_64 *r_64 = (struct in6_defrouter_64 *)data;
        struct nd_defrouter dr0;
        unsigned int ifindex;
        struct ifnet *dr_ifp;
        int error = 0, add = 0;
 
        switch (cmd) {
-       case SIOCDRADD_IN6_32:
-       case SIOCDRADD_IN6_64:
+       case SIOCDRADD_IN6_32:          /* struct in6_defrouter_32 */
+       case SIOCDRADD_IN6_64:          /* struct in6_defrouter_64 */
                ++add;
                /* FALLTHRU */
-       case SIOCDRDEL_IN6_32:
-       case SIOCDRDEL_IN6_64:
+       case SIOCDRDEL_IN6_32:          /* struct in6_defrouter_32 */
+       case SIOCDRDEL_IN6_64:          /* struct in6_defrouter_64 */
                bzero(&dr0, sizeof (dr0));
                if (cmd == SIOCDRADD_IN6_64 || cmd == SIOCDRDEL_IN6_64) {
-                       dr0.rtaddr = r_64->rtaddr.sin6_addr;
+                       struct in6_defrouter_64 *r_64 =
+                           (struct in6_defrouter_64 *)(void *)data;
+                       u_int16_t i;
+
+                       bcopy(&r_64->rtaddr.sin6_addr, &dr0.rtaddr,
+                           sizeof (dr0.rtaddr));
                        dr0.flags = r_64->flags;
-                       ifindex = r_64->if_index;
+                       bcopy(&r_64->if_index, &i, sizeof (i));
+                       ifindex = i;
                } else {
-                       dr0.rtaddr = r_32->rtaddr.sin6_addr;
+                       struct in6_defrouter_32 *r_32 =
+                           (struct in6_defrouter_32 *)(void *)data;
+                       u_int16_t i;
+
+                       bcopy(&r_32->rtaddr.sin6_addr, &dr0.rtaddr,
+                           sizeof (dr0.rtaddr));
                        dr0.flags = r_32->flags;
-                       ifindex = r_32->if_index;
+                       bcopy(&r_32->if_index, &i, sizeof (i));
+                       ifindex = i;
                }
                ifnet_head_lock_shared();
                /* Don't need to check if ifindex is < 0 since it's unsigned */
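
The bcopy() dance in the hunk above exists because data arrives as a flat caddr_t from the ioctl path, and on strict-alignment CPUs dereferencing a multi-byte field through a possibly misaligned pointer can fault. Copying byte-wise into an aligned local is the portable idiom; a minimal sketch, where read_u16() is a hypothetical helper:

#include <stdint.h>
#include <string.h>

/* Fetch a 16-bit value through a pointer with no alignment assumed. */
static uint16_t
read_u16(const void *p)
{
        uint16_t v;

        memcpy(&v, p, sizeof (v));      /* never dereferences p as uint16_t * */
        return (v);
}

int
main(void)
{
        unsigned char buf[3] = { 0x00, 0x34, 0x12 };

        return (read_u16(&buf[1]) != 0) ? 0 : 1;        /* misaligned source is fine */
}
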
@@ -937,8 +1033,7 @@ defrtrlist_del(struct nd_defrouter *dr)
         * Flush all the routing table entries that use the router
         * as a next hop.
         */
-       if (!ip6_forwarding &&
-           (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
+       if (ip6_doscopedroute || !ip6_forwarding) {
                /* above is a good condition? */
                NDDR_ADDREF(dr);
                lck_mtx_unlock(nd6_mutex);
@@ -947,7 +1042,7 @@ defrtrlist_del(struct nd_defrouter *dr)
                NDDR_REMREF(dr);
        }
 
-       if (dr == TAILQ_FIRST(&nd_defrouter))
+       if (!ip6_doscopedroute && dr == TAILQ_FIRST(&nd_defrouter))
                deldr = dr;     /* The router is primary. */
 
        TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
@@ -986,18 +1081,17 @@ defrtrlist_del(struct nd_defrouter *dr)
         * Routing is enabled, always try to pick another eligible router
         * on this interface.
         */
-       if ((deldr || ip6_doscopedroute) && !ip6_forwarding &&
-           (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD)))
+       if (deldr || ip6_doscopedroute)
                defrouter_select(ifp);
 
        lck_rw_lock_shared(nd_if_rwlock);
        if (ifp->if_index < nd_ifinfo_indexlim) {
                struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-               atomic_add_32(&ndi->ndefrouters, -1);
-               if (ndi->ndefrouters < 0) {
-                       log(LOG_WARNING, "defrtrlist_del: negative "
-                           "count on %s\n", if_name(ifp));
-               }
+               VERIFY(ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               VERIFY(ndi->ndefrouters > 0);
+               ndi->ndefrouters--;
+               lck_mtx_unlock(&ndi->lock);
        }
        lck_rw_done(nd_if_rwlock);
 
@@ -1084,8 +1178,7 @@ rtpref(struct nd_defrouter *dr)
 }
 
 /*
- * Default Router Selection according to Section 6.3.6 of RFC 2461 and
- * draft-ietf-ipngwg-router-selection:
+ * Default Router Selection according to Section 6.3.6 of RFC 2461 and RFC 4191:
  *
  * 1) Routers that are reachable or probably reachable should be preferred.
  *    If we have more than one (probably) reachable router, prefer ones
@@ -1114,9 +1207,10 @@ rtpref(struct nd_defrouter *dr)
  * Since the code below covers both with and without router preference cases,
  * we do not need to classify the cases by ifdef.
  */
-static void
-defrouter_select_common(struct ifnet *ifp, int ignore)
+void
+defrouter_select(struct ifnet *ifp)
 {
+#pragma unused(ifp)
        struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
        struct nd_defrouter *installed_dr0 = NULL;
        struct rtentry *rt = NULL;
@@ -1127,16 +1221,17 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
        /*
-        * This function should be called only when acting as an autoconfigured
-        * host.  Although the remaining part of this function is not effective
-        * if the node is not an autoconfigured host, we explicitly exclude
-        * such cases here for safety.
+        * We no longer install (default) interface route; only prefix routes
+        * are installed as interface routes.  Therefore, there is no harm in
+        * going through this routine even if a default interface is specified,
+        * which happens when Scoped Routing is enabled.  But for efficiency,
+        * we fall back to the original KAME logic when Scoped Routing is
+        * not in effect.
         */
-       if (ip6_forwarding || (!ignore && !ip6_accept_rtadv &&
-           !(ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
+       if (ip6_forwarding && !ip6_doscopedroute) {
                nd6log((LOG_WARNING,
-                   "defrouter_select: called unexpectedly (forwarding=%d, "
-                   "accept_rtadv=%d)\n", ip6_forwarding, ip6_accept_rtadv));
+                   "defrouter_select: called unexpectedly (forwarding=%d)\n",
+                   ip6_forwarding));
                return;
        }
 
@@ -1170,20 +1265,31 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
         */
        for (dr = TAILQ_FIRST(&nd_defrouter); dr;
             dr = TAILQ_NEXT(dr, dr_entry)) {
-               boolean_t reachable;
+               boolean_t reachable, advrouter;
+               struct in6_addr rtaddr;
+               struct ifnet *drifp;
+               struct nd_defrouter *drrele;
 
-               /* Callee returns a locked route upon success */
+               drrele = NULL;
                reachable = FALSE;
-               NDDR_ADDREF(dr);        /* for this for loop */
+               NDDR_LOCK(dr);
+               rtaddr = *(&dr->rtaddr);
+               drifp = dr->ifp;
+               advrouter = (drifp != NULL &&
+                   (drifp->if_eflags & IFEF_IPV6_ROUTER));
+               NDDR_ADDREF_LOCKED(dr); /* for this for loop */
+               NDDR_UNLOCK(dr);
+
                lck_mtx_unlock(nd6_mutex);
-               if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp, 0)) != NULL) {
+               /* Callee returns a locked route upon success */
+               if ((rt = nd6_lookup(&rtaddr, 0, drifp, 0)) != NULL) {
                        RT_LOCK_ASSERT_HELD(rt);
                        if ((ln = rt->rt_llinfo) != NULL &&
                            ND6_IS_LLINFO_PROBREACH(ln)) {
                                reachable = TRUE;
                                if (selected_dr == NULL &&
                                    (!ip6_doscopedroute ||
-                                   dr->ifp == nd6_defifp)) {
+                                   (drifp == nd6_defifp && !advrouter))) {
                                        selected_dr = dr;
                                        NDDR_ADDREF(selected_dr);
                                }
@@ -1195,15 +1301,19 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                lck_mtx_lock(nd6_mutex);
 
                /* Handle case (b) */
-               if (ip6_doscopedroute && dr->ifp == nd6_defifp &&
+               NDDR_LOCK(dr);
+               if (ip6_doscopedroute && drifp == nd6_defifp && !advrouter &&
                    (selected_dr == NULL || rtpref(dr) > rtpref(selected_dr) ||
                    (rtpref(dr) == rtpref(selected_dr) &&
                    (dr->stateflags & NDDRF_STATIC) &&
                    !(selected_dr->stateflags & NDDRF_STATIC)))) {
-                       if (selected_dr)
-                               NDDR_REMREF(selected_dr);
+                       if (selected_dr) {
+                               /* Release it later on */
+                               VERIFY(drrele == NULL);
+                               drrele = selected_dr;
+                       }
                        selected_dr = dr;
-                       NDDR_ADDREF(selected_dr);
+                       NDDR_ADDREF_LOCKED(selected_dr);
                }
 
                if (!(dr->stateflags & NDDRF_INSTALLED)) {
@@ -1212,16 +1322,20 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                         * reachable, try to install it later on below.
                         * If it's static, try to install it anyway.
                         */
-                       if (reachable || (dr->stateflags & NDDRF_STATIC)) {
+                       if (!advrouter && (reachable ||
+                           (dr->stateflags & NDDRF_STATIC))) {
                                dr->genid = -1;
                                ++update;
                                nd6log2((LOG_INFO, "%s: possible router %s, "
-                                   "scoped=%d, static=%d\n", if_name(dr->ifp),
-                                   ip6_sprintf(&dr->rtaddr),
+                                   "scoped=%d, static=%d\n", if_name(drifp),
+                                   ip6_sprintf(&rtaddr),
                                    (dr->stateflags & NDDRF_IFSCOPE) ? 1 : 0,
                                    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
                        }
+                       NDDR_UNLOCK(dr);
                        NDDR_REMREF(dr);        /* for this for loop */
+                       if (drrele != NULL)
+                               NDDR_REMREF(drrele);
                        continue;
                }
 
@@ -1229,7 +1343,7 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                if (!ip6_doscopedroute || !(dr->stateflags & NDDRF_IFSCOPE)) {
                        if (installed_dr == NULL) {
                                installed_dr = dr;
-                               NDDR_ADDREF(installed_dr);
+                               NDDR_ADDREF_LOCKED(installed_dr);
                        } else {
                                /* this should not happen; warn for diagnosis */
                                log(LOG_ERR, "defrouter_select: more than one "
@@ -1237,7 +1351,10 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                                    ip6_doscopedroute ? "non-scoped" : "");
                        }
                }
+               NDDR_UNLOCK(dr);
                NDDR_REMREF(dr);        /* for this for loop */
+               if (drrele != NULL)
+                       NDDR_REMREF(drrele);
        }
 
        /* If none was selected, use the currently installed one */
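
The drrele juggling above is a deferred-release idiom: while dr's lock is held, the previously selected router cannot be NDDR_REMREF'd on the spot (the final release may free the entry and takes its own locks), so the stale reference is parked in drrele and dropped only after the entry lock is gone. A minimal userland sketch of the same shape, with pthreads in place of the kernel lock primitives and made-up names throughout:

    #include <pthread.h>
    #include <stdlib.h>

    /* Hypothetical refcounted entry; every name here is illustrative. */
    struct entry {
        pthread_mutex_t lock;
        int refcnt;
    };

    /* May free the entry, so it must not be called with e->lock held. */
    static void
    entry_release(struct entry *e)
    {
        int last;

        pthread_mutex_lock(&e->lock);
        last = (--e->refcnt == 0);
        pthread_mutex_unlock(&e->lock);
        if (last) {
            pthread_mutex_destroy(&e->lock);
            free(e);
        }
    }

    /* Prefer cand over *selected; the old entry's release is deferred
     * until cand's lock is dropped -- the drrele pattern above. */
    static void
    select_candidate(struct entry *cand, struct entry **selected)
    {
        struct entry *drop = NULL;

        pthread_mutex_lock(&cand->lock);
        if (*selected != NULL)
            drop = *selected;      /* release it later on */
        cand->refcnt++;            /* NDDR_ADDREF_LOCKED analogue */
        *selected = cand;
        pthread_mutex_unlock(&cand->lock);

        if (drop != NULL)
            entry_release(drop);   /* safe: no locks held here */
    }

    int
    main(void)
    {
        struct entry *a = calloc(1, sizeof(*a));
        struct entry *b = calloc(1, sizeof(*b));
        struct entry *selected = NULL;

        pthread_mutex_init(&a->lock, NULL);
        pthread_mutex_init(&b->lock, NULL);
        a->refcnt = b->refcnt = 1;         /* list references */

        select_candidate(a, &selected);
        select_candidate(b, &selected);    /* drops the extra ref on a */
        entry_release(selected);           /* selection reference on b */
        entry_release(a);                  /* list reference */
        entry_release(b);                  /* list reference */
        return 0;
    }
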
@@ -1286,7 +1403,8 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                }
 
                /* If none so far, schedule it to be installed below */
-               if (_dr == NULL) {
+               if (_dr == NULL && dr->ifp != NULL &&
+                   !(dr->ifp->if_eflags & IFEF_IPV6_ROUTER)) {
                        dr->genid = -1;
                        ++update;
                        nd6log2((LOG_INFO, "%s: possible router %s, "
@@ -1407,7 +1525,7 @@ defrouter_select_common(struct ifnet *ifp, int ignore)
                            if_name(dr->ifp), ip6_sprintf(&dr->rtaddr)));
                }
                if (!ip6_doscopedroute && installed_dr != dr) {
-                       /* 
+                       /*
                         * No need to ADDREF dr because at this point
                         * dr points to selected_dr, which already holds
                         * a reference.
@@ -1588,18 +1706,12 @@ out:
        }
 }
 
-void
-defrouter_select(struct ifnet *ifp)
-{
-       return (defrouter_select_common(ifp, 0));
-}
-
 static struct nd_defrouter *
 defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped)
 {
        struct nd_defrouter *dr, *n;
        struct ifnet *ifp = new->ifp;
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = NULL;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
@@ -1667,10 +1779,6 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped)
                        new->stateflags = dr->stateflags;
                        new->stateflags &= ~NDDRF_PROCESSED;
 
-                       lck_rw_lock_shared(nd_if_rwlock);
-                       VERIFY(ifp->if_index < nd_ifinfo_indexlim);
-                       ndi = &nd_ifinfo[ifp->if_index];
-                       lck_rw_done(nd_if_rwlock);
                        n = dr;
                        goto insert;
                }
@@ -1690,11 +1798,14 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped)
        }
 
        lck_rw_lock_shared(nd_if_rwlock);
-       ndi = &nd_ifinfo[ifp->if_index];
        if (ifp->if_index >= nd_ifinfo_indexlim)
                goto freeit;
+       ndi = &nd_ifinfo[ifp->if_index];
+       VERIFY(ndi->initialized);
+       lck_mtx_lock(&ndi->lock);
        if (ip6_maxifdefrouters >= 0 &&
            ndi->ndefrouters >= ip6_maxifdefrouters) {
+               lck_mtx_unlock(&ndi->lock);
 freeit:
                lck_rw_done(nd_if_rwlock);
                nddr_free(n);
@@ -1705,7 +1816,9 @@ freeit:
        NDDR_ADDREF(n); /* for the caller */
 
        ++nd6_defrouter_genid;
-       atomic_add_32(&ndi->ndefrouters, 1);
+       ndi->ndefrouters++;
+       VERIFY(ndi->ndefrouters != 0);
+       lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
 
        nd6log2((LOG_INFO, "%s: allocating defrouter %s\n", if_name(ifp),
@@ -1746,8 +1859,7 @@ insert:
        else
                TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
 
-       /* Ignore auto-configuration checks for static route entries */
-       defrouter_select_common(ifp, (n->stateflags & NDDRF_STATIC));
+       defrouter_select(ifp);
 
        return (n);
 }
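
The accounting above replaces atomic_add_32() with a counter guarded by the per-nd_ifinfo mutex, so the limit check, the increment, and the overflow VERIFY form one critical section. A compilable sketch of that shape; the struct and function names are illustrative, with assert() standing in for VERIFY():

    #include <assert.h>
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for struct nd_ifinfo. */
    struct ifinfo {
        pthread_mutex_t lock;
        uint32_t ndefrouters;
    };

    /* Check the per-interface limit and bump the counter in a single
     * critical section; nonzero means the caller must free its entry. */
    static int
    defrouter_account(struct ifinfo *ndi, int32_t max)
    {
        pthread_mutex_lock(&ndi->lock);
        if (max >= 0 && ndi->ndefrouters >= (uint32_t)max) {
            pthread_mutex_unlock(&ndi->lock);
            return -1;
        }
        ndi->ndefrouters++;
        assert(ndi->ndefrouters != 0);     /* VERIFY: no wraparound */
        pthread_mutex_unlock(&ndi->lock);
        return 0;
    }

    int
    main(void)
    {
        struct ifinfo ndi = { PTHREAD_MUTEX_INITIALIZER, 0 };

        printf("%d\n", defrouter_account(&ndi, 1));   /* 0: accepted */
        printf("%d\n", defrouter_account(&ndi, 1));   /* -1: at limit */
        return 0;
    }
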
@@ -1785,11 +1897,7 @@ defrtrlist_sync(struct ifnet *ifp)
        }
 
        if (dr == NULL) {
-               /*
-                * Set ignore flag; the chosen default interface might
-                * not be configured to accept RAs.
-                */
-               defrouter_select_common(ifp, 1);
+               defrouter_select(ifp);
        } else {
                memcpy(&new.rtaddr, &dr->rtaddr, sizeof(new.rtaddr));
                new.flags = dr->flags;
@@ -1921,8 +2029,8 @@ repeat:
                                ifnet_lock_done(ifp);
                                lck_mtx_unlock(nd6_mutex);
                                in6_purgeaddr(ifa);
-                               lck_mtx_lock(nd6_mutex);
                                IFA_REMREF(ifa); /* drop ours */
+                               lck_mtx_lock(nd6_mutex);
                                pr = nd_prefix.lh_first;
                                goto repeat;
                        }
@@ -1963,7 +2071,10 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
                        return (EINVAL);
                }
                ndi = &nd_ifinfo[ifp->if_index];
+               VERIFY(ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
                if (ndi->nprefixes >= ip6_maxifprefixes / 2) {
+                       lck_mtx_unlock(&ndi->lock);
                        lck_rw_done(nd_if_rwlock);
                        purge_detached(ifp);
                        lck_rw_lock_shared(nd_if_rwlock);
@@ -1973,11 +2084,14 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
                         * isn't necessary since the array never shrinks.
                         */
                        ndi = &nd_ifinfo[ifp->if_index];
+                       lck_mtx_lock(&ndi->lock);
                }
                if (ndi->nprefixes >= ip6_maxifprefixes) {
+                       lck_mtx_unlock(&ndi->lock);
                        lck_rw_done(nd_if_rwlock);
                        return(ENOMEM);
                }
+               lck_mtx_unlock(&ndi->lock);
                lck_rw_done(nd_if_rwlock);
        }
 
@@ -2048,7 +2162,11 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
         * isn't necessary since the array never shrinks.
         */
        ndi = &nd_ifinfo[ifp->if_index];
-       atomic_add_32(&ndi->nprefixes, 1);
+       VERIFY(ndi->initialized);
+       lck_mtx_lock(&ndi->lock);
+       ndi->nprefixes++;
+       VERIFY(ndi->nprefixes != 0);
+       lck_mtx_unlock(&ndi->lock);
        lck_rw_done(nd_if_rwlock);
 
        lck_mtx_unlock(nd6_mutex);
@@ -2112,11 +2230,11 @@ prelist_remove(struct nd_prefix *pr)
        lck_rw_lock_shared(nd_if_rwlock);
        if (ifp->if_index < nd_ifinfo_indexlim) {
                struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-               atomic_add_32(&ndi->nprefixes, -1);
-               if (ndi->nprefixes < 0) {
-                       log(LOG_WARNING, "prelist_remove: negative "
-                           "count on %s\n", if_name(ifp));
-               }
+               VERIFY(ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+               VERIFY(ndi->nprefixes > 0);
+               ndi->nprefixes--;
+               lck_mtx_unlock(&ndi->lock);
        }
        lck_rw_done(nd_if_rwlock);
 
@@ -2505,7 +2623,7 @@ prelist_update(
                        lck_mtx_unlock(nd6_mutex);
                } else {
                        /* just set an error. do not bark here. */
-                       error = EADDRNOTAVAIL; /* XXX: might be unused. */
+                       error = EADDRNOTAVAIL;
                }
        }
 
@@ -2642,6 +2760,7 @@ ndpr_alloc(int how)
        if (pr != NULL) {
                bzero(pr, ndpr_size);
                lck_mtx_init(&pr->ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
+               RB_INIT(&pr->ndpr_prproxy_sols);
                pr->ndpr_debug |= IFD_ALLOC;
                if (ndpr_debug != 0) {
                        pr->ndpr_debug |= IFD_DEBUG;
@@ -2661,6 +2780,17 @@ ndpr_free(struct nd_prefix *pr)
        } else if (!(pr->ndpr_debug & IFD_ALLOC)) {
                panic("%s: ndpr %p cannot be freed", __func__, pr);
                /* NOTREACHED */
+       } else if (pr->ndpr_rt != NULL) {
+               panic("%s: ndpr %p route %p not freed", __func__, pr,
+                   pr->ndpr_rt);
+               /* NOTREACHED */
+       } else if (pr->ndpr_prproxy_sols_cnt != 0) {
+               panic("%s: ndpr %p non-zero solicitors count (%d)",
+                   __func__, pr, pr->ndpr_prproxy_sols_cnt);
+               /* NOTREACHED */
+       } else if (!RB_EMPTY(&pr->ndpr_prproxy_sols)) {
+               panic("%s: ndpr %p non-empty solicitors tree", __func__, pr);
+               /* NOTREACHED */
        }
        pr->ndpr_debug &= ~IFD_ALLOC;
        NDPR_UNLOCK(pr);
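
The new branches turn ndpr_free() into an invariant checker: a prefix may only be freed once its route reference and proxy-solicitor state have been torn down, and any violation panics immediately rather than leaking or corrupting state. A hedged userland analogue, with abort() in place of panic() and invented field names:

    #include <stdio.h>
    #include <stdlib.h>

    #define PFX_ALLOC 0x1                  /* IFD_ALLOC analogue */

    /* Hypothetical prefix object mirroring the checks above. */
    struct prefix {
        unsigned debug;
        void *route;                       /* must be detached first */
        int solicitors;                    /* must have drained to zero */
    };

    static void
    prefix_free(struct prefix *pr)
    {
        if (!(pr->debug & PFX_ALLOC)) {
            fprintf(stderr, "%s: %p cannot be freed\n", __func__, (void *)pr);
            abort();                       /* panic(): stray or double free */
        }
        if (pr->route != NULL || pr->solicitors != 0) {
            fprintf(stderr, "%s: %p still holds state\n", __func__, (void *)pr);
            abort();                       /* panic(): caller skipped teardown */
        }
        pr->debug &= ~PFX_ALLOC;
        free(pr);
    }

    int
    main(void)
    {
        struct prefix *pr = calloc(1, sizeof(*pr));

        pr->debug |= PFX_ALLOC;
        prefix_free(pr);                   /* clean: all invariants hold */
        return 0;
    }
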
@@ -2808,6 +2938,9 @@ pfxlist_onlink_check(void)
        struct in6_ifaddr *ifa;
        struct nd_defrouter *dr;
        struct nd_pfxrouter *pfxrtr = NULL;
+       int err, i, found = 0;
+       struct ifaddr **ifap = NULL;
+       struct nd_prefix *ndpr;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
@@ -3005,13 +3138,23 @@ pfxlist_onlink_check(void)
         * detached.  Note, however, that a manually configured address should
         * always be attached.
         * The precise detection logic is the same as the one for prefixes.

+        *
+        * ifnet_get_address_list_family_internal() may fail due to memory
+        * pressure, but we will eventually be called again when we receive
+        * another NA, RA, or when the link status changes.
         */
-       lck_rw_lock_shared(&in6_ifaddr_rwlock);
-       for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
-               struct nd_prefix *ndpr;
-
+       err = ifnet_get_address_list_family_internal(NULL, &ifap, AF_INET6, 0,
+           M_NOWAIT);
+       if (err != 0 || ifap == NULL) {
+               nd6log((LOG_ERR, "%s: ifnet_get_address_list_family_internal "
+                   "failed", __func__));
+               return;
+       }
+       for (i = 0; ifap[i]; i++) {
+               ifa = ifatoia6(ifap[i]);
                IFA_LOCK(&ifa->ia_ifa);
-               if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+               if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0 ||
+                   (ifap[i]->ifa_debug & IFD_ATTACHED) == 0) {
                        IFA_UNLOCK(&ifa->ia_ifa);
                        continue;
                }
@@ -3031,17 +3174,18 @@ pfxlist_onlink_check(void)
                if (find_pfxlist_reachable_router(ndpr)) {
                        NDPR_UNLOCK(ndpr);
                        NDPR_REMREF(ndpr);
+                       found = 1;
                        break;
                }
                NDPR_UNLOCK(ndpr);
                NDPR_REMREF(ndpr);
        }
-       if (ifa) {
-               for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
-                       struct nd_prefix *ndpr;
-
+       if (found) {
+               for (i = 0; ifap[i]; i++) {
+                       ifa = ifatoia6(ifap[i]);
                        IFA_LOCK(&ifa->ia_ifa);
-                       if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+                       if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0 ||
+                           (ifap[i]->ifa_debug & IFD_ATTACHED) == 0) {
                                IFA_UNLOCK(&ifa->ia_ifa);
                                continue;
                        }
@@ -3072,9 +3216,9 @@ pfxlist_onlink_check(void)
                        }
                        NDPR_REMREF(ndpr);
                }
-       }
-       else {
-               for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
+       } else {
+               for (i = 0; ifap[i]; i++) {
+                       ifa = ifatoia6(ifap[i]);
                        IFA_LOCK(&ifa->ia_ifa);
                        if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
                                IFA_UNLOCK(&ifa->ia_ifa);
@@ -3091,7 +3235,7 @@ pfxlist_onlink_check(void)
                        }
                }
        }
-       lck_rw_done(&in6_ifaddr_rwlock);
+       ifnet_free_address_list(ifap);
 }
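
Switching from walking in6_ifaddrs under in6_ifaddr_rwlock to ifnet_get_address_list_family_internal() means the loops above iterate a private, NULL-terminated snapshot with no global lock held, at the cost of a fallible allocation (hence the early return, and the explicit found flag instead of testing the loop variable). A self-contained sketch of that snapshot-iterate-free shape; list_snapshot() here is a made-up stand-in, not the kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical address record; the two flags mirror the filters in
     * the loops above (IN6_IFF_AUTOCONF and IFD_ATTACHED). */
    struct addr {
        int autoconf;
        int attached;
        int id;
    };

    static struct addr table[] = {
        { 1, 1, 0 }, { 0, 1, 1 }, { 1, 0, 2 }, { 1, 1, 3 },
    };

    /* Snapshot the table into a NULL-terminated array; a made-up analogue
     * of ifnet_get_address_list_family_internal(), and like it, fallible. */
    static int
    list_snapshot(struct addr ***out)
    {
        size_t i, n = sizeof(table) / sizeof(table[0]);
        struct addr **ap = calloc(n + 1, sizeof(*ap));

        if (ap == NULL)
            return -1;
        for (i = 0; i < n; i++)
            ap[i] = &table[i];
        *out = ap;
        return 0;
    }

    int
    main(void)
    {
        struct addr **ap = NULL;
        int i, found = 0;

        if (list_snapshot(&ap) != 0 || ap == NULL)
            return 1;      /* retry on the next NA/RA or link change */
        for (i = 0; ap[i] != NULL; i++) {
            if (!ap[i]->autoconf || !ap[i]->attached)
                continue;  /* same filters as the loops above */
            printf("visiting address %d\n", ap[i]->id);
            found = 1;     /* explicit flag, not the loop variable */
        }
        printf("found=%d\n", found);
        free(ap);          /* ifnet_free_address_list() analogue */
        return 0;
    }
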
 
 static struct nd_prefix *
@@ -3235,7 +3379,7 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
        struct sockaddr_in6 mask6, prefix;
        struct nd_prefix *opr;
        u_int32_t rtflags;
-       int error = 0;
+       int error = 0, prproxy = 0;
        struct rtentry *rt = NULL;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
@@ -3322,6 +3466,9 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
        mask6.sin6_len = sizeof(mask6);
        mask6.sin6_addr = pr->ndpr_mask;
        prefix = pr->ndpr_prefix;
+       if ((rt = pr->ndpr_rt) != NULL)
+               pr->ndpr_rt = NULL;
+       NDPR_ADDREF_LOCKED(pr);         /* keep reference for this routine */
        NDPR_UNLOCK(pr);
 
        IFA_LOCK_SPIN(ifa);
@@ -3339,15 +3486,25 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
 
        lck_mtx_unlock(nd6_mutex);
 
+       if (rt != NULL) {
+               rtfree(rt);
+               rt = NULL;
+       }
+
        error = rtrequest_scoped(RTM_ADD, (struct sockaddr *)&prefix,
            ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt,
            ifscope);
 
+       /*
+        * Serialize the setting of NDPRF_PRPROXY.
+        */
+       lck_mtx_lock(&proxy6_lock);
+
        if (rt != NULL) {
                RT_LOCK(rt);
                nd6_rtmsg(RTM_ADD, rt);
                RT_UNLOCK(rt);
-               RT_REMREF(rt);
+               NDPR_LOCK(pr);
        } else {
                NDPR_LOCK(pr);
                nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
@@ -3355,25 +3512,71 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
                    " scoped=%d, errno = %d\n",
                    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
                    pr->ndpr_plen, if_name(ifp),
-                   ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
+                   ip6_sprintf(&((struct sockaddr_in6 *)
+                   (void *)ifa->ifa_addr)->sin6_addr),
                    ip6_sprintf(&mask6.sin6_addr), rtflags,
                    (ifscope != IFSCOPE_NONE), error));
-               NDPR_UNLOCK(pr);
        }
+       NDPR_LOCK_ASSERT_HELD(pr);
 
-       lck_mtx_lock(nd6_mutex);
+       pr->ndpr_stateflags &= ~(NDPRF_IFSCOPE | NDPRF_PRPROXY);
 
-       NDPR_LOCK(pr);
-       pr->ndpr_stateflags &= ~NDPRF_IFSCOPE;
+       /*
+        * TODO: If the prefix route exists, we should really find it and
+        * have the prefix refer to it; otherwise ndpr_rt is NULL.
+        */
        if (rt != NULL || error == EEXIST) {
+               struct nd_ifinfo *ndi;
+
+               VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
+               VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));
+
+               lck_rw_lock_shared(nd_if_rwlock);
+               ndi = ND_IFINFO(ifp);
+               VERIFY(ndi != NULL && ndi->initialized);
+               lck_mtx_lock(&ndi->lock);
+
+               pr->ndpr_rt = rt;       /* keep reference from rtrequest */
                pr->ndpr_stateflags |= NDPRF_ONLINK;
-               if (ifscope != IFSCOPE_NONE)
+               if (ifscope != IFSCOPE_NONE) {
                        pr->ndpr_stateflags |= NDPRF_IFSCOPE;
+               } else if ((rtflags & RTF_CLONING) &&
+                   (ndi->flags & ND6_IFF_PROXY_PREFIXES) &&
+                   !IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
+                       /*
+                        * At present, in order for the prefix to be eligible
+                        * as a proxying/proxied prefix, we require that the
+                        * prefix route entry be marked as a cloning route with
+                        * RTF_PROXY; i.e. nd6_need_cache() needs to return
+                        * true for the interface type, hence the test for
+                        * RTF_CLONING above.
+                        */
+                       pr->ndpr_stateflags |= NDPRF_PRPROXY;
+               }
+
+               lck_mtx_unlock(&ndi->lock);
+               lck_rw_done(nd_if_rwlock);
        }
+
+       prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY);
+       VERIFY(!prproxy || !(pr->ndpr_stateflags & NDPRF_IFSCOPE));
        NDPR_UNLOCK(pr);
 
        IFA_REMREF(ifa);
 
+       /*
+        * If this is an upstream prefix, find the downstream ones (if any)
+        * and re-configure their prefix routes accordingly.  Otherwise,
+        * this could potentially be a downstream prefix, and so find the
+        * upstream prefix, if any.
+        */
+       nd6_prproxy_prelist_update(pr, prproxy ? pr : NULL);
+
+       NDPR_REMREF(pr);        /* release reference for this routine */
+       lck_mtx_unlock(&proxy6_lock);
+
+       lck_mtx_lock(nd6_mutex);
+
        return (error);
 }
 
@@ -3392,11 +3595,11 @@ nd6_prefix_onlink_scoped(struct nd_prefix *pr, unsigned int ifscope)
 int
 nd6_prefix_offlink(struct nd_prefix *pr)
 {
-       int plen, error = 0;
+       int plen, error = 0, prproxy;
        struct ifnet *ifp = pr->ndpr_ifp;
        struct nd_prefix *opr;
        struct sockaddr_in6 sa6, mask6, prefix;
-       struct rtentry *rt = NULL;
+       struct rtentry *rt = NULL, *ndpr_rt = NULL;
        unsigned int ifscope;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
@@ -3424,6 +3627,9 @@ nd6_prefix_offlink(struct nd_prefix *pr)
        bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
        prefix = pr->ndpr_prefix;
        plen = pr->ndpr_plen;
+       if ((ndpr_rt = pr->ndpr_rt) != NULL)
+               pr->ndpr_rt = NULL;
+       NDPR_ADDREF_LOCKED(pr);         /* keep reference for this routine */
        NDPR_UNLOCK(pr);
 
        ifscope = (pr->ndpr_stateflags & NDPRF_IFSCOPE) ?
@@ -3506,10 +3712,34 @@ nd6_prefix_offlink(struct nd_prefix *pr)
                    (ifscope != IFSCOPE_NONE), error));
        }
 
+       if (ndpr_rt != NULL)
+               rtfree(ndpr_rt);
+
+       lck_mtx_lock(&proxy6_lock);
+
        NDPR_LOCK(pr);
-       pr->ndpr_stateflags &= ~(NDPRF_ONLINK | NDPRF_IFSCOPE);
+       prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY);
+       VERIFY(!prproxy || !(pr->ndpr_stateflags & NDPRF_IFSCOPE));
+       pr->ndpr_stateflags &= ~(NDPRF_ONLINK | NDPRF_IFSCOPE | NDPRF_PRPROXY);
+       if (pr->ndpr_prproxy_sols_cnt > 0) {
+               VERIFY(prproxy);
+               nd6_prproxy_sols_reap(pr);
+               VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
+               VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));
+       }
        NDPR_UNLOCK(pr);
 
+       /*
+        * If this was an upstream prefix, find the downstream ones and do
+        * some cleanups.  If this was a downstream prefix, the prefix route
+        * has been removed from the routing table above, but there may be
+        * other tasks to perform.
+        */
+       nd6_prproxy_prelist_update(pr, prproxy ? pr : NULL);
+
+       NDPR_REMREF(pr);        /* release reference for this routine */
+       lck_mtx_unlock(&proxy6_lock);
+
        return (error);
 }
 
@@ -3644,10 +3874,22 @@ in6_ifadd(
        }
 
        ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
+       if (ia == NULL) {
+               /*
+                * XXX: both in6_ifadd and in6_iftmpadd follow this admittedly
+                * suboptimal pattern of calling in6_update_ifa to add the
+                * interface address, then calling in6ifa_ifpwithaddr to
+                * retrieve it from the interface address list after some
+                * concurrent kernel thread has first had the opportunity to
+                * call in6_purgeaddr and delete everything.
+                */
+               nd6log((LOG_ERR,
+                   "in6_ifadd: ifa update succeeded, but we got no ifaddr\n"));
+               return(NULL);
+       }
 
-       in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia); 
-
-       return(ia);             /* this must NOT be NULL. */
+       in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia);
+       return(ia);
 }
 
 #define        IA6_NONCONST(i) ((struct in6_ifaddr *)(uintptr_t)(i))
@@ -3756,11 +3998,19 @@ again:
                return (error);
 
        newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
-       if (newia == NULL) {    /* XXX: can it happen? */
+       if (newia == NULL) {
+               /*
+                * XXX: both in6_ifadd and in6_iftmpadd follow this admittedly
+                * suboptimal pattern of calling in6_update_ifa to add the
+                * interface address, then calling in6ifa_ifpwithaddr to
+                * retrieve it from the interface address list after some
+                * concurrent kernel thread has first had the opportunity to
+                * call in6_purgeaddr and delete everything.
+                */
                nd6log((LOG_ERR,
                    "in6_tmpifadd: ifa update succeeded, but we got "
                    "no ifaddr\n"));
-               return(EINVAL); /* XXX */
+               return(EINVAL);
        }
        IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa);
        ndpr = ia0->ia6_ndpr;
@@ -3894,7 +4144,6 @@ rt6_deleteroute(
        struct radix_node *rn,
        void *arg)
 {
-#define SIN6(s)        ((struct sockaddr_in6 *)s)
        struct rtentry *rt = (struct rtentry *)rn;
        struct in6_addr *gate = (struct in6_addr *)arg;
 
@@ -3936,7 +4185,6 @@ rt6_deleteroute(
        RT_UNLOCK(rt);
        return (rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway,
            rt_mask(rt), rt->rt_flags, 0));
-#undef SIN6
 }
 
 int
@@ -3945,7 +4193,7 @@ nd6_setdefaultiface(
 {
        int error = 0;
        ifnet_t def_ifp = NULL;
-       
+
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
        ifnet_head_lock_shared();
index f5c48648eaafed7ca1b35e6441e7648b6c2adfcb..a08cf7139beeb5e6fda49e20327e09ae471fd638 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,6 +93,7 @@
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 extern int ipsec_bypass;
 #endif /*IPSEC*/
 
-#define        satosin6(sa)    ((struct sockaddr_in6 *)(sa))
-#define        ifatoia6(ifa)   ((struct in6_ifaddr *)(ifa))
-
 /*
  * Raw interface to IP6 protocol.
  */
@@ -161,15 +159,10 @@ rip6_input(
        struct sockaddr_in6 rip6src;
        int ret;
 
-       rip6stat.rip6s_ipackets++;
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
-#if defined(NFAITH) && 0 < NFAITH
-       if (faithprefix(&ip6->ip6_dst)) {
-               /* XXX send icmp6 host/port unreach? */
-               m_freem(m);
-               return IPPROTO_DONE;
-       }
-#endif
+       rip6stat.rip6s_ipackets++;
 
        init_sin6(&rip6src, m); /* general init */
 
@@ -186,7 +179,7 @@ rip6_input(
                if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
                    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
                        continue;
-               if (in6p->in6p_cksum != -1) {
+               if (proto == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) {
                        rip6stat.rip6s_isum++;
                        if (in6_cksum(m, ip6->ip6_nxt, *offp,
                            m->m_pkthdr.len - *offp)) {
@@ -353,8 +346,9 @@ rip6_output(
        struct ip6_moptions *im6o = NULL;
        struct ifnet *oifp = NULL;
        int type = 0, code = 0;         /* for ICMPv6 output statistics only */
-       mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF };
        int flags = IPV6_OUTARGS;
 
        if (dstsock && IN6_IS_ADDR_V4MAPPED(&dstsock->sin6_addr)) {
@@ -364,13 +358,16 @@ rip6_output(
 
        in6p = sotoin6pcb(so);
 
-       ip6oa.ip6oa_boundif = (in6p->inp_flags & INP_BOUND_IF) ?
-           in6p->inp_boundif : IFSCOPE_NONE;
-       ip6oa.ip6oa_nocell = (in6p->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+       if (in6p->inp_flags & INP_BOUND_IF) {
+               ip6oa.ip6oa_boundif = in6p->inp_boundifp->if_index;
+               ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+       }
+       if (in6p->inp_flags & INP_NO_IFT_CELLULAR)
+               ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
 
        dst = &dstsock->sin6_addr;
        if (control) {
-               mtc = mbuf_traffic_class_from_control(control);
+               msc = mbuf_service_class_from_control(control);
 
                if ((error = ip6_setpktopts(control, &opt, NULL, so->so_proto->pr_protocol)) != 0)
                        goto bad;
@@ -394,6 +391,9 @@ rip6_output(
                code = icmp6->icmp6_code;
        }
 
+       if (in6p->inp_flowhash == 0)
+               in6p->inp_flowhash = inp_calc_flowhash(in6p);
+
        M_PREPEND(m, sizeof(*ip6), M_WAIT);
        if (m == NULL) {
                error = ENOBUFS;
@@ -463,7 +463,7 @@ rip6_output(
                struct in6_addr *in6a;
                struct in6_addr storage;
                u_short index = 0;
-               
+
                if (israw != 0 && optp && optp->ip6po_pktinfo && !IN6_IS_ADDR_UNSPECIFIED(&optp->ip6po_pktinfo->ipi6_addr)) {
                        in6a = &optp->ip6po_pktinfo->ipi6_addr;
                        flags |= IPV6_FLAG_NOSRCIFSEL;
@@ -473,6 +473,8 @@ rip6_output(
                        if (error == 0)
                                error = EADDRNOTAVAIL;
                        goto bad;
+               } else {
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_SRCADDR;
                }
                ip6->ip6_src = *in6a;
                if (in6p->in6p_route.ro_rt != NULL) {
@@ -524,7 +526,7 @@ rip6_output(
                }
                if (!n)
                        goto bad;
-               p = (u_int16_t *)(mtod(n, caddr_t) + off);
+               p = (u_int16_t *)(void *)(mtod(n, caddr_t) + off);
                *p = 0;
                *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
        }
@@ -547,8 +549,10 @@ rip6_output(
                oifp = NULL;
        }
 
-       set_packet_tclass(m, so, mtc, 1);
-       
+       set_packet_service_class(m, so, msc, PKT_SCF_IPV6);
+       m->m_pkthdr.m_flowhash = in6p->inp_flowhash;
+       m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH;
+
        if (im6o != NULL)
                IM6O_ADDREF(im6o);
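
inp_flowhash is computed lazily on the first send and then stamped into each outgoing mbuf together with PF_TAG_FLOWHASH, so later stages can map packets back to their flow. A sketch under assumptions: the FNV-1a mixing below is illustrative and is not the kernel's inp_calc_flowhash():

    #include <stdint.h>
    #include <stdio.h>

    /* Toy PCB and packet; field names loosely shadow the real ones. */
    struct pcb { uint32_t laddr, faddr, lport, fport, flowhash; };
    struct pkt { uint32_t flowhash; unsigned tagged : 1; };

    /* Illustrative FNV-1a over the 4-tuple (not the kernel's hash). */
    static uint32_t
    mix(uint32_t h, uint32_t v)
    {
        for (int i = 0; i < 4; i++)
            h = (h ^ ((v >> (8 * i)) & 0xff)) * 16777619u;
        return h;
    }

    static uint32_t
    calc_flowhash(const struct pcb *p)
    {
        uint32_t h = 2166136261u;

        h = mix(mix(h, p->laddr), p->faddr);
        h = mix(h, (p->lport << 16) | p->fport);
        return h != 0 ? h : 1;             /* 0 stays the "unset" marker */
    }

    static void
    stamp(struct pcb *p, struct pkt *m)
    {
        if (p->flowhash == 0)              /* first send on this socket */
            p->flowhash = calc_flowhash(p);
        m->flowhash = p->flowhash;         /* m_pkthdr.m_flowhash */
        m->tagged = 1;                     /* PF_TAG_FLOWHASH */
    }

    int
    main(void)
    {
        struct pcb p = { 0x0a000001, 0x0a000002, 1234, 80, 0 };
        struct pkt m = { 0, 0 };

        stamp(&p, &m);
        printf("flowhash %#x tagged %u\n", m.flowhash, m.tagged);
        return 0;
    }
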
 
@@ -560,7 +564,7 @@ rip6_output(
 
        if (in6p->in6p_route.ro_rt != NULL) {
                struct rtentry *rt = in6p->in6p_route.ro_rt;
-               unsigned int outif;
+               struct ifnet *outif;
 
                if ((rt->rt_flags & RTF_MULTICAST) ||
                    in6p->in6p_socket == NULL ||
@@ -581,8 +585,8 @@ rip6_output(
                 * the route interface index used by IP.
                 */
                if (rt != NULL &&
-                   (outif = rt->rt_ifp->if_index) != in6p->in6p_last_outif)
-                       in6p->in6p_last_outif = outif;
+                   (outif = rt->rt_ifp) != in6p->in6p_last_outifp)
+                       in6p->in6p_last_outifp = outif;
        }
 
        if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
@@ -629,15 +633,17 @@ rip6_ctloutput(
        struct socket *so,
        struct sockopt *sopt)
 {
-       int error;
+       int error, optval;
 
+       /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
        if (sopt->sopt_level == IPPROTO_ICMPV6)
                /*
                 * XXX: is it better to call icmp6_ctloutput() directly
                 * from protosw?
                 */
                return(icmp6_ctloutput(so, sopt));
-       else if (sopt->sopt_level != IPPROTO_IPV6)
+       else if (sopt->sopt_level != IPPROTO_IPV6 &&
+           !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
                return (EINVAL);
 
        error = 0;
@@ -711,6 +717,15 @@ rip6_ctloutput(
                case IPV6_CHECKSUM:
                        error = ip6_raw_ctloutput(so, sopt);
                        break;
+
+               case SO_FLUSH:
+                       if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval))) != 0)
+                               break;
+
+                       error = inp_flush(sotoinpcb(so), optval);
+                       break;
+
                default:
                        error = ip6_ctloutput(so, sopt);
                        break;
@@ -795,9 +810,9 @@ static int
 rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
        struct inpcb *inp = sotoinpcb(so);
-       struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *addr = (struct sockaddr_in6 *)(void *)nam;
        struct ifaddr *ifa = NULL;
-       unsigned int outif = 0;
+       struct ifnet *outif = NULL;
 
        if (nam->sa_len != sizeof(*addr))
                return EINVAL;
@@ -821,12 +836,12 @@ rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
                        IFA_REMREF(ifa);
                        return(EADDRNOTAVAIL);
                }
-               outif = ifa->ifa_ifp->if_index;
+               outif = ifa->ifa_ifp;
                IFA_UNLOCK(ifa);
                IFA_REMREF(ifa);
        }
        inp->in6p_laddr = addr->sin6_addr;
-       inp->in6p_last_outif = outif;
+       inp->in6p_last_outifp = outif;
        return 0;
 }
 
@@ -834,14 +849,15 @@ static int
 rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
        struct inpcb *inp = sotoinpcb(so);
-       struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *addr = (struct sockaddr_in6 *)(void *)nam;
        struct in6_addr *in6a = NULL;
        struct in6_addr storage;
        int error = 0;
 #if ENABLE_DEFAULT_SCOPE
        struct sockaddr_in6 tmp;
 #endif
-       unsigned int outif = 0, ifscope;
+       unsigned int ifscope;
+       struct ifnet *outif = NULL;
 
        if (nam->sa_len != sizeof(*addr))
                return EINVAL;
@@ -859,7 +875,7 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 #endif
 
        ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-           inp->inp_boundif : IFSCOPE_NONE;
+           inp->inp_boundifp->if_index : IFSCOPE_NONE;
 
        /* Source address selection. XXX: need pcblookup? */
        in6a = in6_selectsrc(addr, inp->in6p_outputopts, inp, &inp->in6p_route,
@@ -869,8 +885,8 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
        inp->in6p_laddr = *in6a;
        inp->in6p_faddr = addr->sin6_addr;
        if (inp->in6p_route.ro_rt != NULL)
-               outif = inp->in6p_route.ro_rt->rt_ifp->if_index;
-       inp->in6p_last_outif = outif;
+               outif = inp->in6p_route.ro_rt->rt_ifp;
+       inp->in6p_last_outifp = outif;
        soisconnected(so);
        return 0;
 }
@@ -889,7 +905,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 #pragma unused(flags, p)
        struct inpcb *inp = sotoinpcb(so);
        struct sockaddr_in6 tmp;
-       struct sockaddr_in6 *dst = (struct sockaddr_in6 *)nam;
+       struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam;
 
        /* always copy sockaddr to avoid overwrites */
        if (so->so_state & SS_ISCONNECTED) {
@@ -909,7 +925,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
                        m_freem(m);
                        return ENOTCONN;
                }
-               tmp = *(struct sockaddr_in6 *)nam;
+               tmp = *(struct sockaddr_in6 *)(void *)nam;
                dst = &tmp;
        }
 #if ENABLE_DEFAULT_SCOPE
index a0dc6c6a603decab6de889615a36868b2aa1a311..9325aadece0fd82d247da8a74ba70a76e6d84228 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  */
 
 #include <sys/param.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
+#include <kern/debug.h>
 #include <string.h>
 
 #include <net/if.h>
@@ -101,9 +103,16 @@ route6_input(struct mbuf **mp, int *offp, int proto)
 
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, off, sizeof(*rh), return IPPROTO_DONE);
+
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
        rh = (struct ip6_rthdr *)((caddr_t)ip6 + off);
 #else
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ip6 = mtod(m, struct ip6_hdr *);
        IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh));
        if (rh == NULL) {
index 2d4eedf7621726d993b7c6493ded1028912a32a9..27ad764929858cc8771df93295bdb82160ba2ee2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -117,7 +117,7 @@ scope6_ifattach(
                        bcopy((caddr_t)scope6_ids, q, n/2);
                        FREE((caddr_t)scope6_ids, M_IFADDR);
                }
-               scope6_ids = (struct scope6_id *)q;
+               scope6_ids = (struct scope6_id *)(void *)q;
        }
 
 #define SID scope6_ids[ifp->if_index]
@@ -365,7 +365,7 @@ rtkey_to_sa6(struct rtentry *rt, struct sockaddr_in6 *sin6)
 {
        VERIFY(rt_key(rt)->sa_family == AF_INET6);
 
-       *sin6 = *((struct sockaddr_in6 *)rt_key(rt));
+       *sin6 = *((struct sockaddr_in6 *)(void *)rt_key(rt));
        sin6->sin6_scope_id = 0;
 }
 
@@ -374,7 +374,7 @@ rtgw_to_sa6(struct rtentry *rt, struct sockaddr_in6 *sin6)
 {
        VERIFY(rt->rt_flags & RTF_GATEWAY);
 
-       *sin6 = *((struct sockaddr_in6 *)rt->rt_gateway);
+       *sin6 = *((struct sockaddr_in6 *)(void *)rt->rt_gateway);
        sin6->sin6_scope_id = 0;
 }
 
@@ -382,7 +382,7 @@ rtgw_to_sa6(struct rtentry *rt, struct sockaddr_in6 *sin6)
  * generate standard sockaddr_in6 from embedded form.
  */
 int
-sa6_recoverscope(struct sockaddr_in6 *sin6)
+sa6_recoverscope(struct sockaddr_in6 *sin6, boolean_t attachcheck)
 {
        u_int32_t zoneid;
 
@@ -402,12 +402,25 @@ sa6_recoverscope(struct sockaddr_in6 *sin6)
                        /* sanity check */
                        if (if_index < zoneid)
                                return (ENXIO);
-                       ifnet_head_lock_shared();
-                       if (ifindex2ifnet[zoneid] == NULL) {
+                       /*
+                        * The attachcheck parameter lets callers skip the
+                        * interface attachment check.  Some callers hold the
+                        * ifnet_head lock in exclusive mode, which means:
+                        * 1) the interface can't go away, so the check is
+                        *    unnecessary, and
+                        * 2) the check can't be performed anyway, since taking
+                        *    the lock in shared mode while holding it in
+                        *    exclusive mode would deadlock.
+                        */
+                       if (attachcheck) {
+                               ifnet_head_lock_shared();
+                               if (ifindex2ifnet[zoneid] == NULL) {
+                                       ifnet_head_done();
+                                       return (ENXIO);
+                               }
                                ifnet_head_done();
-                               return (ENXIO);
                        }
-                       ifnet_head_done();
                        sin6->sin6_addr.s6_addr16[1] = 0;
                        sin6->sin6_scope_id = zoneid;
                }
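
In other words, attachcheck == FALSE is for callers that already hold the ifnet_head lock exclusively: the check is then both unnecessary and unsafe. A toy model of that contract, using a pthread rwlock and invented names:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Toy attachment table guarded by a reader/writer lock, standing in
     * for ifindex2ifnet under the ifnet_head lock; names are invented. */
    static pthread_rwlock_t head_lock = PTHREAD_RWLOCK_INITIALIZER;
    static bool attached[8] = { [1] = true, [3] = true };

    /* attachcheck == false is for callers that already hold head_lock
     * exclusively: re-locking in shared mode would self-deadlock, and the
     * entry cannot disappear while they hold the lock anyway. */
    static int
    recover_scope(unsigned zone, bool attachcheck)
    {
        bool ok;

        if (zone >= 8)
            return -1;                     /* ENXIO: bogus zone id */
        if (attachcheck) {
            pthread_rwlock_rdlock(&head_lock);
            ok = attached[zone];
            pthread_rwlock_unlock(&head_lock);
            if (!ok)
                return -1;                 /* interface went away */
        }
        return 0;
    }

    int
    main(void)
    {
        /* Ordinary caller: verify the interface is still attached. */
        printf("%d\n", recover_scope(3, true));

        /* Caller already holding head_lock exclusively skips the check. */
        pthread_rwlock_wrlock(&head_lock);
        printf("%d\n", recover_scope(3, false));
        pthread_rwlock_unlock(&head_lock);
        return 0;
    }
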
@@ -503,8 +516,14 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
 
        scope = in6_addrscope(in6);
 
-#define SID scope6_ids[index]
        lck_mtx_lock(scope6_mutex);
+       if (index >= if_scope_indexlim) {
+               lck_mtx_unlock(scope6_mutex);
+               if (ret_id != NULL)
+                       *ret_id = 0;
+               return (EINVAL);
+       }
+#define SID scope6_ids[index]
        switch (scope) {
        case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
                zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
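
The added guard validates the interface index against if_scope_indexlim under scope6_mutex before the SID macro ever indexes scope6_ids, instead of indexing first and hoping the slot exists. A compilable sketch of checking an index against a growable, lock-guarded table; all names are illustrative:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Growable table guarded by a mutex, as scope6_ids is guarded by
     * scope6_mutex. */
    static pthread_mutex_t scope_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint32_t *ids;
    static size_t ids_limit;

    /* Validate the index against the current limit under the same lock
     * that guards growth, before ever touching ids[index]. */
    static int
    get_zone(size_t index, uint32_t *ret_id)
    {
        uint32_t zone;

        pthread_mutex_lock(&scope_lock);
        if (index >= ids_limit) {
            pthread_mutex_unlock(&scope_lock);
            if (ret_id != NULL)
                *ret_id = 0;
            return -1;                     /* EINVAL: slot not provisioned */
        }
        zone = ids[index];
        pthread_mutex_unlock(&scope_lock);
        if (ret_id != NULL)
            *ret_id = zone;
        return 0;
    }

    int
    main(void)
    {
        uint32_t zone;

        printf("%d\n", get_zone(4, &zone));        /* -1: empty table */

        pthread_mutex_lock(&scope_lock);
        ids = calloc(8, sizeof(*ids));             /* grow the table */
        ids[4] = 42;
        ids_limit = 8;
        pthread_mutex_unlock(&scope_lock);

        printf("%d %u\n", get_zone(4, &zone), zone);   /* 0 42 */
        return 0;
    }
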
index d028aefb8a5c364d35676a872150bbbc6cc9e978..300e8228c51580ebebd68476ee0aa24849f0ffa1 100644 (file)
@@ -82,7 +82,7 @@ int   scope6_get_default(u_int32_t *);
 u_int32_t scope6_in6_addrscope(struct in6_addr *);
 u_int32_t scope6_addr2default(struct in6_addr *);
 int    sa6_embedscope (struct sockaddr_in6 *, int);
-int    sa6_recoverscope (struct sockaddr_in6 *);
+int    sa6_recoverscope (struct sockaddr_in6 *, boolean_t);
 int    in6_setscope (struct in6_addr *, struct ifnet *, u_int32_t *);
 int    in6_clearscope (struct in6_addr *);
 extern void rtkey_to_sa6(struct rtentry *, struct sockaddr_in6 *);
index 0fb9a6993a70bb6a8d1da2d8b270f917ebd7137e..fa609ea19a95ab59e07f2feead05b83a6210cd26 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 extern int ipsec_bypass;
 #endif /*IPSEC*/
 
-#include "faith.h"
-
 #include <net/net_osdep.h>
 
 /*
@@ -186,16 +184,29 @@ udp6_output(in6p, m, addr6, control, p)
        int flags;
        struct sockaddr_in6 tmp;
        struct  in6_addr storage;
-       mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
-       struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+       mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
+       struct ip6_out_args ip6oa =
+           { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF };
+       struct flowadv *adv = &ip6oa.ip6oa_flowadv;
+       int flowadv = 0;
+
+       /* Enable flow advisory only when connected */
+       flowadv = (in6p->inp_socket->so_state & SS_ISCONNECTED) ? 1 : 0;
 
-       if (in6p->inp_flags & INP_BOUND_IF)
-               ip6oa.ip6oa_boundif = in6p->inp_boundif;
+       if (flowadv && INP_WAIT_FOR_IF_FEEDBACK(in6p)) {
+               error = ENOBUFS;
+               goto release;
+       }
 
-       ip6oa.ip6oa_nocell = (in6p->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+       if (in6p->inp_flags & INP_BOUND_IF) {
+               ip6oa.ip6oa_boundif = in6p->inp_boundifp->if_index;
+               ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
+       }
+       if (in6p->inp_flags & INP_NO_IFT_CELLULAR)
+               ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
 
        if (control) {
-               mtc = mbuf_traffic_class_from_control(control);
+               msc = mbuf_service_class_from_control(control);
 
                if ((error = ip6_setpktopts(control, &opt, NULL, IPPROTO_UDP)) != 0)
                        goto release;
@@ -212,7 +223,9 @@ udp6_output(in6p, m, addr6, control, p)
                 * and in6_pcbsetport in order to fill in the local address
                 * and the local port.
                 */
-               struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr6;
+               struct sockaddr_in6 *sin6 =
+                   (struct sockaddr_in6 *)(void *)addr6;
+
                if (sin6->sin6_port == 0) {
                        error = EADDRNOTAVAIL;
                        goto release;
@@ -296,6 +309,9 @@ udp6_output(in6p, m, addr6, control, p)
                fport = in6p->in6p_fport;
        }
 
+       if (in6p->inp_flowhash == 0)
+               in6p->inp_flowhash = inp_calc_flowhash(in6p);
+
        if (af == AF_INET)
                hlen = sizeof(struct ip);
 
@@ -312,7 +328,7 @@ udp6_output(in6p, m, addr6, control, p)
        /*
         * Stuff checksum and output datagram.
         */
-       udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
+       udp6 = (struct udphdr *)(void *)(mtod(m, caddr_t) + hlen);
        udp6->uh_sport = in6p->in6p_lport; /* lport is always set in the PCB */
        udp6->uh_dport = fport;
        if (plen <= 0xffff)
@@ -342,6 +358,9 @@ udp6_output(in6p, m, addr6, control, p)
                m->m_pkthdr.csum_flags = CSUM_UDPIPV6;
                m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 
+               if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
+                       ip6oa.ip6oa_flags |= IP6OAF_BOUND_SRCADDR;
+
                flags = IPV6_OUTARGS;
 
                udp6stat.udp6s_opackets++;
@@ -353,26 +372,53 @@ udp6_output(in6p, m, addr6, control, p)
 #endif /*IPSEC*/
                m->m_pkthdr.socket_id = get_socket_id(in6p->in6p_socket);
 
-               set_packet_tclass(m, in6p->in6p_socket, mtc, 1);
+               set_packet_service_class(m, in6p->in6p_socket, msc, PKT_SCF_IPV6);
+
+               m->m_pkthdr.m_flowhash = in6p->inp_flowhash;
+               m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH;
+               if (flowadv)
+                       m->m_pkthdr.m_fhflags |= PF_TAG_FLOWADV;
 
                im6o = in6p->in6p_moptions;
-               if (im6o != NULL)
-                       IM6O_ADDREF(im6o);
+               if (im6o != NULL) {
+                       IM6O_LOCK(im6o);
+                       IM6O_ADDREF_LOCKED(im6o);
+                       if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
+                           im6o->im6o_multicast_ifp != NULL) {
+                               in6p->in6p_last_outifp = im6o->im6o_multicast_ifp;
+                       }
+                       IM6O_UNLOCK(im6o);
+               }
+
+               in6p->inp_sndinprog_cnt++;
 
                error = ip6_output(m, optp, &in6p->in6p_route,
                    flags, im6o, NULL, &ip6oa);
 
                if (im6o != NULL)
                        IM6O_REMREF(im6o);
-               
+
                if (error == 0 && nstat_collect) {
                        locked_add_64(&in6p->inp_stat->txpackets, 1);
                        locked_add_64(&in6p->inp_stat->txbytes, ulen);
                }
 
+               if (flowadv && (adv->code == FADV_FLOW_CONTROLLED ||
+                   adv->code == FADV_SUSPENDED)) {
+                       /*
+                        * Return an error to indicate that the packet
+                        * has been dropped.
+                        */
+                       error = ENOBUFS;
+                       inp_set_fc_state(in6p, adv->code);
+               }
+
+               VERIFY(in6p->inp_sndinprog_cnt > 0);
+               if (--in6p->inp_sndinprog_cnt == 0)
+                       in6p->inp_flags &= ~(INP_FC_FEEDBACK);
+
                if (in6p->in6p_route.ro_rt != NULL) {
                        struct rtentry *rt = in6p->in6p_route.ro_rt;
-                       unsigned int outif;
+                       struct ifnet *outif;
 
                        if ((rt->rt_flags & RTF_MULTICAST) ||
                            in6p->in6p_socket == NULL ||
@@ -390,11 +436,11 @@ udp6_output(in6p, m, addr6, control, p)
                        /*
                         * If this is a connected socket and the destination
                         * route is not multicast, update outif with that of
-                        * the route interface index used by IP.
+                        * the route interface used by IP.
                         */
-                       if (rt != NULL && (outif = rt->rt_ifp->if_index) !=
-                           in6p->in6p_last_outif)
-                               in6p->in6p_last_outif = outif;
+                       if (rt != NULL &&
+                           (outif = rt->rt_ifp) != in6p->in6p_last_outifp)
+                               in6p->in6p_last_outifp = outif;
                }
                break;
        case AF_INET:
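
The flow-advisory plumbing above works in two halves: a send on a socket that is already flow-controlled fails fast with ENOBUFS, and a send whose output path reports FADV_FLOW_CONTROLLED or FADV_SUSPENDED marks the socket so the next send fails fast. A minimal sketch of that handshake; the enum and field names are stand-ins:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in advisory codes and socket state; names are illustrative. */
    enum fadv { FADV_SUCCESS, FADV_FLOW_CONTROLLED, FADV_SUSPENDED };

    struct sock {
        bool connected;
        bool flow_controlled;              /* cleared on interface feedback */
    };

    /* Pretend the driver queue is pushing back on every send. */
    static enum fadv
    if_output(void)
    {
        return FADV_FLOW_CONTROLLED;
    }

    static int
    sock_send(struct sock *so)
    {
        bool flowadv = so->connected;      /* advisory only when connected */

        if (flowadv && so->flow_controlled)
            return -1;                     /* ENOBUFS: fail fast, no output */

        enum fadv code = if_output();
        if (flowadv && (code == FADV_FLOW_CONTROLLED ||
            code == FADV_SUSPENDED)) {
            so->flow_controlled = true;    /* inp_set_fc_state() analogue */
            return -1;                     /* the packet was dropped */
        }
        return 0;
    }

    int
    main(void)
    {
        struct sock so = { true, false };

        printf("%d\n", sock_send(&so));    /* -1: output path pushed back */
        printf("%d\n", sock_send(&so));    /* -1: short-circuited */
        return 0;
    }
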
index c88c0d169e0e96b3f6862ebd997873b6cf4d6e01..5e9ac2da0c99f49cf27ba157e9fa318330e789ea 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -217,31 +217,35 @@ udp6_input(
 
        IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), return IPPROTO_DONE);
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        ifp = m->m_pkthdr.rcvif;
        ip6 = mtod(m, struct ip6_hdr *);
 
-#if defined(NFAITH) && 0 < NFAITH
-       if (faithprefix(&ip6->ip6_dst)) {
-               /* XXX send icmp6 host/port unreach? */
-               m_freem(m);
-               return IPPROTO_DONE;
-       }
-#endif
-
        udpstat.udps_ipackets++;
 
        plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
-       uh = (struct udphdr *)((caddr_t)ip6 + off);
+       uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
        ulen = ntohs((u_short)uh->uh_ulen);
 
        if (plen != ulen) {
                udpstat.udps_badlen++;
+
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->badlength, 1);
+
                goto bad;
        }
 
        /* destination port of 0 is illegal, based on RFC768. */
-       if (uh->uh_dport == 0)
+       if (uh->uh_dport == 0) {
+
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->port0, 1);
+
                goto bad;
+       }
 
        /*
         * Checksum extended UDP header and data.
@@ -254,6 +258,10 @@ udp6_input(
                else {
                        if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
                                udpstat.udps_badsum++;
+
+                               if (ifp->if_udp_stat != NULL)
+                                       atomic_add_64(&ifp->if_udp_stat->badchksum, 1);
+
                                goto bad;
                        }
                }
@@ -322,10 +330,15 @@ udp6_input(
                        if ((in6p->inp_vflag & INP_IPV6) == 0)
                                continue;
 
+                       if (ip6_restrictrecvif && ifp != NULL &&
+                           (ifp->if_eflags & IFEF_RESTRICTED_RECV) &&
+                           !(in6p->in6p_flags & IN6P_RECV_ANYIF))
+                               continue;
+
                        if (in_pcb_checkstate(in6p, WNT_ACQUIRE, 0) == WNT_STOPUSING)
                                continue;
 
-                       udp_lock(in6p->in6p_socket, 1, 0);      
+                       udp_lock(in6p->in6p_socket, 1, 0);
 
                        if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) {
                                udp_unlock(in6p->in6p_socket, 1, 0);
@@ -345,7 +358,7 @@ udp6_input(
                                struct sockaddr_in6      mcaddr;
                                int                      blocked;
 
-                               IM6O_LOCK(imo); 
+                               IM6O_LOCK(imo);
                                bzero(&mcaddr, sizeof(struct sockaddr_in6));
                                mcaddr.sin6_len = sizeof(struct sockaddr_in6);
                                mcaddr.sin6_family = AF_INET6;
@@ -354,7 +367,7 @@ udp6_input(
                                blocked = im6o_mc_filter(imo, ifp,
                                        (struct sockaddr *)&mcaddr,
                                        (struct sockaddr *)&fromsa);
-                               IM6O_UNLOCK(imo);       
+                               IM6O_UNLOCK(imo);
                                if (blocked != MCAST_PASS) {
                                        udp_unlock(in6p->in6p_socket, 1, 0);
                                        continue;
@@ -411,11 +424,18 @@ udp6_input(
                         */
                        if (reuse_sock == 0 || ((m = n) == NULL))
                                break;
+
+                       /*
+                        * Expect 32-bit aligned data pointer on strict-align
+                        * platforms.
+                        */
+                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
                        /*
                         * Recompute IP and UDP header pointers for new mbuf
                         */
                        ip6 = mtod(m, struct ip6_hdr *);
-                       uh = (struct udphdr *)((caddr_t)ip6 + off);
+                       uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
                }
                lck_rw_done(pcbinfo->mtx);
 
@@ -429,6 +449,9 @@ udp6_input(
 #ifndef __APPLE__
                        udpstat.udps_noportmcast++;
 #endif
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->port_unreach, 1);
+
                        goto bad;
                }
 
@@ -442,7 +465,11 @@ udp6_input(
        in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport,
                                  &ip6->ip6_dst, uh->uh_dport, 1,
                                  m->m_pkthdr.rcvif);
-       if (in6p == 0) {
+       if (in6p == NULL) {
+
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->port_unreach, 1);
+
                if (log_in_vain) {
                        char buf[INET6_ADDRSTRLEN];
 
@@ -465,6 +492,9 @@ udp6_input(
 #ifndef __APPLE__
                        udpstat.udps_noportmcast++;
 #endif
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->badmcast, 1);
+
                        goto bad;
                }
                icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
@@ -478,6 +508,10 @@ udp6_input(
                if (ipsec6_in_reject_so(m, in6p->in6p_socket)) {
                        IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
                        in_pcb_checkstate(in6p, WNT_RELEASE, 0);
+
+                       if (ifp->if_udp_stat != NULL)
+                               atomic_add_64(&ifp->if_udp_stat->badipsec, 1);
+
                        goto bad;
                }
        }
@@ -491,12 +525,16 @@ udp6_input(
 
        if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) {
                udp_unlock(in6p->in6p_socket, 1, 0);
+
+               if (ifp->if_udp_stat != NULL)
+                       atomic_add_64(&ifp->if_udp_stat->cleanup, 1);
+
                goto bad;
        }
-               
+
        init_sin6(&udp_in6, m); /* general init */
        udp_in6.sin6_port = uh->uh_sport;
-       if ((in6p->in6p_flags & IN6P_CONTROLOPTS) != 0 || 
+       if ((in6p->in6p_flags & IN6P_CONTROLOPTS) != 0 ||
                (in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
                (in6p->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
                ret = ip6_savecontrol(in6p, m, &opts);
@@ -688,7 +726,8 @@ udp6_attach(struct socket *so, __unused int proto, struct proc *p)
         * which may match an IPv4-mapped IPv6 address.
         */
        inp->inp_ip_ttl = ip_defttl;
-       nstat_udp_new_pcb(inp);
+       if (nstat_collect)
+               nstat_udp_new_pcb(inp);
        return 0;
 }
 
@@ -707,7 +746,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
        if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
                struct sockaddr_in6 *sin6_p;
 
-               sin6_p = (struct sockaddr_in6 *)nam;
+               sin6_p = (struct sockaddr_in6 *)(void *)nam;
 
                if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
                        inp->inp_vflag |= INP_IPV4;
@@ -739,7 +778,7 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
                struct sockaddr_in6 *sin6_p;
 
-               sin6_p = (struct sockaddr_in6 *)nam;
+               sin6_p = (struct sockaddr_in6 *)(void *)nam;
                if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
                        struct sockaddr_in sin;
 
@@ -765,6 +804,8 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                        inp->inp_vflag |= INP_IPV6;
                }
                soisconnected(so);
+               if (inp->inp_flowhash == 0)
+                       inp->inp_flowhash = inp_calc_flowhash(inp);
        }
        return error;
 }
@@ -801,8 +842,12 @@ udp6_disconnect(struct socket *so)
                return ENOTCONN;
 
        in6_pcbdisconnect(inp);
+
+       /* reset flow-controlled state, just in case */
+       inp_reset_fc_state(inp);
+
        inp->in6p_laddr = in6addr_any;
-       inp->in6p_last_outif = 0;
+       inp->in6p_last_outifp = NULL;
        so->so_state &= ~SS_ISCONNECTED;                /* XXX */
        return 0;
 }
@@ -838,7 +883,7 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
                if (addr == 0)
                        hasv4addr = (inp->inp_vflag & INP_IPV4);
                else {
-                       sin6 = (struct sockaddr_in6 *)addr;
+                       sin6 = (struct sockaddr_in6 *)(void *)addr;
                        hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
                                ? 1 : 0;
                }
index 457f772ecfc24cdb387aba87c3d24c5b0e434962..d1d59cd6fb56a6d00cd3e0b00b62fae42cb50969 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #include <net/net_osdep.h>
 
-#ifndef satosin
-#define satosin(s) ((struct sockaddr_in *)s)
-#endif
-
 #define FULLMASK       0xff
 
 lck_grp_t         *sadb_mutex_grp;
 lck_grp_attr_t    *sadb_mutex_grp_attr;
 lck_attr_t        *sadb_mutex_attr;
-lck_mtx_t         *sadb_mutex;
+decl_lck_mtx_data(, sadb_mutex_data);
+lck_mtx_t         *sadb_mutex = &sadb_mutex_data;
 
 lck_grp_t         *pfkey_stat_mutex_grp;
 lck_grp_attr_t    *pfkey_stat_mutex_grp_attr;
 lck_attr_t        *pfkey_stat_mutex_attr;
-lck_mtx_t         *pfkey_stat_mutex;
+decl_lck_mtx_data(, pfkey_stat_mutex_data);
+lck_mtx_t         *pfkey_stat_mutex = &pfkey_stat_mutex_data;
 
 /*
  * Note on SA reference counting:
@@ -175,7 +173,7 @@ static int key_blockacq_count = 10; /* counter for blocking SADB_ACQUIRE.*/
 static int key_blockacq_lifetime = 20; /* lifetime for blocking SADB_ACQUIRE.*/
 static int key_preferred_oldsa = 0;    /* preferred old sa rather than new sa.*/
 __private_extern__ int natt_keepalive_interval = 20;   /* interval between natt keepalives.*/
-static int ipsec_policy_count = 0;
+__private_extern__ int ipsec_policy_count = 0;
 static int ipsec_sav_count = 0;
 
 static u_int32_t acq_seq = 0;
@@ -456,10 +454,8 @@ static struct mbuf *key_setdumpsp(struct secpolicy *,
 static u_int key_getspreqmsglen(struct secpolicy *);
 static int key_spdexpire(struct secpolicy *);
 static struct secashead *key_newsah(struct secasindex *, u_int8_t);
-static void key_delsah(struct secashead *);
 static struct secasvar *key_newsav(struct mbuf *,
        const struct sadb_msghdr *, struct secashead *, int *);
-static void key_delsav(struct secasvar *);
 static struct secashead *key_getsah(struct secasindex *);
 static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t);
 static void key_setspi __P((struct secasvar *, u_int32_t));
@@ -550,9 +546,24 @@ static int key_promisc(struct socket *, struct mbuf *,
 static int key_senderror(struct socket *, struct mbuf *, int);
 static int key_validate_ext(const struct sadb_ext *, int);
 static int key_align(struct mbuf *, struct sadb_msghdr *);
-static void key_sa_chgstate(struct secasvar *, u_int8_t);
 static struct mbuf *key_alloc_mbuf(int);
 static int key_getsastat (struct socket *, struct mbuf *, const struct sadb_msghdr *);
+static int key_setsaval2(struct secasvar      *sav,
+                        u_int8_t              satype,
+                        u_int8_t              alg_auth,
+                        u_int8_t              alg_enc,
+                        u_int32_t             flags,
+                        u_int8_t              replay,
+                        struct sadb_key      *key_auth,
+                        u_int16_t             key_auth_len,
+                        struct sadb_key      *key_enc,
+                        u_int16_t             key_enc_len,
+                        u_int16_t             natt_port,
+                        u_int32_t             seq,
+                        u_int32_t             spi,
+                        u_int32_t             pid,
+                        struct sadb_lifetime *lifetime_hard,
+                        struct sadb_lifetime *lifetime_soft);
 
 extern int ipsec_bypass;
 extern int esp_udp_encap_port;
@@ -577,19 +588,13 @@ key_init(void)
        sadb_mutex_grp = lck_grp_alloc_init("sadb", sadb_mutex_grp_attr);
        sadb_mutex_attr = lck_attr_alloc_init();
 
-       if ((sadb_mutex = lck_mtx_alloc_init(sadb_mutex_grp, sadb_mutex_attr)) == NULL) {
-               printf("key_init: can't alloc sadb_mutex\n");
-               return;
-       }
+       lck_mtx_init(sadb_mutex, sadb_mutex_grp, sadb_mutex_attr);
        
        pfkey_stat_mutex_grp_attr = lck_grp_attr_alloc_init();
        pfkey_stat_mutex_grp = lck_grp_alloc_init("pfkey_stat", pfkey_stat_mutex_grp_attr);
        pfkey_stat_mutex_attr = lck_attr_alloc_init();
 
-       if ((pfkey_stat_mutex = lck_mtx_alloc_init(pfkey_stat_mutex_grp, pfkey_stat_mutex_attr)) == NULL) {
-               printf("key_init: can't alloc pfkey_stat_mutex\n");
-               return;
-       }
+       lck_mtx_init(pfkey_stat_mutex, pfkey_stat_mutex_grp, pfkey_stat_mutex_attr);
 
        for (i = 0; i < SPIHASHSIZE; i++)
                LIST_INIT(&spihash[i]);
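The hunks above explain each other: key_init() previously had to handle lck_mtx_alloc_init() returning NULL and bail out, leaving PF_KEY without its locks. With the mutexes declared as static data via decl_lck_mtx_data() and initialized in place by lck_mtx_init(), that failure path disappears. A userland sketch of the same trade-off, using POSIX threads as a stand-in for the xnu lock KPIs (the pthread code is an analogue, not part of this commit):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Before: the mutex lives on the heap, so initialization can fail. */
    static pthread_mutex_t *mutex_dyn;

    static int init_dynamic(void)
    {
            mutex_dyn = malloc(sizeof(*mutex_dyn));
            if (mutex_dyn == NULL)
                    return -1;      /* the failure path the diff removes */
            return pthread_mutex_init(mutex_dyn, NULL);
    }

    /* After: storage is static, mirroring decl_lck_mtx_data(). */
    static pthread_mutex_t mutex_data;
    static pthread_mutex_t *sadb_mutex = &mutex_data;

    int main(void)
    {
            if (init_dynamic() != 0)
                    fprintf(stderr, "dynamic init failed\n");
            pthread_mutex_init(sadb_mutex, NULL);  /* no storage to run out of */
            pthread_mutex_lock(sadb_mutex);
            pthread_mutex_unlock(sadb_mutex);
            return 0;
    }

The design point is simply that storage which always exists needs no allocation check.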
@@ -1860,8 +1865,8 @@ key_msg2sp(
                                return NULL;
                        }
 
-                       xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr
-                                        + xisr->sadb_x_ipsecrequest_len);
+                       xisr = (struct sadb_x_ipsecrequest *)(void *)
+                           ((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len);
                }
            }
                break;
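This is the first of many hunks in the commit that bounce pointer casts through (void *), e.g. (struct sadb_x_ipsecrequest *)(void *)((caddr_t)xisr + len). A plausible reading is that the intermediate (void *) quiets the compiler's cast-alignment diagnostic (clang's -Wcast-align) on casts from byte pointers to more strictly aligned structs; the commit itself carries no commentary. A minimal illustration of the idiom:

    #include <stdint.h>
    #include <stdio.h>

    struct ext_hdr { uint16_t len; uint16_t type; };

    int main(void)
    {
            /* Ensure the buffer really is suitably aligned for ext_hdr. */
            _Alignas(struct ext_hdr) char buf[sizeof(struct ext_hdr)] = {0};

            /* (struct ext_hdr *)buf alone can trigger -Wcast-align, since a
             * char pointer has weaker alignment than a struct pointer; going
             * through (void *) asserts the caller arranged the alignment. */
            struct ext_hdr *eh = (struct ext_hdr *)(void *)buf;
            eh->len = 8;
            eh->type = 1;
            printf("%u %u\n", eh->len, eh->type);
            return 0;
    }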
@@ -1881,13 +1886,40 @@ key_newreqid(void)
 {
        lck_mtx_lock(sadb_mutex);
        static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;
+       int done = 0;
 
-       auto_reqid = (auto_reqid == ~0
-                       ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1);
-       lck_mtx_unlock(sadb_mutex);
+       /* The reqid must be limited to 16 bits because the PF_KEY message
+          format only carries 16 bits for this field; once the value grows
+          past 16 bits, IPSec stops working.  Changing the PF_KEY message
+          format would introduce compatibility issues, so this code instead
+          tests whether the tentative reqid is already in use. */
+
+       while (!done) {
+               struct secpolicy *sp;
+               struct ipsecrequest *isr;         
+               int dir;
+
+               auto_reqid = (auto_reqid == 0xFFFF
+                             ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1);
 
-       /* XXX should be unique check */
+               /* check for uniqueness */
+               done = 1;
+               for (dir = 0; dir < IPSEC_DIR_MAX; dir++) {
+                       LIST_FOREACH(sp, &sptree[dir], chain) {
+                               for (isr = sp->req; isr != NULL; isr = isr->next) {
+                                       if (isr->saidx.reqid == auto_reqid) {
+                                               done = 0;
+                                               break;
+                                       }
+                               }
+                               if (done == 0)
+                                       break;
+                       }
+                       if (done == 0)
+                               break;
+               }       
+       }
 
+       lck_mtx_unlock(sadb_mutex);
        return auto_reqid;
 }
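The rewritten key_newreqid() wraps the auto-allocated reqid at 0xFFFF rather than at ~0, since the PF_KEY wire format only carries 16 bits of reqid, and it replaces the old "XXX should be unique check" note with an actual scan of every policy's request chain. A standalone model of the allocator, with the sptree walk reduced to a hypothetical in_use() predicate (IPSEC_MANUAL_REQID_MAX shown with its value from xnu's netinet6/ipsec.h):

    #include <stdint.h>
    #include <stdio.h>

    #define IPSEC_MANUAL_REQID_MAX 0x3fff   /* from netinet6/ipsec.h */

    /* Hypothetical stand-in for the sptree/ipsecrequest walk. */
    static int in_use(uint32_t reqid)
    {
            return reqid == 0x4001 || reqid == 0x4002; /* pretend taken */
    }

    static uint32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;

    static uint32_t new_reqid(void)
    {
            do {
                    /* wrap within 16 bits instead of at ~0, as the hunk does */
                    auto_reqid = (auto_reqid == 0xFFFF)
                        ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1;
            } while (in_use(auto_reqid));
            return auto_reqid;
    }

    int main(void)
    {
            printf("0x%x\n", new_reqid());  /* 0x4003: first free value */
            return 0;
    }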
 
@@ -1935,7 +1967,7 @@ key_sp2msg(
 
                for (isr = sp->req; isr != NULL; isr = isr->next) {
 
-                       xisr = (struct sadb_x_ipsecrequest *)p;
+                       xisr = (struct sadb_x_ipsecrequest *)(void *)p;
 
                        xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto;
                        xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode;
@@ -1988,7 +2020,7 @@ key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp,
                        if (len > MHLEN)
                                panic("assumption failed");
 #endif
-                       MGETHDR(n, M_DONTWAIT, MT_DATA);
+                       MGETHDR(n, M_WAITOK, MT_DATA);
                        if (!n)
                                goto fail;
                        n->m_len = len;
@@ -2007,7 +2039,7 @@ key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp,
                            mtod(n, caddr_t));
                } else {
                        n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx],
-                           M_DONTWAIT);
+                           M_WAITOK);
                }
                if (n == NULL)
                        goto fail;
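Throughout the commit, mbuf allocations on PF_KEY control paths move from M_DONTWAIT to M_WAITOK: the caller may now block until memory is available instead of failing immediately under transient pressure. Note the callers still guard against NULL; several hunks below also add an explicit `if (n && ...)` check before MCLGET. A toy model of the two policies, not the real mbuf allocator:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    enum { DONTWAIT, WAITOK };

    /* Toy allocator: pretend the first attempt always fails. */
    static void *try_alloc(size_t len)
    {
            static int transient_pressure = 1;
            if (transient_pressure) {
                    transient_pressure = 0;
                    return NULL;
            }
            return malloc(len);
    }

    static void *toy_mget(size_t len, int how)
    {
            void *p = try_alloc(len);
            while (p == NULL && how == WAITOK) {
                    usleep(1000);           /* WAITOK: sleep and retry */
                    p = try_alloc(len);
            }
            return p;                       /* DONTWAIT: may be NULL */
    }

    int main(void)
    {
            printf("DONTWAIT: %p\n", toy_mget(64, DONTWAIT)); /* NULL here */
            void *p = toy_mget(64, WAITOK);                   /* succeeds */
            printf("WAITOK:   %p\n", p);
            free(p);
            return 0;
    }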
@@ -2085,12 +2117,13 @@ key_spdadd(
                        ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n"));
                        return key_senderror(so, m, EINVAL);
                }
-               lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD];
+               lft = (struct sadb_lifetime *)
+                   (void *)mhp->ext[SADB_EXT_LIFETIME_HARD];
        }
 
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
-       xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
+       xpl0 = (struct sadb_x_policy *)(void *)mhp->ext[SADB_X_EXT_POLICY];
 
        /* make secindex */
        /* XXX boundary check against sa_len */
@@ -2289,7 +2322,7 @@ key_spdadd(
                /* n is already freed */
                return key_senderror(so, m, ENOBUFS);
        }
-       xpl = (struct sadb_x_policy *)(mtod(mpolicy, caddr_t) + off);
+       xpl = (struct sadb_x_policy *)(void *)(mtod(mpolicy, caddr_t) + off);
        if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) {
                m_freem(n);
                return key_senderror(so, m, EINVAL);
@@ -2377,7 +2410,7 @@ key_spddelete(
 
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
-       xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
+       xpl0 = (struct sadb_x_policy *)(void *)mhp->ext[SADB_X_EXT_POLICY];
 
        /* make secindex */
        /* XXX boundary check against sa_len */
@@ -2469,7 +2502,8 @@ key_spddelete2(
                return 0;
        }
 
-       id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id;
+       id = ((struct sadb_x_policy *)
+           (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id;
 
        /* Is there SP in SPD ? */
        lck_mtx_lock(sadb_mutex);
@@ -2493,9 +2527,9 @@ key_spddelete2(
 
        if (len > MCLBYTES)
                return key_senderror(so, m, ENOBUFS);
-       MGETHDR(n, M_DONTWAIT, MT_DATA);
+       MGETHDR(n, M_WAITOK, MT_DATA);
        if (n && len > MHLEN) {
-               MCLGET(n, M_DONTWAIT);
+               MCLGET(n, M_WAITOK);
                if ((n->m_flags & M_EXT) == 0) {
                        m_freem(n);
                        n = NULL;
@@ -2517,7 +2551,7 @@ key_spddelete2(
 #endif
 
        n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY],
-           mhp->extlen[SADB_X_EXT_POLICY], M_DONTWAIT);
+                           mhp->extlen[SADB_X_EXT_POLICY], M_WAITOK);
        if (!n->m_next) {
                m_freem(n);
                return key_senderror(so, m, ENOBUFS);
@@ -2570,7 +2604,8 @@ key_spdget(
                return key_senderror(so, m, EINVAL);
        }
 
-       id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id;
+       id = ((struct sadb_x_policy *)
+           (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id;
 
        /* Is there SP in SPD ? */
        lck_mtx_lock(sadb_mutex);
@@ -2940,7 +2975,7 @@ key_spdexpire(
        lt->sadb_lifetime_bytes = 0;
        lt->sadb_lifetime_addtime = sp->created;
        lt->sadb_lifetime_usetime = sp->lastused;
-       lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2);
+       lt = (struct sadb_lifetime *)(void *)(mtod(m, caddr_t) + len / 2);
        lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime));
        lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
        lt->sadb_lifetime_allocations = 0;
@@ -3061,7 +3096,7 @@ key_newsah(
 /*
  * delete an SA index and all SAs registered to it.
  */
-static void
+void
 key_delsah(
        struct secashead *sah)
 {
@@ -3185,7 +3220,7 @@ key_newsav(
                        *errp = EINVAL;
                        return NULL;
                }
-               xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+               xsa = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
                key_setspi(newsav, xsa->sadb_sa_spi);
                newsav->seq = mhp->msg->sadb_msg_seq;
                break;
@@ -3226,10 +3261,115 @@ key_newsav(
        return newsav;
 }
 
+/*
+ * Allocate a new SA in LARVAL state.  Called from key_add() and
+ * key_getspi(); the values passed in are copied into a new buffer.
+ * When the SADB message type is GETSPI:
+ *     the sequence number is taken from acq_seq++,
+ *     the SPI is set to zero, and
+ *     key_setsaval() is not called.
+ * OUT:        NULL    : fail
+ *     others  : pointer to new secasvar.
+ */
+struct secasvar *
+key_newsav2(struct secashead     *sah,
+                       u_int8_t              satype,
+                       u_int8_t              alg_auth,
+                       u_int8_t              alg_enc,
+                       u_int32_t             flags,
+                       u_int8_t              replay,
+                       struct sadb_key      *key_auth,
+                       u_int16_t             key_auth_len,
+                       struct sadb_key      *key_enc,
+                       u_int16_t             key_enc_len,
+                       u_int16_t             natt_port,
+                       u_int32_t             seq,
+                       u_int32_t             spi,
+                       u_int32_t             pid,
+                       struct sadb_lifetime *lifetime_hard,
+                       struct sadb_lifetime *lifetime_soft)
+{
+       struct secasvar *newsav;
+       
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
+       
+       /* sanity check */
+       if (sah == NULL)
+               panic("key_newsa: NULL pointer is passed.\n");
+       
+       KMALLOC_NOWAIT(newsav, struct secasvar *, sizeof(struct secasvar));
+       if (newsav == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               KMALLOC_WAIT(newsav, struct secasvar *, sizeof(struct secasvar));
+               lck_mtx_lock(sadb_mutex);
+               if (newsav == NULL) {
+                       ipseclog((LOG_DEBUG, "key_newsa: No more memory.\n"));
+                       return NULL;
+               }
+       }
+       bzero((caddr_t)newsav, sizeof(struct secasvar));
+
+#if IPSEC_DOSEQCHECK
+       /* sync sequence number */
+       if (seq == 0)
+               newsav->seq = (acq_seq = (acq_seq == ~0 ? 1 : ++acq_seq));
+       else
+#endif
+               newsav->seq = seq;
+       key_setspi(newsav, spi);
+
+       if (key_setsaval2(newsav,
+                                         satype,
+                                         alg_auth,
+                                         alg_enc,
+                                         flags,
+                                         replay,
+                                         key_auth,
+                                         key_auth_len,
+                                         key_enc,
+                                         key_enc_len,
+                                         natt_port,
+                                         seq,
+                                         spi,
+                                         pid,
+                                         lifetime_hard,
+                                         lifetime_soft)) {
+               if (newsav->spihash.le_prev || newsav->spihash.le_next)
+                       LIST_REMOVE(newsav, spihash);
+               KFREE(newsav);
+               return NULL;
+       }
+       
+       /* reset created */
+    {
+               struct timeval tv;
+               microtime(&tv);
+               newsav->created = tv.tv_sec;
+    }
+       
+       newsav->pid = pid;
+       
+       /* add to satree */
+       newsav->sah = sah;
+       newsav->refcnt = 1;
+       if (spi && key_auth && key_auth_len && key_enc && key_enc_len) {
+               newsav->state = SADB_SASTATE_MATURE;
+               LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_MATURE], newsav,
+                                                secasvar, chain);
+       } else {
+               newsav->state = SADB_SASTATE_LARVAL;
+               LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav,
+                                                secasvar, chain);
+       }
+       ipsec_sav_count++;
+       
+       return newsav;
+}
+
 /*
  * free() SA variable entry.
  */
-static void
+void
 key_delsav(
        struct secasvar *sav)
 {
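key_newsav2() above shows a pattern this file relies on heavily: attempt a non-blocking allocation while holding sadb_mutex; only if it fails, drop the mutex, perform a blocking allocation, and re-take the mutex. A compact pthread-based sketch of the pattern (the kmalloc_* stand-ins and the lock name are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t sadb_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-ins for KMALLOC_NOWAIT / KMALLOC_WAIT. */
    static void *kmalloc_nowait(size_t len) { return malloc(len); }
    static void *kmalloc_wait(size_t len)   { return malloc(len); }

    static void *alloc_locked(size_t len)
    {
            /* caller holds sadb_mutex */
            void *p = kmalloc_nowait(len);
            if (p == NULL) {
                    /* a blocking allocation must not sleep with the mutex held */
                    pthread_mutex_unlock(&sadb_mutex);
                    p = kmalloc_wait(len);
                    pthread_mutex_lock(&sadb_mutex);
                    /* NB: state guarded by the mutex may have changed here */
            }
            return p;
    }

    int main(void)
    {
            pthread_mutex_lock(&sadb_mutex);
            void *sav = alloc_locked(128);
            pthread_mutex_unlock(&sadb_mutex);
            free(sav);
            return 0;
    }

The comment in the sketch marks the cost of the pattern: anything read under the lock before the drop must be revalidated after the lock is re-taken.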
@@ -3316,6 +3456,21 @@ key_getsah(
        return NULL;
 }
 
+struct secashead *
+key_newsah2 (struct secasindex *saidx,
+                        u_int8_t           dir)
+{
+       struct secashead *sah;
+
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
+
+       sah = key_getsah(saidx);
+       if (!sah) {
+               return(key_newsah(saidx, dir));
+       }
+       return sah;
+}
+
 /*
  * check that the SPI is not duplicated.
  * NOTE: this function is slow because it searches the entire SAD.
@@ -3448,7 +3603,7 @@ key_setsaval(
        if (mhp->ext[SADB_EXT_SA] != NULL) {
                const struct sadb_sa *sa0;
 
-               sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+               sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
                if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) {
                        ipseclog((LOG_DEBUG, "key_setsaval: invalid message size.\n"));
                        error = EINVAL;
@@ -3647,7 +3802,8 @@ key_setsaval(
     {
        const struct sadb_lifetime *lft0;
 
-       lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD];
+       lft0 = (struct sadb_lifetime *)
+           (void *)mhp->ext[SADB_EXT_LIFETIME_HARD];
        if (lft0 != NULL) {
                if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) {
                        ipseclog((LOG_DEBUG, "key_setsaval: invalid hard lifetime ext len.\n"));
@@ -3664,7 +3820,8 @@ key_setsaval(
                /* to be initialized? */
        }
 
-       lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT];
+       lft0 = (struct sadb_lifetime *)
+           (void *)mhp->ext[SADB_EXT_LIFETIME_SOFT];
        if (lft0 != NULL) {
                if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) {
                        ipseclog((LOG_DEBUG, "key_setsaval: invalid soft lifetime ext len.\n"));
@@ -3725,6 +3882,224 @@ key_setsaval(
        return error;
 }
 
+/*
+ * Copy SA values from a PF_KEY message, except *SPI, SEQ, PID, STATE and TYPE*.
+ * You must update those yourself if needed.
+ * OUT:        0:      success.
+ *     !0:     failure.
+ *
+ * Does not modify the mbuf.  Does not free the mbuf on error.
+ */
+int
+key_setsaval2(struct secasvar      *sav,
+                         u_int8_t              satype,
+                         u_int8_t              alg_auth,
+                         u_int8_t              alg_enc,
+                         u_int32_t             flags,
+                         u_int8_t              replay,
+                         struct sadb_key      *key_auth,
+                         u_int16_t             key_auth_len,
+                         struct sadb_key      *key_enc,
+                         u_int16_t             key_enc_len,
+                         u_int16_t             natt_port,
+                         u_int32_t             seq,
+                         u_int32_t             spi,
+                         u_int32_t             pid,
+                         struct sadb_lifetime *lifetime_hard,
+                         struct sadb_lifetime *lifetime_soft)
+{
+#if IPSEC_ESP
+       const struct esp_algorithm *algo;
+#endif
+       int error = 0;
+       struct timeval tv;
+
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
+
+       /* initialization */
+       sav->replay = NULL;
+       sav->key_auth = NULL;
+       sav->key_enc = NULL;
+       sav->sched = NULL;
+       sav->schedlen = 0;
+       sav->iv = NULL;
+       sav->lft_c = NULL;
+       sav->lft_h = NULL;
+       sav->lft_s = NULL;
+       sav->remote_ike_port = 0;
+       sav->natt_last_activity = natt_now;
+       sav->natt_encapsulated_src_port = 0;
+
+       sav->alg_auth = alg_auth;
+       sav->alg_enc = alg_enc;
+       sav->flags = flags;
+       sav->pid = pid;
+       sav->seq = seq;
+       key_setspi(sav, htonl(spi));
+
+       /*
+        * Verify that a nat-traversal port was specified if
+        * the nat-traversal flag is set.
+        */
+       if ((sav->flags & SADB_X_EXT_NATT) != 0) {
+               if (natt_port == 0) {
+                       ipseclog((LOG_DEBUG, "key_setsaval2: natt port not set.\n"));
+                       error = EINVAL;
+                       goto fail;
+               }
+               sav->remote_ike_port = natt_port;
+       }
+
+       /*
+        * If the SADB_X_EXT_NATT_MULTIPLEUSERS flag is set, verify that
+        * SADB_X_EXT_NATT is set and that SADB_X_EXT_NATT_KEEPALIVE is
+        * not set (i.e. we're not behind NAT); otherwise clear the flag.
+        */
+       if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0)
+               if ((sav->flags & SADB_X_EXT_NATT) == 0 ||
+                       (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0)
+                       sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS;
+
+       /* replay window */
+       if ((flags & SADB_X_EXT_OLD) == 0) {
+               sav->replay = keydb_newsecreplay(replay);
+               if (sav->replay == NULL) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+                       error = ENOBUFS;
+                       goto fail;
+               }
+       }
+       
+       /* Authentication keys */
+       sav->key_auth = (__typeof__(sav->key_auth))key_newbuf(key_auth, key_auth_len);
+       if (sav->key_auth == NULL) {
+               ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+               error = ENOBUFS;
+               goto fail;
+       }
+       
+       /* Encryption key */
+       sav->key_enc = (__typeof__(sav->key_enc))key_newbuf(key_enc, key_enc_len);
+       if (sav->key_enc == NULL) {
+               ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+               error = ENOBUFS;
+               goto fail;
+       }
+       
+       /* set iv */
+       sav->ivlen = 0;
+
+       if (satype == SADB_SATYPE_ESP) {
+#if IPSEC_ESP
+               algo = esp_algorithm_lookup(sav->alg_enc);
+               if (algo && algo->ivlen)
+                       sav->ivlen = (*algo->ivlen)(algo, sav);
+               if (sav->ivlen != 0) {
+                       KMALLOC_NOWAIT(sav->iv, caddr_t, sav->ivlen);
+                       if (sav->iv == 0) {
+                               lck_mtx_unlock(sadb_mutex);
+                               KMALLOC_WAIT(sav->iv, caddr_t, sav->ivlen);
+                               lck_mtx_lock(sadb_mutex);
+                               if (sav->iv == 0) {
+                                       ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+                                       error = ENOBUFS;
+                                       goto fail;
+                               }
+                       }
+                       /* initialize */
+                       key_randomfill(sav->iv, sav->ivlen);
+               }
+#endif
+       }
+
+       /* reset created */
+       microtime(&tv);
+       sav->created = tv.tv_sec;
+       
+       /* make lifetime for CURRENT */
+       KMALLOC_NOWAIT(sav->lft_c, struct sadb_lifetime *,
+                                  sizeof(struct sadb_lifetime));
+       if (sav->lft_c == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               KMALLOC_WAIT(sav->lft_c, struct sadb_lifetime *,
+                                        sizeof(struct sadb_lifetime));
+           lck_mtx_lock(sadb_mutex);
+               if (sav->lft_c == NULL) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+                       error = ENOBUFS;
+                       goto fail;
+               }
+       }
+       
+       microtime(&tv);
+       
+       sav->lft_c->sadb_lifetime_len =
+       PFKEY_UNIT64(sizeof(struct sadb_lifetime));
+       sav->lft_c->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
+       sav->lft_c->sadb_lifetime_allocations = 0;
+       sav->lft_c->sadb_lifetime_bytes = 0;
+       sav->lft_c->sadb_lifetime_addtime = tv.tv_sec;
+       sav->lft_c->sadb_lifetime_usetime = 0;
+       
+       /* lifetimes for HARD and SOFT */
+       sav->lft_h = (__typeof__(sav->lft_h))key_newbuf(lifetime_hard,
+                                                                                                       sizeof(*lifetime_hard));
+       if (sav->lft_h == NULL) {
+               ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+               error = ENOBUFS;
+               goto fail;
+       }
+       sav->lft_s = (__typeof__(sav->lft_s))key_newbuf(lifetime_soft,
+                                                                                                       sizeof(*lifetime_soft));
+       if (sav->lft_s == NULL) {
+               ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n"));
+               error = ENOBUFS;
+               goto fail;
+       }
+       
+       return 0;
+       
+fail:
+       /* roll back the partial initialization */
+       if (sav->replay != NULL) {
+               keydb_delsecreplay(sav->replay);
+               sav->replay = NULL;
+       }
+       if (sav->key_auth != NULL) {
+               bzero(_KEYBUF(sav->key_auth), _KEYLEN(sav->key_auth));
+               KFREE(sav->key_auth);
+               sav->key_auth = NULL;
+       }
+       if (sav->key_enc != NULL) {
+               bzero(_KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc));
+               KFREE(sav->key_enc);
+               sav->key_enc = NULL;
+       }
+       if (sav->sched) {
+               bzero(sav->sched, sav->schedlen);
+               KFREE(sav->sched);
+               sav->sched = NULL;
+       }
+       if (sav->iv != NULL) {
+               KFREE(sav->iv);
+               sav->iv = NULL;
+       }
+       if (sav->lft_c != NULL) {
+               KFREE(sav->lft_c);
+               sav->lft_c = NULL;
+       }
+       if (sav->lft_h != NULL) {
+               KFREE(sav->lft_h);
+               sav->lft_h = NULL;
+       }
+       if (sav->lft_s != NULL) {
+               KFREE(sav->lft_s);
+               sav->lft_s = NULL;
+       }
+       
+       return error;
+}
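Note that the fail: path of key_setsaval2() bzero()s the authentication and encryption key buffers (and the cipher schedule) before KFREE()ing them, so key material does not survive in freed kernel memory. The same zero-before-free idiom in portable userland C (the volatile-function-pointer trick is one common way to keep the compiler from eliding the wipe; explicit_bzero()/memset_s() are modern equivalents):

    #include <stdlib.h>
    #include <string.h>

    /* A volatile function pointer keeps the compiler from optimizing the
     * wipe away behind our back. */
    static void *(*volatile memset_v)(void *, int, size_t) = memset;

    static void secure_free(void *p, size_t len)
    {
            if (p == NULL)
                    return;
            memset_v(p, 0, len);    /* wipe, as the fail: path bzero()s keys */
            free(p);
    }

    int main(void)
    {
            char *key = malloc(32);
            if (key != NULL) {
                    memset(key, 'K', 32);   /* stand-in for key material */
                    secure_free(key, 32);
            }
            return 0;
    }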
+
 /*
  * validation with a secasvar entry, and set SADB_SATYPE_MATURE.
  * OUT:        0:      valid
@@ -4016,7 +4391,7 @@ key_setdumpsa(
                if ((!m && !p) || (m && p))
                        goto fail;
                if (p && tres) {
-                       M_PREPEND(tres, l, M_DONTWAIT);
+                       M_PREPEND(tres, l, M_WAITOK);
                        if (!tres)
                                goto fail;
                        bcopy(p, mtod(tres, caddr_t), l);
@@ -4416,7 +4791,7 @@ key_ismyaddr(
 #if INET
        case AF_INET:
                lck_rw_lock_shared(in_ifaddr_rwlock);
-               sin = (struct sockaddr_in *)sa;
+               sin = (struct sockaddr_in *)(void *)sa;
                for (ia = in_ifaddrhead.tqh_first; ia;
                     ia = ia->ia_link.tqe_next) {
                        IFA_LOCK_SPIN(&ia->ia_ifa);
@@ -4435,7 +4810,7 @@ key_ismyaddr(
 #endif
 #if INET6
        case AF_INET6:
-               return key_ismyaddr6((struct sockaddr_in6 *)sa);
+               return key_ismyaddr6((struct sockaddr_in6 *)(void *)sa);
 #endif
        }
 
@@ -4922,7 +5297,8 @@ key_timehandler(void)
                         */
                        if (savkabuf && savkacount < savbufcount) {
                                sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]);   //%%% should we check dying list if this is empty???
-                               if (natt_keepalive_interval && sav && (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0) {
+                               if (natt_keepalive_interval && sav &&
+                                       (sav->flags & (SADB_X_EXT_NATT_KEEPALIVE | SADB_X_EXT_ESP_KEEPALIVE)) != 0) {
                                        sav->refcnt++;
                                        *savkaptr++ = sav;
                                        savkacount++;
@@ -5363,8 +5739,10 @@ key_getspi(
                return key_senderror(so, m, EINVAL);
        }
        if (mhp->ext[SADB_X_EXT_SA2] != NULL) {
-               mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
-               reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
+               mode = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
+               reqid = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
        } else {
                mode = IPSEC_MODE_ANY;
                reqid = 0;
@@ -5385,13 +5763,13 @@ key_getspi(
                if (((struct sockaddr *)(src0 + 1))->sa_len !=
                    sizeof(struct sockaddr_in))
                        return key_senderror(so, m, EINVAL);
-               ((struct sockaddr_in *)(src0 + 1))->sin_port = 0;
+               ((struct sockaddr_in *)(void *)(src0 + 1))->sin_port = 0;
                break;
        case AF_INET6:
                if (((struct sockaddr *)(src0 + 1))->sa_len !=
                    sizeof(struct sockaddr_in6))
                        return key_senderror(so, m, EINVAL);
-               ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0;
+               ((struct sockaddr_in6 *)(void *)(src0 + 1))->sin6_port = 0;
                break;
        default:
                ; /*???*/
@@ -5401,13 +5779,13 @@ key_getspi(
                if (((struct sockaddr *)(dst0 + 1))->sa_len !=
                    sizeof(struct sockaddr_in))
                        return key_senderror(so, m, EINVAL);
-               ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0;
+               ((struct sockaddr_in *)(void *)(dst0 + 1))->sin_port = 0;
                break;
        case AF_INET6:
                if (((struct sockaddr *)(dst0 + 1))->sa_len !=
                    sizeof(struct sockaddr_in6))
                        return key_senderror(so, m, EINVAL);
-               ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0;
+               ((struct sockaddr_in6 *)(void *)(dst0 + 1))->sin6_port = 0;
                break;
        default:
                ; /*???*/
@@ -5417,10 +5795,10 @@ key_getspi(
        KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
        lck_mtx_lock(sadb_mutex);
-       
+
        /* SPI allocation */
-       spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE],
-                              &saidx);
+       spi = key_do_getnewspi((struct sadb_spirange *)
+           (void *)mhp->ext[SADB_EXT_SPIRANGE], &saidx);
        if (spi == 0) {
                lck_mtx_unlock(sadb_mutex);
                return key_senderror(so, m, EINVAL);
@@ -5476,9 +5854,9 @@ key_getspi(
        if (len > MCLBYTES)
                return key_senderror(so, m, ENOBUFS);
 
-       MGETHDR(n, M_DONTWAIT, MT_DATA);
-       if (len > MHLEN) {
-               MCLGET(n, M_DONTWAIT);
+       MGETHDR(n, M_WAITOK, MT_DATA);
+       if (n && len > MHLEN) {
+               MCLGET(n, M_WAITOK);
                if ((n->m_flags & M_EXT) == 0) {
                        m_freem(n);
                        n = NULL;
@@ -5494,7 +5872,7 @@ key_getspi(
        m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off);
        off += PFKEY_ALIGN8(sizeof(struct sadb_msg));
 
-       m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off);
+       m_sa = (struct sadb_sa *)(void *)(mtod(n, caddr_t) + off);
        m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa));
        m_sa->sadb_sa_exttype = SADB_EXT_SA;
        m_sa->sadb_sa_spi = htonl(spi);
@@ -5533,9 +5911,65 @@ key_getspi(
     }
 }
 
+u_int32_t
+key_getspi2(struct sockaddr      *src,
+                       struct sockaddr      *dst,
+                       u_int8_t              proto,
+                       u_int8_t              mode,
+                       u_int32_t             reqid,
+                       struct sadb_spirange *spirange)
+{
+       u_int32_t         spi;
+       struct secasindex saidx;
+
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+       /* XXX boundary check against sa_len */
+       KEY_SETSECASIDX(proto, mode, reqid, src, dst, &saidx);
+
+       /* make sure the port number is zero. */
+       switch (((struct sockaddr *)&saidx.src)->sa_family) {
+               case AF_INET:
+                       if (((struct sockaddr *)&saidx.src)->sa_len != sizeof(struct sockaddr_in))
+                               return 0;
+                       ((struct sockaddr_in *)&saidx.src)->sin_port = 0;
+                       break;
+               case AF_INET6:
+                       if (((struct sockaddr *)&saidx.src)->sa_len != sizeof(struct sockaddr_in6))
+                               return 0;
+                       ((struct sockaddr_in6 *)&saidx.src)->sin6_port = 0;
+                       break;
+               default:
+                       ; /*???*/
+       }
+       switch (((struct sockaddr *)&saidx.dst)->sa_family) {
+               case AF_INET:
+                       if (((struct sockaddr *)&saidx.dst)->sa_len != sizeof(struct sockaddr_in))
+                               return 0;
+                       ((struct sockaddr_in *)&saidx.dst)->sin_port = 0;
+                       break;
+               case AF_INET6:
+                       if (((struct sockaddr *)&saidx.dst)->sa_len != sizeof(struct sockaddr_in6))
+                               return 0;
+                       ((struct sockaddr_in6 *)&saidx.dst)->sin6_port = 0;
+                       break;
+               default:
+                       ; /*???*/
+       }
+
+       lck_mtx_lock(sadb_mutex);
+       
+       /* SPI allocation */
+       spi = key_do_getnewspi(spirange, &saidx);
+
+       lck_mtx_unlock(sadb_mutex);
+
+       return spi;
+}
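Like key_getspi(), the new key_getspi2() zeroes the port in both endpoint addresses before allocating an SPI, so SAs are keyed on address alone, and it rejects sockaddrs whose sa_len does not match their family. A userland illustration of that normalization, using the same (void *) cast idiom the commit adopts (the helper name is invented; assumes BSD-style sockaddrs with sa_len, as on Darwin):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Zero the port, mirroring the switches in key_getspi2();
     * returns 0 on a malformed length. */
    static int normalize_port(struct sockaddr *sa)
    {
            switch (sa->sa_family) {
            case AF_INET:
                    if (sa->sa_len != sizeof(struct sockaddr_in))
                            return 0;
                    ((struct sockaddr_in *)(void *)sa)->sin_port = 0;
                    return 1;
            case AF_INET6:
                    if (sa->sa_len != sizeof(struct sockaddr_in6))
                            return 0;
                    ((struct sockaddr_in6 *)(void *)sa)->sin6_port = 0;
                    return 1;
            default:
                    return 1;       /* other families pass through untouched */
            }
    }

    int main(void)
    {
            struct sockaddr_in sin;
            memset(&sin, 0, sizeof(sin));
            sin.sin_len = sizeof(sin);
            sin.sin_family = AF_INET;
            sin.sin_port = htons(500);      /* IKE */
            normalize_port((struct sockaddr *)&sin);
            printf("port after normalize: %u\n", ntohs(sin.sin_port)); /* 0 */
            return 0;
    }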
+
 /*
  * allocating new SPI
- * called by key_getspi().
+ * called by key_getspi() and key_getspi2().
  * OUT:
  *     0:      failure.
  *     others: success.
@@ -5673,15 +6107,17 @@ key_update(
                return key_senderror(so, m, EINVAL);
        }
        if (mhp->ext[SADB_X_EXT_SA2] != NULL) {
-               mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
-               reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
+               mode = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
+               reqid = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
        } else {
                mode = IPSEC_MODE_ANY;
                reqid = 0;
        }
        /* XXX boundary checking for other extensions */
 
-       sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+       sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
 
@@ -5689,7 +6125,7 @@ key_update(
        KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
        lck_mtx_lock(sadb_mutex);
-       
+
        /* get a SA header */
        if ((sah = key_getsah(&saidx)) == NULL) {
                lck_mtx_unlock(sadb_mutex);
@@ -5896,14 +6332,16 @@ key_add(
                return key_senderror(so, m, EINVAL);
        }
        if (mhp->ext[SADB_X_EXT_SA2] != NULL) {
-               mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
-               reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
+               mode = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode;
+               reqid = ((struct sadb_x_sa2 *)
+                   (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid;
        } else {
                mode = IPSEC_MODE_ANY;
                reqid = 0;
        }
 
-       sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+       sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 
@@ -5911,7 +6349,7 @@ key_add(
        KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
        lck_mtx_lock(sadb_mutex);
-       
+
        /* get a SA header */
        if ((newsah = key_getsah(&saidx)) == NULL) {
                /* create a new SA header: key_addspi is always used for outbound spi */
@@ -6011,8 +6449,10 @@ key_setident(
                return EINVAL;
        }
 
-       idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC];
-       iddst = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_DST];
+       idsrc = (const struct sadb_ident *)
+           (void *)mhp->ext[SADB_EXT_IDENTITY_SRC];
+       iddst = (const struct sadb_ident *)
+           (void *)mhp->ext[SADB_EXT_IDENTITY_DST];
        idsrclen = mhp->extlen[SADB_EXT_IDENTITY_SRC];
        iddstlen = mhp->extlen[SADB_EXT_IDENTITY_DST];
 
@@ -6167,7 +6607,7 @@ key_delete(
                return key_senderror(so, m, EINVAL);
        }
 
-       sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+       sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
 
@@ -6354,7 +6794,7 @@ key_get(
                return key_senderror(so, m, EINVAL);
        }
 
-       sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA];
+       sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 
@@ -6535,7 +6975,7 @@ key_getcomb_esp(void)
                        if (l > MLEN)
                                panic("assumption failed in key_getcomb_esp");
 #endif
-                       MGET(m, M_DONTWAIT, MT_DATA);
+                       MGET(m, M_WAITOK, MT_DATA);
                        if (m) {
                                M_ALIGN(m, l);
                                m->m_len = l;
@@ -6560,7 +7000,8 @@ key_getcomb_esp(void)
                                /* m is already freed */
                                goto fail;
                        }
-                       comb = (struct sadb_comb *)(mtod(n, caddr_t) + o);
+                       comb = (struct sadb_comb *)
+                           (void *)(mtod(n, caddr_t) + o);
                        bzero(comb, sizeof(*comb));
                        key_getcomb_setlifetime(comb);
                        comb->sadb_comb_encrypt = i;
@@ -6619,14 +7060,14 @@ key_getcomb_ah(void)
                        if (l > MLEN)
                                panic("assumption failed in key_getcomb_ah");
 #endif
-                       MGET(m, M_DONTWAIT, MT_DATA);
+                       MGET(m, M_WAITOK, MT_DATA);
                        if (m) {
                                M_ALIGN(m, l);
                                m->m_len = l;
                                m->m_next = NULL;
                        }
                } else
-                       M_PREPEND(m, l, M_DONTWAIT);
+                       M_PREPEND(m, l, M_WAITOK);
                if (!m)
                        return NULL;
 
@@ -6665,14 +7106,14 @@ key_getcomb_ipcomp(void)
                        if (l > MLEN)
                                panic("assumption failed in key_getcomb_ipcomp");
 #endif
-                       MGET(m, M_DONTWAIT, MT_DATA);
+                       MGET(m, M_WAITOK, MT_DATA);
                        if (m) {
                                M_ALIGN(m, l);
                                m->m_len = l;
                                m->m_next = NULL;
                        }
                } else
-                       M_PREPEND(m, l, M_DONTWAIT);
+                       M_PREPEND(m, l, M_WAITOK);
                if (!m)
                        return NULL;
 
@@ -6718,7 +7159,7 @@ key_getprop(
 
        if (!m)
                return NULL;
-       M_PREPEND(m, l, M_DONTWAIT);
+       M_PREPEND(m, l, M_WAITOK);
        if (!m)
                return NULL;
 
@@ -6846,7 +7287,7 @@ key_acquire(
                }
                m_cat(result, m);
        }
-
+       
        /* XXX identity (optional) */
 #if 0
        if (idexttype && fqdn) {
@@ -7271,9 +7712,9 @@ key_register(
        if (len > MCLBYTES)
                return key_senderror(so, m, ENOBUFS);
 
-       MGETHDR(n, M_DONTWAIT, MT_DATA);
-       if (len > MHLEN) {
-               MCLGET(n, M_DONTWAIT);
+       MGETHDR(n, M_WAITOK, MT_DATA);
+       if (n && len > MHLEN) {
+               MCLGET(n, M_WAITOK);
                if ((n->m_flags & M_EXT) == 0) {
                        m_freem(n);
                        n = NULL;
@@ -7294,7 +7735,7 @@ key_register(
 
        /* for authentication algorithm */
        if (alen) {
-               sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
+               sup = (struct sadb_supported *)(void *)(mtod(n, caddr_t) + off);
                sup->sadb_supported_len = PFKEY_UNIT64(alen);
                sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
                off += PFKEY_ALIGN8(sizeof(*sup));
@@ -7305,7 +7746,8 @@ key_register(
                        aalgo = ah_algorithm_lookup(i);
                        if (!aalgo)
                                continue;
-                       alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
+                       alg = (struct sadb_alg *)
+                           (void *)(mtod(n, caddr_t) + off);
                        alg->sadb_alg_id = i;
                        alg->sadb_alg_ivlen = 0;
                        alg->sadb_alg_minbits = aalgo->keymin;
@@ -7317,7 +7759,7 @@ key_register(
 #if IPSEC_ESP
        /* for encryption algorithm */
        if (elen) {
-               sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
+               sup = (struct sadb_supported *)(void *)(mtod(n, caddr_t) + off);
                sup->sadb_supported_len = PFKEY_UNIT64(elen);
                sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
                off += PFKEY_ALIGN8(sizeof(*sup));
@@ -7328,7 +7770,8 @@ key_register(
                        ealgo = esp_algorithm_lookup(i);
                        if (!ealgo)
                                continue;
-                       alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
+                       alg = (struct sadb_alg *)
+                           (void *)(mtod(n, caddr_t) + off);
                        alg->sadb_alg_id = i;
                        if (ealgo && ealgo->ivlen) {
                                /*
@@ -7464,7 +7907,7 @@ key_expire(
        lt->sadb_lifetime_bytes = sav->lft_c->sadb_lifetime_bytes;
        lt->sadb_lifetime_addtime = sav->lft_c->sadb_lifetime_addtime;
        lt->sadb_lifetime_usetime = sav->lft_c->sadb_lifetime_usetime;
-       lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2);
+       lt = (struct sadb_lifetime *)(void *)(mtod(m, caddr_t) + len / 2);
        bcopy(sav->lft_s, lt, sizeof(*lt));
        m_cat(result, m);
 
@@ -7893,9 +8336,9 @@ key_parse(
        if (m->m_next) {
                struct mbuf *n;
 
-               MGETHDR(n, M_DONTWAIT, MT_DATA);
+               MGETHDR(n, M_WAITOK, MT_DATA);
                if (n && m->m_pkthdr.len > MHLEN) {
-                       MCLGET(n, M_DONTWAIT);
+                       MCLGET(n, M_WAITOK);
                        if ((n->m_flags & M_EXT) == 0) {
                                m_free(n);
                                n = NULL;
@@ -8137,7 +8580,7 @@ key_align(
                        /* m is already freed */
                        return ENOBUFS;
                }
-               ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff);
+               ext = (struct sadb_ext *)(void *)(mtod(n, caddr_t) + toff);
 
                /* set pointer */
                switch (ext->sadb_ext_type) {
@@ -8197,7 +8640,7 @@ key_align(
                        /* m is already freed */
                        return ENOBUFS;
                }
-               ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff);
+               ext = (struct sadb_ext *)(void *)(mtod(n, caddr_t) + toff);
 
                mhp->ext[ext->sadb_ext_type] = ext;
                mhp->extoff[ext->sadb_ext_type] = off;
@@ -8245,8 +8688,8 @@ key_validate_ext(
                break;
        case SADB_EXT_IDENTITY_SRC:
        case SADB_EXT_IDENTITY_DST:
-               if (((const struct sadb_ident *)ext)->sadb_ident_type ==
-                   SADB_X_IDENTTYPE_ADDR) {
+               if (((struct sadb_ident *)(uintptr_t)(size_t)ext)->
+                   sadb_ident_type == SADB_X_IDENTTYPE_ADDR) {
                        baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident));
                        checktype = ADDR;
                } else
@@ -8379,7 +8822,7 @@ key_sa_routechange(
        return;
 }
 
-static void
+void
 key_sa_chgstate(
        struct secasvar *sav,
        u_int8_t state)
@@ -8567,7 +9010,8 @@ key_getsastat (struct socket *so,
        }
        bzero(sa_stats_sav, bufsize);
 
-       sa_stats_arg = (__typeof__(sa_stats_arg))mhp->ext[SADB_EXT_SASTAT];
+       sa_stats_arg = (__typeof__(sa_stats_arg))
+           (void *)mhp->ext[SADB_EXT_SASTAT];
        arg_count = sa_stats_arg->sadb_sastat_list_len;
        // exit early if there are no requested SAs
        if (arg_count == 0) {
@@ -8591,7 +9035,8 @@ key_getsastat (struct socket *so,
                goto end;
        }
 
-       session_id = (__typeof__(session_id))mhp->ext[SADB_EXT_SESSION_ID];
+       session_id = (__typeof__(session_id))
+           (void *)mhp->ext[SADB_EXT_SESSION_ID];
 
        /* send this to the userland. */
        n = key_setdumpsastats(sa_stats_arg->sadb_sastat_dir,
diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h
index 3dda20469ef606a0a5afcc93b7f4f47f37127745..f2a2729a10dd231e3fffd892e591493e8e643c3c 100644 (file)
@@ -48,6 +48,10 @@ struct sockaddr;
 struct socket;
 struct sadb_msg;
 struct sadb_x_policy;
+struct secasindex;
+struct secashead;
+struct sadb_key;
+struct sadb_lifetime;
 
 extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int);
 extern struct secasvar *key_allocsa_policy(struct secasindex *);
@@ -75,7 +79,34 @@ extern void key_domain_init(void);
 extern int key_checktunnelsanity(struct secasvar *, u_int, caddr_t, caddr_t);
 extern void key_sa_recordxfer(struct secasvar *, struct mbuf *);
 extern void key_sa_routechange(struct sockaddr *);
+extern void key_sa_chgstate(struct secasvar *, u_int8_t);
 extern void key_sa_stir_iv(struct secasvar *);
+extern void key_delsah(struct secashead *sah);
+extern struct secashead *key_newsah2 (struct secasindex *saidx, u_int8_t dir);
+extern u_int32_t key_getspi2(struct sockaddr      *src,
+                            struct sockaddr      *dst,
+                            u_int8_t              proto,
+                            u_int8_t              mode,
+                            u_int32_t             reqid,
+                            struct sadb_spirange *spirange);
+extern struct secasvar * key_newsav2(struct secashead     *sah,
+                                    u_int8_t              satype,
+                                    u_int8_t              alg_auth,
+                                    u_int8_t              alg_enc,
+                                    u_int32_t             flags,
+                                    u_int8_t              replay,
+                                    struct sadb_key      *key_auth,
+                                    u_int16_t             key_auth_len,
+                                    struct sadb_key      *key_enc,
+                                    u_int16_t             key_enc_len,
+                                    u_int16_t             natt_port,
+                                    u_int32_t             seq,
+                                    u_int32_t             spi,
+                                    u_int32_t             pid,
+                                    struct sadb_lifetime *lifetime_hard,
+                                    struct sadb_lifetime *lifetime_soft);
+extern void key_delsav(struct secasvar *sav);
+
 
 #endif /* KERNEL_PRIVATE */
 #endif /* _NETKEY_KEY_H_ */
diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h
index e304c336f2906ecac6a12f846144b1a848a95b98..079f6e28877c078c922ac2cec42dedd80f9439bf 100644 (file)
@@ -37,6 +37,7 @@
 #ifdef KERNEL_PRIVATE
 
 #include <netkey/key_var.h>
+#include <net/if_utun.h>
 
 /* Security Association Index */
 /* NOTE: Ensure both addresses are the same family */
@@ -68,6 +69,9 @@ struct secashead {
        struct route sa_route;          /* route cache */
 };
 
+typedef int (*utun_is_keepalive_func) __P((void *, void *, u_int16_t, u_int32_t, size_t));
+typedef int (*utun_input_func) __P((void *, void *, protocol_family_t family));
+
 /* Security Association */
 struct secasvar {
        LIST_ENTRY(secasvar) chain;
@@ -103,6 +107,10 @@ struct secasvar {
        u_int32_t       natt_last_activity;
        u_int16_t       remote_ike_port;
        u_int16_t       natt_encapsulated_src_port;     /* network byte order */
+
+       void              *utun_pcb;
+       utun_is_keepalive_func    utun_is_keepalive_fn;
+       utun_input_func    utun_in_fn;
 };
 
 /* replay prevention */
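The new secasvar fields tie an SA to a utun interface through an opaque pcb pointer and two callbacks: one that classifies keepalive traffic, one that hands inbound packets to the tunnel. A minimal model of that registration pattern (types are simplified; protocol_family_t becomes a plain int here):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef int (*utun_is_keepalive_func)(void *pcb, void *pkt,
        uint16_t port, uint32_t spi, size_t hlen);
    typedef int (*utun_input_func)(void *pcb, void *pkt, int family);

    struct secasvar_model {
            void                    *utun_pcb;
            utun_is_keepalive_func   utun_is_keepalive_fn;
            utun_input_func          utun_in_fn;
    };

    static int toy_is_keepalive(void *pcb, void *pkt, uint16_t port,
        uint32_t spi, size_t hlen)
    {
            (void)pcb; (void)pkt; (void)port; (void)spi;
            return hlen == 1;       /* e.g. one-byte NAT-T keepalives */
    }

    int main(void)
    {
            struct secasvar_model sav = { NULL, toy_is_keepalive, NULL };
            printf("keepalive? %d\n",
                sav.utun_is_keepalive_fn(sav.utun_pcb, NULL, 4500, 0, 1));
            return 0;
    }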
index 41b025389fc26a8f0583fbd05273f6d5a17fb470..bbb466d93c40c42d269a3590f06c4407785484aa 100644 (file)
@@ -86,7 +86,7 @@ __private_extern__ int nfs_ticks;
 #define        NFS_MAXREXMIT   100             /* Stop counting after this many */
 #define        NFS_RETRANS     10              /* Num of retrans for soft mounts */
 #define        NFS_TRYLATERDEL 4               /* Initial try later delay (sec) */
-#define        NFS_MAXGRPS     16              /* Max. size of groups list */
+#define        NFS_MAXGRPS     16U             /* Max. size of groups list */
 #define        NFS_MINATTRTIMO 5               /* Attribute cache timeout in sec */
 #define        NFS_MAXATTRTIMO 60
 #define        NFS_MINDIRATTRTIMO 5            /* directory attribute cache timeout in sec */
@@ -476,6 +476,7 @@ struct user_nfs_export_args {
 #define NX_MAPALL              0x0008  /* map all access to anon credential */
 #define NX_32BITCLIENTS                0x0020  /* restrict directory cookies to 32 bits */
 #define NX_OFFLINE             0x0040  /* export is offline */
+#define NX_MANGLEDNAMES                0x0080  /* export will return mangled names for names > 255 bytes */
 
 /*
  * fs.nfs sysctl(3) export stats record structures
@@ -675,6 +676,13 @@ __private_extern__ int nfsrv_async, nfsrv_export_hash_size,
                        nfsrv_reqcache_size, nfsrv_sock_max_rec_queue_length;
 __private_extern__ uint32_t nfsrv_gss_context_ttl;
 __private_extern__ struct nfsstats nfsstats;
+#define NFS_UC_Q_DEBUG
+#ifdef NFS_UC_Q_DEBUG
+__private_extern__ int nfsrv_uc_use_proxy;
+__private_extern__ uint32_t nfsrv_uc_queue_limit;
+__private_extern__ uint32_t nfsrv_uc_queue_max_seen;
+__private_extern__ volatile uint32_t nfsrv_uc_queue_count;
+#endif
 
 #endif // KERNEL
 
@@ -686,38 +694,38 @@ __private_extern__ struct nfsstats nfsstats;
  * Stats structure
  */
 struct nfsstats {
-       int     attrcache_hits;
-       int     attrcache_misses;
-       int     lookupcache_hits;
-       int     lookupcache_misses;
-       int     direofcache_hits;
-       int     direofcache_misses;
-       int     biocache_reads;
-       int     read_bios;
-       int     read_physios;
-       int     biocache_writes;
-       int     write_bios;
-       int     write_physios;
-       int     biocache_readlinks;
-       int     readlink_bios;
-       int     biocache_readdirs;
-       int     readdir_bios;
-       int     rpccnt[NFS_NPROCS];
-       int     rpcretries;
-       int     srvrpccnt[NFS_NPROCS];
-       int     srvrpc_errs;
-       int     srv_errs;
-       int     rpcrequests;
-       int     rpctimeouts;
-       int     rpcunexpected;
-       int     rpcinvalid;
-       int     srvcache_inproghits;
-       int     srvcache_idemdonehits;
-       int     srvcache_nonidemdonehits;
-       int     srvcache_misses;
-       int     srvvop_writes;
-       int pageins;
-       int pageouts;
+       uint64_t        attrcache_hits;
+       uint64_t        attrcache_misses;
+       uint64_t        lookupcache_hits;
+       uint64_t        lookupcache_misses;
+       uint64_t        direofcache_hits;
+       uint64_t        direofcache_misses;
+       uint64_t        biocache_reads;
+       uint64_t        read_bios;
+       uint64_t        read_physios;
+       uint64_t        biocache_writes;
+       uint64_t        write_bios;
+       uint64_t        write_physios;
+       uint64_t        biocache_readlinks;
+       uint64_t        readlink_bios;
+       uint64_t        biocache_readdirs;
+       uint64_t        readdir_bios;
+       uint64_t        rpccnt[NFS_NPROCS];
+       uint64_t        rpcretries;
+       uint64_t        srvrpccnt[NFS_NPROCS];
+       uint64_t        srvrpc_errs;
+       uint64_t        srv_errs;
+       uint64_t        rpcrequests;
+       uint64_t        rpctimeouts;
+       uint64_t        rpcunexpected;
+       uint64_t        rpcinvalid;
+       uint64_t        srvcache_inproghits;
+       uint64_t        srvcache_idemdonehits;
+       uint64_t        srvcache_nonidemdonehits;
+       uint64_t        srvcache_misses;
+       uint64_t        srvvop_writes;
+       uint64_t        pageins;
+       uint64_t        pageouts;
 };
 #endif
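The counters in struct nfsstats widen from int to uint64_t so they cannot wrap on long-lived, busy mounts; companion hunks further down correspondingly switch updaters from OSAddAtomic() to OSAddAtomic64(). A userland analogue with C11 atomics:

    #include <inttypes.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* 32-bit counters wrap after ~4.29e9 events; 64-bit ones never will
     * in practice. */
    static _Atomic uint64_t readdir_bios;

    static void record_readdir_bio(void)
    {
            /* analogue of OSAddAtomic64(1, &nfsstats.readdir_bios) */
            atomic_fetch_add_explicit(&readdir_bios, 1, memory_order_relaxed);
    }

    int main(void)
    {
            for (int i = 0; i < 1000; i++)
                    record_readdir_bio();
            printf("readdir_bios = %" PRIu64 "\n",
                atomic_load_explicit(&readdir_bios, memory_order_relaxed));
            return 0;
    }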
 
@@ -790,6 +798,7 @@ struct nfs_fs_locations;
 struct nfs_location_index;
 struct nfs_socket;
 struct nfs_socket_search;
+struct nfsrv_uc_arg;
 
 /*
  * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts.
@@ -942,6 +951,8 @@ __private_extern__ int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des;
 __private_extern__ int nfs_tprintf_initial_delay, nfs_tprintf_delay;
 __private_extern__ int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes;
 __private_extern__ int nfs_idmap_ctrl, nfs_callback_port;
+__private_extern__ int nfs_is_mobile;
+__private_extern__ uint32_t nfs_squishy_flags;
 
 /* bits for nfs_idmap_ctrl: */
 #define NFS_IDMAP_CTRL_USE_IDMAP_SERVICE               0x00000001 /* use the ID mapping service */
@@ -971,6 +982,7 @@ struct nfsrv_sock {
        TAILQ_ENTRY(nfsrv_sock) ns_chain;       /* List of all nfsrv_sock's */
        TAILQ_ENTRY(nfsrv_sock) ns_svcq;        /* List of sockets needing servicing */
        TAILQ_ENTRY(nfsrv_sock) ns_wgq;         /* List of sockets with a pending write gather */
+       struct nfsrv_uc_arg *ns_ua;             /* Opaque pointer to upcall */
        lck_rw_t        ns_rwlock;              /* lock for most fields */
        socket_t        ns_so;
        mbuf_t          ns_nam;
@@ -1429,6 +1441,12 @@ void     nfs_ephemeral_mount_harvester_start(void);
 void   nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr);
 #endif
 
+/* socket upcall interfaces */
+void nfsrv_uc_init(void);
+void nfsrv_uc_cleanup(void);
+void nfsrv_uc_addsock(struct nfsrv_sock *, int);
+void nfsrv_uc_dequeue(struct nfsrv_sock *);
+    
 __END_DECLS
 
 #endif /* KERNEL */
diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c
index 84d0aa9f7db52e45e2efbe100d1c136c65694525..69f12d1f705011b3ce52036e80bb5cd3541fa2ef 100644 (file)
@@ -439,7 +439,7 @@ out:
  * get the list of supported security flavors
  *
  * How we get them depends on what args we are given:
- * 
+ *
  * FH?   Name?  Action
  * ----- -----  ------
  * YES   YES    Use the fh and name provided
@@ -1666,6 +1666,8 @@ nfs4_parsefattr(
                        nfsm_chain_get_32(error, nmc, ace_flags);
                        nfsm_chain_get_32(error, nmc, ace_mask);
                        nfsm_chain_get_32(error, nmc, len);
+                       if (!error && len >= NFS_MAX_WHO)
+                               error = EBADRPC;
                        acl->acl_ace[i].ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
                        acl->acl_ace[i].ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
                        acl->acl_ace[i].ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
@@ -1675,16 +1677,12 @@ nfs4_parsefattr(
                                        s = sbuf;
                                        slen = sizeof(sbuf);
                                }
-                               if (len >= NFS_MAX_WHO) {
-                                       error = EBADRPC;
-                               } else {
-                                       /* Let's add a bit more if we can to the allocation as to try and avoid future allocations */
-                                       MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
-                                       if (s)
-                                               slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
-                                       else
-                                               error = ENOMEM;
-                               }
+                       /* Add a bit more to the allocation if we can, to try to avoid future allocations */
+                               MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
+                               if (s)
+                                       slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
+                               else
+                                       error = ENOMEM;
                        }
                        if (error2)
                                nfsm_chain_adv(error, nmc, nfsm_rndup(len));
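The three nfs4_parsefattr() hunks here and below all make the same change: the len >= NFS_MAX_WHO sanity check moves to immediately after the length is read off the wire, so an oversized NFSv4 "who" string is rejected with EBADRPC before the buffer-management logic runs. A standalone sketch of the resulting order of operations (the NFS_MAX_WHO value is illustrative, and realloc() stands in for the kernel's free-and-reallocate):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define NFS_MAX_WHO 8192        /* illustrative; see the nfs headers */
    #ifndef EBADRPC
    #define EBADRPC 72              /* BSD errno the parser returns */
    #endif

    /* Validate the wire-supplied length first, then grow the buffer with a
     * little slack (len + 16, clamped), the order the hunk establishes. */
    static int size_who_buf(uint32_t len, char **s, uint32_t *slen)
    {
            if (len >= NFS_MAX_WHO)
                    return EBADRPC;         /* reject before touching memory */
            if (len >= *slen) {
                    uint32_t want = (len + 16 < NFS_MAX_WHO) ? len + 16
                                                             : NFS_MAX_WHO;
                    char *p = realloc(*s, want);
                    if (p == NULL)
                            return ENOMEM;
                    *s = p;
                    *slen = want;
            }
            return 0;
    }

    int main(void)
    {
            char *s = NULL;
            uint32_t slen = 0;
            int err = size_who_buf(100, &s, &slen);         /* ok, slen = 116 */
            if (err == 0)
                    err = size_who_buf(NFS_MAX_WHO, &s, &slen); /* EBADRPC */
            free(s);
            return err == EBADRPC ? 0 : 1;
    }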
@@ -1999,22 +1997,20 @@ nfs4_parsefattr(
        }
        if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) {
                nfsm_chain_get_32(error, nmc, len);
+               if (!error && len >= NFS_MAX_WHO)
+                       error = EBADRPC;
                if (!error && (len >= slen)) {
                        if (s != sbuf) {
                                FREE(s, M_TEMP);
                                s = sbuf;
                                slen = sizeof(sbuf);
                        }
-                       if (len >= NFS_MAX_WHO) {
-                               error = EBADRPC;
-                       } else {
-                               /* Let's add a bit more if we can to the allocation as to try and avoid future allocations */
-                               MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
-                               if (s)
-                                       slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
-                               else
-                                       error = ENOMEM;
-                       }
+                       /* Add a bit more to the allocation if we can, to try to avoid future allocations */
+                       MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
+                       if (s)
+                               slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
+                       else
+                               error = ENOMEM;
                }
                nfsm_chain_get_opaque(error, nmc, len, s);
                if (!error) {
@@ -2036,22 +2032,20 @@ nfs4_parsefattr(
        }
        if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) {
                nfsm_chain_get_32(error, nmc, len);
+               if (!error && len >= NFS_MAX_WHO)
+                       error = EBADRPC;
                if (!error && (len >= slen)) {
                        if (s != sbuf) {
                                FREE(s, M_TEMP);
                                s = sbuf;
                                slen = sizeof(sbuf);
                        }
-                       if (len >= NFS_MAX_WHO) {
-                               error = EBADRPC;
-                       } else {
-                               /* Let's add a bit more if we can to the allocation as to try and avoid future allocations */
-                               MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
-                               if (s)
-                                       slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
-                               else
-                                       error = ENOMEM;
-                       }
+                       /* Add a bit more to the allocation if we can, so as to try to avoid future allocations */
+                       MALLOC(s, char*, (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO, M_TEMP, M_WAITOK);
+                       if (s)
+                               slen = (len + 16 < NFS_MAX_WHO) ? len+16 : NFS_MAX_WHO;
+                       else
+                               error = ENOMEM;
                }
                nfsm_chain_get_opaque(error, nmc, len, s);
                if (!error) {
@@ -2696,4 +2690,3 @@ recheckdeleg:
                        vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
        }
 }
-
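
The three owner/owner-group hunks above all make the same reordering: the len >= NFS_MAX_WHO sanity check now runs as soon as the length is read off the wire, so an oversized value is rejected with EBADRPC before the buffer-growing path can attempt an allocation. A minimal user-space sketch of that validate-then-grow pattern (grow_who_buf is a hypothetical stand-in for the MALLOC path above; the NFS_MAX_WHO value here is illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NFS_MAX_WHO 8192        /* illustrative cap on an owner string */

    /*
     * Reject oversized lengths before allocating, then allocate len plus a
     * little slack (capped at NFS_MAX_WHO) to try to avoid reallocating for
     * the next, slightly larger, attribute.
     */
    static char *
    grow_who_buf(size_t len, size_t *slenp, int *errorp)
    {
            char *s;
            size_t slen;

            if (len >= NFS_MAX_WHO) {
                    *errorp = EBADRPC;      /* BSD errno, as in the code above */
                    return (NULL);
            }
            slen = (len + 16 < NFS_MAX_WHO) ? len + 16 : NFS_MAX_WHO;
            if ((s = malloc(slen)) == NULL) {
                    *errorp = ENOMEM;
                    return (NULL);
            }
            *slenp = slen;
            return (s);
    }

    int
    main(void)
    {
            size_t slen = 0;
            int error = 0;
            char *s = grow_who_buf(64, &slen, &error);

            printf("buf=%p slen=%zu error=%d\n", (void *)s, slen, error);
            free(s);
            return (0);
    }
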
index ca874aa7cd4943e9edf11fe00adab83d879ee1f8..6259af5930f9bcbdea6d2cb299086ecc0ecce115 100644 (file)
@@ -778,7 +778,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
        } else {
                cookie = bp->nb_lblkno;
                /* increment with every buffer read */
-               OSAddAtomic(1, &nfsstats.readdir_bios);
+               OSAddAtomic64(1, &nfsstats.readdir_bios);
        }
        lastcookie = cookie;
 
@@ -946,7 +946,7 @@ nextbuffer:
                                space_free = nfs_dir_buf_freespace(bp, rdirplus);
                                dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
                                /* increment with every buffer read */
-                               OSAddAtomic(1, &nfsstats.readdir_bios);
+                               OSAddAtomic64(1, &nfsstats.readdir_bios);
                        }
                        nmrepsave = nmrep;
                        dp->d_fileno = cookie; /* placeholder */
@@ -2830,6 +2830,26 @@ out:
        }
        if (noop)
                nfs_open_owner_rele(noop);
+
+       if (!error) {
+               int ismapped = 0;
+               nfs_node_lock_force(np);
+               if ((np->n_flag & NISMAPPED) == 0) {
+                       np->n_flag |= NISMAPPED;
+                       ismapped = 1;
+               }
+               nfs_node_unlock(np);
+               if (ismapped) {
+                       lck_mtx_lock(&nmp->nm_lock);
+                       nmp->nm_state &= ~NFSSTA_SQUISHY;
+                       nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
+                       if (nmp->nm_curdeadtimeout <= 0)
+                               nmp->nm_deadto_start = 0;
+                       nmp->nm_mappers++;
+                       lck_mtx_unlock(&nmp->nm_lock);
+               }
+       }
+
        return (error);
 }
 
@@ -2849,11 +2869,27 @@ nfs_vnop_mnomap(
        struct nfs_open_file *nofp = NULL;
        off_t size;
        int error;
-
+       int is_mapped_flag = 0;
+       
        nmp = VTONMP(vp);
        if (!nmp)
                return (ENXIO);
 
+       nfs_node_lock_force(np);
+       if (np->n_flag & NISMAPPED) {
+               is_mapped_flag = 1;
+               np->n_flag &= ~NISMAPPED;
+       }
+       nfs_node_unlock(np);
+       if (is_mapped_flag) {
+               lck_mtx_lock(&nmp->nm_lock);
+               if (nmp->nm_mappers)
+                       nmp->nm_mappers--;
+               else
+                       NP(np, "nfs_vnop_mnomap: removing mmap reference from mount, but mount has no files mmapped");
+               lck_mtx_unlock(&nmp->nm_lock);
+       }
+
        /* flush buffers/ubc before we drop the open (in case it's our last open) */
        nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
        if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
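
Taken together, the mmap and mnomap hunks above keep a per-mount count of nodes with active mappings: the NISMAPPED node flag guarantees each node contributes at most one increment to nm_mappers, and mnomap decrements only if the node actually held a reference (logging an imbalance otherwise). A compact user-space sketch of that flag-guarded counting, with pthread mutexes standing in for the kernel lock primitives and simplified stand-in struct names:

    #include <pthread.h>
    #include <stdio.h>

    #define NISMAPPED 0x0001

    struct demo_node  { int n_flag; pthread_mutex_t n_lock; };
    struct demo_mount { int nm_mappers; pthread_mutex_t nm_lock; };

    /* mmap side: count the node only on its first mapping */
    static void
    node_mapped(struct demo_node *np, struct demo_mount *nmp)
    {
            int first = 0;

            pthread_mutex_lock(&np->n_lock);
            if ((np->n_flag & NISMAPPED) == 0) {
                    np->n_flag |= NISMAPPED;
                    first = 1;
            }
            pthread_mutex_unlock(&np->n_lock);
            if (first) {
                    pthread_mutex_lock(&nmp->nm_lock);
                    nmp->nm_mappers++;
                    pthread_mutex_unlock(&nmp->nm_lock);
            }
    }

    /* mnomap side: drop the count only if this node held a reference */
    static void
    node_unmapped(struct demo_node *np, struct demo_mount *nmp)
    {
            int was_mapped = 0;

            pthread_mutex_lock(&np->n_lock);
            if (np->n_flag & NISMAPPED) {
                    np->n_flag &= ~NISMAPPED;
                    was_mapped = 1;
            }
            pthread_mutex_unlock(&np->n_lock);
            if (was_mapped) {
                    pthread_mutex_lock(&nmp->nm_lock);
                    if (nmp->nm_mappers)
                            nmp->nm_mappers--;
                    else
                            fprintf(stderr, "unbalanced mmap reference\n");
                    pthread_mutex_unlock(&nmp->nm_lock);
            }
    }

The mmap side above also clears NFSSTA_SQUISHY and restores nm_curdeadtimeout, since a mount with mmapped files is no longer safe to force-unmount.
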
@@ -3797,7 +3833,8 @@ error_out:
                        wakeup(newnflp);
                } else {
                        /* remove newnflp from lock list and destroy */
-                       TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link);
+                       if (inqueue)
+                               TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link);
                        nfs_file_lock_destroy(newnflp);
                }
                lck_mtx_unlock(&np->n_openlock);
@@ -5753,6 +5790,7 @@ nfs_release_open_state_for_node(nfsnode_t np, int force)
                lck_mtx_lock(&nofp->nof_lock);
                nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
                nofp->nof_flags |= NFS_OPEN_FILE_LOST;
+               
                lck_mtx_unlock(&nofp->nof_lock);
                if (!force && nmp && (nmp->nm_vers >= NFS_VER4))
                        nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
@@ -6984,7 +7022,7 @@ nfs4_named_attr_get(
                        /* FALLTHROUGH */
                case -1:
                        /* cache hit, not really an error */
-                       OSAddAtomic(1, &nfsstats.lookupcache_hits);
+                       OSAddAtomic64(1, &nfsstats.lookupcache_hits);
                        if (!anp && avp)
                                *anpp = anp = VTONFS(avp);
 
@@ -7524,7 +7562,7 @@ nfsmout:
                        /* don't save the data if dirty or potential I/O conflict */
                        if (!error && bp && !bp->nb_dirtyoff && !(bp->nb_dirty & pagemask) &&
                            timevalcmp(&anp->n_lastio, &now, <)) {
-                               OSAddAtomic(1, &nfsstats.read_bios);
+                               OSAddAtomic64(1, &nfsstats.read_bios);
                                CLR(bp->nb_flags, (NB_DONE|NB_ASYNC));
                                SET(bp->nb_flags, NB_READ);
                                NFS_BUF_MAP(bp);
@@ -7951,7 +7989,7 @@ nfs4_vnop_listxattr(
        nextcookie = lbn = 0;
 
        while (!error && !done) {
-               OSAddAtomic(1, &nfsstats.biocache_readdirs);
+               OSAddAtomic64(1, &nfsstats.biocache_readdirs);
                cookie = nextcookie;
 getbuffer:
                error = nfs_buf_get(adnp, lbn, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
index 4bd1bff615c57dcb37a4450294aefd371718cd86..b9d22e786360379987859fb0f6901a3c83c41233 100644 (file)
@@ -1475,7 +1475,7 @@ nfs_buf_read(struct nfsbuf *bp)
 
        NFS_BUF_MAP(bp);
 
-       OSAddAtomic(1, &nfsstats.read_bios);
+       OSAddAtomic64(1, &nfsstats.read_bios);
 
        error = nfs_buf_read_rpc(bp, thd, cred);
        /*
@@ -2028,7 +2028,7 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
                        }
                        /* count any biocache reads that we just copied directly */
                        if (lbn != (uio_offset(uio)/biosize)) {
-                               OSAddAtomic((uio_offset(uio)/biosize) - lbn, &nfsstats.biocache_reads);
+                               OSAddAtomic64((uio_offset(uio)/biosize) - lbn, &nfsstats.biocache_reads);
                                FSDBG(514, np, 0xcacefeed, uio_offset(uio), error);
                        }
                }
@@ -2059,7 +2059,7 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
                        readaheads = 1;
                }
 
-               OSAddAtomic(1, &nfsstats.biocache_reads);
+               OSAddAtomic64(1, &nfsstats.biocache_reads);
 
                /*
                 * If the block is in the cache and has the required data
@@ -2425,7 +2425,7 @@ nfs_buf_write(struct nfsbuf *bp)
                bp->nb_offio = doff;
                bp->nb_endio = dend;
 
-               OSAddAtomic(1, &nfsstats.write_bios);
+               OSAddAtomic64(1, &nfsstats.write_bios);
 
                SET(bp->nb_flags, NB_WRITEINPROG);
                error = nfs_buf_write_rpc(bp, iomode, thd, cred);
@@ -2613,7 +2613,7 @@ nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred)
                return (0);
 
        /* there are pages marked dirty that need to be written out */
-       OSAddAtomic(1, &nfsstats.write_bios);
+       OSAddAtomic64(1, &nfsstats.write_bios);
        NFS_BUF_MAP(bp);
        SET(bp->nb_flags, NB_WRITEINPROG);
        npages = bp->nb_bufsize / PAGE_SIZE;
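
Every statistics bump in these buffer-I/O hunks (and in the readdir, lookup-cache, and RPC hunks elsewhere in this commit) moves from OSAddAtomic to OSAddAtomic64, which goes hand in hand with widening the underlying nfsstats counters to 64 bits; the struct change itself is outside this excerpt. A rough user-space analogue of the same widening, using C11 atomics in place of XNU's libkern primitives (field name borrowed from the code above):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 64-bit counter, like the widened nfsstats fields */
    struct demo_stats {
            _Atomic uint64_t read_bios;
    };

    static struct demo_stats nfsstats_demo;

    int
    main(void)
    {
            /* equivalent of OSAddAtomic64(1, &nfsstats.read_bios) */
            atomic_fetch_add(&nfsstats_demo.read_bios, 1);
            printf("read_bios = %llu\n",
                (unsigned long long)atomic_load(&nfsstats_demo.read_bios));
            return (0);
    }

A 32-bit atomic add aimed at a 64-bit field would update only half of the counter, so the call sites have to change in lockstep with the struct.
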
index f6d019ba291f9f32172438c6b7fde94bfeb65e3a..7633c00b97a00ea139902df0fbc202231d656abf 100644 (file)
@@ -89,7 +89,7 @@
 #include <libkern/libkern.h>
 
 #include <mach/task.h>
-#include <mach/task_special_ports.h>
+#include <mach/host_special_ports.h>
 #include <mach/host_priv.h>
 #include <mach/thread_act.h>
 #include <mach/mig_errors.h>
@@ -199,8 +199,8 @@ static int  nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *);
 static int     nfs_gss_svc_seqnum_valid(struct nfs_gss_svc_ctx *, uint32_t);
 #endif /* NFSSERVER */
 
-static void    task_release_special_port(mach_port_t);
-static mach_port_t task_copy_special_port(mach_port_t);
+static void    host_release_special_port(mach_port_t);
+static mach_port_t host_copy_special_port(mach_port_t);
 static void    nfs_gss_mach_alloc_buffer(u_char *, uint32_t, vm_map_copy_t *);
 static int     nfs_gss_mach_vmcopyout(vm_map_copy_t, uint32_t, u_char *);
 static int     nfs_gss_token_get(gss_key_info *ki, u_char *, u_char *, int, uint32_t *, u_char *);
@@ -1320,9 +1320,47 @@ nfs_gss_clnt_svcname(struct nfsmount *nmp)
        return (svcname);
 }
 
+/*
+ * Get a mach port to talk to gssd.
+ * gssd lives in the root bootstrap, so we call gssd's lookup routine
+ * to get a send right to talk to a new gssd instance that launchd has launched
+ * based on the cred's uid and audit session id.
+ */
+#define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid)
+#define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid)
+
+static mach_port_t
+nfs_gss_clnt_get_upcall_port(kauth_cred_t credp)
+{
+       mach_port_t gssd_host_port, uc_port = IPC_PORT_NULL;
+       kern_return_t kr;
+       au_asid_t asid;
+       uid_t uid;
+
+       kr = host_get_gssd_port(host_priv_self(), &gssd_host_port);
+       if (kr != KERN_SUCCESS) {
+               printf("nfs_gss_clnt_get_upcall_port: can't get gssd port, status %x (%d)\n", kr, kr);
+               return (IPC_PORT_NULL);
+       }
+       if (!IPC_PORT_VALID(gssd_host_port)) {
+               printf("nfs_gss_clnt_get_upcall_port: gssd port not valid\n");
+               return (IPC_PORT_NULL);
+       }
+
+       asid = kauth_cred_getasid(credp);
+       uid = kauth_cred_getauid(credp);
+       if (uid == AU_DEFAUDITID)
+               uid = kauth_cred_getuid(credp);
+       kr = mach_gss_lookup(gssd_host_port, uid, asid, &uc_port);
+       if (kr != KERN_SUCCESS)
+               printf("nfs_gss_clnt_get_upcall_port: mach_gss_lookup failed: status %x (%d)\n", kr, kr);
+
+       return (uc_port);
+}
+
 /*
  * Make an upcall to the gssd using Mach RPC
- * The upcall is made using a task special port.
+ * The upcall is made using a host special port.
  * This allows launchd to fire up the gssd in the
  * user's session.  This is important, since gssd
  * must have access to the user's credential cache.
@@ -1351,16 +1389,9 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
         */
        uprinc[0] = '\0';
        if (!IPC_PORT_VALID(cp->gss_clnt_mport)) {
-               kr = task_get_gssd_port(get_threadtask(req->r_thread), &cp->gss_clnt_mport);
-               if (kr != KERN_SUCCESS) {
-                       printf("nfs_gss_clnt_gssd_upcall: can't get gssd port, status %x (%d)\n", kr, kr);
+               cp->gss_clnt_mport = nfs_gss_clnt_get_upcall_port(req->r_cred);
+               if (cp->gss_clnt_mport == IPC_PORT_NULL)
                        goto out;
-               }
-               if (!IPC_PORT_VALID(cp->gss_clnt_mport)) {
-                       printf("nfs_gss_clnt_gssd_upcall: gssd port not valid\n");
-                       cp->gss_clnt_mport = NULL;
-                       goto out;
-               }
        }
 
        if (cp->gss_clnt_tokenlen > 0)
@@ -1394,8 +1425,9 @@ retry:
                                nfs_gss_mach_alloc_buffer(cp->gss_clnt_token, cp->gss_clnt_tokenlen, &itoken);
                        goto retry;
                }
-               task_release_special_port(cp->gss_clnt_mport);
-               cp->gss_clnt_mport = NULL;
+
+               host_release_special_port(cp->gss_clnt_mport);
+               cp->gss_clnt_mport = IPC_PORT_NULL;
                goto out;
        }
 
@@ -1583,7 +1615,7 @@ nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp)
        if (nmp != NULL)
                TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
 
-       task_release_special_port(cp->gss_clnt_mport);
+       host_release_special_port(cp->gss_clnt_mport);
 
        if (cp->gss_clnt_mtx)
                lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp);
@@ -1623,7 +1655,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
                return (0);     // already being renewed
        }
        saved_uid = cp->gss_clnt_uid;
-       saved_mport = task_copy_special_port(cp->gss_clnt_mport);
+       saved_mport = host_copy_special_port(cp->gss_clnt_mport);
 
        /* Remove the old context */
        cp->gss_clnt_flags |= GSS_CTX_INVAL;
@@ -1649,7 +1681,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
        }
 
        ncp->gss_clnt_uid = saved_uid;
-       ncp->gss_clnt_mport = task_copy_special_port(saved_mport); // re-use the gssd port
+       ncp->gss_clnt_mport = host_copy_special_port(saved_mport); // re-use the gssd port
        ncp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
        ncp->gss_clnt_thread = current_thread();
        lck_mtx_lock(&nmp->nm_lock);
@@ -1662,7 +1694,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
 
        error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context
 out:
-       task_release_special_port(saved_mport);
+       host_release_special_port(saved_mport);
        if (error)
                nfs_gss_clnt_ctx_unref(req);
 
@@ -2557,7 +2589,7 @@ nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *cp)
        int error = 0;
        char svcname[] = "nfs";
 
-       kr = task_get_gssd_port(get_threadtask(current_thread()), &mp);
+       kr = host_get_gssd_port(host_priv_self(), &mp);
        if (kr != KERN_SUCCESS) {
                printf("nfs_gss_svc_gssd_upcall: can't get gssd port, status %x (%d)\n", kr, kr);
                goto out;
@@ -2595,11 +2627,11 @@ retry:
                                nfs_gss_mach_alloc_buffer(cp->gss_svc_token, cp->gss_svc_tokenlen, &itoken);
                        goto retry;
                }
-               task_release_special_port(mp);
+               host_release_special_port(mp);
                goto out;
        }
 
-       task_release_special_port(mp);
+       host_release_special_port(mp);
 
        if (skeylen > 0) {
                if (skeylen != SKEYLEN && skeylen != SKEYLEN3) {
@@ -2765,8 +2797,8 @@ nfs_gss_svc_cleanup(void)
  */
 
 /*
- * Release a task special port that was obtained by task_get_special_port
- * or one of its macros (task_get_gssd_port in this case).
+ * Release a host special port that was obtained by host_get_special_port
+ * or one of its macros (host_get_gssd_port in this case).
  * This really should be in a public kpi. 
  */
 
@@ -2775,16 +2807,16 @@ extern void ipc_port_release_send(ipc_port_t);
 extern ipc_port_t ipc_port_copy_send(ipc_port_t);
 
 static void
-task_release_special_port(mach_port_t mp)
+host_release_special_port(mach_port_t mp)
 {
        if (IPC_PORT_VALID(mp))
                ipc_port_release_send(mp);
 }
 
 static mach_port_t
-task_copy_special_port(mach_port_t mp)
+host_copy_special_port(mach_port_t mp)
 {
-       return ipc_port_copy_send(mp);
+       return (ipc_port_copy_send(mp));
 }
 
 /*
@@ -3393,15 +3425,15 @@ gss_des_crypt(gss_key_info *ki, des_cblock *in, des_cblock *out,
        switch (ki->type) {
        case NFS_GSS_1DES:
                        {
-                               des_key_schedule *sched = ((usage == KG_USAGE_SEAL) ?
+                               des_cbc_key_schedule *sched = ((usage == KG_USAGE_SEAL) ?
                                                        &ki->ks_u.des.gss_sched_Ke :
                                                        &ki->ks_u.des.gss_sched);
-                               des_cbc_encrypt(in, out, len, *sched, iv, retiv, encrypt); 
+                               des_cbc_encrypt(in, out, len, sched, iv, retiv, encrypt);
                        }
                        break;
        case NFS_GSS_3DES:
 
-                       des3_cbc_encrypt(in, out, len, ki->ks_u.des3.gss_sched, iv, retiv, encrypt);
+                       des3_cbc_encrypt(in, out, len, &ki->ks_u.des3.gss_sched, iv, retiv, encrypt);
                        break;
        }
 }
@@ -3419,12 +3451,12 @@ gss_key_init(gss_key_info *ki, uint32_t skeylen)
                                ki->type = NFS_GSS_1DES;
                                ki->hash_len = MD5_DESCBC_DIGEST_LENGTH;
                                ki->ks_u.des.key = (des_cblock *)ki->skey;
-                               rc = des_key_sched(ki->ks_u.des.key, ki->ks_u.des.gss_sched);
+                               rc = des_cbc_key_sched(ki->ks_u.des.key, &ki->ks_u.des.gss_sched);
                                if (rc)
                                        return (rc);
                                for (i = 0; i < ki->keybytes; i++)
                                        k[0][i] = 0xf0 ^ (*ki->ks_u.des.key)[i];
-                               rc = des_key_sched(&k[0], ki->ks_u.des.gss_sched_Ke);
+                               rc = des_cbc_key_sched(&k[0], &ki->ks_u.des.gss_sched_Ke);
                                break;
        case 3*sizeof(des_cblock):      
                                ki->type = NFS_GSS_3DES;
@@ -3432,7 +3464,7 @@ gss_key_init(gss_key_info *ki, uint32_t skeylen)
                                ki->ks_u.des3.key = (des_cblock (*)[3])ki->skey;
                                des3_derive_key(*ki->ks_u.des3.key, ki->ks_u.des3.ckey,
                                                KEY_USAGE_DES3_SIGN, KEY_USAGE_LEN);
-                               rc = des3_key_sched(*ki->ks_u.des3.key, ki->ks_u.des3.gss_sched);
+                               rc = des3_cbc_key_sched(*ki->ks_u.des3.key, &ki->ks_u.des3.gss_sched);
                                if (rc)
                                        return (rc);
                                break;
index ad056e7f277f8a9c9a0486a920622749eddbed2d..e8cdb5a6a89480880e8c4230e1455301e479e142 100644 (file)
@@ -31,7 +31,7 @@
 
 #include <gssd/gssd_mach.h>
 #include <sys/param.h>
-#include <crypto/des/des.h>
+#include <libkern/crypto/des.h>
 
 #define RPCSEC_GSS                     6
 #define        RPCSEC_GSS_VERS_1               1
@@ -78,13 +78,13 @@ typedef struct {
        union {
                struct {
                        des_cblock  *key;
-                       des_key_schedule gss_sched;
-                       des_key_schedule gss_sched_Ke;
+                       des_cbc_key_schedule gss_sched;
+                       des_cbc_key_schedule gss_sched_Ke;
                } des;
                struct {
                        des_cblock              (*key)[3];
                        des_cblock              ckey[3];
-                       des_key_schedule        gss_sched[3];
+                       des3_cbc_key_schedule   gss_sched;
                } des3;
        } ks_u;
 } gss_key_info;
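
This header hunk is the core of the DES migration: the open-coded OpenSSL-style des_key_schedule arrays are replaced by opaque per-mode schedule types from libkern/crypto/des.h (des_cbc_key_schedule, des3_cbc_key_schedule), built once and then passed by pointer. A kernel-context sketch of the new call pattern, using only the signatures visible in this diff (illustrative, not a drop-in):

    /*
     * des_cblock, des_cbc_key_schedule, des_cbc_key_sched and
     * des_cbc_encrypt come from libkern/crypto/des.h, with the signatures
     * exactly as the gss_key_init / gss_des_crypt hunks above use them.
     */
    static int
    cbc_encrypt_one(des_cblock *key, des_cblock *in, des_cblock *out, int32_t len)
    {
            des_cbc_key_schedule sched;
            des_cblock iv = {0}, retiv;

            if (des_cbc_key_sched(key, &sched))
                    return (EINVAL);        /* bad key */
            des_cbc_encrypt(in, out, len, &sched, &iv, &retiv, 1 /* encrypt */);
            return (0);
    }

The triple-DES path has the same shape with des3_cbc_key_sched, des3_cbc_key_schedule, and des3_cbc_encrypt, as the des3_derive_key hunk below shows; the old hand-rolled des_cbc_encrypt and des3_cbc_encrypt loops borrowed from OpenBSD are deleted outright in favor of the libkern versions.
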
index 1d275ba8f878b63b6899123ef76ec4190c150471..370560b6763604922aff918bd657b01789469a18 100644 (file)
@@ -177,100 +177,6 @@ des3_make_key(const unsigned char randombits[21], des_cblock key[3])
        }
 }
 
-/*
- * Make a triple des key schedule, from a triple des key.
- */
-int
-des3_key_sched(des_cblock key[3], des_key_schedule sched[3])
-{
-       int i;
-       int rc = 0;
-       
-       for (i = 0; i < 3; i++)
-               rc |= des_key_sched(&key[i], sched[i]);
-
-       return (rc);
-}
-
-/*
- * Triple DES cipher block chaining mode encryption.
- */
-void
-des3_cbc_encrypt(des_cblock *input, des_cblock *output, int32_t length, 
-                des_key_schedule schedule[3], des_cblock *ivec, des_cblock *retvec, int encrypt)
-{
-       register DES_LONG tin0,tin1;
-       register DES_LONG tout0,tout1,xor0,xor1;
-       register unsigned char *in,*out,*retval;
-       register int32_t l=length;
-       DES_LONG tin[2];
-       unsigned char *iv;
-       tin0 = tin1 = 0;
-
-       in=(unsigned char *)input;
-       out=(unsigned char *)output;
-       retval=(unsigned char *)retvec;
-       iv=(unsigned char *)ivec;
-
-       if (encrypt) {
-               c2l(iv,tout0);
-               c2l(iv,tout1);
-               for (l-=8; l>=0; l-=8) {
-                       c2l(in,tin0);
-                       c2l(in,tin1);
-                       tin0^=tout0; tin[0]=tin0;
-                       tin1^=tout1; tin[1]=tin1;
-                       des_encrypt3((DES_LONG *)tin,schedule[0], schedule[1], schedule[2]);
-                       tout0=tin[0]; l2c(tout0,out);
-                       tout1=tin[1]; l2c(tout1,out);
-               }
-               if (l != -8) {
-                       c2ln(in,tin0,tin1,l+8);
-                       tin0^=tout0; tin[0]=tin0;
-                       tin1^=tout1; tin[1]=tin1;
-                       des_encrypt3((DES_LONG *)tin,schedule[0], schedule[1], schedule[2]);
-                       tout0=tin[0]; l2c(tout0,out);
-                       tout1=tin[1]; l2c(tout1,out);
-               }
-               if (retval) {
-                       l2c(tout0,retval);
-                       l2c(tout1,retval);
-               }
-       } else {
-               c2l(iv,xor0);
-               c2l(iv,xor1);
-               for (l-=8; l>=0; l-=8) {
-                       c2l(in,tin0); tin[0]=tin0;
-                       c2l(in,tin1); tin[1]=tin1;
-                       des_decrypt3((DES_LONG *)tin,schedule[0],schedule[1],schedule[2]);
-                       tout0=tin[0]^xor0;
-                       tout1=tin[1]^xor1;
-                       l2c(tout0,out);
-                       l2c(tout1,out);
-                       xor0=tin0;
-                       xor1=tin1;
-               }
-               if (l != -8) {
-                       c2l(in,tin0); tin[0]=tin0;
-                       c2l(in,tin1); tin[1]=tin1;
-                       des_decrypt3((DES_LONG *)tin,schedule[0],schedule[1],schedule[2]);
-                       tout0=tin[0]^xor0;
-                       tout1=tin[1]^xor1;
-                       l2cn(tout0,tout1,out,l+8);
-               /*      xor0=tin0;
-                       xor1=tin1; */
-               }
-               if (retval) {
-                       l2c(tin0,retval);
-                       l2c(tin1,retval);
-               }
-       }
-       tin0=tin1=tout0=tout1=xor0=xor1=0;
-       tin[0]=tin[1]=0;
-}
-
 /*
  * Key derivation for triple DES.
  * Given the session key in in key, produce a new key in out key using
@@ -282,7 +188,7 @@ des3_derive_key(des_cblock inkey[3], des_cblock outkey[3],
                const unsigned char *constant, int clen)
 {
        des_cblock inblock, outblock, ivec;
-       des_key_schedule sched[3];
+       des3_cbc_key_schedule sched;
        unsigned char rawkey[21];
        size_t n, keybytes = sizeof(rawkey);
 
@@ -297,9 +203,9 @@ des3_derive_key(des_cblock inkey[3], des_cblock outkey[3],
        /* loop encrypting the blocks until enough key bytes are generated */
 
        bzero(ivec, sizeof(ivec));
-       des3_key_sched(inkey, sched);
+       des3_cbc_key_sched(inkey, &sched);
        for (n = 0; n < sizeof(rawkey); n += sizeof(des_cblock)) {
-               des3_cbc_encrypt(&inblock, &outblock, sizeof(outblock), sched, &ivec, NULL, 1);
+               des3_cbc_encrypt(&inblock, &outblock, sizeof(outblock), &sched, &ivec, NULL, 1);
                if ((keybytes - n) <= sizeof (des_cblock)) {
                        memcpy(rawkey+n, outblock, (keybytes - n));
                        break;
@@ -316,7 +222,7 @@ des3_derive_key(des_cblock inkey[3], des_cblock outkey[3],
        bzero(inblock, sizeof (des_cblock));
        bzero(outblock, sizeof (des_cblock));
        bzero(rawkey, keybytes);
-       bzero(sched, sizeof (sched));
+       bzero(&sched, sizeof (sched));
 
        return(0);
 }
@@ -375,130 +281,11 @@ HMAC_SHA1_DES3KD_Final(void *digest, HMAC_SHA1_DES3KD_CTX *ctx)
        SHA1Final(digest, &ctx->sha1_ctx);
 }
 
-/*
- * XXX This function borrowed from OpenBSD.
- * It will likely be moved into kernel crypto.
- */
-DES_LONG
-des_cbc_cksum(des_cblock *input, des_cblock *output,
-               int32_t length, des_key_schedule schedule, des_cblock *ivec)
-{
-       register DES_LONG tout0,tout1,tin0,tin1;
-       register int32_t l=length;
-       DES_LONG tin[2];
-       unsigned char *in,*out,*iv;
-
-       in=(unsigned char *)input;
-       out=(unsigned char *)output;
-       iv=(unsigned char *)ivec;
-
-       c2l(iv,tout0);
-       c2l(iv,tout1);
-       for (; l>0; l-=8) {
-               if (l >= 8) {
-                       c2l(in,tin0);
-                       c2l(in,tin1);
-               } else
-                       c2ln(in,tin0,tin1,l);
-                       
-               tin0^=tout0; tin[0]=tin0;
-               tin1^=tout1; tin[1]=tin1;
-               des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT);
-               /* fix 15/10/91 eay - thanks to keithr@sco.COM */
-               tout0=tin[0];
-               tout1=tin[1];
-       }
-       if (out != NULL) {
-               l2c(tout0,out);
-               l2c(tout1,out);
-       }
-       tout0=tin0=tin1=tin[0]=tin[1]=0;
-       return(tout1);
-}
-
-/*
- * XXX This function borrowed from OpenBSD.
- * It will likely be moved into kernel crypto.
- */
-void
-des_cbc_encrypt(des_cblock *input, des_cblock *output, int32_t length,
-               des_key_schedule schedule, des_cblock *ivec, des_cblock *retvec, int encrypt)
-{
-       register DES_LONG tin0,tin1;
-       register DES_LONG tout0,tout1,xor0,xor1;
-       register unsigned char *in,*out,*retval;
-       register int32_t l=length;
-       DES_LONG tin[2];
-       unsigned char *iv;
-       tin0 = tin1 = 0;
-
-       in=(unsigned char *)input;
-       out=(unsigned char *)output;
-       retval=(unsigned char *)retvec;
-       iv=(unsigned char *)ivec;
-
-       if (encrypt) {
-               c2l(iv,tout0);
-               c2l(iv,tout1);
-               for (l-=8; l>=0; l-=8) {
-                       c2l(in,tin0);
-                       c2l(in,tin1);
-                       tin0^=tout0; tin[0]=tin0;
-                       tin1^=tout1; tin[1]=tin1;
-                       des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT);
-                       tout0=tin[0]; l2c(tout0,out);
-                       tout1=tin[1]; l2c(tout1,out);
-               }
-               if (l != -8) {
-                       c2ln(in,tin0,tin1,l+8);
-                       tin0^=tout0; tin[0]=tin0;
-                       tin1^=tout1; tin[1]=tin1;
-                       des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT);
-                       tout0=tin[0]; l2c(tout0,out);
-                       tout1=tin[1]; l2c(tout1,out);
-               }
-               if (retval) {
-                       l2c(tout0,retval);
-                       l2c(tout1,retval);
-               }
-       } else {
-               c2l(iv,xor0);
-               c2l(iv,xor1);
-               for (l-=8; l>=0; l-=8) {
-                       c2l(in,tin0); tin[0]=tin0;
-                       c2l(in,tin1); tin[1]=tin1;
-                       des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT);
-                       tout0=tin[0]^xor0;
-                       tout1=tin[1]^xor1;
-                       l2c(tout0,out);
-                       l2c(tout1,out);
-                       xor0=tin0;
-                       xor1=tin1;
-               }
-               if (l != -8) {
-                       c2l(in,tin0); tin[0]=tin0;
-                       c2l(in,tin1); tin[1]=tin1;
-                       des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT);
-                       tout0=tin[0]^xor0;
-                       tout1=tin[1]^xor1;
-                       l2cn(tout0,tout1,out,l+8);
-               /*      xor0=tin0;
-                       xor1=tin1; */
-               }
-               if (retval) {
-                       l2c(tin0,retval);
-                       l2c(tin1,retval);
-               }
-       }
-       tin0=tin1=tout0=tout1=xor0=xor1=0;
-       tin[0]=tin[1]=0;
-}
-
 /*
  * Initialize an MD5 DES CBC context with a schedule.
  */
  
-void MD5_DESCBC_Init(MD5_DESCBC_CTX *ctx, des_key_schedule *sched)
+void MD5_DESCBC_Init(MD5_DESCBC_CTX *ctx, des_cbc_key_schedule *sched)
 {
        MD5Init(&ctx->md5_ctx);
        ctx->sched = sched;
@@ -519,7 +306,6 @@ void MD5_DESCBC_Update(MD5_DESCBC_CTX *ctx, void *data, size_t len)
  
 void MD5_DESCBC_Final(void *digest, MD5_DESCBC_CTX *ctx)
 {
-       des_cblock iv0;
        unsigned char md5_digest[MD5_DIGEST_LENGTH];
        
        MD5Final(md5_digest, &ctx->md5_ctx);
@@ -527,8 +313,7 @@ void MD5_DESCBC_Final(void *digest, MD5_DESCBC_CTX *ctx)
        /*
         * Now get the DES CBC checksum for the digest.
         */
-       bzero(iv0, sizeof (iv0));
-       (void) des_cbc_cksum((des_cblock *) md5_digest, (des_cblock *)digest,
-                               sizeof (md5_digest), *ctx->sched, &iv0);
+       des_cbc_cksum((des_cblock *) md5_digest, (des_cblock *)digest,
+                               sizeof (md5_digest), ctx->sched);
 }      
 
index 677647f16fd4c6da1c6f5e9abdce3e180bf09f3b..4819dcd9d4a97805fd37850543155532c6fb5868 100644 (file)
@@ -32,7 +32,7 @@
 #include <libkern/libkern.h>
 #include <libkern/crypto/sha1.h>
 #include <libkern/crypto/md5.h>
-#include <crypto/des/des_locl.h>
+#include <libkern/crypto/des.h>
 
 #define KG_USAGE_SEAL 22
 #define KG_USAGE_SIGN 23
@@ -50,7 +50,7 @@ typedef struct {
 
 typedef struct {
        MD5_CTX md5_ctx;
-       des_key_schedule *sched;
+       des_cbc_key_schedule *sched;
 } MD5_DESCBC_CTX;
 
 #define MD5_DESCBC_DIGEST_LENGTH 8
@@ -59,18 +59,13 @@ __BEGIN_DECLS
 
 void krb5_nfold(unsigned int, const unsigned char *, unsigned int, unsigned char *);
 void des3_make_key(const unsigned char[21], des_cblock[3]);
-int des3_key_sched(des_cblock[3], des_key_schedule[3]);
-void des3_cbc_encrypt(des_cblock *, des_cblock *, int32_t,
-                       des_key_schedule[3], des_cblock *, des_cblock *, int);
 int des3_derive_key(des_cblock[3], des_cblock[3], const unsigned char *, int);
+
 void HMAC_SHA1_DES3KD_Init(HMAC_SHA1_DES3KD_CTX *, des_cblock[3], int);
 void HMAC_SHA1_DES3KD_Update(HMAC_SHA1_DES3KD_CTX *, void *, size_t);
 void HMAC_SHA1_DES3KD_Final(void *, HMAC_SHA1_DES3KD_CTX *);
-DES_LONG des_cbc_cksum(des_cblock *, des_cblock *, int32_t, des_key_schedule, des_cblock *);
-void   des_cbc_encrypt(des_cblock *, des_cblock *, int32_t, des_key_schedule,
-                       des_cblock *, des_cblock *, int);
 
-void MD5_DESCBC_Init(MD5_DESCBC_CTX *, des_key_schedule *);
+void MD5_DESCBC_Init(MD5_DESCBC_CTX *, des_cbc_key_schedule *);
 void MD5_DESCBC_Update(MD5_DESCBC_CTX *, void *, size_t);
 void MD5_DESCBC_Final(void *, MD5_DESCBC_CTX *);
 
index f76a9b6d0b9b863703405fd47a5357ef21466d4c..ad6a0cb53c3e0d9be0198a53d60aa707997b96e4 100644 (file)
@@ -425,10 +425,9 @@ nfs3_lockd_request(
        bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
        if (nmp->nm_vers == NFS_VER3)
                msg->lm_flags |= LOCKD_MSG_NFSV3;
-#if 0 /* not yet */
+
        if (nmp->nm_sotype != SOCK_DGRAM)
                msg->lm_flags |= LOCKD_MSG_TCP;
-#endif
 
        microuptime(&now);
        starttime = now.tv_sec;
index b3f2a47b9e669d8397bad62a94b526e67e4a8e4f..7fc9ddaef8c8eed4132aa73247b0fae0666a2170 100644 (file)
@@ -76,6 +76,7 @@
 #include <sys/ubc.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
+#include <sys/time.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
@@ -1177,3 +1178,36 @@ nfs_data_update_size(nfsnode_t np, int datalocked)
        FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize);
 }
 
+#define DODEBUG 1
+int
+nfs_mount_is_dirty(mount_t mp)
+{
+       u_long i;
+       nfsnode_t np;
+#ifdef DODEBUG 
+       struct timeval now, then, diff;
+       u_long ncnt = 0;
+       microuptime(&now);
+#endif
+       lck_mtx_lock(nfs_node_hash_mutex);
+       for (i = 0; i <= nfsnodehash; i++) {
+               LIST_FOREACH(np, &nfsnodehashtbl[i], n_hash) {
+#ifdef DODEBUG
+                       ncnt++;
+#endif                 
+                       if (np->n_mount == mp && !LIST_EMPTY(&np->n_dirtyblkhd))
+                               goto out;
+               }
+       }
+out:
+       lck_mtx_unlock(nfs_node_hash_mutex);
+#ifdef DODEBUG
+       microuptime(&then);
+       timersub(&then, &now, &diff);
+       
+       printf("nfs_mount_is_dirty took %lld usec for %lu slots and %lu nodes; returning %d\n",
+              (long long)diff.tv_sec * 1000000LL + diff.tv_usec, i, ncnt, (i <= nfsnodehash));
+#endif
+
+       return (i <=  nfsnodehash);
+}
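
nfs_mount_is_dirty reports its result through where the loop index landed: the goto out on the first dirty buffer leaves i <= nfsnodehash, while a clean full scan exits the for loop with i == nfsnodehash + 1, so the final i <= nfsnodehash doubles as the "found" flag without a separate variable. The same index-as-result idiom in a self-contained form:

    #include <stdio.h>

    #define NSLOTS 8

    /*
     * Returns 1 if any slot holds a negative value, using the loop index
     * itself to record whether we bailed out early, as above.
     */
    static int
    table_has_negative(const int *tbl)
    {
            int i;

            for (i = 0; i < NSLOTS; i++) {
                    if (tbl[i] < 0)
                            goto out;       /* early exit: i stays < NSLOTS */
            }
    out:
            return (i < NSLOTS);
    }

    int
    main(void)
    {
            int t[NSLOTS] = { 3, 1, -4, 1, 5, 9, 2, 6 };

            printf("%d\n", table_has_negative(t));  /* prints 1 */
            return (0);
    }
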
index 956cc9285961a1eb43f18d2f172a96b7feb10b89..8cc717b8eb779834c179490a43921d957447c5f6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -237,6 +237,9 @@ nfsrv_init(void)
        nfsrv_udpsock = NULL;
        nfsrv_udp6sock = NULL;
 
+       /* Set up the upcall handling */
+       nfsrv_uc_init();
+       
        /* initialization complete */
        nfsrv_initted = NFSRV_INITIALIZED;
 }
@@ -1280,7 +1283,7 @@ nfsrv_write(
                        ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 
                error = VNOP_WRITE(vp, auio, ioflags, ctx);
-               OSAddAtomic(1, &nfsstats.srvvop_writes);
+               OSAddAtomic64(1, &nfsstats.srvvop_writes);
 
                /* update export stats */
                NFSStatAdd64(&nx->nx_stats.bytes_written, len);
@@ -1559,7 +1562,7 @@ loop1:
                            if ((tlen = mbuf_len(m)) > 0)
                                uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), tlen);
                        error = VNOP_WRITE(vp, auio, ioflags, ctx);
-                       OSAddAtomic(1, &nfsstats.srvvop_writes);
+                       OSAddAtomic64(1, &nfsstats.srvvop_writes);
 
                        /* update export stats */
                        NFSStatAdd64(&nx->nx_stats.bytes_written, nd->nd_len);
@@ -2142,6 +2145,7 @@ nfsrv_mknod(
        uint32_t len = 0, cnflags;
        u_int32_t major = 0, minor = 0;
        enum vtype vtyp;
+       nfstype nvtype;
        vnode_t vp, dvp, dirp;
        struct nfs_filehandle nfh;
        struct nfs_export *nx = NULL;
@@ -2192,9 +2196,9 @@ nfsrv_mknod(
        dvp = ni.ni_dvp;
        vp = ni.ni_vp;
 
-       nfsm_chain_get_32(error, nmreq, vtyp);
+       nfsm_chain_get_32(error, nmreq, nvtype);
        nfsmerr_if(error);
-       vtyp = nfstov_type(vtyp, NFS_VER3);
+       vtyp = nfstov_type(nvtype, NFS_VER3);
        if (!error && (vtyp != VCHR) && (vtyp != VBLK) && (vtyp != VSOCK) && (vtyp != VFIFO)) {
                error = NFSERR_BADTYPE;
                goto out;
@@ -3938,8 +3942,12 @@ nfsrv_readdir(
        error = nfsrv_credcheck(nd, ctx, nx, nxo);
        nfsmerr_if(error);
 
+       if ((nxo->nxo_flags & NX_MANGLEDNAMES) || (nd->nd_vers == NFS_VER2))
+               vnopflag |= VNODE_READDIR_NAMEMAX;
+
        if ((nd->nd_vers == NFS_VER2) || (nxo->nxo_flags & NX_32BITCLIENTS))
                vnopflag |= VNODE_READDIR_SEEKOFF32;
+
        if (nd->nd_vers == NFS_VER3) {
                nfsm_srv_vattr_init(&attr, NFS_VER3);
                error = attrerr = vnode_getattr(vp, &attr, ctx);
@@ -4160,6 +4168,9 @@ nfsrv_readdirplus(
        if (nxo->nxo_flags & NX_32BITCLIENTS)
                vnopflag |= VNODE_READDIR_SEEKOFF32;
 
+       if (nxo->nxo_flags & NX_MANGLEDNAMES)
+               vnopflag |= VNODE_READDIR_NAMEMAX;
+
        nfsm_srv_vattr_init(&attr, NFS_VER3);
        error = attrerr = vnode_getattr(vp, &attr, ctx);
        if (!error && toff && verf && (verf != attr.va_filerev))
index 71b6e5c447a228f104cfaab97cf3950e7cf6a5ce..27126d218c8d74cc25fb73314724d8d2bebec16b 100644 (file)
@@ -157,6 +157,9 @@ void        nfs_reqbusy(struct nfsreq *);
 struct nfsreq *nfs_reqnext(struct nfsreq *);
 int    nfs_wait_reply(struct nfsreq *);
 void   nfs_softterm(struct nfsreq *);
+int    nfs_can_squish(struct nfsmount *);
+int    nfs_is_squishy(struct nfsmount *);
+int    nfs_is_dead(int, struct nfsmount *);
 
 #ifdef NFS_SOCKET_DEBUGGING
 #define NFS_SOCK_DBG(X)        printf X
@@ -584,7 +587,7 @@ nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
        int on = 1, proto;
 
        timeo.tv_usec = 0;
-       timeo.tv_sec = NMFLAG(nmp, SOFT) ? 5 : 60;
+       timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60;
        sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
        sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
        if (nso->nso_sotype == SOCK_STREAM) {
@@ -1115,7 +1118,7 @@ keepsearching:
                        else if (ss.ss_family == AF_INET6)
                                ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
                        error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
-                                       nso->nso_so, NFS_PROG, nfsvers, 
+                                       nso->nso_so, NFS_PROG, nfsvers,
                                        (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
                        if (!error) {
                                if (ss.ss_family == AF_INET)
@@ -1128,7 +1131,7 @@ keepsearching:
                        if (error && !nmp->nm_vers) {
                                nfsvers = NFS_VER2;
                                error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
-                                               nso->nso_so, NFS_PROG, nfsvers, 
+                                               nso->nso_so, NFS_PROG, nfsvers,
                                                (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
                                if (!error) {
                                        if (ss.ss_family == AF_INET)
@@ -1246,7 +1249,7 @@ keepsearching:
                if (saddr)
                        MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
                if (saddr && fh)
-                       MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 
+                       MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
                if (!saddr || !fh || !path) {
                        if (!error)
                                error = ENOMEM;
@@ -1498,13 +1501,19 @@ nfs_reconnect(struct nfsmount *nmp)
        thread_t thd = current_thread();
        int error, wentdown = 0, verbose = 1;
        time_t lastmsg;
+       int timeo;
 
        microuptime(&now);
        lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay);
 
        nfs_disconnect(nmp);
 
-       while ((error = nfs_connect(nmp, verbose, 30))) {
+
+       lck_mtx_lock(&nmp->nm_lock);
+       timeo = nfs_is_squishy(nmp) ? 8 : 30;
+       lck_mtx_unlock(&nmp->nm_lock);
+
+       while ((error = nfs_connect(nmp, verbose, timeo))) {
                verbose = 0;
                nfs_disconnect(nmp);
                if ((error == EINTR) || (error == ERESTART))
@@ -1849,16 +1858,18 @@ nfs_mount_check_dead_timeout(struct nfsmount *nmp)
 {
        struct timeval now;
 
-       if (nmp->nm_deadtimeout <= 0)
-               return;
        if (nmp->nm_deadto_start == 0)
                return;
        if (nmp->nm_state & NFSSTA_DEAD)
                return;
+       nfs_is_squishy(nmp);
+       if (nmp->nm_curdeadtimeout <= 0)
+               return;
        microuptime(&now);
-       if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_deadtimeout)
+       if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout)
                return;
-       printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+       printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
+              (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
        nmp->nm_state |= NFSSTA_DEAD;
        vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
 }
@@ -2360,7 +2371,7 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
                                status = error;
                        else if ((error == ENOBUFS) || (error == ENOMEM))
                                status = NFSERR_RESOURCE;
-                       else 
+                       else
                                status = NFSERR_SERVERFAULT;
                        error = 0;
                        nfsm_chain_null(&nmrep);
@@ -2508,7 +2519,7 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
                                status = error;
                        else if ((error == ENOBUFS) || (error == ENOMEM))
                                status = NFSERR_RESOURCE;
-                       else 
+                       else
                                status = NFSERR_SERVERFAULT;
                        error = 0;
                }
@@ -2529,7 +2540,7 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 
 nfsmout:
        if (status == EBADRPC)
-               OSAddAtomic(1, &nfsstats.rpcinvalid);
+               OSAddAtomic64(1, &nfsstats.rpcinvalid);
 
        /* build reply header */
        error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
@@ -2838,7 +2849,7 @@ again:
                                microuptime(&now);
                                if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
                                        /* soft mount in reconnect for a while... terminate ASAP */
-                                       OSAddAtomic(1, &nfsstats.rpctimeouts);
+                                       OSAddAtomic64(1, &nfsstats.rpctimeouts);
                                        req->r_flags |= R_SOFTTERM;
                                        req->r_error = error = ETIMEDOUT;
                                        break;
@@ -2918,7 +2929,7 @@ again:
                } else {
                        /*
                         * When retransmitting, turn timing off
-                        * and divide congestion window by 2. 
+                        * and divide congestion window by 2.
                         */
                        req->r_flags &= ~R_TIMING;
                        nmp->nm_cwnd >>= 1;
@@ -2970,7 +2981,7 @@ again:
                /* SUCCESS */
                req->r_flags &= ~R_RESENDERR;
                if (rexmit)
-                       OSAddAtomic(1, &nfsstats.rpcretries);
+                       OSAddAtomic64(1, &nfsstats.rpcretries);
                req->r_flags |= R_SENT;
                if (req->r_flags & R_WAITSENT) {
                        req->r_flags &= ~R_WAITSENT;
@@ -3052,6 +3063,9 @@ again:
                        !req->r_nmp ? "<unmounted>" :
                        vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
 
+       if (nfs_is_dead(error, nmp))
+               error = EIO;
+
        /* prefer request termination error over other errors */
        error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
        if (error2)
@@ -3201,6 +3215,7 @@ nfs_sock_poke(struct nfsmount *nmp)
        msg.msg_iovlen = 1;
        error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
        NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
+       nfs_is_dead(error, nmp);
 }
 
 /*
@@ -3219,7 +3234,7 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
        nfsm_chain_get_32(error, &nmrep, rxid);
        nfsm_chain_get_32(error, &nmrep, reply);
        if (error || (reply != RPC_REPLY)) {
-               OSAddAtomic(1, &nfsstats.rpcinvalid);
+               OSAddAtomic64(1, &nfsstats.rpcinvalid);
                mbuf_freem(mrep);
                return;
        }
@@ -3307,7 +3322,7 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
        if (!req) {
                /* not matched to a request, so drop it. */
                lck_mtx_unlock(nfs_request_mutex);
-               OSAddAtomic(1, &nfsstats.rpcunexpected);
+               OSAddAtomic64(1, &nfsstats.rpcunexpected);
                mbuf_freem(mrep);
        }
 }
@@ -3443,7 +3458,7 @@ nfs_request_create(
        }
 
        if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS))
-               OSAddAtomic(1, &nfsstats.rpccnt[procnum]);
+               OSAddAtomic64(1, &nfsstats.rpccnt[procnum]);
        if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL))
                panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
 
@@ -3667,7 +3682,7 @@ nfs_request_send(struct nfsreq *req, int wait)
                    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
        }
 
-       OSAddAtomic(1, &nfsstats.rpcrequests);
+       OSAddAtomic64(1, &nfsstats.rpcrequests);
 
        /*
         * Chain request into list of outstanding requests. Be sure
@@ -3884,7 +3899,7 @@ nfs_request_finish(
                        if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) {
                                /* we're not yet completely mounted and */
                                /* we can't complete an RPC, so we fail */
-                               OSAddAtomic(1, &nfsstats.rpctimeouts);
+                               OSAddAtomic64(1, &nfsstats.rpctimeouts);
                                nfs_softterm(req);
                                error = req->r_error;
                                goto nfsmout;
@@ -3904,7 +3919,7 @@ nfs_request_finish(
                        }
                        if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) {
                                /* for soft mounts, just give up after a short while */
-                               OSAddAtomic(1, &nfsstats.rpctimeouts);
+                               OSAddAtomic64(1, &nfsstats.rpctimeouts);
                                nfs_softterm(req);
                                error = req->r_error;
                                goto nfsmout;
@@ -4174,7 +4189,7 @@ nfs_request2(
  * server. Associate the context that we are setting up with the request that we
  * are sending.
  */
+
 int
 nfs_request_gss(
                mount_t mp,
@@ -4192,7 +4207,7 @@ nfs_request_gss(
        if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req)))
                return (error);
        req->r_flags |= (flags & R_OPTMASK);
-       
+
        if (cp == NULL) {
                printf("nfs_request_gss request has no context\n");
                nfs_request_rele(req);
@@ -4218,7 +4233,7 @@ nfs_request_gss(
        nfs_request_rele(req);
        return (error);
 }
-       
+
 /*
  * Create and start an asynchronous NFS request.
  */
@@ -4533,7 +4548,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
                                lck_mtx_unlock(&nmp->nm_lock);
                                /* we're not yet completely mounted and */
                                /* we can't complete an RPC, so we fail */
-                               OSAddAtomic(1, &nfsstats.rpctimeouts);
+                               OSAddAtomic64(1, &nfsstats.rpctimeouts);
                                nfs_softterm(req);
                                finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
                                wakeup(req);
@@ -4549,10 +4564,10 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
                 * Put a reasonable limit on the maximum timeout,
                 * and reduce that limit when soft mounts get timeouts or are in reconnect.
                 */
-               if (!NMFLAG(nmp, SOFT))
+               if (!NMFLAG(nmp, SOFT) && !nfs_can_squish(nmp))
                        maxtime = NFS_MAXTIMEO;
                else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
-                        ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
+                        ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
                        maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
                else
                        maxtime = NFS_MINTIMEO/4;
@@ -4608,10 +4623,10 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
                }
 
                /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
-               if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
+               if ((NMFLAG(nmp, SOFT) ||  (req->r_flags & (R_SETUP|R_RECOVER))) &&
                    ((req->r_rexmit >= req->r_retry) || /* too many */
                     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
-                       OSAddAtomic(1, &nfsstats.rpctimeouts);
+                       OSAddAtomic64(1, &nfsstats.rpctimeouts);
                        lck_mtx_lock(&nmp->nm_lock);
                        if (!(nmp->nm_state & NFSSTA_TIMEO)) {
                                lck_mtx_unlock(&nmp->nm_lock);
@@ -5037,7 +5052,7 @@ nfs_portmap_lookup(
                pmvers = RPCBVERS4;
                pmproc = RPCBPROC_GETVERSADDR;
        } else {
-               return (EINVAL);
+               return (EINVAL);
        }
        nfsm_chain_null(&nmreq);
        nfsm_chain_null(&nmrep);
@@ -5140,6 +5155,144 @@ nfs_msg(thread_t thd,
        return (0);
 }
 
+#define        NFS_SQUISH_MOBILE_ONLY          0x0001          /* Squish mounts only on mobile machines */
+#define NFS_SQUISH_AUTOMOUNTED_ONLY    0x0002          /* Squish mounts only if they are automounted */
+#define NFS_SQUISH_SOFT                        0x0004          /* Treat all soft mounts as though they were on a mobile machine */
+#define NFS_SQUISH_QUICK               0x0008          /* Try to squish mounts more quickly. */
+#define NFS_SQUISH_SHUTDOWN            0x1000          /* Squish all mounts on shutdown. Currently not implemented */
+
+uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK;
+int32_t nfs_is_mobile;
+
+#define        NFS_SQUISHY_DEADTIMEOUT         8       /* Dead timeout for squishy mounts */
+#define NFS_SQUISHY_QUICKTIMEOUT       4       /* Quicker dead timeout when the NFS_SQUISH_QUICK bit is set in nfs_squishy_flags */
+
+/*
+ * Could this mount be squished?
+ */
+int
+nfs_can_squish(struct nfsmount *nmp)
+{
+       uint64_t flags = vfs_flags(nmp->nm_mountp);
+       int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) && NMFLAG(nmp, SOFT));
+
+       if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0)
+               return (0);
+
+       if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0)
+               return (0);
+
+       return (1);
+}
+
+/*
+ * NFS mounts default to "rw,hard" - but frequently on mobile clients
+ * the mount may become "not responding".  It's desirable to be able
+ * to unmount these dead mounts, but only if there is no risk of
+ * losing data or crashing applications.  A "squishy" NFS mount is one
+ * that can be force unmounted with little risk of harm.
+ *
+ * nfs_is_squishy checks whether a mount is in a squishy state.  A mount
+ * is squishy iff it is allowed to be squishy and it has no dirty pages,
+ * no mmapped files, and no files open for write.  Whether a mount is
+ * allowed to be squishy is controlled by the nfs_squishy_flags settings
+ * and the machine's mobility state; these flags can be set by sysctls.
+ *
+ * If nfs_is_squishy determines that we are in a squishy state, we set
+ * the current dead timeout to the larger of NFS_SQUISHY_DEADTIMEOUT (or
+ * NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set; see above) and
+ * 1/8th of the mount's nm_deadtimeout value; otherwise we reset the
+ * current dead timeout to the nm_deadtimeout value set at mount time.
+ *
+ * Assumes that nm_lock is held.
+ *
+ * Note this routine is racy, but its effect on the dead timeout only
+ * matters when we're in trouble and are likely to stay that way.  Since
+ * by default it applies only to automounted volumes on mobile machines,
+ * this is a reasonable trade-off between data integrity and user
+ * experience.  It can be disabled or tuned via the nfs.conf file.
+ */
+
+int
+nfs_is_squishy(struct nfsmount *nmp)
+{
+       mount_t mp = nmp->nm_mountp;
+       int squishy = 0;
+       int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;
+
+       NFS_SOCK_DBG(("nfs_is_squishy: %s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
+                     vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout,  nfs_is_mobile));
+
+       if (!nfs_can_squish(nmp))
+               goto out;
+
+       timeo =  (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo;
+       NFS_SOCK_DBG(("nfs_is_squishy:  nm_writers = %d  nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo));
+
+       if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
+               uint64_t flags = mp ? vfs_flags(mp) : 0;
+               squishy = 1;
+               
+               /* 
+                * Walk the nfs nodes and check for dirty buffers if we're not
+                * RDONLY and we've not already been declared squishy, since
+                * this can be a bit expensive.
+                */
+               if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY)) 
+                       squishy = !nfs_mount_is_dirty(mp);
+       }
+
+out:
+       if (squishy)
+               nmp->nm_state |= NFSSTA_SQUISHY;
+       else
+               nmp->nm_state &= ~NFSSTA_SQUISHY;
+
+       nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;
+                       
+       NFS_SOCK_DBG(("nfs_is_squishy: nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout));
+
+       return (squishy);
+}
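
Concretely, the timeo clamp in nfs_is_squishy means a mount's own dead timeout still matters once it exceeds the squishy base: the squishy timeout becomes the larger of the base and one eighth of nm_deadtimeout. A worked example with hypothetical values (nm_deadtimeout of 60 seconds, NFS_SQUISH_QUICK set):

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int
    main(void)
    {
            int base = 4;           /* NFS_SQUISHY_QUICKTIMEOUT */
            int deadtimeout = 60;   /* hypothetical nm_deadtimeout */
            int timeo;

            /* same expression as in nfs_is_squishy above */
            timeo = (deadtimeout > base) ? MAX(deadtimeout / 8, base) : base;
            printf("squishy dead timeout = %d sec\n", timeo);  /* 60/8 = 7 */
            return (0);
    }
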
+
+/*
+ * On a send operation, if we can't reach the server, we've got only one
+ * server to talk to, the NFS_SQUISH_QUICK flag is set, and we are in a
+ * squishy state, then mark the mount as dead and ask to be forcibly
+ * unmounted. Return 1 if we're dead and 0 otherwise.
+ */
+static int
+nfs_is_dead_lock(int error, struct nfsmount *nmp)
+{
+       if (nmp->nm_state & NFSSTA_DEAD)
+               return (1);
+
+       if ((error != ENETUNREACH && error != EHOSTUNREACH) ||
+           !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1))
+               return (0);
+       if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
+               printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+               nmp->nm_state |= NFSSTA_DEAD;
+               vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
+               return (1);
+       }
+       return (0);
+}
+
+int
+nfs_is_dead(int error, struct nfsmount *nmp)
+{
+       int is_dead;
+
+       lck_mtx_lock(&nmp->nm_lock);
+       is_dead = nfs_is_dead_lock(error, nmp);
+       lck_mtx_unlock(&nmp->nm_lock);
+
+       return (is_dead);
+}
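
nfs_is_dead is split in a common lock-wrapper style: nfs_is_dead_lock is the core check and expects nm_lock to be held, while nfs_is_dead is a thin wrapper that takes and drops the mutex for callers arriving without it (the send and poke paths above). The shape of that idiom, with a pthread mutex standing in for lck_mtx:

    #include <pthread.h>

    #define DEMO_DEAD 0x0001

    struct demo_mount {
            pthread_mutex_t nm_lock;
            int             nm_state;
    };

    /* core check: caller must hold nm_lock (the _lock suffix convention) */
    static int
    demo_is_dead_lock(struct demo_mount *nmp)
    {
            return ((nmp->nm_state & DEMO_DEAD) != 0);
    }

    /* wrapper for callers that don't already hold the lock */
    static int
    demo_is_dead(struct demo_mount *nmp)
    {
            int is_dead;

            pthread_mutex_lock(&nmp->nm_lock);
            is_dead = demo_is_dead_lock(nmp);
            pthread_mutex_unlock(&nmp->nm_lock);
            return (is_dead);
    }
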
+
 void
 nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg)
 {
@@ -5169,14 +5322,17 @@ nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *m
 
        unresponsive = (nmp->nm_state & timeoutmask);
 
-       if (unresponsive && (nmp->nm_deadtimeout > 0)) {
+       nfs_is_squishy(nmp);
+
+       if (unresponsive && (nmp->nm_curdeadtimeout > 0)) {
                microuptime(&now);
                if (!wasunresponsive) {
                        nmp->nm_deadto_start = now.tv_sec;
                        nfs_mount_sock_thread_wake(nmp);
-               } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_deadtimeout) {
+               } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout) {
                        if (!(nmp->nm_state & NFSSTA_DEAD))
-                               printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+                               printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname,
+                                      (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "");
                        nmp->nm_state |= NFSSTA_DEAD;
                }
        }
@@ -5225,8 +5381,9 @@ nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
 
        unresponsive = (nmp->nm_state & timeoutmask);
 
-       if (nmp->nm_deadto_start)
-               nmp->nm_deadto_start = 0;
+       nmp->nm_deadto_start = 0;
+       nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
+       nmp->nm_state &= ~NFSSTA_SQUISHY;
        lck_mtx_unlock(&nmp->nm_lock);
 
        if (softnobrowse)
@@ -5350,7 +5507,7 @@ done:
 
        *nmrepp = nmrep;
        if ((err != 0) && (err != NFSERR_RETVOID))
-               OSAddAtomic(1, &nfsstats.srvrpc_errs);
+               OSAddAtomic64(1, &nfsstats.srvrpc_errs);
        return (0);
 }
 
@@ -5487,11 +5644,11 @@ nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag)
                        ns_flag = SLP_NEEDQ;
                        goto dorecs;
                }
-               
+
                bzero(&msg, sizeof(msg));
                msg.msg_name = (caddr_t)&nam;
                msg.msg_namelen = sizeof(nam);
-               
+
                do {
                        bytes_read = 1000000000;
                        error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read);
@@ -5670,7 +5827,7 @@ nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
            if (slp->ns_frag == NULL) {
                slp->ns_frag = recm;
            } else {
-               m = slp->ns_frag;
+               m = slp->ns_frag;
                while ((m2 = mbuf_next(m)))
                    m = m2;
                if ((error = mbuf_setnext(m, recm)))
@@ -5918,4 +6075,3 @@ nfsrv_wakenfsd(struct nfsrv_sock *slp)
 }
 
 #endif /* NFSSERVER */
-
index 7fde3da6b04711bb9838a6f0712c89dec13ad60b..b0eb21d7347c56ba600c315f6cd06cb4eedf22d1 100644 (file)
@@ -253,10 +253,10 @@ loop:
                        if (rp->rc_state == RC_UNUSED)
                                panic("nfsrv cache");
                        if (rp->rc_state == RC_INPROG) {
-                               OSAddAtomic(1, &nfsstats.srvcache_inproghits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_inproghits);
                                ret = RC_DROPIT;
                        } else if (rp->rc_flag & RC_REPSTATUS) {
-                               OSAddAtomic(1, &nfsstats.srvcache_nonidemdonehits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_nonidemdonehits);
                                nd->nd_repstat = rp->rc_status;
                                error = nfsrv_rephead(nd, slp, &nmrep, 0);
                                if (error) {
@@ -268,7 +268,7 @@ loop:
                                        *mrepp = nmrep.nmc_mhead;
                                }
                        } else if (rp->rc_flag & RC_REPMBUF) {
-                               OSAddAtomic(1, &nfsstats.srvcache_nonidemdonehits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_nonidemdonehits);
                                error = mbuf_copym(rp->rc_reply, 0, MBUF_COPYALL, MBUF_WAITOK, mrepp);
                                if (error) {
                                        printf("nfsrv cache: reply copym failed for nonidem request hit\n");
@@ -277,7 +277,7 @@ loop:
                                        ret = RC_REPLY;
                                }
                        } else {
-                               OSAddAtomic(1, &nfsstats.srvcache_idemdonehits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_idemdonehits);
                                rp->rc_state = RC_INPROG;
                                ret = RC_DOIT;
                        }
@@ -290,7 +290,7 @@ loop:
                        return (ret);
                }
        }
-       OSAddAtomic(1, &nfsstats.srvcache_misses);
+       OSAddAtomic64(1, &nfsstats.srvcache_misses);
        if (nfsrv_reqcache_count < nfsrv_reqcache_size) {
                /* try to allocate a new entry */
                MALLOC(rp, struct nfsrvcache *, sizeof *rp, M_NFSD, M_WAITOK);
index dccead918b8f413ecaae330e58dc488e48b9f1ec..e0e9446bf6c0872586b4e1090b8af123e4e25b7b 100644 (file)
 /*
  * NFS globals
  */
-struct nfsstats        nfsstats;
+struct nfsstats        __attribute__((aligned(8))) nfsstats;
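+/*
+ * The stats are now updated with OSAddAtomic64 (see the conversions above);
+ * the explicit alignment keeps the 64-bit counters naturally aligned, which
+ * is presumably what the atomic 64-bit operations require on all targets.
+ */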
 size_t nfs_mbuf_mhlen = 0, nfs_mbuf_minclsize = 0;
 
 /*
@@ -1055,7 +1055,7 @@ nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, in
                        if (groupcount < 1)
                                return (EINVAL);
 
-                       auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ?
+                       auth_len = (((((uint32_t)groupcount - 1) > nmp->nm_numgrps) ?
                                nmp->nm_numgrps : (groupcount - 1)) << 2) +
                                5 * NFSX_UNSIGNED;
                        break;
@@ -1169,7 +1169,7 @@ add_cred:
                        error = 0;
                        req->r_auth = auth_type = RPCAUTH_SYS;
                        (void)kauth_cred_getgroups(cred, grouplist, &groupcount);
-                       auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ?
+                       auth_len = (((((uint32_t)groupcount - 1) > nmp->nm_numgrps) ?
                                nmp->nm_numgrps : (groupcount - 1)) << 2) +
                                5 * NFSX_UNSIGNED;
                        authsiz = nfsm_rndup(auth_len);
@@ -1216,6 +1216,7 @@ nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap)
 {
        int error = 0;
        enum vtype vtype;
+       nfstype nvtype;
        u_short vmode;
        uint32_t val, val2;
        dev_t rdev;
@@ -1237,12 +1238,12 @@ nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap)
        NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY);
        NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA);
 
-       nfsm_chain_get_32(error, nmc, vtype);
+       nfsm_chain_get_32(error, nmc, nvtype);
        nfsm_chain_get_32(error, nmc, vmode);
        nfsmout_if(error);
 
        if (nfsvers == NFS_VER3) {
-               nvap->nva_type = nfstov_type(vtype, nfsvers);
+               nvap->nva_type = vtype = nfstov_type(nvtype, nfsvers);
        } else {
                /*
                 * The duplicate information returned in fa_type and fa_mode
@@ -1261,7 +1262,7 @@ nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap)
                 * contain any type information (while also introducing
                 * sockets and FIFOs for fa_type).
                 */
-               vtype = nfstov_type(vtype, nfsvers);
+               vtype = nfstov_type(nvtype, nfsvers);
                if ((vtype == VNON) || ((vtype == VREG) && ((vmode & S_IFMT) != 0)))
                        vtype = IFTOVT(vmode);
                nvap->nva_type = vtype;
@@ -1635,7 +1636,7 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags)
        /* Check if the attributes are valid. */
        if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) {
                FSDBG(528, np, 0, 0xffffff01, ENOENT);
-               OSAddAtomic(1, &nfsstats.attrcache_misses);
+               OSAddAtomic64(1, &nfsstats.attrcache_misses);
                return (ENOENT);
        }
 
@@ -1644,18 +1645,18 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags)
        microuptime(&nowup);
        if ((nowup.tv_sec - np->n_attrstamp) >= timeo) {
                FSDBG(528, np, 0, 0xffffff02, ENOENT);
-               OSAddAtomic(1, &nfsstats.attrcache_misses);
+               OSAddAtomic64(1, &nfsstats.attrcache_misses);
                return (ENOENT);
        }
        if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) {
                FSDBG(528, np, 0, 0xffffff02, ENOENT);
-               OSAddAtomic(1, &nfsstats.attrcache_misses);
+               OSAddAtomic64(1, &nfsstats.attrcache_misses);
                return (ENOENT);
        }
 
        nvap = &np->n_vattr;
        FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace);
-       OSAddAtomic(1, &nfsstats.attrcache_hits);
+       OSAddAtomic64(1, &nfsstats.attrcache_hits);
 
        if (nvap->nva_type != VREG) {
                np->n_size = nvap->nva_size;
@@ -2099,12 +2100,12 @@ nfsrv_namei(
 
        /* Check for encountering a symbolic link */
        if (cnp->cn_flags & ISSYMLINK) {
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                if ((cnp->cn_flags & FSNODELOCKHELD)) {
                        cnp->cn_flags &= ~FSNODELOCKHELD;
                        unlock_fsnode(nip->ni_dvp, NULL);
                }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
                if (cnp->cn_flags & (LOCKPARENT | WANTPARENT))
                        vnode_put(nip->ni_dvp);
                if (nip->ni_vp) {
index d6de219ba6f386330ca32ef6ddfa9221fce5d34f..c79ab007e272773d0371a6f94b7453b23b5c78d0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -166,6 +166,9 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLA
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+
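+/*
+ * From user space these should surface as vfs.generic.nfs.client.is_mobile
+ * and vfs.generic.nfs.client.squishy_flags, e.g. (hypothetical value, the
+ * NFS_SQUISH_* bits are defined elsewhere):
+ *
+ *     sysctl -w vfs.generic.nfs.client.squishy_flags=1
+ */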
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
@@ -184,6 +187,12 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOC
 #endif
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
+#endif
 #endif /* NFSSERVER */
 
 
@@ -724,6 +733,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 {
        struct nfsrv_sock *slp;
        int error = 0, sodomain, sotype, soprotocol, on = 1;
+       int first;
        struct timeval timeo;
 
        /* make sure mbuf constants are set up */
@@ -808,6 +818,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        }
 
        /* add the socket to the list */
+       first = TAILQ_EMPTY(&nfsrv_socklist);
        TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
 
        sock_retain(so); /* grab a retain count on the socket */
@@ -815,10 +826,8 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        slp->ns_sotype = sotype;
        slp->ns_nam = mynam;
 
-       /* set up the socket upcall */
-       sock_setupcall(so, nfsrv_rcv, slp);
-       /* just playin' it safe */
-       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+       /* set up the socket up-call */
+       nfsrv_uc_addsock(slp, first);
 
        /* mark that the socket is not in the nfsrv_sockwg list */
        slp->ns_wgq.tqe_next = SLPNOLIST;
@@ -878,6 +887,7 @@ nfssvc_nfsd(void)
        u_quad_t cur_usec;
        struct timeval now;
        struct vfs_context context;
+       struct timespec to;
 
 #ifndef nolint
        cacherep = RC_DOIT;
@@ -891,11 +901,16 @@ nfssvc_nfsd(void)
        lck_mtx_lock(nfsd_mutex);
        if (nfsd_thread_count++ == 0)
                nfsrv_initcache();              /* Init the server request cache */
+       
        TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
        lck_mtx_unlock(nfsd_mutex);
 
        context.vc_thread = current_thread();
 
+       /* Set a timeout so that nfsd threads can wake up and see if they are still needed. */
+       to.tv_sec = 5;
+       to.tv_nsec = 0;
+
        /*
         * Loop getting rpc requests until SIGKILL.
         */
@@ -923,12 +938,14 @@ nfssvc_nfsd(void)
                                }
                                nfsd->nfsd_flag |= NFSD_WAITING;
                                TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
-                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
+                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
                                if (error) {
                                        if (nfsd->nfsd_flag & NFSD_WAITING) {
                                                TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
                                                nfsd->nfsd_flag &= ~NFSD_WAITING;
                                        }
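+                                       /* EWOULDBLOCK means the 5-second timeout above fired; recheck whether this nfsd is still needed */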
+                                       if (error == EWOULDBLOCK)
+                                               continue;
                                        goto done;
                                }
                        }
@@ -1083,7 +1100,7 @@ nfssvc_nfsd(void)
 
                        }
                        if (error) {
-                               OSAddAtomic(1, &nfsstats.srv_errs);
+                               OSAddAtomic64(1, &nfsstats.srv_errs);
                                nfsrv_updatecache(nd, FALSE, mrep);
                                if (nd->nd_nam2) {
                                        mbuf_freem(nd->nd_nam2);
@@ -1091,7 +1108,7 @@ nfssvc_nfsd(void)
                                }
                                break;
                        }
-                       OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
+                       OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
                        nfsrv_updatecache(nd, TRUE, mrep);
                        /* FALLTHRU */
 
@@ -1282,8 +1299,8 @@ nfsrv_zapsock(struct nfsrv_sock *slp)
                return;
 
        /*
-        * Attempt to deter future upcalls, but leave the
-        * upcall info in place to avoid a race with the
+        * Attempt to deter future up-calls, but leave the
+        * up-call info in place to avoid a race with the
         * networking code.
         */
        socket_lock(so, 1);
@@ -1291,6 +1308,11 @@ nfsrv_zapsock(struct nfsrv_sock *slp)
        socket_unlock(so, 1);
 
        sock_shutdown(so, SHUT_RDWR);
+
+       /*
+        * Remove from the up-call queue
+        */
+       nfsrv_uc_dequeue(slp);
 }
 
 /*
@@ -1316,6 +1338,9 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
        slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
        slp->ns_reccnt = 0;
 
+       if (slp->ns_ua)
+               FREE(slp->ns_ua, M_NFSSVC);
+
        for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
                nnwp = nwp->nd_tq.le_next;
                LIST_REMOVE(nwp, nd_tq);
@@ -1506,6 +1531,8 @@ nfsrv_cleanup(void)
        lck_mtx_unlock(nfsrv_fmod_mutex);
 #endif
 
+       nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
+       
        nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
 
        nfsrv_cleancache();     /* And clear out server cache */
diff --git a/bsd/nfs/nfs_upcall.c b/bsd/nfs/nfs_upcall.c
new file mode 100644 (file)
index 0000000..7d6f85f
--- /dev/null
+++ b/bsd/nfs/nfs_upcall.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2011 Apple Inc.  All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <stdint.h>
+#include <sys/param.h>
+#include <sys/mount_internal.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+#include <libkern/libkern.h>
+#include <libkern/OSAtomic.h>
+#include <kern/debug.h>
+#include <kern/thread.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+
+#ifdef NFS_UC_DEBUG
+#define DPRINT(fmt, ...) printf(fmt,## __VA_ARGS__)
+#else
+#define DPRINT(fmt, ...)
+#endif
+
+struct nfsrv_uc_arg {
+       TAILQ_ENTRY(nfsrv_uc_arg) nua_svcq;
+       socket_t nua_so;
+       struct nfsrv_sock *nua_slp;
+       int nua_waitflag;  /* Should always be MBUF_DONTWAIT */
+       uint32_t nua_flags;
+       uint32_t nua_qi;
+};
+#define NFS_UC_QUEUED  0x0001
+
+#define NFS_UC_HASH_SZ 7
+#define NFS_UC_HASH(x) ((((uint32_t)(uintptr_t)(x)) >> 3) % nfsrv_uc_thread_count)
+
+TAILQ_HEAD(nfsrv_uc_q, nfsrv_uc_arg);
+
+static struct nfsrv_uc_queue {
+       lck_mtx_t               *ucq_lock;
+       struct nfsrv_uc_q       ucq_queue[1];
+       thread_t                ucq_thd;
+       uint32_t                ucq_flags;
+} nfsrv_uc_queue_tbl[NFS_UC_HASH_SZ];
+#define NFS_UC_QUEUE_SLEEPING  0x0001
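+
+/*
+ * Sketch of the queue selection: NFS_UC_HASH() above maps an nfsrv_sock
+ * pointer to one of the started worker threads, e.g. for a hypothetical
+ * slp address A the queue index is ((A >> 3) % nfsrv_uc_thread_count)
+ * (at most NFS_UC_HASH_SZ), so all up-calls for a given socket are
+ * serialized on the same worker queue.
+ */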
+
+static lck_grp_t *nfsrv_uc_group;
+static lck_mtx_t *nfsrv_uc_shutdown_lock;
+static volatile int nfsrv_uc_shutdown = 0;
+static int32_t nfsrv_uc_thread_count;
+
+extern kern_return_t thread_terminate(thread_t);
+
+#ifdef NFS_UC_Q_DEBUG
+int nfsrv_uc_use_proxy = 1;
+uint32_t nfsrv_uc_queue_limit;
+uint32_t nfsrv_uc_queue_max_seen;
+volatile uint32_t nfsrv_uc_queue_count;
+#endif
+
+/*
+ * Thread that dequeues up-calls and runs the nfsrv_rcv routine
+ */
+static void
+nfsrv_uc_thread(void *arg, wait_result_t wr __unused)
+{
+       int qi = (int)(uintptr_t)arg;
+       int error;
+       struct nfsrv_uc_arg *ep = NULL;
+       struct nfsrv_uc_queue *myqueue = &nfsrv_uc_queue_tbl[qi];
+
+       DPRINT("nfsrv_uc_thread %d started\n", qi);
+       while (!nfsrv_uc_shutdown) {
+               lck_mtx_lock(myqueue->ucq_lock);
+
+               while (!nfsrv_uc_shutdown && TAILQ_EMPTY(myqueue->ucq_queue)) {
+                       myqueue->ucq_flags |= NFS_UC_QUEUE_SLEEPING;
+                       error = msleep(myqueue, myqueue->ucq_lock, PSOCK, "nfsd_upcall_handler", NULL);
+                       myqueue->ucq_flags &= ~NFS_UC_QUEUE_SLEEPING;
+                       if (error) {
+                               printf("nfsrv_uc_thread received error %d\n", error);
+                       }
+               }
+               if (nfsrv_uc_shutdown) {
+                       lck_mtx_unlock(myqueue->ucq_lock);
+                       break;
+               }
+
+
+               ep = TAILQ_FIRST(myqueue->ucq_queue);
+               DPRINT("nfsrv_uc_thread:%d dequeue %p from %p\n", qi, ep, myqueue);
+
+               TAILQ_REMOVE(myqueue->ucq_queue, ep, nua_svcq);
+
+               ep->nua_flags &= ~NFS_UC_QUEUED;
+
+               lck_mtx_unlock(myqueue->ucq_lock);
+
+#ifdef NFS_UC_Q_DEBUG
+               OSDecrementAtomic(&nfsrv_uc_queue_count);
+#endif
+
+               DPRINT("calling nfsrv_rcv for %p\n", (void *)ep->nua_slp);
+               nfsrv_rcv(ep->nua_so, (void *)ep->nua_slp, ep->nua_waitflag);
+       }
+
+       lck_mtx_lock(nfsrv_uc_shutdown_lock);
+       nfsrv_uc_thread_count--;
+       wakeup(&nfsrv_uc_thread_count);
+       lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+
+       thread_terminate(current_thread());
+}
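+
+/*
+ * nfsrv_uc_thread is the consumer side of the queues; the producer is
+ * nfsrv_uc_proxy below, which the socket layer invokes as the up-call.
+ */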
+
+/*
+ * Dequeue a closed nfsrv_sock from the up-call queue if needed.
+ * Called from nfsrv_zapsock.
+ */
+void
+nfsrv_uc_dequeue(struct nfsrv_sock *slp)
+{
+       struct nfsrv_uc_arg *ap = slp->ns_ua;
+       struct nfsrv_uc_queue *myqueue;
+
+       /*
+        * We assume that the socket up-calls have been stopped and the socket
+        * is shutting down, so there is no need to acquire the lock to check
+        * that the flag is cleared.
+        */
+       if (ap == NULL || (ap->nua_flags & NFS_UC_QUEUED) == 0)
+               return;
+       myqueue = &nfsrv_uc_queue_tbl[ap->nua_qi];
+       /* If we're queued we might race with nfsrv_uc_thread */
+       lck_mtx_lock(myqueue->ucq_lock);
+       if (ap->nua_flags & NFS_UC_QUEUED) {
+               printf("nfsrv_uc_dequeue remove %p\n", ap);
+               TAILQ_REMOVE(myqueue->ucq_queue, ap, nua_svcq);
+               ap->nua_flags &= ~NFS_UC_QUEUED;
+#ifdef NFS_UC_Q_DEBUG
+               OSDecrementAtomic(&nfsrv_uc_queue_count);
+#endif         
+       }
+       lck_mtx_unlock(myqueue->ucq_lock);
+}
+
+/*
+ * Allocate and initialize globals for nfsrv_sock up-call support.
+ */
+void
+nfsrv_uc_init(void)
+{
+       int i;
+
+       nfsrv_uc_group = lck_grp_alloc_init("nfs_upcall_locks", LCK_GRP_ATTR_NULL);
+       for (i = 0; i < NFS_UC_HASH_SZ; i++) {
+               TAILQ_INIT(nfsrv_uc_queue_tbl[i].ucq_queue);
+               nfsrv_uc_queue_tbl[i].ucq_lock = lck_mtx_alloc_init(nfsrv_uc_group, LCK_ATTR_NULL);
+               nfsrv_uc_queue_tbl[i].ucq_thd = THREAD_NULL;
+               nfsrv_uc_queue_tbl[i].ucq_flags = 0;
+       }
+       nfsrv_uc_shutdown_lock = lck_mtx_alloc_init(nfsrv_uc_group, LCK_ATTR_NULL);
+}
+
+/*
+ * Start up-call threads to service nfsrv_sock(s)
+ * Called from the first call of nfsrv_uc_addsock
+ */
+static void
+nfsrv_uc_start(void)
+{
+       int32_t i;
+       int error;
+
+#ifdef NFS_UC_Q_DEBUG
+       if (!nfsrv_uc_use_proxy)
+               return;
+#endif
+       DPRINT("nfsrv_uc_start\n");
+
+       /* Wait until previous shutdown finishes */
+       lck_mtx_lock(nfsrv_uc_shutdown_lock);
+       while (nfsrv_uc_shutdown || nfsrv_uc_thread_count > 0)
+               msleep(&nfsrv_uc_thread_count, nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_wait", NULL);
+
+       /* Start up-call threads */
+       for (i = 0; i < NFS_UC_HASH_SZ; i++) {
+               error = kernel_thread_start(nfsrv_uc_thread, (void *)(uintptr_t)i, &nfsrv_uc_queue_tbl[nfsrv_uc_thread_count].ucq_thd);
+               if (!error) {
+                       nfsrv_uc_thread_count++;
+               } else {
+                       printf("nfsd: Could not start nfsrv_uc_thread: %d\n", error);
+               }
+       }
+       if (nfsrv_uc_thread_count == 0) {
+               printf("nfsd: Could not start nfsd proxy up-call service. Falling back\n");
+               goto out;
+       }
+
+out:
+#ifdef NFS_UC_Q_DEBUG
+       nfsrv_uc_queue_count = 0ULL;
+       nfsrv_uc_queue_max_seen = 0ULL;
+#endif
+       lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+}
+
+/*
+ * Stop the up-call threads.
+ * Called from nfsrv_uc_cleanup.
+ */
+static void
+nfsrv_uc_stop(void)
+{
+       int32_t i;
+       int32_t thread_count = nfsrv_uc_thread_count;
+
+       DPRINT("Entering nfsrv_uc_stop\n");
+
+       /* Signal up-call threads to stop */
+       nfsrv_uc_shutdown = 1;
+       for (i = 0; i < thread_count; i++) {
+               lck_mtx_lock(nfsrv_uc_queue_tbl[i].ucq_lock);
+               wakeup(&nfsrv_uc_queue_tbl[i]);
+               lck_mtx_unlock(nfsrv_uc_queue_tbl[i].ucq_lock);
+       }
+
+       /* Wait until they are done shutting down */
+       lck_mtx_lock(nfsrv_uc_shutdown_lock);
+       while (nfsrv_uc_thread_count > 0)
+               msleep(&nfsrv_uc_thread_count, nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_stop", NULL);
+
+       /* Deallocate old threads */
+       for (i = 0; i < thread_count; i++) {
+               if (nfsrv_uc_queue_tbl[i].ucq_thd != THREAD_NULL)
+                       thread_deallocate(nfsrv_uc_queue_tbl[i].ucq_thd);
+               nfsrv_uc_queue_tbl[i].ucq_thd = THREAD_NULL;
+       }
+
+       /* Enable restarting */
+       nfsrv_uc_shutdown = 0;
+       lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+}
+
+/*
+ * Shutdown up-calls for nfsrv_socks.
+ *     Make sure nothing is queued on the up-call queues
+ *     Shutdown the up-call threads
+ * Called from nfssvc_cleanup.
+ */
+void
+nfsrv_uc_cleanup(void)
+{
+       int i;
+
+       DPRINT("Entering nfsrv_uc_cleanup\n");
+
+       /*
+        * Everything should be dequeued at this point, or will be as sockets
+        * are closed, but to be safe we'll make sure.
+        */
+       for (i = 0; i < NFS_UC_HASH_SZ; i++) {
+               struct nfsrv_uc_queue *queue = &nfsrv_uc_queue_tbl[i];
+
+               lck_mtx_lock(queue->ucq_lock);
+               while (!TAILQ_EMPTY(queue->ucq_queue)) {
+                       struct nfsrv_uc_arg *ep = TAILQ_FIRST(queue->ucq_queue);
+                       TAILQ_REMOVE(queue->ucq_queue, ep, nua_svcq);
+                       ep->nua_flags &= ~NFS_UC_QUEUED;
+               }
+               lck_mtx_unlock(queue->ucq_lock);
+       }
+
+       nfsrv_uc_stop();
+}
+
+/*
+ * This is the nfs up-call routine for server sockets.
+ * We used to set nfsrv_rcv as the up-call routine, but
+ * that turned out to be too much work for the interface
+ * thread, so we just queue the arguments that we would
+ * have passed to nfsrv_rcv and let a worker thread
+ * dequeue them and pass them on to nfsrv_rcv.
+ */
+static void
+nfsrv_uc_proxy(socket_t so, void *arg, int waitflag)
+{
+       struct nfsrv_uc_arg *uap = (struct nfsrv_uc_arg *)arg;
+       int qi = uap->nua_qi;
+       struct nfsrv_uc_queue *myqueue = &nfsrv_uc_queue_tbl[qi];
+
+       lck_mtx_lock(myqueue->ucq_lock);
+       DPRINT("nfsrv_uc_proxy called for %p (%p)\n", uap, uap->nua_slp);
+       DPRINT("\tUp-call queued on %d for wakeup of %p\n", qi, myqueue);
+       if (uap->nua_flags & NFS_UC_QUEUED) {
+               lck_mtx_unlock(myqueue->ucq_lock);
+               return;  /* Already queued */
+       }
+
+       uap->nua_so = so;
+       uap->nua_waitflag = waitflag;
+
+       TAILQ_INSERT_TAIL(myqueue->ucq_queue, uap, nua_svcq);
+
+       uap->nua_flags |= NFS_UC_QUEUED;
+       if (myqueue->ucq_flags & NFS_UC_QUEUE_SLEEPING)
+               wakeup(myqueue);
+
+#ifdef NFS_UC_Q_DEBUG
+       {
+               uint32_t count = OSIncrementAtomic(&nfsrv_uc_queue_count);
+       
+               /* This is a bit racy, but it's only for debugging */
+               if (count > nfsrv_uc_queue_max_seen)
+                       nfsrv_uc_queue_max_seen = count;
+
+               if (nfsrv_uc_queue_limit && count > nfsrv_uc_queue_limit) {
+                       panic("nfsd up-call queue limit exceeded\n");
+               }
+       }
+#endif
+       lck_mtx_unlock(myqueue->ucq_lock);
+}
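+
+/*
+ * End-to-end flow of the proxy scheme, as set up by this file:
+ *
+ *     data arrives on a server socket
+ *      -> socket layer calls nfsrv_uc_proxy(so, slp->ns_ua, waitflag)
+ *      -> the request is queued on nfsrv_uc_queue_tbl[NFS_UC_HASH(slp)]
+ *         and the sleeping worker is woken
+ *      -> nfsrv_uc_thread dequeues it and calls nfsrv_rcv(so, slp, waitflag)
+ */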
+
+
+/*
+ * Set the up-call routine on the socket associated with the passed in
+ * nfsrv_sock.
+ * Assumes nfsd_mutex is held.
+ */
+void
+nfsrv_uc_addsock(struct nfsrv_sock *slp, int start)
+{
+       int on = 1;
+       struct nfsrv_uc_arg *arg;
+
+       if (start && nfsrv_uc_thread_count == 0)
+               nfsrv_uc_start();
+
+       /*
+        * We don't take a lock since once we're up nfsrv_uc_thread_count does
+        * not change until shutdown and then we should not be adding sockets to
+        * generate up-calls.
+        */
+       if (nfsrv_uc_thread_count) {
+               MALLOC(arg, struct nfsrv_uc_arg *, sizeof (struct nfsrv_uc_arg), M_NFSSVC, M_WAITOK | M_ZERO);
+               if (arg == NULL)
+                       goto direct;
+
+               slp->ns_ua = arg;
+               arg->nua_slp = slp;
+               arg->nua_qi = NFS_UC_HASH(slp);
+
+               sock_setupcall(slp->ns_so, nfsrv_uc_proxy, arg);
+       } else {
+direct:
+               slp->ns_ua = NULL;
+               DPRINT("setting nfsrv_rcv up-call\n");
+               sock_setupcall(slp->ns_so, nfsrv_rcv, slp);
+       }
+
+       /* just playin' it safe */
+       sock_setsockopt(slp->ns_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+
+       return;
+}
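+
+/*
+ * Typical call site (see the nfssvc_addsock change above): "start" is
+ * computed before the socket is inserted, so the worker threads are spun
+ * up when the first server socket appears:
+ *
+ *     first = TAILQ_EMPTY(&nfsrv_socklist);
+ *     TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+ *     ...
+ *     nfsrv_uc_addsock(slp, first);
+ */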
+
index 7a0323fde8191f3d361c2874dc657bf4f17ab7ee..1c571a21f48f955bf90b45461953891d271bfd40 100644 (file)
@@ -1575,8 +1575,12 @@ nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int ar
        /* copy socket address */
        if (inkernel)
                bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
-       else
-               error = copyin(args.addr, &ss, args.addrlen);
+       else {
+               if ((size_t)args.addrlen > sizeof (struct sockaddr_storage))
+                       error = EINVAL;
+               else
+                       error = copyin(args.addr, &ss, args.addrlen);
+       }
        nfsmout_if(error);
        ss.ss_len = args.addrlen;
 
@@ -2694,6 +2698,7 @@ mountnfs(
                nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
                nmp->nm_auth = RPCAUTH_SYS;
                nmp->nm_deadtimeout = 0;
+               nmp->nm_curdeadtimeout = 0;
                NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
        }
 
index a5917a2977773be7528b68f556a1511689e03a95..eb636101d7e253c89575779689a784d3603c821a 100644 (file)
@@ -855,9 +855,34 @@ out:
                NP(np, "nfs_vnop_open: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
        if (noop)
                nfs_open_owner_rele(noop);
+       if (!error && vtype == VREG && (ap->a_mode & FWRITE)) {
+               lck_mtx_lock(&nmp->nm_lock);
+               nmp->nm_state &= ~NFSSTA_SQUISHY;
+               nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
+               if (nmp->nm_curdeadtimeout <= 0)
+                       nmp->nm_deadto_start = 0;
+               nmp->nm_writers++;
+               lck_mtx_unlock(&nmp->nm_lock);
+       }
+               
        return (error);
 }
 
+static uint32_t
+nfs_no_of_open_file_writers(nfsnode_t np)
+{
+       uint32_t writers = 0;
+       struct nfs_open_file *nofp;
+
+       TAILQ_FOREACH(nofp,  &np->n_opens, nof_link) {
+               writers += nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw + nofp->nof_rw_dw +
+                       nofp->nof_w_drw + nofp->nof_rw_drw + nofp->nof_d_w_dw +
+                       nofp->nof_d_rw_dw + nofp->nof_d_w_drw + nofp->nof_d_rw_drw +
+                       nofp->nof_d_w + nofp->nof_d_rw;
+       }
+       
+       return (writers);
+}
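+
+/*
+ * The sum above covers every open mode that includes write access: write
+ * (w) and read-write (rw) opens, their deny-write/deny-read-write variants
+ * (_dw, _drw), and the delegated (d_) counterpart of each.
+ */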
 
 /*
  * NFS close vnode op
@@ -990,11 +1015,36 @@ nfs_vnop_close(
                 * Guess this is the final close.
                 * We should unlock all locks and close all opens.
                 */
+               uint32_t writers;
                mount_t mp = vnode_mount(vp);
                int force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));
+
+               writers = nfs_no_of_open_file_writers(np);
                nfs_release_open_state_for_node(np, force);
+               if (writers) {
+                       lck_mtx_lock(&nmp->nm_lock);
+                       if (writers > nmp->nm_writers) {
+                               NP(np, "nfs_vnop_close: number of write opens for mount underrun. Node has %d"
+                                  " opens for write. Mount has total of %d opens for write\n", 
+                                  writers, nmp->nm_writers);
+                               nmp->nm_writers = 0;
+                       } else {
+                               nmp->nm_writers -= writers;
+                       }
+                       lck_mtx_unlock(&nmp->nm_lock);
+               }
+               
                return (error);
+       } else if (fflag & FWRITE) {
+               lck_mtx_lock(&nmp->nm_lock);
+               if (nmp->nm_writers == 0) {
+                       NP(np, "nfs_vnop_close: removing open writer from mount, but mount has no files open for writing");
+               } else {
+                       nmp->nm_writers--;
+               }
+               lck_mtx_unlock(&nmp->nm_lock);
        }
+       
 
        noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
        if (!noop) {
@@ -1065,7 +1115,7 @@ nfs_close(
        struct nfs_lock_owner *nlop;
        int error = 0, changed = 0, delegated = 0, closed = 0, downgrade = 0;
        uint32_t newAccessMode, newDenyMode;
-
+       
        /* warn if modes don't match current state */
        if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode))
                NP(np, "nfs_close: mode mismatch %d %d, current %d %d, %d",
@@ -1191,6 +1241,7 @@ v3close:
                NP(np, "nfs_close: LOST%s, %d", !nofp->nof_opencnt ? " (last)" : "",
                        kauth_cred_getuid(nofp->nof_owner->noo_cred));
        }
+               
        return (error);
 }
 
@@ -2011,7 +2062,7 @@ nfs_vnop_lookup(
                /* FALLTHROUGH */
        case -1:
                /* cache hit, not really an error */
-               OSAddAtomic(1, &nfsstats.lookupcache_hits);
+               OSAddAtomic64(1, &nfsstats.lookupcache_hits);
 
                nfs_node_clear_busy(dnp);
                busyerror = ENOENT;
@@ -2063,7 +2114,7 @@ nfs_vnop_lookup(
        error = 0;
        newvp = NULLVP;
 
-       OSAddAtomic(1, &nfsstats.lookupcache_misses);
+       OSAddAtomic64(1, &nfsstats.lookupcache_misses);
 
        error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
        nfsmout_if(error);
@@ -2182,14 +2233,14 @@ nfs_vnop_readlink(
                return (error);
        }
 
-       OSAddAtomic(1, &nfsstats.biocache_readlinks);
+       OSAddAtomic64(1, &nfsstats.biocache_readlinks);
        error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp);
        if (error) {
                FSDBG(531, np, 0xd1e0002, 0, error);
                return (error);
        }
        if (!ISSET(bp->nb_flags, NB_CACHE)) {
-               OSAddAtomic(1, &nfsstats.readlink_bios);
+               OSAddAtomic64(1, &nfsstats.readlink_bios);
                buflen = bp->nb_bufsize;
                error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
                if (error) {
@@ -2542,7 +2593,7 @@ nfs_vnop_write(
        }
 
        do {
-               OSAddAtomic(1, &nfsstats.biocache_writes);
+               OSAddAtomic64(1, &nfsstats.biocache_writes);
                lbn = uio_offset(uio) / biosize;
                on = uio_offset(uio) % biosize;
                n = biosize - on;
@@ -4705,7 +4756,7 @@ nfs_vnop_readdir(
        }
 
        while (!error && !done) {
-               OSAddAtomic(1, &nfsstats.biocache_readdirs);
+               OSAddAtomic64(1, &nfsstats.biocache_readdirs);
                cookie = nextcookie;
 getbuffer:
                error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
@@ -4955,7 +5006,7 @@ nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
 
        if (cookie == dnp->n_eofcookie) { /* EOF cookie */
                nfs_node_unlock(dnp);
-               OSAddAtomic(1, &nfsstats.direofcache_hits);
+               OSAddAtomic64(1, &nfsstats.direofcache_hits);
                *ptc = 0;
                return (-1);
        }
@@ -4969,7 +5020,7 @@ nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
                        /* found a match for this cookie */
                        *lbnp = ndcc->cookies[i].lbn;
                        nfs_node_unlock(dnp);
-                       OSAddAtomic(1, &nfsstats.direofcache_hits);
+                       OSAddAtomic64(1, &nfsstats.direofcache_hits);
                        *ptc = 0;
                        return (0);
                }
@@ -4981,14 +5032,14 @@ nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
        if (eofptc) {
                /* but 32-bit match hit the EOF cookie */
                nfs_node_unlock(dnp);
-               OSAddAtomic(1, &nfsstats.direofcache_hits);
+               OSAddAtomic64(1, &nfsstats.direofcache_hits);
                return (-1);
        }
        if (iptc >= 0) {
                /* but 32-bit match got a hit */
                *lbnp = ndcc->cookies[iptc].lbn;
                nfs_node_unlock(dnp);
-               OSAddAtomic(1, &nfsstats.direofcache_hits);
+               OSAddAtomic64(1, &nfsstats.direofcache_hits);
                return (0);
        }
        nfs_node_unlock(dnp);
@@ -5065,13 +5116,13 @@ nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
        }
        lck_mtx_unlock(nfs_buf_mutex);
        if (found) {
-               OSAddAtomic(1, &nfsstats.direofcache_hits);
+               OSAddAtomic64(1, &nfsstats.direofcache_hits);
                return (0);
        }
 
        /* still not found... oh well, just start a new block */
        *lbnp = cookie;
-       OSAddAtomic(1, &nfsstats.direofcache_misses);
+       OSAddAtomic64(1, &nfsstats.direofcache_misses);
        return (0);
 }
 
@@ -5333,7 +5384,7 @@ noplus:
        } else {
                cookie = bp->nb_lblkno;
                /* increment with every buffer read */
-               OSAddAtomic(1, &nfsstats.readdir_bios);
+               OSAddAtomic64(1, &nfsstats.readdir_bios);
        }
        lastcookie = cookie;
 
@@ -5446,7 +5497,7 @@ nextbuffer:
                                space_free = nfs_dir_buf_freespace(bp, rdirplus);
                                dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
                                /* increment with every buffer read */
-                               OSAddAtomic(1, &nfsstats.readdir_bios);
+                               OSAddAtomic64(1, &nfsstats.readdir_bios);
                        }
                        nmrepsave = nmrep;
                        dp->d_fileno = fileno;
@@ -6631,7 +6682,7 @@ nfs_vnop_pagein(
        if (size <= 0) {
                printf("nfs_pagein: invalid size %ld", size);
                if (!nofreeupl)
-                       (void) ubc_upl_abort(pl, 0);
+                       (void) ubc_upl_abort_range(pl, pl_offset, size, 0);
                return (EINVAL);
        }
        if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
@@ -6698,7 +6749,7 @@ tryagain:
 #if UPL_DEBUG
                        upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2);
 #endif /* UPL_DEBUG */
-                       OSAddAtomic(1, &nfsstats.pageins);
+                       OSAddAtomic64(1, &nfsstats.pageins);
                        error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
                        req[nextwait] = NULL;
                        nextwait = (nextwait + 1) % MAXPAGINGREQS;
@@ -6944,7 +6995,7 @@ nfs_vnop_pageout(
        if (size <= 0) {
                printf("nfs_pageout: invalid size %ld", size);
                if (!nofreeupl)
-                       ubc_upl_abort(pl, 0);
+                       ubc_upl_abort_range(pl, pl_offset, size, 0);
                return (EINVAL);
        }
 
@@ -6977,7 +7028,7 @@ nfs_vnop_pageout(
                                nfs_data_unlock_noupdate(np);
                                /* no panic. just tell vm we are busy */
                                if (!nofreeupl)
-                                       ubc_upl_abort(pl, 0);
+                                       ubc_upl_abort_range(pl, pl_offset, size, 0);
                                return (EBUSY);
                        }
                        if (bp->nb_dirtyend > 0) {
@@ -7024,7 +7075,7 @@ nfs_vnop_pageout(
                                    lck_mtx_unlock(nfs_buf_mutex);
                                    nfs_data_unlock_noupdate(np);
                                    if (!nofreeupl)
-                                       ubc_upl_abort(pl, 0);
+                                           ubc_upl_abort_range(pl, pl_offset, size, 0);
                                    return (EBUSY);
                                }
                                if ((bp->nb_dirtyoff < start) ||
@@ -7135,7 +7186,7 @@ tryagain:
                        uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE);
                        uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize);
                        FSDBG(323, uio_offset(auio), iosize, txaddr, txsize);
-                       OSAddAtomic(1, &nfsstats.pageouts);
+                       OSAddAtomic64(1, &nfsstats.pageouts);
                        nfs_node_lock_force(np);
                        np->n_numoutput++;
                        nfs_node_unlock(np);
index 97f955e2fcf3cf145e2a4018a931c9bf57ed04ac..574b5a70ee584cfb5d2bb80e800b1fa63b6b54d5 100644 (file)
@@ -261,7 +261,7 @@ struct nfsmount {
        mount_t nm_mountp;              /* VFS structure for this filesystem */
        nfsnode_t nm_dnp;               /* root directory nfsnode pointer */
        struct nfs_fs_locations nm_locations; /* file system locations */
-       int     nm_numgrps;             /* Max. size of groupslist */
+       uint32_t nm_numgrps;            /* Max. size of groupslist */
        TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */
        int     nm_timeo;               /* Init timer for NFSMNT_DUMBTIMR */
        int     nm_retry;               /* Max retries */
@@ -275,6 +275,8 @@ struct nfsmount {
        uint32_t nm_acdirmin;           /* dir min attr cache timeout */
        uint32_t nm_acdirmax;           /* dir max attr cache timeout */
        uint32_t nm_auth;               /* security mechanism flavor being used */
+       uint32_t nm_writers;            /* Number of nodes open for writing */
+       uint32_t nm_mappers;            /* Number of nodes that have mmapped */
        struct nfs_sec nm_sec;          /* acceptable security mechanism flavors */
        struct nfs_sec nm_servsec;      /* server's acceptable security mechanism flavors */
        fhandle_t *nm_fh;               /* initial file handle */
@@ -330,7 +332,8 @@ struct nfsmount {
        time_t  nm_reconnect_start;     /* reconnect start time */
        int     nm_tprintf_initial_delay;       /* delay first "server down" */
        int     nm_tprintf_delay;       /* delay between "server down" */
-       int     nm_deadtimeout;         /* delay between first "server down" and dead */
+       int     nm_deadtimeout;         /* delay between first "server down" and dead set at mount time */
+       int     nm_curdeadtimeout;      /* current dead timeout. Adjusted by mount state and mobility */
        int     nm_srtt[4];             /* Timers for RPCs */
        int     nm_sdrtt[4];
        int     nm_timeouts;            /* Request timeouts */
@@ -372,6 +375,7 @@ struct nfsmount {
 #define NFSSTA_RECOVER         0x08000000  /* mount state needs to be recovered */
 #define NFSSTA_RECOVER_EXPIRED 0x10000000  /* mount state expired */
 #define NFSSTA_REVOKE          0x20000000  /* need to scan for revoked nodes */
+#define        NFSSTA_SQUISHY          0x40000000  /* we can ask to be forcibly unmounted */
 
 /* flags for nm_sockflags */
 #define NMSOCK_READY           0x0001  /* socket is ready for use */
index cce1399caad55601e2a5cfc37002a9cd27e34cd1..adf50cc528009829278149fb3ec2da56ca173b8d 100644 (file)
@@ -595,8 +595,8 @@ struct nfsnode {
                struct nfs_sillyrename *nf_silly;/* Ptr to silly rename struct */
                struct nfsdmap *nd_cookiecache; /* dir cookie cache */
        } n_un3;
+       uint32_t                n_flag;         /* node flags */
        u_short                 n_fhsize;       /* size in bytes, of fh */
-       u_short                 n_flag;         /* node flags */
        u_short                 n_hflag;        /* node hash flags */
        u_short                 n_bflag;        /* node buffer flags */
        u_short                 n_mflag;        /* node mount flags */
@@ -672,22 +672,22 @@ struct nfsnode {
 /*
  * Flags for n_flag
  */
-#define        NUPDATESIZE     0x0001 /* size of file needs updating */
-#define        NREVOKE         0x0002 /* node revoked */
-#define        NMODIFIED       0x0004 /* Might have a modified buffer in bio */
-#define        NWRITEERR       0x0008 /* Flag write errors so close will know */
-#define        NNEEDINVALIDATE 0x0010  /* need to call vinvalbuf() */
-#define        NGETATTRINPROG  0x0020  /* GETATTR RPC in progress */
-#define        NGETATTRWANT    0x0040  /* waiting for GETATTR RPC */
-#define        NACC            0x0100  /* Special file accessed */
-#define        NUPD            0x0200  /* Special file updated */
-#define        NCHG            0x0400  /* Special file times changed */
-#define        NNEGNCENTRIES   0x0800  /* directory has negative name cache entries */
-#define        NBUSY           0x1000  /* node is busy */
-#define        NBUSYWANT       0x2000  /* waiting on busy node */
-#define NISDOTZFS      0x4000  /* a ".zfs" directory */
-#define NISDOTZFSCHILD 0x8000  /* a child of a ".zfs" directory */
-  
+#define        NUPDATESIZE     0x00001 /* size of file needs updating */
+#define        NREVOKE         0x00002 /* node revoked */
+#define        NMODIFIED       0x00004 /* Might have a modified buffer in bio */
+#define        NWRITEERR       0x00008 /* Flag write errors so close will know */
+#define        NNEEDINVALIDATE 0x00010 /* need to call vinvalbuf() */
+#define        NGETATTRINPROG  0x00020 /* GETATTR RPC in progress */
+#define        NGETATTRWANT    0x00040 /* waiting for GETATTR RPC */
+#define        NACC            0x00100 /* Special file accessed */
+#define        NUPD            0x00200 /* Special file updated */
+#define        NCHG            0x00400 /* Special file times changed */
+#define        NNEGNCENTRIES   0x00800 /* directory has negative name cache entries */
+#define        NBUSY           0x01000 /* node is busy */
+#define        NBUSYWANT       0x02000 /* waiting on busy node */
+#define NISDOTZFS      0x04000 /* a ".zfs" directory */
+#define NISDOTZFSCHILD 0x08000 /* a child of a ".zfs" directory */
+#define NISMAPPED      0x10000 /* node is mmapped   */
 
 /*
  * Flags for n_hflag
@@ -824,6 +824,7 @@ void nfs_data_update_size(nfsnode_t, int);
 /* other stuff */
 int nfs_removeit(struct nfs_sillyrename *);
 int nfs_nget(mount_t,nfsnode_t,struct componentname *,u_char *,int,struct nfs_vattr *,u_int64_t *,uint32_t,int,nfsnode_t*);
+int nfs_mount_is_dirty(mount_t);
 void nfs_dir_cookie_cache(nfsnode_t, uint64_t, uint64_t);
 int nfs_dir_cookie_to_lbn(nfsnode_t, uint64_t, int *, uint64_t *);
 void nfs_invaldir(nfsnode_t);
index 72db99f35901b0ecc6bb96227e4472e9a846b6bf..4293b781bc4656e19264e5f40078ab7b10e83f42 100644 (file)
 
 #define        NUM_MALLOC_TYPES        11
 
-#ifdef M_NOWAIT
-#undef         M_NOWAIT
-#endif
-#define        M_NOWAIT        0x0000  /* do not block */
 #ifdef M_WAITOK
 #undef         M_WAITOK
-#define M_WAITOK       0x0001  /* ok to block */
+#define M_WAITOK       0x0000  /* ok to block */
+#endif
+#ifdef M_NOWAIT
+#undef         M_NOWAIT
 #endif
+#define        M_NOWAIT        0x0001  /* do not block */
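+/*
+ * Note the value swap: M_WAITOK is now 0 and M_NOWAIT is 1, which appears
+ * to match the <sys/malloc.h> convention that blocking is the default, so
+ * callers that pass no flag will now block rather than fail.
+ */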
 #ifdef M_ZERO
 #undef M_ZERO
 #endif
index fe24ed2a6f5f2573c6a66dc10c53836c3a0694bb..70c87ae2bc7a0759e836ece1fcd5581e681905d8 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2008-2009 Apple Inc.
+ * Copyright (c) 2008-2011 Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -452,6 +452,7 @@ static const struct bsm_errno bsm_errnos[] = {
        { BSM_ERRNO_EINPROGRESS, EINPROGRESS,
            ES("Operation now in progress") },
        { BSM_ERRNO_ESTALE, ESTALE, ES("Stale NFS file handle") },
+       { BSM_ERRNO_EQFULL, EQFULL, ES("Interface output queue is full") },
        { BSM_ERRNO_EPWROFF,
 #ifdef EPWROFF
        EPWROFF,
index 9f64bba8490ce476da65100aeb300f38f86fcb6d..943cac431c43771ac2ad1ef27709b0b80e9dfbac 100644 (file)
@@ -571,7 +571,7 @@ audit_pipe_alloc(void)
 
        AUDIT_PIPE_LIST_WLOCK_ASSERT();
 
-       ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
+       ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_WAITOK | M_ZERO);
        if (ap == NULL)
                return (NULL);
 
index 4b63e0082ab09c5ba75214d8c55768ca0d221561..9d26833f57d94542fd876793f89b837bff8b250c 100644 (file)
@@ -1603,7 +1603,7 @@ audit_sdev_alloc(void)
 
        AUDIT_SDEV_LIST_WLOCK_ASSERT();
 
-       asdev = malloc(sizeof (*asdev), M_AUDIT_SDEV, M_NOWAIT | M_ZERO);
+       asdev = malloc(sizeof (*asdev), M_AUDIT_SDEV, M_WAITOK | M_ZERO);
        if (NULL == asdev)
                return (NULL);
 
index 43d93bdda646bfc7f9645055d426b45248feac90..359717f7b71a1c31de889175f5395a534eb6dd06 100644 (file)
@@ -800,129 +800,6 @@ setauid(proc_t p, struct setauid_args *uap, __unused int32_t *retval)
        return (error);
 }
 
-static int
-getaudit_internal(proc_t p, user_addr_t user_addr)
-{
-       struct auditinfo ai;
-       kauth_cred_t scred;
-
-       scred = kauth_cred_proc_ref(p);
-       if (scred->cr_audit.as_aia_p->ai_termid.at_type == AU_IPv6) {
-               kauth_cred_unref(&scred);
-               return (ERANGE);
-       }
-
-       bzero(&ai, sizeof(ai));
-       ai.ai_auid = scred->cr_audit.as_aia_p->ai_auid;
-       ai.ai_asid = scred->cr_audit.as_aia_p->ai_asid;
-
-       /*
-        * Only superuser gets to see the real mask.
-        */
-       if (suser(scred, &p->p_acflag)) {
-               ai.ai_mask.am_success = ~0;
-               ai.ai_mask.am_failure = ~0;
-       } else {
-               ai.ai_mask.am_success = scred->cr_audit.as_mask.am_success;
-               ai.ai_mask.am_failure = scred->cr_audit.as_mask.am_failure;
-       }
-       ai.ai_termid.machine = scred->cr_audit.as_aia_p->ai_termid.at_addr[0];
-       ai.ai_termid.port = scred->cr_audit.as_aia_p->ai_termid.at_port;
-       kauth_cred_unref(&scred);
-
-       return (copyout(&ai, user_addr,  sizeof (ai)));
-}
-
-/*
- * System calls to get and set process audit information.
- */
-/* ARGSUSED */
-int
-getaudit(proc_t p, struct getaudit_args *uap, __unused int32_t *retval)
-{
-       int error;
-
-#if CONFIG_MACF
-       error = mac_proc_check_getaudit(p);
-       if (error)
-               return (error);
-#endif
-       return (getaudit_internal(p, uap->auditinfo));
-}
-
-/* ARGSUSED */
-int
-setaudit(proc_t p, struct setaudit_args *uap, __unused int32_t *retval)
-{
-       struct auditinfo ai;
-       struct auditinfo_addr newaia;
-       kauth_cred_t scred;
-       int error;
-
-       error = copyin(uap->auditinfo, &ai, sizeof(ai));
-       if (error)
-               return (error);
-       AUDIT_ARG(auditinfo, &ai);
-
-       if (ai.ai_asid != AU_ASSIGN_ASID && 
-           (uint32_t)ai.ai_asid > ASSIGNED_ASID_MAX)
-               return (EINVAL);
-
-#if CONFIG_MACF
-       {
-       struct auditinfo_addr aia = {
-               .ai_auid = ai.ai_auid,
-               .ai_mask = ai.ai_mask,
-               .ai_termid = {
-                       .at_port = ai.ai_termid.port,
-                       .at_type = AU_IPv4,
-                       .at_addr = { ai.ai_termid.machine, 0, 0, 0 } },
-               .ai_asid = ai.ai_asid,
-               .ai_flags = 0 };
-       error = mac_proc_check_setaudit(p, &aia);
-       }
-       if (error)
-               return (error);
-#endif
-
-       bzero(&newaia, sizeof(newaia));
-       scred = kauth_cred_proc_ref(p);
-       error = suser(scred, &p->p_acflag);
-       if (error) {
-               kauth_cred_unref(&scred);
-               return (error);
-       }
-       newaia.ai_flags = scred->cr_audit.as_aia_p->ai_flags;
-       kauth_cred_unref(&scred);
-       
-       WARN_IF_BAD_ASID(ai.ai_asid, "setaudit(2)");
-
-       newaia.ai_auid = ai.ai_auid;
-       bcopy(&ai.ai_mask, &newaia.ai_mask, sizeof(au_mask_t));
-       AUDIT_CHECK_IF_KEVENTS_MASK(ai.ai_mask);
-       newaia.ai_asid = ai.ai_asid;
-       if (ai.ai_asid == AU_DEFAUDITSID)
-               newaia.ai_asid = AU_ASSIGN_ASID;
-       else
-               newaia.ai_asid = ai.ai_asid;
-       newaia.ai_termid.at_addr[0] = ai.ai_termid.machine;
-       newaia.ai_termid.at_port = ai.ai_termid.port;
-       newaia.ai_termid.at_type = AU_IPv4;
-
-       error = audit_session_setaia(p, &newaia);
-       if (error)
-               return (error);
-
-       /*
-        * If asked to assign an ASID then let the user know what the ASID is
-        * by copying the auditinfo struct back out.
-        */
-       if (newaia.ai_asid == AU_ASSIGN_ASID)
-               error = getaudit_internal(p, uap->auditinfo);
-       
-       return (error);
-}
-
 static int
 getaudit_addr_internal(proc_t p, user_addr_t user_addr, size_t length)
 {
@@ -1126,22 +1003,6 @@ setauid(proc_t p, struct setauid_args *uap, int32_t *retval)
        return (ENOSYS);
 }
 
-int
-getaudit(proc_t p, struct getaudit_args *uap, int32_t *retval)
-{
-#pragma unused(p, uap, retval)
-
-       return (ENOSYS);
-}
-
-int
-setaudit(proc_t p, struct setaudit_args *uap, int32_t *retval)
-{
-#pragma unused(p, uap, retval)
-
-       return (ENOSYS);
-}
-
 int
 getaudit_addr(proc_t p, struct getaudit_addr_args *uap, int32_t *retval)
 {
index d9ef366a2bd90018aaaceaf0a0c48081622d2ad1..5ebb842ef3ae46af998b52139020b1dc1fe621d0 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 1999-2010 Apple Inc.
+ * Copyright (c) 1999-2011 Apple Inc.
  * Copyright (c) 2006-2008 Robert N. M. Watson
  * All rights reserved.
  *
@@ -105,10 +105,10 @@ static struct vnode               *audit_vp;
 #define        AUDIT_WORKER_SX_DESTROY()       slck_destroy(&audit_worker_sl)
 
 /*
- * The audit_draining flag is set when audit is disabled and the audit
+ * The audit_q_draining flag is set when audit is disabled and the audit
  * worker queue is being drained.
  */
-static int                     audit_draining;
+static int                     audit_q_draining;
 
 /*
  * The special kernel audit record, audit_drain_kar, is used to mark the end of
@@ -460,7 +460,7 @@ audit_worker(void)
                while ((ar = TAILQ_FIRST(&ar_worklist))) {
                        TAILQ_REMOVE(&ar_worklist, ar, k_q);
                        if (ar->k_ar_commit & AR_DRAIN_QUEUE) {
-                               audit_draining = 0;
+                               audit_q_draining = 0;
                                cv_broadcast(&audit_drain_cv);
                        } else {
                                audit_worker_process_record(ar);
@@ -485,51 +485,54 @@ audit_rotate_vnode(kauth_cred_t cred, struct vnode *vp)
 {
        kauth_cred_t old_audit_cred;
        struct vnode *old_audit_vp;
-       int audit_was_enabled;
 
        KASSERT((cred != NULL && vp != NULL) || (cred == NULL && vp == NULL),
            ("audit_rotate_vnode: cred %p vp %p", cred, vp));
 
-       /*
-        * Rotate the vnode/cred, and clear the rotate flag so that we will
-        * send a rotate trigger if the new file fills.
-        */
-       AUDIT_WORKER_SX_XLOCK();
-       old_audit_cred = audit_ctx.vc_ucred;
-       old_audit_vp = audit_vp;
-       audit_ctx.vc_ucred = cred;
-       audit_file_rotate_wait = 0;
-       audit_was_enabled = audit_enabled;
-       if ((audit_enabled = (NULL != vp)))
-               audit_vp = vp;
-       audit_draining = (audit_was_enabled && !audit_enabled);
-       AUDIT_WORKER_SX_XUNLOCK();
 
-       /*
-        * If audit (was enabled and) is now disabled then drain the audit
-        * record queue and wait until it is done.
-        */
        mtx_lock(&audit_mtx);
-       if (audit_draining) {
+       if (audit_enabled && (NULL == vp)) {
+               /* Auditing is currently enabled but will be disabled. */
+
                /*
-                * Insert the special drain record in the queue.
+                * Disable auditing now so nothing more is added while the
+                * audit worker thread is draining the audit record queue.
                 */
+               audit_enabled = 0;
+
+               /*
+                * Drain the auditing queue by inserting a drain record at the
+                * end of the queue and waiting for the audit worker thread
+                * to find this record and signal that it is done before
+                * we close the audit trail.
+                */
+               audit_q_draining = 1;
                while (audit_q_len >= audit_qctrl.aq_hiwater)
                        cv_wait(&audit_watermark_cv, &audit_mtx);
                TAILQ_INSERT_TAIL(&audit_q, &audit_drain_kar, k_q);
                audit_q_len++;
                cv_signal(&audit_worker_cv);
-
-               /*
-                * Wait for the audit worker thread to signal it is done.
-                */
-               while (audit_draining)
-                       cv_wait(&audit_drain_cv, &audit_mtx);
-
-               audit_vp = NULL;
        }
+
+       /* If the audit queue is draining then wait here until it's done. */
+       while (audit_q_draining)
+               cv_wait(&audit_drain_cv, &audit_mtx);
        mtx_unlock(&audit_mtx);
 
+
+       /*
+        * Rotate the vnode/cred, and clear the rotate flag so that we will
+        * send a rotate trigger if the new file fills.
+        */
+       AUDIT_WORKER_SX_XLOCK();
+       old_audit_cred = audit_ctx.vc_ucred;
+       old_audit_vp = audit_vp;
+       audit_ctx.vc_ucred = cred;
+       audit_vp = vp;
+       audit_file_rotate_wait = 0;
+       audit_enabled = (audit_vp != NULL);
+       AUDIT_WORKER_SX_XUNLOCK();
+
        /*
         * If there was an old vnode/credential, close and free.
         */
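
The reordered logic above drains the audit queue before rotating the vnode/cred: a sentinel record is appended behind all pending work and the caller sleeps until the worker dequeues it. A minimal user-space sketch of the same sentinel-drain pattern, using pthreads and illustrative names rather than the xnu audit API:

#include <pthread.h>
#include <sys/queue.h>

struct rec { TAILQ_ENTRY(rec) link; int is_sentinel; };

struct workq {
        pthread_mutex_t         mtx;
        pthread_cond_t          worker_cv;      /* worker waits for records   */
        pthread_cond_t          drain_cv;       /* drainer waits for sentinel */
        TAILQ_HEAD(, rec)       items;
        int                     draining;
        struct rec              sentinel;
};

/* Producer side: queue the sentinel and sleep until the worker reaches it. */
void
queue_drain(struct workq *q)
{
        pthread_mutex_lock(&q->mtx);
        q->draining = 1;
        q->sentinel.is_sentinel = 1;
        TAILQ_INSERT_TAIL(&q->items, &q->sentinel, link);
        pthread_cond_signal(&q->worker_cv);
        while (q->draining)
                pthread_cond_wait(&q->drain_cv, &q->mtx);
        pthread_mutex_unlock(&q->mtx);
}

/*
 * Worker side, for each dequeued record r:
 *      if (r->is_sentinel) { q->draining = 0; pthread_cond_broadcast(&q->drain_cv); }
 *      else                  process(r);
 */
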
index 53f457741bba52a237dbccb5df548d2054cecc38..211d741b9f98195940efd5dba7874677f252715d 100644 (file)
@@ -40,6 +40,7 @@ DATAFILES = \
 PRIVATE_DATAFILES = \
        codesign.h \
        content_protection.h \
+       decmpfs.h \
        disklabel.h \
        fileport.h \
        fsctl.h \
@@ -47,6 +48,7 @@ PRIVATE_DATAFILES = \
        fslog.h \
        imgsrc.h \
        ipcs.h \
+       kas_info.h \
        shm_internal.h \
        spawn_internal.h \
        tree.h \
@@ -66,7 +68,7 @@ KERNELFILES = \
        errno.h ev.h event.h fcntl.h file.h filio.h \
        ioccom.h ioctl.h ipc.h \
        ioctl_compat.h  kernel.h kernel_types.h kern_event.h lctx.h lock.h lockf.h \
-       kauth.h kdebug.h  md5.h kern_callout.h kern_control.h kern_memorystatus.h imgact.h malloc.h namei.h \
+       kauth.h kdebug.h  md5.h kern_control.h kern_memorystatus.h imgact.h malloc.h namei.h \
        mman.h mbuf.h mount.h netport.h param.h paths.h \
        proc.h  queue.h random.h resource.h \
        sbuf.h posix_sem.h posix_shm.h sem.h shm.h \
@@ -120,7 +122,7 @@ INSTALL_MI_DIR = sys
 
 EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h pthread_internal.h filedesc.h pipe.h resourcevar.h semaphore.h \
                                                                vnode_internal.h proc_internal.h file_internal.h mount_internal.h \
-                                                               uio_internal.h tree.h
+                                                               uio_internal.h tree.h munge.h
 
 EXPORT_MI_GEN_LIST = syscall.h sysproto.h
 
@@ -152,12 +154,12 @@ sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS)
 MAKE_POSIX_AVAILABILITY = $(SRCROOT)/bsd/sys/make_posix_availability.sh
 _posix_availability.h: $(MAKE_POSIX_AVAILABILITY)
        @echo "Generating bsd/sys/$@"
-       $(_v)$(MAKE_POSIX_AVAILABILITY) $@
+       $(_v)$(MAKE_POSIX_AVAILABILITY) "$@"
 
 MAKE_SYMBOL_ALIASING = $(SRCROOT)/bsd/sys/make_symbol_aliasing.sh
 _symbol_aliasing.h: $(MAKE_SYMBOL_ALIASING)
        @echo "Generating bsd/sys/$@"
-       $(_v)$(MAKE_SYMBOL_ALIASING) $@
+       $(_v)$(MAKE_SYMBOL_ALIASING) "$(SDKROOT)" "$@"
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 42a8b7673fc3e21f25637e0d1cac4c9482828030..e33ad2b8688385dc85eb9b278882a702aaa61c5e 100644 (file)
@@ -53,8 +53,6 @@
 #define FSOPT_EXCHANGE_DATA_ONLY 0x0000010
 #endif
 
-
-
 /* we currently aren't anywhere near this amount for a valid
  * fssearchblock.sizeofsearchparams1 or fssearchblock.sizeofsearchparams2
  * but we put a sanity check in to avoid abuse of the value passed in from
index 7b7b566108b9403b5704afe42750c8768b9d8861..416e4b475c616c8d35f8f1e67e4bb1cf07e2f135 100644 (file)
@@ -107,7 +107,7 @@ extern uint32_t vnode_vid(void *vp);
 
 extern int fill_procregioninfo(task_t t, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vp, uint32_t *vid);
 void fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo);
-int fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_internal * ptinfo, void *, int *);
+int fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_threadinfo_internal * ptinfo, void *, int *);
 int fill_taskthreadlist(task_t task, void * buffer, int thcount);
 int get_numthreads(task_t);
 void bsd_getthreadname(void *uth, char* buffer);
index 08216cac0e1e898b9eb60eb073a8d4222019eeed..43af7fb1a44dd2e086ed8e42f770e95dd652a749 100644 (file)
@@ -91,6 +91,8 @@
 #define        B_IOSTREAMING   0x00001000      /* sequential access pattern detected */
 #define B_THROTTLED_IO 0x00002000      /* low priority I/O */
 #define B_ENCRYPTED_IO 0x00004000      /* Encrypted I/O */
+#define B_STATICCONTENT 0x00008000     /* Buffer is likely to remain unaltered */
+
 /*
  * make sure to check when adding flags that
  * that the new flags don't overlap the definitions
@@ -1033,35 +1035,105 @@ void   buf_set_redundancy_flags(buf_t, uint32_t);
  */
 bufattr_t buf_attr(buf_t);
 
+/*!
+ @function buf_markstatic
+ @abstract Mark a buffer as being likely to contain static data.
+ @param bp Buffer to mark.
+ @return void.
+ */
+void   buf_markstatic(buf_t);
+
+/*!
+ @function buf_static
+ @abstract Check if a buffer contains static data.
+ @param bp Buffer to test.
+ @return Nonzero if buffer has static data, 0 otherwise.
+ */
+int    buf_static(buf_t);
+
 #ifdef KERNEL_PRIVATE
 void   buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void (**)(buf_t, void *), void **);
 
+bufattr_t bufattr_alloc(void);
+
+void bufattr_free(bufattr_t bap);
 
 /*!
- @function buf_getcpaddr
- @abstract Set the address of cp_entry on a buffer.
- @param bp Buffer whose cp entry value has to be set
+ @function bufattr_cpaddr
+ @abstract Get the address of cp_entry on a buffer attribute.
+ @param bap Buffer Attribute whose cp_entry to get.
+ @return Address of cp_entry.
+ */
+void *bufattr_cpaddr(bufattr_t);
+
+/*!
+ @function bufattr_cpoff
+ @abstract Get the file offset on the buffer attribute.
+ @param bap Buffer Attribute whose file offset value is used.
- @return void.
+ @return The file offset.
  */
-void buf_setcpaddr(buf_t, void *);
+uint64_t bufattr_cpoff(bufattr_t);
+
 
 /*!
- @function buf_getcpaddr
- @abstract Get the address of cp_entry on a buffer.
- @param bp Buffer whose error value to set.
- @return int.
+ @function bufattr_setcpaddr
+ @abstract Set the address of cp_entry on a buffer attribute.
+ @param bap Buffer Attribute whose cp entry value has to be set
+ @return void.
+ */
+void bufattr_setcpaddr(bufattr_t, void *);
+
+/*!
+ @function bufattr_setcpoff
+ @abstract Set the file offset for a content protected I/O on 
+ a buffer attribute.
+ @param bap Buffer Attribute whose cp file offset has to be set
+ @return void.
  */
-void *buf_getcpaddr(buf_t);
+void bufattr_setcpoff(bufattr_t, uint64_t);
 
 /*!
- @function buf_throttled
+ @function bufattr_rawencrypted
+ @abstract Check if a buffer contains raw encrypted data.
+ @param bap Buffer attribute to test.
+ @return Nonzero if buffer has raw encrypted data, 0 otherwise.
+ */
+int bufattr_rawencrypted(bufattr_t bap);
+
+/*!
+ @function bufattr_throttled
  @abstract Check if a buffer is throttled.
  @param bap Buffer attribute to test.
  @return Nonzero if the buffer is throttled, 0 otherwise.
  */
 int bufattr_throttled(bufattr_t bap);
-#endif /* KERNEL_PRIVATE */
 
+/*!
+ @function bufattr_nocache
+ @abstract Check if a buffer has the nocache attribute.
+ @param bap Buffer attribute to test.
+ @return Nonzero if the buffer is not cached, 0 otherwise.
+ */
+int bufattr_nocache(bufattr_t bap);
+
+/*!
+ @function bufattr_meta
+ @abstract Check if a buffer has the meta attribute.
+ @param bap Buffer attribute to test.
+ @return Nonzero if the buffer has the meta attribute, 0 otherwise.
+ */
+int bufattr_meta(bufattr_t bap);
+
+/*!
+ @function bufattr_delayidlesleep
+ @abstract Check if a buffer is marked to delay idle sleep on disk IO.
+ @param bap Buffer attribute to test.
+ @return Nonzero if the buffer is marked to delay idle sleep on disk IO, 0 otherwise.
+ */
+int bufattr_delayidlesleep(bufattr_t bap);
+
+#endif /* KERNEL_PRIVATE */
 
 __END_DECLS
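
The bufattr accessors introduced above supersede the buf_getcpaddr()/buf_setcpaddr() pair on buf_t. A hedged sketch of how a KERNEL_PRIVATE consumer might inspect a completed buffer, using only declarations visible in this header (the function name is illustrative):

/* Illustrative only; assumes a KERNEL_PRIVATE compilation context. */
static void
log_buf_attributes(buf_t bp)
{
        bufattr_t bap = buf_attr(bp);

        if (buf_static(bp))
                printf("buffer carries static content\n");
        if (bufattr_throttled(bap))
                printf("I/O was throttled\n");
        if (bufattr_nocache(bap))
                printf("buffer bypasses the cache\n");
        if (bufattr_delayidlesleep(bap))
                printf("process delays idle sleep on disk I/O\n");
}
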
 
index 2d259ac2a20f4aff239e2fa52ea1bab0e83a1247..016632623e784d4fe3c07085c53e365bbf4396f8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/buf.h>
 #include <sys/lock.h>
 
-
 #define NOLIST ((struct buf *)0x87654321)
 
 /*
  * Attributes of an I/O to be used by lower layers
  */
 struct bufattr {
+#if CONFIG_PROTECT
+       struct cprotect *ba_cpentry;    /* address of cp_entry */
+       uint64_t ba_cp_file_off;        /* rounded file offset. See buf_setcpoff() for more comments */
+#endif
        uint64_t ba_flags;      /* flags. Some are only in-use on embedded devices */
 };
 
@@ -132,9 +135,6 @@ struct buf {
 #ifdef BUF_MAKE_PRIVATE
        buf_t   b_data_store;
 #endif
-#if CONFIG_PROTECT
-       struct cprotect *b_cpentry;     /* address of cp_entry, to be passed further down  */
-#endif /* CONFIG_PROTECT */
        struct bufattr b_attr;
 #ifdef JOE_DEBUG
         void * b_owner;
@@ -160,7 +160,7 @@ struct buf {
 #define b_cliodone   b_wcred
 
 /*
- * These flags are kept in b_lflags... 
+ * These flags are kept in b_lflags...
  * buf_mtxp must be held before examining/updating
  */
 #define        BL_BUSY         0x00000001      /* I/O in progress. */
@@ -171,20 +171,20 @@ struct buf {
 #define BL_EXTERNAL    0x00000040
 #define BL_WAITSHADOW  0x00000080
 #define BL_IOBUF_ALLOC 0x00000100
+#define BL_WANTED_REF  0x00000200
 
 /*
  * Parameters for buffer cache garbage collection 
  */
 #define BUF_STALE_THRESHHOLD   30      /* Collect if untouched in the last 30 seconds */
-#define BUF_MAX_GC_COUNT       1024    /* Generally 6-8 MB */
-#define BUF_MAX_GC_BATCH_SIZE  128     /* Under a single grab of the lock */
+#define BUF_MAX_GC_BATCH_SIZE  64      /* Under a single grab of the lock */
 
 /*
  * mask used by buf_flags... these are the readable external flags
  */
 #define BUF_X_RDFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\
                       B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING | B_THROTTLED_IO |\
-                      B_ENCRYPTED_IO)
+                      B_ENCRYPTED_IO | B_STATICCONTENT)
 /*
  * mask used by buf_clearflags/buf_setflags... these are the writable external flags
  */
@@ -230,7 +230,11 @@ struct buf {
  * ba_flags (Buffer Attribute flags)
  * Some of these may be in-use only on embedded devices.
  */
-#define BA_THROTTLED_IO         0x000000002
+#define BA_RAW_ENCRYPTED_IO     0x00000001
+#define BA_THROTTLED_IO         0x00000002
+#define BA_DELAYIDLESLEEP       0x00000004     /* Process is marked to delay idle sleep on disk IO */
+#define BA_NOCACHE             0x00000008
+#define BA_META                        0x00000010
 
 
 extern int niobuf_headers;             /* The number of IO buffer headers for cluster IO */
@@ -281,10 +285,18 @@ errno_t   buf_acquire(buf_t, int, int, int);
 int    count_busy_buffers(void);
 int    count_lock_queue(void);
 
+int buf_flushdirtyblks_skipinfo (vnode_t, int, int, const char *);
+void buf_wait_for_shadow_io (vnode_t, daddr64_t);
+
 #ifdef BUF_MAKE_PRIVATE
 errno_t        buf_make_private(buf_t bp);
 #endif
 
+#ifdef CONFIG_PROTECT
+void buf_setcpaddr(buf_t, struct cprotect *);
+void buf_setcpoff (buf_t, uint64_t);
+#endif
+
 __END_DECLS
 
 
index 7076ef5723f2094810751f37a2f20e7c2cf9da82..2cbc7fef6e3399b39d45cb9fd162ed0cf3fdf54a 100644 (file)
 #define        __END_DECLS
 #endif
 
+/* This SDK is designed to work with clang and specific versions of
+ * gcc >= 4.0 with Apple's patch sets */
+#if !defined(__GNUC__) || __GNUC__ < 4
+#warning "Unsupported compiler detected"
+#endif
+
 /*
  * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
  * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
 #endif /* !NO_ANSI_KEYWORDS */
 #endif /* !(__STDC__ || __cplusplus) */
 
-/*
- * GCC1 and some versions of GCC2 declare dead (non-returning) and
- * pure (no side effects) functions using "volatile" and "const";
- * unfortunately, these then cause warnings under "-ansi -pedantic".
- * GCC2 uses a new, peculiar __attribute__((attrs)) style.  All of
- * these work for GNU C++ (modulo a slight glitch in the C++ grammar
- * in the distribution version of 2.5.5).
+#define __dead2                __attribute__((noreturn))
+#define __pure2                __attribute__((const))
+
+/* __unused denotes variables and functions that may not be used, preventing
+ * the compiler from warning about them if they are not used.
  */
-#if defined(__MWERKS__) && (__MWERKS__ > 0x2400)
-       /* newer Metrowerks compilers support __attribute__() */
-#elif __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 5
-#define        __dead2         __attribute__((__noreturn__))
-#define        __pure2         __attribute__((__const__))
-#if __GNUC__ == 2 && __GNUC_MINOR__ >= 5 && __GNUC_MINOR__ < 7
-#define        __unused        /* no attribute */
-#else
-#define        __unused        __attribute__((__unused__))
-#endif
-#else
-#define        __attribute__(x)        /* delete __attribute__ if non-gcc or gcc1 */
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
-/* __dead and __pure are depreciated.  Use __dead2 and __pure2 instead */
-#define        __dead          __volatile
-#define        __pure          __const
-#endif
-#endif
+#define __unused       __attribute__((unused))
+
+/* __used forces variables and functions to be included even if it appears
+ * to the compiler that they are not used (and would thus be discarded).
+ */
+#define __used         __attribute__((used))
+
+/* __deprecated causes the compiler to produce a warning when encountering
+ * code using the deprecated functionality.  This may require turning on
+ * such warnings with the -Wdeprecated flag.
+ */
+#define __deprecated   __attribute__((deprecated))
+
+/* __unavailable causes the compiler to error out when encountering
+ * code using the tagged function or variable.
+ */
+#define __unavailable  __attribute__((unavailable))
 
 /* Delete pseudo-keywords wherever they are not available or needed. */
 #ifndef __dead
 #define        __dead
 #define        __pure
 #endif
-#ifndef __dead2
-#define        __dead2
-#define        __pure2
-#define        __unused
-#endif
-
-#if defined(__GNUC__) && __GNUC__ >= 4
-#define __used __attribute__((__used__))
-#else
-#define __used
-#endif
 
 /*
- * GCC 2.95 provides `__restrict' as an extension to C90 to support the
- * C99-specific `restrict' type qualifier.  We happen to use `__restrict' as
- * a way to define the `restrict' type qualifier without disturbing older
- * software that is unaware of C99 keywords.
+ * We use `__restrict' as a way to define the `restrict' type qualifier
+ * without disturbing older software that is unaware of C99 keywords.
  */
-#if !(__GNUC__ == 2 && __GNUC_MINOR__ == 95)
 #if __STDC_VERSION__ < 199901
 #define __restrict
 #else
 #define __restrict     restrict
 #endif
-#endif
 
 /*
  * Compiler-dependent macros to declare that functions take printf-like
  * mismatch between the format string and subsequent function parameter
  * types.
  */
-#if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7
 #define __printflike(fmtarg, firstvararg) \
                __attribute__((__format__ (__printf__, fmtarg, firstvararg)))
 #define __scanflike(fmtarg, firstvararg) \
                __attribute__((__format__ (__scanf__, fmtarg, firstvararg)))
-#else
-#define __printflike(fmtarg, firstvararg)
-#define __scanflike(fmtarg, firstvararg)
-#endif
 
 #define __IDSTRING(name,string) static const char name[] __used = string
 
 #define __PROJECT_VERSION(s) __IDSTRING(project_version,s)
 #endif
 
+/* Source compatibility only, ID string not emitted in object file */
+#ifndef __FBSDID
+#define __FBSDID(s) 
+#endif
+
+
 /*
  * COMPILATION ENVIRONMENTS -- see compat(5) for additional detail
  *
 #else
 #include <sys/_symbol_aliasing.h>
 
-#if defined(__IPHONE_OS_VERSION_MIN_REQUIRED)
+#if defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__)
 #define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   __DARWIN_ALIAS_STARTING_IPHONE_##_iphone(x)
 #elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
 #define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   __DARWIN_ALIAS_STARTING_MAC_##_mac(x)
 #define _POSIX_C_SOURCE         198808L
 #endif
 
-/*
- * Deprecation macro
- */
-#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
-#define __deprecated __attribute__((deprecated))
-#define __unavailable __attribute__((unavailable))
-#else
-#define __deprecated /* nothing */
-#define __unavailable /* nothing */
-#endif
-
 /* POSIX C deprecation macros */
 #ifdef KERNEL
 #define __POSIX_C_DEPRECATED(ver)
                                && (__STDC_VERSION__-0 < 199901L) \
                                && !defined(__GNUG__))
 
-/*
- * Long double compatibility macro allow selecting variant symbols based
- * on the old (compatible) 64-bit long doubles, or the new 128-bit
- * long doubles.  This applies only to ppc; i386 already has long double
- * support, while ppc64 doesn't have any backwards history.
- */
-#if   defined(__i386__) || defined(__x86_64__)
-#  define      __DARWIN_LDBL_COMPAT(x) /* nothing */
-#  define      __DARWIN_LDBL_COMPAT2(x) /* nothing */
-#  define      __DARWIN_LONG_DOUBLE_IS_DOUBLE  0
-#else
-#  error Unknown architecture
-#endif
-
 /*****************************************
  *  Public darwin-specific feature macros
  *****************************************/
 #define _DARWIN_FEATURE_64_BIT_INODE           1
 #endif
 
-/*
- * _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE indicates when the long double type
- * is the same as the double type (ppc and arm only)
- */
-#if __DARWIN_LONG_DOUBLE_IS_DOUBLE
-#define _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE  1
-#endif
-
 /*
  * _DARWIN_FEATURE_64_ONLY_BIT_INODE indicates that the ino_t type may only
  * be 64-bit; there is no support for 32-bit ino_t when this macro is defined
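
With the GCC-version conditionals removed above, __printflike()/__scanflike() are now defined unconditionally. A short example of what the annotation buys: the compiler cross-checks the format string against the variadic arguments. The logmsg name is illustrative:

#include <sys/cdefs.h>
#include <stdarg.h>
#include <stdio.h>

/* Argument 1 is the format string; variadic arguments begin at 2. */
static void logmsg(const char *fmt, ...) __printflike(1, 2);

static void
logmsg(const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
}

/* logmsg("pid %d", "oops");   <-- -Wformat now flags this mismatch */
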
index e72c25044f6e557d45c076c7ff73734a62f19a3f..5894d982cf544d5be96cd0b5f9c3082380bd5b43 100644 (file)
@@ -57,6 +57,8 @@ __BEGIN_DECLS
 
 /* code sign operations */
 int csops(pid_t pid, unsigned int  ops, void * useraddr, size_t usersize);
+/* works only with CS_OPS_PIDPATH and CS_OPS_ENTITLEMENTS_BLOB */
+int csops_audittoken(pid_t pid, unsigned int  ops, void * useraddr, size_t usersize, audit_token_t * token);
 
 __END_DECLS
 
index 39e4fef37a864e7ce860447195f8205a26e78011..bd6e518b0309b57413d27fdaa14d26d3c358fae2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,6 +93,12 @@ struct vnode;
 #define        D_TTY   3
 
 #ifdef KERNEL
+/*
+ * Flags for d_type (squeezed into the top half of d_type).
+ */
+#define        D_TYPEMASK      0xffff
+#define        D_TRACKCLOSE    0x00010000      /* track all closes */
+
 /* 
  * Device switch function types.
  */
@@ -166,6 +172,7 @@ d_write_t   nowrite;
 
 #ifdef KERNEL_PRIVATE
 extern struct bdevsw bdevsw[];
+extern int (*bootcache_contains_block)(dev_t device, u_int64_t blkno);
 #endif /* KERNEL_PRIVATE */
 
 /*
index bebe3bb4308735b47b03f358c76a943d850ff376..0dda075ac9e7ec31c1a60f7c4990558ca6aed8ea 100644 (file)
@@ -38,9 +38,13 @@ extern "C" {
 #include <sys/cdefs.h>
 #include <sys/content_protection.h>
 #include <sys/kernel_types.h>
+#include <crypto/aes.h>
 
-#define CP_KEYSIZE 32                          /* 8x4 = 32, 32x8 = 256 */
-#define CP_WRAPPEDKEYSIZE  40          /* 2x4 = 8, 8x8 = 64 */
+#define CP_IV_KEYSIZE 20                       /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */
+#define CP_MAX_KEYSIZE 32              /* 8x4 = 32, 32x8 = 256 */
+#define CP_MAX_WRAPPEDKEYSIZE  128     /* The size of the largest allowed key */
+#define CP_INITIAL_WRAPPEDKEYSIZE 40
+#define CP_V2_WRAPPEDKEYSIZE 40                /* Size of the wrapped key in a v2 EA */
 
 /* lock events from AppleKeyStore */
 #define CP_LOCKED_STATE 0              /* Device is locked */
@@ -53,15 +57,18 @@ extern "C" {
 #define CP_NEEDS_KEYS          0x1             /* File needs persistent keys */
 #define CP_KEY_FLUSHED         0x2             /* File's unwrapped key has been purged from memory */
 #define CP_NO_XATTR                    0x4             /* Key info has not been saved as EA to the FS */
+#define CP_OFF_IV_ENABLED      0x8             /* Only go down relative IV route if this flag is set */
+
+#define CP_RELOCATION_INFLIGHT 0x10    /* File with offset IVs is in the process of being relocated. */
 
 /* Content Protection VNOP Operation flags */
 #define CP_READ_ACCESS         0x1
 #define CP_WRITE_ACCESS 0x2
 
 #define CONTENT_PROTECTION_XATTR_NAME  "com.apple.system.cprotect"
-#define CP_CURRENT_MAJOR_VERS 2
-#define CP_CURRENT_MINOR_VERS 0
-
+#define CP_NEW_MAJOR_VERS 4
+#define CP_PREV_MAJOR_VERS 2
+#define CP_MINOR_VERS 0
 
 typedef struct cprotect *cprotect_t;
 typedef struct cp_wrap_func *cp_wrap_func_t;
@@ -73,18 +80,32 @@ typedef struct cnode * cnode_ptr_t;
 struct hfsmount;
 
 /* The wrappers are invoked by the AKS kext */
-typedef int wrapper_t(uint32_t properties, void *key_bytes, size_t key_length, void *wrapped_data, size_t *wrapped_length);
+typedef int wrapper_t(uint32_t properties, uint64_t file_id, void *key_bytes, size_t key_length, void *wrapped_data, size_t *wrapped_length);
 typedef        int unwrapper_t(uint32_t properties, void *wrapped_data, size_t wrapped_data_length, void *key_bytes, size_t *key_length);
 
 /* 
  * Runtime-only structure containing the content protection status 
  * for the given file.  This is contained within the cnode 
+ * and is passed down to IOStorageFamily via the bufattr struct.
+ *
+ ******************************************************
+ * Some key calculation information for offset-based IV
+ ******************************************************
+ * Kf  = original 256 bit per file key
+ * Kiv = SHA1(Kf), use full Kf, but truncate Kiv to 128 bits
+ * Kiv can be cached in the cprotect, so it only has to be calculated once for the file init
+ *
+ * IVb = Encrypt(Kiv, offset)
+ *
  */
 struct cprotect {
-       uint8_t         cp_cache_key[CP_KEYSIZE];
-       uint8_t         cp_persistent_key[CP_WRAPPEDKEYSIZE];
        uint32_t        cp_flags;
        uint32_t        cp_pclass;
+       aes_encrypt_ctx cp_cache_iv_ctx;
+       uint32_t        cp_cache_key_len;
+       uint8_t         cp_cache_key[CP_MAX_KEYSIZE];
+       uint32_t        cp_persistent_key_len;
+       uint8_t         cp_persistent_key[];
 };
 
 struct cp_wrap_func {
@@ -94,7 +115,8 @@ struct cp_wrap_func {
 
 struct cp_global_state {
        uint8_t wrap_functions_set;
-       uint8_t lock_state;
+       uint8_t lock_state;
+       u_int16_t reserved;
 };
 
 /*
@@ -103,13 +125,27 @@ struct cp_global_state {
  * little-endian on-disk.  This means they must be endian swapped to
  * host byte order on getxattr() and converted to LE on setxattr().
  */
-struct cp_xattr {
-       u_int16_t       xattr_major_version;
-       u_int16_t       xattr_minor_version;
-       u_int32_t       flags;
-       u_int32_t       persistent_class;
-       u_int32_t       key_size;
-       uint8_t         persistent_key[CP_WRAPPEDKEYSIZE];      
+struct cp_xattr_v2 {
+       u_int16_t xattr_major_version;
+       u_int16_t xattr_minor_version;
+       u_int32_t flags;
+       u_int32_t persistent_class;
+       u_int32_t key_size;
+       uint8_t   persistent_key[CP_V2_WRAPPEDKEYSIZE];
+};
+
+struct cp_xattr_v4 {
+       u_int16_t xattr_major_version;
+       u_int16_t xattr_minor_version;
+       u_int32_t flags;
+       u_int32_t persistent_class;
+       u_int32_t key_size;
+       u_int32_t reserved1;
+       u_int32_t reserved2;
+       u_int32_t reserved3;
+       u_int32_t reserved4;
+       u_int32_t reserved5;
+       uint8_t   persistent_key[CP_MAX_WRAPPEDKEYSIZE];
 };
 
 /* Same is true for the root EA, all fields must be written little endian. */
@@ -130,20 +166,31 @@ struct cp_root_xattr {
  */
 int cp_vnode_getclass(vnode_t, int *);
 int cp_vnode_setclass(vnode_t, uint32_t);
+int cp_vnode_transcode(vnode_t);
 
 int cp_key_store_action(int);
 int cp_register_wraps(cp_wrap_func_t);
 
 int cp_entry_init(cnode_ptr_t, struct mount *);
-int cp_entry_create_keys(cnode_ptr_t);
-void cp_entry_destroy(cnode_ptr_t);
+int cp_entry_create_keys(struct cprotect **entry_ptr, struct cnode *dcp, struct hfsmount *hfsmp, 
+               uint32_t input_class, uint32_t fileid, mode_t cmode);
+int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp);
+void cp_entry_destroy(struct cprotect **entry_ptr);
 
 cnode_ptr_t cp_get_protected_cnode(vnode_t);
-int cp_handle_vnop(cnode_ptr_t, int);
+int cp_handle_vnop(vnode_t, int, int);
 int cp_fs_protected (mount_t);
 int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr);
 int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr);
-int cp_handle_relocate (cnode_ptr_t cp);
+int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options);
+int cp_update_mkb (struct cprotect *entry, uint32_t fileid);
+int cp_handle_relocate (cnode_ptr_t cp, struct hfsmount *hfsmp);
+int cp_handle_open(struct vnode *vp, int mode);
+int cp_get_root_major_vers (struct vnode *vp, uint32_t *level);
+
+#if 0
+int cp_isdevice_locked (void);
+#endif
 
 #endif /* KERNEL_PRIVATE */
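
The cprotect.h comment above fixes the offset-based IV schedule: Kiv = SHA1(Kf) truncated to 128 bits and cached per file, then IVb = Encrypt(Kiv, offset). A hedged user-space sketch with CommonCrypto standing in for the kernel's aes_encrypt_ctx; the byte layout of the offset block is an assumption:

#include <CommonCrypto/CommonDigest.h>
#include <CommonCrypto/CommonCryptor.h>
#include <stdint.h>
#include <string.h>

/* Kf is the 256-bit per-file key; ivb receives the 16-byte block IV. */
static void
derive_offset_iv(const uint8_t kf[32], uint64_t offset, uint8_t ivb[16])
{
        uint8_t kiv[CC_SHA1_DIGEST_LENGTH];     /* 20 bytes; first 16 used */
        uint8_t block[16] = { 0 };
        size_t moved = 0;

        CC_SHA1(kf, 32, kiv);                   /* Kiv = SHA1(Kf)           */
        memcpy(block, &offset, sizeof(offset)); /* offset layout assumed    */
        CCCrypt(kCCEncrypt, kCCAlgorithmAES128, kCCOptionECBMode,
            kiv, 16, NULL, block, sizeof(block), ivb, 16, &moved);
}
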
 
index f8a61d2888b599d62e5627c46a3916a29565aeef..8cef87b69bc91f3238401c98676cd1ea8f814a7f 100644 (file)
@@ -28,6 +28,8 @@
 #ifndef _SYS_DECMPFS_H_
 #define _SYS_DECMPFS_H_ 1
 
+#include <sys/kernel_types.h>
+
 #define MAX_DECMPFS_XATTR_SIZE 3802
 
 /*
@@ -61,7 +63,7 @@ enum {
     
     /* additional types defined in AppleFSCompression project */
     
-    CMP_MAX         = 255
+    CMP_MAX         = 255 /* Highest compression_type supported */
 };
 
 typedef struct {
@@ -71,25 +73,19 @@ typedef struct {
 
 #if KERNEL
 
-#include <kern/locks.h>
+#if XNU_KERNEL_PRIVATE
 
-#if defined(__i386__) || defined(__x86_64__)
-#define DECMPFS_SUPPORTS_SWAP64 1
-/* otherwise, no OSCompareAndSwap64, so use a mutex */
-#endif
+#include <kern/locks.h>
 
 typedef struct decmpfs_cnode {
-       uint8_t cmp_state;
-       uint8_t cmp_minimal_xattr;       /* if non-zero, this file's com.apple.decmpfs xattr contained only the minimal decmpfs_disk_header */
-       uint32_t cmp_type;
-       uint32_t lockcount;
-       void    *lockowner;              /* cnode's lock owner (if a thread is currently holding an exclusive lock) */
+    uint8_t cmp_state;
+    uint8_t cmp_minimal_xattr;       /* if non-zero, this file's com.apple.decmpfs xattr contained only the minimal decmpfs_disk_header */
+    uint32_t cmp_type;
+    uint32_t lockcount;
+    void    *lockowner;              /* cnode's lock owner (if a thread is currently holding an exclusive lock) */
     uint64_t uncompressed_size __attribute__((aligned(8)));
+    uint64_t decompression_flags;
     lck_rw_t compressed_data_lock;
-#if !DECMPFS_SUPPORTS_SWAP64
-    /* we need a lock since we can't atomically fetch/set 64 bits */
-    lck_mtx_t uncompressed_size_mtx;
-#endif /* !DECMPFS_SUPPORTS_SWAP64 */
 } decmpfs_cnode;
 
 /* return values from decmpfs_file_is_compressed */
@@ -128,19 +124,32 @@ int decmpfs_update_attributes(vnode_t vp, struct vnode_attr *vap);
 errno_t decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp);
 errno_t decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_cnode *cp);
 
+#endif /* XNU_KERNEL_PRIVATE */
+
 /* types shared between the kernel and kexts */
 typedef int (*decmpfs_validate_compressed_file_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr);
 typedef void (*decmpfs_adjust_fetch_region_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr, off_t *offset, user_ssize_t *size);
 typedef int (*decmpfs_fetch_uncompressed_data_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read);
 typedef int (*decmpfs_free_compressed_data_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr);
+typedef uint64_t (*decmpfs_get_decompression_flags_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr); // returns flags from the DECMPFS_FLAGS enumeration below
+
+enum {
+    DECMPFS_FLAGS_FORCE_FLUSH_ON_DECOMPRESS = 1 << 0,
+};
+
+/* Versions that are supported for binary compatibility */
+#define DECMPFS_REGISTRATION_VERSION_V1 1
+#define DECMPFS_REGISTRATION_VERSION_V3 3
+
+#define DECMPFS_REGISTRATION_VERSION (DECMPFS_REGISTRATION_VERSION_V3)
 
-#define DECMPFS_REGISTRATION_VERSION 1
 typedef struct {
     int                                   decmpfs_registration;
     decmpfs_validate_compressed_file_func validate;
     decmpfs_adjust_fetch_region_func      adjust_fetch;
     decmpfs_fetch_uncompressed_data_func  fetch;
     decmpfs_free_compressed_data_func     free_data;
+    decmpfs_get_decompression_flags_func  get_flags;
 } decmpfs_registration;
 
 /* hooks for kexts to call */
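
The V3 registration adds the get_flags callback alongside the V1 fields. A hedged sketch of how a compression kext might populate the struct; the my_* callbacks are placeholders whose bodies are omitted:

/* Placeholder callbacks matching the typedefs above. */
static int      my_validate(vnode_t, vfs_context_t, decmpfs_header *);
static void     my_adjust_fetch(vnode_t, vfs_context_t, decmpfs_header *,
                    off_t *, user_ssize_t *);
static int      my_fetch(vnode_t, vfs_context_t, decmpfs_header *,
                    off_t, user_ssize_t, int, decmpfs_vector *, uint64_t *);
static int      my_free_data(vnode_t, vfs_context_t, decmpfs_header *);
static uint64_t my_get_flags(vnode_t, vfs_context_t, decmpfs_header *);

static const decmpfs_registration my_registration = {
        .decmpfs_registration = DECMPFS_REGISTRATION_VERSION_V3,
        .validate             = my_validate,
        .adjust_fetch         = my_adjust_fetch,
        .fetch                = my_fetch,
        .free_data            = my_free_data,
        .get_flags            = my_get_flags,   /* new in V3 */
};
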
index 2bdd79a55337be0b3601c07719a95025b5cac7a4..ba454ab6979b591cd10cebf4992fe791695f39fc 100644 (file)
@@ -149,16 +149,6 @@ typedef struct
 #define DKIOCGETPHYSICALBLOCKSIZE             _IOR('d', 77, uint32_t)
 #define DKIOCGETCOMMANDPOOLSIZE               _IOR('d', 78, uint32_t)
 
-typedef struct
-{
-    uint64_t               offset;
-    uint64_t               length;
-
-    uint8_t                reserved0128[16];       /* reserved, clear to zero */
-} dk_discard_t __attribute__ ((deprecated));
-
-#define DKIOCDISCARD                          _IOW('d', 31, dk_discard_t)
-
 #ifdef KERNEL
 #define DK_FEATURE_FORCE_UNIT_ACCESS          0x00000001
 
@@ -182,10 +172,22 @@ typedef struct
 #define DKIOCLOCKPHYSICALEXTENTS              _IO('d', 81)
 #define DKIOCGETPHYSICALEXTENT                _IOWR('d', 82, dk_physical_extent_t)
 #define DKIOCUNLOCKPHYSICALEXTENTS            _IO('d', 83)
-
 #ifdef PRIVATE
-#define _DKIOCGETMIGRATIONUNITBYTESIZE        _IOR('d', 85, uint32_t)
+typedef struct _dk_cs_pin {
+       dk_extent_t     cp_extent;
+       int64_t         cp_flags;
+} _dk_cs_pin_t;
+#define _DKIOCSPINDISCARDDATA (1 << 0)
+#define _DKIOCCSPINEXTENT                     _IOW('d', 199, _dk_cs_pin_t)
+#define _DKIOCCSUNPINEXTENT                   _IOW('d', 200, _dk_cs_pin_t)
+#define _DKIOCGETMIGRATIONUNITBYTESIZE        _IOR('d', 201, uint32_t)
 #endif /* PRIVATE */
 #endif /* KERNEL */
 
+#ifdef PRIVATE
+#ifdef TARGET_OS_EMBEDDED
+#define _DKIOCSETSTATIC                       _IO('d', 84)
+#endif /* TARGET_OS_EMBEDDED */
+#endif /* PRIVATE */
+
 #endif /* _SYS_DISK_H_ */
index 9edf0db5ebaacff8db089e0e5972e61391737a2b..9f6bd965fd4ff41da3c413911077896abd0ab5f2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2006, 2012 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -120,6 +120,8 @@ extern struct       domain localdomain;
 __BEGIN_DECLS
 extern void    net_add_domain(struct domain *dp);
 extern int     net_del_domain(struct domain *);
+extern int     domain_proto_mtx_lock(void);
+extern void    domain_proto_mtx_unlock(int locked);
 __END_DECLS
 
 #define DOMAIN_SET(domain_set) 
index d81a48a4f282c20c0e3cb436109edce5a95f8fc3..cffaefdefaf6299b52534da8fcb96c1433fafb75 100644 (file)
@@ -2423,7 +2423,7 @@ extern void (*dtrace_cpu_init)(processorid_t);
 extern void (*dtrace_modload)(struct modctl *);
 extern void (*dtrace_modunload)(struct modctl *);
 #else
-extern int (*dtrace_modload)(struct kmod_info *);
+extern int (*dtrace_modload)(struct kmod_info *, uint32_t);
 extern int (*dtrace_modunload)(struct kmod_info *);
 #endif /* __APPLE__ */
 extern void (*dtrace_helpers_cleanup)(proc_t*);
@@ -2442,8 +2442,6 @@ extern hrtime_t dtrace_gethrtime(void);
 extern void dtrace_sync(void);
 extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t));
 extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *);
-extern void dtrace_vpanic(const char *, __va_list);
-extern void dtrace_panic(const char *, ...);
 
 extern int dtrace_safe_defer_signal(void);
 extern void dtrace_safe_synchronous_signal(void);
index 7f42cff5e07a306f2268445a1b7e4327f951c811..38614c300abe8f30d7802038ef87265b30adb9ca 100644 (file)
@@ -1353,6 +1353,7 @@ extern void dtrace_probe_error(dtrace_state_t *, dtrace_epid_t, int, int,
 extern int dtrace_assfail(const char *, const char *, int);
 extern int dtrace_attached(void);
 extern hrtime_t dtrace_gethrestime(void);
+extern void dtrace_isa_init(void);
 
 #ifdef __sparc
 extern void dtrace_flush_windows(void);
index 231c68eada9e0dfd9dd72eefbbf65dd1f4a6b547..8105a42b3624bf08f82218edd0b0e81031ef0680 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -256,7 +256,8 @@ __END_DECLS
 #endif
 
 #if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
-#define        ELAST           105             /* Must be equal largest errno */
+#define        EQFULL          106             /* Interface output queue is full */
+#define        ELAST           106             /* Must be equal largest errno */
 #endif
 
 #ifdef KERNEL
@@ -272,8 +273,10 @@ __END_DECLS
 #define ECVCERORR      256
 #define ECVPERORR      512
 #else /* BSD_KERNEL_PRIVATE */
-/* -5 and -6 are reserved for kernel internal use */
+/* -5, -6, -7 and -106 are reserved for kernel internal use */
 #endif /* BSD_KERNEL_PRIVATE */
-
-#endif
+#ifdef PRIVATE
+#define        EQSUSPENDED     (-EQFULL)       /* Output queue is suspended */
+#endif /* PRIVATE */
+#endif /* KERNEL */
 #endif /* _SYS_ERRNO_H_ */
index 05b31174a5739e89da99b96344996abb0ea6c6da..d22d5efb2050c3e3afda7eeceb97dd88c066ee3f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
                                        /* (-11) unused */
 #define EVFILT_VM              (-12)   /* Virtual memory events */
 
-#define EVFILT_SYSCOUNT                12
+#ifdef PRIVATE
+#define EVFILT_SOCK            (-13)   /* Socket events */
+#endif /* PRIVATE */
+
+#define EVFILT_SYSCOUNT                13
 #define EVFILT_THREADMARKER    EVFILT_SYSCOUNT /* Internal use only */
 
 #pragma pack(4)
@@ -242,6 +246,18 @@ struct kevent64_s {
 #define        NOTE_SIGNAL     0x08000000              /* shared with EVFILT_SIGNAL */
 #define        NOTE_EXITSTATUS 0x04000000              /* exit status to be returned, valid for child process only */
 #define        NOTE_RESOURCEEND 0x02000000             /* resource limit reached, resource type returned */
+
+#if CONFIG_EMBEDDED
+/* 0x01000000  is reserved for future use */
+
+/* App states notification */
+#define        NOTE_APPACTIVE          0x00800000      /* app went to active state */
+#define        NOTE_APPBACKGROUND      0x00400000      /* app went to background */
+#define        NOTE_APPNONUI           0x00200000      /* app went to active with no UI */
+#define        NOTE_APPINACTIVE        0x00100000      /* app went to inactive state */
+#define NOTE_APPALLSTATES      0x00f00000
+#endif /* CONFIG_EMBEDDED */
+
 #define        NOTE_PDATAMASK  0x000fffff              /* mask for pid/signal */
 #define        NOTE_PCTRLMASK  (~NOTE_PDATAMASK)
 
@@ -265,6 +281,23 @@ struct kevent64_s {
 #define NOTE_NSECONDS  0x00000004              /* data is nanoseconds     */
 #define NOTE_ABSOLUTE  0x00000008              /* absolute timeout        */
                                                /* ... implicit EV_ONESHOT */
+#ifdef PRIVATE
+/*
+ * data/hint fflags for EVFILT_SOCK, shared with userspace.
+ *
+ */
+#define        NOTE_CONNRESET          0x00000001 /* Received RST */
+#define        NOTE_READCLOSED         0x00000002 /* Read side is shutdown */
+#define        NOTE_WRITECLOSED        0x00000004 /* Write side is shutdown */
+#define        NOTE_TIMEOUT            0x00000008 /* timeout: rexmt, keep-alive or persist */
+#define        NOTE_NOSRCADDR          0x00000010 /* source address not available */
+#define        NOTE_IFDENIED           0x00000020 /* interface denied connection */
+#define        NOTE_SUSPEND            0x00000040 /* output queue suspended */
+#define        NOTE_RESUME             0x00000080 /* output queue resumed */
+#define NOTE_KEEPALIVE         0x00000100 /* TCP Keepalive received */
+
+#endif /* PRIVATE */
+
 /*
  * data/hint fflags for EVFILT_MACHPORT, shared with userspace.
  *
@@ -382,8 +415,8 @@ extern void klist_init(struct klist *list);
 extern void    knote(struct klist *list, long hint);
 extern int     knote_attach(struct klist *list, struct knote *kn);
 extern int     knote_detach(struct klist *list, struct knote *kn);
-extern int     knote_link_wait_queue(struct knote *kn, struct wait_queue *wq); 
-extern void    knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq);
+extern int     knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql);  
+extern int     knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp);
 extern void    knote_fdclose(struct proc *p, int fd);
 extern void    knote_markstayqueued(struct knote *kn);
 
index f6cbe9d5ac83153dd0c3e0cc72a04e6f81da92f3..acd5f4c2fd7ce082985a98a4ed1a731990f01834 100644 (file)
@@ -179,6 +179,20 @@ typedef __darwin_pid_t     pid_t;
 #define        O_CLOEXEC       0x1000000       /* implicitly set FD_CLOEXEC */
 #endif
 
+#ifdef KERNEL
+#define FENCRYPTED     0x2000000
+#endif
+
+#ifdef KERNEL
+#define FSINGLE_WRITER 0x4000000       /* fcntl(F_SINGLE_WRITER, 1) */
+#endif
+
+/* Data Protection Flags */
+#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#define O_DP_GETRAWENCRYPTED   0x0001
+#endif
+
+
 #ifdef KERNEL
 /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
 #define        FFLAGS(oflags)  ((oflags) + 1)
@@ -270,16 +284,32 @@ typedef __darwin_pid_t    pid_t;
 
 #define        F_GETLKPID              66              /* get record locking information, per-process */
 
+/* See F_DUPFD_CLOEXEC below for 67 */
+
 #ifdef PRIVATE
-#define F_MOVEDATAEXTENTS      69      /* Swap only the data associated with two files */
+#define F_SETSTATICCONTENT             68              /* 
+                                                                                * Indicate to the filesystem/storage driver that the content to be
+                                                                                * written is usually static.  A nonzero value enables it, 0 disables it.
+                                                                                */
+#define F_MOVEDATAEXTENTS      69              /* Swap only the data associated with two files */
 #endif
 
 #define F_SETBACKINGSTORE      70      /* Mark the file as being the backing store for another filesystem */
 #define F_GETPATH_MTMINFO      71      /* return the full path of the FD, but error in specific mtmd circumstances */
 
+/* 72 is free.  It used to be F_GETENCRYPTEDDATA, which is now removed. */
+
 #define F_SETNOSIGPIPE         73      /* No SIGPIPE generated on EPIPE */
 #define F_GETNOSIGPIPE         74      /* Status of SIGPIPE for this fd */
 
+#define F_TRANSCODEKEY         75      /* For some cases, we need to rewrap the key for AKS/MKB */
+
+#define F_SINGLE_WRITER                76      /* file being written to by a single writer... if throttling enabled, writes */
+                                        /* may be broken into smaller chunks with throttling in between */
+
+#define F_GETPROTECTIONLEVEL   77      /* Get the protection version number for this filesystem */
+
+
 // FS-specific fcntl()'s numbers begin at 0x00010000 and go up
 #define FCNTL_FS_SPECIFIC_BASE  0x00010000
 
@@ -300,8 +330,9 @@ typedef __darwin_pid_t      pid_t;
 #define        F_WAIT          0x010           /* Wait until lock is granted */
 #define        F_FLOCK         0x020           /* Use flock(2) semantics for lock */
 #define        F_POSIX         0x040           /* Use POSIX semantics for lock */
-#define        F_PROV          0x080           /* Non-coelesced provisional lock */
+#define        F_PROV          0x080           /* Non-coalesced provisional lock */
 #define F_WAKE1_SAFE    0x100           /* its safe to only wake one waiter */
+#define        F_ABORT         0x200           /* lock attempt aborted (force umount) */
 #endif
 
 /*
@@ -386,7 +417,6 @@ struct flock {
        short   l_whence;       /* type of l_start */
 };
 
-
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 /*
  * advisory file read data type -
@@ -603,6 +633,8 @@ int     fileport_makeport(int, fileport_t*);
 int     fileport_makefd(fileport_t);
 #endif /* PRIVATE */
 int    openx_np(const char *, int, filesec_t);
+/* data-protected non-portable open(2) */
+int open_dprotected_np ( const char *, int, int, int, ...);
 int    flock(int, int);
 filesec_t filesec_init(void);
 filesec_t filesec_dup(filesec_t);
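
open_dprotected_np() extends open() with a protection class and data-protection flags; O_DP_GETRAWENCRYPTED above asks for the raw encrypted bytes. A hedged sketch: the class value 2 is a placeholder for a constant from <sys/content_protection.h>, which this header does not define, and the wrapper name is illustrative:

#include <fcntl.h>

int
open_with_protection(const char *path)
{
        int dpclass = 2;        /* placeholder protection class          */
        int dpflags = 0;        /* or O_DP_GETRAWENCRYPTED for raw reads */

        return open_dprotected_np(path, O_RDONLY | O_CREAT, dpclass,
            dpflags, 0600);
}
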
index bd3629144a47dedff8547820a775cd2acad688b0..dcf08f44814f7f74f64c87c5682e406e3a2eddf5 100644 (file)
@@ -85,23 +85,6 @@ struct posix_cred;
 typedef struct posix_cred *posix_cred_t;
 #endif /* !_KAUTH_CRED_T */
 
-#pragma pack(4)
-
-/* for the compat sake;  */
-struct extern_file {
-       LIST_ENTRY(extern_file) f_list; /* list of active files */
-       short   f_flag;         /* see fcntl.h */
-       short   f_type;         /* descriptor type */
-       short   f_count;        /* reference count */
-       short   f_msgcount;     /* references from message queue */
-       kauth_cred_t f_cred;    /* credentials associated with descriptor */
-       void * f_ops;
-       off_t   f_offset;
-       caddr_t f_data;         /* vnode or socket or SHM or semaphore */
-};
-
-#pragma pack()
-
 __BEGIN_DECLS
 #ifdef KERNEL
 int file_socket(int, socket_t *);
index 9fcb4d1f32f60b9aba17e32770ba2b7ba0424d4c..473415d1d399ad2b1c2f7ac35bf220e1d58855b5 100644 (file)
@@ -139,7 +139,6 @@ typedef enum {
 #define FG_NOSIGPIPE   0x40    /* don't deliver SIGPIPE with EPIPE return */
 
 struct fileglob {
-       LIST_ENTRY(fileglob) f_list;/* list of active files */
        LIST_ENTRY(fileglob) f_msglist;/* list of active files */
        int32_t fg_flag;                /* see fcntl.h */
        file_type_t fg_type;            /* descriptor type */
@@ -172,9 +171,7 @@ struct fileglob {
 };
 
 #ifdef __APPLE_API_PRIVATE
-LIST_HEAD(filelist, fileglob);
 LIST_HEAD(fmsglist, fileglob);
-extern struct filelist filehead;       /* head of list of open files */
 extern struct fmsglist fmsghead;       /* head of list of open files */
 extern int maxfiles;                   /* kernel limit on number of open files */
 extern int nfiles;                     /* actual number of open files */
index 0a194b77967e678ea53f60a2b9a6627d24b05f01..dd25d3a58e50cc8912589e620540d2d0db90565d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2005, 2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -122,10 +122,7 @@ struct image_params {
  */
 #define        IMGPF_NONE              0x00000000      /* No flags */
 #define        IMGPF_INTERPRET         0x00000001      /* Interpreter invoked */
-#define        IMGPF_POWERPC           0x00000002      /* ppc mode for x86 */
-#if CONFIG_EMBEDDED
-#undef IMGPF_POWERPC
-#endif
+#define        IMGPF_RESERVED          0x00000002
 #define        IMGPF_WAS_64BIT         0x00000004      /* exec from a 64Bit binary */
 #define        IMGPF_IS_64BIT          0x00000008      /* exec to a 64Bit binary */
 #define        IMGPF_SPAWN             0x00000010      /* spawn (without setexec) */
diff --git a/bsd/sys/kas_info.h b/bsd/sys/kas_info.h
new file mode 100644 (file)
index 0000000..c1be076
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _SYS_KAS_INFO_H_
+#define        _SYS_KAS_INFO_H_
+
+#include <sys/cdefs.h>
+#include <Availability.h>
+
+/*
+ * kas_info() ("Kernel Address Space Info") is a private interface that allows
+ * appropriately privileged system components to introspect the overall
+ * kernel address space layout.
+ */
+
+__BEGIN_DECLS
+
+/* The slide of the main kernel compared to its static link address */
+#define        KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR     (0)     /* returns uint64_t     */
+#define        KAS_INFO_MAX_SELECTOR           (1)
+
+#ifndef KERNEL
+
+int kas_info(int selector, void *value, size_t *size) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_NA);
+
+#endif /* KERNEL */
+
+__END_DECLS
+
+#endif /* !_SYS_KAS_INFO_H_ */
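
A hedged user-space sketch of the new interface: query the kernel text slide via the single defined selector. The call requires appropriate privilege and fails with errno otherwise.

#include <sys/kas_info.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t slide = 0;
        size_t size = sizeof(slide);

        if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) != 0) {
                perror("kas_info");     /* e.g. EPERM without privilege */
                return 1;
        }
        printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
        return 0;
}
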
index 94f0b1e1e8a29830e4a984e85c89f1688f49dfdc..a077ceefa627e5bd05eabbce22ce44091cc982fe 100644 (file)
@@ -128,10 +128,18 @@ struct kauth_identity_extlookup {
        u_int32_t       el_member_valid; /* TTL on group lookup result */
 };
 
+struct kauth_cache_sizes {
+       u_int32_t kcs_group_size;
+       u_int32_t kcs_id_size;
+};
+
 #define KAUTH_EXTLOOKUP_REGISTER       (0)
 #define KAUTH_EXTLOOKUP_RESULT         (1<<0)
 #define KAUTH_EXTLOOKUP_WORKER         (1<<1)
 #define        KAUTH_EXTLOOKUP_DEREGISTER      (1<<2)
+#define        KAUTH_GET_CACHE_SIZES           (1<<3)
+#define        KAUTH_SET_CACHE_SIZES           (1<<4)
+#define        KAUTH_CLEAR_CACHES              (1<<5)
 
 
 #ifdef KERNEL
@@ -772,10 +780,12 @@ extern lck_grp_t *kauth_lck_grp;
 #ifdef XNU_KERNEL_PRIVATE
 __BEGIN_DECLS
 extern void    kauth_init(void) __attribute__((section("__TEXT, initcode")));
+extern void    kauth_cred_init(void) __attribute__((section("__TEXT, initcode")));
+#if CONFIG_EXT_RESOLVER
 extern void    kauth_identity_init(void) __attribute__((section("__TEXT, initcode")));
 extern void    kauth_groups_init(void) __attribute__((section("__TEXT, initcode")));
-extern void    kauth_cred_init(void) __attribute__((section("__TEXT, initcode")));
 extern void    kauth_resolver_init(void) __attribute__((section("__TEXT, initcode")));
+#endif
 __END_DECLS
 #endif /* XNU_KERNEL_PRIVATE */
 
index 393c413dfa4effaa95730d34bc690c212f9e8826..fbaddae7b74b89198395eb59b7e0e6887582306e 100644 (file)
@@ -92,11 +92,13 @@ __BEGIN_DECLS
 #define DBG_DLIL               8
 #define DBG_SECURITY           9
 #define DBG_CORESTORAGE                10
+#define DBG_CG                         11
 #define DBG_MISC               20
 #define DBG_DYLD               31
 #define DBG_QT                 32
 #define DBG_APPS               33
 #define DBG_LAUNCHD            34
+#define DBG_PERF                37
 #define DBG_MIG                        255
 
 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */
@@ -137,12 +139,16 @@ __BEGIN_DECLS
 #define MACH_MOVED              0xb    /* did not use original scheduling decision */
 #define MACH_FAIRSHARE_ENTER    0xc    /* move to fairshare band */
 #define MACH_FAIRSHARE_EXIT     0xd    /* exit fairshare band */
-#define MACH_FAILSAFE          0xe     /* tripped fixed-pri/RT failsafe */
+#define MACH_FAILSAFE           0xe    /* tripped fixed-pri/RT failsafe */
+#define MACH_BLOCK              0xf    /* thread block */
+#define MACH_WAIT              0x10    /* thread wait assertion */
 #define        MACH_GET_URGENCY        0x14    /* Urgency queried by platform */
 #define        MACH_URGENCY            0x15    /* Urgency (RT/BG/NORMAL) communicated
-                                        * to platform */
+                                        * to platform
+                                        */
 #define        MACH_REDISPATCH         0x16    /* "next thread" thread redispatched */
 #define        MACH_REMOTE_AST         0x17    /* AST signal issued to remote processor */
+
 #define        MACH_SCHED_LPA_BROKEN   0x18    /* last_processor affinity broken in choose_processor */
 
 /* Codes for pmap (DBG_MACH_PMAP) */     
@@ -242,6 +248,8 @@ __BEGIN_DECLS
 #define DBG_DRVINFINIBAND      17      /* Infiniband */
 #define DBG_DRVGRAPHICS                18  /* Graphics */
 #define DBG_DRVSD              19      /* Secure Digital */
+#define DBG_DRVNAND            20      /* NAND drivers and layers */
+#define DBG_SSD                        21      /* SSD */
 
 /* Backwards compatibility */
 #define        DBG_DRVPOINTING         DBG_DRVHID              /* OBSOLETE: Use DBG_DRVHID instead */
@@ -263,6 +271,8 @@ __BEGIN_DECLS
 #define DBG_IOCTL     6       /* ioctl to the disk */
 #define DBG_BOOTCACHE 7       /* bootcache operations */
 #define DBG_HFS       8       /* HFS-specific events; see bsd/hfs/hfs_kdebug.h */
+#define DBG_EXFAT     0xE     /* ExFAT-specific events; see the exfat project */
+#define DBG_MSDOS     0xF     /* FAT-specific events; see the msdosfs project */
 
 /* The Kernel Debug Sub Classes for BSD */
 #define DBG_BSD_PROC           0x01    /* process/signals related */
@@ -284,6 +294,8 @@ __BEGIN_DECLS
 /* The Kernel Debug Sub Classes for DBG_CORESTORAGE */
 #define DBG_CS_IO      0
 
+/* Sub-class codes for CoreGraphics (DBG_CG) are defined in its component. */
+
 /* The Kernel Debug Sub Classes for DBG_MISC */
 #define DBG_EVENT      0x10
 #define        DBG_BUFFER      0x20
@@ -299,10 +311,12 @@ __BEGIN_DECLS
 #define DKIO_PAGING    0x10
 #define DKIO_THROTTLE  0x20
 #define DKIO_PASSIVE   0x40
+#define DKIO_NOCACHE   0x80
 
 /* Codes for Application Sub Classes */
 #define DBG_APP_SAMBA  128
 
+
 /**********************************************************************/
 
 #define KDBG_CODE(Class, SubClass, code) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16) | ((code & 0x3fff)  << 2))
@@ -335,6 +349,7 @@ __BEGIN_DECLS
 
 #define PMAP_CODE(code) MACHDBG_CODE(DBG_MACH_PMAP, code)
 
+
 /*   Usage:
 * kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), 
 *      offset, 0, 0, 0,0) 
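
KDBG_CODE() packs class, subclass, and code into one 32-bit debugid: class in bits 24-31, subclass in bits 16-23, code in bits 2-15, leaving the low two bits for DBG_FUNC_START/DBG_FUNC_END. A worked example using the new DBG_PERF class; the event name is illustrative:

/* (37 & 0xff)  << 24 = 0x25000000
 * ( 1 & 0xff)  << 16 = 0x00010000
 * ( 5 & 0x3fff) << 2 = 0x00000014   ->  debugid 0x25010014 */
#define MY_PERF_EVENT  KDBG_CODE(DBG_PERF, 1, 5)

/* Typical begin/end bracketing of a traced region:
 *      KERNEL_DEBUG_CONSTANT(MY_PERF_EVENT | DBG_FUNC_START, a, b, c, d, 0);
 *      ... work ...
 *      KERNEL_DEBUG_CONSTANT(MY_PERF_EVENT | DBG_FUNC_END,   a, b, c, d, 0);
 */
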
@@ -366,41 +381,96 @@ extern unsigned int kdebug_enable;
 #define KDEBUG_ENABLE_TRACE   0x1
 #define KDEBUG_ENABLE_ENTROPY 0x2
 #define KDEBUG_ENABLE_CHUD    0x4
+#define KDEBUG_ENABLE_PPT     0x8
 
-#if    (!defined(NO_KDEBUG))
-#ifdef XNU_KERNEL_PRIVATE
+/*
+ * Infer the supported kernel debug event level from config option.
+ * Use (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) as a guard to protect
+ * unaudited debug code. 
+ */
+#define KDEBUG_LEVEL_NONE     0
+#define KDEBUG_LEVEL_IST      1
+#define KDEBUG_LEVEL_STANDARD 2
+#define KDEBUG_LEVEL_FULL     3
+
+#if NO_KDEBUG
+#define KDEBUG_LEVEL KDEBUG_LEVEL_NONE    
+#elif IST_KDEBUG
+#define KDEBUG_LEVEL KDEBUG_LEVEL_IST
+#elif KDEBUG
+#define KDEBUG_LEVEL KDEBUG_LEVEL_FULL
+#else
+#define KDEBUG_LEVEL KDEBUG_LEVEL_STANDARD
+#endif
+
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
+#ifdef XNU_KERNEL_PRIVATE
 #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)                             \
 do {                                                                   \
-       if (__improbable(kdebug_enable))                                        \
+       if (__improbable(kdebug_enable & ~KDEBUG_ENABLE_PPT))                                   \
         kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
                       (uintptr_t)d,(uintptr_t)e);                      \
 } while(0)
 
 #define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e)                            \
 do {                                                                   \
-       if (__improbable(kdebug_enable))                                        \
+       if (__improbable(kdebug_enable & ~KDEBUG_ENABLE_PPT))                                   \
         kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,                \
                        (uintptr_t)d,(uintptr_t)e);                     \
 } while(0)
 #else  /* XNU_KERNEL_PRIVATE */
 #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)                             \
 do {                                                                   \
-       if (kdebug_enable)                                              \
+       if (kdebug_enable & ~KDEBUG_ENABLE_PPT)                                         \
         kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
                       (uintptr_t)d,(uintptr_t)e);                      \
 } while(0)
 
 #define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e)                            \
 do {                                                                   \
-       if (kdebug_enable)                                              \
+       if (kdebug_enable & ~KDEBUG_ENABLE_PPT)                                         \
         kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,                \
                        (uintptr_t)d,(uintptr_t)e);                     \
 } while(0)
 #endif /* XNU_KERNEL_PRIVATE */
-#else /*!NO_KDEBUG */
+#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */
 #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) do { } while(0)
 #define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e) do { } while(0)
+#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */
+
+/* 
+ * Specify KDEBUG_PPT to indicate that the event belongs to the
+ * limited PPT set.
+ */
+#define KDEBUG_COMMON (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_ENTROPY|KDEBUG_ENABLE_CHUD|KDEBUG_ENABLE_PPT)
+#define KDEBUG_TRACE  (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_ENTROPY|KDEBUG_ENABLE_CHUD)
+#define KDEBUG_PPT    (KDEBUG_ENABLE_PPT)
 
+/*
+ * KERNEL_DEBUG_CONSTANT_IST events provide an audited subset of
+ * tracepoints for userland system tracing tools.
+ */
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
+#ifdef XNU_KERNEL_PRIVATE
+#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                            \
+do {                                                                   \
+       if (__improbable(kdebug_enable & type))                                 \
+        kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
+                       (uintptr_t)d,(uintptr_t)e);                     \
+} while(0)
+#else /* XNU_KERNEL_PRIVATE */
+#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                            \
+do {                                                                   \
+       if (kdebug_enable & type)                                               \
+        kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
+                       (uintptr_t)d,(uintptr_t)e);                     \
+} while(0)
+#endif /* XNU_KERNEL_PRIVATE */
+#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
+#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) do { } while(0)
+#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
+
+#if NO_KDEBUG
 #define __kdebug_constant_only __unused
 #endif
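
A sketch of how the new gating plays out for a caller (KDEBUG_TRACE, MACHDBG_CODE, and DBG_FUNC_NONE are provided by this header; the arguments are placeholders):

    /* Fires for regular tracing, stays silent in PPT-only mode: */
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
        MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE,
        arg1, arg2, arg3, arg4, 0);

    /* Fires whenever any enable bit other than KDEBUG_ENABLE_PPT is set: */
    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE,
        arg1, arg2, arg3, arg4, 0);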
 
@@ -421,18 +491,18 @@ extern void kernel_debug1(
                uintptr_t arg5);
 
 
-#if    (KDEBUG && (!defined(NO_KDEBUG)))
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL)
 #ifdef XNU_KERNEL_PRIVATE
 #define KERNEL_DEBUG(x,a,b,c,d,e)                                      \
 do {                                                                   \
-       if (__improbable(kdebug_enable))                                \
+       if (__improbable(kdebug_enable & ~KDEBUG_ENABLE_PPT))                   \
         kernel_debug((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,         \
                     (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);         \
 } while(0)
 
 #define KERNEL_DEBUG1(x,a,b,c,d,e)                                     \
 do {                                                                   \
-       if (__improbable(kdebug_enable))                                \
+       if (__improbable(kdebug_enable & ~KDEBUG_ENABLE_PPT))                   \
         kernel_debug1((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,                \
                      (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);        \
 } while(0)
@@ -441,25 +511,24 @@ do {                                                                      \
 #else /* !XNU_KERNEL_PRIVATE */
 #define KERNEL_DEBUG(x,a,b,c,d,e)                                      \
 do {                                                                   \
-       if (kdebug_enable)                                              \
+       if (kdebug_enable & ~KDEBUG_ENABLE_PPT)                         \
         kernel_debug((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,         \
                     (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);         \
 } while(0)
 
 #define KERNEL_DEBUG1(x,a,b,c,d,e)                                     \
 do {                                                                   \
-       if (kdebug_enable)                                              \
+       if (kdebug_enable & ~KDEBUG_ENABLE_PPT)                         \
         kernel_debug1((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,                \
                      (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);        \
 } while(0)
 #endif /* XNU_KERNEL_PRIVATE */
-#else
-
+#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */
 #define KERNEL_DEBUG(x,a,b,c,d,e) do {} while (0)
 #define KERNEL_DEBUG1(x,a,b,c,d,e) do {} while (0)
 
 #define __kdebug_only __unused
-#endif
+#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */
 
 #ifdef KERNEL_PRIVATE
 #include <mach/boolean.h>
@@ -483,6 +552,8 @@ void trace_handler_unmap_bufinfo(void);
 void trace_handler_map_buffer(int index, uintptr_t addr, unsigned long size);
 void trace_handler_unmap_buffer(int index);
 void trace_set_timebases(uint64_t tsc, uint64_t ns);
+
+
 #endif  /* KERNEL_PRIVATE */
 
 
@@ -571,6 +642,9 @@ kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t thetime, int cpu)
 }
 #endif
 
+/* 2^16 bits (8 kilobytes), one for each possible class/subclass combination */
+#define KDBG_TYPEFILTER_BITMAP_SIZE            ( (256 * 256) / 8 )
+
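The bitmap consulted under KDBG_TYPEFILTER_CHECK (defined further below) is indexed by the 16-bit class/subclass prefix of a debugid. A minimal sketch of the lookup, assuming the usual debugid layout of class(8) | subclass(8) | code(14) | func(2); the kernel's own accessors may differ:

    static int
    typefilter_allows(const uint8_t *bitmap, uint32_t debugid)
    {
            uint32_t csc = debugid >> 16;           /* class << 8 | subclass */
            return (bitmap[csc >> 3] & (1 << (csc & 7))) != 0;
    }
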
 /* Debug Flags */
 #define        KDBG_INIT       0x001
 #define        KDBG_NOWRAP     0x002
@@ -628,6 +702,8 @@ typedef struct {
 #define        KDBG_RANGECHECK 0x100000
 #define        KDBG_VALCHECK   0x200000        /* Check up to 4 individual values */
 
+#define        KDBG_TYPEFILTER_CHECK   ((uint32_t) 0x400000)        /* Check class and subclass against a bitmap */ 
+
 #define        KDBG_BUFINIT    0x80000000
 
 /* Control operations */
diff --git a/bsd/sys/kern_callout.h b/bsd/sys/kern_callout.h
deleted file mode 100644 (file)
index 6ac7642..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef KPI_KERN_CALLOUT_H
-#define KPI_KERN_CALLOUT_H
-
-#ifdef KERNEL
-
-/*
- * Default sample threshold for validity
- */
-#define MA_SMA_SAMPLES 10                      /* simple moving average */
-
-/*
- * Flags bits for the ma_flags field
- */
-#define        KCO_MA_F_SMA            0x00000001      /* Simple moving average */
-#define        KCO_MA_F_WMA            0x00000002      /* Weighted moving average */
-#define        KCO_MA_F_NEEDS_INIT     0x80000000      /* Need initialization */
-
-struct kco_moving_average {
-       int             ma_flags;               /* flags */
-       uint64_t        ma_sma;                 /* simple over MA_SMA_SAMPLES*/
-       uint64_t        ma_old_sma;             /* previous value */
-       uint64_t        ma_sma_samples[MA_SMA_SAMPLES]; /* sample history */
-       int32_t         ma_sma_threshold;       /* trigger delta (%) */
-       int             ma_sma_trigger_count;   /* number of time triggered */
-       uint64_t        ma_wma;                 /* weighted */
-       uint64_t        ma_old_wma;             /* previous value */
-       int             ma_wma_weight;          /* weighting (< 100) */
-       int32_t         ma_wma_threshold;       /* trigger delta (%) */
-       int             ma_wma_trigger_count;   /* number of time triggered */
-};
-
-__BEGIN_DECLS
-int kco_ma_addsample(struct kco_moving_average *map, uint64_t sample_time);
-void kco_ma_init(struct kco_moving_average *map, int32_t threshold, int kind);
-int kco_ma_info(struct kco_moving_average *map, int kind, uint64_t *averagep, uint64_t *old_averagep, int32_t *thresholdp, int *countp);
-__END_DECLS
-
-#endif /* KERNEL */
-
-#endif /* KPI_KERN_CONTROL_H */
index 4a05a490f5a405fe99802049a0a2ed00d9309f41..66fabee01d0ee97aec17f1b1c8cff8dffc6fb11b 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*!
-       @header kern_memorystatus.h
-       This header defines a kernel event subclass for the OSMemoryNotification API
- */
 
-#ifndef SYS_KERN_MEMORYSTATUS_H
-#define SYS_KERN_MEMORYSTATUS_H
-
-#ifndef MACH_KERNEL_PRIVATE
+#ifndef SYS_MEMORYSTATUS_H
+#define SYS_MEMORYSTATUS_H
 
 #include <stdint.h>
 #include <sys/time.h>
 #include <sys/proc.h>
+#include <sys/param.h>
+
+#define DEFAULT_JETSAM_PRIORITY -100
+
+enum {
+       kMemorystatusFlagsFrontmost =         (1 << 0),
+       kMemorystatusFlagsKilled =            (1 << 1),
+       kMemorystatusFlagsKilledHiwat =       (1 << 2),
+       kMemorystatusFlagsFrozen     =        (1 << 3),
+       kMemorystatusFlagsKilledVnodes =      (1 << 4),
+       kMemorystatusFlagsKilledSwap =        (1 << 5),
+       kMemorystatusFlagsThawed =            (1 << 6),
+       kMemorystatusFlagsKilledVM =          (1 << 7),
+       kMemorystatusFlagsSuspForDiagnosis =  (1 << 8),
+       kMemorystatusFlagsActive =            (1 << 9),
+       kMemorystatusFlagsSupportsIdleExit =  (1 << 10),
+       kMemorystatusFlagsDirty =             (1 << 11)
+};
+
+#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED
 
 /*
  * Define Memory Status event subclass.
 #define KEV_MEMORYSTATUS_SUBCLASS        3
 
 enum {
-       kMemoryStatusLevelNote = 1,
-       kMemoryStatusSnapshotNote = 2,
-       kMemoryStatusHibernationNote = 3
+       kMemorystatusLevelNote = 1,
+       kMemorystatusSnapshotNote = 2,
+       kMemorystatusFreezeNote = 3,
+       kMemorystatusPressureNote = 4
 };
 
 enum {
-       kMemoryStatusLevelAny = -1,
-       kMemoryStatusLevelNormal = 0,
-       kMemoryStatusLevelWarning = 1,
-       kMemoryStatusLevelUrgent = 2,
-       kMemoryStatusLevelCritical = 3
+       kMemorystatusLevelAny = -1,
+       kMemorystatusLevelNormal = 0,
+       kMemorystatusLevelWarning = 1,
+       kMemorystatusLevelUrgent = 2,
+       kMemorystatusLevelCritical = 3
 };
 
-typedef struct jetsam_priority_entry {
+typedef struct memorystatus_priority_entry {
        pid_t pid;
        uint32_t flags;
        int32_t hiwat_pages;
-       int32_t hiwat_reserved1;
-       int32_t hiwat_reserved2;
-       int32_t hiwat_reserved3;
-} jetsam_priority_entry_t;
-
-/*
-** maximum killable processes to keep track of
-*/
-#define kMaxPriorityEntries 64 
-
-typedef struct jetsam_snapshot_entry {
-       pid_t pid;
-       char name[MAXCOMLEN+1];
-       uint32_t pages;
-       uint32_t flags;
-       uint8_t uuid[16];
-} jetsam_snapshot_entry_t;
+       int32_t priority;
+       int32_t reserved;
+       int32_t reserved2;
+} memorystatus_priority_entry_t;
 
 /*
 ** how many processes to snapshot
 */
 #define kMaxSnapshotEntries 128 
 
-typedef struct jetsam_kernel_stats {
+typedef struct memorystatus_kernel_stats {
        uint32_t free_pages;
        uint32_t active_pages;
        uint32_t inactive_pages;
+       uint32_t throttled_pages;
        uint32_t purgeable_pages;
        uint32_t wired_pages;
-} jetsam_kernel_stats_t;
+} memorystatus_kernel_stats_t;
 
 /*
 ** This is a variable-length struct.
-** Allocate a buffer of the size returned by the sysctl, cast to a jetsam_snapshot_t *
+** Allocate a buffer of the size returned by the sysctl, cast to a memorystatus_snapshot_t *
 */
 
+typedef struct jetsam_snapshot_entry {
+       pid_t pid;
+       char name[MAXCOMLEN+1];
+       int32_t priority;
+       uint32_t pages;
+       uint32_t flags;
+       uint8_t uuid[16];
+} memorystatus_jetsam_snapshot_entry_t;
+
 typedef struct jetsam_snapshot {
-       jetsam_kernel_stats_t stats;
+       uint64_t snapshot_time;
+       uint64_t notification_time;
+       memorystatus_kernel_stats_t stats;
        size_t entry_count;
-       jetsam_snapshot_entry_t entries[1];
-} jetsam_snapshot_t;
+       memorystatus_jetsam_snapshot_entry_t entries[1];
+} memorystatus_jetsam_snapshot_t;
+
+typedef memorystatus_priority_entry_t          jetsam_priority_entry_t;
+typedef memorystatus_jetsam_snapshot_t                 jetsam_snapshot_t;
+typedef memorystatus_jetsam_snapshot_entry_t   jetsam_snapshot_entry_t;
+
+#define kMemoryStatusLevelNote                 kMemorystatusLevelNote
+#define kMemoryStatusSnapshotNote      kMemorystatusSnapshotNote
+#define kMemoryStatusFreezeNote                kMemorystatusFreezeNote
+#define kMemoryStatusPressureNote      kMemorystatusPressureNote
 
-typedef struct jetsam_hibernation_entry {
-       uint32_t pid;
+typedef struct memorystatus_freeze_entry {
+       int32_t pid;
        uint32_t flags;
        uint32_t pages;
-} jetsam_hibernation_entry_t;
+} memorystatus_freeze_entry_t;
+
+#endif /* TARGET_OS_EMBEDDED || CONFIG_EMBEDDED */
+
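Because memorystatus_jetsam_snapshot_t is variable-length, a user-space consumer sizes the buffer first and then walks entry_count records. A sketch, assuming the snapshot is published through a sysctl named "kern.memorystatus_snapshot" (the sysctl itself is not part of this header):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/sysctl.h>
    #include <sys/kern_memorystatus.h>

    size_t len = 0;
    if (sysctlbyname("kern.memorystatus_snapshot", NULL, &len, NULL, 0) == 0) {
            memorystatus_jetsam_snapshot_t *snap = malloc(len);
            if (snap != NULL &&
                sysctlbyname("kern.memorystatus_snapshot", snap, &len, NULL, 0) == 0) {
                    for (size_t i = 0; i < snap->entry_count; i++) {
                            memorystatus_jetsam_snapshot_entry_t *e = &snap->entries[i];
                            if (e->flags & kMemorystatusFlagsKilledHiwat)
                                    printf("%d (%s) exceeded its high-water mark\n",
                                        e->pid, e->name);
                    }
            }
            free(snap);
    }
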
+#ifdef XNU_KERNEL_PRIVATE
+
+/* General tunables */
 
-#endif /* !MACH_KERNEL_PRIVATE */
+#define DELTA_PERCENT                    5
+#define CRITICAL_PERCENT                 5
+#define HIGHWATER_PERCENT               10
+#define PRESSURE_PERCENT                15
+#define FREEZE_PERCENT                  50
+
+#define POLICY_MORE_FREE_OFFSET_PERCENT    5
+#define POLICY_DIAGNOSTIC_OFFSET_PERCENT   5
+
+#define IDLE_EXIT_TIME_SECS                    10
 
 enum {
-       kJetsamFlagsFrontmost =        (1 << 0),
-       kJetsamFlagsKilled =           (1 << 1),
-       kJetsamFlagsKilledHiwat =      (1 << 2),
-       kJetsamFlagsHibernated =       (1 << 3),
-       kJetsamFlagsKilledVnodes =     (1 << 4),
-       kJetsamFlagsKilledSwap =       (1 << 5),
-       kJetsamFlagsThawed =           (1 << 6),
-       kJetsamFlagsKilledVM =         (1 << 7),
-       kJetsamFlagsSuspForDiagnosis = (1 << 8)
+       kProcessSuspended             = (1 << 0), 
+       kProcessFrozen                = (1 << 1),
+       kProcessNoReclaimWorth        = (1 << 2),
+       kProcessIgnored               = (1 << 3),
+       kProcessLocked                = (1 << 4),
+       kProcessKilled                = (1 << 5),
+       kProcessNotifiedForPressure   = (1 << 6),
+       kProcessPriorityUpdated       = (1 << 7),
+       kProcessActive                = (1 << 8),
+       kProcessForeground            = (1 << 9),
+       kProcessSuspendedForDiag      = (1 << 10),
+       kProcessSupportsIdleExit      = (1 << 11),
+       kProcessDirty                 = (1 << 12),
+       kProcessIgnoreIdleExit        = (1 << 13)
 };
 
-#ifdef KERNEL
-extern void kern_memorystatus_init(void) __attribute__((section("__TEXT, initcode")));
-extern int jetsam_kill_top_proc(boolean_t any, uint32_t reason);
+typedef struct memorystatus_node {
+       TAILQ_ENTRY(memorystatus_node) link;
+       pid_t pid;
+       int32_t priority;
+       uint32_t state;
+#if CONFIG_JETSAM
+       int32_t hiwat_pages;
+#endif
+#if CONFIG_FREEZE
+       uint32_t resident_pages;
+#endif
+       uint64_t clean_time;
+} memorystatus_node;
+
+extern int memorystatus_wakeup;
+extern unsigned int memorystatus_running;
+
+extern unsigned int memorystatus_available_pages;
+extern unsigned int memorystatus_available_pages_critical;
+extern unsigned int memorystatus_level;
+extern unsigned int memorystatus_delta;
+
+extern void memorystatus_init(void) __attribute__((section("__TEXT, initcode")));
+
+extern kern_return_t memorystatus_list_add(int pid, int priority, int high_water_mark);
+extern kern_return_t memorystatus_list_change(boolean_t effective, int pid, int priority, int state_flags, int high_water_mark);
+extern kern_return_t memorystatus_list_remove(int pid);
+
+extern kern_return_t memorystatus_on_track_dirty(int pid, boolean_t track);
+extern kern_return_t memorystatus_on_dirty(int pid, boolean_t dirty);
+
+extern void memorystatus_on_suspend(int pid);
+extern void memorystatus_on_resume(int pid);
+extern void memorystatus_on_inactivity(int pid);
+
+#if CONFIG_JETSAM
+
+typedef enum memorystatus_policy_t {
+       kPolicyDefault        = 0x0, 
+       kPolicyMoreFree       = 0x1,
+       kPolicyDiagnoseAll    = 0x2,
+       kPolicyDiagnoseFirst  = 0x4,
+       kPolicyDiagnoseActive = (kPolicyDiagnoseAll | kPolicyDiagnoseFirst),
+} memorystatus_policy_t;
+
+extern int memorystatus_jetsam_wakeup;
+extern unsigned int memorystatus_jetsam_running;
 
-extern int kern_memorystatus_wakeup;
-extern int kern_memorystatus_level;
-extern unsigned int kern_memorystatus_delta;
+extern int memorystatus_kill_top_proc(boolean_t any, uint32_t reason);
+extern int memorystatus_kill_top_proc_from_VM(void);
+
+extern void memorystatus_update(unsigned int pages_avail);
+
+#if VM_PRESSURE_EVENTS
+
+#define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
+
+extern int memorystatus_request_vm_pressure_candidate(void);
+extern void memorystatus_send_pressure_note(int pid);
+
+#endif /* VM_PRESSURE_EVENTS */
+
+#endif /* CONFIG_JETSAM */
 
 #ifdef CONFIG_FREEZE
-extern void kern_hibernation_init(void) __attribute__((section("__TEXT, initcode")));
-extern int kern_hibernation_wakeup;
 
-void kern_hibernation_on_pid_suspend(int pid);
-void kern_hibernation_on_pid_resume(int pid, task_t task);
-void kern_hibernation_on_pid_hibernate(int pid);
-#endif
+#define FREEZE_PAGES_MIN   ( 1 * 1024 * 1024 / PAGE_SIZE)
+#define FREEZE_PAGES_MAX   (16 * 1024 * 1024 / PAGE_SIZE)
 
-#if CONFIG_EMBEDDED
-#define VM_CHECK_MEMORYSTATUS do { vm_check_memorystatus(); } while(0)
-#else /*CONFIG_EMBEDDED*/
-#define VM_CHECK_MEMORYSTATUS do {} while(0)
-#endif
+#define FREEZE_SUSPENDED_THRESHOLD_LOW     2
+#define FREEZE_SUSPENDED_THRESHOLD_DEFAULT 4
+
+#define FREEZE_DAILY_MB_MAX      1024
+#define FREEZE_DAILY_PAGEOUTS_MAX (FREEZE_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE))
+
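With the common 4 KB PAGE_SIZE these work out to: FREEZE_PAGES_MIN = 256 pages (1 MB), FREEZE_PAGES_MAX = 4096 pages (16 MB), and FREEZE_DAILY_PAGEOUTS_MAX = 1024 * 256 = 262144 pages of freezer pageouts per day.
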
+typedef struct throttle_interval_t {
+       uint32_t mins;
+       uint32_t burst_multiple;
+       uint32_t pageouts;
+       uint32_t max_pageouts;
+       mach_timespec_t ts;
+       boolean_t throttle;
+} throttle_interval_t;
+
+extern boolean_t memorystatus_freeze_enabled;
+extern int memorystatus_freeze_wakeup;
+
+extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initcode")));
+
+#endif /* CONFIG_FREEZE */
+
+#endif /* XNU_KERNEL_PRIVATE */
 
-#endif /* KERNEL */
-#endif /* SYS_KERN_MEMORYSTATUS_H */
+#endif /* SYS_MEMORYSTATUS_H */
index 24239b9f484862ff1ac81153018f7ff33327cf30..3cb4c787de4e07c277fd07479e6a9917d257d18a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1363,7 +1363,7 @@ typedef enum {
 #ifdef XNU_KERNEL_PRIVATE
        MBUF_TC_UNSPEC  = -1,           /* Internal: not specified */
 #endif
-       MBUF_TC_BE              = 0,
+       MBUF_TC_BE              = 0,
        MBUF_TC_BK              = 1,
        MBUF_TC_VI              = 2,
        MBUF_TC_VO              = 3
@@ -1385,11 +1385,133 @@ extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
        @function mbuf_set_traffic_class
        @discussion Set the traffic class of an mbuf packet.
        @param mbuf The mbuf to set the traffic class on.
-       @ac The traffic class
+       @param tc The traffic class
        @result 0 on success, EINVAL if a bad parameter is passed
 */
 extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc);
 
+/*!
+       @function mbuf_is_traffic_class_privileged
+       @discussion Returns the privileged status of the traffic class
+               of the packet specified by the mbuf.
+       @param mbuf The mbuf to retrieve the status from.
+       @result Non-zero if privileged, 0 otherwise.
+ */
+extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf);
+
+#ifdef KERNEL_PRIVATE
+/*!
+       @enum mbuf_svc_class_t
+       @abstract Service class of a packet
+       @discussion Property that represents the category of service
+               of a packet. This information may be used by the driver
+               and at the link level.
+       @constant MBUF_SC_BK_SYS "Background System-Initiated", high delay
+               tolerant, high loss tolerant, elastic flow, variable size &
+               long-lived.
+       @constant MBUF_SC_BK "Background", user-initiated, high delay tolerant,
+               high loss tolerant, elastic flow, variable size.  This level
+               corresponds to WMM access class "BG", or MBUF_TC_BK.
+       @constant MBUF_SC_BE "Best Effort", unclassified/standard.  This is
+               the default service class; pretty much a mix of everything.
+               This level corresponds to WMM access class "BE" or MBUF_TC_BE.
+       @constant MBUF_SC_RD
+               "Responsive Data", a notch higher than "Best Effort", medium
+               delay tolerant, medium loss tolerant, elastic flow, bursty,
+               long-lived.
+       @constant MBUF_SC_OAM "Operations, Administration, and Management",
+               medium delay tolerant, low-medium loss tolerant, elastic &
+               inelastic flows, variable size.
+       @constant MBUF_SC_AV "Multimedia Audio/Video Streaming", medium delay
+               tolerant, low-medium loss tolerant, elastic flow, constant
+               packet interval, variable rate & size.
+       @constant MBUF_SC_RV "Responsive Multimedia Audio/Video", low delay
+               tolerant, low-medium loss tolerant, elastic flow, variable
+               packet interval, rate and size.
+       @constant MBUF_SC_VI "Interactive Video", low delay tolerant, low-
+               medium loss tolerant, elastic flow, constant packet interval,
+               variable rate & size.  This level corresponds to WMM access
+               class "VI" or MBUF_TC_VI.
+       @constant MBUF_SC_VO "Interactive Voice", low delay tolerant, low loss
+               tolerant, inelastic flow, constant packet rate, somewhat fixed
+               size.  This level corresponds to WMM access class "VO" or
+               MBUF_TC_VO.
+       @constant MBUF_SC_CTL "Network Control", low delay tolerant, low loss
+               tolerant, inelastic flow, rate is short & burst, variable size.
+*/
+typedef enum {
+#ifdef XNU_KERNEL_PRIVATE
+       MBUF_SC_UNSPEC          = -1,           /* Internal: not specified */
+#endif
+       MBUF_SC_BK_SYS          = 0x00080090,   /* lowest class */
+       MBUF_SC_BK              = 0x00100080,
+
+       MBUF_SC_BE              = 0x00000000,
+       MBUF_SC_RD              = 0x00180010,
+       MBUF_SC_OAM             = 0x00200020,
+
+       MBUF_SC_AV              = 0x00280120,
+       MBUF_SC_RV              = 0x00300110,
+       MBUF_SC_VI              = 0x00380100,
+
+       MBUF_SC_VO              = 0x00400180,
+       MBUF_SC_CTL             = 0x00480190,   /* highest class */
+} mbuf_svc_class_t;
+
+/*!
+       @function mbuf_get_service_class
+       @discussion Get the service class of an mbuf packet
+       @param mbuf The mbuf to get the service class of.
+       @result The service class
+*/
+extern mbuf_svc_class_t mbuf_get_service_class(mbuf_t mbuf);
+
+/*!
+       @function mbuf_set_service_class
+       @discussion Set the service class of an mbuf packet.
+       @param mbuf The mbuf to set the service class on.
+       @param sc The service class
+       @result 0 on success, EINVAL if a bad parameter is passed
+*/
+extern errno_t mbuf_set_service_class(mbuf_t mbuf, mbuf_svc_class_t sc);
+
+/*!
+       @function mbuf_is_service_class_privileged
+       @discussion Returns the privileged status of the service class
+               of the packet specified by the mbuf.
+       @param mbuf The mbuf to retrieve the status from.
+       @result Non-zero if privileged, 0 otherwise.
+ */
+extern int mbuf_is_service_class_privileged(mbuf_t mbuf);
+
+/*
+       @enum mbuf_pkthdr_aux_flags_t
+       @abstract Constants defining mbuf auxiliary flags.  Only the flags
+               listed below can be retrieved.
+       @constant MBUF_PKTAUXF_INET_RESOLVE_RTR Indicates this is an ARP
+               request packet, whose target is the address of the default
+               IPv4 router.
+       @constant MBUF_PKTAUXF_INET6_RESOLVE_RTR Indicates this is an ICMPv6
+               Neighbor Solicitation packet, whose target is the address of
+               the default IPv6 router.
+ */
+enum {
+       MBUF_PKTAUXF_INET_RESOLVE_RTR   = 0x0004,
+       MBUF_PKTAUXF_INET6_RESOLVE_RTR  = 0x0008,
+};
+typedef u_int32_t mbuf_pkthdr_aux_flags_t;
+
+/*
+       @function mbuf_pkthdr_aux_flags
+       @discussion Returns the auxiliary flags of a packet.
+       @param mbuf The mbuf containing the packet header.
+       @param paux_flags Pointer to mbuf_pkthdr_aux_flags_t variable.
+       @result 0 upon success otherwise the errno error.
+*/
+extern errno_t mbuf_pkthdr_aux_flags(mbuf_t mbuf,
+    mbuf_pkthdr_aux_flags_t *paux_flags);
+#endif /* KERNEL_PRIVATE */
+
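A sketch of the new service-class KPI from a network kext's perspective (error handling trimmed; MBUF_SC_VO is the interactive-voice class defined above):

    errno_t err = mbuf_set_service_class(m, MBUF_SC_VO);
    if (err == 0 &&
        mbuf_get_service_class(m) == MBUF_SC_VO &&
        mbuf_is_service_class_privileged(m)) {
            /* e.g. steer the packet to the driver's voice queue */
    }
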
 /* IF_QUEUE interaction */
 
 #define IF_ENQUEUE_MBUF(ifq, m) {                                      \
index e5ace3c036381606d6fb012fde2ec86fabf41788..10ab4323c9e1516152cfe02110a2121e10de88fa 100644 (file)
@@ -68,11 +68,15 @@ struct sockaddr;
                option.
        @constant SFLT_EXTENDED Indicates that this socket filter utilizes
                the extended fields within the sflt_filter structure.
+       @constant SFLT_EXTENDED_REGISTRY Indicates that this socket filter
+               wants to attach to all the sockets already present on the
+               system. It will also receive notifications for these sockets.
 */
 enum {
        SFLT_GLOBAL             = 0x01,
        SFLT_PROG               = 0x02,
-       SFLT_EXTENDED           = 0x04
+       SFLT_EXTENDED           = 0x04,
+       SFLT_EXTENDED_REGISTRY  = 0x08
 };
 typedef        u_int32_t       sflt_flags;
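
A sketch of opting into the registry behaviour at registration time, using the existing sflt_register() KPI from this header (the handle, name, and callbacks are placeholders; SFLT_EXTENDED also requires the extended sflt_filter fields to be filled in):

    static struct sflt_filter filter = {
            .sf_handle = 0x455D0001,        /* placeholder handle */
            .sf_flags  = SFLT_GLOBAL | SFLT_EXTENDED | SFLT_EXTENDED_REGISTRY,
            .sf_name   = "com.example.filter",
            /* ... attach/detach/data callbacks ... */
    };
    errno_t err = sflt_register(&filter, PF_INET, SOCK_STREAM, IPPROTO_TCP);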
 
index ffa77957356febb00624fe6d29dac5ba3b1b6b08..4cbefc091e61725d80b4909e941dacc9cb25bffd 100644 (file)
@@ -108,6 +108,7 @@ __BEGIN_DECLS
 int    lf_advlock(struct vnop_advlock_args *);
 int    lf_assert(struct vnop_advlock_args *, void **);
 void   lf_commit(void *, int);
+void   lf_abort_advlocks(vnode_t);
 
 #ifdef LOCKF_DEBUG
 void   lf_print(char *, struct lockf *);
index 74b5ee6a006de60f9df1877a51672412fa622571..a9e536d7ae014f15ba8c94c94707c558b8bf0d6f 100644 (file)
@@ -90,6 +90,7 @@ extern "C" {
 #define        LS_LCK_RW_LOCK_EXCL_TO_SHARED_ILK_SPIN  38
 
 #define        LS_NPROBES                      40
+#define LS_LCK_INVALID                 LS_NPROBES
 
 /*
  * Name the various locking functions...
@@ -168,9 +169,13 @@ extern void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t,
     uint64_t, uint64_t, uint64_t);
 
 
+
 #ifdef _KERNEL
 
 #if    CONFIG_DTRACE
+
+extern void (lockstat_probe_wrapper)(int, uintptr_t, int);
+       
 /*
  * Macros to record lockstat probes.
  */
index 5aa58b364553f3877a6fa49f3ca34ef2644cbeee..3fb652b5bd6da06fcf9ab8987d33f9dfeb5f6c0d 100755 (executable)
 # @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 #
 
+function usage() {
+    echo "Usage: $0 <output>" 1>&2
+    exit 1
+}
+
+if [ $# -ne 1 ]; then
+    usage
+fi
+
+OUTPUT="$1"
+
 POSIX_VALUES="198808L 199009L 199209L 199309L 199506L 200112L 200809L"
 
 {
@@ -67,5 +78,5 @@ for value in ${POSIX_VALUES} ; do
     echo "#endif"
     echo
 done
-} > $1
+} > "$OUTPUT"
 
index fa5f0e33cd066f625f54b41f50604bb65863eec3..a4a9b881f2cfe52af1c0a7c5f15cecaeb7302193 100755 (executable)
 # @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 #
 
+function usage() {
+    echo "Usage: $0 <sdk> <output>" 1>&2
+    exit 1
+}
+
+if [ $# -ne 2 ]; then
+    usage
+fi
+
+SDKROOT="$1"
+OUTPUT="$2"
+
+if [ ! -x "${SDKROOT}/usr/local/libexec/availability.pl" ] ; then
+    echo "Unable to locate ${SDKROOT}/usr/local/libexec/availability.pl (or not executable)" >&2
+    exit 1
+fi
+
 {
 cat <<EOF
 /* Copyright (c) 2010 Apple Inc. All rights reserved.
@@ -82,5 +99,5 @@ for ver in $(${SDKROOT}/usr/local/libexec/availability.pl --macosx) ; do
     echo "#endif"
     echo ""
 done
-} > $1
+} > "$OUTPUT"
 
index 4e8688735f8611a3243be526aa56c5bbf0efc5e3..80883f08bdfc9eabf9db7f1701c431cb544b67a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define M_FILEGLOB     99      /* fileglobal */
 #define M_KAUTH                100     /* kauth subsystem */
 #define M_DUMMYNET     101     /* dummynet */
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 #define M_UNSAFEFS     102     /* storage for vnode lock state for unsafe FS */
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 #define M_MACPIPELABEL 103     /* MAC pipe labels */
 #define M_MACTEMP      104     /* MAC framework */
 #define M_SBUF         105     /* string buffers */
index f0d45c565b00a0e1da08723b0b796f78bae7ef52..e3ad20c4a91f6bdfe636eef32334ad855bcae4f6 100644 (file)
 #include <sys/lock.h>
 #include <sys/queue.h>
 
-#if PF_PKTHDR
-#include <net/pf_mtag.h>
-#endif /* PF_PKTHDR */
-
 /*
  * Mbufs are of a single size, MSIZE (machine/param.h), which
  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
@@ -151,21 +147,69 @@ struct m_tag {
        u_int32_t               m_tag_id;       /* Module ID */
 };
 
-#ifdef __LP64__
-#define        M_TAG_ALIGN(len) \
-       P2ROUNDUP(len, sizeof (u_int64_t)) + sizeof (struct m_tag)
-#else
 #define        M_TAG_ALIGN(len) \
-       P2ROUNDUP(len, sizeof (u_int32_t)) + sizeof (struct m_tag)
-#endif /* !__LP64__ */
+       (P2ROUNDUP(len, sizeof (u_int64_t)) + sizeof (struct m_tag))
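The unified definition makes the arithmetic identical on ILP32 and LP64: M_TAG_ALIGN(12), for instance, is P2ROUNDUP(12, 8) + sizeof (struct m_tag) = 16 + sizeof (struct m_tag), so every tag payload now starts 64-bit aligned.
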
 
 #define        M_TAG_VALID_PATTERN     0xfeedfacefeedfaceULL
 #define        M_TAG_FREE_PATTERN      0xdeadbeefdeadbeefULL
 
+/*
+ * Packet tag header structure (at the top of mbuf).  Pointers are
+ * 32-bit in ILP32; m_tag needs 64-bit alignment, hence padded.
+ */
 struct m_taghdr {
+#ifndef __LP64__
+       u_int32_t               pad;            /* For structure alignment */
+#endif /* !__LP64__ */
        u_int64_t               refcnt;         /* Number of tags in this mbuf */
 };
 
+/* Values for pftag_flags */
+#define        PF_TAG_GENERATED        0x000001        /* pkt generated by PF */
+#define        PF_TAG_FRAGCACHE        0x000002
+#define        PF_TAG_TRANSLATE_LOCALHOST 0x000004
+#define        PF_TAG_FLOWHASH         0x000100        /* valid flowhash value */
+#define        PF_TAG_HDR_INET         0x000200        /* hdr points to IPv4 */
+#define        PF_TAG_HDR_INET6        0x000400        /* hdr points to IPv6 */
+#define        PF_TAG_TCP              0x000800        /* payload is TCP */
+#define        PF_TAG_FLOWADV          0x010000        /* local flow advisory */
+#define        PF_TAG_QUEUE1           0x100000        /* queue-specific */
+
+#define IF_PKTSEQ_SHIFT                4
+
+/* PF mbuf tag */
+struct pf_mtag {
+       void            *pftag_hdr;     /* saved hdr pos in mbuf, for ECN */
+       unsigned int    pftag_rtableid; /* alternate routing table id */
+       union {
+               struct {
+                       u_int32_t       qid;
+                       union {
+                               u_int8_t        val8[4];
+                               u_int16_t       val16[2];
+                               u_int32_t       val32;
+                       } __qpriv_u;    /* for queue-specific use */
+               } __pf_data;
+               u_int64_t       pktseq;
+       } __pfifseq_u; /* Used for pf or interface bandwidth measurement */
+#define        pftag_qid       __pfifseq_u.__pf_data.qid
+#define        pftag_qpriv8    __pfifseq_u.__pf_data.__qpriv_u.val8
+#define        pftag_qpriv16   __pfifseq_u.__pf_data.__qpriv_u.val16
+#define        pftag_qpriv32   __pfifseq_u.__pf_data.__qpriv_u.val32
+#define        pftag_pktseq    __pfifseq_u.pktseq
+       u_int32_t       pftag_flowhash;
+       u_int16_t       pftag_tag;
+       u_int16_t       pftag_routed;
+       u_int32_t       pftag_flags;    /* PF_TAG flags */
+};
+
+/* TCP specific mbuf tag */
+struct tcp_mtag {
+       u_int           tm_tso_segz;    /* TSO segment size (actual MSS) */
+       u_int16_t       tm_pktlen;      /* LRO - max segment size encountered */
+       u_int16_t       tm_npkts;       /* LRO - number of coalesced TCP pkts */
+};
+
 /* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
 struct pkthdr {
        int     len;                    /* total packet length */
@@ -177,24 +221,21 @@ struct    pkthdr {
        /* Note: csum_flags is used for hardware checksum and VLAN */
        int     csum_flags;             /* flags regarding checksum */
        int     csum_data;              /* data field used by csum routines */
-       u_int   tso_segsz;              /* TSO segment size (actual MSS) */
        u_short vlan_tag;               /* VLAN tag, host byte order */
        u_short socket_id;              /* socket id */
        SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
-#if PF_PKTHDR
-       /*
-        * Be careful; {en,dis}abling PF_PKTHDR will require xnu recompile;
-        * private code outside of xnu must use mbuf_get_mhlen() instead
-        * of MHLEN.
-        */
-       struct pf_mtag pf_mtag;
-#endif /* PF_PKTHDR */
-       u_int32_t prio;                 /* packet priority */
-       u_short vt_nrecs;               /* # of IGMPv3 records in this chain */
-       u_short _pad;
+       struct pf_mtag pf_mtag;         /* built-in PF tag */
+#define m_flowhash     pf_mtag.pftag_flowhash
+#define m_fhflags      pf_mtag.pftag_flags
+       u_int32_t svc;                  /* MBUF_SVC value */
+       u_int16_t vt_nrecs;             /* # of IGMPv3/MLDv2 records */
+       u_int16_t aux_flags;            /* auxiliary packet flags */
+       struct tcp_mtag tcp_mtag;       /* tcp related data */
+#define tso_segsz      tcp_mtag.tm_tso_segz    
+#define lro_pktlen     tcp_mtag.tm_pktlen      
+#define lro_npkts      tcp_mtag.tm_npkts
 };
 
-
 /* description of external storage mapped into mbuf, valid if M_EXT set */
 struct m_ext {
        caddr_t ext_buf;                /* start of buffer */
@@ -238,6 +279,8 @@ struct mbuf {
 #define        m_ext           M_dat.MH.MH_dat.MH_ext
 #define        m_pktdat        M_dat.MH.MH_dat.MH_databuf
 #define        m_dat           M_dat.M_databuf
+#define        m_pktlen(_m)    ((_m)->m_pkthdr.len)
+#define        m_pftag(_m)     (&(_m)->m_pkthdr.pf_mtag)
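
A sketch of the two accessors in use on an outbound packet (the flow hash value is assumed to be computed elsewhere):

    struct pf_mtag *t = m_pftag(m);         /* built-in tag, no allocation */
    t->pftag_flowhash = flowhash;           /* u_int32_t, computed elsewhere */
    t->pftag_flags |= PF_TAG_FLOWHASH;
    int total = m_pktlen(m);                /* shorthand for m_pkthdr.len */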
 
 /* mbuf flags (private) */
 #define        M_EXT           0x0001  /* has associated external storage */
@@ -271,7 +314,7 @@ struct mbuf {
        M_LOOP|M_PROTO5|M_BCAST|M_MCAST|M_FRAG |                        \
        M_FIRSTFRAG|M_LASTFRAG|M_PROMISC|M_HASFCS)
 
-/* flags indicating hw checksum support and sw checksum requirements [freebsd4.1] */
+/* flags indicating hw checksum support and sw checksum requirements */
 #define        CSUM_IP                 0x0001          /* will csum IP */
 #define        CSUM_TCP                0x0002          /* will csum TCP */
 #define        CSUM_UDP                0x0004          /* will csum UDP */
@@ -303,6 +346,18 @@ struct mbuf {
 /* TCP Segment Offloading requested on this mbuf */
 #define        CSUM_TSO_IPV4           0x100000        /* This mbuf needs to be segmented by the NIC */
 #define        CSUM_TSO_IPV6           0x200000        /* This mbuf needs to be segmented by the NIC */
+
+/*
+ * Auxiliary packet flags.  Unlike m_flags, all auxiliary flags are copied
+ * along when copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here.
+ * Note that this flag is 16-bit wide.
+ */
+#define        MAUXF_PRIO_PRIVILEGED   0x0001  /* packet priority is privileged */
+#define        MAUXF_PROXY_DST         0x0002  /* processed but not locally destined */
+#define        MAUXF_INET_RESOLVE_RTR  0x0004  /* pkt is for resolving IPv4 router */
+#define        MAUXF_INET6_RESOLVE_RTR 0x0008  /* pkt is for resolving IPv6 router */
+#define        MAUXF_SW_LRO_PKT        0x0010  /* pkt is a large coalesced pkt */
+#define        MAUXF_SW_LRO_DID_CSUM   0x0020  /* IP and TCP checksums done by LRO*/
 #endif /* XNU_KERNEL_PRIVATE */
 
 /* mbuf types */
@@ -402,6 +457,8 @@ union m16kcluster {
  */
 #define        M_COPY_PKTHDR(to, from)         m_copy_pkthdr(to, from)
 
+#define M_COPY_PFTAG(to, from)         m_copy_pftag(to, from)
+
 /*
  * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
  * an object of the specified size at the end of the mbuf, longword aligned.
@@ -455,7 +512,7 @@ do {                                                                        \
 
 /*
  * M_STRUCT_GET ensures that intermediate protocol header (from "off" to
- * "len") is located in single mbuf, on contiguous memory region.
+ * "off+len") is located in single mbuf, on contiguous memory region.
  * The pointer to the region will be returned to pointer variable "val",
  * with type "typ".
  *
@@ -486,14 +543,14 @@ do {                                                                      \
 do {                                                                   \
        struct mbuf *t;                                                 \
                                                                        \
-       if ((off) == 0) {                                               \
-               (val) = (typ)mtod(m, caddr_t);                          \
+       if ((off) == 0 && ((m)->m_len >= (len))) {                      \
+               (val) = (typ)(void *)mtod(m, caddr_t);                  \
        } else {                                                        \
                t = m_pulldown((m), (off), (len), NULL);                \
                if (t != NULL) {                                        \
                        if (t->m_len < (len))                           \
                                panic("m_pulldown malfunction");        \
-                       (val) = (typ)mtod(t, caddr_t);                  \
+                       (val) = (typ)(void *)mtod(t, caddr_t);          \
                } else {                                                \
                        (val) = (typ)NULL;                              \
                        (m) = NULL;                                     \
@@ -508,7 +565,7 @@ do {                                                                        \
            m->m_len > ((njcl > 0) ? njclbytes : MBIGCLBYTES) ||        \
            m->m_type == MT_FREE ||                                     \
            ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_buf == NULL)) {  \
-               panic("Failed mbuf validity check: mbuf %p len %d "     \
+               panic_plain("Failed mbuf validity check: mbuf %p len %d "  \
                    "type %d flags 0x%x data %p rcvif %s%d ifflags 0x%x",  \
                    m, m->m_len, m->m_type, m->m_flags,                    \
                    ((m->m_flags & M_EXT) ? m->m_ext.ext_buf : m->m_data), \
@@ -517,6 +574,92 @@ do {                                                                       \
        }                                                               \
 } while (0)
 
+/*
+ * Simple mbuf queueing system
+ *
+ * This is basically a SIMPLEQ adapted to mbuf use (i.e. using
+ * m_nextpkt instead of field.sqe_next).
+ *
+ * m_next is ignored, so queueing chains of mbufs is possible
+ */
+#define MBUFQ_HEAD(name)                                       \
+struct name {                                                  \
+       struct mbuf *mq_first;  /* first packet */              \
+       struct mbuf **mq_last;  /* addr of last next packet */  \
+}
+
+#define MBUFQ_INIT(q)          do {                            \
+       MBUFQ_FIRST(q) = NULL;                                  \
+       (q)->mq_last = &MBUFQ_FIRST(q);                         \
+} while (0)
+
+#define MBUFQ_PREPEND(q, m)    do {                            \
+       if ((MBUFQ_NEXT(m) = MBUFQ_FIRST(q)) == NULL)           \
+               (q)->mq_last = &MBUFQ_NEXT(m);                  \
+       MBUFQ_FIRST(q) = (m);                                   \
+} while (0)
+
+#define MBUFQ_ENQUEUE(q, m)    do {                            \
+       MBUFQ_NEXT(m) = NULL;                                   \
+       *(q)->mq_last = (m);                                    \
+       (q)->mq_last = &MBUFQ_NEXT(m);                          \
+} while (0)
+
+#define MBUFQ_ENQUEUE_MULTI(q, m, n)   do {                    \
+       MBUFQ_NEXT(n) = NULL;                                   \
+       *(q)->mq_last = (m);                                    \
+       (q)->mq_last = &MBUFQ_NEXT(n);                          \
+} while (0)
+
+#define MBUFQ_DEQUEUE(q, m)    do {                            \
+       if (((m) = MBUFQ_FIRST(q)) != NULL) {                   \
+               if ((MBUFQ_FIRST(q) = MBUFQ_NEXT(m)) == NULL)   \
+                       (q)->mq_last = &MBUFQ_FIRST(q);         \
+               else                                            \
+                       MBUFQ_NEXT(m) = NULL;                   \
+       }                                                       \
+} while (0)
+
+#define        MBUFQ_REMOVE(q, m)      do {                            \
+       if (MBUFQ_FIRST(q) == (m)) {                            \
+               MBUFQ_DEQUEUE(q, m);                            \
+       } else {                                                \
+               struct mbuf *_m = MBUFQ_FIRST(q);               \
+               while (MBUFQ_NEXT(_m) != (m))                   \
+                       _m = MBUFQ_NEXT(_m);                    \
+               if ((MBUFQ_NEXT(_m) =                           \
+                   MBUFQ_NEXT(MBUFQ_NEXT(_m))) == NULL)        \
+                       (q)->mq_last = &MBUFQ_NEXT(_m);         \
+       }                                                       \
+} while (0)
+
+#define MBUFQ_DRAIN(q)         do {                            \
+       struct mbuf *__m0;                                      \
+       while ((__m0 = MBUFQ_FIRST(q)) != NULL) {               \
+               MBUFQ_FIRST(q) = MBUFQ_NEXT(__m0);              \
+               MBUFQ_NEXT(__m0) = NULL;                        \
+               m_freem(__m0);                                  \
+       }                                                       \
+       (q)->mq_last = &MBUFQ_FIRST(q);                         \
+} while (0)
+
+#define        MBUFQ_FOREACH(m, q)                                     \
+       for ((m) = MBUFQ_FIRST(q);                              \
+           (m);                                                \
+           (m) = MBUFQ_NEXT(m))
+
+#define        MBUFQ_FOREACH_SAFE(m, q, tvar)                          \
+       for ((m) = MBUFQ_FIRST(q);                              \
+           (m) && ((tvar) = MBUFQ_NEXT(m), 1);                 \
+           (m) = (tvar))
+
+#define        MBUFQ_EMPTY(q)          ((q)->mq_first == NULL)
+#define MBUFQ_FIRST(q)         ((q)->mq_first)
+#define MBUFQ_NEXT(m)          ((m)->m_nextpkt)
+#define MBUFQ_LAST(q)          (*(q)->mq_last)
+
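A sketch of the queue macros in use for a driver-side staging queue (m0 and m1 are placeholders):

    MBUFQ_HEAD(pkt_queue) q;        /* declares struct pkt_queue q */
    struct mbuf *m;

    MBUFQ_INIT(&q);
    MBUFQ_ENQUEUE(&q, m0);          /* FIFO: m0 comes out first */
    MBUFQ_ENQUEUE(&q, m1);
    MBUFQ_DEQUEUE(&q, m);           /* m == m0 */
    MBUFQ_DRAIN(&q);                /* m_freem() everything left */
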
+#define        max_linkhdr     P2ROUNDUP(_max_linkhdr, sizeof (u_int32_t))
+#define        max_protohdr    P2ROUNDUP(_max_protohdr, sizeof (u_int32_t))
 #endif /* XNU_KERNEL_PRIVATE */
 
 /*
@@ -741,16 +884,123 @@ extern struct mbuf *m_pullup(struct mbuf *, int);
 extern struct mbuf *m_split(struct mbuf *, int, int);
 extern void m_mclfree(caddr_t p);
 
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), this macro checks whether the data pointer of an mbuf
+ * is 32-bit aligned (this is the expected minimum alignment for protocol
+ * headers), and assert otherwise.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m)
+#else /* !__i386__ && !__x86_64__ */
+#define        MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m) do {                    \
+       if (!IS_P2ALIGNED((_m)->m_data, sizeof (u_int32_t))) {          \
+               if (((_m)->m_flags & M_PKTHDR) &&                       \
+                   (_m)->m_pkthdr.rcvif != NULL) {                     \
+                       panic_plain("\n%s: mbuf %p data ptr %p is not " \
+                           "32-bit aligned [%s%d: alignerrs=%lld]\n",  \
+                           __func__, (_m), (_m)->m_data,               \
+                           (_m)->m_pkthdr.rcvif->if_name,              \
+                           (_m)->m_pkthdr.rcvif->if_unit,              \
+                           (_m)->m_pkthdr.rcvif->if_alignerrs);        \
+               } else {                                                \
+                       panic_plain("\n%s: mbuf %p data ptr %p is not " \
+                           "32-bit aligned\n",                         \
+                           __func__, (_m), (_m)->m_data);              \
+               }                                                       \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
+
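A sketch of where a receive path might place the check after stripping the 14-byte Ethernet header (a no-op on i386/x86_64, a panic elsewhere):

    m_adj(m, sizeof (struct ether_header));
    MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);  /* IP header must be 32-bit aligned */
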
+/* Maximum number of MBUF_SC values (excluding MBUF_SC_UNSPEC) */
+#define        MBUF_SC_MAX_CLASSES     10
+
+/*
+ * These conversion macros rely on the corresponding MBUF_SC and
+ * MBUF_TC values in order to establish the following mapping:
+ *
+ *     MBUF_SC_BK_SYS  ] ==>   MBUF_TC_BK
+ *     MBUF_SC_BK      ]
+ *
+ *     MBUF_SC_BE      ] ==>   MBUF_TC_BE
+ *     MBUF_SC_RD      ]
+ *     MBUF_SC_OAM     ]
+ *
+ *     MBUF_SC_AV      ] ==>   MBUF_TC_VI
+ *     MBUF_SC_RV      ]
+ *     MBUF_SC_VI      ]
+ *
+ *     MBUF_SC_VO      ] ==>   MBUF_TC_VO
+ *     MBUF_SC_CTL     ]
+ *
+ * The values assigned to each service class allow for a fast mapping to
+ * the corresponding MBUF_TC traffic class values, as well as to retrieve the
+ * assigned index; therefore care must be taken when comparing against these
+ * values.  Use the corresponding class and index macros to retrieve the
+ * corresponding portion, and never assume that a higher class corresponds
+ * to a higher index.
+ */
+#define        MBUF_SCVAL(x)           ((x) & 0xffff)
+#define        MBUF_SCIDX(x)           ((((x) >> 16) & 0xff) >> 3)
+#define        MBUF_SC2TC(_sc)         (MBUF_SCVAL(_sc) >> 7)
+#define        MBUF_TC2SCVAL(_tc)      ((_tc) << 7)
+#define IS_MBUF_SC_BACKGROUND(_sc) (((_sc) == MBUF_SC_BK_SYS) || \
+       ((_sc) == MBUF_SC_BK))
+
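The encoding can be verified by hand: MBUF_SC_VI is 0x00380100, so MBUF_SCVAL yields 0x0100, MBUF_SC2TC yields 0x0100 >> 7 = 2 = MBUF_TC_VI, and MBUF_SCIDX yields 0x38 >> 3 = 7; across the ten classes the indices run 0 through 9, matching MBUF_SC_MAX_CLASSES.
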
+#define        SCIDX_BK_SYS            MBUF_SCIDX(MBUF_SC_BK_SYS)
+#define        SCIDX_BK                MBUF_SCIDX(MBUF_SC_BK)
+#define        SCIDX_BE                MBUF_SCIDX(MBUF_SC_BE)
+#define        SCIDX_RD                MBUF_SCIDX(MBUF_SC_RD)
+#define        SCIDX_OAM               MBUF_SCIDX(MBUF_SC_OAM)
+#define        SCIDX_AV                MBUF_SCIDX(MBUF_SC_AV)
+#define        SCIDX_RV                MBUF_SCIDX(MBUF_SC_RV)
+#define        SCIDX_VI                MBUF_SCIDX(MBUF_SC_VI)
+#define        SCIDX_VO                MBUF_SCIDX(MBUF_SC_VO)
+#define        SCIDX_CTL               MBUF_SCIDX(MBUF_SC_CTL)
+
+#define        SCVAL_BK_SYS            MBUF_SCVAL(MBUF_SC_BK_SYS)
+#define        SCVAL_BK                MBUF_SCVAL(MBUF_SC_BK)
+#define        SCVAL_BE                MBUF_SCVAL(MBUF_SC_BE)
+#define        SCVAL_RD                MBUF_SCVAL(MBUF_SC_RD)
+#define        SCVAL_OAM               MBUF_SCVAL(MBUF_SC_OAM)
+#define        SCVAL_AV                MBUF_SCVAL(MBUF_SC_AV)
+#define        SCVAL_RV                MBUF_SCVAL(MBUF_SC_RV)
+#define        SCVAL_VI                MBUF_SCVAL(MBUF_SC_VI)
+#define        SCVAL_VO                MBUF_SCVAL(MBUF_SC_VO)
+#define        SCVAL_CTL               MBUF_SCVAL(MBUF_SC_CTL)
+
+#define        MBUF_VALID_SC(c)                                                \
+       (c == MBUF_SC_BK_SYS || c == MBUF_SC_BK || c == MBUF_SC_BE ||   \
+       c == MBUF_SC_RD || c == MBUF_SC_OAM || c == MBUF_SC_AV ||       \
+       c == MBUF_SC_RV || c == MBUF_SC_VI || c == MBUF_SC_VO ||        \
+       c == MBUF_SC_CTL)
+
+#define        MBUF_VALID_SCIDX(c)                                             \
+       (c == SCIDX_BK_SYS || c == SCIDX_BK || c == SCIDX_BE ||         \
+       c == SCIDX_RD || c == SCIDX_OAM || c == SCIDX_AV ||             \
+       c == SCIDX_RV || c == SCIDX_VI || c == SCIDX_VO ||              \
+       c == SCIDX_CTL)
+
+#define        MBUF_VALID_SCVAL(c)                                             \
+       (c == SCVAL_BK_SYS || c == SCVAL_BK || c == SCVAL_BE ||         \
+       c == SCVAL_RD || c == SCVAL_OAM || c == SCVAL_AV ||             \
+       c == SCVAL_RV || c == SCVAL_VI || c == SCVAL_VO ||              \
+       c == SCVAL_CTL)
+
 __private_extern__ union mbigcluster *mbutl;   /* start VA of mbuf pool */
 __private_extern__ union mbigcluster *embutl;  /* end VA of mbuf pool */
 __private_extern__ unsigned int nmbclusters;   /* number of mapped clusters */
 __private_extern__ int njcl;           /* # of jumbo clusters  */
 __private_extern__ int njclbytes;      /* size of a jumbo cluster */
-__private_extern__ int max_linkhdr;    /* largest link-level header */
-__private_extern__ int max_protohdr;   /* largest protocol header */
 __private_extern__ int max_hdr;                /* largest link+protocol header */
 __private_extern__ int max_datalen;    /* MHLEN - max_hdr */
 
+/* Use max_linkhdr instead of _max_linkhdr */
+__private_extern__ int _max_linkhdr;   /* largest link-level header */
+
+/* Use max_protohdr instead of _max_protohdr */
+__private_extern__ int _max_protohdr;  /* largest protocol header */
+
 __private_extern__ unsigned int mbuf_default_ncl(int, u_int64_t);
 __private_extern__ void mbinit(void);
 __private_extern__ struct mbuf *m_clattach(struct mbuf *, int, caddr_t,
@@ -766,6 +1016,10 @@ __private_extern__ struct mbuf *m_free(struct mbuf *);
 __private_extern__ struct mbuf *m_getclr(int, int);
 __private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *);
 __private_extern__ unsigned int m_length(struct mbuf *);
+__private_extern__ unsigned int m_length2(struct mbuf *, struct mbuf **);
+__private_extern__ unsigned int m_fixhdr(struct mbuf *);
+__private_extern__ struct mbuf *m_defrag(struct mbuf *, int);
+__private_extern__ struct mbuf *m_defrag_offset(struct mbuf *, u_int32_t, int);
 __private_extern__ struct mbuf *m_prepend(struct mbuf *, int, int);
 __private_extern__ struct mbuf *m_copyup(struct mbuf *, int, int);
 __private_extern__ struct mbuf *m_retry(int, int);
@@ -781,6 +1035,7 @@ __private_extern__ struct mbuf *m_getcl(int, int, int);
 __private_extern__ caddr_t m_mclalloc(int);
 __private_extern__ int m_mclhasreference(struct mbuf *);
 __private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *);
+__private_extern__ void m_copy_pftag(struct mbuf *, struct mbuf *);
 
 __private_extern__ struct mbuf *m_dtom(void *);
 __private_extern__ int m_mtocl(void *);
@@ -867,11 +1122,22 @@ __private_extern__ void m_tag_init(struct mbuf *);
 __private_extern__ struct  m_tag *m_tag_first(struct mbuf *);
 __private_extern__ struct  m_tag *m_tag_next(struct mbuf *, struct m_tag *);
 
-__private_extern__ void m_prio_init(struct mbuf *);
-
 __END_DECLS
 #endif /* XNU_KERNEL_PRIVATE */
 #ifdef KERNEL
 #include <sys/kpi_mbuf.h>
+#ifdef XNU_KERNEL_PRIVATE
+__BEGIN_DECLS
+
+__private_extern__ void m_service_class_init(struct mbuf *);
+__private_extern__ int m_set_service_class(struct mbuf *, mbuf_svc_class_t);
+__private_extern__ mbuf_svc_class_t m_get_service_class(struct mbuf *);
+__private_extern__ mbuf_svc_class_t m_service_class_from_idx(u_int32_t);
+__private_extern__ mbuf_svc_class_t m_service_class_from_val(u_int32_t);
+__private_extern__ int m_set_traffic_class(struct mbuf *, mbuf_traffic_class_t);
+__private_extern__ mbuf_traffic_class_t m_get_traffic_class(struct mbuf *);
+
+__END_DECLS
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* KERNEL */
 #endif /* !_SYS_MBUF_H_ */
index 443e05b014ddf959b368befee2b6550af15d5385..428a865ecd592303570cef611106b60834ff7599 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -58,6 +58,15 @@ extern "C" {
 #define        ASSERT(EX)      ((void)0)
 #endif
 
+/*
+ * Compile time assert; this should be on its own someday.
+ */
+#define        _CASSERT(x)     \
+       switch (0) { case 0: case (x): ; }
+
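A false condition produces a duplicate-case compile error rather than a run-time check, e.g.:

    _CASSERT(sizeof (u_int64_t) == 8);      /* compiles away */
    _CASSERT(sizeof (u_int32_t) == 8);      /* error: duplicate case value */
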
+/*
+ * Atomic macros; these should be on their own someday.
+ */
 #define        atomic_add_16_ov(a, n)                                          \
        ((u_int16_t) OSAddAtomic16(n, (volatile SInt16 *)a))
 
@@ -245,9 +254,11 @@ typedef struct mcache {
 #define        MCF_TRACE       0x00000002      /* enable transaction auditing */
 #define        MCF_NOCPUCACHE  0x00000010      /* disable CPU layer caching */
 #define        MCF_NOLEAKLOG   0x00000100      /* disable leak logging */
+#define        MCF_EXPLEAKLOG  0x00000200      /* expose leak info to user space */
 
 #define        MCF_DEBUG       (MCF_VERIFY | MCF_TRACE)
-#define        MCF_FLAGS_MASK  (MCF_DEBUG | MCF_NOCPUCACHE | MCF_NOLEAKLOG)
+#define        MCF_FLAGS_MASK  \
+       (MCF_DEBUG | MCF_NOCPUCACHE | MCF_NOLEAKLOG | MCF_EXPLEAKLOG)
 
 /* Valid values for notify callback */
 #define        MCN_RETRYALLOC  0x00000001      /* Allocation should be retried */
index f6594c0ef7ff02341b107f01be3170ba2253217f..9a2c732f0873533f285662a7dc5b51f6e7630b4f 100644 (file)
@@ -343,9 +343,10 @@ struct vfs_attr {
  * External filesystem control flags.
  */
 #define        MNT_UPDATE      0x00010000      /* not a real mount, just an update */
+#define        MNT_NOBLOCK     0x00020000      /* don't block unmount if not responding */
 #define        MNT_RELOAD      0x00040000      /* reload filesystem data */
 #define        MNT_FORCE       0x00080000      /* force unmount or readonly change */
-#define MNT_CMDFLAGS   (MNT_UPDATE|MNT_RELOAD|MNT_FORCE)
+#define MNT_CMDFLAGS   (MNT_UPDATE|MNT_NOBLOCK|MNT_RELOAD|MNT_FORCE)
 
 
 
@@ -442,6 +443,7 @@ union union_vfsidctl { /* the fields vc_vers and vc_fsid are compatible */
 #define VFS_CTL_TIMEO  0x00010005      /* set timeout for vfs notification */
 #define VFS_CTL_NOLOCKS        0x00010006      /* disable file locking */
 #define VFS_CTL_SADDR  0x00010007      /* get server address */
+#define VFS_CTL_DISC    0x00010008     /* server disconnected */
 
 struct vfsquery {
        u_int32_t       vq_flags;
@@ -482,7 +484,7 @@ struct vfsioattr {
        void *          io_reserved[2];         /* extended attribute information */
 };
 
-#define VFS_IOATTR_FLAGS_FUA           0x01    /* Write-through cache supported */
+#define VFS_IOATTR_FLAGS_FUA   0x01    /* Write-through cache supported */
 #define VFS_IOATTR_FLAGS_UNMAP         0x02    /* Unmap (trim) supported */
 
 /*
index 141fb3eeb394bd16124352496305d0a9cd53549c..ccf31dd9ac1444eb2292220a55f0ef4e4d5ab100 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -265,7 +265,7 @@ extern struct mount * dead_mountp;
 #define MNTK_AUTH_OPAQUE_ACCESS 0x40000000      /* VNOP_ACCESS is reliable for remote auth */
 #define MNTK_EXTENDED_SECURITY 0x80000000      /* extended security supported */
 
-#define        MNT_LBUSY               0x00000001      /* mount is busy */
+#define        MNT_LNOTRESP            0x00000001      /* mount not responding */
 #define MNT_LUNMOUNT           0x00000002      /* mount in unmount */
 #define MNT_LFORCE             0x00000004      /* mount in forced unmount */
 #define MNT_LDRAIN             0x00000008      /* mount in drain */
@@ -319,9 +319,9 @@ struct vfstable {
 #define        VFC_VFSPREFLIGHT        0x040
 #define        VFC_VFSREADDIR_EXTENDED 0x080
 #define        VFC_VFS64BITREADY       0x100
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 #define        VFC_VFSTHREADSAFE       0x200
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 #define        VFC_VFSNOMACLABEL       0x1000
 #define        VFC_VFSVNOP_PAGEINV2    0x2000
 #define        VFC_VFSVNOP_PAGEOUTV2   0x4000
@@ -466,8 +466,10 @@ boolean_t vfs_iskernelmount(mount_t);
 #endif
 
 /* throttled I/O api */
-int throttle_get_io_policy(struct uthread **ut);
-int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp);
+int  throttle_get_io_policy(struct uthread **ut);
+int  throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp);
+void throttle_info_update_by_mount(mount_t mp);
+void unthrottle_thread(uthread_t);
 
 /* throttled I/O helper function */
 /* convert the lowest bit to a device index */
diff --git a/bsd/sys/munge.h b/bsd/sys/munge.h
new file mode 100644 (file)
index 0000000..19a3dcd
--- /dev/null
@@ -0,0 +1,69 @@
+#ifndef __MUNGE_H__
+#define __MUNGE_H__
+/*
+ * Copyright (c) 2005-2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+void munge_w(const void *arg0 __unused, void *args);
+void munge_ww(const void *arg0 __unused, void *args);
+void munge_www(const void *arg0 __unused, void *args);
+void munge_wwww(const void *arg0 __unused, void *args);
+void munge_wwwww(const void *arg0 __unused, void *args);
+void munge_wwwwww(const void *arg0 __unused, void *args);
+void munge_wwwwwww(const void *arg0 __unused, void *args);
+void munge_wwwwwwww(const void *arg0 __unused, void *args);
+void munge_wl(const void *arg0 __unused, void *args);
+void munge_wwl(const void *arg0 __unused, void *args);
+void munge_wwlw(const void *arg0 __unused, void *args);
+void munge_wwlll(const void *arg0 __unused, void *args);
+void munge_wlw(const void *arg0 __unused, void *args);
+void munge_wlwwwll(const void *arg0 __unused, void *args);
+void munge_wlwwwllw(const void *arg0 __unused, void *args);
+void munge_wlwwlwlw(const void *arg0 __unused, void *args);
+void munge_wll(const void *arg0 __unused, void *args);
+void munge_wllww(const void *arg0 __unused, void *args);
+void munge_wlll(const void *arg0 __unused, void *args);
+void munge_wllwwll(const void *arg0 __unused, void *args);
+void munge_wwwlw(const void *arg0 __unused, void *args);
+void munge_wwwlww(const void *arg0 __unused, void *args);
+void munge_wwwl(const void *arg0 __unused, void *args);
+void munge_wwwwlw(const void *arg0 __unused, void *args);
+void munge_wwwwl(const void *arg0 __unused, void *args);
+void munge_wwwwwl(const void *arg0 __unused, void *args);
+void munge_wwwwwlww(const void *arg0 __unused, void *args);
+void munge_wwwwwllw(const void *arg0 __unused, void *args);
+void munge_wwwwwlll(const void *arg0 __unused, void *args);
+void munge_wwwwwwl(const void *arg0 __unused, void *args);
+void munge_wwwwwwlw(const void *arg0 __unused, void *args);
+void munge_wwwwwwll(const void *arg0 __unused, void *args);
+void munge_wsw(const void *arg0 __unused, void *args);
+void munge_wws(const void *arg0 __unused, void *args);
+void munge_wwwsw(const void *arg0 __unused, void *args);
+void munge_llllll(const void *arg0 __unused, void *args __unused);
+void munge_l(const void *arg0 __unused, void *args __unused);
+void munge_lw(const void *arg0 __unused, void *args);
+void munge_lwww(const void *arg0 __unused, void *args);
+#endif /* __MUNGE_H__ */
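
The munge_* suffixes encode the argument layout each routine handles: 'w' is a 32-bit word zero-extended to 64 bits, 's' a sign-extended 32-bit word, and 'l' a 64-bit long spanning two 32-bit words. A minimal user-space sketch of what a "wl" munger conceptually does, assuming little-endian packed 32-bit user arguments (this is illustrative, not the kernel implementation):

#include <stdint.h>
#include <string.h>

/* Expand a packed [w][l_lo][l_hi] argument area in place into two
 * 64-bit kernel argument slots.  Illustrative only. */
static void
sketch_munge_wl(void *args)
{
	uint32_t in[3];
	uint64_t *out = (uint64_t *)args;

	memcpy(in, args, sizeof(in));               /* snapshot before expanding in place */
	out[1] = ((uint64_t)in[2] << 32) | in[1];   /* 'l': rejoin the 64-bit argument */
	out[0] = in[0];                             /* 'w': zero-extend */
}
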
index 56d3ecf139bf99138bf7980368196cdcae076890..803a9d8d7deae438cd3232ad4ccf86e3393ad776 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -175,9 +175,9 @@ struct nameidata {
 #define        AUDITVNPATH2    0x00200000 /* audit the path/vnode info */
 #define        USEDVP          0x00400000 /* start the lookup at ndp.ni_dvp */
 #define        CN_VOLFSPATH    0x00800000 /* user path was a volfs style path */
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 #define FSNODELOCKHELD 0x01000000
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 #define UNIONCREATED   0x02000000 /* union fs creation of vnode */
 #if NAMEDRSRCFORK
 #define CN_WANTSRSRCFORK 0x04000000
index f45128bfb467a8e7e56f2e221eacdd5a6a9414ab..3437710b253f8f1c7148472c66f262d80beab005 100644 (file)
@@ -71,6 +71,8 @@
 #define PIPE_SIZE      16384
 #endif
 
+#define PIPE_KVAMAX    (1024 * 1024 * 16)
+
 #ifndef BIG_PIPE_SIZE
 #define BIG_PIPE_SIZE  (64*1024)
 #endif
index 1abb898bf1bdf1aa24ffef3fd3c277aac3751539..78f553f8a86bde2965c903b191f24dd5d28152c8 100644 (file)
  */
 #define        PRIV_ADJTIME            1000    /* Set time adjustment. */
 
+/*
+ * Virtual memory privileges.
+ */
+#define        PRIV_VM_PRESSURE        6000    /* Check VM pressure. */
+#define        PRIV_VM_JETSAM          6001    /* Adjust jetsam configuration. */
+
+/*
+ * Network stack privileges.
+ */
+#define        PRIV_NET_PRIVILEGED_TRAFFIC_CLASS       10000   /* Set SO_PRIVILEGED_TRAFFIC_CLASS. */ 
+
 /*
  * IPv4 and IPv6 privileges.
  */
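
New privileges such as PRIV_VM_PRESSURE are consumed through the kernel's privilege-check entry point; a minimal sketch, assuming the priv_check_cred() helper declared elsewhere in this header (its exact signature is an assumption here):

#include <sys/priv.h>
#include <sys/kauth.h>

/* Sketch: gate VM pressure queries on the new privilege.
 * Assumed to return 0 when the credential holds PRIV_VM_PRESSURE,
 * EPERM otherwise. */
static int
vm_pressure_allowed(kauth_cred_t cred)
{
	return priv_check_cred(cred, PRIV_VM_PRESSURE, 0);
}
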
index 8cec174a3a6b54f8a86562b675f8bd5dbd7e33a6..9b80718f365150ec55406911d7de1a0f2b4caff9 100644 (file)
@@ -182,7 +182,7 @@ struct extern_proc {
 #define        P_SSTEP         0x20000 / * process needs single-step fixup ??? * /
 */
 
-#define        P_RESV5         0x00040000      /* (P_WAITING) process has a wait() in progress */
+#define        P_DELAYIDLESLEEP 0x00040000     /* Process is marked to delay idle sleep on disk IO */
 #define        P_CHECKOPENEVT  0x00080000      /* check if a vnode has the OPENEVT flag set on open */
 
 #define        P_DEPENDENCY_CAPABLE    0x00100000      /* process is ok to call vfs_markdependency() */
@@ -207,6 +207,16 @@ struct extern_proc {
 #define        P_FSTRACE       0               /* Obsolete: retained for compilation */
 #define        P_SSTEP         0               /* Obsolete: retained for compilation */
 
+#define P_DIRTY_TRACK                           0x00000001      /* track dirty state */
+#define P_DIRTY_ALLOW_IDLE_EXIT                 0x00000002      /* process can be idle-exited when clean */
+#define P_DIRTY                                 0x00000004      /* process is dirty */
+#define P_DIRTY_SHUTDOWN                        0x00000008      /* process is dirty during shutdown */
+#define P_DIRTY_TERMINATED                      0x00000010      /* process has been marked for termination */
+#define P_DIRTY_BUSY                            0x00000020      /* serialization flag */
+
+#define P_DIRTY_CAN_IDLE_EXIT                   (P_DIRTY_TRACK | P_DIRTY_ALLOW_IDLE_EXIT)
+#define P_DIRTY_IS_DIRTY                        (P_DIRTY | P_DIRTY_SHUTDOWN)
+
 #endif /* XNU_KERNEL_PRIVATE || !KERNEL */
 
 #ifdef KERNEL
@@ -289,6 +299,7 @@ extern int IS_64BIT_PROCESS(proc_t);
 extern int     tsleep(void *chan, int pri, const char *wmesg, int timo);
 extern int     msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int64_t timo);
 
+task_t proc_task(proc_t);
 extern int proc_pidversion(proc_t);
 extern int proc_getcdhash(proc_t, unsigned char *);
 #endif /* KERNEL_PRIVATE */
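
The two composite P_DIRTY masks combine the individual bits above; a minimal sketch of the idle-exit eligibility test they support (illustrative, not xnu's actual logic):

/* Eligible for idle exit: tracking on, idle exit allowed, no dirty
 * bit set, and not already marked for termination. */
static int
can_idle_exit(uint32_t p_dirty)
{
	return ((p_dirty & P_DIRTY_CAN_IDLE_EXIT) == P_DIRTY_CAN_IDLE_EXIT &&
	    (p_dirty & P_DIRTY_IS_DIRTY) == 0 &&
	    (p_dirty & P_DIRTY_TERMINATED) == 0);
}
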
index 67842664db5f0eee98a000a23b8c2eddfb0759d9..381aef8f3c65f9ddc45461d97c82e22287299e4b 100644 (file)
@@ -118,6 +118,8 @@ struct proc_bsdshortinfo {
 #ifdef  PRIVATE
 #define PROC_FLAG_DARWINBG     0x8000  /* process in darwin background */
 #define PROC_FLAG_EXT_DARWINBG 0x10000 /* process in darwin background - external enforcement */
+#define PROC_FLAG_IOS_APPLEDAEMON      0x20000 /* Process is an Apple daemon */
+#define PROC_FLAG_DELAYIDLESLEEP       0x40000 /* Process is marked to delay idle sleep on disk IO */
 #endif
 
 
@@ -642,6 +644,9 @@ struct proc_fileportinfo {
 #define PROC_PIDLISTFILEPORTS          14
 #define PROC_PIDLISTFILEPORTS_SIZE     (sizeof(struct proc_fileportinfo))
 
+#define PROC_PIDTHREADID64INFO         15
+#define PROC_PIDTHREADID64INFO_SIZE    (sizeof(struct proc_threadinfo))
+
 /* Flavors for proc_pidfdinfo */
 
 #define PROC_PIDFDVNODEINFO            1
@@ -691,6 +696,24 @@ struct proc_fileportinfo {
 
 #define PROC_SELFSET_VMRSRCOWNER       3
 
+#define PROC_SELFSET_DELAYIDLESLEEP    4
+
+/* used for proc_dirtycontrol */
+#define PROC_DIRTYCONTROL_TRACK         1
+#define PROC_DIRTYCONTROL_SET           2
+#define PROC_DIRTYCONTROL_GET           3
+
+/* proc_track_dirty() flags */
+#define PROC_DIRTY_TRACK                0x1
+#define PROC_DIRTY_ALLOW_IDLE_EXIT      0x2
+
+#define PROC_DIRTY_TRACK_MASK           (PROC_DIRTY_TRACK|PROC_DIRTY_ALLOW_IDLE_EXIT)
+
+/* proc_get_dirty() flags */
+#define PROC_DIRTY_TRACKED              0x1
+#define PROC_DIRTY_ALLOWS_IDLE_EXIT     0x2
+#define PROC_DIRTY_IS_DIRTY             0x4
+
 #ifdef XNU_KERNEL_PRIVATE
 #ifndef pshmnode
 struct pshmnode;
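
The new PROC_PIDTHREADID64INFO flavor reuses struct proc_threadinfo but is keyed on a 64-bit thread ID. A hedged user-space sketch via libproc's proc_pidinfo() wrapper (the wrapper and the pth_user_time field are assumed from <libproc.h> and the struct defined earlier in this header):

#include <libproc.h>
#include <stdio.h>

static int
print_thread_user_time(pid_t pid, uint64_t tid)
{
	struct proc_threadinfo ti;

	if (proc_pidinfo(pid, PROC_PIDTHREADID64INFO, tid,
	    &ti, sizeof(ti)) < (int)sizeof(ti))
		return (-1);
	printf("thread %llu: %llu ns user time\n",
	    (unsigned long long)tid, (unsigned long long)ti.pth_user_time);
	return (0);
}
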
index 26b91b3cdd0a596132d13a1a4e1ed8834348676d..e4c7497bf6987fceeae2f24e03a9d217e2460b07 100644 (file)
@@ -330,13 +330,15 @@ struct    proc {
        char    p_name[(2*MAXCOMLEN)+1];        /* PL */
 
        struct  pgrp *p_pgrp;   /* Pointer to process group. (LL) */
-#if CONFIG_EMBEDDED
-       int             p_iopol_disk;   /* disk I/O policy (PL) */
-#endif /* CONFIG_EMBEDDED */
        uint32_t        p_csflags;      /* flags for codesign (PL) */
        uint32_t        p_pcaction;     /* action  for process control on starvation */
        uint8_t p_uuid[16];             /* from LC_UUID load command */
 
+#if !CONFIG_EMBEDDED
+#define PROC_LEGACY_BEHAVIOR_IOTHROTTLE (0x00000001)
+        uint32_t       p_legacy_behavior;
+#endif
+
 /* End area that is copied on creation. */
 /* XXXXXXXXXXXXX End of BCOPY'ed on fork (AIOLOCK)XXXXXXXXXXXXXXXX */
 #define        p_endcopy       p_aio_total_count
@@ -379,6 +381,10 @@ struct     proc {
 #endif /* SIGNAL_DEBUG */
 #endif /* DIAGNOSTIC */
        uint64_t        p_dispatchqueue_offset;
+#if VM_PRESSURE_EVENTS
+       struct timeval  vm_pressure_last_notify_tstamp;
+#endif
+       int             p_dirty;                        /* dirty state */ 
 };
 
 #define PGRPID_DEAD 0xdeaddead
@@ -430,10 +436,8 @@ struct     proc {
 #define P_UNUSED       0x00200000      /* Unused */
 #define P_LRAGE_VNODES 0x00400000
 #define P_LREGISTER    0x00800000      /* thread start fns registered  */
-#if CONFIG_EMBEDDED
-#define P_LBACKGROUND  0x01000000
-#endif /* CONFIG_EMBEDDED */
-#define P_LVMRSRCOWNER 0x02000000      /* can handle the resource ownership of  */
+#define P_LVMRSRCOWNER 0x01000000      /* can handle the resource ownership of  */
+#define P_LPTERMINATE  0x02000000      /* can handle the resource ownership of  */
 
 /* Process control state for resource starvation */
 #define P_PCTHROTTLE   1
@@ -686,6 +690,7 @@ extern int  tsleep1(void *chan, int pri, const char *wmesg, u_int64_t abstime, in
 extern int     msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int timo, int (*continuation)(int));
 extern void    vfork_return(struct proc *child, int32_t *retval, int rval);
 extern int     exit1(struct proc *, int, int *);
+extern int     exit1_internal(struct proc *, int, int *, boolean_t, boolean_t);
 extern int     fork1(proc_t, thread_t *, int);
 extern void vfork_exit_internal(struct proc *p, int rv, int forced);
 extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked);
@@ -694,6 +699,7 @@ extern int proc_iterate(int flags, int (*callout)(proc_t , void *), void *arg, i
 extern int proc_rebootscan(int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg);
 extern int proc_childrenwalk(proc_t p, int (*callout)(proc_t , void *), void *arg);
 extern proc_t proc_findinternal(int pid, int funneled);
+extern proc_t proc_findthread(thread_t thread);
 extern void proc_refdrain(proc_t);
 extern void proc_childdrainlocked(proc_t);
 extern void proc_childdrainstart(proc_t);
@@ -739,6 +745,7 @@ extern int proc_pendingsignals(proc_t, sigset_t);
 int proc_getpcontrol(int pid, int * pcontrolp);
 int proc_dopcontrol(proc_t p, void *unused_arg);
 int proc_resetpcontrol(int pid);
+extern void proc_removethrottle(proc_t);
 #if PSYNCH
 void pth_proc_hashinit(proc_t);
 void pth_proc_hashdelete(proc_t);
index 19f3c26177b47ba4dcbbabf15dc98f86ac85d939..d9ad4827286878d7a83e5ff0e633d58cdec2c367 100644 (file)
@@ -59,7 +59,11 @@ __BEGIN_DECLS
 #define PROC_POLICY_HARDWARE_ACCESS    2       /* access to various hardware */
 #define PROC_POLICY_RESOURCE_STARVATION        3       /* behavior on resource starvation */
 #define PROC_POLICY_RESOURCE_USAGE     4       /* behavior on resource consumption */
+#if CONFIG_EMBEDDED
+#define PROC_POLICY_APP_LIFECYCLE      5       /* app life cycle management */
+#else /* CONFIG_EMBEDDED */
 #define PROC_POLICY_RESERVED           5       /* behavior on resource consumption */
+#endif /* CONFIG_EMBEDDED */
 #define PROC_POLICY_APPTYPE            6       /* behavior on resource consumption */
 
 /* sub policies for background policy */
@@ -85,28 +89,29 @@ __BEGIN_DECLS
 /* attribute values for disk hardware access, a bit different as it should reflect IOPOL_XXX */
 #define PROC_POLICY_DISKACC_NONE       0
 #define PROC_POLICY_DISKACC_NORMAL     1       /* normal access to the disk */
+#define PROC_POLICY_DISKACC_FULLACCESS 1       /* normal access to the disk */
 #define PROC_POLICY_DISKACC_PASSIVE    2       /* treat the I/Os as passive */
 #define PROC_POLICY_DISKACC_THROTTLE   3       /* throttle the disk IOs */
-#define PROC_POLICY_DISKACC_DEFAULT    0
+#define PROC_POLICY_DISKACC_DEFAULT    PROC_POLICY_DISKACC_FULLACCESS
 
 /* attribute values for GPU hardware access */
 #define PROC_POLICY_GPUACC_NONE        0
 #define PROC_POLICY_GPUACC_FULLACCESS  0       /* complete access to the GPU */
 #define PROC_POLICY_GPUACC_DENYACCESS  1       /* deny any access to the GPU */
-#define PROC_POLICY_GPUACC_DEFAULT     0       /*  default is complete access */
+#define PROC_POLICY_GPUACC_DEFAULT     PROC_POLICY_GPUACC_FULLACCESS /*  default is complete access */
 
 /* attribute values for network hardware access */
 #define PROC_POLICY_NETACC_NONE        0
-#define PROC_POLICY_NETACC_NORMAL      0       /* complete access to the network */
+#define PROC_POLICY_NETACC_FULLACCESS  0       /* complete access to the network */
 #define PROC_POLICY_NETACC_THROTTLE    1       /* throttle access to network */
-#define PROC_POLICY_NETACC_DEFAULT     0       /*  default is complete access */
+#define PROC_POLICY_NETACC_DEFAULT     PROC_POLICY_NETACC_FULLACCESS /*  default is complete access */
 
 /* attribute values for CPU hardware access */
 #define PROC_POLICY_CPUACC_NONE                0
-#define PROC_POLICY_CPUACC_ALL         0       /* access to all avialable cpus */
+#define PROC_POLICY_CPUACC_FULLACCESS  0       /* access to all available cpus */
 #define PROC_POLICY_CPUACC_ONE         1       /* access to only one available cpu */
 #define PROC_POLICY_CPUACC_LLCACHE     2       /* access to only one last level cache */
-#define PROC_POLICY_CPUACC_DEFAULT     0       /*  default is access to all cpus */
+#define PROC_POLICY_CPUACC_DEFAULT     PROC_POLICY_CPUACC_FULLACCESS /*  default is access to all cpus */
 
 
 /* System Resource management (ie usage and starvation related) definitions */
@@ -124,12 +129,13 @@ __BEGIN_DECLS
 #define PROC_POLICY_RUSAGE_NETWORK     5       /* amount of network usage */
 #define PROC_POLICY_RUSAGE_POWER       6       /* amount of power/battery consumption */
 
-/* attribute values for the resource usage and low resource */
+/* attribute values for the resource usage and low resource - MUST match corresponding task definitions */
 #define PROC_POLICY_RSRCACT_NONE       0
 #define PROC_POLICY_RSRCACT_THROTTLE   1       /* throttle on resource condition */
 #define PROC_POLICY_RSRCACT_SUSPEND    2       /* suspend on resource condition */
 #define PROC_POLICY_RSRCACT_TERMINATE  3       /* kill on resource condition */
-#define PROC_POLICY_RSRCACT_NOTIFY     4       /* send kqueue notification */
+#define PROC_POLICY_RSRCACT_NOTIFY_KQ  4       /* send kqueue notification */
+#define PROC_POLICY_RSRCACT_NOTIFY_EXC 5       /* send exception */
 
 
 /* type of resource for kqueue notification */
@@ -158,14 +164,33 @@ typedef struct proc_policy_cpuusage_attr {
        uint64_t        ppattr_cpu_attr_deadline;     /* 64bit deadline in nsecs */
 } proc_policy_cpuusage_attr_t;
 
+#if CONFIG_EMBEDDED
+/* sub policies for app lifecycle management */
+#define        PROC_POLICY_APPLIFE_NONE        0       /* does nothing.. */
+#define        PROC_POLICY_APPLIFE_STATE       1       /* sets the app to various lifecycle states */
+#define        PROC_POLICY_APPLIFE_DEVSTATUS   2       /* notes the device as inactive or in short/long-term use */
+#define        PROC_POLICY_APPLIFE_PIDBIND     3       /* a thread is to be bound to another processes app state */
+#endif /* CONFIG_EMBEDDED */
 
 /* sub policies for PROC_POLICY_APPTYPE */
+#define        PROC_POLICY_APPTYPE_NONE        0       /* does nothing.. */
+#define        PROC_POLICY_APPTYPE_MODIFY      1       /* sets the app to various lifecycle states */
+#if CONFIG_EMBEDDED
+#define        PROC_POLICY_APPTYPE_THREADTHR   2       /* notes the device in inactive or short/long term */
+#endif /* CONFIG_EMBEDDED */
+
+
 #define PROC_POLICY_OSX_APPTYPE_NONE            0
+#if CONFIG_EMBEDDED
+#define PROC_POLICY_IOS_RESV1_APPTYPE           1      /* TAL based launched */
+#define PROC_POLICY_IOS_APPLE_DAEMON           2       /* for use by iOS Apple daemons */
+#define PROC_POLICY_IOS_APPTYPE                 3      /* ios specific handling */
+#define PROC_POLICY_IOS_NONUITYPE               4      /* ios non graphics type */
+#else
 #define PROC_POLICY_OSX_APPTYPE_TAL             1      /* TAL based launched */
 #define PROC_POLICY_OSX_APPTYPE_WIDGET          2      /* for dashboard client */
 #define PROC_POLICY_OSX_APPTYPE_DASHCLIENT      2      /* rename to move away from widget */
-#define PROC_POLICY_IOS_APPTYPE                 3      /* ios specific handling */
-#define PROC_POLICY_IOS_NONUITYPE               4      /* ios non graphics type */
+#endif
 
 #ifndef KERNEL
 int process_policy(int scope, int action, int policy, int policy_subtype, proc_policy_attribute_t * attrp, pid_t target_pid, uint64_t target_threadid);
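
A hedged sketch of driving the prototype above from user space. The scope/action selectors (PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY), the hardware subtype PROC_POLICY_HWACCESS_DISK, and the ppattr_attribute field live elsewhere in this header; their exact names are assumptions here:

#include <sys/process_policy.h>
#include <unistd.h>

/* Ask the kernel to throttle this process's disk I/O. */
static int
throttle_my_disk_io(void)
{
	proc_policy_attribute_t attr = { 0 };

	attr.ppattr_attribute = PROC_POLICY_DISKACC_THROTTLE;
	return process_policy(PROC_POLICY_SCOPE_PROCESS,
	    PROC_POLICY_ACTION_APPLY, PROC_POLICY_HARDWARE_ACCESS,
	    PROC_POLICY_HWACCESS_DISK, &attr, getpid(), 0);
}
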
index f33117241ccebe740600a4ee0d4988d577c511b3..b22de04d862a79c60404ae84cc04ba6076457478 100644 (file)
@@ -49,8 +49,6 @@ typedef struct ksyn_waitq_element * ksyn_waitq_element_t;
 #define KWE_THREAD_BROADCAST    4
 
 
-#define WORKITEM_SIZE 64
-
 #define WORKQUEUE_HIGH_PRIOQUEUE    0       /* high priority queue */
 #define WORKQUEUE_DEFAULT_PRIOQUEUE 1       /* default priority queue */
 #define WORKQUEUE_LOW_PRIOQUEUE     2       /* low priority queue */
@@ -82,26 +80,13 @@ struct threadlist {
 #define TH_LIST_CONSTRAINED    0x40
 
 
-struct workitem {
-       TAILQ_ENTRY(workitem) wi_entry;
-       user_addr_t wi_item;
-       uint32_t wi_affinity;
-};
-
-struct workitemlist {
-       TAILQ_HEAD(, workitem) wl_itemlist;
-       TAILQ_HEAD(, workitem) wl_freelist;
-};
-
 struct workqueue {
-       struct workitem wq_array[WORKITEM_SIZE * WORKQUEUE_NUMPRIOS];
         proc_t         wq_proc;
         vm_map_t       wq_map;
         task_t         wq_task;
         thread_call_t  wq_atimer_call;
        int             wq_flags;
        int             wq_lflags;
-        int            wq_itemcount;
        uint64_t        wq_thread_yielded_timestamp;
        uint32_t        wq_thread_yielded_count;
        uint32_t        wq_timer_interval;
@@ -110,13 +95,14 @@ struct workqueue {
        uint32_t        wq_constrained_threads_scheduled;
        uint32_t        wq_nthreads;
         uint32_t       wq_thidlecount;
-       uint32_t        wq_reqconc[WORKQUEUE_NUMPRIOS];   /* requested concurrency for each priority level */
-       struct workitemlist  wq_list[WORKQUEUE_NUMPRIOS]; /* priority based item list */
-       uint32_t        wq_list_bitmap;
+       uint32_t        wq_reqcount;
        TAILQ_HEAD(, threadlist) wq_thrunlist;
        TAILQ_HEAD(, threadlist) wq_thidlelist;
-        uint32_t       *wq_thactive_count[WORKQUEUE_NUMPRIOS];
-        uint32_t       *wq_thscheduled_count[WORKQUEUE_NUMPRIOS];
+       uint16_t        wq_requests[WORKQUEUE_NUMPRIOS];
+       uint16_t        wq_ocrequests[WORKQUEUE_NUMPRIOS];
+       uint16_t        wq_reqconc[WORKQUEUE_NUMPRIOS];                 /* requested concurrency for each priority level */
+        uint16_t       *wq_thscheduled_count[WORKQUEUE_NUMPRIOS];
+        uint32_t       *wq_thactive_count[WORKQUEUE_NUMPRIOS];         /* must be uint32_t since we OSAddAtomic on these */
         uint64_t       *wq_lastblocked_ts[WORKQUEUE_NUMPRIOS];
 };
 #define WQ_LIST_INITED         0x01
@@ -151,6 +137,8 @@ struct workqueue {
 #define WQOPS_QUEUE_REMOVE_OBSOLETE 2 
 #define WQOPS_THREAD_RETURN 4
 #define WQOPS_THREAD_SETCONC  8
+#define WQOPS_QUEUE_NEWSPISUPP  0x10   /* this is to check for newer SPI support */
+#define WQOPS_QUEUE_REQTHREADS  0x20   /* request number of threads of a prio */
 
 #define PTH_DEFAULT_STACKSIZE 512*1024
 #define PTH_DEFAULT_GUARDSIZE 4*1024
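
WQOPS_QUEUE_REQTHREADS replaces the old per-item queueing with a simple "give me N threads at priority P" request, which is why the workitem machinery above could be deleted. A hedged sketch of the user-space side; the workq_kernreturn() stub name and argument order are assumed from the syscall table, not from this header:

extern int __workq_kernreturn(int options, void *item, int affinity, int prio);

/* Request nthreads workers at the default priority; 'item' is unused
 * by this op and the affinity slot carries the request count
 * (assumed calling convention). */
static int
request_worker_threads(int nthreads)
{
	return __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL,
	    nthreads, WORKQUEUE_DEFAULT_PRIOQUEUE);
}
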
index 6c64e53b8e5694ee0bdb1fcab64dfdc506ff2f14..18e3662e945faa249e928fa9d6fdbbb75d173101 100644 (file)
 
 #include <sys/appleapiopts.h>
 
-#ifdef KERNEL_BUILD
-#include <mach_kdb.h>
-#endif /* KERNEL_BUILD */
-
 /*
  * Arguments to reboot system call.
  */
index fbe8e62663deffa9b3f5775c9f7a8949d0854e8d..311e1f60150849d8342e3b3a0736ed4e6b459b37 100644 (file)
@@ -284,6 +284,7 @@ struct rlimit {
 #define IOPOL_NORMAL   1
 #define IOPOL_PASSIVE  2
 #define IOPOL_THROTTLE 3
+#define IOPOL_UTILITY  4
 
 #ifdef PRIVATE
 /*
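
IOPOL_UTILITY extends the existing disk I/O policy ladder; a minimal sketch of opting a process in through the setiopolicy_np(3) interface declared later in this header (IOPOL_TYPE_DISK and IOPOL_SCOPE_PROCESS are the pre-existing selectors):

#include <sys/resource.h>

static int
use_utility_io(void)
{
	/* fall back to plain throttling on kernels that reject the new value */
	if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
	    IOPOL_UTILITY) == 0)
		return 0;
	return setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
	    IOPOL_THROTTLE);
}
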
index 18eeb2061291360e583cb4795ed7aaaabfed02c2..35689b2fc7a07ea2e14724f7d225b1776ed60635 100644 (file)
@@ -164,16 +164,6 @@ struct sembuf {
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 
-/*
- * System imposed limit on the value of the third parameter to semop().
- * This is arbitrary, and the standards unfortunately do not provide a
- * way for user applications to retrieve this value (e.g. via sysconf()
- * or from a manifest value in <unistd.h>).  The value shown here is
- * informational, and subject to change in future revisions.
- */
-#define MAX_SOPS       5       /* maximum # of sembuf's per semop call */
-
-
 /*
  * Union used as the fourth argment to semctl() in all cases.  Specific
  * member values are used for different values of the third parameter:
index 42fd3bffbf9e2f8a36ef3a2b3749fd1960d0cc9a..bf697a1eadb8888a0e90b0efefb6bd78b1dddd74 100644 (file)
@@ -146,7 +146,7 @@ typedef union user_semun user_semun_t;
 #define SEMMSL SEMMNS          /* max # of semaphores per id */
 #endif
 #ifndef SEMOPM
-#define SEMOPM 100             /* max # of operations per semop call */
+#define SEMOPM 5               /* max # of operations per semop call */
 #endif
 
 
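
SEMOPM now drops to the value the informational MAX_SOPS define (removed from <sys/sem.h> above) had long advertised: a semop(2) call may carry at most five operations, and larger batches fail (per POSIX, with E2BIG). An illustrative call:

#include <sys/sem.h>

/* Atomically decrement semaphore 0 and increment semaphore 1;
 * nsops here is 2, comfortably under the new limit of 5. */
static int
swap_tokens(int semid)
{
	struct sembuf ops[2] = {
		{ .sem_num = 0, .sem_op = -1, .sem_flg = 0 },
		{ .sem_num = 1, .sem_op =  1, .sem_flg = 0 },
	};
	return semop(semid, ops, 2);
}
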
index d2e40fd76e36ddf77d2d3cde1c6551efe9ce466f..0ec1754362d847a6e0da7478d8855419d8b2c4cd 100644 (file)
@@ -294,6 +294,9 @@ typedef struct user32_siginfo {
        user32_ulong_t  __pad[7];               /* Reserved for Future Use */
 } user32_siginfo_t;
 
+void siginfo_user_to_user32(user_siginfo_t *, user32_siginfo_t *);
+void siginfo_user_to_user64(user_siginfo_t *, user64_siginfo_t *);
+
 #endif /* BSD_KERNEL_PRIVATE */
 
 /* 
index 69ff9e15cf27ecb813bab88089f046e9f077ad02..390d1b76408f7d7ee70f6441340551ade1954042 100644 (file)
@@ -194,11 +194,11 @@ int sigprop[NSIG + 1] = {
 int    coredump(struct proc *p);
 void   execsigs(struct proc *p, thread_t thread);
 void   gsignal(int pgid, int sig);
-int    issignal(struct proc *p);
+int    issignal_locked(struct proc *p);
 int    CURSIG(struct proc *p);
 int clear_procsiglist(struct proc *p, int bit, int in_signalstart);
 int set_procsigmask(struct proc *p, int bit);
-void   postsig(int sig);
+void   postsig_locked(int sig);
 void   siginit(struct proc *p) __attribute__((section("__TEXT, initcode")));
 void   trapsignal(struct proc *p, int sig, unsigned code);
 void   pt_setrunnable(struct proc *p);
index 3fc35997c342d4950b9f5a2015c3c7684958303c..204f5363b867122df7743ab90fffc0f56a11844e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -140,12 +140,14 @@ struct iovec {
 #endif
 
 #ifdef PRIVATE
-#define SO_TCDBG_PID   0x01    /* Set/get traffic class for PID */
-#define SO_TCDBG_PNAME 0x02    /* Set/get traffic class for processes of that name */
-#define SO_TCDBG_PURGE 0x04    /* Purge entries for unused PIDs */
-#define SO_TCDBG_FLUSH 0x08    /* Flush all entries */
-#define SO_TCDBG_COUNT 0x10    /* Get count of entries */
-#define SO_TCDBG_LIST  0x20    /* List entries */
+#define SO_TCDBG_PID           0x01    /* Set/get traffic class for PID */
+#define SO_TCDBG_PNAME         0x02    /* Set/get traffic class for processes of that name */
+#define SO_TCDBG_PURGE         0x04    /* Purge entries for unused PIDs */
+#define SO_TCDBG_FLUSH         0x08    /* Flush all entries */
+#define SO_TCDBG_COUNT         0x10    /* Get count of entries */
+#define SO_TCDBG_LIST          0x20    /* List entries */
+#define SO_TCDBG_DELETE                0x40    /* Delete a process entry */
+#define SO_TCDBG_TCFLUSH_PID   0x80    /* Flush traffic class for PID */
 
 struct so_tcdbg {
        u_int32_t       so_tcdbg_cmd;
@@ -153,6 +155,7 @@ struct so_tcdbg {
        u_int32_t       so_tcdbg_count;
        pid_t           so_tcdbg_pid;
        char            so_tcdbg_pname[MAXCOMLEN + 1];
+       int32_t         so_tcdbg_opportunistic; /* -1: unspecified, 0: off, 1: on, other: errors */
 };
 #endif /* PRIVATE */
  
@@ -192,9 +195,10 @@ struct so_tcdbg {
 #else
 #define SO_DONTTRUNC   0x2000          /* APPLE: Retain unread data */
                                        /*  (ATOMIC proto) */
-#define SO_WANTMORE            0x4000          /* APPLE: Give hint when more data ready */
+#define SO_WANTMORE    0x4000          /* APPLE: Give hint when more data ready */
 #define SO_WANTOOBFLAG 0x8000          /* APPLE: Want OOB in MSG_FLAG on receive */
-#endif
+
+#endif  /* (!__APPLE__) */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
@@ -234,12 +238,85 @@ struct so_tcdbg {
 
 #ifdef PRIVATE
 #define        SO_EXECPATH     0x1085          /* Application Firewall Socket option */
-#define SO_TRAFFIC_CLASS               0x1086          /* Traffic class (int)*/
-#define  SO_TC_BE      0               /* Best effort, normal */
-#define  SO_TC_BK      1               /* Background, low priority or bulk traffic */
-#define  SO_TC_VI      2               /* Interactive video, constant bit rate, low latency */
-#define  SO_TC_VO      3               /* Interactive voice, constant bit rate, lowest latency */
-#define  SO_TC_MAX     4               /* Max traffic class value */
+/*
+ * Traffic service class definitions (lowest to highest):
+ *
+ * SO_TC_BK_SYS
+ *     "Background System-Initiated", high delay tolerant, high loss
+ *     tolerant, elastic flow, variable size & long-lived.  E.g. system-
+ *     initiated iCloud synching or Time Capsule backup, for which there
+ *     is no progress feedback.
+ *
+ * SO_TC_BK
+ *     "Background", user-initiated, high delay tolerant, high loss tolerant,
+ *     elastic flow, variable size.  E.g. user-initiated iCloud synching or
+ *     Time Capsule backup; or traffic from background applications, for which
+ *     there is some progress feedback.
+ *
+ * SO_TC_BE
+ *     "Best Effort", unclassified/standard.  This is the default service
+ *     class; pretty much a mix of everything.
+ *
+ * SO_TC_RD
+ *     "Responsive Data", a notch higher than "Best Effort", medium delay
+ *     tolerant, elastic & inelastic flow, bursty, long-lived.  E.g. email,
+ *     instant messaging, for which there is a sense of interactivity and
+ *     urgency (user waiting for output).
+ *
+ * SO_TC_OAM
+ *     "Operations, Administration, and Management", medium delay tolerant,
+ *     low-medium loss tolerant, elastic & inelastic flows, variable size.
+ *     E.g. VPN tunnels.
+ *
+ * SO_TC_AV
+ *     "Multimedia Audio/Video Streaming", medium delay tolerant, low-medium
+ *     loss tolerant, elastic flow, constant packet interval, variable rate &
+ *     size.  E.g. AirPlay playback (both video and audio).
+ *
+ * SO_TC_RV
+ *     "Responsive Multimedia Audio/Video", low delay tolerant, low-medium
+ *     loss tolerant, elastic flow, variable packet interval, rate and size.
+ *     E.g. AirPlay mirroring, screen sharing.
+ *
+ * SO_TC_VI
+ *     "Interactive Video", low delay tolerant, low-medium loss tolerant,
+ *     elastic flow, constant packet interval, variable rate & size.  E.g.
+ *     FaceTime video.
+ *
+ * SO_TC_VO
+ *     "Interactive Voice", low delay tolerant, low loss tolerant, inelastic
+ *     flow, constant packet rate, somewhat fixed size.  E.g. VoIP including
+ *     FaceTime audio.
+ *
+ * SO_TC_CTL
+ *     "Network Control", low delay tolerant, low loss tolerant, inelastic
+ *     flow, rate is bursty but short, variable size.  E.g. DNS queries;
+ *     certain types of locally-originated ICMP, ICMPv6; IGMP/MLD join/leave,
+ *     ARP.
+ */
+#define SO_TRAFFIC_CLASS       0x1086  /* Traffic service class (int) */
+#define         SO_TC_BK_SYS   100             /* lowest class */
+#define         SO_TC_BK       200
+#define  SO_TC_BE      0
+#define         SO_TC_RD       300
+#define         SO_TC_OAM      400
+#define         SO_TC_AV       500
+#define         SO_TC_RV       600
+#define         SO_TC_VI       700
+#define         SO_TC_VO       800
+#define         SO_TC_CTL      900             /* highest class */
+#define  SO_TC_MAX     10              /* Total # of traffic classes */
+#ifdef XNU_KERNEL_PRIVATE
+#define  _SO_TC_BK     1               /* deprecated */
+#define  _SO_TC_VI     2               /* deprecated */
+#define  _SO_TC_VO     3               /* deprecated */
+#define  _SO_TC_MAX    4               /* deprecated */
+
+#define        SO_VALID_TC(c)                                                  \
+       (c == SO_TC_BK_SYS || c == SO_TC_BK || c == SO_TC_BE ||         \
+       c == SO_TC_RD || c == SO_TC_OAM || c == SO_TC_AV ||             \
+       c == SO_TC_RV || c == SO_TC_VI || c == SO_TC_VO || c == SO_TC_CTL)
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* Background socket configuration flags */
 #define TRAFFIC_MGT_SO_BACKGROUND       0x0001  /* background socket */
@@ -248,8 +325,21 @@ struct so_tcdbg {
 #define SO_RECV_TRAFFIC_CLASS  0x1087          /* Receive traffic class (bool)*/
 #define SO_TRAFFIC_CLASS_DBG   0x1088          /* Debug traffic class (struct so_tcdbg) */
 #define SO_TRAFFIC_CLASS_STATS 0x1089          /* Traffic class statistics */
+#define SO_PRIVILEGED_TRAFFIC_CLASS 0x1090     /* Privileged traffic class (bool) */
 #define        SO_DEFUNCTOK    0x1100          /* can be defunct'd */
 #define        SO_ISDEFUNCT    0x1101          /* get defunct status */
+
+#define        SO_OPPORTUNISTIC        0x1102  /* deprecated; use SO_TRAFFIC_CLASS */
+
+/*
+ * SO_FLUSH flushes any unsent data generated by a given socket.  It takes
+ * an integer parameter, which can be any of the SO_TC traffic class values,
+ * or the special SO_TC_ALL value.
+ */
+#define        SO_FLUSH        0x1103          /* flush unsent data (int) */
+#define         SO_TC_ALL      (-1)
+
+#define        SO_RECV_ANYIF   0x1104          /* unrestricted inbound processing */
 #endif /* PRIVATE */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
@@ -284,7 +374,8 @@ struct so_np_extensions {
 
 #ifdef KERNEL_PRIVATE
 #define SONPX_MASK_VALID               (SONPX_SETOPTSHUT)
-#endif
+#define IS_SO_TC_BACKGROUND(_tc_) ((_tc_) == SO_TC_BK || (_tc_) == SO_TC_BK_SYS)
+#endif /* KERNEL_PRIVATE */
 
 #endif
 #endif
@@ -360,7 +451,10 @@ struct so_np_extensions {
 #define        AF_NETGRAPH     32              /* Netgraph sockets */
 #endif
 #define AF_IEEE80211    37              /* IEEE 802.11 protocol */
-#define        AF_MAX          38
+#ifdef __APPLE__
+#define AF_UTUN                38
+#endif
+#define        AF_MAX          39
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
@@ -460,6 +554,9 @@ struct sockaddr_storage {
 #define        PF_NETGRAPH     AF_NETGRAPH
 #endif
 
+#ifdef __APPLE__
+#define PF_UTUN                AF_UTUN
+#endif
 #define        PF_MAX          AF_MAX
 
 /*
@@ -773,6 +870,8 @@ struct omsghdr {
        void            *msg_accrights;         /* access rights sent/rcvd */
        int             msg_accrightslen;
 };
+
+#define        SA(s)   ((struct sockaddr *)(void *)(s))
 #endif /* KERNEL_PRIVATE */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
index 3c81716fe92fdd161f2d7ccc9aaba0f346ac8963..06e6f5c41a920cc542ffdb50cefb5365ef9a9d84 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -128,10 +128,10 @@ struct accept_filter;
 struct socket {
        int     so_zone;                /* zone we were allocated from */
        short   so_type;                /* generic type, see socket.h */
-       short   so_options;             /* from socket call, see socket.h */
+       u_int32_t so_options;           /* from socket call, see socket.h */
        short   so_linger;              /* time to linger while closing */
        short   so_state;               /* internal state flags SS_*, below */
-       void    *so_pcb;                        /* protocol control block */
+       void    *so_pcb;                /* protocol control block */
        struct  protosw *so_proto;      /* protocol handle */
        /*
         * Variables for connection queueing.
@@ -179,7 +179,7 @@ struct socket {
                struct  selinfo sb_sel; /* process selecting read/write */
                short   sb_flags;       /* flags, see below */
                struct timeval sb_timeo; /* timeout for read/write */
-               u_int   sb_maxused;     /* max char count ever used in sockbuf */
+               u_int32_t sb_idealsize; /* Ideal size for the sb based on bandwidth and delay */
                void    *reserved1[4];  /* for future use */
        } so_rcv, so_snd;
 #define        SB_MAX          (8192*1024)     /* default for max chars in sockbuf */
@@ -199,15 +199,16 @@ struct socket {
 #define        SB_NOTIFY       (SB_WAIT|SB_SEL|SB_ASYNC)
 #define        SB_DROP         0x400           /* does not accept any more data */
 #define        SB_UNIX         0x800           /* UNIX domain socket buffer */
+#define        SB_AUTOSIZE     0x1000          /* automatically size socket buffer */
+#define        SB_TRIM         0x2000          /* Trim the socket buffer */
 #define        SB_RECV         0x8000          /* this is rcv sb */
 
-       caddr_t so_tpcb;        /* Wisc. protocol control block - XXX unused? */
+       caddr_t so_tpcb;        /* Wisc. protocol control block, used by some kexts */
 #endif
 
        void    (*so_upcall)(struct socket *so, caddr_t arg, int waitf);
        caddr_t so_upcallarg;           /* Arg for above */
-       uid_t   so_uid;                 /* who opened the socket */
-       gid_t   so_gid;                 /* gid of whoever opened the socket */
+       kauth_cred_t    so_cred;        /* cred of who opened the socket */
        /* NB: generation count must not be first; easiest to make it last. */
        so_gen_t so_gencnt;             /* generation count */
 #ifndef __APPLE__
@@ -234,7 +235,6 @@ struct socket {
 #define        SOF_PCBCLEARING 0x4     /* pru_disconnect done; don't call pru_detach */
 #define        SOF_DEFUNCT     0x8     /* socket marked as inactive */
 #define        SOF_CLOSEWAIT   0x10    /* blocked in close awaiting some events */
-#define        SOF_UPCALLINUSE 0x20    /* socket upcall is currently in progress */
 #define SOF_REUSESHAREUID      0x40    /* Allows SO_REUSEADDR/SO_REUSEPORT for multiple so_uid */
 #define        SOF_MULTIPAGES  0x80    /* jumbo clusters may be used for sosend */
 #define SOF_ABORTED    0x100   /* soabort was already called once on the socket */
@@ -247,7 +247,13 @@ struct socket {
 #define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow setsockopt(2) after shut down */
 #define SOF_RECV_TRAFFIC_CLASS 0x4000  /* Receive traffic class as ancillary data */
 #define        SOF_NODEFUNCT   0x8000  /* socket cannot be defunct'd */
-#define SOF_INCOMP_INPROGRESS 0x10000 /* incomp socket still being processed */
+#define        SOF_PRIVILEGED_TRAFFIC_CLASS 0x10000 /* traffic class is privileged */
+#define SOF_SUSPENDED          0x20000 /* interface output queue is suspended */
+#define SOF_INCOMP_INPROGRESS  0x40000 /* incomp socket still being processed */
+#define        SOF_NOTSENT_LOWAT       0x80000 /* A different lowat on not sent data has been set */
+#define SOF_KNOTE      0x100000 /* socket is on the EV_SOCK klist */
+#define SOF_USELRO     0x200000 /* TCP must use LRO on these sockets */
+       uint32_t        so_upcallusecount;      /* number of upcalls in progress */
        int     so_usecount;    /* refcounting of socket use */;
        int     so_retaincnt;
        u_int32_t so_filteruse; /* usecount for the socket filters */
@@ -268,12 +274,12 @@ struct socket {
        struct  label *so_peerlabel;    /* cached MAC label for socket peer */
        thread_t        so_background_thread;   /* thread that marked this socket background */
        int             so_traffic_class;
-       
+
        // last process to interact with this socket
        u_int64_t       last_upid;
        pid_t           last_pid;
-
        struct data_stats       so_tc_stats[SO_TC_STATS_MAX];
+       struct klist    so_klist; /* klist for EV_SOCK events */
 };
 
 /* Control message accessor in mbufs */
@@ -285,8 +291,8 @@ struct socket {
 
 #define M_FIRST_CMSGHDR(m)                                                                      \
         ((char *)(m) != (char *)0L && (size_t)(m)->m_len >= sizeof(struct cmsghdr) &&           \
-         (socklen_t)(m)->m_len >= __DARWIN_ALIGN32(((struct cmsghdr *)(m)->m_data)->cmsg_len) ?\
-         (struct cmsghdr *)(m)->m_data :                                                        \
+         (socklen_t)(m)->m_len >= __DARWIN_ALIGN32(((struct cmsghdr *)(void *)(m)->m_data)->cmsg_len) ?\
+         (struct cmsghdr *)(void *)(m)->m_data :                                                        \
          (struct cmsghdr *)0L)
 
 #define M_NXT_CMSGHDR(m, cmsg)                                                  \
@@ -294,7 +300,7 @@ struct socket {
             _MIN_NXT_CMSGHDR_PTR(cmsg) > ((char *)(m)->m_data) + (m)->m_len ||  \
             _MIN_NXT_CMSGHDR_PTR(cmsg) < (char *)(m)->m_data ?                  \
                 (struct cmsghdr *)0L /* NULL */ :                               \
-                (struct cmsghdr *)((unsigned char *)(cmsg) +                    \
+                (struct cmsghdr *)(void *)((unsigned char *)(cmsg) +            \
                             __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))
 
 #endif /* KERNEL_PRIVATE */
@@ -327,6 +333,12 @@ struct socket {
 #define        _XSOCKET_PTR(x)         x
 #endif
 
+#ifdef PRIVATE
+/* Flags returned in data field for EVFILT_SOCK events. */
+#define SOCKEV_CONNECTED       0x00000001 /* connected */
+#define SOCKEV_DISCONNECTED    0x00000002 /* disconnected */
+#endif /* PRIVATE */
+
 #pragma pack(4)
 
 struct xsockbuf {
@@ -404,7 +416,7 @@ struct      xsocket_n {
        u_int32_t               xso_kind;               /* XSO_SOCKET */
        u_int64_t               xso_so; /* makes a convenient handle */
        short                   so_type;
-       short                   so_options;
+       u_int32_t               so_options;
        short                   so_linger;
        short                   so_state;
        u_int64_t               so_pcb;         /* another convenient handle */
@@ -488,8 +500,25 @@ struct kextcb {
 #define        sotokextcb(so) (so ? so->so_ext : 0)
 
 #ifdef KERNEL
-
-#define        SO_FILT_HINT_LOCKED 0x1
+#include <sys/kpi_mbuf.h>
+
+/* Hints for socket event processing */
+#define SO_FILT_HINT_LOCKED      0x00000001    /* socket is already locked */
+#define SO_FILT_HINT_CONNRESET   0x00000002    /* Reset is received */
+#define SO_FILT_HINT_CANTRCVMORE 0x00000004    /* No more data to read */
+#define SO_FILT_HINT_CANTSENDMORE 0x00000008   /* Can't write more data */
+#define SO_FILT_HINT_TIMEOUT     0x00000010    /* timeout */
+#define SO_FILT_HINT_NOSRCADDR   0x00000020    /* No src address available */
+#define SO_FILT_HINT_IFDENIED    0x00000040    /* interface denied connection */
+#define SO_FILT_HINT_SUSPEND     0x00000080    /* output queue suspended */
+#define SO_FILT_HINT_RESUME      0x00000100    /* output queue resumed */
+#define SO_FILT_HINT_KEEPALIVE  0x00000200     /* TCP Keepalive received */
+
+#define SO_FILT_HINT_EV (SO_FILT_HINT_CONNRESET | \
+       SO_FILT_HINT_CANTRCVMORE | SO_FILT_HINT_CANTSENDMORE | \
+       SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR | \
+       SO_FILT_HINT_IFDENIED | SO_FILT_HINT_SUSPEND | \
+       SO_FILT_HINT_RESUME | SO_FILT_HINT_KEEPALIVE)
 
 /*
  * Argument structure for sosetopt et seq.  This is in the KERNEL
@@ -527,6 +556,7 @@ extern int  socket_debug;
 extern int sosendjcl;
 extern int sosendjcl_ignore_capab;
 extern int sodefunctlog;
+extern int sothrottlelog;
 extern int somaxconn;
 
 struct file;
@@ -552,8 +582,20 @@ struct so_tcdbg;
        }                                       \
 }
 
+#define SB_MB_CHECK(sb) do {                   \
+       if (((sb)->sb_mb != NULL &&             \
+               (sb)->sb_cc == 0) ||            \
+               ((sb)->sb_mb == NULL &&         \
+               (sb)->sb_cc > 0))               \
+               panic("corrupt so_rcv: sb_mb %p sb_cc %d\n", \
+                       (sb)->sb_mb, (sb)->sb_cc);      \
+} while(0)
+                
+
 #define        SODEFUNCTLOG(x)         do { if (sodefunctlog) printf x; } while (0)
 
+#define        SOTHROTTLELOG(x)        do { if (sothrottlelog) printf x; } while (0)
+
 /*
  * For debugging traffic class behaviors
  */
@@ -562,7 +604,8 @@ struct so_tcdbg;
 #define SOTCDB_NO_SENDTCPBG    0x04    /* Do not use background TCP CC algorithm for sender */
 #define SOTCDB_NO_LCLTST       0x08    /* Do not test for local destination for setting DSCP */
 #define SOTCDB_NO_DSCPTST      0x10    /* Overwrite any existing DSCP code */
-#define SOTCDB_NO_RECVTCPBG    0x20    /* Do not use throttling on receiver-side of TCP */ 
+#define SOTCDB_NO_RECVTCPBG    0x20    /* Do not use throttling on receiver-side of TCP */
+#define        SOTCDB_NO_PRIVILEGED    0x40    /* Do not set privileged traffic flag */
 
 extern u_int32_t sotcdb;
 
@@ -630,7 +673,8 @@ extern void soisconnecting(struct socket *so);
 extern void soisdisconnected(struct socket *so);
 extern void sodisconnectwakeup(struct socket *so);
 extern void soisdisconnecting(struct socket *so);
-extern int soisbackground(struct socket *so);
+extern int soisthrottled(struct socket *so);
+extern int soisprivilegedtraffic(struct socket *so);
 extern int solisten(struct socket *so, int backlog);
 extern struct socket *sodropablereq(struct socket *head);
 extern struct socket *sonewconn(struct socket *head, int connstatus,
@@ -643,15 +687,34 @@ extern int socket_unlock(struct socket *so, int refcount);
 extern void sofreelastref(struct socket *, int);
 extern int sogetaddr_locked(struct socket *, struct sockaddr **, int);
 extern const char *solockhistory_nr(struct socket *);
-extern void set_packet_tclass(struct mbuf *, struct socket *, int, int);
-extern int mbuf_traffic_class_from_control(struct mbuf *);
+extern void soevent(struct socket *so, long hint);
+extern void get_sockev_state(struct socket *, u_int32_t *);
+
+#ifdef BSD_KERNEL_PRIVATE
+/* Service class flags used for setting service class on a packet */
+#define PKT_SCF_IPV6           0x00000001      /* IPv6 packet */
+#define PKT_SCF_TCP_ACK                0x00000002      /* Pure TCP ACK */
+
+extern void set_packet_service_class(struct mbuf *, struct socket *,
+    mbuf_svc_class_t, u_int32_t);
+extern void so_tc_update_stats(struct mbuf *, struct socket *, mbuf_svc_class_t );
+extern mbuf_svc_class_t mbuf_service_class_from_control(struct mbuf *);
+extern mbuf_svc_class_t so_tc2msc(int);
+extern int so_svc2tc(mbuf_svc_class_t);
+
 extern void set_tcp_stream_priority(struct socket *so);
 extern int so_set_traffic_class(struct socket *, int);
 extern void so_set_default_traffic_class(struct socket *);
+extern int so_set_opportunistic(struct socket *, int);
+extern int so_get_opportunistic(struct socket *);
+extern int so_set_recv_anyif(struct socket *, int);
+extern int so_get_recv_anyif(struct socket *);
 extern void socket_tclass_init(void);
 extern int so_set_tcdbg(struct socket *, struct so_tcdbg *);
 extern int sogetopt_tcdbg(struct socket *, struct sockopt *);
 extern void so_recv_data_stat(struct socket *, struct mbuf *, size_t);
+extern int so_wait_for_if_feedback(struct socket *);
+#endif /* BSD_KERNEL_PRIVATE */
 
 /*
  * XXX; prepare mbuf for (__FreeBSD__ < 3) routines.
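
A minimal kernel-side sketch (illustrative, not from xnu) of how the new hints feed EVFILT_SOCK: a layer that notices its interface was suspended posts the matching hint through soevent(), declared above:

/* Caller already holds the socket lock, hence SO_FILT_HINT_LOCKED. */
static void
note_suspended(struct socket *so)
{
	soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND);
}
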
index 3a6b1371b7dbb4f52d2db21338d12068410be308..98009542c7eda73058bf93cc442a554967e8ee03 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #ifdef PRIVATE
 #define        SIOCGIFGETRTREFCNT _IOWR('i', 137, struct ifreq) /* get interface route refcnt */
+#define        SIOCGIFLINKQUALITYMETRIC _IOWR('i', 138, struct ifreq) /* get LQM */
+#define SIOCSIFOPPORTUNISTIC    _IOWR('i', 139, struct ifreq)  /* deprecated; use SIOCSIFTHROTTLE */
+#define SIOCGIFOPPORTUNISTIC    _IOWR('i', 140, struct ifreq)  /* deprecated; use SIOCGIFTHROTTLE */
+#define        SIOCSETROUTERMODE       _IOWR('i', 141, struct ifreq)   /* enable/disable IPv4 router mode on interface */
+#define        SIOCGIFEFLAGS           _IOWR('i', 142, struct ifreq)   /* get extended ifnet flags */
+#define        SIOCSIFDESC     _IOWR('i', 143, struct if_descreq)
+#define        SIOCGIFDESC     _IOWR('i', 144, struct if_descreq)
+#define        SIOCSIFLINKPARAMS _IOWR('i', 145, struct if_linkparamsreq)
+#define        SIOCGIFLINKPARAMS _IOWR('i', 146, struct if_linkparamsreq)
+#define        SIOCGIFQUEUESTATS _IOWR('i', 147, struct if_qstatsreq)
+#define        SIOCSIFTHROTTLE _IOWR('i', 148, struct if_throttlereq)
+#define        SIOCGIFTHROTTLE _IOWR('i', 149, struct if_throttlereq)
 #endif /* PRIVATE */
 
-
 #endif /* !_SYS_SOCKIO_H_ */
index 4947902ddd1c59738bc819356dfffcc804c84ec8..85377d5f0406c6e9a4cf5240d5d9fe7c52404c5c 100644 (file)
 #define        POSIX_SPAWN_START_SUSPENDED     0x0080
 #ifdef PRIVATE
 #define        _POSIX_SPAWN_DISABLE_ASLR       0x0100
-#define        _POSIX_SPAWN_ALLOW_DATA_EXEC    0x2000
+/* unused                              0x0200 */
+/* for compat sake */
 #define        POSIX_SPAWN_OSX_TALAPP_START    0x0400
 #define        POSIX_SPAWN_OSX_WIDGET_START    0x0800
 #define        POSIX_SPAWN_OSX_DBCLIENT_START  0x0800          /* not a bug, same as widget just rename */
-#define        POSIX_SPAWN_IOS_APP_START       0x1000
+#define        POSIX_SPAWN_OSX_RESVAPP_START   0x1000          /* reserved for app start usages */
+#define        _POSIX_SPAWN_ALLOW_DATA_EXEC    0x2000
 #endif /* PRIVATE */
 #define        POSIX_SPAWN_CLOEXEC_DEFAULT     0x4000
 
index d295260951465e63e62274632d99dac6ce9b546c..42f4687f8befd421e13dc72a4f7854b6986b4b69 100644 (file)
@@ -38,7 +38,7 @@
  */
 
 #ifndef        _SYS_SPAWN_INTERNAL_H_
-#define        _SYS_SPAWN__INTERNALH_
+#define        _SYS_SPAWN_INTERNAL_H_
 
 #include <sys/_types.h>                /* __offsetof(), __darwin_size_t */
 #include <sys/syslimits.h>     /* PATH_MAX */
@@ -64,7 +64,7 @@ typedef enum {
 typedef struct _ps_port_action {
        pspa_t                  port_type;
        exception_mask_t        mask;
-       mach_port_t             new_port;
+       mach_port_name_t        new_port;
        exception_behavior_t    behavior;
        thread_state_flavor_t   flavor;
        int                     which;
@@ -99,10 +99,55 @@ typedef struct _posix_spawnattr {
        sigset_t        psa_sigmask;            /* signal set to mask */
        pid_t           psa_pgroup;             /* pgroup to spawn into */
        cpu_type_t      psa_binprefs[NBINPREFS];   /* cpu affinity prefs*/
-       _posix_spawn_port_actions_t     psa_ports; /* special/exception ports */
        int             psa_pcontrol;           /* process control bits on resource starvation */
+       int             psa_apptype;            /* app type and process-specific behavior */
+       uint64_t        psa_cpumonitor_percent; /* CPU usage monitor percentage */
+       uint64_t        psa_cpumonitor_interval; /* CPU usage monitor interval, in seconds */
+       _posix_spawn_port_actions_t     psa_ports; /* special/exception ports */
+       /* XXX - k64/u32 unaligned below here */
+#if CONFIG_MEMORYSTATUS || CONFIG_EMBEDDED || TARGET_OS_EMBEDDED
+       /* Jetsam related */
+       short       psa_jetsam_flags; /* flags */
+       int         psa_priority;   /* relative importance */
+       int         psa_high_water_mark; /* resident page count limit */
+#endif
 } *_posix_spawnattr_t;
 
+/*
+ * Jetsam flags
+ */
+#if CONFIG_MEMORYSTATUS || CONFIG_EMBEDDED || TARGET_OS_EMBEDDED
+#define        POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY       0x1
+#endif
+
+/*
+ * DEPRECATED: maintained for transition purposes only
+ * posix_spawn apptype settings.
+ */
+#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED
+/* for compat sake */
+#define POSIX_SPAWN_OSX_TALAPP_START    0x0400
+#define POSIX_SPAWN_IOS_RESV1_APP_START 0x0400
+#define POSIX_SPAWN_IOS_APPLE_DAEMON_START      0x0800          /* not a bug, same as widget just rename */
+#define POSIX_SPAWN_IOS_APP_START       0x1000
+#else /* TARGET_OS_EMBEDDED */
+#define POSIX_SPAWN_OSX_TALAPP_START    0x0400
+#define POSIX_SPAWN_OSX_WIDGET_START    0x0800
+#define POSIX_SPAWN_OSX_DBCLIENT_START  0x0800          /* not a bug, same as widget just rename */
+#define POSIX_SPAWN_OSX_RESVAPP_START   0x1000          /* reserved for app start usages */
+#endif /* TARGET_OS_EMBEDDED */
+
+
+/*
+ * posix_spawn apptype and process attribute settings.
+ */
+#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED
+#define POSIX_SPAWN_APPTYPE_IOS_APPLEDAEMON    0x0001          /* it is an iOS apple daemon  */
+#else /* TARGET_OS_EMBEDDED */
+#define POSIX_SPAWN_APPTYPE_OSX_TAL    0x0001          /* it is a TAL app */
+#define POSIX_SPAWN_APPTYPE_OSX_WIDGET 0x0002          /* it is a widget */
+#define POSIX_SPAWN_APPTYPE_DELAYIDLESLEEP   0x10000000        /* Process is marked to delay idle sleep on disk IO */
+#endif /* TARGET_OS_EMBEDDED */
 
 /*
  * Allowable posix_spawn() file actions
@@ -190,7 +235,7 @@ struct _posix_spawn_args_desc {
        __darwin_size_t file_actions_size;      /* size of file actions block */
        _posix_spawn_file_actions_t
                                file_actions;   /* pointer to block */
-       __darwin_size_t port_actions_size; /* size of port actions block */
+       __darwin_size_t port_actions_size;      /* size of port actions block */
        _posix_spawn_port_actions_t
                                port_actions;   /* pointer to port block */
 };
index a1f06467a47c1723cc8251f2ccb91a413ac9e155..29cec47763c3598ea2895e4cf28b1d8144d42179 100644 (file)
@@ -544,7 +544,9 @@ SYSCTL_DECL(_user);
 #define KERN_KDGETENTROPY       16
 #define KERN_KDWRITETR         17
 #define KERN_KDWRITEMAP                18
-
+#define KERN_KDENABLE_BG_TRACE 19
+#define KERN_KDDISABLE_BG_TRACE        20
+#define KERN_KDSET_TYPEFILTER   22
 
 /* KERN_PANICINFO types */
 #define        KERN_PANICINFO_MAXSIZE  1       /* quad: panic UI image size limit */
index df71d010fa505779e6c78219fd9468be633edcb0..f2560fd7902b033395e85397a73e66a42e92d507 100644 (file)
@@ -56,7 +56,7 @@ extern struct sysent sysent[];
 #endif /* __INIT_SYSENT_C__ */
 
 extern int nsysent;
-#define NUM_SYSENT     439     /* Current number of defined syscalls */
+#define NUM_SYSENT     440     /* Current number of defined syscalls */
 
 /* sy_funnel flags bits */
 #define FUNNEL_MASK    0x07f
index a020b3919808010f8365507d350f0b839387c152..56528fa2ad81e491afa3b18d605f444296f4a8f2 100644 (file)
@@ -73,8 +73,8 @@
  * compile time; you *cannot* set it higher than the hard limit!!
  */
 #define        ARG_MAX            (256 * 1024) /* max bytes for an exec function */
-#define        CHILD_MAX                  266  /* max simultaneous processes */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#define        CHILD_MAX                  266  /* max simultaneous processes */
 #define        GID_MAX            2147483647U  /* max value for a gid_t (2^31-2) */
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 #define        LINK_MAX                32767   /* max file link count */
index 71004cf2a36a2bf044484bd67087f4be4a4c6f43..71f546ae535f60399e8c3fe51520430e782ea39f 100644 (file)
@@ -229,9 +229,9 @@ __BEGIN_DECLS
 void   closelog(void);
 void   openlog(const char *, int, int);
 int    setlogmask(int);
-void   syslog(int, const char *, ...) __printflike(2, 3) __DARWIN_LDBL_COMPAT(syslog);
+void   syslog(int, const char *, ...) __printflike(2, 3);
 #if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
-void   vsyslog(int, const char *, __darwin_va_list) __printflike(2, 0) __DARWIN_LDBL_COMPAT(vsyslog);
+void   vsyslog(int, const char *, __darwin_va_list) __printflike(2, 0);
 #endif
 __END_DECLS
 
index f08bc477cda279b302a4765fb59fe9b58f313197..263e9223b42e1c48ff64456a1c4ae918e065a4e4 100644 (file)
@@ -229,11 +229,22 @@ void      throttle_info_release(void *throttle_info);
 void   throttle_info_update(void *throttle_info, int flags);
 uint32_t throttle_lowpri_io(int sleep_amount);
 void   throttle_set_thread_io_policy(int policy);
+
 typedef struct __throttle_info_handle *throttle_info_handle_t;
-int            throttle_info_ref_by_mask(
-       uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle);
+int    throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle);
 void   throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle);
 void   throttle_info_update_by_mask(void *throttle_info_handle, int flags);
+
+void throttle_legacy_process_incr(void);
+void throttle_legacy_process_decr(void);
+
+/*
+ * 'throttle_info_handle' is acquired via 'throttle_info_ref_by_mask';
+ * 'policy' should be specified as either IOPOL_UTILITY or IOPOL_THROTTLE;
+ * all other values will be treated as IOPOL_NORMAL (i.e. no throttling)
+ */
+int    throttle_info_io_will_be_throttled(void *throttle_info_handle, int policy);
+
 __END_DECLS
 
 #endif /* !_SYS_SYSTM_H_ */
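
A hedged sketch chaining the mask-based SPI declared above (assuming, as is conventional here, that throttle_info_ref_by_mask() returns 0 on success):

static int
io_would_be_throttled(uint64_t devmask, int my_policy)
{
	throttle_info_handle_t h;
	int throttled = 0;

	if (throttle_info_ref_by_mask(devmask, &h) == 0) {
		throttled = throttle_info_io_will_be_throttled(h, my_policy);
		throttle_info_rel_by_mask(h);
	}
	return (throttled);
}
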
index ecfb234d5ba4a4e995b9ddd12bbf7ae009ca9edd..28ee788a5c650f6e92bdc46b8672bc27a14f1eff 100644 (file)
@@ -144,6 +144,7 @@ struct tty {
        int     t_lowat;                /* Low water mark. */
        int     t_gen;                  /* Generation number. */
        void    *t_iokit;               /* IOKit management */
+       int     t_refcnt;               /* reference count */
 };
 
 #define TTY_NULL (struct tty *)0
index 4ee9e86cf2dff4d13fe6766415a393b3f77024bf..37bcdfcff6e1798ade727c94376132649c8f98dc 100644 (file)
@@ -109,6 +109,7 @@ int ubc_upl_commit(upl_t);
 int    ubc_upl_commit_range(upl_t, upl_offset_t, upl_size_t, int);
 int    ubc_upl_abort(upl_t, int);
 int    ubc_upl_abort_range(upl_t, upl_offset_t, upl_size_t, int);
+void   ubc_upl_range_needed(upl_t, int, int);
 
 upl_page_info_t *ubc_upl_pageinfo(upl_t);
 upl_size_t ubc_upl_maxbufsize(void);
index 400620396d9c1dbd620b12d26e8e2735caa35435..9a75d32c3294749da96e1d51ef05b5255227dc2f 100644 (file)
@@ -89,6 +89,7 @@ struct        sockaddr_un {
 
 /* Socket options. */
 #define LOCAL_PEERCRED          0x001           /* retrieve peer credentials */
+#define LOCAL_PEERPID           0x002           /* retrieve peer pid */
 
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
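A userspace sketch of the new option: query the peer's pid on an AF_UNIX socket. Level SOL_LOCAL is assumed here, matching how LOCAL_PEERCRED is used:

	#include <sys/socket.h>
	#include <sys/un.h>
	#include <stdio.h>

	void print_peer_pid(int fd) {
		pid_t pid;
		socklen_t len = sizeof(pid);

		if (getsockopt(fd, SOL_LOCAL, LOCAL_PEERPID, &pid, &len) == 0)
			printf("peer pid: %d\n", (int)pid);
	}
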
index a50aebe3672b6e98dae4b235bc109ba9b5b5403d..d4d566654db15d91a29975613d5423b41bf3eb4f 100644 (file)
@@ -166,7 +166,7 @@ struct      unpcb {
        _UNPCB_LIST_ENTRY(unpcb_compat) unp_link;       /* glue on list of all PCBs */
        _UNPCB_PTR(struct socket *)     unp_socket;     /* pointer back to socket */
        _UNPCB_PTR(struct vnode *)      unp_vnode;      /* if associated with file */
-       ino_t                           unp_ino;        /* fake inode number */
+       u_int32_t                       unp_ino;        /* fake inode number */
        _UNPCB_PTR(struct unpcb_compat *) unp_conn;     /* control block of connected socket */
 #if defined(KERNEL)
        u_int32_t                       unp_refs;
index 4a59aa866fa784c83acac40d9b06983fcdff3e34..b5c1106e50238680b6c729530944b860492a86e9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,7 +80,6 @@
 #include <sys/vm.h>            /* XXX */
 #include <sys/sysctl.h>
 
-
 #ifdef KERNEL
 #ifdef BSD_KERNEL_PRIVATE
 #include <sys/pthread_internal.h> /* for uu_kwe entry */
@@ -103,6 +102,8 @@ struct vfs_context {
 /* XXX Deprecated: xnu source compatibility */
 #define uu_ucred       uu_context.vc_ucred
 
+struct label;          /* MAC label dummy struct */
+
 #define MAXTHREADNAMESIZE 64
 /*
  *     Per-thread U area.
@@ -159,10 +160,8 @@ struct uthread {
        caddr_t uu_wchan;                       /* sleeping thread wait channel */
        const char *uu_wmesg;                   /* ... wait message */
        int uu_flag;
-#if CONFIG_EMBEDDED
-       int uu_iopol_disk;                      /* disk I/O policy */
-#endif /* CONFIG_EMBEDDED */
        struct proc * uu_proc;
+       thread_t uu_thread;
        void * uu_userstate;
        wait_queue_set_t uu_wqset;                      /* cached across select calls */
        size_t uu_allocsize;                            /* ...size of select cache */
@@ -177,11 +176,13 @@ struct uthread {
 
        struct kaudit_record    *uu_ar;                 /* audit record */
        struct task*    uu_aio_task;                    /* target task for async io */
-
+    
+       u_int32_t       uu_network_lock_held;           /* network support for pf locking */
        lck_mtx_t       *uu_mtx;
 
+       TAILQ_ENTRY(uthread) uu_throttlelist;   /* List of uthreads currently throttled */
+       int             uu_on_throttlelist;
        int             uu_lowpri_window;
-       boolean_t       uu_throttle_isssd;
        boolean_t       uu_throttle_bc;
        void    *       uu_throttle_info;       /* pointer to throttled I/Os info */
 
@@ -203,8 +204,8 @@ struct uthread {
        uint32_t        t_dtrace_errno; /* Most recent errno */
         uint8_t         t_dtrace_stop;  /* indicates a DTrace desired stop */
         uint8_t         t_dtrace_sig;   /* signal sent via DTrace's raise() */
-        uint64_t       t_dtrace_resumepid; /* DTrace's pidresume() pid */
-
+        uint64_t        t_dtrace_resumepid; /* DTrace's pidresume() pid */
+                            
         union __tdu {
                 struct __tds {
                         uint8_t _t_dtrace_on;   /* hit a fasttrap tracepoint */
@@ -236,10 +237,12 @@ struct uthread {
 #if __sol64 || defined(__APPLE__)
         uint64_t        t_dtrace_regv;  /* DTrace saved reg from fasttrap */
 #endif
+       void *          t_dtrace_syscall_args;
 #endif /* CONFIG_DTRACE */
        void *          uu_threadlist;
        char *          pth_name;
        struct ksyn_waitq_element  uu_kwe;              /* user for pthread synch */
+       struct label *  uu_label;       /* MAC label */
 };
 
 typedef struct uthread * uthread_t;
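
The new uu_throttlelist/uu_on_throttlelist pair suggests throttled uthreads are now queued on a shared list. A minimal sketch of the TAILQ pattern such fields support (the list head and its locking are assumed to live elsewhere):

	TAILQ_HEAD(, uthread) throttle_uthlist;	/* assumed global, protected by a lock */

	if (ut->uu_on_throttlelist == 0) {
		TAILQ_INSERT_TAIL(&throttle_uthlist, ut, uu_throttlelist);
		ut->uu_on_throttlelist = 1;
	}
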
@@ -256,9 +259,7 @@ typedef struct uthread * uthread_t;
 #define UT_PASSIVE_IO  0x00000100      /* this thread issues passive I/O */
 #define UT_PROCEXIT    0x00000200      /* this thread completed the proc exit */
 #define UT_RAGE_VNODES 0x00000400      /* rapid age any vnodes created by this thread */       
-#if CONFIG_EMBEDDED
-#define UT_BACKGROUND  0x00000800      /* this thread is in background state */        
-#endif /* !CONFIG_EMBEDDED */
+/* 0x00000800 unused, used to be UT_BACKGROUND */
 #define UT_BACKGROUND_TRAFFIC_MGT      0x00001000 /* background traffic is regulated */
 
 #define        UT_VFORK        0x02000000      /* thread has vfork children */
index 965518cb08a0a266196a5a3b2ebd95e717550beb..1a93437295f9c7b4d25a8210ee9562525c157000 100644 (file)
@@ -145,8 +145,11 @@ enum vtagtype      {
 #define IO_BACKGROUND IO_PASSIVE /* used for backward compatibility; to be removed once IO_BACKGROUND is no longer
                                                                  * used by DiskImages in-kernel mode */
 #define        IO_NOAUTH       0x8000          /* No authorization checks. */
-#define IO_NODIRECT    0x10000         /* don't use direct synchronous writes if IO_NOCACHE is specified */
-
+#define IO_NODIRECT     0x10000                /* don't use direct synchronous writes if IO_NOCACHE is specified */
+#define IO_ENCRYPTED   0x20000         /* Retrieve encrypted blocks from the filesystem */
+#define IO_RETURN_ON_THROTTLE  0x40000
+#define IO_SINGLE_WRITER       0x80000
+#define IO_SYSCALL_DISPATCH            0x100000        /* I/O origin is file table syscall */
 
 /*
  * Component Name: this structure describes the pathname
@@ -466,7 +469,7 @@ struct vnode_trigger_param {
  * Note that this structure may be extended, but existing fields must not move.
  */
 
-#define VATTR_INIT(v)                  do {(v)->va_supported = (v)->va_active = 0ll; (v)->va_vaflags = 0;} while(0)
+#define VATTR_INIT(v)                  do {(v)->va_supported = (v)->va_active = 0ll; (v)->va_vaflags = 0; } while(0)
 #define VATTR_SET_ACTIVE(v, a)         ((v)->va_active |= VNODE_ATTR_ ## a)
 #define VATTR_SET_SUPPORTED(v, a)      ((v)->va_supported |= VNODE_ATTR_ ## a)
 #define VATTR_IS_SUPPORTED(v, a)       ((v)->va_supported & VNODE_ATTR_ ## a)
@@ -517,6 +520,8 @@ struct vnode_trigger_param {
 #define VNODE_ATTR_va_nchildren                (1LL<<28)       /* 10000000 */
 #define VNODE_ATTR_va_dirlinkcount     (1LL<<29)       /* 20000000 */
 #define VNODE_ATTR_va_addedtime                (1LL<<30)               /* 40000000 */
+#define VNODE_ATTR_va_dataprotect_class                (1LL<<31)               /* 80000000 */
+#define VNODE_ATTR_va_dataprotect_flags                (1LL<<32)               /* 100000000 */
 
 #define VNODE_ATTR_BIT(n)      (VNODE_ATTR_ ## n)
 /*
@@ -537,8 +542,8 @@ struct vnode_trigger_param {
                                VNODE_ATTR_BIT(va_name) |               \
                                VNODE_ATTR_BIT(va_type) |               \
                                VNODE_ATTR_BIT(va_nchildren) |          \
-                               VNODE_ATTR_BIT(va_dirlinkcount)|                \
-                VNODE_ATTR_BIT(va_addedtime)) 
+                               VNODE_ATTR_BIT(va_dirlinkcount) |       \
+                               VNODE_ATTR_BIT(va_addedtime)) 
 /*
  * Attributes that can be applied to a new file object.
  */
@@ -554,7 +559,9 @@ struct vnode_trigger_param {
                                VNODE_ATTR_BIT(va_encoding) |           \
                                VNODE_ATTR_BIT(va_type) |               \
                                VNODE_ATTR_BIT(va_uuuid) |              \
-                               VNODE_ATTR_BIT(va_guuid))
+                               VNODE_ATTR_BIT(va_guuid) |              \
+                               VNODE_ATTR_BIT(va_dataprotect_class) |  \
+                               VNODE_ATTR_BIT(va_dataprotect_flags))
 
 
 struct vnode_attr {
@@ -617,11 +624,21 @@ struct vnode_attr {
 #else
        void *          va_reserved1;
 #endif /* BSD_KERNEL_PRIVATE */
-    struct timespec va_addedtime;      /* timestamp when item was added to parent directory */
-
+       struct timespec va_addedtime;   /* timestamp when item was added to parent directory */
                
+       /* Data Protection fields */
+       uint32_t va_dataprotect_class;  /* class specified for this file if it didn't exist */
+       uint32_t va_dataprotect_flags;  /* flags from NP open(2) to the filesystem */
 };
 
+#ifdef BSD_KERNEL_PRIVATE
+/* 
+ * Flags for va_dataprotect_flags
+ */
+#define VA_DP_RAWENCRYPTED 0x0001
+
+#endif
+
 /*
  * Flags for va_vaflags.
  */
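
A hedged sketch of how a filesystem's create path might consume the new data-protection attributes through the standard VATTR macros (the consuming logic is assumed; only the attribute names come from this commit):

	/* Hypothetical vnop_create fragment: */
	if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
		uint32_t dpclass = vap->va_dataprotect_class;
		/* ... tag the new file with 'dpclass' ... */
		VATTR_SET_SUPPORTED(vap, va_dataprotect_class);
	}
	if (VATTR_IS_ACTIVE(vap, va_dataprotect_flags) &&
	    (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED)) {
		/* open(2) asked for raw encrypted access; skip decryption */
	}
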
@@ -666,14 +683,15 @@ extern int                vttoif_tab[];
 
 #define        REVOKEALL       0x0001          /* vnop_revoke: revoke all aliases */
 
-/* VNOP_REMOVE: do not delete busy files (Carbon remove file semantics) */
-#define VNODE_REMOVE_NODELETEBUSY  0x0001  
+/* VNOP_REMOVE/unlink flags: */
+#define VNODE_REMOVE_NODELETEBUSY                      0x0001 /* Do not delete busy files (Carbon) */
+#define VNODE_REMOVE_SKIP_NAMESPACE_EVENT      0x0002 /* Do not upcall to userland handlers */
 
 /* VNOP_READDIR flags: */
 #define VNODE_READDIR_EXTENDED    0x0001   /* use extended directory entries */
 #define VNODE_READDIR_REQSEEKOFF  0x0002   /* requires seek offset (cookies) */
 #define VNODE_READDIR_SEEKOFF32   0x0004   /* seek offset values should fit in 32 bits */
-
+#define VNODE_READDIR_NAMEMAX     0x0008   /* For extended readdir, try to limit names to NAME_MAX bytes */
 
 #define        NULLVP  ((struct vnode *)NULL)
 
@@ -1835,6 +1853,14 @@ vnode_t  vnode_getparent(vnode_t vp);
  @result 0 to indicate that a vnode can be opened, or an error that should be passed up to VFS.
  */
 int vnode_lookup_continue_needed(vnode_t vp, struct componentname *cnp);
+
+/*!
+ @function vnode_istty
+ @abstract Determine if the given vnode represents a tty device.
+ @param vp Vnode to examine.
+ @result Non-zero to indicate that the vnode represents a tty device. Zero otherwise.
+ */
+int vnode_istty(vnode_t vp);
 #endif /* KERNEL_PRIVATE */
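
A small illustrative caller of the newly exported vnode_istty (the surrounding read-path logic is hypothetical):

	int ioflag = 0;
	if (!vnode_istty(vp))
		ioflag |= IO_RETURN_ON_THROTTLE;	/* flag added earlier in this diff */
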
 
 #ifdef BSD_KERNEL_PRIVATE
@@ -1864,7 +1890,6 @@ int       vnode_makeimode(int, int);
 enum vtype     vnode_iftovt(int);
 int    vnode_vttoif(enum vtype);
 int    vnode_isshadow(vnode_t);
-int    vnode_istty(vnode_t vp);
 /*
  * Indicate that a file has multiple hard links.  VFS will always call
 * VNOP_LOOKUP on this vnode.  Volfs will always ask for its parent
index 7d17be99ee7add3130f3fadc5f658cb904074bb1..e846ac6799e6958482d15222a90947111f56ec98 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,12 +93,13 @@ struct label;
 
 LIST_HEAD(buflists, buf);
 
-
+#if CONFIG_VFS_FUNNEL
 struct unsafe_fsnode {
        lck_mtx_t fsnodelock;
        int32_t   fsnode_count;
        void *    fsnodeowner;
 };
+#endif /* CONFIG_VFS_FUNNEL */
 
 #if CONFIG_TRIGGERS
 /*
@@ -177,12 +178,14 @@ struct vnode {
        const char *v_name;                     /* name component of the vnode */
        vnode_t v_parent;                       /* pointer to parent vnode */
        struct lockf    *v_lockf;               /* advisory lock list head */
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
         struct unsafe_fsnode *v_unsafefs;      /* pointer to struct used to lock */
 #else 
        int32_t         v_reserved1;
+#ifdef __LP64__
        int32_t         v_reserved2;
-#endif /* __LP64__ */
+#endif
+#endif /* CONFIG_VFS_FUNNEL */
        int     (**v_op)(void *);               /* vnode operations vector */
        mount_t v_mount;                        /* ptr to vfs we are in */
        void *  v_data;                         /* private data for fs */
@@ -213,8 +216,9 @@ struct vnode {
 /*
  * v_listflag
  */
-#define VLIST_RAGE    0x01            /* vnode is currently in the rapid age list */
-#define VLIST_DEAD    0x02            /* vnode is currently in the dead list */
+#define VLIST_RAGE               0x01          /* vnode is currently in the rapid age list */
+#define VLIST_DEAD               0x02          /* vnode is currently in the dead list */
+#define VLIST_ASYNC_WORK         0x04          /* vnode is currently on the deferred async work queue */
 
 /*
  * v_lflags
@@ -372,6 +376,8 @@ struct ostat;
 
 #define BUILDPATH_NO_FS_ENTER 0x1 /* Use cache values, do not enter file system */
 #define BUILDPATH_CHECKACCESS 0x2 /* Check if parents have search rights */
+#define BUILDPATH_CHECK_MOVED 0x4 /* Return EAGAIN if the parent hierarchy is modified */
+
 int    build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx);
 
 int    bdevvp(dev_t dev, struct vnode **vpp);
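
With BUILDPATH_CHECK_MOVED, build_path can now fail with EAGAIN when the parent hierarchy shifts mid-walk. A hedged sketch of the retry loop a caller would use (buffer and context setup assumed):

	int err, outlen;
	char pathbuf[MAXPATHLEN];

	do {
		err = build_path(vp, pathbuf, sizeof(pathbuf), &outlen,
		    BUILDPATH_CHECK_MOVED, ctx);
	} while (err == EAGAIN);	/* parent chain moved; walk it again */
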
index bd91c3c31da77282bec427c2cd22b7f5c4ec12ff..b4fe1ac99a0dcab1e2365524e058eba3065e9226 100644 (file)
@@ -49,6 +49,7 @@
 
 #define        XATTR_MAXNAMELEN   127
 
+/* See the ATTR_CMN_FNDRINFO section of getattrlist(2) for details on FinderInfo */
 #define        XATTR_FINDERINFO_NAME     "com.apple.FinderInfo"
 
 #define        XATTR_RESOURCEFORK_NAME   "com.apple.ResourceFork"
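
A userspace sketch reading the FinderInfo attribute named above via the xattr interface:

	#include <sys/xattr.h>
	#include <stdio.h>

	void show_finderinfo(const char *path) {
		unsigned char finfo[32];	/* FinderInfo is 32 bytes; see getattrlist(2) */
		ssize_t n = getxattr(path, XATTR_FINDERINFO_NAME, finfo,
		    sizeof(finfo), 0, 0);

		if (n == 32)
			printf("%s has FinderInfo\n", path);
	}
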
index dfbe2e5d22ed383ed44457c6e1f8f2d42f917f1c..3175c316377ef76452d43cdadb0578321e67eaaa 100644 (file)
@@ -239,7 +239,6 @@ catch_mach_exception_raise(
        task_t                  self = current_task();
        thread_t                th_act;
        ipc_port_t              thread_port;
-       struct task             *sig_task;
        struct proc             *p;
        kern_return_t           result = MACH_MSG_SUCCESS;
        int                     ux_signal = 0;
@@ -273,10 +272,9 @@ catch_mach_exception_raise(
            ux_exception(exception, code[0], code[1], &ux_signal, &ucode);
 
            ut = get_bsdthread_info(th_act);
-           sig_task = get_threadtask(th_act);
-           p = (struct proc *) get_bsdtask_info(sig_task);
+           p = proc_findthread(th_act);
 
-           /* Can't deliver a signal without a bsd process */
+           /* Can't deliver a signal without a bsd process reference */
            if (p == NULL) {
                    ux_signal = 0;
                    result = KERN_FAILURE;
@@ -298,15 +296,9 @@ catch_mach_exception_raise(
                    struct sigacts      *ps;
 
                    sp = code[1];
-                   if (ut && (ut->uu_flag & UT_VFORK))
-                           p = ut->uu_proc;
-#if STACK_GROWTH_UP
-                   stack_min = p->user_stack;
-                   stack_max = p->user_stack + MAXSSIZ;
-#else /* STACK_GROWTH_UP */
+
                    stack_max = p->user_stack;
                    stack_min = p->user_stack - MAXSSIZ;
-#endif /* STACK_GROWTH_UP */
                    if (sp >= stack_min &&
                        sp < stack_max) {
                            /*
@@ -344,7 +336,8 @@ catch_mach_exception_raise(
                        ut->uu_subcode = code[1];                       
                        threadsignal(th_act, ux_signal, code[0]);
            }
-
+           if (p != NULL) 
+                   proc_rele(p);
            thread_deallocate(th_act);
        }
        else
index a18760397a7f81d57db01738e6ffee8bd73b8d5d..2e200b08c5ca4472fc413566d23d69bfb6073155 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define COMPAT_ONLY
 
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 #define THREAD_SAFE_FS(VP)  \
        ((VP)->v_unsafefs ? 0 : 1)
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
 #define NATIVE_XATTR(VP)  \
        ((VP)->v_mount ? (VP)->v_mount->mnt_kern_flag & MNTK_EXTENDED_ATTRS : 0)
@@ -166,7 +166,7 @@ vnode_setneedinactive(vnode_t vp)
 }
 
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 int
 lock_fsnode(vnode_t vp, int *funnel_state)
 {
@@ -206,7 +206,7 @@ unlock_fsnode(vnode_t vp, int *funnel_state)
        if (funnel_state)
                (void) thread_funnel_set(kernel_flock, *funnel_state);
 }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
 
 
@@ -221,21 +221,21 @@ int
 VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_mount == 0))
                return(ENOTSUP);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
-       
+#endif /* CONFIG_VFS_FUNNEL */
+        
        if (vfs_context_is64bit(ctx)) {
                if (vfs_64bitready(mp)) {
                        error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx);
@@ -248,11 +248,11 @@ VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx)
                error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx);
        }
        
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
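
The same guard shape recurs in every VFS_*/VNOP_* wrapper below; condensed from the hunks in this file, the pattern is:

	#if CONFIG_VFS_FUNNEL
		int thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
		int funnel_state = 0;
		if (!thread_safe)
			funnel_state = thread_funnel_set(kernel_flock, TRUE);
	#endif
		/* ... call through mnt_op / v_op ... */
	#if CONFIG_VFS_FUNNEL
		if (!thread_safe)
			(void) thread_funnel_set(kernel_flock, funnel_state);
	#endif
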
@@ -261,29 +261,28 @@ int
 VFS_START(mount_t mp, int flags, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_start == 0))
                return(ENOTSUP);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
-
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_start)(mp, flags, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -292,29 +291,28 @@ int
 VFS_UNMOUNT(mount_t mp, int flags, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_unmount == 0))
                return(ENOTSUP);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
-
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_unmount)(mp, flags, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -340,10 +338,10 @@ int
 VFS_ROOT(mount_t mp, struct vnode  ** vpp, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_root == 0))
                return(ENOTSUP);
@@ -352,20 +350,20 @@ VFS_ROOT(mount_t mp, struct vnode  ** vpp, vfs_context_t ctx)
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_root)(mp, vpp, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -374,28 +372,28 @@ int
 VFS_QUOTACTL(mount_t mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_quotactl == 0))
                return(ENOTSUP);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -404,10 +402,10 @@ int
 VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_getattr == 0))
                return(ENOTSUP);
@@ -416,20 +414,20 @@ VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_getattr)(mp, vfa, ctx);
        
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -438,10 +436,10 @@ int
 VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_setattr == 0))
                return(ENOTSUP);
@@ -450,20 +448,20 @@ VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_setattr)(mp, vfa, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -472,10 +470,10 @@ int
 VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_sync == 0))
                return(ENOTSUP);
@@ -484,20 +482,20 @@ VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx)
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_sync)(mp, flags, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -506,10 +504,10 @@ int
 VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget == 0))
                return(ENOTSUP);
@@ -518,20 +516,20 @@ VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx)
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -540,10 +538,10 @@ int
 VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_context_t ctx) 
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((mp == dead_mountp) || (mp->mnt_op->vfs_fhtovp == 0))
                return(ENOTSUP);
@@ -552,20 +550,20 @@ VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_contex
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -574,10 +572,10 @@ int
 VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ctx)
 {
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if ((vp->v_mount == dead_mountp) || (vp->v_mount->mnt_op->vfs_vptofh == 0))
                return(ENOTSUP);
@@ -586,20 +584,20 @@ VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ct
                ctx = vfs_context_current();
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, ctx);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(error);
 }
@@ -1052,12 +1050,12 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle)
                || (vfe->vfe_opvdescs == (struct vnodeopv_desc **)NULL))
                return(EINVAL);
 
-#ifdef __LP64__
-       /* Non-threadsafe filesystems are not supported for K64 */
+#if !CONFIG_VFS_FUNNEL
+       /* Non-threadsafe filesystems are not supported, e.g. on K64 and iOS */
        if ((vfe->vfe_flags &  (VFS_TBLTHREADSAFE | VFS_TBLFSNODELOCK)) == 0) {
                return (EINVAL);
        }
-#endif /* __LP64__ */
+#endif /* !CONFIG_VFS_FUNNEL */
 
        MALLOC(newvfstbl, void *, sizeof(struct vfstable), M_TEMP,
               M_WAITOK);
@@ -1080,12 +1078,12 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle)
                newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEINV2;
        if (vfe->vfe_flags &  VFS_TBLVNOP_PAGEOUTV2)
                newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEOUTV2;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (vfe->vfe_flags &  VFS_TBLTHREADSAFE)
                newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE;
        if (vfe->vfe_flags &  VFS_TBLFSNODELOCK)
                newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        if ((vfe->vfe_flags & VFS_TBLLOCALVOL) == VFS_TBLLOCALVOL)
                newvfstbl->vfc_flags |= MNT_LOCAL;
        if ((vfe->vfe_flags & VFS_TBLLOCALVOL) && (vfe->vfe_flags & VFS_TBLGENERICMNTARGS) == 0)
@@ -3016,10 +3014,10 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t
        int _err;
        struct vnop_lookup_args a;
        vnode_t vp;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_lookup_desc;
        a.a_dvp = dvp;
@@ -3027,20 +3025,20 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t
        a.a_cnp = cnp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a);
 
        vp = *vpp;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if ( (cnp->cn_flags & ISLASTCN) ) {
                        if ( (cnp->cn_flags & LOCKPARENT) ) {
@@ -3061,7 +3059,7 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t
                }
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3162,10 +3160,10 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode
 {
        int _err;
        struct vnop_create_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_create_desc;
        a.a_dvp = dvp;
@@ -3174,14 +3172,14 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode
        a.a_vap = vap;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a);
        if (_err == 0 && !NATIVE_XATTR(dvp)) {
@@ -3191,11 +3189,11 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode
                xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0);
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(dvp, _err, NOTE_WRITE);
 
@@ -3223,10 +3221,10 @@ VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t
 {
        int _err;
        struct vnop_whiteout_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_whiteout_desc;
        a.a_dvp = dvp;
@@ -3234,22 +3232,22 @@ VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(dvp, _err, NOTE_WRITE);
 
@@ -3278,10 +3276,10 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_
 
        int _err;
        struct vnop_mknod_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_mknod_desc;
        a.a_dvp = dvp;
@@ -3290,22 +3288,22 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_
        a.a_vap = vap;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_mknod_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(dvp, _err, NOTE_WRITE);
 
@@ -3330,10 +3328,10 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx)
 {
        int _err;
        struct vnop_open_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0; 
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3343,7 +3341,7 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx)
        a.a_mode = mode;
        a.a_context = ctx; 
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3354,18 +3352,18 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx)
                        }    
                }    
        }    
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }    
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }    
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3388,10 +3386,10 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx)
 {
        int _err;
        struct vnop_close_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3401,7 +3399,7 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx)
        a.a_fflag = fflag;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3412,18 +3410,18 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx)
                        }
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3446,10 +3444,10 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx)
 {
        int _err;
        struct vnop_access_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3459,22 +3457,22 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx)
        a.a_action = action;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_access_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3497,32 +3495,32 @@ VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx)
 {
        int _err;
        struct vnop_getattr_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_getattr_desc;
        a.a_vp = vp;
        a.a_vap = vap;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_getattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3545,24 +3543,24 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx)
 {
        int _err;
        struct vnop_setattr_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_setattr_desc;
        a.a_vp = vp;
        a.a_vap = vap;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_setattr_desc.vdesc_offset])(&a);
 
@@ -3601,11 +3599,11 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx)
                }
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /*
         * If we have changed any of the things about the file that are likely
@@ -3659,10 +3657,10 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
 {
        int _err;
        struct vnop_read_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3674,7 +3672,7 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
        a.a_ioflag = ioflag;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3685,18 +3683,18 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
                        }
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3721,10 +3719,10 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
 {
        struct vnop_write_args a;
        int _err;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3736,7 +3734,7 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
        a.a_ioflag = ioflag;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3747,18 +3745,18 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx)
                        }
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_write_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(vp, _err, NOTE_WRITE);
 
@@ -3786,10 +3784,10 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct
 {
        int _err;
        struct vnop_ioctl_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3822,7 +3820,7 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct
        a.a_fflag = fflag;
        a.a_context= ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3833,18 +3831,18 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct
                        }
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_ioctl_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3870,10 +3868,10 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx)
 {
        int _err;
        struct vnop_select_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -3885,7 +3883,7 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx)
        a.a_context = ctx;
        a.a_wql = wql;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
@@ -3896,18 +3894,18 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx)
                        }
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_select_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
                        unlock_fsnode(vp, NULL);
                }
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -3933,11 +3931,11 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx)
 {
        int _err;
        struct vnop_exchange_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
        vnode_t lock_first = NULL, lock_second = NULL;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_exchange_desc;
        a.a_fvp = fvp;
@@ -3945,7 +3943,7 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx)
        a.a_options = options;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(fvp);
        if (!thread_safe) {
                /*
@@ -3966,16 +3964,16 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx)
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*fvp->v_op[vnop_exchange_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(lock_second, NULL);
                unlock_fsnode(lock_first, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /* Don't post NOTE_WRITE because file descriptors follow the data ... */
        post_event_if_success(fvp, _err, NOTE_ATTRIB);
@@ -4003,30 +4001,30 @@ VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t ctx)
 {
        struct vnop_revoke_args a;
        int _err;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_revoke_desc;
        a.a_vp = vp;
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_revoke_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -4050,32 +4048,32 @@ VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t ctx)
 {
        int _err;
        struct vnop_mmap_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_mmap_desc;
        a.a_vp = vp;
        a.a_fflags = fflags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_mmap_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -4098,31 +4096,31 @@ VNOP_MNOMAP(vnode_t vp, vfs_context_t ctx)
 {
        int _err;
        struct vnop_mnomap_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_mnomap_desc;
        a.a_vp = vp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_mnomap_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -4146,32 +4144,32 @@ VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t ctx)
 {
        struct vnop_fsync_args a;
        int _err;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_fsync_desc;
        a.a_vp = vp;
        a.a_waitfor = waitfor;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_fsync_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -4198,10 +4196,10 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_
 {
        int _err;
        struct vnop_remove_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_remove_desc;
        a.a_dvp = dvp;
@@ -4210,14 +4208,14 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a);
 
@@ -4232,11 +4230,11 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_
                }
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK);
        post_event_if_success(dvp, _err, NOTE_WRITE);
@@ -4308,10 +4306,10 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct
 {
        int _err;
        struct vnop_link_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /*
         * For file systems with non-native extended attributes,
@@ -4337,22 +4335,22 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct
        a.a_cnp = cnp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*tdvp->v_op[vnop_link_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(vp, _err, NOTE_LINK);
        post_event_if_success(tdvp, _err, NOTE_WRITE);
@@ -4368,8 +4366,8 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
        int _err;
        vnode_t src_attr_vp = NULLVP;
        vnode_t dst_attr_vp = NULLVP;
-       struct nameidata fromnd;
-       struct nameidata tond;
+       struct nameidata *fromnd = NULL;
+       struct nameidata *tond = NULL;
        char smallname1[48];
        char smallname2[48];
        char *xfromname = NULL;
@@ -4378,9 +4376,9 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
 
        batched = vnode_compound_rename_available(fdvp);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        vnode_t fdvp_unsafe = (THREAD_SAFE_FS(fdvp) ? NULLVP : fdvp);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (!batched) {
                if (*fvpp == NULLVP) 
@@ -4429,10 +4427,11 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                 * is only for AppleDouble files.
                 */
                if (xfromname != NULL) {
-                       NDINIT(&fromnd, RENAME, OP_RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK,
+                       MALLOC(fromnd, struct nameidata *, sizeof (struct nameidata), M_TEMP, M_WAITOK);
+                       NDINIT(fromnd, RENAME, OP_RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK,
                               UIO_SYSSPACE, CAST_USER_ADDR_T(xfromname), ctx);
-                       fromnd.ni_dvp = fdvp;
-                       error = namei(&fromnd);
+                       fromnd->ni_dvp = fdvp;
+                       error = namei(fromnd);
                
                        /* 
                         * If there was an error looking up source attribute file, 
@@ -4440,13 +4439,13 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                         */
 
                        if (error == 0) {
-                               if (fromnd.ni_vp) {
+                               if (fromnd->ni_vp) {
                                        /* src_attr_vp indicates need to call vnode_put / nameidone later */
-                                       src_attr_vp = fromnd.ni_vp;
-                                       
-                                       if (fromnd.ni_vp->v_type != VREG) {
+                                       src_attr_vp = fromnd->ni_vp;
+                                                                               
+                                       if (fromnd->ni_vp->v_type != VREG) {
                                                src_attr_vp = NULLVP;
-                                               vnode_put(fromnd.ni_vp);
+                                               vnode_put(fromnd->ni_vp);
                                        }
                                } 
                                /*
@@ -4455,7 +4454,7 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                                 * have a vnode here, so we drop our namei buffer for the source attribute file
                                 */
                                if (src_attr_vp == NULLVP) {
-                                       nameidone(&fromnd);
+                                       nameidone(fromnd);
                                }
                        }
                }
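The hunk above is half of a stack-size fix: two struct nameidata locals are moved off the kernel stack onto the heap with MALLOC(..., M_TEMP, M_WAITOK), and every `fromnd.` access becomes `fromnd->`. A compilable user-space analogue of the pattern, with the hypothetical big_state_t standing in for struct nameidata:

	#include <stdlib.h>

	typedef struct { char path[1024]; char saved[1024]; } big_state_t;

	static int
	rename_pair(void)
	{
		/* heap-allocate instead of declaring two ~2 KB locals on the stack */
		big_state_t *from = calloc(1, sizeof(*from));
		big_state_t *to = calloc(1, sizeof(*to));
		int err = (from != NULL && to != NULL) ? 0 : -1;

		if (err == 0) {
			/* ... use from->path and to->path exactly as before ... */
		}
		free(from);	/* free(NULL) is a no-op, so cleanup stays uniform */
		free(to);
		return (err);
	}

In the kernel version, M_WAITOK means the allocation blocks rather than fails, so no NULL check is needed there; the FREE calls in the `out:` path below play the role of the unconditional free() here.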
@@ -4486,29 +4485,47 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                 * Note that tdvp already has an iocount reference. Make sure to check that we
                 * get a valid vnode from namei.
                 */
-               NDINIT(&tond, RENAME, OP_RENAME,
+               MALLOC(tond, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_WAITOK);
+               NDINIT(tond, RENAME, OP_RENAME,
                       NOCACHE | NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE,
                       CAST_USER_ADDR_T(xtoname), ctx);
-               tond.ni_dvp = tdvp;
-               error = namei(&tond);
+               tond->ni_dvp = tdvp;
+               error = namei(tond);
 
                if (error) 
                        goto out;
                
-               if (tond.ni_vp) {
-                       dst_attr_vp = tond.ni_vp;
+               if (tond->ni_vp) {
+                       dst_attr_vp = tond->ni_vp;
                }
                
                if (src_attr_vp) {
+                       const char *old_name = src_attr_vp->v_name;
+                       vnode_t old_parent = src_attr_vp->v_parent;
+       
                        if (batched) {
-                               error = VNOP_COMPOUND_RENAME(fdvp, &src_attr_vp, &fromnd.ni_cnd, NULL,
-                                               tdvp, &dst_attr_vp, &tond.ni_cnd, NULL,
+                               error = VNOP_COMPOUND_RENAME(fdvp, &src_attr_vp, &fromnd->ni_cnd, NULL,
+                                               tdvp, &dst_attr_vp, &tond->ni_cnd, NULL,
                                                0, ctx);
                        } else {
-                               error = VNOP_RENAME(fdvp, src_attr_vp, &fromnd.ni_cnd, 
-                                               tdvp, dst_attr_vp, &tond.ni_cnd, ctx);
+                               error = VNOP_RENAME(fdvp, src_attr_vp, &fromnd->ni_cnd, 
+                                               tdvp, dst_attr_vp, &tond->ni_cnd, ctx);
                        }
 
+                       if (error == 0 && old_name == src_attr_vp->v_name &&
+                                       old_parent == src_attr_vp->v_parent) {
+                               int update_flags = VNODE_UPDATE_NAME;
+                               
+                               if (fdvp != tdvp)
+                                       update_flags |= VNODE_UPDATE_PARENT;
+                               
+                               vnode_update_identity(src_attr_vp, tdvp,
+                                               tond->ni_cnd.cn_nameptr,
+                                               tond->ni_cnd.cn_namelen,
+                                               tond->ni_cnd.cn_hash,
+                                               update_flags);
+                       }
+                       
                        /* kevent notifications for moving resource files 
                         * _err is zero if we're here, so no need to notify directories, code
                         * below will do that.  only need to post the rename on the source and
@@ -4531,20 +4548,20 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                        args.a_desc    = &vnop_remove_desc;
                        args.a_dvp     = tdvp;
                        args.a_vp      = dst_attr_vp;
-                       args.a_cnp     = &tond.ni_cnd;
+                       args.a_cnp     = &tond->ni_cnd;
                        args.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                        if (fdvp_unsafe != NULLVP)
                                error = lock_fsnode(dst_attr_vp, NULL);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
                        if (error == 0) {
                                error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                                if (fdvp_unsafe != NULLVP)
                                        unlock_fsnode(dst_attr_vp, NULL);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
                                if (error == 0)
                                        vnode_setneedinactive(dst_attr_vp);
@@ -4560,13 +4577,18 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
 out:
        if (src_attr_vp) {
                vnode_put(src_attr_vp);
-               nameidone(&fromnd);
+               nameidone(fromnd);
        }
        if (dst_attr_vp) {
                vnode_put(dst_attr_vp);
-               nameidone(&tond);
+               nameidone(tond);
+       }
+       if (fromnd) {
+               FREE(fromnd, M_TEMP);
+       }
+       if (tond) {
+               FREE(tond, M_TEMP);
        }
-
        if (xfromname && xfromname != &smallname1[0]) {
                FREE(xfromname, M_TEMP);
        }
@@ -4606,12 +4628,12 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
        int _err = 0;
        int events;
        struct vnop_rename_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int funnel_state = 0;
        vnode_t lock_first = NULL, lock_second = NULL;
        vnode_t fdvp_unsafe = NULLVP;
        vnode_t tdvp_unsafe = NULLVP;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_rename_desc;
        a.a_fdvp = fdvp;
@@ -4622,7 +4644,7 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
        a.a_tcnp = tcnp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!THREAD_SAFE_FS(fdvp))
                fdvp_unsafe = fdvp;
        if (!THREAD_SAFE_FS(tdvp))
@@ -4675,32 +4697,32 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
                        goto out1;
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /* do the rename of the main file. */
        _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a);
 
-#ifndef  __LP64__
+#if CONFIG_VFS_FUNNEL
        if (fdvp_unsafe != NULLVP) {
                if (lock_second != NULL)
                        unlock_fsnode(lock_second, NULL);
                unlock_fsnode(lock_first, NULL);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (_err == 0) {
                if (tvp && tvp != fvp)
                        vnode_setneedinactive(tvp);
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 out1:
        if (fdvp_unsafe != NULLVP) {
                if (tdvp_unsafe != NULLVP)
                        unlock_fsnode(tdvp_unsafe, NULL);
                unlock_fsnode(fdvp_unsafe, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /* Wrote at least one directory.  If transplanted a dir, also changed link counts */
        if (0 == _err) {
@@ -4860,10 +4882,10 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 {
        int _err;
        struct vnop_mkdir_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_mkdir_desc;
        a.a_dvp = dvp;
@@ -4872,14 +4894,14 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        a.a_vap = vap;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_mkdir_desc.vdesc_offset])(&a);
        if (_err == 0 && !NATIVE_XATTR(dvp)) {
@@ -4889,11 +4911,11 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0);
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE);
 
@@ -4975,10 +4997,10 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c
 {
        int _err;
        struct vnop_rmdir_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_rmdir_desc;
        a.a_dvp = dvp;
@@ -4986,14 +5008,14 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c
        a.a_cnp = cnp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_rmdir_desc.vdesc_offset])(&a);
 
@@ -5008,11 +5030,11 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c
                }
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /* If you delete a dir, it loses its "." reference --> NOTE_LINK */
        post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK);
@@ -5182,9 +5204,9 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap,
        nameidone(&nd);
 
        if (xvp->v_type == VREG) {
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                int thread_safe = THREAD_SAFE_FS(dvp);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
                struct vnop_setattr_args a;
 
                a.a_desc = &vnop_setattr_desc;
@@ -5192,26 +5214,26 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap,
                a.a_vap = vap;
                a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                if (!thread_safe) {
                        if ( (lock_fsnode(xvp, NULL)) )
                                goto out1;
                }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
                (void) (*xvp->v_op[vnop_setattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                if (!thread_safe) {
                        unlock_fsnode(xvp, NULL);
                }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        }
 
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
 out1:          
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        vnode_put(xvp);
 
 out2:
@@ -5244,10 +5266,10 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 {
        int _err;
        struct vnop_symlink_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_symlink_desc;
        a.a_dvp = dvp;
@@ -5257,14 +5279,14 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        a.a_target = target;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(dvp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(dvp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*dvp->v_op[vnop_symlink_desc.vdesc_offset])(&a);   
        if (_err == 0 && !NATIVE_XATTR(dvp)) {
@@ -5274,11 +5296,11 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0);
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(dvp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        
        post_event_if_success(dvp, _err, NOTE_WRITE);
 
@@ -5308,10 +5330,10 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
 {
        int _err;
        struct vnop_readdir_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_readdir_desc;
        a.a_vp = vp;
@@ -5320,7 +5342,7 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
        a.a_eofflag = eofflag;
        a.a_numdirent = numdirent;
        a.a_context = ctx;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
 
        if (!thread_safe) {
@@ -5328,15 +5350,15 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_readdir_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        return (_err);
 }
 
@@ -5366,10 +5388,10 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint
 {
        int _err;
        struct vnop_readdirattr_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_readdirattr_desc;
        a.a_vp = vp;
@@ -5382,22 +5404,22 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint
        a.a_actualcount = actualcount;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_readdirattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5440,32 +5462,32 @@ VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t ctx)
 {
        int _err;
        struct vnop_readlink_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_readlink_desc;
        a.a_vp = vp;
        a.a_uio = uio;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_readlink_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5487,31 +5509,31 @@ VNOP_INACTIVE(struct vnode *vp, vfs_context_t ctx)
 {
        int _err;
        struct vnop_inactive_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_inactive_desc;
        a.a_vp = vp;
        a.a_context = ctx;
        
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_inactive_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
 #if NAMEDSTREAMS
        /* For file systems that do not support namedstream natively, mark 
@@ -5548,29 +5570,29 @@ VNOP_RECLAIM(struct vnode *vp, vfs_context_t ctx)
 {
        int _err;
        struct vnop_reclaim_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_reclaim_desc;
        a.a_vp = vp;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_reclaim_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5602,10 +5624,10 @@ VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx)
 {
        int _err;
        struct vnop_pathconf_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_pathconf_desc;
        a.a_vp = vp;
@@ -5613,22 +5635,22 @@ VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx)
        a.a_retval = retval;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_pathconf_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5665,10 +5687,10 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags,
 {
        int _err;
        struct vnop_advlock_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_advlock_desc;
        a.a_vp = vp;
@@ -5678,12 +5700,12 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags,
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        /* Disallow advisory locking on non-seekable vnodes */
        if (vnode_isfifo(vp)) {
@@ -5698,11 +5720,11 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags,
                }
        }
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5731,10 +5753,10 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc
 {
        int _err;
        struct vnop_allocate_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_allocate_desc;
        a.a_vp = vp;
@@ -5744,14 +5766,14 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc
        a.a_offset = offset;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_allocate_desc.vdesc_offset])(&a);
 #if CONFIG_FSE
@@ -5760,11 +5782,11 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc
        }
 #endif
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -5791,10 +5813,10 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset,
 {
        int _err;
        struct vnop_pagein_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_pagein_desc;
        a.a_vp = vp;
@@ -5805,20 +5827,20 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset,
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_pagein_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        
        return (_err);
 }
@@ -5846,10 +5868,10 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset,
 {
        int _err;
        struct vnop_pageout_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_pageout_desc;
        a.a_vp = vp;
@@ -5860,20 +5882,20 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset,
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_pageout_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(vp, _err, NOTE_WRITE);
 
@@ -5890,6 +5912,7 @@ vn_remove(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struc
        }
 }
 
+#if CONFIG_SEARCHFS
 
 #if 0
 /*
@@ -5920,10 +5943,10 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct
 {
        int _err;
        struct vnop_searchfs_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_searchfs_desc;
        a.a_vp = vp;
@@ -5940,25 +5963,26 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct
        a.a_searchstate = searchstate;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_searchfs_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
+#endif /* CONFIG_SEARCHFS */
 
 #if 0
 /*
@@ -6002,10 +6026,10 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options
 {
        struct vnop_getxattr_args a;
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_getxattr_desc;
        a.a_vp = vp;
@@ -6015,22 +6039,22 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options
        a.a_options = options;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (error = lock_fsnode(vp, &funnel_state)) ) {
                        return (error);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*vp->v_op[vnop_getxattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -6040,10 +6064,10 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_
 {
        struct vnop_setxattr_args a;
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_setxattr_desc;
        a.a_vp = vp;
@@ -6052,22 +6076,22 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_
        a.a_options = options;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (error = lock_fsnode(vp, &funnel_state)) ) {
                        return (error);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*vp->v_op[vnop_setxattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (error == 0)
                vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
@@ -6082,10 +6106,10 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx)
 {
        struct vnop_removexattr_args a;
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_removexattr_desc;
        a.a_vp = vp;
@@ -6093,22 +6117,22 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx)
        a.a_options = options;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (error = lock_fsnode(vp, &funnel_state)) ) {
                        return (error);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*vp->v_op[vnop_removexattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        post_event_if_success(vp, error, NOTE_ATTRIB);
        
@@ -6120,10 +6144,10 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c
 {
        struct vnop_listxattr_args a;
        int error;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_listxattr_desc;
        a.a_vp = vp;
@@ -6132,22 +6156,22 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c
        a.a_options = options;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (error = lock_fsnode(vp, &funnel_state)) ) {
                        return (error);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        error = (*vp->v_op[vnop_listxattr_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (error);
 }
@@ -6171,30 +6195,30 @@ VNOP_BLKTOOFF(struct vnode *vp, daddr64_t lblkno, off_t *offset)
 {
        int _err;
        struct vnop_blktooff_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_blktooff_desc;
        a.a_vp = vp;
        a.a_lblkno = lblkno;
        a.a_offset = offset;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_blktooff_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -6217,30 +6241,30 @@ VNOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr64_t *lblkno)
 {
        int _err;
        struct vnop_offtoblk_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_offtoblk_desc;
        a.a_vp = vp;
        a.a_offset = offset;
        a.a_lblkno = lblkno;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_offtoblk_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return (_err);
 }
@@ -6268,10 +6292,11 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size
 {
        int _err;
        struct vnop_blockmap_args a;
-#ifndef __LP64__
+       size_t localrun = 0;
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ctx == NULL) {
                ctx = vfs_context_current();
@@ -6281,25 +6306,41 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size
        a.a_foffset = foffset;
        a.a_size = size;
        a.a_bpn = bpn;
-       a.a_run = run;
+       a.a_run = &localrun;
        a.a_poff = poff;
        a.a_flags = flags;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                funnel_state = thread_funnel_set(kernel_flock, TRUE);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_blockmap_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                (void) thread_funnel_set(kernel_flock, funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
+
+       /*
+        * We used a local variable to request the length of the I/O run
+        * from the underlying filesystem.  If the filesystem returns a
+        * malformed value, cap it at the requested length.  Update 'run'
+        * on the way out.
+        */
+       if (_err == 0) {
+               if (localrun > size) {
+                       localrun = size;
+               }
+
+               if (run) {
+                       *run = localrun;
+               }
+       }
 
        return (_err);
 }
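The localrun plumbing above is defensive: the VNOP fills in a kernel-owned local rather than the caller's pointer, the result is clamped so a buggy filesystem cannot report a run longer than the request, and the caller's `run` is written only on success and only when non-NULL. The clamp itself, as a standalone sketch:

	#include <stddef.h>

	/* Cap a lower layer's reported run length at the length requested;
	 * analogue of the localrun handling in VNOP_BLOCKMAP above. */
	static size_t
	clamp_run(size_t requested, size_t reported)
	{
		return (reported > requested) ? requested : reported;
	}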
@@ -6352,32 +6393,32 @@ VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t ctx)
 {
        int _err;
        struct vnop_kqfilt_add_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = VDESC(vnop_kqfilt_add);
        a.a_vp = vp;
        a.a_kn = kn;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_kqfilt_add_desc.vdesc_offset])(&a);
        
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(_err);
 }
@@ -6395,32 +6436,32 @@ VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t ctx)
 {
        int _err;
        struct vnop_kqfilt_remove_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = VDESC(vnop_kqfilt_remove);
        a.a_vp = vp;
        a.a_ident = ident;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_kqfilt_remove_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(_err);
 }
@@ -6430,10 +6471,10 @@ VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_cont
 {
        int _err;
        struct vnop_monitor_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = VDESC(vnop_monitor);
        a.a_vp = vp;
@@ -6442,22 +6483,22 @@ VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_cont
        a.a_handle = handle;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        
        _err = (*vp->v_op[vnop_monitor_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(_err);
 }
@@ -6475,32 +6516,32 @@ VNOP_SETLABEL(struct vnode *vp, struct label *label, vfs_context_t ctx)
 {
        int _err;
        struct vnop_setlabel_args a;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        int thread_safe;
        int funnel_state = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = VDESC(vnop_setlabel);
        a.a_vp = vp;
        a.a_vl = label;
        a.a_context = ctx;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        thread_safe = THREAD_SAFE_FS(vp);
        if (!thread_safe) {
                if ( (_err = lock_fsnode(vp, &funnel_state)) ) {
                        return (_err);
                }
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        _err = (*vp->v_op[vnop_setlabel_desc.vdesc_offset])(&a);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!thread_safe) {
                unlock_fsnode(vp, &funnel_state);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        return(_err);
 }
@@ -6515,10 +6556,10 @@ VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperatio
 {
        struct vnop_getnamedstream_args a;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!THREAD_SAFE_FS(vp))
                return (ENOTSUP);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_getnamedstream_desc;
        a.a_vp = vp;
@@ -6539,10 +6580,10 @@ VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int flags, vfs
 {
        struct vnop_makenamedstream_args a;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!THREAD_SAFE_FS(vp))
                return (ENOTSUP);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_makenamedstream_desc;
        a.a_vp = vp;
@@ -6563,10 +6604,10 @@ VNOP_REMOVENAMEDSTREAM(vnode_t vp, vnode_t svp, const char *name, int flags, vfs
 {
        struct vnop_removenamedstream_args a;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (!THREAD_SAFE_FS(vp))
                return (ENOTSUP);
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        a.a_desc = &vnop_removenamedstream_desc;
        a.a_vp = vp;
index 091ee16ab3f9be468a2411f47fcc82f00773aa2c..7033b4cf641e8b6d9614e7c767068d6b405c8e30 100644 (file)
@@ -395,7 +395,7 @@ static struct getattrlist_attrtab getattrlist_common_tab[] = {
        {ATTR_CMN_FILEID,       VATTR_BIT(va_fileid),           sizeof(uint64_t),               KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_PARENTID,     VATTR_BIT(va_parentid),         sizeof(uint64_t),               KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_FULLPATH, 0,  sizeof(struct attrreference),   KAUTH_VNODE_READ_ATTRIBUTES     },
-    {ATTR_CMN_ADDEDTIME, VATTR_BIT(va_addedtime), ATTR_TIME_SIZE,      KAUTH_VNODE_READ_ATTRIBUTES}, 
+       {ATTR_CMN_ADDEDTIME, VATTR_BIT(va_addedtime), ATTR_TIME_SIZE,   KAUTH_VNODE_READ_ATTRIBUTES}, 
        {ATTR_CMN_RETURNED_ATTRS, 0,                            sizeof(attribute_set_t),        0},
        {0, 0, 0, 0}
 };
@@ -544,7 +544,6 @@ getattrlist_fixupattrs(attribute_set_t *asp, struct vnode_attr *vap)
                         * on.  This is done so that we can uncheck those bits and re-request
                         * a vnode_getattr from the filesystem again.
                         */
-
                        if ((tab->attr & asp->commonattr) &&
                            (tab->bits & vap->va_active) &&
                            (tab->bits & vap->va_supported) == 0) {
@@ -1774,7 +1773,6 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
                ab.actual.commonattr |= ATTR_CMN_ADDEDTIME;
        }
 
-
        /* directory attributes *********************************************/
        if (al.dirattr && (vtype == VDIR)) {
                if (al.dirattr & ATTR_DIR_LINKCOUNT) {  /* full count of entries */
@@ -2137,6 +2135,21 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con
                }
        }
 
+       /*
+        * If the caller's bitmaps indicate that there are no attributes to set,
+        * then exit early.  In particular, we want to avoid the MALLOC below
+        * since the caller's bufferSize could be zero, and MALLOC of zero bytes
+        * returns a NULL pointer, which would cause setattrlist to return ENOMEM.
+        */
+       if (al.commonattr == 0 &&
+               (al.volattr & ~ATTR_VOL_INFO) == 0 &&
+               al.dirattr == 0 &&
+               al.fileattr == 0 &&
+               al.forkattr == 0) {
+               error = 0;
+               goto out;
+       }
+               
        /*
         * Make the naive assumption that the caller has supplied a reasonable buffer
         * size.  We could be more careful by pulling in the fixed-size region, checking
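The early return above exists because of an allocator corner case: with nothing to set, the caller's bufferSize may be zero, and MALLOC(0) yields NULL, which the code after this point would misreport as ENOMEM. (volattr masks out ATTR_VOL_INFO, which flags a volume-attribute request rather than naming a settable attribute itself.) A user-space analogue of the guard:

	#include <stdlib.h>

	/* Succeed before touching the allocator when no attribute bits are
	 * set; malloc(0) may legally return NULL, which is not an
	 * out-of-memory condition here. */
	static int
	set_attrs(unsigned common, unsigned vol, unsigned dir,
	    unsigned file, unsigned fork, size_t bufsize)
	{
		if (common == 0 && vol == 0 && dir == 0 && file == 0 && fork == 0)
			return 0;	/* nothing to set: early out */

		void *buf = malloc(bufsize);
		if (buf == NULL)
			return -1;	/* a genuine allocation failure */
		/* ... unpack the caller's buffer and apply the attributes ... */
		free(buf);
		return 0;
	}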
index 53f4f5576ba5474cedb90bf6188049aeb7efc85b..ce7c68e8277e8fa0ea5c776b5088a8852008dcca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/ubc_internal.h>
 
 #include <sys/sdt.h>
+#include <sys/cprotect.h>
 
 
 #if BALANCE_QUEUES
@@ -382,29 +383,82 @@ buf_markfua(buf_t bp) {
         SET(bp->b_flags, B_FUA);
 }
 
-#ifdef CONFIG_PROTECT
+#if CONFIG_PROTECT
+void
+buf_setcpaddr(buf_t bp, struct cprotect *entry) {
+       bp->b_attr.ba_cpentry = entry;
+}
+
+void
+buf_setcpoff (buf_t bp, uint64_t foffset) {
+       bp->b_attr.ba_cp_file_off = foffset;
+}
+
 void *
-buf_getcpaddr(buf_t bp) {
-       return bp->b_cpentry;
+bufattr_cpaddr(bufattr_t bap) {
+       return (bap->ba_cpentry);
 }
 
-void 
-buf_setcpaddr(buf_t bp, void *cp_entry_addr) {
-       bp->b_cpentry = (struct cprotect *) cp_entry_addr;
+uint64_t
+bufattr_cpoff(bufattr_t bap) {
+       return (bap->ba_cp_file_off);
+}
+
+void
+bufattr_setcpaddr(bufattr_t bap, void *cp_entry_addr) {
+        bap->ba_cpentry = cp_entry_addr;
+}
+
+void
+bufattr_setcpoff(bufattr_t bap, uint64_t foffset) {
+        bap->ba_cp_file_off = foffset;
 }
 
 #else
 void *
-buf_getcpaddr(buf_t bp __unused) {
-       return NULL;
+bufattr_cpaddr(bufattr_t bap __unused) {
+        return NULL;
 }
 
-void 
-buf_setcpaddr(buf_t bp __unused, void *cp_entry_addr __unused) {
+uint64_t
+bufattr_cpoff(bufattr_t bap __unused) {
+       return 0;
+}
+
+void
+bufattr_setcpaddr(bufattr_t bap __unused, void *cp_entry_addr __unused) {
+}
+
+void
+bufattr_setcpoff(__unused bufattr_t bap, __unused uint64_t foffset) {
        return;
 }
 #endif /* CONFIG_PROTECT */
 
+bufattr_t
+bufattr_alloc() {
+       bufattr_t bap;
+       MALLOC(bap, bufattr_t, sizeof(struct bufattr), M_TEMP, M_WAITOK);
+       if (bap == NULL)
+               return NULL;
+
+       bzero(bap, sizeof(struct bufattr));
+       return bap;
+}
+
+void
+bufattr_free(bufattr_t bap) {
+       if (bap)
+               FREE(bap, M_TEMP);
+}
+
+int
+bufattr_rawencrypted(bufattr_t bap) {
+       if ( (bap->ba_flags & BA_RAW_ENCRYPTED_IO) )
+               return 1;
+       return 0;
+}
+
 int
 bufattr_throttled(bufattr_t bap) {
        if ( (bap->ba_flags & BA_THROTTLED_IO) )
@@ -412,11 +466,51 @@ bufattr_throttled(bufattr_t bap) {
        return 0;
 }
 
+int
+bufattr_nocache(bufattr_t bap) {
+       if ( (bap->ba_flags & BA_NOCACHE) )
+               return 1;
+       return 0;
+}
+
+int
+bufattr_meta(bufattr_t bap) {
+       if ( (bap->ba_flags & BA_META) )
+               return 1;
+       return 0;
+}
+
+int
+#if !CONFIG_EMBEDDED
+bufattr_delayidlesleep(bufattr_t bap) 
+#else /* !CONFIG_EMBEDDED */
+bufattr_delayidlesleep(__unused bufattr_t bap) 
+#endif /* !CONFIG_EMBEDDED */
+{
+#if !CONFIG_EMBEDDED
+       if ( (bap->ba_flags & BA_DELAYIDLESLEEP) )
+               return 1;
+#endif /* !CONFIG_EMBEDDED */
+       return 0;
+}
+
 bufattr_t
 buf_attr(buf_t bp) {
        return &bp->b_attr;
 }
 
+void 
+buf_markstatic(buf_t bp __unused) {
+       SET(bp->b_flags, B_STATICCONTENT);
+}
+
+int
+buf_static(buf_t bp) {
+       if ( (bp->b_flags & B_STATICCONTENT) )
+               return 1;
+       return 0;
+}
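These hunks move the content-protection state from buf_t (the old b_cpentry field) into struct bufattr and wrap it in accessor pairs; when CONFIG_PROTECT is off, the same symbols compile to stubs, so callers never need their own #ifdefs. The idiom, reduced to a compilable sketch (WITH_PROTECT and the blob type are illustrative, not the kernel's names):

	#include <stdint.h>
	#include <stddef.h>

	struct blob_attr { void *cp_entry; uint64_t cp_off; };

	#if WITH_PROTECT
	static void *blob_cpaddr(struct blob_attr *b) { return b->cp_entry; }
	static void blob_setcpoff(struct blob_attr *b, uint64_t o) { b->cp_off = o; }
	#else
	/* same API, compiled to stubs: callers stay #ifdef-free */
	static void *blob_cpaddr(struct blob_attr *b) { (void)b; return NULL; }
	static void blob_setcpoff(struct blob_attr *b, uint64_t o) { (void)b; (void)o; }
	#endif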
+
 errno_t
 buf_error(buf_t bp) {
         
@@ -1135,7 +1229,7 @@ buf_strategy(vnode_t devvp, void *ap)
                        return (cluster_bp(bp));
                }
                if (bp->b_blkno == bp->b_lblkno) {
-                       off_t   f_offset;
+                   off_t       f_offset;
                        size_t  contig_bytes;
                  
                        if ((error = VNOP_BLKTOOFF(vp, bp->b_lblkno, &f_offset))) {
@@ -1143,21 +1237,22 @@ buf_strategy(vnode_t devvp, void *ap)
                                buf_seterror(bp, error);
                                buf_biodone(bp);
 
-                               return (error);
+                           return (error);
                        }
-                       if ((error = VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL))) {
+
+               if ((error = VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL))) {
                                DTRACE_IO1(start, buf_t, bp);
                                buf_seterror(bp, error);
                                buf_biodone(bp);
 
                                return (error);
                        }
-                       
+
                        DTRACE_IO1(start, buf_t, bp);
 #if CONFIG_DTRACE
                        dtrace_io_start_flag = 1;
 #endif /* CONFIG_DTRACE */                     
-                       
+
                        if ((bp->b_blkno == -1) || (contig_bytes == 0)) {
                                /* Set block number to force biodone later */
                                bp->b_blkno = -1;
@@ -1186,6 +1281,33 @@ buf_strategy(vnode_t devvp, void *ap)
                DTRACE_IO1(start, buf_t, bp);
 #endif /* CONFIG_DTRACE */
        
+#if CONFIG_PROTECT
+       /* Capture f_offset in the bufattr */
+       if (bp->b_attr.ba_cpentry != 0) {
+               /* No need to go here for older EAs */
+               if (bp->b_attr.ba_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
+                       off_t f_offset;
+                       if ((error = VNOP_BLKTOOFF(bp->b_vp, bp->b_lblkno, &f_offset)))
+                               return error;
+
+                       /* 
+                        * Attach the file offset to this buffer.  The
+                        * bufattr attributes will be passed down the stack
+                        * until they reach IOFlashStorage.  IOFlashStorage
+                        * will retain the offset in a local variable when it
+                        * issues its I/Os to the NAND controller.       
+                        * 
+                        * Note that LwVM may end up splitting this I/O 
+                        * into sub-I/Os if it crosses a chunk boundary.  In this
+                        * case, LwVM will update this field when it dispatches
+                        * each I/O to IOFlashStorage.  But from our perspective
+                        * we have only issued a single I/O.
+                        */
+                       bufattr_setcpoff (&(bp->b_attr), (u_int64_t)f_offset);
+               }
+       }
+#endif
+
        /*
         * we can issue the I/O because...
         * either B_CLUSTER is set which
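The CONFIG_PROTECT block above derives the file offset for a buffer whose class keys use offset-based IVs (CP_OFF_IV_ENABLED) and stashes it in the bufattr so lower layers can recover it. For a filesystem with a fixed block size, the conversion VNOP_BLKTOOFF performs is essentially a multiply; a hedged sketch:

	#include <stdint.h>

	/* Fixed-block-size analogue of VNOP_BLKTOOFF: logical block number
	 * to byte offset.  Real filesystems may map blocks less uniformly. */
	static int
	blk_to_off(int64_t lblkno, uint32_t blksize, int64_t *offset)
	{
		if (lblkno < 0 || offset == NULL)
			return -1;
		*offset = lblkno * (int64_t)blksize;
		return 0;
	}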
@@ -1489,12 +1611,20 @@ try_dirty_list:
 
 void
 buf_flushdirtyblks(vnode_t vp, int wait, int flags, const char *msg) {
+
+       (void) buf_flushdirtyblks_skipinfo(vp, wait, flags, msg);
+       return;
+}
+
+int
+buf_flushdirtyblks_skipinfo(vnode_t vp, int wait, int flags, const char *msg) {
        buf_t   bp;
        int     writes_issued = 0;
        errno_t error;
        int     busy = 0;
        struct  buflists local_iterblkhd;
        int     lock_flags = BAC_NOWAIT | BAC_REMOVE;
+       int any_locked = 0;
 
        if (flags & BUF_SKIP_LOCKED)
                lock_flags |= BAC_SKIP_LOCKED;
@@ -1508,11 +1638,26 @@ loop:
                        bp = LIST_FIRST(&local_iterblkhd);
                        LIST_REMOVE(bp, b_vnbufs);
                        LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs);
-                       
-                       if ((error = buf_acquire_locked(bp, lock_flags, 0, 0)) == EBUSY)
-                               busy++;
-                       if (error)
-                               continue;
+
+                       if ((error = buf_acquire_locked(bp, lock_flags, 0, 0)) == EBUSY) {
+                               busy++;
+                       }
+                       if (error) {
+                               /* 
+                                * If we passed in BUF_SKIP_LOCKED or BUF_SKIP_NONLOCKED,
+                                * we may want to do somethign differently if a locked or unlocked
+                                * we may want to do something differently if a locked or unlocked
+                                * buffer was encountered (depending on the argument specified).
+                                * In this case, we know that one of those two was set, and that
+                                * the buf acquisition failed above.
+                                *
+                                * If it failed with EDEADLK, record that fact so it can be
+                                * reported to the caller later.  Most callers should not care.
+                               if (error == EDEADLK) {
+                                       any_locked++;
+                               }
+                               continue;
+                       }
                        lck_mtx_unlock(buf_mtxp);
 
                        bp->b_flags &= ~B_LOCKED;
@@ -1558,6 +1703,8 @@ loop:
                        goto loop;
                }
        }
+
+       return any_locked;
 }
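buf_flushdirtyblks() keeps its old void signature by delegating to the new _skipinfo variant, which counts EDEADLK results from buf_acquire_locked so callers that care can learn whether locked buffers were skipped. The wrapper idiom in miniature (names here are illustrative, not the kernel API):

	/* Richer variant: returns how many entries were skipped while locked. */
	static int
	flush_dirty_counting(void)
	{
		int any_locked = 0;
		/* ... walk the dirty list; on an EDEADLK acquire failure:
		 *     any_locked++; continue; ... */
		return any_locked;
	}

	/* Legacy entry point: identical behavior, result discarded. */
	static void
	flush_dirty(void)
	{
		(void) flush_dirty_counting();
	}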
 
 
@@ -2267,6 +2414,8 @@ buf_brelse_shadow(buf_t bp)
        buf_t   bp_data;
        int     data_ref = 0;
 #endif
+       int need_wakeup = 0;
+
        lck_mtx_lock_spin(buf_mtxp);
 
        bp_head = (buf_t)bp->b_orig;
@@ -2334,8 +2483,17 @@ buf_brelse_shadow(buf_t bp)
 
                        bp_return = bp_head;
                }
+               if (ISSET(bp_head->b_lflags, BL_WANTED_REF)) {
+                       CLR(bp_head->b_lflags, BL_WANTED_REF);
+                       need_wakeup = 1;
+               }
        }
        lck_mtx_unlock(buf_mtxp);
+       
+       if (need_wakeup) {
+               wakeup(bp_head);
+       }
+
 #ifdef BUF_MAKE_PRIVATE        
        if (bp == bp_data && data_ref == 0)
                buf_free_meta_store(bp);
@@ -2662,7 +2820,30 @@ incore_locked(vnode_t vp, daddr64_t blkno, struct bufhashhdr *dp)
        return (NULL);
 }
 
+void
+buf_wait_for_shadow_io(vnode_t vp, daddr64_t blkno)
+{
+       buf_t bp;
+       struct  bufhashhdr *dp;
+
+       dp = BUFHASH(vp, blkno);
+
+       lck_mtx_lock_spin(buf_mtxp);
+
+       for (;;) {
+               if ((bp = incore_locked(vp, blkno, dp)) == NULL)
+                       break;
+
+               if (bp->b_shadow_ref == 0)
+                       break;
+
+               SET(bp->b_lflags, BL_WANTED_REF);
 
+               (void) msleep(bp, buf_mtxp, PSPIN | (PRIBIO+1), "buf_wait_for_shadow", NULL);
+       }
+       lck_mtx_unlock(buf_mtxp);
+}
+       
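buf_wait_for_shadow_io() pairs with the BL_WANTED_REF handling added to buf_brelse_shadow() above: the waiter sets the flag under buf_mtxp and msleeps; the releaser clears it and calls wakeup() once b_shadow_ref drains to zero. The same handshake in portable pthreads form (initialize the mutex and condition variable before use):

	#include <pthread.h>

	/* User-space analogue of the BL_WANTED_REF handshake: block until a
	 * mutex-protected reference count drains to zero. */
	struct shadow {
		pthread_mutex_t lock;
		pthread_cond_t drained;
		int shadow_ref;
	};

	static void
	shadow_wait(struct shadow *s)
	{
		pthread_mutex_lock(&s->lock);
		while (s->shadow_ref != 0)	/* re-check after every wakeup */
			pthread_cond_wait(&s->drained, &s->lock);
		pthread_mutex_unlock(&s->lock);
	}

	static void
	shadow_release(struct shadow *s)
	{
		pthread_mutex_lock(&s->lock);
		if (--s->shadow_ref == 0)
			pthread_cond_broadcast(&s->drained);
		pthread_mutex_unlock(&s->lock);
	}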
 /* XXX FIXME -- Update the comment to reflect the UBC changes (please) -- */
 /*
  * Get a block of requested size that is associated with
@@ -3409,9 +3590,6 @@ bcleanbuf(buf_t bp, boolean_t discard)
                bp->b_bcount = 0;
                bp->b_dirtyoff = bp->b_dirtyend = 0;
                bp->b_validoff = bp->b_validend = 0;
-#ifdef CONFIG_PROTECT
-               bp->b_cpentry = 0;
-#endif
                bzero(&bp->b_attr, sizeof(struct bufattr));
 
                lck_mtx_lock_spin(buf_mtxp);
@@ -3654,12 +3832,15 @@ buf_biodone(buf_t bp)
                else if (bp->b_flags & B_PASSIVE)
                        code |= DKIO_PASSIVE;
 
-               KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
+               if (bp->b_attr.ba_flags & BA_NOCACHE)
+                       code |= DKIO_NOCACHE;
+
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
                               bp, (uintptr_t)bp->b_vp,
                                      bp->b_resid, bp->b_error, 0);
         }
        if ((bp->b_vp != NULLVP) &&
-           ((bp->b_flags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
+           ((bp->b_flags & (B_THROTTLED_IO | B_PASSIVE | B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
            (bp->b_vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) {
                microuptime(&priority_IO_timestamp_for_root);
                hard_throttle_on_root = 0;
@@ -3672,7 +3853,12 @@ buf_biodone(buf_t bp)
         * indicators
         */
        CLR(bp->b_flags, (B_WASDIRTY | B_THROTTLED_IO | B_PASSIVE));
-       CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO));
+       CLR(bp->b_attr.ba_flags, (BA_META | BA_NOCACHE));
+#if !CONFIG_EMBEDDED
+       CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO | BA_DELAYIDLESLEEP));
+#else
+       CLR(bp->b_attr.ba_flags, BA_THROTTLED_IO);
+#endif /* !CONFIG_EMBEDDED */
        DTRACE_IO1(done, buf_t, bp);
 
        if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW))
@@ -3769,6 +3955,7 @@ count_lock_queue(void)
 
 /*
  * Return a count of 'busy' buffers. Used at the time of shutdown.
 * Note: this is also called from the Mach side, in debug context, from kdp.c.
  */
 int
 count_busy_buffers(void)
@@ -3864,9 +4051,6 @@ alloc_io_buf(vnode_t vp, int priv)
        bp->b_bufsize = 0;
        bp->b_upl = NULL;
        bp->b_vp = vp;
-#ifdef CONFIG_PROTECT
-       bp->b_cpentry = 0;
-#endif
        bzero(&bp->b_attr, sizeof(struct bufattr));
 
        if (vp && (vp->v_type == VBLK || vp->v_type == VCHR))
@@ -4085,7 +4269,7 @@ buffer_cache_gc(int all)
        boolean_t did_large_zfree = FALSE;
        boolean_t need_wakeup = FALSE;
        int now = buf_timestamp();
-       uint32_t found = 0, total_found = 0;
+       uint32_t found = 0;
        struct bqueues privq;
        int thresh_hold = BUF_STALE_THRESHHOLD;
 
@@ -4093,11 +4277,14 @@ buffer_cache_gc(int all)
                thresh_hold = 0;
        /* 
         * We only care about metadata (incore storage comes from zalloc()).
-        * No more than 1024 buffers total, and only those not accessed within the
-        * last 30s.  We will also only examine 128 buffers during a single grab
-        * of the lock in order to limit lock hold time.
+        * Unless "all" is set (used to evict metadata buffers in preparation
+        * for deep sleep), we only evict up to BUF_MAX_GC_BATCH_SIZE buffers
+        * that have not been accessed in the last 30s.  This limit bounds both
+        * the hold time of the global lock "buf_mtxp" and the length of time
+        * we spend compute-bound in the GC thread that calls this function.
        lck_mtx_lock(buf_mtxp);
+
        do {
                found = 0;
                TAILQ_INIT(&privq);
@@ -4179,7 +4366,6 @@ buffer_cache_gc(int all)
                        bp->b_whichq = BQ_EMPTY;
                        BLISTNONE(bp);
                }
-
                lck_mtx_lock(buf_mtxp);
 
                /* Back under lock, move them all to invalid hash and clear busy */
@@ -4199,9 +4385,8 @@ buffer_cache_gc(int all)
 
                /* And do a big bulk move to the empty queue */
                TAILQ_CONCAT(&bufqueues[BQ_EMPTY], &privq, b_freelist);
-               total_found += found;
 
-       } while ((all || (total_found < BUF_MAX_GC_COUNT)) && (found == BUF_MAX_GC_BATCH_SIZE));
+       } while (all && (found == BUF_MAX_GC_BATCH_SIZE));
 
        lck_mtx_unlock(buf_mtxp);
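With the old BUF_MAX_GC_COUNT cap removed, the loop above now runs exactly one batch per call unless `all` is set, in which case it keeps draining while full batches keep turning up. The control flow, reduced to a sketch:

	#define BATCH_SIZE 128	/* stands in for BUF_MAX_GC_BATCH_SIZE */

	static void
	gc_pass(int all)
	{
		unsigned int found;

		do {
			found = 0;
			/* lock; move up to BATCH_SIZE stale buffers to a
			 * private list and set found to that count; unlock;
			 * clean them outside the lock; relock to requeue. */
		} while (all && found == BATCH_SIZE);
	}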
 
index 3096d12941242a58deaa997d4d735b7d6af68b37..73fbb3afb3f0942f1db5cb6e386c9bd1f9214775 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -192,6 +192,11 @@ static unsigned int crc32tab[256];
  * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
  * in the name cache and does not enter the file system.
  *
+ * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when 
+ * we encounter ENOENT during path reconstruction.  ENOENT means that 
+ * one of the parents moved while we were building the path.  The 
+ * caller can handle this special case by calling build_path again.
+ *
  * passed in vp must have a valid io_count reference
  */
 int
@@ -309,7 +314,8 @@ again:
                 * Walk up the parent chain.
                 */
                if (((vp->v_parent != NULLVP) && !fixhardlink) ||
-                   (flags & BUILDPATH_NO_FS_ENTER)) {
+                               (flags & BUILDPATH_NO_FS_ENTER)) {
+
                        /*
                         * In this if () block we are not allowed to enter the filesystem
                         * to conclusively get the most accurate parent identifier.
@@ -323,17 +329,17 @@ again:
 
                                /* The code below will exit early if 'tvp = vp' == NULL */
                        }
-
                        vp = vp->v_parent;
-                       
+
                        /*
                         * if the vnode we have in hand isn't a directory and it
                         * has a v_parent, then we started with the resource fork
                         * so skip up to avoid getting a duplicate copy of the
                         * file name in the path.
                         */
-                       if (vp && !vnode_isdir(vp) && vp->v_parent)
+                       if (vp && !vnode_isdir(vp) && vp->v_parent) {
                                vp = vp->v_parent;
+                       }
                } else {
                        /*
                         * No parent, go get it if supported.
@@ -492,6 +498,14 @@ out:
         */
        *outlen = &buff[buflen] - end;
  
+       /* One of the parents was moved during path reconstruction. 
+        * The caller is interested in knowing whether any of the 
+        * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
+        */
+       if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
+               ret = EAGAIN;
+       }
+
        return (ret);
 }
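
With BUILDPATH_CHECK_MOVED, EAGAIN is a retryable condition rather than a failure. A plausible caller, assuming vp, ctx, and this file's build_path() signature (an unbounded retry is shown for brevity; a real caller might cap the attempts):

    char path[MAXPATHLEN];
    int  pathlen, err;

    do {
            err = build_path(vp, path, MAXPATHLEN, &pathlen,
                             BUILDPATH_CHECK_MOVED, ctx);
    } while (err == EAGAIN);        /* a parent moved mid-walk; rebuild */
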
 
index 2bccd5bb33c138990272a68e6a6f75251354f802..69dfdfda332150e60248f2264b0566f2d1eab155 100644 (file)
@@ -71,6 +71,7 @@
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/resourcevar.h>
+#include <miscfs/specfs/specdev.h>
 #include <sys/uio_internal.h>
 #include <libkern/libkern.h>
 #include <machine/machine_routines.h>
 #define CL_IOSTREAMING 0x4000
 #define CL_CLOSE       0x8000
 #define        CL_ENCRYPTED    0x10000
+#define CL_RAW_ENCRYPTED       0x20000
+#define CL_NOCACHE     0x40000
 
 #define MAX_VECTOR_UPL_ELEMENTS        8
 #define MAX_VECTOR_UPL_SIZE    (2 * MAX_UPL_SIZE) * PAGE_SIZE
@@ -201,6 +204,30 @@ static kern_return_t vfs_drt_get_cluster(void **cmapp, off_t *offsetp, u_int *le
 static kern_return_t vfs_drt_control(void **cmapp, int op_type);
 
 
+/*
+ * Used by throttled IO to check whether
+ * a block is cached by the boot cache,
+ * so that such IO can avoid being delayed.
+ *
+ * bootcache_contains_block is initially
+ * NULL. The BootCache will set it while
+ * the cache is active and clear it when
+ * the cache is jettisoned.
+ *
+ * Returns 0 if the block is not
+ * contained in the cache, 1 if it is
+ * contained.
+ *
+ * The function previously pointed to
+ * remains safe to call even after the
+ * cache has been jettisoned and
+ * bootcache_contains_block has been
+ * cleared.
+ *
+ * See rdar://9974130 (the new throttling mechanism breaks the boot cache for throttled IOs)
+ */
+int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL;
+
+
 /*
  * limit the internal I/O size so that we
  * can represent it in a 32 bit int
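
Because bootcache_contains_block is a bare global function pointer, both sides follow a snapshot discipline: the BootCache installs the pointer while it is active and clears it on jettison, and consumers copy it to a local before testing and calling it (see cluster_io_present_in_BC further down). A sketch of the publisher side, with hypothetical BC_* names:

    extern int (*bootcache_contains_block)(dev_t device, u_int64_t blkno);

    /* hypothetical BootCache-side lookup; must stay callable (returning 0)
     * even after the cache is gone, since a consumer may have snapshotted
     * the pointer just before it was cleared */
    static int BC_contains_block(dev_t device, u_int64_t blkno);

    static void
    BC_activate(void)
    {
            bootcache_contains_block = BC_contains_block;   /* publish */
    }

    static void
    BC_jettison(void)
    {
            bootcache_contains_block = NULL;                /* unpublish */
    }
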
@@ -214,16 +241,26 @@ static kern_return_t vfs_drt_control(void **cmapp, int op_type);
 #define WRITE_THROTTLE_SSD     2
 #define WRITE_BEHIND           1
 #define WRITE_BEHIND_SSD       1
+
+#if CONFIG_EMBEDDED
+#define PREFETCH               1
+#define PREFETCH_SSD           1
+uint32_t speculative_prefetch_max = 512;       /* maximum number of pages to use for a speculative read-ahead */
+uint32_t speculative_prefetch_max_iosize = (512 * 1024);       /* maximum I/O size to use for a speculative read-ahead */
+#else
 #define PREFETCH               3
-#define PREFETCH_SSD           2
+#define PREFETCH_SSD           1
+uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3);
+uint32_t speculative_prefetch_max_iosize = (512 * 1024);       /* maximum I/O size to use for a speculative read-ahead on SSDs */
+#endif
 
-#define IO_SCALE(vp, base)             (vp->v_mount->mnt_ioscale * base)
+
+#define IO_SCALE(vp, base)             (vp->v_mount->mnt_ioscale * (base))
 #define MAX_CLUSTER_SIZE(vp)           (cluster_max_io_size(vp->v_mount, CL_WRITE))
-#define MAX_PREFETCH(vp, size, is_ssd) (size * IO_SCALE(vp, (is_ssd && !ignore_is_ssd) ? PREFETCH_SSD : PREFETCH))
+#define MAX_PREFETCH(vp, size, is_ssd) (size * IO_SCALE(vp, ((is_ssd && !ignore_is_ssd) ? PREFETCH_SSD : PREFETCH)))
 
 int    ignore_is_ssd = 0;
 int    speculative_reads_disabled = 0;
-uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3);
 
 /*
  * throttle the number of async writes that
@@ -231,11 +268,25 @@ uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3);
  * before we issue a synchronous write 
  */
 #define HARD_THROTTLE_MAXCNT   0
-#define HARD_THROTTLE_MAXSIZE  (256 * 1024)
+#define HARD_THROTTLE_MAX_IOSIZE (128 * 1024)
+#define LEGACY_HARD_THROTTLE_MAX_IOSIZE (512 * 1024)
 
+extern int32_t throttle_legacy_process_count;
 int hard_throttle_on_root = 0;
+uint32_t hard_throttle_max_iosize = HARD_THROTTLE_MAX_IOSIZE;
+uint32_t legacy_hard_throttle_max_iosize = LEGACY_HARD_THROTTLE_MAX_IOSIZE;
 struct timeval priority_IO_timestamp_for_root;
 
+#if CONFIG_EMBEDDED
+#define THROTTLE_MAX_IOSIZE (hard_throttle_max_iosize)
+#else
+#define THROTTLE_MAX_IOSIZE (throttle_legacy_process_count == 0 ? hard_throttle_max_iosize : legacy_hard_throttle_max_iosize)
+#endif
+
+
+SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &hard_throttle_max_iosize, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &legacy_hard_throttle_max_iosize, 0, "");
+
 
 void
 cluster_init(void) {
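
The two SYSCTL_INT entries above make the throttle ceilings runtime-tunable under debug.*. From user space they can be inspected (or, with privileges, adjusted) via sysctlbyname(3); a minimal reader, assuming the OID names registered above:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            uint32_t v;
            size_t len = sizeof(v);

            if (sysctlbyname("debug.lowpri_throttle_max_iosize",
                             &v, &len, NULL, 0) == 0)
                    printf("throttled I/O capped at %u bytes\n", v);
            return 0;
    }
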
@@ -426,31 +477,47 @@ cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *c
 }
 
 
+static int
+cluster_io_present_in_BC(vnode_t vp, off_t f_offset)
+{
+       daddr64_t blkno;
+       size_t    io_size;
+       int (*bootcache_check_fn)(dev_t device, u_int64_t blkno) = bootcache_contains_block;
+       
+       if (bootcache_check_fn) {
+               if (VNOP_BLOCKMAP(vp, f_offset, PAGE_SIZE, &blkno, &io_size, NULL, VNODE_READ, NULL))
+                       return(0);
+
+               if (io_size == 0)
+                       return (0);
+
+               if (bootcache_check_fn(vp->v_mount->mnt_devvp->v_rdev, blkno))
+                       return(1);
+       }
+       return(0);
+}
+
+
 static int 
 cluster_hard_throttle_on(vnode_t vp, uint32_t hard_throttle)
 {
-       struct uthread  *ut;
+       int throttle_type = 0;
 
-       if (hard_throttle) {
-               static struct timeval hard_throttle_maxelapsed = { 0, 200000 };
+       if ( (throttle_type = throttle_io_will_be_throttled(-1, vp->v_mount)) )
+               return(throttle_type);
 
-               if (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV) {
-                       struct timeval elapsed;
+       if (hard_throttle && (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) {
+               static struct timeval hard_throttle_maxelapsed = { 0, 100000 };
+               struct timeval elapsed;
 
-                       if (hard_throttle_on_root)
-                               return(1);
+               if (hard_throttle_on_root)
+                       return(1);
 
-                       microuptime(&elapsed);
-                       timevalsub(&elapsed, &priority_IO_timestamp_for_root);
+               microuptime(&elapsed);
+               timevalsub(&elapsed, &priority_IO_timestamp_for_root);
 
-                       if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <))
-                               return(1);
-               }
-       }
-       if (throttle_get_io_policy(&ut) == IOPOL_THROTTLE) {
-               if (throttle_io_will_be_throttled(-1, vp->v_mount)) {
+               if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <))
                        return(1);
-               }
        }
        return(0);
 }
@@ -707,7 +774,7 @@ uint32_t
 cluster_hard_throttle_limit(vnode_t vp, uint32_t *limit, uint32_t hard_throttle)
 {
        if (cluster_hard_throttle_on(vp, hard_throttle)) {
-               *limit = HARD_THROTTLE_MAXSIZE;
+               *limit = THROTTLE_MAX_IOSIZE;
                return 1;
        }
        return 0;   
@@ -948,8 +1015,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 
        if (flags & CL_THROTTLE) {
                if ( !(flags & CL_PAGEOUT) && cluster_hard_throttle_on(vp, 1)) {
-                       if (max_iosize > HARD_THROTTLE_MAXSIZE)
-                               max_iosize = HARD_THROTTLE_MAXSIZE;
+                       if (max_iosize > THROTTLE_MAX_IOSIZE)
+                               max_iosize = THROTTLE_MAX_IOSIZE;
                        async_throttle = HARD_THROTTLE_MAXCNT;
                } else {
                        if ( (flags & CL_DEV_MEMORY) )
@@ -1397,6 +1464,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                }
                cbp->b_cliodone = (void *)callback;
                cbp->b_flags |= io_flags;
+               if (flags & CL_NOCACHE)
+                       cbp->b_attr.ba_flags |= BA_NOCACHE;
 
                cbp->b_lblkno = lblkno;
                cbp->b_blkno  = blkno;
@@ -1489,6 +1558,14 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                if ( !(io_flags & B_READ))
                        vnode_startwrite(vp);
                                
+               if (flags & CL_RAW_ENCRYPTED) {
+                       /* 
+                        * User requested raw encrypted bytes.
+                        * Twiddle the bit in the ba_flags for the buffer
+                        */
+                       cbp->b_attr.ba_flags |= BA_RAW_ENCRYPTED_IO;
+               }
+               
                (void) VNOP_STRATEGY(cbp);
 
                if (need_EOT == TRUE) {
@@ -1914,9 +1991,10 @@ cluster_write_ext(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t
        else
                bflag = 0;
 
-       if (vp->v_flag & VNOCACHE_DATA)
+       if (vp->v_flag & VNOCACHE_DATA) {
                flags |= IO_NOCACHE;
-
+               bflag |= CL_NOCACHE;
+       }
         if (uio == NULL) {
                /*
                 * no user data...
@@ -2058,7 +2136,10 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
        user_addr_t      iov_base;
        u_int32_t        mem_alignment_mask;
        u_int32_t        devblocksize;
+       u_int32_t        max_io_size;
        u_int32_t        max_upl_size;
+       u_int32_t        max_vector_size;
+       boolean_t        io_throttled = FALSE;
 
        u_int32_t        vector_upl_iosize = 0;
        int              issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1);
@@ -2080,7 +2161,10 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
 
        if (flags & IO_PASSIVE)
                io_flag |= CL_PASSIVE;
-
+       
+       if (flags & IO_NOCACHE)
+               io_flag |= CL_NOCACHE;
+       
        iostate.io_completed = 0;
        iostate.io_issued = 0;
        iostate.io_error = 0;
@@ -2129,6 +2213,33 @@ next_dwrite:
         }
 
        while (io_req_size >= PAGE_SIZE && uio->uio_offset < newEOF && retval == 0) {
+               int     throttle_type;
+
+               if ( (throttle_type = cluster_hard_throttle_on(vp, 1)) ) {
+                       /*
+                        * we're in the throttle window, at the very least
+                        * we want to limit the size of the I/O we're about
+                        * to issue
+                        */
+                       if ( (flags & IO_RETURN_ON_THROTTLE) && throttle_type == 2) {
+                               /*
+                                * we're in the throttle window and at least 1 I/O
+                                * has already been issued by a throttleable thread
+                                * in this window, so return with EAGAIN to indicate
+                                * to the FS issuing the cluster_write call that it
+                                * should now throttle after dropping any locks
+                                */
+                               throttle_info_update_by_mount(vp->v_mount);
+
+                               io_throttled = TRUE;
+                               goto wait_for_dwrites;
+                       }
+                       max_vector_size = THROTTLE_MAX_IOSIZE;
+                       max_io_size = THROTTLE_MAX_IOSIZE;
+               } else {
+                       max_vector_size = MAX_VECTOR_UPL_SIZE;
+                       max_io_size = max_upl_size;
+               }
 
                if (first_IO) {
                        cluster_syncup(vp, newEOF, callback, callback_arg);
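
Together with the wait_for_dwrites change further down (which converts io_throttled into EAGAIN), this gives a filesystem the option to drop its locks before sleeping out the throttle window. A sketch of the intended caller pattern; fs_unlock()/fs_lock() are hypothetical placeholders for the FS's own locking:

    error = cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff,
                          flags | IO_RETURN_ON_THROTTLE);
    if (error == EAGAIN) {
            fs_unlock(ip);          /* hypothetical: release FS locks first */
            throttle_lowpri_io(1);  /* then sleep out the throttle window */
            fs_lock(ip);
            /* ...and retry the write */
    }
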
@@ -2137,8 +2248,8 @@ next_dwrite:
                io_size  = io_req_size & ~PAGE_MASK;
                iov_base = uio_curriovbase(uio);
 
-               if (io_size > max_upl_size)
-                       io_size = max_upl_size;
+               if (io_size > max_io_size)
+                       io_size = max_io_size;
 
                if(useVectorUPL && (iov_base & PAGE_MASK)) {
                        /*
@@ -2304,7 +2415,7 @@ next_dwrite:
                        vector_upl_iosize += io_size;
                        vector_upl_size += upl_size;
 
-                       if(issueVectorUPL || vector_upl_index ==  MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= MAX_VECTOR_UPL_SIZE) {
+                       if(issueVectorUPL || vector_upl_index ==  MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= max_vector_size) {
                                retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
                                reset_vector_run_state();
                        }
@@ -2367,6 +2478,9 @@ wait_for_dwrites:
 
        lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
 
+       if (io_throttled == TRUE && retval == 0)
+               retval = EAGAIN;
+
        if (io_req_size && retval == 0) {
                /*
                 * we couldn't handle the tail of this request in DIRECT mode
@@ -2671,7 +2785,9 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                bflag = CL_PASSIVE;
        else
                bflag = 0;
-
+       if (flags & IO_NOCACHE)
+               bflag |= CL_NOCACHE;
+       
        zero_cnt  = 0;
        zero_cnt1 = 0;
        zero_off  = 0;
@@ -3286,17 +3402,34 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (*
                flags |= IO_NOCACHE;
        if ((vp->v_flag & VRAOFF) || speculative_reads_disabled)
                flags |= IO_RAOFF;
+       
+       /* 
+        * If we're doing an encrypted IO, then first check to see
+        * if the IO requested was page aligned.  If not, then bail 
+        * out immediately.
+        */
+       if (flags & IO_ENCRYPTED) {             
+               if (read_length & PAGE_MASK) {
+                       retval = EINVAL;
+                       return retval;
+               }
+       }
 
-        /*
+       /*
         * do a read through the cache if one of the following is true....
         *   NOCACHE is not true
         *   the uio request doesn't target USERSPACE
+        * Alternatively, if IO_ENCRYPTED is set, then we want to bypass the cache as well.
+        * Reading encrypted data from a CP filesystem should never result in the data touching
+        * the UBC.
+        *
         * otherwise, find out if we want the direct or contig variant for
         * the first vector in the uio request
         */
-       if ( (flags & IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg) )
-               retval = cluster_io_type(uio, &read_type, &read_length, 0);
-
+       if (((flags & IO_NOCACHE) || (flags & IO_ENCRYPTED)) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
+               retval = cluster_io_type(uio, &read_type, &read_length, 0);
+       }
+       
        while ((cur_resid = uio_resid(uio)) && uio->uio_offset < filesize && retval == 0) {
 
                switch (read_type) {
@@ -3380,27 +3513,19 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
        struct cl_extent        extent;
        int              bflag;
        int              take_reference = 1;
-#if CONFIG_EMBEDDED
-       struct uthread  *ut;
-#endif /* CONFIG_EMBEDDED */
        int              policy = IOPOL_DEFAULT;
        boolean_t        iolock_inited = FALSE;
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
                     (int)uio->uio_offset, io_req_size, (int)filesize, flags, 0);
+       
+       if (flags & IO_ENCRYPTED) {
+               panic ("encrypted blocks will hit UBC!");
+       }
                         
-#if !CONFIG_EMBEDDED
        policy = proc_get_task_selfdiskacc();
-#else /* !CONFIG_EMBEDDED */
-       policy = current_proc()->p_iopol_disk;
 
-       ut = get_bsdthread_info(current_thread());
-
-       if (ut->uu_iopol_disk != IOPOL_DEFAULT)
-               policy = ut->uu_iopol_disk;
-#endif /* !CONFIG_EMBEDDED */
-
-       if (policy == IOPOL_THROTTLE || (flags & IO_NOCACHE))
+       if (policy == IOPOL_THROTTLE || policy == IOPOL_UTILITY || (flags & IO_NOCACHE))
                take_reference = 0;
 
        if (flags & IO_PASSIVE)
@@ -3408,6 +3533,9 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
        else
                bflag = 0;
 
+       if (flags & IO_NOCACHE)
+               bflag |= CL_NOCACHE;
+
        max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
        max_prefetch = MAX_PREFETCH(vp, max_io_size, (vp->v_mount->mnt_kern_flag & MNTK_SSD));
        max_rd_size = max_prefetch;
@@ -3422,13 +3550,15 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                rap = NULL;
        } else {
                if (cluster_hard_throttle_on(vp, 1)) {
+                       /*
+                        * we're in the throttle window, at the very least
+                        * we want to limit the size of the I/O we're about
+                        * to issue
+                        */
                        rd_ahead_enabled = 0;
                        prefetch_enabled = 0;
 
-                       max_rd_size = HARD_THROTTLE_MAXSIZE;
-               } else if (policy == IOPOL_THROTTLE) {
-                       rd_ahead_enabled = 0;
-                       prefetch_enabled = 0;
+                       max_rd_size = THROTTLE_MAX_IOSIZE;
                }
                if ((rap = cluster_get_rap(vp)) == NULL)
                        rd_ahead_enabled = 0;
@@ -3547,6 +3677,30 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                         */
                        max_size = filesize - uio->uio_offset;
                }
+
+               iostate.io_completed = 0;
+               iostate.io_issued = 0;
+               iostate.io_error = 0;
+               iostate.io_wanted = 0;
+
+               if ( (flags & IO_RETURN_ON_THROTTLE) ) {
+                       if (cluster_hard_throttle_on(vp, 0) == 2) {
+                               if ( !cluster_io_present_in_BC(vp, uio->uio_offset)) {
+                                       /*
+                                        * we're in the throttle window and at least 1 I/O
+                                        * has already been issued by a throttleable thread
+                                        * in this window, so return with EAGAIN to indicate
+                                        * to the FS issuing the cluster_read call that it
+                                        * should now throttle after dropping any locks
+                                        */
+                                       throttle_info_update_by_mount(vp->v_mount);
+
+                                       retval = EAGAIN;
+                                       break;
+                               }
+                       }
+               }
+
                /*
                 * compute the size of the upl needed to encompass
                 * the requested read... limit each call to cluster_io
@@ -3608,10 +3762,6 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
                        if (upl_valid_page(pl, last_pg))
                                break;
                }
-               iostate.io_completed = 0;
-               iostate.io_issued = 0;
-               iostate.io_error = 0;
-               iostate.io_wanted = 0;
 
                if (start_pg < last_pg) {               
                        /*
@@ -3804,16 +3954,20 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 
                if (io_req_size) {
                        if (cluster_hard_throttle_on(vp, 1)) {
+                               /*
+                                * we're in the throttle window, at the very least
+                                * we want to limit the size of the I/O we're about
+                                * to issue
+                                */
                                rd_ahead_enabled = 0;
                                prefetch_enabled = 0;
-
-                               max_rd_size = HARD_THROTTLE_MAXSIZE;
+                               max_rd_size = THROTTLE_MAX_IOSIZE;
                        } else {
-                               if (max_rd_size == HARD_THROTTLE_MAXSIZE) {
+                               if (max_rd_size == THROTTLE_MAX_IOSIZE) {
                                        /*
                                         * coming out of throttled state
                                         */
-                                       if (policy != IOPOL_THROTTLE) {
+                                       if (policy != IOPOL_THROTTLE && policy != IOPOL_UTILITY) {
                                                if (rap != NULL)
                                                        rd_ahead_enabled = 1;
                                                prefetch_enabled = 1;
@@ -3884,7 +4038,9 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        u_int32_t        max_upl_size;
        u_int32_t        max_rd_size;
        u_int32_t        max_rd_ahead;
+       u_int32_t        max_vector_size;
        boolean_t        strict_uncached_IO = FALSE;
+       boolean_t        io_throttled = FALSE;
 
        u_int32_t        vector_upl_iosize = 0;
        int              issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1);
@@ -3905,6 +4061,14 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        if (flags & IO_PASSIVE)
                io_flag |= CL_PASSIVE;
 
+       if (flags & IO_ENCRYPTED) {
+               io_flag |= CL_RAW_ENCRYPTED;
+       }
+
+       if (flags & IO_NOCACHE) {
+               io_flag |= CL_NOCACHE;
+       }
+
        iostate.io_completed = 0;
        iostate.io_issued = 0;
        iostate.io_error = 0;
@@ -3960,7 +4124,17 @@ next_dread:
                 * I/O that ends on a page boundary in cluster_io
                 */
                misaligned = 1;
-        }
+    }
+
+       /* 
+        * The user must request IO in aligned chunks.  If the 
+        * offset into the file or the userland pointer 
+        * is misaligned, then we cannot service the encrypted IO request.
+        */
+       if ((flags & IO_ENCRYPTED) && (misaligned)) {
+               retval = EINVAL;
+       }
+
        /*
         * When we get to this point, we know...
         *  -- the offset into the file is on a devblocksize boundary
@@ -3970,22 +4144,32 @@ next_dread:
                u_int32_t io_start;
 
                if (cluster_hard_throttle_on(vp, 1)) {
-                       max_rd_size  = HARD_THROTTLE_MAXSIZE;
-                       max_rd_ahead = HARD_THROTTLE_MAXSIZE - 1;
+                       /*
+                        * we're in the throttle window, at the very least
+                        * we want to limit the size of the I/O we're about
+                        * to issue
+                        */
+                       max_rd_size  = THROTTLE_MAX_IOSIZE;
+                       max_rd_ahead = THROTTLE_MAX_IOSIZE - 1;
+                       max_vector_size = THROTTLE_MAX_IOSIZE;
                } else {
                        max_rd_size  = max_upl_size;
                        max_rd_ahead = max_rd_size * IO_SCALE(vp, 2);
+                       max_vector_size = MAX_VECTOR_UPL_SIZE;
                }
                io_start = io_size = io_req_size;
 
                /*
                 * First look for pages already in the cache
-                * and move them to user space.
+                * and move them to user space.  But only do this
+                * check if we are not retrieving encrypted data directly
+                * from the filesystem;  those blocks should never
+                * be in the UBC. 
                 *
                 * cluster_copy_ubc_data returns the resid
                 * in io_size
                 */
-               if (strict_uncached_IO == FALSE) {
+               if ((strict_uncached_IO == FALSE) && ((flags & IO_ENCRYPTED) == 0)) {
                        retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_size, 0, 0);
                }
                /*
@@ -4018,9 +4202,14 @@ next_dread:
                }
 
                /*
-                * check to see if we are finished with this request...
+                * check to see if we are finished with this request.
+                *
+                * If we satisfied this IO already, then io_req_size will be 0.
+                * Otherwise, see if the IO was mis-aligned and needs to go through 
+                * the UBC to deal with the 'tail'.
+                *
                 */
-               if (io_req_size == 0 || misaligned) {
+               if (io_req_size == 0 || (misaligned)) {
                        /*
                         * see if there's another uio vector to
                         * process that's of type IO_DIRECT
@@ -4046,13 +4235,31 @@ next_dread:
                 * (which overlaps the end of the direct read) in order to 
                 * get at the overhang bytes
                 */
-               if (io_size & (devblocksize - 1)) {
-                       /*
-                        * request does NOT end on a device block boundary
-                        * so clip it back to a PAGE_SIZE boundary
-                        */
-                       io_size &= ~PAGE_MASK;
-                       io_min = PAGE_SIZE;
+               if (io_size & (devblocksize - 1)) {                     
+                       if (flags & IO_ENCRYPTED) {
+                               /* 
+                                * Normally, we'd round down to the previous page boundary to 
+                                * let the UBC manage the zero-filling of the file past the EOF.
+                                * But if we're doing encrypted IO, we can't let any of
+                                * the data hit the UBC.  This means we have to do the full
+                                * IO to the upper block boundary of the device block that
+                                * contains the EOF. The user will be responsible for not
+                                * interpreting data PAST the EOF in its buffer.
+                                *
+                                * So just bump the IO back up to a multiple of devblocksize
+                                */
+                               io_size = ((io_size + devblocksize) & ~(devblocksize - 1));
+                               io_min = io_size;                                       
+                       }
+                       else {
+                               /* 
+                                * Clip the request to the previous page size boundary
+                                * since request does NOT end on a device block boundary
+                                */
+                               io_size &= ~PAGE_MASK;
+                               io_min = PAGE_SIZE;
+                       }
+                       
                }
                if (retval || io_size < io_min) {
                        /*
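
A quick numeric check of the round-up expression above (devblocksize assumed to be a power of two; the enclosing test guarantees io_size is not already a multiple, so the "+ devblocksize" cannot over-round here):

    u_int32_t devblocksize = 4096;
    u_int32_t io_size = 5000;       /* fails the alignment test above */

    io_size = ((io_size + devblocksize) & ~(devblocksize - 1));
    /* io_size is now 8192, the next devblocksize multiple above 5000 */
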
@@ -4065,10 +4272,14 @@ next_dread:
                        goto wait_for_dreads;
                }
 
-               if (strict_uncached_IO == FALSE) {
+               /* 
+                * Don't re-check the UBC data if we are looking for uncached IO
+                * or asking for encrypted blocks.
+                */
+               if ((strict_uncached_IO == FALSE) && ((flags & IO_ENCRYPTED) == 0)) {
 
                        if ((xsize = io_size) > max_rd_size)
-                               xsize = max_rd_size;
+                               xsize = max_rd_size;
 
                        io_size = 0;
 
@@ -4083,6 +4294,25 @@ next_dread:
                                continue;
                        }
                }
+               if ( (flags & IO_RETURN_ON_THROTTLE) ) {
+                       if (cluster_hard_throttle_on(vp, 0) == 2) {
+                               if ( !cluster_io_present_in_BC(vp, uio->uio_offset)) {
+                                       /*
+                                        * we're in the throttle window and at least 1 I/O
+                                        * has already been issued by a throttleable thread
+                                        * in this window, so return with EAGAIN to indicate
+                                        * to the FS issuing the cluster_read call that it
+                                        * should now throttle after dropping any locks
+                                        */
+                                       throttle_info_update_by_mount(vp->v_mount);
+
+                                       io_throttled = TRUE;
+                                       goto wait_for_dreads;
+                               }
+                       }
+               }
+               if (io_size > max_rd_size)
+                       io_size = max_rd_size;
 
                iov_base = uio_curriovbase(uio);
 
@@ -4216,7 +4446,7 @@ next_dread:
                        vector_upl_size += upl_size;
                        vector_upl_iosize += io_size;
                        
-                       if(issueVectorUPL || vector_upl_index ==  MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= MAX_VECTOR_UPL_SIZE) {
+                       if(issueVectorUPL || vector_upl_index ==  MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= max_vector_size) {
                                retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize,  io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
                                reset_vector_run_state();       
                        }
@@ -4224,9 +4454,24 @@ next_dread:
                /*
                 * update the uio structure
                 */
-               uio_update(uio, (user_size_t)io_size);
-
-               io_req_size -= io_size;
+               if ((flags & IO_ENCRYPTED) && (max_io_size < io_size)) {
+                       uio_update(uio, (user_size_t)max_io_size);
+               }
+               else {
+                       uio_update(uio, (user_size_t)io_size);
+               }
+               /*
+                * Under normal circumstances, io_size should not be
+                * bigger than io_req_size, but we may have had to round up
+                * to the end of the page in the encrypted IO case.  In that
+                * case only, make sure io_req_size bottoms out at 0 instead
+                * of wrapping below zero (it is unsigned).
+                */
+               if ((flags & IO_ENCRYPTED) && (io_size > io_req_size)) {
+                       io_req_size = 0;
+               }
+               else {
+                       io_req_size -= io_size;
+               }
 
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
                             upl, (int)uio->uio_offset, io_req_size, retval, 0);
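
Continuing the numbers from the previous sketch: if the caller asked for 5000 bytes (io_req_size) and the encrypted path rounded the transfer up to 8192 (io_size), plain subtraction would wrap the unsigned counter, so the code above floors it at zero instead:

    u_int32_t io_req_size = 5000, io_size = 8192;

    if (io_size > io_req_size)
            io_req_size = 0;        /* rounded-up encrypted tail: clamp */
    else
            io_req_size -= io_size; /* normal case */
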
@@ -4264,6 +4509,9 @@ wait_for_dreads:
 
        lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
 
+       if (io_throttled == TRUE && retval == 0)
+               retval = EAGAIN;
+
        if (io_req_size && retval == 0) {
                /*
                 * we couldn't handle the tail of this request in DIRECT mode
@@ -4311,7 +4559,10 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
                bflag = CL_PASSIVE;
        else
                bflag = 0;
-
+       
+       if (flags & IO_NOCACHE)
+               bflag |= CL_NOCACHE;
+       
        /*
         * When we enter this routine, we know
         *  -- the read_length will not exceed the current iov_len
@@ -4595,6 +4846,16 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c
 
        max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
 
+#if CONFIG_EMBEDDED
+       if (max_io_size > speculative_prefetch_max_iosize)
+               max_io_size = speculative_prefetch_max_iosize;
+#else
+       if ((vp->v_mount->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd) {
+               if (max_io_size > speculative_prefetch_max_iosize)
+                       max_io_size = speculative_prefetch_max_iosize;
+       }
+#endif
+
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
                     (int)f_offset, resid, (int)filesize, 0, 0);
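
The effective read-ahead cap therefore differs by configuration: embedded always clamps to speculative_prefetch_max_iosize, while desktop clamps only for SSD-backed mounts (unless ignore_is_ssd overrides the media check). A hypothetical helper condensing the logic above:

    static u_int32_t
    effective_advisory_iosize(u_int32_t max_io_size, int is_ssd)
    {
    #if CONFIG_EMBEDDED
            (void)is_ssd;
            return MIN(max_io_size, speculative_prefetch_max_iosize);
    #else
            if (is_ssd && !ignore_is_ssd)
                    return MIN(max_io_size, speculative_prefetch_max_iosize);
            return max_io_size;     /* non-SSD desktop: uncapped here */
    #endif
    }
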
 
@@ -5222,6 +5483,9 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags, int (*c
                if (flags & IO_CLOSE)
                        io_flags |= CL_CLOSE;
 
+               if (flags & IO_NOCACHE)
+                       io_flags |= CL_NOCACHE;
+
                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
                                    io_flags, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
 
@@ -5348,6 +5612,9 @@ cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, u_int32_t
        else
                bflag = 0;
 
+       if (flags & IO_NOCACHE)
+               bflag |= CL_NOCACHE;
+
        upl_flags = UPL_SET_LITE;
 
        if ( !(flags & CL_READ) ) {
index a4a962b6664df1bb6275dff4a0fb660dc642190a..a64040dd9259cf1670130dd1101f6d8f22db9541 100644 (file)
 #include <sys/mount_internal.h>
 #include <sys/vnode_internal.h>
 
-#ifndef __LP64__ 
-#define VFS_THREAD_SAFE_FLAG VFC_VFSTHREADSAFE /* This is only defined for 32-bit */
+#if CONFIG_VFS_FUNNEL
+#define VFS_THREAD_SAFE_FLAG VFC_VFSTHREADSAFE /* Only defined under CONFIG_VFS_FUNNEL */
 #else 
 #define VFS_THREAD_SAFE_FLAG 0
-#endif /* __LP64__  */
+#endif /* CONFIG_VFS_FUNNEL */
 
 
 /*
index b92b69a28415757af8abd9cb14c282606fa031ab..179a264d4d9c8ac5868190ce14a90d447bddb295 100644 (file)
@@ -61,7 +61,7 @@
 #include <security/audit/audit.h>
 #include <bsm/audit_kevents.h>
 
-
+#include <pexpert/pexpert.h>
 
 typedef struct kfs_event {
     LIST_ENTRY(kfs_event) kevent_list;
@@ -124,8 +124,8 @@ typedef struct fs_event_watcher {
 #define MAX_WATCHERS  8
 static fs_event_watcher *watcher_table[MAX_WATCHERS];
 
-
-#define MAX_KFS_EVENTS   4096
+#define DEFAULT_MAX_KFS_EVENTS   4096
+static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;
 
 // we allocate kfs_event structures out of this zone
 static zone_t     event_zone;
@@ -190,9 +190,11 @@ fsevents_internal_init(void)
 
     lck_rw_init(&event_handling_lock, fsevent_rw_group, fsevent_lock_attr);
 
+    PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
+
     event_zone = zinit(sizeof(kfs_event),
-                      MAX_KFS_EVENTS * sizeof(kfs_event),
-                      MAX_KFS_EVENTS * sizeof(kfs_event),
+                      max_kfs_events * sizeof(kfs_event),
+                      max_kfs_events * sizeof(kfs_event),
                       "fs-event-buf");
     if (event_zone == NULL) {
        printf("fsevents: failed to initialize the event zone.\n");
@@ -204,7 +206,7 @@ fsevents_internal_init(void)
     zone_change(event_zone, Z_COLLECT, FALSE);
     zone_change(event_zone, Z_CALLERACCT, FALSE);
 
-    if (zfill(event_zone, MAX_KFS_EVENTS) < MAX_KFS_EVENTS) {
+    if (zfill(event_zone, max_kfs_events) < max_kfs_events) {
        printf("fsevents: failed to pre-fill the event zone.\n");       
     }
     
@@ -999,13 +1001,7 @@ add_fsevent(int type, vfs_context_t ctx, ...)
                    
                    pathbuff[0] = '\0';
                    if ((ret = vn_getpath(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
-                       struct vnode *orig_vp = vp;
                        
-                       if (ret != ENOSPC) {
-                               printf("add_fsevent: unable to get path for vp %p (%s; ret %d; type %d)\n",
-                                      vp, vp->v_name ? vp->v_name : "-UNKNOWN-FILE", ret, type);
-                       }
-
                        cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS;
                        
                        do {
@@ -1027,7 +1023,6 @@ add_fsevent(int type, vfs_context_t ctx, ...)
                        } while (ret == ENOSPC);
                                
                        if (ret != 0 || vp == NULL) {
-                               printf("add_fsevent: unabled to get a path for vp %p.  dropping the event.\n", orig_vp);
                                error = ENOENT;
                                if (need_event_unlock == 0) {
                                        // then we only grabbed it shared 
@@ -1277,13 +1272,13 @@ release_event_ref(kfs_event *kfse)
 
 
 static int
-add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out)
+add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
 {
     int               i;
     fs_event_watcher *watcher;
 
-    if (eventq_size <= 0 || eventq_size > 100*MAX_KFS_EVENTS) {
-       eventq_size = MAX_KFS_EVENTS;
+    if (eventq_size <= 0 || eventq_size > 100*max_kfs_events) {
+       eventq_size = max_kfs_events;
     }
 
     // Note: the event_queue follows the fs_event_watcher struct
@@ -1308,7 +1303,7 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even
     watcher->blockers     = 0;
     watcher->num_readers  = 0;
     watcher->max_event_id = 0;
-    watcher->fseh         = NULL;
+    watcher->fseh         = fseh;
 
     watcher->num_dropped  = 0;      // XXXdbg - debugging
 
@@ -1922,13 +1917,14 @@ typedef struct ext_fsevent_dev_filter_args {
 } ext_fsevent_dev_filter_args;
 #pragma pack(pop)
 
+#define NEW_FSEVENTS_DEVICE_FILTER      _IOW('s', 100, ext_fsevent_dev_filter_args)
+
 typedef struct old_fsevent_dev_filter_args {
     uint32_t  num_devices;
     int32_t   devices;
 } old_fsevent_dev_filter_args;
 
 #define        OLD_FSEVENTS_DEVICE_FILTER      _IOW('s', 100, old_fsevent_dev_filter_args)
-#define        NEW_FSEVENTS_DEVICE_FILTER      _IOW('s', 100, ext_fsevent_dev_filter_args)
 
 #if __LP64__
 /* need this in spite of the padding due to alignment of devices */
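
The kernel keeps both the old and new argument layouts so 32-bit, 64-bit, and legacy callers all reach the same ext_fsevent_dev_filter_args path. From user space the device filter is still issued with the plain pointer-bearing struct; a sketch, assuming the fsevent_dev_filter_args definition and FSEVENTS_DEVICE_FILTER request from <sys/fsevents.h>:

    #include <sys/ioctl.h>
    #include <sys/fsevents.h>

    /* restrict a /dev/fsevents clone fd to a single device */
    static int
    filter_to_device(int fsevents_fd, dev_t dev)
    {
            fsevent_dev_filter_args args;

            args.num_devices = 1;
            args.devices = &dev;
            return ioctl(fsevents_fd, FSEVENTS_DEVICE_FILTER, &args);
    }
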
@@ -1948,7 +1944,8 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx
 
     if (proc_is64bit(vfs_context_proc(ctx))) {
        devfilt_args = (ext_fsevent_dev_filter_args *)data;
-    } else if (cmd == OLD_FSEVENTS_DEVICE_FILTER) {
+    }
+    else if (cmd == OLD_FSEVENTS_DEVICE_FILTER) {
        old_fsevent_dev_filter_args *udev_filt_args = (old_fsevent_dev_filter_args *)data;
        
        devfilt_args = &_devfilt_args;
@@ -1956,7 +1953,8 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx
 
        devfilt_args->num_devices = udev_filt_args->num_devices;
        devfilt_args->devices     = CAST_USER_ADDR_T(udev_filt_args->devices);
-    } else {
+    }
+    else {
 #if __LP64__
        fsevent_dev_filter_args32 *udev_filt_args = (fsevent_dev_filter_args32 *)data;
 #else
@@ -2530,14 +2528,14 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s
            error = add_watcher(event_list,
                                fse_clone_args->num_events,
                                fse_clone_args->event_queue_depth,
-                               &fseh->watcher);
+                               &fseh->watcher,
+                               fseh);
            if (error) {
                FREE(event_list, M_TEMP);
                FREE(fseh, M_TEMP);
                return error;
            }
 
-           // connect up the watcher with this fsevent_handle
            fseh->watcher->fseh = fseh;
 
            error = falloc(p, &f, &fd, vfs_context_current());
index 4999f814bbba938baf0984b8835e3db93f09598e..3864ff34c2b805092352f92a433461b707827ffe 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -115,10 +115,12 @@ SYSCTL_INT(_vfs_generic_jnl_kdebug, OID_AUTO, trim, CTLFLAG_RW|CTLFLAG_LOCKED, &
 #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
 #endif
 
+
 #ifndef CONFIG_HFS_TRIM
 #define CONFIG_HFS_TRIM 0
 #endif
 
+
 #if JOURNALING
 
 //
@@ -136,8 +138,7 @@ enum {
 unsigned int jnl_trim_flush_limit = JOURNAL_FLUSH_TRIM_EXTENTS;
 SYSCTL_UINT (_kern, OID_AUTO, jnl_trim_flush, CTLFLAG_RW, &jnl_trim_flush_limit, 0, "number of trimmed extents to cause a journal flush");
 
-
-/* XXX next prototytype should be from libsa/stdlib.h> but conflicts libkern */
+/* XXX next prototype should be from <libsa/stdlib.h> but conflicts with libkern */
 __private_extern__ void qsort(
        void * array,
        size_t nmembers,
@@ -1099,6 +1100,7 @@ replay_journal(journal *jnl)
        struct bucket   *co_buf;
        int             num_buckets = STARTING_BUCKETS, num_full, check_past_jnl_end = 1, in_uncharted_territory=0;
        uint32_t        last_sequence_num = 0;
+       int             replay_retry_count = 0;
     
        // wrap the start ptr if it points to the very end of the journal
        if (jnl->jhdr->start == jnl->jhdr->size) {
@@ -1336,11 +1338,25 @@ restart_replay:
                
 bad_txn_handling:
                if (bad_blocks) {
+                       /* Journal replay got an error before it found any
+                        * valid transactions; abort replay */
                        if (txn_start_offset == 0) {
                                printf("jnl: %s: no known good txn start offset! aborting journal replay.\n", jnl->jdev_name);
                                goto bad_replay;
                        }
 
+                       /* Repeated errors during journal replay; abort replay */
+                       if (replay_retry_count == 3) {
+                               printf("jnl: %s: repeated errors replaying journal! aborting journal replay.\n", jnl->jdev_name);
+                               goto bad_replay;
+                       }
+                       replay_retry_count++;
+
+                       /* There was an error replaying the journal (possibly 
+                        * EIO/ENXIO from the device).  So retry replaying all 
+                        * the good transactions that we found before getting 
+                        * the error.  
+                        */
                        jnl->jhdr->start = orig_jnl_start;
                        jnl->jhdr->end = txn_start_offset;
                        check_past_jnl_end = 0;
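
The replay path now rewinds to the last known-good transaction boundary on transient errors (e.g. EIO/ENXIO from the device), but gives up after a fixed number of attempts. The control flow reduces to a bounded-retry loop; a condensed sketch with hypothetical helpers:

    int retries = 0;

    for (;;) {
            err = replay_transactions(jnl);         /* hypothetical */
            if (err == 0)
                    break;                          /* clean replay */
            if (txn_start_offset == 0 || retries++ == 3)
                    return -1;                      /* no good txn found, or the
                                                     * error keeps recurring */
            rewind_to(jnl, txn_start_offset);       /* hypothetical: retry the
                                                     * known-good prefix */
    }
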
@@ -1763,7 +1779,8 @@ journal_create(struct vnode *jvp,
        lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
        lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr);
        lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr);
-       
+
+
        jnl->flushing = FALSE;
        jnl->asyncIO = FALSE;
        jnl->flush_aborted = FALSE;
@@ -1911,26 +1928,24 @@ journal_open(struct vnode *jvp,
                jnl->jhdr->magic = JOURNAL_HEADER_MAGIC;
        }
 
-    if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
-       /*
-        * The volume has probably been resized (such that we had to adjust the
-        * logical sector size), or copied to media with a different logical
-        * sector size.
-        *
-        * Temporarily change the device's logical block size to match the
-        * journal's header size.  This will allow us to replay the journal
-        * safely.  If the replay succeeds, we will update the journal's header
-        * size (later in this function).
-        */
-
-       orig_blksz = phys_blksz;
-       phys_blksz = jnl->jhdr->jhdr_size;
-       VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context);
+       if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
+               /*
+                * The volume has probably been resized (such that we had to adjust the
+                * logical sector size), or copied to media with a different logical
+                * sector size.
+                * 
+                * Temporarily change the device's logical block size to match the
+                * journal's header size.  This will allow us to replay the journal
+                * safely.  If the replay succeeds, we will update the journal's header
+                * size (later in this function).
+                */
+               orig_blksz = phys_blksz;
+               phys_blksz = jnl->jhdr->jhdr_size;
+               VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context);
+               printf("jnl: %s: open: temporarily switched block size from %u to %u\n",
+                          jdev_name, orig_blksz, phys_blksz);
+       }
 
-       printf("jnl: %s: open: temporarily switched block size from %u to %u\n",
-              jdev_name, orig_blksz, phys_blksz);
-    }
-    
        if (   jnl->jhdr->start <= 0
               || jnl->jhdr->start > jnl->jhdr->size
               || jnl->jhdr->start > 1024*1024*1024) {
@@ -1980,68 +1995,71 @@ journal_open(struct vnode *jvp,
                printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name);
                goto bad_journal;
        }
-
-    /*
-     * When we get here, we know that the journal is empty (jnl->jhdr->start ==
-     * jnl->jhdr->end).  If the device's logical block size was different from
-     * the journal's header size, then we can now restore the device's logical
-     * block size and update the journal's header size to match.
-     *
-     * Note that we also adjust the journal's start and end so that they will
-     * be aligned on the new block size.  We pick a new sequence number to
-     * avoid any problems if a replay found previous transactions using the old
-     * journal header size.  (See the comments in journal_create(), above.)
-     */
-    if (orig_blksz != 0) {
-       VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
-       phys_blksz = orig_blksz;
-       orig_blksz = 0;
-       printf("jnl: %s: open: restored block size to %u\n", jdev_name, phys_blksz);
        
-       jnl->jhdr->jhdr_size = phys_blksz;
-       jnl->jhdr->start = phys_blksz;
-       jnl->jhdr->end = phys_blksz;
-       jnl->jhdr->sequence_num = (jnl->jhdr->sequence_num +
-                                  (journal_size / phys_blksz) +
-                                  (random() % 16384)) & 0x00ffffff;
+       /*
+        * When we get here, we know that the journal is empty (jnl->jhdr->start ==
+        * jnl->jhdr->end).  If the device's logical block size was different from
+        * the journal's header size, then we can now restore the device's logical
+        * block size and update the journal's header size to match.
+        *
+        * Note that we also adjust the journal's start and end so that they will
+        * be aligned on the new block size.  We pick a new sequence number to
+        * avoid any problems if a replay found previous transactions using the old
+        * journal header size.  (See the comments in journal_create(), above.)
+        */
        
-       if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num)) {
-               printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
+       if (orig_blksz != 0) {
+               VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
+               phys_blksz = orig_blksz;
+               
+               orig_blksz = 0;
+               
+               jnl->jhdr->jhdr_size = phys_blksz;
+               jnl->jhdr->start = phys_blksz;
+               jnl->jhdr->end = phys_blksz;
+               jnl->jhdr->sequence_num = (jnl->jhdr->sequence_num +
+                                                                  (journal_size / phys_blksz) +
+                                                                  (random() % 16384)) & 0x00ffffff;
+               
+               if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num)) {
+                       printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
+                       goto bad_journal;
+               }
+       }
+
+       // make sure this is in sync!
+       jnl->active_start = jnl->jhdr->start;
+       jnl->sequence_num = jnl->jhdr->sequence_num;
+
+       // set this now, after we've replayed the journal
+       size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
+
+       // TODO: Does this need to change if the device's logical block size changed?
+       if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) {
+               printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size,
+                      jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size);
                goto bad_journal;
        }
-    }
-    
-    // make sure this is in sync!
-    jnl->active_start = jnl->jhdr->start;
-    jnl->sequence_num = jnl->jhdr->sequence_num;
-
-    // set this now, after we've replayed the journal
-    size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
-
-    // TODO: Does this need to change if the device's logical block size changed?
-    if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) {
-       printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size,
-          jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size);
-       goto bad_journal;
-    }
-
-    lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
-
-    return jnl;
-
-  bad_journal:
-    if (orig_blksz != 0) {
-       phys_blksz = orig_blksz;
-       VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
-       printf("jnl: %s: open: restored block size to %u after error\n", jdev_name, orig_blksz);
-    }
-    kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
-  bad_kmem_alloc:
-    if (jdev_name) {
-       vfs_removename(jdev_name);
-    }
-    FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
-    return NULL;    
+
+       lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
+       lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr);
+       lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr);
+
+       return jnl;
+
+bad_journal:
+       if (orig_blksz != 0) {
+               phys_blksz = orig_blksz;
+               VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
+               printf("jnl: %s: open: restored block size after error\n", jdev_name);
+       }
+       kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
+bad_kmem_alloc:
+       if (jdev_name) {
+               vfs_removename(jdev_name);
+       }
+       FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
+       return NULL;    
 }
 
 
@@ -2351,7 +2369,7 @@ check_free_space(journal *jnl, int desired_size, boolean_t *delayed_header_write
 
                        lcl_counter = 0;
                        while (jnl->old_start[i] & 0x8000000000000000LL) {
-                               if (lcl_counter++ > 1000) {
+                               if (lcl_counter++ > 10000) {
                                        panic("jnl: check_free_space: tr starting @ 0x%llx not flushing (jnl %p).\n",
                                              jnl->old_start[i], jnl);
                                }
@@ -2922,7 +2940,6 @@ journal_kill_block(journal *jnl, struct buf *bp)
        return 0;
 }
 
-
 /*
 ;________________________________________________________________________________
 ;
@@ -3016,24 +3033,23 @@ trim_realloc(struct jnl_trim_list *trim)
        return 0;
 }
 
-
 /*
-;________________________________________________________________________________
-;
-; Routine:             trim_search_extent
-;
-; Function:            Search the given extent list to see if any of its extents
-                             overlap the given extent.
-;
-; Input Arguments:
-     trim            - The trim list to be searched.
-     offset          - The first byte of the range to be searched for.
-     length          - The number of bytes of the extent being searched for.
-;
-; Output:
-     (result)        - TRUE if one or more extents overlap, FALSE otherwise.
-;________________________________________________________________________________
-*/
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            trim_search_extent
+ ;
+ ; Function:           Search the given extent list to see if any of its extents
+ ;                             overlap the given extent.
+ ;
+ ; Input Arguments:
+ ;     trim            - The trim list to be searched.
+ ;     offset          - The first byte of the range to be searched for.
+ ;     length          - The number of bytes of the extent being searched for.
+ ;
+ ; Output:
+ ;     (result)        - TRUE if one or more extents overlap, FALSE otherwise.
+ ;________________________________________________________________________________
+ */
 static int
 trim_search_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length)
 {
@@ -3092,7 +3108,7 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
        dk_extent_t *extent;
        uint32_t insert_index;
        uint32_t replace_count;
-       
+               
        CHECK_JOURNAL(jnl);
 
        /* TODO: Is it OK to manipulate the trim list even if JOURNAL_INVALID is set?  I think so... */
@@ -3112,9 +3128,9 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
        }
 
        free_old_stuff(jnl);
-       
+               
        end = offset + length;
-       
+               
        /*
         * Find the range of existing extents that can be combined with the
         * input extent.  We start by counting the number of extents that end
@@ -3132,7 +3148,7 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
                ++replace_count;
                ++extent;
        }
-       
+               
        /*
         * If none of the existing extents can be combined with the input extent,
         * then just insert it in the list (before item number insert_index).
@@ -3331,24 +3347,23 @@ trim_remove_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length)
        return 0;
 }
 
-
 /*
-;________________________________________________________________________________
-;
-; Routine:             journal_trim_remove_extent
-;
-; Function:            Make note of a range of bytes, some of which may have previously
-                             been passed to journal_trim_add_extent, is now in use on the
-                             volume.  The given bytes will be not be trimmed as part of
-                             this transaction, or a pending trim of a transaction being
-                             asynchronously flushed.
-;
-; Input Arguments:
-     jnl                     - The journal for the volume containing the byte range.
-     offset          - The first byte of the range to be trimmed.
-     length          - The number of bytes of the extent being trimmed.
-;________________________________________________________________________________
-*/
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            journal_trim_remove_extent
+ ;
+ ; Function:           Make note that a range of bytes, some of which may have previously
+ ;                             been passed to journal_trim_add_extent, is now in use on the
+ ;                             volume.  The given bytes will not be trimmed as part of
+ ;                             this transaction, or of a pending trim of a transaction being
+ ;                             asynchronously flushed.
+ ;
+ ; Input Arguments:
+ ;     jnl                     - The journal for the volume containing the byte range.
+ ;     offset          - The first byte of the range to be trimmed.
+ ;     length          - The number of bytes of the extent being trimmed.
+ ;________________________________________________________________________________
+ */
 __private_extern__ int
 journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length)
 {
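For orientation, a hedged sketch of a caller (the wrapper name and block-to-byte conversion below are illustrative, not from this commit): a filesystem that hands previously freed blocks back out must make this call before reusing them, or the DKIOCUNMAP later issued by journal_trim_flush could discard live data.

/* Illustrative wrapper only; units assumed to be device bytes. */
static int
example_mark_blocks_in_use(journal *jnl, uint64_t start_block,
                           uint64_t num_blocks, uint32_t block_size)
{
	uint64_t offset = (uint64_t)start_block * block_size;
	uint64_t length = (uint64_t)num_blocks * block_size;

	return journal_trim_remove_extent(jnl, offset, length);
}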
@@ -3374,7 +3389,7 @@ journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length)
        }
 
        free_old_stuff(jnl);
-       
+               
        error = trim_remove_extent(&tr->trim, offset, length);
        if (error == 0) {
                int found = FALSE;
@@ -3424,11 +3439,11 @@ journal_trim_flush(journal *jnl, transaction *tr)
        if (jnl_kdebug)
                KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, jnl, tr, 0, tr->trim.extent_count, 0);
 
+       lck_rw_lock_shared(&jnl->trim_lock);
        if (tr->trim.extent_count > 0) {
                dk_unmap_t unmap;
                                
                bzero(&unmap, sizeof(unmap));
-               lck_rw_lock_shared(&jnl->trim_lock);
                if (CONFIG_HFS_TRIM && (jnl->flags & JOURNAL_USE_UNMAP)) {
                        unmap.extents = tr->trim.extents;
                        unmap.extentsCount = tr->trim.extent_count;
@@ -3439,12 +3454,12 @@ journal_trim_flush(journal *jnl, transaction *tr)
                                KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_END, errno, 0, 0, 0, 0);
                        if (errno) {
                                printf("jnl: error %d from DKIOCUNMAP (extents=%lx, count=%u); disabling trim for %s\n",
-                                               errno, (unsigned long) (unmap.extents), unmap.extentsCount,
-                                               jnl->jdev_name);
+                                          errno, (unsigned long) (unmap.extents), unmap.extentsCount,
+                                          jnl->jdev_name);
                                jnl->flags &= ~JOURNAL_USE_UNMAP;
                        }
                }
-
+               
                /*
                 * Call back into the file system to tell them that we have
                 * trimmed some extents and that they can now be reused.
@@ -3456,9 +3471,8 @@ journal_trim_flush(journal *jnl, transaction *tr)
                 */
                if (jnl->trim_callback)
                        jnl->trim_callback(jnl->trim_callback_arg, tr->trim.extent_count, tr->trim.extents);
-
-               lck_rw_unlock_shared(&jnl->trim_lock);
        }
+       lck_rw_unlock_shared(&jnl->trim_lock);
 
        /*
         * If the transaction we're flushing was the async transaction, then
@@ -3475,6 +3489,11 @@ journal_trim_flush(journal *jnl, transaction *tr)
                jnl->async_trim = NULL;
        lck_rw_unlock_exclusive(&jnl->trim_lock);
 
+       /*
+        * By the time we get here, no other thread can discover the address
+        * of "tr", so it is safe for us to manipulate tr->trim without
+        * holding any locks.
+        */
        if (tr->trim.extents) {                 
                kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
                tr->trim.allocated_count = 0;
@@ -3488,7 +3507,6 @@ journal_trim_flush(journal *jnl, transaction *tr)
        return errno;
 }
 
-
 static int
 journal_binfo_cmp(const void *a, const void *b)
 {
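The comparator body is elided by this hunk. A sketch consistent with the 64-bit bnum field used by the surrounding journal code (blhdr->binfo[i].bnum below); the element type name and layout are assumed:

	/*
	 * Sketch only: order binfo entries by block number.  bnum is
	 * 64-bit, so compare explicitly rather than returning the
	 * difference, which would truncate when cast to int.
	 */
	const block_info *bi_a = (const block_info *)a;	/* element type assumed */
	const block_info *bi_b = (const block_info *)b;

	if (bi_a->bnum < bi_b->bnum)
		return -1;
	if (bi_a->bnum > bi_b->bnum)
		return 1;
	return 0;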
@@ -3607,7 +3625,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0);
                goto done;
        }
-
+       
        /*
         * Store a pointer to this transaction's trim list so that
         * future transactions can find it.
@@ -3634,7 +3652,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
         * of the journal flush, 'saved_sequence_num' remains stable
         */
        jnl->saved_sequence_num = jnl->sequence_num;
-
+       
        /*
         * if we're here we're going to flush the transaction buffer to disk.
         * 'check_free_space' will not return until there is enough free
@@ -3822,15 +3840,7 @@ done:
 static void
 finish_end_thread(transaction *tr)
 {
-#if !CONFIG_EMBEDDED
        proc_apply_thread_selfdiskacc(IOPOL_PASSIVE);
-#else /* !CONFIG_EMBEDDED */
-       struct uthread  *ut;
-
-       ut = get_bsdthread_info(current_thread());
-       ut->uu_iopol_disk = IOPOL_PASSIVE;
-#endif /* !CONFIG_EMBEDDED */
-
        finish_end_transaction(tr, NULL, NULL);
 
        thread_deallocate(current_thread());
@@ -3840,14 +3850,7 @@ finish_end_thread(transaction *tr)
 static void
 write_header_thread(journal *jnl)
 {
-#if !CONFIG_EMBEDDED
        proc_apply_thread_selfdiskacc(IOPOL_PASSIVE);
-#else /* !CONFIG_EMBEDDED */
-       struct uthread  *ut;
-
-       ut = get_bsdthread_info(current_thread());
-       ut->uu_iopol_disk = IOPOL_PASSIVE;
-#endif /* !CONFIG_EMBEDDED */
 
        if (write_journal_header(jnl, 1, jnl->saved_sequence_num))
                jnl->write_header_failed = TRUE;
@@ -4249,7 +4252,7 @@ abort_transaction(journal *jnl, transaction *tr)
                                         */
                                        vnode_rele_ext(bp_vp, 0, 1);
                                } else {
-                                       printf("jnl: %s: abort_tr: could not find block %Ld vp %p!\n",
+                                       printf("jnl: %s: abort_tr: could not find block %lld vp %p!\n",
                                               jnl->jdev_name, blhdr->binfo[i].bnum, tbp);
                                        if (bp) {
                                                buf_brelse(bp);
@@ -4276,6 +4279,7 @@ abort_transaction(journal *jnl, transaction *tr)
                jnl->async_trim = NULL;
        lck_rw_unlock_exclusive(&jnl->trim_lock);
        
+       
        if (tr->trim.extents) {
                kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
        }
@@ -4520,7 +4524,8 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
 {
        int             ret;
        transaction     *tr;
-       
+       size_t i = 0;
+
        /*
         * Sanity check inputs, and adjust the size of the transaction buffer.
         */
@@ -4565,7 +4570,23 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
                return ret;
        }
        wait_condition(jnl, &jnl->flushing, "end_transaction");
-       
+
+       /*
+        * At this point, we have completely flushed the contents of the current
+        * journal to disk (and have asynchronously written all of the txns to 
+        * their actual desired locations).  As a result, we can (and must) clear 
+        * out the old_start array.  If we do not, then if the last written transaction
+        * started at the beginning of the journal (starting 1 block into the 
+        * journal file) it could confuse the buffer_flushed callback. This is
+        * because we're about to reset the start/end pointers of the journal header
+        * below. 
+        */
+       lock_oldstart(jnl); 
+       for (i = 0; i < sizeof (jnl->old_start) / sizeof(jnl->old_start[0]); i++) { 
+               jnl->old_start[i] = 0; 
+       }
+       unlock_oldstart(jnl);
+
        /* Update the journal's offset and size in memory. */
        jnl->jdev_offset = offset;
        jnl->jhdr->start = jnl->jhdr->end = jnl->jhdr->jhdr_size;
index 11b24c3ee1b50d2f087bf34c133d98cddbe57489..7b7f4f319117e6e65a77a7cfcfb75b7aa6eb269c 100644 (file)
@@ -1,5 +1,6 @@
+
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -43,6 +44,7 @@
 #include <kern/locks.h>
 #include <sys/disk.h>
 
+
 typedef struct _blk_info {
     int32_t    bsize;
     union {
@@ -83,20 +85,20 @@ struct jnl_trim_list {
 typedef void (*jnl_trim_callback_t)(void *arg, uint32_t extent_count, const dk_extent_t *extents);
 
 typedef struct transaction {
-    int                  tbuffer_size;  // in bytes
-    char                *tbuffer;       // memory copy of the transaction
-    block_list_header   *blhdr;         // points to the first byte of tbuffer
-    int                  num_blhdrs;    // how many buffers we've allocated
-    int                  total_bytes;   // total # of bytes in transaction
-    int                  num_flushed;   // how many bytes have been flushed
-    int                  num_killed;    // how many bytes were "killed"
-    off_t                journal_start; // where in the journal this transaction starts
-    off_t                journal_end;   // where in the journal this transaction ends
-    struct journal      *jnl;           // ptr back to the journal structure
-    struct transaction  *next;          // list of tr's (either completed or to be free'd)
-    uint32_t             sequence_num;
-    struct jnl_trim_list trim;
-    boolean_t            delayed_header_write;
+    int                 tbuffer_size;  // in bytes
+    char               *tbuffer;       // memory copy of the transaction
+    block_list_header  *blhdr;         // points to the first byte of tbuffer
+    int                 num_blhdrs;    // how many buffers we've allocated
+    int                 total_bytes;   // total # of bytes in transaction
+    int                 num_flushed;   // how many bytes have been flushed
+    int                 num_killed;    // how many bytes were "killed"
+    off_t               journal_start; // where in the journal this transaction starts
+    off_t               journal_end;   // where in the journal this transaction ends
+    struct journal     *jnl;           // ptr back to the journal structure
+    struct transaction *next;          // list of tr's (either completed or to be free'd)
+    uint32_t            sequence_num;
+    struct jnl_trim_list trim;
+    boolean_t           delayed_header_write;
 } transaction;
 
 
@@ -136,7 +138,8 @@ typedef struct journal_header {
 typedef struct journal {
     lck_mtx_t           jlock;             // protects the struct journal data
     lck_mtx_t          flock;             // serializes flushing of journal
-    lck_rw_t            trim_lock;         // protects the async_trim field, below
+       lck_rw_t            trim_lock;         // protects the async_trim field, below
+
 
     struct vnode       *jdev;              // vnode of the device where the journal lives
     off_t               jdev_offset;       // byte offset to the start of the journal
@@ -154,7 +157,7 @@ typedef struct journal {
     boolean_t          asyncIO;
     boolean_t          writing_header;
     boolean_t          write_header_failed;
-
+       
     struct jnl_trim_list *async_trim;      // extents to be trimmed by transaction being asynchronously flushed
     jnl_trim_callback_t        trim_callback;
     void                               *trim_callback_arg;
@@ -163,8 +166,8 @@ typedef struct journal {
     int32_t             header_buf_size;
     journal_header     *jhdr;              // points to the first byte of header_buf
 
-    uint32_t           saved_sequence_num;
-    uint32_t           sequence_num;
+       uint32_t                saved_sequence_num;
+       uint32_t                sequence_num;
 
     off_t               max_read_size;
     off_t               max_write_size;
@@ -192,6 +195,7 @@ typedef struct journal {
 #define JOURNAL_DO_FUA_WRITES     0x00100000   // do force-unit-access writes
 #define JOURNAL_USE_UNMAP         0x00200000   // device supports UNMAP (TRIM)
 
+
 /* journal_open/create options are always in the low-16 bits */
 #define JOURNAL_OPTION_FLAGS_MASK 0x0000ffff
 
index 10b885d51829894bb5e63e3a2ff6f6713a29a76c..2c225cf19aec346b9d448bf60731e6dccee172b0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -524,12 +524,12 @@ lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname
        /* The "parent" of the stream is the file. */
        if (wantparent) {
                if (ndp->ni_dvp) {
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                        if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) {
                                ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
                                unlock_fsnode(ndp->ni_dvp, NULL);
                        }       
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
                        vnode_put(ndp->ni_dvp);
                }
                ndp->ni_dvp = dp;
@@ -1020,12 +1020,12 @@ lookup_error:
                if ((error == ENOENT) &&
                    (dp->v_flag & VROOT) && (dp->v_mount != NULL) &&
                    (dp->v_mount->mnt_flag & MNT_UNION)) {
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                        if ((cnp->cn_flags & FSNODELOCKHELD)) {
                                cnp->cn_flags &= ~FSNODELOCKHELD;
                                unlock_fsnode(dp, NULL);
                        }       
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
                        tdp = dp;
                        dp = tdp->v_mount->mnt_vnodecovered;
 
@@ -1098,12 +1098,12 @@ returned_from_lookup_path:
 
        return (0);
 bad2:
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if ((cnp->cn_flags & FSNODELOCKHELD)) {
                cnp->cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(ndp->ni_dvp, NULL);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        if (ndp->ni_dvp)
                vnode_put(ndp->ni_dvp);
 
@@ -1115,12 +1115,12 @@ bad2:
        return (error);
 
 bad:
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if ((cnp->cn_flags & FSNODELOCKHELD)) {
                cnp->cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(ndp->ni_dvp, NULL);
        }       
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        if (dp)
                vnode_put(dp);
        ndp->ni_vp = NULLVP;
@@ -1280,12 +1280,12 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
        vnode_t dp;
        char *tmppn;
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if ((cnp->cn_flags & FSNODELOCKHELD)) {
                cnp->cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(ndp->ni_dvp, NULL);
        }       
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
        if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
                return ELOOP;
@@ -1494,14 +1494,14 @@ bad:
 void
 namei_unlock_fsnode(struct nameidata *ndp) 
 {
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) {
                ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(ndp->ni_dvp, NULL);
        }       
 #else
        (void)ndp;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 }
 
 /*
@@ -1553,7 +1553,7 @@ nameidone(struct nameidata *ndp)
  * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with
  * no '>' padding.  But /foo_bar/spam would log "/foo_bar>>>>".
  */
-#if !defined(NO_KDEBUG)
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
 static void
 kdebug_lookup(struct vnode *dp, struct componentname *cnp)
 {
@@ -1590,7 +1590,7 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp)
        if (dbg_namelen <= 12)
                code |= DBG_FUNC_END;
 
-       KERNEL_DEBUG_CONSTANT(code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);
 
        code &= ~DBG_FUNC_START;
 
@@ -1598,15 +1598,15 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp)
                if (dbg_namelen <= 16)
                        code |= DBG_FUNC_END;
 
-               KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
        }
 }
-#else /* NO_KDEBUG */
+#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
 static void
 kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
 {
 }
-#endif /* NO_KDEBUG */
+#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
 
 int
 vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
index 43352545d1e931171fd2cf9420ee85d70401b074..d287837a3e0a31fd60029416063c17d7cdbab4a9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/kdebug.h>
 #include <sys/kauth.h>
 #include <sys/user.h>
+#include <sys/systm.h>
 #include <sys/kern_memorystatus.h>
+#include <sys/lockf.h>
 #include <miscfs/fifofs/fifo.h>
 
 #include <string.h>
 
 
 #include <kern/assert.h>
+#include <mach/kern_return.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
 
 #include <miscfs/specfs/specdev.h>
 
@@ -183,9 +188,11 @@ __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int);
 extern int system_inshutdown;
 
 static void vnode_list_add(vnode_t);
+static void vnode_async_list_add(vnode_t);
 static void vnode_list_remove(vnode_t);
 static void vnode_list_remove_locked(vnode_t);
 
+static void vnode_abort_advlocks(vnode_t);
 static errno_t vnode_drain(vnode_t);
 static void vgone(vnode_t, int flags);
 static void vclean(vnode_t vp, int flag);
@@ -223,6 +230,8 @@ static void vnode_resolver_detach(vnode_t);
 
 TAILQ_HEAD(freelst, vnode) vnode_free_list;    /* vnode free list */
 TAILQ_HEAD(deadlst, vnode) vnode_dead_list;    /* vnode dead list */
+TAILQ_HEAD(async_work_lst, vnode) vnode_async_work_list;
+
 
 TAILQ_HEAD(ragelst, vnode) vnode_rage_list;    /* vnode rapid age list */
 struct timeval rage_tv;
@@ -262,7 +271,6 @@ static int nummounts = 0;
        } while(0)
 
 
-
 /* remove a vnode from dead vnode list */
 #define VREMDEAD(fun, vp)      \
        do {    \
@@ -274,6 +282,17 @@ static int nummounts = 0;
        } while(0)
 
 
+/* remove a vnode from async work vnode list */
+#define VREMASYNC_WORK(fun, vp)        \
+       do {    \
+               VLISTCHECK((fun), (vp), "async_work");  \
+               TAILQ_REMOVE(&vnode_async_work_list, (vp), v_freelist); \
+               VLISTNONE((vp));        \
+               vp->v_listflag &= ~VLIST_ASYNC_WORK;    \
+               async_work_vnodes--;    \
+       } while(0)
+
+
 /* remove a vnode from rage vnode list */
 #define VREMRAGE(fun, vp)      \
        do {    \
@@ -304,15 +323,21 @@ u_int32_t vnodetarget;            /* target for vnreclaim() */
  */
 #define VNODE_FREE_MIN         CONFIG_VNODE_FREE_MIN   /* freelist should have at least this many */
 
+
+static void async_work_continue(void);
+
 /*
  * Initialize the vnode management data structures.
  */
 __private_extern__ void
 vntblinit(void)
 {
+       thread_t        thread = THREAD_NULL;
+
        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&vnode_rage_list);
        TAILQ_INIT(&vnode_dead_list);
+       TAILQ_INIT(&vnode_async_work_list);
        TAILQ_INIT(&mountlist);
 
        if (!vnodetarget)
@@ -329,6 +354,12 @@ vntblinit(void)
         * we want to cache
         */
        (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
+
+       /*
+        * create worker threads
+        */
+       kernel_thread_start((thread_continue_t)async_work_continue, NULL, &thread);
+       thread_deallocate(thread);
 }
 
 /* Reset the VM Object Cache with the values passed in */
@@ -1201,9 +1232,14 @@ vfs_getnewfsid(struct mount *mp)
  * Routines having to do with the management of the vnode table.
  */
 extern int (**dead_vnodeop_p)(void *);
-long numvnodes, freevnodes, deadvnodes;
+long numvnodes, freevnodes, deadvnodes, async_work_vnodes;
 
 
+int async_work_timed_out = 0;
+int async_work_handled = 0;
+int dead_vnode_wanted = 0;
+int dead_vnode_waited = 0;
+
 /*
  * Move a vnode from one mount queue to another.
  */
@@ -1555,6 +1591,34 @@ out:
 }
 
 
+static boolean_t
+vnode_on_reliable_media(vnode_t vp)
+{
+       if ( !(vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) && (vp->v_mount->mnt_flag & MNT_LOCAL) )
+               return (TRUE);
+       return (FALSE);
+}
+
+static void
+vnode_async_list_add(vnode_t vp)
+{
+       vnode_list_lock();
+
+       if (VONLIST(vp) || (vp->v_lflag & (VL_TERMINATE|VL_DEAD)))
+               panic("vnode_async_list_add: %p is in wrong state", vp);
+
+       TAILQ_INSERT_HEAD(&vnode_async_work_list, vp, v_freelist);
+       vp->v_listflag |= VLIST_ASYNC_WORK;
+
+       async_work_vnodes++;
+
+       vnode_list_unlock();
+
+       wakeup(&vnode_async_work_list);
+
+}
+
+
 /*
  * put the vnode on appropriate free list.
  * called with vnode LOCKED
@@ -1562,6 +1626,8 @@ out:
 static void
 vnode_list_add(vnode_t vp)
 {
+       boolean_t need_dead_wakeup = FALSE;
+
 #if DIAGNOSTIC
        lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
 #endif
@@ -1603,7 +1669,13 @@ vnode_list_add(vnode_t vp)
                        TAILQ_INSERT_HEAD(&vnode_dead_list, vp, v_freelist);
                        vp->v_listflag |= VLIST_DEAD;
                        deadvnodes++;
-               } else if ((vp->v_flag & VAGE)) {
+
+                       if (dead_vnode_wanted) {
+                               dead_vnode_wanted--;
+                               need_dead_wakeup = TRUE;
+                       }
+
+               } else if ( (vp->v_flag & VAGE) ) {
                        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
                        vp->v_flag &= ~VAGE;
                        freevnodes++;
@@ -1613,6 +1685,9 @@ vnode_list_add(vnode_t vp)
                }
        }
        vnode_list_unlock();
+
+       if (need_dead_wakeup == TRUE)
+               wakeup_one((caddr_t)&dead_vnode_wanted);
 }
 
 
@@ -1633,6 +1708,8 @@ vnode_list_remove_locked(vnode_t vp)
                        VREMRAGE("vnode_list_remove", vp);
                else if (vp->v_listflag & VLIST_DEAD)
                        VREMDEAD("vnode_list_remove", vp);
+               else if (vp->v_listflag & VLIST_ASYNC_WORK)
+                       VREMASYNC_WORK("vnode_list_remove", vp);
                else
                        VREMFREE("vnode_list_remove", vp);
        }
@@ -1744,9 +1821,15 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
                 * if it's been marked for termination
                 */
                if (dont_reenter) {
-                       if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) )
+                       if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) {
                                vp->v_lflag |= VL_NEEDINACTIVE;
-                       vp->v_flag |= VAGE;
+                               
+                               if (vnode_on_reliable_media(vp) == FALSE) {
+                                       vnode_async_list_add(vp);
+                                       goto done;
+                               }
+                       }
+                       vp->v_flag |= VAGE;
                }
                vnode_list_add(vp);
 
@@ -1947,6 +2030,7 @@ loop:
 #ifdef JOE_DEBUG
                                record_vp(vp, 1);
 #endif
+                               vnode_abort_advlocks(vp);
                                vnode_reclaim_internal(vp, 1, 1, 0);
                                vnode_dropiocount(vp);
                                vnode_list_add(vp);
@@ -2641,6 +2725,10 @@ vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
        int error;
        struct vfsconf vfsc;
 
+       if (namelen > CTL_MAXNAME) {
+               return (EINVAL);
+       }
+
        /* All non-VFS_GENERIC selectors, and within VFS_GENERIC the
         * VFS_MAXTYPENUM, VFS_CONF, and VFS_SET_PACKAGE_EXTS selectors,
         * need root privileges to apply modifiers. 
@@ -2729,6 +2817,7 @@ vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
         * We need to get back into the general MIB, so we need to re-prepend
         * CTL_VFS to our name and try userland_sysctl().
         */
+
        usernamelen = namelen + 1;
        MALLOC(username, int *, usernamelen * sizeof(*username),
            M_TEMP, M_WAITOK);
@@ -3039,8 +3128,10 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 
        if (features & DK_FEATURE_FORCE_UNIT_ACCESS)
                mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED;
+       
        if (features & DK_FEATURE_UNMAP)
-               mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED;
+               mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED;
+       
        return (error);
 }
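For context, the features word tested above comes from the block device earlier in this function. A hedged sketch of that query (DKIOCGETFEATURES and DK_FEATURE_* per sys/disk.h; devvp and mp are the parameters shown in the hunk header):

	vfs_context_t ctx = vfs_context_current();
	u_int32_t features = 0;

	/* Ask the block device which optional features it implements. */
	if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, ctx) == 0) {
		if (features & DK_FEATURE_UNMAP)
			mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED;
	}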
 
@@ -3058,8 +3149,20 @@ vfs_event_init(void)
 }
 
 void
-vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
-{
+vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
+{
+       if (event == VQ_DEAD || event == VQ_NOTRESP) {
+               struct mount *mp = vfs_getvfs(fsid);
+               if (mp) {
+                       mount_lock_spin(mp);
+                       if (data)
+                               mp->mnt_kern_flag &= ~MNT_LNOTRESP;     // Now responding
+                       else
+                               mp->mnt_kern_flag |= MNT_LNOTRESP;      // Not responding
+                       mount_unlock(mp);
+               }
+       }
+
        lck_mtx_lock(fs_klist_lock);
        KNOTE(&fs_klist, event);
        lck_mtx_unlock(fs_klist_lock);
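A hedged sketch of the call pattern the new branch enables (the helper name is illustrative): a filesystem flips the mount's responsiveness state by signalling VQ_NOTRESP with data == 0 when the server goes quiet and data != 0 when it answers again.

static void
example_fs_responsiveness(mount_t mp, int responding)
{
	/* Illustrative only: data == 0 sets MNT_LNOTRESP on the mount,
	 * data != 0 clears it, per the branch above. */
	vfs_event_signal(&mp->mnt_vfsstat.f_fsid, VQ_NOTRESP, (intptr_t)responding);
}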
@@ -3480,28 +3583,162 @@ SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW | CTLFLAG_ANYBODY,
        
 long num_reusedvnodes = 0;
 
+
+static vnode_t
+process_vp(vnode_t vp, int want_vp, int *deferred)
+{
+       unsigned int  vpid;
+
+       *deferred = 0;
+
+       vpid = vp->v_id;
+
+       vnode_list_remove_locked(vp);
+
+       vnode_list_unlock();
+
+       vnode_lock_spin(vp);
+
+       /* 
+        * We may have had to wait for the vnode_lock after removing the vp from
+        * the freelist, and the vid is bumped only at the very end of reclaim.
+        * So it is possible that we are looking at a vnode that is being
+        * terminated. If so, skip it.
+        */ 
+       if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || 
+           VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) {
+               /*
+                * we lost the race between dropping the list lock
+                * and picking up the vnode_lock... someone else
+                * used this vnode and it is now in a new state
+                */
+               vnode_unlock(vp);
+               
+               return (NULLVP);
+       }
+       if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) {
+               /*
+                * we did a vnode_rele_ext that asked for
+                * us not to reenter the filesystem during
+                * the release even though VL_NEEDINACTIVE was
+                * set... we'll do it here by doing a
+                * vnode_get/vnode_put
+                *
+                * pick up an iocount so that we can call
+                * vnode_put and drive the VNOP_INACTIVE...
+                * vnode_put will either leave us off 
+                * the freelist if a new ref comes in,
+                * or put us back on the end of the freelist
+                * or recycle us if we were marked for termination...
+                * so we'll just go grab a new candidate
+                */
+               vp->v_iocount++;
+#ifdef JOE_DEBUG
+               record_vp(vp, 1);
+#endif
+               vnode_put_locked(vp);
+               vnode_unlock(vp);
+
+               return (NULLVP);
+       }
+       /*
+        * Checks for anyone racing us for recycle
+        */ 
+       if (vp->v_type != VBAD) {
+               if (want_vp && vnode_on_reliable_media(vp) == FALSE) {
+                       vnode_async_list_add(vp);
+                       vnode_unlock(vp);
+                       
+                       *deferred = 1;
+
+                       return (NULLVP);
+               }
+               if (vp->v_lflag & VL_DEAD)
+                       panic("new_vnode(%p): the vnode is VL_DEAD but not VBAD", vp);
+
+               vnode_lock_convert(vp);
+               (void)vnode_reclaim_internal(vp, 1, want_vp, 0);
+
+               if (want_vp) {
+                       if ((VONLIST(vp)))
+                               panic("new_vnode(%p): vp on list", vp);
+                       if (vp->v_usecount || vp->v_iocount || vp->v_kusecount ||
+                           (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH)))
+                               panic("new_vnode(%p): free vnode still referenced", vp);
+                       if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
+                               panic("new_vnode(%p): vnode seems to be on mount list", vp);
+                       if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
+                               panic("new_vnode(%p): vnode still hooked into the name cache", vp);
+               } else {
+                       vnode_unlock(vp);
+                       vp = NULLVP;
+               }
+       }
+       return (vp);
+}
+
+
+
+static void
+async_work_continue(void)
+{
+       struct async_work_lst *q;
+       int     deferred;
+       vnode_t vp;
+
+       q = &vnode_async_work_list;
+
+       for (;;) {
+
+               vnode_list_lock();
+
+               if ( TAILQ_EMPTY(q) ) {
+                       assert_wait(q, (THREAD_UNINT));
+       
+                       vnode_list_unlock();
+                       
+                       thread_block((thread_continue_t)async_work_continue);
+
+                       continue;
+               }
+               async_work_handled++;
+
+               vp = TAILQ_FIRST(q);
+
+               vp = process_vp(vp, 0, &deferred);
+
+               if (vp != NULLVP)
+                       panic("found VBAD vp (%p) on async queue", vp);
+       }
+}
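The worker pairs with the wakeup(&vnode_async_work_list) in vnode_async_list_add above. Note the ordering in the sleep path, restated below: assert_wait registers interest in the event while the list lock is still held, so a wakeup arriving between the unlock and the block is not lost, and the continuation simply restarts the function from the top, which is why the loop carries no state across blocks.

	vnode_list_lock();
	if (TAILQ_EMPTY(&vnode_async_work_list)) {
		/* register on the event channel before dropping the lock */
		assert_wait(&vnode_async_work_list, THREAD_UNINT);
		vnode_list_unlock();
		/* sleep; when woken, re-enter async_work_continue at its start */
		thread_block((thread_continue_t)async_work_continue);
	}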
+
+
 static int
 new_vnode(vnode_t *vpp)
 {
        vnode_t vp;
-       int retries = 0;                                /* retry incase of tablefull */
+       uint32_t retries = 0, max_retries = 100;                /* retry in case the table is full */
        int force_alloc = 0, walk_count = 0;
-       unsigned int  vpid;
-       struct timespec ts;
+       boolean_t need_reliable_vp = FALSE;
+       int deferred;
+        struct timeval initial_tv;
         struct timeval current_tv;
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
         struct unsafe_fsnode *l_unsafefs = 0;
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        proc_t  curproc = current_proc();
 
+       initial_tv.tv_sec = 0;
 retry:
-       microuptime(&current_tv);
-
        vp = NULLVP;
 
        vnode_list_lock();
 
+       if (need_reliable_vp == TRUE)
+               async_work_timed_out++;
+
        if ((numvnodes - deadvnodes) < desiredvnodes || force_alloc) {
+               struct timespec ts;
+
                if ( !TAILQ_EMPTY(&vnode_dead_list)) {
                        /*
                         * Can always reuse a dead one
@@ -3534,6 +3771,7 @@ retry:
                vp->v_iocount = 1;
                goto done;
        }
+       microuptime(&current_tv);
 
 #define MAX_WALK_COUNT 1000
 
@@ -3542,10 +3780,10 @@ retry:
              (current_tv.tv_sec - rage_tv.tv_sec) >= RAGE_TIME_LIMIT)) {
 
                TAILQ_FOREACH(vp, &vnode_rage_list, v_freelist) {
-                   if ( !(vp->v_listflag & VLIST_RAGE))
-                       panic("new_vnode: vp (%p) on RAGE list not marked VLIST_RAGE", vp);
+                       if ( !(vp->v_listflag & VLIST_RAGE))
+                               panic("new_vnode: vp (%p) on RAGE list not marked VLIST_RAGE", vp);
 
-                   // if we're a dependency-capable process, skip vnodes that can
+                       // if we're a dependency-capable process, skip vnodes that can
                        // cause recycling deadlocks. (i.e. this process is diskimages
                        // helper and the vnode is in a disk image).  Note that the
                        // mount's virtual device status in mnt_kern_flag
                        // may not be updated if there are multiple devnode layers 
                        // may not be updated if there are multiple devnode layers 
                        // in between the disk image and the final consumer.
 
-                   if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
-                                       (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
-                               break;
-                   }
+                       if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
+                           (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
+                               /*
+                                * if need_reliable_vp == TRUE, then we've already sent one or more
+                                * non-reliable vnodes to the async thread for processing and timed
+                                * out waiting for a dead vnode to show up.  Use the MAX_WALK_COUNT
+                                * mechanism to first scan for a reliable vnode before forcing 
+                                * a new vnode to be created
+                                */
+                               if (need_reliable_vp == FALSE || vnode_on_reliable_media(vp) == TRUE)
+                                       break;
+                       }
 
-                   // don't iterate more than MAX_WALK_COUNT vnodes to
-                   // avoid keeping the vnode list lock held for too long.
-                   if (walk_count++ > MAX_WALK_COUNT) {
+                       // don't iterate more than MAX_WALK_COUNT vnodes to
+                       // avoid keeping the vnode list lock held for too long.
+
+                       if (walk_count++ > MAX_WALK_COUNT) {
                                vp = NULL;
-                       break;
-                   }
+                               break;
+                       }
                }
-
        }
 
        if (vp == NULL && !TAILQ_EMPTY(&vnode_free_list)) {
@@ -3583,19 +3829,27 @@ retry:
                        // may not be updated if there are multiple devnode layers 
                        // in between the disk image and the final consumer.
 
-                   if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
-                                       (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
-                               break;
-                   }
+                       if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
+                           (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
+                               /*
+                                * if need_reliable_vp == TRUE, then we've already sent one or more
+                                * non-reliable vnodes to the async thread for processing and timed
+                                * out waiting for a dead vnode to show up.  Use the MAX_WALK_COUNT
+                                * mechanism to first scan for a reliable vnode before forcing 
+                                * a new vnode to be created
+                                */
+                               if (need_reliable_vp == FALSE || vnode_on_reliable_media(vp) == TRUE)
+                                       break;
+                       }
 
-                   // don't iterate more than MAX_WALK_COUNT vnodes to
-                   // avoid keeping the vnode list lock held for too long.
-                   if (walk_count++ > MAX_WALK_COUNT) {
-                       vp = NULL;
-                       break;
-                   }
-               }
+                       // don't iterate more than MAX_WALK_COUNT vnodes to
+                       // avoid keeping the vnode list lock held for too long.
 
+                       if (walk_count++ > MAX_WALK_COUNT) {
+                               vp = NULL;
+                               break;
+                       }
+               }
        }
 
        //
@@ -3608,9 +3862,9 @@ retry:
        // the allocation.
        //
        if (vp == NULL && walk_count >= MAX_WALK_COUNT) {
-           force_alloc = 1;
-           vnode_list_unlock();
-           goto retry;
+               force_alloc = 1;
+               vnode_list_unlock();
+               goto retry;
        }
 
        if (vp == NULL) {
@@ -3618,9 +3872,9 @@ retry:
                 * we've reached the system imposed maximum number of vnodes
                 * but there isn't a single one available
                 * wait a bit and then retry... if we can't get a vnode
-                * after 100 retries, than log a complaint
+                * after our target number of retries, then log a complaint
                 */
-               if (++retries <= 100) {
+               if (++retries <= max_retries) {
                        vnode_list_unlock();
                        delay_for_interval(1, 1000 * 1000);
                        goto retry;
@@ -3631,12 +3885,12 @@ retry:
                log(LOG_EMERG, "%d desired, %d numvnodes, "
                        "%d free, %d dead, %d rage\n",
                        desiredvnodes, numvnodes, freevnodes, deadvnodes, ragevnodes);
-#if CONFIG_EMBEDDED
+#if CONFIG_JETSAM
                /*
                 * Running out of vnodes tends to make a system unusable. Start killing
                 * processes that jetsam knows are killable.
                 */
-               if (jetsam_kill_top_proc(TRUE, kJetsamFlagsKilledVnodes) < 0) {
+               if (memorystatus_kill_top_proc(TRUE, kMemorystatusFlagsKilledVnodes) < 0) {
                        /*
                         * If jetsam can't find any more processes to kill and there
                         * still aren't any free vnodes, panic. Hopefully we'll get a
@@ -3645,7 +3899,13 @@ retry:
                        panic("vnode table is full\n");
                }
 
-               delay_for_interval(1, 1000 * 1000);
+               /* 
+                * Now that we've killed someone, wait a bit and continue looking 
+                * (with fewer retries before trying another kill).
+                */
+               delay_for_interval(3, 1000 * 1000);
+               retries = 0;    
+               max_retries = 10;
                goto retry;
 #endif
 
@@ -3653,80 +3913,66 @@ retry:
                return (ENFILE);
        }
 steal_this_vp:
-       vpid = vp->v_id;
+       if ((vp = process_vp(vp, 1, &deferred)) == NULLVP) {
+               if (deferred) {
+                       int     elapsed_msecs;
+                       struct timeval elapsed_tv;
 
-       vnode_list_remove_locked(vp);
+                       if (initial_tv.tv_sec == 0)
+                               microuptime(&initial_tv);
 
-       vnode_list_unlock();
+                       vnode_list_lock();
 
-       vnode_lock_spin(vp);
+                       dead_vnode_waited++;
+                       dead_vnode_wanted++;
 
-       /* 
-        * We could wait for the vnode_lock after removing the vp from the freelist
-        * and the vid is bumped only at the very end of reclaim. So it is  possible
-        * that we are looking at a vnode that is being terminated. If so skip it.
-        */ 
-       if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || 
-                       VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) {
-               /*
-                * we lost the race between dropping the list lock
-                * and picking up the vnode_lock... someone else
-                * used this vnode and it is now in a new state
-                * so we need to go back and try again
-                */
-               vnode_unlock(vp);
-               goto retry;
-       }
-       if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) {
-               /*
-                * we did a vnode_rele_ext that asked for
-                * us not to reenter the filesystem during
-                * the release even though VL_NEEDINACTIVE was
-                * set... we'll do it here by doing a
-                * vnode_get/vnode_put
-                *
-                * pick up an iocount so that we can call
-                * vnode_put and drive the VNOP_INACTIVE...
-                * vnode_put will either leave us off 
-                * the freelist if a new ref comes in,
-                * or put us back on the end of the freelist
-                * or recycle us if we were marked for termination...
-                * so we'll just go grab a new candidate
-                */
-               vp->v_iocount++;
-#ifdef JOE_DEBUG
-               record_vp(vp, 1);
-#endif
-               vnode_put_locked(vp);
-               vnode_unlock(vp);
+                       /*
+                        * note that we're only going to explicitly wait 10ms
+                        * for a dead vnode to become available, since even if one
+                        * isn't available, a reliable vnode might now be available
+                        * at the head of the VRAGE or free lists... if so, we
+                        * can satisfy the new_vnode request with less latency than waiting
+                        * for the full 100ms duration we're ultimately willing to tolerate
+                        */
+                       assert_wait_timeout((caddr_t)&dead_vnode_wanted, (THREAD_INTERRUPTIBLE), 10000, NSEC_PER_USEC);
+
+                       vnode_list_unlock();
+
+                       thread_block(THREAD_CONTINUE_NULL);
+
+                       microuptime(&elapsed_tv);
+                       
+                       timevalsub(&elapsed_tv, &initial_tv);
+                       elapsed_msecs = elapsed_tv.tv_sec * 1000 + elapsed_tv.tv_usec / 1000;
+
+                       if (elapsed_msecs >= 100) {
+                               /*
+                                * we've waited long enough... 100ms is 
+                                * somewhat arbitrary for this case, but the
+                                * normal worst case latency used for UI
+                                * interaction is 100ms, so I've chosen to
+                                * go with that.
+                                *
+                                * setting need_reliable_vp to TRUE
+                                * forces us to find a reliable vnode
+                                * that we can process synchronously, or
+                                * to create a new one if the scan for
+                                * a reliable one hits the scan limit
+                                */
+                               need_reliable_vp = TRUE;
+                       }
+               }
                goto retry;
        }
        OSAddAtomicLong(1, &num_reusedvnodes);
 
-       /* Checks for anyone racing us for recycle */ 
-       if (vp->v_type != VBAD) {
-               if (vp->v_lflag & VL_DEAD)
-                       panic("new_vnode(%p): the vnode is VL_DEAD but not VBAD", vp);
-               vnode_lock_convert(vp);
-               (void)vnode_reclaim_internal(vp, 1, 1, 0);
 
-               if ((VONLIST(vp)))
-                       panic("new_vnode(%p): vp on list", vp);
-               if (vp->v_usecount || vp->v_iocount || vp->v_kusecount ||
-                   (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH)))
-                       panic("new_vnode(%p): free vnode still referenced", vp);
-               if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
-                       panic("new_vnode(%p): vnode seems to be on mount list", vp);
-               if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
-                       panic("new_vnode(%p): vnode still hooked into the name cache", vp);
-       }
-
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (vp->v_unsafefs) {
                l_unsafefs = vp->v_unsafefs;
                vp->v_unsafefs = (struct unsafe_fsnode *)NULL;
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
 #if CONFIG_MACF
        /*
@@ -3757,12 +4003,12 @@ steal_this_vp:
 
        vnode_unlock(vp);
 
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
        if (l_unsafefs) {
                lck_mtx_destroy(&l_unsafefs->fsnodelock, vnode_lck_grp);
                FREE_ZONE((void *)l_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS);
        }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
 
 done:
        *vpp = vp;
@@ -3988,6 +4234,18 @@ vnode_suspend(vnode_t vp)
        return(0);
 }
                                        
+/*
+ * Release any blocked locking requests on the vnode.
+ * Used for forced-unmounts.
+ *
+ * XXX What about network filesystems?
+ */
+static void
+vnode_abort_advlocks(vnode_t vp)
+{
+       if (vp->v_flag & VLOCKLOCAL)
+               lf_abort_advlocks(vp);
+}
                                        
 
 static errno_t 
@@ -4345,6 +4603,14 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
                        insert = 0;
                        vnode_unlock(vp);
                }
+
+               if (VCHR == vp->v_type) {
+                       u_int maj = major(vp->v_rdev);
+
+                       if (maj < (u_int)nchrdev &&
+                           (D_TYPEMASK & cdevsw[maj].d_type) == D_TTY)
+                               vp->v_flag |= VISTTY;
+               }
        }
 
        if (vp->v_type == VFIFO) {
@@ -4378,7 +4644,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
                         */
                        insmntque(vp, param->vnfs_mp);
                }
-#ifndef __LP64__
+#if CONFIG_VFS_FUNNEL
                if ((param->vnfs_mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE) == 0) {
                        MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
                                    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
@@ -4386,7 +4652,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
                        vp->v_unsafefs->fsnodeowner  = (void *)NULL;
                        lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
                }
-#endif /* __LP64__ */
+#endif /* CONFIG_VFS_FUNNEL */
        }
        if (dvp && vnode_ref(dvp) == 0) {
                vp->v_parent = dvp;
@@ -7747,7 +8013,7 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
                                       UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name),
                                       ctx);
                                nd_temp.ni_dvp = vp;
-                               error = unlink1(ctx, &nd_temp, 0);
+                               error = unlink1(ctx, &nd_temp, VNODE_REMOVE_SKIP_NAMESPACE_EVENT);
 
                                if (error &&  error != ENOENT) {
                                        goto outsc;
index 3d9b4591b4562a81f0af82f69cabd1a281a0b92a..652a8e34ac400f7ee9eb81deaeabd7910648ec57 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -182,33 +182,6 @@ int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *
 __private_extern__
 int unlink1(vfs_context_t, struct nameidata *, int);
 
-
-#ifdef __APPLE_API_OBSOLETE
-struct fstatv_args {
-       int fd;                 /* file descriptor of the target file */
-       struct vstat *vsb;      /* vstat structure for returned info  */
-};
-struct lstatv_args {
-       const char *path;       /* pathname of the target file       */
-       struct vstat *vsb;      /* vstat structure for returned info */
-};
-struct mkcomplex_args {
-        const char *path;      /* pathname of the file to be created */
-               mode_t mode;            /* access mode for the newly created file */
-        u_int32_t type;                /* format of the complex file */
-};
-struct statv_args {
-        const char *path;      /* pathname of the target file       */
-        struct vstat *vsb;     /* vstat structure for returned info */
-};
-
-int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
-int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
-int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
-int statv(proc_t p, struct statv_args *uap, int32_t *retval);
-
-#endif /* __APPLE_API_OBSOLETE */
-
 /*
  * incremented each time a mount or unmount operation occurs
  * used to invalidate the cached value of the rootvp in the
@@ -500,6 +473,16 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
                        goto out1;
                }
 
+               /*
+                * If content protection is enabled, update mounts are not
+                * allowed to turn it off.
+                */
+               if ((mp->mnt_flag & MNT_CPROTECT) && 
+                          ((flags & MNT_CPROTECT) == 0)) {
+                       error = EINVAL;
+                       goto out1;
+               }
+
 #ifdef CONFIG_IMGSRC_ACCESS 
                /* Can't downgrade the backer of the root FS */
                if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
@@ -534,6 +517,8 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
                }
                flag = mp->mnt_flag;
 
+
+
                mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 
                vfsp = mp->mnt_vtable;
@@ -1728,6 +1713,16 @@ safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
        int error;
        proc_t p = vfs_context_proc(ctx);
 
+       /*
+        * If the file system is not responding and MNT_NOBLOCK
+        * is set and not a forced unmount then return EBUSY.
+        */
+       if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
+               (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
+               error = EBUSY;
+               goto out;
+       }
+
        /*
         * Skip authorization if the mount is tagged as permissive and 
         * this is not a forced-unmount attempt.
@@ -2370,7 +2365,7 @@ fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t
 
        mp = vp->v_mount;
        if (!mp) {
-               error = EBADF;;
+               error = EBADF;
                goto out;
        }
        sp = &mp->mnt_vfsstat;
@@ -3052,6 +3047,14 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v
        fp->f_fglob->fg_ops = &vnops;
        fp->f_fglob->fg_data = (caddr_t)vp;
 
+#if CONFIG_PROTECT
+       if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
+               if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
+                       fp->f_fglob->fg_flag |= FENCRYPTED;
+               }
+       }
+#endif
+
        if (flags & (O_EXLOCK | O_SHLOCK)) {
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
@@ -3209,6 +3212,58 @@ open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
        return ciferror;
 }
 
+/* 
+ * Data-protected variant of open(2): atomically open (and, with O_CREAT,
+ * create) a file under the requested content-protection class.
+ *  
+ * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
+ */
+int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
+       int flags = uap->flags;
+       int class = uap->class;
+       int dpflags = uap->dpflags;
+
+       /* 
+        * Follow the same path as normal open(2)
+        * Look up the item if it exists, and acquire the vnode.
+        */
+       struct filedesc *fdp = p->p_fd;
+       struct vnode_attr va;
+       struct nameidata nd;
+       int cmode;
+       int error;
+       
+       VATTR_INIT(&va);
+       /* Mask off all but regular access permissions */
+       cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+       VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+
+       NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+              uap->path, vfs_context_current());
+
+       /* 
+        * Initialize the extra fields in vnode_attr to pass down our
+        * data-protection parameters:
+        * 1. the target cprotect class.
+        * 2. a flag marking the open as requiring raw-encrypted semantics. 
+        */ 
+       if (flags & O_CREAT) {  
+               VATTR_SET(&va, va_dataprotect_class, class);
+       }
+       
+       if (dpflags & O_DP_GETRAWENCRYPTED) {
+               if ( flags & (O_RDWR | O_WRONLY)) {
+                       /* Not allowed to write raw encrypted bytes */
+                       return EINVAL;          
+               }                       
+               VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+       }
+
+       error = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
+
+       return error;
+}
+
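A hedged userspace sketch of the call this handler services (prototype as in the comment above; the helper name and path handling are illustrative). With O_DP_GETRAWENCRYPTED the handler rejects O_RDWR/O_WRONLY and open1 marks the file FENCRYPTED, so reads return the raw encrypted bytes.

#include <fcntl.h>
#include <unistd.h>

/* Userspace prototype matching the comment above; assumed, not from a header here. */
int open_dprotected_np(const char *path, int flags, int class, int dpflags, int mode);

int
read_raw_ciphertext(const char *path, void *buf, size_t len)
{
	/* O_RDONLY: writes of raw encrypted bytes are rejected with EINVAL. */
	int fd = open_dprotected_np(path, O_RDONLY, 0, O_DP_GETRAWENCRYPTED, 0);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = read(fd, buf, len);	/* raw encrypted bytes, per FENCRYPTED */
	close(fd);
	return (int)n;
}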
+
 int
 open(proc_t p, struct open_args *uap, int32_t *retval)
 {
@@ -3889,7 +3944,7 @@ undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
  */
 /* ARGSUSED */
 int
-unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
+unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
 {
        vnode_t vp, dvp;
        int error;
@@ -3926,9 +3981,15 @@ lookup_continue:
 
 
        /* With Carbon delete semantics, busy files cannot be deleted */
-       if (nodelbusy) {
+       if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
                flags |= VNODE_REMOVE_NODELETEBUSY;
        }
+       
+       /* If we're told to, then skip any potential future upcalls */
+       if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
+               flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
+       }
+
 
        if (vp) {
                batched = vnode_compound_remove_available(vp);
@@ -4100,7 +4161,7 @@ delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
 
        NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
               uap->path, ctx);
-       return unlink1(ctx, &nd, 1);
+       return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
 }
 
 /*
@@ -5089,8 +5150,8 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
 #endif
 
 #if CONFIG_MACF
-       error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
-       if (error)
+       if (VATTR_IS_ACTIVE(vap, va_mode) &&
+           (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
                return (error);
 #endif
 
@@ -5887,7 +5948,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
 {
        vnode_t tvp, tdvp;
        vnode_t fvp, fdvp;
-       struct nameidata fromnd, tond;
+       struct nameidata *fromnd, *tond;
        vfs_context_t ctx = vfs_context_current();
        int error;
        int do_retry;
@@ -5901,42 +5962,49 @@ rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
        vnode_t oparent = NULLVP;
 #if CONFIG_FSE
        fse_info from_finfo, to_finfo;
-       struct vnode_attr fva, tva;
 #endif
        int from_truncated=0, to_truncated;
        int batched = 0;
        struct vnode_attr *fvap, *tvap;
        int continuing = 0;
-       
+       /* Heap-allocate the structs that are too big to live on the kernel stack. */
+       struct {
+               struct nameidata from_node, to_node;
+               struct vnode_attr fv_attr, tv_attr;
+       } * __rename_data;
+       MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+       fromnd = &__rename_data->from_node;
+       tond = &__rename_data->to_node;
+
        holding_mntlock = 0;
-    do_retry = 0;
+       do_retry = 0;
 retry:
        fvp = tvp = NULL;
        fdvp = tdvp = NULL;
        fvap = tvap = NULL;
        mntrename = FALSE;
 
-       NDINIT(&fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+       NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
               UIO_USERSPACE, uap->from, ctx);
-       fromnd.ni_flag = NAMEI_COMPOUNDRENAME;
+       fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
        
-       NDINIT(&tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+       NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
               UIO_USERSPACE, uap->to, ctx);
-       tond.ni_flag = NAMEI_COMPOUNDRENAME;
+       tond->ni_flag = NAMEI_COMPOUNDRENAME;
        
 continue_lookup:
-       if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
-               if ( (error = namei(&fromnd)) )
+       if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+               if ( (error = namei(fromnd)) )
                        goto out1;
-               fdvp = fromnd.ni_dvp;
-               fvp  = fromnd.ni_vp;
+               fdvp = fromnd->ni_dvp;
+               fvp  = fromnd->ni_vp;
 
                if (fvp && fvp->v_type == VDIR)
-                       tond.ni_cnd.cn_flags |= WILLBEDIR;
+                       tond->ni_cnd.cn_flags |= WILLBEDIR;
        }
 
-       if ((tond.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
-               if ( (error = namei(&tond)) ) {
+       if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+               if ( (error = namei(tond)) ) {
                        /*
                         * Translate error code for rename("dir1", "dir2/.").
                         */
@@ -5944,8 +6012,8 @@ continue_lookup:
                                error = EINVAL;
                        goto out1;
                }
-               tdvp = tond.ni_dvp;
-               tvp  = tond.ni_vp;
+               tdvp = tond->ni_dvp;
+               tvp  = tond->ni_vp;
        }       
 
        batched = vnode_compound_rename_available(fdvp);
@@ -5968,7 +6036,7 @@ continue_lookup:
        }
 
        if (!batched) {
-               error = vn_authorize_rename(fdvp, fvp, &fromnd.ni_cnd, tdvp, tvp, &tond.ni_cnd, ctx, NULL);
+               error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
                if (error) {
                        if (error == ENOENT) {
                                /*
@@ -6062,9 +6130,9 @@ continue_lookup:
         * XXX filesystem should take care of this itself, perhaps...
         */
        if (fvp == tvp && fdvp == tdvp) {
-               if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
-                   !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
-                         fromnd.ni_cnd.cn_namelen)) {
+               if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
+                   !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
+                         fromnd->ni_cnd.cn_namelen)) {
                        goto out1;
                }
        }
@@ -6106,7 +6174,7 @@ continue_lookup:
                         * nameidone has to happen before we vnode_put(tvp)
                         * since it may need to release the fs_nodelock on the tvp
                         */
-                       nameidone(&tond);
+                       nameidone(tond);
 
                        if (tvp)
                                vnode_put(tvp);
@@ -6116,7 +6184,7 @@ continue_lookup:
                         * nameidone has to happen before we vnode_put(fdvp)
                         * since it may need to release the fs_nodelock on the fvp
                         */
-                       nameidone(&fromnd);
+                       nameidone(fromnd);
 
                        vnode_put(fvp);
                        vnode_put(fdvp);
@@ -6155,23 +6223,23 @@ skipped_lookup:
                if (fvp) {
                        get_fse_info(fvp, &from_finfo, ctx);
                } else {
-                       error = vfs_get_notify_attributes(&fva);
+                       error = vfs_get_notify_attributes(&__rename_data->fv_attr);
                        if (error) {
                                goto out1;
                        }
 
-                       fvap = &fva;
+                       fvap = &__rename_data->fv_attr;
                }
 
                if (tvp) {
                        get_fse_info(tvp, &to_finfo, ctx);
                } else if (batched) {
-                       error = vfs_get_notify_attributes(&tva);
+                       error = vfs_get_notify_attributes(&__rename_data->tv_attr);
                        if (error) {
                                goto out1;
                        }
 
-                       tvap = &tva;
+                       tvap = &__rename_data->tv_attr;
                }
        }
 #else
@@ -6187,7 +6255,7 @@ skipped_lookup:
                        }
                }
 
-               from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+               from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
 
                if (to_name == NULL) {
                        GET_PATH(to_name);
@@ -6197,11 +6265,11 @@ skipped_lookup:
                        }
                }
 
-               to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+               to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
        } 
        
-       error = vn_rename(fdvp, &fvp, &fromnd.ni_cnd, fvap,
-                           tdvp, &tvp, &tond.ni_cnd, tvap,
+       error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
+                           tdvp, &tvp, &tond->ni_cnd, tvap,
                            0, ctx);
 
        if (holding_mntlock) {
@@ -6215,14 +6283,14 @@ skipped_lookup:
        }
        if (error) {
                if (error == EKEEPLOOKING) {
-                       if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
-                               if ((tond.ni_flag & NAMEI_CONTLOOKUP) == 0) {
+                       if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+                               if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
                                        panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
                                }
                        }
 
-                       fromnd.ni_vp = fvp;
-                       tond.ni_vp = tvp;
+                       fromnd->ni_vp = fvp;
+                       tond->ni_vp = tvp;
        
                        goto continue_lookup;
                }
@@ -6335,7 +6403,7 @@ skipped_lookup:
                if (fdvp != tdvp)
                        update_flags |= VNODE_UPDATE_PARENT;
 
-               vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
+               vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
        }
 out1:
        if (to_name != NULL) {
@@ -6356,7 +6424,7 @@ out1:
                 * nameidone has to happen before we vnode_put(tdvp)
                 * since it may need to release the fs_nodelock on the tdvp
                 */
-               nameidone(&tond);
+               nameidone(tond);
 
                if (tvp)
                        vnode_put(tvp);
@@ -6367,22 +6435,24 @@ out1:
                 * nameidone has to happen before we vnode_put(fdvp)
                 * since it may need to release the fs_nodelock on the fdvp
                 */
-               nameidone(&fromnd);
+               nameidone(fromnd);
 
                if (fvp)
                        vnode_put(fvp);
                vnode_put(fdvp);
        }
        
+       
        /*
         * If things changed after we did the namei, then we will re-drive
         * this rename call from the top.
         */
-       if(do_retry) {
+       if (do_retry) {
                do_retry = 0;
                goto retry;
        }
-       
+
+       FREE(__rename_data, M_TEMP);
        return (error);
 }
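
The motivation for __rename_data: two struct nameidata plus two struct vnode_attr are large enough to endanger the limited kernel stack, so rename() now keeps them in a single temporary heap block for the lifetime of the call and frees it on the common exit path. The pattern in isolation (a sketch; the struct name and the size comment are illustrative):

    struct big_locals {
            struct nameidata from_node, to_node;   /* each well over a hundred bytes */
            struct vnode_attr fv_attr, tv_attr;
    } *locals;

    MALLOC(locals, struct big_locals *, sizeof(*locals), M_TEMP, M_WAITOK);
    /* ... use locals->from_node, locals->fv_attr, ... */
    FREE(locals, M_TEMP);
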
 
@@ -6790,7 +6860,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                 * use 32K in the MIN(), but we use magic number 87371 to
                 * prevent uio_resid() * 3 / 8 from overflowing. 
                 */
-               bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
+               bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
                MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
                if (bufptr == NULL) {
                        return ENOMEM;
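
The clamp value works because 3 x 87371 = 262113, which is nowhere near overflowing even 32-bit arithmetic, and 262113 / 8 = 32764, so bufsize stays just under the 32 KB mentioned in the comment; the unsigned cast also means a negative resid clamps safely instead of slipping through MIN(). A quick standalone check of the arithmetic:

    #include <assert.h>

    int main(void)
    {
            unsigned int clamp = 87371u;

            assert(3u * clamp == 262113u);             /* intermediate product stays small */
            assert(3u * clamp / 8u == 32764u);         /* max bufsize, just under 32 KB    */
            assert(3u * (clamp + 3u) / 8u == 32765u);  /* a larger clamp would exceed it   */
            return 0;
    }
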
@@ -7096,65 +7166,6 @@ out:
  *  which are specific to the HFS & HFS Plus volume formats
  */
 
-#ifdef __APPLE_API_OBSOLETE
-
-/************************************************/
-/* *** Following calls will be deleted soon *** */
-/************************************************/
-
-/*
- * Make a complex file.  A complex file is one with multiple forks (data streams)
- */
-/* ARGSUSED */
-int
-mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
-{
-       return (ENOTSUP);
-}
-
-/*
- * Extended stat call which returns volumeid and vnodeid as well as other info
- */
-/* ARGSUSED */
-int
-statv(__unused proc_t p,
-         __unused struct statv_args *uap,
-         __unused int32_t *retval)
-{
-       return (ENOTSUP);       /*  We'll just return an error for now */
-
-} /* end of statv system call */
-
-/*
-* Extended lstat call which returns volumeid and vnodeid as well as other info
-*/
-/* ARGSUSED */
-int
-lstatv(__unused proc_t p,
-          __unused struct lstatv_args *uap,
-          __unused int32_t *retval)
-{
-       return (ENOTSUP);       /*  We'll just return an error for now */
-} /* end of lstatv system call */
-
-/*
-* Extended fstat call which returns volumeid and vnodeid as well as other info
-*/
-/* ARGSUSED */
-int
-fstatv(__unused proc_t p, 
-          __unused struct fstatv_args *uap, 
-          __unused int32_t *retval)
-{
-       return (ENOTSUP);       /*  We'll just return an error for now */
-} /* end of fstatv system call */
-
-
-/************************************************/
-/* *** Preceding calls will be deleted soon *** */
-/************************************************/
-
-#endif /* __APPLE_API_OBSOLETE */
 
 /*
 * Obtain attribute information on objects in a directory while enumerating
@@ -7421,6 +7432,7 @@ out2:
         return (error);
 }
 
+#if CONFIG_SEARCHFS
 
 /* ARGSUSED */
 
@@ -7638,6 +7650,15 @@ freeandexit:
 
 } /* end of searchfs system call */
 
+#else /* CONFIG_SEARCHFS */
+
+int
+searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
+{
+       return (ENOTSUP);
+}
+
+#endif /* CONFIG_SEARCHFS */
 
 
 lck_grp_attr_t *  nspace_group_attr;
@@ -9221,6 +9242,7 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
 #endif
        /* Obtain the absolute path to this vnode. */
        bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
+       bpflags |= BUILDPATH_CHECK_MOVED;
        error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
        vnode_put(vp);
        if (error) {
index d7e2b5f1488a6bc89ef1416c23cde2d4b786b0b7..47552a0d8e0a33eb23083bcee7560767b6e26a5b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -111,6 +111,10 @@ int        ubc_setcred(struct vnode *, struct proc *);
 #include <security/mac_framework.h>
 #endif
 
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
 
 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
@@ -445,7 +449,8 @@ continue_create_lookup:
                        }
 
                        need_vnop_open = !did_open;
-               } else {
+               } 
+               else {
                        if (fmode & O_EXCL)
                                error = EEXIST;
 
@@ -555,6 +560,25 @@ continue_create_lookup:
                        }
                }
 
+#if CONFIG_PROTECT
+               /*
+                * Perform any content protection access checks prior to calling
+                * into the filesystem, unless raw encrypted mode was requested:
+                * skip the checks only when va_dataprotect_flags is active AND
+                * has the VA_DP_RAWENCRYPTED bit set.
+                */
+               if (!(VATTR_IS_ACTIVE(vap, va_dataprotect_flags)) ||
+                   ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
+                       error = cp_handle_open(vp, fmode);
+                       if (error) {
+                               goto bad;
+                       }
+               }
+#endif
+
                error = VNOP_OPEN(vp, fmode, ctx);
                if (error) {
                        goto bad;
@@ -877,13 +901,18 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
        }
 #endif
 
-       ioflag = 0;
+       /* IO_SYSCALL_DISPATCH tells VNOP handlers that this read was issued via the file-table (read(2)) path */
+       ioflag = IO_SYSCALL_DISPATCH;
+
        if (fp->f_fglob->fg_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
-               ioflag |= IO_NOCACHE;
+           ioflag |= IO_NOCACHE;
+       if (fp->f_fglob->fg_flag & FENCRYPTED) {
+               ioflag |= IO_ENCRYPTED;
+       }
        if (fp->f_fglob->fg_flag & FNORDAHEAD)
-               ioflag |= IO_RAOFF;
+           ioflag |= IO_RAOFF;
 
        if ((flags & FOF_OFFSET) == 0)
                uio->uio_offset = fp->f_fglob->fg_offset;
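
FENCRYPTED is presumably the fileglob flag set when the file was opened with O_DP_GETRAWENCRYPTED, so together with the open_dprotected_np() plumbing earlier in this commit, a privileged tool can read a protected file's raw ciphertext through the ordinary read path. A userspace sketch (the prototype and path are assumptions, as before):

    /* Open read-only: open_dprotected_np() rejects O_RDWR/O_WRONLY
     * with EINVAL when O_DP_GETRAWENCRYPTED is requested. */
    int fd = open_dprotected_np("/var/mobile/somefile", O_RDONLY,
                                0, O_DP_GETRAWENCRYPTED);
    char buf[4096];
    ssize_t n = read(fd, buf, sizeof(buf));  /* ciphertext, not plaintext */
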
@@ -931,7 +960,12 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
        }
 #endif
 
-       ioflag = IO_UNIT;
+       /*
+        * IO_SYSCALL_DISPATCH tells VNOP handlers that this write was issued
+        * via the file-table (write(2)) path.
+        */
+       ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);
+
        if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
                ioflag |= IO_APPEND;
        if (fp->f_fglob->fg_flag & FNONBLOCK)
@@ -940,6 +974,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                ioflag |= IO_NOCACHE;
        if (fp->f_fglob->fg_flag & FNODIRECT)
                ioflag |= IO_NODIRECT;
+       if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
+               ioflag |= IO_SINGLE_WRITER;
 
        /*
         * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
@@ -1289,14 +1325,14 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
                                        error = ENXIO;
                                        goto out;
                                }
-                               *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
+                               *(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type;
 
                        } else if (vp->v_type == VCHR) {
                                if (major(vp->v_rdev) >= nchrdev) {
                                        error = ENXIO;
                                        goto out;
                                }
-                               *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
+                               *(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type;
                        } else {
                                error = ENOTTY;
                                goto out;
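
The new masking suggests d_type now carries flag bits alongside the device type; applying D_TYPEMASK in FIODTYPE strips them so userspace continues to see only the type. Existing consumers are unaffected (sketch):

    #include <sys/ioctl.h>
    #include <sys/filio.h>
    #include <sys/conf.h>      /* D_DISK et al. */

    int dtype;
    if (ioctl(fd, FIODTYPE, &dtype) == 0 && dtype == D_DISK) {
            /* fd refers to a block-storage device */
    }
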
index a37ba0f748d88a405820d2e3ca5fc37bd6e10f1e..94715ae558c70306078e6cb4cd901eb8fa3bfe78 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #if NAMEDSTREAMS
 
+static int shadow_sequence;
+
 /*
  * We use %p to prevent loss of precision for pointers on varying architectures.
  */
+
+#define SHADOW_NAME_FMT                ".vfs_rsrc_stream_%p%08x%p"
+#define SHADOW_DIR_FMT         ".vfs_rsrc_streams_%p%x"
+#define SHADOW_DIR_CONTAINER "/var/run"
+
 #define MAKE_SHADOW_NAME(VP, NAME)  \
-       snprintf((NAME), sizeof((NAME)), ".vfs_rsrc_stream_%p%08x%p", (void*)(VP), (VP)->v_id, (VP)->v_data);
+       snprintf((NAME), sizeof((NAME)), (SHADOW_NAME_FMT), \
+                       ((void*)(VM_KERNEL_ADDRPERM(VP))), \
+                       ((VP)->v_id), \
+                       ((void*)(VM_KERNEL_ADDRPERM((VP)->v_data))))
 
-static int shadow_sequence;
+/* The full path to the shadow directory */
+#define MAKE_SHADOW_DIRNAME(VP, NAME)  \
+       snprintf((NAME), sizeof((NAME)), (SHADOW_DIR_CONTAINER "/" SHADOW_DIR_FMT), \
+                       ((void*)(VM_KERNEL_ADDRPERM(VP))), shadow_sequence)
+
+/* The shadow directory as a 'leaf' entry */
+#define MAKE_SHADOW_DIR_LEAF(VP, NAME) \
+       snprintf((NAME), sizeof((NAME)), (SHADOW_DIR_FMT), \
+                       ((void*)(VM_KERNEL_ADDRPERM(VP))), shadow_sequence)
 
 
 static int  default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperation op, vfs_context_t context);
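
The practical effect of VM_KERNEL_ADDRPERM in these macros is that shadow-file names in /var/run no longer leak raw kernel pointers: the vnode and v_data addresses are permuted before being formatted. Expanded, the name macro behaves like this (a sketch; the buffer size and output string are illustrative):

    /* Given a vnode_t vp: */
    char shadow_name[MAXPATHLEN];

    /* Produces ".vfs_rsrc_stream_<obfuscated vp><v_id as %08x><obfuscated v_data>",
     * e.g. ".vfs_rsrc_stream_0x4f2a91c000002b130x7c19d2a0" (illustrative). */
    MAKE_SHADOW_NAME(vp, shadow_name);
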
@@ -960,8 +978,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context)
 
 
        bzero(tmpname, sizeof(tmpname));
-       snprintf(tmpname, sizeof(tmpname), "/var/run/.vfs_rsrc_streams_%p%x",
-                       (void*)rootvnode, shadow_sequence);
+       MAKE_SHADOW_DIRNAME(rootvnode, tmpname);
        /* 
         * Look up the shadow directory to ensure that it still exists. 
         * By looking it up, we get an iocounted dvp to use, and avoid some coherency issues
@@ -980,15 +997,21 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context)
        sdvp = NULLVP;
        bzero (tmpname, sizeof(tmpname));
 
-       /* Obtain the vnode for "/var/run" directory. */
-       if (vnode_lookup("/var/run", 0, &dvp, context) != 0) {
+       /*
+        * Obtain the vnode for the "/var/run" directory,
+        * as defined by the SHADOW_DIR_CONTAINER macro.
+        */
+       if (vnode_lookup(SHADOW_DIR_CONTAINER, 0, &dvp, context) != 0) {
                error = ENOTSUP;
                goto out;
        }
 
-       /* Create the shadow stream directory. */
-       snprintf(tmpname, sizeof(tmpname), ".vfs_rsrc_streams_%p%x",
-                (void*)rootvnode, shadow_sequence);
+       /*
+        * Create the shadow stream directory.
+        * 'dvp' below supplies the parent directory, so
+        * we only need to provide the leaf entry name.
+        */
+       MAKE_SHADOW_DIR_LEAF(rootvnode, tmpname);
        bzero(&cn, sizeof(cn));
        cn.cn_nameiop = LOOKUP;
        cn.cn_flags = ISLASTCN;
index bb2808ecf7fd780aab15a8bc06a5d33f7db87f1f..9df1b810effdd830ec465bda7c6dcfb67a778a8b 100644 (file)
@@ -279,12 +279,10 @@ macx_swapon(
 
 #if CONFIG_PROTECT
        {
-               void *cnode = NULL;
                /* initialize content protection keys manually */
-               if ((cnode = cp_get_protected_cnode(vp)) != 0) {
-                       if ((error = cp_handle_vnop(cnode, CP_WRITE_ACCESS)) != 0)
-                               goto swapon_bailout;
-               }
+               if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
+                       goto swapon_bailout;
+               }
        }
 #endif
 
@@ -474,21 +472,12 @@ macx_swapoff(
 
        ut = get_bsdthread_info(current_thread());
 
-#if !CONFIG_EMBEDDED
        orig_iopol_disk = proc_get_thread_selfdiskacc();
        proc_apply_thread_selfdiskacc(IOPOL_THROTTLE);
-#else /* !CONFIG_EMBEDDED */
-       orig_iopol_disk = ut->uu_iopol_disk;
-       ut->uu_iopol_disk = IOPOL_THROTTLE;
-#endif /* !CONFIG_EMBEDDED */
 
        kr = default_pager_backing_store_delete(backing_store);
 
-#if !CONFIG_EMBEDDED
        proc_apply_thread_selfdiskacc(orig_iopol_disk);
-#else /* !CONFIG_EMBEDDED */
-       ut->uu_iopol_disk = orig_iopol_disk;
-#endif /* !CONFIG_EMBEDDED */
 
        switch (kr) {
                case KERN_SUCCESS:
index 0190e70f73081b00b99d15b1e3cf600c18c2c991..0dd7823ce602e6598fe2245ecd5c3e3f875f0ff7 100644 (file)
@@ -77,6 +77,7 @@
 #include <sys/sysctl.h>
 #include <sys/cprotect.h>
 #include <sys/kpi_socket.h>
+#include <sys/kas_info.h>
 
 #include <security/audit/audit.h>
 #include <security/mac.h>
@@ -94,9 +95,7 @@
 
 #include <vm/vm_protos.h>
 
-#if CONFIG_FREEZE
 #include <sys/kern_memorystatus.h>
-#endif
 
 
 int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*); 
@@ -474,7 +473,7 @@ task_for_pid_posix_check(proc_t target)
        int allowed; 
 
        /* No task_for_pid on bad targets */
-       if (target == PROC_NULL || target->p_stat == SZOMB) {
+       if (target->p_stat == SZOMB) {
                return FALSE;
        }
 
@@ -573,9 +572,13 @@ task_for_pid(
 
 
        p = proc_find(pid);
+       if (p == PROC_NULL) {
+               error = KERN_FAILURE;
+               goto tfpout;
+       }
+
 #if CONFIG_AUDIT
-       if (p != PROC_NULL)
-               AUDIT_ARG(process, p);
+       AUDIT_ARG(process, p);
 #endif
 
        if (!(task_for_pid_posix_check(p))) {
@@ -745,6 +748,11 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
        }
 
        targetproc = proc_find(pid);
+       if (targetproc == PROC_NULL) {
+               error = ESRCH;
+               goto out;
+       }
+
        if (!task_for_pid_posix_check(targetproc)) {
                error = EPERM;
                goto out;
@@ -781,7 +789,7 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 #endif
 
        task_reference(target);
-       error = task_suspend(target);
+       error = task_pidsuspend(target);
        if (error) {
                if (error == KERN_INVALID_ARGUMENT) {
                        error = EINVAL;
@@ -789,12 +797,14 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
                        error = EPERM;
                }
        }
-       task_deallocate(target);
-
-#if CONFIG_FREEZE
-       kern_hibernation_on_pid_suspend(pid);
+#if CONFIG_MEMORYSTATUS
+       else {
+               memorystatus_on_suspend(pid);
+       }
 #endif
 
+       task_deallocate(target);
+
 out:
        if (targetproc != PROC_NULL)
                proc_rele(targetproc);
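
Each of these pid_* entry points now checks the proc_find() result explicitly instead of handing a possibly-NULL proc to task_for_pid_posix_check(). The shared reference-counting idiom, condensed (sketch):

    proc_t targetproc = proc_find(pid);      /* takes a reference, or PROC_NULL */
    if (targetproc == PROC_NULL) {
            error = ESRCH;
            goto out;
    }
    /* ... validate with task_for_pid_posix_check(), then operate ... */
    out:
    if (targetproc != PROC_NULL)
            proc_rele(targetproc);           /* drop the reference on every path */
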
@@ -824,6 +834,11 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
        }
 
        targetproc = proc_find(pid);
+       if (targetproc == PROC_NULL) {
+               error = ESRCH;
+               goto out;
+       }
+
        if (!task_for_pid_posix_check(targetproc)) {
                error = EPERM;
                goto out;
@@ -861,11 +876,11 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 
        task_reference(target);
 
-#if CONFIG_FREEZE
-       kern_hibernation_on_pid_resume(pid, target);
+#if CONFIG_MEMORYSTATUS
+       memorystatus_on_resume(pid);
 #endif
 
-       error = task_resume(target);
+       error = task_pidresume(target);
        if (error) {
                if (error == KERN_INVALID_ARGUMENT) {
                        error = EINVAL;
@@ -873,15 +888,15 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
                        error = EPERM;
                }
        }
+       
        task_deallocate(target);
 
 out:
        if (targetproc != PROC_NULL)
                proc_rele(targetproc);
+       
        *ret = error;
        return error;
-
-       return 0;
 }
 
 #if CONFIG_EMBEDDED
@@ -905,14 +920,19 @@ pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret
 #endif
 
        /*
-        * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
+        * The only accepted pid value here is currently -1, since we just kick off the freeze thread
         * here - individual ids aren't required. However, it's intended that this call will change
-        * in the future to initiate hibernation of individual processes. In anticipation, we'll obtain the
+        * in the future to initiate freeze of individual processes. In anticipation, we'll obtain the
         * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything
         * is validated correctly and set for further refactoring. See <rdar://problem/7839708> for more details.
         */
        if (pid >= 0) {
                targetproc = proc_find(pid);
+               if (targetproc == PROC_NULL) {
+                       error = ESRCH;
+                       goto out;
+               }
+
                if (!task_for_pid_posix_check(targetproc)) {
                        error = EPERM;
                        goto out;
@@ -920,7 +940,7 @@ pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret
        }
 
        if (pid == -1) {
-               kern_hibernation_on_pid_hibernate(pid);
+               memorystatus_on_inactivity(pid);
        } else {
                error = EPERM;
        }
@@ -962,6 +982,11 @@ pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *
 #endif
 
        targetproc = proc_find(pid);
+       if (targetproc == PROC_NULL) {
+               error = ESRCH;
+               goto out;
+       }
+
        if (!task_for_pid_posix_check(targetproc)) {
                error = EPERM;
                goto out;
@@ -1075,7 +1100,7 @@ shared_region_check_np(
        __unused int                            *retvalp)
 {
        vm_shared_region_t      shared_region;
-       mach_vm_offset_t        start_address;
+       mach_vm_offset_t        start_address = 0;
        int                     error;
        kern_return_t           kr;
 
@@ -1248,12 +1273,10 @@ _shared_region_map(
 #if CONFIG_PROTECT
        /* check for content protection access */
        {
-       void *cnode;
-       if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
-               error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
-               if (error) 
+               error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
+               if (error) { 
                        goto done;
-       }
+               }
        }
 #endif /* CONFIG_PROTECT */
 
@@ -1442,7 +1465,7 @@ _shared_region_slide(uint32_t slide,
        if (slide_info_entry == NULL){
                error = EFAULT;
        } else {        
-               error = copyin(slide_start,
+               error = copyin((user_addr_t)slide_start,
                               slide_info_entry,
                               (vm_size_t)slide_size);
        }
@@ -1482,20 +1505,22 @@ shared_region_map_and_slide_np(
 #define SFM_MAX_STACK  8
        struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];
 
+       /* Is the process chrooted? */
+       if (p->p_fd->fd_rdir != NULL) {
+               kr = EINVAL;
+               goto done;
+       }
+               
        if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
                if (kr == KERN_INVALID_ARGUMENT) {
                        /*
                         * This will happen if we request sliding again 
                         * with the same slide value that was used earlier
-                        * for the very first sliding. We continue through
-                        * to the mapping layer. This is so that we can be
-                        * absolutely certain that the same mappings have
-                        * been requested.
+                        * for the very first sliding.
                         */
                        kr = KERN_SUCCESS;
-               } else {
-                       goto done;
                }
+               goto done;
        }
 
        if (mappings_count == 0) {
@@ -1603,6 +1628,66 @@ SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.can_reuse_failure, "");
 
 
+extern unsigned int vm_page_free_count, vm_page_speculative_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
+
+extern unsigned int vm_page_cleaned_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
+
+/* pageout counts */
+extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
+extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");
+
+extern unsigned int vm_pageout_freed_from_cleaned;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");
+
+/* counts of pages entering the cleaned queue */
+extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");
+
+/* counts of pages leaving the cleaned queue */
+extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
+
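
All of the counters above are read-only and can be inspected from userspace with sysctlbyname(3), for example (sketch):

    #include <sys/sysctl.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int cleaned = 0;
            size_t len = sizeof(cleaned);

            if (sysctlbyname("vm.page_cleaned_count", &cleaned, &len, NULL, 0) == 0)
                    printf("cleaned queue: %u pages\n", cleaned);
            return 0;
    }
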
+#include <kern/thread.h>
+#include <sys/user.h>
+
+void vm_pageout_io_throttle(void);
+
+void vm_pageout_io_throttle(void) {
+       struct uthread *uthread = get_bsdthread_info(current_thread());
+
+       /*
+        * If the thread is marked as a low-priority I/O type and the I/O
+        * we issued while in this cleaning operation collided with normal
+        * I/O operations, delay in order to mitigate the impact of this
+        * task on the normal operation of the system.
+        */
+       if (uthread->uu_lowpri_window) {
+               throttle_lowpri_io(TRUE);
+       }
+}
+
 int
 vm_pressure_monitor(
        __unused struct proc *p,
@@ -1639,3 +1724,77 @@ vm_pressure_monitor(
        *retval = (int) pages_wanted;
        return 0;
 }
+
+int
+kas_info(struct proc *p,
+                         struct kas_info_args *uap,
+                         int *retval __unused)
+{
+#ifdef SECURE_KERNEL
+       (void)p;
+       (void)uap;
+       return ENOTSUP;
+#else /* !SECURE_KERNEL */
+       int                     selector = uap->selector;
+       user_addr_t     valuep = uap->value;
+       user_addr_t     sizep = uap->size;
+       user_size_t size;
+       int                     error;
+
+       if (!kauth_cred_issuser(kauth_cred_get())) {
+               return EPERM;
+       }
+
+#if CONFIG_MACF
+       error = mac_system_check_kas_info(kauth_cred_get(), selector);
+       if (error) {
+               return error;
+       }
+#endif
+
+       if (IS_64BIT_PROCESS(p)) {
+               user64_size_t size64;
+               error = copyin(sizep, &size64, sizeof(size64));
+               size = (user_size_t)size64;
+       } else {
+               user32_size_t size32;
+               error = copyin(sizep, &size32, sizeof(size32));
+               size = (user_size_t)size32;
+       }
+       if (error) {
+               return error;
+       }
+
+       switch (selector) {
+               case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
+                       {
+                               uint64_t slide = vm_kernel_slide;
+
+                               if (sizeof(slide) != size) {
+                                       return EINVAL;
+                               }
+                               
+                               if (IS_64BIT_PROCESS(p)) {
+                                       user64_size_t size64 = (user64_size_t)size;
+                                       error = copyout(&size64, sizep, sizeof(size64));
+                               } else {
+                                       user32_size_t size32 = (user32_size_t)size;
+                                       error = copyout(&size32, sizep, sizeof(size32));
+                               }
+                               if (error) {
+                                       return error;
+                               }
+                               
+                               error = copyout(&slide, valuep, sizeof(slide));
+                               if (error) {
+                                       return error;
+                               }
+                       }
+                       break;
+               default:
+                       return EINVAL;
+       }
+
+       return 0;
+#endif /* !SECURE_KERNEL */
+}
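
kas_info() follows the usual size-negotiation convention: the caller supplies the value buffer and a size that is validated (and copied back) before the value itself. Reading the kernel text slide as root would look like this (a sketch; it assumes a userspace wrapper declared in <sys/kas_info.h>):

    #include <sys/kas_info.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t slide = 0;
            size_t size = sizeof(slide);

            /* Root-only; EPERM otherwise, ENOTSUP on SECURE_KERNEL builds. */
            if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
                    printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
            return 0;
    }
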
index d12a65652de5d5d05018d5b2482dd018059f2b90..f86ba0148f42e2d61b22a07672f8adf17919d25f 100644 (file)
@@ -280,15 +280,17 @@ vnode_pageout(struct vnode *vp,
                 * just go ahead and call vnop_pageout since
                 * it has already sorted out the dirty ranges
                 */
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
-                                     size, 1, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
+                       size, 1, 0, 0, 0);
 
                if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
                                               (size_t)size, flags, ctx)) )
                        result = PAGER_ERROR;
 
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
-                                     size, 1, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
+                       size, 1, 0, 0, 0);
 
                goto out;
        }
@@ -303,15 +305,17 @@ vnode_pageout(struct vnode *vp,
                         * via 'f_offset' and 'size' into a UPL... this allows the filesystem to first
                         * take any locks it needs, before effectively locking the pages into a UPL...
                         */
-                       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
-                                             size, (int)f_offset, 0, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                               (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
+                               size, (int)f_offset, 0, 0, 0);
 
                        if ( (error_ret = VNOP_PAGEOUT(vp, NULL, upl_offset, (off_t)f_offset,
                                                       size, flags, ctx)) ) {
                                result = PAGER_ERROR;
                        }
-                       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
-                                             size, 0, 0, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
+                               size, 0, 0, 0, 0);
 
                        goto out;
                }
@@ -461,8 +465,9 @@ vnode_pageout(struct vnode *vp,
                }
                xsize = num_of_pages * PAGE_SIZE;
 
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
-                                     xsize, (int)f_offset, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, 
+                       xsize, (int)f_offset, 0, 0, 0);
 
                if ( (error = VNOP_PAGEOUT(vp, upl, offset, (off_t)f_offset,
                                           xsize, flags, ctx)) ) {
@@ -470,8 +475,9 @@ vnode_pageout(struct vnode *vp,
                                error_ret = error;
                        result = PAGER_ERROR;
                }
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
-                                     xsize, 0, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       (MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, 
+                       xsize, 0, 0, 0, 0);
 
                f_offset += xsize;
                offset   += xsize;
@@ -554,6 +560,8 @@ vnode_pagein(
                        error = PAGER_ABSENT;
                        goto out;
                }
+               ubc_upl_range_needed(upl, upl_offset / PAGE_SIZE, 1);
+
                upl_offset = 0;
                first_pg = 0;
                
index ebb5af5db8eb0e59e83c0112221c21385e5875ca..a424e367bb3dc076e687ddde3962970fda857903 100644 (file)
@@ -389,6 +389,7 @@ _mbuf_gethdr
 _mbuf_getpacket
 _mbuf_inbound_modified
 _mbuf_inet_cksum
+_mbuf_is_traffic_class_privileged
 _mbuf_leadingspace
 _mbuf_maxlen
 _mbuf_mclget
index 2ad8e78c96de491913347fb8bc850522502c0406..e78f56919ff60eb000509403e8ac90ab02d046c5 100644 (file)
@@ -3,15 +3,12 @@ _IOBSDNameMatching
 _IOBSDRegistryEntryForDeviceTree
 _IOBSDRegistryEntryGetData
 _IOBSDRegistryEntryRelease
-_IOCDMatching
 _IOCreateThread
 _IODTFreeLoaderInfo
 _IODTGetLoaderInfo
 _IODelay
-_IODiskMatching
 _IOExitThread
 _IOFindBSDRoot
-_IOFindMatchingChild
 _IOFindNameForValue
 _IOFindValueForName
 _IOFlushProcessorCache
@@ -54,9 +51,7 @@ _IOMapperInsertPPNPages
 _IOMapperInsertPage
 _IOMapperInsertUPLPages
 _IONDRVLibrariesInitialize
-_IONetworkMatching
 _IONetworkNamePrefixMatching
-_IOOFPathMatching
 _IOPageableMapForAddress
 _IOPause
 _IOPrintPlane
@@ -87,9 +82,7 @@ _IOSimpleLockTryLock:_lck_spin_try_lock
 _IOSimpleLockUnlock:_lck_spin_unlock
 _IOSizeToAlignment
 _IOSleep
-_IOSpinUnlock
 _IOSystemShutdownNotification
-_IOTrySpinLock
 _IOZeroTvalspec
 _OSKernelStackRemaining
 _OSPrintMemory
@@ -143,9 +136,7 @@ __ZN10IONotifier10superClassE
 __ZN10IONotifier9MetaClassC1Ev
 __ZN10IONotifier9MetaClassC2Ev
 __ZN10IONotifier9metaClassE
-__ZN10IONotifierC1EPK11OSMetaClass
 __ZN10IONotifierC2EPK11OSMetaClass
-__ZN10IONotifierD0Ev
 __ZN10IONotifierD2Ev
 __ZN10IOWorkLoop10gMetaClassE
 __ZN10IOWorkLoop10superClassE
@@ -157,11 +148,6 @@ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource
 __ZN10IOWorkLoop15runEventSourcesEv
 __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource
 __ZN10IOWorkLoop19signalWorkAvailableEv
-__ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev
-__ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev
-__ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev
-__ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev
-__ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev
 __ZN10IOWorkLoop4freeEv
 __ZN10IOWorkLoop4initEv
 __ZN10IOWorkLoop8openGateEv
@@ -222,14 +208,6 @@ __ZN11IOMemoryMap14getAddressTaskEv
 __ZN11IOMemoryMap17getVirtualAddressEv
 __ZN11IOMemoryMap18getPhysicalAddressEv
 __ZN11IOMemoryMap19getMemoryDescriptorEv
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev
-__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev
 __ZN11IOMemoryMap5unmapEv
 __ZN11IOMemoryMap9MetaClassC1Ev
 __ZN11IOMemoryMap9MetaClassC2Ev
@@ -263,19 +241,6 @@ __ZN12IODMACommand12getAlignmentEv
 __ZN12IODMACommand17getNumAddressBitsEv
 __ZN12IODMACommand19setMemoryDescriptorEPK18IOMemoryDescriptorb
 __ZN12IODMACommand21clearMemoryDescriptorEb
-__ZN12IODMACommand22_RESERVEDIODMACommand3Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand4Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand5Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand6Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand7Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand8Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand9Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand10Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand11Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand12Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand13Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand14Ev
-__ZN12IODMACommand23_RESERVEDIODMACommand15Ev
 __ZN12IODMACommand26getPreparedOffsetAndLengthEPyS0_
 __ZN12IODMACommand4freeEv
 __ZN12IODMACommand7prepareEyybb
@@ -333,22 +298,8 @@ __ZN12IOUserClient17setAsyncReferenceEPjP8ipc_portPvS3_
 __ZN12IOUserClient18clientHasPrivilegeEPvPKc
 __ZN12IOUserClient20exportObjectToClientEP4taskP8OSObjectPS3_
 __ZN12IOUserClient21destroyUserReferencesEP8OSObject
-__ZN12IOUserClient22_RESERVEDIOUserClient2Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient3Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient4Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient5Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient6Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient7Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient8Ev
-__ZN12IOUserClient22_RESERVEDIOUserClient9Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient10Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient11Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient12Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient13Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient14Ev
-__ZN12IOUserClient23_RESERVEDIOUserClient15Ev
-__ZN12IOUserClient23releaseNotificationPortEP8ipc_port
 __ZN12IOUserClient23releaseAsyncReference64EPy
+__ZN12IOUserClient23releaseNotificationPortEP8ipc_port
 __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor
 __ZN12IOUserClient4freeEv
 __ZN12IOUserClient4initEP12OSDictionary
@@ -368,13 +319,6 @@ __ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop
 __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_
 __ZN13IOCommandGate13commandWakeupEPvb
 __ZN13IOCommandGate14attemptCommandEPvS0_S0_S0_
-__ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev
-__ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev
 __ZN13IOCommandGate4freeEv
 __ZN13IOCommandGate4initEP8OSObjectPFiS1_PvS2_S2_S2_E
 __ZN13IOCommandGate6enableEv
@@ -397,14 +341,6 @@ __ZN13IOCommandPool13returnCommandEP9IOCommand
 __ZN13IOCommandPool15gatedGetCommandEPP9IOCommandb
 __ZN13IOCommandPool16initWithWorkLoopEP10IOWorkLoop
 __ZN13IOCommandPool18gatedReturnCommandEP9IOCommand
-__ZN13IOCommandPool23_RESERVEDIOCommandPool0Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool1Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool2Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool3Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool4Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev
-__ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev
 __ZN13IOCommandPool4freeEv
 __ZN13IOCommandPool9MetaClassC1Ev
 __ZN13IOCommandPool9MetaClassC2Ev
@@ -419,16 +355,9 @@ __ZN13IOEventSource10gMetaClassE
 __ZN13IOEventSource10superClassE
 __ZN13IOEventSource10wakeupGateEPvb
 __ZN13IOEventSource11setWorkLoopEP10IOWorkLoop
+__ZN13IOEventSource12checkForWorkEv
 __ZN13IOEventSource12tryCloseGateEv
 __ZN13IOEventSource19signalWorkAvailableEv
-__ZN13IOEventSource23_RESERVEDIOEventSource0Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource1Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource2Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource3Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource4Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource5Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource6Ev
-__ZN13IOEventSource23_RESERVEDIOEventSource7Ev
 __ZN13IOEventSource4freeEv
 __ZN13IOEventSource4initEP8OSObjectPFvS1_zE
 __ZN13IOEventSource6enableEv
@@ -440,7 +369,6 @@ __ZN13IOEventSource9MetaClassC2Ev
 __ZN13IOEventSource9closeGateEv
 __ZN13IOEventSource9metaClassE
 __ZN13IOEventSource9setActionEPFvP8OSObjectzE
-__ZN13IOEventSource12checkForWorkEv
 __ZN13IOEventSourceC1EPK11OSMetaClass
 __ZN13IOEventSourceC2EPK11OSMetaClass
 __ZN13IOEventSourceD0Ev
@@ -484,12 +412,12 @@ __ZN14IOPMrootDomain14tellChangeDownEm
 __ZN14IOPMrootDomain15powerChangeDoneEm
 __ZN14IOPMrootDomain16tellNoChangeDownEm
 __ZN14IOPMrootDomain17createPMAssertionEyjP9IOServicePKc
-__ZN14IOPMrootDomain18releasePMAssertionEy
-__ZN14IOPMrootDomain19getPMAssertionLevelEy
-__ZN14IOPMrootDomain19setPMAssertionLevelEyj
 __ZN14IOPMrootDomain17getSleepSupportedEv
 __ZN14IOPMrootDomain17setAggressivenessEmm
 __ZN14IOPMrootDomain18changePowerStateToEm
+__ZN14IOPMrootDomain18releasePMAssertionEy
+__ZN14IOPMrootDomain19getPMAssertionLevelEy
+__ZN14IOPMrootDomain19setPMAssertionLevelEyj
 __ZN14IOPMrootDomain22changePowerStateToPrivEm
 __ZN14IOPMrootDomain22removePublishedFeatureEj
 __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm
@@ -535,9 +463,7 @@ __ZN15IODMAController5startEP9IOService
 __ZN15IODMAController9MetaClassC1Ev
 __ZN15IODMAController9MetaClassC2Ev
 __ZN15IODMAController9metaClassE
-__ZN15IODMAControllerC1EPK11OSMetaClass
 __ZN15IODMAControllerC2EPK11OSMetaClass
-__ZN15IODMAControllerD0Ev
 __ZN15IODMAControllerD2Ev
 __ZN15IOPMPowerSource10cycleCountEv
 __ZN15IOPMPowerSource10gMetaClassE
@@ -625,32 +551,6 @@ __ZN15IORegistryEntry17runPropertyActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2
 __ZN15IORegistryEntry18getGenerationCountEv
 __ZN15IORegistryEntry18getRegistryEntryIDEv
 __ZN15IORegistryEntry21getChildFromComponentEPPKcPK15IORegistryPlane
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry9Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry10Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry11Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry12Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry13Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry14Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry15Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry16Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry17Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry18Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry19Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry20Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry21Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry22Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry23Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry24Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry25Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry26Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry27Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev
 __ZN15IORegistryEntry4freeEv
 __ZN15IORegistryEntry4initEP12OSDictionary
 __ZN15IORegistryEntry4initEPS_PK15IORegistryPlane
@@ -753,22 +653,6 @@ __ZN17IOBigMemoryCursorD0Ev
 __ZN17IOBigMemoryCursorD2Ev
 __ZN17IOPolledInterface10gMetaClassE
 __ZN17IOPolledInterface15checkAllForWorkEv
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev
 __ZN17IOPolledInterfaceC2EPK11OSMetaClass
 __ZN17IOPolledInterfaceD2Ev
 __ZN17IOPowerConnection10gMetaClassE
@@ -799,14 +683,6 @@ __ZN17IOPowerConnectionD2Ev
 __ZN17IOSharedDataQueue10gMetaClassE
 __ZN17IOSharedDataQueue10superClassE
 __ZN17IOSharedDataQueue19getMemoryDescriptorEv
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue0Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue1Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue2Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue3Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue4Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue5Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue6Ev
-__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue7Ev
 __ZN17IOSharedDataQueue4freeEv
 __ZN17IOSharedDataQueue4peekEv
 __ZN17IOSharedDataQueue9MetaClassC1Ev
@@ -832,9 +708,7 @@ __ZN18IOMemoryDescriptor8redirectEP4taskb
 __ZN18IOMemoryDescriptor9MetaClassC1Ev
 __ZN18IOMemoryDescriptor9MetaClassC2Ev
 __ZN18IOMemoryDescriptor9metaClassE
-__ZN18IOMemoryDescriptorC1EPK11OSMetaClass
 __ZN18IOMemoryDescriptorC2EPK11OSMetaClass
-__ZN18IOMemoryDescriptorD0Ev
 __ZN18IOMemoryDescriptorD2Ev
 __ZN18IORegistryIterator10enterEntryEPK15IORegistryPlane
 __ZN18IORegistryIterator10enterEntryEv
@@ -864,14 +738,6 @@ __ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop
 __ZN18IOTimerEventSource13cancelTimeoutEv
 __ZN18IOTimerEventSource14setTimeoutFuncEv
 __ZN18IOTimerEventSource16timerEventSourceEP8OSObjectPFvS1_PS_E
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource2Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource3Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource4Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource5Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource6Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource7Ev
 __ZN18IOTimerEventSource4freeEv
 __ZN18IOTimerEventSource4initEP8OSObjectPFvS1_PS_E
 __ZN18IOTimerEventSource6enableEv
@@ -896,9 +762,7 @@ __ZN18IOUserNotification7isValidEv
 __ZN18IOUserNotification9MetaClassC1Ev
 __ZN18IOUserNotification9MetaClassC2Ev
 __ZN18IOUserNotification9metaClassE
-__ZN18IOUserNotificationC1EPK11OSMetaClass
 __ZN18IOUserNotificationC2EPK11OSMetaClass
-__ZN18IOUserNotificationD0Ev
 __ZN18IOUserNotificationD2Ev
 __ZN18_IOServiceNotifier10gMetaClassE
 __ZN18_IOServiceNotifier10superClassE
@@ -968,12 +832,6 @@ __ZN21IOInterruptController16getInterruptTypeEP9IOServiceiPi
 __ZN21IOInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_
 __ZN21IOInterruptController19unregisterInterruptEP9IOServicei
 __ZN21IOInterruptController26getInterruptHandlerAddressEv
-__ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController1Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController2Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController3Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController4Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController5Ev
 __ZN21IOInterruptController9MetaClassC1Ev
 __ZN21IOInterruptController9MetaClassC2Ev
 __ZN21IOInterruptController9metaClassE
@@ -1014,14 +872,6 @@ __ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei
 __ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei
 __ZN22IOInterruptEventSource23normalInterruptOccurredEPvP9IOServicei
 __ZN22IOInterruptEventSource24disableInterruptOccurredEPvP9IOServicei
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource0Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource1Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource2Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource3Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource4Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource5Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource6Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource7Ev
 __ZN22IOInterruptEventSource4freeEv
 __ZN22IOInterruptEventSource4initEP8OSObjectPFvS1_PS_iEP9IOServicei
 __ZN22IOInterruptEventSource6enableEv
@@ -1115,10 +965,6 @@ __ZN27IOSharedInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_i
 __ZN27IOSharedInterruptController19unregisterInterruptEP9IOServicei
 __ZN27IOSharedInterruptController23initInterruptControllerEP21IOInterruptControllerP6OSData
 __ZN27IOSharedInterruptController26getInterruptHandlerAddressEv
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController0Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController1Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController2Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController3Ev
 __ZN27IOSharedInterruptController9MetaClassC1Ev
 __ZN27IOSharedInterruptController9MetaClassC2Ev
 __ZN27IOSharedInterruptController9metaClassE
@@ -1135,14 +981,6 @@ __ZN28IOFilterInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_P22IOIn
 __ZN28IOFilterInterruptEventSource23normalInterruptOccurredEPvP9IOServicei
 __ZN28IOFilterInterruptEventSource24disableInterruptOccurredEPvP9IOServicei
 __ZN28IOFilterInterruptEventSource26filterInterruptEventSourceEP8OSObjectPFvS1_P22IOInterruptEventSourceiEPFbS1_PS_EP9IOServicei
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource0Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource1Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource2Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource3Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource4Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource5Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource6Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource7Ev
 __ZN28IOFilterInterruptEventSource4initEP8OSObjectPFvS1_P22IOInterruptEventSourceiEP9IOServicei
 __ZN28IOFilterInterruptEventSource4initEP8OSObjectPFvS1_P22IOInterruptEventSourceiEPFbS1_PS_EP9IOServicei
 __ZN28IOFilterInterruptEventSource9MetaClassC1Ev
@@ -1169,19 +1007,6 @@ __ZN29IOInterleavedMemoryDescriptorD2Ev
 __ZN8IOMapper10gMetaClassE
 __ZN8IOMapper10superClassE
 __ZN8IOMapper17setMapperRequiredEb
-__ZN8IOMapper18_RESERVEDIOMapper3Ev
-__ZN8IOMapper18_RESERVEDIOMapper4Ev
-__ZN8IOMapper18_RESERVEDIOMapper5Ev
-__ZN8IOMapper18_RESERVEDIOMapper6Ev
-__ZN8IOMapper18_RESERVEDIOMapper7Ev
-__ZN8IOMapper18_RESERVEDIOMapper8Ev
-__ZN8IOMapper18_RESERVEDIOMapper9Ev
-__ZN8IOMapper19_RESERVEDIOMapper10Ev
-__ZN8IOMapper19_RESERVEDIOMapper11Ev
-__ZN8IOMapper19_RESERVEDIOMapper12Ev
-__ZN8IOMapper19_RESERVEDIOMapper13Ev
-__ZN8IOMapper19_RESERVEDIOMapper14Ev
-__ZN8IOMapper19_RESERVEDIOMapper15Ev
 __ZN8IOMapper19copyMapperForDeviceEP9IOService
 __ZN8IOMapper19waitForSystemMapperEv
 __ZN8IOMapper4freeEv
@@ -1190,28 +1015,8 @@ __ZN8IOMapper7gSystemE
 __ZN8IOMapper9MetaClassC1Ev
 __ZN8IOMapper9MetaClassC2Ev
 __ZN8IOMapper9metaClassE
-__ZN8IOMapperC1EPK11OSMetaClass
 __ZN8IOMapperC2EPK11OSMetaClass
-__ZN8IOMapperD0Ev
 __ZN8IOMapperD2Ev
-__ZN8IOSyncer10gMetaClassE
-__ZN8IOSyncer10superClassE
-__ZN8IOSyncer13privateSignalEv
-__ZN8IOSyncer4freeEv
-__ZN8IOSyncer4initEb
-__ZN8IOSyncer4waitEb
-__ZN8IOSyncer6createEb
-__ZN8IOSyncer6reinitEv
-__ZN8IOSyncer6signalEib
-__ZN8IOSyncer9MetaClassC1Ev
-__ZN8IOSyncer9MetaClassC2Ev
-__ZN8IOSyncer9metaClassE
-__ZN8IOSyncerC1EPK11OSMetaClass
-__ZN8IOSyncerC1Ev
-__ZN8IOSyncerC2EPK11OSMetaClass
-__ZN8IOSyncerC2Ev
-__ZN8IOSyncerD0Ev
-__ZN8IOSyncerD2Ev
 __ZN9IOCommand10gMetaClassE
 __ZN9IOCommand10superClassE
 __ZN9IOCommand4initEv
@@ -1292,53 +1097,12 @@ __ZN9IOService18getResourceServiceEv
 __ZN9IOService18lockForArbitrationEb
 __ZN9IOService18matchPropertyTableEP12OSDictionary
 __ZN9IOService18setIdleTimerPeriodEm
-__ZN9IOService19_RESERVEDIOService6Ev
-__ZN9IOService19_RESERVEDIOService7Ev
-__ZN9IOService19_RESERVEDIOService8Ev
-__ZN9IOService19_RESERVEDIOService9Ev
+__ZN9IOService19copyMatchingServiceEP12OSDictionary
 __ZN9IOService19getMatchingServicesEP12OSDictionary
 __ZN9IOService19powerOverrideOnPrivEv
 __ZN9IOService19registerPowerDriverEPS_P14IOPMPowerStatem
 __ZN9IOService19start_PM_idle_timerEv
 __ZN9IOService19unregisterInterruptEi
-__ZN9IOService20_RESERVEDIOService10Ev
-__ZN9IOService20_RESERVEDIOService11Ev
-__ZN9IOService20_RESERVEDIOService12Ev
-__ZN9IOService20_RESERVEDIOService13Ev
-__ZN9IOService20_RESERVEDIOService14Ev
-__ZN9IOService20_RESERVEDIOService15Ev
-__ZN9IOService20_RESERVEDIOService16Ev
-__ZN9IOService20_RESERVEDIOService17Ev
-__ZN9IOService20_RESERVEDIOService18Ev
-__ZN9IOService20_RESERVEDIOService19Ev
-__ZN9IOService20_RESERVEDIOService20Ev
-__ZN9IOService20_RESERVEDIOService21Ev
-__ZN9IOService20_RESERVEDIOService22Ev
-__ZN9IOService20_RESERVEDIOService23Ev
-__ZN9IOService20_RESERVEDIOService24Ev
-__ZN9IOService20_RESERVEDIOService25Ev
-__ZN9IOService20_RESERVEDIOService26Ev
-__ZN9IOService20_RESERVEDIOService27Ev
-__ZN9IOService20_RESERVEDIOService28Ev
-__ZN9IOService20_RESERVEDIOService29Ev
-__ZN9IOService20_RESERVEDIOService30Ev
-__ZN9IOService20_RESERVEDIOService31Ev
-__ZN9IOService20_RESERVEDIOService32Ev
-__ZN9IOService20_RESERVEDIOService33Ev
-__ZN9IOService20_RESERVEDIOService34Ev
-__ZN9IOService20_RESERVEDIOService35Ev
-__ZN9IOService20_RESERVEDIOService36Ev
-__ZN9IOService20_RESERVEDIOService37Ev
-__ZN9IOService20_RESERVEDIOService38Ev
-__ZN9IOService20_RESERVEDIOService39Ev
-__ZN9IOService20_RESERVEDIOService40Ev
-__ZN9IOService20_RESERVEDIOService41Ev
-__ZN9IOService20_RESERVEDIOService42Ev
-__ZN9IOService20_RESERVEDIOService43Ev
-__ZN9IOService20_RESERVEDIOService44Ev
-__ZN9IOService20_RESERVEDIOService45Ev
-__ZN9IOService20_RESERVEDIOService46Ev
-__ZN9IOService20_RESERVEDIOService47Ev
 __ZN9IOService20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_
 __ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_
 __ZN9IOService20getDeviceMemoryCountEv
@@ -1551,8 +1315,6 @@ __ZNK29IOInterleavedMemoryDescriptor9MetaClass5allocEv
 __ZNK8IOMapper12getMetaClassEv
 __ZNK8IOMapper13getBypassMaskEPy
 __ZNK8IOMapper9MetaClass5allocEv
-__ZNK8IOSyncer12getMetaClassEv
-__ZNK8IOSyncer9MetaClass5allocEv
 __ZNK9IOCommand12getMetaClassEv
 __ZNK9IOCommand9MetaClass5allocEv
 __ZNK9IOService10isInactiveEv
@@ -1620,7 +1382,6 @@ __ZTV27IOSharedInterruptController
 __ZTV28IOFilterInterruptEventSource
 __ZTV29IOInterleavedMemoryDescriptor
 __ZTV8IOMapper
-__ZTV8IOSyncer
 __ZTV9IOCommand
 __ZTV9IOService
 __ZTVN10IOMachPort9MetaClassE
@@ -1673,7 +1434,6 @@ __ZTVN27IOSharedInterruptController9MetaClassE
 __ZTVN28IOFilterInterruptEventSource9MetaClassE
 __ZTVN29IOInterleavedMemoryDescriptor9MetaClassE
 __ZTVN8IOMapper9MetaClassE
-__ZTVN8IOSyncer9MetaClassE
 __ZTVN9IOCommand9MetaClassE
 __ZTVN9IOService9MetaClassE
 __giDebugLogDataInternal
@@ -1689,8 +1449,6 @@ _debug_malloc_size
 _device_close
 _device_data_action
 _di_root_image
-_ev_try_lock
-_ev_unlock
 _gIOAppPowerStateInterest
 _gIOBusyInterest
 _gIOCatalogue
index d83bbdde665d7d68a1f8b9a10bfded92a6bdaa86..8dff01adc671ab63b95fea5362610a69fd6cc4c6 100644
@@ -1,5 +1,8 @@
 _IOLockUnlock_darwin10:_lck_mtx_unlock_darwin10
+_IOOFPathMatching
 _IOPanic
+_IOSpinUnlock
+_IOTrySpinLock
 _PE_parse_boot_arg
 __Z16IODTFindSlotNameP15IORegistryEntrym
 __Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E
@@ -7,6 +10,11 @@ __Z17IODTGetCellCountsP15IORegistryEntryPmS1_
 __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
 __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
 __ZN10IOWorkLoop19workLoopWithOptionsEm
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev
 __ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem
 __ZN10IOWorkLoop9sleepGateEPvm
 __ZN11IOCatalogue11findDriversEP12OSDictionaryPl
@@ -19,6 +27,14 @@ __ZN11IODataQueue7enqueueEPvm
 __ZN11IOMemoryMap10getAddressEv
 __ZN11IOMemoryMap18getPhysicalSegmentEmPm
 __ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev
 __ZN11IOMemoryMap7getSizeEv
 __ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm
 __ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy
@@ -33,6 +49,19 @@ __ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvmEPyS2_Pm
 __ZN12IODMACommand15genIOVMSegmentsEPyPvPm
 __ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_
 __ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_
+__ZN12IODMACommand22_RESERVEDIODMACommand3Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand4Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand5Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand6Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand7Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand8Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand9Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand10Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand11Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand12Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand13Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand14Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand15Ev
 __ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb
 __ZN12IODMACommand8transferEmyPvy
 __ZN12IOUserClient12initWithTaskEP4taskPvm
@@ -43,6 +72,20 @@ __ZN12IOUserClient17mapClientMemory64EmP4taskmy
 __ZN12IOUserClient17sendAsyncResult64EPyiS0_m
 __ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor
 __ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy
+__ZN12IOUserClient22_RESERVEDIOUserClient2Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient3Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient4Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient5Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient6Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient7Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient8Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient9Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient10Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient11Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient12Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient13Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient14Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient15Ev
 __ZN12IOUserClient23getExternalTrapForIndexEm
 __ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore
 __ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem
@@ -54,8 +97,31 @@ __ZN12IOUserClient30getExternalAsyncMethodForIndexEm
 __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem
 __ZN13IOCommandGate12commandSleepEPv12UnsignedWidem
 __ZN13IOCommandGate12commandSleepEPvm
+__ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev
 __ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm
+__ZN13IOCommandPool23_RESERVEDIOCommandPool0Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool1Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool2Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool3Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool4Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev
 __ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm
+__ZN13IOEventSource23_RESERVEDIOEventSource0Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource1Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource2Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource3Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource4Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource5Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource6Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource7Ev
 __ZN13IOEventSource9sleepGateEPv12UnsignedWidem
 __ZN13IOEventSource9sleepGateEPvm
 __ZN13_IOServiceJob8startJobEP9IOServiceim
@@ -90,6 +156,32 @@ __ZN15IODMAController13getControllerEP9IOServicem
 __ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim
 __ZN15IODMAController20createControllerNameEm
 __ZN15IODMAController21registerDMAControllerEm
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry9Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry10Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry11Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry12Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry13Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry14Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry15Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry16Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry17Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry18Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry19Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry20Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry21Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry22Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry23Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry24Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry25Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry26Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry27Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev
 __ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m
 __ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm
 __ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim
@@ -106,9 +198,33 @@ __ZN16IORangeAllocator9withRangeEmmmm
 __ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
 __ZN17IOBigMemoryCursor17withSpecificationEmmm
 __ZN17IOBigMemoryCursor21initWithSpecificationEmmm
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev
 __ZN17IOSharedDataQueue11withEntriesEmm
 __ZN17IOSharedDataQueue12withCapacityEm
 __ZN17IOSharedDataQueue16initWithCapacityEm
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue0Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue1Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue2Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue3Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue4Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue5Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue6Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue7Ev
 __ZN17IOSharedDataQueue7dequeueEPvPm
 __ZN18IOMemoryDescriptor10setMappingEP4taskjm
 __ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb
@@ -164,6 +280,14 @@ __ZN18IOTimerEventSource12wakeAtTimeMSEm
 __ZN18IOTimerEventSource12wakeAtTimeUSEm
 __ZN18IOTimerEventSource15setTimeoutTicksEm
 __ZN18IOTimerEventSource15wakeAtTimeTicksEm
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource2Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource3Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource4Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource5Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource6Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource7Ev
 __ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
 __ZN20IOLittleMemoryCursor17withSpecificationEmmm
 __ZN20IOLittleMemoryCursor21initWithSpecificationEmmm
@@ -175,6 +299,12 @@ __ZN21IOInterruptController12enableVectorElP17IOInterruptVector
 __ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector
 __ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector
 __ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector
+__ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController1Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController2Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController3Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController4Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController5Ev
 __ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
 __ZN21IONaturalMemoryCursor17withSpecificationEmmm
 __ZN21IONaturalMemoryCursor21initWithSpecificationEmmm
@@ -185,6 +315,14 @@ __ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptormmm
 __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm
 __ZN21IOSubMemoryDescriptor7prepareE11IODirection
 __ZN21IOSubMemoryDescriptor8completeE11IODirection
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource0Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource1Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource2Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource3Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource4Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource5Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource6Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource7Ev
 __ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
 __ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm
 __ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
@@ -237,6 +375,18 @@ __ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm
 __ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm
 __ZN25IOGeneralMemoryDescriptor7prepareE11IODirection
 __ZN25IOGeneralMemoryDescriptor8completeE11IODirection
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController0Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController1Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController2Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController3Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource0Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource1Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource2Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource3Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource4Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource5Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource6Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource7Ev
 __ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection
 __ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection
 __ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm
@@ -249,7 +399,20 @@ __ZN8IOMapper10iovmInsertEjmP13upl_page_infom
 __ZN8IOMapper10iovmInsertEjmPjm
 __ZN8IOMapper11NewARTTableEmPPvPj
 __ZN8IOMapper12FreeARTTableEP6OSDatam
+__ZN8IOMapper18_RESERVEDIOMapper3Ev
+__ZN8IOMapper18_RESERVEDIOMapper4Ev
+__ZN8IOMapper18_RESERVEDIOMapper5Ev
+__ZN8IOMapper18_RESERVEDIOMapper6Ev
+__ZN8IOMapper18_RESERVEDIOMapper7Ev
+__ZN8IOMapper18_RESERVEDIOMapper8Ev
+__ZN8IOMapper18_RESERVEDIOMapper9Ev
 __ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjm
+__ZN8IOMapper19_RESERVEDIOMapper10Ev
+__ZN8IOMapper19_RESERVEDIOMapper11Ev
+__ZN8IOMapper19_RESERVEDIOMapper12Ev
+__ZN8IOMapper19_RESERVEDIOMapper13Ev
+__ZN8IOMapper19_RESERVEDIOMapper14Ev
+__ZN8IOMapper19_RESERVEDIOMapper15Ev
 __ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandm
 __ZN8IOPMprot10gMetaClassE
 __ZN8IOPMprot10superClassE
@@ -262,6 +425,24 @@ __ZN8IOPMprotC2EPK11OSMetaClass
 __ZN8IOPMprotC2Ev
 __ZN8IOPMprotD0Ev
 __ZN8IOPMprotD2Ev
+__ZN8IOSyncer10gMetaClassE
+__ZN8IOSyncer10superClassE
+__ZN8IOSyncer13privateSignalEv
+__ZN8IOSyncer4freeEv
+__ZN8IOSyncer4initEb
+__ZN8IOSyncer4waitEb
+__ZN8IOSyncer6createEb
+__ZN8IOSyncer6reinitEv
+__ZN8IOSyncer6signalEib
+__ZN8IOSyncer9MetaClassC1Ev
+__ZN8IOSyncer9MetaClassC2Ev
+__ZN8IOSyncer9metaClassE
+__ZN8IOSyncerC1EPK11OSMetaClass
+__ZN8IOSyncerC1Ev
+__ZN8IOSyncerC2EPK11OSMetaClass
+__ZN8IOSyncerC2Ev
+__ZN8IOSyncerD0Ev
+__ZN8IOSyncerD2Ev
 __ZN9IOService10adjustBusyEl
 __ZN9IOService10handleOpenEPS_mPv
 __ZN9IOService10systemWakeEv
@@ -303,8 +484,50 @@ __ZN9IOService18matchPropertyTableEP12OSDictionaryPl
 __ZN9IOService18requireMaxBusStallEm
 __ZN9IOService18settleTimerExpiredEv
 __ZN9IOService18systemWillShutdownEm
+__ZN9IOService19_RESERVEDIOService6Ev
+__ZN9IOService19_RESERVEDIOService7Ev
+__ZN9IOService19_RESERVEDIOService8Ev
+__ZN9IOService19_RESERVEDIOService9Ev
 __ZN9IOService19deliverNotificationEPK8OSSymbolmm
 __ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator
+__ZN9IOService20_RESERVEDIOService10Ev
+__ZN9IOService20_RESERVEDIOService11Ev
+__ZN9IOService20_RESERVEDIOService12Ev
+__ZN9IOService20_RESERVEDIOService13Ev
+__ZN9IOService20_RESERVEDIOService14Ev
+__ZN9IOService20_RESERVEDIOService15Ev
+__ZN9IOService20_RESERVEDIOService16Ev
+__ZN9IOService20_RESERVEDIOService17Ev
+__ZN9IOService20_RESERVEDIOService18Ev
+__ZN9IOService20_RESERVEDIOService19Ev
+__ZN9IOService20_RESERVEDIOService20Ev
+__ZN9IOService20_RESERVEDIOService21Ev
+__ZN9IOService20_RESERVEDIOService22Ev
+__ZN9IOService20_RESERVEDIOService23Ev
+__ZN9IOService20_RESERVEDIOService24Ev
+__ZN9IOService20_RESERVEDIOService25Ev
+__ZN9IOService20_RESERVEDIOService26Ev
+__ZN9IOService20_RESERVEDIOService27Ev
+__ZN9IOService20_RESERVEDIOService28Ev
+__ZN9IOService20_RESERVEDIOService29Ev
+__ZN9IOService20_RESERVEDIOService30Ev
+__ZN9IOService20_RESERVEDIOService31Ev
+__ZN9IOService20_RESERVEDIOService32Ev
+__ZN9IOService20_RESERVEDIOService33Ev
+__ZN9IOService20_RESERVEDIOService34Ev
+__ZN9IOService20_RESERVEDIOService35Ev
+__ZN9IOService20_RESERVEDIOService36Ev
+__ZN9IOService20_RESERVEDIOService37Ev
+__ZN9IOService20_RESERVEDIOService38Ev
+__ZN9IOService20_RESERVEDIOService39Ev
+__ZN9IOService20_RESERVEDIOService40Ev
+__ZN9IOService20_RESERVEDIOService41Ev
+__ZN9IOService20_RESERVEDIOService42Ev
+__ZN9IOService20_RESERVEDIOService43Ev
+__ZN9IOService20_RESERVEDIOService44Ev
+__ZN9IOService20_RESERVEDIOService45Ev
+__ZN9IOService20_RESERVEDIOService46Ev
+__ZN9IOService20_RESERVEDIOService47Ev
 __ZN9IOService22PM_Clamp_Timer_ExpiredEv
 __ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection
 __ZN9IOService23acknowledgeNotificationEPvm
@@ -337,7 +560,13 @@ __ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj
 __ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj
 __ZNK8IOPMprot12getMetaClassEv
 __ZNK8IOPMprot9MetaClass5allocEv
+__ZNK8IOSyncer12getMetaClassEv
+__ZNK8IOSyncer9MetaClass5allocEv
 __ZTV14IOCommandQueue
 __ZTV8IOPMprot
+__ZTV8IOSyncer
 __ZTVN14IOCommandQueue9MetaClassE
 __ZTVN8IOPMprot9MetaClassE
+__ZTVN8IOSyncer9MetaClassE
+_ev_try_lock
+_ev_unlock
index 6f986aea64040d6f098b765177545b2bc69b8a00..37e7e8d41162d3fa486a9cd79730571888d5e117 100644
@@ -1,3 +1,6 @@
+_IOOFPathMatching
+_IOSpinUnlock
+_IOTrySpinLock
 __Z16IODTFindSlotNameP15IORegistryEntryj
 __Z16IODTSetResolvingP15IORegistryEntryPFijPjS1_EPFvS0_PhS4_S4_E
 __Z17IODTGetCellCountsP15IORegistryEntryPjS1_
@@ -7,6 +10,11 @@ __ZN10IOWorkLoop19workLoopWithOptionsEj
 __ZN10IOWorkLoop20_RESERVEDIOWorkLoop0Ev
 __ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev
 __ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev
+__ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev
 __ZN10IOWorkLoop9sleepGateEPvj
 __ZN10IOWorkLoop9sleepGateEPvyj
 __ZN11IOCatalogue11findDriversEP12OSDictionaryPi
@@ -18,6 +26,14 @@ __ZN11IODataQueue16initWithCapacityEj
 __ZN11IODataQueue7enqueueEPvj
 __ZN11IOMemoryMap18getPhysicalSegmentEyPyj
 __ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev
+__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev
 __ZN11IOMemoryMap8redirectEP18IOMemoryDescriptorjy
 __ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvj
 __ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvj
@@ -30,6 +46,19 @@ __ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvjEPyS2_Pj
 __ZN12IODMACommand15genIOVMSegmentsEPyPvPj
 __ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_
 __ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_
+__ZN12IODMACommand22_RESERVEDIODMACommand3Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand4Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand5Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand6Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand7Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand8Ev
+__ZN12IODMACommand22_RESERVEDIODMACommand9Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand10Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand11Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand12Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand13Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand14Ev
+__ZN12IODMACommand23_RESERVEDIODMACommand15Ev
 __ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperyybb
 __ZN12IODMACommand8transferEjyPvy
 __ZN12IOUserClient12initWithTaskEP4taskPvj
@@ -41,6 +70,20 @@ __ZN12IOUserClient19clientMemoryForTypeEjPjPP18IOMemoryDescriptor
 __ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy
 __ZN12IOUserClient22_RESERVEDIOUserClient0Ev
 __ZN12IOUserClient22_RESERVEDIOUserClient1Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient2Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient3Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient4Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient5Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient6Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient7Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient8Ev
+__ZN12IOUserClient22_RESERVEDIOUserClient9Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient10Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient11Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient12Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient13Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient14Ev
+__ZN12IOUserClient23_RESERVEDIOUserClient15Ev
 __ZN12IOUserClient23getExternalTrapForIndexEj
 __ZN12IOUserClient24getNotificationSemaphoreEjPP9semaphore
 __ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicej
@@ -53,8 +96,31 @@ __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicej
 __ZN13IOCommandGate12commandSleepEPvj
 __ZN13IOCommandGate12commandSleepEPvyj
 __ZN13IOCommandGate23_RESERVEDIOCommandGate0Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev
+__ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev
 __ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopj
+__ZN13IOCommandPool23_RESERVEDIOCommandPool0Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool1Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool2Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool3Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool4Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev
+__ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev
 __ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopj
+__ZN13IOEventSource23_RESERVEDIOEventSource0Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource1Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource2Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource3Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource4Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource5Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource6Ev
+__ZN13IOEventSource23_RESERVEDIOEventSource7Ev
 __ZN13IOEventSource9sleepGateEPvj
 __ZN13IOEventSource9sleepGateEPvyj
 __ZN13_IOServiceJob8startJobEP9IOServiceij
@@ -78,6 +144,32 @@ __ZN15IORegistryEntry25_RESERVEDIORegistryEntry2Ev
 __ZN15IORegistryEntry25_RESERVEDIORegistryEntry3Ev
 __ZN15IORegistryEntry25_RESERVEDIORegistryEntry4Ev
 __ZN15IORegistryEntry25_RESERVEDIORegistryEntry5Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev
+__ZN15IORegistryEntry25_RESERVEDIORegistryEntry9Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry10Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry11Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry12Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry13Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry14Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry15Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry16Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry17Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry18Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry19Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry20Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry21Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry22Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry23Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry24Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry25Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry26Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry27Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev
+__ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev
 __ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyES8_j
 __ZN16IODMAEventSource15startDMACommandEP12IODMACommandjyy
 __ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiy
@@ -94,9 +186,33 @@ __ZN16IORangeAllocator9withRangeEyyjj
 __ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj
 __ZN17IOBigMemoryCursor17withSpecificationEyyy
 __ZN17IOBigMemoryCursor21initWithSpecificationEyyy
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev
 __ZN17IOSharedDataQueue11withEntriesEjj
 __ZN17IOSharedDataQueue12withCapacityEj
 __ZN17IOSharedDataQueue16initWithCapacityEj
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue0Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue1Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue2Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue3Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue4Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue5Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue6Ev
+__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue7Ev
 __ZN17IOSharedDataQueue7dequeueEPvPj
 __ZN18IOMemoryDescriptor10setMappingEP4taskyj
 __ZN18IOMemoryDescriptor10writeBytesEyPKvy
@@ -142,6 +258,14 @@ __ZN18IOTimerEventSource12wakeAtTimeMSEj
 __ZN18IOTimerEventSource12wakeAtTimeUSEj
 __ZN18IOTimerEventSource15setTimeoutTicksEj
 __ZN18IOTimerEventSource15wakeAtTimeTicksEj
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource2Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource3Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource4Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource5Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource6Ev
+__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource7Ev
 __ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj
 __ZN20IOLittleMemoryCursor17withSpecificationEyyy
 __ZN20IOLittleMemoryCursor21initWithSpecificationEyyy
@@ -153,6 +277,12 @@ __ZN21IOInterruptController12enableVectorEiP17IOInterruptVector
 __ZN21IOInterruptController13getVectorTypeEiP17IOInterruptVector
 __ZN21IOInterruptController17disableVectorHardEiP17IOInterruptVector
 __ZN21IOInterruptController17vectorCanBeSharedEiP17IOInterruptVector
+__ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController1Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController2Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController3Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController4Ev
+__ZN21IOInterruptController31_RESERVEDIOInterruptController5Ev
 __ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj
 __ZN21IONaturalMemoryCursor17withSpecificationEyyy
 __ZN21IONaturalMemoryCursor21initWithSpecificationEyyy
@@ -163,6 +293,14 @@ __ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptoryyj
 __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEyPyj
 __ZN21IOSubMemoryDescriptor7prepareEj
 __ZN21IOSubMemoryDescriptor8completeEj
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource0Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource1Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource2Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource3Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource4Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource5Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource6Ev
+__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource7Ev
 __ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorjjb
 __ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorjjb
 __ZN23IOMultiMemoryDescriptor7prepareEj
@@ -201,6 +339,18 @@ __ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPyjyy
 __ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapyy
 __ZN25IOGeneralMemoryDescriptor7prepareEj
 __ZN25IOGeneralMemoryDescriptor8completeEj
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController0Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController1Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController2Ev
+__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController3Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource0Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource1Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource2Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource3Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource4Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource5Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource6Ev
+__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource7Ev
 __ZN29IOInterleavedMemoryDescriptor12withCapacityEyj
 __ZN29IOInterleavedMemoryDescriptor16initWithCapacityEyj
 __ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptoryy
@@ -212,8 +362,39 @@ __ZN8IOMapper10iovmInsertEjjP13upl_page_infoj
 __ZN8IOMapper10iovmInsertEjjPjj
 __ZN8IOMapper11NewARTTableEyPPvPj
 __ZN8IOMapper12FreeARTTableEP6OSDatay
+__ZN8IOMapper18_RESERVEDIOMapper3Ev
+__ZN8IOMapper18_RESERVEDIOMapper4Ev
+__ZN8IOMapper18_RESERVEDIOMapper5Ev
+__ZN8IOMapper18_RESERVEDIOMapper6Ev
+__ZN8IOMapper18_RESERVEDIOMapper7Ev
+__ZN8IOMapper18_RESERVEDIOMapper8Ev
+__ZN8IOMapper18_RESERVEDIOMapper9Ev
 __ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjj
+__ZN8IOMapper19_RESERVEDIOMapper10Ev
+__ZN8IOMapper19_RESERVEDIOMapper11Ev
+__ZN8IOMapper19_RESERVEDIOMapper12Ev
+__ZN8IOMapper19_RESERVEDIOMapper13Ev
+__ZN8IOMapper19_RESERVEDIOMapper14Ev
+__ZN8IOMapper19_RESERVEDIOMapper15Ev
 __ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandj
+__ZN8IOSyncer10gMetaClassE
+__ZN8IOSyncer10superClassE
+__ZN8IOSyncer13privateSignalEv
+__ZN8IOSyncer4freeEv
+__ZN8IOSyncer4initEb
+__ZN8IOSyncer4waitEb
+__ZN8IOSyncer6createEb
+__ZN8IOSyncer6reinitEv
+__ZN8IOSyncer6signalEib
+__ZN8IOSyncer9MetaClassC1Ev
+__ZN8IOSyncer9MetaClassC2Ev
+__ZN8IOSyncer9metaClassE
+__ZN8IOSyncerC1EPK11OSMetaClass
+__ZN8IOSyncerC1Ev
+__ZN8IOSyncerC2EPK11OSMetaClass
+__ZN8IOSyncerC2Ev
+__ZN8IOSyncerD0Ev
+__ZN8IOSyncerD2Ev
 __ZN9IOService10adjustBusyEi
 __ZN9IOService10handleOpenEPS_jPv
 __ZN9IOService11_adjustBusyEi
@@ -247,7 +428,49 @@ __ZN9IOService19_RESERVEDIOService2Ev
 __ZN9IOService19_RESERVEDIOService3Ev
 __ZN9IOService19_RESERVEDIOService4Ev
 __ZN9IOService19_RESERVEDIOService5Ev
+__ZN9IOService19_RESERVEDIOService6Ev
+__ZN9IOService19_RESERVEDIOService7Ev
+__ZN9IOService19_RESERVEDIOService8Ev
+__ZN9IOService19_RESERVEDIOService9Ev
 __ZN9IOService19deliverNotificationEPK8OSSymboljj
+__ZN9IOService20_RESERVEDIOService10Ev
+__ZN9IOService20_RESERVEDIOService11Ev
+__ZN9IOService20_RESERVEDIOService12Ev
+__ZN9IOService20_RESERVEDIOService13Ev
+__ZN9IOService20_RESERVEDIOService14Ev
+__ZN9IOService20_RESERVEDIOService15Ev
+__ZN9IOService20_RESERVEDIOService16Ev
+__ZN9IOService20_RESERVEDIOService17Ev
+__ZN9IOService20_RESERVEDIOService18Ev
+__ZN9IOService20_RESERVEDIOService19Ev
+__ZN9IOService20_RESERVEDIOService20Ev
+__ZN9IOService20_RESERVEDIOService21Ev
+__ZN9IOService20_RESERVEDIOService22Ev
+__ZN9IOService20_RESERVEDIOService23Ev
+__ZN9IOService20_RESERVEDIOService24Ev
+__ZN9IOService20_RESERVEDIOService25Ev
+__ZN9IOService20_RESERVEDIOService26Ev
+__ZN9IOService20_RESERVEDIOService27Ev
+__ZN9IOService20_RESERVEDIOService28Ev
+__ZN9IOService20_RESERVEDIOService29Ev
+__ZN9IOService20_RESERVEDIOService30Ev
+__ZN9IOService20_RESERVEDIOService31Ev
+__ZN9IOService20_RESERVEDIOService32Ev
+__ZN9IOService20_RESERVEDIOService33Ev
+__ZN9IOService20_RESERVEDIOService34Ev
+__ZN9IOService20_RESERVEDIOService35Ev
+__ZN9IOService20_RESERVEDIOService36Ev
+__ZN9IOService20_RESERVEDIOService37Ev
+__ZN9IOService20_RESERVEDIOService38Ev
+__ZN9IOService20_RESERVEDIOService39Ev
+__ZN9IOService20_RESERVEDIOService40Ev
+__ZN9IOService20_RESERVEDIOService41Ev
+__ZN9IOService20_RESERVEDIOService42Ev
+__ZN9IOService20_RESERVEDIOService43Ev
+__ZN9IOService20_RESERVEDIOService44Ev
+__ZN9IOService20_RESERVEDIOService45Ev
+__ZN9IOService20_RESERVEDIOService46Ev
+__ZN9IOService20_RESERVEDIOService47Ev
 __ZN9IOService23acknowledgeNotificationEPvj
 __ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_i
 __ZN9IOService23scheduleTerminatePhase2Ej
@@ -266,3 +489,9 @@ __ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanej
 __ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanej
 __ZNK18IOMemoryDescriptor19dmaCommandOperationEjPvj
 __ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEjPvj
+__ZNK8IOSyncer12getMetaClassEv
+__ZNK8IOSyncer9MetaClass5allocEv
+__ZTV8IOSyncer
+__ZTVN8IOSyncer9MetaClassE
+_ev_try_lock
+_ev_unlock
index 4bd05a19379d5d91e028e9cd3ae337158a14dd0b..b310d501aa49d76fef42c8f0f803b54f310348f6 100644
@@ -1,4 +1,3 @@
-___bzero
 _Assert
 _MD5Final
 _MD5Init
@@ -50,16 +49,10 @@ __Z13OSUnserializePKcPP8OSString
 __Z16OSUnserializeXMLPKcPP8OSString
 __ZN10OSIterator10gMetaClassE
 __ZN10OSIterator10superClassE
-__ZN10OSIterator20_RESERVEDOSIterator0Ev
-__ZN10OSIterator20_RESERVEDOSIterator1Ev
-__ZN10OSIterator20_RESERVEDOSIterator2Ev
-__ZN10OSIterator20_RESERVEDOSIterator3Ev
 __ZN10OSIterator9MetaClassC1Ev
 __ZN10OSIterator9MetaClassC2Ev
 __ZN10OSIterator9metaClassE
-__ZN10OSIteratorC1EPK11OSMetaClass
 __ZN10OSIteratorC2EPK11OSMetaClass
-__ZN10OSIteratorD0Ev
 __ZN10OSIteratorD2Ev
 __ZN11OSMetaClass10preModLoadEPKc
 __ZN11OSMetaClass11postModLoadEPv
@@ -73,23 +66,13 @@ __ZN11OSMetaClass18getClassDictionaryEv
 __ZN11OSMetaClass18reportModInstancesEPKc
 __ZN11OSMetaClass19printInstanceCountsEv
 __ZN11OSMetaClass20getMetaClassWithNameEPK8OSSymbol
-__ZN11OSMetaClass21_RESERVEDOSMetaClass0Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass1Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass2Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass3Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass4Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass5Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass6Ev
-__ZN11OSMetaClass21_RESERVEDOSMetaClass7Ev
 __ZN11OSMetaClass21checkMetaCastWithNameEPK8OSStringPK15OSMetaClassBase
 __ZN11OSMetaClass21checkMetaCastWithNameEPK8OSSymbolPK15OSMetaClassBase
 __ZN11OSMetaClass21checkMetaCastWithNameEPKcPK15OSMetaClassBase
 __ZN11OSMetaClass24serializeClassDictionaryEP12OSDictionary
 __ZN11OSMetaClass8logErrorEi
 __ZN11OSMetaClass9metaClassE
-__ZN11OSMetaClassC1EPKcPKS_j
 __ZN11OSMetaClassC2EPKcPKS_j
-__ZN11OSMetaClassD0Ev
 __ZN11OSMetaClassD2Ev
 __ZN11OSMetaClassdlEPvm
 __ZN11OSMetaClassnwEm
@@ -102,14 +85,6 @@ __ZN11OSSerialize14ensureCapacityEj
 __ZN11OSSerialize16initWithCapacityEj
 __ZN11OSSerialize20previouslySerializedEPK15OSMetaClassBase
 __ZN11OSSerialize20setCapacityIncrementEj
-__ZN11OSSerialize21_RESERVEDOSSerialize0Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize1Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize2Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize3Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize4Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize5Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize6Ev
-__ZN11OSSerialize21_RESERVEDOSSerialize7Ev
 __ZN11OSSerialize4freeEv
 __ZN11OSSerialize7addCharEc
 __ZN11OSSerialize9MetaClassC1Ev
@@ -128,19 +103,11 @@ __ZN12OSCollection10setOptionsEjjPv
 __ZN12OSCollection10superClassE
 __ZN12OSCollection11haveUpdatedEv
 __ZN12OSCollection14copyCollectionEP12OSDictionary
-__ZN12OSCollection22_RESERVEDOSCollection2Ev
-__ZN12OSCollection22_RESERVEDOSCollection3Ev
-__ZN12OSCollection22_RESERVEDOSCollection4Ev
-__ZN12OSCollection22_RESERVEDOSCollection5Ev
-__ZN12OSCollection22_RESERVEDOSCollection6Ev
-__ZN12OSCollection22_RESERVEDOSCollection7Ev
 __ZN12OSCollection4initEv
 __ZN12OSCollection9MetaClassC1Ev
 __ZN12OSCollection9MetaClassC2Ev
 __ZN12OSCollection9metaClassE
-__ZN12OSCollectionC1EPK11OSMetaClass
 __ZN12OSCollectionC2EPK11OSMetaClass
-__ZN12OSCollectionD0Ev
 __ZN12OSCollectionD2Ev
 __ZN12OSDictionary10gMetaClassE
 __ZN12OSDictionary10setOptionsEjjPv
@@ -160,14 +127,6 @@ __ZN12OSDictionary15initWithObjectsEPPK8OSObjectPPK8OSSymboljj
 __ZN12OSDictionary16initWithCapacityEj
 __ZN12OSDictionary18initWithDictionaryEPKS_j
 __ZN12OSDictionary20setCapacityIncrementEj
-__ZN12OSDictionary22_RESERVEDOSDictionary0Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary1Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary2Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary3Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary4Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary5Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary6Ev
-__ZN12OSDictionary22_RESERVEDOSDictionary7Ev
 __ZN12OSDictionary4freeEv
 __ZN12OSDictionary5mergeEPKS_
 __ZN12OSDictionary9MetaClassC1Ev
@@ -194,14 +153,6 @@ __ZN12OSOrderedSet14getOrderingRefEv
 __ZN12OSOrderedSet14setFirstObjectEPK15OSMetaClassBase
 __ZN12OSOrderedSet15flushCollectionEv
 __ZN12OSOrderedSet20setCapacityIncrementEj
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet2Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet3Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet4Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet5Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet6Ev
-__ZN12OSOrderedSet22_RESERVEDOSOrderedSet7Ev
 __ZN12OSOrderedSet4freeEv
 __ZN12OSOrderedSet9MetaClassC1Ev
 __ZN12OSOrderedSet9MetaClassC2Ev
@@ -243,14 +194,7 @@ __ZN12OSSymbolPooldlEPvm
 __ZN12OSSymbolPoolnwEm
 __ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass
 __ZN15OSMetaClassBase13checkTypeInstEPKS_S1_
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase3Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase4Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase5Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase6Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase7Ev
-__ZN15OSMetaClassBaseC1Ev
 __ZN15OSMetaClassBaseC2Ev
-__ZN15OSMetaClassBaseD0Ev
 __ZN15OSMetaClassBaseD2Ev
 __ZN15OSMetaClassMetaC1Ev
 __ZN15OSMetaClassMetaC2Ev
@@ -281,14 +225,6 @@ __ZN5OSSet12withCapacityEj
 __ZN5OSSet13initWithArrayEPK7OSArrayj
 __ZN5OSSet14copyCollectionEP12OSDictionary
 __ZN5OSSet14ensureCapacityEj
-__ZN5OSSet15_RESERVEDOSSet0Ev
-__ZN5OSSet15_RESERVEDOSSet1Ev
-__ZN5OSSet15_RESERVEDOSSet2Ev
-__ZN5OSSet15_RESERVEDOSSet3Ev
-__ZN5OSSet15_RESERVEDOSSet4Ev
-__ZN5OSSet15_RESERVEDOSSet5Ev
-__ZN5OSSet15_RESERVEDOSSet6Ev
-__ZN5OSSet15_RESERVEDOSSet7Ev
 __ZN5OSSet15flushCollectionEv
 __ZN5OSSet15initWithObjectsEPPK8OSObjectjj
 __ZN5OSSet16initWithCapacityEj
@@ -319,13 +255,6 @@ __ZN6OSData12withCapacityEj
 __ZN6OSData13initWithBytesEPKvj
 __ZN6OSData14ensureCapacityEj
 __ZN6OSData15withBytesNoCopyEPvj
-__ZN6OSData16_RESERVEDOSData1Ev
-__ZN6OSData16_RESERVEDOSData2Ev
-__ZN6OSData16_RESERVEDOSData3Ev
-__ZN6OSData16_RESERVEDOSData4Ev
-__ZN6OSData16_RESERVEDOSData5Ev
-__ZN6OSData16_RESERVEDOSData6Ev
-__ZN6OSData16_RESERVEDOSData7Ev
 __ZN6OSData16initWithCapacityEj
 __ZN6OSData18setDeallocFunctionEPFvPvjE
 __ZN6OSData19initWithBytesNoCopyEPvj
@@ -356,14 +285,6 @@ __ZN7OSArray14ensureCapacityEj
 __ZN7OSArray15flushCollectionEv
 __ZN7OSArray15initWithObjectsEPPK8OSObjectjj
 __ZN7OSArray16initWithCapacityEj
-__ZN7OSArray17_RESERVEDOSArray0Ev
-__ZN7OSArray17_RESERVEDOSArray1Ev
-__ZN7OSArray17_RESERVEDOSArray2Ev
-__ZN7OSArray17_RESERVEDOSArray3Ev
-__ZN7OSArray17_RESERVEDOSArray4Ev
-__ZN7OSArray17_RESERVEDOSArray5Ev
-__ZN7OSArray17_RESERVEDOSArray6Ev
-__ZN7OSArray17_RESERVEDOSArray7Ev
 __ZN7OSArray20setCapacityIncrementEj
 __ZN7OSArray4freeEv
 __ZN7OSArray5mergeEPKS_
@@ -383,14 +304,6 @@ __ZN8OSNumber10gMetaClassE
 __ZN8OSNumber10superClassE
 __ZN8OSNumber10withNumberEPKcj
 __ZN8OSNumber10withNumberEyj
-__ZN8OSNumber18_RESERVEDOSNumber0Ev
-__ZN8OSNumber18_RESERVEDOSNumber1Ev
-__ZN8OSNumber18_RESERVEDOSNumber2Ev
-__ZN8OSNumber18_RESERVEDOSNumber3Ev
-__ZN8OSNumber18_RESERVEDOSNumber4Ev
-__ZN8OSNumber18_RESERVEDOSNumber5Ev
-__ZN8OSNumber18_RESERVEDOSNumber6Ev
-__ZN8OSNumber18_RESERVEDOSNumber7Ev
 __ZN8OSNumber4freeEv
 __ZN8OSNumber4initEPKcj
 __ZN8OSNumber4initEyj
@@ -407,22 +320,6 @@ __ZN8OSNumberD0Ev
 __ZN8OSNumberD2Ev
 __ZN8OSObject10gMetaClassE
 __ZN8OSObject10superClassE
-__ZN8OSObject18_RESERVEDOSObject0Ev
-__ZN8OSObject18_RESERVEDOSObject1Ev
-__ZN8OSObject18_RESERVEDOSObject2Ev
-__ZN8OSObject18_RESERVEDOSObject3Ev
-__ZN8OSObject18_RESERVEDOSObject4Ev
-__ZN8OSObject18_RESERVEDOSObject5Ev
-__ZN8OSObject18_RESERVEDOSObject6Ev
-__ZN8OSObject18_RESERVEDOSObject7Ev
-__ZN8OSObject18_RESERVEDOSObject8Ev
-__ZN8OSObject18_RESERVEDOSObject9Ev
-__ZN8OSObject19_RESERVEDOSObject10Ev
-__ZN8OSObject19_RESERVEDOSObject11Ev
-__ZN8OSObject19_RESERVEDOSObject12Ev
-__ZN8OSObject19_RESERVEDOSObject13Ev
-__ZN8OSObject19_RESERVEDOSObject14Ev
-__ZN8OSObject19_RESERVEDOSObject15Ev
 __ZN8OSObject4freeEv
 __ZN8OSObject4initEv
 __ZN8OSObject9MetaClassC1Ev
@@ -443,22 +340,6 @@ __ZN8OSString11withCStringEPKc
 __ZN8OSString14initWithStringEPKS_
 __ZN8OSString15initWithCStringEPKc
 __ZN8OSString17withCStringNoCopyEPKc
-__ZN8OSString18_RESERVEDOSString0Ev
-__ZN8OSString18_RESERVEDOSString1Ev
-__ZN8OSString18_RESERVEDOSString2Ev
-__ZN8OSString18_RESERVEDOSString3Ev
-__ZN8OSString18_RESERVEDOSString4Ev
-__ZN8OSString18_RESERVEDOSString5Ev
-__ZN8OSString18_RESERVEDOSString6Ev
-__ZN8OSString18_RESERVEDOSString7Ev
-__ZN8OSString18_RESERVEDOSString8Ev
-__ZN8OSString18_RESERVEDOSString9Ev
-__ZN8OSString19_RESERVEDOSString10Ev
-__ZN8OSString19_RESERVEDOSString11Ev
-__ZN8OSString19_RESERVEDOSString12Ev
-__ZN8OSString19_RESERVEDOSString13Ev
-__ZN8OSString19_RESERVEDOSString14Ev
-__ZN8OSString19_RESERVEDOSString15Ev
 __ZN8OSString21initWithCStringNoCopyEPKc
 __ZN8OSString4freeEv
 __ZN8OSString7setCharEcj
@@ -479,14 +360,6 @@ __ZN8OSSymbol11withCStringEPKc
 __ZN8OSSymbol14initWithStringEPK8OSString
 __ZN8OSSymbol15initWithCStringEPKc
 __ZN8OSSymbol17withCStringNoCopyEPKc
-__ZN8OSSymbol18_RESERVEDOSSymbol0Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol1Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol2Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol3Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol4Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol5Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol6Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol7Ev
 __ZN8OSSymbol18checkForPageUnloadEPvS0_
 __ZN8OSSymbol21initWithCStringNoCopyEPKc
 __ZN8OSSymbol4freeEv
@@ -503,14 +376,6 @@ __ZN9OSBoolean10gMetaClassE
 __ZN9OSBoolean10initializeEv
 __ZN9OSBoolean10superClassE
 __ZN9OSBoolean11withBooleanEb
-__ZN9OSBoolean19_RESERVEDOSBoolean0Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean1Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean2Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean3Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean4Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean5Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean6Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean7Ev
 __ZN9OSBoolean4freeEv
 __ZN9OSBoolean9MetaClassC1Ev
 __ZN9OSBoolean9MetaClassC2Ev
@@ -717,6 +582,7 @@ __ZdaPv
 __ZdlPv
 __Znam
 __Znwm
+___bzero
 ___cxa_pure_virtual
 ___stack_chk_fail
 ___stack_chk_guard
@@ -857,7 +723,6 @@ _version_stage
 _version_variant
 _vprintf
 _vsnprintf
-_vsprintf
 _vsscanf
 _zError
 _zlibVersion
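
Note on the _RESERVED symbols that dominate these export-list diffs: libkern pads every public C++ class with unused virtual methods so that real methods can be added later without changing the vtable layout, and each pad slot is exported as a _RESERVED<Class><N> symbol. The hunks above drop those pads from the primary export files, while the per-architecture lists that follow re-add them; the target filenames are not preserved in this rendering. Below is a minimal sketch of the convention, assuming the standard OSMetaClass macros; the class name MyDriver is hypothetical.

// MyDriver.h
#include <IOKit/IOService.h>

class MyDriver : public IOService
{
    OSDeclareDefaultStructors(MyDriver)

public:
    virtual bool start(IOService *provider);

private:
    // Each reserved slot emits a placeholder symbol such as
    // __ZN8MyDriver18_RESERVEDMyDriver0Ev, the same pattern as the
    // entries listed above.
    OSMetaClassDeclareReservedUnused(MyDriver, 0);
    OSMetaClassDeclareReservedUnused(MyDriver, 1);
};

// MyDriver.cpp
OSDefineMetaClassAndStructors(MyDriver, IOService)
OSMetaClassDefineReservedUnused(MyDriver, 0);
OSMetaClassDefineReservedUnused(MyDriver, 1);

Promoting a pad to a real method later (via OSMetaClassDefineReservedUsed) keeps the vtable layout intact, which is why moving these symbols between export lists is an ABI-visible change rather than a cosmetic one.
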
index d1a97b9eef56cfe115dcd4d314a400558dd1a77a..f9ef1120e97a24baf7f0dbff864cb0db1757d9af 100644
-_lck_mtx_unlock_darwin10
-_lck_mtx_lock_spin
-_lck_mtx_try_lock_spin
-_lck_mtx_convert_spin
 _OSAddAtomic64
 _OSCompareAndSwap64
 _OSRuntimeFinalizeCPP
 _OSRuntimeInitializeCPP
 _OSRuntimeUnloadCPP
 _OSRuntimeUnloadCPPForSegment
+__ZN10OSIterator20_RESERVEDOSIterator0Ev
+__ZN10OSIterator20_RESERVEDOSIterator1Ev
+__ZN10OSIterator20_RESERVEDOSIterator2Ev
+__ZN10OSIterator20_RESERVEDOSIterator3Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass0Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass1Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass2Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass3Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass4Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass5Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass6Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass7Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize0Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize1Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize2Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize3Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize4Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize5Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize6Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize7Ev
+__ZN12OSCollection22_RESERVEDOSCollection2Ev
+__ZN12OSCollection22_RESERVEDOSCollection3Ev
+__ZN12OSCollection22_RESERVEDOSCollection4Ev
+__ZN12OSCollection22_RESERVEDOSCollection5Ev
+__ZN12OSCollection22_RESERVEDOSCollection6Ev
+__ZN12OSCollection22_RESERVEDOSCollection7Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary0Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary1Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary2Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary3Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary4Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary5Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary6Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary7Ev
 __ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_
 __ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet2Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet3Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet4Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet5Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet6Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet7Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase3Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase4Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase5Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase6Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase7Ev
+__ZN5OSSet15_RESERVEDOSSet0Ev
+__ZN5OSSet15_RESERVEDOSSet1Ev
+__ZN5OSSet15_RESERVEDOSSet2Ev
+__ZN5OSSet15_RESERVEDOSSet3Ev
+__ZN5OSSet15_RESERVEDOSSet4Ev
+__ZN5OSSet15_RESERVEDOSSet5Ev
+__ZN5OSSet15_RESERVEDOSSet6Ev
+__ZN5OSSet15_RESERVEDOSSet7Ev
+__ZN6OSData16_RESERVEDOSData1Ev
+__ZN6OSData16_RESERVEDOSData2Ev
+__ZN6OSData16_RESERVEDOSData3Ev
+__ZN6OSData16_RESERVEDOSData4Ev
+__ZN6OSData16_RESERVEDOSData5Ev
+__ZN6OSData16_RESERVEDOSData6Ev
+__ZN6OSData16_RESERVEDOSData7Ev
+__ZN7OSArray17_RESERVEDOSArray0Ev
+__ZN7OSArray17_RESERVEDOSArray1Ev
+__ZN7OSArray17_RESERVEDOSArray2Ev
+__ZN7OSArray17_RESERVEDOSArray3Ev
+__ZN7OSArray17_RESERVEDOSArray4Ev
+__ZN7OSArray17_RESERVEDOSArray5Ev
+__ZN7OSArray17_RESERVEDOSArray6Ev
+__ZN7OSArray17_RESERVEDOSArray7Ev
+__ZN8OSNumber18_RESERVEDOSNumber0Ev
+__ZN8OSNumber18_RESERVEDOSNumber1Ev
+__ZN8OSNumber18_RESERVEDOSNumber2Ev
+__ZN8OSNumber18_RESERVEDOSNumber3Ev
+__ZN8OSNumber18_RESERVEDOSNumber4Ev
+__ZN8OSNumber18_RESERVEDOSNumber5Ev
+__ZN8OSNumber18_RESERVEDOSNumber6Ev
+__ZN8OSNumber18_RESERVEDOSNumber7Ev
+__ZN8OSObject18_RESERVEDOSObject0Ev
+__ZN8OSObject18_RESERVEDOSObject1Ev
+__ZN8OSObject18_RESERVEDOSObject2Ev
+__ZN8OSObject18_RESERVEDOSObject3Ev
+__ZN8OSObject18_RESERVEDOSObject4Ev
+__ZN8OSObject18_RESERVEDOSObject5Ev
+__ZN8OSObject18_RESERVEDOSObject6Ev
+__ZN8OSObject18_RESERVEDOSObject7Ev
+__ZN8OSObject18_RESERVEDOSObject8Ev
+__ZN8OSObject18_RESERVEDOSObject9Ev
+__ZN8OSObject19_RESERVEDOSObject10Ev
+__ZN8OSObject19_RESERVEDOSObject11Ev
+__ZN8OSObject19_RESERVEDOSObject12Ev
+__ZN8OSObject19_RESERVEDOSObject13Ev
+__ZN8OSObject19_RESERVEDOSObject14Ev
+__ZN8OSObject19_RESERVEDOSObject15Ev
+__ZN8OSString18_RESERVEDOSString0Ev
+__ZN8OSString18_RESERVEDOSString1Ev
+__ZN8OSString18_RESERVEDOSString2Ev
+__ZN8OSString18_RESERVEDOSString3Ev
+__ZN8OSString18_RESERVEDOSString4Ev
+__ZN8OSString18_RESERVEDOSString5Ev
+__ZN8OSString18_RESERVEDOSString6Ev
+__ZN8OSString18_RESERVEDOSString7Ev
+__ZN8OSString18_RESERVEDOSString8Ev
+__ZN8OSString18_RESERVEDOSString9Ev
+__ZN8OSString19_RESERVEDOSString10Ev
+__ZN8OSString19_RESERVEDOSString11Ev
+__ZN8OSString19_RESERVEDOSString12Ev
+__ZN8OSString19_RESERVEDOSString13Ev
+__ZN8OSString19_RESERVEDOSString14Ev
+__ZN8OSString19_RESERVEDOSString15Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol0Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol1Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol2Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol3Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol4Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol5Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol6Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol7Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean0Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean1Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean2Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean3Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean4Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean5Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean6Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean7Ev
+_lck_mtx_convert_spin
+_lck_mtx_lock_spin
+_lck_mtx_try_lock_spin
+_lck_mtx_unlock_darwin10
 _sprintf
 _strcat
 _strcpy
+_vsprintf
index c42f577d866503368604d290858161ba68d66aa3..f67db63a8ac40ce11398fca4627bb33cb74a74aa 100644
-_lck_mtx_lock_spin
-_lck_mtx_try_lock_spin
-_lck_mtx_convert_spin
 _OSAddAtomic64
 _OSCompareAndSwap64
+__ZN10OSIterator20_RESERVEDOSIterator0Ev
+__ZN10OSIterator20_RESERVEDOSIterator1Ev
+__ZN10OSIterator20_RESERVEDOSIterator2Ev
+__ZN10OSIterator20_RESERVEDOSIterator3Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass0Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass1Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass2Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass3Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass4Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass5Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass6Ev
+__ZN11OSMetaClass21_RESERVEDOSMetaClass7Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize0Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize1Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize2Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize3Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize4Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize5Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize6Ev
+__ZN11OSSerialize21_RESERVEDOSSerialize7Ev
+__ZN12OSCollection22_RESERVEDOSCollection2Ev
+__ZN12OSCollection22_RESERVEDOSCollection3Ev
+__ZN12OSCollection22_RESERVEDOSCollection4Ev
+__ZN12OSCollection22_RESERVEDOSCollection5Ev
+__ZN12OSCollection22_RESERVEDOSCollection6Ev
+__ZN12OSCollection22_RESERVEDOSCollection7Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary0Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary1Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary2Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary3Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary4Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary5Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary6Ev
+__ZN12OSDictionary22_RESERVEDOSDictionary7Ev
 __ZN12OSOrderedSet12withCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_
 __ZN12OSOrderedSet16initWithCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet2Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet3Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet4Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet5Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet6Ev
+__ZN12OSOrderedSet22_RESERVEDOSOrderedSet7Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase3Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase4Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase5Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase6Ev
+__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase7Ev
+__ZN5OSSet15_RESERVEDOSSet0Ev
+__ZN5OSSet15_RESERVEDOSSet1Ev
+__ZN5OSSet15_RESERVEDOSSet2Ev
+__ZN5OSSet15_RESERVEDOSSet3Ev
+__ZN5OSSet15_RESERVEDOSSet4Ev
+__ZN5OSSet15_RESERVEDOSSet5Ev
+__ZN5OSSet15_RESERVEDOSSet6Ev
+__ZN5OSSet15_RESERVEDOSSet7Ev
+__ZN6OSData16_RESERVEDOSData1Ev
+__ZN6OSData16_RESERVEDOSData2Ev
+__ZN6OSData16_RESERVEDOSData3Ev
+__ZN6OSData16_RESERVEDOSData4Ev
+__ZN6OSData16_RESERVEDOSData5Ev
+__ZN6OSData16_RESERVEDOSData6Ev
+__ZN6OSData16_RESERVEDOSData7Ev
+__ZN7OSArray17_RESERVEDOSArray0Ev
+__ZN7OSArray17_RESERVEDOSArray1Ev
+__ZN7OSArray17_RESERVEDOSArray2Ev
+__ZN7OSArray17_RESERVEDOSArray3Ev
+__ZN7OSArray17_RESERVEDOSArray4Ev
+__ZN7OSArray17_RESERVEDOSArray5Ev
+__ZN7OSArray17_RESERVEDOSArray6Ev
+__ZN7OSArray17_RESERVEDOSArray7Ev
+__ZN8OSNumber18_RESERVEDOSNumber0Ev
+__ZN8OSNumber18_RESERVEDOSNumber1Ev
+__ZN8OSNumber18_RESERVEDOSNumber2Ev
+__ZN8OSNumber18_RESERVEDOSNumber3Ev
+__ZN8OSNumber18_RESERVEDOSNumber4Ev
+__ZN8OSNumber18_RESERVEDOSNumber5Ev
+__ZN8OSNumber18_RESERVEDOSNumber6Ev
+__ZN8OSNumber18_RESERVEDOSNumber7Ev
+__ZN8OSObject18_RESERVEDOSObject0Ev
+__ZN8OSObject18_RESERVEDOSObject1Ev
+__ZN8OSObject18_RESERVEDOSObject2Ev
+__ZN8OSObject18_RESERVEDOSObject3Ev
+__ZN8OSObject18_RESERVEDOSObject4Ev
+__ZN8OSObject18_RESERVEDOSObject5Ev
+__ZN8OSObject18_RESERVEDOSObject6Ev
+__ZN8OSObject18_RESERVEDOSObject7Ev
+__ZN8OSObject18_RESERVEDOSObject8Ev
+__ZN8OSObject18_RESERVEDOSObject9Ev
+__ZN8OSObject19_RESERVEDOSObject10Ev
+__ZN8OSObject19_RESERVEDOSObject11Ev
+__ZN8OSObject19_RESERVEDOSObject12Ev
+__ZN8OSObject19_RESERVEDOSObject13Ev
+__ZN8OSObject19_RESERVEDOSObject14Ev
+__ZN8OSObject19_RESERVEDOSObject15Ev
+__ZN8OSString18_RESERVEDOSString0Ev
+__ZN8OSString18_RESERVEDOSString1Ev
+__ZN8OSString18_RESERVEDOSString2Ev
+__ZN8OSString18_RESERVEDOSString3Ev
+__ZN8OSString18_RESERVEDOSString4Ev
+__ZN8OSString18_RESERVEDOSString5Ev
+__ZN8OSString18_RESERVEDOSString6Ev
+__ZN8OSString18_RESERVEDOSString7Ev
+__ZN8OSString18_RESERVEDOSString8Ev
+__ZN8OSString18_RESERVEDOSString9Ev
+__ZN8OSString19_RESERVEDOSString10Ev
+__ZN8OSString19_RESERVEDOSString11Ev
+__ZN8OSString19_RESERVEDOSString12Ev
+__ZN8OSString19_RESERVEDOSString13Ev
+__ZN8OSString19_RESERVEDOSString14Ev
+__ZN8OSString19_RESERVEDOSString15Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol0Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol1Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol2Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol3Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol4Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol5Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol6Ev
+__ZN8OSSymbol18_RESERVEDOSSymbol7Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean0Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean1Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean2Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean3Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean4Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean5Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean6Ev
+__ZN9OSBoolean19_RESERVEDOSBoolean7Ev
+_lck_mtx_convert_spin
+_lck_mtx_lock_spin
+_lck_mtx_try_lock_spin
 _sprintf
 _strcat
 _strcpy
+_vsprintf
index 839eadc4fde1d42b0805e7113e91e0acc55e9c1f..73dda1064df0d2963acf1aa7bc809a1e95aafae5 100644 (file)
@@ -10,6 +10,9 @@ _mac_audit_text
 
 _mac_iokit_check_hid_control
 
+_mac_thread_get_threadlabel
+_mac_thread_get_uthreadlabel
+
 _sbuf_cat
 _sbuf_data
 _sbuf_delete
index 6006136b4209b20b3a220c7140cc0b78c30379ac..aa74fd56abc3e1d174cc890051134c91439be82a 100644 (file)
@@ -1,9 +1,11 @@
 _kau_will_audit
+_mac_do_machexc
 _mac_kalloc
 _mac_kalloc_noblock
 _mac_kfree
 _mac_mbuf_alloc
 _mac_mbuf_free
+_mac_schedule_userret
 _mac_unwire
 _mac_wire
 _sysctl__security_mac_children
index 6006136b4209b20b3a220c7140cc0b78c30379ac..aa74fd56abc3e1d174cc890051134c91439be82a 100644 (file)
@@ -1,9 +1,11 @@
 _kau_will_audit
+_mac_do_machexc
 _mac_kalloc
 _mac_kalloc_noblock
 _mac_kfree
 _mac_mbuf_alloc
 _mac_mbuf_free
+_mac_schedule_userret
 _mac_unwire
 _mac_wire
 _sysctl__security_mac_children
index ff2d46ddb7630c28a4f0f11b3d6e79289436cc3f..201cbee6c054db6ed7df13734c7b5ef30c1ffa6f 100644 (file)
@@ -93,7 +93,7 @@ $(OBJPATH)/allsymbols: $(OBJPATH)/mach_kernel
 $(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset :  %.exports %.$(ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols
        $(_v)if [ "$*" != System6.0 -o $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then   \
                $(KEXT_CREATE_SYMBOL_SET) \
-               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
+               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
                -import $(OBJPATH)/allsymbols \
                -export $*.exports \
                -export $*.$(ARCH_CONFIG_LC).exports \
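The $($(addsuffix ...)) construct in the hunk above is a computed variable name: with ARCH_CONFIG set to, say, X86_64, the inner $(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_) produces the name ARCH_FLAGS_X86_64, and the outer $(...) then expands that variable's value. The change from ARCH_FLAGS_ALL_ to ARCH_FLAGS_ therefore just redirects the lookup to the per-architecture flag variables (the flag values themselves are defined elsewhere in the build system and are not shown in this diff).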
@@ -107,15 +107,39 @@ $(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset :  %.exports %.$(ARCH_CONFIG_LC).exp
 # symbolsets for the primary machine configuration for each kernel/arch config
 $(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset :
        $(_v)per_arch_symbolsets="";                    \
-       kernel_config=$(INSTALL_TYPE);                  \
-       machine_config=$(MACHINE_CONFIG);               \
        for arch_config in $(INSTALL_ARCHS);            \
        do                                              \
+                                                       \
+       my_counter=1;                                   \
+       found_arch=0;                                   \
+       for my_config in $(TARGET_CONFIGS_UC);          \
+       do                                              \
+               if [ $${my_counter} -eq 1 ] ; then      \
+                       kernel_config=$${my_config};    \
+               elif [ $${my_counter} -eq 2 ] ; then    \
+                       if [ $${my_config} = $${arch_config} ]; then \
+                               found_arch=1;           \
+                       fi;                             \
+               else                                    \
+                       if [ $${found_arch} -eq 1 ]; then \
+                               machine_config=$${my_config};\
+                               break;                  \
+                       fi;                             \
+                       my_counter=0;                   \
+               fi;                                     \
+               my_counter=$$((my_counter + 1));        \
+       done;                                           \
+                                                       \
        if [ $${arch_config} = ARM ] ; then             \
                if [ $${machine_config} = DEFAULT ] ; then              \
                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);   \
                fi;                                                     \
        fi;                                             \
+       if [ $${arch_config} = L4_ARM ] ; then          \
+               if [ $${machine_config} = DEFAULT ] ; then              \
+                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);        \
+               fi;                                                     \
+       fi;                                             \
        if [ $${machine_config} = DEFAULT ] ; then      \
                objpath=${OBJROOT}/$${kernel_config}_$${arch_config};   \
        else            \
@@ -133,14 +157,14 @@ $(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset :
 
 build_symbol_sets:     $(SYMBOL_SET_BUILD) $(OBJPATH)/allsymbols
        $(_v)$(KEXT_CREATE_SYMBOL_SET) \
-               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
+               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
                $(foreach comp,$(filter-out System6.0 Private,$(SYMBOL_COMPONENT_LIST)), \
                        -export $(SRCROOT)/$(COMPONENT)/$(comp).exports \
                        -export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \
                -import $(OBJPATH)/allsymbols \
                -output /dev/null $(_vstdout);
        $(_v)$(KEXT_CREATE_SYMBOL_SET) \
-               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
+               $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
                $(foreach comp,$(filter-out System6.0 Unsupported,$(SYMBOL_COMPONENT_LIST)), \
                        -export $(SRCROOT)/$(COMPONENT)/$(comp).exports \
                        -export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \
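The inline shell added in the hunk above walks TARGET_CONFIGS_UC three words at a time — kernel configuration, architecture, machine configuration — resetting my_counter after each triplet and breaking once the triplet whose architecture matches arch_config has been seen. As a hedged illustration with hypothetical values: for TARGET_CONFIGS_UC = "RELEASE X86_64 DEFAULT DEBUG ARM S5L8930X" and arch_config = ARM, the loop leaves kernel_config=DEBUG and machine_config=S5L8930X; for arch_config = X86_64 it leaves kernel_config=RELEASE and machine_config=DEFAULT, which the DEFAULT checks that follow then resolve to an objpath without a machine suffix.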
index fb4c4656acf45d9bd369948d6162442bf9389e1c..1bf9f88187c162a86217e4dfed717d6e54be0539 100644 (file)
@@ -1,4 +1,4 @@
-11.4.2
+12.0.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 364d8406958fa76bf11e1eb8451b157847987eee..95fe92e41b49c548080911edab1b941f3d9a92d7 100644 (file)
@@ -13,20 +13,25 @@ __ZTV24IOCPUInterruptController
 _b_to_q
 _bdevsw
 _boot
+_bootcache_contains_block
 _bsd_hostname
 _bsd_set_dependency_capable
 _buf_attr
 _buf_create_shadow
-_buf_getcpaddr
-_buf_setcpaddr
 _buf_setfilter
 _buf_shadow
+_bufattr_meta
+_bufattr_nocache
 _bufattr_throttled
 _cdevsw
 _cdevsw_setkqueueok
+_chudxnu_platform_ptr
+_chudxnu_thread_get_dirty
+_chudxnu_thread_set_dirty
 _clalloc
 _clfree
 _cons_cinput
+_convert_task_to_port
 _cp_key_store_action
 _cp_register_wraps
 _cs_entitlements_blob_get
@@ -35,13 +40,41 @@ _ctl_name_by_id
 _fd_rdwr
 _get_aiotask
 _hz
+_ifnet_allocate_extended
+_ifnet_bandwidths
 _ifnet_clone_attach
 _ifnet_clone_detach
+_ifnet_get_local_ports
+_ifnet_dequeue
+_ifnet_dequeue_service_class
+_ifnet_dequeue_multi
+_ifnet_dequeue_service_class_multi
+_ifnet_enqueue
+_ifnet_get_sndq_len
+_ifnet_get_rcvq_maxlen
+_ifnet_get_sndq_maxlen
 _ifnet_idle_flags
+_ifnet_inet_defrouter_llreachinfo
+_ifnet_inet6_defrouter_llreachinfo
+_ifnet_input_extended
+_ifnet_link_quality
+_ifnet_notice_node_presence
+_ifnet_notice_node_absence
+_ifnet_notice_master_elected
+_ifnet_purge
+_ifnet_set_bandwidths
 _ifnet_set_idle_flags
+_ifnet_set_link_quality
+_ifnet_set_output_sched_model
+_ifnet_set_rcvq_maxlen
+_ifnet_set_sndq_maxlen
+_ifnet_start
+_ifnet_transmit_burst_start
+_ifnet_transmit_burst_end
 _in6addr_local
 _inaddr_local
 _inet_domain_mutex
+_inp_clear_INP_INADDR_ANY
 _ip_mutex
 _ip_output
 _ip_protox
@@ -71,6 +104,9 @@ _m_split
 _m_trailingspace:_mbuf_trailingspace
 _mac_proc_set_enforce
 _mbuf_get_priority:_mbuf_get_traffic_class
+_mbuf_get_service_class
+_mbuf_is_service_class_privileged:_mbuf_is_traffic_class_privileged
+_mbuf_pkthdr_aux_flags
 _mcl_to_paddr
 _mountroot_post_hook
 _net_add_domain
@@ -78,11 +114,10 @@ _net_add_proto
 _net_del_domain
 _net_del_proto
 _netboot_root
-_perf_monitor_register_*
+_perf_monitor_register
 _perf_monitor_unregister
 _pffinddomain
 _pffindproto
-_pmc_accessible_from_core
 _pmc_config_set_interrupt_threshold
 _pmc_config_set_value
 _pmc_create_config
@@ -105,6 +140,7 @@ _pmc_unregister
 _post_sys_powersource
 _port_name_to_task
 _port_name_to_thread
+_proc_task
 _pru_abort_notsupp
 _pru_accept_notsupp
 _pru_bind_notsupp
@@ -121,6 +157,7 @@ _pru_shutdown_notsupp
 _pru_sockaddr_notsupp
 _pru_sopoll_notsupp
 _q_to_b
+_register_crypto_functions
 _register_decmpfs_decompressor
 _rootdev
 _rootvp
@@ -152,6 +189,8 @@ _soreserve
 _sorwakeup
 _sosend
 _termioschars
+_thread_call_allocate_with_priority
+_thread_call_cancel_wait
 _thread_clear_eager_preempt
 _thread_dispatchqaddr
 _thread_set_eager_preempt
@@ -164,6 +203,7 @@ _throttle_info_update
 _throttle_info_ref_by_mask
 _throttle_info_rel_by_mask
 _throttle_info_update_by_mask
+_throttle_info_io_will_be_throttled
 _throttle_lowpri_io
 _throttle_set_thread_io_policy
 _timeout
@@ -191,6 +231,7 @@ _unregister_decmpfs_decompressor
 _untimeout
 _vnode_isdyldsharedcache
 _vnode_ismonitored
+_vnode_istty
 _vnode_notify
 _vnop_compound_open_desc
 _vnop_compound_mkdir_desc
index 3a7064dc62a372431a473c623384b973c099257d..80e66dfe909bafc5822a0cb4472590ea53d9704d 100644 (file)
@@ -8,6 +8,7 @@ _acpi_sleep_kernel
 _add_fsevent
 _apic_table
 _apply_func_phys
+_bufattr_delayidlesleep
 _cpu_to_lapic
 _cpuid_features
 _cpuid_info
index a9c6a89a1655e83712063be03183682eb8f86ef6..73963f2c23ed7c028f06f5a3539b8fbc37739d27 100644 (file)
@@ -8,6 +8,7 @@ _acpi_sleep_kernel
 _add_fsevent
 _apic_table
 _apply_func_phys
+_bufattr_delayidlesleep
 _cpu_to_lapic
 _cpuid_features
 _cpuid_info
index c3d167834d5a830fcc619396a78c462ee32bca56..9b4585b337ac700f6c0068776b1bda15a7e8f28c 100644 (file)
@@ -19,15 +19,12 @@ _IOBSDNameMatching
 _IOBSDRegistryEntryForDeviceTree
 _IOBSDRegistryEntryGetData
 _IOBSDRegistryEntryRelease
-_IOCDMatching
 _IOCreateThread
 _IODTFreeLoaderInfo
 _IODTGetLoaderInfo
 _IODelay
-_IODiskMatching
 _IOExitThread
 _IOFindBSDRoot
-_IOFindMatchingChild
 _IOFindNameForValue
 _IOFindValueForName
 _IOFlushProcessorCache
@@ -69,7 +66,6 @@ _IOMapperInsertPPNPages
 _IOMapperInsertPage
 _IOMapperInsertUPLPages
 _IONDRVLibrariesInitialize
-_IONetworkMatching
 _IONetworkNamePrefixMatching
 _IOOFPathMatching
 _IOPageableMapForAddress
@@ -100,9 +96,7 @@ _IOSimpleLockTryLock:_lck_spin_try_lock
 _IOSimpleLockUnlock:_lck_spin_unlock
 _IOSizeToAlignment
 _IOSleep
-_IOSpinUnlock
 _IOSystemShutdownNotification
-_IOTrySpinLock
 _IOZeroTvalspec
 _KUNCExecute
 _KUNCGetNotificationID
@@ -2877,8 +2871,6 @@ _device_data_action
 _devnode_free
 _disable_serial_output
 _ether_check_multi
-_ev_try_lock
-_ev_unlock
 _fatfile_getarch
 _fatfile_getarch_affinity
 _find_entry
@@ -3193,7 +3185,6 @@ _vm_protect
 _vm_region
 _vm_region_object_create
 _vsnprintf
-_vsprintf
 _vsscanf
 _zalloc
 _zfree
index f3955791d7a7ff197784feb321f401c0dddad5a6..aecfe0c97b619704546c2b723005dc0f7a97b47d 100644 (file)
@@ -1,3 +1,5 @@
+_IOSpinUnlock
+_IOTrySpinLock
 _PE_install_interrupt_handler
 _PE_interrupt_handler
 _PE_parse_boot_arg
@@ -12,6 +14,8 @@ _cpu_number
 _cpu_to_lapic
 _cpuid_features
 _cpuid_info
+_ev_try_lock
+_ev_unlock
 _hfs_addconverter
 _hfs_remconverter
 _lapic_end_of_interrupt
@@ -27,3 +31,4 @@ _sprintf
 _strcat
 _strcpy
 _thread_funnel_set
+_vsprintf
index 60c3e3ad1bb0c8720d782ed3d732f03306d48b6b..accc98e6558691686c6f9314ac7eac67617b9b3b 100644 (file)
@@ -1,3 +1,8 @@
+_IOSpinUnlock
+_IOTrySpinLock
+_ev_try_lock
+_ev_unlock
 _sprintf
 _strcat
 _strcpy
+_vsprintf
index 374517b7ea217ee6e40a07b3beee9f049122ba37..dc1590d45ac52f9cd99308058fd652ed004c15d4 100644 (file)
@@ -6,37 +6,26 @@ _KUNCUserNotificationDisplayAlert
 _KUNCUserNotificationDisplayFromBundle
 _KUNCUserNotificationDisplayNotice
 _NDR_record
-_OSSpinLockTry
-_OSSpinLockUnlock
 _PE_kputc
 __Z22OSFlushObjectTrackListv
 __ZN15IOWatchDogTimer10gMetaClassE
 __ZN15IOWatchDogTimer10superClassE
 __ZN15IOWatchDogTimer13setPropertiesEP8OSObject
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev
 __ZN15IOWatchDogTimer4stopEP9IOService
 __ZN15IOWatchDogTimer5startEP9IOService
 __ZN15IOWatchDogTimer9MetaClassC1Ev
 __ZN15IOWatchDogTimer9MetaClassC2Ev
 __ZN15IOWatchDogTimer9metaClassE
-__ZN15IOWatchDogTimerC1EPK11OSMetaClass
 __ZN15IOWatchDogTimerC2EPK11OSMetaClass
-__ZN15IOWatchDogTimerD0Ev
 __ZN15IOWatchDogTimerD2Ev
 __ZN16IOPlatformDevice10gMetaClassE
 __ZN16IOPlatformDevice13matchLocationEP9IOService
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev
 __ZN16IOPlatformDevice9metaClassE
 __ZN16IOPlatformDeviceC2EPK11OSMetaClass
 __ZN16IOPlatformDeviceD2Ev
 __ZN18IODTPlatformExpert9metaClassE
 __ZN9IODTNVRAM10gMetaClassE
+__ZN9IODTNVRAM10safeToSyncEv
 __ZN9IODTNVRAM15initOFVariablesEv
 __ZN9IODTNVRAM15syncOFVariablesEv
 __ZN9IODTNVRAM16escapeDataToDataEP6OSData
@@ -51,7 +40,6 @@ __ZN9IODTNVRAM26calculatePartitionChecksumEPh
 __ZN9IODTNVRAM9metaClassE
 __ZN9IODTNVRAMC2EPK11OSMetaClass
 __ZN9IODTNVRAMD2Ev
-__ZN9IODTNVRAM10safeToSyncEv
 __ZNK15IOWatchDogTimer12getMetaClassEv
 __ZNK15IOWatchDogTimer9MetaClass5allocEv
 __ZNK9IODTNVRAM17getOFVariablePermEPK8OSSymbol
@@ -81,7 +69,7 @@ _current_act
 _delay
 _delay_for_interval
 _des_ecb_encrypt
-_des_set_key
+_des_ecb_key_sched
 _gIODTSharedInterrupts
 _gOSObjectTrackList
 _gOSObjectTrackThread
@@ -117,6 +105,7 @@ _lck_rw_done
 _ldisc_deregister
 _ldisc_register
 _log
+_mach_gss_lookup
 _mach_gss_accept_sec_context
 _mach_gss_accept_sec_context_v2
 _mach_gss_hold_cred
index 99112a1615afb74a074c66af3a708ce841b78034..602e7123a730ab29f286973a8a487cb2d863a3b2 100644 (file)
@@ -1,13 +1,26 @@
+_OSSpinLockTry
+_OSSpinLockUnlock
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev
 __ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_
 __ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject
 __ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject
 __ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm
 __ZN9IODTNVRAM19unescapeBytesToDataEPKhm
+_clock_get_system_value
 _cpu_number
 _domains
 _dsmos_page_transform_hook
 _gPEEFIRuntimeServices
 _gPEEFISystemTable
+_hibernate_vm_lock
+_hibernate_vm_unlock
 _ifunit
 _in6addr_local
 _in_broadcast
@@ -42,10 +55,10 @@ _m_trailingspace:_mbuf_trailingspace
 _mach_msg_rpc_from_kernel
 _mach_msg_send_from_kernel_with_options
 _mcl_to_paddr:_mbuf_data_to_physical
+_ml_cpu_int_event_time
 _ml_get_apicid
 _ml_get_maxbusdelay
 _ml_get_maxsnoop
-_ml_cpu_int_event_time
 _mp_rendezvous
 _mp_rendezvous_no_intrs
 _nd6_storelladdr
@@ -109,8 +122,6 @@ _sorwakeup
 _sosend
 _sosetopt
 _tcbinfo
-_tmrCvt
-_tsc_get_info
 _thread_call_func
 _thread_call_func_cancel
 _thread_call_func_delayed
@@ -119,8 +130,7 @@ _thread_cancel_timer
 _thread_funnel_set
 _thread_set_timer
 _thread_set_timer_deadline
+_tmrCvt
+_tsc_get_info
 _udbinfo
-_hibernate_vm_lock
-_hibernate_vm_unlock
-_clock_get_system_value
 _PE_state
index 2e7f007d1f72a180a9e185b3b3cbba8e0c24dde8..4eb17cafaf8666610553f7b425e84134f4138735 100644 (file)
@@ -1,3 +1,13 @@
+_OSSpinLockTry
+_OSSpinLockUnlock
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev
+__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev
 __ZN9IODTNVRAM17getOWVariableInfoEjPPK8OSSymbolPjS4_
 __ZN9IODTNVRAM19convertObjectToPropEPhPjPK8OSSymbolP8OSObject
 __ZN9IODTNVRAM19convertPropToObjectEPhjS0_jPPK8OSSymbolPP8OSObject
@@ -7,14 +17,16 @@ _cpu_number
 _dsmos_page_transform_hook
 _gPEEFIRuntimeServices
 _gPEEFISystemTable
+_hibernate_vm_lock
+_hibernate_vm_unlock
 _kdp_register_callout
 _kdp_set_ip_and_mac_addresses
 _lapic_set_perfcnt_interrupt_mask
 _lapic_set_pmi_func
+_ml_cpu_int_event_time
 _ml_get_apicid
 _ml_get_maxbusdelay
 _ml_get_maxsnoop
-_ml_cpu_int_event_time
 _mp_rendezvous
 _mp_rendezvous_no_intrs
 _pmCPUControl
@@ -29,6 +41,4 @@ _sock_release
 _sock_retain
 _tmrCvt
 _tsc_get_info
-_hibernate_vm_lock
-_hibernate_vm_unlock
 _PE_state
index 31deccacede6f9cb64e9043a2073e82bc135ea6f..bf5096ad0a94ff7138f84aeac6a058e0cb4c99c9 100755 (executable)
 #   ###KERNEL_BUILD_OBJROOT###              xnu/xnu-690.obj~2/RELEASE_PPC
 #   ###KERNEL_BUILD_DATE###                 Sun Oct 24 05:33:28 PDT 2004
 
+use File::Basename;
+
+use strict;
+
 sub ReadFile {
   my ($fileName) = @_;
   my $data;
@@ -39,17 +43,43 @@ sub WriteFile {
   close(OUT);
 }
 
+die("SRCROOT not defined") unless defined($ENV{'SRCROOT'});
+die("OBJROOT not defined") unless defined($ENV{'OBJROOT'});
+
 my $versfile = "MasterVersion";
-$versfile = "$ENV{'SRCROOT'}/config/$versfile" if ($ENV{'SRCROOT'});
-my $BUILD_OBJROOT=$ENV{'OBJROOT'} . "/" . $ENV{'KERNEL_CONFIG'} . '_' . $ENV{'ARCH_CONFIG'};
-if($ENV{'MACHINE_CONFIG'} ne "DEFAULT") {
-    $BUILD_OBJROOT .= '_' . $ENV{'MACHINE_CONFIG'};
-}
+$versfile = "$ENV{'SRCROOT'}/config/$versfile";
+my $BUILD_SRCROOT=$ENV{'SRCROOT'};
+$BUILD_SRCROOT =~ s,/+$,,;
+my $BUILD_OBJROOT=$ENV{'OBJROOT'};
+$BUILD_OBJROOT =~ s,/+$,,;
+my $BUILD_OBJPATH=$ENV{'OBJPATH'} || $ENV{'OBJROOT'};
+$BUILD_OBJPATH =~ s,/+$,,;
 my $BUILD_DATE = `date`;
 $BUILD_DATE =~ s/[\n\t]//g;
 my $BUILDER=`whoami`;
 $BUILDER =~ s/[\n\t]//g;
-$BUILD_OBJROOT =~ s|.*(xnu.*)|$1|;
+
+# Handle two scenarios:
+# SRCROOT=/tmp/xnu
+# OBJROOT=/tmp/xnu/BUILD/obj
+# OBJPATH=/tmp/xnu/BUILD/obj/RELEASE_X86_64
+#
+# SRCROOT=/SourceCache/xnu/xnu-1234
+# OBJROOT=/tmp/xnu/xnu-1234~1.obj
+# OBJPATH=/tmp/xnu/xnu-1234~1.obj/RELEASE_X86_64
+#
+# If SRCROOT is a strict prefix of OBJPATH, we
+# want to preserve the "interesting" part
+# starting with "xnu". If it's not a prefix,
+# the basename of OBJROOT itself is "interesting".
+
+if ($BUILD_OBJPATH =~ m,^$BUILD_SRCROOT/(.*)$,) {
+    $BUILD_OBJROOT = basename($BUILD_SRCROOT) . "/" . $1;
+} elsif ($BUILD_OBJPATH =~ m,^$BUILD_OBJROOT/(.*)$,) {
+    $BUILD_OBJROOT = basename($BUILD_OBJROOT) . "/" . $1;
+} else {
+    # Use original OBJROOT
+}
 
 my $rawvers = &ReadFile($versfile);
 #$rawvers =~ s/\s//g;
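A worked example of the prefix logic above, using the two scenarios from the comment: with SRCROOT=/tmp/xnu and OBJPATH=/tmp/xnu/BUILD/obj/RELEASE_X86_64, the first pattern matches and BUILD_OBJROOT becomes "xnu/BUILD/obj/RELEASE_X86_64"; with SRCROOT=/SourceCache/xnu/xnu-1234 and OBJPATH=/tmp/xnu/xnu-1234~1.obj/RELEASE_X86_64, only the second pattern matches and BUILD_OBJROOT becomes "xnu-1234~1.obj/RELEASE_X86_64". If neither root is a prefix of OBJPATH, OBJROOT is used unmodified.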
index 49f8fb84cbcf089cd8723c7701fb9a4e4e68547d..ac8cec46c2a37cfa2edc5e1d7e333c14bf8a36d8 100644 (file)
@@ -57,17 +57,11 @@ class IOCatalogue : public OSObject
     OSDeclareDefaultStructors(IOCatalogue)
     
 private:
-    OSCollectionIterator   * kernelTables;
-    OSArray                * array;
-    IOLock *                 lock;
+    IORWLock *               lock;
     SInt32                   generation;
-
-/* This stuff is no longer used at all but was exported in prior
- * releases, so we keep it around for i386 only.
- */
-#if __i386__
-    IOLock *                 kld_lock;
-#endif /* __i386__ */
+    OSDictionary           * personalities;
+    OSArray * arrayForPersonality(OSDictionary * dict);
+    void addPersonality(OSDictionary * dict);
 
 public:
     /*!
@@ -273,6 +267,9 @@ private:
         @param moduleName An OSString containing the name of the module to unload.
      */
     IOReturn unloadModule( OSString * moduleName ) const;
+
+    IOReturn _removeDrivers(OSDictionary * matching);
+    IOReturn _terminateDrivers(OSDictionary * matching);
 };
 
 extern const OSSymbol * gIOClassKey;
index 15b5aa4b410cc37e1d67c64b2e76ae0471bcb2ac..6e3ed1ed149fa992c7638650fa891b1655664c2d 100644 (file)
@@ -72,6 +72,10 @@ OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from,
 
 typedef SInt32 (*IODTCompareAddressCellFunc)
        (UInt32 cellCount, UInt32 left[], UInt32 right[]);
+
+typedef SInt64 (*IODTCompareAddressCell64Func)
+       (UInt32 cellCount, UInt32 left[], UInt32 right[]);
+
 typedef void (*IODTNVLocationFunc)
        (IORegistryEntry * entry,
        UInt8 * busNum, UInt8 * deviceNum, UInt8 * functionNum );
index 0cc86a55ca0f86701ec9e2b23f6947044c9ddfc2..6e758273de6d4fda07833584ccef2cc5c0c7a046 100644 (file)
@@ -33,10 +33,13 @@ extern "C" {
 #endif
 
 #ifdef KERNEL
-#include <crypto/aes.h>
+#include <libkern/crypto/aes.h>
 #include <uuid/uuid.h>
 #endif
 
+#ifndef __IOKIT_IOHIBERNATEPRIVATE_H
+#define __IOKIT_IOHIBERNATEPRIVATE_H
+
 struct IOPolledFileExtent
 {
     uint64_t   start;
@@ -96,8 +99,9 @@ struct IOHibernateImageHeader
 
     uint32_t   debugFlags;
     uint32_t   options;
+    uint32_t   sleepTime;
 
-    uint32_t   reserved[70];           // make sizeof == 512
+    uint32_t   reserved[69];           // make sizeof == 512
 
     uint64_t   encryptEnd __attribute__ ((packed));
     uint64_t   deviceBase __attribute__ ((packed));
@@ -235,6 +239,18 @@ static const uint8_t gIOHibernateProgressAlpha                     \
     { 0x00,0x66,0xdb,0xf3,0xdb,0x66,0x00 }                     \
 };
 
+struct hibernate_preview_t
+{
+    uint32_t  imageCount;      // Number of images
+    uint32_t  width;           // Width
+    uint32_t  height;          // Height
+    uint32_t  depth;           // Pixel Depth
+    uint32_t  lockTime;     // Lock time
+    uint32_t  reservedG[8]; // reserved
+    uint32_t  reservedK[8]; // reserved
+};
+typedef struct hibernate_preview_t hibernate_preview_t;
+
 #ifdef KERNEL
 
 #ifdef __cplusplus
@@ -242,9 +258,12 @@ static const uint8_t gIOHibernateProgressAlpha                     \
 void     IOHibernateSystemInit(IOPMrootDomain * rootDomain);
 
 IOReturn IOHibernateSystemSleep(void);
+IOReturn IOHibernateIOKitSleep(void);
 IOReturn IOHibernateSystemHasSlept(void);
 IOReturn IOHibernateSystemWake(void);
 IOReturn IOHibernateSystemPostWake(void);
+bool     IOHibernateWasScreenLocked(void);
+void     IOHibernateSetScreenLocked(uint32_t lockState);
 
 #endif /* __cplusplus */
 
@@ -419,12 +438,14 @@ enum
 #define kIOHibernateFeatureKey         "Hibernation"
 #define kIOHibernatePreviewBufferKey   "IOPreviewBuffer"
 
+#ifndef kIOHibernatePreviewActiveKey
 #define kIOHibernatePreviewActiveKey   "IOHibernatePreviewActive"
 // values for kIOHibernatePreviewActiveKey
 enum {
     kIOHibernatePreviewActive  = 0x00000001,
     kIOHibernatePreviewUpdates = 0x00000002
 };
+#endif
 
 #define kIOHibernateOptionsKey      "IOHibernateOptions"
 #define kIOHibernateGfxStatusKey    "IOHibernateGfxStatus"
@@ -447,6 +468,25 @@ enum {
 
 #define kIOHibernateUseKernelInterpreter    0x80000000
 
+enum
+{
+       kIOPreviewImageIndexDesktop = 0, 
+       kIOPreviewImageIndexLockScreen = 1, 
+       kIOPreviewImageCount = 2
+};
+
+enum
+{
+       kIOScreenLockNoLock          = 1,
+       kIOScreenLockUnlocked        = 2,
+       kIOScreenLockLocked          = 3,
+       kIOScreenLockFileVaultDialog = 4,
+};     
+
+#define kIOScreenLockStateKey      "IOScreenLockState"
+
+#endif /* ! __IOKIT_IOHIBERNATEPRIVATE_H */
+
 #ifdef __cplusplus
 }
 #endif
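The screen-lock additions in the header above pair a setter with a query, alongside preview-image indices and lock-state values defined later in the same diff. A minimal, hypothetical sketch of how a caller might use them — the call sites and policy here are assumptions; only the declarations and enum values come from this header:

// Hypothetical call sites; only IOHibernateSetScreenLocked(),
// IOHibernateWasScreenLocked(), and the enums come from this header.
#include <IOKit/IOHibernatePrivate.h>

static void recordLockStateBeforeSleep(bool screenIsLocked)
{
    // Stash the state so the wake path can pick the right preview image.
    IOHibernateSetScreenLocked(screenIsLocked ? kIOScreenLockLocked
                                              : kIOScreenLockUnlocked);
}

static uint32_t previewIndexForWake(void)
{
    // kIOPreviewImageCount images are packed into the preview buffer;
    // choose the lock-screen one when the display was locked at sleep.
    return IOHibernateWasScreenLocked() ? kIOPreviewImageIndexLockScreen
                                        : kIOPreviewImageIndexDesktop;
}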
index 26787a25c0424d3476b11b637b5f6ea1b960d145..48ff9580d885d129daee873f9884fe0cb19b7298 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -52,19 +52,6 @@ extern "C" {
 }
 #endif
 
-// IOMakeMatching
-/*!
-    @enum IOMakeMatching
-    @constant kIOServiceMatching
-    @constant kIOBSDNameMatching
-    @constant kIOOFPathMatching
-*/
-enum {
-    kIOServiceMatching         = 100,
-    kIOBSDNameMatching         = 101,
-    kIOOFPathMatching          = 102
-};
-
 // IOCatalogueSendData
 /*!
     @enum IOCatalogueSendData user-client flags.
index 5e91b47256e098f03a52fd6477b0a47ffa580d0c..3b5103218a62231916165a8804d4e2e9eebe44d2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -85,8 +85,10 @@ void * IOMalloc(vm_size_t size);
 /*! @function IOFree
     @abstract Frees memory allocated with IOMalloc.
     @discussion This function frees memory allocated with IOMalloc, it may block and so should not be called from interrupt level or while a simple lock is held.
-    @param address Pointer to the allocated memory.
-    @param size Size of the memory allocated. */
+    @param address Pointer to the allocated memory. Must be identical to the
+    result of a prior IOMalloc.
+    @param size Size of the memory allocated. Must be identical to the size
+    of the corresponding IOMalloc. */
 
 void   IOFree(void * address, vm_size_t size);
 
@@ -348,7 +350,7 @@ OSDictionary *
 #else
 struct OSDictionary *
 #endif
-IOOFPathMatching( const char * path, char * buf, int maxLen );
+IOOFPathMatching( const char * path, char * buf, int maxLen ) __attribute__((deprecated));
 
 /*
  * Convert between size and a power-of-two alignment.
index 6e6961136a1c66d78d415102df025dd372ad671b..fd83d0547177eda8c17ad574bda985e3943902f9 100644 (file)
@@ -134,19 +134,10 @@ class IOMemoryDescriptor : public OSObject
     OSDeclareDefaultStructors(IOMemoryDescriptor);
 
 protected:
-/*! @struct ExpansionData
-    @discussion This structure will be used to expand the capablilties of this class in the future.
-    */    
-    struct ExpansionData {
-        void *                         devicePager;
-        unsigned int                   pagerContig:1;
-        unsigned int                   unused:31;
-       IOMemoryDescriptor *            memory;
-    };
 
 /*! @var reserved
     Reserved for future use.  (Internal use only)  */
-    ExpansionData * reserved;
+    struct IOMemoryDescriptorReserved * reserved;
 
 protected:
     OSSet *            _mappings;
@@ -238,6 +229,11 @@ typedef IOOptionBits DMACommandOps;
 #endif /* !__LP64__ */
 
     virtual uint64_t getPreparationID( void );
+    void             setPreparationID( void );
+
+#ifdef XNU_KERNEL_PRIVATE
+    IOMemoryDescriptorReserved * getKernelReserved( void );
+#endif
        
 private:
     OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 0);
index 99c30699bcb86c0a59d2f7fb0bbd767fd8b2c959..db7f5d20b6a3ba7d77764520d6a4642128371b57 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2009 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -179,11 +179,11 @@ class IOPlatformExpert;
     @abstract The base class for most I/O Kit families, devices, and drivers.
     @discussion The IOService base class defines APIs used to publish services, instantiate other services based on the existence of a providing service (i.e. driver stacking), destroy a service and its dependent stack, notify interested parties of service state changes, and general utility functions useful across all families.
 
-Types of service are specified with a matching dictionary that describes properties of the service. For example, a matching dictionary might describe any IOUSBDevice (or subclass), an IOUSBDevice with a certain class code, or a IOPCIDevice with a set of OpenFirmware matching names or device & vendor IDs. Since the matching dictionary is interpreted by the family which created the service, as well as generically by IOService, the list of properties considered for matching depends on the familiy.
+Types of service are specified with a matching dictionary that describes properties of the service. For example, a matching dictionary might describe any IOUSBDevice (or subclass), an IOUSBDevice with a certain class code, or an IOPCIDevice with a set of matching names or device & vendor IDs. Since the matching dictionary is interpreted by the family which created the service, as well as generically by IOService, the list of properties considered for matching depends on the family.
 
 Matching dictionaries are associated with IOService classes by the catalogue, as driver property tables, and also supplied by clients of the notification APIs.
 
-IOService provides matching based on C++ class (via OSMetaClass dynamic casting), registry entry name, a registry path to the service (which includes OpenFirmware paths), a name assigned by BSD, or by its location (its point of attachment).
+IOService provides matching based on C++ class (via OSMetaClass dynamic casting), registry entry name, a registry path to the service (which includes device tree paths), a name assigned by BSD, or by its location (its point of attachment).
 
 <br><br>Driver Instantiation by IOService<br><br>
 
@@ -231,7 +231,7 @@ A string defining the driver category for matching purposes. All drivers with no
 <br>
        <code>kIONameMatchKey, extern const OSSymbol * gIONameMatchKey, "IONameMatch"</code>
 <br>
-A string or collection of strings that match the provider's name. The comparison is implemented with the @link //apple_ref/cpp/instm/IORegistryEntry/compareNames/virtualbool/(OSObject*,OSString**) IORegistryEntry::compareNames@/link method, which supports a single string, or any collection (OSArray, OSSet, OSDictionary etc.) of strings. IOService objects with OpenFirmware device tree properties (eg. IOPCIDevice) will also be matched based on that standard's "compatible", "name", "device_type" properties. The matching name will be left in the driver's property table in the <code>kIONameMatchedKey</code> property.
+A string or collection of strings that match the provider's name. The comparison is implemented with the @link //apple_ref/cpp/instm/IORegistryEntry/compareNames/virtualbool/(OSObject*,OSString**) IORegistryEntry::compareNames@/link method, which supports a single string, or any collection (OSArray, OSSet, OSDictionary etc.) of strings. IOService objects with device tree properties (eg. IOPCIDevice) will also be matched based on that standard's "compatible", "name", "device_type" properties. The matching name will be left in the driver's property table in the <code>kIONameMatchedKey</code> property.
 <br>
 Examples
 <pre>
@@ -728,6 +728,14 @@ public:
 
     static OSIterator * getMatchingServices( OSDictionary * matching );
 
+/*! @function copyMatchingService
+    @abstract Finds one of the current published IOService objects matching a matching dictionary.
+    @discussion Provides a method to find one member of the set of published IOService objects matching the supplied matching dictionary.   
+    @param matching The matching dictionary describing the desired IOService object.
+    @result The IOService object or NULL. To be released by the caller. */
+
+    static IOService * copyMatchingService( OSDictionary * matching );
+
 public:
     /* Helpers to make matching dictionaries for simple cases,
      * they add keys to an existing dictionary, or create one. */
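Where getMatchingServices returns an iterator over every match, the new copyMatchingService returns at most one retained object. A minimal sketch of the intended call pattern, assuming the usual serviceMatching helper and that the matching dictionary is not consumed (both assumptions; only the declaration and the release-the-result rule come from the comment above):

#include <IOKit/IOService.h>

static void useOneMatch(void)
{
    OSDictionary * matching = IOService::serviceMatching("IOResources");
    if (!matching) {
        return;
    }

    // Returns the first currently published match, retained, or NULL.
    IOService * service = IOService::copyMatchingService(matching);
    matching->release();

    if (service) {
        // ... use the service ...
        service->release();   // per the doc above: released by the caller
    }
}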
@@ -1254,6 +1262,11 @@ private:
     void doServiceTerminate( IOOptionBits options );
 
 private:
+
+    bool matchPassive(OSDictionary * table, uint32_t options);
+    bool matchInternal(OSDictionary * table, uint32_t options, unsigned int * did);
+    static bool instanceMatch(const OSObject * entry, void * context);
+
     static OSObject * copyExistingServices( OSDictionary * matching,
                 IOOptionBits inState, IOOptionBits options = 0 );
 
@@ -1778,7 +1791,7 @@ private:
     bool checkForDone ( void );
     bool responseValid ( uint32_t x, int pid );
     void computeDesiredState ( unsigned long tempDesire = 0 );
-    void rebuildChildClampBits ( void );
+    void trackSystemSleepPreventers( IOPMPowerStateIndex, IOPMPowerStateIndex, IOPMPowerChangeFlags );
     void tellSystemCapabilityChange( uint32_t nextMS );
 
        static void ack_timer_expired( thread_call_param_t, thread_call_param_t );
index d56aea7be83ca3f2e2573ddbb8e51892feaf6c07..76bd0acfa67dd97455abdbd6ccdfa46aa8a2b07c 100644 (file)
@@ -197,7 +197,8 @@ enum {
     kIOInhibitCache            = 1,
     kIOWriteThruCache          = 2,
     kIOCopybackCache           = 3,
-    kIOWriteCombineCache       = 4
+    kIOWriteCombineCache       = 4,
+    kIOCopybackInnerCache      = 5
 };
 
 // IOMemory mapping options
@@ -206,11 +207,12 @@ enum {
 
     kIOMapCacheMask            = 0x00000700,
     kIOMapCacheShift           = 8,
-    kIOMapDefaultCache         = kIODefaultCache      << kIOMapCacheShift,
-    kIOMapInhibitCache         = kIOInhibitCache      << kIOMapCacheShift,
-    kIOMapWriteThruCache       = kIOWriteThruCache    << kIOMapCacheShift,
-    kIOMapCopybackCache                = kIOCopybackCache     << kIOMapCacheShift,
-    kIOMapWriteCombineCache    = kIOWriteCombineCache << kIOMapCacheShift,
+    kIOMapDefaultCache         = kIODefaultCache       << kIOMapCacheShift,
+    kIOMapInhibitCache         = kIOInhibitCache       << kIOMapCacheShift,
+    kIOMapWriteThruCache       = kIOWriteThruCache     << kIOMapCacheShift,
+    kIOMapCopybackCache                = kIOCopybackCache      << kIOMapCacheShift,
+    kIOMapWriteCombineCache    = kIOWriteCombineCache  << kIOMapCacheShift,
+    kIOMapCopybackInnerCache   = kIOCopybackInnerCache << kIOMapCacheShift,
 
     kIOMapUserOptionsMask      = 0x00000fff,
 
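For reference, each kIOMap*Cache option is just the matching cache mode shifted into the kIOMapCacheMask field (bits 8-10), so the new entry evaluates to kIOCopybackInnerCache (5) << kIOMapCacheShift (8) = 0x00000500; a consumer recovers the mode with (options & kIOMapCacheMask) >> kIOMapCacheShift.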
index 7b3c8df3e7d87922ff49e1a364cd35ddf7ca67b7..69e6c7aadc785c65bb85f8d3f966c3e302b77d9a 100644 (file)
@@ -26,12 +26,11 @@ INSTINC_SUBDIRS_I386 =
 
 INSTINC_SUBDIRS_X86_64 = 
 
-INSTINC_SUBDIRS_ARM = 
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
-EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
+
 
 NOT_EXPORT_HEADERS = 
 
@@ -39,7 +38,7 @@ NOT_KF_MI_HEADERS  = $(NOT_EXPORT_HEADERS)                    \
                     IOKitKeysPrivate.h IOCPU.h                 \
                     IOHibernatePrivate.h IOPolledInterface.h   \
                     IOCommandQueue.h IOLocksPrivate.h          \
-                    AppleKeyStoreInterface.h                   \
+                    IOSyncer.h AppleKeyStoreInterface.h        \
                     IOStatistics.h IOStatisticsPrivate.h
 
 NOT_LOCAL_HEADERS = 
index f8f0826c4301d7a219a31cfd0749ee8d8a3bae41..514496af6ba60e6c64666a4d49938a1b18a1650b 100644 (file)
@@ -15,11 +15,9 @@ EXCLUDE_HEADERS =
 
 INSTINC_SUBDIRS =
 INSTINC_SUBDIRS_I386 =
-INSTINC_SUBDIRS_X86_64 =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
-EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 
 ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
 HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS))
index 4bdddb751ebe3b03c48ac49f7e3d464757bbdf83..cfcbf6d55d86d5fa250c705e36c906033da6b60f 100644 (file)
@@ -245,7 +245,7 @@ enum {
  *  false       == Retain FV key when going to standby mode
  *  not present == Retain FV key when going to standby mode
  */
-#define kIOPMDestroyFVKeyOnStandbyKey       "DestroyFVKeyOnStandby"
+#define kIOPMDestroyFVKeyOnStandbyKey            "DestroyFVKeyOnStandby"
 
 /*******************************************************************************
  *
@@ -288,15 +288,7 @@ enum {
      */
     kIOPMDriverAssertionPreventDisplaySleepBit      = 0x40,
 
-    /*! kIOPMDriverAssertionReservedBit7
-     * Reserved for storage family.
-     */
-    kIOPMDriverAssertionReservedBit7                = 0x80,
-
-    /*! kIOPMDriverAssertionReservedBit8
-     * Reserved for networking family.
-     */
-    kIOPMDriverAssertionReservedBit8                = 0x100
+    kIOPMDriverAssertionReservedBit7                = 0x80
 };
 
  /* kIOPMAssertionsDriverKey
@@ -304,7 +296,7 @@ enum {
   * a bitfield describing the aggregate PM assertion levels.
   * Example: A value of 0 indicates that no driver has asserted anything.
   * Or, a value of <link>kIOPMDriverAssertionCPUBit</link>
-  *   indicates that a driver (or drivers) have asserted a need fro CPU and video.
+  *   indicates that a driver (or drivers) have asserted a need for CPU and video.
   */
 #define kIOPMAssertionsDriverKey            "DriverPMAssertions"
 
@@ -313,7 +305,7 @@ enum {
   * a bitfield describing the aggregate PM assertion levels.
   * Example: A value of 0 indicates that no driver has asserted anything.
   * Or, a value of <link>kIOPMDriverAssertionCPUBit</link>
-  *   indicates that a driver (or drivers) have asserted a need fro CPU and video.
+  *   indicates that a driver (or drivers) have asserted a need for CPU and video.
   */
 #define kIOPMAssertionsDriverDetailedKey    "DriverPMAssertionsDetailed"
 
@@ -416,6 +408,13 @@ enum {
 #define kIOPMMessageDriverAssertionsChanged  \
                 iokit_family_msg(sub_iokit_powermanagement, 0x150)
 
+/*! kIOPMMessageDarkWakeThermalEmergency
+ * Sent when machine becomes unsustainably warm in DarkWake.
+ * Kernel PM might choose to put the machine back to sleep right after.
+ */
+#define kIOPMMessageDarkWakeThermalEmergency \
+                iokit_family_msg(sub_iokit_powermanagement, 0x160)
+
 /*******************************************************************************
  *
  * Power commands issued to root domain
@@ -437,7 +436,8 @@ enum {
   kIOPMEnableClamshell          = (1<<7),  // sleep on clamshell closure
   kIOPMProcessorSpeedChange     = (1<<8),  // change the processor speed
   kIOPMOverTemp                 = (1<<9),  // system dangerously hot
-  kIOPMClamshellOpened          = (1<<10)  // clamshell was opened
+  kIOPMClamshellOpened          = (1<<10), // clamshell was opened
+  kIOPMDWOverTemp               = (1<<11)  // DarkWake thermal limits exceeded.
 };
 
 
index 09fdb19e82a7c694f8eda289f517efc46eb66c63..4828f38cdd50850c28fd5ffc62dd07229b82e5a6 100644 (file)
 /* @constant kIOPMTimelineDictionaryKey
  * @abstract RootDomain key for dictionary describing Timeline's info
  */
-#define kIOPMTimelineDictionaryKey              "PMTimelineLogging"
+#define     kIOPMTimelineDictionaryKey                  "PMTimelineLogging"
 
 /* @constant kIOPMTimelineEnabledKey
  * @abstract Boolean value indicating whether the system is recording PM events.
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define kIOPMTimelineEnabledKey                 "TimelineEnabled"
+#define     kIOPMTimelineEnabledKey                     "TimelineEnabled"
 
 /* @constant kIOMPTimelineSystemNumberTrackedKey
  * @abstract The maximum number of system power events the system may record.
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define kIOPMTimelineSystemNumberTrackedKey     "TimelineSystemEventsTracked"
+#define     kIOPMTimelineSystemNumberTrackedKey         "TimelineSystemEventsTracked"
 
 /* @constant kIOPMTimelineSystemBufferSizeKey
  * @abstract Size in bytes  of buffer recording system PM events
  * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
  * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
  */
-#define kIOPMTimelineSystemBufferSizeKey        "TimelineSystemBufferSize"
+#define     kIOPMTimelineSystemBufferSizeKey            "TimelineSystemBufferSize"
 
 
 
@@ -131,7 +131,8 @@ enum {
     kIOPMSleepReasonLowPower                    = 106,
     kIOPMSleepReasonThermalEmergency            = 107,
     kIOPMSleepReasonMaintenance                 = 108,
-    kIOPMSleepReasonSleepServiceExit            = 109
+    kIOPMSleepReasonSleepServiceExit            = 109,
+    kIOPMSleepReasonDarkWakeThermalEmergency    = 110
 };
 
 /*
@@ -145,6 +146,7 @@ enum {
 #define kIOPMLowPowerSleepKey                       "Low Power Sleep"
 #define kIOPMThermalEmergencySleepKey               "Thermal Emergency Sleep"
 #define kIOPMSleepServiceExitKey                    "Sleep Service Back to Sleep"
+#define kIOPMDarkWakeThermalEmergencyKey            "Dark Wake Thermal Emergency"
 
 
 enum {
@@ -618,162 +620,31 @@ enum {
 #define kIOPMSleepWakeFailureUUIDKey        "UUID"
 #define kIOPMSleepWakeFailureDateKey        "Date"
 
-/*****************************************************************************
- *
- * Root Domain private property keys
- *
- *****************************************************************************/
-
-/* kIOPMAutoPowerOffEnabledKey
- * Indicates if Auto Power Off is enabled.
- * It has a boolean value.
- *  true        == Auto Power Off is enabled
- *  false       == Auto Power Off is disabled
- *  not present == Auto Power Off is not supported on this hardware
+/******************************************************************************/
+/* System sleep policy
+ * Shared between PM root domain and platform driver.
  */
-#define kIOPMAutoPowerOffEnabledKey         "AutoPowerOff Enabled"
 
-/* kIOPMAutoPowerOffDelayKey
- * Key refers to a CFNumberRef that represents the delay in seconds before
- * entering the Auto Power Off state.  The property is not present if Auto
- * Power Off is unsupported.
- */
-#define kIOPMAutoPowerOffDelayKey           "AutoPowerOff Delay"
+// Platform specific property added by the platform driver.
+// An OSData that describes the system sleep policy.
+#define kIOPlatformSystemSleepPolicyKey     "IOPlatformSystemSleepPolicy"
 
-/*****************************************************************************
- *
- * System Sleep Policy
- *
- *****************************************************************************/
-
-#define kIOPMSystemSleepPolicySignature     0x54504c53
-#define kIOPMSystemSleepPolicyVersion       2
-
-/*!
- * @defined kIOPMSystemSleepTypeKey
- * @abstract Indicates the type of system sleep.
- * @discussion An OSNumber property of root domain that describes the type
- * of system sleep. This property is set after notifying priority sleep/wake
- * clients, but before informing interested drivers and shutting down power
- * plane drivers.
- */
-#define kIOPMSystemSleepTypeKey             "IOPMSystemSleepType"
+// Root domain property updated before platform sleep.
+// An OSData that describes the system sleep parameters.
+#define kIOPMSystemSleepParametersKey       "IOPMSystemSleepParameters"
 
-struct IOPMSystemSleepPolicyVariables
+struct IOPMSystemSleepParameters
 {
-    uint32_t    signature;                  // kIOPMSystemSleepPolicySignature
-    uint32_t    version;                    // kIOPMSystemSleepPolicyVersion
-
-    uint64_t    currentCapability;          // current system capability bits
-    uint64_t    highestCapability;          // highest system capability bits
-
-    uint64_t    sleepFactors;               // sleep factor bits
-    uint32_t    sleepReason;                // kIOPMSleepReason*
-    uint32_t    sleepPhase;                 // identify the sleep phase
-    uint32_t    hibernateMode;              // current hibernate mode
-
-    uint32_t    standbyDelay;               // standby delay in seconds
-    uint32_t    poweroffDelay;              // auto-poweroff delay in seconds
-
-    uint32_t    reserved[51];               // pad sizeof 256 bytes
-};
-
-enum {
-    kIOPMSleepPhase1 = 1,
-    kIOPMSleepPhase2
-};
-
-// Sleep Factor Mask / Bits
-enum {
-    kIOPMSleepFactorSleepTimerWake          = 0x00000001ULL,
-    kIOPMSleepFactorLidOpen                 = 0x00000002ULL,
-    kIOPMSleepFactorACPower                 = 0x00000004ULL,
-    kIOPMSleepFactorBatteryLow              = 0x00000008ULL,
-    kIOPMSleepFactorStandbyNoDelay          = 0x00000010ULL,
-    kIOPMSleepFactorStandbyForced           = 0x00000020ULL,
-    kIOPMSleepFactorStandbyDisabled         = 0x00000040ULL,
-    kIOPMSleepFactorUSBExternalDevice       = 0x00000080ULL,
-    kIOPMSleepFactorBluetoothHIDDevice      = 0x00000100ULL,
-    kIOPMSleepFactorExternalMediaMounted    = 0x00000200ULL,
-    kIOPMSleepFactorThunderboltDevice       = 0x00000400ULL,
-    kIOPMSleepFactorRTCAlarmScheduled       = 0x00000800ULL,
-    kIOPMSleepFactorMagicPacketWakeEnabled  = 0x00001000ULL,
-    kIOPMSleepFactorHibernateForced         = 0x00010000ULL,
-    kIOPMSleepFactorAutoPowerOffDisabled    = 0x00020000ULL,
-    kIOPMSleepFactorAutoPowerOffForced      = 0x00040000ULL
-};
-
-// System Sleep Types
-enum {
-    kIOPMSleepTypeInvalid                   = 0,
-    kIOPMSleepTypeAbortedSleep              = 1,
-    kIOPMSleepTypeNormalSleep               = 2,
-    kIOPMSleepTypeSafeSleep                 = 3,
-    kIOPMSleepTypeHibernate                 = 4,
-    kIOPMSleepTypeStandby                   = 5,
-    kIOPMSleepTypePowerOff                  = 6,
-    kIOPMSleepTypeLast                      = 7
-};
-
-// System Sleep Flags
-enum {
-    kIOPMSleepFlagDisableHibernateAbort     = 0x00000001,
-    kIOPMSleepFlagDisableUSBWakeEvents      = 0x00000002,
-    kIOPMSleepFlagDisableBatlowAssertion    = 0x00000004
+    uint32_t    version;
+    uint32_t    sleepFlags;
+    uint32_t    sleepTimer;
+    uint32_t    wakeEvents;
 };
 
-// System Wake Events
+// Sleep flags
 enum {
-    kIOPMWakeEventLidOpen                   = 0x00000001,
-    kIOPMWakeEventLidClose                  = 0x00000002,
-    kIOPMWakeEventACAttach                  = 0x00000004,
-    kIOPMWakeEventACDetach                  = 0x00000008,
-    kIOPMWakeEventPowerButton               = 0x00000100,
-    kIOPMWakeEventUserPME                   = 0x00000400,
-    kIOPMWakeEventSleepTimer                = 0x00000800,
-    kIOPMWakeEventBatteryLow                = 0x00001000,
-    kIOPMWakeEventDarkPME                   = 0x00002000
+    kIOPMSleepFlagHibernate         = 0x00000001,
+    kIOPMSleepFlagSleepTimerEnable  = 0x00000002
 };
 
-/*!
- * @defined kIOPMSystemSleepParametersKey
- * @abstract Sleep parameters describing the upcoming sleep
- * @discussion Root domain updates this OSData property before system sleep
- * to pass sleep parameters to the platform driver.  Some of the parameters
- * are based on the chosen entry in the system sleep policy table.
- */
-#define kIOPMSystemSleepParametersKey       "IOPMSystemSleepParameters"
-#define kIOPMSystemSleepParametersVersion   2
-
-struct IOPMSystemSleepParameters
-{
-    uint16_t    version;
-    uint16_t    reserved1;
-    uint32_t    sleepType;
-    uint32_t    sleepFlags;
-    uint32_t    ecWakeEvents;
-    uint32_t    ecWakeTimer;
-    uint32_t    ecPoweroffTimer;
-    uint32_t    reserved2[10];
-} __attribute__((packed));
-
-#ifdef KERNEL
-
-/*!
- * @defined kIOPMInstallSystemSleepPolicyHandlerKey
- * @abstract Name of the platform function to install a sleep policy handler.
- * @discussion Pass to IOPMrootDomain::callPlatformFunction(), with a pointer
- * to the C-function handler at param1, and an optional target at param2, to
- * register a sleep policy handler. Only a single sleep policy handler can
- * be installed.
- */
-#define kIOPMInstallSystemSleepPolicyHandlerKey        \
-        "IOPMInstallSystemSleepPolicyHandler"
-
-typedef IOReturn (*IOPMSystemSleepPolicyHandler)(
-        void * target, const IOPMSystemSleepPolicyVariables * vars,
-        IOPMSystemSleepParameters * params );
-
-#endif /* KERNEL */
-
 #endif /* ! _IOKIT_IOPMPRIVATE_H */
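The replacement IOPMSystemSleepParameters above is deliberately small: a version, flag bits, a timer, and wake events. A hedged sketch of filling it in before sleep — the version value and the field semantics beyond the flag names are assumptions, since this diff defines no version constant for the new layout:

#include <IOKit/pwr_mgt/IOPMPrivate.h>
#include <string.h>

static void fillSleepParameters(struct IOPMSystemSleepParameters * p)
{
    memset(p, 0, sizeof(*p));
    p->version    = 1;        // assumed; no version constant is defined here
    p->sleepFlags = kIOPMSleepFlagHibernate | kIOPMSleepFlagSleepTimerEnable;
    p->sleepTimer = 3600;     // presumably seconds, given the timer-enable flag
    p->wakeEvents = 0;        // no wake events requested
}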
index b82357fe9dfd5960c8f136563a240d6b378246bd..db62a3d24e496971d0929f881aa8ca13012386f1 100644 (file)
@@ -15,7 +15,7 @@ NOT_EXPORT_HEADERS = \
        IOPMinformee.h          \
        IOPMinformeeList.h      \
        IOPMlog.h               \
-       IOPMPagingPlexus.h
+       IOPMPrivate.h
        
 INSTINC_SUBDIRS =
 INSTINC_SUBDIRS_I386 =
index 351f7da64257617527e6c18c6cc33764484ee002..9a514bdbca7a86f731b2a87048b6b6ae82b0b4a0 100644 (file)
 
 #include <IOKit/IOService.h>
 #include <IOKit/pwr_mgt/IOPM.h>
-#include "IOKit/pwr_mgt/IOPMPrivate.h"
 #include <IOKit/IOBufferMemoryDescriptor.h> 
 
 #ifdef XNU_KERNEL_PRIVATE
 struct AggressivesRecord;
 struct IOPMMessageFilterContext;
 struct IOPMActions;
+struct IOPMSystemSleepParameters;
 class PMSettingObject;
 class IOPMTimeline;
 class PMEventDetails;
@@ -311,7 +311,7 @@ public:
     @result On success, returns a new assertion of type IOPMDriverAssertionID *
 */
     IOReturn releasePMAssertion(IOPMDriverAssertionID releaseAssertion);
-
+        
 private:
     virtual IOReturn    changePowerStateTo( unsigned long ordinal );
     virtual IOReturn    changePowerStateToPriv( unsigned long ordinal );
@@ -381,7 +381,11 @@ public:
     void        handleQueueSleepWakeUUID(
                     OSObject *obj);
 
-    IOReturn    setMaintenanceWakeCalendar(const IOPMCalendarStruct * calendar );
+    void        handleSuspendPMNotificationClient(
+                    uint32_t pid, bool doSuspend);
+
+    IOReturn    setMaintenanceWakeCalendar(
+                    const IOPMCalendarStruct * calendar );
 
     // Handle callbacks from IOService::systemWillShutdown()
        void        acknowledgeSystemWillShutdown( IOService * from );
@@ -406,6 +410,11 @@ public:
     bool        systemMessageFilter(
                     void * object, void * arg1, void * arg2, void * arg3 );
 
+    void        updatePreventIdleSleepList(
+                    IOService * service, bool addNotRemove );
+    void        updatePreventSystemSleepList(
+                    IOService * service, bool addNotRemove );
+
     void        publishPMSetting(
                     const OSSymbol * feature, uint32_t where, uint32_t * featureID );
 
@@ -430,6 +439,23 @@ public:
                                 uint32_t                       delay_ms,
                                 int                            app_pid);
 
+
+/*! @function   suspendPMNotificationsForPID
+    @abstract   kernel process management calls this to disable sleep/wake notifications
+                when a process is suspended.
+    @param      pid the process ID
+    @param      doSuspend true suspends the notifications; false enables them
+*/
+    void        suspendPMNotificationsForPID( uint32_t pid, bool doSuspend);
+
+/*! @function   pmNotificationIsSuspended
+    @abstract   returns true if PM notifications have been suspended
+    @param      pid the process ID
+    @result     true if the process has been suspended
+*/
+    bool        pmNotificationIsSuspended( uint32_t pid );
+
+
 #if HIBERNATION
     bool        getHibernateSettings(
                     uint32_t *  hibernateMode,
@@ -463,7 +489,6 @@ private:
                                     IONotifier * notifier);
 
     IOService *             wrangler;
-    IOService *             wranglerConnection;
 
     IOLock                  *featuresDictLock;  // guards supportedFeatures
     IOPMPowerStateQueue     *pmPowerStateQueue;
@@ -492,7 +517,6 @@ private:
     OSArray                 *pmStatsAppResponses;
 
     bool                    uuidPublished;
-    PMStatsStruct           pmStats;
 
     // Pref: idle time before idle sleep
     unsigned long           sleepSlider;               
@@ -554,12 +578,12 @@ private:
 
     unsigned int            idleSleepTimerPending   :1;
     unsigned int            userDisabledAllSleep    :1;
-    unsigned int            childPreventSystemSleep :1;
     unsigned int            ignoreTellChangeDown    :1;
     unsigned int            wranglerAsleep          :1;
     unsigned int            wranglerTickled         :1;
     unsigned int            wranglerSleepIgnored    :1;
     unsigned int            graphicsSuppressed      :1;
+    unsigned int            darkWakeThermalAlarm    :1;
 
     unsigned int            capabilityLoss          :1;
     unsigned int            pciCantSleepFlag        :1;
@@ -573,6 +597,7 @@ private:
     unsigned int            darkWakePostTickle      :1;
     unsigned int            sleepTimerMaintenance   :1;
     unsigned int            lowBatteryCondition     :1;
+    unsigned int            darkWakeThermalEmergency:1;
     unsigned int            hibernateDisabled       :1;
     unsigned int            hibernateNoDefeat       :1;
     unsigned int            rejectWranglerTickle    :1;
@@ -606,10 +631,24 @@ private:
     IONotifier *            systemCapabilityNotifier;
 
     IOPMTimeline            *timeline;
+    
+    typedef struct {
+        uint32_t            pid;
+        uint32_t            refcount;
+    } PMNotifySuspendedStruct;
+    
+    uint32_t                pmSuspendedCapacity;    
+    uint32_t                pmSuspendedSize;
+    PMNotifySuspendedStruct *pmSuspendedPIDS;
+
+    OSSet *                 preventIdleSleepList;
+    OSSet *                 preventSystemSleepList;
+
+#if HIBERNATION
+    clock_sec_t             _standbyTimerResetSeconds;
+#endif
 
-    IOPMSystemSleepPolicyHandler    _sleepPolicyHandler;
-    void *                          _sleepPolicyTarget;
-    IOPMSystemSleepPolicyVariables *_sleepPolicyVars;
+    int         findSuspendedPID(uint32_t pid, uint32_t *outRefCount);
 
        // IOPMrootDomain internal sleep call
     IOReturn    privateSleepSystem( uint32_t sleepReason );
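
A sketch of what findSuspendedPID() presumably does with the refcounted pid
table declared above. The linear scan and the meaning of pmSuspendedSize (taken
here as the number of live entries) are assumptions; only the field and method
names come from this header:

    // Hypothetical implementation: scan pmSuspendedPIDS for pid.
    // Returns the entry's index, or -1 if the pid is not suspended.
    int IOPMrootDomain::findSuspendedPID(uint32_t pid, uint32_t * outRefCount)
    {
        for (uint32_t i = 0; i < pmSuspendedSize; i++) {
            if (pmSuspendedPIDS[i].pid == pid) {
                if (outRefCount) *outRefCount = pmSuspendedPIDS[i].refcount;
                return (int) i;
            }
        }
        return -1;
    }
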
@@ -665,7 +704,7 @@ private:
 
 #if HIBERNATION
     bool        getSleepOption( const char * key, uint32_t * option );
-    bool        evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, int phase );
+    bool        evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, int sleepPhase );
     void        evaluateSystemSleepPolicyEarly( void );
     void        evaluateSystemSleepPolicyFinal( void );
 #endif /* HIBERNATION */
diff --git a/iokit/IOKit/x86_64/Makefile b/iokit/IOKit/x86_64/Makefile
new file mode 100644 (file)
index 0000000..3b4a79b
--- /dev/null
@@ -0,0 +1,33 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A
+export INCDIR = $(IOKIT_FRAMEDIR)/Headers
+export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+MD_DIR = x86_64
+EXCLUDE_HEADERS = 
+
+INSTINC_SUBDIRS =
+INSTINC_SUBDIRS_X86_64 =
+
+EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
+EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
+
+ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
+HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS))
+
+INSTALL_MD_LIST        = ${HEADER_LIST}
+INSTALL_MD_LCL_LIST = ""
+INSTALL_MD_DIR = $(MD_DIR)
+
+EXPORT_MD_LIST = 
+EXPORT_MD_DIR = IOKit/$(MD_DIR)
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
index 563059600b59c1a31850aaf736144e55bde62402..a56b469ee999d7373044d28e131688e0fad6272b 100644 (file)
@@ -99,6 +99,8 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
        return (false);
     _ranges.v64->address = 0;
     _ranges.v64->length  = 0;
+       //  make sure super::free doesn't dealloc _ranges before super::init has set them up
+       _flags = kIOMemoryAsReference;
 
     // Grab IOMD bits from the Buffer MD options
     iomdOptions  |= (options & kIOBufferDescriptorMemoryFlags);
@@ -148,6 +150,10 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
            SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode);
            break;
 
+       case kIOMapCopybackInnerCache:
+           SET_MAP_MEM(MAP_MEM_INNERWBACK, memEntryCacheMode);
+           break;
+
        case kIOMapDefaultCache:
        default:
            SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode);
index 5dd9ea41677416f31ed2653ce30b3c49f11e0bba..eae15f97cdcadb8cfc3b7c25799784bc2981e786 100644 (file)
@@ -42,6 +42,7 @@ extern "C" {
 #include <IOKit/IOLib.h>
 #include <IOKit/IOPlatformExpert.h>
 #include <IOKit/pwr_mgt/RootDomain.h>
+#include <IOKit/pwr_mgt/IOPMPrivate.h>
 #include <IOKit/IOUserClient.h>
 #include <IOKit/IOKitKeysPrivate.h>
 #include <IOKit/IOCPU.h>
@@ -50,13 +51,15 @@ extern "C" {
 #include <kern/queue.h>
 
 typedef kern_return_t (*iocpu_platform_action_t)(void * refcon0, void * refcon1, uint32_t priority,
-                                                void * param1, void * param2, void * param3);
+                                                void * param1, void * param2, void * param3,
+                                                const char * name);
 
 struct iocpu_platform_action_entry
 {
     queue_chain_t                     link;
     iocpu_platform_action_t           action;
     int32_t                          priority;
+    const char *                     name;
     void *                           refcon0;
     void *                           refcon1;
     struct iocpu_platform_action_entry * alloc_list;
@@ -168,7 +171,7 @@ iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32
        if ((pri >= first_priority) && (pri <= last_priority))
        {
            //kprintf("[%p]", next->action);
-           ret = (*next->action)(next->refcon0, next->refcon1, pri, param1, param2, param3);
+           ret = (*next->action)(next->refcon0, next->refcon1, pri, param1, param2, param3, next->name);
        }
        if (KERN_SUCCESS == result)
            result = ret;
@@ -194,13 +197,14 @@ IOCPURunPlatformActiveActions(void)
 
 static kern_return_t 
 IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority,
-                         void * param1, void * param2, void * param3)
+                         void * param1, void * param2, void * param3,
+                         const char * service_name)
 {
     IOReturn        ret;
     IOService *      service  = (IOService *)      refcon0;
     const OSSymbol * function = (const OSSymbol *) refcon1;
 
-    kprintf("%s -> %s\n", function->getCStringNoCopy(), service->getName());
+    kprintf("%s -> %s\n", function->getCStringNoCopy(), service_name);
 
     ret = service->callPlatformFunction(function, false, 
                                         (void *) priority, param1, param2, param3);
@@ -223,6 +227,7 @@ IOInstallServicePlatformAction(IOService * service,
 
     entry = IONew(iocpu_platform_action_entry_t, 1);
     entry->action = &IOServicePlatformAction;
+    entry->name = service->getName();
     priority = num->unsigned32BitValue();
     if (reverse)
        entry->priority = -priority;
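
The service's name is copied into the entry at install time, presumably so the
log path in the action callback no longer has to call getName() while the
system is quiescing. A sketch of an action using the extended signature (the
function itself is illustrative; the typedef is from the hunk above):

    // Example action matching the new iocpu_platform_action_t signature;
    // `name` is the string captured by IOInstallServicePlatformAction().
    static kern_return_t
    examplePlatformAction(void * refcon0, void * refcon1, uint32_t priority,
                          void * param1, void * param2, void * param3,
                          const char * name)
    {
        kprintf("platform action for %s at priority %u\n", name, priority);
        return KERN_SUCCESS;
    }
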
@@ -306,8 +311,9 @@ void IOCPUSleepKernel(void)
 
     kprintf("IOCPUSleepKernel\n");
 
-    OSIterator * iter;
-    IOService *  service;
+    IORegistryIterator * iter;
+    OSOrderedSet *       all;
+    IOService *          service;
 
     rootDomain->tracePoint( kIOPMTracePointSleepPlatformActions );
 
@@ -318,19 +324,28 @@ void IOCPUSleepKernel(void)
                                            kIORegistryIterateRecursively );
     if( iter)
     {
-       do
+       all = 0;
+       do 
        {
-           iter->reset();
-           while((service = (IOService *) iter->getNextObject()))
+           if (all)
+               all->release();
+           all = iter->iterateAll();
+       }
+       while (!iter->isValid());
+       iter->release();
+
+       if (all)
+       {
+           while((service = (IOService *) all->getFirstObject()))
            {
                IOInstallServicePlatformAction(service, gIOPlatformSleepActionKey,   &gIOSleepActionQueue,               false);
                IOInstallServicePlatformAction(service, gIOPlatformWakeActionKey,    &gIOWakeActionQueue,                true);
                IOInstallServicePlatformAction(service, gIOPlatformQuiesceActionKey, iocpu_get_platform_quiesce_queue(), false);
                IOInstallServicePlatformAction(service, gIOPlatformActiveActionKey,  iocpu_get_platform_active_queue(),  true);
+               all->removeObject(service);
            }
-       }
-       while( !service && !iter->isValid());
-       iter->release();
+           all->release();
+       }       
     }
 
     iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0U-1,
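
The iterateAll()/isValid() loop above is the standard pattern for snapshotting
a registry plane that may mutate under the iterator; isolated, it looks like:

    // Snapshot every entry under the service plane; isValid() returns
    // false if the registry changed mid-walk, in which case we retry.
    IORegistryIterator * iter = IORegistryIterator::iterateOver(
                                    gIOServicePlane, kIORegistryIterateRecursively);
    OSOrderedSet * all = 0;
    if (iter) {
        do {
            if (all) all->release();
            all = iter->iterateAll();       // retains each entry
        } while (!iter->isValid());
        iter->release();
    }
    // `all` (if non-NULL) now holds a stable snapshot; release when done.
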
index c6de3b56c0a9b02bf2e25b3967df639bbfe8e01b..ee193c0277aaa8879ee295eec975d6a3686f1ac3 100644 (file)
@@ -49,6 +49,7 @@ extern "C" {
 #include <libkern/c++/OSUnserialize.h>
 #include <libkern/c++/OSKext.h>
 #include <libkern/OSKextLibPrivate.h>
+#include <libkern/OSDebug.h>
 
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOService.h>
@@ -63,28 +64,15 @@ extern "C" {
 /*********************************************************************
 *********************************************************************/
 
-#define CATALOGTEST 0
-
 IOCatalogue    * gIOCatalogue;
 const OSSymbol * gIOClassKey;
 const OSSymbol * gIOProbeScoreKey;
 const OSSymbol * gIOModuleIdentifierKey;
-IOLock         * gIOCatalogLock;
+IORWLock         * gIOCatalogLock;
 
 #if PRAGMA_MARK
 #pragma mark Utility functions
 #endif
-/*********************************************************************
-* Add a new personality to the set if it has a unique IOResourceMatchKey value.
-* XXX -- svail: This should be optimized.
-* esb - There doesn't seem like any reason to do this - it causes problems
-* esb - when there are more than one loadable driver matching on the same provider class
-*********************************************************************/
-static void
-AddNewImports(OSOrderedSet * set, OSDictionary * dict)
-{
-    set->setObject(dict);
-}
 
 #if PRAGMA_MARK
 #pragma mark IOCatalogue class implementation
@@ -126,42 +114,68 @@ void IOCatalogue::initialize(void)
     array->release();
 }
 
+/*********************************************************************
+* Return the catalogue's array of personalities for a personality's
+* IOProviderClass, if one exists.
+*********************************************************************/
+OSArray * IOCatalogue::arrayForPersonality(OSDictionary * dict)
+{
+    const OSSymbol * sym;
+
+    sym = OSDynamicCast(OSSymbol, dict->getObject(gIOProviderClassKey));
+    if (!sym)  return (0);
+
+    return ((OSArray *) personalities->getObject(sym));
+}
+
+void IOCatalogue::addPersonality(OSDictionary * dict)
+{
+    const OSSymbol * sym;
+    OSArray * arr;
+
+    sym = OSDynamicCast(OSSymbol, dict->getObject(gIOProviderClassKey));
+    if (!sym) return;
+    arr = (OSArray *) personalities->getObject(sym);
+    if (arr) arr->setObject(dict);
+    else
+    {
+        arr = OSArray::withObjects((const OSObject **)&dict, 1, 2);
+        personalities->setObject(sym, arr);
+        arr->release();
+    }
+}
+
 /*********************************************************************
 * Initialize the IOCatalog object.
 *********************************************************************/
 bool IOCatalogue::init(OSArray * initArray)
 {
     OSDictionary         * dict;
-    
+    OSObject * obj;
+
     if ( !super::init() )
         return false;
 
     generation = 1;
     
-    array = initArray;
-    array->retain();
-    kernelTables = OSCollectionIterator::withCollection( array );
-
-    gIOCatalogLock = IOLockAlloc();
-
-    lock     = gIOCatalogLock;
-#if __i386__
-    kld_lock = NULL;
-#endif /* __i386__ */
-
-    kernelTables->reset();
-    while( (dict = (OSDictionary *) kernelTables->getNextObject())) {
-        OSKext::uniquePersonalityProperties(dict);
+    personalities = OSDictionary::withCapacity(32);
+    personalities->setOptions(OSCollection::kSort, OSCollection::kSort);
+    for (unsigned int idx = 0; (obj = initArray->getObject(idx)); idx++)
+    {
+       dict = OSDynamicCast(OSDictionary, obj);
+       if (!dict) continue;
+       OSKext::uniquePersonalityProperties(dict);
         if( 0 == dict->getObject( gIOClassKey ))
+        {
             IOLog("Missing or bad \"%s\" key\n",
                     gIOClassKey->getCStringNoCopy());
+           continue;
+       }
+       dict->setObject("KernelConfigTable", kOSBooleanTrue);
+        addPersonality(dict);
     }
 
-#if CATALOGTEST
-    AbsoluteTime deadline;
-    clock_interval_to_deadline( 1000, kMillisecondScale );
-    thread_call_func_delayed( ping, this, deadline );
-#endif
+    gIOCatalogLock = IORWLockAlloc();
+    lock = gIOCatalogLock;
 
     return true;
 }
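
With these two helpers the catalogue stops being one flat OSArray:
`personalities` is an OSDictionary keyed by the IOProviderClass symbol, each
value an OSArray of the personalities naming that provider class. Roughly:

    // personalities : OSDictionary
    //   "IOPCIDevice" -> OSArray [ personalityA, personalityB, ... ]
    //   "IOResources" -> OSArray [ personalityC, ... ]
    //
    // Matching against a given provider class is then one hashed lookup
    // plus a short array walk, instead of a scan of every personality:
    OSArray * arr = arrayForPersonality(dict);  // NULL if class unseen
    if (!arr) addPersonality(dict);             // creates the per-class array
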
@@ -172,63 +186,8 @@ bool IOCatalogue::init(OSArray * initArray)
 *********************************************************************/
 void IOCatalogue::free( void )
 {
-    if ( array )
-        array->release();
-
-    if ( kernelTables )
-        kernelTables->release();
-    
-    super::free();
-}
-
-/*********************************************************************
-*********************************************************************/
-#if CATALOGTEST
-
-static int hackLimit;
-enum { kDriversPerIter = 4 };
-
-void
-IOCatalogue::ping(thread_call_param_t arg, thread_call_param_t)
-{
-    IOCatalogue         * self = (IOCatalogue *) arg;
-    OSOrderedSet         * set;
-    OSDictionary         * table;
-    int                           newLimit;
-
-    set = OSOrderedSet::withCapacity( 1 );
-
-    IOLockLock( &self->lock );
-
-    for( newLimit = 0; newLimit < kDriversPerIter; newLimit++) {
-       table = (OSDictionary *) self->array->getObject(
-                                       hackLimit + newLimit );
-       if( table) {
-           set->setLastObject( table );
-
-           OSSymbol * sym = (OSSymbol *) table->getObject(gIOClassKey);
-           kprintf("enabling %s\n", sym->getCStringNoCopy());
-
-       } else {
-           newLimit--;
-           break;
-       }
-    }
-
-    IOService::catalogNewDrivers( set );
-
-    hackLimit += newLimit;
-    self->generation++;
-
-    IOLockUnlock( &self->lock );
-
-    if( kDriversPerIter == newLimit) {
-        AbsoluteTime deadline;
-        clock_interval_to_deadline(500, kMillisecondScale);
-        thread_call_func_delayed(ping, this, deadline);
-    }
+    panic("");
 }
-#endif
 
 /*********************************************************************
 *********************************************************************/
@@ -239,33 +198,32 @@ IOCatalogue::findDrivers(
 {
     OSDictionary         * nextTable;
     OSOrderedSet         * set;
-    OSString             * imports;
+    OSArray              * array;
+    const OSMetaClass    * meta;
+    unsigned int           idx;
 
     set = OSOrderedSet::withCapacity( 1, IOServiceOrdering,
                                       (void *)gIOProbeScoreKey );
     if( !set )
        return( 0 );
 
-    IOLockLock(lock);
-    kernelTables->reset();
+    IORWLockRead(lock);
 
-#if CATALOGTEST
-    int hackIndex = 0;
-#endif
-    while( (nextTable = (OSDictionary *) kernelTables->getNextObject())) {
-#if CATALOGTEST
-       if( hackIndex++ > hackLimit)
-           break;
-#endif
-        imports = OSDynamicCast( OSString,
-                       nextTable->getObject( gIOProviderClassKey ));
-       if( imports && service->metaCast( imports ))
-            set->setObject( nextTable );
+    meta = service->getMetaClass();
+    while (meta)
+    {
+       array = (OSArray *) personalities->getObject(meta->getClassNameSymbol());
+       if (array) for (idx = 0; (nextTable = (OSDictionary *) array->getObject(idx)); idx++)
+       {
+            set->setObject(nextTable);
+       }
+       if (meta == &IOService::gMetaClass) break;
+       meta = meta->getSuperClass();
     }
 
     *generationCount = getGenerationCount();
 
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
     return( set );
 }
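
The catalogue lock also changes type here: lookups such as findDrivers() take
the new IORWLock shared, while the mutating paths below (addDrivers,
removeDrivers, terminateDrivers) take it exclusive. The basic IORWLock pattern,
for reference:

    IORWLock * lock = IORWLockAlloc();

    IORWLockRead(lock);        // shared: many concurrent readers
    /* ... walk personalities ... */
    IORWLockUnlock(lock);      // the same unlock call ends either mode

    IORWLockWrite(lock);       // exclusive: single writer
    /* ... mutate personalities ... */
    IORWLockUnlock(lock);
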
@@ -278,27 +236,42 @@ IOCatalogue::findDrivers(
     OSDictionary * matching,
     SInt32 * generationCount)
 {
+    OSCollectionIterator * iter;
     OSDictionary         * dict;
     OSOrderedSet         * set;
+    OSArray              * array;
+    const OSSymbol       * key;
+    unsigned int           idx;
 
     OSKext::uniquePersonalityProperties(matching);
 
     set = OSOrderedSet::withCapacity( 1, IOServiceOrdering,
                                       (void *)gIOProbeScoreKey );
+    if (!set) return (0);
+    iter = OSCollectionIterator::withCollection(personalities);
+    if (!iter) 
+    {
+       set->release();
+       return (0);
+    }
 
-    IOLockLock(lock);
-    kernelTables->reset();
-    while ( (dict = (OSDictionary *) kernelTables->getNextObject()) ) {
-
-       /* This comparison must be done with only the keys in the
-        * "matching" dict to enable general searches.
-        */
-        if ( dict->isEqualTo(matching, matching) )
-            set->setObject(dict);
+    IORWLockRead(lock);
+    while ((key = (const OSSymbol *) iter->getNextObject()))
+    {
+        array = (OSArray *) personalities->getObject(key);
+        if (array) for (idx = 0; (dict = (OSDictionary *) array->getObject(idx)); idx++)
+        {
+          /* This comparison must be done with only the keys in the
+           * "matching" dict to enable general searches.
+           */
+           if ( dict->isEqualTo(matching, matching) )
+               set->setObject(dict);
+       }
     }
     *generationCount = getGenerationCount();
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
+    iter->release();
     return set;
 }
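
The isEqualTo(matching, matching) idiom compares only the keys present in the
matching dictionary, which is what makes these general searches work; for
example:

    // matching    = { "IOProviderClass" : "IOMedia" }
    // personality = { "IOProviderClass" : "IOMedia",
    //                 "IOClass"         : "ExampleDriver",
    //                 "IOProbeScore"    : 1000 }
    //
    // personality->isEqualTo(matching, matching) is true: every key of
    // `matching` appears in `personality` with an equal value; the extra
    // keys in the personality are ignored.
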
 
@@ -313,6 +286,7 @@ IOCatalogue::findDrivers(
 * xxx - userlib used to refuse to send personalities with IOKitDebug
 * xxx - during safe boot. That would be better implemented here.
 *********************************************************************/
+
 bool IOCatalogue::addDrivers(
     OSArray * drivers,
     bool doNubMatching)
@@ -322,7 +296,7 @@ bool IOCatalogue::addDrivers(
     OSOrderedSet         * set = NULL;        // must release
     OSObject             * object = NULL;       // do not release
     OSArray              * persons = NULL;    // do not release
-
+    
     persons = OSDynamicCast(OSArray, drivers);
     if (!persons) {
         goto finish;
@@ -343,7 +317,7 @@ bool IOCatalogue::addDrivers(
     */
     result = true;
 
-    IOLockLock(lock);
+    IORWLockWrite(lock);
     while ( (object = iter->getNextObject()) ) {
     
         // xxx Deleted OSBundleModuleDemand check; will handle in other ways for SL
@@ -359,43 +333,48 @@ bool IOCatalogue::addDrivers(
         }
 
         OSKext::uniquePersonalityProperties(personality);
-        
+
         // Add driver personality to catalogue.
-        count = array->getCount();
-        while (count--) {
-            OSDictionary * driver;
-            
-            // Be sure not to double up on personalities.
-            driver = (OSDictionary *)array->getObject(count);
-            
-           /* Unlike in other functions, this comparison must be exact!
-            * The catalogue must be able to contain personalities that
-            * are proper supersets of others.
-            * Do not compare just the properties present in one driver
-            * pesonality or the other.
-            */
-            if (personality->isEqualTo(driver)) {
-                break;
-            }
-        }
-        if (count >= 0) {
-            // its a dup
-            continue;
-        }
-        
-        result = array->setObject(personality);
-        if (!result) {
-            break;
+
+       OSArray * array = arrayForPersonality(personality);
+       if (!array) addPersonality(personality);
+       else
+       {       
+           count = array->getCount();
+           while (count--) {
+               OSDictionary * driver;
+               
+               // Be sure not to double up on personalities.
+               driver = (OSDictionary *)array->getObject(count);
+               
+              /* Unlike in other functions, this comparison must be exact!
+               * The catalogue must be able to contain personalities that
+               * are proper supersets of others.
+               * Do not compare just the properties present in one driver
+               * personality or the other.
+               */
+               if (personality->isEqualTo(driver)) {
+                   break;
+               }
+           }
+           if (count >= 0) {
+               // it's a dup
+               continue;
+           }
+           result = array->setObject(personality);
+           if (!result) {
+               break;
+           }
         }
-        
-        AddNewImports(set, personality);
+
+       set->setObject(personality);        
     }
     // Start device matching.
     if (result && doNubMatching && (set->getCount() > 0)) {
         IOService::catalogNewDrivers(set);
         generation++;
     }
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
 finish:
     if (set)  set->release();
@@ -413,61 +392,53 @@ IOCatalogue::removeDrivers(
     OSDictionary * matching,
     bool doNubMatching)
 {
-    OSCollectionIterator * tables;
-    OSDictionary         * dict;
     OSOrderedSet         * set;
-    OSArray              * arrayCopy;
+    OSCollectionIterator * iter;
+    OSDictionary         * dict;
+    OSArray              * array;
+    const OSSymbol       * key;
+    unsigned int           idx;
 
     if ( !matching )
         return false;
-
+    
     set = OSOrderedSet::withCapacity(10,
                                      IOServiceOrdering,
                                      (void *)gIOProbeScoreKey);
     if ( !set )
         return false;
-
-    arrayCopy = OSArray::withCapacity(100);
-    if ( !arrayCopy ) {
-        set->release();
-        return false;
-    }
-    
-    tables = OSCollectionIterator::withCollection(arrayCopy);
-    arrayCopy->release();
-    if ( !tables ) {
-        set->release();
-        return false;
+    iter = OSCollectionIterator::withCollection(personalities);
+    if (!iter) 
+    {
+       set->release();
+       return (false);
     }
 
-    OSKext::uniquePersonalityProperties( matching );
-
-    IOLockLock(lock);
-    kernelTables->reset();
-    arrayCopy->merge(array);
-    array->flushCollection();
-    tables->reset();
-    while ( (dict = (OSDictionary *)tables->getNextObject()) ) {
-
-       /* This comparison must be done with only the keys in the
-        * "matching" dict to enable general searches.
-        */
-        if ( dict->isEqualTo(matching, matching) ) {
-            AddNewImports( set, dict );
-            continue;
+    IORWLockWrite(lock);
+    while ((key = (const OSSymbol *) iter->getNextObject()))
+    {
+        array = (OSArray *) personalities->getObject(key);
+        if (array) for (idx = 0; (dict = (OSDictionary *) array->getObject(idx)); idx++)
+        {
+           /* This comparison must be done with only the keys in the
+            * "matching" dict to enable general searches.
+            */
+            if ( dict->isEqualTo(matching, matching) ) {
+                set->setObject(dict);        
+                array->removeObject(idx);
+                idx--;
+            }
+        }
+        // Start device matching.
+        if ( doNubMatching && (set->getCount() > 0) ) {
+            IOService::catalogNewDrivers(set);
+            generation++;
         }
-
-        array->setObject(dict);
-    }
-    // Start device matching.
-    if ( doNubMatching && (set->getCount() > 0) ) {
-        IOService::catalogNewDrivers(set);
-        generation++;
     }
-    IOLockUnlock(lock);
-    
+    IORWLockUnlock(lock);
+   
     set->release();
-    tables->release();
+    iter->release();
     
     return true;
 }
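
Note the removeObject(idx)/idx-- pairing used while forward-iterating:
OSArray::removeObject() shifts later elements down one slot, so the index must
step back to revisit the position that just changed. The generic form
(shouldRemove() is a hypothetical predicate standing in for the
isEqualTo(matching, matching) test above):

    OSObject * obj;
    for (unsigned int idx = 0; (obj = array->getObject(idx)); idx++) {
        if (shouldRemove(obj)) {
            array->removeObject(idx);   // later objects shift down...
            idx--;                      // ...so re-test the same index
        }
    }
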
@@ -538,7 +509,8 @@ void IOCatalogue::moduleHasLoaded(OSString * moduleName)
     startMatching(dict);
     dict->release();
 
-    (void) OSKext::considerRebuildOfPrelinkedKernel(moduleName);
+    (void) OSKext::setDeferredLoadSucceeded();
+    (void) OSKext::considerRebuildOfPrelinkedKernel();
 }
 
 void IOCatalogue::moduleHasLoaded(const char * moduleName)
@@ -556,7 +528,7 @@ IOReturn IOCatalogue::unloadModule(OSString * moduleName) const
     return OSKext::removeKextWithIdentifier(moduleName->getCStringNoCopy());
 }
 
-static IOReturn _terminateDrivers(OSDictionary * matching)
+IOReturn IOCatalogue::_terminateDrivers(OSDictionary * matching)
 {
     OSDictionary         * dict;
     OSIterator           * iter;
@@ -601,41 +573,39 @@ static IOReturn _terminateDrivers(OSDictionary * matching)
     return ret;
 }
 
-static IOReturn _removeDrivers( OSArray * array, OSDictionary * matching )
+IOReturn IOCatalogue::_removeDrivers(OSDictionary * matching)
 {
-    OSCollectionIterator * tables;
-    OSDictionary         * dict;
-    OSArray              * arrayCopy;
     IOReturn               ret = kIOReturnSuccess;
+    OSCollectionIterator * iter;
+    OSDictionary         * dict;
+    OSArray              * array;
+    const OSSymbol       * key;
+    unsigned int           idx;
 
     // remove configs from catalog.
 
-    arrayCopy = OSArray::withCapacity(100);
-    if ( !arrayCopy )
-        return kIOReturnNoMemory;
-
-    tables = OSCollectionIterator::withCollection(arrayCopy);
-    arrayCopy->release();
-    if ( !tables )
-        return kIOReturnNoMemory;
-
-    arrayCopy->merge(array);
-    array->flushCollection();
-    tables->reset();
-    while ( (dict = (OSDictionary *)tables->getNextObject()) ) {
-
-       /* Remove from the catalogue's array any personalities
-        * that match the matching dictionary.
-        * This comparison must be done with only the keys in the
-        * "matching" dict to enable general matching.
-        */
-        if ( dict->isEqualTo(matching, matching) )
-            continue;
+    iter = OSCollectionIterator::withCollection(personalities);
+    if (!iter) return (kIOReturnNoMemory);
 
-        array->setObject(dict);
+    while ((key = (const OSSymbol *) iter->getNextObject()))
+    {
+        array = (OSArray *) personalities->getObject(key);
+        if (array) for (idx = 0; (dict = (OSDictionary *) array->getObject(idx)); idx++)
+        {
+
+           /* Remove from the catalogue's array any personalities
+            * that match the matching dictionary.
+            * This comparison must be done with only the keys in the
+            * "matching" dict to enable general matching.
+            */
+            if (dict->isEqualTo(matching, matching))
+            {
+                array->removeObject(idx);
+                idx--;
+            }
+        }
     }
-
-    tables->release();
+    iter->release();
 
     return ret;
 }
@@ -645,11 +615,10 @@ IOReturn IOCatalogue::terminateDrivers(OSDictionary * matching)
     IOReturn ret;
 
     ret = _terminateDrivers(matching);
-    IOLockLock(lock);
+    IORWLockWrite(lock);
     if (kIOReturnSuccess == ret)
-       ret = _removeDrivers(array, matching);
-    kernelTables->reset();
-    IOLockUnlock(lock);
+       ret = _removeDrivers(matching);
+    IORWLockUnlock(lock);
 
     return ret;
 }
@@ -694,18 +663,17 @@ IOReturn IOCatalogue::terminateDriversForModule(
     
    /* No goto between IOLock calls!
     */
-    IOLockLock(lock);
+    IORWLockWrite(lock);
     if (kIOReturnSuccess == ret) {
-        ret = _removeDrivers(array, dict);
+        ret = _removeDrivers(dict);
     }
-    kernelTables->reset();
 
     // Unload the module itself.
     if (unload && isLoaded && ret == kIOReturnSuccess) {
         ret = unloadModule(moduleName);
     }
 
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
     dict->release();
 
@@ -732,8 +700,12 @@ IOReturn IOCatalogue::terminateDriversForModule(
 
 bool IOCatalogue::startMatching( OSDictionary * matching )
 {
+    OSCollectionIterator * iter;
     OSDictionary         * dict;
     OSOrderedSet         * set;
+    OSArray              * array;
+    const OSSymbol *       key;
+    unsigned int           idx;
     
     if ( !matching )
         return false;
@@ -743,26 +715,39 @@ bool IOCatalogue::startMatching( OSDictionary * matching )
     if ( !set )
         return false;
 
-    IOLockLock(lock);
-    kernelTables->reset();
+    iter = OSCollectionIterator::withCollection(personalities);
+    if (!iter) 
+    {
+       set->release();
+        return false;
+    }
 
-    while ( (dict = (OSDictionary *)kernelTables->getNextObject()) ) {
+    IORWLockRead(lock);
 
-       /* This comparison must be done with only the keys in the
-        * "matching" dict to enable general matching.
-        */
-        if ( dict->isEqualTo(matching, matching) )
-            AddNewImports(set, dict);
+    while ((key = (const OSSymbol *) iter->getNextObject()))
+    {
+        array = (OSArray *) personalities->getObject(key);
+        if (array) for (idx = 0; (dict = (OSDictionary *) array->getObject(idx)); idx++)
+        {
+          /* This comparison must be done with only the keys in the
+           * "matching" dict to enable general matching.
+           */
+            if (dict->isEqualTo(matching, matching)) {
+                set->setObject(dict);
+            }        
+        }
     }
+
     // Start device matching.
     if ( set->getCount() > 0 ) {
         IOService::catalogNewDrivers(set);
         generation++;
     }
 
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
     set->release();
+    iter->release();
 
     return true;
 }
@@ -778,172 +763,100 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching)
 {
     bool                   result              = false;
     OSArray              * newPersonalities    = NULL;  // do not release
-    OSCollectionIterator * newPIterator        = NULL;  // must release
+    OSCollectionIterator * iter                = NULL;  // must release
     OSOrderedSet         * matchSet            = NULL;  // must release
-    OSArray              * oldPersonalities    = NULL;  // must release
-    OSArray              * kernelPersonalities = NULL;  // must release
-    OSString             * errorString         = NULL;  // must release
-    OSObject             * object              = NULL;  // do not release
+    const OSSymbol       * key;
+    OSArray              * array;
     OSDictionary         * thisNewPersonality  = NULL;  // do not release
-    signed int             count, i;
-
-    extern const char    * gIOKernelConfigTables;
+    OSDictionary         * thisOldPersonality  = NULL;  // do not release
+    signed int             idx, newIdx;
 
     if (drivers) {
         newPersonalities = OSDynamicCast(OSArray, drivers);
         if (!newPersonalities) {
             goto finish;
         }
-
-        newPIterator = OSCollectionIterator::withCollection(newPersonalities);
-        if (!newPIterator) {
-            goto finish;
-        }
         
         matchSet = OSOrderedSet::withCapacity(10, IOServiceOrdering,
             (void *)gIOProbeScoreKey);
         if (!matchSet) {
             goto finish;
         }
-    }
-
-   /* Read personalities for the built-in kernel driver classes.
-    * We don't have many any more.
-    */
-    kernelPersonalities = OSDynamicCast(OSArray,
-        OSUnserialize(gIOKernelConfigTables, &errorString));
-    if (!kernelPersonalities && errorString) {
-        IOLog("KernelConfigTables syntax error: %s\n",
-            errorString->getCStringNoCopy());
-        goto finish;
-    }
-    
-   /* Now copy the current array of personalities so we can reuse them
-    * if the new list contains any duplicates. This saves on memory
-    * consumption.
-    */
-    oldPersonalities = OSDynamicCast(OSArray, array->copyCollection());
-    if (!oldPersonalities) {
-        goto finish;
+        iter = OSCollectionIterator::withCollection(personalities);
+        if (!iter) {
+            goto finish;
+        }
     }
 
     result = true;
 
     IOLog("Resetting IOCatalogue.\n");
-    
-   /* No goto finish from here to unlock.
-    */
-    IOLockLock(lock);
-    
-    array->flushCollection();
 
-   /* Add back the kernel personalities and remove them from the old
-    * array so we don't try to match on them again. Go forward through
-    * the arrays as this causes the least iteration since kernel personalities
-    * should always be first.
+   /* No goto finish from here to unlock.
     */
-    count = kernelPersonalities->getCount();
-    for (i = 0; i < count; i++) {
+    IORWLockWrite(lock);
     
-       /* Static cast here, as the data is coming from within the kernel image.
-        */
-        OSDictionary * thisNewPersonality = (OSDictionary *)
-            kernelPersonalities->getObject(i);
-        array->setObject(thisNewPersonality);
-
-        signed int oldPCount = oldPersonalities->getCount();
-        for (signed int oldPIndex = 0; oldPIndex < oldPCount; oldPIndex++) {
-            if (thisNewPersonality->isEqualTo(oldPersonalities->getObject(oldPIndex))) {
-                oldPersonalities->removeObject(oldPIndex);
-                break;
-            }
-        }
-    }
-
-   /* Now add the new set of personalities passed in, using existing
-    * copies if we had them in kernel memory already.
-    */
-    if (newPIterator) {
-        OSDictionary * thisOldPersonality = NULL;  // do not release
-        
-        while ( (object = newPIterator->getNextObject()) ) {
-
-            thisNewPersonality = OSDynamicCast(OSDictionary, object);
-            if (!thisNewPersonality) {
-                IOLog("IOCatalogue::resetAndAddDrivers() encountered non-dictionary; bailing.\n");
-            result = false;
-            break;
-            }
-
-           /* Convert common OSString property values to OSSymbols.
-            */
-            OSKext::uniquePersonalityProperties(thisNewPersonality);
-            
-           /* Add driver personality to catalogue, but if we had a copy already
-            * use that instead so we don't have multiple copies from OSKext instances.
+    while ((key = (const OSSymbol *) iter->getNextObject()))
+    {
+        array = (OSArray *) personalities->getObject(key);
+        if (!array) continue;
+        for (idx = 0; (thisOldPersonality = (OSDictionary *) array->getObject(idx)); idx++)
+        {
+            if (thisOldPersonality->getObject("KernelConfigTable")) continue;
+            if (newPersonalities) for (newIdx = 0; 
+                (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); 
+                newIdx++)
+            {
+              /* Unlike in other functions, this comparison must be exact!
+               * The catalogue must be able to contain personalities that
+               * are proper supersets of others.
+               * Do not compare just the properties present in one driver
+               * personality or the other.
             */
-            count = oldPersonalities->getCount();
-            thisOldPersonality = NULL;
-            while (count--) {
-                
-                thisOldPersonality = (OSDictionary *)oldPersonalities->getObject(count);
-                
-               /* Unlike in other functions, this comparison must be exact!
-                * The catalogue must be able to contain personalities that
-                * are proper supersets of others.
-                * Do not compare just the properties present in one driver
-                * pesonality or the other.
-                */
-                if (thisNewPersonality->isEqualTo(thisOldPersonality)) {
+                if (thisNewPersonality->isEqualTo(thisOldPersonality))  
                     break;
-                }
             }
-
-           /* If we found a dup, add the *original* back to the catalogue,
-            * remove it from our bookkeeping list, and continue.
-            * Don't worry about matching on personalities we already had.
-            */
-            if (count >= 0) {
-                array->setObject(thisOldPersonality);
-                oldPersonalities->removeObject(count);
-                continue;
+            if (thisNewPersonality)
+            {
+                // dup, ignore
+                newPersonalities->removeObject(newIdx);
+            }
+            else
+            {
+                // not in new set - remove
+                // only remove the dictionary if this module is not loaded - 9953845
+                if ( isModuleLoaded(thisOldPersonality) == false ) 
+                {
+                    if (matchSet)  matchSet->setObject(thisOldPersonality);
+                    array->removeObject(idx);
+                    idx--;
+                }
             }
-
-           /* Otherwise add the new personality and mark it for matching.
-            */
-            array->setObject(thisNewPersonality);
-            AddNewImports(matchSet, thisNewPersonality);                
-        }
-
-       /*****
-        * Now, go through remaining old personalities, which have effectively
-        * been removed, and add them to the match set as necessary.
-        */
-        count = oldPersonalities->getCount();
-        while (count--) {
-        
-           /* Static cast here is ok as these dictionaries were already in the catalogue.
-            */
-            thisOldPersonality = (OSDictionary *)oldPersonalities->getObject(count);
-            AddNewImports(matchSet, thisOldPersonality);
         }
+    }
 
-       /* Finally, start device matching on all new & removed personalities.
-        */
-        if (result && doNubMatching && (matchSet->getCount() > 0)) {
-            IOService::catalogNewDrivers(matchSet);
-            generation++;
-        }
+     // add new
+     for (newIdx = 0;
+          (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); 
+          newIdx++)
+     {
+         OSKext::uniquePersonalityProperties(thisNewPersonality);
+         addPersonality(thisNewPersonality);
+         matchSet->setObject(thisNewPersonality);
+     }
+
+   /* Finally, start device matching on all new & removed personalities.
+    */
+    if (result && doNubMatching && (matchSet->getCount() > 0)) {
+        IOService::catalogNewDrivers(matchSet);
+        generation++;
     }
 
-    IOLockUnlock(lock);
+    IORWLockUnlock(lock);
 
 finish:
-    if (newPIterator) newPIterator->release();
     if (matchSet) matchSet->release();
-    if (oldPersonalities) oldPersonalities->release();
-    if (kernelPersonalities) kernelPersonalities->release();
-    if (errorString) errorString->release();
+    if (iter)     iter->release();
 
     return result;
 }
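
The rewritten reset keeps two kinds of existing personalities in place instead
of flushing everything: those tagged "KernelConfigTable" at init time
(built-in drivers), and those whose kext is still loaded (the 9953845 fix).
Summarised:

    // For each old personality P in the catalogue:
    //   P["KernelConfigTable"] set       -> keep (built-in, tagged in init)
    //   P equal to some new personality  -> keep old copy, drop the new dup
    //   P's module still loaded          -> keep (9953845)
    //   otherwise                        -> remove P, queue it for matching
    // Then add every remaining new personality and run matching once.
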
@@ -963,8 +876,7 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const
     switch ( kind )
     {
         case kIOCatalogGetContents:
-            if (!array->serialize(s))
-                kr = kIOReturnNoMemory;
+            kr = KERN_NOT_SUPPORTED;
             break;
 
         case kIOCatalogGetModuleDemandList:
@@ -987,7 +899,6 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const
     return kr;
 }
 
-
 #if PRAGMA_MARK
 #pragma mark Obsolete Kext Loading Stuff
 #endif
index 29ecd859e07820f9f648b3d8eec46067e680bbbe..9b19d70eefeb5e7dd63bfb88997f44401ed9c654 100644 (file)
@@ -182,7 +182,7 @@ IOReturn IOCommandGate::runAction(Action inAction,
        
        if (trace)
                IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-                                                                (uintptr_t) inAction, (uintptr_t) owner);
+                                        VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
        
     IOStatisticsActionCall();
        
@@ -191,7 +191,7 @@ IOReturn IOCommandGate::runAction(Action inAction,
        
        if (trace)
                IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-                                                          (uintptr_t) inAction, (uintptr_t) owner);
+                                      VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
     
     openGate();
        
@@ -220,7 +220,7 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
                
         if (trace)
             IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-                                                                        (uintptr_t) inAction, (uintptr_t) owner);
+                                    VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
         
         IOStatisticsActionCall();
         
@@ -228,7 +228,7 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
                
         if (trace)
             IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-                                                                  (uintptr_t) inAction, (uintptr_t) owner);
+                                  VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
     }
 
     openGate();
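
VM_KERNEL_UNSLIDE strips the KASLR slide from a pointer into the kernel image
before it is written to the trace buffer, so tracepoints stop leaking slid
kernel text addresses to trace consumers; pointers outside the kernel image
pass through unchanged. The call sites change only in what they log:

    if (trace)
        IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
                                 VM_KERNEL_UNSLIDE(inAction),  // static text address
                                 (uintptr_t) owner);           // heap pointer, as-is
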
index b95ee921de669af505969f5dfe249a27ef17dfd4..dadc4dc3546e4a6de06435ce58c83d621c1aedc3 100644 (file)
@@ -147,7 +147,7 @@ IODMACommand::initWithSpecification(SegmentFunction outSegFunc,
                                    IOMapper       *mapper,
                                    void           *refCon)
 {
-    if (!super::init() || !outSegFunc || !numAddressBits)
+    if (!super::init() || !outSegFunc)
         return false;
 
     bool is32Bit = (OutputHost32   == outSegFunc || OutputBig32 == outSegFunc
@@ -502,7 +502,7 @@ IODMACommand::walkAll(UInt8 op)
                }
                else
                {
-                   DEBG("IODMACommand !iovmAlloc");
+                   DEBG("IODMACommand !alloc IOBMD");
                    return (kIOReturnNoResources);
                }
            }
@@ -513,6 +513,11 @@ IODMACommand::walkAll(UInt8 op)
            state->fLocalMapperPageCount = atop_64(round_page(
                    state->fPreparedLength + ((state->fPreparedOffset + fMDSummary.fPageAlign) & page_mask)));
            state->fLocalMapperPageAlloc = fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount);
+            if (!state->fLocalMapperPageAlloc)
+            {
+                DEBG("IODMACommand !iovmAlloc");
+                return (kIOReturnNoResources);
+            }
            state->fMapContig = true;
        }
     }
@@ -610,7 +615,7 @@ IODMACommand::prepareWithSpecification(SegmentFunction      outSegFunc,
     if (fActive)
         return kIOReturnNotPermitted;
 
-    if (!outSegFunc || !numAddressBits)
+    if (!outSegFunc)
         return kIOReturnBadArgument;
 
     bool is32Bit = (OutputHost32   == outSegFunc || OutputBig32 == outSegFunc
@@ -1143,7 +1148,7 @@ IODMACommand::clientOutputSegment(
     SegmentFunction segmentFunction = (SegmentFunction) reference;
     IOReturn ret = kIOReturnSuccess;
 
-    if ((target->fNumAddressBits < 64) 
+    if (target->fNumAddressBits && (target->fNumAddressBits < 64) 
        && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits)
        && (target->reserved->fLocalMapperPageAlloc || !target->reserved->fLocalMapper))
     {
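
numAddressBits == 0 is now accepted by initWithSpecification() and
prepareWithSpecification() and means "no address-width limit"; accordingly,
clientOutputSegment() only range-checks a segment when a nonzero width below
64 bits is set. The guard in isolation:

    // true when the segment's last byte lies above the device's reach
    bool outOfReach = target->fNumAddressBits
        && (target->fNumAddressBits < 64)
        && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits);
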
index 1001ebeff0a88353eeace68baecf001f5ba84aa4..95988aaf42520cce839577add4edfdd0a860401f 100644 (file)
@@ -73,11 +73,19 @@ IODataQueue *IODataQueue::withEntries(UInt32 numEntries, UInt32 entrySize)
 
 Boolean IODataQueue::initWithCapacity(UInt32 size)
 {
+    vm_size_t allocSize = 0;
+
     if (!super::init()) {
         return false;
     }
 
-    dataQueue = (IODataQueueMemory *)IOMallocAligned(round_page(size + DATA_QUEUE_MEMORY_HEADER_SIZE), PAGE_SIZE);
+    allocSize = round_page(size + DATA_QUEUE_MEMORY_HEADER_SIZE);
+
+    if (allocSize < size) {
+        return false;
+    }
+
+    dataQueue = (IODataQueueMemory *)IOMallocAligned(allocSize, PAGE_SIZE);
     if (dataQueue == 0) {
         return false;
     }
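
round_page() can wrap to zero when size sits within a header-plus-page of the
top of vm_size_t, which previously produced a drastically undersized
allocation for a huge requested capacity; the allocSize < size test rejects
that case. For instance, with 4 KiB pages and a 32-bit vm_size_t:

    // size      = 0xFFFFF000
    // allocSize = round_page(0xFFFFF000 + DATA_QUEUE_MEMORY_HEADER_SIZE)
    //             wraps past 2^32 to a value smaller than size,
    // so initWithCapacity() now returns false instead of undersizing.
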
index 8de463efd3b3edc1c9b21990ba12fffa407ccd6f..4ee53e5668203b8e239fc9334f82a16324d215cd 100644 (file)
@@ -921,6 +921,7 @@ void IODTSetResolving( IORegistryEntry *    regEntry,
     if( !prop)
         return;
 
+    prop->setSerializable(false);
     regEntry->setProperty( gIODTPersistKey, prop);
     prop->release();
     return;
@@ -928,8 +929,8 @@ void IODTSetResolving( IORegistryEntry *    regEntry,
 
 static SInt32 DefaultCompare( UInt32 cellCount, UInt32 left[], UInt32 right[] )
 {
-    cellCount--;
-    return( left[ cellCount ] - right[ cellCount ] );
+       cellCount--;
+       return( left[ cellCount ] - right[ cellCount ] ); 
 }
 
 void IODTGetCellCounts( IORegistryEntry * regEntry,
@@ -959,14 +960,15 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
     // cells in addresses below regEntry
     UInt32             childSizeCells, childAddressCells;
     UInt32             childCells;
-    UInt32             cell[ 5 ], offset = 0, length;
-    UInt32             endCell[ 5 ];
+    UInt32             cell[ 8 ], length;
+    UInt64             offset = 0;
+    UInt32             endCell[ 8 ];
     UInt32             *range;
     UInt32             *lookRange;
     UInt32             *startRange;
     UInt32             *endRanges;
     bool               ok = true;
-    SInt32             diff, diff2, endDiff;
+    SInt64             diff, diff2, endDiff;
 
     IODTPersistent     *persist;
     IODTCompareAddressCellFunc compare;
@@ -974,10 +976,13 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
     IODTGetCellCounts( regEntry, &childSizeCells, &childAddressCells );
     childCells = childAddressCells + childSizeCells;
 
+    if (childCells > sizeof(cell)/sizeof(cell[0]))
+        panic("IODTResolveAddressCell: Invalid device tree (%u,%u)", (uint32_t)childAddressCells, (uint32_t)childSizeCells);
+
     bcopy( cellsIn, cell, sizeof(UInt32) * childCells );
     if( childSizeCells > 1)
-        *len = IOPhysical32( cellsIn[ childAddressCells ],
-                             cellsIn[ childAddressCells + 1 ] );
+        *len = IOPhysical32( cellsIn[ childAddressCells + 1],
+                             cellsIn[ childAddressCells] );
     else
         *len = IOPhysical32( 0, cellsIn[ childAddressCells ] );
 
@@ -985,8 +990,13 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
     {
        prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey ));
        if( 0 == prop) {
-           /* end of the road */
-           *phys = IOPhysical32( 0,  cell[ childAddressCells - 1 ] + offset);
+            /* end of the road */
+           if (childAddressCells == 2)  {
+                *phys = IOPhysical32( cell[ childAddressCells - 1 ], cell [ childAddressCells - 2 ]);
+           } else  {
+               *phys = IOPhysical32( 0, cell[ childAddressCells - 1 ]);
+           }
+            *phys += offset;
            break;
        }
 
@@ -1003,8 +1013,11 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
            if( prop) {
                persist = (IODTPersistent *) prop->getBytesNoCopy();
                compare = persist->compareFunc;
-           } else
+           } else if (addressCells == childAddressCells) {
                compare = DefaultCompare;
+           } else {
+               panic("There is no mixed comparison function yet...");
+           }
 
            for( ok = false;
                 range < endRanges;
@@ -1013,8 +1026,21 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
                // is cell start within range?
                diff = (*compare)( childAddressCells, cell, range );
 
+        if (childAddressCells > sizeof(endCell)/sizeof(endCell[0]))
+            panic("IODTResolveAddressCell: Invalid device tree (%u)", (uint32_t)childAddressCells);
+
                bcopy(range, endCell, childAddressCells * sizeof(UInt32));
-               endCell[childAddressCells - 1] += range[childCells + addressCells - 1];
+
+               if (childAddressCells == 2) {
+                       uint64_t sum = endCell[childAddressCells - 2] + IOPhysical32(range[childCells + addressCells - 1], range[childCells + addressCells - 2]);
+                       endCell[childAddressCells - 2] = (uint32_t)(sum & 0x00000000FFFFFFFFULL);
+                       if (sum > UINT32_MAX) {
+                               endCell[childAddressCells - 1] += (uint32_t)((sum & 0xFFFFFFFF00000000ULL) >> 32);
+                       }
+               } else {
+                       endCell[childAddressCells - 1] += range[childCells + addressCells - 1];
+               }
+
                diff2 = (*compare)( childAddressCells, cell, endCell );
 
                if ((diff < 0) || (diff2 >= 0))
@@ -1025,7 +1051,17 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
                {
                    // search for cell end
                    bcopy(cell, endCell, childAddressCells * sizeof(UInt32));
-                   endCell[childAddressCells - 1] += cell[childCells - 1] - 1;
+
+                   if (childSizeCells == 2) {
+                       uint64_t sum;
+                        sum = endCell[childAddressCells - 2] + IOPhysical32(cell[childCells - 1], cell[childCells - 2]) - 1;
+                       endCell[childAddressCells - 2] = (uint32_t)(sum & 0x00000000FFFFFFFFULL);
+                       if (sum > UINT32_MAX) {
+                               endCell[childAddressCells - 1] += (uint32_t)((sum & 0xFFFFFFFF00000000ULL) >> 32);
+                       }
+                   } else {
+                        endCell[childAddressCells - 1] += cell[childCells - 1] - 1;
+                   }
                    lookRange = startRange;
                    for( ;
                         lookRange < endRanges;
@@ -1049,6 +1085,9 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
                break;
            }
 
+        if (addressCells + sizeCells > sizeof(cell)/sizeof(cell[0]))
+            panic("IODTResolveAddressCell: Invalid device tree (%u, %u)", (uint32_t)addressCells, (uint32_t)sizeCells);
+
            // Get the physical start of the range from our parent
            bcopy( range + childAddressCells, cell, sizeof(UInt32) * addressCells );
            bzero( cell + addressCells, sizeof(UInt32) * sizeCells );
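
The two-cell branches above implement 64-bit addition over an address split
across two 32-bit cells (high word in cell[childAddressCells - 1], low word in
cell[childAddressCells - 2]), carrying into the high cell on overflow. The
arithmetic in isolation:

    // (hi, lo) hold one 64-bit address as two 32-bit cells.
    uint64_t sum = (uint64_t) lo + addend;    // addend built via IOPhysical32
    lo = (uint32_t) (sum & 0xFFFFFFFFULL);    // keep the low 32 bits
    if (sum > UINT32_MAX)
        hi += (uint32_t) (sum >> 32);         // propagate the carry
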
index 944e84ced357c79514e52884297c68476ff427db..6ecc33bfd4b2f2c8ea105e5daf5d73d9ed19d327 100644 (file)
@@ -157,14 +157,14 @@ void IOFilterInterruptEventSource::normalInterruptOccurred
        
        if (trace)
                IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER),
-                                                                (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                        VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
     
     // Call the filter.
     filterRes = (*filterAction)(owner, this);
        
        if (trace)
                IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER),
-                                                          (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                      VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
        
     if (filterRes)
         signalInterrupt();
@@ -178,14 +178,14 @@ void IOFilterInterruptEventSource::disableInterruptOccurred
        
        if (trace)
                IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER),
-                                                                (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                        VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
     
     // Call the filter.
     filterRes = (*filterAction)(owner, this);
        
        if (trace)
                IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER),
-                                                          (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                      VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
        
     if (filterRes) {
         prov->disableInterrupt(source);        /* disable the interrupt */
index 002055ff145e94ea7bab384a39a29b74541ff198..37c0bc7eaf8d3e5b66383f2e1e5257c2d8eaf56c 100644 (file)
@@ -58,12 +58,12 @@ Sleep:
   by hibernate_page_list_setall(), avoiding having to find arch dependent low level bits.
   The image header and block list are written. The header includes the second file extent so
   only the header block is needed to read the file, regardless of filesystem.
-  The kernel section "__HIB" is written uncompressed to the image. This section of code and data 
+  The kernel segment "__HIB" is written uncompressed to the image. This segment of code and data 
   (only) is used to decompress the image during wake/boot.
   Some additional pages are removed from the bitmaps - the buffers used for hibernation.
   The bitmaps are written to the image.
   More areas are removed from the bitmaps (after they have been written to the image) - the 
-  section "__HIB" pages and interrupt stack.
+  segment "__HIB" pages and interrupt stack.
   Each wired page is compressed and written and then each non-wired page. Compression and 
   disk writes are in parallel.
   The image header is written to the start of the file and the polling driver closed.
@@ -152,7 +152,7 @@ to restrict I/O ops.
 #include "IOPMPowerStateQueue.h"
 #include <IOKit/IOBufferMemoryDescriptor.h>
 #include <IOKit/AppleKeyStoreInterface.h>
-#include <crypto/aes.h>
+#include <libkern/crypto/aes.h>
 
 #include <sys/uio.h>
 #include <sys/conf.h>
@@ -196,6 +196,8 @@ static OSData *                     gIOHibernateBootNextData;
 static OSObject *              gIOHibernateBootNextSave;
 #endif
 
+static IOLock *                           gFSLock;
+static uint32_t                           gFSState;
 static IOPolledFileIOVars                gFileVars;
 static IOHibernateVars                   gIOHibernateVars;
 static struct kern_direct_file_io_ref_t * gIOHibernateFileRef;
@@ -203,6 +205,16 @@ static hibernate_cryptvars_t                 gIOHibernateCryptWakeContext;
 static hibernate_graphics_t              _hibernateGraphics;
 static hibernate_graphics_t *            gIOHibernateGraphicsInfo = &_hibernateGraphics;
 
+enum 
+{
+    kFSIdle     = 0,
+    kFSOpening  = 2,
+    kFSOpened   = 3,
+    kFSTimedOut = 4,
+};
+
+static IOReturn IOHibernateDone(IOHibernateVars * vars);
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 enum { kXPRamAudioVolume = 8 };
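
gFSLock and gFSState serialise the potentially slow hibernate-file open
against system sleep. The transitions implemented in this file (the return
from kFSTimedOut to kFSIdle presumably happens when hibernation tears down):

    // kFSIdle     --IOHibernateSystemSleep------------------> kFSOpening
    // kFSOpening  --open finishes before IOKit sleeps-------> kFSOpened
    // kFSOpening  --IOHibernateIOKitSleep wins the race-----> kFSTimedOut
    // kFSOpened   --hibernate/wake completes----------------> kFSIdle
    // All reads and writes of gFSState happen under gFSLock.
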
@@ -594,7 +606,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
                            IOPolledFileIOVars ** fileVars, OSData ** fileExtents,
                            OSData ** imagePath, uint8_t * volumeCryptKey)
 {
-    IOReturn                   err = kIOReturnError;
+    IOReturn                   err = kIOReturnSuccess;
     IOPolledFileIOVars *       vars;
     _OpenFileContext           ctx;
     OSData *                   extentsData;
@@ -605,8 +617,13 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
     dev_t                      block_dev;
     dev_t                      hibernate_image_dev;
     uint64_t                   maxiobytes;
+    AbsoluteTime                startTime, endTime;
+    uint64_t                    nsec;
+
+    vars = IONew(IOPolledFileIOVars, 1);
+    if (!vars) return (kIOReturnNoMemory);
+    bzero(vars, sizeof(*vars));
 
-    vars = &gFileVars;
     do
     {
        HIBLOG("sizeof(IOHibernateImageHeader) == %ld\n", sizeof(IOHibernateImageHeader));
@@ -620,9 +637,9 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
        vars->bufferSize   = ioBuffer->getLength() >> 1;
     
        extentsData = OSData::withCapacity(32);
-    
-       ctx.extents = extentsData;
+       ctx.extents = extentsData;
        ctx.size    = 0;
+       clock_get_uptime(&startTime);
        vars->fileRef = kern_open_file_for_direct_io(filename, 
                                                    &file_extent_callback, &ctx, 
                                                    &block_dev,
@@ -632,12 +649,23 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
                                                     &vars->flags, 
                                                     0, (caddr_t) gIOHibernateCurrentHeader, 
                                                     sizeof(IOHibernateImageHeader));
-       if (!vars->fileRef)
-       {
-           err = kIOReturnNoSpace;
-           break;
-       }
-       gIOHibernateFileRef = vars->fileRef;
+#if 0
+       uint32_t msDelay = (131071 & random());
+       HIBLOG("sleep %d\n", msDelay);
+       IOSleep(msDelay);
+#endif
+        clock_get_uptime(&endTime);
+        SUB_ABSOLUTETIME(&endTime, &startTime);
+        absolutetime_to_nanoseconds(endTime, &nsec);
+
+       if (!vars->fileRef) err = kIOReturnNoSpace;
+
+       IOLockLock(gFSLock);
+       if (kFSOpening != gFSState) err = kIOReturnTimeout;
+       IOLockUnlock(gFSLock);
+
+        HIBLOG("kern_open_file_for_direct_io(%d) took %qd ms\n", err, nsec / 1000000ULL);
+       if (kIOReturnSuccess != err) break;
 
         if (kIOHibernateModeSSDInvert & gIOHibernateMode)
             vars->flags ^= kIOHibernateOptionSSD;
@@ -793,7 +821,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
        if (vars->fileRef)
        {
            kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0, 0, 0);
-           gIOHibernateFileRef = vars->fileRef = NULL;
+           vars->fileRef = NULL;
        }
     }
 
@@ -1090,15 +1118,13 @@ IOHibernateSystemSleep(void)
     OSObject * obj;
     OSString * str;
     bool       dsSSD;
-
-    IOHibernateVars * vars  = &gIOHibernateVars;
-
-    if (vars->fileVars && vars->fileVars->fileRef)
-       // already on the way down
-       return (kIOReturnSuccess);
+    IOHibernateVars * vars;
 
     gIOHibernateState = kIOHibernateStateInactive;
 
+    if (!gIOChosenEntry)
+       gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
+
     gIOHibernateDebugFlags = 0;
     if (kIOLogHibernate & gIOKitDebug)
        gIOHibernateDebugFlags |= kIOHibernateDebugRestoreLogs;
@@ -1125,6 +1151,20 @@ IOHibernateSystemSleep(void)
 
     HIBLOG("hibernate image path: %s\n", gIOHibernateFilename);
 
+    vars = IONew(IOHibernateVars, 1);
+    if (!vars) return (kIOReturnNoMemory);
+    bzero(vars, sizeof(*vars));
+
+    IOLockLock(gFSLock);
+    if (kFSIdle != gFSState)
+    {
+       HIBLOG("hibernate file busy\n");
+       IOLockUnlock(gFSLock);
+       IODelete(vars, IOHibernateVars, 1);
+        return (kIOReturnBusy);
+    }
+    gFSState = kFSOpening;
+    IOLockUnlock(gFSLock);
 
     do
     {
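
Together with the IOPolledFileOpen() changes above, this replaces the old "already on the way down" early return with a small open-state machine guarded by gFSLock: kFSIdle to kFSOpening to kFSOpened, with kFSTimedOut as the abort path when sleep overtakes a slow file open. A compact sketch of that protocol, assuming the four state names used in the surrounding hunks and with std::mutex standing in for the IOLock:

    #include <mutex>

    enum FSState { kFSIdle, kFSOpening, kFSOpened, kFSTimedOut };

    static std::mutex gLock;            // stands in for the IOLock gFSLock
    static FSState    gState = kFSIdle;

    // IOHibernateSystemSleep(): claim the right to open the image file.
    static bool beginOpen() {
        std::lock_guard<std::mutex> l(gLock);
        if (gState != kFSIdle) return false;    // "hibernate file busy"
        gState = kFSOpening;
        return true;
    }

    // Sleep path (IOHibernateIOKitSleep() below): abort an open still in flight.
    static bool timeoutOpen() {
        std::lock_guard<std::mutex> l(gLock);
        if (gState != kFSOpening) return false;
        gState = kFSTimedOut;                   // open path will notice and bail
        return true;
    }

    // End of the open path: publish on success, roll back if failed or timed out.
    static bool finishOpen(bool opened) {
        std::lock_guard<std::mutex> l(gLock);
        if (opened && (gState == kFSOpening)) { gState = kFSOpened; return true; }
        gState = kFSIdle;                       // caller closes the file, frees vars
        return false;
    }

Every transition happens under the lock, so the sleep path and the open path can race without either seeing a half-published state.
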
@@ -1169,7 +1209,7 @@ IOHibernateSystemSleep(void)
             {
                 uintptr_t smcVars[2];
                 smcVars[0] = sizeof(vars->volumeCryptKey);
-                smcVars[1] = (uintptr_t)(void *) &vars->volumeCryptKey[0];
+                smcVars[1] = (uintptr_t)(void *) &gIOHibernateVars.volumeCryptKey[0];
 
                 IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars));
                 bzero(smcVars, sizeof(smcVars));
@@ -1224,8 +1264,6 @@ IOHibernateSystemSleep(void)
             if (regEntry && !gIOOptionsEntry)
                 regEntry->release();
         }
-        if (!gIOChosenEntry)
-            gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
 
        if (gIOOptionsEntry)
        {
@@ -1405,10 +1443,31 @@ IOHibernateSystemSleep(void)
        }
        // --
 
+    }
+    while (false);
+
+    IOLockLock(gFSLock);
+    if ((kIOReturnSuccess == err) && (kFSOpening == gFSState))
+    {
+       gFSState = kFSOpened;
+       gIOHibernateVars = *vars;
+       gFileVars = *vars->fileVars;
+       gIOHibernateVars.fileVars = &gFileVars;
+       gIOHibernateFileRef = gFileVars.fileRef;
        gIOHibernateCurrentHeader->signature = kIOHibernateHeaderSignature;
        gIOHibernateState = kIOHibernateStateHibernating;
     }
-    while (false);
+    else
+    {
+       HIBLOG("hibernate file close due to timeout\n");
+       if (vars->fileVars && vars->fileVars->fileRef) kern_close_file_for_direct_io(vars->fileVars->fileRef, 0, 0, 0, 0, 0);
+       IOHibernateDone(vars);
+       gFSState = kFSIdle;
+    }
+    IOLockUnlock(gFSLock);
+
+    if (vars->fileVars) IODelete(vars->fileVars, IOPolledFileIOVars, 1);
+    IODelete(vars, IOHibernateVars, 1);
 
     return (err);
 }
@@ -1533,14 +1592,40 @@ ProgressUpdate(hibernate_graphics_t * display, uint8_t * screen, int32_t firstBl
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+IOReturn
+IOHibernateIOKitSleep(void)
+{
+    IOReturn ret = kIOReturnSuccess;
+    IOLockLock(gFSLock);
+    if (kFSOpening == gFSState)
+    {
+       gFSState = kFSTimedOut;
+       HIBLOG("hibernate file open timed out\n");
+       ret = kIOReturnTimeout;
+    }
+    IOLockUnlock(gFSLock);
+    return (ret);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 IOReturn
 IOHibernateSystemHasSlept(void)
 {
+    IOReturn          ret = kIOReturnSuccess;
     IOHibernateVars * vars  = &gIOHibernateVars;
-    OSObject        * obj;
+    OSObject        * obj = 0;
     OSData          * data;
 
-    obj = IOService::getPMRootDomain()->copyProperty(kIOHibernatePreviewBufferKey);
+    IOLockLock(gFSLock);
+    if ((kFSOpened != gFSState) && gIOHibernateMode)
+    {
+       ret = kIOReturnTimeout;
+    }
+    IOLockUnlock(gFSLock);
+    if (kIOReturnSuccess != ret) return (ret);
+
+    if (gIOHibernateMode) obj = IOService::getPMRootDomain()->copyProperty(kIOHibernatePreviewBufferKey);
     vars->previewBuffer = OSDynamicCast(IOMemoryDescriptor, obj);
     if (obj && !vars->previewBuffer)
        obj->release();
@@ -1587,7 +1672,7 @@ IOHibernateSystemHasSlept(void)
     if (gIOOptionsEntry)
         gIOOptionsEntry->sync();
 
-    return (kIOReturnSuccess);
+    return (ret);
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -1627,8 +1712,21 @@ MergeDeviceTree(DeviceTreeNode * entry, IORegistryEntry * regEntry)
 IOReturn
 IOHibernateSystemWake(void)
 {
-    IOHibernateVars * vars  = &gIOHibernateVars;
+    if (kFSOpened == gFSState)
+    {
+       IOHibernateDone(&gIOHibernateVars);
+    }
+    else
+    {
+        IOService::getPMRootDomain()->removeProperty(kIOHibernateOptionsKey);
+        IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey);
+    }
+    return (kIOReturnSuccess);
+}
 
+static IOReturn
+IOHibernateDone(IOHibernateVars * vars)
+{
     hibernate_teardown(vars->page_list, vars->page_list_wired);
 
     if (vars->videoMapping)
@@ -1766,20 +1864,49 @@ IOHibernateSystemWake(void)
 IOReturn
 IOHibernateSystemPostWake(void)
 {
-    if (gIOHibernateFileRef)
+    struct kern_direct_file_io_ref_t * fileRef;
+
+    if (kFSOpened == gFSState)
     {
        // invalidate & close the image file
        gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
-       kern_close_file_for_direct_io(gIOHibernateFileRef,
+       if ((fileRef = gIOHibernateFileRef))
+       {
+           gIOHibernateFileRef = 0;
+           kern_close_file_for_direct_io(fileRef,
                                       0, (caddr_t) gIOHibernateCurrentHeader, 
                                       sizeof(IOHibernateImageHeader),
                                       sizeof(IOHibernateImageHeader),
                                       gIOHibernateCurrentHeader->imageSize);
-        gIOHibernateFileRef = 0;
+       }
+       gFSState = kFSIdle;
     }
     return (kIOReturnSuccess);
 }
 
+bool IOHibernateWasScreenLocked(void)
+{
+    bool ret = false;
+    if ((kIOHibernateStateWakingFromHibernate == gIOHibernateState) && gIOChosenEntry)
+    {
+       OSData *
+       data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOScreenLockStateKey));
+       if (data) switch (*((uint32_t *)data->getBytesNoCopy()))
+       {
+           case kIOScreenLockLocked:
+           case kIOScreenLockFileVaultDialog:
+               ret = true;
+               break;
+           case kIOScreenLockNoLock:
+           case kIOScreenLockUnlocked:
+           default:
+               ret = false;
+               break;
+       }
+    }
+    return (ret);
+}
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 SYSCTL_STRING(_kern, OID_AUTO, hibernatefile, 
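
The new IOHibernateWasScreenLocked() reads the kIOScreenLockStateKey property from /chosen and treats the locked and FileVault-dialog states as "locked". A sketch of the byte-level decoding, with made-up state codes standing in for the real kIOScreenLock* constants:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Hypothetical state codes; the real values are the kIOScreenLock* enums.
    enum : uint32_t { kNoLock = 0, kUnlocked = 1, kLocked = 2, kFileVaultDialog = 3 };

    // Interpret a registry property's raw bytes the way the function above
    // does: the first four bytes hold the lock state.
    static bool wasScreenLocked(const void * bytes, std::size_t len) {
        if (!bytes || (len < sizeof(uint32_t))) return false;
        uint32_t state;
        std::memcpy(&state, bytes, sizeof(state));   // avoids an unaligned cast
        return (state == kLocked) || (state == kFileVaultDialog);
    }
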
@@ -1810,6 +1937,8 @@ IOHibernateSystemInit(IOPMrootDomain * rootDomain)
     sysctl_register_oid(&sysctl__kern_hibernatefile);
     sysctl_register_oid(&sysctl__kern_bootsignature);
     sysctl_register_oid(&sysctl__kern_hibernatemode);
+
+    gFSLock = IOLockAlloc();
 }
 
 
@@ -2016,7 +2145,7 @@ hibernate_write_image(void)
 
         hibernateBase = HIB_BASE; /* Defined in PAL headers */
 
-        hibernateEnd = (sectHIBB + sectSizeHIB);
+        hibernateEnd = (segHIBB + segSizeHIB);
 
         // copy out restore1 code
 
@@ -2038,7 +2167,7 @@ hibernate_write_image(void)
         header->restore1CodeOffset = ((uintptr_t) &hibernate_machine_entrypoint)      - hibernateBase;
         header->restore1StackOffset = ((uintptr_t) &gIOHibernateRestoreStackEnd[0]) - 64 - hibernateBase;
 
-        // sum __HIB sect, with zeros for the stack
+        // sum __HIB seg, with zeros for the stack
         src = (uint8_t *) trunc_page(hibernateBase);
         for (page = 0; page < count; page++)
         {
@@ -2050,7 +2179,7 @@ hibernate_write_image(void)
         }
         sum1 = restore1Sum;
     
-        // write the __HIB sect, with zeros for the stack
+        // write the __HIB seg, with zeros for the stack
 
         src = (uint8_t *) trunc_page(hibernateBase);
         count = ((uintptr_t) &gIOHibernateRestoreStack[0]) - trunc_page(hibernateBase);
@@ -2075,6 +2204,10 @@ hibernate_write_image(void)
                 break;
         }
 
+       vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1));
+       vars->fileVars->encryptEnd   = UINT64_MAX;
+       HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart);
+
         // write the preview buffer
 
         if (vars->previewBuffer)
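
The encryptStart computation rounds the current file position down to an AES block boundary with position & ~(AES_BLOCK_SIZE - 1), which is valid because the 16-byte AES block size is a power of two, so the mask simply clears the low four bits. A small self-checking sketch:

    #include <cassert>
    #include <cstdint>

    enum { AES_BLOCK_SIZE = 16 };   // AES always operates on 16-byte blocks

    // Round a file position down to the containing AES block, as the
    // encryptStart line above does.
    static uint64_t alignToBlock(uint64_t position) {
        return position & ~((uint64_t)AES_BLOCK_SIZE - 1);
    }

    int main() {
        assert(alignToBlock(0x12345) == 0x12340);
        assert(alignToBlock(0x10)    == 0x10);    // already aligned
        assert(alignToBlock(0xf)     == 0x0);
        return 0;
    }
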
@@ -2099,6 +2232,9 @@ hibernate_write_image(void)
                 break;
 
             src = (uint8_t *) vars->previewBuffer->getPhysicalSegment(0, NULL, _kIOMemorySourceSegment);
+
+            ((hibernate_preview_t *)src)->lockTime = gIOConsoleLockTime;
+
             count = vars->previewBuffer->getLength();
 
             header->previewPageListSize = ppnum;
@@ -2198,20 +2334,16 @@ hibernate_write_image(void)
 
         for (pageType = kWiredEncrypt; pageType >= kUnwiredEncrypt; pageType--)
         {
-            if (needEncrypt && (kEncrypt & pageType))
-            {
+           if (kUnwiredEncrypt == pageType)
+           {
+               // start unwired image
                 vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1));
                 vars->fileVars->encryptEnd   = UINT64_MAX;
                 HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart);
-
-                if (kUnwiredEncrypt == pageType)
-                {
-                    // start unwired image
-                    bcopy(&cryptvars->aes_iv[0], 
-                            &gIOHibernateCryptWakeContext.aes_iv[0], 
-                            sizeof(cryptvars->aes_iv));
-                    cryptvars = &gIOHibernateCryptWakeContext;
-                }
+               bcopy(&cryptvars->aes_iv[0], 
+                       &gIOHibernateCryptWakeContext.aes_iv[0], 
+                       sizeof(cryptvars->aes_iv));
+               cryptvars = &gIOHibernateCryptWakeContext;
             }
             for (iterDone = false, ppnum = 0; !iterDone; )
             {
@@ -2377,6 +2509,7 @@ hibernate_write_image(void)
         header->restore1Sum  = restore1Sum;
         header->image1Sum    = sum1;
         header->image2Sum    = sum2;
+        header->sleepTime    = gIOLastSleepTime.tv_sec;
     
         count = vars->fileExtents->getLength();
         if (count > sizeof(header->fileExtentMap))
@@ -2496,8 +2629,6 @@ hibernate_machine_init(void)
     uint64_t     nsec;
     uint32_t     lastProgressStamp = 0;
     uint32_t     progressStamp;
-    uint64_t    progressZeroPosition = 0;
-    uint32_t    blob, lastBlob = (uint32_t) -1L;
     hibernate_cryptvars_t * cryptvars = 0;
 
     IOHibernateVars * vars  = &gIOHibernateVars;
@@ -2522,15 +2653,9 @@ hibernate_machine_init(void)
            gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], 
            gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]);
 
-    HIBPRINT("video %x %d %d %d status %x\n",
-           gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, 
-           gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height, gIOHibernateGraphicsInfo->gfxStatus); 
-
     if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode)
         hibernate_page_list_discard(vars->page_list);
 
-    boot_args *args = (boot_args *) PE_state.bootArgs;
-
     cryptvars = (kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0;
 
     if (gIOHibernateCurrentHeader->handoffPageCount > gIOHibernateHandoffPageCount)
@@ -2587,9 +2712,11 @@ hibernate_machine_init(void)
     if (cryptvars && !foundCryptData)
        panic("hibernate handoff");
 
-    if (vars->videoMapping 
-       && gIOHibernateGraphicsInfo->physicalAddress
-       && (args->Video.v_baseAddr == gIOHibernateGraphicsInfo->physicalAddress))
+    HIBPRINT("video %x %d %d %d status %x\n",
+           gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, 
+           gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height, gIOHibernateGraphicsInfo->gfxStatus); 
+
+    if (vars->videoMapping && gIOHibernateGraphicsInfo->physicalAddress)
     {
         vars->videoMapSize = round_page(gIOHibernateGraphicsInfo->height 
                                         * gIOHibernateGraphicsInfo->rowBytes);
@@ -2598,6 +2725,10 @@ hibernate_machine_init(void)
                     vars->videoMapSize, kIOMapInhibitCache );
     }
 
+    if (vars->videoMapSize)
+        ProgressUpdate(gIOHibernateGraphicsInfo, 
+                        (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount);
+
     uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();
     uint32_t decoOffset;
 
@@ -2609,21 +2740,8 @@ hibernate_machine_init(void)
     err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0);
     HIBLOG("IOHibernatePollerOpen(%x)\n", err);
 
-    if (gIOHibernateCurrentHeader->previewSize)
-        progressZeroPosition = gIOHibernateCurrentHeader->previewSize 
-                             + gIOHibernateCurrentHeader->fileExtentMapSize 
-                             - sizeof(gIOHibernateCurrentHeader->fileExtentMap) 
-                             + ptoa_64(gIOHibernateCurrentHeader->restore1PageCount);
-
     IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size);
 
-    if (vars->videoMapSize)
-    {
-        lastBlob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount)
-                        / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition);
-        ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, 0, lastBlob);
-    }
-
     // kick off the read ahead
     vars->fileVars->io          = false;
     vars->fileVars->bufferHalf   = 0;
@@ -2714,17 +2832,6 @@ hibernate_machine_init(void)
            pagesDone++;
            pagesRead++;
 
-            if (vars->videoMapSize && (0 == (1023 & pagesDone)))
-            {
-                blob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount)
-                        / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition);
-                if (blob != lastBlob)
-                {
-                    ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, lastBlob, blob);
-                    lastBlob = blob;
-                }
-            }
-
            if (0 == (8191 & pagesDone))
            {
                clock_get_uptime(&endTime);
@@ -2753,10 +2860,6 @@ hibernate_machine_init(void)
 
     err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState);
 
-    if (vars->videoMapSize)
-        ProgressUpdate(gIOHibernateGraphicsInfo, 
-                        (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount);
-
     clock_get_uptime(&endTime);
 
     IOService::getPMRootDomain()->pmStatsRecordEvent( 
index 7e7e95fe6787578f35c9b5dac6be80cdff6db2f7..2c1378e5f9b2c0c49221a9e218715f35673ea3ee 100644 (file)
@@ -101,13 +101,10 @@ extern "C"
 uint32_t
 hibernate_sum_page(uint8_t *buf, uint32_t ppnum);
 
-extern vm_offset_t sectHIBB;
-extern unsigned long sectSizeHIB;
-extern vm_offset_t sectDATAB;
-extern unsigned long sectSizeDATA;
-#if defined(__i386__) || defined(__x86_64__)
-extern vm_offset_t sectINITPTB;
-#endif
+extern vm_offset_t segHIBB;
+extern unsigned long segSizeHIB;
+extern vm_offset_t segDATAB;
+extern unsigned long segSizeDATA;
 
 extern ppnum_t gIOHibernateHandoffPages[];
 extern uint32_t gIOHibernateHandoffPageCount;
index 79410326b5521cd67088b64f43e4a582e47977e6..10bd705f509f94fbbae7490dbc61920d9eb91bea 100644 (file)
@@ -32,7 +32,6 @@
 #include <IOKit/IOHibernatePrivate.h>
 #include <IOKit/IOLib.h>
 #include <pexpert/boot.h>
-#include <crypto/aes.h>
 #include <libkern/libkern.h>
 
 #include <libkern/WKdm.h>
index 8b49024a174488864b5b5c1c0281f1a95588cba4..08ecc626d33d3d950c387ff12a551edd0fb0ded7 100644 (file)
@@ -210,14 +210,14 @@ bool IOInterruptEventSource::checkForWork()
        {
                if (trace)
                        IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION),
-                                                                        (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                                VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
                
                // Call the handler
                (*intAction)(owner, this, numInts);
                
                if (trace)
                        IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
-                                                                  (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                              VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
                
                consumerCount = cacheProdCount;
                if (autoDisable && !explicitDisable)
@@ -228,14 +228,14 @@ bool IOInterruptEventSource::checkForWork()
        {
                if (trace)
                        IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION),
-                                                                        (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                                VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
                
                // Call the handler
                (*intAction)(owner, this, -numInts);
                
                if (trace)
                        IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
-                                                                  (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+                                              VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
                
                consumerCount = cacheProdCount;
                if (autoDisable && !explicitDisable)
index 21048d88cb4eda134f4e4425d8996c6684a46eb6..c170d83a68e962a6acb96d4aabd8ba3a0c47eaf2 100644 (file)
@@ -98,59 +98,12 @@ void IOPrintPlane( const IORegistryPlane * plane )
     iter->release();
 }
 
-void dbugprintf(const char *fmt, ...);
-void db_dumpiojunk( const IORegistryPlane * plane );
-
-void db_piokjunk(void) {
-
-       dbugprintf("\nDT plane:\n");
-       db_dumpiojunk( gIODTPlane );
-       dbugprintf("\n\nService plane:\n");
-       db_dumpiojunk( gIOServicePlane );
-    dbugprintf("\n\n"
-           "ivar kalloc()       0x%08x\n"
-           "malloc()            0x%08x\n"
-            "containers kalloc() 0x%08x\n"
-           "IOMalloc()          0x%08x\n"
-            "----------------------------------------\n",
-           debug_ivars_size,
-            debug_malloc_size,
-            debug_container_malloc_size,
-            debug_iomalloc_size
-            );
-
+void db_piokjunk(void)
+{
 }
 
-
-void db_dumpiojunk( const IORegistryPlane * plane )
+void db_dumpiojunk( const IORegistryPlane * plane __unused )
 {
-    IORegistryEntry *          next;
-    IORegistryIterator *       iter;
-    OSOrderedSet *             all;
-    char                       format[] = "%xxxs";
-    IOService *                        service;
-
-    iter = IORegistryIterator::iterateOver( plane );
-
-    all = iter->iterateAll();
-    if( all) {
-        dbugprintf("Count %d\n", all->getCount() );
-        all->release();
-    } else dbugprintf("Empty\n");
-
-    iter->reset();
-    while( (next = iter->getNextObjectRecursive())) {
-               snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane ));
-               dbugprintf( format, "");
-               dbugprintf( "%s", next->getName( plane ));
-               if( (next->getLocation( plane )))
-                               dbugprintf("@%s", next->getLocation( plane ));
-               dbugprintf(" <class %s", next->getMetaClass()->getClassName());
-                       if( (service = OSDynamicCast(IOService, next)))
-                               dbugprintf(", busy %ld", service->getBusyState());
-               dbugprintf( ">\n");
-    }
-    iter->release();
 }
 
 void IOPrintMemory( void )
index 5a74159a4a31886a6f4e05273a71a990d7cc7376..27c55e7c4928621842d921ece1bd38b43ab352f8 100644 (file)
@@ -40,7 +40,7 @@ __BEGIN_DECLS
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-#if !defined(NO_KDEBUG)
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
 
 #define IOServiceTrace(csc, a, b, c, d) do {                           \
     if(kIOTraceIOService & gIOKitDebug) {                              \
@@ -48,7 +48,7 @@ __BEGIN_DECLS
     }                                                                  \
 } while(0)
 
-#else /* NO_KDEBUG */
+#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */
 
 #define IOServiceTrace(csc, a, b, c, d) do {   \
   (void)a;                                     \
@@ -57,7 +57,7 @@ __BEGIN_DECLS
   (void)d;                                     \
 } while (0)
 
-#endif /* NO_KDEBUG */
+#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
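
The guard moves from the ad-hoc NO_KDEBUG opt-out to the graded KDEBUG_LEVEL scheme: IOServiceTrace compiles away whenever tracing is configured below the standard level, while the disabled variant still "uses" its arguments so no unused-variable warnings appear. A sketch of the same compile-time gate with illustrative level values:

    #include <cstdio>

    // Illustrative levels; the kernel defines the real KDEBUG_LEVEL_* values.
    #define KDEBUG_LEVEL_NONE     0
    #define KDEBUG_LEVEL_STANDARD 2
    #define KDEBUG_LEVEL          KDEBUG_LEVEL_STANDARD

    #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
    #define TRACE(a) do { std::printf("trace %d\n", (a)); } while (0)
    #else
    #define TRACE(a) do { (void)(a); } while (0)   // swallow the argument quietly
    #endif

    int main() { TRACE(42); return 0; }
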
 
@@ -99,7 +99,7 @@ extern ppnum_t IOGetLastPageNumber(void);
 extern ppnum_t gIOLastPage;
 
 /* Physical to physical copy (ints must be disabled) */
-extern void bcopy_phys(addr64_t from, addr64_t to, int size);
+extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t size);
 
 __END_DECLS
 
@@ -164,9 +164,26 @@ struct IODMACommandInternal
     UInt64   fActualByteCount;
 };
 
+struct IOMemoryDescriptorDevicePager {
+    void *                      devicePager;
+    unsigned int            pagerContig:1;
+    unsigned int            unused:31;
+    IOMemoryDescriptor * memory;
+};
+
+struct IOMemoryDescriptorReserved {
+    IOMemoryDescriptorDevicePager dp;
+    uint64_t                      preparationID;
+    // for kernel IOMD subclasses... they have no expansion
+    uint64_t                      kernReserved[4];
+};
+
+
 extern "C" struct timeval gIOLastSleepTime;
 extern "C" struct timeval gIOLastWakeTime;
 
+extern clock_sec_t gIOConsoleLockTime;
+
 extern "C" void IOKitResetTime( void );
 extern "C" void IOKitInitializeTime( void );
 
@@ -176,4 +193,7 @@ extern "C" OSString * IOCopyLogNameForPID(int pid);
 extern "C" void IOSetKeyStoreData(IOMemoryDescriptor * data);
 #endif
 
+void IOScreenLockTimeUpdate(clock_sec_t secs);
+
+
 #endif /* ! _IOKIT_KERNELINTERNAL_H */
index b2714fc9b516986e62fc6f2e2fd7e97556dd5b88..886176acf7b3510f54197c1e6642dce70316193c 100644 (file)
@@ -237,7 +237,10 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
     alignMask = alignment - 1;
     adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t);
 
-    if (adjustedSize >= page_size) {
+    if (size > adjustedSize) {
+           address = 0;    /* overflow detected */
+    }
+    else if (adjustedSize >= page_size) {
 
         kr = kernel_memory_allocate(kernel_map, &address,
                                        size, alignMask, 0);
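
The added branch catches unsigned wrap-around: adjustedSize is size plus bookkeeping overhead, and if that addition wraps modulo 2^N the result comes out smaller than size, so the allocation must fail rather than silently succeed with a short buffer. A self-contained sketch of the check:

    #include <cstddef>
    #include <limits>

    // Detect overflow in size + overhead, as the IOMallocAligned fix does.
    static bool adjustSize(std::size_t size, std::size_t overhead, std::size_t * adjusted) {
        std::size_t a = size + overhead;   // may wrap modulo 2^N
        if (a < size) return false;        // wrapped: refuse the allocation
        *adjusted = a;
        return true;
    }

    int main() {
        std::size_t a;
        bool ok    = adjustSize(100, 24, &a);                                         // fine
        bool wraps = adjustSize(std::numeric_limits<std::size_t>::max() - 4, 24, &a); // overflows
        return (ok && !wraps) ? 0 : 1;
    }
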
index 9b459094542f5dc4046584676621eae5dd633939..fe8f9f2713ace17a8fa1b95e6873a0fe2eb42d1c 100644 (file)
@@ -181,18 +181,12 @@ kern_return_t device_data_action(
                vm_object_offset_t      offset, 
                vm_size_t               size)
 {
-    struct ExpansionData {
-        void *                         devicePager;
-        unsigned int                   pagerContig:1;
-        unsigned int                   unused:31;
-       IOMemoryDescriptor *            memory;
-    };
     kern_return_t       kr;
-    ExpansionData *      ref = (ExpansionData *) device_handle;
+    IOMemoryDescriptorReserved * ref = (IOMemoryDescriptorReserved *) device_handle;
     IOMemoryDescriptor * memDesc;
 
     LOCK;
-    memDesc = ref->memory;
+    memDesc = ref->dp.memory;
     if( memDesc)
     {
        memDesc->retain();
@@ -210,15 +204,9 @@ kern_return_t device_data_action(
 kern_return_t device_close(
                uintptr_t     device_handle)
 {
-    struct ExpansionData {
-        void *                         devicePager;
-        unsigned int                   pagerContig:1;
-        unsigned int                   unused:31;
-       IOMemoryDescriptor *            memory;
-    };
-    ExpansionData *   ref = (ExpansionData *) device_handle;
+    IOMemoryDescriptorReserved * ref = (IOMemoryDescriptorReserved *) device_handle;
 
-    IODelete( ref, ExpansionData, 1 );
+    IODelete( ref, IOMemoryDescriptorReserved, 1 );
 
     return( kIOReturnSuccess );
 }
@@ -935,7 +923,7 @@ void IOGeneralMemoryDescriptor::free()
     if( reserved)
     {
        LOCK;
-       reserved->memory = 0;
+       reserved->dp.memory = 0;
        UNLOCK;
     }
 
@@ -961,11 +949,19 @@ void IOGeneralMemoryDescriptor::free()
        _ranges.v = NULL;
     }
 
-    if (reserved && reserved->devicePager)
-       device_pager_deallocate( (memory_object_t) reserved->devicePager );
+    if (reserved)
+    {
+        if (reserved->dp.devicePager)
+        {
+            // memEntry holds a ref on the device pager which owns reserved
+            // (IOMemoryDescriptorReserved) so no reserved access after this point
+            device_pager_deallocate( (memory_object_t) reserved->dp.devicePager );
+        }
+        else
+            IODelete(reserved, IOMemoryDescriptorReserved, 1);
+        reserved = NULL;
+    }
 
-    // memEntry holds a ref on the device pager which owns reserved
-    // (ExpansionData) so no reserved access after this point
     if (_memEntry)
         ipc_port_release_send( (ipc_port_t) _memEntry );
 
@@ -1151,7 +1147,10 @@ IOGeneralMemoryDescriptor::getPreparationID( void )
        return (kIOPreparationIDUnprepared);
 
     if (_flags & (kIOMemoryTypePhysical | kIOMemoryTypePhysical64))
-       return (kIOPreparationIDAlwaysPrepared);
+    {
+        IOMemoryDescriptor::setPreparationID();
+        return (IOMemoryDescriptor::getPreparationID());
+    }
 
     if (!_memoryEntries || !(dataP = getDataP(_memoryEntries)))
        return (kIOPreparationIDUnprepared);
@@ -1163,10 +1162,35 @@ IOGeneralMemoryDescriptor::getPreparationID( void )
     return (dataP->fPreparationID);
 }
 
-uint64_t
-IOMemoryDescriptor::getPreparationID( void )
+IOMemoryDescriptorReserved * IOMemoryDescriptor::getKernelReserved( void )
 {
-    return (kIOPreparationIDUnsupported);    
+    if (!reserved)
+    {
+        reserved = IONew(IOMemoryDescriptorReserved, 1);
+        if (reserved)
+            bzero(reserved, sizeof(IOMemoryDescriptorReserved));
+    }
+    return (reserved);
+}
+
+void IOMemoryDescriptor::setPreparationID( void )
+{
+    if (getKernelReserved() && (kIOPreparationIDUnprepared == reserved->preparationID))
+    {
+#if defined(__ppc__ )
+        reserved->preparationID = gIOMDPreparationID++;
+#else
+        reserved->preparationID = OSIncrementAtomic64(&gIOMDPreparationID);
+#endif
+    }
+}
+
+uint64_t IOMemoryDescriptor::getPreparationID( void )
+{
+    if (reserved)
+        return (reserved->preparationID);    
+    else
+        return (kIOPreparationIDUnsupported);    
 }
 
 IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const
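
getKernelReserved() allocates the side structure only on first use, and setPreparationID() hands out IDs through a 64-bit atomic increment so concurrent prepares never observe the same value. A sketch of the pattern with std::atomic standing in for OSIncrementAtomic64 (type and sentinel names here are illustrative):

    #include <atomic>
    #include <cstdint>

    static std::atomic<uint64_t> gPreparationID{1};   // plays the role of gIOMDPreparationID
    static const uint64_t kUnprepared = 0;            // illustrative sentinel

    struct Reserved { uint64_t preparationID = kUnprepared; };

    struct Descriptor {
        Reserved * reserved = nullptr;

        // Lazy allocation, as in getKernelReserved() (kernel: IONew + bzero).
        Reserved * kernelReserved() {
            if (!reserved) reserved = new Reserved;
            return reserved;
        }

        // Assign once; fetch_add(1) corresponds to OSIncrementAtomic64().
        void setPreparationID() {
            Reserved * r = kernelReserved();
            if (r && (r->preparationID == kUnprepared))
                r->preparationID = gPreparationID.fetch_add(1);
        }

        uint64_t getPreparationID() const {
            return reserved ? reserved->preparationID : kUnprepared;
        }
    };
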
@@ -1830,6 +1854,7 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options,
                                                 IOByteCount offset, IOByteCount length )
 {
     IOByteCount remaining;
+    unsigned int res;
     void (*func)(addr64_t pa, unsigned int count) = 0;
 
     switch (options)
@@ -1855,6 +1880,7 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options,
     if (kIOMemoryThreadSafe & _flags)
        LOCK;
 
+    res = 0x0UL;
     remaining = length = min(length, getLength() - offset);
     while (remaining)
     // (process another target segment?)
@@ -1882,8 +1908,12 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options,
     return (remaining ? kIOReturnUnderrun : kIOReturnSuccess);
 }
 
+#if defined(__i386__) || defined(__x86_64__)
 extern vm_offset_t             first_avail;
 #define io_kernel_static_end   first_avail
+#else
+#error io_kernel_static_end is undefined for this architecture
+#endif
 
 static kern_return_t
 io_get_kernel_static_upl(
@@ -2365,11 +2395,14 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
                     {
                         segDestAddr  = address;
                         segLen      -= offset;
+                        srcAddr     += offset;
                         mapLength    = length;
 
                         while (true)
                         {
                             vm_prot_t cur_prot, max_prot;
+
+                            if (segLen > length) segLen = length;
                             kr = mach_vm_remap(map, &segDestAddr, round_page_64(segLen), PAGE_MASK, 
                                                     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
                                                     get_task_map(_task), trunc_page_64(srcAddr),
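
The fix advances the source address by the caller's offset and clamps each pass so the remap never runs past the requested mapping length. The clamping arithmetic in isolation, over a hypothetical segment:

    #include <algorithm>
    #include <cstdint>

    struct Seg { uint64_t addr; uint64_t len; };   // hypothetical source segment

    // Skip 'offset' bytes into the segment, then cap the amount handed to the
    // remap at the requested length, mirroring the two added lines above.
    static uint64_t remapLength(Seg seg, uint64_t offset, uint64_t length) {
        seg.addr += offset;                    // srcAddr += offset
        seg.len  -= offset;                    // segLen  -= offset
        return std::min(seg.len, length);      // if (segLen > length) segLen = length
    }
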
@@ -2430,13 +2463,10 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
 
            pa = getPhysicalSegment( offset, &segLen, kIOMemoryMapperNone );
 
-            if( !reserved) {
-                reserved = IONew( ExpansionData, 1 );
-                if( !reserved)
-                    continue;
-            }
-            reserved->pagerContig = (1 == _rangesCount);
-           reserved->memory = this;
+            if( !getKernelReserved())
+                continue;
+            reserved->dp.pagerContig = (1 == _rangesCount);
+           reserved->dp.memory      = this;
 
            /*What cache mode do we need*/
             switch(options & kIOMapCacheMask ) {
@@ -2477,7 +2507,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
                    break;
             }
 
-           flags |= reserved->pagerContig ? DEVICE_PAGER_CONTIGUOUS : 0;
+           flags |= reserved->dp.pagerContig ? DEVICE_PAGER_CONTIGUOUS : 0;
 
             pager = device_pager_setup( (memory_object_t) 0, (uintptr_t) reserved, 
                                                                size, flags);
@@ -2496,11 +2526,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
                 }
             }
            if( pager && sharedMem)
-               reserved->devicePager    = pager;
-           else {
-               IODelete( reserved, ExpansionData, 1 );
-               reserved = 0;
-           }
+               reserved->dp.devicePager    = pager;
 
         } while( false );
 
@@ -2643,6 +2669,10 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref)
                     SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode);
                     break;
 
+               case kIOMapCopybackInnerCache:
+                    SET_MAP_MEM(MAP_MEM_INNERWBACK, memEntryCacheMode);
+                    break;
+
                case kIOMapDefaultCache:
                default:
                     SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode);
@@ -2783,7 +2813,7 @@ IOReturn IOMemoryDescriptor::doMap(
        pageOffset = sourceAddr - trunc_page( sourceAddr );
 
        if( reserved)
-           pager = (memory_object_t) reserved->devicePager;
+           pager = (memory_object_t) reserved->dp.devicePager;
        else
            pager = MACH_PORT_NULL;
 
@@ -2839,7 +2869,7 @@ IOReturn IOMemoryDescriptor::doMap(
                mapping->fMemory->_memEntry = me;
            }
            if (pager)
-               err = handleFault( reserved->devicePager, mapping->fAddressMap, mapping->fAddress, offset, length, options );
+               err = handleFault( pager, mapping->fAddressMap, mapping->fAddress, offset, length, options );
        }
        else
        {
@@ -2871,8 +2901,8 @@ IOReturn IOMemoryDescriptor::doMap(
 
 #if DEBUG
        if (kIOLogMapping & gIOKitDebug)
-           IOLog("mapping(%x) desc %p @ %lx, map %p, address %qx, offset %qx, length %qx\n", 
-                   err, this, sourceAddr, mapping, address, offset, length);
+           IOLog("mapping(%x) desc %p @ %qx, map %p, address %qx, offset %qx, length %qx\n", 
+                 err, this, (uint64_t)sourceAddr, mapping, address, offset, length);
 #endif
 
            if (err == KERN_SUCCESS)
@@ -2950,7 +2980,7 @@ IOReturn IOMemoryDescriptor::handleFault(
 
 
         if( pager) {
-            if( reserved && reserved->pagerContig) {
+            if( reserved && reserved->dp.pagerContig) {
                 IOPhysicalLength       allLen;
                 addr64_t               allPhys;
 
@@ -3424,8 +3454,8 @@ IOMemoryMap * IOMemoryDescriptor::createMappingInTask(
 
 #if DEBUG
     if (!result)
-       IOLog("createMappingInTask failed desc %p, addr %qx, options %lx, offset %qx, length %qx\n",
-                   this, atAddress, options, offset, length);
+       IOLog("createMappingInTask failed desc %p, addr %qx, options %x, offset %qx, length %llx\n",
+               this, atAddress, (uint32_t) options, offset, length);
 #endif
 
     return (result);
index 85ac1a2ec01929f0d4a9aa7de3899ef26afbe7d0..51a72cfb1890369138ec28595408ee09af7f13e1 100644 (file)
@@ -228,23 +228,24 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const
   bool                 result, hasPrivilege;
   UInt32               variablePerm;
   const OSSymbol       *key;
-  OSDictionary         *dict = 0, *tmpDict = 0;
+  OSDictionary         *dict;
   OSCollectionIterator *iter = 0;
   
   // Verify permissions.
   hasPrivilege = (kIOReturnSuccess == IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege));
 
-  tmpDict = OSDictionary::withCapacity(1);
-  if (tmpDict == 0) return false;
+  dict = OSDictionary::withCapacity(1);
+  if (dict == 0) return false;
 
   if (_ofDict == 0) {
     /* No nvram. Return an empty dictionary. */
-    dict = tmpDict;
   } else {
     /* Copy properties with client privilege. */
     iter = OSCollectionIterator::withCollection(_ofDict);
-    if (iter == 0) return false;
-    
+    if (iter == 0) {
+      dict->release();
+      return false;
+    }
     while (1) {
       key = OSDynamicCast(OSSymbol, iter->getNextObject());
       if (key == 0) break;
@@ -252,15 +253,14 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const
       variablePerm = getOFVariablePerm(key);
       if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) &&
          ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {
-       tmpDict->setObject(key, _ofDict->getObject(key));
+       dict->setObject(key, _ofDict->getObject(key));
       }
-      dict = tmpDict;
     }
   }
 
   result = dict->serialize(s);
-  
-  if (tmpDict != 0) tmpDict->release();
+  dict->release();
   if (iter != 0) iter->release();
   
   return result;
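
The rewrite drops the redundant tmpDict alias and, more importantly, releases the dictionary on the early iterator-failure path that previously leaked it. The ownership discipline in miniature, with a toy refcounted object in place of OSDictionary:

    // Toy refcounted object standing in for OSDictionary.
    struct Ref {
        int rc = 1;
        void release() { if (--rc == 0) delete this; }
    };

    // Shape of the fixed serializeProperties(): one owning pointer, released
    // on the early-error path as well as the normal path.
    static bool serialize(bool iterCreationFails) {
        Ref * dict = new Ref;
        if (iterCreationFails) {
            dict->release();    // the old code returned here without releasing
            return false;
        }
        // ... copy the permitted keys into dict, then serialize it ...
        dict->release();
        return true;
    }
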
index 5111a7edd92854a2ad47e2cbee54968035d054eb..e086a456d25c3b726b88ce233e2b538c4d0203f9 100644 (file)
@@ -79,6 +79,9 @@ __END_DECLS
 
 #define _LOG(x...)
 
+#define DARK_WAKE_DEBUG                     1
+#define SUSPEND_PM_NOTIFICATIONS_DEBUG      1
+
 #define CHECK_THREAD_CONTEXT
 #ifdef  CHECK_THREAD_CONTEXT
 static IOWorkLoop * gIOPMWorkLoop = 0;
@@ -125,7 +128,8 @@ enum {
     kPowerEventAssertionRelease,                // 10
     kPowerEventAssertionSetLevel,               // 11
     kPowerEventQueueSleepWakeUUID,              // 12
-    kPowerEventPublishSleepWakeUUID             // 13
+    kPowerEventPublishSleepWakeUUID,            // 13
+    kPowerEventSuspendClient                    // 14
 };
 
 // For evaluatePolicy()
@@ -139,7 +143,8 @@ enum {
     kStimulusDarkWakeActivityTickle,    // 5
     kStimulusDarkWakeEntry,             // 6
     kStimulusDarkWakeReentry,           // 7
-    kStimulusDarkWakeEvaluate           // 8
+    kStimulusDarkWakeEvaluate,          // 8
+    kStimulusNoIdleSleepPreventers      // 9
 };
 
 extern "C" {
@@ -198,12 +203,15 @@ static IOPMPowerState ourPowerStates[NUM_POWER_STATES] =
 #define kIOPMRootDomainWakeTypeUser         "User"
 #define kIOPMRootDomainWakeTypeAlarm        "Alarm"
 #define kIOPMRootDomainWakeTypeNetwork      "Network"
+#define kIOPMRootDomainWakeTypeHIDActivity  "HID Activity"
 
 // Special interest that entitles the interested client from receiving
 // all system messages. Only used by powerd.
 //
 #define kIOPMSystemCapabilityInterest       "IOPMSystemCapabilityInterest"
 
+#define kPMSuspendedNotificationClients      "PMSuspendedNotificationClients"
+
 /*
  * Aggressiveness
  */
@@ -269,8 +277,8 @@ static UInt32           gWillShutdown = 0;
 static UInt32           gPagingOff = 0;
 static UInt32           gSleepWakeUUIDIsSet = false;
 static uint32_t         gAggressivesState = 0;
-static uint32_t         gDarkWakeFlags = kDarkWakeFlagHIDTickleNone;
-static bool             gRAMDiskImageBoot = false;
+static uint32_t         gDarkWakeFlags = kDarkWakeFlagHIDTickleNone | kDarkWakeFlagIgnoreDiskIOAlways;
+static PMStatsStruct    gPMStats;
 
 struct timeval gIOLastSleepTime;
 struct timeval gIOLastWakeTime;
@@ -855,17 +863,6 @@ bool IOPMrootDomain::start( IOService * nub )
 
     PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
     
-    IORegistryEntry * chosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
-    if (chosenEntry)
-    {
-        if (chosenEntry->getProperty("boot-ramdmg-size") &&
-            chosenEntry->getProperty("boot-ramdmg-extents"))
-        {
-            gRAMDiskImageBoot = true;
-        }
-        chosenEntry->release();
-    }
-
     queue_init(&aggressivesQueue);
     aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this);
     aggressivesData = OSData::withCapacity(
@@ -885,7 +882,7 @@ bool IOPMrootDomain::start( IOService * nub )
     
     setProperty(kIOSleepSupportedKey, true);
 
-    bzero(&pmStats, sizeof(pmStats));
+    bzero(&gPMStats, sizeof(gPMStats));
 
     pmTracer = PMTraceWorker::tracer(this);
 
@@ -942,6 +939,8 @@ bool IOPMrootDomain::start( IOService * nub )
                     (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0);
 
     fPMSettingsDict = OSDictionary::withCapacity(5);
+    preventIdleSleepList = OSSet::withCapacity(8);
+    preventSystemSleepList = OSSet::withCapacity(2);
 
     PMinit();   // creates gIOPMWorkLoop
 
@@ -1010,8 +1009,13 @@ bool IOPMrootDomain::start( IOService * nub )
         publishFeature("DisplayDims");
     }
     if(psIterator) {
-        psIterator->release();
+        psIterator->release();        
     }
+    
+    
+    pmSuspendedCapacity = pmSuspendedSize = 0;
+    pmSuspendedPIDS = NULL;
+    
 
     sysctl_register_oid(&sysctl__kern_sleeptime);
     sysctl_register_oid(&sysctl__kern_waketime);
@@ -1030,6 +1034,126 @@ bool IOPMrootDomain::start( IOService * nub )
     return true;
 }
 
+
+
+
+void IOPMrootDomain::handleSuspendPMNotificationClient(uint32_t pid, bool doSuspend)
+{
+    ASSERT_GATED();
+    
+    int index = -1;
+    unsigned int i;
+    
+    if (!pmSuspendedPIDS) {
+        pmSuspendedCapacity = 8;
+        pmSuspendedSize = pmSuspendedCapacity * sizeof(PMNotifySuspendedStruct);
+        pmSuspendedPIDS = (PMNotifySuspendedStruct *)IOMalloc(pmSuspendedSize);
+        bzero(pmSuspendedPIDS, pmSuspendedSize);
+    }
+    
+    /* Find the existing pid in the existing array */
+
+    for (i=0; i < pmSuspendedCapacity; i++) {
+        if (pmSuspendedPIDS[i].pid == pid) {
+            index = i;
+            break;
+        }
+    }
+    
+    if (-1 == index)
+    {
+        /* Find an unused slot in the suspended pids table. */
+
+        for (i=0; i < pmSuspendedCapacity; i++) {
+            if (pmSuspendedPIDS[i].refcount == 0) {
+                break;
+            }
+        }
+    
+        if (pmSuspendedCapacity == i) 
+        {
+            /* GROW if necessary */
+
+            PMNotifySuspendedStruct *newSuspended = NULL;
+            pmSuspendedCapacity     *= 2;
+            pmSuspendedSize         = pmSuspendedCapacity * sizeof(PMNotifySuspendedStruct);
+            newSuspended            = (PMNotifySuspendedStruct *)IOMalloc(pmSuspendedSize);
+
+            bzero(newSuspended, pmSuspendedSize);
+            bcopy(pmSuspendedPIDS,  newSuspended, pmSuspendedSize/2);
+            IOFree(pmSuspendedPIDS, pmSuspendedSize/2);
+        
+            pmSuspendedPIDS = newSuspended;
+        }
+
+        index = i;
+        pmSuspendedPIDS[index].pid = pid;
+    }
+
+    if (doSuspend) {
+        pmSuspendedPIDS[index].refcount++;
+    } else {
+        pmSuspendedPIDS[index].refcount--;
+    }
+        
+    /*
+     * Publish array of suspended pids in IOPMrootDomain
+     */
+    OSArray     *publish = OSArray::withCapacity(pmSuspendedCapacity);
+
+    for (i=0; i<pmSuspendedCapacity; i++)
+    {
+        if (pmSuspendedPIDS[i].refcount > 0) {
+            OSDictionary    *suspended = OSDictionary::withCapacity(2);
+            OSNumber        *n = NULL;
+            
+            n = OSNumber::withNumber(pmSuspendedPIDS[i].pid, 32);
+            suspended->setObject("pid", n);
+            n->release();
+            
+            n = OSNumber::withNumber(pmSuspendedPIDS[i].refcount, 32);
+            suspended->setObject("refcount", n);
+            n->release();
+            
+            publish->setObject(suspended);
+            suspended->release();
+            
+        }
+    }
+    
+    if (0 != publish->getCount()) {
+        setProperty(kPMSuspendedNotificationClients, publish);
+    } else {
+        removeProperty(kPMSuspendedNotificationClients);
+    }
+    
+    publish->release();
+    
+    return;
+}
+
+bool IOPMrootDomain::pmNotificationIsSuspended(uint32_t pid)
+{
+    unsigned int index;
+    
+    for (index=0; index < pmSuspendedCapacity; index++) {
+        if (pmSuspendedPIDS[index].pid == pid) {
+            return pmSuspendedPIDS[index].refcount > 0;
+        }
+    }
+    
+    return false;
+}
+
+
+void IOPMrootDomain::suspendPMNotificationsForPID(uint32_t pid, bool doSuspend)
+{
+    if(pmPowerStateQueue) {
+        pmPowerStateQueue->submitPowerEvent(kPowerEventSuspendClient, (void *)pid, (uint64_t)doSuspend );
+    }
+    return;
+}
+
 //******************************************************************************
 // setProperties
 //
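
handleSuspendPMNotificationClient() keeps a flat pid-to-refcount table that reuses zero-refcount slots and doubles in place when full. A sketch of the reuse-or-grow lookup, assuming the two-field entry layout of PMNotifySuspendedStruct and with calloc/memcpy standing in for IOMalloc/bzero/bcopy:

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    struct Entry { uint32_t pid; int refcount; };   // mirrors PMNotifySuspendedStruct

    static Entry *  gTable    = nullptr;
    static unsigned gCapacity = 0;

    static Entry * slotFor(uint32_t pid) {
        if (!gTable) {
            gCapacity = 8;                                    // initial capacity
            gTable = (Entry *) std::calloc(gCapacity, sizeof(Entry));
        }
        for (unsigned i = 0; i < gCapacity; i++)              // existing entry?
            if (gTable[i].pid == pid) return &gTable[i];
        for (unsigned i = 0; i < gCapacity; i++)              // free slot?
            if (gTable[i].refcount == 0) { gTable[i].pid = pid; return &gTable[i]; }

        // Table full: double it and copy the old half across.
        Entry * grown = (Entry *) std::calloc(gCapacity * 2, sizeof(Entry));
        std::memcpy(grown, gTable, gCapacity * sizeof(Entry));
        std::free(gTable);
        gTable = grown;
        Entry * slot = &gTable[gCapacity];
        gCapacity *= 2;
        slot->pid = pid;
        return slot;
    }

As in the kernel version, a zeroed slot has pid 0, so the scheme assumes callers pass real (nonzero) pids.
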
@@ -1064,13 +1188,16 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     const OSSymbol *hibernatefreeratio_string           = OSSymbol::withCString(kIOHibernateFreeRatioKey);
     const OSSymbol *hibernatefreetime_string            = OSSymbol::withCString(kIOHibernateFreeTimeKey);
 #endif
-
+#if SUSPEND_PM_NOTIFICATIONS_DEBUG
+    const OSSymbol *suspendPMClient_string              = OSSymbol::withCString(kPMSuspendedNotificationClients);
+#endif
+    
     if (!dict) 
     {
         return_value = kIOReturnBadArgument;
         goto exit;
     }
-
+    
     if ((b = OSDynamicCast(OSBoolean, dict->getObject(publish_simulated_battery_string))))
     {
         publishResource(publish_simulated_battery_string, kOSBooleanTrue);
@@ -1169,17 +1296,19 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     {
         setProperty(kIOPMDeepSleepDelayKey, n);
     }
-    if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMDestroyFVKeyOnStandbyKey))))
-    {
-        setProperty(kIOPMDestroyFVKeyOnStandbyKey, b);
-    }
-    if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMAutoPowerOffEnabledKey))))
+
+#if SUSPEND_PM_NOTIFICATIONS_DEBUG
+    if ((n = OSDynamicCast(OSNumber, dict->getObject(suspendPMClient_string))))
     {
-        setProperty(kIOPMAutoPowerOffEnabledKey, b);
+        // Toggle the suspended status for pid n.
+        uint32_t pid_int = n->unsigned32BitValue();        
+        suspendPMNotificationsForPID(pid_int, !pmNotificationIsSuspended(pid_int));
     }
-    if ((n = OSDynamicCast(OSNumber, dict->getObject(kIOPMAutoPowerOffDelayKey))))
+#endif
+    
+    if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMDestroyFVKeyOnStandbyKey))))
     {
-        setProperty(kIOPMAutoPowerOffDelayKey, n);
+        setProperty(kIOPMDestroyFVKeyOnStandbyKey, b);
     }
 
     // Relay our allowed PM settings onto our registered PM clients
@@ -1238,6 +1367,9 @@ exit:
     if(hibernatefile_string) hibernatefile_string->release();
     if(hibernatefreeratio_string) hibernatefreeratio_string->release();
     if(hibernatefreetime_string) hibernatefreetime_string->release();
+#endif
+#if SUSPEND_PM_NOTIFICATIONS_DEBUG
+    if(suspendPMClient_string) suspendPMClient_string->release();
 #endif
     return return_value;
 }
@@ -1748,8 +1880,12 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds )
         clock_interval_to_deadline(inSeconds, kSecondScale, &deadline);        
         thread_call_enter_delayed(extraSleepTimer, deadline);
         idleSleepTimerPending = true;
-        DLOG("idle timer set for %u seconds\n", inSeconds);
     }
+    else
+    {
+        thread_call_enter(extraSleepTimer);
+    }
+    DLOG("idle timer set for %u seconds\n", inSeconds);
 }
 
 //******************************************************************************
@@ -1882,9 +2018,10 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
         kIOPMOSSwitchHibernationKey,
         kIOPMIdleSleepKey,
         kIOPMLowPowerSleepKey,
-        kIOPMClamshellSleepKey,
         kIOPMThermalEmergencySleepKey,
-        kIOPMMaintenanceSleepKey
+        kIOPMMaintenanceSleepKey,
+        kIOPMSleepServiceExitKey,
+        kIOPMDarkWakeThermalEmergencyKey
     };
 
     PMEventDetails *details;
@@ -1899,6 +2036,9 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
         return kIOReturnNotPermitted;
     }
 
+    if (kIOPMSleepReasonDarkWakeThermalEmergency == sleepReason)
+        messageClients(kIOPMMessageDarkWakeThermalEmergency);
+
     if (timeline)
         timeline->setSleepCycleInProgressFlag(true);
   
@@ -1906,7 +2046,6 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
     if(pmPowerStateQueue) {
         pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)true);
     }
-
   
     // Log the beginning of system sleep.
        details = PMEventDetails::eventDetails(kIOPMEventTypeSleep, NULL,
@@ -2077,6 +2216,8 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             logWranglerTickle  = true;
             sleepTimerMaintenance = false;
             wranglerTickleLatched = false;
+            darkWakeThermalAlarm  = false;
+            darkWakeThermalEmergency = false;
 
             OSString * wakeType = OSDynamicCast(
                 OSString, getProperty(kIOPMRootDomainWakeTypeKey));
@@ -2195,27 +2336,17 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 
             changePowerStateToPriv(ON_STATE);
         }   break;
-    
-        case ON_STATE: {
-            bool wasPrevented = childPreventSystemSleep;
-
-            details = PMEventDetails::eventDetails(
-                            kIOPMEventTypeWakeDone,
-                            NULL, 
-                            0, 
-                            kIOReturnSuccess);
-                       
-            recordAndReleasePMEvent( details );
 
-            // Update childPreventSystemSleep flag using the capability computed
-            // by IOSevice::rebuildChildClampBits().
-
-            childPreventSystemSleep =
-                ((currentCapability() & kIOPMChildClamp2) != 0);
-
-            if (wasPrevented && !childPreventSystemSleep)
+        case ON_STATE: {
+            if (previousPowerState != ON_STATE)
             {
-                evaluatePolicy( kStimulusDarkWakeEvaluate );
+                details = PMEventDetails::eventDetails(
+                                kIOPMEventTypeWakeDone,
+                                NULL, 
+                                0, 
+                                kIOReturnSuccess);
+                
+                recordAndReleasePMEvent( details );
             }
         }   break;
     }
@@ -2225,9 +2356,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 // requestPowerDomainState
 //
 // Extend implementation in IOService. Running on PM work loop thread.
-//
-// Examine children desires and initiate idle-sleep if all children are idle,
-// prevent idle and system sleep flags are not set.
 //******************************************************************************
 
 IOReturn IOPMrootDomain::requestPowerDomainState (
@@ -2235,110 +2363,102 @@ IOReturn IOPMrootDomain::requestPowerDomainState (
     IOPowerConnection * childConnection,
     unsigned long       specification )
 {
-    OSIterator          *iter;
-    OSObject            *next;
-    IOPowerConnection   *connection;
-    IOPMPowerFlags      mergedChildDesire = 0;
-    IOPMPowerFlags      editedChildDesire;
-    IOPMPowerFlags      thisDesire;
-    bool                sleepASAP = false;
+    // Idle and system sleep prevention flags affects driver desire.
+    // Children desire are irrelevant so they are cleared.
+
+    return super::requestPowerDomainState(0, childConnection, specification);
+}
+
+//******************************************************************************
+// updatePreventIdleSleepList
+//
+// Called by IOService on PM work loop.
+//******************************************************************************
+
+void IOPMrootDomain::updatePreventIdleSleepList(
+        IOService * service, bool addNotRemove )
+{
+    unsigned int oldCount, newCount;
 
     ASSERT_GATED();
 
-    // Disregard disk I/O (anything besides the display wrangler) as a
-    // factor in preventing idle sleep - based on a runtime setting.
+    // Disregard disk I/O (anything besides the display wrangler)
+    // as a factor preventing idle sleep, except in the case of legacy disk I/O
 
     if ((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOAlways) &&
-        (kIOPMPreventIdleSleep & childDesire) &&
-        (childConnection != wranglerConnection))
+        addNotRemove && (service != wrangler) && (service != this))
     {
-        childDesire &= ~kIOPMPreventIdleSleep;
+        return;
     }
 
-    // Force the child's input power requirement to 0 unless the prevent
-    // idle-sleep flag is set. Nil input power flags maps to our state 0.
-    // Our power clamp (deviceDesire) clamps the lowest power state at 2.
-
-    editedChildDesire = 0;
-    if (childDesire & kIOPMPreventIdleSleep)
-        editedChildDesire |= (kIOPMPowerOn | kIOPMPreventIdleSleep);
-    if (childDesire & kIOPMPreventSystemSleep)
-        editedChildDesire |= (kIOPMPowerOn | kIOPMPreventSystemSleep);
-
-    iter = getChildIterator(gIOPowerPlane);
-    if ( iter )
+    oldCount = preventIdleSleepList->getCount();
+    if (addNotRemove)
     {
-        while ( (next = iter->getNextObject()) )
-        {
-            if ( (connection = OSDynamicCast(IOPowerConnection, next)) )
-            {
-                // Ignore child that are in the process of joining.
-                               if (connection->getReadyFlag() == false)
-                                       continue;
+        preventIdleSleepList->setObject(service);
+        DLOG("prevent idle sleep list: %s+ (%u)\n",
+            service->getName(), preventIdleSleepList->getCount());
+    }
+    else if (preventIdleSleepList->member(service))
+    {
+        preventIdleSleepList->removeObject(service);
+        DLOG("prevent idle sleep list: %s- (%u)\n",
+            service->getName(), preventIdleSleepList->getCount());
+    }
+    newCount = preventIdleSleepList->getCount();
+    
+    if ((oldCount == 0) && (newCount != 0))
+    {
+        // Driver added to empty prevent list.
+        // Update the driver desire to prevent idle sleep.
+        // Driver desire does not prevent demand sleep.
+        
+        changePowerStateTo(ON_STATE);
+    }
+    else if ((oldCount != 0) && (newCount == 0))
+    {
+        // Last driver removed from prevent list.
+        // Drop the driver clamp to allow idle sleep.
 
-                // OR in the child's input power requirements.
-                // Is this connection attached to the child that called
-                // requestPowerDomainState()?
+        changePowerStateTo(SLEEP_STATE);
+        evaluatePolicy( kStimulusNoIdleSleepPreventers );
+    }
+}
 
-                if (connection == childConnection)
-                {
-                    thisDesire = editedChildDesire;
-                }
-                else
-                {
-                    thisDesire = 0;
-                    if (connection->getPreventIdleSleepFlag())
-                        thisDesire |= (kIOPMPowerOn | kIOPMPreventIdleSleep);
-                    if (connection->getPreventSystemSleepFlag())
-                        thisDesire |= (kIOPMPowerOn | kIOPMPreventSystemSleep);
-                }
+//******************************************************************************
+// preventSystemSleepListUpdate
+//
+// Called by IOService on PM work loop.
+//******************************************************************************
 
-                mergedChildDesire |= thisDesire;
-                if (thisDesire && (kIOLogPMRootDomain & gIOKitDebug))
-                {
-                    IOService * child =
-                        (IOService *) connection->getChildEntry(gIOPowerPlane);
-                    LOG("child %p, noIdle %d, noSleep %d - %s\n",
-                        child,
-                        ((thisDesire & kIOPMPreventIdleSleep) != 0),
-                        ((thisDesire & kIOPMPreventSystemSleep) != 0),
-                        child ? child->getName() : "?");
-                }
-            }
-        }
-        iter->release();
-    }
+void IOPMrootDomain::updatePreventSystemSleepList(
+        IOService * service, bool addNotRemove )
+{
+    unsigned int oldCount;
 
-    DLOG("mergedChildDesire 0x%lx, extraSleepDelay %ld\n",
-        mergedChildDesire, extraSleepDelay);
+    ASSERT_GATED();
+    if (this == service)
+        return;
 
-    if ( !mergedChildDesire && !systemBooting )
+    oldCount = preventSystemSleepList->getCount();
+    if (addNotRemove)
     {
-        if (!wrangler)
-        {
-            changePowerStateToPriv(ON_STATE);
-            if (idleSeconds)
-            {
-                // stay awake for at least idleSeconds
-                startIdleSleepTimer(idleSeconds);
-            }
-        }
-        else if (!extraSleepDelay && !idleSleepTimerPending && !systemDarkWake)
+        preventSystemSleepList->setObject(service);
+        DLOG("prevent system sleep list: %s+ (%u)\n",
+            service->getName(), preventSystemSleepList->getCount());
+    }
+    else if (preventSystemSleepList->member(service))
+    {
+        preventSystemSleepList->removeObject(service);
+        DLOG("prevent system sleep list: %s- (%u)\n",
+            service->getName(), preventSystemSleepList->getCount());
+
+        if ((oldCount != 0) && (preventSystemSleepList->getCount() == 0))
         {
-            sleepASAP = true;
+            // Lost all system sleep preventers.
+            // Send stimulus if system sleep was blocked, and is in dark wake.
+            evaluatePolicy( kStimulusDarkWakeEvaluate );
         }
     }
-
-    // Drop our power clamp to SLEEP_STATE when all children became idle,
-    // and system sleep and display sleep slider values are equal.
-
-    adjustPowerState(sleepASAP);
-
-    // If our power clamp has already dropped to SLEEP_STATE, and no child
-    // is keeping us at ON_STATE, then the following will trigger idle sleep.
-
-    return super::requestPowerDomainState(
-        editedChildDesire, childConnection, specification);
 }
 
 //******************************************************************************
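
Instead of re-deriving the merged child desire on every requestPowerDomainState() call, the root domain now keeps explicit preventer sets and reacts only on the empty-to-non-empty edges. A sketch of that edge-triggered bookkeeping, with std::set standing in for the OSSet members:

    #include <cstddef>
    #include <set>

    static std::set<const void *> gPreventIdle;   // stands in for preventIdleSleepList

    // Mirror of updatePreventIdleSleepList(): only count transitions matter.
    static void updatePreventIdle(const void * service, bool addNotRemove) {
        std::size_t oldCount = gPreventIdle.size();
        if (addNotRemove) gPreventIdle.insert(service);
        else              gPreventIdle.erase(service);
        std::size_t newCount = gPreventIdle.size();

        if ((oldCount == 0) && (newCount != 0)) {
            // First preventer: clamp power at ON (changePowerStateTo(ON_STATE)).
        } else if ((oldCount != 0) && (newCount == 0)) {
            // Last preventer gone: drop the clamp and re-evaluate idle sleep.
        }
    }
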
@@ -2454,6 +2574,9 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum )
     DLOG("tellNoChangeDown %u->%u\n",
         (uint32_t) getPowerState(), (uint32_t) stateNum);
 
+    // Sleep canceled, clear the sleep trace point.
+    tracePoint(kIOPMTracePointSystemUp);
+
     if (idleSeconds && !wrangler)
     {
         // stay awake for at least idleSeconds
@@ -2473,7 +2596,6 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum )
 
 void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
 {
-    OSData *publishPMStats = NULL;
 
     DLOG("tellChangeUp %u->%u\n",
         (uint32_t) getPowerState(), (uint32_t) stateNum);
@@ -2504,10 +2626,6 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
         }
 
         tracePoint( kIOPMTracePointWakeApplications );
-        publishPMStats = OSData::withBytes(&pmStats, sizeof(pmStats));
-        setProperty(kIOPMSleepStatisticsKey, publishPMStats);
-        publishPMStats->release();
-        bzero(&pmStats, sizeof(pmStats));
 
         if (pmStatsAppResponses) 
         {
@@ -2730,7 +2848,12 @@ void IOPMrootDomain::handlePublishSleepWakeUUID( bool shouldPublish )
 
 IOReturn IOPMrootDomain::changePowerStateTo( unsigned long ordinal )
 {
-    return kIOReturnUnsupported;    // ignored
+    DLOG("changePowerStateTo(%lu)\n", ordinal);
+
+    if ((ordinal != ON_STATE) && (ordinal != SLEEP_STATE))
+        return kIOReturnUnsupported;
+
+    return super::changePowerStateTo(ordinal);
 }
 
 IOReturn IOPMrootDomain::changePowerStateToPriv( unsigned long ordinal )
@@ -3450,92 +3573,85 @@ void IOPMrootDomain::informCPUStateChange(
 // evaluateSystemSleepPolicy
 //******************************************************************************
 
-#define kIOPlatformSystemSleepPolicyKey     "IOPlatformSystemSleepPolicy"
-
-// Sleep flags
-enum {
-    kIOPMSleepFlagHibernate         = 0x00000001,
-    kIOPMSleepFlagSleepTimerEnable  = 0x00000002
-};
-
 struct IOPMSystemSleepPolicyEntry
 {
     uint32_t    factorMask;
     uint32_t    factorBits;
     uint32_t    sleepFlags;
     uint32_t    wakeEvents;
-} __attribute__((packed));
+};
 
 struct IOPMSystemSleepPolicyTable
 {
-    uint32_t    signature;
+    uint8_t     signature[4];
     uint16_t    version;
     uint16_t    entryCount;
     IOPMSystemSleepPolicyEntry  entries[];
-} __attribute__((packed));
+};
+
+enum {
+    kIOPMSleepFactorSleepTimerWake          = 0x00000001,
+    kIOPMSleepFactorLidOpen                 = 0x00000002,
+    kIOPMSleepFactorACPower                 = 0x00000004,
+    kIOPMSleepFactorLowBattery              = 0x00000008,
+    kIOPMSleepFactorDeepSleepNoDelay        = 0x00000010,
+    kIOPMSleepFactorDeepSleepDemand         = 0x00000020,
+    kIOPMSleepFactorDeepSleepDisable        = 0x00000040,
+    kIOPMSleepFactorUSBExternalDevice       = 0x00000080,
+    kIOPMSleepFactorBluetoothHIDDevice      = 0x00000100,
+    kIOPMSleepFactorExternalMediaMounted    = 0x00000200,
+    kIOPMSleepFactorDriverAssertBit5        = 0x00000400,   /* Reserved for Thunderbolt */
+    kIOPMSleepFactorDriverAssertBit6        = 0x00000800,
+    kIOPMSleepFactorDriverAssertBit7        = 0x00001000    /* Reserved for legacy I/O */
+};
+
+enum {
+    kSleepPhaseEarly, kSleepPhaseFinal
+};
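
The evaluation code below validates this layout before trusting it: the 'SLPT'
signature, version 1, a non-zero entry count, and a payload length that holds
exactly entryCount fixed-size entries. A condensed sketch of that validation
(helper name hypothetical; memcmp assumed available in the kernel environment):

    static bool policyTableLooksValid(const OSData * policyData)
    {
        if (!policyData ||
            (policyData->getLength() < sizeof(IOPMSystemSleepPolicyTable)))
            return false;

        const IOPMSystemSleepPolicyTable * pt =
            (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy();

        if (memcmp(pt->signature, "SLPT", 4) ||
            (pt->version != 1) || (pt->entryCount == 0))
            return false;

        // The payload must hold exactly entryCount fixed-size entries.
        return ((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) ==
                (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount));
    }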
 
-bool IOPMrootDomain::evaluateSystemSleepPolicy(
-    IOPMSystemSleepParameters * params, int sleepPhase )
+bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, int sleepPhase )
 {
     const IOPMSystemSleepPolicyTable * pt;
     OSObject *  prop = 0;
     OSData *    policyData;
-    uint64_t    currentFactors = 0;
-    uint32_t    standbyDelay;
-    uint32_t    powerOffDelay;
-    uint32_t    mismatch;
-    bool        standbyEnabled;
-    bool        powerOffEnabled;
-    bool        found = false;
-
-    // Get platform's sleep policy table
-    if (!_sleepPolicyHandler)
-    {
-        prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey);
-        if (!prop) goto done;
-    }
-
-    // Fetch additional settings
-    standbyEnabled = (getSleepOption(kIOPMDeepSleepDelayKey, &standbyDelay)
-        && (getProperty(kIOPMDeepSleepEnabledKey) == kOSBooleanTrue));
-    powerOffEnabled = (getSleepOption(kIOPMAutoPowerOffDelayKey, &powerOffDelay)
-        && (getProperty(kIOPMAutoPowerOffEnabledKey) == kOSBooleanTrue));
-    DLOG("standby %d delay %u, powerOff %d delay %u, hibernate %u\n",
-        standbyEnabled, standbyDelay, powerOffEnabled, powerOffDelay,
-        hibernateMode);
-
-    // pmset level overrides
-    if ((hibernateMode & kIOHibernateModeOn) == 0)
+    uint32_t    currentFactors;
+    uint32_t    deepSleepDelay = 0;
+    bool        success = false;
+
+    if (getProperty(kIOPMDeepSleepEnabledKey) != kOSBooleanTrue)
+        return false;
+
+    getSleepOption(kIOPMDeepSleepDelayKey, &deepSleepDelay);
+
+    prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey);
+    if (!prop)
+        return false;
+
+    policyData = OSDynamicCast(OSData, prop);
+    if (!policyData ||
+        (policyData->getLength() < sizeof(IOPMSystemSleepPolicyTable)))
     {
-        standbyEnabled  = false;
-        powerOffEnabled = false;
+        goto done;
     }
-    else if (!(hibernateMode & kIOHibernateModeSleep))
+
+    pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy();
+    if ((pt->signature[0] != 'S') ||
+        (pt->signature[1] != 'L') ||
+        (pt->signature[2] != 'P') ||
+        (pt->signature[3] != 'T') ||
+        (pt->version      != 1)   ||
+        (pt->entryCount   == 0))
     {
-        // Force hibernate (i.e. mode 25)
-        // If standby is enabled, force standby.
-        // If poweroff is enabled, force poweroff.
-        if (standbyEnabled)
-            currentFactors |= kIOPMSleepFactorStandbyForced;
-        else if (powerOffEnabled)
-            currentFactors |= kIOPMSleepFactorAutoPowerOffForced;
-        else
-            currentFactors |= kIOPMSleepFactorHibernateForced;
+        goto done;
     }
 
-    // Current factors based on environment and assertions
-    if (sleepTimerMaintenance)
-        currentFactors |= kIOPMSleepFactorSleepTimerWake;
-    if (!clamshellClosed)
-        currentFactors |= kIOPMSleepFactorLidOpen;
-    if (acAdaptorConnected)
-        currentFactors |= kIOPMSleepFactorACPower;
-    if (lowBatteryCondition)
-        currentFactors |= kIOPMSleepFactorBatteryLow;
-    if (!standbyDelay)
-        currentFactors |= kIOPMSleepFactorStandbyNoDelay;
-    if (!standbyEnabled)
-        currentFactors |= kIOPMSleepFactorStandbyDisabled;
+    if ((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) !=
+        (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount))
+    {
+        goto done;
+    }
+
+    currentFactors = 0;
     if (getPMAssertionLevel(kIOPMDriverAssertionUSBExternalDeviceBit) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorUSBExternalDevice;
@@ -3545,101 +3661,88 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy(
     if (getPMAssertionLevel(kIOPMDriverAssertionExternalMediaMountedBit) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorExternalMediaMounted;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=
+    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=    /* AssertionBit5 = Thunderbolt */
         kIOPMDriverAssertionLevelOff)
-        currentFactors |= kIOPMSleepFactorThunderboltDevice;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit8) !=
+        currentFactors |= kIOPMSleepFactorDriverAssertBit5;
+    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit7) !=
         kIOPMDriverAssertionLevelOff)
-        currentFactors |= kIOPMSleepFactorMagicPacketWakeEnabled;
-    if (!powerOffEnabled)
-        currentFactors |= kIOPMSleepFactorAutoPowerOffDisabled;
-
-    DLOG("sleep factors 0x%llx\n", currentFactors);
-
-    // Clear the output params
-    bzero(params, sizeof(*params));
-
-    if (_sleepPolicyHandler)
-    {
-        if (!_sleepPolicyVars)
-        {
-            _sleepPolicyVars = IONew(IOPMSystemSleepPolicyVariables, 1);
-            if (!_sleepPolicyVars)
-                goto done;
-            bzero(_sleepPolicyVars, sizeof(*_sleepPolicyVars));
-        }
-        _sleepPolicyVars->signature = kIOPMSystemSleepPolicySignature;
-        _sleepPolicyVars->version   = kIOPMSystemSleepPolicyVersion;
-        if (kIOPMSleepPhase1 == sleepPhase)
-        {
-            _sleepPolicyVars->currentCapability = _currentCapability;
-            _sleepPolicyVars->highestCapability = _highestCapability;
-            _sleepPolicyVars->sleepReason   = lastSleepReason;
-            _sleepPolicyVars->hibernateMode = hibernateMode;
-            _sleepPolicyVars->standbyDelay  = standbyDelay;
-            _sleepPolicyVars->poweroffDelay = powerOffDelay;
-        }
-        _sleepPolicyVars->sleepFactors = currentFactors;
-        _sleepPolicyVars->sleepPhase   = sleepPhase;
-        
-        if ((_sleepPolicyHandler(_sleepPolicyTarget, _sleepPolicyVars, params) !=
-             kIOReturnSuccess) || (kIOPMSleepTypeInvalid == params->sleepType) ||
-             (params->sleepType >= kIOPMSleepTypeLast) ||
-             (kIOPMSystemSleepParametersVersion != params->version))
-        {
-            MSG("sleep policy handler error\n");
-            goto done;
-        }
-
-        DLOG("sleep params v%u, type %u, flags 0x%x, wake 0x%x, timer %u, poweroff %u\n",
-            params->version, params->sleepType, params->sleepFlags,
-            params->ecWakeEvents, params->ecWakeTimer, params->ecPoweroffTimer);
-        found = true;
-        goto done;
-    }
-
-    // Policy table is meaningless without standby enabled
-    if (!standbyEnabled)
-        goto done;
-
-    // Validate the sleep policy table
-    policyData = OSDynamicCast(OSData, prop);
-    if (!policyData || (policyData->getLength() <= sizeof(IOPMSystemSleepPolicyTable)))
-        goto done;
-
-    pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy();
-    if ((pt->signature != kIOPMSystemSleepPolicySignature) ||
-        (pt->version != 1) || (0 == pt->entryCount))
-        goto done;
+        currentFactors |= kIOPMSleepFactorDriverAssertBit7;
+    if (0 == deepSleepDelay)
+        currentFactors |= kIOPMSleepFactorDeepSleepNoDelay;
+    if (!clamshellClosed)
+        currentFactors |= kIOPMSleepFactorLidOpen;
+    if (acAdaptorConnected)
+        currentFactors |= kIOPMSleepFactorACPower;
+    if (lowBatteryCondition)
+        currentFactors |= kIOPMSleepFactorLowBattery;
+    if (sleepTimerMaintenance)
+        currentFactors |= kIOPMSleepFactorSleepTimerWake;
 
-    if (((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) !=
-         (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount)))
-        goto done;
+    // pmset overrides
+    if ((hibernateMode & kIOHibernateModeOn) == 0)
+        currentFactors |= kIOPMSleepFactorDeepSleepDisable;
+    else if ((hibernateMode & kIOHibernateModeSleep) == 0)
+        currentFactors |= kIOPMSleepFactorDeepSleepDemand;
+    
+    DLOG("Sleep policy %u entries, current factors 0x%x\n",
+        pt->entryCount, currentFactors);
 
     for (uint32_t i = 0; i < pt->entryCount; i++)
     {
-        const IOPMSystemSleepPolicyEntry * entry = &pt->entries[i];
-        mismatch = (((uint32_t)currentFactors ^ entry->factorBits) & entry->factorMask);
+        const IOPMSystemSleepPolicyEntry * policyEntry = &pt->entries[i];
 
-        DLOG("mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x, mismatch 0x%08x\n",
-            entry->factorMask, entry->factorBits,
-            entry->sleepFlags, entry->wakeEvents, mismatch);
-        if (mismatch)
-            continue;
+        DLOG("factor mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x\n",
+            policyEntry->factorMask, policyEntry->factorBits,
+            policyEntry->sleepFlags, policyEntry->wakeEvents);
 
-        DLOG("^ found match\n");
-        found = true;
+        if ((currentFactors ^ policyEntry->factorBits) & policyEntry->factorMask)
+            continue;   // mismatch, try next
 
-        params->version = kIOPMSystemSleepParametersVersion;
-        params->reserved1 = 1;
-        if (entry->sleepFlags & kIOPMSleepFlagHibernate)
-            params->sleepType = kIOPMSleepTypeStandby;
-        else
-            params->sleepType = kIOPMSleepTypeNormalSleep;
+        if (p)
+        {
+            p->version    = 1;
+            p->sleepFlags = policyEntry->sleepFlags;
+            p->sleepTimer = 0;
+            p->wakeEvents = policyEntry->wakeEvents;
+            if (p->sleepFlags & kIOPMSleepFlagSleepTimerEnable)
+            {
+                if (kSleepPhaseFinal == sleepPhase)
+                {
+                    clock_sec_t now_secs = gIOLastSleepTime.tv_sec;
+
+                    if (!_standbyTimerResetSeconds ||
+                        (now_secs <= _standbyTimerResetSeconds))
+                    {
+                        // Reset standby timer adjustment
+                        _standbyTimerResetSeconds = now_secs;
+                        DLOG("standby delay %u, reset %u\n",
+                            deepSleepDelay, (uint32_t) _standbyTimerResetSeconds);
+                    }
+                    else if (deepSleepDelay)
+                    {
+                        // Shorten the standby delay timer
+                        clock_sec_t elapsed = now_secs - _standbyTimerResetSeconds;
+                        if (deepSleepDelay > elapsed)
+                            deepSleepDelay -= elapsed;
+                        else
+                            deepSleepDelay = 1; // must be > 0
+
+                        DLOG("standby delay %u, elapsed %u\n",
+                            deepSleepDelay, (uint32_t) elapsed);
+                    }
+                }
+                p->sleepTimer = deepSleepDelay;
+            }
+            else if (kSleepPhaseFinal == sleepPhase)
+            {
+                // A sleep that does not enable the sleep timer will reset
+                // the standby delay adjustment.
+                _standbyTimerResetSeconds = 0;
+            }
+        }
 
-        params->ecWakeEvents = entry->wakeEvents;
-        if (entry->sleepFlags & kIOPMSleepFlagSleepTimerEnable)
-            params->ecWakeTimer = standbyDelay;
+        DLOG("matched policy entry %u\n", i);
+        success = true;
         break;
     }
 
@@ -3647,53 +3750,32 @@ done:
     if (prop)
         prop->release();
 
-    return found;
+    return success;
 }
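
The matching loop above reduces each table entry to one predicate: every factor
bit selected by factorMask must agree with factorBits. As a standalone sketch:

    // XOR leaves 1s where current and required factors differ; the mask keeps
    // only the bits this entry cares about. Zero means the entry matches.
    static inline bool entryMatches(uint32_t currentFactors,
                                    const IOPMSystemSleepPolicyEntry * e)
    {
        return (((currentFactors ^ e->factorBits) & e->factorMask) == 0);
    }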
 
-static IOPMSystemSleepParameters gEarlySystemSleepParams;
-
 void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void )
 {
-    // Evaluate early (priority interest phase), before drivers sleep.
+    IOPMSystemSleepParameters   params;
+
+    // Evaluate sleep policy before driver sleep phase.
 
     DLOG("%s\n", __FUNCTION__);
     removeProperty(kIOPMSystemSleepParametersKey);
 
+    // Full wake resets the standby timer delay adjustment
+    if (_highestCapability & kIOPMSystemCapabilityGraphics)
+        _standbyTimerResetSeconds = 0;
+
     hibernateDisabled = false;
     hibernateMode = 0;
     getSleepOption(kIOHibernateModeKey, &hibernateMode);
 
-    // Save for late evaluation if sleep is aborted
-    bzero(&gEarlySystemSleepParams, sizeof(gEarlySystemSleepParams));
-
-    if (evaluateSystemSleepPolicy(&gEarlySystemSleepParams, kIOPMSleepPhase1))
+    if (!hibernateNoDefeat &&
+        evaluateSystemSleepPolicy(&params, kSleepPhaseEarly) &&
+        ((params.sleepFlags & kIOPMSleepFlagHibernate) == 0))
     {
-        if (!hibernateNoDefeat &&
-            (gEarlySystemSleepParams.sleepType == kIOPMSleepTypeNormalSleep))
-        {
-            // Disable hibernate setup for normal sleep
-            hibernateDisabled = true;
-        }
+        hibernateDisabled = true;
     }
-
-    // Publish IOPMSystemSleepType
-    uint32_t sleepType = gEarlySystemSleepParams.sleepType;
-    if (sleepType == kIOPMSleepTypeInvalid)
-    {
-        // no sleep policy
-        sleepType = kIOPMSleepTypeNormalSleep;
-        if (hibernateMode & kIOHibernateModeOn)
-            sleepType = (hibernateMode & kIOHibernateModeSleep) ?
-                        kIOPMSleepTypeSafeSleep : kIOPMSleepTypeHibernate;
-    }
-    else if ((sleepType == kIOPMSleepTypeStandby) &&
-             (gEarlySystemSleepParams.ecPoweroffTimer))
-    {
-        // report the lowest possible sleep state
-        sleepType = kIOPMSleepTypePowerOff;
-    }
-
-    setProperty(kIOPMSystemSleepTypeKey, sleepType, 32);
 }
 
 void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
@@ -3701,30 +3783,27 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
     IOPMSystemSleepParameters   params;
     OSData *                    paramsData;
 
-    // Evaluate sleep policy after sleeping drivers but before platform sleep.
+    // Evaluate sleep policy after drivers but before platform sleep.
 
     DLOG("%s\n", __FUNCTION__);
 
-    if (evaluateSystemSleepPolicy(&params, kIOPMSleepPhase2))
+    if (evaluateSystemSleepPolicy(&params, kSleepPhaseFinal))
     {
         if ((hibernateDisabled || hibernateAborted) &&
-            (params.sleepType != kIOPMSleepTypeNormalSleep))
+            (params.sleepFlags & kIOPMSleepFlagHibernate))
         {
-            // Final evaluation picked a state requiring hibernation,
-            // but hibernate setup was skipped. Retry using the early
-            // sleep parameters.
+            // Hibernate was requested, but setup was skipped or aborted.
+            // Arm a timer for a short sleep, then retry or wake fully.
 
-            bcopy(&gEarlySystemSleepParams, &params, sizeof(params));
-            params.sleepType = kIOPMSleepTypeAbortedSleep;
-            params.ecWakeTimer = 1;
+            params.sleepFlags &= ~kIOPMSleepFlagHibernate;
+            params.sleepFlags |= kIOPMSleepFlagSleepTimerEnable;
+            params.sleepTimer = 1;
             hibernateNoDefeat = true;
             DLOG("wake in %u secs for hibernateDisabled %d, hibernateAborted %d\n",
-                params.ecWakeTimer, hibernateDisabled, hibernateAborted);
+                params.sleepTimer, hibernateDisabled, hibernateAborted);
         }
         else
-        {
             hibernateNoDefeat = false;
-        }
 
         paramsData = OSData::withBytes(&params, sizeof(params));
         if (paramsData)
@@ -3733,28 +3812,25 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
             paramsData->release();
         }
 
-        if (params.sleepType >= kIOPMSleepTypeHibernate)
+        if (params.sleepFlags & kIOPMSleepFlagHibernate)
         {
-            // Disable safe sleep to force the hibernate path
+            // Force hibernate
             gIOHibernateMode &= ~kIOHibernateModeSleep;
         }
     }
 }
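
The timer arithmetic is the subtle part: evaluateSystemSleepPolicy() shortens the
configured standby delay by the seconds already slept since the last reset, and
the retry path above falls back to a 1-second timer. A worked sketch of the
shortening rule (helper name hypothetical): with a 4200 s delay and 1800 s
already elapsed, the next timer would be 2400 s.

    static uint32_t remainingStandbyDelay(uint32_t configuredDelay,
                                          clock_sec_t now, clock_sec_t resetSecs)
    {
        clock_sec_t elapsed = now - resetSecs;
        if (configuredDelay > elapsed)
            return configuredDelay - (uint32_t) elapsed;
        return 1;   // must stay non-zero, matching the code above
    }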
 
 bool IOPMrootDomain::getHibernateSettings(
-    uint32_t *  hibernateModePtr,
+    uint32_t *  hibernateMode,
     uint32_t *  hibernateFreeRatio,
     uint32_t *  hibernateFreeTime )
 {
-    // Called by IOHibernateSystemSleep() after evaluateSystemSleepPolicyEarly()
-    // has updated the hibernateDisabled flag.
-
-    bool ok = getSleepOption(kIOHibernateModeKey, hibernateModePtr);
+    bool ok = getSleepOption(kIOHibernateModeKey, hibernateMode);
     getSleepOption(kIOHibernateFreeRatioKey, hibernateFreeRatio);
     getSleepOption(kIOHibernateFreeTimeKey, hibernateFreeTime);
     if (hibernateDisabled)
-        *hibernateModePtr = 0;
-    DLOG("hibernateMode 0x%x\n", *hibernateModePtr);
+        *hibernateMode = 0;
+    DLOG("hibernateMode 0x%x\n", *hibernateMode);
     return ok;
 }
 
@@ -3975,7 +4051,6 @@ void IOPMrootDomain::tagPowerPlaneService(
     if (isDisplayWrangler)
     {
         wrangler = service;
-        wranglerConnection = (IOService *) service->getParentEntry(gIOPowerPlane);
     }
 #else
     isDisplayWrangler = false;
@@ -4374,6 +4449,7 @@ void IOPMrootDomain::handleOurPowerChangeDone(
                 darkWakeToSleepASAP = false;
                 pciCantSleepValid   = false;
                 rejectWranglerTickle = false;
+                darkWakeSleepService = false;
             }
 
             // Entered dark mode.
@@ -4453,6 +4529,20 @@ void IOPMrootDomain::handleOurPowerChangeDone(
         {
             setProperty(kIOPMSystemCapabilitiesKey, _currentCapability, 64);
             tracePoint( kIOPMTracePointSystemUp, 0 );
+
+            // kIOPMDWOverTemp notification handling was postponed
+            if (darkWakeThermalAlarm)
+            {
+                if (!wranglerTickled && !darkWakeThermalEmergency &&
+                    CAP_CURRENT(kIOPMSystemCapabilityCPU) &&
+                    !CAP_CURRENT(kIOPMSystemCapabilityGraphics))
+                {
+                    darkWakeThermalEmergency = true;
+                    privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency);
+                    MSG("DarkWake thermal limits breached. Going to sleep!\n");
+                }
+                darkWakeThermalAlarm = false;
+            }
         }
 
         _systemTransitionType = kSystemTransitionNone;
@@ -4485,12 +4575,14 @@ void IOPMrootDomain::overridePowerChangeForUIService(
         // Activate power limiter.
 
         if ((actions->parameter & kPMActionsFlagIsDisplayWrangler) &&
-            ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0))
+            ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0) &&
+            (changeFlags & kIOPMSynchronize))
         {
             actions->parameter |= kPMActionsFlagLimitPower;
         }
         else if ((actions->parameter & kPMActionsFlagIsAudioDevice) &&
-                 ((_pendingCapability & kIOPMSystemCapabilityAudio) == 0))
+                 ((_pendingCapability & kIOPMSystemCapabilityAudio) == 0) &&
+                 (changeFlags & kIOPMSynchronize))
         {
             actions->parameter |= kPMActionsFlagLimitPower;
         }
@@ -4532,13 +4624,6 @@ void IOPMrootDomain::overridePowerChangeForUIService(
         }
     }
 
-    if (gRAMDiskImageBoot &&
-        (actions->parameter & kPMActionsFlagIsDisplayWrangler))
-    {
-        // Tag devices subject to power suppression.
-        *inOutChangeFlags |= kIOPMPowerSuppressed;
-    }
-
     if (actions->parameter & kPMActionsFlagLimitPower)
     {
         uint32_t maxPowerState = (uint32_t)(-1);
@@ -4549,7 +4634,7 @@ void IOPMrootDomain::overridePowerChangeForUIService(
 
             maxPowerState = 0;
             if ((actions->parameter & kPMActionsFlagIsDisplayWrangler) &&
-                (!gRAMDiskImageBoot || (service->getPowerState() > 0)))
+                (service->getPowerState() > 0))
             {
                 // Forces a 3->1 transition sequence
                 if (changeFlags & kIOPMDomainWillChange)
@@ -4625,6 +4710,7 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler(
     if (!wranglerTickled &&
         ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0))
     {
+        setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeHIDActivity);
         DLOG("display wrangler tickled\n");
         if (kIOLogPMRootDomain & gIOKitDebug)
             OSReportWithBacktrace("Dark wake display tickle");
@@ -5107,7 +5193,12 @@ bool IOPMrootDomain::checkSystemCanSleep( IOOptionBits options )
             break;          // always sleep on low battery
         }
 
-        if (childPreventSystemSleep)
+        if (darkWakeThermalEmergency)
+        {
+            break;          // always sleep on dark wake thermal emergencies
+        }
+
+        if (preventSystemSleepList->getCount() != 0)
         {
             err = 4;        // 4. child prevent system sleep clamp
             break;
@@ -5329,6 +5420,9 @@ void IOPMrootDomain::dispatchPowerEvent(
         case kPowerEventPublishSleepWakeUUID:
             handlePublishSleepWakeUUID((bool)arg0);
             break;
+        case kPowerEventSuspendClient:
+            handleSuspendPMNotificationClient((uintptr_t)arg0, (bool)arg1);
+            break;
     }
 }
 
@@ -5447,6 +5541,27 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg )
         privateSleepSystem (kIOPMSleepReasonThermalEmergency);
     }
 
+    if (msg & kIOPMDWOverTemp)
+    {
+        if (!CAP_CURRENT(kIOPMSystemCapabilityCPU) ||
+            (_systemTransitionType == kSystemTransitionSleep) ||
+            (_systemTransitionType == kSystemTransitionWake)  ||
+            (_systemTransitionType == kSystemTransitionCapability))
+        {
+            // During early wake, or while system capability is changing,
+            // set a flag and take action at the end of the transition.
+            darkWakeThermalAlarm = true;
+        }
+        else if (!wranglerTickled && !darkWakeThermalEmergency &&
+                 !CAP_CURRENT(kIOPMSystemCapabilityGraphics))
+        {
+            // System in steady state and in dark wake
+            darkWakeThermalEmergency = true;
+            privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency);
+            MSG("DarkWake thermal limits breached. Going to sleep!\n");
+        }
+    }
+
     /*
      * Sleep Now!
      */
@@ -5477,6 +5592,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg )
         // Don't issue a hid tickle when lid is open and polled on wake
         if (msg & kIOPMSetValue)
         {
+            setProperty(kIOPMRootDomainWakeTypeKey, "Lid Open");
             reportUserInput();
         }
 
@@ -5562,6 +5678,11 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg )
         // This mirrors the hardware's USB wake event latch, where a latched
         // USB wake event followed by an AC attach will trigger a full wake.
         latchDisplayWranglerTickle( false );
+
+#if HIBERNATION
+        // AC presence will reset the standby timer delay adjustment.
+        _standbyTimerResetSeconds = 0;
+#endif
     }
     
     /*
@@ -5644,6 +5765,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
             int displaySleep        : 1;
             int sleepDelayChanged   : 1;
             int evaluateDarkWake    : 1;
+            int adjustPowerState    : 1;
         } bit;
         uint32_t u32;
     } flags;
@@ -5694,6 +5816,8 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
 
             if ( minutesToIdleSleep > minutesToDisplayDim )
                 minutesDelta = minutesToIdleSleep - minutesToDisplayDim;
+            else if ( minutesToIdleSleep == minutesToDisplayDim )
+                minutesDelta = 1;
 
             if ((sleepSlider == 0) && (minutesToIdleSleep != 0))
                 flags.bit.idleSleepEnabled = true;
@@ -5721,8 +5845,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
             break;
 
         case kStimulusAllowSystemSleepChanged:
-            // FIXME: de-compose to change flags.
-            adjustPowerState();
+            flags.bit.adjustPowerState = true;
             break;
 
         case kStimulusDarkWakeActivityTickle:
@@ -5848,6 +5971,10 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
 #endif
             break;
 
+        case kStimulusNoIdleSleepPreventers:
+            flags.bit.adjustPowerState = true;
+            break;
+
     } /* switch(stimulus) */
 
     if (flags.bit.evaluateDarkWake && !wranglerTickled)
@@ -5882,7 +6009,8 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
                 // Parked in dark wake, a tickle will return to full wake
                 rejectWranglerTickle = false;
             }
-        } else // non-maintenance (network) dark wake
+        }
+        else // non-maintenance (network) dark wake
         {
             if (checkSystemCanSleep(true))
             {
@@ -5965,7 +6093,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
                 AbsoluteTime    now;
                 uint64_t        nanos;
                 uint32_t        minutesSinceDisplaySleep = 0;
-                uint32_t        sleepDelay;
+                uint32_t        sleepDelay = 0;
 
                 clock_get_uptime(&now);
                 if (CMP_ABSOLUTETIME(&now, &wranglerSleepTime) > 0)
@@ -5979,10 +6107,6 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
                 {
                     sleepDelay = extraSleepDelay - minutesSinceDisplaySleep;
                 }
-                else
-                {
-                    sleepDelay = 1; // 1 min
-                }
 
                 startIdleSleepTimer(sleepDelay * 60);
                 DLOG("display slept %u min, set idle timer to %u min\n",
@@ -5998,6 +6122,35 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
         restoreUserSpinDownTimeout();
         adjustPowerState();
     }
+
+    if (flags.bit.adjustPowerState)
+    {
+        bool sleepASAP = false;
+
+        if (!systemBooting && (preventIdleSleepList->getCount() == 0))
+        {
+            if (!wrangler)
+            {
+                changePowerStateToPriv(ON_STATE);
+                if (idleSeconds)
+                {
+                    // stay awake for at least idleSeconds
+                    startIdleSleepTimer(idleSeconds);
+                }
+            }
+            else if (!extraSleepDelay && !idleSleepTimerPending && !systemDarkWake)
+            {
+                sleepASAP = true;
+            }
+        }
+        if (sleepASAP)
+        {
+            lastSleepReason = kIOPMSleepReasonIdle;
+            setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey);
+        }
+
+        adjustPowerState(sleepASAP);
+    }
 }
 
 //******************************************************************************
@@ -6019,8 +6172,22 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I
             wrangler->setIgnoreIdleTimer( value );
         }
     }
+
     if (changedBits & kIOPMDriverAssertionCPUBit)
         evaluatePolicy(kStimulusDarkWakeEvaluate);
+
+    if (changedBits & kIOPMDriverAssertionReservedBit7) {
+        bool value = (newAssertions & kIOPMDriverAssertionReservedBit7) ? true : false;
+        if (value) {
+            DLOG("Driver assertion ReservedBit7 raised. Legacy IO preventing sleep\n");
+            updatePreventIdleSleepList(this, true);
+        }
+        else {
+            DLOG("Driver assertion ReservedBit7 dropped\n");
+            updatePreventIdleSleepList(this, false);
+        }
+
+    }
 }
 
 // MARK: -
@@ -6039,6 +6206,7 @@ void IOPMrootDomain::pmStatsRecordEvent(
     bool        stopping = eventIndex & kIOPMStatsEventStopFlag ? true:false;
     uint64_t    delta;
     uint64_t    nsec;
+    OSData *publishPMStats = NULL;
 
     eventIndex &= ~(kIOPMStatsEventStartFlag | kIOPMStatsEventStopFlag);
 
@@ -6047,24 +6215,29 @@ void IOPMrootDomain::pmStatsRecordEvent(
     switch (eventIndex) {
         case kIOPMStatsHibernateImageWrite:
             if (starting)
-                pmStats.hibWrite.start = nsec;
+                gPMStats.hibWrite.start = nsec;
             else if (stopping)
-                pmStats.hibWrite.stop = nsec;
+                gPMStats.hibWrite.stop = nsec;
 
             if (stopping) {
-                delta = pmStats.hibWrite.stop - pmStats.hibWrite.start;
+                delta = gPMStats.hibWrite.stop - gPMStats.hibWrite.start;
                 IOLog("PMStats: Hibernate write took %qd ms\n", delta/1000000ULL);
             }
             break;
         case kIOPMStatsHibernateImageRead:
             if (starting)
-                pmStats.hibRead.start = nsec;
+                gPMStats.hibRead.start = nsec;
             else if (stopping)
-                pmStats.hibRead.stop = nsec;
+                gPMStats.hibRead.stop = nsec;
 
             if (stopping) {
-                delta = pmStats.hibRead.stop - pmStats.hibRead.start;
+                delta = gPMStats.hibRead.stop - gPMStats.hibRead.start;
                 IOLog("PMStats: Hibernate read took %qd ms\n", delta/1000000ULL);
+
+                publishPMStats = OSData::withBytes(&gPMStats, sizeof(gPMStats));
+                setProperty(kIOPMSleepStatisticsKey, publishPMStats);
+                publishPMStats->release();
+                bzero(&gPMStats, sizeof(gPMStats));
             }
             break;
     }
@@ -6194,18 +6367,6 @@ IOReturn IOPMrootDomain::callPlatformFunction(
 
         return kIOReturnSuccess;
     }
-    else if (functionName &&
-             functionName->isEqualTo(kIOPMInstallSystemSleepPolicyHandlerKey))
-    {
-        if (_sleepPolicyHandler)
-            return kIOReturnExclusiveAccess;
-        if (!param1)
-            return kIOReturnBadArgument;
-        _sleepPolicyHandler = (IOPMSystemSleepPolicyHandler) param1;
-        _sleepPolicyTarget  = (void *) param2;
-        setProperty("IOPMSystemSleepPolicyHandler", kOSBooleanTrue);
-        return kIOReturnSuccess;
-    }
 
     return super::callPlatformFunction(
         functionName, waitForFunction, param1, param2, param3, param4);
@@ -6213,8 +6374,13 @@ IOReturn IOPMrootDomain::callPlatformFunction(
 
 void IOPMrootDomain::tracePoint( uint8_t point )
 {
-    if (!systemBooting)
-        pmTracer->tracePoint(point);
+    if (systemBooting) return;
+
+    pmTracer->tracePoint(point);
+
+#if HIBERNATION
+    if (kIOPMTracePointSleepPowerPlaneDrivers == point) IOHibernateIOKitSleep();
+#endif
 }
 
 void IOPMrootDomain::tracePoint( uint8_t point, uint8_t data )
@@ -6932,7 +7098,7 @@ IOPMDriverAssertionID IOPMrootDomain::createPMAssertion(
  
     if (!pmAssertions)
         return 0;
+
     ret = pmAssertions->createAssertion(whichAssertionBits, assertionLevel, ownerService, ownerDescription, &newAssertion);
 
     if (kIOReturnSuccess == ret)
index 7800babda52cb81bf376554159bfaa2d2ba18b6e..03779d4b104d050e8f315f8da7cf026bd0b6e8c4 100644 (file)
@@ -747,10 +747,16 @@ static void IOShutdownNotificationsTimedOut(
     thread_call_param_t p0, 
     thread_call_param_t p1)
 {
+#ifdef CONFIG_EMBEDDED
+    /* 30 seconds has elapsed - panic */
+    panic("Halt/Restart Timed Out");
+
+#else /* ! CONFIG_EMBEDDED */
     int type = (int)(long)p0;
 
     /* 30 seconds has elapsed - resume shutdown */
     if(gIOPlatform) gIOPlatform->haltRestart(type);
+#endif /* CONFIG_EMBEDDED */
 }
 
 
@@ -960,6 +966,41 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller)
     OSString *        string = 0;
     uuid_string_t     uuid;
 
+#if CONFIG_EMBEDDED
+    entry = IORegistryEntry::fromPath( "/chosen", gIODTPlane );
+    if ( entry )
+    {
+        OSData * data1;
+
+        data1 = OSDynamicCast( OSData, entry->getProperty( "unique-chip-id" ) );
+        if ( data1 && data1->getLength( ) == 8 )
+        {
+            OSData * data2;
+
+            data2 = OSDynamicCast( OSData, entry->getProperty( "chip-id" ) );
+            if ( data2 && data2->getLength( ) == 4 )
+            {
+                SHA1_CTX     context;
+                uint8_t      digest[ SHA_DIGEST_LENGTH ];
+                const uuid_t space = { 0xA6, 0xDD, 0x4C, 0xCB, 0xB5, 0xE8, 0x4A, 0xF5, 0xAC, 0xDD, 0xB6, 0xDC, 0x6A, 0x05, 0x42, 0xB8 };
+
+                SHA1Init( &context );
+                SHA1Update( &context, space, sizeof( space ) );
+                SHA1Update( &context, data1->getBytesNoCopy( ), data1->getLength( ) );
+                SHA1Update( &context, data2->getBytesNoCopy( ), data2->getLength( ) );
+                SHA1Final( digest, &context );
+
+                digest[ 6 ] = ( digest[ 6 ] & 0x0F ) | 0x50;
+                digest[ 8 ] = ( digest[ 8 ] & 0x3F ) | 0x80;
+
+                uuid_unparse( digest, uuid );
+                string = OSString::withCString( uuid );
+            }
+        }
+
+        entry->release( );
+    }
+#else /* !CONFIG_EMBEDDED */
     entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane );
     if ( entry )
     {
@@ -984,6 +1025,7 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller)
 
         entry->release( );
     }
+#endif /* !CONFIG_EMBEDDED */
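
The embedded branch above derives a stable, name-based UUID: SHA-1 over a fixed
namespace followed by the two chip identifiers, with the version nibble forced to
5 and the RFC 4122 variant bits set, so the same hardware always yields the same
UUID. A condensed sketch under the same assumptions (SHA1* and uuid_unparse as
used above; helper name hypothetical):

    static void makeDeviceUUID(const uint8_t ns[16],
                               const OSData * uniqueChipId, const OSData * chipId,
                               uuid_string_t out)
    {
        SHA1_CTX ctx;
        uint8_t  digest[SHA_DIGEST_LENGTH];     // 20 bytes; the first 16 become the UUID

        SHA1Init(&ctx);
        SHA1Update(&ctx, ns, 16);               // namespace first
        SHA1Update(&ctx, uniqueChipId->getBytesNoCopy(), uniqueChipId->getLength());
        SHA1Update(&ctx, chipId->getBytesNoCopy(), chipId->getLength());
        SHA1Final(digest, &ctx);

        digest[6] = (digest[6] & 0x0F) | 0x50;  // version 5 (name-based, SHA-1)
        digest[8] = (digest[8] & 0x3F) | 0x80;  // RFC 4122 variant
        uuid_unparse(digest, out);
    }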
 
     if ( string == 0 )
     {
index a299d3fa12c86ff56aa66cc0b9b0bf3f90d78aa5..7d0b011670fcadbef51c887f2b5a498d52d756f4 100644 (file)
@@ -60,6 +60,8 @@ OSDefineMetaClassAndStructors(IORegistryEntry, OSObject)
 #define kIORegPlaneNameSuffixLen       (sizeof(kIORegPlaneNameSuffix) - 1)
 #define kIORegPlaneLocationSuffixLen   (sizeof(kIORegPlaneLocationSuffix) - 1)
 
+#define KASLR_IOREG_DEBUG 0
+
 static IORegistryEntry * gRegistryRoot;
 static OSDictionary *   gIORegistryPlanes;
 
@@ -526,6 +528,15 @@ IORegistryEntry::removeProperty( const OSSymbol * aKey)
     PUNLOCK;
 }
 
+#if KASLR_IOREG_DEBUG
+extern "C" {
+    
+bool ScanForAddrInObject(OSObject * theObject, 
+                         int indent);
+    
+}; /* extern "C" */
+#endif
+
 bool
 IORegistryEntry::setProperty( const OSSymbol * aKey, OSObject * anObject)
 {
@@ -543,7 +554,18 @@ IORegistryEntry::setProperty( const OSSymbol * aKey, OSObject * anObject)
 
     ret = getPropertyTable()->setObject( aKey, anObject );
     PUNLOCK;
-    
+
+#if KASLR_IOREG_DEBUG
+    if ( anObject && strcmp(kIOKitDiagnosticsKey, aKey->getCStringNoCopy()) != 0 ) {
+        if (ScanForAddrInObject(anObject, 0)) {
+            IOLog("%s: IORegistryEntry name %s with key \"%s\" \n",
+                  __FUNCTION__,
+                  getName(0),
+                  aKey->getCStringNoCopy() );        
+        }
+    }
+#endif
+
     return ret;
 }
 
index f083482725d3bd70d3025906102f4eddea5d3765..0c5dbc36cd5248fd0187a285f62917265be577cd 100644 (file)
@@ -35,6 +35,7 @@
 #include <libkern/c++/OSUnserialize.h>
 #include <IOKit/IOCatalogue.h>
 #include <IOKit/IOCommand.h>
+#include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IODeviceMemory.h>
 #include <IOKit/IOInterrupts.h>
 #include <IOKit/IOInterruptController.h>
@@ -46,6 +47,7 @@
 #include <IOKit/IOUserClient.h>
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOHibernatePrivate.h>
 #include <mach/sync_policy.h>
 #include <IOKit/assert.h>
 #include <sys/errno.h>
@@ -54,6 +56,7 @@
 
 #define LOG kprintf
 //#define LOG IOLog
+#define MATCH_DEBUG    0
 
 #include "IOServicePrivate.h"
 #include "IOKitKernelInternal.h"
@@ -119,7 +122,10 @@ const OSSymbol *           gIOConsoleSessionLoginDoneKey;
 const OSSymbol *               gIOConsoleSessionSecureInputPIDKey;
 const OSSymbol *               gIOConsoleSessionScreenLockedTimeKey;
 
-static clock_sec_t             gIOConsoleLockTime;
+clock_sec_t                    gIOConsoleLockTime;
+static bool                    gIOConsoleLoggedIn;
+static uint32_t                gIOScreenLockState;
+static IORegistryEntry *       gIOChosenEntry;
 
 static int                     gIOResourceGenerationCount;
 
@@ -225,7 +231,6 @@ static IOLock *     gArbitrationLockQueueLock;
 bool IOService::isInactive( void ) const
     { return( 0 != (kIOServiceInactiveState & getState())); }
 
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -490,6 +495,10 @@ void IOService::detach( IOService * provider )
             _adjustBusy( -busy );
     }
 
+    if (kIOServiceInactiveState & __state[0]) {
+       getMetaClass()->removeInstance(this);
+    }
+
     unlockForArbitration();
 
     if( newProvider) {
@@ -628,7 +637,9 @@ void IOService::startMatching( IOOptionBits options )
             lockForArbitration();
             IOLockLock( gIOServiceBusyLock );
 
-            waitAgain = (prevBusy < (__state[1] & kIOServiceBusyStateMask));
+            waitAgain = ((prevBusy < (__state[1] & kIOServiceBusyStateMask))
+                                      && (0 == (__state[0] & kIOServiceInactiveState)));
+
             if( waitAgain)
                 __state[1] |= kIOServiceSyncPubState | kIOServiceBusyWaiterState;
             else
@@ -661,37 +672,37 @@ IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables )
     
     while( (table = (OSDictionary *) newTables->getFirstObject())) {
 
-       LOCKWRITENOTIFY();
+        LOCKWRITENOTIFY();
         set = (OSSet *) copyExistingServices( table, 
                                                kIOServiceRegisteredState,
                                                kIOServiceExistingSet);
-       UNLOCKNOTIFY();
-       if( set) {
+        UNLOCKNOTIFY();
+        if( set) {
 
 #if IOMATCHDEBUG
-           count += set->getCount();
+            count += set->getCount();
 #endif
-           if (allSet) {
-               allSet->merge((const OSSet *) set);
-               set->release();
-           }
-           else
-               allSet = set;
-       }
+            if (allSet) {
+                allSet->merge((const OSSet *) set);
+                set->release();
+            }
+            else
+                allSet = set;
+        }
 
 #if IOMATCHDEBUG
-       if( getDebugFlags( table ) & kIOLogMatch)
-           LOG("Matching service count = %ld\n", (long)count);
+        if( getDebugFlags( table ) & kIOLogMatch)
+            LOG("Matching service count = %ld\n", (long)count);
 #endif
-       newTables->removeObject(table);
+        newTables->removeObject(table);
     }
 
     if (allSet) {
-       while( (service = (IOService *) allSet->getAnyObject())) {
-           service->startMatching(kIOServiceAsynchronous);
-           allSet->removeObject(service);
-       }
-       allSet->release();
+        while( (service = (IOService *) allSet->getAnyObject())) {
+            service->startMatching(kIOServiceAsynchronous);
+            allSet->removeObject(service);
+        }
+        allSet->release();
     }
 
     newTables->release();
@@ -2475,13 +2486,13 @@ static SInt32 IOServiceObjectOrder( const OSObject * entry, void * ref)
     OSSymbol *         key = (OSSymbol *) ref;
     OSNumber *         offset;
 
-    if( (notify = OSDynamicCast( _IOServiceNotifier, entry)))
+    if( (dict = OSDynamicCast( OSDictionary, entry)))
+        offset = OSDynamicCast(OSNumber, dict->getObject( key ));
+    else if( (notify = OSDynamicCast( _IOServiceNotifier, entry)))
        return( notify->priority );
 
     else if( (service = OSDynamicCast( IOService, entry)))
         offset = OSDynamicCast(OSNumber, service->getProperty( key ));
-    else if( (dict = OSDynamicCast( OSDictionary, entry)))
-        offset = OSDynamicCast(OSNumber, dict->getObject( key ));
     else {
        assert( false );
        offset = 0;
@@ -2602,10 +2613,6 @@ void IOService::probeCandidates( OSOrderedSet * matches )
     OSObject           *       nextMatch = 0;
     bool                       started;
     bool                       needReloc = false;
-#if CONFIG_MACF_KEXT
-    OSBoolean          *       isSandbox = 0;
-    bool                       useSandbox = false;
-#endif
 #if IOMATCHDEBUG
     SInt64                     debugFlags;
 #endif
@@ -2667,7 +2674,7 @@ void IOService::probeCandidates( OSOrderedSet * matches )
            props->setCapacityIncrement(1);             
 
            // check the nub matches
-           if( false == passiveMatch( props, true ))
+           if( false == matchPassive(props, kIOServiceChangesOK | kIOServiceClassDone))
                continue;
 
             // Check to see if driver reloc has been loaded.
@@ -2748,10 +2755,6 @@ void IOService::probeCandidates( OSOrderedSet * matches )
                 if( 0 == category)
                     category = gIODefaultMatchCategoryKey;
                 inst->setProperty( gIOMatchCategoryKey, (OSObject *) category );
-#if CONFIG_MACF_KEXT
-               isSandbox = OSDynamicCast(OSBoolean,
-                            props->getObject("IOKitForceMatch"));
-#endif
                 // attach driver instance
                 if( !(inst->attach( this )))
                         continue;
@@ -2768,21 +2771,6 @@ void IOService::probeCandidates( OSOrderedSet * matches )
     
                 newInst = inst->probe( this, &score );
                 inst->detach( this );
-#if CONFIG_MACF_KEXT
-               /*
-                * If this is the Sandbox driver and it matched, this is a
-                * disallowed device; toss any drivers that were already
-                * matched.
-                */
-               if (isSandbox && isSandbox->isTrue() && newInst != 0) {
-                   if (startDict != 0) {
-                       startDict->flushCollection();
-                       startDict->release();
-                       startDict = 0;
-                   }
-                   useSandbox = true;
-               }
-#endif
                 if( 0 == newInst) {
 #if IOMATCHDEBUG
                     if( debugFlags & kIOLogProbe)
@@ -2821,13 +2809,6 @@ void IOService::probeCandidates( OSOrderedSet * matches )
             props->release();
             if( inst)
                 inst->release();
-#if CONFIG_MACF_KEXT
-           /*
-            * If we're forcing the sandbox, drop out of the loop.
-            */
-           if (isSandbox && isSandbox->isTrue() && useSandbox)
-                   break;
-#endif
         }
         familyMatches->release();
         familyMatches = 0;
@@ -3113,6 +3094,7 @@ void IOService::doServiceMatch( IOOptionBits options )
     SInt32             catalogGeneration;
     bool               keepGuessing = true;
     bool               reRegistered = true;
+    bool               didRegister;
 
 //    job->nub->deliverNotification( gIOPublishNotification,
 //                             kIOServiceRegisteredState, 0xffffffff );
@@ -3130,6 +3112,7 @@ void IOService::doServiceMatch( IOOptionBits options )
            LOCKREADNOTIFY();
             __state[1] &= ~kIOServiceNeedConfigState;
             __state[1] |= kIOServiceConfigState;
+            didRegister = (0 == (kIOServiceRegisteredState & __state[0]));
             __state[0] |= kIOServiceRegisteredState;
 
            keepGuessing &= (0 == (__state[0] & kIOServiceInactiveState));
@@ -3140,7 +3123,7 @@ void IOService::doServiceMatch( IOOptionBits options )
                     while((notify = (_IOServiceNotifier *)
                            iter->getNextObject())) {
 
-                        if( passiveMatch( notify->matching )
+                        if( matchPassive(notify->matching, 0)
                          && (kIOServiceNotifyEnable & notify->state))
                             matches->setObject( notify );
                     }
@@ -3149,6 +3132,9 @@ void IOService::doServiceMatch( IOOptionBits options )
             }
 
            UNLOCKNOTIFY();
+           if (didRegister) {
+               getMetaClass()->addInstance(this);
+           }
             unlockForArbitration();
 
             if (keepGuessing && matches->getCount() && (kIOReturnSuccess == getResources()))
@@ -3518,27 +3504,83 @@ void _IOServiceJob::pingConfig( _IOServiceJob * job )
     semaphore_signal( gJobsSemaphore );
 }
 
+struct IOServiceMatchContext
+{
+    OSDictionary * table;
+    OSObject *     result;
+    uint32_t       options;
+    uint32_t       state;
+    uint32_t       count;
+    uint32_t       done;
+};
+
+bool IOService::instanceMatch(const OSObject * entry, void * context)
+{
+    IOServiceMatchContext * ctx = (typeof(ctx)) context;
+    IOService *    service = (typeof(service)) entry;
+    OSDictionary * table   = ctx->table;
+    uint32_t       options = ctx->options;
+    uint32_t       state   = ctx->state;
+    uint32_t       done;
+    bool           match;
+
+    done = 0;
+    do
+    {
+       match = ((state == (state & service->__state[0]))
+               && (0 == (service->__state[0] & kIOServiceInactiveState)));
+       if (!match) break;
+       ctx->count += table->getCount();
+        match = service->matchInternal(table, options, &done);
+       ctx->done += done;
+    }
+    while (false);
+    if (!match)
+       return (false);
+
+    if ((kIONotifyOnce & options) && (ctx->done == ctx->count))
+    {
+       service->retain();
+       ctx->result = service;
+       return (true);
+    }
+    else if (!ctx->result)
+    {
+       ctx->result = OSSet::withObjects((const OSObject **) &service, 1, 1);
+    }
+    else
+    {
+       ((OSSet *)ctx->result)->setObject(service);
+    }
+    return (false);
+}
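
instanceMatch() is shaped as an OSMetaClass instance applier: it accumulates
matches into an OSSet via the context, and returns true only to end the walk
early (the kIONotifyOnce case). A minimal applier, for illustration:

    // Counts live instances; refcon carries the counter.
    static bool countInstances(const OSObject * /* instance */, void * refcon)
    {
        (*(unsigned int *) refcon)++;
        return false;               // false keeps the iteration going
    }

    // Usage sketch:
    //   unsigned int n = 0;
    //   IOService::gMetaClass.applyToInstances(countInstances, &n);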
+
 // internal - call with gNotificationLock
 OSObject * IOService::copyExistingServices( OSDictionary * matching,
                 IOOptionBits inState, IOOptionBits options )
 {
-    OSObject *         current = 0;
-    OSIterator *       iter;
-    IOService *                service;
-    OSObject *         obj;
+    OSObject *   current = 0;
+    OSIterator * iter;
+    IOService *  service;
+    OSObject *   obj;
+    OSString *   str;
 
     if( !matching)
        return( 0 );
 
-    if(true 
-      && (obj = matching->getObject(gIOProviderClassKey))
+#if MATCH_DEBUG
+    OSSerialize * s = OSSerialize::withCapacity(128);
+    matching->serialize(s);
+#endif
+
+    if((obj = matching->getObject(gIOProviderClassKey))
       && gIOResourcesKey
       && gIOResourcesKey->isEqualTo(obj)
       && (service = gIOResources))
     {
        if( (inState == (service->__state[0] & inState))
          && (0 == (service->__state[0] & kIOServiceInactiveState))
-         &&  service->passiveMatch( matching ))
+         &&  service->matchPassive(matching, options))
        {
            if( options & kIONotifyOnce)
            {
@@ -3546,12 +3588,69 @@ OSObject * IOService::copyExistingServices( OSDictionary * matching,
                current = service;
            }
            else
-               current = OSSet::withObjects(
-                               (const OSObject **) &service, 1, 1 );
+               current = OSSet::withObjects((const OSObject **) &service, 1, 1 );
        }
     }
     else
     {
+       IOServiceMatchContext ctx;
+       ctx.table   = matching;
+       ctx.state   = inState;
+       ctx.count   = 0;
+       ctx.done    = 0;
+       ctx.options = options;
+       ctx.result  = 0;
+
+       if ((str = OSDynamicCast(OSString, obj)))
+       {
+           const OSSymbol * sym = OSSymbol::withString(str);
+           OSMetaClass::applyToInstancesOfClassName(sym, instanceMatch, &ctx);
+           sym->release();
+       }
+       else
+       {
+           IOService::gMetaClass.applyToInstances(instanceMatch, &ctx);
+       }
+
+
+       current = ctx.result;
+
+       options |= kIOServiceInternalDone | kIOServiceClassDone;
+       if (current && (ctx.done != ctx.count))
+       {
+           OSSet *
+           source = OSDynamicCast(OSSet, current);
+           current = 0;
+           while ((service = (IOService *) source->getAnyObject()))
+           {
+               if (service->matchPassive(matching, options))
+               {
+                   if( options & kIONotifyOnce)
+                   {
+                       service->retain();
+                       current = service;
+                       break;
+                   }
+                   if( current)
+                   {
+                       ((OSSet *)current)->setObject( service );
+                   }
+                   else
+                   {
+                       current = OSSet::withObjects(
+                                       (const OSObject **) &service, 1, 1 );
+                   }
+               }
+               source->removeObject(service);      
+           }
+           source->release();
+       }
+    }
+
+#if MATCH_DEBUG
+    {
+       OSObject * _current = 0;
+    
        iter = IORegistryIterator::iterateOver( gIOServicePlane,
                                            kIORegistryIterateRecursively );
        if( iter) {
@@ -3560,24 +3659,42 @@ OSObject * IOService::copyExistingServices( OSDictionary * matching,
                while( (service = (IOService *) iter->getNextObject())) {
                    if( (inState == (service->__state[0] & inState))
                    && (0 == (service->__state[0] & kIOServiceInactiveState))
-                   &&  service->passiveMatch( matching )) {
+                   &&  service->matchPassive(matching, 0)) {
     
                        if( options & kIONotifyOnce) {
                            service->retain();
-                           current = service;
+                           _current = service;
                            break;
                        }
-                       if( current)
-                           ((OSSet *)current)->setObject( service );
+                       if( _current)
+                           ((OSSet *)_current)->setObject( service );
                        else
-                           current = OSSet::withObjects(
+                           _current = OSSet::withObjects(
                                            (const OSObject **) &service, 1, 1 );
                    }
                }
            } while( !service && !iter->isValid());
            iter->release();
        }
-    }
+
+
+       if ( ((current != 0) != (_current != 0)) 
+       || (current && _current && !current->isEqualTo(_current)))
+       {
+           OSSerialize * s1 = OSSerialize::withCapacity(128);
+           OSSerialize * s2 = OSSerialize::withCapacity(128);
+           current->serialize(s1);
+           _current->serialize(s2);
+           kprintf("**mismatch** %p %p\n%s\n%s\n%s\n", current, _current, s->text(), s1->text(), s2->text());
+           s1->release();
+           s2->release();
+       }
+
+       if (_current) _current->release();
+    }    
+
+    s->release();
+#endif
 
     if( current && (0 == (options & (kIONotifyOnce | kIOServiceExistingSet)))) {
        iter = OSCollectionIterator::withCollection( (OSSet *)current );
@@ -3604,6 +3721,21 @@ OSIterator * IOService::getMatchingServices( OSDictionary * matching )
     return( iter );
 }
 
+IOService * IOService::copyMatchingService( OSDictionary * matching )
+{
+    IOService *        service;
+
+    // is a lock even needed?
+    LOCKWRITENOTIFY();
+
+    service = (IOService *) copyExistingServices( matching,
+                                               kIOServiceMatchedState, kIONotifyOnce );
+    
+    UNLOCKNOTIFY();
+
+    return( service );
+}
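
The new copyMatchingService() returns a single retained match (or NULL), sparing
callers the iterator dance. A usage sketch, with an illustrative class name:

    OSDictionary * matching = IOService::serviceMatching("IODisplayWrangler");
    if (matching)
    {
        IOService * svc = IOService::copyMatchingService(matching);
        matching->release();
        if (svc)
        {
            // ... use the matched service ...
            svc->release();     // copyMatchingService returns a retained object
        }
    }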
+
 struct _IOServiceMatchingNotificationHandlerRef
 {
     IOServiceNotificationHandler handler;
@@ -3911,7 +4043,7 @@ void IOService::deliverNotification( const OSSymbol * type,
         if( iter) {
             while( (notify = (_IOServiceNotifier *) iter->getNextObject())) {
 
-                if( passiveMatch( notify->matching)
+                if( matchPassive(notify->matching, 0)
                   && (kIOServiceNotifyEnable & notify->state)) {
                     if( 0 == willSend)
                         willSend = OSArray::withCapacity(8);
@@ -3950,10 +4082,18 @@ IOOptionBits IOService::getState( void ) const
 OSDictionary * IOService::serviceMatching( const OSString * name,
                        OSDictionary * table )
 {
+
+    const OSString *   str;
+
+    str = OSSymbol::withString(name);
+    if( !str)
+       return( 0 );
+
     if( !table)
        table = OSDictionary::withCapacity( 2 );
     if( table)
-        table->setObject(gIOProviderClassKey, (OSObject *)name );
+        table->setObject(gIOProviderClassKey, (OSObject *)str );
+    str->release();
 
     return( table );
 }
@@ -4238,28 +4378,37 @@ void IOService::updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessa
     IORegistryEntry * regEntry;
     OSObject *        locked = kOSBooleanFalse;
     uint32_t          idx;
-    bool              loggedIn;
     bool              publish;
     OSDictionary *    user;
     static IOMessage  sSystemPower;
 
     regEntry = IORegistryEntry::getRegistryRoot();
 
+    if (!gIOChosenEntry)
+       gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
+
     IOLockLock(gIOConsoleUsersLock);
 
     if (systemMessage)
     {
         sSystemPower = systemMessage;
+#if HIBERNATION
+       if ((kIOMessageSystemHasPoweredOn == systemMessage) && IOHibernateWasScreenLocked())
+       {
+           locked = kOSBooleanTrue;
+       }
+#endif /* HIBERNATION */
     }
-    loggedIn = false;
+
     if (consoleUsers)
     {
         OSNumber * num = 0;
+       gIOConsoleLoggedIn = false;
        for (idx = 0; 
              (user = OSDynamicCast(OSDictionary, consoleUsers->getObject(idx))); 
              idx++)
        {
-           loggedIn |= ((kOSBooleanTrue == user->getObject(gIOConsoleSessionOnConsoleKey))
+           gIOConsoleLoggedIn |= ((kOSBooleanTrue == user->getObject(gIOConsoleSessionOnConsoleKey))
                        && (kOSBooleanTrue == user->getObject(gIOConsoleSessionLoginDoneKey)));
            if (!num)
            {
@@ -4269,7 +4418,7 @@ void IOService::updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessa
         gIOConsoleLockTime = num ? num->unsigned32BitValue() : 0;
     }
 
-    if (!loggedIn 
+    if (!gIOConsoleLoggedIn 
      || (kIOMessageSystemWillSleep == sSystemPower)
      || (kIOMessageSystemPagingOff == sSystemPower))
     {
@@ -4304,6 +4453,20 @@ void IOService::updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessa
        OSIncrementAtomic( &gIOConsoleUsersSeed );
     }
 
+#if HIBERNATION
+    if (gIOChosenEntry)
+    {
+       uint32_t screenLockState;
+
+       if (locked == kOSBooleanTrue) screenLockState = kIOScreenLockLocked;
+       else if (gIOConsoleLockTime)  screenLockState = kIOScreenLockUnlocked;
+       else                          screenLockState = kIOScreenLockNoLock;
+
+       if (screenLockState != gIOScreenLockState) gIOChosenEntry->setProperty(kIOScreenLockStateKey, &screenLockState, sizeof(screenLockState));
+       gIOScreenLockState = screenLockState;
+    }
+#endif /* HIBERNATION */
+
     IOLockUnlock(gIOConsoleUsersLock);
 
     if (publish)
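
The HIBERNATION block above publishes the derived screen-lock state as a property on the /chosen device-tree entry. A hedged sketch of how another kernel component could read it back, using only names that appear in the hunk (the OSData layout follows from the setProperty(key, &value, sizeof(value)) form used above):

    IORegistryEntry * chosen = IORegistryEntry::fromPath("/chosen", gIODTPlane);
    if (chosen) {
        OSData * data = OSDynamicCast(OSData,
                            chosen->getProperty(kIOScreenLockStateKey));
        if (data && (data->getLength() >= sizeof(uint32_t))) {
            uint32_t state = *(const uint32_t *) data->getBytesNoCopy();
            (void) state; // kIOScreenLockNoLock / Unlocked / Locked
        }
        chosen->release();
    }
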
@@ -4455,144 +4618,188 @@ IOService * IOService::matchLocation( IOService * /* client */ )
     return( parent );
 }
 
-bool IOService::passiveMatch( OSDictionary * table, bool changesOK )
+bool IOService::matchInternal(OSDictionary * table, uint32_t options, uint32_t * did)
 {
-    IOService *                where;
     OSString *         matched;
     OSObject *         obj;
     OSString *         str;
     IORegistryEntry *  entry;
     OSNumber *         num;
-    SInt32             score;
-    OSNumber *         newPri;
     bool               match = true;
-    bool               matchParent = false;
-    UInt32             done;
-
-    assert( table );
+    bool                changesOK = (0 != (kIOServiceChangesOK & options));
+    uint32_t            count;
+    uint32_t            done;
 
-    where = this;
+    do
+    {
+       count = table->getCount();
+       done = 0;
+       str = OSDynamicCast(OSString, table->getObject(gIOProviderClassKey));
+       if (str) {
+           done++;
+           match = ((kIOServiceClassDone & options) || (0 != metaCast(str)));
+#if MATCH_DEBUG
+           match = (0 != metaCast( str ));
+           if ((kIOServiceClassDone & options) && !match) panic("classDone");
+#endif
+           if ((!match) || (done == count)) break;
+       }
 
-    do {
-        do {
-            done = 0;
-
-            str = OSDynamicCast( OSString, table->getObject( gIOProviderClassKey));
-            if( str) {
-                done++;
-                match = (0 != where->metaCast( str ));
-                if( !match)
-                    break;
-            }
+       obj = table->getObject( gIONameMatchKey );
+       if( obj) {
+           done++;
+           match = compareNames( obj, changesOK ? &matched : 0 );
+           if (!match) break;
+           if( changesOK && matched) {
+               // leave a hint as to which name matched
+               table->setObject( gIONameMatchedKey, matched );
+               matched->release();
+           }
+           if (done == count) break;
+       }
 
-            obj = table->getObject( gIONameMatchKey );
-            if( obj) {
-                done++;
-                match = where->compareNames( obj, changesOK ? &matched : 0 );
-                if( !match)
-                    break;
-                if( changesOK && matched) {
-                    // leave a hint as to which name matched
-                    table->setObject( gIONameMatchedKey, matched );
-                    matched->release();
-                }
-            }
+       str = OSDynamicCast( OSString, table->getObject( gIOLocationMatchKey ));
+       if (str)
+       {
+           const OSSymbol * sym;
+           done++;
+           match = false;
+           sym = copyLocation();
+           if (sym) {
+               match = sym->isEqualTo( str );
+               sym->release();
+           }
+           if ((!match) || (done == count)) break;
+       }
 
-            str = OSDynamicCast( OSString, table->getObject( gIOLocationMatchKey ));
-            if( str) {
+       obj = table->getObject( gIOPropertyMatchKey );
+       if( obj)
+       {
+           OSDictionary * dict;
+           OSDictionary * nextDict;
+           OSIterator *   iter;
+           done++;
+           match = false;
+           dict = dictionaryWithProperties();
+           if( dict) {
+               nextDict = OSDynamicCast( OSDictionary, obj);
+               if( nextDict)
+                   iter = 0;
+               else
+                   iter = OSCollectionIterator::withCollection(
+                               OSDynamicCast(OSCollection, obj));
+
+               while( nextDict
+                   || (iter && (0 != (nextDict = OSDynamicCast(OSDictionary,
+                                           iter->getNextObject()))))) {
+                   match = dict->isEqualTo( nextDict, nextDict);
+                   if( match)
+                       break;
+                   nextDict = 0;
+               }
+               dict->release();
+               if( iter)
+                   iter->release();
+           }
+           if ((!match) || (done == count)) break;
+       }
 
-                const OSSymbol * sym;
+       str = OSDynamicCast( OSString, table->getObject( gIOPathMatchKey ));
+       if( str) {
+           done++;
+           entry = IORegistryEntry::fromPath( str->getCStringNoCopy() );
+           match = (this == entry);
+           if( entry)
+               entry->release();
+           if ((!match) || (done == count)) break;
+       }
 
-                done++;
-                match = false;
-                sym = where->copyLocation();
-                if( sym) {
-                    match = sym->isEqualTo( str );
-                    sym->release();
-                }
-                if( !match)
-                    break;
-            }
+       num = OSDynamicCast( OSNumber, table->getObject( gIORegistryEntryIDKey ));
+       if (num) {
+           done++;
+           match = (getRegistryEntryID() == num->unsigned64BitValue());
+           if ((!match) || (done == count)) break;
+       }
 
-            obj = table->getObject( gIOPropertyMatchKey );
-            if( obj) {
+       num = OSDynamicCast( OSNumber, table->getObject( gIOMatchedServiceCountKey ));
+       if( num)
+       {
+           OSIterator *        iter;
+           IOService *         service = 0;
+           UInt32              serviceCount = 0;
 
-                OSDictionary * dict;
-                OSDictionary * nextDict;
-                OSIterator *   iter;
+           done++;
+           iter = getClientIterator();
+           if( iter) {
+               while( (service = (IOService *) iter->getNextObject())) {
+                   if( kIOServiceInactiveState & service->__state[0])
+                       continue;
+                   if( 0 == service->getProperty( gIOMatchCategoryKey ))
+                       continue;
+                   ++serviceCount;
+               }
+               iter->release();
+           }
+           match = (serviceCount == num->unsigned32BitValue());
+           if ((!match) || (done == count)) break;
+       }
 
-                done++;
-                match = false;
-                dict = where->dictionaryWithProperties();
-                if( dict) {
-                    nextDict = OSDynamicCast( OSDictionary, obj);
-                    if( nextDict)
-                        iter = 0;
-                    else
-                        iter = OSCollectionIterator::withCollection(
-                                    OSDynamicCast(OSCollection, obj));
-
-                    while( nextDict
-                        || (iter && (0 != (nextDict = OSDynamicCast(OSDictionary,
-                                                iter->getNextObject()))))) {
-                        match = dict->isEqualTo( nextDict, nextDict);
-                        if( match)
-                            break;
-                        nextDict = 0;
-                    }
-                    dict->release();
-                    if( iter)
-                        iter->release();
-                }
-                if( !match)
-                    break;
-            }
+#define propMatch(key)                                 \
+       obj = table->getObject(key);                    \
+       if (obj)                                        \
+       {                                               \
+           OSObject * prop;                            \
+           done++;                                     \
+           prop = copyProperty(key);                   \
+           match = obj->isEqualTo(prop);               \
+            if (prop) prop->release();                 \
+           if ((!match) || (done == count)) break;     \
+       }
+       propMatch(kIOBSDNameKey)
+       propMatch(kIOBSDMajorKey)
+       propMatch(kIOBSDMinorKey)
+       propMatch(kIOBSDUnitKey)
+#undef propMatch
+    }
+    while (false);
 
-            str = OSDynamicCast( OSString, table->getObject( gIOPathMatchKey ));
-            if( str) {
-                done++;
-                entry = IORegistryEntry::fromPath( str->getCStringNoCopy() );
-                match = (where == entry);
-                if( entry)
-                    entry->release();
-                if( !match)
-                    break;
-            }
+    if (did) *did = done;
+    return (match);
+}
 
-            num = OSDynamicCast( OSNumber, table->getObject( gIORegistryEntryIDKey ));
-            if( num) {
-               done++;
-                match = (getRegistryEntryID() == num->unsigned64BitValue());
-           }
+bool IOService::passiveMatch( OSDictionary * table, bool changesOK )
+{
+    return (matchPassive(table, changesOK ? kIOServiceChangesOK : 0));
+}
 
-            num = OSDynamicCast( OSNumber, table->getObject( gIOMatchedServiceCountKey ));
-            if( num) {
+bool IOService::matchPassive(OSDictionary * table, uint32_t options)
+{
+    IOService *                where;
+    OSDictionary *      nextTable;
+    SInt32             score;
+    OSNumber *         newPri;
+    bool               match = true;
+    bool               matchParent = false;
+    uint32_t           count;
+    uint32_t           done;
 
-                OSIterator *   iter;
-                IOService *            service = 0;
-                UInt32         serviceCount = 0;
+    assert( table );
 
-                done++;
-                iter = where->getClientIterator();
-                if( iter) {
-                    while( (service = (IOService *) iter->getNextObject())) {
-                        if( kIOServiceInactiveState & service->__state[0])
-                            continue;
-                        if( 0 == service->getProperty( gIOMatchCategoryKey ))
-                            continue;
-                        ++serviceCount;
-                    }
-                    iter->release();
-                }
-                match = (serviceCount == num->unsigned32BitValue());
-                if( !match)
-                    break;
-            }
+#if MATCH_DEBUG 
+    OSDictionary * root = table;
+#endif
 
-            if( done == table->getCount()) {
-                // don't call family if we've done all the entries in the table
-                matchParent = false;
-                break;
+    where = this;
+    do
+    {
+        do
+        {
+           count = table->getCount();
+           if (!(kIOServiceInternalDone & options))
+           {
+               match = where->matchInternal(table, options, &done);
+               // don't call family if we've done all the entries in the table
+               if ((!match) || (done == count)) break;
             }
 
             // pass in score from property table
@@ -4609,7 +4816,7 @@ bool IOService::passiveMatch( OSDictionary * table, bool changesOK )
                 break;
             }
 
-            if( changesOK) {
+            if (kIOServiceChangesOK & options) {
                 // save the score
                 newPri = OSNumber::withNumber( score, 32 );
                 if( newPri) {
@@ -4618,43 +4825,42 @@ bool IOService::passiveMatch( OSDictionary * table, bool changesOK )
                 }
             }
 
-            if( !(match = where->compareProperty( table, kIOBSDNameKey )))
-                break;
-            if( !(match = where->compareProperty( table, kIOBSDMajorKey )))
-                break;
-            if( !(match = where->compareProperty( table, kIOBSDMinorKey )))
-                break;
-            if( !(match = where->compareProperty( table, kIOBSDUnitKey )))
-                break;
-
+           options = 0;
             matchParent = false;
 
-            obj = OSDynamicCast( OSDictionary,
+            nextTable = OSDynamicCast(OSDictionary,
                   table->getObject( gIOParentMatchKey ));
-            if( obj) {
+            if( nextTable) {
+               // look for a matching entry anywhere up to root
                 match = false;
                 matchParent = true;
-                table = (OSDictionary *) obj;
+               table = nextTable;
                 break;
             }
 
-            table = OSDynamicCast( OSDictionary,
+            table = OSDynamicCast(OSDictionary,
                     table->getObject( gIOLocationMatchKey ));
-            if( table) {
+            if (table) {
+               // look for a matching entry at matchLocation()
                 match = false;
                 where = where->getProvider();
-                if( where)
-                    where = where->matchLocation( where );
+                if (where && (where = where->matchLocation(where))) continue;
             }
+            break;
+        }
+        while (true);
+    }
+    while( matchParent && (!match) && (where = where->getProvider()) );
 
-        } while( table && where );
-
-    } while( matchParent && (where = where->getProvider()) );
-
-    if( kIOLogMatch & gIOKitDebug)
-        if( where && (where != this) )
-            LOG("match parent @ %s = %d\n",
-                        where->getName(), match );
+#if MATCH_DEBUG
+    if (where != this) 
+    {
+       OSSerialize * s = OSSerialize::withCapacity(128);
+       root->serialize(s);
+       kprintf("parent match 0x%llx, %d,\n%s\n", getRegistryEntryID(), match, s->text());
+       s->release();
+    }
+#endif
 
     return( match );
 }
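
In the rewritten matcher, matchInternal() consumes the per-service keys and reports how many table entries it handled through 'done', so matchPassive() can stop as soon as done == count and only keeps looping to re-apply nested tables for gIOParentMatchKey (anywhere up the provider chain) and gIOLocationMatchKey (at matchLocation()). A hedged sketch of a table that exercises the parent walk; both class names are illustrative:

    // Hedged sketch: match an "IOMedia" whose provider chain contains an
    // "IOBlockStorageDriver". The outer keys are checked by matchInternal();
    // the nested table is re-evaluated against each provider by matchPassive().
    OSDictionary * parent = IOService::serviceMatching("IOBlockStorageDriver");
    OSDictionary * table  = IOService::serviceMatching("IOMedia");
    if (table && parent)
        table->setObject(gIOParentMatchKey, parent);
    if (parent) parent->release();
    // bool ok = candidate->matchPassive(table, 0);  // candidate: some IOService *
    if (table) table->release();
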
index 4905ec2cd9feace6054e69ae2eaf840951a1f82a..814a402efae183d1723bf30e65251578e602f5ef 100644
@@ -47,6 +47,7 @@
 #include <IOKit/pwr_mgt/IOPMPrivate.h>
 
 #include <sys/proc.h>
+#include <sys/proc_internal.h>
 #include <libkern/OSDebug.h>
 
 // Required for notification instrumentation
@@ -90,8 +91,8 @@ static IOPMRequestQueue *    gIOPMReplyQueue    = 0;
 static IOPMWorkQueue *       gIOPMWorkQueue     = 0;
 static IOPMCompletionQueue * gIOPMFreeQueue     = 0;
 static IOPMRequest *         gIOPMRequest       = 0;
-static IOPlatformExpert *    gPlatform          = 0;
 static IOService *           gIOPMRootNode      = 0;
+static IOPlatformExpert *    gPlatform          = 0;
 
 static const OSSymbol *      gIOPMPowerClientDevice     = 0;
 static const OSSymbol *      gIOPMPowerClientDriver     = 0;
@@ -143,7 +144,7 @@ do {                                  \
 #define PM_LOCK_WAKEUP(event)       IOLockWakeup(fPMLock, event, false)
 
 #define ns_per_us                   1000
-#define k30seconds                  (30*1000000)
+#define k30Seconds                  (30*1000000)
 #define kMinAckTimeoutTicks         (10*1000000)
 #define kIOPMTardyAckSPSKey         "IOPMTardyAckSetPowerState"
 #define kIOPMTardyAckPSCKey         "IOPMTardyAckPowerStateChange"
@@ -196,6 +197,16 @@ enum {
     do { if (fPMActions.a) { \
          (fPMActions.a)(fPMActions.target, this, &fPMActions, x, y); } \
          } while (false)
+         
+static OSNumber * copyClientIDForNotification(
+    OSObject *object, 
+    IOPMInterestContext *context);
+
+static void logClientIDForNotification(
+    OSObject *object,
+    IOPMInterestContext *context, 
+    const char *logString);
+         
 
 //*********************************************************************************
 // PM machine states
@@ -747,6 +758,10 @@ void IOService::handlePMstop ( IOPMRequest * request )
        PM_ASSERT_IN_GATE();
        PM_LOG2("%s: %p %s start\n", getName(), this, __FUNCTION__);
 
+    // remove the driver from the prevent idle/system sleep lists
+    getPMRootDomain()->updatePreventIdleSleepList(this, false);
+    getPMRootDomain()->updatePreventSystemSleepList(this, false);
+
     // remove the property
     removeProperty(kPwrMgtKey);                        
 
@@ -1983,64 +1998,51 @@ void IOService::setParentInfo (
     }
 }
 
-//*********************************************************************************
-// [private] rebuildChildClampBits
-//
-// The ChildClamp bits (kIOPMChildClamp & kIOPMChildClamp2) in our capabilityFlags
-// indicate that one of our children (or grandchildren or great-grandchildren ...)
-// doesn't support idle or system sleep in its current state. Since we don't track
-// the origin of each bit, every time any child changes state we have to clear
-// these bits and rebuild them.
-//*********************************************************************************
+//******************************************************************************
+// [private] trackSystemSleepPreventers
+//******************************************************************************
 
-void IOService::rebuildChildClampBits ( void )
+void IOService::trackSystemSleepPreventers(
+    IOPMPowerStateIndex     oldPowerState,
+    IOPMPowerStateIndex     newPowerState,
+    IOPMPowerChangeFlags    changeFlags __unused )
 {
-    unsigned long              i;
-    OSIterator *               iter;
-    OSObject *                 next;
-    IOPowerConnection *        connection;
-       unsigned long           powerState;
+    IOPMPowerFlags  oldCapability, newCapability;
 
-    // A child's desires has changed. We need to rebuild the child-clamp bits in
-       // our power state array. Start by clearing the bits in each power state.
-    
-    for ( i = 0; i < fNumberOfPowerStates; i++ )
-    {
-        fPowerStates[i].capabilityFlags &= ~(kIOPMChildClamp | kIOPMChildClamp2);
-    }
-
-       if (!inPlane(gIOPowerPlane))
-               return;
+    oldCapability = fPowerStates[oldPowerState].capabilityFlags &
+                    (kIOPMPreventIdleSleep | kIOPMPreventSystemSleep);
+    newCapability = fPowerStates[newPowerState].capabilityFlags &
+                    (kIOPMPreventIdleSleep | kIOPMPreventSystemSleep);
 
-    // Loop through the children. When we encounter the calling child, save the
-       // computed state as this child's desire. And set the ChildClamp bits in any
-    // of our states that some child has clamp on.
+    if (fHeadNoteChangeFlags & kIOPMInitialPowerChange)
+        oldCapability = 0;
+    if (oldCapability == newCapability)
+        return;
 
-    iter = getChildIterator(gIOPowerPlane);
-    if ( iter )
+    if ((oldCapability ^ newCapability) & kIOPMPreventIdleSleep)
     {
-        while ( (next = iter->getNextObject()) )
+#if SUPPORT_IDLE_CANCEL
+        if ((oldCapability & kIOPMPreventIdleSleep) == 0)
         {
-            if ( (connection = OSDynamicCast(IOPowerConnection, next)) )
-            {
-                               if (connection->getReadyFlag() == false)
-                               {
-                                       PM_LOG3("[%s] %s: connection not ready\n",
-                                               getName(), __FUNCTION__);
-                                       continue;
-                               }
+            IOPMRequest *   cancelRequest;
 
-                               powerState = connection->getDesiredDomainState();
-                if (powerState < fNumberOfPowerStates)
-                {
-                    if ( connection->getPreventIdleSleepFlag() )
-                        fPowerStates[powerState].capabilityFlags |= kIOPMChildClamp;
-                    if ( connection->getPreventSystemSleepFlag() )
-                        fPowerStates[powerState].capabilityFlags |= kIOPMChildClamp2;
-                }
+            cancelRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+            if (cancelRequest)
+            {
+                getPMRootDomain()->submitPMRequest( cancelRequest );
             }
         }
-        iter->release();
+#endif
+    
+        getPMRootDomain()->updatePreventIdleSleepList(this,
+            ((oldCapability & kIOPMPreventIdleSleep) == 0));
+    }
+
+    if ((oldCapability ^ newCapability) & kIOPMPreventSystemSleep)
+    {
+        
+        getPMRootDomain()->updatePreventSystemSleepList(this,
+            ((oldCapability & kIOPMPreventSystemSleep) == 0));
     }
 }
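
trackSystemSleepPreventers() replaces the rebuildChildClampBits() full rescan with edge detection: XOR-ing the old and new capability masks isolates the kIOPMPreventIdleSleep / kIOPMPreventSystemSleep bits that actually toggled, so the root domain's preventer lists are only touched on a transition. A self-contained illustration of the idiom (the flag names here are placeholders, not the kernel's):

    #include <cstdint>
    #include <cstdio>

    enum : uint32_t { kPreventIdle = 1u << 0, kPreventSleep = 1u << 1 };

    static void trackEdges(uint32_t oldCaps, uint32_t newCaps)
    {
        uint32_t changed = oldCaps ^ newCaps;   // bits that toggled
        if (changed & kPreventIdle)
            std::printf("idle preventer %s\n",
                        (newCaps & kPreventIdle) ? "added" : "removed");
        if (changed & kPreventSleep)
            std::printf("system-sleep preventer %s\n",
                        (newCaps & kPreventSleep) ? "added" : "removed");
    }
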
 
@@ -2059,7 +2061,6 @@ IOReturn IOService::requestPowerDomainState(
        IOPMPowerFlags          outputPowerFlags;
     IOService *         child;
        IOPMRequest *       subRequest;
-    bool                preventIdle, preventSleep; 
     bool                adjustPower = false;
 
     if (!initialized)
@@ -2082,10 +2083,6 @@ IOReturn IOService::requestPowerDomainState(
        child = (IOService *) childConnection->getChildEntry(gIOPowerPlane);
        assert(child);
 
-    preventIdle  = ((childRequestPowerFlags & kIOPMPreventIdleSleep) != 0);
-    preventSleep = ((childRequestPowerFlags & kIOPMPreventSystemSleep) != 0);
-    childRequestPowerFlags &= ~(kIOPMPreventIdleSleep | kIOPMPreventSystemSleep);
-
     // Merge in the power flags contributed by this power parent
     // at its current or impending power state. 
 
@@ -2156,9 +2153,7 @@ IOReturn IOService::requestPowerDomainState(
     // prevent idle/sleep flags towards the root domain.
 
     if (!childConnection->childHasRequestedPower() ||
-        (ps != childConnection->getDesiredDomainState()) ||
-        (childConnection->getPreventIdleSleepFlag() != preventIdle) ||
-        (childConnection->getPreventSystemSleepFlag() != preventSleep))
+        (ps != childConnection->getDesiredDomainState()))
         adjustPower = true;
 
 #if ENABLE_DEBUG_LOGS
@@ -2173,13 +2168,8 @@ IOReturn IOService::requestPowerDomainState(
 #endif
 
        // Record the child's desires on the connection.
-#if SUPPORT_IDLE_CANCEL
-       bool attemptCancel = (preventIdle && !childConnection->getPreventIdleSleepFlag());
-#endif
        childConnection->setChildHasRequestedPower();
        childConnection->setDesiredDomainState( ps );
-       childConnection->setPreventIdleSleepFlag( preventIdle );
-       childConnection->setPreventSystemSleepFlag( preventSleep );
 
        // Schedule a request to re-evaluate all children desires and
        // adjust power state. Submit a request if one wasn't pending,
@@ -2197,17 +2187,6 @@ IOReturn IOService::requestPowerDomainState(
                }
     }
 
-#if SUPPORT_IDLE_CANCEL
-       if (attemptCancel)
-       {
-               subRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
-               if (subRequest)
-               {
-                       submitPMRequest( subRequest );
-               }
-       }
-#endif
-
     return kIOReturnSuccess;
 }
 
@@ -3860,12 +3839,14 @@ bool IOService::notifyControllingDriver ( void )
 
     if (fInitialSetPowerState)
     {
+        fInitialSetPowerState = false;
+        fHeadNoteChangeFlags |= kIOPMInitialPowerChange;
+
         // Driver-specified flag to skip the initial setPowerState()
         if (fHeadNotePowerArrayEntry->capabilityFlags & kIOPMInitialDeviceState)
         {
             return false;
         }
-        fInitialSetPowerState = false;
     }
 
     param = (DriverCallParam *) fDriverCallParamPtr;
@@ -3989,6 +3970,9 @@ void IOService::all_done ( void )
         // could our driver switch to the new state?
         if ( !( fHeadNoteChangeFlags & kIOPMNotDone) )
         {
+            trackSystemSleepPreventers(
+                fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags);
+
                        // we changed, tell our parent
             requestDomainPower(fHeadNotePowerState);
 
@@ -4037,14 +4021,8 @@ void IOService::all_done ( void )
                          ((fHeadNoteChangeFlags & kIOPMDomainDidChange)  &&
              (fCurrentPowerState < fHeadNotePowerState)))
         {
-            if ((fHeadNoteChangeFlags & kIOPMPowerSuppressed) &&
-                (fHeadNotePowerState != fCurrentPowerState) &&
-                (fHeadNotePowerState == fDesiredPowerState))
-            {
-                // Power changed, and desired power state restored.
-                // Clear any prior power desire while in suppressed state.
-                requestDomainPower(fHeadNotePowerState);
-            }
+            trackSystemSleepPreventers(
+                fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags);
 
             // did power raise?
             if ( fCurrentPowerState < fHeadNotePowerState )
@@ -4221,7 +4199,6 @@ IOReturn IOService::requestDomainPower(
     IOPMPowerStateIndex ourPowerState,
     IOOptionBits        options )
 {
-    const IOPMPSEntry *             powerStateEntry;
     IOPMPowerFlags                  requestPowerFlags;
     IOPMPowerStateIndex             maxPowerState;
     IOPMRequestDomainPowerContext   context;
@@ -4236,13 +4213,7 @@ IOReturn IOService::requestDomainPower(
     // Fetch the input power flags for the requested power state.
     // Parent request is stated in terms of required power flags.
 
-       powerStateEntry = &fPowerStates[ourPowerState];
-       requestPowerFlags = powerStateEntry->inputPowerFlags;
-
-    if (powerStateEntry->capabilityFlags & (kIOPMChildClamp | kIOPMPreventIdleSleep))
-        requestPowerFlags |= kIOPMPreventIdleSleep;
-    if (powerStateEntry->capabilityFlags & (kIOPMChildClamp2 | kIOPMPreventSystemSleep))
-        requestPowerFlags |= kIOPMPreventSystemSleep;
+       requestPowerFlags = fPowerStates[ourPowerState].inputPowerFlags;
 
     // Disregard the "previous request" for power reservation.
 
@@ -5091,18 +5062,24 @@ static void logAppTimeouts ( OSObject * object, void * arg )
             (flag = context->responseArray->getObject(clientIndex)) &&
             (flag != kOSBooleanTrue))
         {
-            OSString * clientID = 0;
-            context->us->messageClient(context->messageType, object, &clientID);
-            PM_ERROR(context->errorLog, clientID ? clientID->getCStringNoCopy() : "");
+            OSString *logClientID = NULL;
+            OSNumber *clientID = copyClientIDForNotification(object, context);    
+            
+            if (clientID) {
+                logClientID = IOCopyLogNameForPID(clientID->unsigned32BitValue());
+                clientID->release();
+            }
+                
+            PM_ERROR(context->errorLog, logClientID ? logClientID->getCStringNoCopy() : "");
 
             // TODO: record message type if possible
             IOService::getPMRootDomain()->pmStatsRecordApplicationResponse(
                 gIOPMStatsApplicationResponseTimedOut,
-                clientID ? clientID->getCStringNoCopy() : "",
+                logClientID ? logClientID->getCStringNoCopy() : "",
                 0, (30*1000), -1);
 
-            if (clientID)
-                clientID->release();
+            if (logClientID)
+                logClientID->release();
         }
     }
 }
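
This conversion is needed because kIOMessageCopyClientID now answers with the client's pid as an OSNumber (see the handler change in the user-client code further down) instead of a preformatted string; IOCopyLogNameForPID() turns the pid into the printable form used in the logs. A condensed sketch built from the helpers this commit adds:

    OSNumber * pidNum = copyClientIDForNotification(object, context);
    if (pidNum) {
        OSString * name = IOCopyLogNameForPID(pidNum->unsigned32BitValue());
        pidNum->release();
        if (name) {
            IOLog("timed out: %s\n", name->getCStringNoCopy());
            name->release();
        }
    }
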
@@ -5225,7 +5202,7 @@ bool IOService::tellClientsWithResponse ( int messageType )
                 context.notifyType  = fOutOfBandParameter;
                 context.messageType = messageType;
             }
-            context.maxTimeRequested = k30seconds;
+            context.maxTimeRequested = k30Seconds;
 
             applyToInterested( gIOGeneralInterest,
                                pmTellClientWithResponse, (void *) &context );
@@ -5252,7 +5229,7 @@ bool IOService::tellClientsWithResponse ( int messageType )
             applyToInterested( gIOAppPowerStateInterest,
                                pmTellCapabilityAppWithResponse, (void *) &context );
             fNotifyClientArray = context.notifyClients;
-            context.maxTimeRequested = k30seconds;
+            context.maxTimeRequested = k30Seconds;
             break;
 
         case kNotifyCapabilityChangePriority:
@@ -5299,6 +5276,9 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * arg )
     IOPMInterestContext *   context = (IOPMInterestContext *) arg;
     IOServicePM *           pwrMgt = context->us->pwrMgt;
     uint32_t                msgIndex, msgRef, msgType;
+    OSNumber                *clientID = NULL;
+    proc_t                  proc = NULL;
+    boolean_t               proc_suspended = FALSE;
 #if LOG_APP_RESPONSE_TIMES
     AbsoluteTime            now;
 #endif
@@ -5306,19 +5286,34 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * arg )
     if (!OSDynamicCast(_IOServiceInterestNotifier, object))
         return;
 
+    if (context->us == getPMRootDomain())
+    {
+        if ((clientID = copyClientIDForNotification(object, context)))
+        {
+            uint32_t clientPID = clientID->unsigned32BitValue();
+            clientID->release();
+            proc = proc_find(clientPID);
+
+            if (proc)
+            {
+                proc_suspended = get_task_pidsuspended((task_t) proc->task);
+                proc_rele(proc);
+
+                if (proc_suspended)
+                {
+                    logClientIDForNotification(object, context, "PMTellAppWithResponse - Suspended");
+                    return;
+                }
+            }
+        }
+    }
+    
     if (context->messageFilter &&
         !context->messageFilter(context->us, object, context, 0, 0))
     {
         if (kIOLogDebugPower & gIOKitDebug)
         {
-            // Log client pid/name and client array index.
-            OSString * clientID = 0;
-            context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
-            PM_LOG("%s DROP App %s, %s\n",
-                context->us->getName(),
-                getIOMessageString(context->messageType),
-                clientID ? clientID->getCStringNoCopy() : "");
-            if (clientID) clientID->release();
+            logClientIDForNotification(object, context, "DROP App");
         }
         return;
     }
@@ -5335,14 +5330,7 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * arg )
     OUR_PMLog(kPMLogAppNotify, msgType, msgRef);
     if (kIOLogDebugPower & gIOKitDebug)
     {
-        // Log client pid/name and client array index.
-        OSString * clientID = 0;
-        context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
-        PM_LOG("%s MESG App(%u) %s, %s\n",
-            context->us->getName(),
-            msgIndex, getIOMessageString(msgType),
-            clientID ? clientID->getCStringNoCopy() : "");
-        if (clientID) clientID->release();
+        logClientIDForNotification(object, context, "MESG App");
     }
 
 #if LOG_APP_RESPONSE_TIMES
@@ -5433,15 +5421,12 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg )
     }
 
     retCode = context->us->messageClient(msgType, object, (void *) &notify, sizeof(notify));
-    if ( kIOReturnSuccess == retCode )
+
+    if (kIOReturnSuccess == retCode)
     {
-        if ( 0 == notify.returnValue )
-        {
-            // client doesn't want time to respond
+        if (0 == notify.returnValue) {
                        OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object);
-        }
-        else
-        {
+        } else {
             replied = kOSBooleanFalse;
             if ( notify.returnValue > context->maxTimeRequested )
             {
@@ -5458,9 +5443,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg )
                     context->maxTimeRequested = notify.returnValue;
             }
         }
-    }
-    else
-    {
+    } else {
         // not a client of ours
         // so we won't be waiting for response
                OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0);
@@ -5507,14 +5490,20 @@ void IOService::pmTellCapabilityAppWithResponse ( OSObject * object, void * arg
     if (kIOLogDebugPower & gIOKitDebug)
     {
         // Log client pid/name and client array index.
-        OSString * clientID = 0;
+        OSNumber * clientID = NULL;
+        OSString * clientIDString = NULL;
         context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+        if (clientID) {
+            clientIDString = IOCopyLogNameForPID(clientID->unsigned32BitValue());
+        }
+    
         PM_LOG("%s MESG App(%u) %s, wait %u, %s\n",
             context->us->getName(),
             msgIndex, getIOMessageString(msgType),
             (replied != kOSBooleanTrue),
-            clientID ? clientID->getCStringNoCopy() : "");
+            clientIDString ? clientIDString->getCStringNoCopy() : "");
         if (clientID) clientID->release();
+        if (clientIDString) clientIDString->release();
     }
 
     msgArg.notifyRef = msgRef;
@@ -5763,43 +5752,86 @@ static void tellKernelClientApplier ( OSObject * object, void * arg )
     }
 }
 
-//*********************************************************************************
-// [private] tellAppClientApplier
-//
-// Message a registered application.
-//*********************************************************************************
+static OSNumber * copyClientIDForNotification(
+    OSObject *object, 
+    IOPMInterestContext *context)
+{
+    OSNumber *clientID = NULL;
+    context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+    return clientID;
+}
+
+static void logClientIDForNotification(
+    OSObject *object,
+    IOPMInterestContext *context, 
+    const char *logString)
+{
+    OSString *logClientID = NULL;
+    OSNumber *clientID = copyClientIDForNotification(object, context);    
+
+    if (logString) 
+    {
+        if (clientID)
+            logClientID = IOCopyLogNameForPID(clientID->unsigned32BitValue());
+    
+        PM_LOG("%s %s %s, %s\n",
+            context->us->getName(), logString,
+            IOService::getIOMessageString(context->messageType),
+            logClientID ? logClientID->getCStringNoCopy() : "");
+
+        if (logClientID) 
+            logClientID->release();
+    }
+    
+    if (clientID) 
+        clientID->release();
+
+    return;
+}
+
 
 static void tellAppClientApplier ( OSObject * object, void * arg )
 {
     IOPMInterestContext * context = (IOPMInterestContext *) arg;
+    OSNumber            * clientID = NULL;
+    proc_t                proc = NULL;
+    boolean_t             proc_suspended = FALSE;
+    
+    if (context->us == IOService::getPMRootDomain())
+    {
+        if ((clientID = copyClientIDForNotification(object, context)))
+        {
+            uint32_t clientPID = clientID->unsigned32BitValue();
+            clientID->release();
+            proc = proc_find(clientPID);
+
+            if (proc)
+            {
+                proc_suspended = get_task_pidsuspended((task_t) proc->task);
+                proc_rele(proc);
+
+                if (proc_suspended)
+                {
+                    logClientIDForNotification(object, context, "tellAppClientApplier - Suspended");
+                    return;
+                }
+            }
+        }
+    }
 
     if (context->messageFilter &&
         !context->messageFilter(context->us, object, context, 0, 0))
     {
         if (kIOLogDebugPower & gIOKitDebug)
         {
-            // Log client pid/name and client array index.
-            OSString * clientID = 0;
-            context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
-            PM_LOG("%s DROP App %s, %s\n",
-                context->us->getName(),
-                IOService::getIOMessageString(context->messageType),
-                clientID ? clientID->getCStringNoCopy() : "");
-            if (clientID) clientID->release();
+            logClientIDForNotification(object, context, "DROP App");
         }
         return;
     }
 
     if (kIOLogDebugPower & gIOKitDebug)
     {
-        // Log client pid/name and client array index.
-        OSString * clientID = 0;
-        context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
-        PM_LOG("%s MESG App %s, %s\n",
-            context->us->getName(),
-            IOService::getIOMessageString(context->messageType),
-            clientID ? clientID->getCStringNoCopy() : "");
-        if (clientID) clientID->release();
+        logClientIDForNotification(object, context, "MESG App");
     }
 
     context->us->messageClient(context->messageType, object, 0);
@@ -5814,20 +5846,18 @@ bool IOService::checkForDone ( void )
     int                        i = 0;
     OSObject * theFlag;
 
-    if ( fResponseArray == NULL )
-    {
+    if (fResponseArray == NULL) {
         return true;
     }
     
-    for ( i = 0; ; i++ )
-    {
+    for (i = 0; ; i++) {
         theFlag = fResponseArray->getObject(i);
-        if ( theFlag == NULL )
-        {
+
+        if (NULL == theFlag) {
             break;
         }
-        if ( kOSBooleanTrue != theFlag ) 
-        {
+
+        if (kOSBooleanTrue != theFlag) {
             return false;
         }
     }
@@ -6778,7 +6808,6 @@ void IOService::executePMRequest( IOPMRequest * request )
 
                case kIOPMRequestTypeAdjustPowerState:
                        fAdjustPowerScheduled = false;
-                       rebuildChildClampBits();
                        adjustPowerState();
                        break;
 
index bd2ec9234ca82aed8cb44d1108b91da7ac0515d1..61e88e4ebb53d57a71739bcc0630b79865e1e388 100644
@@ -232,7 +232,6 @@ private:
     unsigned int            InitialPowerChange:1;
     unsigned int            InitialSetPowerState:1;
     unsigned int            DeviceOverrideEnabled:1;
-    unsigned int            DeviceWasActive:1;
     unsigned int            DoNotPowerDown:1;
     unsigned int            ParentsKnowState:1;
     unsigned int            StrictTreeOrder:1;
@@ -313,7 +312,12 @@ private:
     uint32_t                OutOfBandMessage;
     uint32_t                TempClampCount;
     uint32_t                OverrideMaxPowerState;
+
+    // Protected by ActivityLock - BEGIN
     uint32_t                ActivityTickleCount;
+    uint32_t                DeviceWasActive;
+    // Protected by ActivityLock - END
+
     uint32_t                WaitReason;
     uint32_t                SavedMachineState;
     uint32_t                RootDomainState;
@@ -445,7 +449,7 @@ the ack timer is ticking every tenth of a second.
 #define kIOPMSyncNoChildNotify      0x0200  // sync root domain only, not entire tree
 #define kIOPMSyncTellPowerDown      0x0400  // send the ask/will power off messages
 #define kIOPMSyncCancelPowerDown    0x0800  // sleep cancel for maintenance wake
-#define kIOPMPowerSuppressed        0x1000  // power suppressed for dark wake
+#define kIOPMInitialPowerChange     0x1000  // set for initial power change
 
 enum {
     kDriverCallInformPreChange,
index 873d47660d21ebf480c62fbb314f3a5411c21a54..cba83742d60cd3533d8703dbb441df9e2aaa8ab7 100644
 
 // options for getExistingServices()
 enum {
-    kIONotifyOnce              = 0x00000001,
-    kIOServiceExistingSet      = 0x00000002
+    kIONotifyOnce            = 0x00000001,
+    kIOServiceExistingSet     = 0x00000002,
+    kIOServiceChangesOK       = 0x00000004,
+    kIOServiceInternalDone    = 0x00000008,
+    kIOServiceClassDone       = 0x00000010,
 };
 
 // masks for __state[1]
@@ -56,7 +59,7 @@ enum {
     kIOServiceTermPhase2State  = 0x01000000,
     kIOServiceTermPhase3State  = 0x00800000,
     kIOServiceTermPhase1State  = 0x00400000,
-       kIOServiceTerm1WaiterState  = 0x00200000
+    kIOServiceTerm1WaiterState  = 0x00200000
 };
 
 // options for terminate()
index 9235b293d8f6c003c10fb016d7ff879cd8f06754..def362e60fff448676276e77cce1d7d5f1bc04c0 100644
@@ -39,7 +39,6 @@
 #include <IOKit/IOKitDebug.h>
 
 #if IOKITSTATS
-
 bool IOStatistics::enabled = false;
 
 uint32_t IOStatistics::sequenceID = 0;
@@ -178,14 +177,10 @@ void IOStatistics::initialize()
                return;
        }
 
-#if DEVELOPMENT || DEBUG
-       /* Always enabled in development and debug builds. */
-#else
-       /* Only enabled in release builds if the boot argument is set. */
+       /* Only enabled if the boot argument is set. */
        if (!(kIOStatistics & gIOKitDebug)) {
                return;
        }
-#endif 
        
        sysctl_register_oid(&sysctl__debug_iokit_statistics_general);
        sysctl_register_oid(&sysctl__debug_iokit_statistics_workloop);
@@ -1218,7 +1213,7 @@ KextNode *IOStatistics::getKextNodeFromBacktrace(boolean_t write) {
        vm_offset_t *scanAddr = NULL;
        uint32_t i;
        KextNode *found = NULL, *ke = NULL;
-
+    
        btCount = OSBacktrace(bt, btCount);
 
        if (write) {
@@ -1230,7 +1225,7 @@ KextNode *IOStatistics::getKextNodeFromBacktrace(boolean_t write) {
        /* Ignore first levels */
        scanAddr = (vm_offset_t *)&bt[btMin - 1];
 
-       for (i = 0; i < btCount; i++, scanAddr++) {
+       for (i = btMin - 1; i < btCount; i++, scanAddr++) {
                ke = RB_ROOT(&kextAddressHead);
                while (ke) {
                        if (*scanAddr < ke->address) {
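
The one-line change above fixes a bounds bug: scanAddr starts at &bt[btMin - 1], but the counter previously started at 0 and still ran up to btCount, walking btMin - 1 entries past the end of bt[]. Starting the counter at btMin - 1 keeps the pointer inside the array. A minimal standalone illustration (array size and skip depth are assumptions):

    void *        bt[128];
    uint32_t      btCount = OSBacktrace(bt, 128);   // frames actually captured
    const uint32_t btMin  = 3;                      // assumed frames to skip
    vm_offset_t * scanAddr = (vm_offset_t *) &bt[btMin - 1];
    for (uint32_t i = btMin - 1; i < btCount; i++, scanAddr++) {
        // *scanAddr stays within bt[0 .. btCount-1]
    }
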
index 3e06210fb96aff53356da1fad2f5c14361478fa3..c82a927ee8563b9e5c18e586e683d4529895fe51 100644
@@ -61,10 +61,7 @@ bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent,
                                        IOByteCount offset, IOByteCount length,
                                        IODirection direction )
 {
-    if( !parent)
-       return( false);
-
-    if( (offset + length) > parent->getLength())
+    if( parent && ((offset + length) > parent->getLength()))
        return( false);
 
     /*
@@ -83,10 +80,15 @@ bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent,
         */
 
        _parent->release();
-       _parent = 0;
     }
 
-    parent->retain();
+    if (parent) {
+       parent->retain();
+       _tag    = parent->getTag();
+    }
+    else {
+        _tag    = 0;
+    }
     _parent    = parent;
     _start     = offset;
     _length    = length;
@@ -94,7 +96,6 @@ bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent,
 #ifndef __LP64__
     _direction  = (IODirection) (_flags & kIOMemoryDirectionMask);
 #endif /* !__LP64__ */
-    _tag       = parent->getTag();
 
     return( true );
 }
@@ -188,6 +189,19 @@ IOMemoryMap * IOSubMemoryDescriptor::makeMapping(
 uint64_t
 IOSubMemoryDescriptor::getPreparationID( void )
 {
-    return (_parent->getPreparationID());    
+    uint64_t pID;
+
+    if (!super::getKernelReserved())
+        return (kIOPreparationIDUnsupported);    
+
+    pID = _parent->getPreparationID();
+    if (reserved->kernReserved[0] != pID)
+    {
+        reserved->kernReserved[0] = pID;
+        reserved->preparationID   = kIOPreparationIDUnprepared;
+        super::setPreparationID();
+    }
+
+    return (super::getPreparationID());    
 }
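
getPreparationID() now snapshots the parent's preparation ID in reserved->kernReserved[0]; whenever the parent re-prepares and its ID moves, the sub-descriptor resets its own cached ID to kIOPreparationIDUnprepared so that super::setPreparationID() mints a fresh one. A hedged, generic restatement of this generation-tracking pattern:

    // Generic sketch of the invalidate-on-parent-change pattern above.
    struct SubRange {
        uint64_t parentGen = 0;   // last parent ID seen (~kernReserved[0])
        uint64_t localID   = 0;   // 0 plays the role of "unprepared"
        uint64_t nextID    = 1;

        uint64_t getID(uint64_t currentParentID)
        {
            if (parentGen != currentParentID) {
                parentGen = currentParentID;  // parent re-prepared:
                localID   = 0;                // drop our stale ID
            }
            if (!localID) localID = nextID++; // ~super::setPreparationID()
            return localID;
        }
    };
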
 
index 32ce10c0f80bfbc9e4ad4340283ce2d131f469a1..29fb9577f811769ebc5bbce05d1b0b47f921bfae 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -580,6 +580,9 @@ bool IOServiceUserNotification::init( mach_port_t port, natural_t type,
                                       void * reference, vm_size_t referenceSize,
                                       bool clientIs64 )
 {
+    if( !super::init())
+        return( false );
+
     newSet = OSArray::withCapacity( 1 );
     if( !newSet)
         return( false );
@@ -605,7 +608,7 @@ bool IOServiceUserNotification::init( mach_port_t port, natural_t type,
     pingMsg->notifyHeader.type = type;
     bcopy( reference, pingMsg->notifyHeader.reference, referenceSize );
 
-    return( super::init() );
+    return( true );
 }
 
 void IOServiceUserNotification::free( void )
@@ -622,8 +625,12 @@ void IOServiceUserNotification::free( void )
 
     super::free();
 
-    if( _pingMsg && _msgSize)
-        IOFree( _pingMsg, _msgSize);
+    if( _pingMsg && _msgSize) {
+               if (_pingMsg->msgHdr.msgh_remote_port) {
+                       iokit_release_port_send(_pingMsg->msgHdr.msgh_remote_port);
+               }
+        IOFree(_pingMsg, _msgSize);
+       }
 
     if( _lastEntry)
         _lastEntry->release();
@@ -715,6 +722,8 @@ bool IOServiceMessageUserNotification::init( mach_port_t port, natural_t type,
                                void * reference, vm_size_t referenceSize, vm_size_t extraSize,
                                bool client64 )
 {
+    if( !super::init())
+        return( false );
 
     if (referenceSize > sizeof(OSAsyncReference64))
         return( false );
@@ -749,7 +758,7 @@ bool IOServiceMessageUserNotification::init( mach_port_t port, natural_t type,
     pingMsg->notifyHeader.type                 = type;
     bcopy( reference, pingMsg->notifyHeader.reference, referenceSize );
 
-    return( super::init() );
+    return( true );
 }
 
 void IOServiceMessageUserNotification::free( void )
@@ -762,8 +771,12 @@ void IOServiceMessageUserNotification::free( void )
 
     super::free();
 
-    if( _pingMsg && _msgSize)
+    if( _pingMsg && _msgSize) {
+               if (_pingMsg->msgHdr.msgh_remote_port) {
+                       iokit_release_port_send(_pingMsg->msgHdr.msgh_remote_port);
+               }
         IOFree( _pingMsg, _msgSize);
+       }
 }
 
 IOReturn IOServiceMessageUserNotification::_handler( void * target, void * ref,
@@ -786,8 +799,8 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref,
 
     if (kIOMessageCopyClientID == messageType)
     {
-       *((void **) messageArgument) = IOCopyLogNameForPID(owningPID);
-       return (kIOReturnSuccess);
+        *((void **) messageArgument) = OSNumber::withNumber(owningPID, 32);
+        return (kIOReturnSuccess);
     }
 
     data->messageType = messageType;
@@ -1619,6 +1632,60 @@ kern_return_t is_io_service_get_matching_services_ool(
     return( kr );
 }
 
+
+/* Routine io_service_get_matching_service */
+kern_return_t is_io_service_get_matching_service(
+       mach_port_t master_port,
+       io_string_t matching,
+       io_service_t *service )
+{
+    kern_return_t      kr;
+    OSObject *         obj;
+    OSDictionary *     dict;
+
+    if( master_port != master_device_port)
+        return( kIOReturnNotPrivileged);
+
+    obj = OSUnserializeXML( matching );
+
+    if( (dict = OSDynamicCast( OSDictionary, obj))) {
+        *service = IOService::copyMatchingService( dict );
+       kr = *service ? kIOReturnSuccess : kIOReturnNotFound;
+    } else
+       kr = kIOReturnBadArgument;
+
+    if( obj)
+        obj->release();
+
+    return( kr );
+}
+
+/* Routine io_service_get_matching_services_ool */
+kern_return_t is_io_service_get_matching_service_ool(
+       mach_port_t master_port,
+       io_buf_ptr_t matching,
+       mach_msg_type_number_t matchingCnt,
+       kern_return_t *result,
+       io_object_t *service )
+{
+    kern_return_t      kr;
+    vm_offset_t        data;
+    vm_map_offset_t    map_data;
+
+    kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) matching );
+    data = CAST_DOWN(vm_offset_t, map_data);
+
+    if( KERN_SUCCESS == kr) {
+        // must return success after vm_map_copyout() succeeds
+       *result = is_io_service_get_matching_service( master_port,
+                       (char *) data, service );
+       vm_deallocate( kernel_map, data, matchingCnt );
+    }
+
+    return( kr );
+}
+
+
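
These two routines appear to back the user-space IOServiceGetMatchingService() call, which returns at most one service and consumes a reference on the matching dictionary. A hedged user-space sketch (IOKit.framework; error handling elided, "IOMedia" is illustrative):

    #include <IOKit/IOKitLib.h>

    io_service_t svc = IOServiceGetMatchingService(kIOMasterPortDefault,
                           IOServiceMatching("IOMedia"));
    if (svc != IO_OBJECT_NULL) {
        // ... inspect properties ...
        IOObjectRelease(svc);
    }
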
 static kern_return_t internal_io_service_add_notification(
        mach_port_t master_port,
        io_name_t notification_type,
@@ -1667,6 +1734,7 @@ static kern_return_t internal_io_service_add_notification(
 
         if( userNotify && !userNotify->init( port, userMsgType,
                                              reference, referenceSize, client64)) {
+                       iokit_release_port_send(port);
             userNotify->release();
             userNotify = 0;
         }
@@ -1828,6 +1896,7 @@ static kern_return_t internal_io_service_add_interest_notification(
                                              reference, referenceSize,
                                             kIOUserNotifyMaxMessageSize,
                                             client64 )) {
+                       iokit_release_port_send(port);
             userNotify->release();
             userNotify = 0;
         }
@@ -3846,72 +3915,6 @@ kern_return_t shim_io_async_method_structureI_structureO(
     return( err);
 }
 
-/* Routine io_make_matching */
-kern_return_t is_io_make_matching(
-       mach_port_t         master_port,
-       uint32_t            type,
-       uint32_t                options,
-        io_struct_inband_t     input,
-        mach_msg_type_number_t inputCount,
-       io_string_t     matching )
-{
-    OSSerialize *      s;
-    IOReturn           err = kIOReturnSuccess;
-    OSDictionary *     dict;
-
-    if( master_port != master_device_port)
-        return( kIOReturnNotPrivileged);
-
-    switch( type) {
-
-       case kIOServiceMatching:
-            dict = IOService::serviceMatching( gIOServiceKey );
-           break;
-
-       case kIOBSDNameMatching:
-           dict = IOBSDNameMatching( (const char *) input );
-           break;
-
-       case kIOOFPathMatching:
-           dict = IOOFPathMatching( (const char *) input,
-                                    matching, sizeof( io_string_t));
-           break;
-
-       default:
-           dict = 0;
-    }
-
-    if( !dict)
-       return( kIOReturnUnsupported);
-
-    do {
-        s = OSSerialize::withCapacity(4096);
-        if( !s) {
-            err = kIOReturnNoMemory;
-           continue;
-       }
-        s->clearText();
-        if( !dict->serialize( s )) {
-            err = kIOReturnUnsupported;
-           continue;
-        }
-
-        if( s->getLength() > sizeof( io_string_t)) {
-            err = kIOReturnNoMemory;
-           continue;
-        } else
-            strlcpy(matching, s->text(), sizeof(io_string_t));
-    }
-    while( false);
-
-    if( s)
-       s->release();
-    if( dict)
-       dict->release();
-
-    return( err);
-}
-
 /* Routine io_catalog_send_data */
 kern_return_t is_io_catalog_send_data(
         mach_port_t            master_port,
index 51045a234cfe441eb52768a62bf6af452a3e72a9..d2c28b04391aa89c968d6a38801f766e2d89c4e1 100644
@@ -556,6 +556,8 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
 
     case mRemoveEvent:
         if (inEvent->getWorkLoop()) {
+               IOStatisticsDetachEventSource();
+               
                if (eventSourcePerformsWork(inEvent)) {
                                if (eventChain == inEvent)
                                        eventChain = inEvent->getNext();
@@ -595,7 +597,6 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
             inEvent->setNext(0);
             inEvent->release();
             SETP(&fFlags, kLoopRestart);
-            IOStatisticsDetachEventSource();
         }
         break;
 
index 92097acdef15cfb9790aa6f2b90c456084bbd165..75a26e7cc544f7af607e8142bc74bc5118e1f0e7 100644
@@ -36,6 +36,7 @@
 #include <IOKit/IOBufferMemoryDescriptor.h>
 #include "RootDomainUserClient.h"
 #include <IOKit/pwr_mgt/IOPMLibDefs.h>
+#include <IOKit/pwr_mgt/IOPMPrivate.h>
 
 #define super IOUserClient
 
@@ -311,6 +312,7 @@ IOReturn RootDomainUserClient::externalMethod(
             
         case kPMActivityTickle:
             fOwner->reportUserInput( );
+            fOwner->setProperty(kIOPMRootDomainWakeTypeKey, "UserActivity Assertion");
             ret = kIOReturnSuccess;
             break;
             
index feffd1a9e44f1fd86a4d060d54e07bd95fc28472..35ca86d8d2713d9781847a5525b7849e93e36864 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -155,83 +155,6 @@ OSDictionary * IOUUIDMatching( void )
     return IOService::resourceMatching( "boot-uuid-media" );
 }
 
-
-OSDictionary * IOCDMatching( void )
-{
-    OSDictionary *     dict;
-    const OSSymbol *   str;
-    
-    dict = IOService::serviceMatching( "IOMedia" );
-    if( dict == 0 ) {
-        IOLog("Unable to find IOMedia\n");
-        return 0;
-    }
-    
-    str = OSSymbol::withCString( "CD_ROM_Mode_1" );
-    if( str == 0 ) {
-        dict->release();
-        return 0;
-    }
-    
-    dict->setObject( "Content Hint", (OSObject *)str );
-    str->release();        
-    return( dict );
-}
-
-OSDictionary * IONetworkMatching(  const char * path,
-                                  char * buf, int maxLen )
-{
-    OSDictionary *     matching = 0;
-    OSDictionary *     dict;
-    OSString *         str;
-    char *             comp;
-    const char *       skip;
-    int                        len;
-
-    do {
-
-       len = strlen( kIODeviceTreePlane ":" );
-       maxLen -= len;
-       if( maxLen <= 0)
-           continue;
-
-       strlcpy( buf, kIODeviceTreePlane ":", len + 1 );
-       comp = buf + len;
-
-        // remove parameters following ':' from the path
-        skip = strchr( path, ':');
-       if( !skip)
-           continue;
-
-        len = skip - path;
-       maxLen -= len;
-       if( maxLen <= 0)
-           continue;
-       strlcpy( comp, path, len + 1 );
-
-       matching = IOService::serviceMatching( "IONetworkInterface" );
-       if( !matching)
-           continue;
-       dict = IOService::addLocation( matching );
-       if( !dict)
-           continue;
-
-       str = OSString::withCString( buf );
-       if( !str)
-           continue;
-        dict->setObject( kIOPathMatchKey, str );
-       str->release();
-
-       return( matching );
-
-    } while( false );
-
-    if( matching)
-        matching->release();
-
-    return( 0 );
-}
-
 OSDictionary * IONetworkNamePrefixMatching( const char * prefix )
 {
     OSDictionary *      matching;
@@ -339,107 +262,6 @@ static bool IORegisterNetworkInterface( IOService * netif )
        return ( netif->getProperty( kIOBSDNameKey ) != 0 );
 }
 
-OSDictionary * IODiskMatching( const char * path, char * buf, int maxLen )
-{
-    const char * look;
-    const char * alias;
-    char *       comp;
-    long         unit = -1;
-    long         partition = -1;
-    long                lun = -1;
-    char         c;
-    int          len;
-
-    // scan the tail of the path for "@unit:partition"
-    do {
-        // Have to get the full path to the controller - an alias may
-        // tell us next to nothing, like "hd:8"
-        alias = IORegistryEntry::dealiasPath( &path, gIODTPlane );
-               
-        look = path + strlen( path);
-        c = ':';
-        while( look != path) {
-            if( *(--look) == c) {
-                if( c == ':') {
-                    partition = strtol( look + 1, 0, 0 );
-                    c = '@';
-                } else if( c == '@') {
-                    unit = strtol( look + 1, &comp, 16 );
-
-                    if( *comp == ',') {
-                        lun = strtol( comp + 1, 0, 16 );
-                    }
-                    
-                    c = '/';
-                } else if( c == '/') {
-                    c = 0;
-                    break;
-                }
-            }
-
-               if( alias && (look == path)) {
-                path = alias;
-                look = path + strlen( path);
-                alias = 0;
-            }
-        }
-        if( c || unit == -1 || partition == -1)
-            continue;
-               
-        len = strlen( "{" kIOPathMatchKey "='" kIODeviceTreePlane ":" );
-        maxLen -= len;
-        if( maxLen <= 0)
-            continue;
-
-        snprintf( buf, len + 1, "{" kIOPathMatchKey "='" kIODeviceTreePlane ":" );
-        comp = buf + len;
-
-        if( alias) {
-            len = strlen( alias );
-            maxLen -= len;
-            if( maxLen <= 0)
-                continue;
-
-            strlcpy( comp, alias, len + 1 );
-            comp += len;
-        }
-
-        if ( (look - path)) {
-            len = (look - path);
-            maxLen -= len;
-            if( maxLen <= 0)
-                continue;
-
-            strlcpy( comp, path, len + 1 );
-            comp += len;
-        }
-                       
-        if ( lun != -1 )
-        {
-            len = strlen( "/@hhhhhhhh,hhhhhhhh:dddddddddd';}" );
-            maxLen -= len;
-            if( maxLen <= 0)
-                continue;
-
-            snprintf( comp, len + 1, "/@%lx,%lx:%ld';}", unit, lun, partition );
-        }
-        else
-        {
-            len = strlen( "/@hhhhhhhh:dddddddddd';}" );
-            maxLen -= len;
-            if( maxLen <= 0)
-                continue;
-
-            snprintf( comp, len + 1, "/@%lx:%ld';}", unit, partition );
-        }
-               
-        return( OSDynamicCast(OSDictionary, OSUnserialize( buf, 0 )) );
-
-    } while( false );
-
-    return( 0 );
-}
-
 OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen )
 {
     OSDictionary *     matching;
@@ -447,13 +269,6 @@ OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen )
     char *             comp;
     int                        len;
 
-    /* need to look up path, get device type,
-        call matching help based on device type */
-
-    matching = IODiskMatching( path, buf, maxLen );
-    if( matching)
-       return( matching );
-
     do {
 
        len = strlen( kIODeviceTreePlane ":" );
@@ -490,42 +305,6 @@ OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen )
     return( 0 );
 }
 
-IOService * IOFindMatchingChild( IOService * service )
-{
-    // find a matching child service
-    IOService * child = 0;
-    OSIterator * iter = service->getClientIterator();
-    if ( iter ) {
-        while( ( child = (IOService *) iter->getNextObject() ) ) {
-            OSDictionary * dict = OSDictionary::withCapacity( 1 );
-            if( dict == 0 ) {
-                iter->release();
-                return 0;
-            }
-            const OSSymbol * str = OSSymbol::withCString( "Apple_HFS" );
-            if( str == 0 ) {
-                dict->release();
-                iter->release();
-                return 0;
-            }
-            dict->setObject( "Content", (OSObject *)str );
-            str->release();
-            if ( child->compareProperty( dict, "Content" ) ) {
-                dict->release();
-                break;
-            }
-            dict->release();
-            IOService * subchild = IOFindMatchingChild( child );
-            if ( subchild ) {
-                child = subchild;
-                break;
-            }
-        }
-        iter->release();
-    }
-    return child;
-}
-
 static int didRam = 0;
 
 kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
@@ -538,18 +317,15 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
     OSString *         iostr;
     OSNumber *         off;
     OSData *           data = 0;
-    UInt32             *ramdParms = 0;
 
     UInt32             flags = 0;
     int                        mnr, mjr;
-    bool               findHFSChild = false;
     const char *        mediaProperty = 0;
     char *             rdBootVar;
     enum {             kMaxPathBuf = 512, kMaxBootVar = 128 };
     char *             str;
     const char *       look = 0;
     int                        len;
-    bool               forceNet = false;
     bool               debugInfoPrintedOnce = false;
     const char *       uuidStr = NULL;
 
@@ -599,34 +375,10 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
                    uuidStr = NULL;
                }
            }
-
-           // else try for an OF Path
-           data = (OSData *) regEntry->getProperty( "rootpath" );
            regEntry->release();
-           if( data) continue;
-       }
-        if( (regEntry = IORegistryEntry::fromPath( "/options", gIODTPlane ))) {
-           data = (OSData *) regEntry->getProperty( "boot-file" );
-           regEntry->release();
-           if( data) continue;
        }
     } while( false );
 
-    if( data && !uuidStr)
-        look = (const char *) data->getBytesNoCopy();
-
-    if( rdBootVar[0] == '*') {
-        look = rdBootVar + 1;
-               forceNet = false;
-    } else {
-        if( (regEntry = IORegistryEntry::fromPath( "/", gIODTPlane ))) {
-            forceNet = (0 != regEntry->getProperty( "net-boot" ));
-               regEntry->release();
-               }
-    }
-
-
-
 //
 //     See if we have a RAMDisk property in /chosen/memory-map.  If so, make it into a device.
 //     It will become /dev/mdx, where x is 0-f. 
@@ -637,7 +389,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
                if((regEntry = IORegistryEntry::fromPath( "/chosen/memory-map", gIODTPlane ))) {        /* Find the map node */
                        data = (OSData *)regEntry->getProperty("RAMDisk");      /* Find the ram disk, if there */
                        if(data) {                                                                                      /* We found one */
-
+                               UInt32          *ramdParms = 0;
                                ramdParms = (UInt32 *)data->getBytesNoCopy();   /* Point to the ram disk base and size */
                                (void)mdevadd(-1, ml_static_ptovirt(ramdParms[0]) >> 12, ramdParms[1] >> 12, 0);        /* Initialize it and pass back the device number */
                        }
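
The hunk above narrows ramdParms to the block that uses it: the "RAMDisk" property carries two 32-bit words, the ramdisk's physical base and its size in bytes, which the kernel converts to a page number (via ml_static_ptovirt) and a page count before calling mdevadd(). A minimal C sketch of that arithmetic, with made-up values standing in for the property bytes and an identity stub in place of the real translation:

    /* Sketch of the RAMDisk parameter math above. ramdParms[0] is the
     * physical base and ramdParms[1] the size in bytes; both are shifted
     * by 12 because mdevadd() works in 4 KB pages. Values and the
     * phys-to-virt stub are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t ml_static_ptovirt_stub(uint32_t pa) { return pa; } /* stand-in */

    int main(void)
    {
        uint32_t ramdParms[2] = { 0x08000000, 0x00400000 }; /* base, size */
        uint32_t base_page  = ml_static_ptovirt_stub(ramdParms[0]) >> 12;
        uint32_t page_count = ramdParms[1] >> 12;

        /* kernel call: mdevadd(-1, base_page, page_count, 0) -> /dev/mdx */
        printf("mdevadd(-1, 0x%x, 0x%x, 0)\n", base_page, page_count);
        return 0;
    }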
@@ -676,19 +428,6 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
                }
        }
 
-    if( look) {
-       // from OpenFirmware path
-       IOLog("From path: \"%s\", ", look);
-
-        if (!matching) {
-            if( forceNet || (0 == strncmp( look, "enet", strlen( "enet" ))) ) {
-                matching = IONetworkMatching( look, str, kMaxPathBuf );
-            } else {
-                matching = IODiskMatching( look, str, kMaxPathBuf );
-            }
-        }
-    }
-    
       if( (!matching) && rdBootVar[0] ) {
        // by BSD name
        look = rdBootVar;
@@ -697,10 +436,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
     
        if ( strncmp( look, "en", strlen( "en" )) == 0 ) {
            matching = IONetworkNamePrefixMatching( "en" );
-       } else if ( strncmp( look, "cdrom", strlen( "cdrom" )) == 0 ) {
-            matching = IOCDMatching();
-            findHFSChild = true;
-        } else if ( strncmp( look, "uuid", strlen( "uuid" )) == 0 ) {
+       } else if ( strncmp( look, "uuid", strlen( "uuid" )) == 0 ) {
             char *uuid;
             OSString *uuidString;
 
@@ -772,25 +508,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
     } while( !service);
     matching->release();
 
-    if ( service && findHFSChild ) {
-        bool waiting = true;
-        uint64_t    timeoutNS;
-
-        // wait for children services to finish registering
-        while ( waiting ) {
-            timeoutNS = ROOTDEVICETIMEOUT;
-            timeoutNS *= kSecondScale;
-            
-            if ( (service->waitQuiet(timeoutNS) ) == kIOReturnSuccess) {
-                waiting = false;
-            } else {
-                IOLog( "Waiting for child registration\n" );
-            }
-        }
-        // look for a subservice with an Apple_HFS child
-        IOService * subservice = IOFindMatchingChild( service );
-        if ( subservice ) service = subservice;
-    } else if ( service && mediaProperty ) {
+    if ( service && mediaProperty ) {
         service = (IOService *)service->getProperty(mediaProperty);
     }
 
index f1d0f0648e4b337922135ed2306b81fcb19eae65..3eff425fed9c33322bcf7fb627626064f09b0af6 100644 (file)
@@ -62,9 +62,12 @@ options              KERNOBJC        # Objective-C implementation    # <kernobjc>
 options                IOKITCPP        # C++ implementation            # <iokitcpp>
 options                IOKITSTATS      # IOKit statistics              # <iokitstats>
 options                KDEBUG          # kernel tracing                # <kdebug>
+options                IST_KDEBUG      # limited tracing               # <ist_kdebug>
+options                NO_KDEBUG       # no kernel tracing             # <no_kdebug>
 options                NETWORKING      # kernel networking             # <networking>
 options                CRYPTO          # want crypto code              # <crypto>
 options                CONFIG_DTRACE   # enable dtrace                 # <config_dtrace>
+options                VM_PRESSURE_EVENTS                              # <vm_pressure_events>
 
 options                CONFIG_SLEEP    #                               # <config_sleep>
 
@@ -104,4 +107,5 @@ options   MACH_ASSERT                               # <mach_assert>
 #
 options                CONFIG_MACF                     # Mandatory Access Control Framework
 
-options   DEVELOPMENT                           # <development>
+options                DEVELOPMENT                     # <development>
+options                DEBUG                           # <debug>
index ab7ff3360d15cb97ef13145655d222fba4e2c03c..b752689214444813ae1aa21a61efe2ac2693604a 100644 (file)
@@ -3,7 +3,7 @@
 #  Standard Apple Mac OS Configurations:
 #  -------- ----- ------ ---------------
 #
-#  RELEASE     = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats ]
+#  RELEASE     = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
index 781ce8c7cd1ad17f0c9fc099caa58337ee47b6ea..b1fceabab921819a7e992bab0f080f1efeeaf0f7 100644 (file)
@@ -3,9 +3,9 @@
 #  Standard Apple Mac OS Configurations:
 #  -------- ----- ------ ---------------
 #
-#  RELEASE     = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats ]
+#  RELEASE     = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
-#  DEBUG       = [ RELEASE debug ]
+#  DEBUG       = [ RELEASE debug mach_assert ]
 #
 #  EMBEDDED    = [ intel mach iokitcpp hibernation no_kextd bsmall crypto ]
 #  DEVELOPMENT = [ EMBEDDED development ]
index 7b37a4736eb8e521c9c84c13df639de97e82c027..868b1422bf3d6ac4f744f3d716b692d547aaae6c 100644 (file)
@@ -42,9 +42,11 @@ $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)        \
                MAKEFILES=$(TARGET)/$(IOKIT_KERNEL_CONFIG)/Makefile     \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(IOKIT_KERNEL_CONFIG) \
index 8842b32d7ea7bcf7f4787896d6c0bc55652c9c57..df2fbb32378fec15d3d8750c0c2042a173ee4d8e 100644 (file)
@@ -8,6 +8,8 @@ UNCONFIGURED_HIB_FILES= \
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
+IOHibernateRestoreKernel.o_CFLAGS_ADD += -fno-stack-protector
+
 ######################################################################
 #END    Machine dependent Makefile fragment for i386
 ######################################################################
index 463de5a20bbd56f527f00c1fa37fb1c08f5d87cd..39d2cc065e5edef7dd1785a21ca151501da6336b 100644 (file)
@@ -8,6 +8,8 @@ UNCONFIGURED_HIB_FILES= \
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
+IOHibernateRestoreKernel.o_CFLAGS_ADD += -fno-stack-protector
+
 ######################################################################
 #END    Machine dependent Makefile fragment for x86_64
 ######################################################################
index 532732d3b0d2dc7556d16954bd3d5077d9b378d3..90fbc0098a6861684b9d787663a9123eb9f65ab9 100644 (file)
@@ -16,7 +16,6 @@ iokit/Kernel/IOHibernateRestoreKernel.c                       optional hibernation
 iokit/Kernel/IOLib.cpp                                 optional iokitcpp
 iokit/Kernel/IOLocks.cpp                               optional iokitcpp
 iokit/Kernel/IOConditionLock.cpp                       optional iokitcpp
-iokit/Kernel/IOSyncer.cpp                              optional iokitcpp
 
 #iokit/Kernel/IORuntime.cpp                            optional iokitcpp
 iokit/Kernel/IOStartIOKit.cpp                          optional iokitcpp
index 17c544f8698dc842741b35feccd30ce78b9df3f6..5f337b90ab30d090029ede24b75f371df7432139 100644 (file)
@@ -1,4 +1,8 @@
 
+# libIOKit
+
+iokit/Kernel/IOSyncer.cpp                              optional iokitcpp
+
 # Shared lock
 
 iokit/Kernel/i386/IOSharedLock.s                            standard
index 9d6ca13eede4628ba3956aa41cb0fcb9508540b4..457354b48c79b55cfb2170acd6374fbb5f73de7b 100644 (file)
@@ -1,4 +1,8 @@
 
+# libIOKit
+
+iokit/Kernel/IOSyncer.cpp                              optional iokitcpp
+
 # Shared lock
 
 iokit/Kernel/x86_64/IOSharedLock.s                            standard
index a2c6879f82ad2eaf238e1de2b40292de84613fcd..0f5dcbc2b024b06664f345f150c930228e8eb633 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -306,6 +306,9 @@ document kgm
 |     showmodctl        Display info about a dtrace modctl
 |     showfbtprobe      Display info about an fbt probe given an id (traverses fbt_probetab)
 |     processortimers   Display all processor timers, noting any inconsistencies
+|
+|     maplocalcache     Enable local caching in GDB for improved debug speed
+|     flushlocalcache   Disable local caching in GDB (deletes all memory regions)
 |
 | Type "help <macro>" for more specific help on a particular macro.
 | Type "show user <macro>" to see what the macro is really doing.
@@ -666,17 +669,17 @@ define showactint
              printf "     "
           end
           set $diskpolicy = 0
-          if ($kgm_thread->ext_actionstate.hw_disk != 0)
-               set $diskpolicy = $kgm_thread->ext_actionstate.hw_disk
+          if ($kgm_thread->ext_appliedstate.hw_disk != 0)
+               set $diskpolicy = $kgm_thread->ext_appliedstate.hw_disk
           else 
-               if ($kgm_thread->actionstate.hw_disk != 0)
-                       set $diskpolicy = $kgm_thread->actionstate.hw_disk
+               if ($kgm_thread->appliedstate.hw_disk != 0)
+                       set $diskpolicy = $kgm_thread->appliedstate.hw_disk
                end
           end
-          if ($kgm_thread->ext_actionstate.hw_bg != 0)
+          if ($kgm_thread->ext_appliedstate.hw_bg != 0)
                set $diskpolicy = 5
           end
-          if ($kgm_thread->actionstate.hw_bg != 0)
+          if ($kgm_thread->appliedstate.hw_bg != 0)
                set $diskpolicy = 4
           end
           if ($diskpolicy == 2)
@@ -1533,13 +1536,13 @@ end
 define showipcheader
     printf "ipc_space "
     showptrhdrpad
-    printf "  is_table  "
+    printf "  is_task   "
     showptrhdrpad
-    printf "  table_next"
+    printf "  is_table  "
     showptrhdrpad
-    printf " flags ports  splaysize   "
+    printf " flags ports  table_next  "
     showptrhdrpad
-    printf "splaybase\n"
+    printf "   low_mod   high_mod\n"
 end
 
 define showipceheader
@@ -1622,29 +1625,25 @@ define showipcint
     set $kgm_is = *$kgm_isp
     showptr $arg0
     printf "  "
-    showptr $kgm_is.is_table
+    showptr $kgm_is.is_task
     printf "  "
-    showptr $kgm_is.is_table_next
+    showptr $kgm_is.is_table
     printf "  "
-    if $kgm_is.is_growing != 0
-        printf "G"
-    else
-        printf " "
-    end
-    if $kgm_is.is_fast != 0
-        printf "F"
+    if ($kgm_is.is_bits & 0x40000000) == 0
+        printf "A"
     else
         printf " "
     end
-    if $kgm_is.is_active != 0
-        printf "A  "
+    if ($kgm_is.is_bits & 0x20000000) != 0
+        printf "  "
     else
-        printf "   "
+        printf "    "
     end
-    printf "%5d  ", $kgm_is.is_table_size + $kgm_is.is_tree_total
-    showptr $kgm_is.is_tree_total
+    printf "%5d  ", $kgm_is.is_table_size 
+    showptr $kgm_is.is_table_next
     printf "  "
-    showptr &$kgm_isp->is_tree
+    printf "%10d ", $kgm_is.is_low_mod
+    printf "%10d", $kgm_is.is_high_mod
     printf "\n"
     if $arg1 != 0
         showipceheader
@@ -1666,9 +1665,6 @@ define showipcint
             set $kgm_iindex = $kgm_iindex + 1
             set $kgm_iep = &($kgm_is.is_table[$kgm_iindex])
         end
-        if $kgm_is.is_tree_total
-            printf "Still need to write tree traversal\n"
-        end
     end
     printf "\n"
 end
@@ -1956,19 +1952,22 @@ define showprocint
        end
        set $ptask = (struct task *)$kgm_procp->task
        set $diskpolicy = 0
-       if ($ptask->ext_actionstate.hw_disk != 0)
-               set $diskpolicy = $ptask->ext_actionstate.hw_disk
+       if ($ptask->ext_appliedstate.hw_disk != 0)
+               set $diskpolicy = $ptask->ext_appliedstate.hw_disk
        else 
-               if ($ptask->actionstate.hw_disk != 0)
-                       set $diskpolicy = $ptask->actionstate.hw_disk
+               if ($ptask->appliedstate.hw_disk != 0)
+                       set $diskpolicy = $ptask->appliedstate.hw_disk
                end
        end
-       if ($ptask->ext_actionstate.hw_bg != 0)
+       if ($ptask->ext_appliedstate.hw_bg != 0)
                set $diskpolicy = 5
        end
-       if ($ptask->actionstate.hw_bg != 0)
+       if ($ptask->appliedstate.hw_bg != 0)
                set $diskpolicy = 4
        end
+       if ($ptask->ext_appliedstate.apptype == 2)
+               set $diskpolicy = 6
+       end
        if ($diskpolicy == 2)
                printf "PASS    "
                set $kgm_printed = 1
@@ -1985,12 +1984,16 @@ define showprocint
                printf "EBG_THRT"
                set $kgm_printed = 1
        end
+       if ($diskpolicy == 6)
+               printf "APD_THRT"
+               set $kgm_printed = 1
+       end
        if ($kgm_printed == 0)
           printf "      "
        end
        set $kgm_wqp = (struct workqueue *)$kgm_procp->p_wqptr
        if $kgm_wqp != 0
-          printf "  %2d %2d %2d ", $kgm_wqp->wq_nthreads, $kgm_wqp->wq_thidlecount, $kgm_wqp->wq_itemcount
+          printf "  %2d %2d %2d ", $kgm_wqp->wq_nthreads, $kgm_wqp->wq_thidlecount, $kgm_wqp->wq_reqcount
        else
           printf "           "
        end
@@ -2450,8 +2453,10 @@ define zprint_one
     printf "%8x ",$kgm_zone->max_size
     printf "%8d ",$kgm_zone->elem_size
     printf "%8x ",$kgm_zone->alloc_size
-       printf " %16ld ",$kgm_zone->num_allocs
-       printf "%16ld ",$kgm_zone->num_frees
+    if ($kgm_mtype != $kgm_mtype_arm) 
+        printf " %16ld ",$kgm_zone->num_allocs 
+        printf "%16ld ",$kgm_zone->num_frees
+    end
     printf "%s ",$kgm_zone->zone_name
 
     if ($kgm_zone->exhaustible)
@@ -2705,52 +2710,54 @@ define switchtoctx
                set $pc=((struct savearea *) $arg0)->save_srr0
                update
        else
-       if ($kgm_mtype == $kgm_mtype_arm)
-               select 0
-               set $kdp_arm_act_counter = $kdp_arm_act_counter + 1
-               if ($kdp_arm_act_counter == 1)
-                       set $r0_save   = $r0
-                       set $r1_save   = $r1
-                       set $r2_save   = $r2
-                       set $r3_save   = $r3
-                       set $r4_save   = $r4
-                       set $r5_save   = $r5
-                       set $r6_save   = $r6
-                       set $r7_save   = $r7
-                       set $r8_save   = $r8
-                       set $r9_save   = $r9
-                       set $r10_save  = $r10
-                       set $r11_save  = $r11
-                       set $r12_save  = $r12
-                       set $sp_save   = $sp
-                       set $lr_save   = $lr
-                       set $pc_save   = $pc
-               end
-               set $kgm_statep = (struct arm_saved_state *)$arg0
-               set $r0 =  $kgm_statep->r[0]
-               set $r1 =  $kgm_statep->r[1]
-               set $r2 =  $kgm_statep->r[2]
-               set $r3 =  $kgm_statep->r[3]
-               set $r4 =  $kgm_statep->r[4]
-               set $r5 =  $kgm_statep->r[5]
-               set $r6 =  $kgm_statep->r[6]
-               set $r8 =  $kgm_statep->r[8]
-               set $r9 =  $kgm_statep->r[9]
-               set $r10 = $kgm_statep->r[10]
-               set $r11 = $kgm_statep->r[11]
-               set $r12 = $kgm_statep->r[12]
-               set $sp = $kgm_statep->sp
-               set $lr = $kgm_statep->lr
-               set $r7 =  $kgm_statep->r[7]
-               set $pc = $kgm_statep->pc
-               flushregs
-               flushstack
-               update
-       else
-               echo switchtoctx not implemented for this architecture.\n
+               if ($kgm_mtype == $kgm_mtype_arm)
+                       select 0
+                       set $kdp_arm_act_counter = $kdp_arm_act_counter + 1
+                       if ($kdp_arm_act_counter == 1)
+                               set $r0_save   = $r0
+                               set $r1_save   = $r1
+                               set $r2_save   = $r2
+                               set $r3_save   = $r3
+                               set $r4_save   = $r4
+                               set $r5_save   = $r5
+                               set $r6_save   = $r6
+                               set $r7_save   = $r7
+                               set $r8_save   = $r8
+                               set $r9_save   = $r9
+                               set $r10_save  = $r10
+                               set $r11_save  = $r11
+                               set $r12_save  = $r12
+                               set $sp_save   = $sp
+                               set $lr_save   = $lr
+                               set $pc_save   = $pc
+                       end
+                       set $kgm_statep = (struct arm_saved_state *)$arg0
+                       set $r0 =  $kgm_statep->r[0]
+                       set $r1 =  $kgm_statep->r[1]
+                       set $r2 =  $kgm_statep->r[2]
+                       set $r3 =  $kgm_statep->r[3]
+                       set $r4 =  $kgm_statep->r[4]
+                       set $r5 =  $kgm_statep->r[5]
+                       set $r6 =  $kgm_statep->r[6]
+                       set $r8 =  $kgm_statep->r[8]
+                       set $r9 =  $kgm_statep->r[9]
+                       set $r10 = $kgm_statep->r[10]
+                       set $r11 = $kgm_statep->r[11]
+                       set $r12 = $kgm_statep->r[12]
+                       set $sp = $kgm_statep->sp
+                       set $lr = $kgm_statep->lr
+                       set $r7 =  $kgm_statep->r[7]
+                       set $pc = $kgm_statep->pc
+                       flushregs
+                       flushstack
+                       update
+               else
+                       echo switchtoctx not implemented for this architecture.\n
+               end
        end
 end
 
+
 document switchtoctx  
 Syntax: switchtoctx <address of pcb>
 | This command allows gdb to examine an execution context and dump the
@@ -2896,8 +2903,8 @@ define dumpcallqueue
        set $kgm_i = 0
        while $kgm_callentry != $kgm_callhead
                set $kgm_call = (struct call_entry *)$kgm_callentry
-               printf "0x%08x ", $kgm_call
-               printf "0x%08x 0x%08x ", $kgm_call->param0, $kgm_call->param1
+               showptr $kgm_call
+               printf "0x%lx 0x%lx ", $kgm_call->param0, $kgm_call->param1
                output $kgm_call->deadline
                printf "\t"
                output $kgm_call->func
@@ -3151,6 +3158,12 @@ define showuserstack
                                showactint $kgm_threadp 0
                                set $kgm_thread_pmap = $kgm_threadp->task->map->pmap
                                set $kgm_thread_sp = $kgm_threadp.machine->PcbData.r[7]
+                               showptrhdrpad
+                               printf "                  "
+                               showptr 0
+                               printf "  "
+                               showptr $kgm_threadp.machine->PcbData.pc
+                               printf "\n"
                                set kdp_pmap = $kgm_thread_pmap
                                while ($kgm_thread_sp != 0)
                                set $link_register = *($kgm_thread_sp + 4)
@@ -3164,7 +3177,23 @@ define showuserstack
                                end
                                set kdp_pmap = $kgm_saved_pmap
                        else
-                               echo You must be connected via nanokdp to use this macro\n
+                               set $kgm_threadp = (struct thread *)$arg0
+                               showactheader
+                               showactint $kgm_threadp 0
+                               set $kgm_thread_sp = $kgm_threadp.machine->PcbData.r[7]
+                               while ($kgm_thread_sp != 0)
+                               _map_user_data_from_task $kgm_threadp->task $kgm_thread_sp 8
+                               set $kgm_thread_sp_window = (int *)$kgm_map_user_window
+                               set $link_register = *($kgm_thread_sp_window + 1)
+                               showptrhdrpad
+                                       printf "                  "
+                                       showptr $kgm_thread_sp
+                                       printf "  "
+                               showptr $link_register
+                               printf "\n"
+                               set $kgm_thread_sp = *$kgm_thread_sp_window
+                               _unmap_user_data_from_task
+                               end
                        end
                else
                        echo showuserstack not supported on this architecture\n
@@ -3316,7 +3345,7 @@ define showuserregisters
        else
        if ($kgm_mtype == $kgm_mtype_arm)
                printf "ARM Thread State:\n"
-               set $kgm_pcb = (arm_saved_state_t *) ($kgm_threadp->machine.upcb)
+               set $kgm_pcb = (arm_saved_state_t *) (&$kgm_threadp->machine.PcbData)
 
                printf "    r0: "
                showuserptr $kgm_pcb.r[0]
@@ -3398,7 +3427,7 @@ define kdp-reboot
 # Alternatively, set *(*(unsigned **) 0x2498) = 1 
 # (or 0x5498 on PPC, 0xffffff8000002928 on x86_64, 0xffff049c on arm)
        manualhdrint $kgm_kdp_pkt_hostreboot
-       continue
+       detach
 end
 
 document kdp-reboot
@@ -3531,9 +3560,9 @@ define getdumpinfo
        dumpinfoint KDP_DUMPINFO_GETINFO
        set $kgm_dumpinfo = (kdp_dumpinfo_reply_t *) manual_pkt.data
        if $kgm_dumpinfo->type & KDP_DUMPINFO_REBOOT
-                 printf "Sysem will reboot after kernel info gets dumped.\n"
+                 printf "System will reboot after kernel info gets dumped.\n"
        else
-                 printf "Sysem will not reboot after kernel info gets dumped.\n"
+                 printf "System will not reboot after kernel info gets dumped.\n"
        end
        if $kgm_dumpinfo->type & KDP_DUMPINFO_NORESUME
                  printf "System will allow a re-attach after a KDP disconnect.\n"
@@ -4759,7 +4788,7 @@ define readphysint
         # No KDP. Attempt to use physical memory mapping
 
         if ($kgm_mtype == $kgm_mtype_x86_64)
-            set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+            set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 + physmap_base
         else
             if ($kgm_mtype == $kgm_mtype_arm)
                 set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 - gPhysBase + gVirtBase
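
The change above drops the hard-coded 509-slot physmap constant in favor of the kernel's exported physmap_base; the ARM branch keeps its static offset between gPhysBase and gVirtBase. A hedged C sketch of the two physical-to-kernel-virtual conversions, with every base address invented:

    /* Both paths are plain offset arithmetic: x86_64 adds the base of the
     * physical direct map, ARM rebases from the physical load address to
     * the static virtual window. All constants here are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t physmap_base = 0xffffff7f00000000ULL; /* hypothetical */
        uint64_t gPhysBase    = 0x40000000ULL;         /* hypothetical */
        uint64_t gVirtBase    = 0x80000000ULL;         /* hypothetical */
        uint64_t paddr        = 0x44ac3000ULL;

        printf("x86_64 kva 0x%llx\n",
               (unsigned long long)(paddr + physmap_base));
        printf("arm    kva 0x%llx\n",
               (unsigned long long)(paddr - gPhysBase + gVirtBase));
        return 0;
    }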
@@ -4900,16 +4929,30 @@ document writephys64
 end
 
 define addkextsyms
-       shell echo cd `pwd` > /tmp/gdb-cd
-       cd $arg0
-       source kcbmacros
-       source /tmp/gdb-cd
-       set $kgm_show_kmod_syms = 1
+       if ($argc <= 1)
+               if ($argc == 0)
+                       printf "Adding kext symbols from in-kernel summary data.\n"
+                       add-all-kexts
+               else
+                       printf "Adding kext symbols from $arg0.\n"
+                       shell echo cd `pwd` > /tmp/gdb-cd
+                       cd $arg0
+                       source kcbmacros
+                       source /tmp/gdb-cd
+               end
+               set $kgm_show_kmod_syms = 1
+       else
+               printf "| Usage:\n|\n"
+               help addkextsyms
+       end
 end
 
 document addkextsyms
-| Takes a directory of symbols for kexts generated with kextcache -y and loads them
-| into gdb.
+| If specified without an argument, uses gdb's add-all-kexts command to load
+| kext symbols. Otherwise, takes a directory of kext symbols generated with
+| kextcache -y or kcgen and loads them into gdb.
+| (gdb) addkextsyms
+| - or -
 | (gdb) addkextsyms /path/to/symboldir
 end
 
@@ -6486,6 +6529,9 @@ set $RTF_BROADCAST   = 0x400000
 set $RTF_MULTICAST   = 0x800000
 set $RTF_IFSCOPE     = 0x1000000
 set $RTF_CONDEMNED   = 0x2000000
+set $RTF_IFREF       = 0x4000000
+set $RTF_PROXY       = 0x8000000
+set $RTF_ROUTER      = 0x10000000
 
 set $AF_INET = 2
 set $AF_INET6 = 30
@@ -6610,6 +6656,18 @@ define rtentry_prdetails
        if $rt->rt_flags & $RTF_IFSCOPE
                printf "I"
        end
+       if $rt->rt_flags & $RTF_CONDEMNED
+               printf "Z"
+       end
+       if $rt->rt_flags & $RTF_IFREF
+               printf "i"
+       end
+       if $rt->rt_flags & $RTF_PROXY
+               printf "Y"
+       end
+       if $rt->rt_flags & $RTF_ROUTER
+               printf "r"
+       end
 
        printf "/%s%d", $rt->rt_ifp->if_name, $rt->rt_ifp->if_unit
 end
@@ -8312,7 +8370,7 @@ set $INP_ANONPORT=0x40
 set $INP_RECVIF=0x80
 set $INP_MTUDISC=0x100
 set $INP_STRIPHDR=0x200
-set $INP_FAITH=0x400
+set $INP_RECV_ANYIF=0x400
 set $INP_INADDR_ANY=0x800
 set $INP_RECVTTL=0x1000
 set $INP_UDP_NOCKSUM=0x2000
@@ -8416,8 +8474,8 @@ define _dump_inpcb
        if ($pcb->inp_flags & $INP_STRIPHDR)
                printf "striphdr "
        end
-       if ($pcb->inp_flags & $INP_FAITH)
-               printf "faith "
+       if ($pcb->inp_flags & $INP_RECV_ANYIF)
+               printf "recv_anyif "
        end
        if ($pcb->inp_flags & $INP_INADDR_ANY)
                printf "inaddr_any "
@@ -9635,9 +9693,303 @@ define _pmap_walk_x86
     _pml4_walk $kgm_pmap->pm_cr3 $arg1
 end
 
+define _pmap_walk_arm_level1_section
+    set $kgm_tte_p = $arg0
+    set $kgm_tte = *$kgm_tte_p
+    set $kgm_vaddr = $arg1
+
+       # Supersection or just section?
+    if (($kgm_tte & 0x00040000) == 0x00040000)
+        set $kgm_paddr = ($kgm_tte & 0xFF000000) | ($kgm_vaddr & 0x00FFFFFF)
+        set $kgm_paddr_isvalid = 1
+    else
+        set $kgm_paddr = ($kgm_tte & 0xFFF00000) | ($kgm_vaddr & 0x000FFFFF)
+        set $kgm_paddr_isvalid = 1
+    end
+
+    if $kgm_pt_verbose >= 2
+        printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_tte_p, $kgm_tte
+
+        # bit [1:0] evaluated in _pmap_walk_arm
+
+        # B bit 2
+        set $kgm_b_bit = (($kgm_tte & 0x00000004) >> 2)
+
+        # C bit 3
+        set $kgm_c_bit = (($kgm_tte & 0x00000008) >> 3)
+
+        # XN bit 4
+        if ($kgm_tte & 0x00000010)
+            printf "no-execute" 
+        else
+            printf "execute" 
+        end
+
+        # Domain bit [8:5] if not supersection
+        if (($kgm_tte & 0x00040000) == 0x00000000)
+            printf " domain(%d)", (($kgm_tte & 0x000001e0) >> 5)
+        end
+
+        # IMP bit 9
+        printf " imp(%d)", (($kgm_tte & 0x00000200) >> 9) 
+
+        # AP bit 15 and [11:10], merged to a single 3-bit value
+        set $kgm_access = (($kgm_tte & 0x00000c00) >> 10) | (($kgm_tte & 0x00008000) >> 13)
+        if ($kgm_access == 0x0)
+            printf " noaccess"
+        end
+        if ($kgm_access == 0x1)
+            printf " supervisor(readwrite) user(noaccess)"
+        end
+        if ($kgm_access == 0x2)
+            printf " supervisor(readwrite) user(readonly)"
+        end
+        if ($kgm_access == 0x3)
+            printf " supervisor(readwrite) user(readwrite)"
+        end
+        if ($kgm_access == 0x4)
+            printf " noaccess(reserved)"
+        end
+        if ($kgm_access == 0x5)
+            printf " supervisor(readonly) user(noaccess)"
+        end
+        if ($kgm_access == 0x6)
+            printf " supervisor(readonly) user(readonly)"
+        end
+        if ($kgm_access == 0x7)
+            printf " supervisor(readonly) user(readonly)"
+        end
+
+        # TEX bit [14:12]
+        set $kgm_tex_bits = (($kgm_tte & 0x00007000) >> 12)
+
+        # Print TEX, C, B all together
+        printf " TEX:C:B(%d%d%d:%d:%d)", ($kgm_tex_bits & 0x4 ? 1 : 0), ($kgm_tex_bits & 0x2 ? 1 : 0), ($kgm_tex_bits & 0x1 ? 1 : 0), $kgm_c_bit, $kgm_b_bit
+
+        # S bit 16
+        if ($kgm_tte & 0x00010000)
+            printf " shareable" 
+        else
+            printf " not-shareable" 
+        end
+
+        # nG bit 17
+        if ($kgm_tte & 0x00020000)
+            printf " not-global"
+        else
+            printf " global" 
+        end
+
+        # Supersection bit 18
+        if ($kgm_tte & 0x00040000)
+            printf " supersection"
+        else
+            printf " section" 
+        end
+
+        # NS bit 19
+        if ($kgm_tte & 0x00080000)
+            printf " no-secure"
+        else
+            printf " secure" 
+        end
+
+        printf "\n"
+    end
+end
+
+define _pmap_walk_arm_level2
+    set $kgm_tte_p = $arg0
+    set $kgm_tte = *$kgm_tte_p
+    set $kgm_vaddr = $arg1
+
+    set $kgm_pte_pbase = (($kgm_tte & 0xFFFFFC00) - gPhysBase + gVirtBase)
+    set $kgm_pte_index = ($kgm_vaddr >> 12) & 0x000000FF
+    set $kgm_pte_p = &((pt_entry_t *)$kgm_pte_pbase)[$kgm_pte_index]
+    set $kgm_pte = *$kgm_pte_p
+
+    # Print first level symbolically
+    if $kgm_pt_verbose >= 2
+        printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_tte_p, $kgm_tte
+
+        # bit [1:0] evaluated in _pmap_walk_arm
+
+        # NS bit 3
+        if ($kgm_tte & 0x00000008)
+            printf "no-secure"
+        else
+            printf "secure" 
+        end
+
+        # Domain bit [8:5]
+        printf " domain(%d)", (($kgm_tte & 0x000001e0) >> 5)
+
+        # IMP bit 9
+        printf " imp(%d)", (($kgm_tte & 0x00000200) >> 9) 
+
+        printf "\n"
+    end
+
+    if $kgm_pt_verbose >= 2
+        printf "second-level table (index %d):\n", $kgm_pte_index
+    end
+    if $kgm_pt_verbose >= 3
+        set $kgm_pte_loop = 0
+        while $kgm_pte_loop < 256
+            set $kgm_pte_p_tmp = &((pt_entry_t *)$kgm_pte_pbase)[$kgm_pte_loop]
+            printf "0x%08x:\t0x%08x\n", (unsigned long)$kgm_pte_p_tmp, *$kgm_pte_p_tmp
+            set $kgm_pte_loop = $kgm_pte_loop + 1
+        end
+    end
+
+    if ($kgm_pte & 0x00000003)
+        set $kgm_pve_p = (pv_entry_t *)($kgm_pte_pbase + 0x100*sizeof(pt_entry_t) + $kgm_pte_index*sizeof(pv_entry_t))
+        if ($kgm_pve_p->shadow != 0)
+            set $kgm_spte = $kgm_pve_p->shadow ^ ($kgm_vaddr & ~0xFFF)
+            set $kgm_paddr = ($kgm_spte & 0xFFFFF000) | ($kgm_vaddr & 0xFFF)
+            set $kgm_paddr_isvalid = 1
+        else
+            set $kgm_paddr = (*$kgm_pte_p & 0xFFFFF000) | ($kgm_vaddr & 0xFFF)
+            set $kgm_paddr_isvalid = 1
+        end
+    else
+        set $kgm_paddr = 0
+        set $kgm_paddr_isvalid = 0
+    end
+
+    if $kgm_pt_verbose >= 2
+        printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_pte_p, $kgm_pte
+        if (($kgm_pte & 0x00000003) == 0x00000000)
+            printf "invalid" 
+        else
+            if (($kgm_pte & 0x00000003) == 0x00000001)
+                printf "large"
+
+                # XN bit 15
+                if ($kgm_pte & 0x00008000) == 0x00008000
+                    printf " no-execute"
+                else
+                    printf " execute"
+                end
+            else
+                printf "small"
+
+                # XN bit 0
+                if ($kgm_pte & 0x00000001) == 0x00000001
+                    printf " no-execute"
+                else
+                    printf " execute"
+                end
+            end
+
+            # B bit 2
+            set $kgm_b_bit = (($kgm_pte & 0x00000004) >> 2)
+
+            # C bit 3
+            set $kgm_c_bit = (($kgm_pte & 0x00000008) >> 3)
+
+            # AP bit 9 and [5:4], merged to a single 3-bit value
+            set $kgm_access = (($kgm_pte & 0x00000030) >> 4) | (($kgm_pte & 0x00000200) >> 7)
+            if ($kgm_access == 0x0)
+                printf " noaccess"
+            end
+            if ($kgm_access == 0x1)
+                printf " supervisor(readwrite) user(noaccess)"
+            end
+            if ($kgm_access == 0x2)
+                printf " supervisor(readwrite) user(readonly)"
+            end
+            if ($kgm_access == 0x3)
+                printf " supervisor(readwrite) user(readwrite)"
+            end
+            if ($kgm_access == 0x4)
+                printf " noaccess(reserved)"
+            end
+            if ($kgm_access == 0x5)
+                printf " supervisor(readonly) user(noaccess)"
+            end
+            if ($kgm_access == 0x6)
+                printf " supervisor(readonly) user(readonly)"
+            end
+            if ($kgm_access == 0x7)
+                printf " supervisor(readonly) user(readonly)"
+            end
+
+            # TEX bit [14:12] for large, [8:6] for small
+            if (($kgm_pte & 0x00000003) == 0x00000001)
+                set $kgm_tex_bits = (($kgm_pte & 0x00007000) >> 12)
+            else
+                set $kgm_tex_bits = (($kgm_pte & 0x000001c0) >> 6)
+            end
+
+            # Print TEX, C, B all together
+            printf " TEX:C:B(%d%d%d:%d:%d)", ($kgm_tex_bits & 0x4 ? 1 : 0), ($kgm_tex_bits & 0x2 ? 1 : 0), ($kgm_tex_bits & 0x1 ? 1 : 0), $kgm_c_bit, $kgm_b_bit
+
+            # S bit 10
+            if ($kgm_pte & 0x00000400)
+                printf " shareable" 
+            else
+                printf " not-shareable" 
+            end
+
+            # nG bit 11
+            if ($kgm_pte & 0x00000800)
+                printf " not-global"
+            else
+                printf " global" 
+            end
+
+        end
+        printf "\n"
+    end
+end
+
+# See ARM ARM Section B3.3
 define _pmap_walk_arm
+    set $kgm_pmap = (pmap_t) $arg0
+    set $kgm_vaddr = $arg1
     set $kgm_paddr = 0
     set $kgm_paddr_isvalid = 0
+
+    # Shift by TTESHIFT (20) to get tte index
+    set $kgm_tte_index = (($kgm_vaddr - $kgm_pmap->min) >> 20)
+    set $kgm_tte_p = &$kgm_pmap->tte[$kgm_tte_index]
+    set $kgm_tte = *$kgm_tte_p
+    if $kgm_pt_verbose >= 2
+        printf "first-level table (index %d):\n", $kgm_tte_index
+    end
+    if $kgm_pt_verbose >= 3
+        set $kgm_tte_loop = 0
+        while $kgm_tte_loop < 4096
+            set $kgm_tte_p_tmp = &$kgm_pmap->tte[$kgm_tte_loop]
+            printf "0x%08x:\t0x%08x\n", (unsigned long)$kgm_tte_p_tmp, *$kgm_tte_p_tmp
+            set $kgm_tte_loop = $kgm_tte_loop + 1
+        end
+    end
+
+    if (($kgm_tte & 0x00000003) == 0x00000001)
+        _pmap_walk_arm_level2 $kgm_tte_p $kgm_vaddr
+    else
+        if (($kgm_tte & 0x00000003) == 0x00000002)
+            _pmap_walk_arm_level1_section $kgm_tte_p $kgm_vaddr
+        else
+            set $kgm_paddr = 0
+            set $kgm_paddr_isvalid = 0
+            if $kgm_pt_verbose >= 2
+                printf "Invalid First-Level Translation Table Entry: 0x%08x\n", $kgm_tte
+            end
+        end
+    end
+
+    if $kgm_pt_verbose >= 1
+        if $kgm_paddr_isvalid
+            readphysint $kgm_paddr 32 $kgm_lcpu_self
+            set $kgm_value = $kgm_readphysint_result
+            printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+        else
+            printf "(no translation)\n"
+        end
+    end
 end
 
 define pmap_walk
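
The new macros implement the ARMv7 short-descriptor walk from the ARM ARM section they cite: the first-level index is (vaddr - pmap->min) >> 20, a descriptor with low bits 0b10 maps a 1 MB section (or a 16 MB supersection when bit 18 is set), and 0b01 points at a coarse table indexed by vaddr[19:12]. A minimal C sketch of the address math, starting from already-fetched descriptors; note the macros reuse the 4 KB mask for 64 KB large pages, while the sketch follows the architecture's 16-bit page offset:

    /* Sketch of the ARMv7 short-descriptor address math performed above
     * (ARM ARM B3.3). Descriptor values in main() are fabricated. */
    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 and fills *paddr if the descriptors map vaddr. */
    static int walk_short_descriptor(uint32_t tte, uint32_t pte,
                                     uint32_t vaddr, uint32_t *paddr)
    {
        switch (tte & 3) {
        case 2:                        /* first level maps memory directly */
            if (tte & 0x00040000)      /* bit 18: 16 MB supersection */
                *paddr = (tte & 0xFF000000) | (vaddr & 0x00FFFFFF);
            else                       /* 1 MB section */
                *paddr = (tte & 0xFFF00000) | (vaddr & 0x000FFFFF);
            return 1;
        case 1:                        /* coarse table; pte = table[(vaddr >> 12) & 0xFF] */
            if ((pte & 3) == 0)
                return 0;              /* invalid second-level entry */
            if ((pte & 3) == 1)        /* 64 KB large page, 16-bit offset */
                *paddr = (pte & 0xFFFF0000) | (vaddr & 0x0000FFFF);
            else                       /* 4 KB small page */
                *paddr = (pte & 0xFFFFF000) | (vaddr & 0x00000FFF);
            return 1;
        default:
            return 0;                  /* fault entry */
        }
    }

    int main(void)
    {
        uint32_t paddr;
        /* hypothetical small-page mapping of 0x80123456 */
        if (walk_short_descriptor(0x4f000001, 0x2ac43002, 0x80123456, &paddr))
            printf("phys 0x%08x\n", paddr);    /* prints 0x2ac43456 */
        return 0;
    }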
@@ -9718,7 +10070,6 @@ define zstack
                else
                        printf "FREE  "
                end
-
                showptr zrecords[$index].z_element
                printf " : index %d  :  ztime %d -------------\n", $index, zrecords[$index].z_time
 
@@ -9844,7 +10195,7 @@ define findelem
                        zstack $fe_index
 
                        if (zrecords[$fe_index].z_opcode == $fe_prev_op)
-                               printf "***************   DOUBLE OP!   *********************\n
+                               printf "***************   DOUBLE OP!   *********************\n"
                        end
 
                        set $fe_prev_op = zrecords[$fe_index].z_opcode
@@ -9964,12 +10315,12 @@ define _map_user_data_from_task
             set $kgm_pt_verbose = 0
             _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range1_start
             if $kgm_paddr_isvalid
-                set $kgm_paddr_range1_in_kva = $kgm_paddr + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+                set $kgm_paddr_range1_in_kva = $kgm_paddr + physmap_base
             end
             if $kgm_vaddr_range2_start
                 _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range2_start
                 if $kgm_paddr_isvalid
-                    set $kgm_paddr_range2_in_kva = $kgm_paddr + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+                    set $kgm_paddr_range2_in_kva = $kgm_paddr + physmap_base
                 end
             end
         else
@@ -10185,14 +10536,19 @@ define _print_images_for_dyld_image_info
         set $kgm_image_info_size = 24
         set $kgm_image_info_array_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[1]
         set $kgm_dyld_load_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[4]
+        set $kgm_dyld_all_image_infos_address_from_struct = ((unsigned long long *)$kgm_dyld_all_image_infos)[13]
     else
         set $kgm_image_info_size = 12
         set $kgm_image_info_array_address = ((unsigned int *)$kgm_dyld_all_image_infos)[2]
         set $kgm_dyld_load_address = ((unsigned int *)$kgm_dyld_all_image_infos)[5]
+        set $kgm_dyld_all_image_infos_address_from_struct = ((unsigned int *)$kgm_dyld_all_image_infos)[14]
     end
 
     _unmap_user_data_from_task $kgm_taskp
 
+    # Account for ASLR slide before dyld can fix the structure
+    set $kgm_dyld_load_address = $kgm_dyld_load_address + ($kgm_dyld_all_image_infos_address - $kgm_dyld_all_image_infos_address_from_struct)
+
     set $kgm_image_info_i = 0
     while $kgm_image_info_i < $kgm_image_info_count
 
@@ -10301,6 +10657,9 @@ define showuserdyldinfo
 
           _unmap_user_data_from_task $kgm_taskp
 
+          set $kgm_dyld_all_image_infos_slide = ( $kgm_dyld_all_image_infos_address - $kgm_dyld_all_image_infos_dyldAllImageInfosAddress )
+          set $kgm_dyld_all_image_infos_dyldVersion_postslide = ( $kgm_dyld_all_image_infos_dyldVersion + $kgm_dyld_all_image_infos_slide )
+
           printf "                        version %u\n", $kgm_dyld_all_image_infos_version
           printf "                 infoArrayCount %u\n", $kgm_dyld_all_image_infos_infoArrayCount
           printf "                      infoArray "
@@ -10321,7 +10680,12 @@ define showuserdyldinfo
           showuserptr $kgm_dyld_all_image_infos_dyldVersion
           printf "\n"
           printf "                                "
-          _print_path_for_image $kgm_dyld_all_image_infos_dyldVersion
+          _print_path_for_image $kgm_dyld_all_image_infos_dyldVersion_postslide
+          if ($kgm_dyld_all_image_infos_slide != 0)
+                 printf " (currently "
+                 showuserptr $kgm_dyld_all_image_infos_dyldVersion_postslide
+                 printf ")"
+          end
           printf "\n"
 
           printf "                   errorMessage "
@@ -10726,8 +11090,8 @@ define showkerneldebugbuffercpu
        set $kgm_cpu_number = (int) $arg0
        set $kgm_entry_count = (int) $arg1
        set $kgm_debugentriesfound = 0
-       
-       if (kdebug_flags & 0x80000000)  # 0x80000000 == KDBG_BFINIT
+       # 0x80000000 == KDBG_BFINIT
+       if (kd_ctrl_page.kdebug_flags & 0x80000000)     
                showkerneldebugheader
                
                if $kgm_entry_count == 0
@@ -10740,16 +11104,17 @@ define showkerneldebugbuffercpu
                else
                        set $kgm_kdbp = &kdbip[$kgm_cpu_number]
                        set $kgm_kdsp = $kgm_kdbp->kd_list_head
-                       while (($kgm_kdsp != 0) && ($kgm_entry_count > 0))
-                               if $kgm_kdsp->kds_readlast != $kgm_kdsp->kds_bufptr
-                                       set $kgm_kds_bufptr = $kgm_kdsp->kds_bufptr
-                                       while (($kgm_kds_bufptr > $kgm_kdsp->kds_readlast) && ($kgm_entry_count > 0))
+                       while (($kgm_kdsp.raw != 0) && ($kgm_entry_count > 0))
+                               set $kgm_kdsp_actual = &kd_bufs[$kgm_kdsp.buffer_index].kdsb_addr[$kgm_kdsp.offset]
+                               if $kgm_kdsp_actual->kds_readlast != $kgm_kdsp_actual->kds_bufindx
+                                       set $kgm_kds_bufptr = &$kgm_kdsp_actual->kds_records[$kgm_kdsp_actual->kds_bufindx]
+                                       while (($kgm_kds_bufptr > &$kgm_kdsp_actual->kds_records[$kgm_kdsp_actual->kds_readlast]) && ($kgm_entry_count > 0))
                                                set $kgm_kds_bufptr = $kgm_kds_bufptr - 1
                                                set $kgm_entry_count = $kgm_entry_count - 1
                                                showkerneldebugbufferentry $kgm_kds_bufptr
                                        end
                                end
-                               set $kgm_kdsp = $kgm_kdsp->kds_next
+                               set $kgm_kdsp = $kgm_kdsp_actual->kds_next
                        end
                end
        else
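
The rewritten loop reflects the new kdebug storage layout: the per-CPU list head is no longer a raw pointer but a kds_ptr whose buffer_index/offset pair selects a kd_storage record out of the kd_bufs regions, and unread events sit between kds_readlast and kds_bufindx. A simplified C sketch of the indirection; the struct layouts and bit widths here are stand-ins, not the kernel's definitions:

    /* Sketch of the kdebug storage indirection above: the list holds
     * {buffer_index, offset} pairs resolved through kd_bufs rather than
     * raw kd_storage pointers. */
    #include <stdint.h>
    #include <stdio.h>

    struct kd_storage {                 /* one block of trace records */
        uint32_t kds_bufindx;           /* next slot to write */
        uint32_t kds_readlast;          /* next slot to read */
    };

    struct kd_region { struct kd_storage *kdsb_addr; };

    union kds_ptr {
        uint32_t raw;                   /* raw == 0 ends the list walk */
        struct { uint32_t buffer_index : 16, offset : 16; };
    };

    /* Mirrors: &kd_bufs[p.buffer_index].kdsb_addr[p.offset] */
    static struct kd_storage *resolve(struct kd_region *kd_bufs, union kds_ptr p)
    {
        return &kd_bufs[p.buffer_index].kdsb_addr[p.offset];
    }

    int main(void)
    {
        struct kd_storage pool[4] = { [1] = { .kds_bufindx = 3, .kds_readlast = 1 } };
        struct kd_region  kd_bufs[1] = { { .kdsb_addr = pool } };
        union kds_ptr p;
        p.buffer_index = 0;             /* which mapped region */
        p.offset = 1;                   /* which kd_storage within it */

        struct kd_storage *kdsp = resolve(kd_bufs, p);
        printf("unread records: %u\n", kdsp->kds_bufindx - kdsp->kds_readlast);
        return 0;
    }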
@@ -10763,8 +11128,8 @@ Syntax:  showkerneldebugbuffercpu <cpu> <count>
 end
 
 define showkerneldebugbuffer
-       
-       if (kdebug_flags & 0x80000000)  # 0x80000000 == KDBG_BFINIT
+       # 0x80000000 == KDBG_BFINIT
+       if (kd_ctrl_page.kdebug_flags & 0x80000000)     
        
                set $kgm_entrycount = (int) $arg0
        
@@ -13279,4 +13644,27 @@ Syntax: (gdb) processortimers
 | Print details of processor timers, noting any timer which might be suspicious
 end
 
+define maplocalcache
+       if ($kgm_mtype == $kgm_mtype_arm)
+               mem 0x80000000 0xefffffff cache
+               set dcache-linesize-power 9
+               printf "GDB memory caching enabled. Be sure to disable by calling flushlocalcache before detaching or connecting to a new device\n"
+       end
+end
+
+document maplocalcache
+Syntax: (gdb) maplocalcache 
+| Sets up memory regions for GDB to cache on read. Significantly increases debug speed over KDP
+end
 
+define flushlocalcache
+       if ($kgm_mtype == $kgm_mtype_arm)
+               delete mem
+               printf "GDB memory caching disabled.\n"
+       end
+end
+
+document flushlocalcache
+Syntax: (gdb) flushlocalcache 
+| Deletes all memory regions, disabling GDB-side caching
+end
index ff3bbec5ff4822273464578f2bad681e10972283..67e6f4c99b4a13eb29225cd7c725db570f222661 100644 (file)
@@ -22,12 +22,12 @@ EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
 
 SETUP_SUBDIRS =
 
-COMP_SUBDIRS_I386 = conf kmod
-COMP_SUBDIRS_X86_64 = conf kmod
-COMP_SUBDIRS_ARM = conf kmod
+COMP_SUBDIRS_I386 = conf
+COMP_SUBDIRS_X86_64 = conf
+COMP_SUBDIRS_ARM = conf
 
 
-INST_SUBDIRS = kmod
+INST_SUBDIRS = 
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index c782a830fd4e7454476829ba782bcbc4755afe53..73c216fcf00648a09ca0b11bedacb9e3cbb2da0d 100644 (file)
@@ -351,8 +351,8 @@ finish:
 /*********************************************************************
 * Gets the vm_map for the current kext
 *********************************************************************/
-extern vm_offset_t sectPRELINKB;
-extern int sectSizePRELINK;
+extern vm_offset_t segPRELINKB;
+extern unsigned long segSizePRELINK;
 extern int kth_started;
 extern vm_map_t g_kext_map;
 
@@ -362,8 +362,8 @@ kext_get_vm_map(kmod_info_t *info)
     vm_map_t kext_map = NULL;
 
     /* Set the vm map */
-    if ((info->address >= sectPRELINKB) && 
-            (info->address < (sectPRELINKB + sectSizePRELINK)))
+    if ((info->address >= segPRELINKB) && 
+            (info->address < (segPRELINKB + segSizePRELINK)))
     {
         kext_map = kernel_map;
     } else {
index b7cd6852c5850dc3771bcc8f78ebcc323773f787..61b4342d2326d4cdc6a2a058be395b7021f7aa3d 100644 (file)
@@ -56,6 +56,12 @@ extern int debug_container_malloc_size;
 #define ACCUMSIZE(s)
 #endif
 
+struct OSData::ExpansionData
+{
+    DeallocFunction deallocFunction;
+    bool            disableSerialization;
+};
+
 bool OSData::initWithCapacity(unsigned int inCapacity)
 {
     if (!super::init())
@@ -191,11 +197,12 @@ void OSData::free()
         kfree(data, capacity);
         ACCUMSIZE( -capacity );
     } else if (capacity == EXTERNAL) {
-           DeallocFunction freemem = (DeallocFunction)reserved;
-               if (freemem && data && length) {
-                       freemem(data, length);
-               }
+       DeallocFunction freemem = reserved ? reserved->deallocFunction : NULL;
+       if (freemem && data && length) {
+               freemem(data, length);
        }
+    }
+    if (reserved) kfree(reserved, sizeof(ExpansionData));
     super::free();
 }
 
@@ -388,12 +395,16 @@ bool OSData::serialize(OSSerialize *s) const
     unsigned int i;
     const unsigned char *p;
     unsigned char c;
+    unsigned int serializeLength;
 
     if (s->previouslySerialized(this)) return true;
 
     if (!s->addXMLStartTag(this, "data")) return false;
 
-    for (i = 0, p = (unsigned char *)data; i < length; i++, p++) {
+    serializeLength = length;
+    if (reserved && reserved->disableSerialization) serializeLength = 0;
+
+    for (i = 0, p = (unsigned char *)data; i < serializeLength; i++, p++) {
         /* 3 bytes are encoded as 4 */
         switch (i % 3) {
        case 0:
@@ -431,11 +442,24 @@ bool OSData::serialize(OSSerialize *s) const
     return s->addXMLEndTag("data");
 }
 
-/* Note I am just using the reserved pointer here instead of allocating a whole buffer
- * to hold one pointer.
- */
 void OSData::setDeallocFunction(DeallocFunction func)
 {
-    reserved = (ExpansionData *)func;
-       return;
+    if (!reserved)
+    {
+       reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData));
+       if (!reserved) return;
+       bzero(reserved, sizeof(ExpansionData));
+    }
+    reserved->deallocFunction = func;
+}
+
+void OSData::setSerializable(bool serializable)
+{
+    if (!reserved)
+    {
+       reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData));
+       if (!reserved) return;
+       bzero(reserved, sizeof(ExpansionData));
+    }
+    reserved->disableSerialization = (!serializable);
 }
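
The OSData change stops overloading the reserved pad pointer as a bare function pointer and gives it a real, lazily allocated ExpansionData, so both the dealloc callback and the new serialization flag fit without growing the object. A plain C sketch of that reserved-slot pattern, using libc calloc/free in place of kalloc/bzero/kfree:

    /* Sketch of the reserved-slot pattern adopted above: the pad pointer
     * lazily points at a zeroed expansion struct, so several fields fit
     * behind one ABI-stable slot. Not the kernel code. */
    #include <stdlib.h>
    #include <stdbool.h>

    typedef void (*DeallocFunction)(void *ptr, unsigned int length);

    struct ExpansionData {
        DeallocFunction deallocFunction;
        bool            disableSerialization;
    };

    struct Data {
        void                 *bytes;
        unsigned int          length;
        struct ExpansionData *reserved;  /* formerly held the function pointer itself */
    };

    static struct ExpansionData *expansion(struct Data *d)
    {
        if (!d->reserved)
            d->reserved = calloc(1, sizeof(*d->reserved));  /* zeroed on first use */
        return d->reserved;
    }

    void data_set_dealloc(struct Data *d, DeallocFunction f)
    {
        struct ExpansionData *e = expansion(d);
        if (e) e->deallocFunction = f;
    }

    void data_set_serializable(struct Data *d, bool s)
    {
        struct ExpansionData *e = expansion(d);
        if (e) e->disableSerialization = !s;
    }

    int main(void)
    {
        struct Data d = { 0 };
        data_set_serializable(&d, false);  /* allocates the slot on first use */
        data_set_dealloc(&d, 0);
        free(d.reserved);                  /* OSData::free() does the kfree */
        return 0;
    }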
index eaa1483df87685e5038338ee80b21ef49dc1334d..7329f3a4ec82a16595a8c2e6e75633d7f92233f6 100644 (file)
@@ -68,6 +68,8 @@ bool OSDictionary::initWithCapacity(unsigned int inCapacity)
 
     int size = inCapacity * sizeof(dictEntry);
 
+//fOptions |= kSort;
+
     dictionary = (dictEntry *) kalloc(size);
     if (!dictionary)
         return false;
@@ -170,6 +172,15 @@ bool OSDictionary::initWithDictionary(const OSDictionary *dict,
     if (!initWithCapacity(newCapacity))
         return false;
 
+    if ((kSort & fOptions) && !(kSort & dict->fOptions)) {
+       for (unsigned int i = 0; i < dict->count; i++) {
+           if (!setObject(dict->dictionary[i].key, dict->dictionary[i].value)) {
+               return false;
+           }
+       }
+       return true;
+    }
+
     count = dict->count;
     bcopy(dict->dictionary, dictionary, count * sizeof(dictEntry));
     for (unsigned int i = 0; i < count; i++) {
@@ -306,34 +317,45 @@ void OSDictionary::flushCollection()
 bool OSDictionary::
 setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject)
 {
+    unsigned int i;
+    bool exists;
+
     if (!anObject || !aKey)
         return false;
 
     // if the key exists, replace the object
-    for (unsigned int i = 0; i < count; i++) {
-        if (aKey == dictionary[i].key) {
-            const OSMetaClassBase *oldObject = dictionary[i].value;
 
-            haveUpdated();
-
-            anObject->taggedRetain(OSTypeID(OSCollection));
-            dictionary[i].value = anObject;
+    if (fOptions & kSort) {
+       i = OSSymbol::bsearch(aKey, &dictionary[0], count, sizeof(dictionary[0]));
+       exists = (i < count) && (aKey == dictionary[i].key);
+    } else for (exists = false, i = 0; i < count; i++) {
+        if ((exists = (aKey == dictionary[i].key))) break;
+    }
 
-            oldObject->taggedRelease(OSTypeID(OSCollection));
-            return true;
-        }
+    if (exists) {
+       const OSMetaClassBase *oldObject = dictionary[i].value;
+    
+       haveUpdated();
+    
+       anObject->taggedRetain(OSTypeID(OSCollection));
+       dictionary[i].value = anObject;
+    
+       oldObject->taggedRelease(OSTypeID(OSCollection));
+       return true;
     }
 
     // add new key, possibly extending our capacity
     if (count >= capacity && count >= ensureCapacity(count+1))
-        return 0;
+        return false;
 
     haveUpdated();
 
+    bcopy(&dictionary[i], &dictionary[i+1], (count - i) * sizeof(dictionary[0]));
+
     aKey->taggedRetain(OSTypeID(OSCollection));
     anObject->taggedRetain(OSTypeID(OSCollection));
-    dictionary[count].key = aKey;
-    dictionary[count].value = anObject;
+    dictionary[i].key = aKey;
+    dictionary[i].value = anObject;
     count++;
 
     return true;
@@ -341,24 +363,33 @@ setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject)
 
 void OSDictionary::removeObject(const OSSymbol *aKey)
 {
+    unsigned int i;
+    bool exists;
+
     if (!aKey)
         return;
 
     // if the key exists, remove the object
-    for (unsigned int i = 0; i < count; i++)
-        if (aKey == dictionary[i].key) {
-            dictEntry oldEntry = dictionary[i];
 
-            haveUpdated();
+    if (fOptions & kSort) {
+       i = OSSymbol::bsearch(aKey, &dictionary[0], count, sizeof(dictionary[0]));
+       exists = (i < count) && (aKey == dictionary[i].key);
+    } else for (exists = false, i = 0; i < count; i++) {
+        if ((exists = (aKey == dictionary[i].key))) break;
+    }
+
+    if (exists) {
+       dictEntry oldEntry = dictionary[i];
 
-            count--;
-            for (; i < count; i++)
-                dictionary[i] = dictionary[i+1];
+       haveUpdated();
 
-            oldEntry.key->taggedRelease(OSTypeID(OSCollection));
-            oldEntry.value->taggedRelease(OSTypeID(OSCollection));
-            return;
-        }
+       count--;
+       bcopy(&dictionary[i+1], &dictionary[i], (count - i) * sizeof(dictionary[0]));
+
+       oldEntry.key->taggedRelease(OSTypeID(OSCollection));
+       oldEntry.value->taggedRelease(OSTypeID(OSCollection));
+       return;
+    }
 }
 
 
@@ -391,13 +422,24 @@ bool OSDictionary::merge(const OSDictionary *srcDict)
 
 OSObject *OSDictionary::getObject(const OSSymbol *aKey) const
 {
+    unsigned int i;
+    bool exists;
+
     if (!aKey)
         return 0;
 
-    // if the key exists, remove the object
-    for (unsigned int i = 0; i < count; i++)
-        if (aKey == dictionary[i].key)
-            return (const_cast<OSObject *> ((const OSObject *)dictionary[i].value));
+    // if the key exists, return the object
+
+    if (fOptions & kSort) {
+       i = OSSymbol::bsearch(aKey, &dictionary[0], count, sizeof(dictionary[0]));
+       exists = (i < count) && (aKey == dictionary[i].key);
+    } else for (exists = false, i = 0; i < count; i++) {
+        if ((exists = (aKey == dictionary[i].key))) break;
+    }
+
+    if (exists) {
+       return (const_cast<OSObject *> ((const OSObject *)dictionary[i].value));
+    }
 
     return 0;
 }
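
With the kSort option, all three paths above share one idea: OSSymbol::bsearch returns the slot where the key is or belongs, so lookup is a single comparison at that slot, removal compacts with one bcopy, and insertion shifts the tail up by one entry. A lower-bound sketch in C, with integer keys standing in for OSSymbol pointers:

    /* Sketch of the sorted-dictionary insert path: binary search yields
     * the slot where the key lives or belongs, and one memmove opens the
     * gap. Lookups become O(log n) instead of a linear scan. */
    #include <stdio.h>
    #include <string.h>

    #define CAP 8

    static unsigned lower_bound(const int *keys, unsigned count, int key)
    {
        unsigned lo = 0, hi = count;
        while (lo < hi) {
            unsigned mid = lo + (hi - lo) / 2;
            if (keys[mid] < key) lo = mid + 1;
            else hi = mid;
        }
        return lo;  /* index of key, or insertion point */
    }

    static int set_key(int *keys, unsigned *count, int key)
    {
        unsigned i = lower_bound(keys, *count, key);
        if (i < *count && keys[i] == key)
            return 1;                               /* replace case */
        if (*count >= CAP)
            return 0;                               /* out of capacity */
        memmove(&keys[i + 1], &keys[i], (*count - i) * sizeof(keys[0]));
        keys[i] = key;
        (*count)++;
        return 1;
    }

    int main(void)
    {
        int keys[CAP]; unsigned count = 0;
        set_key(keys, &count, 30);
        set_key(keys, &count, 10);
        set_key(keys, &count, 20);
        for (unsigned i = 0; i < count; i++)
            printf("%d ", keys[i]);                 /* prints: 10 20 30 */
        printf("\n");
        return 0;
    }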
index 68139f092e5df43dc34b529cfbd2b08b37bfb908..1e4395043950b4168bb28550988eca9f5504ac29 100644 (file)
@@ -88,16 +88,29 @@ static OSReturn _OSDictionarySetCStringValue(
     OSDictionary * dict,
     const char   * key,
     const char   * value);
+static bool _OSKextInPrelinkRebuildWindow(void);
+static bool _OSKextInUnloadedPrelinkedKexts(const OSSymbol * theBundleID);
     
 // We really should add containsObject() & containsCString to OSCollection & subclasses.
 // So few pad slots, though....
 static bool _OSArrayContainsCString(OSArray * array, const char * cString);
 
-#if CONFIG_MACF_KEXT
-static void * MACFCopyModuleDataForKext(
-    OSKext                 * theKext,
-    mach_msg_type_number_t * datalen);
-#endif /* CONFIG_MACF_KEXT */
+#if CONFIG_KEC_FIPS
+static void * GetAppleTEXTHashForKext(OSKext * theKext, OSDictionary *theInfoDict);
+#endif // CONFIG_KEC_FIPS
+
+/* Prelinked arm kexts do not have VM entries because the method we use to
+ * fake an entry (see libsa/bootstrap.cpp:readPrelinkedExtensions()) does
+ * not work on ARM.  To get around that, we must free prelinked kext
+ * executables with ml_static_mfree() instead of kext_free().
+ */
+#if __i386__ || __x86_64__
+#define VM_MAPPED_KEXTS 1
+#define KASLR_KEXT_DEBUG 0
+#define KASLR_IOREG_DEBUG 0
+#else
+#error Unsupported architecture
+#endif
 
 #if PRAGMA_MARK
 #pragma mark Constants & Macros
@@ -136,6 +149,9 @@ static void * MACFCopyModuleDataForKext(
 
 #define STRING_HAS_PREFIX(s, p)      (strncmp((s), (p), strlen(p)) == 0)
 
+#define REBUILD_MAX_TIME (60 * 5) // 5 minutes
+#define MINIMUM_WAKEUP_SECONDS (30)
+
 /*********************************************************************
 * infoDict keys for internally-stored data. Saves on ivar slots for
 * objects we don't keep around past boot time or during active load.
@@ -263,7 +279,7 @@ kmod_info_t g_kernel_kmod_info = {
     /* version         */ "0",               // filled in in OSKext::initialize()
     /* reference_count */ -1,                // never adjusted; kernel never unloads
     /* reference_list  */ NULL,
-    /* address         */ (vm_address_t)&_mh_execute_header,
+    /* address         */ NULL,
     /* size            */ 0,                 // filled in in OSKext::initialize()
     /* hdr_size        */ 0,
     /* start           */ 0,
@@ -318,6 +334,7 @@ static unsigned int         sConsiderUnloadDelay       = 60;     // seconds
 static thread_call_t        sUnloadCallout             = 0;
 static thread_call_t        sDestroyLinkContextThread  = 0;      // one-shot, one-at-a-time thread
 static bool                 sSystemSleep               = false;  // true when system going to sleep
+static AbsoluteTime         sLastWakeTime;                       // last time we woke up   
 
 /*********************************************************************
 * Backtraces can be printed at various times so we need a tight lock
@@ -505,22 +522,22 @@ kxld_log_callback(
 
 #define notifyKextLoadObservers(kext, kmod_info) \
 do { \
-       IOStatistics::onKextLoad(kext, kmod_info); \
+    IOStatistics::onKextLoad(kext, kmod_info); \
 } while (0)
 
 #define notifyKextUnloadObservers(kext) \
 do { \
-       IOStatistics::onKextUnload(kext); \
+    IOStatistics::onKextUnload(kext); \
 } while (0)
 
 #define notifyAddClassObservers(kext, addedClass, flags) \
 do { \
-       IOStatistics::onClassAdded(kext, addedClass); \
+    IOStatistics::onClassAdded(kext, addedClass); \
 } while (0)
 
 #define notifyRemoveClassObservers(kext, removedClass, flags) \
 do { \
-       IOStatistics::onClassRemoved(kext, removedClass); \
+    IOStatistics::onClassRemoved(kext, removedClass); \
 } while (0)
 
 #else
@@ -583,7 +600,7 @@ OSKext::initialize(void)
 
    /* Read the log flag boot-args and set the log flags.
     */
-    if (PE_parse_boot_argn("kextlog", &bootLogFilter, sizeof("kextlog=0x00000000 "))) {
+    if (PE_parse_boot_argn("kextlog", &bootLogFilter, sizeof(bootLogFilter))) {
         sBootArgLogFilterFound = true;
         sKernelLogFilter = bootLogFilter;
         // log this if any flags are set
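
The kextlog hunk above fixes a classic sizeof slip: the old call passed sizeof("kextlog=0x00000000 "), the size of an unrelated 20-byte string literal, where the size of the 4-byte destination was expected. A sketch of why sizeof on the variable itself is the safe spelling; parse_arg below is a hypothetical stand-in, not the real PE_parse_boot_argn signature:

    #include <cstdio>
    #include <cstring>

    // Hypothetical stand-in for PE_parse_boot_argn: refuses to write more
    // than 'max_size' bytes of parsed value into 'dest'.
    static bool parse_arg(unsigned value, void *dest, size_t max_size) {
        if (max_size < sizeof(value)) return false;   // buffer too small
        memcpy(dest, &value, sizeof(value));
        return true;
    }

    int main() {
        unsigned bootLogFilter = 0;

        // The literal's size (20) has nothing to do with the buffer's (4):
        printf("literal %zu bytes, buffer %zu bytes\n",
               sizeof("kextlog=0x00000000 "), sizeof(bootLogFilter));

        // sizeof(bootLogFilter) can never disagree with the destination.
        if (parse_arg(0xDEADBEEF, &bootLogFilter, sizeof(bootLogFilter)))
            printf("parsed 0x%x\n", bootLogFilter);
        return 0;
    }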
@@ -618,6 +635,13 @@ OSKext::initialize(void)
         kernelStart, kernelLength);
     assert(kernelExecutable);
 
+#if KASLR_KEXT_DEBUG 
+    IOLog("kaslr: kernel start 0x%lx end 0x%lx length %lu \n", 
+          (unsigned long)kernelStart, 
+          (unsigned long)getlastaddr(),
+          kernelLength);
+#endif
+
     sKernelKext->loadTag = sNextLoadTag++;  // the kernel is load tag 0
     sKernelKext->bundleID = OSSymbol::withCString(kOSKextKernelIdentifier);
     
@@ -631,6 +655,7 @@ OSKext::initialize(void)
     sKernelKext->flags.loaded = 1;
     sKernelKext->flags.started = 1;
     sKernelKext->flags.CPPInitialized = 0;
+    sKernelKext->flags.jettisonLinkeditSeg = 0;
 
     sKernelKext->kmod_info = &g_kernel_kmod_info;
     strlcpy(g_kernel_kmod_info.version, osrelease,
@@ -693,6 +718,8 @@ OSKext::initialize(void)
     *timestamp = 0;
     timestamp = __OSAbsoluteTimePtr(&last_unloaded_timestamp);
     *timestamp = 0;
+    timestamp = __OSAbsoluteTimePtr(&sLastWakeTime);
+    *timestamp = 0;
 
     OSKextLog(/* kext */ NULL,
         kOSKextLogProgressLevel |
@@ -700,7 +727,7 @@ OSKext::initialize(void)
         "Kext system initialized.");
 
     notifyKextLoadObservers(sKernelKext, sKernelKext->kmod_info);
-       
+
     return;
 }
 
@@ -726,6 +753,7 @@ OSKext::removeKextBootstrap(void)
 
     kernel_segment_command_t * seg_to_remove         = NULL;
 
+
    /* This must be the very first thing done by this function.
     */
     IORecursiveLockLock(sKextLock);
@@ -774,10 +802,10 @@ OSKext::removeKextBootstrap(void)
     * defining the lower bound for valid physical addresses.
     */
     if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) {
-       // 04/18/11 - gab: <rdar://problem/9236163>
-       // overwrite memory occupied by KLD segment with random data before
-       // releasing it.
-       read_random((void *) seg_to_remove->vmaddr, seg_to_remove->vmsize);
+        // 04/18/11 - gab: <rdar://problem/9236163>
+        // overwrite memory occupied by KLD segment with random data before
+        // releasing it.
+        read_random((void *) seg_to_remove->vmaddr, seg_to_remove->vmsize);
         ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize);
     }
 #else
@@ -831,7 +859,7 @@ OSKext::removeKextBootstrap(void)
        /* Copy it out.
         */
         memcpy(seg_copy, seg_data, seg_length);
-        
+
        /* Dump the booter memory.
         */
         ml_static_mfree(seg_offset, seg_length);
@@ -846,7 +874,7 @@ OSKext::removeKextBootstrap(void)
             (ipc_port_t)NULL,
             (vm_object_offset_t) 0,
             /* copy */ FALSE,
-            /* cur_protection */ VM_PROT_ALL,
+            /* cur_protection */ VM_PROT_READ | VM_PROT_WRITE,
             /* max_protection */ VM_PROT_ALL,
             /* inheritance */ VM_INHERIT_DEFAULT);
         if ((mem_result != KERN_SUCCESS) || 
@@ -869,20 +897,22 @@ OSKext::removeKextBootstrap(void)
         kmem_free(kernel_map, seg_copy_offset, seg_length);
     }
 #else /* we are not CONFIG_KXLD */
+#error CONFIG_KXLD is expected for this arch
 
     /*****
     * Dump the LINKEDIT segment, unless keepsyms is set.
     */
     if (!sKeepSymbols) {
-#if   __i386__ || __x86_64__
-        if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) {
-            ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize);
+        const char *dt_segment_name = "Kernel-__LINKEDIT";
+        if (0 == IODTGetLoaderInfo(dt_segment_name,
+            &segment_paddress, &segment_size)) {
+#ifdef SECURE_KERNEL
+            vm_offset_t vmaddr = ml_static_ptovirt((vm_offset_t)segment_paddress);
+            bzero((void*)vmaddr, segment_size);
+#endif
+            IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress,
+                (int)segment_size);
         }
-#else /* from if __arm__ */
-
-#error arch
-#endif /* from if __arm__ */
-
     } else {
         OSKextLog(/* kext */ NULL,
            kOSKextLogBasicLevel |
@@ -1342,6 +1372,9 @@ OSKext::initWithPrelinkedInfoDict(
     if (!setInfoDictionaryAndPath(anInfoDict, kextPath)) {
         goto finish;
     }
+#if KASLR_KEXT_DEBUG
+    IOLog("kaslr: kext %s \n", getIdentifierCString());
+#endif
 
    /* Also get the executable's bundle-relative path if present.
     * Don't look for an arch-specific path property.
@@ -1373,9 +1406,16 @@ OSKext::initWithPrelinkedInfoDict(
             goto finish;
         }
 
-        data = (void *) (intptr_t) (addressNum->unsigned64BitValue());
+        data = (void *) ((intptr_t) (addressNum->unsigned64BitValue()) + vm_kernel_slide);
         length = (uint32_t) (lengthNum->unsigned32BitValue());
 
+#if KASLR_KEXT_DEBUG
+        IOLog("kaslr: unslid 0x%lx slid 0x%lx length %u - prelink executable \n", 
+              (unsigned long)VM_KERNEL_UNSLIDE(data), 
+              (unsigned long)data,
+              length);
+#endif
+
         anInfoDict->removeObject(kPrelinkExecutableLoadKey);
         anInfoDict->removeObject(kPrelinkExecutableSizeKey);
 
@@ -1384,7 +1424,13 @@ OSKext::initWithPrelinkedInfoDict(
         */
         addressNum = OSDynamicCast(OSNumber, anInfoDict->getObject(kPrelinkExecutableSourceKey));
         if (addressNum) {
-            srcData = (void *) (intptr_t) (addressNum->unsigned64BitValue());
+            srcData = (void *) ((intptr_t) (addressNum->unsigned64BitValue()) + vm_kernel_slide);
+
+#if KASLR_KEXT_DEBUG
+            IOLog("kaslr: unslid 0x%lx slid 0x%lx - prelink executable source \n", 
+                  (unsigned long)VM_KERNEL_UNSLIDE(srcData), 
+                  (unsigned long)srcData);
+#endif
 
             if (data != srcData) {
 #if __LP64__
@@ -1421,7 +1467,12 @@ OSKext::initWithPrelinkedInfoDict(
                 getIdentifierCString());
             goto finish;
         }
+
+#if VM_MAPPED_KEXTS
         prelinkedExecutable->setDeallocFunction(osdata_kext_free);
+#else
+        prelinkedExecutable->setDeallocFunction(osdata_phys_free);
+#endif
         setLinkedExecutable(prelinkedExecutable);
 
         addressNum = OSDynamicCast(OSNumber,
@@ -1435,7 +1486,18 @@ OSKext::initWithPrelinkedInfoDict(
             goto finish;
         }
 
-        kmod_info = (kmod_info_t *) (intptr_t) (addressNum->unsigned64BitValue());
+        if (addressNum->unsigned64BitValue() != 0) {
+            kmod_info = (kmod_info_t *) (intptr_t) (addressNum->unsigned64BitValue() + vm_kernel_slide);
+            kmod_info->address += vm_kernel_slide;
+#if KASLR_KEXT_DEBUG
+            IOLog("kaslr: unslid 0x%lx slid 0x%lx - kmod_info \n", 
+                  (unsigned long)VM_KERNEL_UNSLIDE(kmod_info), 
+                  (unsigned long)kmod_info);
+            IOLog("kaslr: unslid 0x%lx slid 0x%lx - kmod_info->address \n", 
+                  (unsigned long)VM_KERNEL_UNSLIDE(kmod_info->address), 
+                  (unsigned long)kmod_info->address);
+ #endif
+        }
 
         anInfoDict->removeObject(kPrelinkKmodInfoKey);
     }
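
Every address recorded at prelink time is rebased in this function by adding vm_kernel_slide, and the KASLR debug logging undoes that with VM_KERNEL_UNSLIDE so printed values match the on-disk kernelcache. A self-contained sketch of the round trip, with an assumed slide value and a macro modeled on, not copied from, the XNU one:

    #include <cstdint>
    #include <cstdio>

    // Assumed values: in XNU, vm_kernel_slide is chosen at boot, and
    // VM_KERNEL_UNSLIDE subtracts it before addresses are logged.
    static const uint64_t vm_kernel_slide = 0x0000000012345000ULL;
    #define VM_KERNEL_UNSLIDE(addr) ((uint64_t)(addr) - vm_kernel_slide)

    int main() {
        uint64_t stored  = 0xffffff8000400000ULL;     // as prelinked on disk
        uint64_t runtime = stored + vm_kernel_slide;  // rebased at boot

        printf("unslid 0x%llx slid 0x%llx\n",
               (unsigned long long)VM_KERNEL_UNSLIDE(runtime),
               (unsigned long long)runtime);          // unslid == stored
        return 0;
    }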
@@ -3881,22 +3943,22 @@ OSKext::getLoadTag(void)
  *********************************************************************/
 void OSKext::getSizeInfo(uint32_t *loadSize, uint32_t *wiredSize)
 {
-       if (linkedExecutable) {
-               *loadSize = linkedExecutable->getLength();
+    if (linkedExecutable) {
+        *loadSize = linkedExecutable->getLength();
            
-               /* If we have a kmod_info struct, calculated the wired size
-                * from that. Otherwise it's the full load size.
-                */
-               if (kmod_info) {
-                       *wiredSize = *loadSize - kmod_info->hdr_size;
-               } else {
-                       *wiredSize = *loadSize;
-               }
-       }
-       else {
-               *wiredSize = 0;
-               *loadSize = 0;
-       }
+        /* If we have a kmod_info struct, calculate the wired size
+         * from that. Otherwise it's the full load size.
+         */
+        if (kmod_info) {
+            *wiredSize = *loadSize - kmod_info->hdr_size;
+        } else {
+            *wiredSize = *loadSize;
+        }
+    }
+    else {
+        *wiredSize = 0;
+        *loadSize = 0;
+    }
 }
 
 /*********************************************************************
@@ -3949,6 +4011,7 @@ finish:
 
 /*********************************************************************
 *********************************************************************/
+
 #if defined (__i386__)
 #define ARCHNAME "i386"
 #elif defined (__x86_64__)
@@ -4521,6 +4584,222 @@ finish:
     return result;
 }
 
+/*********************************************************************
+* 
+*********************************************************************/
+OSReturn
+OSKext::slidePrelinkedExecutable()
+{
+    OSReturn                   result           = kOSKextReturnBadData;
+    kernel_mach_header_t     * mh               = NULL;
+    kernel_segment_command_t * seg              = NULL;
+    kernel_segment_command_t * linkeditSeg      = NULL;
+    kernel_section_t         * sec              = NULL;
+    char                     * linkeditBase     = NULL;
+    bool                       haveLinkeditBase = false;
+    char                     * relocBase        = NULL;
+    bool                       haveRelocBase    = false;
+    struct dysymtab_command  * dysymtab         = NULL;
+    struct symtab_command    * symtab           = NULL;
+    kernel_nlist_t           * sym              = NULL;
+    struct relocation_info   * reloc            = NULL;
+    uint32_t                   i                = 0; 
+    int                        reloc_size;
+    vm_offset_t                new_kextsize;
+
+    if (linkedExecutable == NULL || vm_kernel_slide == 0) {
+        result = kOSReturnSuccess;
+        goto finish;
+    }
+
+    mh = (kernel_mach_header_t *)linkedExecutable->getBytesNoCopy();
+
+    for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) {
+        seg->vmaddr += vm_kernel_slide;
+                
+#if KASLR_KEXT_DEBUG
+        IOLog("kaslr: segname %s unslid 0x%lx slid 0x%lx \n", 
+              seg->segname,
+              (unsigned long)VM_KERNEL_UNSLIDE(seg->vmaddr), 
+              (unsigned long)seg->vmaddr);
+#endif
+       
+        if (!haveRelocBase) {
+            relocBase = (char *) seg->vmaddr;
+            haveRelocBase = true;
+        }
+        if (!strcmp(seg->segname, "__LINKEDIT")) {
+            linkeditBase = (char *) seg->vmaddr - seg->fileoff;
+            haveLinkeditBase = true;
+            linkeditSeg = seg;
+        }
+        for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
+            sec->addr += vm_kernel_slide;
+
+#if KASLR_KEXT_DEBUG
+            IOLog("kaslr: sectname %s unslid 0x%lx slid 0x%lx \n", 
+                  sec->sectname,
+                  (unsigned long)VM_KERNEL_UNSLIDE(sec->addr), 
+                  (unsigned long)sec->addr);
+#endif
+        }
+    }
+
+    dysymtab = (struct dysymtab_command *) getcommandfromheader(mh, LC_DYSYMTAB);
+
+    symtab = (struct symtab_command *) getcommandfromheader(mh, LC_SYMTAB);
+
+    if (symtab != NULL) {
+        /* Some pseudo-kexts have symbol tables without segments.
+         * Ignore them. */
+        if (symtab->nsyms > 0 && haveLinkeditBase) {
+            sym = (kernel_nlist_t *) (linkeditBase + symtab->symoff);
+            for (i = 0; i < symtab->nsyms; i++) {
+                if (sym[i].n_type & N_STAB) {
+                    continue;
+                }
+                sym[i].n_value += vm_kernel_slide;
+                
+#if KASLR_KEXT_DEBUG
+#define MAX_SYMS_TO_LOG 5
+                if ( i < MAX_SYMS_TO_LOG ) {
+                    IOLog("kaslr: LC_SYMTAB unslid 0x%lx slid 0x%lx \n", 
+                          (unsigned long)VM_KERNEL_UNSLIDE(sym[i].n_value), 
+                          (unsigned long)sym[i].n_value);
+                }
+#endif
+            }
+        }
+    }
+
+    if (dysymtab != NULL) {
+        if (dysymtab->nextrel > 0) {
+            OSKextLog(this,
+                kOSKextLogErrorLevel | kOSKextLogLoadFlag |
+                kOSKextLogLinkFlag,
+                "Sliding kext %s: External relocations found.",
+                getIdentifierCString());
+            goto finish;
+        }
+
+        if (dysymtab->nlocrel > 0) {
+            if (!haveLinkeditBase) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel | kOSKextLogLoadFlag |
+                    kOSKextLogLinkFlag,
+                    "Sliding kext %s: No linkedit segment.",
+                    getIdentifierCString());
+                goto finish;
+            }
+
+            if (!haveRelocBase) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel | kOSKextLogLoadFlag |
+                    kOSKextLogLinkFlag,
+#if __x86_64__
+                    "Sliding kext %s: No writable segments.",
+#else
+                    "Sliding kext %s: No segments.",
+#endif
+                    getIdentifierCString());
+                goto finish;
+            }
+
+            reloc = (struct relocation_info *) (linkeditBase + dysymtab->locreloff);
+            reloc_size = dysymtab->nlocrel * sizeof(struct relocation_info);
+            
+            for (i = 0; i < dysymtab->nlocrel; i++) {
+                if (   reloc[i].r_extern != 0
+                    || reloc[i].r_type != 0
+                    || reloc[i].r_length != (sizeof(void *) == 8 ? 3 : 2)
+#if __i386__
+                    || (reloc[i].r_address & R_SCATTERED)
+#endif
+                    ) {
+                    OSKextLog(this,
+                        kOSKextLogErrorLevel | kOSKextLogLoadFlag |
+                        kOSKextLogLinkFlag,
+                        "Sliding kext %s: Unexpected relocation found.",
+                        getIdentifierCString());
+                    goto finish;
+                }
+                if (reloc[i].r_pcrel != 0) {
+                    continue;
+                }
+                *((uintptr_t *)(relocBase + reloc[i].r_address)) += vm_kernel_slide;
+
+#if KASLR_KEXT_DEBUG
+#define MAX_DYSYMS_TO_LOG 5
+                if ( i < MAX_DYSYMS_TO_LOG ) {
+                    IOLog("kaslr: LC_DYSYMTAB unslid 0x%lx slid 0x%lx \n", 
+                          (unsigned long)VM_KERNEL_UNSLIDE(*((uintptr_t *)(relocBase + reloc[i].r_address))), 
+                          (unsigned long)*((uintptr_t *)(relocBase + reloc[i].r_address)));
+                }
+#endif
+            }
+
+            /* We should free these relocations, not just delete the reference to them.
+             * <rdar://problem/10535549> Free relocations from PIE kexts.
+             */
+            new_kextsize = round_page(kmod_info->size - reloc_size);
+            
+            if ((kmod_info->size - new_kextsize) > PAGE_SIZE) {
+                vm_offset_t     endofkext = kmod_info->address + kmod_info->size;
+                vm_offset_t     new_endofkext = kmod_info->address + new_kextsize;
+                vm_offset_t     endofrelocInfo = (vm_offset_t) (((uint8_t *)reloc) + reloc_size);
+                int             bytes_remaining = endofkext - endofrelocInfo;
+                OSData *        new_osdata = NULL;
+
+                /* fix up symbol offsets if they are after the dysymtab local relocs */
+                if (symtab) {
+                    if (dysymtab->locreloff < symtab->symoff){
+                        symtab->symoff -= reloc_size;
+                    }
+                    if (dysymtab->locreloff < symtab->stroff) {
+                        symtab->stroff -= reloc_size;
+                    }
+                }
+                if (dysymtab->locreloff < dysymtab->extreloff) {
+                    dysymtab->extreloff -= reloc_size;
+                }
+                
+                /* move data behind reloc info down to new offset */
+                if (endofrelocInfo < endofkext) {
+                    memcpy(reloc, (void *)endofrelocInfo, bytes_remaining);
+                }
+
+                /* Create a new OSData for the smaller kext object and reflect 
+                 * new linkedit segment size.
+                 */
+                linkeditSeg->vmsize = round_page(linkeditSeg->vmsize - reloc_size);
+                linkeditSeg->filesize = linkeditSeg->vmsize;
+                
+                new_osdata = OSData::withBytesNoCopy((void *)kmod_info->address, new_kextsize);
+                if (new_osdata) {
+                    /* Fix up kmod info and linkedExecutable.
+                     */
+                    kmod_info->size = new_kextsize;
+                    linkedExecutable->setDeallocFunction(NULL);
+                    linkedExecutable->release();
+                    linkedExecutable = new_osdata;
+                    
+#if VM_MAPPED_KEXTS
+                    kext_free(new_endofkext, (endofkext - new_endofkext));
+#else
+                    ml_static_mfree(new_endofkext, (endofkext - new_endofkext));
+#endif
+                }
+            }
+            dysymtab->nlocrel = 0;
+            dysymtab->locreloff = 0;
+        }
+    }
+                
+    result = kOSReturnSuccess;
+finish:
+    return result;
+}
+
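
The core of slidePrelinkedExecutable is the nlocrel loop: each non-pcrel local relocation names an offset from the first segment's base, and the absolute pointer stored at that offset is bumped by the slide. A compact user-space model of that fixup, with a simplified record standing in for struct relocation_info:

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-in for struct relocation_info: just the fields
    // the sliding loop actually consults.
    struct Reloc { uint32_t r_address; bool r_pcrel; };

    int main() {
        const uintptr_t slide = 0x1000;

        // 'image' models the kext's mapped bytes; slots 0 and 2 hold
        // absolute pointers that must move with the image.
        uintptr_t image[4] = {0x400000, 0, 0x400010, 0};
        char *relocBase = (char *)image;   // first segment's base

        const Reloc relocs[] = {
            {(uint32_t)(0 * sizeof(uintptr_t)), false},
            {(uint32_t)(2 * sizeof(uintptr_t)), false},
        };
        for (const Reloc &r : relocs) {
            if (r.r_pcrel) continue;       // pc-relative needs no fixup
            *(uintptr_t *)(relocBase + r.r_address) += slide;
        }

        printf("slot0 0x%lx slot2 0x%lx\n",
               (unsigned long)image[0], (unsigned long)image[2]);
        return 0;
    }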
 /*********************************************************************
 * called only by load()
 *********************************************************************/
@@ -4579,6 +4858,10 @@ OSKext::loadExecutable()
     }
 
     if (isPrelinked()) {
+        result = slidePrelinkedExecutable();
+        if (result != kOSReturnSuccess) {
+            goto finish;
+        }
         goto register_kmod;
     }
 
@@ -4731,7 +5014,7 @@ OSKext::loadExecutable()
     * cache and invalidate the instruction cache.
     * I/D caches are coherent on x86
     */
-#if    !defined(__i386__) && !defined(__x86_64__)
+#if !defined(__i386__) && !defined(__x86_64__)
     flush_dcache(kmod_info->address, kmod_info->size, false);
     invalidate_icache(kmod_info->address, kmod_info->size, false);
 #endif
@@ -4803,7 +5086,7 @@ register_kmod:
             "Kext %s executable loaded; %u pages at 0x%lx (load tag %u).", 
             kmod_info->name,
             (unsigned)kmod_info->size / PAGE_SIZE,
-            (unsigned long)kmod_info->address,
+            (unsigned long)VM_KERNEL_UNSLIDE(kmod_info->address),
             (unsigned)kmod_info->id);
     }
 
@@ -4873,11 +5156,18 @@ OSKext::jettisonLinkeditSegment(void)
 {
     kernel_mach_header_t     * machhdr = (kernel_mach_header_t *)kmod_info->address;
     kernel_segment_command_t * linkedit = NULL;
+    vm_offset_t                start;
     vm_size_t                  linkeditsize, kextsize;
-    vm_offset_t                linkeditaddr = 0;
     OSData                   * data = NULL;
-       
-    if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable) {
+
+#if NO_KEXTD
+       /* We can free symbol tables for all embedded kexts because we don't
+        * support runtime kext linking.
+        */
+    if (sKeepSymbols || !isExecutable() || !linkedExecutable || flags.jettisonLinkeditSeg) {
+#else
+    if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable || flags.jettisonLinkeditSeg) {
+#endif
         goto finish;
     }
 
@@ -4900,21 +5190,12 @@ OSKext::jettisonLinkeditSegment(void)
     */
     linkeditsize = round_page(linkedit->vmsize);
     kextsize = kmod_info->size - linkeditsize;
-       
-       /* Save linkedit address as removeLinkeditHeaders() will zero it */
-       linkeditaddr = trunc_page(linkedit->vmaddr);
-       
+    start = linkedit->vmaddr;
+
     data = OSData::withBytesNoCopy((void *)kmod_info->address, kextsize);
     if (!data) {
         goto finish;
     }
-    data->setDeallocFunction(osdata_kext_free);
-
-   /* Rewrite the Mach-O headers.
-    */
-    if (KERN_SUCCESS != removeLinkeditHeaders(linkedit)) {
-        goto finish;
-    }
 
    /* Fix the kmod info and linkedExecutable.
     */
@@ -4922,75 +5203,20 @@ OSKext::jettisonLinkeditSegment(void)
     linkedExecutable->setDeallocFunction(NULL);
     linkedExecutable->release();
     linkedExecutable = data;
-
+    flags.jettisonLinkeditSeg = 1;
+        
    /* Free the linkedit segment.
     */
-    kext_free(linkeditaddr, linkeditsize);
+#if VM_MAPPED_KEXTS
+    kext_free(start, linkeditsize);
+#else
+    ml_static_mfree(start, linkeditsize);
+#endif
 
 finish:
     return;
 }
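
jettisonLinkeditSegment can only hand pages back to the VM system whole, so it frees round_page(linkedit->vmsize) bytes and shrinks the surviving OSData wrapper by exactly that amount. The arithmetic, sketched with an assumed 4 KiB page size (round_page_up mirrors what the kernel's round_page macro does):

    #include <cstddef>
    #include <cstdio>

    static const size_t PAGE = 4096;  // assumed page size for illustration

    // Round up to the next page boundary, as the kernel's round_page() does.
    static size_t round_page_up(size_t n) { return (n + PAGE - 1) & ~(PAGE - 1); }

    int main() {
        size_t kextSize   = 24 * PAGE;       // full prelinked image
        size_t linkeditVm = 5 * PAGE + 123;  // __LINKEDIT, not page-aligned

        size_t linkeditsize = round_page_up(linkeditVm);  // what can be freed
        size_t remaining    = kextSize - linkeditsize;    // new wrapper length

        printf("free %zu bytes, keep %zu bytes\n", linkeditsize, remaining);
        return 0;
    }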
 
-/*********************************************************************
-*********************************************************************/
-OSReturn
-OSKext::removeLinkeditHeaders(kernel_segment_command_t *linkedit)
-{
-    OSReturn               result  = KERN_FAILURE;
-    kernel_mach_header_t * machhdr = (kernel_mach_header_t *)kmod_info->address;
-    vm_map_t               kext_map;
-    u_char               * src, * dst;
-    uint32_t               cmdsize, ncmds;
-    u_int                  i = 0;
-
-    kext_map = kext_get_vm_map(kmod_info);
-    if (!kext_map) {
-        result = KERN_MEMORY_ERROR;
-        goto finish;
-    }
-
-    result = vm_map_protect(kext_map, kmod_info->address,
-        kmod_info->address + kmod_info->hdr_size, VM_PROT_DEFAULT, TRUE);
-    if (result != KERN_SUCCESS) {
-        goto finish;
-    }
-
-    ncmds = machhdr->ncmds;
-    src = dst = (u_char *)(kmod_info->address + sizeof(*machhdr));
-
-    for (i = 0; i < ncmds; ++i, src += cmdsize) {
-        struct load_command * lc = (struct load_command *) src;
-        cmdsize = lc->cmdsize;
-
-        switch (lc->cmd) {
-        case LC_SEGMENT:
-        case LC_SEGMENT_64:
-            if (src != (u_char *)linkedit) break;
-            /* FALLTHROUGH */
-        case LC_SYMTAB:
-        case LC_DYSYMTAB:
-            bzero(src, cmdsize);
-            machhdr->ncmds--;
-            machhdr->sizeofcmds -= cmdsize;
-            continue;
-        }
-
-        memmove(dst, src, cmdsize);
-        dst += cmdsize;
-    }
-
-    result = vm_map_protect(kext_map, kmod_info->address,
-        kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE);
-    if (result != KERN_SUCCESS) {
-        goto finish;
-    }
-
-    result = KERN_SUCCESS;
-
-finish:
-    return result;
-}
-
 /*********************************************************************
 *********************************************************************/
 void
@@ -5037,7 +5263,7 @@ OSKext::registerKextsWithDTrace(void)
 }
 
 extern "C" {
-    extern int (*dtrace_modload)(struct kmod_info *);
+    extern int (*dtrace_modload)(struct kmod_info *, uint32_t);
     extern int (*dtrace_modunload)(struct kmod_info *);
 };
 
@@ -5050,7 +5276,13 @@ OSKext::registerWithDTrace(void)
     * prevent a kext from loading, so we ignore the return code.
     */
     if (!flags.dtraceInitialized && (dtrace_modload != NULL)) {
-        (void)(*dtrace_modload)(kmod_info);
+        uint32_t modflag = 0;
+        OSObject * forceInit = getPropertyForHostArch("OSBundleForceDTraceInit");
+        if (forceInit == kOSBooleanTrue) {
+            modflag |= KMOD_DTRACE_FORCE_INIT;
+        }
+
+        (void)(*dtrace_modload)(kmod_info, modflag);
         flags.dtraceInitialized = true;
         jettisonLinkeditSegment();
     }
@@ -5076,6 +5308,35 @@ OSKext::unregisterWithDTrace(void)
 /*********************************************************************
 * called only by loadExecutable()
 *********************************************************************/
+#if !VM_MAPPED_KEXTS
+#error Unrecognized architecture 
+#else
+static inline kern_return_t
+OSKext_protect(
+    vm_map_t   map,
+    vm_map_offset_t    start,
+    vm_map_offset_t    end,
+    vm_prot_t  new_prot,
+    boolean_t  set_max)
+{
+    if (start == end) { // 10538581
+        return(KERN_SUCCESS);
+    }
+    return vm_map_protect(map, start, end, new_prot, set_max);
+}
+
+static inline kern_return_t
+OSKext_wire(
+    vm_map_t   map,
+    vm_map_offset_t    start,
+    vm_map_offset_t    end,
+    vm_prot_t  access_type,
+    boolean_t       user_wire)
+{
+       return vm_map_wire(map, start, end, access_type, user_wire);
+}
+#endif
+
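
OSKext_protect above exists mainly to short-circuit the degenerate start == end case (the 10538581 reference) before it reaches vm_map_protect; a segment smaller than a page can round to an empty range. The same guard, sketched around a stub in place of the real VM call:

    #include <cstdint>
    #include <cstdio>

    enum { KERN_SUCCESS = 0, KERN_INVALID_ARGUMENT = 4 };

    // Stub in place of vm_map_protect: rejects empty ranges, so the
    // wrapper's guard has something observable to prevent.
    static int protect_range(uintptr_t start, uintptr_t end) {
        if (start >= end) return KERN_INVALID_ARGUMENT;
        printf("protecting [0x%lx, 0x%lx)\n",
               (unsigned long)start, (unsigned long)end);
        return KERN_SUCCESS;
    }

    // Mirrors OSKext_protect's shape: an empty range succeeds trivially.
    static int guarded_protect(uintptr_t start, uintptr_t end) {
        if (start == end) return KERN_SUCCESS;   // 10538581
        return protect_range(start, end);
    }

    int main() {
        printf("empty:    %d\n", guarded_protect(0x2000, 0x2000));
        printf("nonempty: %d\n", guarded_protect(0x2000, 0x4000));
        return 0;
    }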
 OSReturn
 OSKext::setVMProtections(void)
 {
@@ -5097,14 +5358,8 @@ OSKext::setVMProtections(void)
         goto finish;
     }
 
-    /* XXX: On arm, the vme covering the prelinked kernel (really, the whole
-     * range from 0xc0000000 to a little over 0xe0000000) has maxprot set to 0
-     * so the vm_map_protect calls below fail
-     * I believe this happens in the call to vm_map_enter in kmem_init but I 
-     * need to confirm.
-     */
     /* Protect the headers as read-only; they do not need to be wired */
-    result = vm_map_protect(kext_map, kmod_info->address, 
+    result = OSKext_protect(kext_map, kmod_info->address, 
         kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE);
     if (result != KERN_SUCCESS) {
         goto finish;
@@ -5116,7 +5371,7 @@ OSKext::setVMProtections(void)
         start = round_page(seg->vmaddr);
         end = trunc_page(seg->vmaddr + seg->vmsize);
 
-        result = vm_map_protect(kext_map, start, end, seg->maxprot, TRUE);
+        result = OSKext_protect(kext_map, start, end, seg->maxprot, TRUE);
         if (result != KERN_SUCCESS) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
@@ -5127,7 +5382,7 @@ OSKext::setVMProtections(void)
             goto finish;
         }
 
-        result = vm_map_protect(kext_map, start, end, seg->initprot, FALSE);
+        result = OSKext_protect(kext_map, start, end, seg->initprot, FALSE);
         if (result != KERN_SUCCESS) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
@@ -5139,7 +5394,7 @@ OSKext::setVMProtections(void)
         }
 
         if (segmentShouldBeWired(seg)) {
-            result = vm_map_wire(kext_map, start, end, seg->initprot, FALSE);
+            result = OSKext_wire(kext_map, start, end, seg->initprot, FALSE);
             if (result != KERN_SUCCESS) {
                 goto finish;
             }
@@ -5226,9 +5481,9 @@ OSKext::validateKextMapping(bool startFlag)
             getIdentifierCString(),
             whichOp,
             whichOp,
-            (void *)address,
-            (void *)kmod_info->address,
-            (void *)(kmod_info->address + kmod_info->size));
+            (void *)VM_KERNEL_UNSLIDE(address),
+            (void *)VM_KERNEL_UNSLIDE(kmod_info->address),
+            (void *)(VM_KERNEL_UNSLIDE(kmod_info->address) + kmod_info->size));
         result = kOSKextReturnBadData;
         goto finish;
     }
@@ -5249,11 +5504,12 @@ OSKext::validateKextMapping(bool startFlag)
                 kOSKextLogLoadFlag,
                 "Kext %s - bad %s pointer %p.",
                 getIdentifierCString(),
-                whichOp, (void *)address);
+                whichOp, (void *)VM_KERNEL_UNSLIDE(address)); 
             result = kOSKextReturnBadData;
             goto finish;
         }
 
+#if VM_MAPPED_KEXTS
         if (!(info.protection & VM_PROT_EXECUTE)) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
@@ -5264,6 +5520,7 @@ OSKext::validateKextMapping(bool startFlag)
             result = kOSKextReturnBadData;
             goto finish;
         }
+#endif
 
        /* Verify that the kext's segments are backed by physical memory.
         */
@@ -5319,10 +5576,7 @@ OSKext::start(bool startDependenciesFlag)
     OSReturn                            result = kOSReturnError;
     kern_return_t                       (* startfunc)(kmod_info_t *, void *);
     unsigned int                        i, count;
-    void                              * kmodStartData      = NULL;  // special handling needed
-#if CONFIG_MACF_KEXT
-    mach_msg_type_number_t              kmodStartDataCount = 0;
-#endif /* CONFIG_MACF_KEXT */
+    void                              * kmodStartData = NULL; 
 
     if (isStarted() || isInterface() || isKernelComponent()) {
         result = kOSReturnSuccess;
@@ -5394,14 +5648,6 @@ OSKext::start(bool startDependenciesFlag)
         }
     }
 
-#if CONFIG_MACF_KEXT
-   /* See if the kext has any MAC framework module data in its plist.
-    * This is passed in as arg #2 of the kext's start routine,
-    * which is otherwise reserved for any other kext.
-    */
-    kmodStartData = MACFCopyModuleDataForKext(this, &kmodStartDataCount);
-#endif /* CONFIG_MACF_KEXT */
-
     OSKextLog(this,
         kOSKextLogDetailLevel |
         kOSKextLogLoadFlag,
@@ -5415,6 +5661,20 @@ OSKext::start(bool startDependenciesFlag)
     if (result == KERN_SUCCESS) {
 #endif
 
+#if CONFIG_KEC_FIPS
+        kmodStartData = GetAppleTEXTHashForKext(this, this->infoDict);
+        
+#if 0
+        if (kmodStartData) {
+            OSKextLog(this,
+                      kOSKextLogErrorLevel |
+                      kOSKextLogGeneralFlag,
+                      "Kext %s calling module start function. kmodStartData %p. arch %s",
+                      getIdentifierCString(), kmodStartData, ARCHNAME); 
+        }
+#endif
+#endif // CONFIG_KEC_FIPS 
+
         result = startfunc(kmod_info, kmodStartData);
 
 #if !CONFIG_STATIC_CPPINIT
@@ -5450,18 +5710,6 @@ OSKext::start(bool startDependenciesFlag)
     }
 
 finish:
-#if CONFIG_MACF_KEXT
-   /* Free the module data for a MAC framework kext. When we start using
-    * param #2 we'll have to distinguish and free/release appropriately.
-    *
-    * xxx - I'm pretty sure the old codepath freed the data and that it's
-    * xxx - up to the kext to copy it.
-    */
-    if (kmodStartData) {
-        kmem_free(kernel_map, (vm_offset_t)kmodStartData, kmodStartDataCount);
-    }
-#endif /* CONFIG_MACF_KEXT */
-
     return result;
 }
 
@@ -5747,6 +5995,7 @@ OSKext::unload(void)
     /* Unwire and free the linked executable.
      */
     if (linkedExecutable) {
+#if VM_MAPPED_KEXTS
         if (!isInterface()) {
             kernel_segment_command_t *seg = NULL;
             vm_map_t kext_map = kext_get_vm_map(kmod_info);
@@ -5786,7 +6035,7 @@ OSKext::unload(void)
                 seg = nextsegfromheader((kernel_mach_header_t *) kmod_info->address, seg);
             }
         }
-
+#endif
         OSSafeReleaseNULL(linkedExecutable);
     }
 
@@ -5808,7 +6057,13 @@ OSKext::unload(void)
      * kernel cache.  9055303
      */
     if (isPrelinked()) {
-        sUnloadedPrelinkedKexts->setObject(bundleID);
+        if (!_OSKextInUnloadedPrelinkedKexts(bundleID)) {
+            IORecursiveLockLock(sKextLock);
+            if (sUnloadedPrelinkedKexts) {
+                sUnloadedPrelinkedKexts->setObject(bundleID);
+            }
+            IORecursiveLockUnlock(sKextLock);
+        }
     }
 
     OSKextLog(this,
@@ -6097,7 +6352,7 @@ finish:
     sConsiderUnloadsPending = false;
     sConsiderUnloadsExecuted = true;
 
-    (void) OSKext::considerRebuildOfPrelinkedKernel(NULL);
+    (void) OSKext::considerRebuildOfPrelinkedKernel();
     
     IORecursiveLockUnlock(sKextInnerLock);
     IORecursiveLockUnlock(sKextLock);
@@ -6177,9 +6432,11 @@ IOReturn OSKextSystemSleepOrWake(UInt32 messageType)
             thread_call_cancel(sUnloadCallout);
         }
         sSystemSleep = true;
+        AbsoluteTime_to_scalar(&sLastWakeTime) = 0;
     } else if (messageType == kIOMessageSystemHasPoweredOn) {
         sSystemSleep = false;
-    }
+        clock_get_uptime(&sLastWakeTime);
+    }
     IORecursiveLockUnlock(sKextInnerLock);
 
     return kIOReturnSuccess;
@@ -6198,68 +6455,119 @@ IOReturn OSKextSystemSleepOrWake(UInt32 messageType)
 *********************************************************************/
 /* static */
 void
-OSKext::considerRebuildOfPrelinkedKernel(OSString * moduleName)
-{
-    OSReturn       checkResult      = kOSReturnError;
-    static bool    requestedPrelink = false;
-    OSDictionary * prelinkRequest   = NULL;  // must release
-
+OSKext::considerRebuildOfPrelinkedKernel(void)
+{
+    static bool     requestedPrelink        = false;
+    OSReturn        checkResult             = kOSReturnError;
+    OSDictionary *  prelinkRequest          = NULL;  // must release
+    OSCollectionIterator * kextIterator     = NULL;  // must release
+    const OSSymbol * thisID                 = NULL;  // do not release
+    bool            doRebuild               = false;
+    AbsoluteTime    my_abstime;
+    UInt64          my_ns;
+    SInt32          delta_secs;
+    
+    /* Only one auto rebuild per boot and only on boot from prelinked kernel */
+    if (requestedPrelink || !sPrelinkBoot) {
+        return;
+    }
+    
+    /* no direct return from this point */
     IORecursiveLockLock(sKextLock);
     
-    /* moduleName is only passed when we see a load come in.  We are only 
-     * interested in rebuilding the kernel cache if the kext we are loading
-     * is not already in the original kernel cache.  9055303
+    /* We need to wait for kextd to get up and running with unloads already done
+     * and any new startup kexts loaded.   
      */
-    if (moduleName) {
-        int         count = sUnloadedPrelinkedKexts->getCount();
-        int         i;
-
-        for (i = 0; i < count; i++) {
-            const OSSymbol *    myBundleID;     // do not release
-
-            myBundleID = OSDynamicCast(OSSymbol, sUnloadedPrelinkedKexts->getObject(i));
-            if (!myBundleID) continue;
-            if (moduleName->isEqualTo(myBundleID->getCStringNoCopy())) {
-                OSKextLog(/* kext */ NULL,
-                          kOSKextLogDetailLevel |
-                          kOSKextLogArchiveFlag,
-                          "bundleID %s already in cache skipping rebuild.",
-                          myBundleID->getCStringNoCopy());
-
-                /* no need to rebuild, already in kernel cache  */
-                goto finish;
-            }
+    if (!sConsiderUnloadsExecuted ||
+        !sDeferredLoadSucceeded) {
+        goto finish;
+    }
+    
+    /* we really only care about boot / system start up related kexts so bail 
+     * if we're here after REBUILD_MAX_TIME.
+     */
+    if (!_OSKextInPrelinkRebuildWindow()) {
+        OSKextLog(/* kext */ NULL,
+                  kOSKextLogArchiveFlag,
+                  "%s prebuild rebuild has expired",
+                  __FUNCTION__);
+        requestedPrelink = true;
+        goto finish;
+    }
+    
+    /* we do not want to trigger a rebuild if we get here too close to waking
+     * up.  (see radar 10233768)
+     */
+    IORecursiveLockLock(sKextInnerLock);
+    
+    clock_get_uptime(&my_abstime);
+    delta_secs = MINIMUM_WAKEUP_SECONDS + 1;
+    if (AbsoluteTime_to_scalar(&sLastWakeTime) != 0) {
+        SUB_ABSOLUTETIME(&my_abstime, &sLastWakeTime);
+        absolutetime_to_nanoseconds(my_abstime, &my_ns);
+        delta_secs = (SInt32)(my_ns / NSEC_PER_SEC);
+    }
+    IORecursiveLockUnlock(sKextInnerLock);
+    
+    if (delta_secs < MINIMUM_WAKEUP_SECONDS) {
+        /* too close to time of last wake from sleep */
+        goto finish;
+    }
+    requestedPrelink = true;
+    
+    /* Now it's time to see if we have a reason to rebuild.  We may have done 
+     * some loads and unloads but the kernel cache didn't actually change.
+     * We will rebuild if any kext is not marked prelinked AND is not in our
+     * list of prelinked kexts that got unloaded.  (see radar 9055303)
+     */
+    kextIterator = OSCollectionIterator::withCollection(sKextsByID);
+    if (!kextIterator) {
+        goto finish;
+    }
+    
+    while ((thisID = OSDynamicCast(OSSymbol, kextIterator->getNextObject()))) {
+        OSKext *    thisKext;  // do not release
+        
+        thisKext = OSDynamicCast(OSKext, sKextsByID->getObject(thisID));
+        if (!thisKext || thisKext->isPrelinked() || thisKext->isKernel()) {
+            continue;
+        }
+        
+        if (_OSKextInUnloadedPrelinkedKexts(thisKext->bundleID)) {
+            continue;
         }
-        (void) OSKext::setDeferredLoadSucceeded();
+        /* kext is loaded and was not in current kernel cache so let's rebuild
+         */
+        doRebuild = true;
+        OSKextLog(/* kext */ NULL,
+                  kOSKextLogArchiveFlag,
+                  "considerRebuildOfPrelinkedKernel %s triggered rebuild",
+                  thisKext->bundleID->getCStringNoCopy());
+        break;
     }
-
-    if (!sDeferredLoadSucceeded || !sConsiderUnloadsExecuted || 
-        sSafeBoot || requestedPrelink) 
-    {
+    sUnloadedPrelinkedKexts->flushCollection();
+    
+    if (!doRebuild) {
         goto finish;
     }
-
-    OSKextLog(/* kext */ NULL,
-        kOSKextLogProgressLevel |
-        kOSKextLogArchiveFlag,
-        "Requesting build of prelinked kernel.");
-
+    
     checkResult = _OSKextCreateRequest(kKextRequestPredicateRequestPrelink,
-        &prelinkRequest);
+                                       &prelinkRequest);
     if (checkResult != kOSReturnSuccess) {
         goto finish;
     }
-
+    
     if (!sKernelRequests->setObject(prelinkRequest)) {
         goto finish;
     }
-
+    
     OSKext::pingKextd();
-    requestedPrelink = true;
-
+    
 finish:
     IORecursiveLockUnlock(sKextLock);
     OSSafeRelease(prelinkRequest);
+    OSSafeRelease(kextIterator);
+    
     return;
 }
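
The reworked rebuild path declines to run within MINIMUM_WAKEUP_SECONDS of the last wake, taking the delta between two monotonic uptime samples. A portable analogue of that check, with std::chrono::steady_clock standing in for clock_get_uptime/absolutetime_to_nanoseconds:

    #include <chrono>
    #include <cstdio>

    static const int MINIMUM_WAKEUP_SECONDS = 30;  // mirrors the kernel constant

    int main() {
        using clock = std::chrono::steady_clock;

        // Models sLastWakeTime; the kernel treats a zero value as
        // "never woke", which always passes the check.
        clock::time_point lastWake = clock::now();  // pretend we just woke

        long long delta = std::chrono::duration_cast<std::chrono::seconds>(
                              clock::now() - lastWake).count();
        if (delta < MINIMUM_WAKEUP_SECONDS)
            printf("woke %llds ago; too soon, skipping rebuild\n", delta);
        else
            printf("ok to consider a rebuild\n");
        return 0;
    }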
 
@@ -7008,6 +7316,8 @@ OSKext::handleRequest(
     OSKext       * theKext            = NULL;  // do not release
     OSBoolean    * boolArg            = NULL;  // do not release
 
+    bool           hideTheSlide       = false; 
+
     IORecursiveLockLock(sKextLock);
 
     if (responseOut) {
@@ -7070,13 +7380,19 @@ OSKext::handleRequest(
     
     result = kOSKextReturnNotPrivileged;
     if (hostPriv == HOST_PRIV_NULL) {
-        if (!predicate->isEqualTo(kKextRequestPredicateGetLoaded) &&
-            !predicate->isEqualTo(kKextRequestPredicateGetKernelImage) &&
-            !predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) {
-
-            goto finish;
-        }
-    }
+        if (sPrelinkBoot) {
+            hideTheSlide = true;
+            
+            /* must be root to use these kext requests */
+            if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress) ) {
+                OSKextLog(/* kext */ NULL,
+                          kOSKextLogErrorLevel |
+                          kOSKextLogIPCFlag,
+                          "Access Failure - must be root user.");
+                goto finish;
+            }
+       }
+    }
 
    /* Get common args in anticipation of use.
     */
@@ -7218,20 +7534,13 @@ OSKext::handleRequest(
             kOSKextLogDebugLevel |
             kOSKextLogIPCFlag,
             "Returning kernel load address 0x%llx.",
-            (unsigned long long)textseg->vmaddr);
+            (unsigned long long) textseg->vmaddr );
+        
         addressNum = OSNumber::withNumber((long long unsigned int)textseg->vmaddr,
             8 * sizeof(long long unsigned int));
         responseObject = addressNum;
         result = kOSReturnSuccess;
 
-    } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelImage)) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogDebugLevel |
-            kOSKextLogIPCFlag,
-            "Returning kernel image.");
-        responseData = OSKext::copySanitizedKernelImage();
-        result = kOSReturnSuccess;
-
     } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelRequests)) {
 
        /* Hand the current sKernelRequests array to the caller
@@ -7360,7 +7669,7 @@ finish:
 OSDictionary *
 OSKext::copyLoadedKextInfo(
     OSArray * kextIdentifiers,
-    OSArray * infoKeys)
+    OSArray * infoKeys) 
 {
     OSDictionary * result = NULL;
     OSDictionary * kextInfo = NULL;  // must release
@@ -7494,11 +7803,49 @@ OSKext::copyInfo(OSArray * infoKeys)
                 linkedExecutable->getBytesNoCopy();
 
             if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleMachOHeadersKey)) {
+                kernel_mach_header_t *  temp_kext_mach_hdr;
+                struct load_command *   lcp;
+
                 headerData = OSData::withBytes(kext_mach_hdr,
                     (u_int) (sizeof(*kext_mach_hdr) + kext_mach_hdr->sizeofcmds));
                 if (!headerData) {
                     goto finish;
                 }
+
+                // unslide any vmaddrs we return to userspace - 10726716
+               temp_kext_mach_hdr = (kernel_mach_header_t *)
+                    headerData->getBytesNoCopy();
+                if (temp_kext_mach_hdr == NULL) {
+                    goto finish;
+                }
+
+                lcp = (struct load_command *) (temp_kext_mach_hdr + 1);
+                for (i = 0; i < temp_kext_mach_hdr->ncmds; i++) {
+                    if (lcp->cmd == LC_SEGMENT_KERNEL) {
+                        kernel_segment_command_t *  scp;
+                        
+                        scp = (kernel_segment_command_t *) lcp;
+                        // 10543468 - if we jettisoned __LINKEDIT clear size info
+                        if (flags.jettisonLinkeditSeg) {
+                            if (strncmp(scp->segname, SEG_LINKEDIT, sizeof(scp->segname)) == 0) {
+                                scp->vmsize = 0;
+                                scp->fileoff = 0;
+                                scp->filesize = 0;
+                            }
+                        }
+#if 0
+                        OSKextLog(/* kext */ NULL,
+                                  kOSKextLogErrorLevel |
+                                  kOSKextLogGeneralFlag,
+                                  "%s: LC_SEGMENT_KERNEL segname '%s' vmaddr 0x%llX 0x%lX vmsize %llu nsects %u",
+                                  __FUNCTION__, scp->segname, scp->vmaddr, 
+                                  VM_KERNEL_UNSLIDE(scp->vmaddr), 
+                                  scp->vmsize, scp->nsects);
+#endif
+                        scp->vmaddr = VM_KERNEL_UNSLIDE(scp->vmaddr);
+                    }
+                    lcp = (struct load_command *)((caddr_t)lcp + lcp->cmdsize);
+                }
                 result->setObject(kOSBundleMachOHeadersKey, headerData);
             }
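
The unsliding loop above walks the copied Mach-O header the canonical way: the first load command begins immediately after the mach header, and each subsequent one lies cmdsize bytes further on. That traversal in isolation, over pared-down stand-ins for the mach-o types:

    #include <cstdint>
    #include <cstdio>

    // Pared-down stand-ins for the mach-o header and load command types.
    struct MachHeader  { uint32_t ncmds; uint32_t sizeofcmds; };
    struct LoadCommand { uint32_t cmd; uint32_t cmdsize; };

    int main() {
        // A header followed contiguously by two commands.
        struct Image { MachHeader mh; LoadCommand cmds[2]; } img = {
            {2, (uint32_t)(2 * sizeof(LoadCommand))},
            {{0x19, (uint32_t)sizeof(LoadCommand)},    // LC_SEGMENT-like tag
             {0x02, (uint32_t)sizeof(LoadCommand)}},   // LC_SYMTAB-like tag
        };

        // The traversal: start just past the header, advance by cmdsize.
        LoadCommand *lcp = (LoadCommand *)(&img.mh + 1);
        for (uint32_t i = 0; i < img.mh.ncmds; i++) {
            printf("cmd 0x%x size %u\n", lcp->cmd, lcp->cmdsize);
            lcp = (LoadCommand *)((char *)lcp + lcp->cmdsize);
        }
        return 0;
    }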
 
@@ -7656,6 +8003,7 @@ OSKext::copyInfo(OSArray * infoKeys)
             */
             if (linkedExecutable /* && !isInterface() */) {
                 loadAddress = (uint64_t)linkedExecutable->getBytesNoCopy();
+                loadAddress = VM_KERNEL_UNSLIDE(loadAddress);
                 loadSize = linkedExecutable->getLength();
                 
                /* If we have a kmod_info struct, calculate the wired size
@@ -7746,10 +8094,10 @@ OSKext::copyInfo(OSArray * infoKeys)
             while ( (thisMetaClass = OSDynamicCast(OSMetaClass,
                 metaClassIterator->getNextObject())) ) {
 
-                               OSSafeReleaseNULL(metaClassDict);
-                               OSSafeReleaseNULL(scratchNumber);
-                               OSSafeReleaseNULL(metaClassName);
-                               OSSafeReleaseNULL(superclassName);
+                OSSafeReleaseNULL(metaClassDict);
+                OSSafeReleaseNULL(scratchNumber);
+                OSSafeReleaseNULL(metaClassName);
+                OSSafeReleaseNULL(superclassName);
 
                 metaClassDict = OSDictionary::withCapacity(3);
                 if (!metaClassDict) {
@@ -7821,218 +8169,8 @@ finish:
     return result;
 }
 
-/********************************************************************/
-static struct symtab_command * getKernelSymtab(void)
-{
-    struct symtab_command * result   = NULL;
-    struct load_command   * load_cmd = NULL;
-       unsigned long i;
-
-       load_cmd = (struct load_command *)
-               ((uintptr_t)&_mh_execute_header + sizeof(_mh_execute_header));
-       for(i = 0; i < _mh_execute_header.ncmds; i++){
-               if (load_cmd->cmd == LC_SYMTAB) {
-            result = (struct symtab_command *)load_cmd;
-            goto finish;
-        }
-               load_cmd = (struct load_command *)
-            ((uintptr_t)load_cmd + load_cmd->cmdsize);
-       }
-
-finish:
-    return result;
-}
-
 /*********************************************************************
-*********************************************************************/
-/* static */
-OSData *
-OSKext::copySanitizedKernelImage(void)
-{
-    OSData                   * result            = NULL;
-
-    kernel_mach_header_t     * kernelHeader      = NULL;
-    uint32_t                   sizeofcmds        = 0;
-
-   /* These start out pointing to running kernel but
-    * after copying point to the copied info.
-    */
-    kernel_segment_command_t * text_seg          = NULL;
-    kernel_segment_command_t * data_seg          = NULL;
-    kernel_segment_command_t * linkedit_seg      = NULL;
-    struct symtab_command    * symtab_cmd        = NULL;
-    kernel_section_t         * text_const_sect   = NULL;
-    kernel_section_t         * data_const_sect   = NULL;
-
-    kern_return_t              kern_result       = 0;
-    u_long                     kernelCopyLength  = 0;
-    vm_offset_t                kernelCopyAddr    = 0;
-    u_char                   * kernelCopy        = NULL;
-    
-    vm_offset_t                contentOffset     = 0;
-    struct load_command      * scan_cmd          = NULL;
-    kernel_section_t         * scan_sect         = NULL;
-    int64_t                    stroff_shift      = 0;
-    
-    uint32_t                   i;
-
-    text_seg = getsegbyname("__TEXT");
-    data_seg = getsegbyname("__DATA");
-    linkedit_seg = getsegbyname("__LINKEDIT");
-    symtab_cmd = getKernelSymtab();
-    
-    text_const_sect = getsectbyname("__TEXT", "__const");
-    data_const_sect = getsectbyname("__DATA", "__const");
-    
-    if (!text_seg || !data_seg || !linkedit_seg || !symtab_cmd ||
-        !text_const_sect || ! data_const_sect) {
-
-        OSKextLog(/* kext */ NULL, 
-            kOSKextLogErrorLevel | kOSKextLogIPCFlag,
-            "Can't provide kernel image for linking; missing component.");
-        goto finish;
-    }
-
-   /* Figure the size of the kernel image to build. We don't use the sizes of
-    * the __TEXT & __DATA segments overall as we only use the __const sections,
-    * so add those in manually. We're going to round each part to page size
-    * multiples too, just to be extra cautious.
-    */
-    sizeofcmds = text_seg->cmdsize + data_seg->cmdsize +
-        linkedit_seg->cmdsize + symtab_cmd->cmdsize;
-    kernelCopyLength = round_page(sizeof(_mh_execute_header) + sizeofcmds) +
-        round_page(text_const_sect->size) +
-        round_page(data_const_sect->size) +
-        round_page(linkedit_seg->filesize);
-
-    kern_result = kmem_alloc(kernel_map, &kernelCopyAddr, kernelCopyLength);
-    if (kern_result != KERN_SUCCESS) {
-        goto finish;
-    }
-
-    kernelCopy = (u_char *)kernelCopyAddr;
-    bzero(kernelCopy, kernelCopyLength);  // ??? - is this really necessary?
-
-   /*****
-    * Copy the kernel Mach header and the load commands we want.
-    */
-    memcpy(kernelCopy, &_mh_execute_header, sizeof(_mh_execute_header));
-    kernelHeader = (kernel_mach_header_t *)kernelCopy;
-    kernelHeader->ncmds = 0;
-    kernelHeader->sizeofcmds = sizeofcmds;
-    contentOffset = round_page(sizeof(_mh_execute_header) + sizeofcmds);
-
-   /* __TEXT segment load command and sections.
-    * Note that the __TEXT segment's 'offset' and 'filesize' include
-    * the data from the beginning of the mach header.
-    *
-    * Don't muck with the __TEXT segment's vmsize here;
-    * user-space linking requires it to match what is in the running kernel.
-    * We'll just have to live with it not being accurate
-    * (not like we can run the sanitized image after all).
-    */
-    scan_cmd = (struct load_command *)&kernelHeader[1]; // just past mach header
-    memcpy(scan_cmd, text_seg, text_seg->cmdsize);
-    kernelHeader->ncmds++;
-    text_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
-    text_seg->fileoff = 0;
-    text_seg->filesize = round_page(sizeof(_mh_execute_header) + sizeofcmds);
-
-    scan_sect = (kernel_section_t *)(text_seg + 1);
-    for (i = 0; i < text_seg->nsects; i++, scan_sect++) {
-        if (0 == strncmp("__const", scan_sect->sectname, sizeof("__const"))) {
-            text_const_sect   = scan_sect;  // retarget to constructed section
-
-            text_seg->filesize += scan_sect->size;
-
-            scan_sect->offset = contentOffset;
-            contentOffset    += scan_sect->size;
-
-            memcpy(kernelCopy + scan_sect->offset, (void *)(uintptr_t)scan_sect->addr,
-                scan_sect->size);
-        } else {
-            scan_sect->addr  = 0;
-            scan_sect->size  = 0;
-            scan_sect->offset = contentOffset;
-            scan_sect->nreloc = 0;
-        }
-    }
-
-    contentOffset = round_page(contentOffset);
-
-   /* __DATA segment load command and sections.
-    * Leave the vmsize as in the running kernel here, too.
-    */
-    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
-    memcpy(scan_cmd, data_seg, data_seg->cmdsize);
-    kernelHeader->ncmds++;
-    data_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
-    data_seg->fileoff = contentOffset;
-    data_seg->filesize = 0;
-
-    scan_sect = (kernel_section_t *)(data_seg + 1);
-    for (i = 0; i < data_seg->nsects; i++, scan_sect++) {
-        if (0 == strncmp("__const", scan_sect->sectname, sizeof("__const"))) {
-            data_const_sect   = scan_sect;  // retarget to constructed section
-
-            data_seg->filesize += scan_sect->size;
-
-            scan_sect->offset = contentOffset;
-            contentOffset    += scan_sect->size;
-
-            memcpy(kernelCopy + scan_sect->offset, (void *)(uintptr_t)scan_sect->addr,
-                scan_sect->size);
-        } else {
-            scan_sect->addr  = 0;
-            scan_sect->size  = 0;
-            scan_sect->offset = contentOffset;
-            scan_sect->nreloc = 0;
-        }
-    }
-
-    contentOffset = round_page(contentOffset);
-
-   /* __LINKEDIT segment load command.
-    * Leave the vmsize as in the running kernel here, too.
-    */
-    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
-    memcpy(scan_cmd, linkedit_seg, linkedit_seg->cmdsize);
-    kernelHeader->ncmds++;
-    linkedit_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
-    linkedit_seg->fileoff = contentOffset;
-    linkedit_seg->filesize = linkedit_seg->vmsize;
-
-    contentOffset += round_page(linkedit_seg->vmsize);
-
-    memcpy(kernelCopy + linkedit_seg->fileoff, (void *)(uintptr_t)linkedit_seg->vmaddr,
-        linkedit_seg->vmsize);
-
-   /* __SYMTAB load command (contents shared with __LINKEDIT).
-    */
-    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
-    memcpy(scan_cmd, symtab_cmd, symtab_cmd->cmdsize);
-    kernelHeader->ncmds++;
-    symtab_cmd = (struct symtab_command *)scan_cmd;  // retarget to constructed cmd
-    stroff_shift = symtab_cmd->stroff - symtab_cmd->symoff;
-    symtab_cmd->symoff = linkedit_seg->fileoff;
-    symtab_cmd->stroff = symtab_cmd->symoff + stroff_shift;
-
-   /* Wrap the thing up in an OSData.
-    */
-    result = OSData::withBytesNoCopy(kernelCopy, kernelCopyLength);
-    if (result) {
-        result->setDeallocFunction(osdata_kmem_free);
-        kernelCopy = NULL;
-    }
-    
-finish:
-    if (kernelCopy) kmem_free(kernel_map, kernelCopyAddr, kernelCopyLength);
-
-    return result;
-}
-
-/*********************************************************************
-*********************************************************************/
+ *********************************************************************/
 /* static */
 OSReturn
 OSKext::requestResource(
@@ -8068,8 +8206,8 @@ OSKext::requestResource(
         OSKextLog(/* kext */ NULL, 
             kOSKextLogErrorLevel | kOSKextLogIPCFlag,
             "Can't request resource %s for %s - requests to user space are disabled.",
-                       resourceNameCString,
-                       kextIdentifierCString);
+            resourceNameCString,
+            kextIdentifierCString);
         result = kOSKextReturnDisabled;
         goto finish;
     }
@@ -8368,17 +8506,13 @@ OSKext::dispatchResource(OSDictionary * requestDict)
     if (!callbackKext) {
         OSKextLog(/* kext */ NULL,
             kOSKextLogErrorLevel | kOSKextLogIPCFlag,
-            "Can't invoke callback for resource request; "
-            "no kext loaded at callback address %p.",
-            callback);
+            "Can't invoke callback for resource request; ");
         goto finish;
     }
     if (!callbackKext->flags.starting && !callbackKext->flags.started) {
         OSKextLog(/* kext */ NULL,
             kOSKextLogErrorLevel | kOSKextLogIPCFlag,
-            "Can't invoke kext resource callback; "
-            "kext at callback address %p is not running.",
-            callback);
+            "Can't invoke kext resource callback; ");
         goto finish;
     }
 
@@ -8713,6 +8847,63 @@ finish:
     return result;
 }
 
+/*********************************************************************
+ * We really only care about boot / system start up related kexts. 
+ * We return true if we're less than REBUILD_MAX_TIME since start up,
+ * otherwise return false.
+ *********************************************************************/
+bool _OSKextInPrelinkRebuildWindow(void)
+{
+    static bool     outside_the_window = false;
+    AbsoluteTime    my_abstime;
+    UInt64          my_ns;
+    SInt32          my_secs;
+    
+    if (outside_the_window) {
+        return(false);
+    }
+    clock_get_uptime(&my_abstime);
+    absolutetime_to_nanoseconds(my_abstime, &my_ns);
+    my_secs = (SInt32)(my_ns / NSEC_PER_SEC);
+    if (my_secs > REBUILD_MAX_TIME) {
+        outside_the_window = true;
+        return(false);
+    }
+    return(true);
+}
+
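
For illustration, a minimal caller sketch for the window check above (the helper names are invented, not part of this commit):

    extern bool _OSKextInPrelinkRebuildWindow(void);

    static void
    MaybeQueuePrelinkRebuild(void)
    {
        if (_OSKextInPrelinkRebuildWindow()) {
            // still within REBUILD_MAX_TIME seconds of boot, so
            // rebuild work kicked off now counts as startup-related
            // QueuePrelinkRebuild();   -- hypothetical follow-up
        }
    }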
+/*********************************************************************
+ *********************************************************************/
+bool _OSKextInUnloadedPrelinkedKexts( const OSSymbol * theBundleID )
+{
+    int unLoadedCount, i;
+    bool result = false;
+    
+    IORecursiveLockLock(sKextLock);
+    
+    if (sUnloadedPrelinkedKexts == NULL) {
+        goto finish;
+    }
+    unLoadedCount = sUnloadedPrelinkedKexts->getCount();
+    if (unLoadedCount == 0) {
+        goto finish;
+    }
+    
+    for (i = 0; i < unLoadedCount; i++) {
+        const OSSymbol *    myBundleID;     // do not release
+        
+        myBundleID = OSDynamicCast(OSSymbol, sUnloadedPrelinkedKexts->getObject(i));
+        if (!myBundleID) continue;
+        if (theBundleID->isEqualTo(myBundleID->getCStringNoCopy())) {
+            result = true;
+            break;
+        }
+    }
+finish:
+    IORecursiveLockUnlock(sKextLock);
+    return(result);
+}
+
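
A short usage sketch (the bundle identifier is made up): callers hand in an OSSymbol and learn whether that kext was prelinked but has since been unloaded.

    const OSSymbol * myID = OSSymbol::withCString("com.example.driver");  // hypothetical ID
    if (myID) {
        if (_OSKextInUnloadedPrelinkedKexts(myID)) {
            // identifier is on the unloaded-prelinked list
        }
        myID->release();
    }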
 #if PRAGMA_MARK
 #pragma mark Personalities (IOKit Drivers)
 #endif
@@ -9319,6 +9510,195 @@ finish:
     return;
 }
 
+#if KASLR_IOREG_DEBUG
+    
+#define IOLOG_INDENT( the_indention ) \
+{ \
+    int     i; \
+    for ( i = 0; i < (the_indention); i++ ) { \
+        IOLog(" "); \
+    } \
+}
+    
+extern vm_offset_t      vm_kernel_stext;
+extern vm_offset_t      vm_kernel_etext;
+extern mach_vm_offset_t kext_alloc_base; 
+extern mach_vm_offset_t kext_alloc_max;
+    
+bool ScanForAddrInObject(OSObject * theObject, 
+                         int indent );
+
+bool ScanForAddrInObject(OSObject * theObject, 
+                         int indent)
+{
+    const OSMetaClass *     myTypeID;
+    OSCollectionIterator *  myIter;
+    OSSymbol *              myKey;
+    OSObject *              myValue;
+    bool                    myResult = false;
+    
+    if ( theObject == NULL ) {
+        IOLog("%s: theObject is NULL \n", 
+              __FUNCTION__);
+        return myResult;
+    }
+    
+    myTypeID = OSTypeIDInst(theObject);
+    
+    if ( myTypeID == OSTypeID(OSDictionary) ) {
+        OSDictionary *      myDictionary;
+        
+        myDictionary = OSDynamicCast(OSDictionary, theObject);
+        myIter = OSCollectionIterator::withCollection( myDictionary );
+        if ( myIter == NULL ) 
+            return myResult;
+        myIter->reset();
+        
+        while ( (myKey = OSDynamicCast(OSSymbol, myIter->getNextObject())) ) {
+            bool    myTempResult;
+            
+            myValue = myDictionary->getObject(myKey);
+            myTempResult = ScanForAddrInObject(myValue, (indent + 4));
+            if (myTempResult) {
+                // if we ever get a true result return true
+                myResult = true;
+                IOLOG_INDENT(indent);
+                IOLog("OSDictionary key \"%s\" \n", myKey->getCStringNoCopy());
+            }
+        }
+        myIter->release();
+    }
+    else if ( myTypeID == OSTypeID(OSArray) ) {
+        OSArray *   myArray;
+        
+        myArray = OSDynamicCast(OSArray, theObject);
+        myIter = OSCollectionIterator::withCollection(myArray);
+        if ( myIter == NULL ) 
+            return myResult;
+        myIter->reset();
+        
+        while ( (myValue = myIter->getNextObject()) ) {
+            bool        myTempResult;
+            myTempResult = ScanForAddrInObject(myValue, (indent + 4));
+            if (myTempResult) {
+                // if we ever get a true result return true
+                myResult = true;
+                IOLOG_INDENT(indent);
+                IOLog("OSArray: \n");
+            }
+        }
+        myIter->release();
+    }
+    else if ( myTypeID == OSTypeID(OSString) || myTypeID == OSTypeID(OSSymbol) ) {
+        
+        // should we look for addresses in strings?
+    }
+    else if ( myTypeID == OSTypeID(OSData) ) {
+        
+        void * *        myPtrPtr;
+        unsigned int    myLen;
+        OSData *        myDataObj;
+        
+        myDataObj =    OSDynamicCast(OSData, theObject);
+        myPtrPtr = (void * *) myDataObj->getBytesNoCopy();
+        myLen = myDataObj->getLength();
+        
+        if (myPtrPtr && myLen > 7) {
+            int     i;
+            int     myPtrCount = (myLen / sizeof(void *));
+            
+            for (i = 0; i < myPtrCount; i++) {
+                UInt64 numberValue = (UInt64) *(myPtrPtr);
+                
+                if ( kext_alloc_max != 0 &&
+                    numberValue >= kext_alloc_base && 
+                    numberValue < kext_alloc_max ) {
+                    
+                    OSKext * myKext    = NULL;  // must release (looked up)
+                                                // IOLog("found OSData %p in kext map %p to %p  \n",
+                                                //       *(myPtrPtr),
+                                                //       (void *) kext_alloc_base,
+                                                //       (void *) kext_alloc_max);
+                    
+                    myKext = OSKext::lookupKextWithAddress( (vm_address_t) *(myPtrPtr) );
+                    if (myKext) {
+                        IOLog("found addr %p from an OSData obj within kext \"%s\"  \n",
+                              *(myPtrPtr),
+                              myKext->getIdentifierCString());
+                        myKext->release();
+                    }
+                    myResult = true;
+                }
+                if ( vm_kernel_etext != 0 &&
+                    numberValue >= vm_kernel_stext && 
+                    numberValue < vm_kernel_etext ) {
+                    IOLog("found addr %p from an OSData obj within kernel text segment %p to %p  \n",
+                          *(myPtrPtr),
+                          (void *) vm_kernel_stext,
+                          (void *) vm_kernel_etext);
+                    myResult = true;
+                }
+                myPtrPtr++;
+            }
+        }
+    }
+    else if ( myTypeID == OSTypeID(OSBoolean) ) {
+        
+        // do nothing here...
+    }
+    else if ( myTypeID == OSTypeID(OSNumber) ) {
+        
+        OSNumber * number = OSDynamicCast(OSNumber, theObject);
+        
+        UInt64 numberValue = number->unsigned64BitValue();
+        
+        if ( kext_alloc_max != 0 &&
+            numberValue >= kext_alloc_base && 
+            numberValue < kext_alloc_max ) {
+            
+            OSKext * myKext    = NULL;  // must release (looked up)
+            IOLog("found OSNumber in kext map %p to %p  \n",
+                  (void *) kext_alloc_base,
+                  (void *) kext_alloc_max);
+            IOLog("OSNumber 0x%08llx (%llu) \n", numberValue, numberValue);
+            
+            myKext = OSKext::lookupKextWithAddress( (vm_address_t) numberValue );
+            if (myKext) {
+                IOLog("found in kext \"%s\"  \n",
+                      myKext->getIdentifierCString());
+                myKext->release();
+            }
+            
+            myResult = true;
+        }
+        if ( vm_kernel_etext != 0 &&
+            numberValue >= vm_kernel_stext && 
+            numberValue < vm_kernel_etext ) {
+            IOLog("found OSNumber in kernel text segment %p to %p  \n",
+                  (void *) vm_kernel_stext,
+                  (void *) vm_kernel_etext);
+            IOLog("OSNumber 0x%08llx (%llu) \n", numberValue, numberValue);
+            myResult = true;
+        }
+    }
+#if 0
+    else {
+        const OSMetaClass* myMetaClass = NULL;
+        
+        myMetaClass = theObject->getMetaClass();
+        if ( myMetaClass ) {
+            IOLog("class %s \n", myMetaClass->getClassName() );
+        }
+        else {
+            IOLog("Unknown object \n" );
+        }
+    }
+#endif
+    
+    return myResult;
+}
+#endif // KASLR_IOREG_DEBUG
+
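
Under KASLR_IOREG_DEBUG, a debugging sketch of how ScanForAddrInObject() might be aimed at one registry entry's property table to hunt for leaked slid pointers (the helper name and the entry lookup are assumptions, not part of this commit):

    static void
    ScanRegistryEntryForAddrs(IORegistryEntry * entry)   /* hypothetical */
    {
        OSDictionary * props = entry->dictionaryWithProperties();  // must release
        if (props) {
            if (ScanForAddrInObject(props, 0)) {
                IOLog("possible slid kernel/kext address in properties\n");
            }
            props->release();
        }
    }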
 }; /* extern "C" */
 
 #if PRAGMA_MARK
@@ -9920,8 +10300,6 @@ OSKext::updateLoadedKextSummaries(void)
 
         start = (vm_map_offset_t) summaryHeader;
         end = start + summarySize;
-        result = vm_map_protect(kernel_map, start, end, VM_PROT_DEFAULT, FALSE);
-        if (result != KERN_SUCCESS) goto finish;
     }
 
    /* Populate the summary header.
@@ -9948,8 +10326,6 @@ OSKext::updateLoadedKextSummaries(void)
 
     start = (vm_map_offset_t) summaryHeader;
     end = start + summarySize;
-    result = vm_map_protect(kernel_map, start, end, VM_PROT_READ, FALSE);
-    if (result != KERN_SUCCESS) goto finish;
 
     sPrevLoadedKextSummaries = gLoadedKextSummaries;
     sPrevLoadedKextSummariesAllocSize = sLoadedKextSummariesAllocSize;
@@ -10107,426 +10483,65 @@ finish:
     return result;
 }
 #endif /* __i386__ */
+    
+#if CONFIG_KEC_FIPS
+    
 #if PRAGMA_MARK
-#pragma mark MAC Framework Support
+#pragma mark Kernel External Components for FIPS compliance
 #endif
+    
 /*********************************************************************
-*********************************************************************/
-#if CONFIG_MACF_KEXT
-/* MAC Framework support */
-
-/* 
- * define IOC_DEBUG to display run-time debugging information
- * #define IOC_DEBUG 1
- */
-
-#ifdef IOC_DEBUG
-#define DPRINTF(x)    printf x
-#else
-#define IOC_DEBUG
-#define DPRINTF(x)
-#endif
-
-/*********************************************************************
-*********************************************************************/
-static bool
-MACFObjectIsPrimitiveType(OSObject * obj)
-{
-    const OSMetaClass * typeID = NULL;  // do not release
-
-    typeID = OSTypeIDInst(obj);
-    if (typeID == OSTypeID(OSString) || typeID == OSTypeID(OSNumber) ||
-        typeID == OSTypeID(OSBoolean) || typeID == OSTypeID(OSData)) {
-
-        return true;
-    }
-    return false;
-}
-
-/*********************************************************************
-*********************************************************************/
-static int
-MACFLengthForObject(OSObject * obj)
-{
-    const OSMetaClass * typeID = NULL;  // do not release
-    int len;
-
-    typeID = OSTypeIDInst(obj);
-    if (typeID == OSTypeID(OSString)) {
-        OSString * stringObj = OSDynamicCast(OSString, obj);
-        len = stringObj->getLength() + 1;
-    } else if (typeID == OSTypeID(OSNumber)) {
-        len = sizeof("4294967295");    /* UINT32_MAX */
-    } else if (typeID == OSTypeID(OSBoolean)) {
-        OSBoolean * boolObj = OSDynamicCast(OSBoolean, obj);
-        len = (boolObj == kOSBooleanTrue) ? sizeof("true") : sizeof("false");
-    } else if (typeID == OSTypeID(OSData)) {
-        OSData * dataObj = OSDynamicCast(OSData, obj);
-        len = dataObj->getLength();
-    } else {
-        len = 0;
-    }
-    return len;
-}
-
-/*********************************************************************
-*********************************************************************/
-static void
-MACFInitElementFromObject(
-    struct mac_module_data_element * element,
-    OSObject                       * value)
-{
-    const OSMetaClass * typeID = NULL;  // do not release
-
-    typeID = OSTypeIDInst(value);
-    if (typeID == OSTypeID(OSString)) {
-        OSString * stringObj = OSDynamicCast(OSString, value);
-        element->value_type = MAC_DATA_TYPE_PRIMITIVE;
-        element->value_size = stringObj->getLength() + 1;
-        DPRINTF(("osdict: string %s size %d\n", 
-            stringObj->getCStringNoCopy(), element->value_size));
-        memcpy(element->value, stringObj->getCStringNoCopy(),
-            element->value_size);
-    } else if (typeID == OSTypeID(OSNumber)) {
-        OSNumber * numberObj = OSDynamicCast(OSNumber, value);
-        element->value_type = MAC_DATA_TYPE_PRIMITIVE;
-        element->value_size = sprintf(element->value, "%u",
-            numberObj->unsigned32BitValue()) + 1;
-    } else if (typeID == OSTypeID(OSBoolean)) {
-        OSBoolean * boolObj = OSDynamicCast(OSBoolean, value);
-        element->value_type = MAC_DATA_TYPE_PRIMITIVE;
-        if (boolObj == kOSBooleanTrue) {
-            strcpy(element->value, "true");
-            element->value_size = 5;
-        } else {
-            strcpy(element->value, "false");
-            element->value_size = 6;
-        }
-    } else if (typeID == OSTypeID(OSData)) {
-        OSData * dataObj = OSDynamicCast(OSData, value);
-        element->value_type = MAC_DATA_TYPE_PRIMITIVE;
-        element->value_size = dataObj->getLength();
-        DPRINTF(("osdict: data size %d\n", dataObj->getLength()));
-        memcpy(element->value, dataObj->getBytesNoCopy(),
-            element->value_size);
-    }
-    return;
-}
-
-/*********************************************************************
-* This function takes an OSDictionary and returns a struct mac_module_data
-* list.
-*********************************************************************/
-static struct mac_module_data *
-MACFEncodeOSDictionary(OSDictionary * dict)
+ * Kernel External Components for FIPS compliance (KEC_FIPS)
+ *********************************************************************/
+static void * 
+GetAppleTEXTHashForKext(OSKext * theKext, OSDictionary *theInfoDict)
 {
-    struct mac_module_data         * result      = NULL;  // do not free
-    const OSMetaClass              * typeID      = NULL;  // do not release
-    OSString                       * key         = NULL;  // do not release
-    OSCollectionIterator           * keyIterator = NULL;  // must release
-    struct mac_module_data_element * element     = NULL;  // do not free
-    unsigned int                     strtabsize  = 0;
-    unsigned int                     listtabsize = 0;
-    unsigned int                     dicttabsize = 0;
-    unsigned int                     nkeys       = 0;
-    unsigned int                     datalen     = 0;
-    char                           * strtab      = NULL;  // do not free
-    char                           * listtab     = NULL;  // do not free
-    char                           * dicttab     = NULL;  // do not free
-    vm_offset_t                      data_addr   = 0;
+    AppleTEXTHash_t         my_ath = {1, 0, NULL};
+    AppleTEXTHash_t *       my_athp = NULL;         // do not release
+    OSDictionary *          textHashDict = NULL;    // do not release
+    OSData *                segmentHash = NULL;     // do not release
     
-    keyIterator = OSCollectionIterator::withCollection(dict);
-    if (!keyIterator) {
-        goto finish;
+    if (theKext == NULL || theInfoDict == NULL) {
+        return(NULL);
     }
     
-    /* Iterate over OSModuleData to figure out total size */
-    while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) {
-        
-        // Get the key's value and determine its type
-        OSObject * value = dict->getObject(key);
-        if (!value) {
-            continue;
-        }
-        
-        typeID = OSTypeIDInst(value);
-        if (MACFObjectIsPrimitiveType(value)) {
-            strtabsize += MACFLengthForObject(value);
-        }
-        else if (typeID == OSTypeID(OSArray)) {
-            unsigned int k, cnt, nents;
-            OSArray * arrayObj = OSDynamicCast(OSArray, value);
-            
-            nents = 0;
-            cnt = arrayObj->getCount();
-            for (k = 0; k < cnt; k++) {
-                value = arrayObj->getObject(k);
-                typeID = OSTypeIDInst(value);
-                if (MACFObjectIsPrimitiveType(value)) {
-                    listtabsize += MACFLengthForObject(value);
-                    nents++;
-                }
-                else if (typeID == OSTypeID(OSDictionary)) {
-                    unsigned int           dents = 0;
-                    OSDictionary         * dictObj      = NULL;  // do not release
-                    OSString             * dictkey      = NULL;  // do not release
-                    OSCollectionIterator * dictIterator = NULL;  // must release
-                    
-                    dictObj = OSDynamicCast(OSDictionary, value);
-                    dictIterator = OSCollectionIterator::withCollection(dictObj);
-                    if (!dictIterator) {
-                        goto finish;
-                    }
-                    while ((dictkey = OSDynamicCast(OSString,
-                        dictIterator->getNextObject()))) {
-
-                        OSObject * dictvalue = NULL;  // do not release
-                        
-                        dictvalue = dictObj->getObject(dictkey);
-                        if (!dictvalue) {
-                            continue;
-                        }
-                        if (MACFObjectIsPrimitiveType(dictvalue)) {
-                            strtabsize += MACFLengthForObject(dictvalue);
-                        } else {
-                            continue; /* Only handle primitive types here. */
-                        }
-                       /*
-                        * Allow for the "arraynnn/" prefix in the key length.
-                        */
-                        strtabsize += dictkey->getLength() + 1;
-                        dents++;
-                    }
-                    dictIterator->release();
-                    if (dents-- > 0) {
-                        dicttabsize += sizeof(struct mac_module_data_list) +
-                        dents * sizeof(struct mac_module_data_element);
-                        nents++;
-                    }
-                }
-                else {
-                    continue; /* Skip everything else. */
-                }
-            }
-            if (nents == 0) {
-                continue;
-            }
-            listtabsize += sizeof(struct mac_module_data_list) +
-                (nents - 1) * sizeof(struct mac_module_data_element);
-        } else {
-            continue; /* skip anything else */
-        }
-        strtabsize += key->getLength() + 1;
-        nkeys++;
+    textHashDict = OSDynamicCast(OSDictionary, theInfoDict->getObject(kAppleTextHashesKey));
+    if (textHashDict == NULL) {
+        return(NULL);
     }
-    if (nkeys == 0) {
-        goto finish;
+    
+    segmentHash = OSDynamicCast(OSData,
+                                textHashDict->getObject(ARCHNAME));
+    if (segmentHash == NULL) {
+        return(NULL);
     }
     
-   /*
-    * Allocate and fill in the module data structures.
-    */
-    datalen = sizeof(struct mac_module_data) +
-        sizeof(mac_module_data_element) * (nkeys - 1) +
-    strtabsize + listtabsize + dicttabsize;
-    DPRINTF(("osdict: datalen %d strtabsize %d listtabsize %d dicttabsize %d\n", 
-        datalen, strtabsize, listtabsize, dicttabsize));
-    if (kmem_alloc(kernel_map, &data_addr, datalen) != KERN_SUCCESS) {
-        goto finish;
+    // KEC_FIPS type kexts never unload so we don't have to clean up our 
+    // AppleTEXTHash_t
+    if (kmem_alloc(kernel_map, (vm_offset_t *) &my_athp, 
+                   sizeof(AppleTEXTHash_t)) != KERN_SUCCESS) {
+        return(NULL);
     }
-    result = (mac_module_data *)data_addr;
-    result->base_addr = data_addr;
-    result->size = datalen;
-    result->count = nkeys;
-    strtab = (char *)&result->data[nkeys];
-    listtab = strtab + strtabsize;
-    dicttab = listtab + listtabsize;
-    DPRINTF(("osdict: data_addr %p strtab %p listtab %p dicttab %p end %p\n", 
-        data_addr, strtab, listtab, dicttab, data_addr + datalen));
     
-    keyIterator->reset();
-    nkeys = 0;
-    element = &result->data[0];
-    DPRINTF(("osdict: element %p\n", element));
-    while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) {
-        
-        // Get the key's value and determine its type
-        OSObject * value = dict->getObject(key);
-        if (!value) {
-            continue;
-        }
+    memcpy(my_athp, &my_ath, sizeof(my_ath));
+    my_athp->ath_length = segmentHash->getLength();
+    if (my_athp->ath_length > 0) {
+        my_athp->ath_hash = (void *)segmentHash->getBytesNoCopy();
+    }
         
-        /* Store key */
-        DPRINTF(("osdict: element @%p\n", element));
-        element->key = strtab;
-        element->key_size = key->getLength() + 1;
-        DPRINTF(("osdict: key %s size %d @%p\n", key->getCStringNoCopy(),
-            element->key_size, strtab));
-        memcpy(element->key, key->getCStringNoCopy(), element->key_size);
+#if 0
+    OSKextLog(theKext,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "Kext %s ath_version %d ath_length %d ath_hash %p",
+              theKext->getIdentifierCString(), 
+              my_athp->ath_version,
+              my_athp->ath_length,
+              my_athp->ath_hash); 
+#endif
         
-        typeID = OSTypeIDInst(value);
-        if (MACFObjectIsPrimitiveType(value)) {
-            /* Store value */
-            element->value = element->key + element->key_size;
-            DPRINTF(("osdict: primitive element value %p\n", element->value));
-            MACFInitElementFromObject(element, value);
-            strtab += element->key_size + element->value_size;
-            DPRINTF(("osdict: new strtab %p\n", strtab));
-        } else if (typeID == OSTypeID(OSArray)) {
-            unsigned int k, cnt, nents;
-            char *astrtab;
-            struct mac_module_data_list *arrayhd;
-            struct mac_module_data_element *ele;
-            OSArray *arrayObj = OSDynamicCast(OSArray, value);
-            
-            element->value = listtab;
-            DPRINTF(("osdict: array element value %p\n", element->value));
-            element->value_type = MAC_DATA_TYPE_ARRAY;
-            arrayhd = (struct mac_module_data_list *)element->value;
-            arrayhd->type = 0;
-            DPRINTF(("osdict: arrayhd %p\n", arrayhd));
-            nents = 0;
-            astrtab = strtab + element->key_size;
-            ele = &(arrayhd->list[0]);
-            cnt = arrayObj->getCount();
-            for (k = 0; k < cnt; k++) {
-                value = arrayObj->getObject(k);
-                DPRINTF(("osdict: array ele %d @%p\n", nents, ele));
-                ele->key = NULL;
-                ele->key_size = 0;
-                typeID = OSTypeIDInst(value);
-                if (MACFObjectIsPrimitiveType(value)) {
-                    if (arrayhd->type != 0 &&
-                        arrayhd->type != MAC_DATA_TYPE_PRIMITIVE) {
-
-                        continue;
-                    }
-                    arrayhd->type = MAC_DATA_TYPE_PRIMITIVE;
-                    ele->value = astrtab;
-                    MACFInitElementFromObject(ele, value);
-                    astrtab += ele->value_size;
-                    DPRINTF(("osdict: array new astrtab %p\n", astrtab));
-                } else if (typeID == OSTypeID(OSDictionary)) {
-                    unsigned int                     dents;
-                    char                           * dstrtab      = NULL;  // do not free
-                    OSDictionary                   * dictObj      = NULL;  // do not release
-                    OSString                       * dictkey      = NULL;  // do not release
-                    OSCollectionIterator           * dictIterator = NULL;  // must release
-                    struct mac_module_data_list    * dicthd       = NULL;  // do not free
-                    struct mac_module_data_element * dele         = NULL;  // do not free
-                    
-                    if (arrayhd->type != 0 &&
-                        arrayhd->type != MAC_DATA_TYPE_DICT) {
-
-                        continue;
-                    }
-                    dictObj = OSDynamicCast(OSDictionary, value);
-                    dictIterator = OSCollectionIterator::withCollection(dictObj);
-                    if (!dictIterator) {
-                        goto finish;
-                    }
-                    DPRINTF(("osdict: dict\n"));
-                    ele->value = dicttab;
-                    ele->value_type = MAC_DATA_TYPE_DICT;
-                    dicthd = (struct mac_module_data_list *)ele->value;
-                    DPRINTF(("osdict: dicthd %p\n", dicthd));
-                    dstrtab = astrtab;
-                    dents = 0;
-                    while ((dictkey = OSDynamicCast(OSString,
-                        dictIterator->getNextObject()))) {
-
-                        OSObject * dictvalue = NULL;  // do not release
-                        
-                        dictvalue = dictObj->getObject(dictkey);
-                        if (!dictvalue) {
-                            continue;
-                        }
-                        dele = &(dicthd->list[dents]);
-                        DPRINTF(("osdict: dict ele %d @%p\n", dents, dele));
-                        if (MACFObjectIsPrimitiveType(dictvalue)) {
-                            dele->key = dstrtab;
-                            dele->key_size = dictkey->getLength() + 1;
-                            DPRINTF(("osdict: dictkey %s size %d @%p\n",
-                                dictkey->getCStringNoCopy(), dictkey->getLength(), dstrtab));
-                            memcpy(dele->key, dictkey->getCStringNoCopy(),
-                                dele->key_size);
-                            dele->value = dele->key + dele->key_size;
-                            MACFInitElementFromObject(dele, dictvalue);
-                            dstrtab += dele->key_size + dele->value_size;
-                            DPRINTF(("osdict: dict new dstrtab %p\n", dstrtab));
-                        } else {
-                            continue;    /* Only handle primitive types here. */
-                        }
-                        dents++;
-                    }
-                    dictIterator->release();
-                    if (dents == 0) {
-                        continue;
-                    }
-                    arrayhd->type = MAC_DATA_TYPE_DICT;
-                    ele->value_size = sizeof(struct mac_module_data_list) +
-                        (dents - 1) * sizeof(struct mac_module_data_element);
-                    DPRINTF(("osdict: dict ele size %d ents %d\n", ele->value_size, dents));
-                    dicttab += ele->value_size;
-                    DPRINTF(("osdict: new dicttab %p\n", dicttab));
-                    dicthd->count = dents;
-                    astrtab = dstrtab;
-                } else {
-                    continue;        /* Skip everything else. */
-                }
-                nents++;
-                ele++;
-            }
-            if (nents == 0) {
-                continue;
-            }
-            element->value_size = sizeof(struct mac_module_data_list) +
-                (nents - 1) * sizeof(struct mac_module_data_element);
-            listtab += element->value_size;
-            DPRINTF(("osdict: new listtab %p\n", listtab));
-            arrayhd->count = nents;
-            strtab = astrtab;
-            DPRINTF(("osdict: new strtab %p\n", strtab));
-        } else {
-            continue;        /* skip anything else */
-        }
-        element++;
-    }
-    DPRINTF(("result list @%p, key %p value %p\n",
-        result, result->data[0].key, result->data[0].value));
-finish:
-    if (keyIterator) keyIterator->release();
-    return result;
+    return( (void *) my_athp );
 }
-
-/*********************************************************************
-* This function takes a plist and looks for an OSModuleData dictionary.
-* If it is found, an encoded copy is returned. The value must be
-* kmem_free()'d.
-*********************************************************************/
-static void *
-MACFCopyModuleDataForKext(
-    OSKext                 * theKext,
-    mach_msg_type_number_t * datalen)
-
-{
-    struct mac_module_data * result         = NULL;
-    OSDictionary           * kextModuleData = NULL;  // do not release
-    vm_map_copy_t            copy           = 0;
     
-    kextModuleData = OSDynamicCast(OSDictionary,
-        theKext->getPropertyForHostArch("OSModuleData"));
-    if (!kextModuleData) {
-        goto finish;
-    }
-    
-    result = MACFEncodeOSDictionary(kextModuleData);
-    if (!result) {
-        goto finish;
-    }
-    *datalen = module_data->size;
+#endif // CONFIG_KEC_FIPS
 
-finish:
-    return (void *)result;
-}
-#endif /* CONFIG_MACF_KEXT */
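
For context, a consumer sketch for GetAppleTEXTHashForKext() above (the caller is assumed; the returned ath_hash aliases the OSData bytes held by the kext's AppleTextHashes dictionary for this architecture):

    AppleTEXTHash_t * ath;   // never freed: KEC_FIPS kexts never unload

    ath = (AppleTEXTHash_t *) GetAppleTEXTHashForKext(theKext, theInfoDict);
    if (ath && ath->ath_length > 0) {
        // hand ath->ath_hash / ath->ath_length to the FIPS integrity check
    }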
index 0696e2b024a218413beed80b70c50ae5fef16c69..009383888a7767008dee56b137c1ba0df6e56540 100644 (file)
@@ -95,7 +95,9 @@ static enum {
 static const int      kClassCapacityIncrement = 40;
 static const int      kKModCapacityIncrement  = 10;
 static OSDictionary * sAllClassesDict;
+static unsigned int   sDeepestClass;
 IOLock              * sAllClassesLock = NULL;
+IOLock              * sInstancesLock  = NULL;
 
 /*
  * While loading a kext and running all its constructors to register
@@ -111,6 +113,13 @@ static struct StalledData {
 } * sStalled;
 IOLock * sStalledClassesLock = NULL;
 
+
+struct ExpansionData {
+    OSOrderedSet * instances;
+    OSKext *       kext;
+};
+
+
 #if PRAGMA_MARK
 #pragma mark OSMetaClassBase
 #endif /* PRAGMA_MARK */
@@ -118,6 +127,7 @@ IOLock * sStalledClassesLock = NULL;
 * OSMetaClassBase.
 *********************************************************************/
 
+#if APPLE_KEXT_VTABLE_PADDING
 /*********************************************************************
 * Reserved vtable functions.
 *********************************************************************/
@@ -139,7 +149,8 @@ void OSMetaClassBase::_RESERVEDOSMetaClassBase5()
     { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 5); }
 void OSMetaClassBase::_RESERVEDOSMetaClassBase6()
     { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 6); }
-    
+#endif
+
 /*********************************************************************
 * These used to be inline in the header but gcc didn't believe us
 * Now we MUST pull the inline out at least until the compiler is
@@ -176,8 +187,10 @@ initialize()
 {
     sAllClassesLock = IOLockAlloc();
     sStalledClassesLock = IOLockAlloc();
+    sInstancesLock = IOLockAlloc();
 }
 
+#if APPLE_KEXT_VTABLE_PADDING
 /*********************************************************************
 * If you need this slot you had better setup an IOCTL style interface.
 * 'Cause the whole kernel world depends on OSMetaClassBase and YOU
@@ -186,6 +199,7 @@ initialize()
 void
 OSMetaClassBase::_RESERVEDOSMetaClassBase7()
 { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 7); }
+#endif
 
 /*********************************************************************
 *********************************************************************/
@@ -285,6 +299,7 @@ const OSMetaClass * OSMetaClass::getMetaClass() const
 * OSMetaClass
 *********************************************************************/
 
+#if APPLE_KEXT_VTABLE_PADDING
 /*********************************************************************
 * Reserved functions.
 *********************************************************************/
@@ -304,6 +319,7 @@ void OSMetaClass::_RESERVEDOSMetaClass6()
     { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 6); }
 void OSMetaClass::_RESERVEDOSMetaClass7()
     { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 7); }
+#endif
 
 /*********************************************************************
 *********************************************************************/
@@ -377,6 +393,9 @@ OSMetaClass::OSMetaClass(
     classSize = inClassSize;
     superClassLink = inSuperClass;
 
+    reserved = IONew(ExpansionData, 1);
+    bzero(reserved, sizeof(ExpansionData));
+
    /* Hack alert: We are just casting inClassName and storing it in
     * an OSString * instance variable. This may be because you can't
     * create C++ objects in static constructors, but I really don't know!
@@ -420,7 +439,7 @@ OSMetaClass::OSMetaClass(
 *********************************************************************/
 OSMetaClass::~OSMetaClass()
 {
-    OSKext * myKext = (OSKext *)reserved; // do not release
+    OSKext * myKext = reserved ? reserved->kext : 0; // do not release
 
    /* Hack alert: 'className' is a C string during early C++ init, and
     * is converted to a real OSSymbol only when we record the OSKext in
@@ -494,7 +513,13 @@ OSMetaClass::getClassName() const
     if (!className) return NULL;
     return className->getCStringNoCopy();
 }
-
+/*********************************************************************
+*********************************************************************/
+const OSSymbol *
+OSMetaClass::getClassNameSymbol() const
+{
+    return className;
+}
 /*********************************************************************
 *********************************************************************/
 unsigned int
@@ -571,6 +596,7 @@ OSMetaClass::postModLoad(void * loadHandle)
                 result = kOSMetaClassNoDicts;
                 break;
             }
+            sAllClassesDict->setOptions(OSCollection::kSort, OSCollection::kSort);
 
         // No break; fall through
 
@@ -605,7 +631,7 @@ OSMetaClass::postModLoad(void * loadHandle)
             */
             IOLockLock(sAllClassesLock);
             for (i = 0; i < sStalled->count; i++) {
-                OSMetaClass * me = sStalled->classes[i];
+                const OSMetaClass * me = sStalled->classes[i];
                 OSMetaClass * orig = OSDynamicCast(OSMetaClass,
                     sAllClassesDict->getObject((const char *)me->className));
                 
@@ -618,10 +644,13 @@ OSMetaClass::postModLoad(void * loadHandle)
                         "OSMetaClass: Kext %s class %s is a duplicate;"
                         "kext %s already has a class by that name.",
                          sStalled->kextIdentifier, (const char *)me->className,
-                        ((OSKext *)orig->reserved)->getIdentifierCString());
+                        ((OSKext *)orig->reserved->kext)->getIdentifierCString());
                     result = kOSMetaClassDuplicateClass;
                     break;
                 }
+               unsigned int depth = 1;
+               while ((me = me->superClassLink)) depth++;
+               if (depth > sDeepestClass) sDeepestClass = depth;
             }
             IOLockUnlock(sAllClassesLock);
             
@@ -649,7 +678,7 @@ OSMetaClass::postModLoad(void * loadHandle)
                 
                /* Do not retain the kext object here.
                 */
-                me->reserved = (ExpansionData *)myKext;
+                me->reserved->kext = myKext;
                 if (myKext) {
                     result = myKext->addClass(me, sStalled->count);
                     if (result != kOSReturnSuccess) {
@@ -718,7 +747,7 @@ OSMetaClass::instanceDestructed() const
     }
 
     if (((int)instanceCount) < 0) {
-        OSKext * myKext = (OSKext *)reserved;
+        OSKext * myKext = reserved->kext;
 
         OSKextLog(myKext, kOSMetaClassLogSpec,
             // xxx - this phrasing is rather cryptic
@@ -756,6 +785,138 @@ OSMetaClass::reportModInstances(const char * kextIdentifier)
         kOSKextLogExplicitLevel);
     return;
 }
+/*********************************************************************
+*********************************************************************/
+
+void
+OSMetaClass::addInstance(const OSObject * instance, bool super) const
+{
+    if (!super) IOLockLock(sInstancesLock);
+
+    if (!reserved->instances) {
+       reserved->instances = OSOrderedSet::withCapacity(16);
+       if (superClassLink) {
+           superClassLink->addInstance(reserved->instances, true);
+       }
+    }
+    reserved->instances->setLastObject(instance);
+
+    if (!super) IOLockUnlock(sInstancesLock);
+}
+
+void
+OSMetaClass::removeInstance(const OSObject * instance, bool super) const
+{
+    if (!super) IOLockLock(sInstancesLock);
+
+    if (reserved->instances) {
+       reserved->instances->removeObject(instance);
+       if (0 == reserved->instances->getCount()) {
+           if (superClassLink) {
+               superClassLink->removeInstance(reserved->instances, true);
+           }
+           reserved->instances->release();
+           reserved->instances = 0;
+       }
+    }
+
+    if (!super) IOLockUnlock(sInstancesLock);
+}
+
+void
+OSMetaClass::applyToInstances(OSOrderedSet * set,
+                             OSMetaClassInstanceApplierFunction  applier,
+                              void * context)
+{
+    enum {         kLocalDepth = 24 };
+    unsigned int    _nextIndex[kLocalDepth];
+    OSOrderedSet *  _sets[kLocalDepth];
+    unsigned int *  nextIndex = &_nextIndex[0];
+    OSOrderedSet ** sets      = &_sets[0];
+    OSObject *      obj;
+    OSOrderedSet *  childSet;
+    unsigned int    maxDepth;
+    unsigned int    idx;
+    unsigned int    level;
+    bool            done;
+
+    maxDepth = sDeepestClass;
+    if (maxDepth > kLocalDepth)
+    {
+       nextIndex = IONew(typeof(nextIndex[0]), maxDepth);
+       sets      = IONew(typeof(sets[0]), maxDepth);
+    }
+    done = false;
+    level = 0;
+    idx = 0;
+    do
+    {
+       while (!done && (obj = set->getObject(idx++)))
+       {
+           if ((childSet = OSDynamicCast(OSOrderedSet, obj)))
+           {
+               if (level >= maxDepth) panic(">maxDepth");
+               sets[level] = set;
+               nextIndex[level] = idx;
+               level++;
+               set = childSet;
+               idx = 0;
+               break;
+           }
+           done = (*applier)(obj, context);
+       }
+       if (!obj)
+       {
+           if (!done && level)
+           {
+               level--;
+               set = sets[level];
+               idx = nextIndex[level];
+           } else done = true;
+       }
+    }
+    while (!done);
+    if (maxDepth > kLocalDepth)
+    {
+       IODelete(nextIndex, typeof(nextIndex[0]), maxDepth);
+       IODelete(sets, typeof(sets[0]), maxDepth);
+    }
+}
+
+void
+OSMetaClass::applyToInstances(OSMetaClassInstanceApplierFunction applier,
+                              void * context) const
+{
+    IOLockLock(sInstancesLock);
+    if (reserved->instances) applyToInstances(reserved->instances, applier, context);
+    IOLockUnlock(sInstancesLock);
+}
+
+void
+OSMetaClass::applyToInstancesOfClassName(
+                               const OSSymbol * name,
+                               OSMetaClassInstanceApplierFunction  applier,
+                                void * context)
+{
+    OSMetaClass  * meta;
+    OSOrderedSet * set = 0;
+
+    IOLockLock(sAllClassesLock);
+    if (sAllClassesDict 
+       && (meta = (OSMetaClass *) sAllClassesDict->getObject(name))
+       && (set = meta->reserved->instances))
+    {
+       set->retain();
+    }
+    IOLockUnlock(sAllClassesLock);
+
+    if (!set) return;
+
+    IOLockLock(sInstancesLock);
+    applyToInstances(set, applier, context);
+    IOLockUnlock(sInstancesLock);
+    set->release();
+}
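
A hedged usage sketch for the instance-tracking API above (class name and counter are arbitrary; the applier signature follows OSMetaClassInstanceApplierFunction):

    static bool
    CountOneInstance(const OSObject * instance __unused, void * context)
    {
        unsigned int * counter = (unsigned int *) context;
        (*counter)++;
        return false;          // false == keep iterating, true == stop early
    }

    unsigned int count = 0;
    const OSSymbol * name = OSSymbol::withCString("IOService");  // example class
    if (name) {
        OSMetaClass::applyToInstancesOfClassName(name, &CountOneInstance, &count);
        name->release();
    }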
 
 /*********************************************************************
 *********************************************************************/
@@ -922,7 +1083,7 @@ OSMetaClass::getSuperClass() const
 const OSSymbol *
 OSMetaClass::getKmodName() const
 {
-    OSKext * myKext = (OSKext *)reserved;
+    OSKext * myKext = reserved ? reserved->kext : 0;
     if (myKext) {
         return myKext->getIdentifier();
     }
index ae8faf0efc657c42422b277e8e7de0e1f2d3a28d..d16fa34cebcb37dce59dbe242bcd66536ea2a52b 100644 (file)
@@ -548,6 +548,7 @@ void OSlibkernInit(void)
     // This must be called before calling OSRuntimeInitializeCPP.
     OSMetaClassBase::initialize();
     
+    g_kernel_kmod_info.address = (vm_address_t) &_mh_execute_header;
     if (kOSReturnSuccess != OSRuntimeInitializeCPP(&g_kernel_kmod_info, 0)) {
         panic("OSRuntime: C++ runtime failed to initialize.");
     }
index f2d5c3e8c247e91c6ff5b7e3922e7ef2a0c54090..775253bafe7b67b7e43bcb15a39df0bc929e7b9a 100644 (file)
@@ -280,6 +280,8 @@ bool OSSet::isEqualTo(const OSSet *aSet) const
 
     for ( i = 0; i < count; i++ ) {
         obj1 = aSet->members->getObject(i);
+       if (containsObject(obj1))
+               continue;
         obj2 = members->getObject(i);
         if ( !obj1 || !obj2 )
                 return false;
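
A rough sketch of what the added containsObject short-circuit changes (strings arbitrary): two sets holding the same members in a different insertion order now compare equal, since containment rather than position decides for members present in both.

    const OSObject * objs[2] = { OSString::withCString("x"),
                                 OSString::withCString("y") };
    const OSObject * swapped[2] = { objs[1], objs[0] };

    OSSet * s1 = OSSet::withObjects(objs, 2);
    OSSet * s2 = OSSet::withObjects(swapped, 2);

    bool same = s1->isEqualTo(s2);   // true with this change

    s1->release();
    s2->release();
    objs[0]->release();
    objs[1]->release();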
index 5f9fad84eeec7b4e78d80f49f5c5a88eb4ef9bbf..d2eca1bf7650696f94e09a8325a0b0e0fab55424 100644 (file)
@@ -601,3 +601,33 @@ bool OSSymbol::isEqualTo(const OSMetaClassBase *obj) const
     else
        return false;
 }
+
+unsigned int
+OSSymbol::bsearch(
+       const void *  key,
+       const void *  array,
+       unsigned int  arrayCount,
+       size_t        memberSize)
+{
+    const void **p;
+    unsigned int baseIdx = 0;
+    unsigned int lim;
+
+    for (lim = arrayCount; lim; lim >>= 1)
+    {
+       p = (typeof(p)) (((uintptr_t) array) + (baseIdx + (lim >> 1)) * memberSize);
+       if (key == *p)
+       {
+           return (baseIdx + (lim >> 1));
+       }
+       if (key > *p) 
+       {       
+           // move right
+           baseIdx += (lim >> 1) + 1;
+           lim--;
+       }
+       // else move left
+    }
+    // not found, insertion point here
+    return (baseIdx + (lim >> 1));
+}
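
A usage sketch for the helper above: it binary-searches an array of pointer-sized members ordered by raw pointer value and returns either the matching index or the insertion point (table, used, and sym are assumed to exist):

    // 'table' holds 'used' entries kept sorted by pointer value
    unsigned int idx = OSSymbol::bsearch(sym, table, used, sizeof(table[0]));
    if (idx < used && table[idx] == sym) {
        // found at idx
    } else {
        // absent; inserting at idx keeps 'table' sorted
    }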
index 3f4a3744e273aa49088ba5df1c3c6ce25961ad34..3a22ef1bd35a719e9a8a7fba7b8434553347f507 100644 (file)
                        isa = PBXProject;
                        buildConfigurationList = 1DEB91C708733DAC0010E9CD /* Build configuration list for PBXProject "test1" */;
                        compatibilityVersion = "Xcode 3.1";
+                       developmentRegion = English;
                        hasScannedForEncodings = 1;
+                       knownRegions = (
+                               en,
+                       );
                        mainGroup = 089C166AFE841209C02AAC07 /* test1 */;
                        projectDirPath = "";
                        projectRoot = "";
index 783f1af083003900257b15644238efb3f32ce07a..1e25a04c3d3da0ef5d2b00177e7bca39109ae491 100644 (file)
 #
 ident          LIBKERN
 
-options         HIBERNATION     # system hibernation    # <hibernation>
+options        HIBERNATION             # system hibernation            # <hibernation>
 options                KDEBUG                  # kernel tracing                # <kdebug>
+options                IST_KDEBUG              # limited tracing               # <ist_kdebug>
+options                NO_KDEBUG       # no kernel tracing     # <no_kdebug>
 options                GPROF                   # kernel profiling              # <profile>
 options                LIBKERNCPP              # C++ implementation    # <libkerncpp>
 options                NETWORKING              # kernel networking             # <networking>
-options     CONFIG_DTRACE      # dtrace support                # <config_dtrace>
+options        CONFIG_DTRACE   # dtrace support                        # <config_dtrace>
+options                VM_PRESSURE_EVENTS                                      # <vm_pressure_events>
 options                CRYPTO                  # cryptographic routines        # <ipsec,crypto>
+options                ALLCRYPTO               # <allcrypto>
 options                ZLIB            # zlib support                  # <zlib>
 options                IOKITSTATS              # IOKit statistics              # <iokitstats>
 
@@ -69,11 +73,22 @@ options             IPSEC                   # IP security   # <ipsec>
 
 options                CONFIG_KXLD             # kxld/runtime linking of kexts # <config_kxld>
 
+options                CONFIG_KEC_FIPS         # Kernel External Components for FIPS compliance (KEC_FIPS) # <config_kec_fips>
+
+
 # Note that when adding this config option to an architecture, one MUST
 # add the architecture to the preprocessor test at the beginning of
 # libkern/kmod/cplus_{start.c,stop.c}.
 options         CONFIG_STATIC_CPPINIT   # Static library initializes kext cpp runtime # <config_static_cppinit>
 
+# configurable kernel - general switch to say we are building for an
+# embedded device
+#
+options         CONFIG_EMBEDDED         # <config_embedded>
+
 # secure_kernel - secure kernel from user programs
 options     SECURE_KERNEL       # <secure_kernel> 
 
+
+options                DEBUG           # <debug>
+options                MACH_ASSERT     # <mach_assert>
index 46f20d9ec46e533192ac0310c0bbfe39bbc8aa67..fab8b50c8be5b9c9ed789bca3f33a6005b354e22 100644 (file)
@@ -1,10 +1,10 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp hibernation networking config_dtrace crypto zlib config_kxld config_static_cppinit iokitstats ]
+#  RELEASE     = [ intel mach libkerncpp hibernation networking config_dtrace crypto allcrypto zlib config_kxld config_static_cppinit iokitstats vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
-#  DEBUG       = [ RELEASE debug mach_kdb ]
+#  DEBUG       = [ RELEASE debug ]
 #
-#  EMBEDDED    = [ intel mach libkerncpp hibernation networking crypto zlib ]
+#  EMBEDDED    = [ intel mach libkerncpp networking crypto zlib ]
 #  DEVELOPMENT = [ EMBEDDED config_dtrace ]
 #
 ######################################################################
@@ -12,6 +12,4 @@
 machine                "i386"                                  # <intel>
 cpu            "i386"                                  # <intel>
 
-options                MACH_KDB        #                               # <mach_kdb>
-
 options     NO_NESTED_PMAP                  # <no_nested_pmap>
index a9fd68364541273b5dd866c23ddd2a80d1d5cf93..311403c6f7722ca24eace987abae28ac13235c56 100644 (file)
@@ -1,8 +1,8 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp hibernation networking config_dtrace crypto zlib config_kxld iokitstats ]
+#  RELEASE     = [ intel mach libkerncpp hibernation networking config_dtrace crypto allcrypto zlib config_kxld iokitstats vm_pressure_events config_kec_fips ]
 #  PROFILE     = [ RELEASE profile ]
-#  DEBUG       = [ RELEASE debug mach_kdb ]
+#  DEBUG       = [ RELEASE debug mach_assert ]
 #
 #  EMBEDDED    = [ intel mach libkerncpp hibernation networking crypto zlib ]
 #  DEVELOPMENT = [ EMBEDDED ]
@@ -12,6 +12,4 @@
 machine                "x86_64"                                        # <intel>
 cpu            "x86_64"                                        # <intel>
 
-options                MACH_KDB        #                               # <mach_kdb>
-
 options     NO_NESTED_PMAP                  # <no_nested_pmap>
index f0cf53e3d3e4eb1df578832f337fffdbaa7b8a21..65190ee614b55360a1cd05a84e9398ec70fdfa05 100644 (file)
@@ -42,9 +42,11 @@ $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)      \
                MAKEFILES=$(TARGET)/$(LIBKERN_KERNEL_CONFIG)/Makefile   \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(LIBKERN_KERNEL_CONFIG)  \
index f28e7a4591a49ee178c99f05f4c293c958cb360a..d756144780aa967082fb3717f27540404aaac23b 100644 (file)
@@ -2,12 +2,16 @@
 #BEGIN Machine dependent Makefile fragment for i386
 ######################################################################
 
+# Build the sha1 files with -DSHA1_USE_ASSEMBLY=1
+sha1.o_CFLAGS_ADD += -DSHA1_USE_ASSEMBLY=1
+
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES= \
                        WKdmDecompress.o
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
+
 ######################################################################
 #END   Machine dependent Makefile fragment for i386
 ######################################################################
index 9dad4c81666a39d022bd709f5b54468e95208118..7d18485357cb908b7fb2cee076a9d641d84217c0 100644 (file)
@@ -27,7 +27,7 @@ include $(MakeInc_def)
 # CFLAGS
 #
 CFLAGS+= -include meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \
-       $(CFLAGS_INLINE_CONFIG)
+         $(CFLAGS_INLINE_CONFIG)
 
 # zlib is 3rd party source
 compress.o_CWARNFLAGS_ADD = -Wno-cast-qual 
@@ -41,6 +41,9 @@ uncompr.o_CWARNFLAGS_ADD = -Wno-cast-qual
 # warnings in bison-generated code
 OSUnserializeXML.cpo_CXXWARNFLAGS_ADD = -Wno-uninitialized
 
+# Runtime support functions don't interact well with LTO (9294679)
+stack_protector.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)
+
 #
 # Directories for mig generated files
 #
@@ -87,10 +90,10 @@ LDOBJS = $(OBJS)
 
 $(COMPONENT).filelist: $(LDOBJS)
        $(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
-       for hib_file in ${HIB_FILES};           \
+       for hib_file in ${HIB_FILES}; \
        do      \
-                $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \
-                mv $${hib_file}__ $${hib_file} ; \
+               $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \
+               mv $${hib_file}__ $${hib_file} ; \
        done; \
        fi
        @echo LDFILELIST $(COMPONENT)
index a7fda56ca28958c53cfc89ccdb2eef32f9d99fe8..719fd1d29c8ce2ffae1c530cf000521015ea3601 100644 (file)
@@ -2,9 +2,12 @@
 #BEGIN Machine dependent Makefile fragment for x86_64
 ######################################################################
 
+# Build the sha1 files with -DSHA1_USE_ASSEMBLY=1
+sha1.o_CFLAGS_ADD += -DSHA1_USE_ASSEMBLY=1
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES= \
-                        WKdmDecompress.o
+                       WKdmDecompress.o
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
index 6f3d432ac6f6f3d8911cbd00936b26ac27cfe38a..0228f1e00eeb4f78295613494cb2098ba9d9548f 100644 (file)
@@ -3,8 +3,8 @@
 OPTIONS/libkerncpp                                     optional libkerncpp
 OPTIONS/kdebug                                         optional kdebug
 OPTIONS/gprof                                          optional gprof
-OPTIONS/config_dtrace                          optional config_dtrace
-OPTIONS/hibernation                            optional hibernation
+OPTIONS/config_dtrace                                  optional config_dtrace
+OPTIONS/hibernation                                    optional hibernation
 OPTIONS/networking                                     optional networking
 OPTIONS/crypto                                         optional crypto
 OPTIONS/zlib                                           optional zlib
@@ -59,10 +59,15 @@ libkern/zlib/trees.c                                    optional zlib
 libkern/zlib/uncompr.c                                  optional zlib
 libkern/zlib/zutil.c                                    optional zlib
 
-libkern/crypto/md5.c                           optional crypto
-libkern/crypto/md5.c                           optional networking
-libkern/crypto/sha1.c                          optional crypto
-libkern/crypto/sha1.c                          optional ipsec
+libkern/crypto/register_crypto.c               optional crypto
+libkern/crypto/corecrypto_sha2.c               optional crypto allcrypto
+libkern/crypto/corecrypto_sha1.c               optional crypto
+libkern/crypto/corecrypto_sha1.c               optional ipsec
+libkern/crypto/corecrypto_md5.c                        optional crypto
+libkern/crypto/corecrypto_md5.c                        optional networking
+libkern/crypto/corecrypto_des.c                        optional crypto
+libkern/crypto/corecrypto_aes.c                        optional crypto
+libkern/crypto/corecrypto_aesxts.c             optional crypto
 
 libkern/stack_protector.c       standard
 
@@ -76,9 +81,11 @@ libkern/kxld/kxld_reloc.c       optional config_kxld
 libkern/kxld/kxld_object.c      optional config_kxld
 libkern/kxld/kxld_sect.c        optional config_kxld
 libkern/kxld/kxld_seg.c         optional config_kxld
+libkern/kxld/kxld_srcversion.c  optional config_kxld
 libkern/kxld/kxld_sym.c         optional config_kxld
 libkern/kxld/kxld_symtab.c      optional config_kxld
 libkern/kxld/kxld_util.c        optional config_kxld
 libkern/kxld/kxld_uuid.c        optional config_kxld
+libkern/kxld/kxld_versionmin.c  optional config_kxld
 libkern/kxld/kxld_vtable.c      optional config_kxld
 libkern/kxld/kxld_stubs.c       standard
index 18edb6e7d69979752d49c2dd344c561fe93e36b1..2982431f789c08d250ed8f509203a89160363f90 100644 (file)
@@ -1,8 +1,7 @@
 libkern/i386/OSAtomic.s                        standard
 libkern/zlib/intel/inffastS.s  optional zlib
 libkern/zlib/intel/adler32vec.s        optional zlib
-libkern/crypto/intel/sha1edp.s optional crypto 
 
 # Optimized WKdm compressor
-libkern/kxld/i386/WKdmCompress.s                         optional hibernation
-libkern/kxld/i386/WKdmDecompress.s                       optional hibernation
+libkern/kxld/i386/WKdmCompress.s       optional hibernation
+libkern/kxld/i386/WKdmDecompress.s     optional hibernation
index bc32a484671f0d9a292065528a802086f3a5ebc0..b1f7e44fafeea0293ad073d826566f1f059a9dc4 100644 (file)
@@ -1,8 +1,7 @@
 libkern/x86_64/OSAtomic.s                      standard
 libkern/zlib/intel/inffastS.s          optional zlib
 libkern/zlib/intel/adler32vec.s                optional zlib
-libkern/crypto/intel/sha1edp.s         optional crypto
 
 # Optimized WKdm compressor
-libkern/kxld/i386/WKdmCompress.s                         optional hibernation
-libkern/kxld/i386/WKdmDecompress.s                       optional hibernation
+libkern/kxld/i386/WKdmCompress.s       optional hibernation
+libkern/kxld/i386/WKdmDecompress.s     optional hibernation
diff --git a/libkern/crypto/corecrypto_aes.c b/libkern/crypto/corecrypto_aes.c
new file mode 100644 (file)
index 0000000..161715a
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/crypto/aes.h>
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccaes.h>
+#include <kern/debug.h>
+
+aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
+{
+       const struct ccmode_cbc *cbc = g_crypto_funcs->ccaes_cbc_encrypt;
+
+    /* Make sure the context size for the mode fits in the one we have */
+    if (cbc->size > sizeof(aes_encrypt_ctx))
+        panic("%s: inconsistent size for AES encrypt context", __FUNCTION__);
+
+       cccbc_init(cbc, cx[0].ctx, key_len, key);
+
+       return aes_good;
+}
+
+aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
+                                        unsigned char *out_blk, aes_encrypt_ctx cx[1])
+{
+       const struct ccmode_cbc *cbc = g_crypto_funcs->ccaes_cbc_encrypt;
+       cccbc_iv_decl(cbc->block_size, ctx_iv);
+
+       cccbc_set_iv(cbc, ctx_iv, in_iv);
+	cccbc_update(cbc, cx[0].ctx, ctx_iv, num_blk, in_blk, out_blk); // Actually CBC encrypt.
+
+       return aes_good;
+}
+
+/* This does one block of ECB, using the CBC implementation - this allows reusing the same context for both CBC and ECB */
+aes_rval aes_encrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_encrypt_ctx cx[1])
+{
+       return aes_encrypt_cbc(in_blk, NULL, 1, out_blk, cx);    
+}
+
+aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
+{
+       const struct ccmode_cbc *cbc = g_crypto_funcs->ccaes_cbc_decrypt;
+
+    /* Make sure the context size for the mode fits in the one we have */
+    if(cbc->size>sizeof(aes_decrypt_ctx))
+        panic("%s: inconsistent size for AES decrypt context", __FUNCTION__);
+
+       cccbc_init(cbc, cx[0].ctx, key_len, key);
+
+       return aes_good;
+}
+
+aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
+                                                unsigned char *out_blk, aes_decrypt_ctx cx[1])
+{
+       const struct ccmode_cbc *cbc = g_crypto_funcs->ccaes_cbc_decrypt;
+       cccbc_iv_decl(cbc->block_size, ctx_iv);
+
+       cccbc_set_iv(cbc, ctx_iv, in_iv);
+       cccbc_update(cbc, cx[0].ctx, ctx_iv, num_blk, in_blk, out_blk); // perform the CBC decryption
+
+       return aes_good;
+}
+
+/* This does one block of ECB, using the CBC implementation - this allows the same context to be used for both CBC and ECB */
+aes_rval aes_decrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_decrypt_ctx cx[1])
+{
+       return aes_decrypt_cbc(in_blk, NULL, 1, out_blk, cx);
+}
+
+aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
+{
+       return aes_encrypt_key(key, 16, cx);
+}
+
+aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
+{
+       return aes_decrypt_key(key, 16, cx);
+}
+
+
+aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
+{
+       return aes_encrypt_key(key, 32, cx);
+}
+
+aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
+{
+       return aes_decrypt_key(key, 32, cx);
+}
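For reference, a minimal caller's-eye sketch of the legacy AES interface this shim preserves. The buffer names, key bytes, and block count below are illustrative assumptions, not part of the change; only the function signatures come from the file above.

#include <libkern/crypto/aes.h>

static void aes_cbc_sketch(void)
{
	aes_encrypt_ctx enc;
	unsigned char key[16] = {0};           /* illustrative 128-bit key */
	unsigned char iv[16]  = {0};           /* per-message IV */
	unsigned char pt[32]  = {0};           /* two 16-byte blocks of plaintext */
	unsigned char ct[32];

	aes_encrypt_key128(key, &enc);         /* lands in cccbc_init() above */
	aes_encrypt_cbc(pt, iv, 2, ct, &enc);  /* CBC over 2 blocks */
}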
diff --git a/libkern/crypto/corecrypto_aesxts.c b/libkern/crypto/corecrypto_aesxts.c
new file mode 100644 (file)
index 0000000..dc0d6f4
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/libkern.h>
+#include <libkern/crypto/aesxts.h>
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccpad.h>
+#include <kern/debug.h>
+
+/*
+ * These are the interfaces required for XTS-AES support
+ */
+
+uint32_t
+xts_start(uint32_t cipher __unused, // ignored - we're doing this for xts-aes only
+                 const uint8_t *IV __unused, // ignored
+                 const uint8_t *key1, int keylen,
+                 const uint8_t *key2, int tweaklen __unused, // both keys are the same size for xts
+                 uint32_t num_rounds __unused, // ignored
+                 uint32_t options __unused,    // ignored
+                 symmetric_xts *xts)
+{
+               const struct ccmode_xts *enc, *dec;
+               
+               if(!g_crypto_funcs)
+                       panic("%s: corecrypto not registered!\n", __FUNCTION__);
+               
+               enc = g_crypto_funcs->ccaes_xts_encrypt;
+               dec = g_crypto_funcs->ccaes_xts_decrypt;
+
+               if(!enc && !dec)
+                       panic("%s: xts mode not registered? enc=%p, dec=%p\n", __FUNCTION__, enc, dec);
+                       
+               /* Make sure the context size for the mode fits in the one we have */
+               if((enc->size>sizeof(xts->enc)) || (dec->size>sizeof(xts->dec)))
+                       panic("%s: inconsistent size for AES-XTS context", __FUNCTION__);
+
+               enc->init(enc, xts->enc, keylen, key1, key2);
+               dec->init(dec, xts->dec, keylen, key1, key2);
+
+               return 0; //never fails
+}
+
+int xts_encrypt(const uint8_t *pt, unsigned long ptlen,
+                       uint8_t *ct,
+                       const uint8_t *iv, // this can be considered the sector IV for this use
+                       symmetric_xts *xts)
+{
+       const struct ccmode_xts *xtsenc = g_crypto_funcs->ccaes_xts_encrypt;
+       ccxts_tweak_decl(xtsenc->tweak_size, tweak);
+               
+       if(ptlen%16) panic("xts encrypt not a multiple of block size\n");
+
+       xtsenc->set_tweak(xts->enc, tweak, iv);
+       xtsenc->xts(xts->enc, tweak, ptlen/16, pt, ct);
+       
+       return 0; //never fails
+}
+
+int xts_decrypt(const uint8_t *ct, unsigned long ptlen,
+                       uint8_t *pt,
+                               const uint8_t *iv, // this can be considered the sector IV for this use
+                       symmetric_xts *xts)
+{
+       const struct ccmode_xts *xtsdec = g_crypto_funcs->ccaes_xts_decrypt;
+       ccxts_tweak_decl(xtsdec->tweak_size, tweak);
+
+       if(ptlen%16) panic("xts decrypt not a multiple of block size\n");
+
+       xtsdec->set_tweak(xts->dec, tweak, iv);
+       xtsdec->xts(xts->dec, tweak, ptlen/16, ct, pt);
+
+       return 0; //never fails
+}
+
+void xts_done(symmetric_xts *xts __unused)
+{
+
+}
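A usage sketch for the XTS shim, assuming 256-bit key halves and one 512-byte sector; the tweak encoding and all sizes here are illustrative assumptions:

#include <libkern/crypto/aesxts.h>

static void xts_sector_sketch(const uint8_t key1[32], const uint8_t key2[32],
                              const uint8_t sector_in[512], uint8_t sector_out[512])
{
	symmetric_xts xts;
	uint8_t tweak[16] = {0};  /* e.g. the sector number, encoded by the caller */

	/* cipher, IV, rounds, and options are ignored by this xts-aes-only shim */
	xts_start(0, NULL, key1, 32, key2, 32, 0, 0, &xts);
	xts_encrypt(sector_in, 512, sector_out, tweak, &xts);  /* 512 is a multiple of 16 */
	xts_done(&xts);
}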
diff --git a/libkern/crypto/corecrypto_des.c b/libkern/crypto/corecrypto_des.c
new file mode 100644 (file)
index 0000000..26f5ab5
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/libkern.h>
+#include <kern/debug.h>
+#include <libkern/crypto/des.h>
+#include <corecrypto/ccmode.h>
+
+/* Single DES ECB - used by ipv6 (esp_core.c) */
+int des_ecb_key_sched(des_cblock *key, des_ecb_key_schedule *ks)
+{
+       const struct ccmode_ecb *enc = g_crypto_funcs->ccdes_ecb_encrypt;
+       const struct ccmode_ecb *dec = g_crypto_funcs->ccdes_ecb_decrypt;
+
+        /* Make sure the context size for the mode fits in the one we have */
+        if((enc->size>sizeof(ks->enc)) || (dec->size>sizeof(ks->dec)))
+                panic("%s: inconsistent size for DES-ECB context", __FUNCTION__);
+       enc->init(enc, ks->enc, CCDES_KEY_SIZE, key);
+       dec->init(dec, ks->dec, CCDES_KEY_SIZE, key);
+
+       /* The old DES interface could return -1 or -2 for weak keys and wrong parity,
+        but those checks were always disabled, so we never fail here */
+       return 0;
+}
+
+/* Simple des - 1 block */
+void des_ecb_encrypt(des_cblock *in, des_cblock *out, des_ecb_key_schedule *ks, int enc)
+{
+       const struct ccmode_ecb *ecb = enc ? g_crypto_funcs->ccdes_ecb_encrypt : g_crypto_funcs->ccdes_ecb_decrypt;
+       ccecb_ctx *ctx = enc ? ks->enc : ks->dec;
+
+       ecb->ecb(ctx, 1, in, out);
+}
+
+
+/* Triple DES ECB - used by ipv6 (esp_core.c) */
+int des3_ecb_key_sched(des_cblock *key, des3_ecb_key_schedule *ks)
+{
+       const struct ccmode_ecb *enc = g_crypto_funcs->cctdes_ecb_encrypt;
+       const struct ccmode_ecb *dec = g_crypto_funcs->cctdes_ecb_decrypt;
+
+        /* Make sure the context size for the mode fits in the one we have */
+        if((enc->size>sizeof(ks->enc)) || (dec->size>sizeof(ks->dec)))
+                panic("%s: inconsistent size for 3DES-ECB context", __FUNCTION__);
+       enc->init(enc, ks->enc, CCDES_KEY_SIZE*3, key);
+       dec->init(dec, ks->dec, CCDES_KEY_SIZE*3, key);
+
+       /* The old DES interface could return -1 or -2 for weak keys and wrong parity,
+        but those checks were always disabled, so we never fail here */
+       return 0;
+}
+
+/* Simple des - 1 block */
+void des3_ecb_encrypt(des_cblock *in, des_cblock *out, des3_ecb_key_schedule *ks, int enc)
+{
+       const struct ccmode_ecb *ecb = enc ? g_crypto_funcs->cctdes_ecb_encrypt : g_crypto_funcs->cctdes_ecb_decrypt;
+       ccecb_ctx *ctx = enc ? ks->enc : ks->dec;
+
+       ecb->ecb(ctx, 1, in, out);
+}
+
+/* Single DES CBC - used by nfs_gss */
+int des_cbc_key_sched(des_cblock *key, des_cbc_key_schedule *ks)
+{
+       const struct ccmode_cbc *enc = g_crypto_funcs->ccdes_cbc_encrypt;
+       const struct ccmode_cbc *dec = g_crypto_funcs->ccdes_cbc_decrypt;
+
+        /* Make sure the context size for the mode fits in the one we have */
+        if((enc->size>sizeof(ks->enc)) || (dec->size>sizeof(ks->dec)))
+                panic("%s: inconsistent size for DES-CBC context", __FUNCTION__);
+
+       cccbc_init(enc, ks->enc, CCDES_KEY_SIZE, key);
+       cccbc_init(dec, ks->dec, CCDES_KEY_SIZE, key);
+
+       /* The old DES interface could return -1 or -2 for weak keys and wrong parity,
+        but those checks were always disabled, so we never fail here */
+       return 0;
+}
+
+/* this is normally only called with length a multiple of 8 bytes */
+void
+des_cbc_encrypt(des_cblock *in, des_cblock *out, int32_t length,
+                               des_cbc_key_schedule *ks, des_cblock *iv, des_cblock *retiv, int encrypt)
+{
+       const struct ccmode_cbc *cbc = encrypt?g_crypto_funcs->ccdes_cbc_encrypt:g_crypto_funcs->ccdes_cbc_decrypt;
+       cccbc_ctx *ctx = encrypt ? ks->enc : ks->dec;
+       int nblocks;
+       cccbc_iv_decl(cbc->block_size, ctx_iv); 
+
+       assert(length%8==0);
+       nblocks=length/8;
+
+       /* set the iv */
+       cccbc_set_iv(cbc, ctx_iv, iv);
+
+       cccbc_update(cbc, ctx, ctx_iv, nblocks, in, out);
+
+       /* copy back iv */
+       if(retiv)
+               memcpy(retiv, ctx_iv, 8);
+}
+
+/* Triple DES CBC - used by nfs_gss */
+int des3_cbc_key_sched(des_cblock *key, des3_cbc_key_schedule *ks)
+{
+       const struct ccmode_cbc *enc = g_crypto_funcs->cctdes_cbc_encrypt;
+       const struct ccmode_cbc *dec = g_crypto_funcs->cctdes_cbc_decrypt;
+
+        /* Make sure the context size for the mode fits in the one we have */
+        if((enc->size>sizeof(ks->enc)) || (dec->size>sizeof(ks->dec)))
+                panic("%s: inconsistent size for 3DES-CBC context", __FUNCTION__);
+       cccbc_init(enc, ks->enc, CCDES_KEY_SIZE*3, key);
+       cccbc_init(dec, ks->dec, CCDES_KEY_SIZE*3, key);
+
+       /* The old DES interface could return -1 or -2 for weak keys and wrong parity,
+        but those checks were always disabled, so we never fail here */
+       return 0;
+}
+
+/* this is normally only called with length a multiple of 8 bytes */
+void
+des3_cbc_encrypt(des_cblock *in, des_cblock *out, int32_t length,
+                                des3_cbc_key_schedule *ks, des_cblock *iv, des_cblock *retiv, int encrypt)
+{
+       const struct ccmode_cbc *cbc = encrypt?g_crypto_funcs->cctdes_cbc_encrypt:g_crypto_funcs->cctdes_cbc_decrypt;
+       cccbc_ctx *ctx = encrypt ? ks->enc : ks->dec;
+       int nblocks;
+       cccbc_iv_decl(cbc->block_size, ctx_iv); 
+
+       assert(length%8==0);
+       nblocks=length/8;
+
+       /* set the iv */
+       cccbc_set_iv(cbc, ctx_iv, iv);
+
+       cccbc_update(cbc, ctx, ctx_iv, nblocks, in, out);
+
+       /* copy back iv */
+       if(retiv)
+               memcpy(retiv, ctx_iv, 8);
+}
+
+
+/*
+ * DES MAC implemented according to FIPS 113
+ * http://www.itl.nist.gov/fipspubs/fip113.htm
+ * Only full blocks.
+ * Used by nfs-gss
+ */
+void
+des_cbc_cksum(des_cblock *in, des_cblock *out,
+                         int len, des_cbc_key_schedule *ks)
+{
+       const struct ccmode_cbc *cbc = g_crypto_funcs->ccdes_cbc_encrypt;
+       int nblocks;
+       des_cblock cksum;
+       cccbc_iv_decl(cbc->block_size, ctx_iv);
+
+       assert(len%8==0);
+       nblocks=len/8;
+
+       cccbc_set_iv(cbc, ctx_iv, NULL);
+       while(nblocks--) {
+               cccbc_update(cbc, ks->enc, ctx_iv, 1, in++, cksum);
+       }
+       memcpy(out, cksum, sizeof(des_cblock));
+}
+
+
+/* Raw key helper functions */
+void des_fixup_key_parity(des_cblock *key)
+{
+       g_crypto_funcs->ccdes_key_set_odd_parity_fn(key, CCDES_KEY_SIZE);
+}
+
+int des_is_weak_key(des_cblock *key)
+{
+       return g_crypto_funcs->ccdes_key_is_weak_fn(key, CCDES_KEY_SIZE);
+}
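A sketch of the 3DES-CBC path as an nfs_gss-style caller would drive it; the key material and buffer sizes are placeholders, while the signatures and the multiple-of-8 length requirement come from the file above.

#include <libkern/crypto/des.h>

static void des3_cbc_sketch(void)
{
	des_cblock key[3] = {{0}};   /* three 8-byte DES keys, 24 bytes total */
	des3_cbc_key_schedule ks;
	des_cblock iv = {0}, retiv;
	des_cblock in[4] = {{0}};    /* 32 bytes: length must be a multiple of 8 */
	des_cblock out[4];

	des3_cbc_key_sched(key, &ks);
	des3_cbc_encrypt(in, out, sizeof(in), &ks, &iv, &retiv, 1 /* encrypt */);
	/* retiv now holds the final chaining value, usable for the next call */
}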
diff --git a/libkern/crypto/corecrypto_md5.c b/libkern/crypto/corecrypto_md5.c
new file mode 100644 (file)
index 0000000..70225a5
--- /dev/null
@@ -0,0 +1,65 @@
+
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/crypto/md5.h>
+#include <kern/debug.h>
+#include <corecrypto/ccdigest.h>
+
+static uint64_t getCount(MD5_CTX *ctx)
+{
+       return ( (((uint64_t)ctx->count[0])<<32) | (ctx->count[1]) );
+}
+
+static void setCount(MD5_CTX *ctx, uint64_t count)
+{
+       ctx->count[0]=(uint32_t)(count>>32);
+       ctx->count[1]=(uint32_t)count;
+}
+
+/* Copy a ccdigest ctx into a legacy MD5 context */
+static void DiToMD5(const struct ccdigest_info *di, struct ccdigest_ctx *di_ctx, MD5_CTX *md5_ctx)
+{
+       setCount(md5_ctx, ccdigest_nbits(di, di_ctx)/8+ccdigest_num(di, di_ctx));
+       memcpy(md5_ctx->buffer, ccdigest_data(di, di_ctx), di->block_size);
+       memcpy(md5_ctx->state, ccdigest_state_ccn(di, di_ctx), di->state_size);
+}
+
+/* Copy a legacy MD5 context into a ccdigest ctx  */
+static void MD5ToDi(const struct ccdigest_info *di, MD5_CTX *md5_ctx, struct ccdigest_ctx *di_ctx)
+{
+       uint64_t count = getCount(md5_ctx);
+       
+       ccdigest_num(di, di_ctx)=count%di->block_size;
+       ccdigest_nbits(di, di_ctx)=(count-ccdigest_num(di, di_ctx))*8;
+       memcpy(ccdigest_data(di, di_ctx), md5_ctx->buffer, di->block_size);
+       memcpy(ccdigest_state_ccn(di, di_ctx), md5_ctx->state, di->state_size); 
+}
+
+void MD5Init(MD5_CTX *ctx)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccmd5_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       g_crypto_funcs->ccdigest_init_fn(di, di_ctx);
+       
+       DiToMD5(di, di_ctx, ctx);
+}
+
+void MD5Update(MD5_CTX *ctx, const void *data, unsigned int len)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccmd5_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       MD5ToDi(di, ctx, di_ctx);
+       g_crypto_funcs->ccdigest_update_fn(di, di_ctx, len, data);      
+       DiToMD5(di, di_ctx, ctx);
+}
+
+void MD5Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5_CTX *ctx)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccmd5_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       MD5ToDi(di, ctx, di_ctx);
+       ccdigest_final(di, di_ctx, digest);
+}
+
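The legacy one-shot digest pattern this shim keeps working, as a sketch; the data pointer and length are assumed to come from the caller:

#include <libkern/crypto/md5.h>

static void md5_sketch(const void *data, unsigned int len)
{
	MD5_CTX ctx;
	unsigned char digest[MD5_DIGEST_LENGTH];

	MD5Init(&ctx);               /* corecrypto state copied out via DiToMD5() */
	MD5Update(&ctx, data, len);  /* state round-trips through MD5ToDi()/DiToMD5() */
	MD5Final(digest, &ctx);
}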
diff --git a/libkern/crypto/corecrypto_sha1.c b/libkern/crypto/corecrypto_sha1.c
new file mode 100644 (file)
index 0000000..1513287
--- /dev/null
@@ -0,0 +1,110 @@
+
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/crypto/sha1.h>
+#include <kern/debug.h>
+#include <corecrypto/ccdigest.h>
+
+
+static uint64_t getCount(SHA1_CTX *ctx)
+{
+       return ctx->c.b64[0];
+}
+
+static void setCount(SHA1_CTX *ctx, uint64_t count)
+{
+       ctx->c.b64[0]=count;
+}
+
+/* Copy a ccdigest ctx into a legacy SHA1 context */
+static void DiToSHA1(const struct ccdigest_info *di, struct ccdigest_ctx *di_ctx, SHA1_CTX *sha1_ctx)
+{
+       setCount(sha1_ctx, ccdigest_nbits(di, di_ctx)/8+ccdigest_num(di, di_ctx));
+       memcpy(sha1_ctx->m.b8, ccdigest_data(di, di_ctx), di->block_size);
+       memcpy(sha1_ctx->h.b8, ccdigest_state_ccn(di, di_ctx), di->state_size);
+}
+
+/* Copy a legacy SHA1 context into a ccdigest ctx  */
+static void SHA1ToDi(const struct ccdigest_info *di, SHA1_CTX *sha1_ctx, struct ccdigest_ctx *di_ctx)
+{
+       uint64_t count = getCount(sha1_ctx);
+       
+       ccdigest_num(di, di_ctx)=count%di->block_size;
+       ccdigest_nbits(di, di_ctx)=(count-ccdigest_num(di, di_ctx))*8;
+       memcpy(ccdigest_data(di, di_ctx), sha1_ctx->m.b8, di->block_size);
+       memcpy(ccdigest_state_ccn(di, di_ctx), sha1_ctx->h.b8, di->state_size); 
+}
+
+void SHA1Init(SHA1_CTX *ctx)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccsha1_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       g_crypto_funcs->ccdigest_init_fn(di, di_ctx);
+       
+       DiToSHA1(di, di_ctx, ctx);
+}
+
+void SHA1Update(SHA1_CTX *ctx, const void *data, size_t len)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccsha1_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       SHA1ToDi(di, ctx, di_ctx);
+       g_crypto_funcs->ccdigest_update_fn(di, di_ctx, len, data);      
+       DiToSHA1(di, di_ctx, ctx);
+}
+
+void SHA1Final(void *digest, SHA1_CTX *ctx)
+{
+       const struct ccdigest_info *di=g_crypto_funcs->ccsha1_di;
+       ccdigest_di_decl(di, di_ctx);
+       
+       SHA1ToDi(di, ctx, di_ctx);
+       ccdigest_final(di, di_ctx, digest);
+}
+
+#ifdef XNU_KERNEL_PRIVATE
+void SHA1UpdateUsePhysicalAddress(SHA1_CTX *ctx, const void *data, size_t len)
+{
+       //TODO: there is no physical-address fast path here; fall back to the normal update.
+       SHA1Update(ctx, data, len);
+}
+#endif
+
+/* This is not declared in the header, but it is exported via libkern.exports */
+void SHA1Final_r(SHA1_CTX *context, void *digest);
+void SHA1Final_r(SHA1_CTX *context, void *digest)
+{
+       SHA1Final(digest, context);
+}
+
+
+/*
+ * This function is called by the SHA1 hardware kext during its init.
+ * This will register the function to call to perform SHA1 using hardware.
+ */
+#include <sys/types.h>
+#include <libkern/OSAtomic.h>
+#include <sys/systm.h>
+
+typedef kern_return_t (*InKernelPerformSHA1Func)(void *ref, const void *data, size_t dataLen, u_int32_t *inHash, u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress);
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref);
+static void *SHA1Ref;
+static InKernelPerformSHA1Func performSHA1WithinKernelOnly;
+
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref)
+{
+       if(option) {
+               // Establish the hook. The hardware is ready.
+               OSCompareAndSwapPtr((void*)NULL, (void*)ref, (void * volatile*)&SHA1Ref);
+
+               if(!OSCompareAndSwapPtr((void *)NULL, (void *)func, (void * volatile *)&performSHA1WithinKernelOnly)) {
+                       panic("sha1_hardware_hook: Called twice.. Should never happen\n");
+               }
+       }
+       else {
+               // The hardware is going away. Tear down the hook.
+               performSHA1WithinKernelOnly = NULL;
+               SHA1Ref = NULL;
+       }
+}
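The matching caller-side sketch for the SHA-1 shim (data and length are assumed inputs; the 20-byte output size is SHA-1's fixed digest length):

#include <libkern/crypto/sha1.h>

static void sha1_sketch(const void *data, size_t len)
{
	SHA1_CTX ctx;
	unsigned char digest[20];  /* SHA-1 yields a 160-bit digest */

	SHA1Init(&ctx);
	SHA1Update(&ctx, data, len);
	SHA1Final(digest, &ctx);
}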
diff --git a/libkern/crypto/corecrypto_sha2.c b/libkern/crypto/corecrypto_sha2.c
new file mode 100644 (file)
index 0000000..e85479d
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/crypto/sha2.h>
+#include <kern/debug.h>
+#include <corecrypto/ccdigest.h>
+
+void SHA256_Init(SHA256_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha256_di;
+
+        /* Make sure the context size for the digest info fits in the one we have */
+        if(ccdigest_di_size(di)>sizeof(SHA256_CTX))
+                panic("%s: inconsistent size for SHA256 context", __FUNCTION__);
+       g_crypto_funcs->ccdigest_init_fn(di, ctx->ctx);
+}
+
+void SHA256_Update(SHA256_CTX *ctx, const void *data, size_t len)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha256_di;
+
+       g_crypto_funcs->ccdigest_update_fn(di, ctx->ctx, len, data);
+}
+
+void SHA256_Final(void *digest, SHA256_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha256_di;
+
+       ccdigest_final(di, ctx->ctx, digest);
+}
+
+void SHA384_Init(SHA384_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha384_di;
+
+        /* Make sure the context size for the digest info fits in the one we have */
+        if(ccdigest_di_size(di)>sizeof(SHA384_CTX))
+                panic("%s: inconsistent size for SHA384 context", __FUNCTION__);
+       g_crypto_funcs->ccdigest_init_fn(di, ctx->ctx);
+}
+
+void SHA384_Update(SHA384_CTX *ctx, const void *data, size_t len)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha384_di;
+
+       g_crypto_funcs->ccdigest_update_fn(di, ctx->ctx, len, data);
+}
+
+
+void SHA384_Final(void *digest, SHA384_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha512_di;
+
+       ccdigest_final(di, ctx->ctx, digest);
+}
+
+void SHA512_Init(SHA512_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha512_di;
+
+        /* Make sure the context size for the digest info fits in the one we have */
+        if(ccdigest_di_size(di)>sizeof(SHA512_CTX))
+                panic("%s: inconsistent size for SHA512 context", __FUNCTION__);
+       g_crypto_funcs->ccdigest_init_fn(di, ctx->ctx);
+}
+
+void SHA512_Update(SHA512_CTX *ctx, const void *data, size_t len)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha512_di;
+
+       g_crypto_funcs->ccdigest_update_fn(di, ctx->ctx, len, data);
+}
+
+void SHA512_Final(void *digest, SHA512_CTX *ctx)
+{
+       const struct ccdigest_info *di;
+       di=g_crypto_funcs->ccsha512_di;
+
+       ccdigest_final(di, ctx->ctx, digest);
+}
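The SHA-2 shims share one calling pattern; a sketch for SHA-256, with SHA-384 and SHA-512 differing only in context type and digest size (the 32-byte buffer is an assumption standing in for the header's digest-length constant):

#include <libkern/crypto/sha2.h>

static void sha256_sketch(const void *data, size_t len)
{
	SHA256_CTX ctx;
	unsigned char digest[32];  /* 256-bit digest */

	SHA256_Init(&ctx);
	SHA256_Update(&ctx, data, len);
	SHA256_Final(digest, &ctx);
}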
index 80da81a62b69586a6f1532905c92220918c0654e..8c52a5e7bf6c9e39535e7df06b7b98bb536de439 100644 (file)
@@ -1199,8 +1199,13 @@ void SHA1( int HASH[], int MESSAGE[] )
 0:
        INTERNAL_nossse3                                // update W (i=16:79) and update ABCDE (i=0:63) 
 #if Multiple_Blocks
+#if defined(__x86_64__)
        add     $$64, BUFFER_PTR                        // BUFFER_PTR+=64;
        sub     $$1, cnt                                        // pre-decrement cnt by 1
+#else
+       addl    $$64, BUFFER_PTR                        // BUFFER_PTR+=64;
+       subl    $$1, cnt                                        // pre-decrement cnt by 1
+#endif
        jbe     1f                                                      // if cnt <= 0, branch to finish off
        SOFTWARE_PIPELINING_nossse3             // update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15)
        UPDATE_ALL_HASH                                 // update output hashes
@@ -1223,8 +1228,13 @@ void SHA1( int HASH[], int MESSAGE[] )
 0:
        INTERNAL_ssse3                                  // update W (i=16:79) and update ABCDE (i=0:63)
 #if Multiple_Blocks
+#if defined(__x86_64__)
        add     $$64, BUFFER_PTR                        // BUFFER_PTR+=64;
        sub     $$1, cnt                                        // pre-decrement cnt by 1
+#else
+       addl    $$64, BUFFER_PTR                        // BUFFER_PTR+=64;
+       subl    $$1, cnt                                        // pre-decrement cnt by 1
+#endif
        jbe     1f                                                      // if cnt <= 0, branch to finish off
        SOFTWARE_PIPELINING_ssse3               // update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15)
        UPDATE_ALL_HASH                                 // update output hashes
@@ -1236,12 +1246,16 @@ void SHA1( int HASH[], int MESSAGE[] )
        UPDATE_ALL_HASH                                 // update output hashes
        .endm
 
+#ifdef KERNEL
 #include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
 
        .text
 
        .globl _SHA1Transform
-       .private_extern _SHA1Transform  
+       //.private_extern       _SHA1Transform  
 _SHA1Transform:
 
        // detect SSSE3 and dispatch appropriate code branch
diff --git a/libkern/crypto/md5.c b/libkern/crypto/md5.c
deleted file mode 100644 (file)
index 46e0059..0000000
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * MD5.C - RSA Data Security, Inc., MD5 message-digest algorithm
- *
- * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
- * rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * This code is the same as the code published by RSA Inc.  It has been
- * edited for clarity and style only.
- */
-
-#include <sys/types.h>
-#include <sys/systm.h>
-#include <libkern/crypto/md5.h>
-
-#define        memset(x, y, z) bzero(x, z);
-#define        memcpy(x, y, z) bcopy(y, x, z)
-
-/*
- * The digest algorithm interprets the input message as a sequence of 32-bit
- * little-endian words.  We must reverse bytes in each word on PPC and other
- * big-endian platforms, but not on little-endian ones.  When we can, we try
- * to load each word at once.  We don't quite care about alignment, since
- * x86/x64 allows us to do 4-byte loads on non 4-byte aligned addresses,
- * and on PPC we do 1-byte loads anyway.
- *
- * We could check against __LITTLE_ENDIAN__ to generalize the 4-byte load
- * optimization, but that might not tell us whether or not we need 4-byte
- * aligned loads.  Since we know that __i386__ and __x86_64__ are the two
- * little-endian architectures that are not alignment-restrictive, we check
- * explicitly against them below.  Note that the byte-reversing code for
- * big-endian will still work on little-endian, albeit much slower.
- */
-#if defined(__i386__) || defined(__x86_64__)
-#define        FETCH_32(p)     (*(const u_int32_t *)(p))
-#else
-#define        FETCH_32(p)                                             \
-       (((u_int32_t)*((const u_int8_t *)(p))) |                \
-       (((u_int32_t)*((const u_int8_t *)(p) + 1)) << 8) |      \
-       (((u_int32_t)*((const u_int8_t *)(p) + 2)) << 16) |     \
-       (((u_int32_t)*((const u_int8_t *)(p) + 3)) << 24))
-#endif /* __i386__ || __x86_64__ */
-
-/*
- * Encodes input (u_int32_t) into output (unsigned char). Assumes len is
- * a multiple of 4. This is not compatible with memcpy().
- */
-static void
-Encode(unsigned char *output, u_int32_t *input, unsigned int len)
-{
-       unsigned int i, j;
-
-       for (i = 0, j = 0; j < len; i++, j += 4) {
-#if defined(__i386__) || defined(__x86_64__)
-               *(u_int32_t *)(output + j) = input[i];
-#else
-               output[j] = input[i] & 0xff;
-               output[j + 1] = (input[i] >> 8) & 0xff;
-               output[j + 2] = (input[i] >> 16) & 0xff;
-               output[j + 3] = (input[i] >> 24) & 0xff;
-#endif /* __i386__ || __x86_64__ */
-       }
-}
-
-static unsigned char PADDING[64] = { 0x80, /* zeros */ };
-
-/* F, G, H and I are basic MD5 functions. */
-#define        F(x, y, z)      ((((y) ^ (z)) & (x)) ^ (z))
-#define        G(x, y, z)      ((((x) ^ (y)) & (z)) ^ (y))
-#define        H(x, y, z)      ((x) ^ (y) ^ (z))
-#define        I(x, y, z)      (((~(z)) | (x)) ^ (y))
-
-/* ROTATE_LEFT rotates x left n bits. */
-#define        ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-
-/*
- * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
- * Rotation is separate from addition to prevent recomputation.
- */
-#define        FF(a, b, c, d, x, s, ac) {                                      \
-       (a) += F((b), (c), (d)) + (x) + (unsigned long long)(ac);       \
-       (a) = ROTATE_LEFT((a), (s));                                    \
-       (a) += (b);                                                     \
-}
-
-#define        GG(a, b, c, d, x, s, ac) {                                      \
-       (a) += G((b), (c), (d)) + (x) + (unsigned long long)(ac);       \
-       (a) = ROTATE_LEFT((a), (s));                                    \
-       (a) += (b);                                                     \
-}
-
-#define        HH(a, b, c, d, x, s, ac) {                                      \
-       (a) += H((b), (c), (d)) + (x) + (unsigned long long)(ac);       \
-       (a) = ROTATE_LEFT((a), (s));                                    \
-       (a) += (b);                                                     \
-}
-
-#define        II(a, b, c, d, x, s, ac) {                                      \
-       (a) += I((b), (c), (d)) + (x) + (unsigned long long)(ac);       \
-       (a) = ROTATE_LEFT((a), (s));                                    \
-       (a) += (b);                                                     \
-}
-
-static void MD5Transform(u_int32_t, u_int32_t, u_int32_t, u_int32_t,
-    const u_int8_t [64], MD5_CTX *);
-
-/*
- * MD5 initialization. Begins an MD5 operation, writing a new context.
- */
-void
-MD5Init(MD5_CTX *context)
-{
-       context->count[0] = context->count[1] = 0;
-
-       /* Load magic initialization constants.  */
-       context->state[0] = 0x67452301UL;
-       context->state[1] = 0xefcdab89UL;
-       context->state[2] = 0x98badcfeUL;
-       context->state[3] = 0x10325476UL;
-}
-
-/*
- * MD5 block update operation. Continues an MD5 message-digest
- * operation, processing another message block, and updating the
- * context.
- */
-void
-MD5Update(MD5_CTX *context, const void *inpp, unsigned int inputLen)
-{
-       u_int32_t i, index, partLen;
-       const unsigned char *input = (const unsigned char *)inpp;
-
-       /* Compute number of bytes mod 64 */
-       index = (context->count[0] >> 3) & 0x3F;
-
-       /* Update number of bits */
-       if ((context->count[0] += (inputLen << 3)) < (inputLen << 3))
-               context->count[1]++;
-       context->count[1] += (inputLen >> 29);
-
-       partLen = 64 - index;
-
-       /* Transform as many times as possible. */
-       i = 0;
-       if (inputLen >= partLen) {
-               if (index != 0) {
-                       memcpy(&context->buffer[index], input, partLen);
-                       MD5Transform(context->state[0], context->state[1],
-                           context->state[2], context->state[3],
-                           context->buffer, context);
-                       i = partLen;
-               }
-
-               for (; i + 63 < inputLen; i += 64)
-                       MD5Transform(context->state[0], context->state[1],
-                           context->state[2], context->state[3],
-                           &input[i], context);
-
-               if (inputLen == i)
-                       return;
-
-               index = 0;
-       }
-
-       /* Buffer remaining input */
-       memcpy(&context->buffer[index], &input[i], inputLen - i);
-}
-
-/*
- * MD5 finalization. Ends an MD5 message-digest operation, writing
- * the message digest and zeroizing the context.
- */
-void
-MD5Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5_CTX *context)
-{
-       unsigned char bits[8];
-       u_int32_t index = (context->count[0] >> 3) & 0x3f;
-
-       /* Save number of bits */
-       Encode(bits, context->count, 8);
-
-       /* Pad out to 56 mod 64. */
-       MD5Update(context, PADDING, ((index < 56) ? 56 : 120) - index);
-
-       /* Append length (before padding) */
-       MD5Update(context, bits, 8);
-
-       /* Store state in digest */
-       Encode(digest, context->state, 16);
-
-       /* Zeroize sensitive information. */
-       memset(context, 0, sizeof (*context));
-}
-
-/*
- * MD5 basic transformation. Transforms state based on block.
- */
-static void
-MD5Transform(u_int32_t a, u_int32_t b, u_int32_t c, u_int32_t d,
-    const u_int8_t block[64], MD5_CTX *context)
-{
-       /* Register (instead of array) is a win in most cases */
-       register u_int32_t x0, x1, x2, x3, x4, x5, x6, x7;
-       register u_int32_t x8, x9, x10, x11, x12, x13, x14, x15;
-
-       x15 = FETCH_32(block + 60);
-       x14 = FETCH_32(block + 56);
-       x13 = FETCH_32(block + 52);
-       x12 = FETCH_32(block + 48);
-       x11 = FETCH_32(block + 44);
-       x10 = FETCH_32(block + 40);
-       x9  = FETCH_32(block + 36);
-       x8  = FETCH_32(block + 32);
-       x7  = FETCH_32(block + 28);
-       x6  = FETCH_32(block + 24);
-       x5  = FETCH_32(block + 20);
-       x4  = FETCH_32(block + 16);
-       x3  = FETCH_32(block + 12);
-       x2  = FETCH_32(block +  8);
-       x1  = FETCH_32(block +  4);
-       x0  = FETCH_32(block +  0);
-
-       /* Round 1 */
-#define        S11 7
-#define        S12 12
-#define        S13 17
-#define        S14 22
-       FF(a, b, c, d, x0,  S11, 0xd76aa478UL); /* 1 */
-       FF(d, a, b, c, x1,  S12, 0xe8c7b756UL); /* 2 */
-       FF(c, d, a, b, x2,  S13, 0x242070dbUL); /* 3 */
-       FF(b, c, d, a, x3,  S14, 0xc1bdceeeUL); /* 4 */
-       FF(a, b, c, d, x4,  S11, 0xf57c0fafUL); /* 5 */
-       FF(d, a, b, c, x5,  S12, 0x4787c62aUL); /* 6 */
-       FF(c, d, a, b, x6,  S13, 0xa8304613UL); /* 7 */
-       FF(b, c, d, a, x7,  S14, 0xfd469501UL); /* 8 */
-       FF(a, b, c, d, x8,  S11, 0x698098d8UL); /* 9 */
-       FF(d, a, b, c, x9,  S12, 0x8b44f7afUL); /* 10 */
-       FF(c, d, a, b, x10, S13, 0xffff5bb1UL); /* 11 */
-       FF(b, c, d, a, x11, S14, 0x895cd7beUL); /* 12 */
-       FF(a, b, c, d, x12, S11, 0x6b901122UL); /* 13 */
-       FF(d, a, b, c, x13, S12, 0xfd987193UL); /* 14 */
-       FF(c, d, a, b, x14, S13, 0xa679438eUL); /* 15 */
-       FF(b, c, d, a, x15, S14, 0x49b40821UL); /* 16 */
-
-       /* Round 2 */
-#define        S21 5
-#define        S22 9
-#define        S23 14
-#define        S24 20
-       GG(a, b, c, d, x1,  S21, 0xf61e2562UL); /* 17 */
-       GG(d, a, b, c, x6,  S22, 0xc040b340UL); /* 18 */
-       GG(c, d, a, b, x11, S23, 0x265e5a51UL); /* 19 */
-       GG(b, c, d, a, x0,  S24, 0xe9b6c7aaUL); /* 20 */
-       GG(a, b, c, d, x5,  S21, 0xd62f105dUL); /* 21 */
-       GG(d, a, b, c, x10, S22, 0x02441453UL); /* 22 */
-       GG(c, d, a, b, x15, S23, 0xd8a1e681UL); /* 23 */
-       GG(b, c, d, a, x4,  S24, 0xe7d3fbc8UL); /* 24 */
-       GG(a, b, c, d, x9,  S21, 0x21e1cde6UL); /* 25 */
-       GG(d, a, b, c, x14, S22, 0xc33707d6UL); /* 26 */
-       GG(c, d, a, b, x3,  S23, 0xf4d50d87UL); /* 27 */
-       GG(b, c, d, a, x8,  S24, 0x455a14edUL); /* 28 */
-       GG(a, b, c, d, x13, S21, 0xa9e3e905UL); /* 29 */
-       GG(d, a, b, c, x2,  S22, 0xfcefa3f8UL); /* 30 */
-       GG(c, d, a, b, x7,  S23, 0x676f02d9UL); /* 31 */
-       GG(b, c, d, a, x12, S24, 0x8d2a4c8aUL); /* 32 */
-
-       /* Round 3 */
-#define        S31 4
-#define        S32 11
-#define        S33 16
-#define        S34 23
-       HH(a, b, c, d, x5,  S31, 0xfffa3942UL); /* 33 */
-       HH(d, a, b, c, x8,  S32, 0x8771f681UL); /* 34 */
-       HH(c, d, a, b, x11, S33, 0x6d9d6122UL); /* 35 */
-       HH(b, c, d, a, x14, S34, 0xfde5380cUL); /* 36 */
-       HH(a, b, c, d, x1,  S31, 0xa4beea44UL); /* 37 */
-       HH(d, a, b, c, x4,  S32, 0x4bdecfa9UL); /* 38 */
-       HH(c, d, a, b, x7,  S33, 0xf6bb4b60UL); /* 39 */
-       HH(b, c, d, a, x10, S34, 0xbebfbc70UL); /* 40 */
-       HH(a, b, c, d, x13, S31, 0x289b7ec6UL); /* 41 */
-       HH(d, a, b, c, x0,  S32, 0xeaa127faUL); /* 42 */
-       HH(c, d, a, b, x3,  S33, 0xd4ef3085UL); /* 43 */
-       HH(b, c, d, a, x6,  S34, 0x04881d05UL); /* 44 */
-       HH(a, b, c, d, x9,  S31, 0xd9d4d039UL); /* 45 */
-       HH(d, a, b, c, x12, S32, 0xe6db99e5UL); /* 46 */
-       HH(c, d, a, b, x15, S33, 0x1fa27cf8UL); /* 47 */
-       HH(b, c, d, a, x2,  S34, 0xc4ac5665UL); /* 48 */
-
-       /* Round 4 */
-#define        S41 6
-#define        S42 10
-#define        S43 15
-#define        S44 21
-       II(a, b, c, d, x0,  S41, 0xf4292244UL); /* 49 */
-       II(d, a, b, c, x7,  S42, 0x432aff97UL); /* 50 */
-       II(c, d, a, b, x14, S43, 0xab9423a7UL); /* 51 */
-       II(b, c, d, a, x5,  S44, 0xfc93a039UL); /* 52 */
-       II(a, b, c, d, x12, S41, 0x655b59c3UL); /* 53 */
-       II(d, a, b, c, x3,  S42, 0x8f0ccc92UL); /* 54 */
-       II(c, d, a, b, x10, S43, 0xffeff47dUL); /* 55 */
-       II(b, c, d, a, x1,  S44, 0x85845dd1UL); /* 56 */
-       II(a, b, c, d, x8,  S41, 0x6fa87e4fUL); /* 57 */
-       II(d, a, b, c, x15, S42, 0xfe2ce6e0UL); /* 58 */
-       II(c, d, a, b, x6,  S43, 0xa3014314UL); /* 59 */
-       II(b, c, d, a, x13, S44, 0x4e0811a1UL); /* 60 */
-       II(a, b, c, d, x4,  S41, 0xf7537e82UL); /* 61 */
-       II(d, a, b, c, x11, S42, 0xbd3af235UL); /* 62 */
-       II(c, d, a, b, x2,  S43, 0x2ad7d2bbUL); /* 63 */
-       II(b, c, d, a, x9,  S44, 0xeb86d391UL); /* 64 */
-
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-
-       /* Zeroize sensitive information. */
-       x15 = x14 = x13 = x12 = x11 = x10 = x9 = x8 = 0;
-       x7 = x6 = x5 = x4 = x3 = x2 = x1 = x0 = 0;
-}
diff --git a/libkern/crypto/register_crypto.c b/libkern/crypto/register_crypto.c
new file mode 100644 (file)
index 0000000..4f08156
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+#include <libkern/crypto/register_crypto.h>
+#include <libkern/crypto/crypto_internal.h>
+
+crypto_functions_t g_crypto_funcs = NULL;
+
+int register_crypto_functions(const crypto_functions_t funcs)
+{
+       if(g_crypto_funcs)
+               return -1;
+
+       g_crypto_funcs = funcs;
+       
+       return 0;
+}
+
+
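The registration handshake, sketched from the corecrypto kext's side; the entry-point name is hypothetical, and the crypto_functions_t table layout lives in register_crypto.h rather than in this diff:

#include <libkern/crypto/register_crypto.h>

static int crypto_kext_attach(crypto_functions_t funcs)  /* hypothetical entry point */
{
	/* First registration wins; a second attempt returns -1 and leaves
	 * g_crypto_funcs pointing at the table installed earlier. */
	return register_crypto_functions(funcs);
}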
diff --git a/libkern/crypto/sha1.c b/libkern/crypto/sha1.c
deleted file mode 100644 (file)
index b85cbec..0000000
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * This SHA1 code is based on the basic framework from the reference
- * implementation for MD5.  That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * Based on the FIPS 180-1: Secure Hash Algorithm (SHA-1) available at
- * http://www.itl.nist.gov/div897/pubs/fip180-1.htm
- */
-
-#include <sys/types.h>
-#include <sys/systm.h>
-#include <libkern/OSAtomic.h>
-#include <libkern/crypto/sha1.h>
-#define        SHA1_TIMER      0               // change to nonzero to write timing stamps to profile sha1transform
-
-#if SHA1_TIMER
-#include <sys/kdebug.h>
-#endif
-
-#define        memset(x, y, z) bzero(x, z);
-#define        memcpy(x, y, z) bcopy(y, x, z)
-
-/* Internal mappings to the legacy sha1_ctxt structure. */
-#define        state   h.b32
-#define        bcount  c.b32
-#define        buffer  m.b8
-
-/*
- * The digest algorithm interprets the input message as a sequence of 32-bit
- * big-endian words.  We must reverse bytes in each word on x86/64 platforms,
- * but not on big-endian ones such as PPC.  For performance, we take advantage
- * of the bswap instruction on x86/64 to perform byte-reversal.  On PPC, we
- * could do 4-byte load if the address is 4-byte aligned which should further
- * improve the performance.  But for code simplicity, we punt and do 1-byte
- * loads instead.
- */
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
-#define        FETCH_32(p) ({                                                  \
-       register u_int32_t l = (u_int32_t)*((const u_int32_t *)(p));    \
-       __asm__ __volatile__("bswap %0" : "=r" (l) : "0" (l));          \
-       l;                                                              \
-})
-#else
-#define        FETCH_32(p)                                                     \
-       (((u_int32_t)*((const u_int8_t *)(p) + 3)) |                    \
-       (((u_int32_t)*((const u_int8_t *)(p) + 2)) << 8) |              \
-       (((u_int32_t)*((const u_int8_t *)(p) + 1)) << 16) |             \
-       (((u_int32_t)*((const u_int8_t *)(p))) << 24))
-#endif /* __i386__ || __x86_64__ */
-
-/*
- * Encodes input (u_int32_t) into output (unsigned char). Assumes len is
- * a multiple of 4. This is not compatible with memcpy().
- */
-static void
-Encode(unsigned char *output, u_int32_t *input, unsigned int len)
-{
-       unsigned int i, j;
-
-       for (i = 0, j = 0; j < len; i++, j += 4) {
-               output[j + 3] = input[i] & 0xff;
-               output[j + 2] = (input[i] >> 8) & 0xff;
-               output[j + 1] = (input[i] >> 16) & 0xff;
-               output[j] = (input[i] >> 24) & 0xff;
-       }
-}
-
-static unsigned char PADDING[64] = { 0x80, /* zeros */ };
-
-/* Constants from FIPS 180-1 */
-#define        K_00_19         0x5a827999UL
-#define        K_20_39         0x6ed9eba1UL
-#define        K_40_59         0x8f1bbcdcUL
-#define        K_60_79         0xca62c1d6UL
-
-/* F, G, H and I are basic SHA1 functions. */
-#define        F(b, c, d)      ((((c) ^ (d)) & (b)) ^ (d))
-#define        G(b, c, d)      ((b) ^ (c) ^ (d))
-#define        H(b, c, d)      (((b) & (c)) | (((b) | (c)) & (d)))
-
-/* ROTATE_LEFT rotates x left n bits. */
-#define        ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-
-/* R, R1-R4 are macros used during each transformation round. */
-#define R(f, k, v, w, x, y, z, i) {                            \
-       (v) = ROTATE_LEFT(w, 5) + f(x, y, z) + (v) + (i) + (k); \
-       (x) = ROTATE_LEFT(x, 30);                               \
-}
-
-#define        R1(v, w, x, y, z, i)    R(F, K_00_19, v, w, x, y, z, i)
-#define        R2(v, w, x, y, z, i)    R(G, K_20_39, v, w, x, y, z, i)
-#define        R3(v, w, x, y, z, i)    R(H, K_40_59, v, w, x, y, z, i)
-#define        R4(v, w, x, y, z, i)    R(G, K_60_79, v, w, x, y, z, i)
-
-/* WUPDATE represents Wt variable that gets updated for steps 16-79 */
-#define        WUPDATE(p, q, r, s) {           \
-       (p) = ((q) ^ (r) ^ (s) ^ (p));  \
-       (p) = ROTATE_LEFT(p, 1);        \
-}
-
-#if (defined (__x86_64__) || defined (__i386__)) 
-extern void SHA1Transform(SHA1_CTX *, const u_int8_t *, u_int32_t Nblocks);
-#else
-static void SHA1Transform(SHA1_CTX *, const u_int8_t *);
-#endif
-
-void _SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen);
-
-void SHA1Final_r(SHA1_CTX *, void *);
-
-typedef kern_return_t (*InKernelPerformSHA1Func)(void *ref, const void *data, size_t dataLen, u_int32_t *inHash, u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress); 
-void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref);
-static void *SHA1Ref;
-InKernelPerformSHA1Func performSHA1WithinKernelOnly; 
-#define SHA1_USE_HARDWARE_THRESHOLD 2048 //bytes 
-
-
-/*
- * SHA1 initialization. Begins a SHA1 operation, writing a new context.
- */
-void
-SHA1Init(SHA1_CTX *context)
-{
-       context->bcount[0] = context->bcount[1] = 0;
-       context->count = 0;
-
-       /* Load magic initialization constants.  */
-       context->state[0] = 0x67452301UL;
-       context->state[1] = 0xefcdab89UL;
-       context->state[2] = 0x98badcfeUL;
-       context->state[3] = 0x10325476UL;
-       context->state[4] = 0xc3d2e1f0UL;
-}
-
-/*
- * SHA1 block update operation. Continues a SHA1 message-digest
- * operation, processing another message block, and updating the
- * context.
- */
-void
-_SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
-{
-       u_int32_t i, index, partLen;
-       const unsigned char *input = (const unsigned char *)inpp;
-
-       if (inputLen == 0)
-               return;
-
-       /* Compute number of bytes mod 64 */
-       index = (context->bcount[1] >> 3) & 0x3F;
-
-       /* Update number of bits */
-       if ((context->bcount[1] += (inputLen << 3)) < (inputLen << 3))
-               context->bcount[0]++;
-       context->bcount[0] += (inputLen >> 29);
-
-       partLen = 64 - index;
-
-       /* Transform as many times as possible. */
-       i = 0;
-       if (inputLen >= partLen) {
-               if (index != 0) {
-                       memcpy(&context->buffer[index], input, partLen);
-#if (defined (__x86_64__) || defined (__i386__)) 
-                       SHA1Transform(context, context->buffer, 1);
-#else
-                       SHA1Transform(context, context->buffer);
-#endif
-                       i = partLen;
-               }
-
-#if SHA1_TIMER
-               KERNEL_DEBUG_CONSTANT(0xaa800004 | DBG_FUNC_START, 0, 0, 0, 0, 0);
-#endif
-#if (defined (__x86_64__) || defined (__i386__)) 
-                       {       
-                               int     kk = (inputLen-i)>>6;
-                               if (kk>0) {
-                                       SHA1Transform(context, &input[i], kk);
-                                       i += (kk<<6);
-                               }
-                       }
-#else
-               for (; i + 63 < inputLen; i += 64)
-                       SHA1Transform(context, &input[i]);
-#endif
-
-                if (inputLen == i) {
-#if SHA1_TIMER
-                       KERNEL_DEBUG_CONSTANT(0xaa800004 | DBG_FUNC_END, 0, 0, 0, 0, 0);
-#endif
-                       return;
-                }
-
-               index = 0;
-       }
-
-       /* Buffer remaining input */
-       memcpy(&context->buffer[index], &input[i], inputLen - i);
-}
-
-
-
-
-/*
- * This function is called by the SHA1 hardware kext during its init. 
- * This will register the function to call to perform SHA1 using hardware. 
- */
-void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref)
-{
-       if(option) {
-               // Establish the hook. The hardware is ready.
-               OSCompareAndSwapPtr((void*)NULL, (void*)ref, (void * volatile*)&SHA1Ref); 
-
-               if(!OSCompareAndSwapPtr((void *)NULL, (void *)func, (void * volatile *)&performSHA1WithinKernelOnly)) {
-                       panic("sha1_hardware_hook: Called twice.. Should never happen\n");
-               }
-       }
-       else {
-               // The hardware is going away. Tear down the hook.      
-               performSHA1WithinKernelOnly = NULL;
-               SHA1Ref = NULL;
-       }
-}
-
-static u_int32_t SHA1UpdateWithHardware(SHA1_CTX *context, const unsigned char *data, size_t dataLen, Boolean usePhysicalAddress)
-{
-       u_int32_t *inHashBuffer = context->state;
-       u_int32_t options = 0;
-       int result;
-
-       result = performSHA1WithinKernelOnly(SHA1Ref, data, dataLen, inHashBuffer, options, inHashBuffer, usePhysicalAddress);
-       if(result != KERN_SUCCESS) {
-               //The hardware failed to hash for some reason. Fall back to software. 
-               return 0;
-       }
-
-       //Update the context with the total length.
-        /* Update number of bits */
-        if ((context->bcount[1] += (dataLen << 3)) < (dataLen << 3))
-                context->bcount[0]++;
-        context->bcount[0] += (dataLen >> 29);
-       return dataLen;
-}
-
-/*
- * This function is only called from the pagefault path or from page_copy().
- * So we assume that we can safely convert the virtual address to the physical address and use it.
- * Assumptions: the passed-in address (inpp) is a kernel virtual address
- * and a physical page has been faulted in.
- * The inputLen passed in should always be less than or equal to a page size (4096)
- * and inpp should be on a page boundary.
- * "performSHA1WithinKernelOnly" is initialized only when the hardware driver exists and is ready.
- */
-void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen)
-{
-       Boolean usePhysicalAddress = TRUE;
-       if((inputLen == PAGE_SIZE) && performSHA1WithinKernelOnly) { // If hardware exists and is ready.
-               if(SHA1UpdateWithHardware(context, (const unsigned char *)inpp, inputLen, usePhysicalAddress))
-                       return;
-               // otherwise the hardware failed for some reason;
-               // fall through and retry the hash in software.
-       }
-       //Use the software implementation since the hardware is absent or 
-       // has not been initialized yet or inputLen !=  PAGE_SIZE. 
-       _SHA1Update(context, inpp, inputLen);
-}
-
-/*
- * A wrapper around _SHA1Update() to pick between software- or hardware-based SHA1.
- *
- */
-void SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
-{
-       const unsigned char *input = (const unsigned char *)inpp;
-       Boolean usePhysicalAddress = FALSE;
-       u_int32_t index;
-       
-       if((inputLen > SHA1_USE_HARDWARE_THRESHOLD) && performSHA1WithinKernelOnly) { 
-               index = (context->bcount[1] >> 3) & 0x3F;
-               if(index != 0) {  //bytes left in the context. Handle them first.
-                       u_int32_t partLen = 64 - index;
-			_SHA1Update(context, input, partLen);	/* complete the partial 64-byte block in software */
-                       inputLen -= partLen; 
-                       input += partLen; 
-               }
-               
-               u_int32_t lenForHardware = inputLen & (~0x3F); //multiple of 64
-               u_int32_t bytesHashed = 0;
-               bytesHashed = SHA1UpdateWithHardware(context, input, lenForHardware, usePhysicalAddress);       
-               
-               inputLen -= bytesHashed;
-               input += bytesHashed;
-       }
-
-       //Fall through to the software implementation.
-       _SHA1Update(context, input, inputLen);
-}
-
-/*
- * For backwards compatibility, the sha1_result symbol is mapped to this
- * routine, since it is equivalent to SHA1Final with reversed parameters.
- */
-void
-SHA1Final_r(SHA1_CTX *context, void *digest)
-{
-       SHA1Final(digest, context);
-}
-
-/*
- * SHA1 finalization. Ends an SHA1 message-digest operation, writing
- * the message digest and zeroizing the context.
- */
-void
-SHA1Final(void *digest, SHA1_CTX *context)
-{
-       unsigned char bits[8];
-       u_int32_t index = (context->bcount[1] >> 3) & 0x3f;
-
-       /* Save number of bits */
-       Encode(bits, context->bcount, 8);
-
-       /* Pad out to 56 mod 64. */
-       SHA1Update(context, PADDING, ((index < 56) ? 56 : 120) - index);
-
-       /* Append length (before padding) */
-       SHA1Update(context, bits, 8);
-
-       /* Store state in digest */
-       Encode(digest, context->state, 20);
-
-       /* Zeroize sensitive information. */
-       memset(context, 0, sizeof (*context));
-}
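
Taken together, the calling sequence for this API is the usual Init/Update/Final; a sketch (SHA1Init is defined earlier in this file, outside the excerpt):

static void
example_digest(void)
{
	SHA1_CTX ctx;
	unsigned char digest[20];

	SHA1Init(&ctx);
	SHA1Update(&ctx, "abc", 3);
	SHA1Final(digest, &ctx);
	/* digest now holds a9993e364706816aba3e25717850c26c9cd0d89d,
	 * the FIPS 180-1 test vector for "abc". */
}
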
-
-/*
- * SHA1 basic transformation. Transforms state based on block.
- */
-#if !(defined (__x86_64__) || defined (__i386__)) 
-static void
-SHA1Transform(SHA1_CTX *context, const u_int8_t block[64])
-{
-       /* Register (instead of array) is a win in most cases */
-       register u_int32_t a, b, c, d, e;
-       register u_int32_t w0, w1, w2, w3, w4, w5, w6, w7;
-       register u_int32_t w8, w9, w10, w11, w12, w13, w14, w15;
-
-       a = context->state[0];
-       b = context->state[1];
-       c = context->state[2];
-       d = context->state[3];
-       e = context->state[4];
-
-       w15 = FETCH_32(block + 60);
-       w14 = FETCH_32(block + 56);
-       w13 = FETCH_32(block + 52);
-       w12 = FETCH_32(block + 48);
-       w11 = FETCH_32(block + 44);
-       w10 = FETCH_32(block + 40);
-       w9  = FETCH_32(block + 36);
-       w8  = FETCH_32(block + 32);
-       w7  = FETCH_32(block + 28);
-       w6  = FETCH_32(block + 24);
-       w5  = FETCH_32(block + 20);
-       w4  = FETCH_32(block + 16);
-       w3  = FETCH_32(block + 12);
-       w2  = FETCH_32(block +  8);
-       w1  = FETCH_32(block +  4);
-       w0  = FETCH_32(block +  0);
-
-       /* Round 1 */
-                                       R1(e, a, b, c, d,  w0);         /*  0 */
-                                       R1(d, e, a, b, c,  w1);         /*  1 */
-                                       R1(c, d, e, a, b,  w2);         /*  2 */
-                                       R1(b, c, d, e, a,  w3);         /*  3 */
-                                       R1(a, b, c, d, e,  w4);         /*  4 */
-                                       R1(e, a, b, c, d,  w5);         /*  5 */
-                                       R1(d, e, a, b, c,  w6);         /*  6 */
-                                       R1(c, d, e, a, b,  w7);         /*  7 */
-                                       R1(b, c, d, e, a,  w8);         /*  8 */
-                                       R1(a, b, c, d, e,  w9);         /*  9 */
-                                       R1(e, a, b, c, d, w10);         /* 10 */
-                                       R1(d, e, a, b, c, w11);         /* 11 */
-                                       R1(c, d, e, a, b, w12);         /* 12 */
-                                       R1(b, c, d, e, a, w13);         /* 13 */
-                                       R1(a, b, c, d, e, w14);         /* 14 */
-                                       R1(e, a, b, c, d, w15);         /* 15 */
-       WUPDATE( w0, w13,  w8,  w2);    R1(d, e, a, b, c,  w0);         /* 16 */
-       WUPDATE( w1, w14,  w9,  w3);    R1(c, d, e, a, b,  w1);         /* 17 */
-       WUPDATE( w2, w15, w10,  w4);    R1(b, c, d, e, a,  w2);         /* 18 */
-       WUPDATE( w3,  w0, w11,  w5);    R1(a, b, c, d, e,  w3);         /* 19 */
-
-       /* Round 2 */
-       WUPDATE( w4,  w1, w12,  w6);    R2(e, a, b, c, d,  w4);         /* 20 */
-       WUPDATE( w5,  w2, w13,  w7);    R2(d, e, a, b, c,  w5);         /* 21 */
-       WUPDATE( w6,  w3, w14,  w8);    R2(c, d, e, a, b,  w6);         /* 22 */
-       WUPDATE( w7,  w4, w15,  w9);    R2(b, c, d, e, a,  w7);         /* 23 */
-       WUPDATE( w8,  w5,  w0, w10);    R2(a, b, c, d, e,  w8);         /* 24 */
-       WUPDATE( w9,  w6,  w1, w11);    R2(e, a, b, c, d,  w9);         /* 25 */
-       WUPDATE(w10,  w7,  w2, w12);    R2(d, e, a, b, c, w10);         /* 26 */
-       WUPDATE(w11,  w8,  w3, w13);    R2(c, d, e, a, b, w11);         /* 27 */
-       WUPDATE(w12,  w9,  w4, w14);    R2(b, c, d, e, a, w12);         /* 28 */
-       WUPDATE(w13, w10,  w5, w15);    R2(a, b, c, d, e, w13);         /* 29 */
-       WUPDATE(w14, w11,  w6,  w0);    R2(e, a, b, c, d, w14);         /* 30 */
-       WUPDATE(w15, w12,  w7,  w1);    R2(d, e, a, b, c, w15);         /* 31 */
-       WUPDATE( w0, w13,  w8,  w2);    R2(c, d, e, a, b,  w0);         /* 32 */
-       WUPDATE( w1, w14,  w9,  w3);    R2(b, c, d, e, a,  w1);         /* 33 */
-       WUPDATE( w2, w15, w10,  w4);    R2(a, b, c, d, e,  w2);         /* 34 */
-       WUPDATE( w3,  w0, w11,  w5);    R2(e, a, b, c, d,  w3);         /* 35 */
-       WUPDATE( w4,  w1, w12,  w6);    R2(d, e, a, b, c,  w4);         /* 36 */
-       WUPDATE( w5,  w2, w13,  w7);    R2(c, d, e, a, b,  w5);         /* 37 */
-       WUPDATE( w6,  w3, w14,  w8);    R2(b, c, d, e, a,  w6);         /* 38 */
-       WUPDATE( w7,  w4, w15,  w9);    R2(a, b, c, d, e,  w7);         /* 39 */
-
-       /* Round 3 */
-       WUPDATE( w8,  w5,  w0, w10);    R3(e, a, b, c, d,  w8);         /* 40 */
-       WUPDATE( w9,  w6,  w1, w11);    R3(d, e, a, b, c,  w9);         /* 41 */
-       WUPDATE(w10,  w7,  w2, w12);    R3(c, d, e, a, b, w10);         /* 42 */
-       WUPDATE(w11,  w8,  w3, w13);    R3(b, c, d, e, a, w11);         /* 43 */
-       WUPDATE(w12,  w9,  w4, w14);    R3(a, b, c, d, e, w12);         /* 44 */
-       WUPDATE(w13, w10,  w5, w15);    R3(e, a, b, c, d, w13);         /* 45 */
-       WUPDATE(w14, w11,  w6,  w0);    R3(d, e, a, b, c, w14);         /* 46 */
-       WUPDATE(w15, w12,  w7,  w1);    R3(c, d, e, a, b, w15);         /* 47 */
-       WUPDATE( w0, w13,  w8,  w2);    R3(b, c, d, e, a,  w0);         /* 48 */
-       WUPDATE( w1, w14,  w9,  w3);    R3(a, b, c, d, e,  w1);         /* 49 */
-       WUPDATE( w2, w15, w10,  w4);    R3(e, a, b, c, d,  w2);         /* 50 */
-       WUPDATE( w3,  w0, w11,  w5);    R3(d, e, a, b, c,  w3);         /* 51 */
-       WUPDATE( w4,  w1, w12,  w6);    R3(c, d, e, a, b,  w4);         /* 52 */
-       WUPDATE( w5,  w2, w13,  w7);    R3(b, c, d, e, a,  w5);         /* 53 */
-       WUPDATE( w6,  w3, w14,  w8);    R3(a, b, c, d, e,  w6);         /* 54 */
-       WUPDATE( w7,  w4, w15,  w9);    R3(e, a, b, c, d,  w7);         /* 55 */
-       WUPDATE( w8,  w5,  w0, w10);    R3(d, e, a, b, c,  w8);         /* 56 */
-       WUPDATE( w9,  w6,  w1, w11);    R3(c, d, e, a, b,  w9);         /* 57 */
-       WUPDATE(w10,  w7,  w2, w12);    R3(b, c, d, e, a, w10);         /* 58 */
-       WUPDATE(w11,  w8,  w3, w13);    R3(a, b, c, d, e, w11);         /* 59 */
-
-       WUPDATE(w12,  w9,  w4, w14);    R4(e, a, b, c, d, w12);         /* 60 */
-       WUPDATE(w13, w10,  w5, w15);    R4(d, e, a, b, c, w13);         /* 61 */
-       WUPDATE(w14, w11,  w6,  w0);    R4(c, d, e, a, b, w14);         /* 62 */
-       WUPDATE(w15, w12,  w7,  w1);    R4(b, c, d, e, a, w15);         /* 63 */
-       WUPDATE( w0, w13,  w8,  w2);    R4(a, b, c, d, e,  w0);         /* 64 */
-       WUPDATE( w1, w14,  w9,  w3);    R4(e, a, b, c, d,  w1);         /* 65 */
-       WUPDATE( w2, w15, w10,  w4);    R4(d, e, a, b, c,  w2);         /* 66 */
-       WUPDATE( w3,  w0, w11,  w5);    R4(c, d, e, a, b,  w3);         /* 67 */
-       WUPDATE( w4,  w1, w12,  w6);    R4(b, c, d, e, a,  w4);         /* 68 */
-       WUPDATE( w5,  w2, w13,  w7);    R4(a, b, c, d, e,  w5);         /* 69 */
-       WUPDATE( w6,  w3, w14,  w8);    R4(e, a, b, c, d,  w6);         /* 70 */
-       WUPDATE( w7,  w4, w15,  w9);    R4(d, e, a, b, c,  w7);         /* 71 */
-       WUPDATE( w8,  w5,  w0, w10);    R4(c, d, e, a, b,  w8);         /* 72 */
-       WUPDATE( w9,  w6,  w1, w11);    R4(b, c, d, e, a,  w9);         /* 73 */
-       WUPDATE(w10,  w7,  w2, w12);    R4(a, b, c, d, e, w10);         /* 74 */
-       WUPDATE(w11,  w8,  w3, w13);    R4(e, a, b, c, d, w11);         /* 75 */
-       WUPDATE(w12,  w9,  w4, w14);    R4(d, e, a, b, c, w12);         /* 76 */
-       WUPDATE(w13, w10,  w5, w15);    R4(c, d, e, a, b, w13);         /* 77 */
-       WUPDATE(w14, w11,  w6,  w0);    R4(b, c, d, e, a, w14);         /* 78 */
-       WUPDATE(w15, w12,  w7,  w1);    R4(a, b, c, d, e, w15);         /* 79 */
-
-       context->state[0] += a;
-       context->state[1] += b;
-       context->state[2] += c;
-       context->state[3] += d;
-       context->state[4] += e;
-
-       /* Zeroize sensitive information. */
-       w15 = w14 = w13 = w12 = w11 = w10 = w9 = w8 = 0;
-       w7 = w6 = w5 = w4 = w3 = w2 = w1 = w0 = 0;
-}
-#endif
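
FETCH_32, WUPDATE, and R1..R4 are defined earlier in the file and fall outside this excerpt. A hypothetical reconstruction consistent with the calls above and with FIPS 180-1 (R2-R4 differ from R1 only in the boolean function and round constant):

#define ROTL(n, x)	(((x) << (n)) | ((x) >> (32 - (n))))

/* Big-endian load of 4 bytes; the real macro may use a byte-swap builtin. */
#define FETCH_32(p)	(((u_int32_t)(p)[0] << 24) | ((u_int32_t)(p)[1] << 16) | \
			 ((u_int32_t)(p)[2] << 8) | (u_int32_t)(p)[3])

/* Message schedule: w[t] = ROTL1(w[t-3] ^ w[t-8] ^ w[t-14] ^ w[t-16]). */
#define WUPDATE(w, wm3, wm8, wm14) \
	((w) = ROTL(1, (w) ^ (wm3) ^ (wm8) ^ (wm14)))

/* Round 1 step: e += ROTL5(a) + Ch(b,c,d) + w + K1, then b = ROTL30(b). */
#define R1(e, a, b, c, d, w) do { \
	(e) += ROTL(5, (a)) + (((b) & ((c) ^ (d))) ^ (d)) + (w) + 0x5A827999; \
	(b) = ROTL(30, (b)); \
} while (0)
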
index 3484791d567a41a288f28e005679baf2d03d8e78..cfb15c5c1b7e0b3f160166fb2c2aaf48de2ae7a6 100644 (file)
@@ -54,48 +54,10 @@ enum {
  * Like standards, there are a lot of atomic ops to choose from!
  */
 
-#if !defined(__i386__) && !defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 /* Implemented in assembly for i386 and x86_64 */
-#undef OSAddAtomic
-SInt32
-OSAddAtomic(SInt32 amount, volatile SInt32 * value)
-{
-       SInt32 oldValue;
-       SInt32 newValue;
-
-       do {
-               oldValue = *value;
-               newValue = oldValue + amount;
-       } while (!OSCompareAndSwap((UInt32)oldValue,
-                               (UInt32)newValue,
-                               (volatile UInt32 *) value));
-       return oldValue;
-}
-
-#undef OSAddAtomicLong
-long
-OSAddAtomicLong(long theAmount, volatile long *address)
-{
-#if __LP64__
-#error Unimplemented
-#else
-       return (long)OSAddAtomic((SInt32)theAmount, address);
-#endif
-}
-
-/* Implemented as an assembly alias for i386 */
-#undef OSCompareAndSwapPtr
-Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue,
-                           void * volatile *address)
-{
-#if __LP64__
-  return OSCompareAndSwap64((UInt64)oldValue, (UInt64)newValue,
-                         (volatile UInt64 *)address);
 #else
-  return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue,
-                         (volatile UInt32 *)address);
-#endif
-}
+#error Unsupported arch
 #endif
 
 #undef OSIncrementAtomic
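
The removed OSAddAtomic shows the generic compare-and-swap retry idiom, and the same loop shape covers any read-modify-write. For instance, a hypothetical atomic-max helper (illustrative; not part of libkern):

SInt32
os_atomic_max_example(SInt32 candidate, volatile SInt32 *value)
{
	SInt32 oldValue;

	do {
		oldValue = *value;
		if (candidate <= oldValue)
			return oldValue;	/* already large enough; no store needed */
	} while (!OSCompareAndSwap((UInt32)oldValue, (UInt32)candidate,
	    (volatile UInt32 *)value));
	return oldValue;
}
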
index 3e67cfff8520f3732b668e7e840cd44f1ba8a2ee..14ee32621ed62352c30e288120140f5fc84b9c5e 100644 (file)
@@ -286,4 +286,3 @@ pad:
 #endif
     return frame;
 }
-
index 0edc6b64de899500862f2153380f7ff28c3e2e1a..ebef5b5ab5bea6ed3d768593c7f2dd60f5ee8894 100644 (file)
@@ -59,7 +59,7 @@ getlastaddr(void)
        sgp = (kernel_segment_command_t *)
                ((uintptr_t)header + sizeof(kernel_mach_header_t));
        for (i = 0; i < header->ncmds; i++){
-               if (   sgp->cmd == LC_SEGMENT_KERNEL) {
+               if (sgp->cmd == LC_SEGMENT_KERNEL) {
                        if (sgp->vmaddr + sgp->vmsize > last_addr)
                                last_addr = sgp->vmaddr + sgp->vmsize;
                }
@@ -69,32 +69,47 @@ getlastaddr(void)
 }
 
 /*
- * Find the UUID load command in the Mach-O headers, and return
- * the address of the UUID blob and size in "*size". If the
- * Mach-O image is missing a UUID, NULL is returned.
+ * Find the specified load command in the Mach-O headers, and return
+ * the command. If there is no such load command, NULL is returned.
  */
 void *
-getuuidfromheader(kernel_mach_header_t *mhp, unsigned long *size)
-{
-       struct uuid_command *uuidp;
+getcommandfromheader(kernel_mach_header_t *mhp, uint32_t cmd)
+{
+       struct load_command *lcp;
        unsigned long i;
 
-       uuidp = (struct uuid_command *)
-               ((uintptr_t)mhp + sizeof(kernel_mach_header_t));
+       lcp = (struct load_command *) (mhp + 1);
        for(i = 0; i < mhp->ncmds; i++){
-               if(uuidp->cmd == LC_UUID) {
-                       if (size)
-                               *size = sizeof(uuidp->uuid);
-
-                       return (void *)uuidp->uuid;
+               if(lcp->cmd == cmd) {
+                       return (void *)lcp;
                }
 
-               uuidp = (struct uuid_command *)((uintptr_t)uuidp + uuidp->cmdsize);
+               lcp = (struct load_command *)((uintptr_t)lcp + lcp->cmdsize);
        }
 
        return NULL;
 }
 
+/*
+ * Find the UUID load command in the Mach-O headers, and return
+ * the address of the UUID blob and size in "*size". If the
+ * Mach-O image is missing a UUID, NULL is returned.
+ */
+void *
+getuuidfromheader(kernel_mach_header_t *mhp, unsigned long *size)
+{
+	struct uuid_command *cmd = (struct uuid_command *)
+		getcommandfromheader(mhp, LC_UUID);
+
+	if (cmd != NULL) {
+		if (size) {
+			*size = sizeof(cmd->uuid);
+		}
+		return cmd->uuid;
+	}
+
+	return NULL;
+}
+
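
getuuidfromheader() above is one client of the new generic walk; other load commands can be fetched the same way. A sketch (the extern is the kernel's own Mach-O header symbol):

extern kernel_mach_header_t _mh_execute_header;

static void
example_find_symtab(void)
{
	struct symtab_command *symtab = (struct symtab_command *)
		getcommandfromheader(&_mh_execute_header, LC_SYMTAB);

	if (symtab != NULL) {
		/* symtab->symoff and symtab->nsyms locate the symbol table. */
	}
}
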
 /*
  * This routine returns a pointer to the data for the named section in the
  * named segment if it exists in the mach header passed to it.  Also it returns
@@ -323,68 +338,3 @@ nextsect(kernel_segment_command_t *sgp, kernel_section_t *sp)
 
        return sp+1;
 }
-
-#ifdef MACH_KDB
-/*
- * This routine returns the section command for the symbol table in the
- * named segment for the mach_header pointer passed to it if it exists.
- * Otherwise it returns zero.
- */
-static struct symtab_command *
-getsectcmdsymtabfromheader(
-       kernel_mach_header_t *mhp)
-{
-       kernel_segment_command_t *sgp;
-       unsigned long i;
-
-       sgp = (kernel_segment_command_t *)
-               ((uintptr_t)mhp + sizeof(kernel_mach_header_t));
-       for(i = 0; i < mhp->ncmds; i++){
-               if(sgp->cmd == LC_SYMTAB)
-               return((struct symtab_command *)sgp);
-               sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize);
-       }
-       return((struct symtab_command *)NULL);
-}
-
-boolean_t getsymtab(kernel_mach_header_t *header,
-                       vm_offset_t *symtab,
-                       int *nsyms,
-                       vm_offset_t *strtab,
-                       vm_size_t *strtabsize)
-{
-       kernel_segment_command_t *seglink_cmd;
-       struct symtab_command *symtab_cmd;
-
-       seglink_cmd = NULL;
-       
-       if((header->magic != MH_MAGIC)
-        && (header->magic != MH_MAGIC_64)) {                                           /* Check if this is a valid header format */
-               return (FALSE);                                                                 /* Bye y'all... */
-       }
-       
-       seglink_cmd = getsegbynamefromheader(header,"__LINKEDIT");
-       if (seglink_cmd == NULL) {
-               return(FALSE);
-       }
-
-       symtab_cmd = NULL;
-       symtab_cmd = getsectcmdsymtabfromheader(header);
-       if (symtab_cmd == NULL)
-               return(FALSE);
-
-       *nsyms = symtab_cmd->nsyms;
-       if(symtab_cmd->nsyms == 0) return (FALSE);      /* No symbols */
-
-       *strtabsize = symtab_cmd->strsize;
-	if(symtab_cmd->strsize == 0) return (FALSE);	/* String table size is 0 */
-       
-       *symtab = seglink_cmd->vmaddr + symtab_cmd->symoff -
-               seglink_cmd->fileoff;
-
-       *strtab = seglink_cmd->vmaddr + symtab_cmd->stroff -
-                       seglink_cmd->fileoff;
-
-       return(TRUE);
-}
-#endif
diff --git a/libkern/kmod/Makefile b/libkern/kmod/Makefile
deleted file mode 100644 (file)
index 8ffce50..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)
-else
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
-endif
-
-do_all: 
-       $(_v)($(MKDIR) $(COMPOBJROOT)/kmod;                             \
-       cd $(COMPOBJROOT)/kmod;                                         \
-       ${MAKE} MAKEFILES=$(SOURCE)/Makefile.kmod                       \
-               TARGET=$(TARGET)                                        \
-               do_build_all                                            \
-       )
-
-do_build_all:  do_all
-
-do_install: 
-       @echo "[ $(SOURCE) ] make do_install  $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)";          \
-       ($(MKDIR) $(COMPOBJROOT)/kmod;                                  \
-       cd $(COMPOBJROOT)/kmod;                                         \
-       ${MAKE} MAKEFILES=$(SOURCE)/Makefile.kmod                       \
-               TARGET=$(TARGET)                                        \
-               do_build_install                                        \
-       )
-
-do_build_install:      do_install
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/libkern/kmod/Makefile.kmod b/libkern/kmod/Makefile.kmod
deleted file mode 100644 (file)
index 62ffd89..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-#
-# Kernel Module Library code makefile
-#
-
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTALL_DIR =          $(DSTROOT)/usr/lib
-KMOD_NAME =            libkmod
-KMODCPP_NAME =         libkmodc++
-LIB_INSTALL_FLAGS =    -p -m 444
-
-# -mkernel implies -mlong-branch/-mlong-calls/-mno-red-zone as needed for
-# code linked into kexts
-# -fno-stack-protector is necessary for the kernel, but not for kexts
-CFLAGS_KMOD = $(filter-out -O0 -O1 -O2 -O3 -O4 -Os -Oz -freorder-blocks -flto -fno-stack-protector,$(CFLAGS)) \
-           -Os -mkernel -Wall
-
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-COMPOBJROOT = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/kmod
-INSTOBJROOT = $(OBJROOT)/$(INSTALL_TYPE)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/kmod
-else
-COMPOBJROOT = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/kmod
-INSTOBJROOT = $(OBJROOT)/$(INSTALL_TYPE)_$(ARCH_CONFIG)/$(COMPONENT)/kmod
-endif
-
-
-KMOD_CFILES    = c_start.c c_stop.c
-KMODCPP_CFILES = cplus_start.c cplus_stop.c
-
-KMOD_OFILES    = $(KMOD_CFILES:.c=.o)
-KMODCPP_OFILES = $(KMODCPP_CFILES:.c=.o)
-
-ALL_OFILES = $(KMOD_OFILES) $(KMODCPP_OFILES)
-
-$(ALL_OFILES): %.o : %.c
-       @echo LIBKMOD_CC $@
-       $(_v)${LIBKMOD_CC} -c ${CFLAGS_KMOD} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $<
-
-$(COMPOBJROOT)/$(KMOD_NAME).a: $(KMOD_OFILES)
-       @echo LIBTOOL $(notdir $@)
-       $(_v)$(LIBTOOL) -static -o $@ $^ $(_vstdout) 2>&1
-
-$(COMPOBJROOT)/$(KMODCPP_NAME).a: $(KMODCPP_OFILES)
-       @echo LIBTOOL $(notdir $@)
-       $(_v)$(LIBTOOL) -static -o $@ $^ $(_vstdout) 2>&1
-
-do_build_all: $(COMPOBJROOT)/$(KMOD_NAME).a $(COMPOBJROOT)/$(KMODCPP_NAME).a
-
-$(INSTALL_DIR)/%.a: $(INSTOBJROOT)/%.a
-       @echo Installing $< in $@;
-       $(_v)$(RM) $@ || true;                                          \
-       ${MKDIR} $(INSTALL_DIR) $(SYMROOT);                             \
-       if [ $(MACHINE_CONFIG) = DEFAULT ]; then                        \
-               allarchs="";                                            \
-               for onearch in $(INSTALL_ARCHS); do                     \
-                       if [ $${onearch} = ARM ] ; then                 \
-                               archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}_$(DEFAULT_ARM_MACHINE_CONFIG)/$(COMPONENT); \
-                       else                                            \
-                               archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \
-                       fi;                                             \
-                       if [ -e $${archdir}/kmod/$(*F).a ]; then        \
-                               allarchs="$${allarchs} $${archdir}/kmod/$(*F).a"; \
-                       fi;                                             \
-               done;                                                   \
-               cmd="$(LIPO) $${allarchs} -create -output $(SYMROOT)/$(*F).a"; \
-               echo $$cmd; eval $$cmd;                                 \
-       else                                                            \
-               my_counter=1;                                           \
-               my_innercounter=1;                                      \
-               outputfile=$(SYMROOT)/$(*F).a;                          \
-               for my_config in $(TARGET_CONFIGS_UC); do               \
-                       if [ $${my_counter} -eq 1 ]; then               \
-                               my_counter=2;                           \
-                               my_kconfig=$${my_config};               \
-                       elif [ $${my_counter} -eq 2 ]; then             \
-                               my_counter=3;                           \
-                               my_aconfig=$${my_config};               \
-                       else                                            \
-                               my_counter=1;                           \
-                               if [ $${my_aconfig} = ARM ] ; then      \
-                                       if [ $${my_config} = DEFAULT ] ; then   \
-                                               my_config=$(DEFAULT_ARM_MACHINE_CONFIG);        \
-                                       fi;                             \
-                               fi;                                     \
-                               inputfile=$(OBJROOT)/$${my_kconfig}_$${my_aconfig}_$${my_config}/$(COMPONENT)/kmod/$(*F).a; \
-                               if [ -e $${inputfile} ]; then           \
-                                       if [ $${my_innercounter} -eq 1 ]; then \
-                                               my_innercounter=2;      \
-                                               cmd="$(LIPO) -create $${inputfile} -o $${outputfile}"; \
-                                       else                            \
-                                               cmd="$(LIPO) -create $${outputfile} $${inputfile} -o $${outputfile} || true"; \
-                                       fi;                             \
-                                       echo $$cmd; eval $$cmd;         \
-                               fi;                                     \
-                       fi;                                             \
-               done;                                                   \
-       fi;                                                             \
-       cmd="$(INSTALL) $(LIB_INSTALL_FLAGS) $(SYMROOT)/$(*F).a $@";    \
-       echo $$cmd; eval $$cmd
-
-do_build_install: $(INSTALL_DIR)/$(KMOD_NAME).a $(INSTALL_DIR)/$(KMODCPP_NAME).a
-
-# include $(MakeInc_rule)
-include $(MakeInc_dir)
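
The install rule above stitched the per-architecture static archives into one fat archive with lipo; done by hand, the equivalent is roughly (paths illustrative):

lipo BUILD/obj/RELEASE_I386/libkern/kmod/libkmod.a \
     BUILD/obj/RELEASE_X86_64/libkern/kmod/libkmod.a \
     -create -output BUILD/sym/libkmod.a
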
diff --git a/libkern/kmod/libkmod.xcodeproj/project.pbxproj b/libkern/kmod/libkmod.xcodeproj/project.pbxproj
new file mode 100644 (file)
index 0000000..39a8121
--- /dev/null
@@ -0,0 +1,482 @@
+// !$*UTF8*$!
+{
+       archiveVersion = 1;
+       classes = {
+       };
+       objectVersion = 46;
+       objects = {
+
+/* Begin PBXAggregateTarget section */
+               C61E2D9512F3647000FC9BCA /* All */ = {
+                       isa = PBXAggregateTarget;
+                       buildConfigurationList = C61E2D9612F3647000FC9BCA /* Build configuration list for PBXAggregateTarget "All" */;
+                       buildPhases = (
+                       );
+                       dependencies = (
+                               C61E2D9912F364A800FC9BCA /* PBXTargetDependency */,
+                               C61E2DA212F3650100FC9BCA /* PBXTargetDependency */,
+                       );
+                       name = All;
+                       productName = All;
+               };
+/* End PBXAggregateTarget section */
+
+/* Begin PBXBuildFile section */
+               C61E2D8012F360A200FC9BCA /* libkmodtest.h in Headers */ = {isa = PBXBuildFile; fileRef = C61E2D7F12F360A200FC9BCA /* libkmodtest.h */; };
+               C61E2D8212F360A200FC9BCA /* libkmodtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C61E2D8112F360A200FC9BCA /* libkmodtest.cpp */; };
+               C61E2DAC12F3661900FC9BCA /* README in Resources */ = {isa = PBXBuildFile; fileRef = C61E2DA712F3661900FC9BCA /* README */; };
+               C61E2DAD12F3672F00FC9BCA /* c_start.c in Sources */ = {isa = PBXBuildFile; fileRef = C61E2DA312F3661900FC9BCA /* c_start.c */; };
+               C61E2DAE12F3672F00FC9BCA /* c_stop.c in Sources */ = {isa = PBXBuildFile; fileRef = C61E2DA412F3661900FC9BCA /* c_stop.c */; };
+               C61E2DAF12F3673A00FC9BCA /* cplus_start.c in Sources */ = {isa = PBXBuildFile; fileRef = C61E2DA512F3661900FC9BCA /* cplus_start.c */; };
+               C61E2DB012F3673A00FC9BCA /* cplus_stop.c in Sources */ = {isa = PBXBuildFile; fileRef = C61E2DA612F3661900FC9BCA /* cplus_stop.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+               C61E2D9812F364A800FC9BCA /* PBXContainerItemProxy */ = {
+                       isa = PBXContainerItemProxy;
+                       containerPortal = C61E2D6012F3605700FC9BCA /* Project object */;
+                       proxyType = 1;
+                       remoteGlobalIDString = C61E2D9112F3642100FC9BCA;
+                       remoteInfo = libkmod;
+               };
+               C61E2DA112F3650100FC9BCA /* PBXContainerItemProxy */ = {
+                       isa = PBXContainerItemProxy;
+                       containerPortal = C61E2D6012F3605700FC9BCA /* Project object */;
+                       proxyType = 1;
+                       remoteGlobalIDString = C61E2D9D12F364C100FC9BCA;
+                       remoteInfo = "libkmodc++";
+               };
+               C61E2DB112F36AC700FC9BCA /* PBXContainerItemProxy */ = {
+                       isa = PBXContainerItemProxy;
+                       containerPortal = C61E2D6012F3605700FC9BCA /* Project object */;
+                       proxyType = 1;
+                       remoteGlobalIDString = C61E2D9112F3642100FC9BCA;
+                       remoteInfo = libkmod;
+               };
+               C61E2DB312F36ACB00FC9BCA /* PBXContainerItemProxy */ = {
+                       isa = PBXContainerItemProxy;
+                       containerPortal = C61E2D6012F3605700FC9BCA /* Project object */;
+                       proxyType = 1;
+                       remoteGlobalIDString = C61E2D9D12F364C100FC9BCA;
+                       remoteInfo = "libkmodc++";
+               };
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXFileReference section */
+               C61E2D7312F360A200FC9BCA /* libkmodtest.kext */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = libkmodtest.kext; sourceTree = BUILT_PRODUCTS_DIR; };
+               C61E2D7712F360A200FC9BCA /* Kernel.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Kernel.framework; path = System/Library/Frameworks/Kernel.framework; sourceTree = SDKROOT; };
+               C61E2D7A12F360A200FC9BCA /* libkmodtest-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "libkmodtest-Info.plist"; sourceTree = "<group>"; };
+               C61E2D7F12F360A200FC9BCA /* libkmodtest.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = libkmodtest.h; sourceTree = "<group>"; };
+               C61E2D8112F360A200FC9BCA /* libkmodtest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = libkmodtest.cpp; sourceTree = "<group>"; };
+               C61E2D9212F3642100FC9BCA /* libkmod.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libkmod.a; sourceTree = BUILT_PRODUCTS_DIR; };
+               C61E2D9E12F364C100FC9BCA /* libkmodc++.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libkmodc++.a"; sourceTree = BUILT_PRODUCTS_DIR; };
+               C61E2DA312F3661900FC9BCA /* c_start.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = c_start.c; sourceTree = "<group>"; };
+               C61E2DA412F3661900FC9BCA /* c_stop.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = c_stop.c; sourceTree = "<group>"; };
+               C61E2DA512F3661900FC9BCA /* cplus_start.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cplus_start.c; sourceTree = "<group>"; };
+               C61E2DA612F3661900FC9BCA /* cplus_stop.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cplus_stop.c; sourceTree = "<group>"; };
+               C61E2DA712F3661900FC9BCA /* README */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = README; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+               C61E2D6E12F360A200FC9BCA /* Frameworks */ = {
+                       isa = PBXFrameworksBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D8F12F3642100FC9BCA /* Frameworks */ = {
+                       isa = PBXFrameworksBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D9B12F364C100FC9BCA /* Frameworks */ = {
+                       isa = PBXFrameworksBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+               C61E2D5E12F3605700FC9BCA = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2DA312F3661900FC9BCA /* c_start.c */,
+                               C61E2DA412F3661900FC9BCA /* c_stop.c */,
+                               C61E2DA512F3661900FC9BCA /* cplus_start.c */,
+                               C61E2DA612F3661900FC9BCA /* cplus_stop.c */,
+                               C61E2DA712F3661900FC9BCA /* README */,
+                               C61E2D7812F360A200FC9BCA /* libkmodtest */,
+                               C61E2D7512F360A200FC9BCA /* Frameworks */,
+                               C61E2D7412F360A200FC9BCA /* Products */,
+                       );
+                       sourceTree = "<group>";
+               };
+               C61E2D7412F360A200FC9BCA /* Products */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2D7312F360A200FC9BCA /* libkmodtest.kext */,
+                               C61E2D9212F3642100FC9BCA /* libkmod.a */,
+                               C61E2D9E12F364C100FC9BCA /* libkmodc++.a */,
+                       );
+                       name = Products;
+                       sourceTree = "<group>";
+               };
+               C61E2D7512F360A200FC9BCA /* Frameworks */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2D7612F360A200FC9BCA /* Other Frameworks */,
+                       );
+                       name = Frameworks;
+                       sourceTree = "<group>";
+               };
+               C61E2D7612F360A200FC9BCA /* Other Frameworks */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2D7712F360A200FC9BCA /* Kernel.framework */,
+                       );
+                       name = "Other Frameworks";
+                       sourceTree = "<group>";
+               };
+               C61E2D7812F360A200FC9BCA /* libkmodtest */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2D7F12F360A200FC9BCA /* libkmodtest.h */,
+                               C61E2D8112F360A200FC9BCA /* libkmodtest.cpp */,
+                               C61E2D7912F360A200FC9BCA /* Supporting Files */,
+                       );
+                       path = libkmodtest;
+                       sourceTree = "<group>";
+               };
+               C61E2D7912F360A200FC9BCA /* Supporting Files */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C61E2D7A12F360A200FC9BCA /* libkmodtest-Info.plist */,
+                       );
+                       name = "Supporting Files";
+                       sourceTree = "<group>";
+               };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+               C61E2D6F12F360A200FC9BCA /* Headers */ = {
+                       isa = PBXHeadersBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C61E2D8012F360A200FC9BCA /* libkmodtest.h in Headers */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D9012F3642100FC9BCA /* Headers */ = {
+                       isa = PBXHeadersBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D9C12F364C100FC9BCA /* Headers */ = {
+                       isa = PBXHeadersBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+               C61E2D7212F360A200FC9BCA /* libkmodtest */ = {
+                       isa = PBXNativeTarget;
+                       buildConfigurationList = C61E2D8512F360A200FC9BCA /* Build configuration list for PBXNativeTarget "libkmodtest" */;
+                       buildPhases = (
+                               C61E2D6D12F360A200FC9BCA /* Sources */,
+                               C61E2D6E12F360A200FC9BCA /* Frameworks */,
+                               C61E2D6F12F360A200FC9BCA /* Headers */,
+                               C61E2D7012F360A200FC9BCA /* Resources */,
+                               C61E2D7112F360A200FC9BCA /* Rez */,
+                       );
+                       buildRules = (
+                       );
+                       dependencies = (
+                               C61E2DB212F36AC700FC9BCA /* PBXTargetDependency */,
+                               C61E2DB412F36ACB00FC9BCA /* PBXTargetDependency */,
+                       );
+                       name = libkmodtest;
+                       productName = libkmodtest;
+                       productReference = C61E2D7312F360A200FC9BCA /* libkmodtest.kext */;
+                       productType = "com.apple.product-type.kernel-extension";
+               };
+               C61E2D9112F3642100FC9BCA /* libkmod */ = {
+                       isa = PBXNativeTarget;
+                       buildConfigurationList = C61E2D9312F3642100FC9BCA /* Build configuration list for PBXNativeTarget "libkmod" */;
+                       buildPhases = (
+                               C61E2D8E12F3642100FC9BCA /* Sources */,
+                               C61E2D8F12F3642100FC9BCA /* Frameworks */,
+                               C61E2D9012F3642100FC9BCA /* Headers */,
+                       );
+                       buildRules = (
+                       );
+                       dependencies = (
+                       );
+                       name = libkmod;
+                       productName = libkmod;
+                       productReference = C61E2D9212F3642100FC9BCA /* libkmod.a */;
+                       productType = "com.apple.product-type.library.static";
+               };
+               C61E2D9D12F364C100FC9BCA /* libkmodc++ */ = {
+                       isa = PBXNativeTarget;
+                       buildConfigurationList = C61E2D9F12F364C100FC9BCA /* Build configuration list for PBXNativeTarget "libkmodc++" */;
+                       buildPhases = (
+                               C61E2D9A12F364C100FC9BCA /* Sources */,
+                               C61E2D9B12F364C100FC9BCA /* Frameworks */,
+                               C61E2D9C12F364C100FC9BCA /* Headers */,
+                       );
+                       buildRules = (
+                       );
+                       dependencies = (
+                       );
+                       name = "libkmodc++";
+                       productName = "libkmodc++";
+                       productReference = C61E2D9E12F364C100FC9BCA /* libkmodc++.a */;
+                       productType = "com.apple.product-type.library.static";
+               };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+               C61E2D6012F3605700FC9BCA /* Project object */ = {
+                       isa = PBXProject;
+                       attributes = {
+                               LastUpgradeCheck = 0420;
+                       };
+                       buildConfigurationList = C61E2D6312F3605700FC9BCA /* Build configuration list for PBXProject "libkmod" */;
+                       compatibilityVersion = "Xcode 3.2";
+                       developmentRegion = English;
+                       hasScannedForEncodings = 0;
+                       knownRegions = (
+                               en,
+                       );
+                       mainGroup = C61E2D5E12F3605700FC9BCA;
+                       productRefGroup = C61E2D7412F360A200FC9BCA /* Products */;
+                       projectDirPath = "";
+                       projectRoot = "";
+                       targets = (
+                               C61E2D9512F3647000FC9BCA /* All */,
+                               C61E2D9112F3642100FC9BCA /* libkmod */,
+                               C61E2D9D12F364C100FC9BCA /* libkmodc++ */,
+                               C61E2D7212F360A200FC9BCA /* libkmodtest */,
+                       );
+               };
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+               C61E2D7012F360A200FC9BCA /* Resources */ = {
+                       isa = PBXResourcesBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C61E2DAC12F3661900FC9BCA /* README in Resources */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXRezBuildPhase section */
+               C61E2D7112F360A200FC9BCA /* Rez */ = {
+                       isa = PBXRezBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXRezBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+               C61E2D6D12F360A200FC9BCA /* Sources */ = {
+                       isa = PBXSourcesBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C61E2D8212F360A200FC9BCA /* libkmodtest.cpp in Sources */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D8E12F3642100FC9BCA /* Sources */ = {
+                       isa = PBXSourcesBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C61E2DAD12F3672F00FC9BCA /* c_start.c in Sources */,
+                               C61E2DAE12F3672F00FC9BCA /* c_stop.c in Sources */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               C61E2D9A12F364C100FC9BCA /* Sources */ = {
+                       isa = PBXSourcesBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C61E2DAF12F3673A00FC9BCA /* cplus_start.c in Sources */,
+                               C61E2DB012F3673A00FC9BCA /* cplus_stop.c in Sources */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+               C61E2D9912F364A800FC9BCA /* PBXTargetDependency */ = {
+                       isa = PBXTargetDependency;
+                       target = C61E2D9112F3642100FC9BCA /* libkmod */;
+                       targetProxy = C61E2D9812F364A800FC9BCA /* PBXContainerItemProxy */;
+               };
+               C61E2DA212F3650100FC9BCA /* PBXTargetDependency */ = {
+                       isa = PBXTargetDependency;
+                       target = C61E2D9D12F364C100FC9BCA /* libkmodc++ */;
+                       targetProxy = C61E2DA112F3650100FC9BCA /* PBXContainerItemProxy */;
+               };
+               C61E2DB212F36AC700FC9BCA /* PBXTargetDependency */ = {
+                       isa = PBXTargetDependency;
+                       target = C61E2D9112F3642100FC9BCA /* libkmod */;
+                       targetProxy = C61E2DB112F36AC700FC9BCA /* PBXContainerItemProxy */;
+               };
+               C61E2DB412F36ACB00FC9BCA /* PBXTargetDependency */ = {
+                       isa = PBXTargetDependency;
+                       target = C61E2D9D12F364C100FC9BCA /* libkmodc++ */;
+                       targetProxy = C61E2DB312F36ACB00FC9BCA /* PBXContainerItemProxy */;
+               };
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+               C61E2D6612F3605700FC9BCA /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
+                       };
+                       name = Release;
+               };
+               C61E2D8412F360A200FC9BCA /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               CURRENT_PROJECT_VERSION = 1.0.0d1;
+                               DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+                               GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+                               GCC_WARN_ABOUT_RETURN_TYPE = YES;
+                               GCC_WARN_UNUSED_VARIABLE = YES;
+                               INFOPLIST_FILE = "libkmodtest/libkmodtest-Info.plist";
+                               MODULE_NAME = com.apple.driver.libkmodtest;
+                               MODULE_VERSION = 1.0.0d1;
+                               PRODUCT_NAME = "$(TARGET_NAME)";
+                               WRAPPER_EXTENSION = kext;
+                       };
+                       name = Release;
+               };
+               C61E2D9412F3642100FC9BCA /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               COPY_PHASE_STRIP = YES;
+                               EXECUTABLE_PREFIX = lib;
+                               GCC_ENABLE_BUILTIN_FUNCTIONS = NO;
+                               GCC_ENABLE_KERNEL_DEVELOPMENT = YES;
+                               GCC_PREPROCESSOR_DEFINITIONS = (
+                                       KERNEL,
+                                       KERNEL_PRIVATE,
+                                       DRIVER_PRIVATE,
+                                       APPLE,
+                                       NeXT,
+                               );
+                               GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
+                               GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+                               GCC_USE_STANDARD_INCLUDE_SEARCHING = NO;
+                               GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+                               GCC_WARN_ABOUT_RETURN_TYPE = YES;
+                               GCC_WARN_UNUSED_VARIABLE = YES;
+                               HEADER_SEARCH_PATHS = (
+                                       /System/Library/Frameworks/Kernel.framework/PrivateHeaders,
+                                       /System/Library/Frameworks/Kernel.framework/Headers,
+                               );
+                               INSTALL_PATH = /usr/lib;
+                               PRODUCT_NAME = kmod;
+                       };
+                       name = Release;
+               };
+               C61E2D9712F3647000FC9BCA /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               PRODUCT_NAME = "$(TARGET_NAME)";
+                       };
+                       name = Release;
+               };
+               C61E2DA012F364C100FC9BCA /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               COPY_PHASE_STRIP = YES;
+                               EXECUTABLE_PREFIX = lib;
+                               GCC_ENABLE_BUILTIN_FUNCTIONS = NO;
+                               GCC_ENABLE_KERNEL_DEVELOPMENT = YES;
+                               GCC_PREPROCESSOR_DEFINITIONS = (
+                                       KERNEL,
+                                       KERNEL_PRIVATE,
+                                       DRIVER_PRIVATE,
+                                       APPLE,
+                                       NeXT,
+                               );
+                               GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
+                               GCC_TREAT_WARNINGS_AS_ERRORS = YES;
+                               GCC_USE_STANDARD_INCLUDE_SEARCHING = NO;
+                               GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+                               GCC_WARN_ABOUT_RETURN_TYPE = YES;
+                               GCC_WARN_UNUSED_VARIABLE = YES;
+                               HEADER_SEARCH_PATHS = (
+                                       /System/Library/Frameworks/Kernel.framework/PrivateHeaders,
+                                       /System/Library/Frameworks/Kernel.framework/Headers,
+                               );
+                               INSTALL_PATH = /usr/lib;
+                               PRODUCT_NAME = "kmodc++";
+                       };
+                       name = Release;
+               };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+               C61E2D6312F3605700FC9BCA /* Build configuration list for PBXProject "libkmod" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C61E2D6612F3605700FC9BCA /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+               C61E2D8512F360A200FC9BCA /* Build configuration list for PBXNativeTarget "libkmodtest" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C61E2D8412F360A200FC9BCA /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+               C61E2D9312F3642100FC9BCA /* Build configuration list for PBXNativeTarget "libkmod" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C61E2D9412F3642100FC9BCA /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+               C61E2D9612F3647000FC9BCA /* Build configuration list for PBXAggregateTarget "All" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C61E2D9712F3647000FC9BCA /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+               C61E2D9F12F364C100FC9BCA /* Build configuration list for PBXNativeTarget "libkmodc++" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C61E2DA012F364C100FC9BCA /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+/* End XCConfigurationList section */
+       };
+       rootObject = C61E2D6012F3605700FC9BCA /* Project object */;
+}
diff --git a/libkern/kmod/libkmodtest/libkmodtest-Info.plist b/libkern/kmod/libkmodtest/libkmodtest-Info.plist
new file mode 100644 (file)
index 0000000..7092cb6
--- /dev/null
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>${EXECUTABLE_NAME}</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.apple.driver.${PRODUCT_NAME:rfc1034identifier}</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundleName</key>
+       <string>${PRODUCT_NAME}</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleShortVersionString</key>
+       <string>1.0</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1</string>
+       <key>IOKitPersonalities</key>
+       <dict>
+               <key>testlibkmod</key>
+               <dict>
+                       <key>CFBundleIdentifier</key>
+                       <string>com.apple.driver.${PRODUCT_NAME:rfc1034identifier}</string>
+                       <key>IOClass</key>
+                       <string>testlibkmod</string>
+                       <key>IOMatchCategory</key>
+                       <string>testlibkmod</string>
+                       <key>IOProviderClass</key>
+                       <string>IOResources</string>
+                       <key>IOResourceMatch</key>
+                       <string>IOKit</string>
+               </dict>
+       </dict>
+       <key>NSHumanReadableCopyright</key>
+	<string>Copyright © 2011 Apple, Inc. All rights reserved.</string>
+       <key>OSBundleLibraries</key>
+       <dict>
+               <key>com.apple.kpi.iokit</key>
+               <string>11.0</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>11.0</string>
+       </dict>
+</dict>
+</plist>
diff --git a/libkern/kmod/libkmodtest/libkmodtest.cpp b/libkern/kmod/libkmodtest/libkmodtest.cpp
new file mode 100644 (file)
index 0000000..6886cd3
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include "libkmodtest.h"
+
+#define super IOService
+OSDefineMetaClassAndStructors(testlibkmod, super);
+
+IOService *
+testlibkmod::probe(
+	IOService *provider,
+	SInt32 *score )
+{
+       IOLog("%s\n", __PRETTY_FUNCTION__);
+       return NULL;
+}
diff --git a/libkern/kmod/libkmodtest/libkmodtest.h b/libkern/kmod/libkmodtest/libkmodtest.h
new file mode 100644 (file)
index 0000000..cd0eb44
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 Apple, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <IOKit/IOService.h>
+#include <IOKit/IOLib.h>
+
+class testlibkmod : public IOService {
+       OSDeclareDefaultStructors(testlibkmod);
+    
+       virtual IOService * probe(
+                                                         IOService *provider, 
+                                                         SInt32 *score );
+    
+};
\ No newline at end of file
index 0e962487be5b1296a6038207e0d16f434bbe3e3e..8c322053ce1b6eb687617dada8a2b85cfe01a0bd 100644 (file)
@@ -18,10 +18,12 @@ ifndef SYMROOT
 SYMROOT=./BUILD/sym
 endif
 ifdef SRCROOT
+EXTHDRSRC=$(SRCROOT)/EXTERNAL_HEADERS
 HDRSRC=$(SRCROOT)/libkern/libkern
 OBJSRC=$(SRCROOT)/libkern/kxld
 else
 SRCROOT=.
+EXTHDRSRC=$(SRCROOT)/../../EXTERNAL_HEADERS
 HDRSRC=$(SRCROOT)/../libkern
 OBJSRC=$(SRCROOT)
 ROOT=BUILD
@@ -29,9 +31,12 @@ endif
 ifdef RC_CFLAGS
 ARCHS=$(addprefix -arch , $(RC_ARCHS))
 else
-ARCHS=-arch i386 -arch x86_64 -arch ppc
+ARCHS=-arch i386 -arch x86_64
 RC_CFLAGS=$(ARCHS) -pipe
 endif
+ifdef INSTALL_LOCATION
+override DSTROOT := $(DSTROOT)/$(INSTALL_LOCATION)
+endif
 
 PRODUCT_TYPE ?= DYLIB
 
@@ -60,7 +65,7 @@ CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \
 LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLD_INSTALLNAME) \
        -compatibility_version $(COMPATIBILITY_VERSION) \
        -current_version $(CURRENT_VERSION) -lstdc++
-INCLUDES=-I$(HDRSRC)
+INCLUDES=-I$(HDRSRC) -I$(EXTHDRSRC)
 
 ifneq ($(SDKROOT),/)
        CFLAGS += -isysroot $(SDKROOT)
@@ -68,22 +73,18 @@ ifneq ($(SDKROOT),/)
 endif
 
 # Tools
-CC = xcrun -sdk $(SDKROOT) cc
+CC = xcrun -sdk $(SDKROOT) clang
 CLANG_ANALYZER = clang --analyze
 LIBTOOL = xcrun -sdk $(SDKROOT) libtool
 STRIP = xcrun -sdk $(SDKROOT) strip
 
-# Turn on -Wno-cast-align for arm since it won't build without it
-ifeq ($(findstring arm, $(ARCHS)),arm)
-CFLAGS+=-Wno-cast-align 
-endif
-
 # Files
 HDR_NAMES=kxld.h kxld_types.h WKdm.h
 OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_demangle.o kxld_dict.o \
-       kxld_kext.o kxld_object.o kxld_reloc.o kxld_sect.o kxld_seg.o           \
-       kxld_sym.o kxld_symtab.o kxld_util.o kxld_uuid.o kxld_vtable.o \
-       WKdmCompress.o WKdmDecompress.o
+       kxld_kext.o kxld_object.o kxld_reloc.o kxld_sect.o kxld_seg.o      \
+       kxld_srcversion.o kxld_sym.o kxld_symtab.o kxld_util.o kxld_uuid.o \
+    kxld_vtable.o kxld_versionmin.o WKdmCompress.o WKdmDecompress.o
+
 HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES))
 OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES))
 
@@ -99,7 +100,7 @@ $(OBJROOT)/%.o : $(TESTSRC)/%.c
 SRCROOTESC=$(subst /,\/,$(SRCROOT))
 OBJROOTESC=$(subst /,\/,$(OBJROOT))
 SEDOBJS=sed -E 's/(^[a-z_]+)\.o/$(OBJROOTESC)\/\1\.o $(OBJROOTESC)\/\1\.d/'
-SEDSRCS=sed -E 's/([a-z_]+\.[ch])/$(SRCROOTESC)\/\1/g'
+SEDSRCS=sed -E 's/ ([a-z_]+\.[ch])/ $(SRCROOTESC)\/\1/g'
 $(OBJROOT)/%.d: $(OBJSRC)/%.c
        @set -e; rm -f $@; \
        $(CC) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@;
index db2c5c05b291f38b1177e2071efdfc3c11e10aae..5109015c9393c98c5871abf846633da3045b64b6 100644 (file)
@@ -150,7 +150,7 @@ WKdm_compress (WK_word* src_buf,
       */
      dict_location =
        (WK_word *)
-       (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word));
+       ((void*) (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)));
 
      dict_word = *dict_location;
 
@@ -232,7 +232,7 @@ WKdm_compress (WK_word* src_buf,
 #endif
 
   boundary_tmp = WK_pack_2bits(tempTagsArray,
-                              (WK_word *) next_tag,
+                              (WK_word *) ((void *) next_tag),
                               dest_buf + HEADER_SIZE_IN_WORDS);
 
 #ifdef WK_DEBUG  
index 8921ae0e92ef5c8f296e4a6665859f2c7c9f6f7a..8eaf78bd89421667b76f461a3b6ad6f9b3dff9f3 100644 (file)
@@ -262,7 +262,7 @@ WKdm_decompress (WK_word* src_buf,
             WK_word missed_word = *(next_full_word++);
             WK_word *dict_location = 
               (WK_word *)
-              (((char *) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word));
+              ((void *) (((char *) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word)));
             *dict_location = missed_word;
             *next_output = missed_word;
             break;
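
[Editor's note] The pattern repeated through these WKdm and kxld hunks -- inserting a (void *) between a byte pointer and a more strictly aligned pointer type -- exists to satisfy clang's -Wcast-align, which the kxld CFLAGS enable (and which the removed arm-only -Wno-cast-align workaround used to suppress). A minimal sketch of the idiom, not taken from the commit:

    #include <stdint.h>

    /* A direct cast from char* to uint32_t* trips -Wcast-align because the
     * required alignment increases from 1 to 4.  Routing the cast through
     * void* yields the same address while making the alignment the
     * programmer's responsibility.
     */
    static uint32_t *
    word_at(char *base, unsigned long byte_offset)
    {
        return (uint32_t *)(void *)(base + byte_offset);
    }
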
index ada1cf3cfc9b3b8c68016ddebad5321092a4c166..da3fbec7d23a92947ea5cf2e9ccbc3a6cb8f1b56 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2008, 2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -354,8 +354,9 @@ init_kext_objects(KXLDContext *context, u_char *file, u_long size,
      * export its symbols by name by value.  If it's indirect, just export the
      * C++ symbols by value.
      */
-    for (i = 0; i < ndependencies; ++i) { kext =
-        kxld_array_get_item(&context->dependencies, i); kext_object = NULL;
+    for (i = 0; i < ndependencies; ++i) {
+        kext = kxld_array_get_item(&context->dependencies, i);
+        kext_object = NULL;
         interface_object = NULL;
 
         kext_object = get_object_for_file(context, dependencies[i].kext,
@@ -432,7 +433,7 @@ get_object_for_file(KXLDContext *context, u_char *file, u_long size,
 
         if (!kxld_object_get_file(object)) {
             result = kxld_object_init_from_macho(object, file, size, name,
-                context->section_order, context->cputype, context->cpusubtype);
+                context->section_order, context->cputype, context->cpusubtype, context->flags);
             require_noerr(result, finish);
 
             rval = object;
@@ -480,6 +481,8 @@ allocate_kext(KXLDContext *context, void *callback_data,
         *linked_object_alloc_out = linked_object;
     }
 
+    kxld_kext_set_linked_object_size(context->kext, vmsize);
+    
     /* Zero out the memory before we fill it.  We fill this buffer in a
      * sparse fashion, and it's simpler to clear it now rather than
      * track and zero any pieces we didn't touch after we've written
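
[Editor's note] The kxld_kext_set_linked_object_size() call added above records the vmsize of the buffer the allocation callback just produced; the export path later in this diff reads the cached value instead of recomputing the size. A rough, compilable mirror of that plumbing -- the names here are illustrative, not from the source:

    #include <sys/types.h>

    /* Illustrative only: cache the size of the caller-allocated output
     * buffer at allocation time, then treat it as the bound at export time.
     */
    typedef struct {
        u_long output_buffer_size;   /* set once, read at export time */
    } LinkerState;

    static void
    set_linked_object_size(LinkerState *state, u_long vmsize)
    {
        state->output_buffer_size = vmsize;
    }
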
index b2be1535ae629e103cc7f5f4c87f14a2ea86c90e..a9ef4779820b2b6d4e41abb83d73f5ba4a761878 100644 (file)
@@ -26,7 +26,6 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <string.h>
-#include <mach/machine.h>
 #include <mach/vm_param.h>
 #include <mach/vm_types.h>
 #include <mach/kmod.h>
@@ -60,7 +59,6 @@
 #include "kxld_seg.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
-#include "kxld_uuid.h"
 #include "kxld_vtable.h"
 
 struct symtab_command;
@@ -185,9 +183,9 @@ kxld_kext_deinit(KXLDKext *kext)
 *******************************************************************************/
 kern_return_t 
 kxld_kext_export_symbols(const KXLDKext *kext, 
-    struct kxld_dict *defined_symbols_by_name,
-    struct kxld_dict *obsolete_symbols_by_name,
-    struct kxld_dict *defined_cxx_symbols_by_value)
+    KXLDDict *defined_symbols_by_name,
+    KXLDDict *obsolete_symbols_by_name,
+    KXLDDict *defined_cxx_symbols_by_value)
 {
     kern_return_t rval = KERN_FAILURE;
 
@@ -364,7 +362,16 @@ kxld_kext_get_vmsize(const KXLDKext *kext,
 {
     (void) kxld_object_get_vmsize(kext->kext, header_size, vmsize);
 }
-    
+
+/*******************************************************************************
+ *******************************************************************************/
+void 
+kxld_kext_set_linked_object_size(KXLDKext *kext, u_long vmsize)
+{
+    (void) kxld_object_set_linked_object_size(kext->kext, vmsize);
+}
+
+
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
index f2b80c0f6d13e60b4fb1c98056dd9033a09bdefd..58e9326845d20462f5ed3fc997e9a19363974260 100644 (file)
@@ -28,7 +28,6 @@
 #ifndef _KXLD_KEXT_H_
 #define _KXLD_KEXT_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
@@ -76,7 +75,10 @@ kern_return_t kxld_kext_export_symbols(const KXLDKext *kext,
 void kxld_kext_get_vmsize(const KXLDKext *kext, 
     u_long *header_size, u_long *vmsize)
     __attribute__((nonnull, visibility("hidden")));
-    
+
+void kxld_kext_set_linked_object_size(KXLDKext *kext, u_long vmsize)
+__attribute__((nonnull, visibility("hidden")));
+
 kern_return_t kxld_kext_export_linked_object(const KXLDKext *kext,
     u_char *linked_object, kxld_addr_t *kmod_info)
     __attribute__((nonnull, visibility("hidden")));
index 24b589912caac042594360057051c91c50ad1f32..752518b7a20b9f38a8c3354fd66db017ed1764fe 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2009, 2011-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <string.h>
-
-#include <mach-o/loader.h>
-#include <mach-o/nlist.h>
-#include <mach-o/reloc.h>
 #include <sys/types.h>
 
 #if KERNEL
     #include <libkern/kernel_mach_header.h>
+    #include <mach/machine.h>
     #include <mach/vm_param.h>
     #include <mach-o/fat.h>
 #else /* !KERNEL */
+    /* Get machine.h from the kernel source so we can support all platforms
+     * that the kernel supports. Otherwise we're at the mercy of the host.
+     */
+    #include "../../osfmk/mach/machine.h"
+
     #include <architecture/byte_order.h>
     #include <mach/mach_init.h>
     #include <mach-o/arch.h>
     #include <mach-o/swap.h>
 #endif /* KERNEL */
 
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+#include <mach-o/reloc.h>
+
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_seg.h"
+#include "kxld_srcversion.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
 #include "kxld_uuid.h"
+#include "kxld_versionmin.h"
 #include "kxld_vtable.h"
 
 #include "kxld_object.h"
@@ -75,9 +83,12 @@ struct kxld_object {
     KXLDArray locrelocs;
     KXLDRelocator relocator;
     KXLDuuid uuid;
+    KXLDversionmin versionmin;
+    KXLDsrcversion srcversion;
     KXLDSymtab *symtab;
     struct dysymtab_command *dysymtab_hdr;
     kxld_addr_t link_addr;
+    u_long    output_buffer_size;
     boolean_t is_kernel;
     boolean_t is_final_image;
     boolean_t is_linked;
@@ -85,6 +96,9 @@ struct kxld_object {
 #if KXLD_USER_OR_OBJECT
     KXLDArray *section_order;
 #endif
+#if KXLD_PIC_KEXTS
+    boolean_t include_kaslr_relocs;
+#endif
 #if !KERNEL
     enum NXByteOrder host_order;
     enum NXByteOrder target_order;
@@ -129,6 +143,11 @@ static kern_return_t init_from_object(KXLDObject *object);
 static kern_return_t process_relocs_from_sections(KXLDObject *object);
 #endif /* KXLD_USER_OR_OBJECT */
 
+#if KXLD_PIC_KEXTS
+static boolean_t target_supports_slideable_kexts(const KXLDObject *object);
+#endif  /* KXLD_PIC_KEXTS */
+
+
 static kern_return_t export_macho_header(const KXLDObject *object, u_char *buf, 
     u_int ncmds, u_long *header_offset, u_long header_size);
 #if KXLD_USER_OR_ILP32
@@ -183,7 +202,7 @@ kxld_object_sizeof(void)
 kern_return_t 
 kxld_object_init_from_macho(KXLDObject *object, u_char *file, u_long size,
     const char *name, KXLDArray *section_order __unused, 
-    cpu_type_t cputype, cpu_subtype_t cpusubtype)
+    cpu_type_t cputype, cpu_subtype_t cpusubtype, KXLDFlags flags __unused)
 {
     kern_return_t       rval    = KERN_FAILURE;
     KXLDSeg           * seg     = NULL;
@@ -198,6 +217,10 @@ kxld_object_init_from_macho(KXLDObject *object, u_char *file, u_long size,
 #if KXLD_USER_OR_OBJECT
     object->section_order = section_order;
 #endif
+#if KXLD_PIC_KEXTS
+    object->include_kaslr_relocs = ((flags & kKXLDFlagIncludeRelocs) == kKXLDFlagIncludeRelocs);
+#endif
+    
     /* Find the local architecture */
 
     rval = get_target_machine_info(object, cputype, cpusubtype);
@@ -231,10 +254,10 @@ kxld_object_init_from_macho(KXLDObject *object, u_char *file, u_long size,
      */
 
     if (kxld_object_is_32_bit(object)) {
-        struct mach_header *mach_hdr = (struct mach_header *) object->file;
+        struct mach_header *mach_hdr = (struct mach_header *) ((void *) object->file);
         object->filetype = mach_hdr->filetype;
     } else {
-        struct mach_header_64 *mach_hdr = (struct mach_header_64 *) object->file;
+        struct mach_header_64 *mach_hdr = (struct mach_header_64 *) ((void *) object->file);
         object->filetype = mach_hdr->filetype;
     }
 
@@ -273,7 +296,12 @@ kxld_object_init_from_macho(KXLDObject *object, u_char *file, u_long size,
         seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
         if (seg) {
             (void) kxld_seg_populate_linkedit(seg, object->symtab,
-                kxld_object_is_32_bit(object));
+                kxld_object_is_32_bit(object)
+#if KXLD_PIC_KEXTS
+                , &object->locrelocs, &object->extrelocs,
+                target_supports_slideable_kexts(object)
+#endif
+                );
         }
     }
 
@@ -344,9 +372,6 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused,
         case CPU_TYPE_I386:
             object->cpusubtype = CPU_SUBTYPE_I386_ALL;
             break;
-        case CPU_TYPE_POWERPC:
-            object->cpusubtype = CPU_SUBTYPE_POWERPC_ALL;
-            break;
         case CPU_TYPE_X86_64:
             object->cpusubtype = CPU_SUBTYPE_X86_64_ALL;
             break;
@@ -368,9 +393,6 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused,
     case CPU_TYPE_X86_64:
         object->target_order = NX_LittleEndian;
         break;
-    case CPU_TYPE_POWERPC:
-        object->target_order = NX_BigEndian;
-        break;
     default:
         rval = KERN_NOT_SUPPORTED;
         kxld_log(kKxldLogLinking, kKxldLogErr, 
@@ -393,7 +415,7 @@ get_macho_slice_for_arch(KXLDObject *object, u_char *file, u_long size)
     kern_return_t rval = KERN_FAILURE;
     struct mach_header *mach_hdr = NULL;
 #if !KERNEL
-    struct fat_header *fat = (struct fat_header *) file;
+    struct fat_header *fat = (struct fat_header *) ((void *) file);
     struct fat_arch *archs = (struct fat_arch *) &fat[1];
     boolean_t swap = FALSE;
 #endif /* KERNEL */
@@ -462,7 +484,7 @@ get_macho_slice_for_arch(KXLDObject *object, u_char *file, u_long size)
     }
     require_noerr(rval, finish);
 
-    mach_hdr = (struct mach_header *) object->file;
+    mach_hdr = (struct mach_header *) ((void *) object->file);
     require_action(object->cputype == mach_hdr->cputype, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
@@ -484,6 +506,8 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
     struct load_command *cmd_hdr = NULL;
     struct symtab_command *symtab_hdr = NULL;
     struct uuid_command *uuid_hdr = NULL;
+    struct version_min_command *versionmin_hdr = NULL;
+    struct source_version_command *source_version_hdr = NULL;
     u_long base_offset = 0;
     u_long offset = 0;
     u_long sect_offset = 0;
@@ -504,7 +528,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
 
     offset = base_offset;
     for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (object->file + offset);
+        cmd_hdr = (struct load_command *) ((void *) (object->file + offset));
 
         switch(cmd_hdr->cmd) {
 #if KXLD_USER_OR_ILP32
@@ -525,7 +549,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
         case LC_SEGMENT_64:
             {
                 struct segment_command_64 *seg_hdr = 
-                    (struct segment_command_64 *) cmd_hdr;
+                    (struct segment_command_64 *) ((void *) cmd_hdr);
 
                 /* Ignore segments with no vm size */
                 if (!seg_hdr->vmsize) continue;
@@ -554,7 +578,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
 
     offset = base_offset;
     for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (object->file + offset); 
+        cmd_hdr = (struct load_command *) ((void *) (object->file + offset)); 
         seg = NULL;
 
         switch(cmd_hdr->cmd) {
@@ -580,7 +604,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
         case LC_SEGMENT_64:
             {
                 struct segment_command_64 *seg_hdr = 
-                    (struct segment_command_64 *) cmd_hdr;
+                    (struct segment_command_64 *) ((void *) cmd_hdr);
 
                 /* Ignore segments with no vm size */
                 if (!seg_hdr->vmsize) continue;
@@ -601,26 +625,44 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
             uuid_hdr = (struct uuid_command *) cmd_hdr;
             kxld_uuid_init_from_macho(&object->uuid, uuid_hdr);
             break;
+        case LC_VERSION_MIN_MACOSX:
+        case LC_VERSION_MIN_IPHONEOS:
+            versionmin_hdr = (struct version_min_command *) cmd_hdr;
+            kxld_versionmin_init_from_macho(&object->versionmin, versionmin_hdr);
+            break;
+        case LC_SOURCE_VERSION:
+            source_version_hdr = (struct source_version_command *) (void *) cmd_hdr;
+            kxld_srcversion_init_from_macho(&object->srcversion, source_version_hdr);
+            break;
         case LC_DYSYMTAB:
             object->dysymtab_hdr = (struct dysymtab_command *) cmd_hdr;            
 
             rval = kxld_reloc_create_macho(&object->extrelocs, &object->relocator,
-                (struct relocation_info *) (object->file + object->dysymtab_hdr->extreloff), 
+                (struct relocation_info *) ((void *) (object->file + object->dysymtab_hdr->extreloff)), 
                 object->dysymtab_hdr->nextrel);
             require_noerr(rval, finish);
 
             rval = kxld_reloc_create_macho(&object->locrelocs, &object->relocator,
-                (struct relocation_info *) (object->file + object->dysymtab_hdr->locreloff), 
+                (struct relocation_info *) ((void *) (object->file + object->dysymtab_hdr->locreloff)), 
                 object->dysymtab_hdr->nlocrel);
             require_noerr(rval, finish);
 
             break;
         case LC_UNIXTHREAD:
-            /* Don't need to do anything with UNIXTHREAD for the kernel */
+        case LC_MAIN:
+            /* Don't need to do anything with UNIXTHREAD or MAIN for the kernel */
             require_action(kxld_object_is_kernel(object), 
                 finish, rval=KERN_FAILURE;
                 kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                    "LC_UNIXTHREAD segment is not valid in a kext."));
+                    "LC_UNIXTHREAD/LC_MAIN segment is not valid in a kext."));
+            break;
+        case LC_CODE_SIGNATURE:
+        case LC_DYLD_INFO:
+        case LC_DYLD_INFO_ONLY:
+        case LC_FUNCTION_STARTS:
+        case LC_DATA_IN_CODE:
+        case LC_DYLIB_CODE_SIGN_DRS:
+            /* Various metadata that might be stored in the linkedit segment */
             break;
         default:
             rval=KERN_FAILURE;
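
[Editor's note] Both load-command loops touched in this file share the walking pattern visible in the context lines: read a struct load_command at the current offset, dispatch on cmd, then advance by cmdsize. A self-contained sketch of that walk, assuming the host's <mach-o/loader.h> and eliding the error handling:

    #include <mach-o/loader.h>

    static void
    walk_load_commands(unsigned char *file, unsigned long offset, unsigned ncmds)
    {
        struct load_command *cmd_hdr = NULL;
        unsigned i;

        for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
            cmd_hdr = (struct load_command *)(void *)(file + offset);
            switch (cmd_hdr->cmd) {
            case LC_UUID:           /* dispatch per command type here */
            case LC_SYMTAB:
            default:
                break;
            }
        }
    }
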
@@ -695,7 +737,7 @@ init_from_execute(KXLDObject *object)
         kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
 #endif
 
-       KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
         kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
         object->symtab, symtab_hdr, object->file, kernel_linkedit_seg);
     require_noerr(rval, finish);
@@ -736,11 +778,9 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 static boolean_t
-target_supports_bundle(const KXLDObject *object)
+target_supports_bundle(const KXLDObject *object __unused)
 {
-    return (object->cputype == CPU_TYPE_I386    ||
-            object->cputype == CPU_TYPE_X86_64  ||
-            object->cputype == CPU_TYPE_ARM);
+    return TRUE;
 }
 
 /*******************************************************************************
@@ -782,9 +822,7 @@ finish:
 *******************************************************************************/
 static boolean_t target_supports_object(const KXLDObject *object)
 {
-    return (object->cputype == CPU_TYPE_POWERPC ||
-            object->cputype == CPU_TYPE_I386    ||
-            object->cputype == CPU_TYPE_ARM);
+    return (object->cputype == CPU_TYPE_I386);
 }
 
 /*******************************************************************************
@@ -825,7 +863,7 @@ init_from_object(KXLDObject *object)
      */
 
     for (; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (object->file + offset);
+        cmd_hdr = (struct load_command *) ((void *) (object->file + offset));
 
         switch(cmd_hdr->cmd) {
 #if KXLD_USER_OR_ILP32
@@ -861,7 +899,7 @@ init_from_object(KXLDObject *object)
         case LC_SEGMENT_64:
             {
                 struct segment_command_64 *seg_hdr =
-                    (struct segment_command_64 *) cmd_hdr;
+                    (struct segment_command_64 *) ((void *) cmd_hdr);
 
                 /* Ignore segments with no vm size */
                 if (!seg_hdr->vmsize) continue;
@@ -900,8 +938,21 @@ init_from_object(KXLDObject *object)
             kxld_uuid_init_from_macho(&object->uuid, uuid_hdr);
             break;
         case LC_UNIXTHREAD:
-            /* Don't need to do anything with UNIXTHREAD */
+        case LC_MAIN:
+            /* Don't need to do anything with UNIXTHREAD or MAIN */
             break;
+        case LC_CODE_SIGNATURE:
+        case LC_DYLD_INFO:
+        case LC_DYLD_INFO_ONLY:
+        case LC_FUNCTION_STARTS:
+        case LC_DATA_IN_CODE:
+        case LC_DYLIB_CODE_SIGN_DRS:
+            /* Various metadata that might be stored in the linkedit segment */
+            break;
+        case LC_VERSION_MIN_MACOSX:
+        case LC_VERSION_MIN_IPHONEOS:
+        case LC_SOURCE_VERSION:
+            /* Not supported for object files, fall through */
         default:
             rval = KERN_FAILURE;
             kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
@@ -964,7 +1015,7 @@ finish:
 static u_long
 get_macho_cmd_data_32(u_char *file, u_long offset, u_int *filetype, u_int *ncmds)
 {
-    struct mach_header *mach_hdr = (struct mach_header *) (file + offset);
+    struct mach_header *mach_hdr = (struct mach_header *) ((void *) (file + offset));
 
     if (filetype) *filetype = mach_hdr->filetype;
     if (ncmds) *ncmds = mach_hdr->ncmds;
@@ -980,7 +1031,7 @@ get_macho_cmd_data_32(u_char *file, u_long offset, u_int *filetype, u_int *ncmds
 static u_long
 get_macho_cmd_data_64(u_char *file, u_long offset, u_int *filetype,  u_int *ncmds)
 {
-    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) (file + offset);
+    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) ((void *) (file + offset));
 
     if (filetype) *filetype = mach_hdr->filetype;
     if (ncmds) *ncmds = mach_hdr->ncmds;
@@ -997,28 +1048,39 @@ get_macho_header_size(const KXLDObject *object)
     KXLDSeg *seg = NULL;
     u_long header_size = 0;
     u_int i = 0;
+    boolean_t   object_is_32_bit = kxld_object_is_32_bit(object);
 
     check(object);
 
     /* Mach, segment, symtab, and UUID headers */
 
-    if (kxld_object_is_32_bit(object)) {
-        header_size += sizeof(struct mach_header);
-    } else {
-        header_size += sizeof(struct mach_header_64);
-    }
+    header_size += object_is_32_bit ? sizeof(struct mach_header) : sizeof(struct mach_header_64);
 
     for (i = 0; i < object->segs.nitems; ++i) {
         seg = kxld_array_get_item(&object->segs, i);
-        header_size += kxld_seg_get_macho_header_size(seg, kxld_object_is_32_bit(object));
+        header_size += kxld_seg_get_macho_header_size(seg, object_is_32_bit);
     }
 
     header_size += kxld_symtab_get_macho_header_size();
 
+#if KXLD_PIC_KEXTS
+    if (target_supports_slideable_kexts(object)) {
+        header_size += kxld_reloc_get_macho_header_size();
+    }
+#endif /* KXLD_PIC_KEXTS */
+
     if (object->uuid.has_uuid) {
         header_size += kxld_uuid_get_macho_header_size();
     }
 
+    if (object->versionmin.has_versionmin) {
+        header_size += kxld_versionmin_get_macho_header_size();
+    }
+
+    if (object->srcversion.has_srcversion) {
+        header_size += kxld_srcversion_get_macho_header_size();
+    }
+    
     return header_size;
 }
 
@@ -1033,11 +1095,48 @@ get_macho_data_size(const KXLDObject *object)
 
     check(object);
 
+    /* total all segment vmsize values */
     for (i = 0; i < object->segs.nitems; ++i) {
         seg = kxld_array_get_item(&object->segs, i);
         data_size += (u_long) kxld_seg_get_vmsize(seg);
     }
 
+#if KXLD_PIC_KEXTS
+    {
+        /* ensure that when we eventually emit the final linked object, 
+         * appending the __DYSYMTAB data after the __LINKEDIT data will
+         * not overflow the space allocated for the __LINKEDIT segment
+         */
+        
+        u_long  seg_vmsize = 0;
+        u_long  symtab_size = 0;
+        u_long  reloc_size = 0;
+        
+        /* get current __LINKEDIT sizes */
+        seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
+        seg_vmsize = (u_long) kxld_seg_get_vmsize(seg);
+        
+        /* get size of symbol table data that will eventually be dumped
+         * into the __LINKEDIT segment
+         */
+        symtab_size = kxld_symtab_get_macho_data_size(object->symtab, kxld_object_is_32_bit(object));
+        
+        if (target_supports_slideable_kexts(object)) {
+            /* get size of __DYSYMTAB relocation entries */
+            reloc_size = kxld_reloc_get_macho_data_size(&object->locrelocs, &object->extrelocs);
+        }
+        
+        /* combine, and ensure they'll both fit within the page(s)
+         * allocated for the __LINKEDIT segment. If they'd overflow,
+         * increase the vmsize appropriately so no overflow will occur
+         */
+        if ((symtab_size + reloc_size) > seg_vmsize) {
+            u_long  overflow = (symtab_size + reloc_size) - seg_vmsize;
+            data_size += round_page(overflow);
+        }
+    }
+#endif  // KXLD_PIC_KEXTS
+
     return data_size;
 }
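
[Editor's note] The overflow computation above deserves a concrete example. If the current __LINKEDIT vmsize is one 4 KiB page (0x1000), the symbol table needs 0xE00 bytes, and the exported relocations need 0x400, the combined 0x1200 exceeds the segment by 0x200, which rounds up to a full extra page. A worked sketch, assuming a 4 KiB page for round_page():

    #define EXAMPLE_PAGE 0x1000UL
    #define example_round_page(x) (((x) + EXAMPLE_PAGE - 1) & ~(EXAMPLE_PAGE - 1))

    static unsigned long
    linkedit_padding_example(void)
    {
        unsigned long seg_vmsize  = 0x1000;  /* current __LINKEDIT vmsize */
        unsigned long symtab_size = 0x0E00;  /* symbol table data         */
        unsigned long reloc_size  = 0x0400;  /* exported reloc entries    */

        if (symtab_size + reloc_size <= seg_vmsize)
            return 0;
        /* 0x1200 - 0x1000 = 0x200 of overflow, padded to a 0x1000 page */
        return example_round_page((symtab_size + reloc_size) - seg_vmsize);
    }
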
 
@@ -1395,7 +1494,7 @@ set_is_object_linked(KXLDObject *object)
     }
 
     if (object->is_final_image) {
-        object->is_linked = !object->extrelocs.nitems && !object->locrelocs.nitems;
+        object->is_linked = !object->extrelocs.nitems;
         return;
     }
 
@@ -1442,6 +1541,8 @@ void kxld_object_clear(KXLDObject *object __unused)
     kxld_array_reset(&object->locrelocs);
     kxld_relocator_clear(&object->relocator);
     kxld_uuid_clear(&object->uuid);
+    kxld_versionmin_clear(&object->versionmin);
+    kxld_srcversion_clear(&object->srcversion);
 
     if (object->symtab) kxld_symtab_clear(object->symtab);
 
@@ -1570,8 +1671,7 @@ kxld_object_target_supports_strict_patching(const KXLDObject *object)
 {
     check(object);
 
-    return (object->cputype != CPU_TYPE_I386 && 
-            object->cputype != CPU_TYPE_POWERPC);
+    return (object->cputype != CPU_TYPE_I386);
 }
 
 /*******************************************************************************
@@ -1581,8 +1681,7 @@ kxld_object_target_supports_common_symbols(const KXLDObject *object)
 {
     check(object);
 
-    return (object->cputype == CPU_TYPE_I386 || 
-            object->cputype == CPU_TYPE_POWERPC);
+    return (object->cputype == CPU_TYPE_I386);
 }
 
 /*******************************************************************************
@@ -1605,6 +1704,15 @@ kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size,
 
 }
 
+/*******************************************************************************
+ *******************************************************************************/
+void
+kxld_object_set_linked_object_size(KXLDObject *object, u_long vmsize)
+{
+    object->output_buffer_size = vmsize;       /* cache this for use later */
+    return;
+}
+
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
@@ -1619,6 +1727,7 @@ kxld_object_export_linked_object(const KXLDObject *object,
     u_long data_offset = 0;
     u_int ncmds = 0;
     u_int i = 0;
+    boolean_t   is_32bit_object = kxld_object_is_32_bit(object);
 
     check(object);
     check(linked_object);
@@ -1627,36 +1736,74 @@ kxld_object_export_linked_object(const KXLDObject *object,
 
     header_size = get_macho_header_size(object);
     data_offset = (object->is_final_image) ? header_size : round_page(header_size);
-    size = data_offset + get_macho_data_size(object);
+    size = object->output_buffer_size;
 
     /* Copy data to the file */
 
-    ncmds = object->segs.nitems + (object->uuid.has_uuid == TRUE) + 1 /* linkedit */;
+    ncmds = object->segs.nitems + 1 /* LC_SYMTAB */;
 
-    rval = export_macho_header(object, linked_object, ncmds, 
-        &header_offset, header_size);
+#if KXLD_PIC_KEXTS
+    /* don't write out a DYSYMTAB segment for targets that can't digest it
+     */
+    if (target_supports_slideable_kexts(object)) {
+        ncmds++; /* dysymtab */
+    }
+#endif /* KXLD_PIC_KEXTS */
+
+    if (object->uuid.has_uuid == TRUE) {
+        ncmds++;
+    }
+
+    if (object->versionmin.has_versionmin == TRUE) {
+        ncmds++;
+    }
+
+    if (object->srcversion.has_srcversion == TRUE) {
+        ncmds++;
+    }
+    
+    rval = export_macho_header(object, linked_object, ncmds, &header_offset, header_size);
     require_noerr(rval, finish);
 
     for (i = 0; i < object->segs.nitems; ++i) {
         seg = kxld_array_get_item(&object->segs, i);
 
-        rval = kxld_seg_export_macho_to_vm(seg, linked_object, &header_offset, 
-            header_size, size, object->link_addr, kxld_object_is_32_bit(object));
+        rval = kxld_seg_export_macho_to_vm(seg, linked_object, &header_offset,
+            header_size, size, object->link_addr, is_32bit_object);
         require_noerr(rval, finish);
     }
 
     seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
     data_offset = (u_long) (seg->link_addr - object->link_addr);
+    
     rval = kxld_symtab_export_macho(object->symtab, linked_object, &header_offset,
-        header_size, &data_offset, size, kxld_object_is_32_bit(object));
+        header_size, &data_offset, size, is_32bit_object);
     require_noerr(rval, finish);
 
+#if KXLD_PIC_KEXTS
+    if (target_supports_slideable_kexts(object)) {
+        rval = kxld_reloc_export_macho(&object->relocator, &object->locrelocs,
+            &object->extrelocs, linked_object, &header_offset, header_size,
+            &data_offset, size);
+        require_noerr(rval, finish);
+    }
+#endif /* KXLD_PIC_KEXTS */
+
     if (object->uuid.has_uuid) {
-        rval = kxld_uuid_export_macho(&object->uuid, linked_object, 
-            &header_offset, header_size);
+        rval = kxld_uuid_export_macho(&object->uuid, linked_object, &header_offset, header_size);
+        require_noerr(rval, finish);
+    }
+
+    if (object->versionmin.has_versionmin) {
+        rval = kxld_versionmin_export_macho(&object->versionmin, linked_object, &header_offset, header_size);
         require_noerr(rval, finish);
     }
 
+    if (object->srcversion.has_srcversion) {
+        rval = kxld_srcversion_export_macho(&object->srcversion, linked_object, &header_offset, header_size);
+        require_noerr(rval, finish);
+    }
+    
 #if !KERNEL
     unswap_macho(linked_object, object->host_order, object->target_order);
 #endif /* KERNEL */
@@ -1706,7 +1853,7 @@ export_macho_header_32(const KXLDObject *object, u_char *buf, u_int ncmds,
 
     require_action(sizeof(*mach) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    mach = (struct mach_header *) (buf + *header_offset);
+    mach = (struct mach_header *) ((void *) (buf + *header_offset));
 
     mach->magic = MH_MAGIC;
     mach->cputype = object->cputype;
@@ -1741,7 +1888,7 @@ export_macho_header_64(const KXLDObject *object, u_char *buf, u_int ncmds,
     
     require_action(sizeof(*mach) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    mach = (struct mach_header_64 *) (buf + *header_offset);
+    mach = (struct mach_header_64 *) ((void *) (buf + *header_offset));
     
     mach->magic = MH_MAGIC_64;
     mach->cputype = object->cputype;
@@ -1965,17 +2112,11 @@ process_symbol_pointers(KXLDObject *object)
      */
 
     sect = kxld_object_get_sect_by_name(object, SEG_DATA, SECT_SYM_PTRS);
-    if (!sect) {
+    if (!sect || !(sect->flags & S_NON_LAZY_SYMBOL_POINTERS)) {
         rval = KERN_SUCCESS;
         goto finish;
     }
 
-    require_action(sect->flags & S_NON_LAZY_SYMBOL_POINTERS,
-        finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO 
-            "Section %s,%s does not have S_NON_LAZY_SYMBOL_POINTERS flag.",
-            SEG_DATA, SECT_SYM_PTRS));
-
     /* Calculate the table offset and number of entries in the section */
 
     if (kxld_object_is_32_bit(object)) {
@@ -1989,7 +2130,8 @@ process_symbol_pointers(KXLDObject *object)
 
     require_action(firstsym + nsyms <= object->dysymtab_hdr->nindirectsyms,
         finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+            "firstsym + nsyms > object->dysymtab_hdr->nindirectsyms"));
 
     /* Iterate through the indirect symbol table and fill in the section of
      * symbol pointers.  There are three cases:
@@ -2001,7 +2143,7 @@ process_symbol_pointers(KXLDObject *object)
      *      action is required.
      */
 
-    symidx = (int32_t *) (object->file + object->dysymtab_hdr->indirectsymoff);
+    symidx = (int32_t *) ((void *) (object->file + object->dysymtab_hdr->indirectsymoff));
     symidx += firstsym;
     symptr = sect->data;
     for (i = 0; i < nsyms; ++i, ++symidx, symptr+=symptrsize) {
@@ -2088,10 +2230,10 @@ static void
 add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit)
 {
     if (is_32_bit) {
-        uint32_t *ptr = (uint32_t *) symptr;
+        uint32_t *ptr = (uint32_t *) ((void *) symptr);
         *ptr += (uint32_t) val;
     } else {
-        uint64_t *ptr = (uint64_t *) symptr;
+        uint64_t *ptr = (uint64_t *) ((void *) symptr);
         *ptr += (uint64_t) val;
     }
 }
@@ -2146,7 +2288,7 @@ populate_kmod_info(KXLDObject *object)
  
     kmodsect = kxld_array_get_item(&object->sects, kmodsym->sectnum);
     kmod_offset = (u_long) (kmodsym->base_addr -  kmodsect->base_addr);
-    kmod_info = (kmod_info_t *) (kmodsect->data + kmod_offset);
+    kmod_info = (kmod_info_t *) ((void *) (kmodsect->data + kmod_offset));
 
     if (kxld_object_is_32_bit(object)) {
         kmod_info_32_v1_t *kmod = (kmod_info_32_v1_t *) (kmod_info);
@@ -2183,3 +2325,16 @@ finish:
     return rval;
 }
 
+#if KXLD_PIC_KEXTS
+/*******************************************************************************
+ *******************************************************************************/
+static boolean_t
+target_supports_slideable_kexts(const KXLDObject *object)
+{
+    check(object);
+
+    return (   object->cputype != CPU_TYPE_I386
+            && object->include_kaslr_relocs
+           );
+}
+#endif  /* KXLD_PIC_KEXTS */
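
[Editor's note] target_supports_slideable_kexts() combines two conditions introduced in this commit: i386 output never carries KASLR relocations, and other targets must opt in via the kKXLDFlagIncludeRelocs bit passed through the new KXLDFlags parameter. The masked-equality test used for that bit generalizes to multi-bit masks; a small sketch with illustrative names:

    /* True exactly when every bit of mask is set in flags; for the
     * single-bit kKXLDFlagIncludeRelocs this reduces to a plain bit test.
     */
    typedef unsigned long long ExampleFlags;

    static int
    has_all_flags(ExampleFlags flags, ExampleFlags mask)
    {
        return (flags & mask) == mask;
    }
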
index 5b6b5064d61f1cb3dc292370ce1daf0155f3f0bd..ab78f200e367547e8aab005b763e2cfa0c751beb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2009, 2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -28,7 +28,6 @@
 #ifndef _KXLD_OBJECT_H_
 #define _KXLD_OBJECT_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
@@ -56,7 +55,7 @@ size_t kxld_object_sizeof(void)
 kern_return_t kxld_object_init_from_macho(KXLDObject *object, 
     u_char *file, u_long size, const char *name,
     struct kxld_array *section_order,
-    cpu_type_t cputype, cpu_subtype_t cpusubtype)
+    cpu_type_t cputype, cpu_subtype_t cpusubtype, KXLDFlags flags)
     __attribute__((nonnull(1,2,4) visibility("hidden")));
 
 void kxld_object_clear(KXLDObject *object)
@@ -121,6 +120,9 @@ void kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size,
     u_long *vmsize)
     __attribute__((nonnull, visibility("hidden")));
 
+void kxld_object_set_linked_object_size(KXLDObject *object, u_long vmsize)
+    __attribute__((nonnull, visibility("hidden")));
+
 /* This will be the same size as kxld_kext_get_vmsize */
 kern_return_t kxld_object_export_linked_object(const KXLDObject *object,
     u_char *linked_object)
index 4867c8c78e41dc186299072a50ca67a16ef027ec..583b5bc5f597d5e4c972d9050621f6101971166f 100644 (file)
  */
 #include <string.h>
 #include <mach/boolean.h>
-#include <mach/machine.h>
 #include <sys/types.h>
 
 #if KERNEL
     #include <libkern/libkern.h>
+    #include <mach/machine.h>
 #else
-    #include <libkern/OSByteOrder.h>
     #include <stdlib.h>
+    #include <libkern/OSByteOrder.h>
+
+    /* Get machine.h from the kernel source so we can support all platforms
+     * that the kernel supports. Otherwise we're at the mercy of the host.
+     */
+    #include "../../osfmk/mach/machine.h"
 #endif
 
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include "kxld_util.h"
 #include "kxld_vtable.h"
 
+#if KXLD_PIC_KEXTS
+/* This will try to pull in mach/machine.h, so it has to come after the
+ * explicit include above.
+ */
+#include <mach-o/loader.h>
+#endif
+
 /* include target-specific relocation prototypes */
 #include <mach-o/reloc.h>
-#if KXLD_USER_OR_PPC
-#include <mach-o/ppc/reloc.h>
-#endif
 #if KXLD_USER_OR_X86_64
 #include <mach-o/x86_64/reloc.h>
 #endif
 #if KXLD_USER_OR_I386
 static boolean_t generic_reloc_has_pair(u_int _type) 
     __attribute__((const));
-static boolean_t generic_reloc_is_pair(u_int _type, u_int _prev_type)
+static u_int generic_reloc_get_pair_type(u_int _prev_type)
     __attribute__((const));
 static boolean_t generic_reloc_has_got(u_int _type)
     __attribute__((const));
@@ -111,23 +120,10 @@ static kern_return_t generic_process_reloc(const KXLDRelocator *relocator,
     kxld_addr_t pair_target, boolean_t swap);
 #endif /* KXLD_USER_OR_I386 */
 
-#if KXLD_USER_OR_PPC 
-static boolean_t ppc_reloc_has_pair(u_int _type) 
-    __attribute__((const));
-static boolean_t ppc_reloc_is_pair(u_int _type, u_int _prev_type) 
-    __attribute__((const));
-static boolean_t ppc_reloc_has_got(u_int _type)
-    __attribute__((const));
-static kern_return_t ppc_process_reloc(const KXLDRelocator *relocator, 
-    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
-    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, 
-    kxld_addr_t pair_target, boolean_t swap);
-#endif /* KXLD_USER_OR_PPC */
-
 #if KXLD_USER_OR_X86_64 
 static boolean_t x86_64_reloc_has_pair(u_int _type) 
     __attribute__((const));
-static boolean_t x86_64_reloc_is_pair(u_int _type, u_int _prev_type) 
+static u_int x86_64_reloc_get_pair_type(u_int _prev_type) 
     __attribute__((const));
 static boolean_t x86_64_reloc_has_got(u_int _type)
     __attribute__((const));
@@ -142,7 +138,7 @@ static kern_return_t calculate_displacement_x86_64(uint64_t target,
 #if KXLD_USER_OR_ARM
 static boolean_t arm_reloc_has_pair(u_int _type) 
     __attribute__((const));
-static boolean_t arm_reloc_is_pair(u_int _type, u_int _prev_type) 
+static u_int arm_reloc_get_pair_type(u_int _prev_type) 
     __attribute__((const));
 static boolean_t arm_reloc_has_got(u_int _type)
     __attribute__((const));
@@ -179,6 +175,13 @@ static kern_return_t get_target_by_address_lookup(kxld_addr_t *target,
 static kern_return_t check_for_direct_pure_virtual_call(
     const KXLDRelocator *relocator, u_long offset);
 
+#if KXLD_PIC_KEXTS
+static u_long get_macho_data_size_for_array(const KXLDArray *relocs);
+
+static kern_return_t export_macho_for_array(const KXLDRelocator *relocator,
+    const KXLDArray *relocs, struct relocation_info **dstp);
+#endif /* KXLD_PIC_KEXTS */
+
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
@@ -189,46 +192,39 @@ kxld_relocator_init(KXLDRelocator *relocator, u_char *file,
     kern_return_t rval = KERN_FAILURE;
 
     check(relocator);
-    
+
     switch(cputype) {
 #if KXLD_USER_OR_I386
     case CPU_TYPE_I386:
         relocator->reloc_has_pair = generic_reloc_has_pair;
-        relocator->reloc_is_pair = generic_reloc_is_pair;
+        relocator->reloc_get_pair_type = generic_reloc_get_pair_type;
         relocator->reloc_has_got = generic_reloc_has_got;
         relocator->process_reloc = generic_process_reloc;
         relocator->function_align = 0;
         relocator->is_32_bit = TRUE;
+        relocator->may_scatter = TRUE;
         break;
 #endif /* KXLD_USER_OR_I386 */
-#if KXLD_USER_OR_PPC
-    case CPU_TYPE_POWERPC:
-        relocator->reloc_has_pair = ppc_reloc_has_pair;
-        relocator->reloc_is_pair = ppc_reloc_is_pair;
-        relocator->reloc_has_got = ppc_reloc_has_got;
-        relocator->process_reloc = ppc_process_reloc;
-        relocator->function_align = 0;
-        relocator->is_32_bit = TRUE;
-        break;
-#endif /* KXLD_USER_OR_PPC */
 #if KXLD_USER_OR_X86_64
     case CPU_TYPE_X86_64:
         relocator->reloc_has_pair = x86_64_reloc_has_pair;
-        relocator->reloc_is_pair = x86_64_reloc_is_pair;
+        relocator->reloc_get_pair_type = x86_64_reloc_get_pair_type;
         relocator->reloc_has_got = x86_64_reloc_has_got;
         relocator->process_reloc = x86_64_process_reloc;
         relocator->function_align = 0;
         relocator->is_32_bit = FALSE;
+        relocator->may_scatter = FALSE;
         break;
 #endif /* KXLD_USER_OR_X86_64 */
 #if KXLD_USER_OR_ARM
     case CPU_TYPE_ARM:
         relocator->reloc_has_pair = arm_reloc_has_pair;
-        relocator->reloc_is_pair = arm_reloc_is_pair;
+        relocator->reloc_get_pair_type = arm_reloc_get_pair_type;
         relocator->reloc_has_got = arm_reloc_has_got;
         relocator->process_reloc = arm_process_reloc;
         relocator->function_align = 1;
         relocator->is_32_bit = TRUE;
+        relocator->may_scatter = FALSE;
         break;
 #endif /* KXLD_USER_OR_ARM */
     default:
@@ -293,8 +289,8 @@ kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator,
              * symbols.
              */
 
-            if (!(src->r_address & R_SCATTERED) && !(src->r_extern) && 
-                (R_ABS == src->r_symbolnum))
+            if (!(relocator->may_scatter && (src->r_address & R_SCATTERED)) &&
+                !(src->r_extern) && (R_ABS == src->r_symbolnum))
             {
                 continue;
             }
@@ -306,7 +302,7 @@ kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator,
              *  Extern -> Symbolnum by Index
              */
             reloc = kxld_array_get_item(relocarray, reloc_index++);
-            if (src->r_address & R_SCATTERED) {
+            if (relocator->may_scatter && (src->r_address & R_SCATTERED)) {
                 reloc->address = scatsrc->r_address;
                 reloc->pcrel = scatsrc->r_pcrel;
                 reloc->length = scatsrc->r_length;
@@ -337,16 +333,18 @@ kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator,
                 src = srcs + i;
                 scatsrc = (const struct scattered_relocation_info *) src;
                  
-                if (src->r_address & R_SCATTERED) {
-                    require_action(relocator->reloc_is_pair(
-                        scatsrc->r_type, reloc->reloc_type), 
+                if (relocator->may_scatter && (src->r_address & R_SCATTERED)) {
+                    require_action(relocator->reloc_get_pair_type(
+                        reloc->reloc_type) == scatsrc->r_type,
                         finish, rval=KERN_FAILURE);
+                    reloc->pair_address= scatsrc->r_address;
                     reloc->pair_target = scatsrc->r_value;
                     reloc->pair_target_type = KXLD_TARGET_LOOKUP;
                 } else {
-                    require_action(relocator->reloc_is_pair(src->r_type, 
-                        reloc->reloc_type), finish, rval=KERN_FAILURE);
-
+                    require_action(relocator->reloc_get_pair_type(
+                        reloc->reloc_type) == scatsrc->r_type,
+                        finish, rval=KERN_FAILURE);
+                    reloc->pair_address = scatsrc->r_address;
                     if (src->r_extern) {
                         reloc->pair_target = src->r_symbolnum;
                         reloc->pair_target_type = KXLD_TARGET_SYMBOLNUM;
@@ -384,7 +382,6 @@ count_relocatable_relocs(const KXLDRelocator *relocator,
 {
     u_int num_nonpair_relocs = 0;
     u_int i = 0;
-    u_int prev_type = 0;
     const struct relocation_info *reloc = NULL;
     const struct scattered_relocation_info *sreloc = NULL;
 
@@ -394,7 +391,6 @@ count_relocatable_relocs(const KXLDRelocator *relocator,
     /* Loop over all of the relocation entries */
 
     num_nonpair_relocs = 1;
-    prev_type = relocs->r_type;
     for (i = 1; i < nrelocs; ++i) {
         reloc = relocs + i;
 
@@ -405,18 +401,14 @@ count_relocatable_relocs(const KXLDRelocator *relocator,
             sreloc = (const struct scattered_relocation_info *) reloc;
 
             num_nonpair_relocs += 
-                (!relocator->reloc_is_pair(sreloc->r_type, prev_type));
-
-            prev_type = sreloc->r_type;
+                !relocator->reloc_has_pair(sreloc->r_type);
         } else {
             /* A normal relocation entry is relocatable if it is not a pair and
              * if it is not a section-based relocation for an absolute symbol.
              */
             num_nonpair_relocs += 
-                !(relocator->reloc_is_pair(reloc->r_type, prev_type)
+                !(relocator->reloc_has_pair(reloc->r_type)
                  || (0 == reloc->r_extern && R_ABS == reloc->r_symbolnum));
-
-            prev_type = reloc->r_type;
         }
 
     }
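
[Editor's note] The rewrite above drops the prev_type bookkeeping: instead of asking whether the current entry is the pair of the previous one (reloc_is_pair), the counter asks whether the current entry's type is followed by a pair (reloc_has_pair). The total is unchanged for a well-formed stream, since each leader + *_PAIR couple still contributes exactly one: the leader's type has a pair (skipped) while the trailing PAIR entry's type does not (counted). A schematic counter, with has_pair() standing in for the per-arch predicate and the extern/R_ABS and scattered cases elided:

    static unsigned
    count_nonpair(const unsigned *types, unsigned ntypes,
        int (*has_pair)(unsigned))
    {
        unsigned i, count = 0;

        for (i = 0; i < ntypes; ++i)
            count += !has_pair(types[i]);
        return count;
    }
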
@@ -444,13 +436,13 @@ kxld_relocator_has_pair(const KXLDRelocator *relocator, u_int r_type)
 
 /*******************************************************************************
 *******************************************************************************/
-boolean_t
-kxld_relocator_is_pair(const KXLDRelocator *relocator, u_int r_type, 
+u_int
+kxld_relocator_get_pair_type(const KXLDRelocator *relocator,
     u_int prev_r_type)
 {
     check(relocator);
 
-    return relocator->reloc_is_pair(r_type, prev_r_type);
+    return relocator->reloc_get_pair_type(prev_r_type);
 }
 
 /*******************************************************************************
@@ -538,6 +530,81 @@ finish:
     return reloc;
 }
 
+#if KXLD_PIC_KEXTS
+/*******************************************************************************
+*******************************************************************************/
+u_long
+kxld_reloc_get_macho_header_size()
+{
+    return sizeof(struct dysymtab_command);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+u_long
+kxld_reloc_get_macho_data_size(const KXLDArray *locrelocs,
+    const KXLDArray *extrelocs)
+{
+    u_long    rval = 0;
+
+    rval += get_macho_data_size_for_array(locrelocs);
+    rval += get_macho_data_size_for_array(extrelocs);
+
+    return (rval);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t
+kxld_reloc_export_macho(const KXLDRelocator *relocator,
+    const KXLDArray *locrelocs, const KXLDArray *extrelocs,
+    u_char *buf, u_long *header_offset, u_long header_size,
+    u_long *data_offset, u_long size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct dysymtab_command *dysymtabhdr = NULL;
+    struct relocation_info *start = NULL;
+    struct relocation_info *dst = NULL;
+    u_long count = 0;
+    u_long data_size = 0;
+
+    check(locrelocs);
+    check(extrelocs);
+    check(buf);
+    check(header_offset);
+    check(data_offset);
+
+    require_action(sizeof(*dysymtabhdr) <= header_size - *header_offset, finish, rval=KERN_FAILURE);
+    dysymtabhdr = (struct dysymtab_command *) ((void *) (buf + *header_offset));
+    *header_offset += sizeof(*dysymtabhdr);
+
+    data_size = kxld_reloc_get_macho_data_size(locrelocs, extrelocs);
+    require_action((*data_offset + data_size) <= size, finish, rval=KERN_FAILURE);
+    
+    start = dst = (struct relocation_info *) ((void *) (buf + *data_offset));
+
+    rval = export_macho_for_array(relocator, locrelocs, &dst);
+    require_noerr(rval, finish);
+    
+    rval = export_macho_for_array(relocator, extrelocs, &dst);
+    require_noerr(rval, finish);
+
+    count = dst - start;
+
+    memset(dysymtabhdr, 0, sizeof(*dysymtabhdr));
+    dysymtabhdr->cmd = LC_DYSYMTAB;
+    dysymtabhdr->cmdsize = (uint32_t) sizeof(*dysymtabhdr);
+    dysymtabhdr->locreloff = (uint32_t) *data_offset;
+    dysymtabhdr->nlocrel = (uint32_t) count;
+    
+    *data_offset += count * sizeof(struct relocation_info);
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+#endif /* KXLD_PIC_KEXTS */
+
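
[Editor's note] kxld_reloc_export_macho() above writes a single LC_DYSYMTAB whose locreloff/nlocrel cover both the local and external entries it just serialized. A reader-side sketch, matching how kxld_reloc_create_macho() earlier in this diff locates the tables (bounds checks elided):

    #include <mach-o/loader.h>
    #include <mach-o/reloc.h>

    static void
    read_local_relocs(unsigned char *file, const struct dysymtab_command *dys)
    {
        struct relocation_info *rel =
            (struct relocation_info *)(void *)(file + dys->locreloff);
        unsigned i;

        for (i = 0; i < dys->nlocrel; ++i) {
            (void)&rel[i];      /* process one relocation_info entry */
        }
    }
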
 /*******************************************************************************
 *******************************************************************************/
 kxld_addr_t
@@ -564,7 +631,7 @@ get_pointer_at_addr_32(const KXLDRelocator *relocator,
     
     check(relocator);
 
-    addr = *(const uint32_t *) (data + offset);
+    addr = *(const uint32_t *) ((void *) (data + offset));
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt32(addr);
@@ -586,7 +653,7 @@ get_pointer_at_addr_64(const KXLDRelocator *relocator,
     
     check(relocator);
 
-    addr = *(const uint64_t *) (data + offset);
+    addr = *(const uint64_t *) ((void *) (data + offset));
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt64(addr);
@@ -600,8 +667,7 @@ get_pointer_at_addr_64(const KXLDRelocator *relocator,
 /*******************************************************************************
 *******************************************************************************/
 void 
-kxld_relocator_set_vtables(KXLDRelocator *relocator, 
-    const struct kxld_dict *vtables)
+kxld_relocator_set_vtables(KXLDRelocator *relocator, const KXLDDict *vtables)
 {
     relocator->vtables = vtables;
 }
@@ -627,7 +693,7 @@ align_raw_function_address(const KXLDRelocator *relocator, kxld_addr_t value)
 *******************************************************************************/
 kern_return_t 
 kxld_relocator_process_sect_reloc(KXLDRelocator *relocator,
-    const KXLDReloc *reloc, const struct kxld_sect *sect)
+    const KXLDReloc *reloc, const KXLDSect *sect)
 {
     kern_return_t rval = KERN_FAILURE;
     u_char *instruction = NULL;
@@ -910,165 +976,184 @@ finish:
     return rval;
 }
 
-#if KXLD_USER_OR_I386 
+#if KXLD_PIC_KEXTS
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t
-generic_reloc_has_pair(u_int _type)
+static u_long
+get_macho_data_size_for_array(const KXLDArray *relocs)
 {
-    enum reloc_type_generic type = _type;
+    const KXLDReloc *reloc = NULL;
+    u_int i = 0;
+    u_long size = 0;
 
-    return (type == GENERIC_RELOC_SECTDIFF || 
-        type == GENERIC_RELOC_LOCAL_SECTDIFF);
-}
+    check(relocs);
 
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t 
-generic_reloc_is_pair(u_int _type, u_int _prev_type __unused)
-{
-    enum reloc_type_generic type = _type;
+    for (i = 0; i < relocs->nitems; ++i) {
+        reloc = kxld_array_get_item(relocs, i);
+        if (!reloc->pcrel) {
+            size += sizeof(struct relocation_info);
+            if(reloc->pair_target_type != KXLD_TARGET_NONE) {
+                size += sizeof(struct relocation_info);
+            }
+        }
+    }
 
-    return (type == GENERIC_RELOC_PAIR);
+    return size;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t generic_reloc_has_got(u_int _type __unused)
-{
-    return FALSE;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t 
-generic_process_reloc(const KXLDRelocator *relocator, u_char *instruction, 
-    u_int length, u_int pcrel, kxld_addr_t _base_pc, kxld_addr_t _link_pc, 
-    kxld_addr_t _link_disp __unused, u_int _type, kxld_addr_t _target, 
-    kxld_addr_t _pair_target, boolean_t swap __unused)
+static kern_return_t
+export_macho_for_array(const KXLDRelocator *relocator,
+    const KXLDArray *relocs, struct relocation_info **dstp)
 {
     kern_return_t rval = KERN_FAILURE;
-    uint32_t base_pc = (uint32_t) _base_pc;
-    uint32_t link_pc = (uint32_t) _link_pc;
-    uint32_t *instr_addr = NULL;
-    uint32_t instr_data = 0;
-    uint32_t target = (uint32_t) _target;
-    uint32_t pair_target = (uint32_t) _pair_target;
-    enum reloc_type_generic type = _type;
+    const KXLDReloc *reloc = NULL;
+    struct relocation_info *dst = NULL;
+    struct scattered_relocation_info *scatdst = NULL;
+    u_int i = 0;
 
-    check(instruction);
-    require_action(length == 2, finish, rval=KERN_FAILURE);
+    dst = *dstp;
 
-    if (pcrel) target = target + base_pc - link_pc;
-
-    instr_addr = (uint32_t *)instruction;
-    instr_data = *instr_addr;
+    for (i = 0; i < relocs->nitems; ++i) {
+        reloc = kxld_array_get_item(relocs, i);
+        scatdst = (struct scattered_relocation_info *) dst;
 
-#if !KERNEL
-    if (swap) instr_data = OSSwapInt32(instr_data);
-#endif
+        if (reloc->pcrel) {
+            continue;
+        }
 
-    rval = check_for_direct_pure_virtual_call(relocator, instr_data);
-    require_noerr(rval, finish);
+        switch (reloc->target_type) {
+        case KXLD_TARGET_LOOKUP:
+            scatdst->r_address = reloc->address;
+            scatdst->r_pcrel = reloc->pcrel;
+            scatdst->r_length = reloc->length;
+            scatdst->r_type = reloc->reloc_type;
+            scatdst->r_value = reloc->target;
+            scatdst->r_scattered = 1;
+            break;
+        case KXLD_TARGET_SECTNUM:
+            dst->r_address = reloc->address;
+            dst->r_pcrel = reloc->pcrel;
+            dst->r_length = reloc->length;
+            dst->r_type = reloc->reloc_type;
+            dst->r_symbolnum = reloc->target + 1;
+            dst->r_extern = 0;
+            break;
+        case KXLD_TARGET_SYMBOLNUM:
+           /* Assume that everything will be slid together; otherwise,
+            * there is no sensible value for the section number.
+            */
+            dst->r_address = reloc->address;
+            dst->r_pcrel = reloc->pcrel;
+            dst->r_length = reloc->length;
+            dst->r_type = reloc->reloc_type;
+            dst->r_symbolnum = 1;
+            dst->r_extern = 0;
+            break;
+        default:
+            rval = KERN_FAILURE;
+            goto finish;
+        }
 
-    switch (type) {
-    case GENERIC_RELOC_VANILLA:
-        instr_data += target;
-        break;
-    case GENERIC_RELOC_SECTDIFF:
-    case GENERIC_RELOC_LOCAL_SECTDIFF:
-        instr_data = instr_data + target - pair_target;
-        break;
-    case GENERIC_RELOC_PB_LA_PTR:
-        rval = KERN_FAILURE;
-        goto finish;
-    case GENERIC_RELOC_PAIR:
-    default:
-        rval = KERN_FAILURE;
-        goto finish;
+        ++dst;
+
+        if(reloc->pair_target_type != KXLD_TARGET_NONE) {
+            ++i;
+            require_action(i < relocs->nitems, finish, rval=KERN_FAILURE);
+            scatdst = (struct scattered_relocation_info *) dst;
+            switch (reloc->pair_target_type) {
+            case KXLD_TARGET_LOOKUP:
+                scatdst->r_address = reloc->pair_address;
+                scatdst->r_pcrel = reloc->pcrel;
+                scatdst->r_length = reloc->length;
+                scatdst->r_type = relocator->reloc_get_pair_type(reloc->reloc_type);
+                scatdst->r_value = reloc->pair_target;
+                scatdst->r_scattered = 1;
+                break;
+            case KXLD_TARGET_SECTNUM:
+                dst->r_address = reloc->pair_address;
+                dst->r_pcrel = reloc->pcrel;
+                dst->r_length = reloc->length;
+                dst->r_type = relocator->reloc_get_pair_type(reloc->reloc_type);
+                dst->r_symbolnum = reloc->pair_target + 1;
+                dst->r_extern = 0;
+                break;
+            case KXLD_TARGET_SYMBOLNUM:
+                dst->r_address = reloc->pair_address;
+                dst->r_pcrel = reloc->pcrel;
+                dst->r_length = reloc->length;
+                dst->r_type = relocator->reloc_get_pair_type(reloc->reloc_type);
+                dst->r_symbolnum = 1;
+                dst->r_extern = 0;
+                break;
+            default:
+                rval = KERN_FAILURE;
+                goto finish;
+            }
+            ++dst;
+        }
     }
 
-#if !KERNEL
-    if (swap) instr_data = OSSwapInt32(instr_data);
-#endif
-
-    *instr_addr = instr_data;
-
     rval = KERN_SUCCESS;
-
 finish:
+    *dstp = dst;
     return rval;
 }
-#endif /* KXLD_USER_OR_I386 */
+#endif /* KXLD_PIC_KEXTS */
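
/* A hedged aside on the export loop above: it overlays two 8-byte entry
 * layouts from <mach-o/reloc.h> on each output slot, distinguished by the
 * high bit of the first word (r_scattered).  A sketch of the overlay, for
 * illustration only:
 */
union kxld_reloc_entry_sketch {
    struct relocation_info           plain;  /* r_symbolnum/r_extern form      */
    struct scattered_relocation_info scat;   /* r_value form, r_scattered == 1 */
};
/* KXLD_TARGET_LOOKUP entries are written scattered so r_value can carry the
 * target's pre-slide address; SECTNUM and SYMBOLNUM entries use the plain
 * form with a section ordinal (or the constant 1) in r_symbolnum.
 */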
 
-#if KXLD_USER_OR_PPC
+#if KXLD_USER_OR_I386 
 /*******************************************************************************
 *******************************************************************************/
 static boolean_t
-ppc_reloc_has_pair(u_int _type)
+generic_reloc_has_pair(u_int _type)
 {
-    enum reloc_type_ppc type = _type;
+    enum reloc_type_generic type = _type;
 
-    switch(type) {
-    case PPC_RELOC_HI16:
-    case PPC_RELOC_LO16:
-    case PPC_RELOC_HA16:
-    case PPC_RELOC_LO14:
-    case PPC_RELOC_JBSR:
-    case PPC_RELOC_SECTDIFF:
-        return TRUE;
-    default:
-        return FALSE;
-    }
+    return (type == GENERIC_RELOC_SECTDIFF || 
+        type == GENERIC_RELOC_LOCAL_SECTDIFF);
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t
-ppc_reloc_is_pair(u_int _type, u_int _prev_type __unused)
+static u_int 
+generic_reloc_get_pair_type(u_int _prev_type __unused)
 {
-    enum reloc_type_ppc type = _type;
-
-    return (type == PPC_RELOC_PAIR);
+    return GENERIC_RELOC_PAIR;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t ppc_reloc_has_got(u_int _type __unused)
+static boolean_t generic_reloc_has_got(u_int _type __unused)
 {
     return FALSE;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static kern_return_t
-ppc_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction, 
+static kern_return_t 
+generic_process_reloc(const KXLDRelocator *relocator, u_char *instruction, 
     u_int length, u_int pcrel, kxld_addr_t _base_pc, kxld_addr_t _link_pc, 
     kxld_addr_t _link_disp __unused, u_int _type, kxld_addr_t _target, 
-    kxld_addr_t _pair_target __unused, boolean_t swap __unused)
+    kxld_addr_t _pair_target, boolean_t swap __unused)
 {
     kern_return_t rval = KERN_FAILURE;
-    uint32_t *instr_addr = NULL;
-    uint32_t instr_data = 0;
     uint32_t base_pc = (uint32_t) _base_pc;
     uint32_t link_pc = (uint32_t) _link_pc;
+    uint32_t *instr_addr = NULL;
+    uint32_t instr_data = 0;
     uint32_t target = (uint32_t) _target;
     uint32_t pair_target = (uint32_t) _pair_target;
-    int32_t addend = 0;
-    int32_t displacement = 0;
-    uint32_t difference = 0;
-    uint32_t br14_disp_sign = 0;
-    enum reloc_type_ppc type = _type;
+    enum reloc_type_generic type = _type;
 
     check(instruction);
-    require_action(length == 2 || length == 3, finish, 
-        rval=KERN_FAILURE);
+    require_action(length == 2, finish, rval=KERN_FAILURE);
 
-    if (pcrel) displacement = target + base_pc - link_pc;
+    if (pcrel) target = target + base_pc - link_pc;
 
-    instr_addr = (uint32_t *)instruction;
+    instr_addr = (uint32_t *) ((void *) instruction);
     instr_data = *instr_addr;
-    
+
 #if !KERNEL
     if (swap) instr_data = OSSwapInt32(instr_data);
 #endif
@@ -1077,100 +1162,17 @@ ppc_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction,
     require_noerr(rval, finish);
 
     switch (type) {
-    case PPC_RELOC_VANILLA:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-
+    case GENERIC_RELOC_VANILLA:
         instr_data += target;
         break;
-    case PPC_RELOC_BR14:
-        require_action(pcrel, finish, rval=KERN_FAILURE);
-
-        addend = BR14D(instr_data);
-        displacement += SIGN_EXTEND(addend, BR14_NBITS_DISPLACEMENT);
-        difference = ABSOLUTE_VALUE(displacement);
-        require_action(difference < BR14_LIMIT, finish, 
-            rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow));
-
-
-        br14_disp_sign = BIT15(instr_data);
-        instr_data = BR14I(instr_data) | BR14D(displacement);
-        
-        /* If this is a predicted conditional branch (signified by an
-         * instruction length of 3) that is not branch-always, and the sign of
-         * the displacement is different after relocation, then flip the y-bit
-         * to preserve the branch prediction
-         */
-        if ((length == 3) && 
-            IS_COND_BR_INSTR(instr_data) &&
-            IS_NOT_ALWAYS_TAKEN(instr_data) && 
-            (BIT15(instr_data) != br14_disp_sign))
-        {     
-            FLIP_PREDICT_BIT(instr_data);
-        }
-        break;
-    case PPC_RELOC_BR24:
-        require_action(pcrel, finish, rval=KERN_FAILURE);
-
-        addend = BR24D(instr_data);
-        displacement += SIGN_EXTEND(addend, BR24_NBITS_DISPLACEMENT);
-        difference = ABSOLUTE_VALUE(displacement);
-        require_action(difference < BR24_LIMIT, finish, 
-            rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow));
-
-        instr_data = BR24I(instr_data) | BR24D(displacement);
-        break;
-    case PPC_RELOC_HI16:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-
-        target += LO16S(instr_data) | LO16(pair_target);
-        instr_data = HI16(instr_data) | HI16S(target);
-        break;
-    case PPC_RELOC_LO16:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-
-        target += LO16S(pair_target) | LO16(instr_data);
-        instr_data = HI16(instr_data) | LO16(target);
-        break;
-    case PPC_RELOC_HA16:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-
-        instr_data -= BIT15(pair_target) ? 1 : 0;
-        target += LO16S(instr_data) | LO16(pair_target);
-        instr_data = HI16(instr_data) | HI16S(target);
-        instr_data += BIT15(target) ? 1 : 0;
-        break;
-    case PPC_RELOC_JBSR:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-
-        /* The generated code as written branches to an island that loads the
-         * absolute address of the target.  If we can branch to the target 
-         * directly with less than 24 bits of displacement, we modify the branch
-         * instruction to do so which avoids the cost of the island.
-         */
-
-        displacement = target + pair_target - link_pc;
-        difference = ABSOLUTE_VALUE(displacement);
-        if (difference < BR24_LIMIT) {
-            instr_data = BR24I(instr_data) | BR24D(displacement);
-        }
-        break;
-    case PPC_RELOC_SECTDIFF:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
-        
+    case GENERIC_RELOC_SECTDIFF:
+    case GENERIC_RELOC_LOCAL_SECTDIFF:
         instr_data = instr_data + target - pair_target;
         break;
-    case PPC_RELOC_LO14:
-    case PPC_RELOC_PB_LA_PTR:
-    case PPC_RELOC_HI16_SECTDIFF:
-    case PPC_RELOC_LO16_SECTDIFF:
-    case PPC_RELOC_HA16_SECTDIFF:
-    case PPC_RELOC_LO14_SECTDIFF:
-    case PPC_RELOC_LOCAL_SECTDIFF:
+    case GENERIC_RELOC_PB_LA_PTR:
         rval = KERN_FAILURE;
         goto finish;
-    case PPC_RELOC_PAIR:
+    case GENERIC_RELOC_PAIR:
     default:
         rval = KERN_FAILURE;
         goto finish;
@@ -1183,11 +1185,11 @@ ppc_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction,
     *instr_addr = instr_data;
 
     rval = KERN_SUCCESS;
-finish:
 
+finish:
     return rval;
 }
-#endif /* KXLD_USER_OR_PPC */
+#endif /* KXLD_USER_OR_I386 */
 
 #if KXLD_USER_OR_X86_64
 /*******************************************************************************
@@ -1202,13 +1204,10 @@ x86_64_reloc_has_pair(u_int _type)
 
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t
-x86_64_reloc_is_pair(u_int _type, u_int _prev_type)
+static u_int
+x86_64_reloc_get_pair_type(u_int _prev_type __unused)
 {
-    enum reloc_type_x86_64 type = _type;
-    enum reloc_type_x86_64 prev_type = _prev_type;
-
-    return (x86_64_reloc_has_pair(prev_type) && type == X86_64_RELOC_UNSIGNED);
+    return X86_64_RELOC_UNSIGNED;
 }
 
 /*******************************************************************************
@@ -1246,7 +1245,7 @@ x86_64_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction,
         finish, rval=KERN_FAILURE);
 
     if (length == 2) {
-        instr32p = (int32_t *) instruction;
+        instr32p = (int32_t *) ((void *) instruction);
         instr32 = *instr32p;
 
 #if !KERNEL
@@ -1348,7 +1347,7 @@ x86_64_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction,
 
         *instr32p = instr32;
     } else {
-        instr64p = (uint64_t *) instruction;
+        instr64p = (uint64_t *) ((void *) instruction);
         instr64 = *instr64p;
 
 #if !KERNEL
@@ -1437,12 +1436,10 @@ arm_reloc_has_pair(u_int _type)
 
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t
-arm_reloc_is_pair(u_int _type, u_int _prev_type __unused)
+static u_int
+arm_reloc_get_pair_type(u_int _prev_type __unused)
 {
-    enum reloc_type_arm type = _type;
-
-    return (type == ARM_RELOC_PAIR);
+    return ARM_RELOC_PAIR;
 }
 
 /*******************************************************************************
@@ -1476,7 +1473,7 @@ arm_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction,
 
     if (pcrel) displacement = target + base_pc - link_pc;
 
-    instr_addr = (uint32_t *)instruction;
+    instr_addr = (uint32_t *) ((void *) instruction);
     instr_data = *instr_addr;
     
 #if !KERNEL
@@ -1535,4 +1532,3 @@ finish:
 }
 
 #endif /* KXLD_USER_OR_ARM */
-
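
/* A hedged sketch of the RelocIsPair -> RelocGetPairType change above: rather
 * than testing whether the next entry is a pair, the relocator now asks each
 * architecture which r_type a pair entry must carry.  emit_pair_sketch() is
 * illustrative only, not a real kxld function.
 */
static void
emit_pair_sketch(const KXLDRelocator *relocator, const KXLDReloc *reloc,
    struct relocation_info *dst)
{
    if (reloc->pair_target_type == KXLD_TARGET_NONE) return;

    dst->r_address = reloc->pair_address;
    dst->r_pcrel   = reloc->pcrel;
    dst->r_length  = reloc->length;
    /* GENERIC_RELOC_PAIR on i386, X86_64_RELOC_UNSIGNED on x86_64, and
     * ARM_RELOC_PAIR on arm -- exactly what the per-arch hooks return.
     */
    dst->r_type    = relocator->reloc_get_pair_type(reloc->reloc_type);
    /* r_symbolnum/r_extern handling elided; see the export loop above. */
}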
index 40a610d1a6b2779d668d1f6eea293bff624b5cfb..695e708fd39a2e872493d26f3baacb0036576734 100644 (file)
@@ -49,7 +49,7 @@ typedef struct kxld_relocator KXLDRelocator;
 typedef struct kxld_reloc KXLDReloc;
 
 typedef boolean_t (*RelocHasPair)(u_int r_type);
-typedef boolean_t (*RelocIsPair)(u_int r_type, u_int prev_r_type);
+typedef u_int (*RelocGetPairType)(u_int prev_r_type);
 typedef boolean_t (*RelocHasGot)(u_int r_type);
 typedef kern_return_t(*ProcessReloc)(const KXLDRelocator *relocator, 
     u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
@@ -58,7 +58,7 @@ typedef kern_return_t(*ProcessReloc)(const KXLDRelocator *relocator,
 
 struct kxld_relocator {
     RelocHasPair reloc_has_pair;
-    RelocIsPair reloc_is_pair;
+    RelocGetPairType reloc_get_pair_type;
     RelocHasGot reloc_has_got;
     ProcessReloc process_reloc;
     const struct kxld_symtab *symtab;
@@ -69,10 +69,12 @@ struct kxld_relocator {
     u_int function_align; /* Power of two alignment of functions */
     boolean_t is_32_bit;
     boolean_t swap;
+    boolean_t may_scatter;
 };
 
 struct kxld_reloc {
     u_int address;
+    u_int pair_address;
     u_int target;
     u_int pair_target;
     u_int target_type:3;
@@ -104,7 +106,7 @@ void kxld_relocator_clear(KXLDRelocator *relocator)
 boolean_t kxld_relocator_has_pair(const KXLDRelocator *relocator, u_int r_type)
     __attribute__((pure, nonnull,visibility("hidden")));
 
-boolean_t kxld_relocator_is_pair(const KXLDRelocator *relocator, u_int r_type, 
+u_int kxld_relocator_get_pair_type(const KXLDRelocator *relocator,
     u_int last_r_type)
     __attribute__((pure, nonnull,visibility("hidden")));
 
@@ -127,6 +129,21 @@ KXLDReloc * kxld_reloc_get_reloc_by_offset(const struct kxld_array *relocs,
     kxld_addr_t offset)
     __attribute__((pure, nonnull, visibility("hidden")));
 
+#if KXLD_PIC_KEXTS
+u_long kxld_reloc_get_macho_header_size(void)
+    __attribute__((pure, visibility("hidden")));
+
+u_long kxld_reloc_get_macho_data_size(const struct kxld_array *locrelocs,
+    const struct kxld_array *extrelocs)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+kern_return_t kxld_reloc_export_macho(const KXLDRelocator *relocator,
+    const struct kxld_array *locrelocs, const struct kxld_array *extrelocs,
+    u_char *buf,  u_long *header_offset, u_long header_size,
+    u_long *data_offset, u_long size)
+    __attribute__((nonnull, visibility("hidden")));
+#endif /* KXLD_PIC_KEXTS */
+
 /*******************************************************************************
 * Modifiers
 *******************************************************************************/
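
/* A hedged usage sketch for the PIC-kext export entry point declared above;
 * buf, bufsize, header_size, and linkedit_fileoff are placeholders supplied
 * by whatever code lays out the final Mach-O.  Only the call shape is shown.
 */
u_long header_offset = 0;
u_long data_offset = linkedit_fileoff;      /* assumed, not from this diff */
kern_return_t rval = kxld_reloc_export_macho(relocator, locrelocs, extrelocs,
    buf, &header_offset, header_size, &data_offset, bufsize);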
index d00d6596d48ead9ddfa4cd9fbf3b94a5824b4e34..a89e3f69385c720244f0273e5bcdaaa7c43e5826 100644 (file)
@@ -58,7 +58,7 @@ kxld_sect_init_from_macho_32(KXLDSect *sect, u_char *macho, u_long *sect_offset,
     u_int sectnum, const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct section *src = (struct section *) (macho + *sect_offset);
+    struct section *src = (struct section *) ((void *) (macho + *sect_offset));
     struct relocation_info *relocs = NULL;
 
     check(sect);
@@ -82,7 +82,7 @@ kxld_sect_init_from_macho_32(KXLDSect *sect, u_char *macho, u_long *sect_offset,
         sect->data = NULL;
     }
 
-    relocs = (struct relocation_info *) (macho + src->reloff);
+    relocs = (struct relocation_info *) ((void *) (macho + src->reloff));
 
     rval = kxld_reloc_create_macho(&sect->relocs, relocator, 
         relocs, src->nreloc);
@@ -106,7 +106,7 @@ kxld_sect_init_from_macho_64(KXLDSect *sect, u_char *macho, u_long *sect_offset,
     u_int sectnum, const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct section_64 *src = (struct section_64 *) (macho + *sect_offset);
+    struct section_64 *src = (struct section_64 *) ((void *) (macho + *sect_offset));
     struct relocation_info *relocs = NULL;
 
     check(sect);
@@ -130,7 +130,7 @@ kxld_sect_init_from_macho_64(KXLDSect *sect, u_char *macho, u_long *sect_offset,
         sect->data = NULL;
     }
 
-    relocs = (struct relocation_info *) (macho + src->reloff);
+    relocs = (struct relocation_info *) ((void *) (macho + src->reloff));
 
     rval = kxld_reloc_create_macho(&sect->relocs, relocator, 
         relocs, src->nreloc);
@@ -430,11 +430,11 @@ export_macho(const KXLDSect *sect, u_char *buf, u_long offset, u_long bufsize)
     case S_LITERAL_POINTERS:
     case S_COALESCED:
     case S_16BYTE_LITERALS:
+    case S_SYMBOL_STUBS:
         memcpy(buf + offset, sect->data, (size_t)sect->size);
         break;
     case S_ZEROFILL: /* sect->data should be NULL, so we'll never get here */
     case S_LAZY_SYMBOL_POINTERS:
-    case S_SYMBOL_STUBS:
     case S_GB_ZEROFILL:
     case S_INTERPOSING:
     case S_DTRACE_DOF:
@@ -467,7 +467,7 @@ sect_export_macho_header_32(const KXLDSect *sect, u_char *buf,
     
     require_action(sizeof(*secthdr) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    secthdr = (struct section *) (buf + *header_offset);
+    secthdr = (struct section *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*secthdr);
 
     /* Initialize header */
@@ -507,7 +507,7 @@ sect_export_macho_header_64(const KXLDSect *sect, u_char *buf,
     
     require_action(sizeof(*secthdr) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    secthdr = (struct section_64 *) (buf + *header_offset);
+    secthdr = (struct section_64 *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*secthdr);
 
     /* Initialize header */
index 2f655b4afa58cf0b3d9b9b7d4b89741cfd4b78fb..96d0b1b358a315088d1d43b1cf1fcfc11aacd961 100644 (file)
@@ -28,7 +28,6 @@
 #ifndef _KXLD_SECT_H_
 #define _KXLD_SECT_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
index ba14b49170a3830d1373bcdda6eb00b86946b95e..ca3d2fb4faefcc1de2cb10b28ebda0bf42e1999c 100644 (file)
@@ -39,6 +39,7 @@
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_seg.h"
 #include "kxld_symtab.h"
@@ -494,6 +495,8 @@ kxld_seg_get_macho_header_size(const KXLDSeg *seg, boolean_t is_32_bit)
 
 /*******************************************************************************
 *******************************************************************************/
+/* This is no longer used, but may be useful some day... */
+#if 0
 u_long
 kxld_seg_get_macho_data_size(const KXLDSeg *seg)
 {
@@ -511,6 +514,7 @@ kxld_seg_get_macho_data_size(const KXLDSeg *seg)
 
     return round_page(size);
 }
+#endif
 
 /*******************************************************************************
 *******************************************************************************/
@@ -535,9 +539,9 @@ kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf,
     u_long base_data_offset = *data_offset;
     u_int i = 0;
     struct segment_command *hdr32 = 
-        (struct segment_command *) (buf + *header_offset);
+        (struct segment_command *) ((void *) (buf + *header_offset));
     struct segment_command_64 *hdr64 = 
-        (struct segment_command_64 *) (buf + *header_offset);
+        (struct segment_command_64 *) ((void *) (buf + *header_offset));
 
     check(seg);
     check(buf);
@@ -634,7 +638,7 @@ seg_export_macho_header_32(const KXLDSeg *seg, u_char *buf,
 
     require_action(sizeof(*seghdr) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    seghdr = (struct segment_command *) (buf + *header_offset);
+    seghdr = (struct segment_command *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*seghdr);
 
     seghdr->cmd = LC_SEGMENT;
@@ -674,7 +678,7 @@ seg_export_macho_header_64(const KXLDSeg *seg, u_char *buf,
 
     require_action(sizeof(*seghdr) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    seghdr = (struct segment_command_64 *) (buf + *header_offset);
+    seghdr = (struct segment_command_64 *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*seghdr);
 
     seghdr->cmd = LC_SEGMENT_64;
@@ -752,8 +756,7 @@ kxld_seg_finish_init(KXLDSeg *seg)
         }
 
         /* XXX Cross architecture linking will fail if the page size ever differs
-         * from 4096.  (As of this writing, we're fine on ppc, i386, x86_64, and
-         * arm.)
+         * from 4096.  (As of this writing, we're fine on i386, x86_64, and arm).
          */
         seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr);
     }
@@ -807,9 +810,24 @@ kxld_seg_relocate(KXLDSeg *seg, kxld_addr_t link_addr)
 /*******************************************************************************
 *******************************************************************************/
 void 
-kxld_seg_populate_linkedit(KXLDSeg *seg,
-    const KXLDSymtab *symtab, boolean_t is_32_bit)
+kxld_seg_populate_linkedit(KXLDSeg *seg, const KXLDSymtab *symtab, boolean_t is_32_bit 
+#if KXLD_PIC_KEXTS
+    , const KXLDArray *locrelocs
+    , const KXLDArray *extrelocs
+    , boolean_t target_supports_slideable_kexts
+#endif  /* KXLD_PIC_KEXTS */
+    )
 {
-    seg->vmsize = round_page(kxld_symtab_get_macho_data_size(symtab, is_32_bit));
+    u_long size = 0;
+
+    size += kxld_symtab_get_macho_data_size(symtab, is_32_bit);
+
+#if KXLD_PIC_KEXTS
+    if (target_supports_slideable_kexts) {
+        size += kxld_reloc_get_macho_data_size(locrelocs, extrelocs);
+    }
+#endif /* KXLD_PIC_KEXTS */
+
+    seg->vmsize = round_page(size);
 }
 
index ab5abcdc6a71c056e784cd35c43ccbb093fe9263..1d863bf02b7df8b8358ad78f0f7b1ced66c176d4 100644 (file)
@@ -100,8 +100,11 @@ kxld_size_t kxld_seg_get_vmsize(const KXLDSeg *seg)
 u_long kxld_seg_get_macho_header_size(const KXLDSeg *seg, boolean_t is_32_bit)
     __attribute__((pure, nonnull, visibility("hidden")));
 
+#if 0
+/* This is no longer used, but may be useful some day... */
 u_long kxld_seg_get_macho_data_size(const KXLDSeg *seg)
     __attribute__((pure, nonnull, visibility("hidden")));
+#endif
 
 kern_return_t
 kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf,
@@ -134,8 +137,14 @@ void kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections)
 void kxld_seg_relocate(KXLDSeg *seg, kxld_addr_t link_addr)
     __attribute__((nonnull, visibility("hidden")));
 
-void kxld_seg_populate_linkedit(KXLDSeg *seg,
-    const struct kxld_symtab *symtab, boolean_t is_32_bit)
+void kxld_seg_populate_linkedit(KXLDSeg *seg, const struct kxld_symtab *symtab,
+    boolean_t is_32_bit
+#if KXLD_PIC_KEXTS
+    , const struct kxld_array *locrelocs
+    , const struct kxld_array *extrelocs
+    , boolean_t target_supports_slideable_kexts
+#endif  /* KXLD_PIC_KEXTS */
+       )
     __attribute__((nonnull, visibility("hidden")));
 
 #endif /* _KXLD_SEG_H_ */
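
/* A hedged call-shape sketch for the reworked kxld_seg_populate_linkedit():
 * under KXLD_PIC_KEXTS the caller also passes both relocation arrays and a
 * per-target flag so that the LINKEDIT vmsize covers exported relocations as
 * well as the symbol table.  Variable names are assumptions.
 */
#if KXLD_PIC_KEXTS
    kxld_seg_populate_linkedit(seg, symtab, is_32_bit,
        locrelocs, extrelocs, target_supports_slideable_kexts);
#else
    kxld_seg_populate_linkedit(seg, symtab, is_32_bit);
#endif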
diff --git a/libkern/kxld/kxld_srcversion.c b/libkern/kxld/kxld_srcversion.c
new file mode 100644 (file)
index 0000000..c6d4462
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <string.h>
+#include <mach-o/loader.h>
+#include <sys/types.h>
+
+#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
+#include <AssertMacros.h>
+
+#include "kxld_util.h"
+#include "kxld_srcversion.h"
+
+/*******************************************************************************
+ *******************************************************************************/
+void
+kxld_srcversion_init_from_macho(KXLDsrcversion *srcversion, struct source_version_command *src)
+{
+    check(srcversion);
+    check(src);
+
+    srcversion->version = src->version;
+    srcversion->has_srcversion = TRUE;
+}
+
+/*******************************************************************************
+ *******************************************************************************/
+void
+kxld_srcversion_clear(KXLDsrcversion *srcversion)
+{
+    bzero(srcversion, sizeof(*srcversion));
+}
+
+/*******************************************************************************
+ *******************************************************************************/
+u_long
+kxld_srcversion_get_macho_header_size(void)
+{
+    return sizeof(struct source_version_command);
+}
+
+/*******************************************************************************
+ *******************************************************************************/
+kern_return_t
+kxld_srcversion_export_macho(const KXLDsrcversion *srcversion, u_char *buf, 
+                       u_long *header_offset, u_long header_size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct source_version_command *srcversionhdr = NULL;
+
+    check(srcversion);
+    check(buf);
+    check(header_offset);
+
+    require_action(sizeof(*srcversionhdr) <= header_size - *header_offset, finish,
+                   rval=KERN_FAILURE);
+    srcversionhdr = (struct source_version_command *) ((void *) (buf + *header_offset));
+    *header_offset += sizeof(*srcversionhdr);
+
+    srcversionhdr->cmd = LC_SOURCE_VERSION;
+    srcversionhdr->cmdsize = (uint32_t) sizeof(*srcversionhdr);
+    srcversionhdr->version = srcversion->version;
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
diff --git a/libkern/kxld/kxld_srcversion.h b/libkern/kxld/kxld_srcversion.h
new file mode 100644 (file)
index 0000000..b6cdf38
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KXLD_SRCVERSION_H_
+#define _KXLD_SRCVERSION_H_
+
+#include <sys/types.h>
+#if KERNEL
+#include <libkern/kxld_types.h>
+#else
+#include "kxld_types.h"
+#endif
+
+struct source_version_command;
+typedef struct kxld_srcversion KXLDsrcversion;
+
+struct kxld_srcversion {
+    uint64_t    version;
+    boolean_t   has_srcversion;
+};
+
+/*******************************************************************************
+ * Constructors and destructors
+ *******************************************************************************/
+
+void kxld_srcversion_init_from_macho(KXLDsrcversion *srcversion, struct source_version_command *src)
+__attribute__((nonnull, visibility("hidden")));
+
+void kxld_srcversion_clear(KXLDsrcversion *srcversion)
+__attribute__((nonnull, visibility("hidden")));
+
+/*******************************************************************************
+ * Accessors
+ *******************************************************************************/
+
+u_long kxld_srcversion_get_macho_header_size(void)
+__attribute__((pure, visibility("hidden")));
+
+kern_return_t
+kxld_srcversion_export_macho(const KXLDsrcversion *srcversion, u_char *buf, 
+                       u_long *header_offset, u_long header_size)
+__attribute__((pure, nonnull, visibility("hidden")));
+
+#endif /* _KXLD_SRCVERSION_H_ */
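
/* A hedged usage sketch for the new LC_SOURCE_VERSION support: capture the
 * command while parsing load commands, then re-emit it into the output
 * header.  src, buf, and the offset variables are assumed to come from the
 * surrounding kxld object code.
 */
KXLDsrcversion srcversion;
kxld_srcversion_init_from_macho(&srcversion, src);
if (srcversion.has_srcversion) {
    rval = kxld_srcversion_export_macho(&srcversion, buf,
        &header_offset, header_size);
}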
index 2e9cb16e9cd671d1d6618c0267658b66d366ddd6..d82cd5cce9a884ecd6aad9ab82545e54c2612ccb 100644 (file)
@@ -856,7 +856,7 @@ kxld_sym_export_macho_32(const KXLDSym *sym, u_char *_nl, char *strtab,
     u_long *stroff, u_long strsize)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct nlist *nl = (struct nlist *) _nl;
+    struct nlist *nl = (struct nlist *) ((void *) _nl);
     char *str = NULL;
     long bytes = 0;
 
@@ -897,7 +897,7 @@ kxld_sym_export_macho_64(const KXLDSym *sym, u_char *_nl, char *strtab,
     u_long *stroff, u_long strsize)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct nlist_64 *nl = (struct nlist_64 *) _nl;
+    struct nlist_64 *nl = (struct nlist_64 *) ((void *) _nl);
     char *str = NULL;
     long bytes = 0;
 
index 69cb8cbf7babcc05e0aa99048ef0f5be41c4e3d9..81fe4a4ab349574beee7c157eb7e2f3cf9f19a69 100644 (file)
@@ -28,7 +28,6 @@
 #ifndef _KXLD_SYMBOL_H_
 #define _KXLD_SYMBOL_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
index 6700774f4021e747cc9bb0e28769d9595289fc5a..c5ce51740dce1e47ec0c8882510fda19aa865e90 100644 (file)
@@ -114,7 +114,7 @@ init_macho(KXLDSymtab *symtab, struct symtab_command *src,
     boolean_t is_32_bit __unused)
 {
     kern_return_t rval = KERN_FAILURE;
-       u_long symoff;
+    u_long symoff;
     u_char * macho_or_linkedit = macho;
 
     check(symtab);
@@ -128,7 +128,7 @@ init_macho(KXLDSymtab *symtab, struct symtab_command *src,
 
     /* Initialize the string table */
 
-       if (kernel_linkedit_seg) {
+    if (kernel_linkedit_seg) {
 
        /* If initing the kernel file in memory, we can't trust
         * the symtab offsets directly, because the kernel file has been mapped
@@ -146,13 +146,13 @@ init_macho(KXLDSymtab *symtab, struct symtab_command *src,
         * the base of the linkedit segment.
         */
 
-               symoff = (u_long)(src->symoff - kernel_linkedit_seg->fileoff);
-               symtab->strings = (char *)(uintptr_t)kernel_linkedit_seg->base_addr +
+        symoff = (u_long)(src->symoff - kernel_linkedit_seg->fileoff);
+        symtab->strings = (char *)(uintptr_t)kernel_linkedit_seg->base_addr +
             src->stroff - kernel_linkedit_seg->fileoff;
         macho_or_linkedit = (u_char *)(uintptr_t)kernel_linkedit_seg->base_addr;
-       } else {
-               symoff = (u_long)src->symoff;
-               symtab->strings = (char *) (macho + src->stroff);
+    } else {
+        symoff = (u_long)src->symoff;
+        symtab->strings = (char *) (macho + src->stroff);
     }
 
     symtab->strsize = src->strsize;
@@ -185,7 +185,7 @@ init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms)
     kern_return_t rval = KERN_FAILURE;
     KXLDSym *sym = NULL;
     u_int i = 0;
-    struct nlist *src_syms = (struct nlist *) (macho + offset);
+    struct nlist *src_syms = (struct nlist *) ((void *) (macho + offset));
 
     for (i = 0; i < nsyms; ++i) {
         sym = kxld_array_get_item(&symtab->syms, i);
@@ -212,7 +212,7 @@ init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms)
     kern_return_t rval = KERN_FAILURE;
     KXLDSym *sym = NULL;
     u_int i = 0;
-    struct nlist_64 *src_syms = (struct nlist_64 *) (macho + offset);
+    struct nlist_64 *src_syms = (struct nlist_64 *) ((void *) (macho + offset));
 
     for (i = 0; i < nsyms; ++i) {
         sym = kxld_array_get_item(&symtab->syms, i);
@@ -421,6 +421,8 @@ kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, boolean_t is_32_bit)
         size += nsyms * sizeof(struct nlist_64);
     }
 
+    size = (size + 7) & ~7;
+
     return size;
 }
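
/* Hedged note: (size + 7) & ~7 is the usual round-up-to-8 idiom, e.g.
 * 41 -> 48 and 48 -> 48.  The same rounding is applied to *data_offset in
 * kxld_symtab_export_macho below, so whatever the linker places after the
 * string table stays 8-byte aligned.
 */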
 
@@ -448,7 +450,7 @@ kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf,
 
     require_action(sizeof(*symtabhdr) <= header_size - *header_offset, 
         finish, rval=KERN_FAILURE);
-    symtabhdr = (struct symtab_command *) (buf + *header_offset);
+    symtabhdr = (struct symtab_command *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*symtabhdr);
     
     /* Initialize the symbol table header */
@@ -501,6 +503,8 @@ kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf,
     /* Update the data offset */
     *data_offset += (symtabhdr->nsyms * nlistsize) + stroff;
 
+    *data_offset = (*data_offset + 7) & ~7;
+
     rval = KERN_SUCCESS;
     
 finish:
index a5a0387564fada5a00bac16388a51e16ac6decd8..ff4b557c55007da542ae719f37e60df59091d937 100644 (file)
@@ -28,7 +28,6 @@
 #ifndef _KXLD_SYMTAB_H_
 #define _KXLD_SYMTAB_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
index 2f7a10643142164d3f48371d911a4c7b08287918..67d838fe8f1ba5466a3b440c0322609a3f406927 100644 (file)
@@ -271,7 +271,7 @@ validate_and_swap_macho_32(u_char *file, u_long size
     )
 {
     kern_return_t rval = KERN_FAILURE;
-    struct mach_header *mach_hdr = (struct mach_header *) file;
+    struct mach_header *mach_hdr = (struct mach_header *) ((void *) file);
     struct load_command *load_hdr = NULL;
     struct segment_command *seg_hdr = NULL;
     struct section *sects = NULL;
@@ -325,7 +325,7 @@ validate_and_swap_macho_32(u_char *file, u_long size
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) {
 
         /* Get the load command and size */
-        load_hdr = (struct load_command *) (file + offset);
+        load_hdr = (struct load_command *) ((void *) (file + offset));
         cmd = load_hdr->cmd;
         cmdsize = load_hdr->cmdsize;
 
@@ -382,7 +382,7 @@ validate_and_swap_macho_32(u_char *file, u_long size
                     kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
 
                 /* Swap the relocation entries */
-                relocs = (struct relocation_info *) (file + sects[j].reloff);
+                relocs = (struct relocation_info *) ((void *) (file + sects[j].reloff));
 #if !KERNEL
                 if (swap) {
                     swap_relocation_info(relocs, sects[j].nreloc, 
@@ -412,7 +412,7 @@ validate_and_swap_macho_32(u_char *file, u_long size
 
 #if !KERNEL
             /* Swap the symbol table entries */
-            symtab = (struct nlist *) (file + symtab_hdr->symoff);
+            symtab = (struct nlist *) ((void *) (file + symtab_hdr->symoff));
             if (swap) swap_nlist(symtab, symtab_hdr->nsyms, host_order);
 #endif /* !KERNEL */
 
@@ -442,7 +442,7 @@ validate_and_swap_macho_64(u_char *file, u_long size
     )
 {
     kern_return_t rval = KERN_FAILURE;
-    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) file;
+    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) ((void *) file);
     struct load_command *load_hdr = NULL;
     struct segment_command_64 *seg_hdr = NULL;
     struct section_64 *sects = NULL;
@@ -495,7 +495,7 @@ validate_and_swap_macho_64(u_char *file, u_long size
     /* Validate and potentially swap the load commands */
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) {
         /* Get the load command and size */
-        load_hdr = (struct load_command *) (file + offset);
+        load_hdr = (struct load_command *) ((void *) (file + offset));
         cmd = load_hdr->cmd;
         cmdsize = load_hdr->cmdsize;
 
@@ -513,7 +513,7 @@ validate_and_swap_macho_64(u_char *file, u_long size
         switch(cmd) {
         case LC_SEGMENT_64:
             /* Get and swap the segment header */
-            seg_hdr = (struct segment_command_64 *) load_hdr;
+            seg_hdr = (struct segment_command_64 *) ((void *) load_hdr);
 #if !KERNEL
             if (swap) swap_segment_command_64(seg_hdr, host_order);
 #endif /* !KERNEL */
@@ -551,7 +551,7 @@ validate_and_swap_macho_64(u_char *file, u_long size
                     kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
 
                 /* Swap the relocation entries */
-                relocs = (struct relocation_info *) (file + sects[j].reloff);
+                relocs = (struct relocation_info *) ((void *) (file + sects[j].reloff));
 #if !KERNEL
                 if (swap) {
                     swap_relocation_info(relocs, sects[j].nreloc, 
@@ -581,7 +581,7 @@ validate_and_swap_macho_64(u_char *file, u_long size
 
 #if !KERNEL
             /* Swap the symbol table entries */
-            symtab = (struct nlist_64 *) (file + symtab_hdr->symoff);
+            symtab = (struct nlist_64 *) ((void *) (file + symtab_hdr->symoff));
             if (swap) swap_nlist_64(symtab, symtab_hdr->nsyms, host_order);
 #endif /* !KERNEL */
 
@@ -607,7 +607,7 @@ finish:
 void unswap_macho(u_char *file, enum NXByteOrder host_order, 
     enum NXByteOrder target_order)
 {
-    struct mach_header *hdr = (struct mach_header *) file;
+    struct mach_header *hdr = (struct mach_header *) ((void *) file);
 
     if (!hdr) return;
 
@@ -624,7 +624,7 @@ static void
 unswap_macho_32(u_char *file, enum NXByteOrder host_order, 
     enum NXByteOrder target_order)
 {
-    struct mach_header *mach_hdr = (struct mach_header *) file;
+    struct mach_header *mach_hdr = (struct mach_header *) ((void *) file);
     struct load_command *load_hdr = NULL;
     struct segment_command *seg_hdr = NULL;
     struct section *sects = NULL;
@@ -641,7 +641,7 @@ unswap_macho_32(u_char *file, enum NXByteOrder host_order,
 
     offset = sizeof(*mach_hdr);
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += size) {
-        load_hdr = (struct load_command *) (file + offset);
+        load_hdr = (struct load_command *) ((void *) (file + offset));
         cmd = load_hdr->cmd;
         size = load_hdr->cmdsize;
 
@@ -659,7 +659,7 @@ unswap_macho_32(u_char *file, enum NXByteOrder host_order,
             break;
         case LC_SYMTAB:
             symtab_hdr = (struct symtab_command *) load_hdr;
-            symtab = (struct nlist*) (file + symtab_hdr->symoff);
+            symtab = (struct nlist*) ((void *) (file + symtab_hdr->symoff));
 
             swap_nlist(symtab, symtab_hdr->nsyms, target_order);
             swap_symtab_command(symtab_hdr, target_order);
@@ -680,7 +680,7 @@ static void
 unswap_macho_64(u_char *file, enum NXByteOrder host_order, 
     enum NXByteOrder target_order)
 {
-    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) file;
+    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) ((void *) file);
     struct load_command *load_hdr = NULL;
     struct segment_command_64 *seg_hdr = NULL;
     struct section_64 *sects = NULL;
@@ -697,13 +697,13 @@ unswap_macho_64(u_char *file, enum NXByteOrder host_order,
 
     offset = sizeof(*mach_hdr);
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += size) {
-        load_hdr = (struct load_command *) (file + offset);
+        load_hdr = (struct load_command *) ((void *) (file + offset));
         cmd = load_hdr->cmd;
         size = load_hdr->cmdsize;
 
         switch(cmd) {
         case LC_SEGMENT_64:
-            seg_hdr = (struct segment_command_64 *) load_hdr;
+            seg_hdr = (struct segment_command_64 *) ((void *) load_hdr);
             sects = (struct section_64 *) &seg_hdr[1];
 
             /* We don't need to unswap relocations because this function is
@@ -715,7 +715,7 @@ unswap_macho_64(u_char *file, enum NXByteOrder host_order,
             break;
         case LC_SYMTAB:
             symtab_hdr = (struct symtab_command *) load_hdr;
-            symtab = (struct nlist_64 *) (file + symtab_hdr->symoff);
+            symtab = (struct nlist_64 *) ((void *) (file + symtab_hdr->symoff));
 
             swap_nlist_64(symtab, symtab_hdr->nsyms, target_order);
             swap_symtab_command(symtab_hdr, target_order);
index 9d5720f043a0377179350a39217f532515ce636d..0eb0f2f7a36987bf8273ede84a252a9157d732cb 100644 (file)
 #ifndef _KXLD_UTIL_H_
 #define _KXLD_UTIL_H_
 
-#include <mach/machine.h>
 #include <sys/types.h>
 #if KERNEL
     #include <libkern/kxld_types.h>
+    #include <mach/machine.h>
 #else
     #include <architecture/byte_order.h>
     #include "kxld_types.h"
+
+    /* Get machine.h from the kernel source so we can support all platforms
+     * that the kernel supports. Otherwise we're at the mercy of the host.
+     */
+    #include "../../osfmk/mach/machine.h"
 #endif
 
 /* 64-bit helpers */
index ce64c343e25655b6b742e5b39116d6598a06935a..66f32a0fa52c851864dde89c7914f167fa646a3c 100644 (file)
@@ -78,7 +78,7 @@ kxld_uuid_export_macho(const KXLDuuid *uuid, u_char *buf,
 
     require_action(sizeof(*uuidhdr) <= header_size - *header_offset, finish,
         rval=KERN_FAILURE);
-    uuidhdr = (struct uuid_command *) (buf + *header_offset);
+    uuidhdr = (struct uuid_command *) ((void *) (buf + *header_offset));
     *header_offset += sizeof(*uuidhdr);
 
     uuidhdr->cmd = LC_UUID;
diff --git a/libkern/kxld/kxld_versionmin.c b/libkern/kxld/kxld_versionmin.c
new file mode 100644 (file)
index 0000000..9b4753c
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <string.h>
+#include <mach-o/loader.h>
+#include <sys/types.h>
+
+#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
+#include <AssertMacros.h>
+
+#include "kxld_util.h"
+#include "kxld_versionmin.h"
+
+/*******************************************************************************
+*******************************************************************************/
+void
+kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_command *src)
+{
+    check(versionmin);
+    check(src);
+    check((src->cmd == LC_VERSION_MIN_MACOSX) || (src->cmd == LC_VERSION_MIN_IPHONEOS));
+
+    switch (src->cmd) {
+        case LC_VERSION_MIN_MACOSX:
+            versionmin->platform = kKxldVersionMinMacOSX;
+            break;
+        case LC_VERSION_MIN_IPHONEOS:
+            versionmin->platform = kKxldVersionMiniPhoneOS;
+            break;
+    }
+
+    versionmin->version = src->version;
+    versionmin->has_versionmin = TRUE;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+void
+kxld_versionmin_clear(KXLDversionmin *versionmin)
+{
+    bzero(versionmin, sizeof(*versionmin));
+}
+
+/*******************************************************************************
+*******************************************************************************/
+u_long
+kxld_versionmin_get_macho_header_size(void)
+{
+    return sizeof(struct version_min_command);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t
+kxld_versionmin_export_macho(const KXLDversionmin *versionmin, u_char *buf, 
+    u_long *header_offset, u_long header_size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct version_min_command *versionminhdr = NULL;
+
+    check(versionmin);
+    check(buf);
+    check(header_offset);
+
+    require_action(sizeof(*versionminhdr) <= header_size - *header_offset, finish,
+        rval=KERN_FAILURE);
+    versionminhdr = (struct version_min_command *) ((void *) (buf + *header_offset));
+    bzero(versionminhdr, sizeof(*versionminhdr));
+    *header_offset += sizeof(*versionminhdr);
+
+    switch (versionmin->platform) {
+        case kKxldVersionMinMacOSX:
+            versionminhdr->cmd = LC_VERSION_MIN_MACOSX;
+            break;
+        case kKxldVersionMiniPhoneOS:
+            versionminhdr->cmd = LC_VERSION_MIN_IPHONEOS;
+            break;
+    }
+    versionminhdr->cmdsize = (uint32_t) sizeof(*versionminhdr);
+    versionminhdr->version = versionmin->version;
+    versionminhdr->sdk = 0;
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
diff --git a/libkern/kxld/kxld_versionmin.h b/libkern/kxld/kxld_versionmin.h
new file mode 100644 (file)
index 0000000..3ebcac6
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KXLD_VERSIONMIN_H_
+#define _KXLD_VERSIONMIN_H_
+
+#include <sys/types.h>
+#if KERNEL
+    #include <libkern/kxld_types.h>
+#else
+    #include "kxld_types.h"
+#endif
+
+struct version_min_command;
+typedef struct kxld_versionmin KXLDversionmin;
+
+enum kxld_versionmin_platforms {
+    kKxldVersionMinMacOSX,
+    kKxldVersionMiniPhoneOS
+};
+
+struct kxld_versionmin {
+    enum kxld_versionmin_platforms platform;
+    uint32_t version;
+    boolean_t has_versionmin;
+};
+
+/*******************************************************************************
+* Constructors and destructors
+*******************************************************************************/
+
+void kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_command *src)
+    __attribute__((nonnull, visibility("hidden")));
+
+void kxld_versionmin_clear(KXLDversionmin *versionmin)
+    __attribute__((nonnull, visibility("hidden")));
+
+/*******************************************************************************
+* Accessors
+*******************************************************************************/
+
+u_long kxld_versionmin_get_macho_header_size(void)
+    __attribute__((pure, visibility("hidden")));
+
+kern_return_t
+kxld_versionmin_export_macho(const KXLDversionmin *versionmin, u_char *buf, 
+    u_long *header_offset, u_long header_size)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+#endif /* _KXLD_VERSIONMIN_H_ */
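
/* A hedged usage sketch mirroring the srcversion flow: the platform is picked
 * from LC_VERSION_MIN_MACOSX vs. LC_VERSION_MIN_IPHONEOS at parse time, then
 * the command is re-emitted on export.  vercmd and the offsets are assumptions.
 */
KXLDversionmin versionmin;
kxld_versionmin_init_from_macho(&versionmin, vercmd);
if (versionmin.has_versionmin) {
    rval = kxld_versionmin_export_macho(&versionmin, buf,
        &header_offset, header_size);
}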
+
index e792d3842a29105a0892cbd1d4f0f0f6ab3bd74a..24408145b3d535ab83ab0e941ae8e4917507a403 100644 (file)
@@ -495,6 +495,7 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     symtab = kxld_object_get_symtab(object);
 
     require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS);
+    require_action(super_vtable->is_patched, finish, rval=KERN_FAILURE);
     require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish,
         rval=KERN_FAILURE;
         kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMalformedVTable, 
index 2d86d6882686422ad5067736823c05d2167abacb..55954e9857869b5bc7a862d76622cf46f41b0be9 100644 (file)
@@ -15,13 +15,11 @@ INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} \
         i386
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} \
         i386
-INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} \
-        arm
+
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
-EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 
 DATAFILES = \
        OSAtomic.h      \
@@ -43,7 +41,7 @@ PRIVATE_DATAFILES =         \
        OSKextLibPrivate.h  \
        kext_request_keys.h \
        mkext.h             \
-       prelink.h           \
+       prelink.h           \
        WKdm.h
 
 INSTALL_MI_LIST        =       \
@@ -77,7 +75,8 @@ EXPORT_MI_LIST        =              \
        ${PRIVATE_DATAFILES}   \
        kernel_mach_header.h   \
        kxld.h                 \
-       kxld_types.h
+       kxld_types.h           \
+       stack_protector.h
 
 EXPORT_MI_GEN_LIST = version.h
 
index d585c4175d7e8bfbe997800678f966fc269719f1..98e0eb99b145b63895f10000ca9647b6a6e205be 100644 (file)
@@ -85,8 +85,6 @@ extern Boolean OSCompareAndSwap64(
 
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
-
 /*!
  * @function OSAddAtomic64
  *
@@ -130,8 +128,6 @@ inline static SInt64 OSDecrementAtomic64(volatile SInt64 * address)
     return OSAddAtomic64(-1LL, address);
 }
 
-#endif  /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
-
 #if XNU_KERNEL_PRIVATE
 /* Not to be included in headerdoc.
  *
index 6ecc3548d7f0f7bc7c8595e6e711df5fe56fb14a..34e7544ac38c91dfef1364d60945f992383c61a5 100644 (file)
@@ -346,6 +346,15 @@ __BEGIN_DECLS
  */
 #define kIOPersonalityPublisherKey              "IOPersonalityPublisher"
 
+#if CONFIG_KEC_FIPS
+/*
+ * @define   kAppleTextHashesKey
+ * @abstract A dictionary containing hashes for the corecrypto kext.
+ */
+#define kAppleTextHashesKey                     "AppleTextHashes"
+#endif
+
+
 
 #if PRAGMA_MARK
 /********************************************************************/
@@ -916,6 +925,25 @@ extern const void * gOSKextUnresolved;
 #define OSKextSymbolIsResolved(weak_sym)        \
     (&(weak_sym) != gOSKextUnresolved)
 
+
+#if CONFIG_KEC_FIPS
+
+#if PRAGMA_MARK
+#pragma mark -
+/********************************************************************/
+#pragma mark Kernel External Components for FIPS compliance
+/********************************************************************/
+#endif
+
+// Kernel External Components for FIPS compliance (KEC_FIPS)
+// WARNING - ath_hash is owned by the kernel, do not free
+typedef struct AppleTEXTHash {
+    const int       ath_version;    // version of this structure (value is 1)
+    int             ath_length;     // length of hash data
+    void *          ath_hash;       // hash extracted from AppleTextHashes dict 
+} AppleTEXTHash_t;
+#endif // CONFIG_KEC_FIPS
+
 #endif /* KERNEL */
 
 __END_DECLS
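
/* A hedged sketch of consuming an AppleTEXTHash handed out by the kext
 * subsystem: check the structure version, then read the hash bytes in place.
 * Per the comments above, ath_hash is kernel-owned and must not be freed.
 */
static void
inspect_text_hash_sketch(const AppleTEXTHash_t *ath)
{
    if (ath->ath_version != 1 || ath->ath_hash == NULL) {
        return;                     /* unknown layout or no hash present */
    }
    /* ath->ath_hash points at ath->ath_length bytes extracted from the
     * kext's AppleTextHashes dictionary entry.
     */
}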
index 53fbc392177599df02015e129ca890f2cf1022bc..15f85461ddb44787959693e1aaf06ed251bb0389 100644 (file)
@@ -72,6 +72,28 @@ typedef uint8_t OSKextExcludeLevel;
  */
 #define kOSBundleHelperKey      "OSBundleHelper"
 
+/*!
+ * @define   kOSBundleDeveloperOnlyKey
+ * @abstract A boolean value indicating whether the kext should only load on
+ *           Developer devices.
+ */
+#define kOSBundleDeveloperOnlyKey              "OSBundleDeveloperOnly"
+
+
+/*!
+ * @define   kAppleSecurityExtensionKey
+ * @abstract A boolean value indicating whether the kext registers
+ *           MACF hooks.
+ */
+#define kAppleSecurityExtensionKey     "AppleSecurityExtension"
+
+/*!
+ * @define   kAppleKernelExternalComponentKey
+ * @abstract A boolean value indicating whether the kext is vending kernel
+ *           KPI, and needs special loading behavior.
+ */
+#define kAppleKernelExternalComponentKey       "AppleKernelExternalComponent"
+
 // properties found in the registry root
 #define kOSKernelCPUTypeKey             "OSKernelCPUType"
 #define kOSKernelCPUSubtypeKey          "OSKernelCPUSubtype"
index 6a03a2740ebc894a32ca3f4cd1ee0189ee23f334..0945952bbe56330284381d118d8db49bea5e3e56 100644 (file)
@@ -89,7 +89,7 @@ typedef       struct wide {
 
 typedef SInt32                         OSStatus;
 
-#if defined(__LP64__) && defined(KERNEL)
+#if (defined(__LP64__) || defined (__arm__)) && defined(KERNEL)
 #ifndef ABSOLUTETIME_SCALAR_TYPE
 #define ABSOLUTETIME_SCALAR_TYPE    1
 #endif
index f88b9971b592b33900fbb4567f4ce3d4b649c8b5..68977ce8acd65613d9407b720ad1b8cf9d1ff4bc 100644 (file)
@@ -68,11 +68,11 @@ typedef unsigned int WK_word;
 
 /* the next few are used during compression to write the header */
 #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr)  \
-        (compr_dest_buf[1] = (unsigned int)(qpos_start_addr - compr_dest_buf))
+        (compr_dest_buf[1] = (WK_word)(qpos_start_addr - compr_dest_buf))
 #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \
-        (compr_dest_buf[2] = (unsigned int)(lb_start_addr - compr_dest_buf))
+        (compr_dest_buf[2] = (WK_word)(lb_start_addr - compr_dest_buf))
 #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \
-        (compr_dest_buf[3] = (unsigned int)(lb_end_addr - compr_dest_buf))
+        (compr_dest_buf[3] = (WK_word)(lb_end_addr - compr_dest_buf))
 
 /* the next few are only use during decompression to read the header */
 #define TAGS_AREA_START(decomp_src_buf)       \
index 8045763a19e2ee73c56d1b9658e602f31aff91ed..9b7738bd706c0123e0c628ccdcfd4750616988a2 100644 (file)
@@ -12,7 +12,6 @@ INSTINC_SUBDIRS_I386 =
 
 INSTINC_SUBDIRS_X86_64 = 
 
-INSTINC_SUBDIRS_ARM = 
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
@@ -20,7 +19,6 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 
-EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 
 DATAFILES = \
           OSArray.h \
index ea0acb64845a3e880ef1146a1bc6b85133cecccf..adb7cbf8bdedef4181a122d6fd7dcbbe2794b6b6 100644 (file)
@@ -102,7 +102,11 @@ protected:
     */
     unsigned int updateStamp;
 
+#ifdef XNU_KERNEL_PRIVATE
+protected:
+#else
 private:
+#endif /* XNU_KERNEL_PRIVATE */
     /* Reserved for future use.  (Internal use only)  */
     // ExpansionData * reserved;
     unsigned int fOptions;
@@ -230,6 +234,7 @@ public:
     */
     typedef enum {
         kImmutable  = 0x00000001,
+        kSort       = 0x00000002,
         kMASK       = (unsigned) -1
     } _OSCollectionFlags;
 
index 2b4159604f2af4961e8ed444a209a5cc8cccb570..11c3a3d10371755a636792d42248d01f55166ef2 100644 (file)
@@ -81,7 +81,7 @@ protected:
     unsigned int   capacity;
     unsigned int   capacityIncrement;
 
-    struct ExpansionData { };
+    struct ExpansionData;
     
    /* Reserved for future use. (Internal use only)  */
     ExpansionData * reserved;
@@ -711,6 +711,9 @@ public:
         unsigned char byte,
         unsigned int  numBytes);
 
+
+    void setSerializable(bool serializable);
+
 #ifdef XNU_KERNEL_PRIVATE
 /* Available within xnu source only */
 public:
index d3f0fa23296c12ac8da996204ed4fd265608934a..c8d5edd0c6756386eb7fad72d54365fc44f7463a 100644 (file)
@@ -245,6 +245,7 @@ private:
         unsigned int delayAutounload:1;    // for development
 
         unsigned int CPPInitialized:1;
+        unsigned int jettisonLinkeditSeg:1;
     } flags;
 
 #if PRAGMA_MARK
@@ -388,9 +389,9 @@ private:
     static void recordIdentifierRequest(
         OSString * kextIdentifier);
 
+    virtual OSReturn slidePrelinkedExecutable(void);
     virtual OSReturn loadExecutable(void);
     virtual void     jettisonLinkeditSegment(void);
-    virtual OSReturn removeLinkeditHeaders(kernel_segment_command_t *linkedit);
     static  void     considerDestroyingLinkContext(void);
     virtual OSData * getExecutable(void);
     virtual void     setLinkedExecutable(OSData * anExecutable);
@@ -437,8 +438,6 @@ private:
         OSArray * keys = NULL);
     virtual OSDictionary * copyInfo(OSArray * keys = NULL);
 
-    static  OSData       * copySanitizedKernelImage(void);
-
    /* Logging to user space.
     */
     static OSKextLogSpec setUserSpaceLogFilter(
@@ -573,7 +572,7 @@ public:
     static void     flushNonloadedKexts(Boolean flushPrelinkedKexts);
     static void     setKextdActive(Boolean active = true);
     static void     setDeferredLoadSucceeded(Boolean succeeded = true);
-    static void     considerRebuildOfPrelinkedKernel(OSString * moduleName);
+    static void     considerRebuildOfPrelinkedKernel(void);
 
     virtual bool    setAutounloadEnabled(bool flag);
 
index cb2f9896a0a77f76a87f2cbf862a00ebb183ae37..fe211c7240bd03f20b66362128225533f57f3b49 100644 (file)
@@ -39,6 +39,9 @@ class OSString;
 class OSSymbol;
 class OSDictionary;
 class OSSerialize;
+#ifdef XNU_KERNEL_PRIVATE
+class OSOrderedSet;
+#endif
 
 
 /*!
@@ -54,8 +57,26 @@ class OSSerialize;
 /*! @parseOnly */
 #define APPLE_KEXT_COMPATIBILITY
 
+#ifdef XNU_KERNEL_PRIVATE
+
+#ifdef CONFIG_EMBEDDED
+#define APPLE_KEXT_VTABLE_PADDING   0
+#else /* CONFIG_EMBEDDED */
 /*! @parseOnly */
 #define APPLE_KEXT_VTABLE_PADDING   1
+#endif /* CONFIG_EMBEDDED */
+
+#else /* XNU_KERNEL_PRIVATE */
+#include <TargetConditionals.h>
+
+#if TARGET_OS_EMBEDDED
+#define APPLE_KEXT_VTABLE_PADDING   0
+#else /* TARGET_OS_EMBEDDED */
+/*! @parseOnly */
+#define APPLE_KEXT_VTABLE_PADDING   1
+#endif /* TARGET_OS_EMBEDDED */
+
+#endif /* XNU_KERNEL_PRIVATE */
 
 #if defined(__LP64__)
 /*! @parseOnly */
@@ -64,16 +85,6 @@ class OSSerialize;
 #define APPLE_KEXT_LEGACY_ABI  1
 #endif
 
-#if APPLE_KEXT_VTABLE_PADDING
-/*! @parseOnly */
-#define APPLE_KEXT_PAD_METHOD  virtual
-/*! @parseOnly */
-#define APPLE_KEXT_PAD_IMPL(index)  gMetaClass.reservedCalled(index)
-#else
-#define APPLE_KEXT_PAD_METHOD  static
-#define APPLE_KEXT_PAD_IMPL(index)  
-#endif
-
 #if defined(__LP64__)
 /*! @parseOnly */
 #define APPLE_KEXT_COMPATIBILITY_VIRTUAL
@@ -329,6 +340,7 @@ _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
 }
 
 #else /* !APPLE_KEXT_LEGACY_ABI */
+#if   defined(__i386__) || defined(__x86_64__)
 
 // Slightly less arcane and slightly less evil code to do
 // the same for kexts compiled with the standard Itanium C++
@@ -361,6 +373,9 @@ _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
     }
 }
 
+#else
+#error Unknown architecture.
+#endif /* defined(__i386__) || defined(__x86_64__) */
 
 #endif /* !APPLE_KEXT_LEGACY_ABI */
 
@@ -745,15 +760,22 @@ protected:
         const int    freeWhen) const = 0;
 
 private:
+#if APPLE_KEXT_VTABLE_PADDING
     // Virtual Padding
     virtual void _RESERVEDOSMetaClassBase3();
     virtual void _RESERVEDOSMetaClassBase4();
     virtual void _RESERVEDOSMetaClassBase5();
     virtual void _RESERVEDOSMetaClassBase6();
     virtual void _RESERVEDOSMetaClassBase7();
+#endif
 } APPLE_KEXT_COMPATIBILITY;
 
 
+#ifdef XNU_KERNEL_PRIVATE
+typedef bool (*OSMetaClassInstanceApplierFunction)(const OSObject * instance,
+                                                  void * context);
+#endif /* XNU_KERNEL_PRIVATE */
+
 /*!
  * @class OSMetaClass
  *
@@ -848,10 +870,8 @@ private:
     // Can never be allocated must be created at compile time
     static void * operator new(size_t size);
 
-    struct ExpansionData { };
-
    /* Reserved for future use.  (Internal use only) */
-    ExpansionData *reserved;
+    struct ExpansionData *reserved;
 
    /* superClass Handle to the superclass's meta class. */
     const OSMetaClass *superClassLink;
@@ -1474,7 +1494,6 @@ public:
     */
     const OSMetaClass * getSuperClass() const;
 
-
    /*!
     * @function getKmodName
     *
@@ -1501,6 +1520,7 @@ public:
     * Returns the name of the C++ class managed by this metaclass.
     */
     const char * getClassName() const;
+    const OSSymbol * getClassNameSymbol() const;
 
 
    /*!
@@ -1531,6 +1551,21 @@ public:
     */
     virtual OSObject * alloc() const = 0;
 
+#ifdef XNU_KERNEL_PRIVATE
+    void addInstance(const OSObject * instance, bool super = false) const;     
+    void removeInstance(const OSObject * instance, bool super = false) const;
+    void applyToInstances(OSMetaClassInstanceApplierFunction applier, 
+                          void * context) const;
+    static void applyToInstancesOfClassName(
+                               const OSSymbol * name,
+                               OSMetaClassInstanceApplierFunction  applier,
+                                void * context);
+private:
+    static void applyToInstances(OSOrderedSet * set,
+                                OSMetaClassInstanceApplierFunction  applier,
+                                 void * context);
+public:
+#endif
 
    /* Not to be included in headerdoc.
     *
@@ -1939,9 +1974,13 @@ public:
     * <code>@link OSMetaClassDeclareReservedUsed
     *       OSMetaClassDeclareReservedUsed@/link</code>.
     */
+#if APPLE_KEXT_VTABLE_PADDING
 #define OSMetaClassDeclareReservedUnused(className, index)        \
     private:                                                      \
-    APPLE_KEXT_PAD_METHOD void _RESERVED ## className ## index ()
+    virtual void _RESERVED ## className ## index ()
+#else
+#define OSMetaClassDeclareReservedUnused(className, index)
+#endif
 
 
    /*!
@@ -2001,9 +2040,13 @@ public:
     * <code>@link OSMetaClassDefineReservedUsed
     *       OSMetaClassDefineReservedUsed@/link</code>.
     */
+#if APPLE_KEXT_VTABLE_PADDING
 #define OSMetaClassDefineReservedUnused(className, index)       \
 void className ::_RESERVED ## className ## index ()             \
-    { APPLE_KEXT_PAD_IMPL(index); }
+       { gMetaClass.reservedCalled(index); }
+#else
+#define OSMetaClassDefineReservedUnused(className, index)
+#endif
 
 
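
For context, a minimal sketch of how a kext class consumes these reserved-slot macros; the class name is hypothetical, and with APPLE_KEXT_VTABLE_PADDING defined to 0 (the embedded case above) every one of these lines now compiles away:

    #include <IOKit/IOService.h>

    class com_example_Driver : public IOService
    {
        OSDeclareDefaultStructors(com_example_Driver)

    public:
        /* Reserve vtable slots for future expansion; no-ops when
         * APPLE_KEXT_VTABLE_PADDING == 0. */
        OSMetaClassDeclareReservedUnused(com_example_Driver, 0);
        OSMetaClassDeclareReservedUnused(com_example_Driver, 1);
    };

    OSMetaClassDefineReservedUnused(com_example_Driver, 0);
    OSMetaClassDefineReservedUnused(com_example_Driver, 1);
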
    /*!
index b33ed3c47d0980687a7fcff73906156036b66c21..a24f30e98497318a34bf7ff0ca044aff10a1b79e 100644 (file)
@@ -36,6 +36,10 @@ HISTORY
 
 #include <libkern/c++/OSMetaClass.h>
 
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Woverloaded-virtual"
+#endif
+
 class OSSymbol;
 class OSString;
 
index 5067423a903e7ff80c6c3a9fef0a60493bd374f0..d3ae9e1e1c0faeb8c7b577f31c85fb043279a1a5 100644 (file)
@@ -366,13 +366,21 @@ public:
     virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
 
 
+#ifdef XNU_KERNEL_PRIVATE
    /* OSRuntime only INTERNAL API - DO NOT USE */
    /* Not to be included in headerdoc. */
     // xx-review: this should be removed from the symbol set.
+
     static void checkForPageUnload(
         void * startAddr,
         void * endAddr);
 
+    static unsigned int bsearch(
+       const void *  key,
+       const void *  array,
+       unsigned int  arrayCount,
+       size_t        memberSize);
+#endif /* XNU_KERNEL_PRIVATE */
 
     OSMetaClassDeclareReservedUnused(OSSymbol, 0);
     OSMetaClassDeclareReservedUnused(OSSymbol, 1);
index 38aaa055e8086a352a982f643805dab6fdf8271c..1b8cc587cba569943cd9059f4d54eb4821856c68 100644 (file)
@@ -16,11 +16,15 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 DATAFILES = md5.h sha1.h
 
+PRIVATE_DATAFILES = register_crypto.h sha2.h des.h aes.h aesxts.h
+
 INSTALL_KF_MI_LIST = ${DATAFILES}
 
+INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+
 INSTALL_MI_DIR = libkern/crypto
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} crypto_internal.h
 
 EXPORT_MI_DIR = libkern/crypto
 
diff --git a/libkern/libkern/crypto/aes.h b/libkern/libkern/crypto/aes.h
new file mode 100644 (file)
index 0000000..dc7a16c
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _AES_H
+#define _AES_H
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccn.h>
+
+#define AES_BLOCK_SIZE  16  /* the AES block size in bytes          */
+
+//Unholy HACK: this works because we know the size of the context for every
+//possible corecrypto implementation is less than this.
+#define AES_CBC_CTX_MAX_SIZE (ccn_sizeof_size(sizeof(void *)) + ccn_sizeof_size(AES_BLOCK_SIZE) + ccn_sizeof_size(64*4))
+
+typedef struct{
+       cccbc_ctx_decl(AES_CBC_CTX_MAX_SIZE, ctx);
+} aes_decrypt_ctx;
+
+typedef struct{
+       cccbc_ctx_decl(AES_CBC_CTX_MAX_SIZE, ctx);
+} aes_encrypt_ctx;
+
+typedef struct
+{
+       aes_decrypt_ctx decrypt;
+       aes_encrypt_ctx encrypt;
+} aes_ctx;
+
+
+/* for compatibility with old apis*/
+#define aes_ret     int
+#define aes_good    0
+#define aes_error  -1
+#define aes_rval    aes_ret
+
+
+
+/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */
+/* those in the range 128 <= key_len <= 256 are given in bits       */
+
+aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
+aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
+aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
+
+#if defined (__i386__) || defined (__x86_64__)
+aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, aes_encrypt_ctx cx[1]);
+#endif
+
+aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
+                                        unsigned char *out_blk, aes_encrypt_ctx cx[1]);
+
+
+aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
+aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
+aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
+
+#if defined (__i386__) || defined (__x86_64__)
+aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, aes_decrypt_ctx cx[1]);
+#endif
+
+aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
+                                        unsigned char *out_blk, aes_decrypt_ctx cx[1]);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
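
A minimal usage sketch for the CBC wrappers declared above; the helper name and buffers are illustrative, not part of the header:

    #include <libkern/crypto/aes.h>

    static int
    encrypt_two_blocks(const unsigned char key[16],
                       const unsigned char iv[AES_BLOCK_SIZE],
                       const unsigned char in[2 * AES_BLOCK_SIZE],
                       unsigned char out[2 * AES_BLOCK_SIZE])
    {
        aes_encrypt_ctx ctx;

        if (aes_encrypt_key128(key, &ctx) != aes_good)
            return aes_error;

        /* num_blk counts AES blocks, not bytes */
        return aes_encrypt_cbc(in, iv, 2, out, &ctx);
    }
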
diff --git a/libkern/libkern/crypto/aesxts.h b/libkern/libkern/crypto/aesxts.h
new file mode 100644 (file)
index 0000000..ad1da43
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CRYPTO_AESXTS_H
+#define _CRYPTO_AESXTS_H
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccaes.h>
+#include <corecrypto/ccn.h>
+
+//Unholy HACK: this works because we know the size of the context for every
+//possible corecrypto implementation is less than this.
+#define AES_XTS_CTX_MAX_SIZE (ccn_sizeof_size(3*sizeof(void *)) + 2*ccn_sizeof_size(128*4) + ccn_sizeof_size(16))
+
+typedef struct {
+       ccxts_ctx_decl(AES_XTS_CTX_MAX_SIZE, enc);
+       ccxts_ctx_decl(AES_XTS_CTX_MAX_SIZE, dec);
+} symmetric_xts;
+
+
+/*
+ * These are the interfaces required for XTS-AES support
+ */
+
+uint32_t
+xts_start(uint32_t cipher, // ignored - we're doing this for xts-aes only
+                 const uint8_t *IV, // ignored
+                 const uint8_t *key1, int keylen,
+                 const uint8_t *key2, int tweaklen, // both keys are the same size for xts
+                 uint32_t num_rounds, // ignored
+                 uint32_t options,    // ignored
+                 symmetric_xts *xts);
+
+int xts_encrypt(const uint8_t *pt, unsigned long ptlen,
+                       uint8_t *ct,
+                               const uint8_t *tweak, // this can be considered the sector IV for this use
+                       symmetric_xts *xts);
+
+int xts_decrypt(const uint8_t *ct, unsigned long ptlen,
+                       uint8_t *pt,
+                               const uint8_t *tweak, // this can be considered the sector IV for this use
+                       symmetric_xts *xts);
+
+void xts_done(symmetric_xts *xts);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
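
A sketch of the expected calling sequence for the XTS interface above; the 32-byte key sizes and the sector-number tweak are illustrative assumptions:

    #include <libkern/crypto/aesxts.h>

    static int
    encrypt_sector(const uint8_t key1[32], const uint8_t key2[32],
                   const uint8_t tweak[16],      /* e.g. sector number */
                   const uint8_t *pt, uint8_t *ct, unsigned long len)
    {
        symmetric_xts xts;
        int rv;

        if (xts_start(0 /* cipher: ignored */, NULL /* IV: ignored */,
                      key1, 32, key2, 32,
                      0 /* rounds: ignored */, 0 /* options: ignored */,
                      &xts) != 0)
            return -1;

        rv = xts_encrypt(pt, len, ct, tweak, &xts);
        xts_done(&xts);
        return rv;
    }
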
diff --git a/libkern/libkern/crypto/crypto_internal.h b/libkern/libkern/crypto/crypto_internal.h
new file mode 100644 (file)
index 0000000..82c98b1
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* To access the corecrypto functions */
+#ifndef _CRYPTO_CRYPTO_INTERNAL_H_
+#define _CRYPTO_CRYPTO_INTERNAL_H_
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#include <libkern/crypto/register_crypto.h>
+
+extern crypto_functions_t g_crypto_funcs;
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /*_CRYPTO_CRYPTO_INTERNAL_H_*/
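
The header is tiny because all it really exports is the function table. A hedged sketch of how an in-kernel shim might dispatch a one-shot digest through it, assuming corecrypto has already registered itself:

    #include <libkern/crypto/crypto_internal.h>

    static void
    sha1_oneshot(const void *data, unsigned long len,
                 void *digest /* 20 bytes */)
    {
        const struct ccdigest_info *di = g_crypto_funcs->ccsha1_di;

        /* In practice callers can assume registration happened at boot. */
        g_crypto_funcs->ccdigest_fn(di, len, data, digest);
    }
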
diff --git a/libkern/libkern/crypto/des.h b/libkern/libkern/crypto/des.h
new file mode 100644 (file)
index 0000000..960e60e
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CRYPTO_DES_H
+#define _CRYPTO_DES_H
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccdes.h>
+#include <corecrypto/ccn.h>
+
+/* must be 32bit quantity */
+#define DES_LONG u_int32_t
+
+typedef unsigned char des_cblock[8];
+
+/* Unholy hack: this is currently the size for the only implementation of DES in corecrypto */
+#define DES_ECB_CTX_MAX_SIZE (64*4)
+#define DES_CBC_CTX_MAX_SIZE (ccn_sizeof_size(sizeof(struct ccmode_ecb)) + ccn_sizeof_size(CCDES_BLOCK_SIZE) + ccn_sizeof_size(DES_ECB_CTX_MAX_SIZE))
+#define DES3_ECB_CTX_MAX_SIZE (64*4*3)
+#define DES3_CBC_CTX_MAX_SIZE (ccn_sizeof_size(sizeof(struct ccmode_ecb)) + ccn_sizeof_size(CCDES_BLOCK_SIZE) + ccn_sizeof_size(DES3_ECB_CTX_MAX_SIZE))
+
+
+typedef struct{
+       ccecb_ctx_decl(DES_ECB_CTX_MAX_SIZE, enc);
+       ccecb_ctx_decl(DES_ECB_CTX_MAX_SIZE, dec);
+} des_ecb_key_schedule;
+
+typedef struct{
+       cccbc_ctx_decl(DES_CBC_CTX_MAX_SIZE, enc);
+       cccbc_ctx_decl(DES_CBC_CTX_MAX_SIZE, dec);
+} des_cbc_key_schedule;
+
+typedef struct{
+       ccecb_ctx_decl(DES3_ECB_CTX_MAX_SIZE, enc);
+       ccecb_ctx_decl(DES3_ECB_CTX_MAX_SIZE, dec);
+} des3_ecb_key_schedule;
+
+typedef struct{
+       cccbc_ctx_decl(DES3_CBC_CTX_MAX_SIZE, enc);
+       cccbc_ctx_decl(DES3_CBC_CTX_MAX_SIZE, dec);
+} des3_cbc_key_schedule;
+
+/* Only here for backward compatibility with smb kext */
+typedef des_ecb_key_schedule des_key_schedule[1];
+#define des_set_key des_ecb_key_sched
+
+#define DES_ENCRYPT    1
+#define DES_DECRYPT    0
+
+
+/* Single DES ECB - 1 block */
+int des_ecb_key_sched(des_cblock *key, des_ecb_key_schedule *ks);
+void des_ecb_encrypt(des_cblock *in, des_cblock *out, des_ecb_key_schedule *ks, int encrypt);
+
+/* Triple DES ECB - 1 block */
+int des3_ecb_key_sched(des_cblock *key, des3_ecb_key_schedule *ks);
+void des3_ecb_encrypt(des_cblock *block, des_cblock *, des3_ecb_key_schedule *ks, int encrypt);
+
+/* Single DES CBC */
+int des_cbc_key_sched(des_cblock *key, des_cbc_key_schedule *ks);
+void des_cbc_encrypt(des_cblock *in, des_cblock *out, int32_t len,
+                                        des_cbc_key_schedule *ks, des_cblock *iv, des_cblock *retiv, int encrypt);
+
+/* Triple DES CBC */
+int des3_cbc_key_sched(des_cblock *key, des3_cbc_key_schedule *ks);
+void des3_cbc_encrypt(des_cblock *in, des_cblock *out, int32_t len,
+                                         des3_cbc_key_schedule *ks, des_cblock *iv, des_cblock *retiv, int encrypt);
+
+/* Single DES CBC-MAC */
+void des_cbc_cksum(des_cblock *in, des_cblock *out, int len, des_cbc_key_schedule *ks);
+
+void des_fixup_key_parity(des_cblock *key);
+int des_is_weak_key(des_cblock *key);
+// int des_set_key(des_cblock *, des_key_schedule); // Unsupported KPI.
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
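
For orientation, a minimal sketch of single-block triple-DES ECB through the schedule types above; treating the 24 bytes of keying material as three des_cblocks, and a nonzero return meaning failure, are assumptions:

    #include <libkern/crypto/des.h>

    static void
    des3_one_block(des_cblock key[3], des_cblock *in, des_cblock *out)
    {
        des3_ecb_key_schedule ks;

        if (des3_ecb_key_sched(key, &ks) != 0)
            return;                     /* assumed: weak or invalid key */

        des3_ecb_encrypt(in, out, &ks, DES_ENCRYPT);
    }
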
diff --git a/libkern/libkern/crypto/register_crypto.h b/libkern/libkern/crypto/register_crypto.h
new file mode 100644 (file)
index 0000000..d6647db
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CRYPTO_REGISTER_CRYPTO_H_
+#define _CRYPTO_REGISTER_CRYPTO_H_
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#include <corecrypto/ccdigest.h>
+#include <corecrypto/cchmac.h>
+#include <corecrypto/ccmode.h>
+#include <corecrypto/ccrc4.h>
+
+/* Function types */
+
+/* digests */
+typedef void (*ccdigest_init_fn_t)(const struct ccdigest_info *di, ccdigest_ctx_t ctx);
+typedef void (*ccdigest_update_fn_t)(const struct ccdigest_info *di, ccdigest_ctx_t ctx,
+                                  unsigned long len, const void *data);
+typedef void (*ccdigest_final_fn_t)(const struct ccdigest_info *di, ccdigest_ctx_t ctx,
+                                     void *digest);
+typedef void (*ccdigest_fn_t)(const struct ccdigest_info *di, unsigned long len,
+                           const void *data, void *digest);
+
+/* hmac */
+typedef void (*cchmac_init_fn_t)(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                              unsigned long key_len, const void *key);
+typedef void (*cchmac_update_fn_t)(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                                unsigned long data_len, const void *data);
+typedef void (*cchmac_final_fn_t)(const struct ccdigest_info *di, cchmac_ctx_t ctx,
+                               unsigned char *mac);
+
+typedef void (*cchmac_fn_t)(const struct ccdigest_info *di, unsigned long key_len,
+                         const void *key, unsigned long data_len, const void *data,
+                         unsigned char *mac);
+
+/* pbkdf2 */
+typedef void (*ccpbkdf2_hmac_fn_t)(const struct ccdigest_info *di,
+                                unsigned long passwordLen, const void *password,
+                                unsigned long saltLen, const void *salt,
+                                unsigned long iterations,
+                                unsigned long dkLen, void *dk);
+
+/* des weak key testing */
+typedef int (*ccdes_key_is_weak_fn_t)(void *key, unsigned long  length);
+typedef void (*ccdes_key_set_odd_parity_fn_t)(void *key, unsigned long length);
+
+
+typedef        void (*ccpad_xts_decrypt_fn_t)(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                                                  unsigned long nbytes, const void *in, void *out);
+
+typedef        void (*ccpad_xts_encrypt_fn_t)(const struct ccmode_xts *xts, ccxts_ctx *ctx,
+                                          unsigned long nbytes, const void *in, void *out);
+
+
+typedef struct crypto_functions {
+    /* digests common functions */
+    ccdigest_init_fn_t ccdigest_init_fn;
+    ccdigest_update_fn_t ccdigest_update_fn;
+    ccdigest_final_fn_t ccdigest_final_fn;
+    ccdigest_fn_t ccdigest_fn;
+    /* digest implementations */
+    const struct ccdigest_info * ccmd5_di;
+    const struct ccdigest_info * ccsha1_di;
+    const struct ccdigest_info * ccsha256_di;
+    const struct ccdigest_info * ccsha384_di;
+    const struct ccdigest_info * ccsha512_di;
+    
+    /* hmac common function */
+    cchmac_init_fn_t cchmac_init_fn;
+    cchmac_update_fn_t cchmac_update_fn;
+    cchmac_final_fn_t cchmac_final_fn;
+    cchmac_fn_t cchmac_fn;
+    
+    /* ciphers modes implementations */
+    /* AES, ecb, cbc and xts */
+    const struct ccmode_ecb *ccaes_ecb_encrypt;
+    const struct ccmode_ecb *ccaes_ecb_decrypt;
+    const struct ccmode_cbc *ccaes_cbc_encrypt;
+    const struct ccmode_cbc *ccaes_cbc_decrypt;
+    const struct ccmode_xts *ccaes_xts_encrypt;
+    const struct ccmode_xts *ccaes_xts_decrypt;
+    /* DES, ecb and cbc */
+    const struct ccmode_ecb *ccdes_ecb_encrypt;
+    const struct ccmode_ecb *ccdes_ecb_decrypt;
+    const struct ccmode_cbc *ccdes_cbc_encrypt;
+    const struct ccmode_cbc *ccdes_cbc_decrypt;
+    /* Triple DES, ecb and cbc */
+    const struct ccmode_ecb *cctdes_ecb_encrypt;
+    const struct ccmode_ecb *cctdes_ecb_decrypt;
+    const struct ccmode_cbc *cctdes_cbc_encrypt;
+    const struct ccmode_cbc *cctdes_cbc_decrypt;
+    /* RC4 */
+       const struct ccrc4_info *ccrc4_info;
+       /* Blowfish - ECB only */
+    const struct ccmode_ecb *ccblowfish_ecb_encrypt;
+    const struct ccmode_ecb *ccblowfish_ecb_decrypt;
+       /* CAST - ECB only */
+    const struct ccmode_ecb *cccast_ecb_encrypt;
+    const struct ccmode_ecb *cccast_ecb_decrypt;
+       /* DES key helper functions */
+       ccdes_key_is_weak_fn_t ccdes_key_is_weak_fn;
+       ccdes_key_set_odd_parity_fn_t ccdes_key_set_odd_parity_fn;
+       /* XTS padding functions */
+       ccpad_xts_encrypt_fn_t ccpad_xts_encrypt_fn;
+       ccpad_xts_decrypt_fn_t ccpad_xts_decrypt_fn;
+} *crypto_functions_t;
+
+int register_crypto_functions(const crypto_functions_t funcs);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /*_CRYPTO_REGISTER_CRYPTO_H_*/
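
A sketch of the registration side, i.e. what the corecrypto kernel extension's start routine presumably does. Only the struct members and register_crypto_functions() come from this header; the routine name, the error mapping, and the subset of fields filled in are assumptions:

    #include <mach/kmod.h>
    #include <libkern/crypto/register_crypto.h>
    #include <corecrypto/ccsha1.h>

    static struct crypto_functions kpis;

    kern_return_t
    corecrypto_kext_start(kmod_info_t *ki __unused, void *d __unused)
    {
        kpis.ccsha1_di        = ccsha1_di();    /* pick an implementation */
        kpis.ccdigest_init_fn = &ccdigest_init;
        /* ... fill in the remaining members the same way ... */

        return register_crypto_functions(&kpis) ? KERN_FAILURE : KERN_SUCCESS;
    }
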
diff --git a/libkern/libkern/crypto/sha2.h b/libkern/libkern/crypto/sha2.h
new file mode 100644 (file)
index 0000000..7908f7e
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CRYPTO_SHA2_H__
+#define _CRYPTO_SHA2_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <corecrypto/ccsha2.h>
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+#define SHA256_BLOCK_LENGTH            CCSHA256_BLOCK_SIZE
+#define SHA256_DIGEST_LENGTH   CCSHA256_OUTPUT_SIZE
+#define SHA256_DIGEST_STRING_LENGTH    (SHA256_DIGEST_LENGTH * 2 + 1)
+#define SHA384_BLOCK_LENGTH            CCSHA512_BLOCK_SIZE
+#define SHA384_DIGEST_LENGTH   CCSHA384_OUTPUT_SIZE
+#define SHA384_DIGEST_STRING_LENGTH    (SHA384_DIGEST_LENGTH * 2 + 1)
+#define SHA512_BLOCK_LENGTH            CCSHA512_BLOCK_SIZE
+#define SHA512_DIGEST_LENGTH   CCSHA512_OUTPUT_SIZE
+#define SHA512_DIGEST_STRING_LENGTH    (SHA512_DIGEST_LENGTH * 2 + 1)
+
+typedef struct {
+       ccdigest_ctx_decl(CCSHA256_STATE_SIZE, CCSHA256_BLOCK_SIZE, ctx);
+} SHA256_CTX;
+
+typedef struct SHA512_CTX {
+	ccdigest_ctx_decl(CCSHA512_STATE_SIZE, CCSHA512_BLOCK_SIZE, ctx);
+} SHA512_CTX;
+
+typedef SHA512_CTX SHA384_CTX;
+
+/*** SHA-256/384/512 Function Prototypes ******************************/
+
+void SHA256_Init(SHA256_CTX *ctx);
+void SHA256_Update(SHA256_CTX *ctx, const void *data, size_t len);
+void SHA256_Final(void *digest, SHA256_CTX *ctx);
+
+void SHA384_Init(SHA384_CTX *ctx);
+void SHA384_Update(SHA384_CTX *ctx, const void *data, size_t len);
+void SHA384_Final(void *digest, SHA384_CTX *ctx);
+
+void SHA512_Init(SHA512_CTX *ctx);
+void SHA512_Update(SHA512_CTX *ctx, const void *data, size_t len);
+void SHA512_Final(void *digest, SHA512_CTX *ctx);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _CRYPTO_SHA2_H__ */
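
Callers use these like any conventional Init/Update/Final digest API; a minimal example, fully grounded in the header above:

    #include <libkern/crypto/sha2.h>

    static void
    sha256_oneshot(const void *buf, size_t len,
                   unsigned char md[SHA256_DIGEST_LENGTH])
    {
        SHA256_CTX ctx;

        SHA256_Init(&ctx);
        SHA256_Update(&ctx, buf, len);
        SHA256_Final(md, &ctx);
    }
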
index 6588b9b09a938f5286af86eca38f0bf8962769d0..59218993e7c9eef557df8733821cdf77dd1e7ea6 100644 (file)
@@ -45,6 +45,8 @@ extern "C" {
 
 #include <mach/mach_types.h>
 #include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+#include <mach-o/reloc.h>
 
 #if    !KERNEL
 #error this header for kernel use only
@@ -55,15 +57,19 @@ extern "C" {
 typedef struct mach_header_64  kernel_mach_header_t;
 typedef struct segment_command_64 kernel_segment_command_t;
 typedef struct section_64              kernel_section_t;
+typedef struct nlist_64         kernel_nlist_t;
 
-#define LC_SEGMENT_KERNEL              LC_SEGMENT_64
+#define MH_MAGIC_KERNEL         MH_MAGIC_64
+#define LC_SEGMENT_KERNEL       LC_SEGMENT_64
 
 #else
 
 typedef struct mach_header             kernel_mach_header_t;
 typedef struct segment_command kernel_segment_command_t;
 typedef struct section                 kernel_section_t;
+typedef struct nlist            kernel_nlist_t;
 
+#define MH_MAGIC_KERNEL         MH_MAGIC
 #define LC_SEGMENT_KERNEL              LC_SEGMENT
 #define SECT_CONSTRUCTOR               "__constructor"
 #define SECT_DESTRUCTOR                        "__destructor"
@@ -95,16 +101,9 @@ kernel_section_t *getsectbynamefromheader(
 void *getsectdatafromheader(kernel_mach_header_t *, const char *, const char *, unsigned long *);
 kernel_section_t *firstsect(kernel_segment_command_t *sgp);
 kernel_section_t *nextsect(kernel_segment_command_t *sgp, kernel_section_t *sp);
+void *getcommandfromheader(kernel_mach_header_t *, uint32_t);
 void *getuuidfromheader(kernel_mach_header_t *, unsigned long *);
 
-#if MACH_KDB
-boolean_t getsymtab(kernel_mach_header_t *header,
-                    vm_offset_t *symtab,
-                    int *nsyms,
-                    vm_offset_t *strtab,
-                    vm_size_t *strtabsize);
-#endif
-
 #ifdef __cplusplus
 }
 #endif
index 6b908f1331da116743728ffc1d8e564947fc9175..66b50e6c2eba69141e14e2f82e2d2abd407c83fe 100644 (file)
@@ -88,16 +88,6 @@ extern "C" {
  */
 #define kKextRequestPredicateGetLoaded             "Get Loaded Kext Info"
 
-/* Predicate: Get Kernel Image
- * Argument:  None
- * Response:  Raw bytes + length containing the sanitized image of the kernel.
- * Op result: OSReturn indicating any errors in processing (see OSKextLib.h)
- *
- * Retrieves a sanitized image of the running kernel for use in generating
- * debug symbols in user space.
- */
-#define kKextRequestPredicateGetKernelImage    "Get Kernel Image"
-
 /* Predicate: Get Kernel Load Address
  * Argument:  None
  * Response:  OSNumber containing kernel load address.
index 1578b58595493e1b12225096b6cf054f3913da12..0aad7abbe592c97d6df9b6df7cb1fc5755c54e71 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2008, 2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
     #define KXLD_USER_OR_LP64 1
 #endif
 
-/* For ppc-specific linking code */
-#if (!KERNEL || __ppc__)
-    #define KXLD_USER_OR_PPC 1
-#endif
-
 /* For i386-specific linking code */
 #if (!KERNEL || __i386__)
     #define KXLD_USER_OR_I386 1
 #endif
 
 /* For linking code specific to architectures that support common symbols */
-#if (!KERNEL || __i386__ || __ppc__)
+#if (!KERNEL || __i386__)
     #define KXLD_USER_OR_COMMON 1
 #endif
 
 /* For linking code specific to architectures that support strict patching */
-#if (!KERNEL || !(__i386__ || __ppc__))
+#if (!KERNEL || !__i386__)
     #define KXLD_USER_OR_STRICT_PATCHING 1
 #endif
 
 /* For linking code specific to architectures that use MH_OBJECT */
-#if (!KERNEL || __i386__ || __ppc__ || __arm__)
+#if (!KERNEL || __i386__)
     #define KXLD_USER_OR_OBJECT 1
 #endif
 
 /* For linking code specific to architectures that use MH_KEXT_BUNDLE */
-#if (!KERNEL || __i386__ || __x86_64__ || __arm__)
-    #define KXLD_USER_OR_BUNDLE 1
-#endif
+#define KXLD_USER_OR_BUNDLE 1
 
 /* We no longer need to generate our own GOT for any architectures, but the code
  * required to do this will be saved inside this macro.
  */
 #define KXLD_USER_OR_GOT 0
 
+/* for building the dysymtab command generation into the dylib */
+#if (!KERNEL)
+    #define KXLD_PIC_KEXTS 1
+#endif
+
 /*******************************************************************************
 * Types
 *******************************************************************************/
@@ -117,6 +115,7 @@ typedef uint64_t kxld_size_t;
 /* Flags for general linker behavior */
 enum kxld_flags {
     kKxldFlagDefault = 0x0,
+    kKXLDFlagIncludeRelocs = 0x01
 };
 typedef enum kxld_flags KXLDFlags;
 
index e4d4ce152c0c06048ffdf85499eb806fe09b1a9c..c5f944fa06d5efcd2c766d63ed70b7879314a668 100644 (file)
@@ -12,7 +12,6 @@ INSTINC_SUBDIRS_I386 =
 
 INSTINC_SUBDIRS_X86_64 =
 
-INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
@@ -20,7 +19,6 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 
-EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 
 DATAFILES = \
           OSByteOrder.h
diff --git a/libkern/libkern/stack_protector.h b/libkern/libkern/stack_protector.h
new file mode 100644 (file)
index 0000000..d2d3c82
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _STACK_PROTECTOR_H
+#define _STACK_PROTECTOR_H
+
+/* Set up by machine-dependent code early in boot */
+extern unsigned long __stack_chk_guard;
+
+/* Called as a consequence of stack corruption */
+extern void __stack_chk_fail(void);
+
+#endif // _STACK_PROTECTOR_H
+
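
Nothing calls these directly; the compiler does. Roughly, a function built with -fstack-protector behaves as if it were written like this (a conceptual sketch, not literal emitted code):

    void
    example(void)
    {
        unsigned long canary = __stack_chk_guard;   /* prologue */
        char buf[64];

        /* ... body that might overflow buf ... */

        if (canary != __stack_chk_guard)            /* epilogue */
            __stack_chk_fail();                     /* does not return */
    }
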
index 3a26162bdd90104717bb01bba5ff1bb4d5b9d2d0..7865f359e0f6d0e7cb7154ea5ea32b33c6538fd6 100644 (file)
@@ -334,9 +334,9 @@ struct name {                                                               \
 #define RB_PLACEHOLDER NULL
 #define RB_ENTRY(type)                                                 \
 struct {                                                               \
+       struct type *rbe_parent;        /* parent element */            \
        struct type *rbe_left;          /* left element */              \
        struct type *rbe_right;         /* right element */             \
-       struct type *rbe_parent;        /* parent element */            \
 }
 
 #define RB_COLOR_MASK                  (uintptr_t)0x1
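
The swap only moves rbe_parent to the front of the linkage struct; embedding code is unaffected. A typical consumer, with illustrative names (header path assumed):

    #include <libkern/tree.h>

    struct node {
        RB_ENTRY(node) link;    /* parent/left/right linkage */
        int key;
    };
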
index dad8a7e2e455adafe679329f63d871cbea293a4e..21c4d06aea56f8fd063ae011f1a10d72a5f1b646 100644 (file)
@@ -1,55 +1,35 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
  *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, and the entire permission notice in its entirety,
- *    including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- *    products derived from this software without specific prior
- *    written permission.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/*******************************************************************************
-* NOTE: This implementation of the stack check routines required by the GCC
-* -fstack-protector flag is only safe for kernel extensions.
-*******************************************************************************/
-
-#include <sys/types.h>
-#include <sys/random.h>
+#include <libkern/stack_protector.h>
 #include <kern/debug.h>
 
-long __stack_chk_guard[8];
-void __stack_chk_fail(void);
-
-static void __guard_setup(void) __attribute__((constructor));
-
-static void
-__guard_setup(void)
-{
-    /* Cannot report failure.  */
-    read_random(__stack_chk_guard, sizeof(__stack_chk_guard));
-}
+unsigned long __stack_chk_guard = 0UL;
 
 void
 __stack_chk_fail(void)
index 13a3f196902bfedcbeda695c1da278848ede482f..f20633ddb85a5175a624a2932b6e22161aaa4972 100644 (file)
@@ -13,7 +13,6 @@ INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
-INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
@@ -21,7 +20,7 @@ EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
 
-EXPINC_SUBDIRS_ARM = \
+
 
 # uuid.h is now installed by bsd/uuid/Makefile
 DATAFILES = \
index ffc5c80592cd4dae8251e5c36af114cdf4ecdf52..217b6b667a4fd8894ef9df168f6bd1321368fc0e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * %Begin-Header%
  * Redistribution and use in source and binary forms, with or without
@@ -40,9 +40,7 @@
 #include <sys/systm.h>
 #include <sys/time.h>
 
-#include <net/if.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
+extern int uuid_get_ethernet(u_int8_t *);
 
 UUID_DEFINE(UUID_NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 
@@ -50,25 +48,8 @@ static void
 read_node(uint8_t *node)
 {
 #if NETWORKING
-       struct ifnet *ifp;
-       struct sockaddr_dl *sdl;
-
-       ifnet_head_lock_shared();
-       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-               ifnet_lock_shared(ifp);
-               IFA_LOCK_SPIN(ifp->if_lladdr);
-               sdl = (struct sockaddr_dl *)ifp->if_lladdr->ifa_addr;
-               if (sdl->sdl_type == IFT_ETHER) {
-                       memcpy(node, LLADDR(sdl), 6);
-                       IFA_UNLOCK(ifp->if_lladdr);
-                       ifnet_lock_done(ifp);
-                       ifnet_head_done();
-                       return;
-               }
-               IFA_UNLOCK(ifp->if_lladdr);
-               ifnet_lock_done(ifp);
-       }
-       ifnet_head_done();
+       if (uuid_get_ethernet(node) == 0)
+               return;
 #endif /* NETWORKING */
 
        read_random(node, 6);
index 30713ef3db6859f34b36454e0042804c0e38d95b..f3a7e617c8c2f563ec2af582055b8f462b32f6e8 100644 (file)
 
        .globl _OSCompareAndSwap
 _OSCompareAndSwap: #;oldValue, newValue, ptr
+#if    DEBUG
+       test    $3, %rdx
+       jz      1f
+       ud2
+1:
+#endif 
        movl             %edi, %eax
        lock
        cmpxchgl        %esi, (%rdx)    #; CAS (eax is an implicit operand)
@@ -48,6 +54,12 @@ _OSCompareAndSwap: #;oldValue, newValue, ptr
 
 _OSCompareAndSwap64:
 _OSCompareAndSwapPtr: #;oldValue, newValue, ptr
+#if    DEBUG
+       test    $7, %rdx
+       jz      1f
+       ud2
+1:
+#endif
        movq            %rdi, %rax
        lock
        cmpxchgq        %rsi, (%rdx)    #; CAS (rax is an implicit operand)
@@ -63,6 +75,12 @@ _OSCompareAndSwapPtr: #;oldValue, newValue, ptr
        .globl  _OSAddAtomic64
 _OSAddAtomic64:
 _OSAddAtomicLong:
+#if    DEBUG
+       test    $7, %rsi
+       jz      1f
+       ud2
+1:
+#endif
        lock
        xaddq   %rdi, (%rsi)            #; Atomic exchange and add
        movq    %rdi, %rax;
@@ -75,6 +93,12 @@ _OSAddAtomicLong:
 
        .globl  _OSAddAtomic
 _OSAddAtomic:
+#if    DEBUG
+       test    $3, %rsi
+       jz      1f
+       ud2
+1:
+#endif
        lock
        xaddl   %edi, (%rsi)            #; Atomic exchange and add
        movl    %edi, %eax;
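
The new DEBUG-only preambles trap with ud2 when the operand address is misaligned. In C terms, the contract they enforce looks like this (struct and field names are illustrative):

    #include <libkern/OSAtomic.h>

    struct stats {
        SInt32 hits;        /* naturally 4-byte aligned: OK for OSAddAtomic */
        SInt64 bytes;       /* must be 8-byte aligned for OSAddAtomic64 */
    };

    static void
    bump(struct stats *s)
    {
        OSAddAtomic(1, &s->hits);       /* ud2 on DEBUG if not 4-byte aligned */
        OSAddAtomic64(1, &s->bytes);    /* ud2 on DEBUG if not 8-byte aligned */
    }
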
index 14c2f30d789f0bc0931eae871ecddc6bed3e1765..c12296b5cca071c6a1d79aa777fb09fc88d210a0 100644 (file)
@@ -280,7 +280,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif
 
 /* Diagnostic functions */
-#ifdef DEBUG
+#if defined(DEBUG) && !defined(KERNEL)
 #  include <stdio.h>
    extern int z_verbose;
    extern void z_error    OF((char *m));
@@ -298,7 +298,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #  define Tracec(c,x)
 #  define Tracecv(c,x)
 #endif
-
+#undef DEBUG
 
 #ifndef NO_ZCFUNCS
 voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
index 9ad023c1a71a5a65ba1f7068599e3f7d14d8c1c1..f24547c950e991f647d521975f436a72d00a7b04 100644 (file)
@@ -40,6 +40,10 @@ extern "C" {
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOCatalogue.h>
 
+#if __x86_64__
+#define KASLR_KEXT_DEBUG 0
+#endif
+
 #if PRAGMA_MARK
 #pragma mark Bootstrap Declarations
 #endif
@@ -100,20 +104,6 @@ static const char * sKernelComponentNames[] = {
    "com.apple.driver.AppleNMI",
    "com.apple.iokit.IOSystemManagementFamily",
    "com.apple.iokit.ApplePlatformFamily",
-   
-#if defined(__i386__) || defined(__arm__)
-   /* These ones are not supported on x86_64 or any newer platforms.
-    * They must be version 7.9.9; check by "com.apple.kernel.", with
-    * the trailing period; "com.apple.kernel" always represents the
-    * current kernel version.
-    */
-    "com.apple.kernel.6.0",
-    "com.apple.kernel.bsd",
-    "com.apple.kernel.iokit",
-    "com.apple.kernel.libkern",
-    "com.apple.kernel.mach",
-#endif
-
    NULL
 };
 
@@ -142,6 +132,7 @@ private:
         OSData   * deviceTreeData);
     
     OSReturn loadKernelComponentKexts(void);
+    void     loadKernelExternalComponents(void);
     void     readBuiltinPersonalities(void);
 
     void     loadSecurityExtensions(void);
@@ -207,6 +198,7 @@ KLDBootstrap::readStartupExtensions(void)
     }
 
     loadKernelComponentKexts();
+    loadKernelExternalComponents();
     readBuiltinPersonalities();
     OSKext::sendAllKextPersonalitiesToCatalog();
 
@@ -234,7 +226,7 @@ KLDBootstrap::readPrelinkedExtensions(
     void                      * prelinkData             = NULL;  // see code
     vm_size_t                   prelinkLength           = 0;
 
-#if !__LP64__ && !defined(__arm__)
+#if __i386__
     vm_map_offset_t             prelinkDataMapOffset    = 0;
     void                      * prelinkCopy             = NULL;  // see code
     kern_return_t               mem_result              = KERN_SUCCESS;
@@ -246,6 +238,9 @@ KLDBootstrap::readPrelinkedExtensions(
     OSNumber                  * prelinkCountObj         = NULL;  // must release
 
     u_int                       i = 0;
+#if NO_KEXTD
+    bool                        developerDevice;
+#endif
 
     OSKextLog(/* kext */ NULL,
         kOSKextLogProgressLevel |
@@ -260,18 +255,62 @@ KLDBootstrap::readPrelinkedExtensions(
             "Can't find prelinked kexts' text segment.");
         goto finish;
     }
+    
+#if KASLR_KEXT_DEBUG
+    unsigned long   scratchSize;
+    vm_offset_t     scratchAddr;
+    
+    IOLog("kaslr: prelinked kernel address info: \n");
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __TEXT \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __DATA \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __LINKEDIT \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __KLD \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __PRELINK_TEXT \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+    
+    scratchAddr = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &scratchSize);
+    IOLog("kaslr: start 0x%lx end 0x%lx length %lu for __PRELINK_INFO \n", 
+          (unsigned long)scratchAddr, 
+          (unsigned long)(scratchAddr + scratchSize),
+          scratchSize);
+#endif
 
     prelinkData = (void *) prelinkTextSegment->vmaddr;
     prelinkLength = prelinkTextSegment->vmsize;
 
-#if !__LP64__ && !__arm__
-    /* XXX: arm's pmap implementation doesn't seem to let us do this */
-
+#if __i386__
     /* To enable paging and write/execute protections on the kext
      * executables, we need to copy them out of the booter-created
      * memory, reallocate that space with VM, then prelinkCopy them back in.
-     * This isn't necessary on LP64 because kexts have their own VM
-     * region on that architecture model.
+     *
+     * This isn't necessary on x86_64 because kexts have their own VM
+     * region for that architecture.
+     *
+     * XXX: arm's pmap implementation doesn't seem to let us do this.
      */
 
     mem_result = kmem_alloc(kernel_map, (vm_offset_t *)&prelinkCopy,
@@ -323,7 +362,7 @@ KLDBootstrap::readPrelinkedExtensions(
     memcpy(prelinkData, prelinkCopy, prelinkLength);
 
     kmem_free(kernel_map, (vm_offset_t)prelinkCopy, prelinkLength);
-#endif /* !__LP64__ && !__arm__*/
+#endif /* __i386__ */
 
    /* Unserialize the info dictionary from the prelink info section.
     */
@@ -345,6 +384,22 @@ KLDBootstrap::readPrelinkedExtensions(
         goto finish;
     }
 
+#if NO_KEXTD
+    /* Check if we should keep developer kexts around. Default:
+     *   Release: No
+     *   Development: Yes
+     *   Debug : Yes
+     * TODO: Check DeviceTree instead of a boot-arg <rdar://problem/10604201>
+     */
+#if DEVELOPMENT
+    developerDevice = true;
+#else
+    developerDevice = false;
+#endif
+
+    PE_parse_boot_argn("developer", &developerDevice, sizeof(developerDevice));
+#endif /* NO_KEXTD */
+
     infoDictArray = OSDynamicCast(OSArray, 
         prelinkInfoDict->getObject(kPrelinkInfoDictionaryKey));
     if (!infoDictArray) {
@@ -365,6 +420,34 @@ KLDBootstrap::readPrelinkedExtensions(
             continue;
         }
 
+#if NO_KEXTD
+        /* If we're not on a developer device, skip and free developer kexts.
+         */
+        if (developerDevice == false) {
+            OSBoolean *devOnlyBool = OSDynamicCast(OSBoolean,
+                infoDict->getObject(kOSBundleDeveloperOnlyKey));
+            if (devOnlyBool == kOSBooleanTrue) {
+                OSString *bundleID = OSDynamicCast(OSString,
+                    infoDict->getObject(kCFBundleIdentifierKey));
+                if (bundleID) {
+                    OSKextLog(NULL, kOSKextLogWarningLevel | kOSKextLogGeneralFlag,
+                        "Kext %s not loading on non-dev device.", bundleID->getCStringNoCopy());
+                }
+
+                OSNumber *addressNum = OSDynamicCast(OSNumber,
+                    infoDict->getObject(kPrelinkExecutableLoadKey));
+                OSNumber *lengthNum = OSDynamicCast(OSNumber,
+                    infoDict->getObject(kPrelinkExecutableSizeKey));
+                if (addressNum && lengthNum) {
+#error Pick the right way to free prelinked data on this arch
+                }
+
+                infoDictArray->removeObject(i--);
+                continue;
+            }
+        }
+#endif /* NO_KEXTD */
+
        /* Create the kext for the entry, then release it, because the
         * kext system keeps them around until explicitly removed.
         * Any creation/registration failures are already logged for us.
@@ -394,12 +477,13 @@ KLDBootstrap::readPrelinkedExtensions(
         "%u prelinked kexts", 
         infoDictArray->getCount());
 
-#if __LP64__
-        /* On LP64 systems, kexts are copied to their own special VM region
-         * during OSKext init time, so we can free the whole segment now.
+#if CONFIG_KEXT_BASEMENT
+        /* On CONFIG_KEXT_BASEMENT systems, kexts are copied to their own 
+         * special VM region during OSKext init time, so we can free the whole 
+         * segment now.
          */
         ml_static_mfree((vm_offset_t) prelinkData, prelinkLength);
-#endif /* __LP64__ */
+#endif /* CONFIG_KEXT_BASEMENT */
 
    /* Free the prelink info segment, we're done with it.
     */
@@ -665,7 +749,7 @@ KLDBootstrap::loadSecurityExtensions(void)
         }
 
         isSecurityKext = OSDynamicCast(OSBoolean,
-            theKext->getPropertyForHostArch("AppleSecurityExtension"));
+            theKext->getPropertyForHostArch(kAppleSecurityExtensionKey));
         if (isSecurityKext && isSecurityKext->isTrue()) {
             OSKextLog(/* kext */ NULL,
                 kOSKextLogStepLevel |
@@ -724,6 +808,80 @@ KLDBootstrap::loadKernelComponentKexts(void)
     return result;
 }
 
+/*********************************************************************
+* Ensure that Kernel External Components are loaded early in boot,
+* before other kext personalities get sent to the IOCatalogue. These
+* kexts are treated specially because they may provide the implementation
+* for kernel-vended KPI, so they must register themselves before
+* general purpose IOKit probing begins.
+*********************************************************************/
+
+#define COM_APPLE_KEC  "com.apple.kec."
+
+void
+KLDBootstrap::loadKernelExternalComponents(void)
+{
+    OSDictionary         * extensionsDict = NULL;  // must release
+    OSCollectionIterator * keyIterator    = NULL;  // must release
+    OSString             * bundleID       = NULL;  // don't release
+    OSKext               * theKext        = NULL;  // don't release
+    OSBoolean            * isKernelExternalComponent = NULL;  // don't release
+
+    OSKextLog(/* kext */ NULL,
+        kOSKextLogStepLevel |
+        kOSKextLogLoadFlag,
+        "Loading Kernel External Components.");
+
+    extensionsDict = OSKext::copyKexts();
+    if (!extensionsDict) {
+        return;
+    }
+
+    keyIterator = OSCollectionIterator::withCollection(extensionsDict);
+    if (!keyIterator) {
+        OSKextLog(/* kext */ NULL,
+            kOSKextLogErrorLevel |
+            kOSKextLogGeneralFlag,
+            "Failed to allocate iterator for Kernel External Components.");
+        goto finish;
+    }
+
+    while ((bundleID = OSDynamicCast(OSString, keyIterator->getNextObject()))) {
+
+        const char * bundle_id = bundleID->getCStringNoCopy();
+        
+       /* Skip extensions whose bundle IDs don't start with "com.apple.kec.".
+        */
+        if (!bundle_id ||
+            (strncmp(bundle_id, COM_APPLE_KEC, CONST_STRLEN(COM_APPLE_KEC)) != 0)) {
+
+            continue;
+        }
+
+        theKext = OSDynamicCast(OSKext, extensionsDict->getObject(bundleID));
+        if (!theKext) {
+            continue;
+        }
+
+        isKernelExternalComponent = OSDynamicCast(OSBoolean,
+            theKext->getPropertyForHostArch(kAppleKernelExternalComponentKey));
+        if (isKernelExternalComponent && isKernelExternalComponent->isTrue()) {
+            OSKextLog(/* kext */ NULL,
+                kOSKextLogStepLevel |
+                kOSKextLogLoadFlag,
+                "Loading kernel external component %s.", bundleID->getCStringNoCopy());
+            OSKext::loadKextWithIdentifier(bundleID->getCStringNoCopy(),
+                /* allowDefer */ false);
+        }
+    }
+
+finish:
+    OSSafeRelease(keyIterator);
+    OSSafeRelease(extensionsDict);
+
+    return;
+}
+
 /*********************************************************************
  *********************************************************************/
 void
index 99865aa3e54dbbc733da2e4bf392399e44d71bfc..a47e748138e8f003ec59f894cd7e69928dd09c23 100644 (file)
 #
 ident          LIBSA
 
-options                KDEBUG          # kernel tracing                # <kdebug>
+options                KDEBUG                  # kernel tracing        # <kdebug>
+options                IST_KDEBUG              # limited tracing       # <ist_kdebug>
+options                NO_KDEBUG               # no kernel tracing     # <no_kdebug>
+
 options                GPROF           # kernel profiling              # <profile>
 
 options                CONFIG_NOLIBKLD # kernel linker                 # <no_kld>
 
 options                MALLOC_RESET_GC                                 # <smaller_kld>
 options                CONFIG_DTRACE                                   # <config_dtrace>
+options                VM_PRESSURE_EVENTS                              # <vm_pressure_events>
 
 options                CONFIG_NO_PANIC_STRINGS                         # <no_panic_str>
 options                CONFIG_NO_PRINTF_STRINGS                        # <no_printf_str>
 options                CONFIG_NO_KPRINTF_STRINGS                       # <no_kprintf_str>
 
 options                CONFIG_KXLD             # kxld/runtime linking of kexts # <config_kxld>
+
+options                DEVELOPMENT             # dev kernel                            # <development>
+
+# configurable kernel - general switch to say we are building for an
+# embedded device
+#
+options         CONFIG_EMBEDDED         # <config_embedded>
+
+# CONFIG_KEXT_BASEMENT - allocate post-boot loaded kexts after the prelinked kexts
+#
+options                CONFIG_KEXT_BASEMENT            #               # <config_kext_basement>
+
index 448133126ac61f439139408f50e45254ef8f7830..8e7b8008c0d5ce0565f76e65f7de44d5b966e43a 100644 (file)
@@ -1,6 +1,6 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace config_kxld ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace config_kxld vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
index 89c745125b6ebc27fbfbf40a9e842cfb78ec0b80..c4edf238fe0ddb707601bd2752b6667370d91bec 100644 (file)
@@ -1,6 +1,6 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace config_kxld ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace config_kxld vm_pressure_events config_kext_basement ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
index b463b2528ae2593de277be03677041ab963551cb..45981d362b1513ed6c135b9dcd2923683abcf1d8 100644 (file)
@@ -42,9 +42,11 @@ $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)        \
                MAKEFILES=$(TARGET)/$(LIBSA_KERNEL_CONFIG)/Makefile     \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(LIBSA_KERNEL_CONFIG)    \
index 4554d46ba680e7a64bed89c82da8c8cae2bbfd80..196b8f326c3fffc78867b89d0e42e7c81cc1b1ac 100644 (file)
@@ -10,12 +10,10 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = 
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
-INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
 
 EXPINC_SUBDIRS = 
 EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
-EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
 
 INSTALL_MI_LIST        = 
 
index 8881d50281554e4f1c4a81b84b8292a90c695433..3f9e34bdb408da186adbe7aea147caccc117f90a 100644 (file)
@@ -10,15 +10,20 @@ PUBLIC_HEADERS_FOLDER_PATH = /usr/include/mach
 PUBLIC_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/include/mach
 PUBLIC_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/include/mach
 PUBLIC_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/include/mach
+PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include
+PRIVATE_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/local/include
+PRIVATE_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/local/include
+PRIVATE_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/local/include
 EXECUTABLE_PREFIX = libsystem_
 PRODUCT_NAME = kernel
 ALWAYS_SEARCH_USER_PATHS = NO
+ORDER_FILE[sdk=iphoneos*] = $(SDKROOT)/$(APPLE_INTERNAL_DIR)/OrderFiles/libsystem_kernel.order
 OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-protector -pipe -DLIBSYSCALL_INTERFACE -D__DARWIN_VERS_1050=1
 OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050
 OTHER_CFLAGS[sdk=macosx*][arch=x86_64] = $(inherited) -DNO_SYSCALL_LEGACY
 OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY
 GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0
-HEADER_SEARCH_PATHS = /System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/wrappers
+HEADER_SEARCH_PATHS = $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/wrappers
 WARNING_CFLAGS = -Wmost
 GCC_TREAT_WARNINGS_AS_ERRORS = YES
 GCC_WARN_ABOUT_MISSING_NEWLINE = YES
index 6310cd437b8a17e6671c32e91ee3e37d46288634..c391bcf32c66c5880c2effae5860974bd3a0030d 100644 (file)
@@ -33,6 +33,7 @@
 /* End PBXAggregateTarget section */
 
 /* Begin PBXBuildFile section */
+               030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 030B179A135377B400DAD1F0 /* open_dprotected_np.c */; };
                240BAC4C1214770F000A1719 /* memcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B028D511FF4FBB00CA64A9 /* memcpy.c */; };
                2419382B12135FF6003CDE41 /* chmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 2419382A12135FF6003CDE41 /* chmod.c */; };
                242AB66611EBDC1200107336 /* errno.c in Sources */ = {isa = PBXBuildFile; fileRef = 242AB66511EBDC1200107336 /* errno.c */; };
@@ -86,6 +87,8 @@
                24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B8C2611237F53900D36CC3 /* remove-counter.c */; };
                24D1158311E671B20063D54D /* SYS.h in Headers */ = {isa = PBXBuildFile; fileRef = 24D1157411E671B20063D54D /* SYS.h */; };
                24E4782712088267009A384D /* _libc_funcptr.c in Sources */ = {isa = PBXBuildFile; fileRef = 24E47824120881DF009A384D /* _libc_funcptr.c */; };
+               291D3C281354FDD100D46061 /* mach_port.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C261354FDD100D46061 /* mach_port.c */; };
+               291D3C291354FDD100D46061 /* mach_vm.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C271354FDD100D46061 /* mach_vm.c */; };
                C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */; };
                C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */; };
                C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */; };
                C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE3114B00600000D8B9 /* vm_task.h */; settings = {ATTRIBUTES = (Public, ); }; };
                C9D9BD30114B00600000D8B9 /* host_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE4114B00600000D8B9 /* host_priv.defs */; };
                C9D9BD31114B00600000D8B9 /* host_security.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE5114B00600000D8B9 /* host_security.defs */; };
-               C9D9BD34114B00600000D8B9 /* ledger.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE9114B00600000D8B9 /* ledger.defs */; };
                C9D9BD35114B00600000D8B9 /* lock_set.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEA114B00600000D8B9 /* lock_set.defs */; };
                C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEB114B00600000D8B9 /* mach_error_string.c */; };
                C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEC114B00600000D8B9 /* mach_error.c */; };
                C9D9BD57114B00600000D8B9 /* task.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD0F114B00600000D8B9 /* task.defs */; };
                C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD10114B00600000D8B9 /* thread_act.defs */; };
                C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD11114B00600000D8B9 /* vm_map.defs */; };
+               EE3F605A149A6D66003BAEBA /* getaudit.c in Sources */ = {isa = PBXBuildFile; fileRef = EE3F6059149A6D66003BAEBA /* getaudit.c */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXContainerItemProxy section */
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
+               030B179A135377B400DAD1F0 /* open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open_dprotected_np.c; sourceTree = "<group>"; };
                240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = "<group>"; };
                2419382A12135FF6003CDE41 /* chmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chmod.c; sourceTree = "<group>"; };
                2427FA821200BCF800EF7A1F /* compat-symlinks.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "compat-symlinks.sh"; sourceTree = "<group>"; };
                24D1159811E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
                24D1159911E6723E0063D54D /* create-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "create-syscalls.pl"; sourceTree = "<group>"; };
                24E47824120881DF009A384D /* _libc_funcptr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libc_funcptr.c; sourceTree = "<group>"; };
+               291D3C261354FDD100D46061 /* mach_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = mach_port.c; path = mach/mach_port.c; sourceTree = "<group>"; };
+               291D3C271354FDD100D46061 /* mach_vm.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = mach_vm.c; path = mach/mach_vm.c; sourceTree = "<group>"; };
                C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = __get_cpu_capabilities.s; sourceTree = "<group>"; };
                C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = init_cpu_capabilities.c; sourceTree = "<group>"; };
                C9D9BCBF114B00600000D8B9 /* .open_source_exclude */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .open_source_exclude; sourceTree = "<group>"; };
                C9D9BCE3114B00600000D8B9 /* vm_task.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vm_task.h; sourceTree = "<group>"; };
                C9D9BCE4114B00600000D8B9 /* host_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_priv.defs; sourceTree = "<group>"; };
                C9D9BCE5114B00600000D8B9 /* host_security.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_security.defs; sourceTree = "<group>"; };
-               C9D9BCE9114B00600000D8B9 /* ledger.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = ledger.defs; sourceTree = "<group>"; };
                C9D9BCEA114B00600000D8B9 /* lock_set.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = lock_set.defs; sourceTree = "<group>"; };
                C9D9BCEB114B00600000D8B9 /* mach_error_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_error_string.c; sourceTree = "<group>"; };
                C9D9BCEC114B00600000D8B9 /* mach_error.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_error.c; sourceTree = "<group>"; };
                C9D9BD11114B00600000D8B9 /* vm_map.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = vm_map.defs; sourceTree = "<group>"; };
                C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Libsyscall.xcconfig; sourceTree = "<group>"; };
                D2AAC0630554660B00DB518D /* libsystem_kernel.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_kernel.a; sourceTree = BUILT_PRODUCTS_DIR; };
+               EE3F6059149A6D66003BAEBA /* getaudit.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getaudit.c; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
                08FB7794FE84155DC02AAC07 /* mach */ = {
                        isa = PBXGroup;
                        children = (
+                               291D3C261354FDD100D46061 /* mach_port.c */,
+                               291D3C271354FDD100D46061 /* mach_vm.c */,
                                C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */,
                                24D1158911E672270063D54D /* Platforms */,
                                24D1156511E671B20063D54D /* custom */,
                                C9D9BCD8114B00600000D8B9 /* mach */,
                                C9D9BCE4114B00600000D8B9 /* host_priv.defs */,
                                C9D9BCE5114B00600000D8B9 /* host_security.defs */,
-                               C9D9BCE9114B00600000D8B9 /* ledger.defs */,
                                C9D9BCEA114B00600000D8B9 /* lock_set.defs */,
                                C9D9BCEB114B00600000D8B9 /* mach_error_string.c */,
                                C9D9BCEC114B00600000D8B9 /* mach_error.c */,
                                C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */,
                                247A08B211F8B05900E4693F /* _libkernel_init.h */,
                                247A08B311F8B05900E4693F /* _libkernel_init.c */,
+                               030B179A135377B400DAD1F0 /* open_dprotected_np.c */,
                                24E47824120881DF009A384D /* _libc_funcptr.c */,
                                24A7C5CB11FF973C007669EB /* _errno.h */,
                                C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */,
                                24A7C5AF11FF8DA6007669EB /* bind.c */,
                                248BA01C121C56BF008C073F /* connect.c */,
                                24A7C5B111FF8DA6007669EB /* getattrlist.c */,
+                               EE3F6059149A6D66003BAEBA /* getaudit.c */,
                                24A7C5B211FF8DA6007669EB /* getpeername.c */,
                                24A7C5B311FF8DA6007669EB /* getsockname.c */,
                                24A7C5B411FF8DA6007669EB /* lchown.c */,
                        buildConfigurationList = 1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libmach" */;
                        buildPhases = (
                                D2AAC0600554660B00DB518D /* Headers */,
+                               2487545E11629934000975E0 /* Install Headers */,
                                D2AAC0610554660B00DB518D /* Sources */,
                                D289988505E68E00004EDB86 /* Frameworks */,
-                               2487545E11629934000975E0 /* Install Headers */,
                        );
                        buildRules = (
                        );
                                C9D9BD22114B00600000D8B9 /* exc.defs in Sources */,
                                C9D9BD30114B00600000D8B9 /* host_priv.defs in Sources */,
                                C9D9BD31114B00600000D8B9 /* host_security.defs in Sources */,
-                               C9D9BD34114B00600000D8B9 /* ledger.defs in Sources */,
                                C9D9BD35114B00600000D8B9 /* lock_set.defs in Sources */,
                                C9D9BD38114B00600000D8B9 /* mach_host.defs in Sources */,
                                C9D9BD3D114B00600000D8B9 /* mach_port.defs in Sources */,
                                24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */,
                                C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
                                C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */,
+                               030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */,
+                               291D3C281354FDD100D46061 /* mach_port.c in Sources */,
+                               291D3C291354FDD100D46061 /* mach_vm.c in Sources */,
+                               EE3F605A149A6D66003BAEBA /* getaudit.c in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index bdfa11aac833c9a4539b830f9c764f6338916ed9..656bd8fdb8d255f4e95f9b79905afff449d0da0d 100644 (file)
@@ -60,6 +60,7 @@ _recvmsg$NOCANCEL$UNIX2003    ___recvmsg_nocancel
 _recvmsg$UNIX2003      ___recvmsg
 _select$DARWIN_EXTSN   ___select
 _select$DARWIN_EXTSN$NOCANCEL  ___select_nocancel
+_sem_open      ___sem_open
 _sem_wait      ___sem_wait_nocancel
 _sem_wait$NOCANCEL$UNIX2003    ___sem_wait_nocancel
 _sem_wait$UNIX2003     ___sem_wait
index b8cb6b1e14b4e9cbb548e2ee19c90aab0fc0d89e..d717a159c38c06e034c0c25d42efd80841f0b271 100644 (file)
@@ -45,6 +45,7 @@ _open ___open
 _recvfrom      ___recvfrom
 _recvmsg       ___recvmsg
 _semctl        ___semctl
+_sem_open ___sem_open
 _sendmsg       ___sendmsg
 _sendto        ___sendto
 _setattrlist   ___setattrlist
index a16f358d8ce0c9fc45d66ee5d6520991238fbd53..675fede15be647582815693e3d72f9ecb32091e6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -48,6 +48,8 @@
 
 #include <sys/syscall.h>
 
+/* Binary compatibility stubs for syscalls that no longer exist */
+
 #ifndef SYS_setquota
 #define SYS_setquota   148
 #endif
@@ -86,14 +88,14 @@ LEAF(_##name, 0)                                    ;\
 2:
 
 #if defined(__SYSCALL_32BIT_ARG_BYTES) && ((__SYSCALL_32BIT_ARG_BYTES >= 4) && (__SYSCALL_32BIT_ARG_BYTES <= 20))
-#define UNIX_SYSCALL_NONAME(name, nargs)                       \
+#define UNIX_SYSCALL_NONAME(name, nargs, cerror)                       \
        movl    $(SYS_##name | (__SYSCALL_32BIT_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax               ;\
        UNIX_SYSCALL_SYSENTER                                   ;\
        jnb     2f                                              ;\
        BRANCH_EXTERN(cerror)                                   ;\
 2:
 #else /* __SYSCALL_32BIT_ARG_BYTES < 4 || > 20 */
-#define UNIX_SYSCALL_NONAME(name, nargs)               \
+#define UNIX_SYSCALL_NONAME(name, nargs, cerror)               \
        .globl  cerror                                  ;\
        movl    $ SYS_##name, %eax                      ;\
        UNIX_SYSCALL_SYSENTER                           ;\
@@ -107,19 +109,23 @@ LEAF(_##name, 0)                                  ;\
        movl    $ SYS_##name, %eax                      ;\
        UNIX_SYSCALL_TRAP                               ;\
        jnb     2f                                      ;\
-       BRANCH_EXTERN(cerror)                         ;\
+       BRANCH_EXTERN(cerror_nocancel)                          ;\
 2:
 
-#define PSEUDO(pseudo, name, nargs)                    \
+#define PSEUDO(pseudo, name, nargs, cerror)                    \
 LEAF(pseudo, 0)                                        ;\
-       UNIX_SYSCALL_NONAME(name, nargs)
+       UNIX_SYSCALL_NONAME(name, nargs, cerror)
 
 #define PSEUDO_INT(pseudo, name, nargs)                        \
 LEAF(pseudo, 0)                                        ;\
        UNIX_SYSCALL_INT_NONAME(name, nargs)
 
+#define __SYSCALL2(pseudo, name, nargs, cerror)                        \
+       PSEUDO(pseudo, name, nargs, cerror)                     ;\
+       ret
+
 #define __SYSCALL(pseudo, name, nargs)                 \
-       PSEUDO(pseudo, name, nargs)                     ;\
+       PSEUDO(pseudo, name, nargs, cerror)                     ;\
        ret
 
 #define __SYSCALL_INT(pseudo, name, nargs)             \
@@ -144,7 +150,7 @@ LEAF(_##name, 0)                                    ;\
        BRANCH_EXTERN(cerror)                           ;\
 2:
 
-#define UNIX_SYSCALL_NONAME(name, nargs)               \
+#define UNIX_SYSCALL_NONAME(name, nargs, cerror)               \
        .globl  cerror                                  ;\
        movl    $ SYSCALL_CONSTRUCT_UNIX(SYS_##name), %eax      ;\
        UNIX_SYSCALL_SYSCALL                            ;\
@@ -152,14 +158,19 @@ LEAF(_##name, 0)                                  ;\
        BRANCH_EXTERN(cerror)                           ;\
 2:
 
-#define PSEUDO(pseudo, name, nargs)                    \
+#define PSEUDO(pseudo, name, nargs, cerror)                    \
 LEAF(pseudo, 0)                                        ;\
-       UNIX_SYSCALL_NONAME(name, nargs)
+       UNIX_SYSCALL_NONAME(name, nargs, cerror)
+
+#define __SYSCALL2(pseudo, name, nargs, cerror) \
+       PSEUDO(pseudo, name, nargs, cerror)                     ;\
+       ret
 
 #define __SYSCALL(pseudo, name, nargs)                 \
-       PSEUDO(pseudo, name, nargs)                     ;\
+       PSEUDO(pseudo, name, nargs, cerror)                     ;\
        ret
 
 #else
 #error Unsupported architecture
 #endif
+
index 48c85313c24c0a06eb271c016518c227ca5b7772..2768d9b826297c3a4f6f2464a5db451600a0b43b 100644 (file)
@@ -32,9 +32,9 @@
 
        .data
        .private_extern __current_pid
-__current_pid:
+L__current_pid_addr:
+__current_pid:
        .long 0
-L__current_pid_addr = __current_pid
 
 #if defined(__DYNAMIC__)
 #define GET_CURRENT_PID                                \
@@ -61,7 +61,7 @@ LEAF(___getpid, 0)
        jle             1f
        ret
 1:
-       UNIX_SYSCALL_NONAME(getpid, 0)
+       UNIX_SYSCALL_NONAME(getpid, 0, cerror_nocancel)
        movl            %eax, %edx
        xorl            %eax, %eax
        GET_CURRENT_PID
@@ -88,7 +88,7 @@ LEAF(___getpid, 0)
        jle             1f
        ret
 1:
-       UNIX_SYSCALL_NONAME(getpid, 0)
+       UNIX_SYSCALL_NONAME(getpid, 0, cerror_nocancel)
        movl            %eax, %edx
        xorl            %eax, %eax
        leaq            __current_pid(%rip), %rcx
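
Both variants above serve getpid() from the cached __current_pid and only trap into the kernel when the cache is non-positive, publishing the fresh value with a compare-and-swap. A C sketch of the same caching scheme (GCC/Clang atomic builtins stand in for the lock cmpxchg; fork handlers are assumed to reset the cache elsewhere):

#include <stdio.h>
#include <unistd.h>

static int current_pid = 0;   /* mirrors __current_pid; reset across fork */

static pid_t cached_getpid(void)
{
    int pid = __atomic_load_n(&current_pid, __ATOMIC_RELAXED);
    if (pid > 0)
        return pid;               /* fast path: no syscall */
    pid = (int)getpid();          /* slow path: the real syscall */
    int expected = 0;             /* publish only if still unset */
    __atomic_compare_exchange_n(&current_pid, &expected, pid, 0,
                                __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    return pid;
}

int main(void)
{
    printf("pid %d\n", (int)cached_getpid());
    return 0;
}
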
index 1dbf19c7728adee187687d407f6d22b97c5435c1..0076f49ce7ffc641206727adc8c324aa09bab66e 100644 (file)
@@ -48,7 +48,7 @@ LABEL(___gettimeofday)
  *     This syscall is special cased: the timeval is returned in rax:rdx.
  */
 LABEL(___gettimeofday)
-    UNIX_SYSCALL_NONAME(gettimeofday,0)
+    UNIX_SYSCALL_NONAME(gettimeofday,0,cerror_nocancel)
     movq       %rax, (%rdi)
     movl       %edx, 8(%rdi)
     xorl       %eax, %eax
index 0131d476d73ac39955e62d09866a611e9675b316..d375dddbd7a00754a9d014762bb7df9876e6bf94 100644 (file)
@@ -40,7 +40,7 @@ PSEUDO_INT(___pipe, pipe, 0)
 
 #elif defined(__x86_64__)
 
-PSEUDO(___pipe, pipe, 0)
+PSEUDO(___pipe, pipe, 0, cerror_nocancel)
        movl    %eax, (%rdi)
        movl    %edx, 4(%rdi)
        xorl    %eax, %eax
diff --git a/libsyscall/custom/__psynch_cvbroad.s b/libsyscall/custom/__psynch_cvbroad.s
deleted file mode 100644 (file)
index 037fcfc..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved. */
-
-#include "SYS.h"
-
-#define __SYSCALL_32BIT_ARG_BYTES 36
-
-#if defined(__i386__) || defined(__x86_64__)
-
-__SYSCALL(___psynch_cvbroad, psynch_cvbroad, 8)
-
-#else
-#error Unsupported architecture
-#endif
diff --git a/libsyscall/custom/__psynch_cvwait.s b/libsyscall/custom/__psynch_cvwait.s
deleted file mode 100644 (file)
index c5d69ce..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved. */
-
-#include "SYS.h"
-
-#define __SYSCALL_32BIT_ARG_BYTES 40
-
-#if defined(__i386__) || defined(__x86_64__)
-
-__SYSCALL(___psynch_cvwait, psynch_cvwait, 8)
-
-#else
-#error Unsupported architecture
-#endif
index 9eae221f2c235ee09b03cf3e96d2670187ce27cf..bdcbec9fb7ebc60aa060722db2109bb8d907453a 100644 (file)
@@ -36,7 +36,7 @@
 LEAF(___ptrace, 0)
        xorl    %eax,%eax
        REG_TO_EXTERN(%eax,_errno)
-UNIX_SYSCALL_NONAME(ptrace, 4)
+       UNIX_SYSCALL_NONAME(ptrace, 4, cerror)
        ret
 
 #elif defined(__x86_64__)
@@ -47,7 +47,7 @@ LEAF(___ptrace, 0)
        xorq    %rax,%rax
        PICIFY(_errno)
        movl    %eax,(%r11)
-UNIX_SYSCALL_NONAME(ptrace, 4)
+       UNIX_SYSCALL_NONAME(ptrace, 4, cerror)
        ret
 
 #else
index b9d46ba13ef13d60a6ea0726c08252e2694ec8e4..a6a4f8bb8cd36cadbabf4b434ddc377085b7b5fb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
        .globl  _errno
 
 LABEL(cerror)
-       REG_TO_EXTERN(%eax, _errno)
+       movl    $0,%ecx
+       jmp             1f
+LABEL(cerror_nocancel)
+       movl    $1,%ecx
+1:     REG_TO_EXTERN(%eax, _errno)
        mov             %esp,%edx
        andl    $0xfffffff0,%esp
        subl    $16,%esp
-       movl    %edx,4(%esp)
+       movl    %edx,8(%esp)
+       movl    %ecx,4(%esp)
        movl    %eax,(%esp)
        CALL_EXTERN(_cthread_set_errno_self)
-       movl    4(%esp),%esp
+       movl    8(%esp),%esp
        movl    $-1,%eax
        movl    $-1,%edx /* in case a 64-bit value is returned */
        ret
@@ -57,7 +62,13 @@ LABEL(__sysenter_trap)
        .globl  _errno
 
 LABEL(cerror)
-       PICIFY(_errno) /* address -> %r11 */
+       /* cancelable syscall, for arg1 to _cthread_set_errno_self */
+       movq    $0,%rsi
+       jmp             1f
+LABEL(cerror_nocancel)
+       /* non-cancelable, see above. */
+       movq    $1,%rsi
+1:     PICIFY(_errno) /* address -> %r11 */
        movl    %eax,(%r11)
        mov     %rsp,%rdx
        andq    $-16,%rsp
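
Both labels now converge on _cthread_set_errno_self with an extra flag (in %ecx on i386, %rsi on x86_64) recording whether the failing syscall was cancelable. A rough C equivalent of the control flow, with the two-argument signature inferred from the register setup above (a sketch, not the real assembly):

#include <errno.h>

static int last_err, last_nocancel;

/* Stand-in for libc's _cthread_set_errno_self; the (error, nocancel)
 * argument order is inferred from the register setup above. */
static void set_errno_self(int err, int nocancel)
{
    last_err      = err;
    last_nocancel = nocancel;
    errno         = err;
}

static long cerror(int err)            /* a cancelable syscall failed */
{
    set_errno_self(err, 0);
    return -1;
}

static long cerror_nocancel(int err)   /* a non-cancelable syscall failed */
{
    set_errno_self(err, 1);
    return -1;
}

int main(void)
{
    return (cerror(2) == -1 && cerror_nocancel(9) == -1 &&
            last_err == 9 && last_nocancel == 1) ? 0 : 1;
}
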
index af939cb3b839ae34650f3c1b465d43e3e0a9115c..b0f7a01c95a21ed9dec9aa9ffa2886f2e7b29ca7 100644 (file)
@@ -42,7 +42,6 @@
 #include <mach/clock_priv.h>
 #include <mach/host_priv.h>
 #include <mach/host_security.h>
-#include <mach/ledger.h>
 #include <mach/lock_set.h>
 #include <mach/processor.h>
 #include <mach/processor_set.h>
index d8b094119b570cf2ddf4b3123baee623a19d14b9..87f6cb573949a440a11f5942b22afa9239df30ba 100644 (file)
@@ -318,6 +318,7 @@ mach_msg_destroy(mach_msg_header_t *msg)
                    /*
                     * Just skip it.
                     */
+                   dsc = &daddr->out_of_line;
                    daddr = (mach_msg_descriptor_t *)(dsc + 1);
                    break;
            }
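
The one-line fix matters because the loop walks the descriptor array by advancing a cursor derived from dsc; in the skip case, dsc was never pointed at the current descriptor, so the next iteration started from a stale address. A reduced sketch of the corrected walk (the real mach_msg descriptor types are simplified here for illustration):

#include <stddef.h>
#include <stdio.h>

typedef struct { int type; int payload; } out_of_line_t;
typedef union  { out_of_line_t out_of_line; } descriptor_t;

static void walk(descriptor_t *daddr, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        /* The fix: re-derive dsc from the current descriptor before
         * using it to compute the next one. */
        out_of_line_t *dsc = &daddr->out_of_line;
        printf("descriptor %zu: type %d\n", i, dsc->type);
        daddr = (descriptor_t *)(dsc + 1);
    }
}

int main(void)
{
    descriptor_t d[2] = { { { 1, 0 } }, { { 2, 0 } } };
    walk(d, 2);
    return 0;
}
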
diff --git a/libsyscall/mach/mach_port.c b/libsyscall/mach/mach_port.c
new file mode 100644 (file)
index 0000000..954d45e
--- /dev/null
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_port_internal.h>
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <mach/mach_traps.h>
+
+kern_return_t
+mach_port_names(
+       ipc_space_t task,
+       mach_port_name_array_t *names,
+       mach_msg_type_number_t *namesCnt,
+       mach_port_type_array_t *types,
+       mach_msg_type_number_t *typesCnt)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_names(task, names, namesCnt, types,
+                       typesCnt);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_type(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_type_t *ptype)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_type(task, name, ptype);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_rename(
+       ipc_space_t task,
+       mach_port_name_t old_name,
+       mach_port_name_t new_name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_rename(task, old_name, new_name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_allocate_name(
+       ipc_space_t task,
+       mach_port_right_t right,
+       mach_port_name_t name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_allocate_name(task, right, name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_allocate(
+       ipc_space_t task,
+       mach_port_right_t right,
+       mach_port_name_t *name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_allocate_trap(task, right, name);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_allocate(task, right, name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_destroy(
+       ipc_space_t task,
+       mach_port_name_t name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_destroy_trap(task, name);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_destroy(task, name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_deallocate(
+       ipc_space_t task,
+       mach_port_name_t name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_deallocate_trap(task, name); 
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_deallocate(task,name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_get_refs(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_right_t right,
+       mach_port_urefs_t *refs)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_get_refs(task, name, right, refs);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_mod_refs(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_right_t right,
+       mach_port_delta_t delta)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_mod_refs_trap(task, name, right, delta); 
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_mod_refs(task, name, right, delta);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_set_mscount(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_mscount_t mscount)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_set_mscount(task, name, mscount);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_get_set_status(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_name_array_t *members,
+       mach_msg_type_number_t *membersCnt)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_get_set_status(task, name, members,
+                       membersCnt);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_move_member(
+       ipc_space_t task,
+       mach_port_name_t member,
+       mach_port_name_t after)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_move_member_trap(task, member, after);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_move_member(task, member, after);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_request_notification(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_msg_id_t msgid,
+       mach_port_mscount_t sync,
+       mach_port_t notify,
+       mach_msg_type_name_t notifyPoly,
+       mach_port_t *previous)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_request_notification(task, name, msgid,
+               sync, notify, notifyPoly, previous);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_insert_right(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_t poly,
+       mach_msg_type_name_t polyPoly)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_insert_right_trap(task, name, poly, polyPoly); 
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_insert_right(task, name, poly,
+                   polyPoly);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_extract_right(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_msg_type_name_t msgt_name,
+       mach_port_t *poly,
+       mach_msg_type_name_t *polyPoly)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_extract_right(task, name, msgt_name,
+               poly, polyPoly);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_set_seqno(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_seqno_t seqno)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_set_seqno(task, name, seqno);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_get_attributes(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_flavor_t flavor,
+       mach_port_info_t port_info_out,
+       mach_msg_type_number_t *port_info_outCnt)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_get_attributes(task, name, flavor,
+                       port_info_out, port_info_outCnt);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_set_attributes(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_flavor_t flavor,
+       mach_port_info_t port_info,
+       mach_msg_type_number_t port_infoCnt)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_set_attributes(task, name, flavor,
+                       port_info, port_infoCnt);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_allocate_qos(
+       ipc_space_t task,
+       mach_port_right_t right,
+       mach_port_qos_t *qos,
+       mach_port_name_t *name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_allocate_qos(task, right, qos, name);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_allocate_full(
+       ipc_space_t task,
+       mach_port_right_t right,
+       mach_port_t proto,
+       mach_port_qos_t *qos,
+       mach_port_name_t *name)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_allocate_full(task, right, proto, qos, name);
+
+       return (rv);
+}
+
+kern_return_t
+task_set_port_space(
+       ipc_space_t task,
+       int table_entries)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_task_set_port_space(task, table_entries);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_get_srights(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_rights_t *srights)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_get_srights(task, name, srights);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_space_info(
+       ipc_space_t task,
+       ipc_info_space_t *space_info,
+       ipc_info_name_array_t *table_info,
+       mach_msg_type_number_t *table_infoCnt,
+       ipc_info_tree_name_array_t *tree_info,
+       mach_msg_type_number_t *tree_infoCnt)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_space_info(task, space_info, table_info,
+                       table_infoCnt, tree_info, tree_infoCnt);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_dnrequest_info(
+       ipc_space_t task,
+       mach_port_name_t name,
+       unsigned *dnr_total,
+       unsigned *dnr_used)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_dnrequest_info(task, name, dnr_total,
+                       dnr_used);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_kernel_object(
+       ipc_space_t task,
+       mach_port_name_t name,
+       unsigned *object_type,
+       unsigned *object_addr)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_kernel_object(task, name,
+                       object_type, object_addr);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_insert_member(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_name_t pset)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_insert_member_trap(task, name, pset);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_insert_member(task, name, pset);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_extract_member(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_name_t pset)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_extract_member_trap(task, name, pset); 
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_port_extract_member(task, name, pset);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_get_context(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_context_t *context)
+{
+       kern_return_t rv;
+       mach_vm_address_t wide_context;
+
+       rv = _kernelrpc_mach_port_get_context(task, name, &wide_context);
+
+       if (rv == KERN_SUCCESS) {
+               *context = (mach_port_context_t)wide_context;
+       }
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_set_context(
+       ipc_space_t task,
+       mach_port_name_t name,
+       mach_port_context_t context)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_set_context(task, name, context);
+
+       return (rv);
+}
+
+kern_return_t
+mach_port_kobject(
+       ipc_space_t task,
+       mach_port_name_t name,
+       natural_t *object_type,
+       mach_vm_address_t *object_addr)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_port_kobject(task, name, object_type, object_addr);
+
+       return (rv);
+}
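
Nearly every wrapper in this new file follows the same shape: try the fast in-kernel trap first, and fall back to the full MIG routine only when the trap answers MACH_SEND_INVALID_DEST (for instance, when the request targets a task other than the caller and the trap cannot service it). A self-contained sketch of that pattern, with dummy callees standing in for a _kernelrpc_*_trap / _kernelrpc_* pair:

#include <mach/mach.h>

static kern_return_t fake_trap(void) { return MACH_SEND_INVALID_DEST; }
static kern_return_t fake_rpc(void)  { return KERN_SUCCESS; }

static kern_return_t
call_with_fallback(kern_return_t (*fast_trap)(void),
                   kern_return_t (*mig_rpc)(void))
{
    kern_return_t rv = fast_trap();     /* fast path: Mach trap */
    if (rv == MACH_SEND_INVALID_DEST)
        rv = mig_rpc();                 /* slow path: full MIG RPC */
    return rv;
}

int main(void)
{
    return call_with_fallback(fake_trap, fake_rpc) == KERN_SUCCESS ? 0 : 1;
}
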
diff --git a/libsyscall/mach/mach_vm.c b/libsyscall/mach/mach_vm.c
new file mode 100644 (file)
index 0000000..2db3830
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Make sure we don't accidentally include the external definitions of
+ * the routines we're interposing on below.
+ */
+#define _vm_map_user_
+#define _mach_vm_user_
+#include <mach/mach.h>
+#include <mach/mach_traps.h>
+#undef _vm_map_user_
+#include <mach/vm_map_internal.h>
+#undef _mach_vm_user_
+#include <mach/mach_vm_internal.h>
+
+kern_return_t
+mach_vm_allocate(
+               mach_port_name_t target,
+               mach_vm_address_t *address,
+               mach_vm_size_t size,
+               int flags)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_vm_allocate_trap(target, address, size, flags);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_vm_allocate(target, address, size, flags);
+
+       return (rv);
+}
+
+kern_return_t
+mach_vm_deallocate(
+       mach_port_name_t target,
+       mach_vm_address_t address,
+       mach_vm_size_t size)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_vm_deallocate_trap(target, address, size);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_vm_deallocate(target, address, size);
+
+       return (rv);
+}
+
+kern_return_t
+mach_vm_protect(
+       mach_port_name_t task,
+       mach_vm_address_t address,
+       mach_vm_size_t size,
+       boolean_t set_maximum,
+       vm_prot_t new_protection)
+{
+       kern_return_t rv;
+
+       rv = _kernelrpc_mach_vm_protect_trap(task, address, size, set_maximum,
+               new_protection);
+
+       if (rv == MACH_SEND_INVALID_DEST)
+               rv = _kernelrpc_mach_vm_protect(task, address, size,
+                       set_maximum, new_protection);
+
+       return (rv);
+}
+
+kern_return_t
+vm_allocate(
+       mach_port_name_t task,
+       vm_address_t *address,
+       vm_size_t size,
+       int flags)
+{
+       kern_return_t rv;
+       mach_vm_address_t mach_addr;
+
+       mach_addr = (mach_vm_address_t)*address;
+       rv = mach_vm_allocate(task, &mach_addr, size, flags);
+#if defined(__LP64__)
+       *address = mach_addr;
+#else
+       *address = (vm_address_t)(mach_addr & ((vm_address_t)-1));
+#endif
+
+       return (rv);
+}
+
+kern_return_t
+vm_deallocate(
+       mach_port_name_t task,
+       vm_address_t address,
+       vm_size_t size)
+{
+       kern_return_t rv;
+
+       rv = mach_vm_deallocate(task, address, size);
+
+       return (rv);
+}
+
+kern_return_t
+vm_protect(
+       mach_port_name_t task,
+       vm_address_t address,
+       vm_size_t size,
+       boolean_t set_maximum,
+       vm_prot_t new_protection)
+{
+       kern_return_t rv;
+
+       rv = mach_vm_protect(task, address, size, set_maximum, new_protection);
+
+       return (rv);
+}
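
The legacy vm_allocate/vm_deallocate/vm_protect entry points above now forward to the wide interfaces, with vm_allocate masking the 64-bit address back down on ILP32. A short usage example of the wide interface from an ordinary macOS process, using only public Mach calls:

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/mach_error.h>
#include <stdio.h>

int main(void)
{
    mach_vm_address_t addr = 0;
    kern_return_t kr = mach_vm_allocate(mach_task_self(), &addr,
                                        4096, VM_FLAGS_ANYWHERE);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "mach_vm_allocate: %s\n", mach_error_string(kr));
        return 1;
    }
    printf("allocated a page at 0x%llx\n", (unsigned long long)addr);
    mach_vm_deallocate(mach_task_self(), addr, 4096);
    return 0;
}
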
index 9b20980cf4463cbe2bedceaad436a71a3449e4fd..39a02753a87157e4bfca969d16ae6c8c25ee5eaa 100644 (file)
@@ -56,6 +56,5 @@ int _mach_vsnprintf(char *buffer, int length, const char *fmt, va_list ap);
 // Actually in memcpy.c but MIG likes to include string.h
 
 void *memcpy(void *dst0, const void *src0, size_t length);
-int memcmp(const void *s1, const void *s2, size_t n);
 
 #endif /* _STRING_H_ */
index c9aefb3c6db9b73158f226f807cf7984fa05aed8..15a60542c6d95177c5ec73eb6aaef422c6f54918 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#if !LIBSYSCALL_INTERFACE && (defined(__LP64__) || defined(__arm__))
+#if !LIBSYSCALL_INTERFACE && defined(__LP64__)
 /*
  * In an LP64 environment, the traditional Mach VM interface names are
  * really just a second instance of the "wide" Mach VM interfaces.
  *
- * For ARM, which doesn't support two address space sizes, use the "wide"
- * interfaces as well, to reduce the amount of duplicate code compiled
- * into the kernel.
- *
  * The _MACH_VM_PUBLISH_AS_LOCAL_ flag triggers mach_vm.defs to export
  * the local names instead.
  *
index 12e9c76523b6e4c942ff72db0e09d2659e078ad4..f03e44420ea375f9149f5fb022a09c7151271304 100644 (file)
@@ -46,4 +46,6 @@ __get_cpu_capabilities:
        movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
        ret
 
+#else
+#error Unsupported architecture
 #endif
index 2f48a42c1a31cea4a64801bad7491bd947a94e5a..589af9a69081218dac0ee2d6f05fcdd592d23e4e 100644 (file)
@@ -45,15 +45,15 @@ fcntl(int fd, int cmd, ...)
         case F_PREALLOCATE:
         case F_SETSIZE:
         case F_RDADVISE:
-        case F_READBOOTSTRAP:
-        case F_WRITEBOOTSTRAP:
         case F_LOG2PHYS:
+        case F_LOG2PHYS_EXT:
         case F_GETPATH:
         case F_GETPATH_MTMINFO:
         case F_PATHPKG_CHECK:
         case F_OPENFROM:
         case F_UNLINKFROM:
         case F_ADDSIGS:
+        case F_ADDFILESIGS:
                arg = va_arg(ap, void *);
                break;
         default:
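
The switch above exists because a handful of fcntl commands carry a pointer argument while the rest carry an int, so the variadic wrapper has to pull the correct type off the va_list before trapping. A reduced sketch of that dispatch with a hypothetical two-command set:

#include <stdarg.h>
#include <stdio.h>

enum { CMD_TAKES_PTR = 1, CMD_TAKES_INT = 2 };   /* hypothetical commands */

static long fcntl_like(int fd, int cmd, ...)
{
    va_list ap;
    void *parg = NULL;
    int   iarg = 0;

    va_start(ap, cmd);
    switch (cmd) {
    case CMD_TAKES_PTR:
        parg = va_arg(ap, void *);   /* pointer-carrying commands */
        break;
    default:
        iarg = va_arg(ap, int);      /* everything else carries an int */
        break;
    }
    va_end(ap);
    printf("fd=%d cmd=%d ptr=%p int=%d\n", fd, cmd, parg, iarg);
    return 0;
}

int main(void)
{
    int x = 42;
    fcntl_like(0, CMD_TAKES_PTR, &x);
    fcntl_like(0, CMD_TAKES_INT, 7);
    return 0;
}
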
diff --git a/libsyscall/wrappers/legacy/getaudit.c b/libsyscall/wrappers/legacy/getaudit.c
new file mode 100644 (file)
index 0000000..0870532
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <bsm/audit.h>
+
+/*
+ * Wrappers for the legacy getaudit() and setaudit() syscalls.
+ */
+
+int
+getaudit(struct auditinfo *ainfo)
+{
+       int err;
+       auditinfo_addr_t aia;
+
+       if ((err = getaudit_addr(&aia, sizeof(aia))) != 0)
+               return (err);
+
+       ainfo->ai_auid = aia.ai_auid;
+       ainfo->ai_mask = aia.ai_mask;
+       ainfo->ai_termid.port = aia.ai_termid.at_port;
+       ainfo->ai_termid.machine = aia.ai_termid.at_addr[0];
+       ainfo->ai_asid = aia.ai_asid;
+
+       return (0);
+}
+
+int
+setaudit(const struct auditinfo *ainfo)
+{
+       int err;
+       struct auditinfo *ai = (struct auditinfo *)ainfo;
+       auditinfo_addr_t aia;
+
+       /* Get the current ai_flags so they are preserved. */
+       if ((err = getaudit_addr(&aia, sizeof(aia))) != 0)
+               return (err);
+
+       aia.ai_auid = ai->ai_auid;
+       aia.ai_mask = ai->ai_mask;
+       aia.ai_termid.at_port = ai->ai_termid.port;
+       aia.ai_termid.at_type = AU_IPv4;
+       aia.ai_termid.at_addr[0] = ai->ai_termid.machine;
+       aia.ai_asid = ai->ai_asid;
+
+       if ((err = setaudit_addr(&aia, sizeof(aia))) != 0)
+               return (err);
+
+       /* The session ID may have been assigned by the kernel, so copy that back. */
+       ai->ai_asid = aia.ai_asid;
+
+       return (0);
+}
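
These wrappers translate between the legacy auditinfo layout and the extended auditinfo_addr_t the kernel now speaks. A minimal caller, assuming the prototypes come from <bsm/audit.h> (the call may fail where auditing is unavailable):

#include <bsm/audit.h>
#include <stdio.h>

int main(void)
{
    struct auditinfo ai;

    if (getaudit(&ai) != 0) {
        perror("getaudit");   /* expected where audit is unsupported */
        return 1;
    }
    printf("audit uid %u, session %u\n",
           (unsigned)ai.ai_auid, (unsigned)ai.ai_asid);
    return 0;
}
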
index c9af35506ae73f1fb4cb239700c5d8e4b48190b7..5bce5933a8a91880b7dadc42130804d010dc14d6 100644 (file)
@@ -124,20 +124,3 @@ bcopy(const void *s1, void *s2, size_t n)
 {
        memcpy(s2, s1, n);
 }
-
-/*
- * Compare memory regions.
- */
-__private_extern__ int
-memcmp(const void *s1, const void *s2, size_t n)
-{
-       if (n != 0) {
-               const unsigned char *p1 = s1, *p2 = s2;
-
-               do {
-                       if (*p1++ != *p2++)
-                               return (*--p1 - *--p2);
-               } while (--n != 0);
-       }
-       return (0);
-}
diff --git a/libsyscall/wrappers/open_dprotected_np.c b/libsyscall/wrappers/open_dprotected_np.c
new file mode 100644 (file)
index 0000000..afd213d
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <sys/fcntl.h>
+
+int __open_dprotected_np(const char* path, int flags, int class, int dpflags, int mode);
+
+int open_dprotected_np(const char *path, int flags, int class, int dpflags, ...) {
+       int mode = 0;
+
+       if (flags & O_CREAT) {
+               va_list ap;
+               va_start(ap, dpflags);
+               mode = va_arg(ap, int);
+               va_end(ap);
+       }
+       return (__open_dprotected_np(path, flags, class, dpflags, mode));
+}
+
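
Since the wrapper only consumes a mode argument when O_CREAT is set, callers pass one exactly as they would to open(2). A hedged usage sketch; the declaration is repeated here because it normally comes from a private header, and the bare integer stands in for a data-protection class constant:

#include <fcntl.h>
#include <unistd.h>

/* Mirrors the wrapper above; normally declared in a private header. */
int open_dprotected_np(const char *path, int flags, int class, int dpflags, ...);

int main(void)
{
    /* 3 stands in for a protection-class constant (assumption). */
    int fd = open_dprotected_np("/tmp/example", O_CREAT | O_RDWR, 3, 0, 0600);
    if (fd < 0)
        return 1;
    close(fd);
    return 0;
}
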
index 68366de863ae019d369ed9155978e72cc041f16b..85c282f0ba3c81a01154cdd5716fd664ff7c0abf 100755 (executable)
@@ -126,6 +126,25 @@ my %Symbols = (
     },
 );
 
+# An explicit list of cancelable syscalls, used to create stubs that call
+# the cancelable version of cerror.
+my @Cancelable = qw/
+       accept access aio_suspend
+       close connect
+       fcntl fdatasync fpathconf fstat fsync
+       getlogin
+       ioctl
+       link lseek lstat
+       msgrcv msgsnd msync
+       open
+       pathconf poll posix_spawn pread pwrite
+       read readv recvfrom recvmsg rename
+       __semwait_signal __sigwait
+       select sem_wait semop sendmsg sendto sigsuspend stat symlink sync
+       unlink
+       wait4 waitid write writev
+/;
+
 sub usage {
     die "Usage: $MyName syscalls.master custom-directory platforms-directory out-directory\n";
 }
@@ -216,7 +235,7 @@ sub checkForCustomStubs {
         $$sym{is_custom} = $source;
         if (!$$sym{is_private}) {
             foreach my $subarch (@Architectures) {
-                (my $arch = $subarch) =~ s/arm(.*)/arm/;
+                (my $arch = $subarch) =~ s/arm(v.*)/arm/;
                 $$sym{aliases}{$arch} = [] unless $$sym{aliases}{$arch};
                 push(@{$$sym{aliases}{$arch}}, $$sym{asm_sym});
             }
@@ -237,7 +256,7 @@ sub readAliases {
     
     my @a = ();
     for my $arch (@Architectures) {
-        (my $new_arch = $arch) =~ s/arm(.*)/arm/g;
+        (my $new_arch = $arch) =~ s/arm(v.*)/arm/g;
         push(@a, $new_arch) unless grep { $_ eq $new_arch } @a;
     }
     
@@ -294,18 +313,22 @@ sub writeStubForSymbol {
     
     my @conditions;
     for my $subarch (@Architectures) {
-        (my $arch = $subarch) =~ s/arm(.*)/arm/;
+        (my $arch = $subarch) =~ s/arm(v.*)/arm/;
         push(@conditions, "defined(__${arch}__)") unless grep { $_ eq $arch } @{$$symbol{except}};
     }
+
+       my %is_cancel;
+       for (@Cancelable) { $is_cancel{$_} = 1 };
     
     print $f "#define __SYSCALL_32BIT_ARG_BYTES $$symbol{bytes}\n";
     print $f "#include \"SYS.h\"\n\n";
     if (scalar(@conditions)) {
+        my $nc = ($is_cancel{$$symbol{syscall}} ? "cerror" : "cerror_nocancel");
         printf $f "#if " . join(" || ", @conditions) . "\n";
-        printf $f "__SYSCALL(%s, %s, %d)\n", $$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs};
+        printf $f "__SYSCALL2(%s, %s, %d, %s)\n", $$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs}, $nc;
         if (!$$symbol{is_private} && (scalar(@conditions) < scalar(@Architectures))) {
             printf $f "#else\n";
-            printf $f "__SYSCALL(%s, %s, %d)\n", "__".$$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs};
+            printf $f "__SYSCALL2(%s, %s, %d, %s)\n", "__".$$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs}, $nc;
         }
         printf $f "#endif\n\n";
     } else {
@@ -318,7 +341,7 @@ sub writeAliasesForSymbol {
     my ($f, $symbol) = @_;
     
     foreach my $subarch (@Architectures) {
-        (my $arch = $subarch) =~ s/arm(.*)/arm/;
+        (my $arch = $subarch) =~ s/arm(v.*)/arm/;
         
         next unless scalar($$symbol{aliases}{$arch});
         
index 068bc30ad15fe8813bda017faa9f3f599a86c91a..3f98c487d620e95084e7311db16beb1a33dacf49 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -x
 #
 # Copyright (c) 2010 Apple Inc. All rights reserved.
 #
 cd $OBJROOT
 
 # check if we're building for the simulator
-[ "$RC_ProjectName" == "Libmach_Sim" ] && DSTROOT="$DSTROOT$SDKROOT"
+if [ "${RC_ProjectName%_Sim}" != "${RC_ProjectName}" ] ; then
+       DSTROOT="${DSTROOT}${SDKROOT}"
+fi
 
 MIG=`xcrun -sdk "$SDKROOT" -find mig`
 MIGCC=`xcrun -sdk "$SDKROOT" -find cc`
 export MIGCC
 MIG_DEFINES="-DLIBSYSCALL_INTERFACE"
 MIG_HEADER_DST="$DSTROOT/usr/include/mach"
+MIG_PRIVATE_HEADER_DST="$DSTROOT/usr/local/include/mach"
 SERVER_HEADER_DST="$DSTROOT/usr/include/servers"
 # from old Libsystem makefiles
 MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 1`
 SRC="$SRCROOT/mach"
+MIG_INTERNAL_HEADER_DST="$DERIVED_SOURCES_DIR/mach"
+MIG_PRIVATE_DEFS_INCFLAGS="-I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders"
 
 MIGS="clock.defs
        clock_priv.defs
@@ -49,18 +54,32 @@ MIGS="clock.defs
        exc.defs
        host_priv.defs
        host_security.defs
-       ledger.defs
        lock_set.defs
-       mach_port.defs
        mach_host.defs
-       mach_vm.defs
+       mach_port.defs
        processor.defs
        processor_set.defs
        vm_map.defs"
 
+MIGS_PRIVATE=""
+
+MIGS_DUAL_PUBLIC_PRIVATE=""
+
+if [[ "$PLATFORM" = "iPhoneOS" || "$RC_ProjectName" = "Libsyscall_headers_Sim" ]]
+then
+       MIGS_PRIVATE="mach_vm.defs"
+else
+       MIGS+=" mach_vm.defs"
+fi
+
+
 MIGS_ARCH="thread_act.defs
        task.defs"
 
+MIGS_INTERNAL="mach_port.defs
+       mach_vm.defs
+       vm_map.defs"
+
 SERVER_HDRS="key_defs.h
        ls_defs.h
        netname_defs.h
@@ -79,11 +98,33 @@ $MIG -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC/servers/net
 
 mkdir -p $MIG_HEADER_DST
 
-for mig in $MIGS; do
+for mig in $MIGS $MIGS_DUAL_PUBLIC_PRIVATE; do
        MIG_NAME=`basename $mig .defs`
        $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig
 done
 
+mkdir -p $MIG_PRIVATE_HEADER_DST
+
+for mig in $MIGS_PRIVATE $MIGS_DUAL_PUBLIC_PRIVATE; do
+       MIG_NAME=`basename $mig .defs`
+       $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_PRIVATE_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $MIG_PRIVATE_DEFS_INCFLAGS $SRC/$mig
+       if [ ! -e "$MIG_HEADER_DST/$MIG_NAME.h" ]; then
+           echo "#error $MIG_NAME.h unsupported." > "$MIG_HEADER_DST/$MIG_NAME.h"
+       fi
+done
+
+
+# Special headers used just for building Libsyscall.
+# Note: we do not pass -DLIBSYSCALL_INTERFACE to mig, so we get the proper
+#  'internal' version of the headers being built.
+mkdir -p $MIG_INTERNAL_HEADER_DST
+for mig in $MIGS_INTERNAL; do
+       MIG_NAME=`basename $mig .defs`
+       $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_INTERNAL_HEADER_DST/${MIG_NAME}_internal.h" $SRC/$mig
+done
 ARCHS=`echo $ARCHS | sed -e 's/armv./arm/g'`
 for arch in $ARCHS; do
        MIG_ARCH_DST="$MIG_HEADER_DST/$arch"
diff --git a/lldbmacros.py b/lldbmacros.py
new file mode 100644 (file)
index 0000000..9278abb
--- /dev/null
@@ -0,0 +1,184 @@
+import lldb
+import re
+import getopt
+
+# Note: This module will eventually contain many macros, so please bear with the Macro/EndMacro comments.
+
+
+# Global functions
+def findGlobal(variable):
+    return lldb.target.FindGlobalVariables(variable, 0).GetValueAtIndex(0)
+
+def findGlobalValue(variable):
+    return findGlobal(variable).GetValue()
+
+def readMemberUnsigned(variable,member):
+    return variable.GetChildMemberWithName(member).GetValueAsUnsigned(0)
+
+def readMemberSigned(variable,member):
+    return variable.GetChildMemberWithName(member).GetValueAsSigned()
+
+def readMemberString(variable,member):
+    return str(variable.GetChildMemberWithName(member).GetSummary()).strip('"')
+
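
Together these helpers let a macro read kernel globals and struct members symbolically. A small example in the same style — it assumes an lldb session attached to a kernel with symbols; vm_page_wire_count is one of the globals memstats reads below:

    def wired_pages():
        # findGlobal returns an SBValue; read it as an unsigned integer.
        return findGlobal("vm_page_wire_count").GetValueAsUnsigned(0)
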
+
+
+class Output :
+    """
+    An output handler for all commands. Use Output.printString to direct all
+    macro output through this handler.
+    Currently it provides two capabilities:
+    -o path/to/filename
+       The output of the command execution will be saved to the given file
+       (e.g. /tmp/output.txt). Parser information and errors will not be
+       sent to the file.
+    -s filter_string
+       The "filter_string" param is parsed as a python regex and each line of
+       output will be printed/saved only if it matches the expression.
+       The command header is never filtered.
+    """
+    STDOUT  = 1
+    FILEOUT = 2
+    FILTER  = False
+
+    def __init__(self):
+        self.out = Output.STDOUT
+        self.fname = None
+        self.fhandle = None
+        self.FILTER = False
+
+    def printString(self, s):
+        """ Handler for all command output. By default just print to stdout. """
+        if self.FILTER and not self.reg.search(s): return
+        if self.out == Output.STDOUT: print s
+        elif self.out == Output.FILEOUT: self.fhandle.write(s+"\n")
+
+    def printHeader(self,s):
+        if self.out == Output.STDOUT: print s
+        elif self.out == Output.FILEOUT: self.fhandle.write(s+"\n")
+
+    def done(self):
+        """ Closes any open files and reports any errors. """
+        if self.fhandle != None:
+            self.fhandle.close()
+
+    def setOptions(self,args):
+        """ Parse the arguments passed to the command.
+            param : args => [] of <str> (typically args.split())
+        """
+        opts=()
+        try:
+            opts,args = getopt.getopt(args,'o:s:',[])
+        except getopt.GetoptError,err:
+            print str(err)
+        # continue with processing
+        for o,a in opts :
+            if o == "-o" and len(a) > 0:
+                self.fname=a.strip()
+                self.fhandle=open(self.fname,"w")
+                self.out = Output.FILEOUT
+                print "saving results in file ",str(a)
+            elif o == "-s" and len(a) > 0:
+                self.reg = re.compile(a.strip(),re.MULTILINE|re.DOTALL)
+                self.FILTER=True
+                print "showing results for regex:",a.strip()
+            else :
+                print "Error: unknown option ",o,a
+
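
A short sketch of how the command wrappers below drive this class (the option string is illustrative):

    stream = Output()
    stream.setOptions("-s vm_page".split())      # keep only lines matching /vm_page/
    stream.printHeader("memstats demo")          # headers bypass the filter
    stream.printString("vm_page_free_count: 1234")
    stream.done()
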
+
+# Interface function for showallkexts command
+def showallkexts_command(debugger, args, result, lldb_dict):
+    kext_summary_header = findGlobal("gLoadedKextSummaries")
+    result.Printf(_summarizeallkexts(kext_summary_header))
+    return None
+
+# Interface function for loaded kext summary formatter
+def showallkexts_summary(kext_summary_header, lldb_dict):
+    return "\n" + _summarizeallkexts(kext_summary_header)
+
+# Internal function for walking kext summaries
+def _summarizeallkexts(kext_summary_header):
+    summary = "ID  Address            Size              Version    Name\n"
+    summaries = kext_summary_header.GetChildMemberWithName("summaries")
+    count = int(kext_summary_header.GetChildMemberWithName("numSummaries").GetValue())
+    for i in range(0, count):
+        summary += summaries.GetChildAtIndex(i, lldb.eNoDynamicValues, True).GetSummary() + "\n"
+    return summary
+
+# Macro: memstats
+def memstats_command(debugger,args,result,lldb_dict):
+    stream = Output()
+    stream.setOptions(args.split())
+    memstats(stream)
+    stream.done()
+
+def memstats(ostream):
+    ostream.printString ( "kern_memorystatus_level: {0}".format(findGlobalValue("kern_memorystatus_level")) )
+    ostream.printString ( "vm_page_throttled_count: {0}".format(findGlobalValue("vm_page_throttled_count")) )
+    ostream.printString ( "vm_page_active_count:    {0}".format(findGlobalValue("vm_page_active_count")) )
+    ostream.printString ( "vm_page_inactive_count:  {0}".format(findGlobalValue("vm_page_inactive_count")) )
+    ostream.printString ( "vm_page_wire_count:      {0}".format(findGlobalValue("vm_page_wire_count")) )
+    ostream.printString ( "vm_page_free_count:      {0}".format(findGlobalValue("vm_page_free_count")) )
+    ostream.printString ( "vm_page_purgeable_count: {0}".format(findGlobalValue("vm_page_purgeable_count")) )
+    ostream.printString ( "vm_page_inactive_target: {0}".format(findGlobalValue("vm_page_inactive_target")) )
+    ostream.printString ( "vm_page_free_target:     {0}".format(findGlobalValue("vm_page_free_target")) )
+    ostream.printString ( "insue_ptepages_count:    {0}".format(findGlobalValue("inuse_ptepages_count")) )
+    ostream.printString ( "vm_page_free_reserved:   {0}".format(findGlobalValue("vm_page_free_reserved")) )
+# EndMacro: memstats
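
Nothing in the snippets above registers the *_command functions with lldb automatically. A typical registration hook — shown here only as a sketch, assuming the module is loaded with "command script import lldbmacros" — would look like:

    def __lldb_init_module(debugger, internal_dict):
        # lldb invokes this hook when the module is imported.
        debugger.HandleCommand("command script add -f lldbmacros.memstats_command memstats")
        debugger.HandleCommand("command script add -f lldbmacros.zprint_command zprint")
        debugger.HandleCommand("command script add -f lldbmacros.showioalloc_command showioalloc")
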
+
+
+# Macro: zprint
+def zprint_command(debugger,args,result,lldb_dict):
+    stream = Output()
+    stream.setOptions(args.split())
+    _zprint(stream)
+    stream.done()
+
+def _zprint(ostream):
+    """Display info about memory zones"""
+    ostream.printHeader ( "{0: ^20s} {1: >5s} {2: >12s} {3: >12s} {4: >7s} {5: >8s} {6: >9s} {7: >8s} {8: <20s} {9} ".format('ZONE', 'COUNT', 'TOT_SZ', 'MAX_SZ', 'ELT_SZ', 'ALLOC_SZ', 'TOT_ALLOC', 'TOT_FREE', 'NAME','') )
+    format_string = '{0: >#020x} {1: >5d} {2: >12d} {3: >12d} {4: >7d} {5: >8d} {6: >9d} {7: >8d} {8: <20s} {9}'
+    zone_ptr = findGlobal("first_zone");
+
+    while zone_ptr.GetValueAsUnsigned() != 0 :
+        addr = zone_ptr.GetValueAsUnsigned()
+       count = readMemberUnsigned(zone_ptr, "count")
+       cur_size = readMemberUnsigned(zone_ptr, "cur_size")
+       max_size = readMemberUnsigned(zone_ptr, "max_size")
+       elem_size = readMemberUnsigned(zone_ptr, "elem_size")
+       alloc_size = readMemberUnsigned(zone_ptr, "alloc_size")
+       num_allocs = readMemberUnsigned(zone_ptr, "num_allocs")
+       num_frees = readMemberUnsigned(zone_ptr, "num_frees")
+       name = str(readMemberString(zone_ptr, "zone_name"))
+       markings=""
+       if str(zone_ptr.GetChildMemberWithName("exhaustible").GetValue()) == '1' : markings+="H"
+       if str(zone_ptr.GetChildMemberWithName("collectable").GetValue()) == '1' : markings+="C"
+       if str(zone_ptr.GetChildMemberWithName("expandable").GetValue()) == '1' : markings+="X"
+       if str(zone_ptr.GetChildMemberWithName("noencrypt").GetValue()) == '1' : markings+="$"
+       
+       ostream.printString(format_string.format(addr, count, cur_size, max_size, elem_size, alloc_size, num_allocs, num_frees, name, markings))
+       
+       zone_ptr = zone_ptr.GetChildMemberWithName("next_zone")
+    return None
+# EndMacro: zprint
+
+
+# Macro: showioalloc
+def showioalloc_command(debugger,args,result,lldb_dict):
+    stream = Output()
+    stream.setOptions(args.split())
+    _showioalloc(stream)
+    stream.done()
+
+def _showioalloc(ostream):
+    ivars_size = findGlobal("debug_ivars_size").GetValueAsUnsigned()
+    container_malloc_size = findGlobal("debug_container_malloc_size").GetValueAsUnsigned()
+    iomalloc_size = findGlobal("debug_iomalloc_size").GetValueAsUnsigned()
+    iomallocpageable_size = findGlobal("debug_iomallocpageable_size").GetValueAsUnsigned()
+    
+    ostream.printString("Instance allocation  = {0:#0x} = {1:d} K".format(ivars_size, (int)(ivars_size/1024)))
+    ostream.printString("Container allocation = {0:#0x} = {1:d} K".format(container_malloc_size,(int)(container_malloc_size/1024)))
+    ostream.printString("IOMalloc allocation  = {0:#0x} = {1:d} K".format(iomalloc_size,(int)(iomalloc_size/1024)))
+    ostream.printString("Pageable allocation  = {0:#0x} = {1:d} K".format(iomallocpageable_size,(int)(iomallocpageable_size/1024)))
+    return None
+# EndMacro: showioalloc
+
+
index 5fea21d308e697c3a1df5fff029f244ecc5287eb..12f5203b1f5ce3e43b6842a2fa41b2184c4575fb 100644 (file)
@@ -25,6 +25,19 @@ endif
 
 SDKROOT ?= /
 HOST_SDKROOT ?= /
+HOST_SPARSE_SDKROOT ?= /
+
+# SDKROOT may be passed as a shorthand like "iphoneos.internal". We
+# must resolve these to a full path and override SDKROOT.
+
+ifeq ($(SDKROOT_RESOLVED),)
+ifeq ($(SDKROOT),/)
+export SDKROOT_RESOLVED        := /
+else
+export SDKROOT_RESOLVED := $(shell xcodebuild -sdk $(SDKROOT) -version Path | head -1)
+endif
+endif
+override SDKROOT = $(SDKROOT_RESOLVED)
 
 ifeq ($(PLATFORM),)
        export PLATFORM := $(shell xcodebuild -sdk $(SDKROOT) -version PlatformPath | head -1 | sed 's,^.*/\([^/]*\)\.platform$$,\1,')
@@ -33,20 +46,25 @@ ifeq ($(PLATFORM),)
        endif
 endif
 
+ifeq ($(PLATFORM),iPhoneOS)
+       DEVELOPER_DIR ?= $(shell xcode-select -print-path)
+       export HOST_SPARSE_SDKROOT := $(DEVELOPER_DIR)/SDKs/iPhoneHostSideTools.sparse.sdk
+endif
+
 # CC/CXX get defined by make(1) by default, so we can't check them
 # against the empty string to see if they haven't been set
 ifeq ($(origin CC),default)
 ifneq ($(findstring iPhone,$(PLATFORM)),)
-       export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find gcc-4.2)
+       export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang)
 else
-       export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find cc)
+       export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang)
 endif
 endif
 ifeq ($(origin CXX),default)
 ifneq ($(findstring iPhone,$(PLATFORM)),)
-       export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find g++-4.2)
+       export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang++)
 else
-       export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find c++)
+       export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang++)
 endif
 endif
 ifeq ($(MIG),)
@@ -55,9 +73,6 @@ endif
 ifeq ($(MIGCC),)
        export MIGCC := $(CC)
 endif
-ifeq ($(RELPATH),)
-       export RELPATH := $(shell $(XCRUN) -sdk $(SDKROOT) -find relpath)
-endif
 ifeq ($(STRIP),)
        export STRIP := $(shell $(XCRUN) -sdk $(SDKROOT) -find strip)
 endif
@@ -73,9 +88,6 @@ endif
 ifeq ($(UNIFDEF),)
        export UNIFDEF := $(shell $(XCRUN) -sdk $(SDKROOT) -find unifdef)
 endif
-ifeq ($(DECOMMENT),)
-       export DECOMMENT := $(shell $(XCRUN) -sdk $(SDKROOT) -find decomment)
-endif
 ifeq ($(DSYMUTIL),)
        export DSYMUTIL := $(shell $(XCRUN) -sdk $(SDKROOT) -find dsymutil)
 endif
@@ -94,6 +106,9 @@ endif
 
 # Platform-specific tools
 ifneq ($(findstring iPhone,$(PRODUCT)),)
+ifeq ($(EMBEDDED_DEVICE_MAP),)
+       export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map)
+endif
 ifeq ($(IPHONEOS_OPTIMIZE),)
        export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize)
 endif
@@ -102,10 +117,11 @@ endif
 # Scripts or tools we build ourselves
 SEG_HACK := $(OBJROOT)/SETUP/setsegname/setsegname
 KEXT_CREATE_SYMBOL_SET := $(OBJROOT)/SETUP/kextsymboltool/kextsymboltool
+DECOMMENT := $(OBJROOT)/SETUP/decomment/decomment
 NEWVERS = $(SRCROOT)/config/newvers.pl
+MD := $(OBJROOT)/SETUP/md/md
 
 # Standard BSD tools
-MD = /usr/bin/md
 RM = /bin/rm -f
 CP = /bin/cp
 MV = /bin/mv
@@ -113,6 +129,7 @@ LN = /bin/ln -fs
 CAT = /bin/cat
 MKDIR = /bin/mkdir -p
 FIND = /usr/bin/find
+XARGS = /usr/bin/xargs
 INSTALL = /usr/bin/install
 TAR = /usr/bin/gnutar
 BASENAME = /usr/bin/basename
@@ -120,6 +137,9 @@ TR = /usr/bin/tr
 
 # Platform-specific tools
 ifeq (iPhoneOS,$(PLATFORM))
+ifeq ($(EMBEDDED_DEVICE_MAP),)
+       export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map || echo /usr/bin/true)
+endif
 ifeq ($(IPHONEOS_OPTIMIZE),)
        export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize || echo /usr/bin/true)
 endif
@@ -144,13 +164,4 @@ ifeq ($(HOST_CODESIGN),)
        export HOST_CODESIGN    := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find codesign)
 endif
 
-#
-# Command to build libkmod.a/libkmodc++.a, which are
-# linked into kext binaries, and should be built as if
-# they followed system-wide policies
-#
-ifeq ($(LIBKMOD_CC),)
-       export LIBKMOD_CC       := $(shell $(XCRUN) -sdk $(SDKROOT) -find cc)
-endif
-
 # vim: set ft=make:
index 92d80379f282168017dd5c897566726e06754969..4e49fe6a71320f217dd09f6f5d44c3d144ea392f 100644 (file)
@@ -1,4 +1,5 @@
 export SOURCE=$(shell /bin/pwd)
+export RELATIVE_SOURCE_PATH ?= .
 
 #
 # gnumake 3.77 support
@@ -25,11 +26,11 @@ export COMPONENT_LIST       = osfmk bsd libkern iokit pexpert libsa security
 export COMPONENT_LIST_UC := $(shell printf "%s" "$(COMPONENT_LIST)" | $(TR) a-z A-Z)
 endif
 ifndef COMPONENT
-export COMPONENT       := $(firstword $(subst /, ,$(shell $(RELPATH) $(SRCROOT) $(SOURCE))))
+export COMPONENT       := $(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH)))
 export COMPONENT_IMPORT_LIST := $(filter-out $(COMPONENT),$(COMPONENT_LIST)) 
 else
 ifeq   ($(COMPONENT), .)
-export COMPONENT        := $(firstword $(subst /, ,$(shell $(RELPATH) $(SRCROOT) $(SOURCE))))
+export COMPONENT       := $(if $(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH))))
 export COMPONENT_IMPORT_LIST := $(filter-out $(COMPONENT),$(COMPONENT_LIST)) 
 endif
 endif
@@ -42,9 +43,13 @@ endif
 
 ifndef ARCH_CONFIGS
 ifdef RC_ARCHS
-export ARCH_CONFIGS    := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -e 's/ARMV./ARM/g')
+export ARCH_CONFIGS    := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g')
 else
-export ARCH_CONFIGS    := $(shell arch | $(TR) a-z A-Z | sed -e 's/ARMV./ARM/g')
+ifeq ($(PLATFORM),iPhoneOS)
+       export ARCH_CONFIGS     := ARM
+else
+       export ARCH_CONFIGS     := $(shell arch | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g')
+endif
 endif
 endif
 ifdef  ARCH_CONFIG
@@ -104,10 +109,12 @@ export MACHINE_CONFIG     = DEFAULT
 endif
 
 
+#
+# Machine Configuration options  
+#
+export SUPPORTED_I386_MACHINE_CONFIGS := DEFAULT
+export SUPPORTED_X86_64_MACHINE_CONFIGS := DEFAULT
 
-ifndef SUPPORTED_MACHINE_CONFIGS
-export SUPPORTED_MACHINE_CONFIGS = DEFAULT
-endif
 
 #
 # Target configuration options.  NOTE - target configurations will 
@@ -117,10 +124,10 @@ endif
 # kernel configuration, the second is the architecture configuration,
 # and the third is the machine configuration.  You may pass in as
 # many groups of configurations as you wish.  Each item passed in is
-# seperated by whitespace.
+# separated by whitespace.
 #
 # Example:
-#      TARGET_CONFIGS="release x86_64 default debug i386 default release arm MX31ADS"
+#      TARGET_CONFIGS="release x86_64 default debug i386 default release arm S5l8920X"
 # Parameters may be in upper or lower case (they are converted to upper).
 #
 # "default" parameter is a special case.  It means use the default value for 
@@ -138,9 +145,17 @@ ifdef TARGET_CONFIGS
        export MACHINE_CONFIG   = $(word 3, $(TARGET_CONFIGS_UC))
        export DEFAULT_KERNEL_CONFIG = $(word 1, $(TARGET_CONFIGS_UC))
 else
-       # generate TARGET_CONFIGS using KERNEL_CONFIGS and ARCH_CONFIGS and MACHINE_CONFIG (which defaults to "DEFAULT")
-       temp_list = $(foreach my_kern_config, $(KERNEL_CONFIGS), $(my_kern_config) arch_slot $(MACHINE_CONFIG))
-       export TARGET_CONFIGS = $(strip $(foreach my_arch_config, $(ARCH_CONFIGS), $(subst arch_slot,$(my_arch_config),$(temp_list))))
+
+       ifneq ($(filter %_embedded,$(MAKECMDGOALS)),)
+# generate set of standard embedded configs
+               export TARGET_CONFIGS = $(TARGET_CONFIGS_EMBEDDED)
+       else ifneq ($(filter %_devicemap,$(MAKECMDGOALS)),)
+               DEVICEMAP_PLATFORMS = $(shell $(EMBEDDED_DEVICE_MAP) -query SELECT DISTINCT Platform FROM Targets | $(TR) [:lower:] [:upper:])
+               export TARGET_CONFIGS = $(foreach my_kernel_config,$(KERNEL_CONFIGS_EMBEDDED),$(foreach my_arch,$(ARCH_CONFIGS),$(foreach my_machine_config,$(filter $(DEVICEMAP_PLATFORMS),$(SUPPORTED_$(my_arch)_MACHINE_CONFIGS)),$(my_kernel_config) $(my_arch) $(my_machine_config) )))
+       else
+# generate TARGET_CONFIGS using KERNEL_CONFIGS and ARCH_CONFIGS and MACHINE_CONFIG (which defaults to "DEFAULT")
+               export TARGET_CONFIGS = $(strip $(foreach my_arch_config, $(ARCH_CONFIGS), $(foreach my_kern_config, $(KERNEL_CONFIGS), $(my_kern_config) $(my_arch_config) $(MACHINE_CONFIG))))
+       endif
        export TARGET_CONFIGS_UC := $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z)
        export MACHINE_CONFIG   = $(word 3, $(TARGET_CONFIGS_UC))
        export DEFAULT_KERNEL_CONFIG = $(word 1, $(TARGET_CONFIGS_UC))
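
When TARGET_CONFIGS is not given, the default branch above builds it as a cross product of KERNEL_CONFIGS and ARCH_CONFIGS, with the (single) MACHINE_CONFIG appended to each pair. A Python transcription with illustrative values:

    kernel_configs = ["RELEASE", "DEBUG"]
    arch_configs   = ["X86_64", "I386"]
    machine_config = "DEFAULT"

    target_configs = []
    for arch in arch_configs:
        for kern in kernel_configs:
            target_configs += [kern, arch, machine_config]

    # RELEASE X86_64 DEFAULT DEBUG X86_64 DEFAULT RELEASE I386 DEFAULT DEBUG I386 DEFAULT
    print(" ".join(target_configs))
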
@@ -166,10 +181,12 @@ endif
 endif
 
 ifneq ($(MACHINE_CONFIG),)
-ifeq ($(filter $(MACHINE_CONFIG),$(SUPPORTED_MACHINE_CONFIGS)),)
+ifneq ($(ARCH_CONFIG),)
+ifeq ($(filter $(MACHINE_CONFIG),$(SUPPORTED_$(ARCH_CONFIG)_MACHINE_CONFIGS)),)
 $(error Unsupported MACHINE_CONFIG $(MACHINE_CONFIG))
 endif
 endif
+endif
 
 ifneq ($(PLATFORM),)
 ifeq ($(filter $(PLATFORM),$(SUPPORTED_PLATFORMS)),)
@@ -180,7 +197,7 @@ endif
 #
 # Kernel Configuration to install
 #
-#  supported install architecture : I386 X86_64 ARM
+#  supported install architecture : I386 X86_64 
 #
 export INSTALL_TYPE    = $(DEFAULT_KERNEL_CONFIG)
 
@@ -194,6 +211,22 @@ ifeq ($(INSTALL_ARCH_DEFAULT),)
 $(error Could not determine INSTALL_ARCH_DEFAULT)
 endif
 
+#
+# Deployment target flag
+#
+ifndef DEPLOYMENT_TARGET_FLAGS
+SDKVERSION=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1)
+ifeq ($(PLATFORM),MacOSX)
+    export DEPLOYMENT_TARGET_FLAGS := -mmacosx-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),iPhoneOS)
+    export DEPLOYMENT_TARGET_FLAGS := -miphoneos-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),iPhoneSimulator)
+    export DEPLOYMENT_TARGET_FLAGS := 
+else
+    export DEPLOYMENT_TARGET_FLAGS := 
+endif
+endif
+
 #
 # Standard defines list
 #
@@ -231,8 +264,8 @@ CXXWARNFLAGS_STD = \
        -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \
        -Wredundant-decls -Wextra-tokens
 
-# Certain warnings are non-fatal (8474835)
-CXXWARNFLAGS_STD += -Wno-error=cast-align
+# Certain warnings are non-fatal (8474835, 9000888)
+CXXWARNFLAGS_STD += -Wno-error=cast-align -Wno-error=overloaded-virtual
 
 # Can be overridden in Makefile.template or Makefile.$arch
 export CXXWARNFLAGS ?= $(CXXWARNFLAGS_STD)
@@ -253,11 +286,6 @@ endif
 
 ARCH_FLAGS_I386                  = -arch i386
 ARCH_FLAGS_X86_64        = -arch x86_64
-ARCH_FLAGS_ARM           = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
-
-ARCH_FLAGS_ALL_I386            = $(ARCH_FLAGS_I386)
-ARCH_FLAGS_ALL_X86_64          = $(ARCH_FLAGS_X86_64)
-ARCH_FLAGS_ALL_ARM             = -arch arm
 
 
 #
@@ -274,10 +302,9 @@ export DSYMBUILDDIR = ./Contents/Resources/DWARF/
 # We must not use -fno-keep-inline-functions, or it will remove the dtrace
 # probes from the kernel.
 #
-export CFLAGS_GEN = -static $(DEBUG_CFLAGS) -nostdinc \
-       -freorder-blocks                                      \
-       -fno-builtin -fno-common -msoft-float \
-       -fsigned-bitfields -fno-stack-protector $(OTHER_CFLAGS)
+export CFLAGS_GEN = $(DEBUG_CFLAGS) -nostdinc \
+       -freorder-blocks -fno-builtin -fno-common \
+       -fsigned-bitfields $(OTHER_CFLAGS)
 
 ifeq ($(BUILD_STABS),1)
 export CFLAGS_GEN += -gstabs+
@@ -291,15 +318,15 @@ endif
 
 export CFLAGS_RELEASE  = 
 export CFLAGS_DEVELOPMENT      =
-export CFLAGS_DEBUG    = 
-export CFLAGS_PROFILE  =  -pg
+export CFLAGS_DEBUG    = -fstack-protector-all
+export CFLAGS_PROFILE  = -pg
 
-export CFLAGS_I386     = -Di386 -DI386 -D__I386__ \
-                               -DPAGE_SIZE_FIXED
+export CFLAGS_I386     = -static -Di386 -DI386 -D__I386__ \
+                               -DPAGE_SIZE_FIXED -msoft-float \
+                               -integrated-as
 export CFLAGS_X86_64   = -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \
-                               -DPAGE_SIZE_FIXED -mkernel
-export CFLAGS_ARM      = -Darm -DARM -D__ARM__ -DPAGE_SIZE_FIXED \
-                               -fno-strict-aliasing -fno-keep-inline-functions
+                               -DPAGE_SIZE_FIXED -mkernel -msoft-float \
+                               -integrated-as
 
 
 ifeq (-arch armv7,$(ARCH_FLAGS_ARM))
@@ -308,12 +335,7 @@ endif
 ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM             += -mthumb
 endif
-ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
-CFLAGS_ARM             += -mno-thumb
-endif
-ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
-CFLAGS_ARM             += -mthumb
-endif
+
 
 export CFLAGS_RELEASEI386 = -O2
 export CFLAGS_DEVELOPMENTI386 = -O2
@@ -328,7 +350,7 @@ export CFLAGS_PROFILEX86_64 = -O2
 
 export CFLAGS_RELEASEARM = -O2
 export CFLAGS_DEVELOPMENTARM = -O2
-export CFLAGS_DEBUGARM = -O2
+export CFLAGS_DEBUGARM = -O0
 export CFLAGS_PROFILEARM = -O2
 
 export CFLAGS  = $(CFLAGS_GEN) \
@@ -337,6 +359,7 @@ export CFLAGS       = $(CFLAGS_GEN) \
                  $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) \
                  $($(addsuffix $(KERNEL_CONFIG),CFLAGS_)) \
                  $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),CFLAGS_))) \
+                 $(DEPLOYMENT_TARGET_FLAGS) \
                  $(DEFINES)
 
 #
@@ -344,30 +367,13 @@ export CFLAGS     = $(CFLAGS_GEN) \
 #
 
 OTHER_CXXFLAGS =
-               
-CXXFLAGS_GEN  = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext \
-                       $(OTHER_CXXFLAGS)
+
+CXXFLAGS_GEN  = -fapple-kext $(OTHER_CXXFLAGS)
 
 CXXFLAGS      = $(CXXFLAGS_GEN) \
                  $($(addsuffix $(ARCH_CONFIG),CXXFLAGS_)) \
                  $($(addsuffix $(KERNEL_CONFIG),CXXFLAGS_))
 
-
-#
-# Support for LLVM Link Time Optimization (LTO)
-#
-
-ifeq ($(BUILD_LTO),1)
-export CFLAGS_GEN      += -flto
-export CXXFLAGS_GEN    += -flto
-export BUILD_MACHO_OBJ = 0
-export BUILD_LTO       = 1
-else
-export BUILD_MACHO_OBJ = 1
-export BUILD_LTO       = 0
-endif
-
-
 #
 # Assembler command
 #
@@ -377,7 +383,7 @@ S_KCC       = $(CC)
 #
 # Default SFLAGS
 #
-export SFLAGS_GEN = -static -D__ASSEMBLER__ $(OTHER_CFLAGS)
+export SFLAGS_GEN = -D__ASSEMBLER__ $(OTHER_CFLAGS)
 
 export SFLAGS_RELEASE  = 
 export SFLAGS_DEVELOPMENT      = 
@@ -385,16 +391,18 @@ export SFLAGS_DEBUG       =
 export SFLAGS_PROFILE  = 
 
 export SFLAGS_I386     = $(CFLAGS_I386)
-export SFLAGS_ARM      = $(CFLAGS_ARM)
 export SFLAGS_X86_64   = $(CFLAGS_X86_64)
 
+
 export SFLAGS  = $(SFLAGS_GEN) \
                  $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \
                  $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
                  $($(addsuffix $(ARCH_CONFIG),SFLAGS_)) \
                  $($(addsuffix $(KERNEL_CONFIG),SFLAGS_)) \
+                 $(DEPLOYMENT_TARGET_FLAGS) \
                  $(DEFINES)
 
+
 #
 # Linker command
 #
@@ -403,19 +411,22 @@ LD        = $(KC++) -nostdlib
 #
 # Default LDFLAGS
 #
-
 export LDFLAGS_KERNEL_GEN = \
-       -static \
        -nostdlib \
        -fapple-kext \
        -Wl,-e,__start \
        -Wl,-sectalign,__TEXT,__text,0x1000 \
+       -Wl,-sectalign,__TEXT,initcode,0x1000 \
        -Wl,-sectalign,__DATA,__common,0x1000 \
        -Wl,-sectalign,__DATA,__bss,0x1000 \
        -Wl,-sectcreate,__PRELINK_TEXT,__text,/dev/null \
        -Wl,-sectcreate,__PRELINK_STATE,__kernel,/dev/null \
         -Wl,-sectcreate,__PRELINK_STATE,__kexts,/dev/null \
-       -Wl,-sectcreate,__PRELINK_INFO,__info,/dev/null
+       -Wl,-sectcreate,__PRELINK_INFO,__info,/dev/null \
+       -Wl,-new_linker \
+       -Wl,-pagezero_size,0x0 \
+       -Wl,-version_load_command \
+       -Wl,-function_starts
 
 # Availability of DWARF allows DTrace CTF (compressed type format) to be constructed.
 # ctf_insert creates the CTF section.  It needs reserved padding in the
@@ -431,8 +442,6 @@ export LDFLAGS_KERNEL_DEBUG         =
 export LDFLAGS_KERNEL_PROFILE  = 
 
 export LDFLAGS_KERNEL_RELEASEI386     = \
-       -Wl,-new_linker \
-       -Wl,-pagezero_size,0x0 \
        -Wl,-segaddr,__INITPT,0x00100000 \
        -Wl,-segaddr,__INITGDT,0x00106000 \
        -Wl,-segaddr,__SLEEP,0x00107000 \
@@ -444,34 +453,58 @@ export LDFLAGS_KERNEL_DEBUGI386 = $(LDFLAGS_KERNEL_RELEASEI386)
 export LDFLAGS_KERNEL_DEVELOPMENTI386 = $(LDFLAGS_KERNEL_RELEASEI386)
 export LDFLAGS_KERNEL_PROFILEI386 = $(LDFLAGS_KERNEL_RELEASEI386)
 
-# Keep these constants in sync with the *_SEG_BASE definitions in i386/pmap.h
-export LDFLAGS_KERNEL_RELEASEX86_64   = \
-       -Wl,-new_linker \
-       -Wl,-pagezero_size,0x0 \
-       -Wl,-segaddr,__INITPT,0xffffff8000100000 \
-       -Wl,-segaddr,__INITGDT,0xffffff8000106000 \
-       -Wl,-segaddr,__SLEEP,0xffffff8000107000 \
-       -Wl,-segaddr,__HIB,0xffffff8000108000 \
-       -Wl,-image_base,0xffffff8000200000 \
-       -Wl,-seg_page_size,__TEXT,0x200000
+# KASLR static slide config:
+ifndef SLIDE
+SLIDE=0x00
+endif
+KERNEL_MIN_ADDRESS      := 0xffffff8000000000
+KERNEL_BASE_OFFSET      := 0x100000
+KERNEL_STATIC_SLIDE     := $(shell printf "0x%016x" \
+                          $$[ $(SLIDE) << 21 ])
+KERNEL_STATIC_BASE      := $(shell printf "0x%016x" \
+                          $$[ $(KERNEL_MIN_ADDRESS) + $(KERNEL_BASE_OFFSET) ])
+KERNEL_HIB_SECTION_BASE := $(shell printf "0x%016x" \
+                          $$[ $(KERNEL_STATIC_BASE) + $(KERNEL_STATIC_SLIDE) ])
+KERNEL_TEXT_BASE        := $(shell printf "0x%016x" \
+                          $$[ $(KERNEL_HIB_SECTION_BASE) + 0x100000 ])
+
+export LDFLAGS_KERNEL_RELEASEX86_64 = \
+       -Wl,-pie \
+       -Wl,-segaddr,__HIB,$(KERNEL_HIB_SECTION_BASE) \
+       -Wl,-image_base,$(KERNEL_TEXT_BASE) \
+       -Wl,-seg_page_size,__TEXT,0x200000 \
+       -Wl,-sectalign,__DATA,__const,0x1000 \
+       -Wl,-sectalign,__DATA,__sysctl_set,0x1000 \
+       -Wl,-sectalign,__HIB,__bootPT,0x1000 \
+       -Wl,-sectalign,__HIB,__desc,0x1000 \
+       -Wl,-sectalign,__HIB,__data,0x1000 \
+       -Wl,-sectalign,__HIB,__text,0x1000 \
+       -Wl,-sectalign,__HIB,__const,0x1000 \
+       -Wl,-sectalign,__HIB,__bss,0x1000 \
+       -Wl,-sectalign,__HIB,__common,0x1000
+
+# Define KERNEL_BASE_OFFSET so known at compile time:
+export CFLAGS_X86_64 += -DKERNEL_BASE_OFFSET=$(KERNEL_BASE_OFFSET)
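
The static-slide arithmetic above, transcribed into Python using the constants from this Makefile (the SLIDE value is an illustrative build-time override; it defaults to 0):

    SLIDE = 0x00  # e.g. make SLIDE=0x25 for a fixed test slide

    KERNEL_MIN_ADDRESS = 0xffffff8000000000
    KERNEL_BASE_OFFSET = 0x100000

    kernel_static_slide     = SLIDE << 21
    kernel_static_base      = KERNEL_MIN_ADDRESS + KERNEL_BASE_OFFSET
    kernel_hib_section_base = kernel_static_base + kernel_static_slide
    kernel_text_base        = kernel_hib_section_base + 0x100000

    print("__HIB  base 0x%016x" % kernel_hib_section_base)
    print("__TEXT base 0x%016x" % kernel_text_base)
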
 
 export LDFLAGS_KERNEL_DEBUGX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64)
 export LDFLAGS_KERNEL_DEVELOPMENTX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64)
 export LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64)
 
 export LDFLAGS_KERNEL_RELEASEARM     = \
-       -Wl,-new_linker \
-       -Wl,-pagezero_size,0x0 \
+       -Wl,-pie \
+       -Wl,-static \
        -Wl,-image_base,0x80001000 \
        -Wl,-exported_symbols_list,$(TARGET)/kernel-kpi.exp
 
 export LDFLAGS_KERNEL_DEVELOPMENTARM     = \
-       -Wl,-new_linker \
-       -Wl,-pagezero_size,0x0 \
+       -Wl,-pie \
+       -Wl,-static \
        -Wl,-image_base,0x80001000
 
 export LDFLAGS_KERNEL_DEBUGARM = $(LDFLAGS_KERNEL_DEVELOPMENTARM)
 
+# Offset image base by page to have iBoot load kernel TEXT correctly.
+# The first page is used for various purposes: sleep token, reset vector.
 
 export LDFLAGS_KERNEL  = $(LDFLAGS_KERNEL_GEN) \
                  $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \
@@ -479,7 +512,7 @@ export LDFLAGS_KERNEL       = $(LDFLAGS_KERNEL_GEN) \
                  $($(addsuffix $(ARCH_CONFIG),LDFLAGS_KERNEL_)) \
                  $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_)) \
                  $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_))) \
-
+                 $(DEPLOYMENT_TARGET_FLAGS)
 
 #
 # Default runtime libraries to be linked with the kernel
@@ -501,7 +534,40 @@ export INCFLAGS    = $(INCFLAGS_LOCAL) $(INCFLAGS_GEN) $(INCFLAGS_IMPORT) $(INCFLA
 #
 # Default MIGFLAGS
 #
-export MIGFLAGS        = $(DEFINES) $(INCFLAGS) $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_))
+export MIGFLAGS        = $(DEFINES) $(INCFLAGS) $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) \
+                         $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
+                         $(DEPLOYMENT_TARGET_FLAGS)
+
+#
+# Support for LLVM Link Time Optimization (LTO)
+#
+
+ifeq ($(BUILD_LTO),1)
+export CFLAGS_GEN      += -flto
+export CXXFLAGS_GEN    += -flto
+export LDFLAGS_KERNEL_GEN      += -Wl,-object_path_lto,$(TARGET)/lto.o
+export CFLAGS_NOLTO_FLAG = -fno-lto
+export BUILD_MACHO_OBJ = 0
+export BUILD_LTO       = 1
+else
+export CFLAGS_NOLTO_FLAG =
+export BUILD_MACHO_OBJ = 1
+export BUILD_LTO       = 0
+endif
+
+#
+# Support for LLVM Integrated Assembler with clang driver
+#
+ifeq ($(BUILD_INTEGRATED_ASSEMBLER),1)
+export SFLAGS_GEN      += -integrated-as
+export CFLAGS_GEN      += -integrated-as
+export CXXFLAGS_GEN    += -integrated-as
+export SFLAGS_NOINTEGRATEDAS_FLAGS     = -no-integrated-as
+export CFLAGS_NOINTEGRATEDAS_FLAGS     = -no-integrated-as
+else
+export SFLAGS_NOINTEGRATEDAS_FLAGS     =
+export CFLAGS_NOINTEGRATEDAS_FLAGS     =
+endif
 
 #
 # Default VPATH
@@ -564,7 +630,7 @@ KINCFRAME_UNIFDEF  = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE
 
 
 #
-# Compononent Header file destinations
+# Component Header file destinations
 #
 EXPDIR = EXPORT_HDRS/$(COMPONENT)
 
@@ -583,7 +649,6 @@ export STRIP_FLAGS  = $($(addsuffix $(KERNEL_CONFIG),STRIP_FLAGS_))
 #
 export DSYMUTIL_FLAGS_I386 = --arch=i386
 export DSYMUTIL_FLAGS_X86_64 = --arch=x86_64
-export DSYMUTIL_FLAGS_ARM = --arch=arm
 
 export DSYMUTIL_FLAGS = $($(addsuffix $(ARCH_CONFIG),DSYMUTIL_FLAGS_))
 
index b4b594cd6cb36dbd9a4f43c6657a92e93892deaa..12191a3c22ef11a0b8255a62d67115439ea043e3 100644 (file)
@@ -3,7 +3,7 @@
 #
 .PHONY: installhdrs
 
-ifeq ($(RC_ProjectName),Libsyscall)
+ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall)
 installhdrs:
        cd libsyscall ; \
                sdk="$(SDKROOT)" ;                                                              \
@@ -19,8 +19,11 @@ installhdrs:
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
 installhdrs:
        make -C libkern/kxld/ installhdrs
+else ifeq ($(RC_ProjectName),libkmod)
+installhdrs:
+# nothing to do
 else # xnu, xnu_debug, or xnu_headers_Sim
-installhdrs: exporthdrs installhdrs_mi installhdrs_md 
+installhdrs: exporthdrs installhdrs_mi installhdrs_md setup
        @echo "[ $(SRCROOT) ] make installhdrs installing Kernel.framework"
        $(_v)kincpath=$(DSTROOT)/$(KINCDIR);                                    \
        krespath=$(DSTROOT)/$(KRESDIR);                                 \
@@ -53,6 +56,9 @@ ifeq (iPhoneOS,$(PLATFORM))
 endif
 endif
 
+.PHONY: installhdrs_embedded installhdrs_devicemap
+installhdrs_embedded installhdrs_devicemap: installhdrs
+
 #
 # Install header files order
 #
@@ -62,9 +68,8 @@ endif
 #
 # Install machine independent header files
 #
-installhdrs_mi:
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));         \
-       kernel_config=$(INSTALL_TYPE);                                  \
+installhdrs_mi: setup
+       $(_v)kernel_config=$(INSTALL_TYPE);                             \
        machine_config=$(MACHINE_CONFIG);                               \
        arch_config=$(INSTALL_ARCH_DEFAULT);                            \
        if [ $${arch_config} = ARM ] ; then                             \
@@ -72,27 +77,34 @@ installhdrs_mi:
                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);   \
                fi;                                                     \
        fi;                                                             \
+       if [ $${arch_config} = L4_ARM ] ; then                          \
+               if [ $${machine_config} = DEFAULT ] ; then              \
+                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);        \
+               fi;                                                     \
+       fi;                                                             \
        if [ $${machine_config} = DEFAULT ] ; then                      \
-               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};       \
+               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH};    \
        else                                                            \
-               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};    \
+               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \
        fi;                                                             \
        [ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir};       \
        ${MAKE} ${MAKEJOBS} -C $${installinc_dir}                       \
                KERNEL_CONFIG=$${kernel_config}                         \
                ARCH_CONFIG=$${arch_config}                             \
+               MACHINE_CONFIG=$${machine_config}                       \
                MAKEFILES=${SOURCE}/Makefile                            \
                SOURCE=${SOURCE}/                                       \
+               RELATIVE_SOURCE_PATH=.                                  \
                TARGET=$${installinc_dir}/                              \
                build_installhdrs_mi;                                   \
 
 #
 # Install machine dependent kernel header files 
+# Uses hack for machine_config, which is not threaded through properly.
 #
-installhdrs_md:
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));                 \
-       kernel_config=$(INSTALL_TYPE);                                  \
-       machine_config=$(MACHINE_CONFIG);               \
+installhdrs_md: setup
+       $(_v)kernel_config=$(INSTALL_TYPE);                             \
+       machine_config=$(MACHINE_CONFIG);                               \
        for arch_config in $(INSTALL_ARCHS);                            \
        do                                                              \
        if [ $${arch_config} = ARM ] ; then     \
@@ -100,17 +112,24 @@ installhdrs_md:
                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);           \
                fi;             \
        fi;             \
+       if [ $${arch_config} = L4_ARM ] ; then  \
+               if [ $${machine_config} = DEFAULT ] ; then      \
+                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);                \
+               fi;             \
+       fi;             \
        if [ $${machine_config} = DEFAULT ] ; then      \
-               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};       \
+               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH};    \
        else            \
-               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};    \
+               installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \
        fi;             \
        [ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir};       \
        ${MAKE} ${MAKEJOBS} -C $${installinc_dir}                       \
                KERNEL_CONFIG=$${kernel_config}                         \
                ARCH_CONFIG=$${arch_config}                             \
+               MACHINE_CONFIG=$${machine_config}                       \
                MAKEFILES=${SOURCE}/Makefile                            \
                SOURCE=${SOURCE}/                                       \
+               RELATIVE_SOURCE_PATH=.                                  \
                TARGET=$${installinc_dir}/                              \
                build_installhdrs_md;                                   \
        done;
@@ -130,6 +149,7 @@ $(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS):
        ${MAKE} -C $${installinc_subdir}                                        \
                MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile               \
                SOURCE=$(SOURCE)$${installinc_subdir}/                          \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installinc_subdir}      \
                TARGET=$(TARGET)$${installinc_subdir}/                          \
                build_installhdrs_mi;
 
@@ -151,6 +171,7 @@ $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS):
        ${MAKE} -C $${installinc_subdir}                                        \
                MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile               \
                SOURCE=$(SOURCE)$${installinc_subdir}/                          \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installinc_subdir}      \
                TARGET=$(TARGET)$${installinc_subdir}/                          \
                build_installhdrs_md;
 
@@ -176,26 +197,32 @@ exporthdrs: exporthdrs_mi exporthdrs_md
 do_exporthdrs_mi:
 
 exporthdrs_mi:
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));         \
-       kernel_config=$(INSTALL_TYPE);                                  \
-       machine_config=$(MACHINE_CONFIG);                               \
+       $(_v)kernel_config=$(INSTALL_TYPE);                             \
        arch_config=$(INSTALL_ARCH_DEFAULT);                            \
+       machine_config=DEFAULT;                                         \
        if [ $${arch_config} = ARM ] ; then                             \
                if [ $${machine_config} = DEFAULT ] ; then              \
                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);   \
                fi;                                                     \
        fi;                                                             \
+       if [ $${arch_config} = L4_ARM ] ; then                          \
+               if [ $${machine_config} = DEFAULT ] ; then              \
+                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);\
+               fi;                                                     \
+       fi;                                                             \
        if [ $${machine_config} = DEFAULT ] ; then                      \
-               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};        \
+               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH};     \
        else                                                            \
-               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};     \
+               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH};  \
        fi;                                                             \
        [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir};         \
        ${MAKE} ${MAKEJOBS} -C $${exportinc_dir}                        \
                KERNEL_CONFIG=$${kernel_config}                         \
                ARCH_CONFIG=$${arch_config}                             \
+               MACHINE_CONFIG=$${machine_config}                       \
                MAKEFILES=${SOURCE}/Makefile                            \
                SOURCE=${SOURCE}/                                       \
+               RELATIVE_SOURCE_PATH=.                                  \
                TARGET=$${exportinc_dir}/                               \
                build_exporthdrs_mi;                                    \
 
@@ -209,8 +236,7 @@ exporthdrs_mi:
 # set is the kernel configuration.  The second item in the set is the architecture and the 
 # third item is the machine configuration.  There may be multiple sets to build.
 exporthdrs_md:
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));                 \
-       my_counter=1;           \
+       $(_v)my_counter=1;              \
        for my_config in $(TARGET_CONFIGS_UC);                          \
        do              \
        if [ $${my_counter} -eq 1 ] ; then      \
@@ -238,17 +264,24 @@ exporthdrs_md:
                                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);           \
                                fi;             \
                        fi;             \
+                       if [ $${arch_config} = L4_ARM ] ; then  \
+                               if [ $${machine_config} = DEFAULT ] ; then      \
+                                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);                \
+                               fi;             \
+                       fi;             \
                        if [ $${machine_config} = DEFAULT ] ; then      \
-                               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};        \
+                               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH};     \
                        else            \
-                               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};     \
+                               exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH};  \
                        fi;             \
                        [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir};                                 \
                        ${MAKE} ${MAKEJOBS} -C $${exportinc_dir}                        \
                                KERNEL_CONFIG=$${kernel_config}                         \
                                ARCH_CONFIG=$${arch_config}                             \
+                               MACHINE_CONFIG=$${machine_config}                       \
                                MAKEFILES=${SOURCE}/Makefile                            \
                                SOURCE=${SOURCE}/                                       \
+                               RELATIVE_SOURCE_PATH=.                                  \
                                TARGET=$${exportinc_dir}/                               \
                                build_exporthdrs_md;                                    \
                fi;             \
@@ -270,6 +303,7 @@ $(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS):
        ${MAKE} -C $${exportinc_subdir}                                         \
                MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile                \
                SOURCE=$(SOURCE)$${exportinc_subdir}/                           \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${exportinc_subdir}       \
                TARGET=$(TARGET)$${exportinc_subdir}/                           \
                build_exporthdrs_mi;
 
@@ -291,6 +325,7 @@ $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS):
        ${MAKE} -C $${exportinc_subdir}                                         \
                MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile                \
                SOURCE=$(SOURCE)$${exportinc_subdir}/                           \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${exportinc_subdir}       \
                TARGET=$(TARGET)$${exportinc_subdir}/                           \
                build_exporthdrs_md;
 
@@ -303,16 +338,17 @@ build_exporthdrs_md: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS)
 .PHONY: setup
 
 setup:
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));         \
-       kernel_config=$(INSTALL_TYPE);                                  \
+       $(_v)kernel_config=$(INSTALL_TYPE);                             \
        arch_config=$(INSTALL_ARCH_DEFAULT);                            \
-       setup_subdir=${OBJROOT}/$${rel_path};                           \
+       setup_subdir=${OBJROOT}/$${RELATIVE_SOURCE_PATH};               \
        [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir};           \
        ${MAKE} ${MAKEJOBS} -C $${setup_subdir}                         \
                KERNEL_CONFIG=$${kernel_config}                         \
                ARCH_CONFIG=$${arch_config}                             \
+               MACHINE_CONFIG=$${machine_config}                       \
                MAKEFILES=${SOURCE}/Makefile                            \
                SOURCE=${SOURCE}/                                       \
+               RELATIVE_SOURCE_PATH=.                                  \
                TARGET=$${setup_subdir}/                                \
        build_setup;
 
@@ -328,6 +364,7 @@ $(BUILD_SETUP_SUBDIRS_TARGETS):
        ${MAKE} -C $${setup_subdir}                             \
                MAKEFILES=${SOURCE}/$${setup_subdir}/Makefile   \
                SOURCE=${SOURCE}/$${setup_subdir}/              \
+               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/$${setup_subdir}   \
                TARGET=${TARGET}/$${setup_subdir}/              \
                build_setup;
 
@@ -347,11 +384,11 @@ build_setup: $(BUILD_SETUP_SUBDIRS_TARGETS)
 ifeq ($(RC_ProjectName),Libsyscall)
 all:
        cd libsyscall ; \
-               sdk="$(SDKROOT)" ;                                                              \
+               sdk="$(SDKROOT)" ;                                                      \
                if [ $${sdk} = / ] ; then                                               \
-                       sdk="" ;                                                                        \
-               fi;                                                                                             \
-               xcrun -sdk "$(SDKROOT)" xcodebuild install              \
+                       sdk="" ;                                                        \
+               fi;                                                                     \
+               xcrun -sdk "$(SDKROOT)" xcodebuild install                              \
                        "SRCROOT=$(SRCROOT)/libsyscall"                                 \
                        "OBJROOT=$(OBJROOT)"                                            \
                        "SYMROOT=$(SYMROOT)"                                            \
@@ -363,7 +400,20 @@ all:
 else ifeq ($(RC_ProjectName),libkxld_host)
 all:
        make -C libkern/kxld/ install PRODUCT_TYPE=ARCHIVE
-else ifeq ($(RC_ProjectName),xnu_headers_Sim)
+else ifeq ($(RC_ProjectName),libkmod)
+all:
+       cd libkern/kmod ; \
+               sdk="$(SDKROOT)" ;                                                      \
+               if [ $${sdk} = / ] ; then                                               \
+                       sdk="" ;                                                        \
+               fi;                                                                     \
+               xcrun -sdk "$(SDKROOT)" xcodebuild install                              \
+                       "SRCROOT=$(SRCROOT)/libkern/kmod"                               \
+                       "OBJROOT=$(OBJROOT)"                                            \
+                       "SYMROOT=$(SYMROOT)"                                            \
+                       "DSTROOT=$(DSTROOT)"                                            \
+                       "SDKROOT=$${sdk}"
+else ifeq ($(findstring _headers_Sim,$(RC_ProjectName)),_headers_Sim) # Libsyscall/xnu _headers_Sim
 all: exporthdrs
 else # xnu or xnu_debug
 ifeq ($(COMPONENT), .)
@@ -394,10 +444,15 @@ endif
                                machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);           \
                        fi;             \
                fi;             \
+               if [ $${arch_config} = L4_ARM ] ; then  \
+                       if [ $${machine_config} = DEFAULT ] ; then      \
+                               machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);                \
+                       fi;             \
+               fi;             \
                if [ $${machine_config} = DEFAULT ] ; then      \
-                       build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \
+                       build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \
                else            \
-                       build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path}; \
+                       build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \
                fi;             \
                [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir};                                   \
                        ${MAKE} ${MAKEJOBS} -C $${build_subdir}                         \
@@ -406,11 +461,15 @@ endif
                                MACHINE_CONFIG=$${machine_config}                       \
                                MAKEFILES=${SOURCE}/Makefile                            \
                                SOURCE=${SOURCE}/                                       \
+                               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}            \
                                build_all;                                              \
        fi;             \
        done;
 endif
 
+.PHONY: all_embedded all_devicemap
+all_embedded all_devicemap: all
+
 #
 # Build all architectures for all Configuration/Architecture options
 #
@@ -431,6 +490,7 @@ $(BUILD_ALL_SUBDIRS_TARGETS):
        ${MAKE} -C $${comp_subdir}                              \
                MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile    \
                SOURCE=${SOURCE}$${comp_subdir}/                \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${comp_subdir}    \
                TARGET=$${TARGET}                               \
                build_all;
 
@@ -448,6 +508,7 @@ build_all: $(BUILD_ALL_SUBDIRS_TARGETS)
                ${MAKE} -C $${comp_subdir}                              \
                        MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile    \
                        SOURCE=${SOURCE}$${comp_subdir}/                \
+                       RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${comp_subdir}    \
                        TARGET=$${TARGET}                               \
                build_all;                                              \
        done;                                                           
@@ -484,6 +545,11 @@ mach_kernel:
                                machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);           \
                        fi;             \
                fi;             \
+               if [ $${arch_config} = L4_ARM ] ; then  \
+                       if [ $${machine_config} = DEFAULT ] ; then      \
+                               machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);                \
+                       fi;             \
+               fi;             \
                if [ $${machine_config} = DEFAULT ] ; then      \
                        build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config};      \
                else            \
@@ -521,15 +587,17 @@ build_mach_kernel:
 # Install kernel header files based on RC_ARCHS
 #
 install: installhdrs all installman installmachinekernels
-ifeq ($(RC_ProjectName),Libsyscall)
+ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall)
 # nothing to do
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
 # nothing to do, work performed in "all" action
-else ifeq ($(RC_ProjectName),xnu_headers_Sim)
+else ifeq ($(RC_ProjectName),libkmod)
+# nothing to do, work performed in "all" action
+else ifeq ($(findstring _headers_Sim,$(RC_ProjectName)),_headers_Sim)
 # nothing to do
 else # xnu or xnu_debug
-       $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));                 \
-       machine_config=$(MACHINE_CONFIG);               \
+# A bit of a hack for machine_config: machine configs aren't really threaded through properly.
+       $(_v)machine_config=$(MACHINE_CONFIG);          \
        for kernel_config in $(INSTALL_TYPE);                           \
        do                                                              \
        for arch_config in $(INSTALL_ARCHS);                            \
@@ -539,10 +607,15 @@ else # xnu or xnu_debug
                        machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);           \
                fi;             \
        fi;             \
+       if [ $${arch_config} = L4_ARM ] ; then  \
+               if [ $${machine_config} = DEFAULT ] ; then      \
+                       machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);                \
+               fi;             \
+       fi;             \
        if [ $${machine_config} = DEFAULT ] ; then      \
-               install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};       \
+               install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH};    \
        else            \
-               install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};    \
+               install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \
        fi;             \
        [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir};                                       \
        ${MAKE} ${MAKEJOBS} -C $${install_subdir}                       \
@@ -551,6 +624,7 @@ else # xnu or xnu_debug
                MACHINE_CONFIG=$${machine_config}                       \
                MAKEFILES=${SOURCE}/Makefile                            \
                SOURCE=${SOURCE}/                                       \
+               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}            \
                build_install;                                          \
        done;                                                           \
        done;
@@ -565,6 +639,9 @@ ifeq ($(RC_ProjectName),xnu_debug)
 endif
 endif
 
+.PHONY: install_embedded install_devicemap
+install_embedded install_devicemap: install
+
 installmachinekernels:
        @echo "[ $(SOURCE) ] make installmachinekernels";                               \
        my_counter=1;           \
@@ -596,6 +673,7 @@ installmachinekernels:
                                MACHINE_CONFIG=$${machine_config}                       \
                                MAKEFILES=${SOURCE}/Makefile                            \
                                SOURCE=${SOURCE}/                                       \
+                               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}            \
                                TARGET=$${build_subdir}/                                \
                                do_build_install;                                       \
                fi;             \
@@ -627,6 +705,7 @@ $(BUILD_INSTALL_SUBDIRS_TARGETS):
                KERNEL_CONFIG=$${kernel_config}                 \
                MAKEFILES=${SOURCE}/$${install_subdir}/Makefile \
                SOURCE=${SOURCE}$${install_subdir}/             \
+               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/$${install_subdir}         \
                TARGET=$${TARGET}                               \
                build_install;
 
@@ -697,10 +776,12 @@ TAGS: cscope.files
 .PHONY: installman
 
 installman:
-ifeq ($(RC_ProjectName),Libsyscall)
+ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall)
 # nothing to do
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
 # nothing to do
+else ifeq ($(RC_ProjectName),libkmod)
+# nothing to do
 else ifeq ($(findstring xnu_,$(RC_ProjectName)),xnu_)
 installman:
 # nothing to do
@@ -710,6 +791,7 @@ else # xnu
        [ -d $$manpath ] || $(MKDIR) $$manpath;             \
        ${MAKE} ${MAKEJOBS} MAKEFILES=${SOURCE}/Makefile \
                SOURCE=${SOURCE}/ \
+               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH} \
                TARGET=${DSTROOT}/ \
                build_installman
        ${SRCROOT}/config/compress-man-pages.pl ${DSTROOT}/${MANDIR}
@@ -726,6 +808,7 @@ $(BUILD_INSTALLMAN_SUBDIRS_TARGETS):
        ${MAKE} -C $${installman_subdir} -r                                     \
                MAKEFILES=$(SOURCE)$${installman_subdir}/Makefile               \
                SOURCE=$(SOURCE)$${installman_subdir}/                          \
+               RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installman_subdir}      \
                TARGET=$(TARGET)$${installman_subdir}/                          \
                build_installman; 
 
index b2d7e3af35ab27fcb121ef2b3b1066f7d599c603..d4e5e5fee973c57de65aefe18a1aeb85a575af0e 100644 (file)
@@ -51,9 +51,9 @@ ifndef INSTALL_KF_MD_GEN_LIST
 endif
 
 ifneq ($(MACHINE_CONFIG), DEFAULT)
-       OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)
+       export OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)
 else
-       OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)
+       export OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)
 endif
 
 INSTALL_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST))
@@ -513,13 +513,14 @@ endif
 # Compilation rules to generate .o from .s
 #
 
-S_RULE_1A=$(_v)${S_KCC} -c -MD ${SFLAGS} -DASSEMBLER ${INCFLAGS} ${$@_INCFLAGS}
+S_RULE_1A=$(_v)${S_KCC} -c ${SFLAGS} -MD -DASSEMBLER ${$@_SFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS}
 S_RULE_1B=$*.s
 S_RULE_2=@echo AS $@
 S_RULE_3=
 
 #
 # Compilation rules to generate .o from .c for normal files
+# 
 C_RULE_1A=$(_v)${KCC} -c ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CWARNFLAGS}} -MD ${$@_CFLAGS_ADD} ${$@_CWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} 
 C_RULE_1B=$*.c
 C_RULE_2=@echo CC $@
@@ -527,11 +528,19 @@ ifeq ($(BUILD_MACHO_OBJ),0)
 C_RULE_3=
 else ifeq ($(BUILD_STABS),1)
 C_RULE_3=
+else ifeq ($(BUILD_DWARF),1)
+C_RULE_3=$(_v)${CTFCONVERT} -l xnu -v -o $@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $@.ctf || true;
 else
-C_RULE_3=$(_v)${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true;
+C_RULE_3=
 endif
 C_RULE_4=
 
+ifeq ($(ARCH_CONFIG),ARM)
+ifeq ($(KERNEL_CONFIG),RELEASE)
+C_RULE_3=
+endif
+endif
+
 #
 # Compilation rules to generate .o from .c for driver files
 #
@@ -553,10 +562,17 @@ ifeq ($(BUILD_MACHO_OBJ),0)
 P_RULE_4=
 else ifeq ($(BUILD_STABS),1)
 P_RULE_4=
+else ifeq ($(BUILD_DWARF),1)
+P_RULE_4=$(_v)${CTFCONVERT} -l xnu -v -o $@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $@.ctf || true;
 else
-P_RULE_4=$(_v)${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true;
+P_RULE_4=
 endif
 
+ifeq ($(ARCH_CONFIG),ARM)
+ifeq ($(KERNEL_CONFIG),RELEASE)
+P_RULE_4=
+endif
+endif
 
 setup_build_all: 
 
@@ -582,6 +598,7 @@ $(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LI
        $(_v)if [ $(BUILD_DWARF)  -eq  1 ]; then \
                echo DSYMUTIL mach_kernel.sys; \
                $(DSYMUTIL) $(DSYMUTIL_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel.sys.dSYM > /dev/null; \
+               $(MKDIR) $(TARGET)/mach_kernel.sys.dSYM/$(DSYMRESDIR); \
                $(INSTALL) $(INSTALL_FLAGS) $(SRCROOT)/kgmacros $(TARGET)/mach_kernel.sys.dSYM/$(DSYMRESDIR)/kgmacros; \
        fi;
        $(_v)if [ $(MACHINE_CONFIG) != DEFAULT ] ; then     \
@@ -591,19 +608,31 @@ $(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LI
        fi;
        @echo STRIP mach_kernel
        $(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel
-       $(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 -a $(BUILD_DWARF) -eq 1 ]; then                      \
+
+	$(_v)kernel_config=$(KERNEL_CONFIG);				\
+	onearch=$(ARCH_CONFIG);						\
+	skip_ctf=FALSE;							\
+	if [ $${kernel_config} = RELEASE ]; then			\
+		if [ $${onearch} = ARM ]; then				\
+			skip_ctf=TRUE;					\
+			echo "Skipping CTF processing";			\
+		fi;							\
+	fi;								\
+	if [ $${skip_ctf} = FALSE ]; then				\
+       if [ $(BUILD_MACHO_OBJ) -eq 1 -a $(BUILD_DWARF) -eq 1 ]; then      \
                echo CTFMERGE mach_kernel;                      \
                $(FIND) $(OBJPATH)/ -name \*.ctf -size 0        \
                        -exec $(RM) -rf {} \;   ;               \
-               $(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel     \
-                           -Z $(TARGET)/mach_kernel.ctfdata    \
-                           $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true;     \
+               $(FIND) $(OBJPATH)/ -name \*.ctf |              \
+                       $(XARGS) $(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel \
+                           -Z $(TARGET)/mach_kernel.ctfdata || true;   \
                echo CTFINSERT mach_kernel;                     \
                $(CTFINSERT) $(TARGET)/mach_kernel              \
                         $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) $(TARGET)/mach_kernel.ctfdata \
                         -o $(TARGET)/mach_kernel || true;      \
                         $(RM) -f $(TARGET)/mach_kernel.ctfdata > /dev/null || true; \
        fi;                                                     \
+       fi;                                                     \
 
 version.o: $(OBJPATH)/version.c
        ${C_RULE_1A}$<
@@ -632,6 +661,7 @@ build_mach_kernel_exports:
        $(_v)${MAKE}                                    \
                MAKEFILES=${SOURCE}/config/Makefile     \
                SOURCE=${SOURCE}/config                 \
+               RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/config     \
                TARGET=$${TARGET}                       \
        build_mach_kernel_exports;
 
@@ -663,11 +693,19 @@ $(INSTALL_KERNEL_FILE_FILES): $(TARGET)/mach_kernel force_kernel_file_install
        fi
 
 INSTALL_KERNEL_FILESYS_FILES = $(addprefix $(SYMROOT)$(INSTALL_KERNEL_DIR), $(INSTALL_KERNEL_FILE))
+ifeq ($(PLATFORM),iPhoneOS)
+INSTALL_KERNEL_FILESYS_FILES += $(addprefix $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR), $(INSTALL_KERNEL_FILE))
+endif
 
 force_kernel_filesys_install:
 
 $(INSTALL_KERNEL_FILESYS_FILES): $(TARGET)/mach_kernel.sys force_kernel_filesys_install
        @echo Installing $< in $@;
+ifeq ($(PLATFORM),iPhoneOS)
+       $(_v)if [ ! -e $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR) ]; then      \
+               $(MKDIR) $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR);           \
+       fi;
+endif
        $(_v)if [ ! -e $(SYMROOT)$(INSTALL_KERNEL_DIR) ]; then  \
                $(MKDIR) $(SYMROOT)$(INSTALL_KERNEL_DIR);               \
        fi;                                                     \
index b61d3bc7d3ebc4f73d807311767f700d3de45932..a864f850bef041056e926c4fd128ca2a345be395 100644 (file)
@@ -21,7 +21,8 @@ INSTINC_SUBDIRS = \
        vm \
        libsa \
        kdp \
-       pmc
+       pmc \
+       kperf
 INSTINC_SUBDIRS_I386 = \
        mach    \
        i386
@@ -29,9 +30,6 @@ INSTINC_SUBDIRS_X86_64 = \
        mach    \
        i386    \
        x86_64
-INSTINC_SUBDIRS_ARM = \
-       mach    \
-       arm
 
 EXPINC_SUBDIRS = \
        mach    \
@@ -50,7 +48,8 @@ EXPINC_SUBDIRS = \
        libsa \
        kdp \
        console \
-       pmc
+       pmc \
+       kperf
 
 EXPINC_SUBDIRS_I386 = \
        mach    \
@@ -59,9 +58,6 @@ EXPINC_SUBDIRS_X86_64 = \
        mach    \
        i386    \
        x86_64
-EXPINC_SUBDIRS_ARM = \
-       mach    \
-       arm
 
 SETUP_SUBDIRS =
 
index 26f1a70ce5210fe690568f9054e26485e66864cb..a721a7313a5468c708dfb5a7ac248b92fec89773 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+
+void *chudxnu_platform_ptr(void);
+
+void *
+chudxnu_platform_ptr(void)
+{
+       return (void *)0;
+}
+
+
index 97c07757bd179e4ae60ccc57049444b839443e41..6cc0eb0e9573cf50a39ac08a3ea5e95e747bba08 100644 (file)
@@ -550,3 +550,73 @@ chudxnu_thread_set_marked(thread_t thread, boolean_t new_value)
        return FALSE;
 }
 
+/* XXX: good thing this code is experimental... */
+
+/* external handler */
+extern void (*chudxnu_thread_ast_handler)(thread_t);
+void (*chudxnu_thread_ast_handler)(thread_t) = NULL;
+
+/* AST callback to dispatch to AppleProfile */
+extern void chudxnu_thread_ast(thread_t);
+void
+chudxnu_thread_ast(thread_t thread)
+{
+	/* atomicity for kdebug events: load the handler pointer once */
+       void (*handler)(thread_t) = chudxnu_thread_ast_handler;
+       if( handler )
+               handler( thread );
+
+       thread->t_chud = 0;
+}
+
+
+
+/* Get and set bits on the thread and trigger an AST handler */
+void chudxnu_set_thread_ast( thread_t thread );
+void
+chudxnu_set_thread_ast( thread_t thread )
+{
+       /* FIXME: only call this on current thread from an interrupt handler for now... */
+       if( thread != current_thread() )
+               panic( "unsafe AST set" );
+
+       act_set_kperf(thread);
+}
+
+/* get and set the thread bits */
+extern uint32_t chudxnu_get_thread_bits( thread_t thread );
+extern void chudxnu_set_thread_bits( thread_t thread, uint32_t bits );
+
+uint32_t
+chudxnu_get_thread_bits( thread_t thread )
+{
+       return thread->t_chud;
+}
+
+void
+chudxnu_set_thread_bits( thread_t thread, uint32_t bits )
+{
+       thread->t_chud = bits;
+}
+
+/* Get and set the thread dirty bits so CHUD can track whether the
+ * thread has been dispatched since it last looked. The caller must
+ * hold the thread lock.
+ */
+boolean_t
+chudxnu_thread_get_dirty(thread_t thread)
+{
+       if( thread->c_switch != thread->chud_c_switch )
+               return TRUE;
+       else
+               return FALSE;
+}
+
+void
+chudxnu_thread_set_dirty(thread_t thread, boolean_t makedirty)
+{
+       if( makedirty )
+               thread->chud_c_switch = thread->c_switch - 1;
+       else
+               thread->chud_c_switch = thread->c_switch;
+}
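
Two idioms in the CHUD additions above are worth spelling out. First, chudxnu_thread_ast() copies the global handler pointer into a local before testing and calling it, so a concurrent writer clearing chudxnu_thread_ast_handler cannot slip in between the NULL check and the call (this presumes an aligned pointer load is atomic, which holds on the targeted architectures). Second, the dirty-bit functions track dispatch by comparing the thread's context-switch counter against a cached snapshot; "setting" the bit merely backdates the snapshot by one so the next comparison reads unequal. A minimal stand-alone sketch of both patterns, using hypothetical field and type names rather than the real thread_t layout:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct {
            uint64_t c_switch;       /* incremented on every context switch */
            uint64_t cached_switch;  /* snapshot taken by the observer      */
    } fake_thread_t;

    typedef void (*ast_handler_t)(fake_thread_t *);

    /* May be cleared by another subsystem; read exactly once per dispatch. */
    static ast_handler_t volatile ast_handler = NULL;

    static void
    dispatch_ast(fake_thread_t *t)
    {
            ast_handler_t h = ast_handler;  /* single load: no check-then-call race */
            if (h)
                    h(t);
    }

    static bool
    thread_is_dirty(const fake_thread_t *t)
    {
            return t->c_switch != t->cached_switch;
    }

    static void
    thread_set_dirty(fake_thread_t *t, bool dirty)
    {
            /* Backdating the snapshot by one guarantees the next comparison
             * sees the thread as dispatched-since-last-look. */
            t->cached_switch = dirty ? t->c_switch - 1 : t->c_switch;
    }
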
index 2e8168577d0b7fe279aaddbda0994081cbd800ae..7d2c56f67fd6740f8d51ec8fbec7c05d4cb8d32b 100644 (file)
@@ -99,6 +99,9 @@ enum {
 
 extern int chudxnu_thread_get_scheduler_state(thread_t thread);
 
+extern boolean_t chudxnu_thread_get_dirty(thread_t thread);
+extern void chudxnu_thread_set_dirty(thread_t thread, boolean_t);
+
 #if 0
 #pragma mark **** memory ****
 #endif
index aa576cbc71368c0acc81500a1bf9cff56b45e7bd..c92dbb7fd55d1c466b610114a3e4ef085bf28d25 100644 (file)
@@ -103,6 +103,9 @@ chudxnu_cpu_alloc(boolean_t boot_processor)
 
        mpqueue_init(&chud_proc_info->cpu_request_queue, &chud_request_lck_grp, &chud_request_lck_attr);
 
+       /* timer_call_cancel() can be called before first usage, so init here: <rdar://problem/9320202> */
+       timer_call_setup(&(chud_proc_info->cpu_timer_call), NULL, NULL);
+
 
        return (void *)chud_proc_info;
 }
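
The one-line fix above initializes the per-CPU timer_call as soon as the CPU state is allocated, so a timer_call_cancel() that arrives before the timer is ever armed operates on well-formed, inert state rather than uninitialized memory. A sketch of the same init-before-first-cancel idea, with a hypothetical one-shot timer type standing in for the kernel's:

    #include <stdbool.h>

    typedef struct {
            bool armed;          /* consistent from the moment of setup */
            void (*fn)(void *);
            void *arg;
    } oneshot_t;

    static void
    oneshot_setup(oneshot_t *t, void (*fn)(void *), void *arg)
    {
            t->armed = false;    /* cancel is now safe even if never armed */
            t->fn = fn;
            t->arg = arg;
    }

    static bool
    oneshot_cancel(oneshot_t *t)
    {
            bool was_armed = t->armed;   /* without setup, this reads garbage */
            t->armed = false;
            return was_armed;
    }
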
index a8edff8fa4bc060e893d87567124c0be07f6e8b4..6b8a4e873d76bb528c975dd157602698de226fdc 100644 (file)
 #include <i386/mp_desc.h>
 #include <i386/misc_protos.h>
 
+
+static uint64_t
+chudxnu_vm_unslide( uint64_t ptr, int kaddr )
+{
+       if( !kaddr )
+               return ptr;
+
+       return VM_KERNEL_UNSLIDE(ptr);
+}
+
 #if 0
 #pragma mark **** thread state ****
 #endif
@@ -236,7 +246,7 @@ static kern_return_t do_backtrace32(
        if(ct >= max_idx)
                return KERN_RESOURCE_SHORTAGE;  // no frames traced
        
-       frames[ct++] = currPC;
+       frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
 
        // build a backtrace of this 32 bit state.
        while(VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) {
@@ -279,7 +289,7 @@ static kern_return_t do_backtrace32(
                prevFP = (uint64_t) tmpWord;    // promote 32 bit address
 
         if(prevFP) {
-            frames[ct++] = currPC;
+            frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
             prevPC = currPC;
         }
         if(prevFP < currFP) {
@@ -314,7 +324,7 @@ static kern_return_t do_backtrace64(
        if(*start_idx >= max_idx)
                return KERN_RESOURCE_SHORTAGE;  // no frames traced
        
-       frames[ct++] = currPC;
+       frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
 
        // build a backtrace of this 32 bit state.
        while(VALID_STACK_ADDRESS64(supervisor, currFP, kernStackMin, kernStackMax)) {
@@ -355,7 +365,7 @@ static kern_return_t do_backtrace64(
                }
 
         if(VALID_STACK_ADDRESS64(supervisor, prevFP, kernStackMin, kernStackMax)) {
-            frames[ct++] = currPC;
+            frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
             prevPC = currPC;
         }
         if(prevFP < currFP) {
@@ -412,7 +422,7 @@ static kern_return_t do_kernel_backtrace(
                return KERN_FAILURE;
        }
 
-       frames[ct++] = (uint64_t)currPC;
+       frames[ct++] = chudxnu_vm_unslide((uint64_t)currPC, 1);
 
        // build a backtrace of this kernel state
 #if __LP64__
@@ -454,7 +464,7 @@ static kern_return_t do_kernel_backtrace(
 #else
         if(VALID_STACK_ADDRESS(TRUE, prevFP, kernStackMin, kernStackMax)) {
 #endif
-            frames[ct++] = (uint64_t)currPC;
+            frames[ct++] = chudxnu_vm_unslide((uint64_t)currPC, 1);
             prevPC = currPC;
         }
         if(prevFP <= currFP) {
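
The backtrace changes above route every recorded kernel PC through chudxnu_vm_unslide(), which applies VM_KERNEL_UNSLIDE() to supervisor addresses. With kernel ASLR, runtime text addresses equal the static link addresses plus a boot-time slide; subtracting the slide lets profiling tools symbolicate samples against the on-disk kernel binary, while user-space PCs pass through untouched. A minimal sketch, assuming a hypothetical slide value and omitting the range checks the real macro performs:

    #include <stdint.h>

    /* assumption: chosen at boot; not the real vm_kernel_slide */
    static const uint64_t kernel_slide = 0x0000000001200000ULL;

    static uint64_t
    unslide_pc(uint64_t pc, int is_kernel)
    {
            if (!is_kernel)
                    return pc;              /* user PCs are not slid */
            return pc - kernel_slide;       /* back to the static link address */
    }
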
index e34f671cb4008fb282b5dc05a53b30ff5c06f3c2..ad71e25ccbca2f5116743fee34444346ecdf7f9f 100644 (file)
@@ -113,6 +113,18 @@ options            CONFIG_ZLEAKS   # Live zone leak debugging      # <zleaks>
 
 #
 options                ZONE_ALIAS_ADDR #               # <zone_alias_addr>
+
+
+#
+# CONFIG_TASK_ZONE_INFO allows per-task zone information to be extracted.
+# It is primarily useful for xnu debug and development.
+#
+options                CONFIG_TASK_ZONE_INFO           # <task_zone_info>
+#
+# CONFIG_DEBUGGER_FOR_ZONE_INFO restricts zone info so that it is only
+# available when the kernel is being debugged.
+#
+options                CONFIG_DEBUGGER_FOR_ZONE_INFO   # <debugger_for_zone_info>
 # 
 # XPR_DEBUG enables the gathering of data through the XPR macros inserted
 #      into various subsystems. This option is normally only enabled for
@@ -130,9 +142,11 @@ options            XPR_DEBUG       #               # <debug>
 options                MACH_LDEBUG     #               # <debug>
 
 #
-# 
+# Configuration options for full, partial, or no kernel debug event tracing.
 #
-options                KDEBUG          # kernel tracing        # <kdebug>
+options                KDEBUG                  # kernel tracing        # <kdebug>
+options                IST_KDEBUG              # limited tracing       # <ist_kdebug>
+options                NO_KDEBUG       # no kernel tracing # <no_kdebug>
 
 #
 # CONFIG_DTRACE enables code needed to support DTrace. Currently this is
@@ -193,6 +207,11 @@ options            HIBERNATION             #               # <hibernation>
 #
 options                CONFIG_SLEEP            #               # <config_sleep>
 
+# CONFIG_KEXT_BASEMENT - allocate post-boot-loaded kexts after prelinked kexts
+#
+options                CONFIG_KEXT_BASEMENT            #               # <config_kext_basement>
+
+
 #
 #  configurable kernel related resources (CONFIG_THREAD_MAX needs to stay in 
 #  sync with bsd/conf/MASTER until we fix the config system... todo XXX
@@ -209,6 +228,13 @@ options   CONFIG_ZONE_MAP_MIN=12582912     # <medium,large,xlarge>
 options   CONFIG_ZONE_MAP_MIN=6291456  # <small,xsmall>
 options   CONFIG_ZONE_MAP_MIN=1048576  # <bsmall>
 
+# Sizes must be a power of two so the zhash can
+# simply mask off bits instead of taking a modulus.
+options          CONFIG_ZLEAK_ALLOCATION_MAP_NUM=16384 #<medium,large,xlarge>
+options          CONFIG_ZLEAK_ALLOCATION_MAP_NUM=8192  #<small,xsmall,bsmall>
+options   CONFIG_ZLEAK_TRACE_MAP_NUM=8192 #<medium,large,xlarge>
+options   CONFIG_ZLEAK_TRACE_MAP_NUM=4096 #<small,xsmall,bsmall>
+
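
The power-of-two requirement exists because a hash table whose size is 2^n can reduce a hash to a bucket index with a single AND against size-1, which gives the same result as the modulus but costs one instruction. A small illustrative sketch (MAP_NUM and the hash value are hypothetical):

    #include <stdint.h>

    #define MAP_NUM 8192u   /* must be a power of two */

    static uint32_t
    bucket_index(uint32_t hash)
    {
            /* compile-time check that MAP_NUM is a power of two */
            _Static_assert((MAP_NUM & (MAP_NUM - 1)) == 0, "power of two");
            return hash & (MAP_NUM - 1);   /* equal to hash % MAP_NUM here */
    }
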
 #
 #  configurable kernel - use these options to strip strings from panic
 #  and printf calls.
@@ -260,12 +286,30 @@ options           CONFIG_SCHED_FIXEDPRIORITY      # <config_sched_fixedpriority>
 options                CONFIG_SCHED_GRRR_CORE          # <config_sched_grrr,config_sched_fixedpriority>
 
 options                CONFIG_SCHED_IDLE_IN_PLACE              # <config_sched_idle_in_place>
+options                CONFIG_GZALLOC                  # <config_gzalloc>
+#
+# enable per-process memory priority tracking
+#
+options                CONFIG_MEMORYSTATUS                             # <memorystatus>
 
 #
-# freeze - support app hibernation, used on embedded
+# enable jetsam - used on embedded
 #
-options                CONFIG_FREEZE   # <freeze>
+options                CONFIG_JETSAM                           # <jetsam>
 
+#
+# enable freezing of suspended processes - used on embedded
+#
+options                CONFIG_FREEZE                                   # <freeze>
 
 options                CHECK_CS_VALIDATION_BITMAP      # <config_cs_validation_bitmap>
 
+#
+# Enable dispatch of memory pressure events from the vm_pageout_garbage_collect thread
+#
+options                VM_PRESSURE_EVENTS              # <vm_pressure_events>
+
+# Enable allocation of contiguous physical memory through vm_map_enter_cpm()
+options                VM_CPM          # <vm_cpm>
+
+options            CONFIG_SKIP_PRECISE_USER_KERNEL_TIME        # <config_skip_precise_user_kernel_time>
index 42b4294e14108eed7a735e12d609df2f831e31b5..2240533b9d6aec397d7ec06dde2e2c4050272a19 100644 (file)
@@ -9,13 +9,13 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap config_sched_idle_in_place ]
-#  DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert]
+#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events config_sched_idle_in_place memorystatus ]
+#  DEBUG= [ RELEASE osf_debug debug mach_assert task_zone_info ]
 #  PROFILE = [ RELEASE profile ]
 #
 #  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ]
 #  EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
-#  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace config_counters ]
+#  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace config_counters task_zone_info ]
 #
 ######################################################################
 #
@@ -50,8 +50,6 @@ options               MACH_BSD
 options         IOKIT          #                               # <iokit>
 options         MACH_PE                #                               # <mach_pe>
 
-options                DDB             # Inline debugger               # <debug>
-options                MACH_KDB        #                               # <mach_kdb>
 options                MACH_KDP        # KDP                           # <mach_kdp>
 options                CONFIG_SERIAL_KDP       # KDP over serial                               # <config_serial_kdp>
 options                PAE
index 993fa17ab412cfd945ed16274fb31695b2f6be19..27b9ce5e5b673a1d8222697312e4e9de03b4968a 100644 (file)
@@ -9,12 +9,12 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap config_sched_idle_in_place ]
-#  DEBUG = [ RELEASE osf_debug debug mach_assert ]
+#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events config_sched_idle_in_place kperf memorystatus config_kext_basement ]
+#  DEBUG = [ RELEASE osf_debug debug mach_assert task_zone_info ]
 #
 #  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ]
 #  EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
-#  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_counters ]
+#  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_counters task_zone_info ]
 #
 ######################################################################
 #
@@ -51,6 +51,7 @@ options         MACH_PE               #                               # <mach_pe>
 
 options                MACH_KDP        # KDP                           # <mach_kdp>
 options                CONFIG_SERIAL_KDP       # KDP over serial                               # <config_serial_kdp>
+# options         KPERF                #                               # <kperf>
 options                PAE
 options                X86_64
 options                DISPATCH_COUNTS
index 330f94ab687947fbbdc9307b661177ba713f3f21..43980797923a52a94121ca1f24cad967e5c23ebc 100644 (file)
@@ -48,9 +48,11 @@ $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h: $(COMPOBJROOT)/$(OSFMK_KERNEL
 do_all: $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile \
                $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h 
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)        \
                MAKEFILES=$(TARGET)/$(OSFMK_KERNEL_CONFIG)/Makefile     \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(OSFMK_KERNEL_CONFIG)    \
index e232c0e32668653988b929df8f831ad1392d90fd..852d8ad0a6975dd3199d88437c6116981e63a7f9 100644 (file)
@@ -4,24 +4,14 @@
 
 CWARNFLAGS = $(CWARNFLAGS_STD) -Wshorten-64-to-32
 
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR=                                \
-       db_macro.o                      \
-       db_print.o                      \
-       db_sym.o                        \
-       db_variables.o                  \
-       db_disasm.o                     \
-       db_interface.o                  \
-       db_trace.o
-
-$(foreach file,$(OBJS_NO_WERROR),$(eval $(call add_perfile_cflags,$(file),-Wno-error)))
-
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES=                                        \
            hibernate_restore.o
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
+hibernate_restore.o_CFLAGS_ADD += -fno-stack-protector
+
 ######################################################################
 #END   Machine dependent Makefile fragment for i386
 ######################################################################
index 768a508455f4436f3e7aa7c2aa0580e221bf8341..2a4eb03ffc0d687291567fed9a5a993c6e4f0afc 100644 (file)
@@ -11,6 +11,9 @@ UNCONFIGURED_HIB_FILES=                                       \
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
+hibernate_restore.o_CFLAGS_ADD += -fno-stack-protector
+hibernate_bootstrap.o_CFLAGS_ADD += -fno-stack-protector
+
 ######################################################################
 #END   Machine dependent Makefile fragment for x86_64
 ######################################################################
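
Disabling the stack protector for the hibernation files matters because -fstack-protector makes the compiler stash a canary in each protected frame and compare it against a global guard value in the function epilogue; the hibernate restore path runs while memory, potentially including that guard, is being rewritten underneath it, so the check could fire spuriously. Roughly what the instrumented code looks like, with stand-in names for the real runtime symbols:

    #include <stdint.h>
    #include <stdlib.h>

    static uintptr_t stack_guard_value = 0x5eed5eedUL;  /* stand-in canary */

    static void
    stack_check_failed(void)    /* stand-in for __stack_chk_fail() */
    {
            abort();
    }

    void
    copy_into_buffer(const char *src, unsigned len)
    {
            uintptr_t canary = stack_guard_value;  /* prologue: stash the guard */
            char buf[64];
            for (unsigned i = 0; i < len && i < sizeof(buf); i++)
                    buf[i] = src[i];
            (void)buf;
            /* epilogue: if restore code rewrote stack_guard_value meanwhile,
             * this fires even though the frame is intact */
            if (canary != stack_guard_value)
                    stack_check_failed();
    }
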
index 7a97e71c6ed6e5825b37d44b2a377b898ed04b05..19b3b0550102e95113265fe428d76151a14ac52d 100644 (file)
@@ -44,8 +44,6 @@ OPTIONS/mach_cluster_stats    optional mach_cluster_stats
 OPTIONS/mach_counters          optional mach_counters
 OPTIONS/mach_ipc_debug         optional mach_ipc_debug
 OPTIONS/mach_ipc_test          optional mach_ipc_test
-OPTIONS/mach_kdb               optional mach_kdb
-OPTIONS/mach_kgdb              optional mach_kgdb
 OPTIONS/mach_kdp               optional mach_kdp
 OPTIONS/config_serial_kdp              optional config_serial_kdp
 OPTIONS/mach_kprof             optional mach_kprof
@@ -57,7 +55,6 @@ OPTIONS/advisory_pageout      optional advisory_pageout
 OPTIONS/mach_vm_debug          optional mach_vm_debug
 OPTIONS/mach_page_hash_stats    optional mach_page_hash_stats
 OPTIONS/mig_debug              optional mig_debug
-OPTIONS/stat_time              optional stat_time
 OPTIONS/time_stamp             optional time_stamp
 OPTIONS/xpr_debug              optional xpr_debug
 OPTIONS/bootstrap_symbols      optional bootstrap_symbols
@@ -104,27 +101,6 @@ osfmk/default_pager/dp_memory_object.c     standard
 ./UserNotification/UNDReplyServer.c            standard
 osfmk/UserNotification/KUNCUserNotifications.c standard
 
-osfmk/ddb/db_access.c                  optional mach_kdb
-osfmk/ddb/db_break.c                   optional mach_kdb
-osfmk/ddb/db_command.c         optional mach_kdb
-osfmk/ddb/db_cond.c                    optional mach_kdb
-osfmk/ddb/db_examine.c         optional mach_kdb
-osfmk/ddb/db_expr.c                    optional mach_kdb
-osfmk/ddb/db_ext_symtab.c              standard
-osfmk/ddb/db_input.c                   optional mach_kdb
-osfmk/ddb/db_lex.c                     optional mach_kdb
-osfmk/ddb/db_macro.c                   optional mach_kdb
-osfmk/ddb/db_output.c                  optional mach_kdb
-osfmk/ddb/db_print.c                   optional mach_kdb
-osfmk/ddb/db_run.c                     optional mach_kdb
-osfmk/ddb/db_sym.c                     optional mach_kdb
-osfmk/ddb/db_task_thread.c             optional mach_kdb
-osfmk/ddb/db_trap.c                    optional mach_kdb
-osfmk/ddb/db_variables.c               optional mach_kdb
-osfmk/ddb/db_watch.c                   optional mach_kdb
-osfmk/ddb/db_write_cmd.c               optional mach_kdb
-
-osfmk/ddb/tr.c                 optional mach_tr
 osfmk/kdp/kdp.c                        optional mach_kdp
 osfmk/kdp/kdp_udp.c                    optional mach_kdp
 osfmk/kdp/kdp_serial.c                 optional config_serial_kdp
@@ -139,10 +115,10 @@ osfmk/ipc/ipc_port.c                      standard
 osfmk/ipc/ipc_pset.c                   standard
 osfmk/ipc/ipc_right.c                  standard
 osfmk/ipc/ipc_space.c                  standard
-osfmk/ipc/ipc_splay.c                  standard
 osfmk/ipc/ipc_table.c                  standard
 osfmk/ipc/ipc_labelh.c                 standard
 osfmk/ipc/mach_debug.c         standard
+osfmk/ipc/mach_kernelrpc.c             standard
 osfmk/ipc/mach_msg.c                   standard
 osfmk/ipc/mach_port.c                  standard
 osfmk/ipc/mig_log.c                    optional mig_debug
@@ -167,7 +143,6 @@ osfmk/kern/ipc_tt.c                 standard
 osfmk/kern/kalloc.c                    standard
 osfmk/kern/ledger.c                    standard
 osfmk/kern/locks.c                     standard
-osfmk/kern/mach_clock.c                standard
 osfmk/kern/machine.c                   standard
 osfmk/kern/mk_sp.c                     standard
 osfmk/kern/mk_timer.c          standard
@@ -202,6 +177,7 @@ osfmk/kern/timer_call.c             standard
 osfmk/kern/wait_queue.c                standard
 osfmk/kern/xpr.c                       optional xpr_debug
 osfmk/kern/zalloc.c                    standard
+osfmk/kern/gzalloc.c           optional config_gzalloc
 osfmk/kern/bsd_kern.c          optional mach_bsd
 osfmk/kern/hibernate.c         optional hibernation
 osfmk/pmc/pmc.c                                standard 
@@ -212,7 +188,6 @@ osfmk/pmc/pmc.c                             standard
 ./mach/exc_server.c                    optional mach_bsd
 ./mach/host_priv_server.c              standard
 ./mach/host_security_server.c          standard
-./mach/ledger_server.c                 standard
 ./mach/lock_set_server.c               standard
 ./mach/mach_exc_user.c                 standard
 ./mach/mach_exc_server.c               optional mach_bsd
@@ -289,4 +264,14 @@ osfmk/chud/chud_memory.c           standard
 osfmk/chud/chud_osfmk_callback.c       standard
 osfmk/chud/chud_thread.c               standard
 
+# Kernel performance monitoring
+osfmk/kperf/kperf.c                     optional kperf
+osfmk/kperf/action.c                    optional kperf
+osfmk/kperf/callstack.c                 optional kperf
+osfmk/kperf/pet.c                       optional kperf
+osfmk/kperf/filter.c                    optional kperf
+# osfmk/kperf/kperfbsd.c                    optional kperf # bsd/conf/files
+osfmk/kperf/threadinfo.c                optional kperf
+osfmk/kperf/timetrigger.c               optional kperf
+
 osfmk/console/serial_general.c standard
index 8c28645275d5d3e5e80f94da1ad9aa5293cff574..fb2610ce305e3af9529db6ae19207a3f9bfa1a6a 100644 (file)
@@ -11,7 +11,6 @@ OPTIONS/debug                 optional debug
 
 
 OPTIONS/gprof          optional gprof
-OPTIONS/db_machine_commands    optional db_machine_commands
 OPTIONS/dynamic_num_nodes      optional dynamic_num_nodes
 OPTIONS/vtoc_compat    optional vtoc_compat
 OPTIONS/fddi           optional fddi
@@ -27,8 +26,6 @@ osfmk/i386/pmap_common.c      standard
 osfmk/i386/pal_routines.c      optional pal_i386
 osfmk/i386/pal_routines_asm.s  optional pal_i386
 
-osfmk/ddb/db_aout.c            optional mach_kdb
-
 osfmk/i386/bsd_i386.c          optional mach_bsd
 osfmk/i386/bsd_i386_native.c   optional mach_bsd
 osfmk/i386/machdep_call.c      optional mach_bsd
@@ -40,9 +37,6 @@ osfmk/i386/cpu.c              standard
 osfmk/i386/cpuid.c             standard
 osfmk/i386/cpu_threads.c       standard
 osfmk/i386/cpu_topology.c      standard
-osfmk/i386/db_disasm.c optional mach_kdb
-osfmk/i386/db_interface.c      optional mach_kdb
-osfmk/i386/db_trace.c          optional mach_kdb
 osfmk/i386/etimer.c            standard
 osfmk/i386/fpu.c               standard
 osfmk/i386/gdt.c               standard
index a147f68de773470cfff9f368028e9537f9d711c9..88d449c781e6fdca6939794e508f8fa29ab1ea5e 100644 (file)
@@ -13,7 +13,6 @@ OPTIONS/debug                 optional debug
 
 
 OPTIONS/gprof          optional gprof
-OPTIONS/db_machine_commands    optional db_machine_commands
 OPTIONS/dynamic_num_nodes      optional dynamic_num_nodes
 OPTIONS/vtoc_compat    optional vtoc_compat
 OPTIONS/fddi           optional fddi
@@ -34,6 +33,12 @@ osfmk/i386/bsd_i386.c                optional mach_bsd
 osfmk/i386/bsd_i386_native.c   optional mach_bsd
 osfmk/i386/machdep_call.c      optional mach_bsd
 
+# Order is important here for __HIB section
+osfmk/x86_64/boot_pt.c         standard
+osfmk/i386/mp_desc.c           standard
+osfmk/i386/gdt.c               standard
+osfmk/x86_64/start.s           standard
+
 osfmk/x86_64/bcopy.s           standard
 osfmk/x86_64/bzero.s           standard
 osfmk/i386/cpu.c               standard
@@ -42,10 +47,8 @@ osfmk/i386/cpu_threads.c     standard
 osfmk/i386/cpu_topology.c      standard
 osfmk/i386/etimer.c            standard
 osfmk/i386/fpu.c               standard
-osfmk/i386/gdt.c               standard
 osfmk/i386/i386_lock.s standard
 osfmk/i386/i386_init.c         standard
-osfmk/i386/idle_pt.c           standard
 osfmk/i386/i386_vm_init.c      standard
 osfmk/i386/io_map.c            standard
 osfmk/i386/ktss.c              standard
@@ -54,15 +57,13 @@ osfmk/x86_64/loose_ends.c   standard
 osfmk/x86_64/copyio.c          standard
 osfmk/i386/locks_i386.c        standard
 osfmk/x86_64/locore.s  standard
-osfmk/x86_64/start.s   standard
-osfmk/x86_64/lowmem_vectors.s  standard
+osfmk/x86_64/lowmem_vectors.c  standard
 osfmk/x86_64/cswitch.s standard
 osfmk/i386/machine_routines.c          standard
 osfmk/x86_64/machine_routines_asm.s    standard
 osfmk/i386/machine_check.c     optional config_mca
 osfmk/i386/machine_task.c              standard
 osfmk/x86_64/mcount.s          optional profile
-osfmk/i386/mp_desc.c           standard
 #osfmk/x86_64/ntoh.s           standard
 osfmk/i386/pcb.c               standard
 osfmk/i386/pcb_native.c                standard
@@ -129,6 +130,8 @@ osfmk/i386/vmx/vmx_shims.c                  optional config_vmx
 #osfmk/OPTIONS/ec              optional ec
 #osfmk/OPTIONS/hi_res_clock    optional hi_res_clock
 
+# Kernel performance monitoring
+osfmk/kperf/x86_64/kperf_mp.c   optional kperf
 
 osfmk/i386/startup64.c         standard
 osfmk/x86_64/idt64.s           standard
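
The ordering comment in the hunk above reflects how the __HIB segment is populated: the hibernation resume path executes before the rest of the kernel image has been restored, so everything it touches must land in __HIB, and this build pins whole object files there by listing them first (together with the HIB_FILES machinery seen earlier). For illustration only, clang can express similar per-symbol placement with a Mach-O section attribute; xnu does it per object file instead:

    /* Illustration only: pinning individual symbols into a named
     * Mach-O segment; not how the xnu build actually does it. */
    __attribute__((section("__HIB,__data")))
    static int resume_scratch[16];

    __attribute__((section("__HIB,__text")))
    static void
    early_resume_step(void)
    {
            /* nothing outside __HIB may be touched at this point */
            resume_scratch[0] = 1;
    }
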
index 2af1a95536bb27e52032dea2908eafa75d201cf2..7cac3d45d757f61e60324dbf3059b8aefcd3c7e2 100644 (file)
@@ -269,13 +269,6 @@ void
 cnputc(char c)
 {
        console_buf_t   *cbp;
-#if MACH_KDB
-       /* Bypass locking/buffering if in debugger */
-       if (kdb_cpu == cpu_number()) {
-               _cnputc(c);
-               return;
-       }
-#endif /* MACH_KDB */  
        mp_disable_preemption();
        cbp = (console_buf_t *) current_cpu_datap()->cpu_console_buf;
        if (cbp == NULL) {
index d51e98dabe32ec12153dc5e16a0353a6c3d9049d..8551fd6b7c039f4574ff396046432e0d8f3f7cb2 100644 (file)
@@ -32,7 +32,6 @@
  * @APPLE_FREE_COPYRIGHT@
  */
 
-#include <mach_kdb.h>
 #include <platforms.h>
 #include <kern/spl.h>
 #include <mach/std_types.h>
index 0afd153b73379e2b031bc0be9c285eaa2acb0eae..3dd1a2ecab70e57bc83e32d46f64109b19a3885f 100644 (file)
@@ -1281,6 +1281,8 @@ gc_show_cursor(unsigned int xx, unsigned int yy)
 static void
 gc_update_color(int color, boolean_t fore)
 {
+       assert(gc_ops.update_color);
+
        gc_color_code = COLOR_CODE_SET(gc_color_code, color, fore);
        gc_ops.update_color(color, fore);
 }
@@ -2465,8 +2467,6 @@ vc_progress_task(__unused void *arg0, __unused void *arg)
 
     if( vc_progress_enable) {
 
-       KERNEL_DEBUG_CONSTANT(0x7020008, vc_progress_count, 0, 0, 0, 0);
-
         vc_progress_count++;
         if( vc_progress_count >= vc_progress->count) {
             vc_progress_count = 0;
@@ -2579,7 +2579,14 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 #else
                    new_vinfo.v_type = 0;
 #endif
-                   new_vinfo.v_scale = boot_vinfo->v_scale;
+                   unsigned int scale = (unsigned int)boot_vinfo->v_scale;
+                   if (scale == kPEScaleFactor1x)
+                       new_vinfo.v_scale = kPEScaleFactor1x;
+                   else if (scale == kPEScaleFactor2x)
+                       new_vinfo.v_scale = kPEScaleFactor2x;
+                   else /* Scale factor not set, default to 1x */
+                       new_vinfo.v_scale = kPEScaleFactor1x;
+
                }
      
                if (!lastVideoMapped)
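
The v_scale change above replaces a blind copy of a bootloader-supplied value with a whitelist: only the two known scale factors are accepted, and anything else falls back to 1x, which is the safe behavior for untrusted boot data. The same pattern in isolation (the constants here stand in for kPEScaleFactor1x/2x):

    enum { SCALE_1X = 1, SCALE_2X = 2 };   /* stand-in scale factors */

    static unsigned int
    sanitize_scale(unsigned int requested)
    {
            switch (requested) {
            case SCALE_1X:
            case SCALE_2X:
                    return requested;
            default:
                    return SCALE_1X;   /* unknown or unset: default to 1x */
            }
    }
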
@@ -2749,6 +2756,8 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                        gc_acquired = FALSE;
                        gc_desire_text = FALSE;
                        gc_enable( FALSE );
+                       if ( gc_graphics_boot == FALSE ) break;
+
                        vc_progress_set( FALSE, 0 );
 #if !CONFIG_EMBEDDED
                        vc_enable_progressmeter( FALSE );
index 26c6081fc8c149a8186c90a2c12b87d9d48a1b22..368e94da22557c35077be480dd80482e5eff971b 100644 (file)
@@ -58,19 +58,19 @@ void video_scroll_down(     void    *start,  /* HIGH addr */
 
 struct vc_info
 {
-    unsigned int       v_height;       /* pixels */
-    unsigned int       v_width;        /* pixels */
-    unsigned int       v_depth;
-    unsigned int       v_rowbytes;
-    unsigned long      v_baseaddr;
-    unsigned int       v_type;
-    char               v_name[32];
-    uint64_t           v_physaddr;
-    unsigned int       v_rows;         /* characters */
-    unsigned int       v_columns;      /* characters */
-    unsigned int       v_rowscanbytes; /* Actualy number of bytes used for display per row*/
-    unsigned int       v_scale;
-    unsigned int       v_reserved[4];
+       unsigned int    v_height;       /* pixels */
+       unsigned int    v_width;        /* pixels */
+       unsigned int    v_depth;
+       unsigned int    v_rowbytes;
+       unsigned long   v_baseaddr;
+       unsigned int    v_type;
+       char            v_name[32];
+       uint64_t        v_physaddr;
+       unsigned int    v_rows;         /* characters */
+       unsigned int    v_columns;      /* characters */
+	unsigned int	v_rowscanbytes;	/* Actual number of bytes used for display per row */
+       unsigned int    v_scale;        
+       unsigned int    v_reserved[4];
 };
 
 struct vc_progress_element {
diff --git a/osfmk/ddb/Makefile b/osfmk/ddb/Makefile
deleted file mode 100644 (file)
index b0689e4..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-MIG_DEFS =     \
-
-MIG_HDRS = \
-
-DATAFILES = \
-
-MIGINCLUDES = \
-
-EXPORT_MI_LIST = ${DATAFILES} ${_MIG_HDRS_} ${MIGINCLUDES}
-
-EXPORT_MI_DIR = ddb
-
-.ORDER: ${_MIG_HDRS_} ${MIGINCLUDES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/osfmk/ddb/db_access.c b/osfmk/ddb/db_access.c
deleted file mode 100644 (file)
index fb0512b..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>                /* type definitions */
-#include <machine/setjmp.h>
-#include <machine/endian.h>
-#include <kern/task.h>
-#include <ddb/db_access.h>
-
-
-
-/*
- * Access unaligned data items on aligned (longword)
- * boundaries.
- */
-
-int db_access_level = DB_ACCESS_LEVEL;
-
-db_expr_t
-db_get_task_value(
-       db_addr_t       addr,
-       register int    size,
-       boolean_t       is_signed,
-       task_t          task)
-{
-       char            data[sizeof(db_expr_t)];
-       register db_expr_t value;
-       register int    i;
-       uint64_t signx;
-
-       if(size == 0) return 0;
-
-       db_read_bytes((vm_offset_t)addr, size, data, task);
-
-       value = 0;
-#if    BYTE_MSF
-       for (i = 0; i < size; i++)
-#else  /* BYTE_LSF */
-       for (i = size - 1; i >= 0; i--)
-#endif
-       {
-           value = (value << 8) + (data[i] & 0xFF);
-       }
-       
-       if(!is_signed) return value;
-       
-       signx = 0xFFFFFFFFFFFFFFFFULL << ((size << 3) - 1);
-        
-       if(value & signx) value |= signx;       /* Add 1s to front if sign bit is on */
-
-       return (value);
-}
-
-void
-db_put_task_value(
-       db_addr_t       addr,
-       register int    size,
-       register db_expr_t value,
-       task_t          task)
-{
-       char            data[sizeof(db_expr_t)];
-       register int    i;
-
-#if    BYTE_MSF
-       for (i = size - 1; i >= 0; i--)
-#else  /* BYTE_LSF */
-       for (i = 0; i < size; i++)
-#endif
-       {
-           data[i] = value & 0xFF;
-           value >>= 8;
-       }
-
-       db_write_bytes((vm_offset_t)addr, size, data, task);
-}
-
-db_expr_t
-db_get_value(
-       db_addr_t       addr,
-       int             size,
-       boolean_t       is_signed)
-{
-       return(db_get_task_value(addr, size, is_signed, TASK_NULL));
-}
-
-void
-db_put_value(
-       db_addr_t       addr,
-       int             size,
-       db_expr_t       value)
-{
-       db_put_task_value(addr, size, value, TASK_NULL);
-}
diff --git a/osfmk/ddb/db_access.h b/osfmk/ddb/db_access.h
deleted file mode 100644 (file)
index 518a1b6..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-/*
- * Data access functions for debugger.
- */
-
-#ifndef        _DDB_DB_ACCESS_H_
-#define        _DDB_DB_ACCESS_H_
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <ddb/db_task_thread.h>
-
-/* implementation dependent access capability */
-#define        DB_ACCESS_KERNEL        0       /* only kernel space */
-#define DB_ACCESS_CURRENT      1       /* kernel or current task space */
-#define DB_ACCESS_ANY          2       /* any space */
-
-#ifndef        DB_ACCESS_LEVEL
-#define DB_ACCESS_LEVEL                DB_ACCESS_KERNEL
-#endif /* DB_ACCESS_LEVEL */
-
-#ifndef DB_VALID_KERN_ADDR
-#define DB_VALID_KERN_ADDR(addr)       ((addr) >= VM_MIN_KERNEL_ADDRESS \
-                                         && (addr) < VM_MAX_KERNEL_ADDRESS)
-#define DB_VALID_ADDRESS(addr,user)    ((user != 0) ^ DB_VALID_KERN_ADDR(addr))
-#define DB_PHYS_EQ(task1,addr1,task2,addr2)    0
-#define DB_CHECK_ACCESS(addr,size,task)        db_is_current_space(task)
-#endif /* DB_VALID_KERN_ADDR */
-
-extern int db_access_level;
-
-
-
-/* Prototypes for functions exported by ddb/db_access.c.
- */
-db_expr_t db_get_task_value(
-       db_addr_t       addr,
-       register int    size,
-       boolean_t       is_signed,
-       task_t          task);
-
-void db_put_task_value(
-       db_addr_t       addr,
-       register int    size,
-       register db_expr_t value,
-       task_t          task);
-
-db_expr_t db_get_value(
-       db_addr_t       addr,
-       int             size,
-       boolean_t       is_signed);
-
-void db_put_value(
-       db_addr_t       addr,
-       int             size,
-       db_expr_t       value);
-
-#endif /* !_DDB_DB_ACCESS_H_ */
diff --git a/osfmk/ddb/db_aout.c b/osfmk/ddb/db_aout.c
deleted file mode 100644 (file)
index a6e48c3..0000000
+++ /dev/null
@@ -1,961 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-/*
- * Symbol table routines for a.out format files.
- */
-#include <mach/mach_types.h>
-#include <mach/boolean.h>
-#include <mach/std_types.h>
-#include <machine/db_machdep.h>                /* data types */
-#include <vm/pmap.h>
-#include <string.h>                    /* For strcpy(), strcmp() */
-#include <ddb/db_aout.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_sym.h>
-
-#ifndef        DB_NO_AOUT
-
-#include <ddb/nlist.h>                 /* a.out symbol table */
-#include <ddb/stab.h>
-
-#include <libkern/kernel_mach_header.h>
-
-#define private static
-
-private int aout_db_order_symbols(char *, char *);
-private int aout_db_compare_symbols(char *, char *);
-private boolean_t aout_db_is_filename(char *);
-private boolean_t aout_db_eq_name(struct nlist *, char *, int);
-
-/*
- * An a.out symbol table as loaded into the kernel debugger:
- *
- * symtab      -> size of symbol entries, in bytes
- * sp          -> first symbol entry
- *                ...
- * ep          -> last symbol entry + 1
- * strtab      == start of string table
- *                size of string table in bytes,
- *                including this word
- *             -> strings
- */
-
-/*
- * Find pointers to the start and end of the symbol entries,
- * given a pointer to the start of the symbol table.
- */
-#define        db_get_aout_symtab(symtab, sp, ep) \
-       (sp = (struct nlist *)(((vm_offset_t *)(symtab)) + 1), \
-        ep = (struct nlist *)((char *)sp + *((int *)(symtab))))
-
-char *db_sorting_sym_end;
-
-private int
-aout_db_order_symbols(
-       char    *s1,
-       char    *s2)
-{
-       struct nlist    *sym1 = (struct nlist *) s1;
-       struct nlist    *sym2 = (struct nlist *) s2;
-
-       if (sym1->n_value != sym2->n_value) 
-               return (sym1->n_value - sym2->n_value);
-       else {
-               return (sym1->n_un.n_name - sym2->n_un.n_name);
-       }
-}
-
-private int
-aout_db_compare_symbols(
-       char    *sym1,
-       char    *sym2)
-{
-       return (((struct nlist *) sym1)->n_value -
-               ((struct nlist *) sym2)->n_value);
-}
-
-int db_sorting_limit = 50000;
-
-boolean_t
-aout_db_sym_init(
-       char *  symtab,         /* pointer to start of symbol table */
-       __unused char *esymtab, /* pointer to end of string table,
-                                  for checking - may be rounded up to
-                                  integer boundary */
-       const char *name,
-       char *  task_addr)      /* use for this task only */
-{
-       struct nlist    *sym_start, *sym_end, *dbsym_start, *dbsym_end;
-       struct nlist    *sp;
-       char *strtab, *dbstrtab;
-       long    db_strlen;
-       char *estrtab, *dbestrtab;
-       unsigned long   minsym = ~0;
-       unsigned long   maxsym = 0;
-       boolean_t       sorted;
-       boolean_t       sorting;
-       int nsyms;
-
-
-       if (!getsymtab((kernel_mach_header_t *)symtab, 
-               (vm_offset_t *)&sym_start, &nsyms, 
-               (vm_offset_t *)&strtab, (vm_size_t *)&db_strlen)) {
-               return(FALSE);
-       }
-       sym_end = sym_start + nsyms;
-       estrtab = strtab + db_strlen;
-       
-/*
- *     We haven't actually started up VM yet, so we can just steal some pages to 
- *     make a working copy of the symbols and strings
- */
-       dbsym_start = (struct nlist *)pmap_steal_memory(((unsigned int)sym_end - (unsigned int)sym_start + 4096) & -4096);      /* Get space for symbols */
-       dbstrtab = (char *)pmap_steal_memory(((unsigned int)estrtab - (unsigned int)strtab + 4096) & -4096);    /* Get space for strings */
-
-       bcopy((char *)sym_start, (char *)dbsym_start, (unsigned int)sym_end - (unsigned int)sym_start); /* Copy symbols */
-       bcopy(strtab, dbstrtab, (unsigned int)estrtab - (unsigned int)strtab);  /* Copy strings */
-
-       dbsym_end = dbsym_start + nsyms;
-       dbestrtab = dbstrtab + db_strlen;
-
-       sorting = ((dbsym_end - dbsym_start) < db_sorting_limit);
-       
-       for (sp = dbsym_start; sp < dbsym_end; sp++) {
-           register long strx;
-           strx = sp->n_un.n_strx;
-           if (strx != 0) {
-               if (strx > db_strlen) {
-                   sp->n_un.n_name = 0;
-                   continue;
-               }
-               sp->n_un.n_name = dbstrtab + strx;
-           }
-           if (sp->n_type != N_ABS) {
-               if (sp->n_value > 0 && sp->n_value < minsym)
-                   minsym = sp->n_value;
-               if (sp->n_value > maxsym)
-                   maxsym = sp->n_value;
-           }
-       }
-       
-       if (maxsym < minsym)
-               minsym = maxsym = 0;
-
-       if (sorting) {
-               db_qsort((char *) dbsym_start, dbsym_end - dbsym_start,
-                        sizeof(struct nlist), aout_db_order_symbols);
-               sorted = TRUE;
-       } else
-               sorted = FALSE;
-
-       if (db_add_symbol_table(SYMTAB_AOUT,
-                               (char*)dbsym_start,
-                               (char*)dbsym_end,
-                               name,
-                               0,
-                               task_addr,
-                               minsym,
-                               maxsym,
-                               sorted))
-       {
-           /* Successfully added symbol table */
-               
-           pmap_protect(kernel_pmap,
-                        (vm_offset_t) dbsym_start, (vm_offset_t) dbsym_end,
-                        VM_PROT_READ|VM_PROT_WRITE);
-           pmap_protect(kernel_pmap,
-                        (vm_offset_t) dbstrtab, (vm_offset_t) dbestrtab,
-                        VM_PROT_READ|VM_PROT_WRITE);
-           return TRUE;
-       }
-       return FALSE;
-}
-
-/*
- * This KLUDGE offsets the n_values of a copied symbol table
- */
-void db_clone_offsetXXX(char *, long);
-void
-db_clone_offsetXXX(char * symtab, long offset)
-{
-       register struct nlist   *sym_start, *sym_end, *sp;
-
-       db_get_aout_symtab((int *)symtab, sym_start, sym_end);
-
-       for (sp = sym_start; sp < sym_end; sp++)
-               if (sp->n_type != N_ABS)
-                       sp->n_value += offset;
-}
-/* end KLUDGE */
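-/*
- * Hypothetical use of the kludge above: after copying a symbol table
- * to a new base address,
- *
- *     db_clone_offsetXXX(copy, new_base - old_base);
- *
- * shifts every non-N_ABS n_value by the relocation delta.
- */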
-
-/*
- * Check whether a name looks like a file name (i.e. matches an xxxx.x pattern).
- */
-private boolean_t
-aout_db_is_filename(char *name)
-{
-       while (*name) {
-           if (*name == '.') {
-               if (name[1])
-                   return(TRUE);
-           }
-           name++;
-       }
-       return(FALSE);
-}
-
-/*
- * Special comparison of a name against the name stored in a symbol table entry.
- */
-private boolean_t
-aout_db_eq_name(
-       struct nlist *sp,
-       char *name,
-       int incomplete)
-{
-       register char *s1, *s2;
-
-       s1 = sp->n_un.n_name;
-       s2 = name;
-#ifndef __NO_UNDERSCORES__
-       if (*s1 == '_' && *s2 && *s2 != '_')
-           s1++;
-#endif /* __NO_UNDERSCORES__ */
-       while (*s2) {
-           if (*s1++ != *s2++) {
-               /*
-                * check the .c vs .o file name case (e.g. name "x.c" matches entry "x.o")
-                */
-               if (*s2 == 0 && sp->n_un.n_name <= s1 - 2 
-                       && s1[-2] == '.' && s1[-1] == 'o')
-                   return(TRUE);
-               return(FALSE);
-           }
-       }
-       if (incomplete)
-           return(TRUE);
-       /*
-        * do special check for
-        *     xxx:yyy for N_FUN
-        *     xxx.ttt for N_DATA and N_BSS
-        */
-       return(*s1 == 0 || (*s1 == ':' && sp->n_type == N_FUN) || 
-               (*s1 == '.' && (sp->n_type == N_DATA || sp->n_type == N_BSS)));
-}
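-/*
- * Examples of the matching rules above (a sketch, not exhaustive):
- * entry "_foo" matches name "foo" (unless __NO_UNDERSCORES__ is
- * defined), entry "foo:F1" matches "foo" when n_type == N_FUN,
- * entry "bar.0" matches "bar" when n_type is N_DATA or N_BSS, and
- * entry "x.o" matches name "x.c" via the file name case.
- */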
-
-/*
- * search a symbol table with name and type
- *     fp(in,out): last found text file name symbol entry
- */
-private struct nlist *
-aout_db_search_name(
-       struct nlist    *sp,
-       struct nlist    *ep,
-       char            *name,
-       int             type,
-       struct nlist    **fp,
-        int            incomplete)
-{
-       struct nlist    *file_sp = *fp;
-       struct nlist    *found_sp = 0;
-
-       for ( ; sp < ep; sp++) {
-           if (sp->n_other)
-               sp->n_other = 0;
-           if (sp->n_type == N_TEXT && aout_db_is_filename(sp->n_un.n_name))
-               *fp = sp;
-           if (type) {
-               if (sp->n_type == type) {
-                   /* dwm_debug: b26 name, mk6 added last param */
-                   if (aout_db_eq_name(sp, name, 0))
-                       return(sp);
-               }
-               if (sp->n_type == N_SO)
-                   *fp = sp;
-               continue;
-           }
-           if (sp->n_type & N_STAB)
-               continue;
-           if (sp->n_un.n_name && aout_db_eq_name(sp, name, incomplete)) {
-               /*
-                * In the case of a search qualified by file name,
-                * return the symbol immediately after a quick check.
-                * Otherwise, prefer an external symbol.
-                */
-               if (file_sp) {
-                   if ((file_sp == *fp) || (sp->n_type & N_EXT))
-                       return(sp);
-               } else if ((sp->n_type & N_EXT) ||
-                          (incomplete && !aout_db_is_filename(sp->n_un.n_name)))
-                   return(sp);
-               else
-                   found_sp = sp;
-           }
-       }
-       return(found_sp);
-}
-
-/*
- * Print sorted possible completions for a symbol.
- * Use the n_other field to mark completion symbols,
- *     in order to speed up sorting.
- */
-int
-aout_db_qualified_print_completion(
-       db_symtab_t     *stab,
-       char            *sym)
-{
-       struct nlist    *sp;
-       struct nlist    *sp1;
-       struct nlist    *ep;
-       struct nlist    *ep1 = NULL;
-       struct nlist    *fp = 0;
-       int             symlen;
-       int             nsym = 0;
-       struct nlist    *cur;
-       struct nlist    *new;
-       char            *fname;
-       int             func;
-       int             line;
-
-       sp = aout_db_search_name((struct nlist *)stab->start,
-                             (struct nlist *)stab->end,
-                             sym, 0, &fp, 1);
-       if (sp == (struct nlist *)0)
-           return 0;
-
-       symlen = strlen(sym);
-       cur = sp;
-       while (cur) {
-           if (strncmp(cur->n_un.n_name, sym, symlen) == 0)
-                cur->n_other = 1;
-           else
-                cur->n_other = 2;
-           ep = cur;
-           cur = aout_db_search_name(cur + 1, (struct nlist *)stab->end,
-                                  sym, 0, &fp, 1);
-       }
-
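-       /*
-        * What follows is, roughly, a selection sort over the marked
-        * entries: each pass finds the lexicographically smallest
-        * remaining marked symbol (n_other - 1 skips a leading '_'),
-        * prints it with its file/line context, and unmarks it.
-        */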
-       sp1 = sp;
-       for (;;) {
-           new = cur = sp;
-           while (++cur <= ep)
-               if (cur->n_other) {
-                  if (sp1 == sp)
-                       sp1 = cur;
-                  if (strncmp(&cur->n_un.n_name[cur->n_other - 1],
-                             &new->n_un.n_name[new->n_other - 1],
-                             symlen) < 0)
-                       new = cur;
-                  else
-                       ep1 = cur;
-               }
-
-           func = line = 0;
-           if ((new->n_type & N_EXT) == 0) {
-               for (cur = new - 1; cur > (struct nlist *)stab->start; cur--) {
-                   if (cur->n_type == N_SO ||
-                       (stab->sorted && cur->n_value < new->n_value))
-                       break;
-                   if (line == 0 &&
-                       cur->n_type == N_SLINE &&
-                       cur->n_value == new->n_value)
-                       line = cur->n_desc;
-                   if (func == 0 &&
-                       cur->n_type == N_FUN &&
-                       cur->n_value == new->n_value)
-                       func = 1;
-               }
-
-               if (cur->n_type == N_SO)
-                   fname = cur->n_un.n_name;
-               else
-                   fname = (char *)0;
-
-               if (line == 0 || func == 0)
-                   for (cur = new + 1;
-                        cur < (struct nlist *)stab->end; cur++) {
-                       if (cur->n_type == N_SO ||
-                           (stab->sorted && cur->n_value > new->n_value))
-                           break;
-                       if (line == 0 &&
-                           cur->n_type == N_SLINE &&
-                           cur->n_value == new->n_value) {
-                           line = cur->n_desc;
-                           if (func)
-                               break;
-                       }
-                       if (func == 0 &&
-                           cur->n_type == N_FUN &&
-                           cur->n_value == new->n_value) {
-                           func = 1;
-                           if (line)
-                               break;
-                       }
-               }
-           } else {
-               fname = (char *)0;
-               for (cur = new - 1; cur > (struct nlist *)stab->start; cur--) {
-                   if (cur->n_type == N_SO ||
-                       (stab->sorted && cur->n_value < new->n_value))
-                       break;
-                   if (func == 0 &&
-                       cur->n_type == N_FUN &&
-                       cur->n_value == new->n_value)
-                       func = 1;
-               }
-               if (func == 0)
-                   for (cur = new + 1;
-                        cur < (struct nlist *)stab->end; cur++) {
-                       if (cur->n_type == N_SO ||
-                           (stab->sorted && cur->n_value > new->n_value))
-                           break;
-                       if (cur->n_type == N_FUN &&
-                           cur->n_value == new->n_value) {
-                           func = 1;
-                           break;
-                       }
-               }
-           }
-
-           db_sym_print_completion(stab, &new->n_un.n_name[new->n_other - 1],
-                                   func, fname, line);
-           nsym++;
-           new->n_other = 0;
-
-           if (new == sp) {
-               if (sp1 == sp)
-                   break;
-               sp = sp1;
-           } else if (new == sp1)
-               sp1 = sp;
-
-           if (new == ep)
-               ep = ep1;
-       }
-       return nsym;
-}
-
-/*
- * search a (possibly incomplete) symbol with file, func and line qualification
- */
-private int
-aout_db_qualified_search(
-       db_symtab_t     *stab,
-       char            *file,
-       char            *sym,
-       int             line,
-       db_sym_t        *ret,
-       char            **name,
-       int             *len)
-{
-       register struct nlist *sp = (struct nlist *)stab->start;
-       struct nlist    *ep = (struct nlist *)stab->end;
-       struct nlist    *fp = 0;
-       struct nlist    *found_sp;
-       unsigned long   func_top;
-       boolean_t       in_file;
-       int             nsym = 0;
-       int             i;
-       char            *p;
-
-       if (file == 0 && sym == 0)
-           return(0);
-       if (file) {
-           if ((sp = aout_db_search_name(sp, ep, file, N_TEXT, &fp, 0)) == 0)
-               return(0);
-       }
-       if (sym) {
-           for (;;) {
-               sp = aout_db_search_name(sp, ep, sym, (line > 0)? N_FUN: 0, &fp,
-                                     (ret == (db_sym_t *)0));
-               if (sp == 0)
-                   return(nsym);
-               if (ret)
-                   break;
-
-               if (strncmp(sp->n_un.n_name, sym, strlen(sym)) == 0)
-                   p = sp->n_un.n_name;
-               else
-                   p = &sp->n_un.n_name[1];
-
-               if (*name == (char *)0) {
-                   *name = p;
-                   *len = strlen(p);
-               } else {
-                   for (i = 0; i < *len; i++)
-                       if ((*name)[i] != p[i]) {
-                           *len = i;
-                           break;
-                       }
-               }
-
-               nsym++;
-               sp++;
-           }
-       }
-       if (line > 0) {
-           if (file && !aout_db_eq_name(fp, file, 0))
-               return(0);
-           found_sp = 0;
-           if (sp->n_type == N_FUN) {
-               /*
-                * qualified by function name;
-                *     search backward because the line number entries
-                *     for the function are above it in this case.
-                */
-               func_top = sp->n_value;
-               if (stab->sorted) {
-                   /* symbols with the same value may have been mixed up */
-                   do {
-                       sp++;
-                   } while (sp->n_value == func_top);
-               }
-               for (sp--; sp >= (struct nlist *)stab->start; sp--) {
-                   if (sp->n_type != N_SLINE)
-                       continue;
-                   if (sp->n_value < func_top)
-                       break;
-                   if (sp->n_desc <= line) {
-                       if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
-                           found_sp = sp;
-                       if (sp->n_desc == line)
-                           break;
-                   }
-               }
-               if (sp->n_type != N_SLINE || sp->n_value < func_top)
-                   return(0);
-           } else {
-               /*
-                * qualified by file name only;
-                *    search forward in this case
-                */
-               in_file = TRUE;
-               if (stab->sorted) {
-                   /* symbols with the same value may have been mixed up */
-                   func_top = sp->n_value;
-                   do {
-                       sp--;
-                   } while (sp->n_value == func_top);
-               }
-               for (sp++; sp < ep; sp++) {
-                   if (sp->n_type == N_TEXT 
-                       && aout_db_is_filename(sp->n_un.n_name))
-                       break;          /* enter into another file */
-                   if (sp->n_type == N_SOL) {
-                       in_file = aout_db_eq_name(sp, file, 0);
-                       continue;
-                   }
-                   if (!in_file || sp->n_type != N_SLINE)
-                       continue;
-                   if (sp->n_desc <= line) {
-                       if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
-                           found_sp = sp;
-                       if (sp->n_desc == line)
-                           break;
-                   }
-               }
-           }
-           sp = found_sp;
-       }
-       *ret = (db_sym_t) sp;
-       return(1);
-}
-
-/*
- * lookup symbol by name
- */
-db_sym_t
-aout_db_lookup(
-       db_symtab_t     *stab,
-       char *          symstr)
-{
-       return(db_sym_parse_and_lookup(aout_db_qualified_search, stab, symstr));
-}
-
-/*
- * lookup (possibly incomplete) symbol by name
- */
-int
-aout_db_lookup_incomplete(
-       db_symtab_t     *stab,
-       char *          symstr,
-       char **         name,
-       int             *len,
-       int             *toadd)
-{
-       return(db_sym_parse_and_lookup_incomplete(aout_db_qualified_search,
-                                       stab, symstr, name, len, toadd));
-}
-
-/*
- * Display possible completions for the symbol
- */
-int
-aout_db_print_completion(
-       db_symtab_t     *stab,
-       char *          symstr)
-{
-
-       return(db_sym_parse_and_print_completion(aout_db_qualified_print_completion,
-                                                stab, symstr));
-}
-
-db_sym_t
-aout_db_search_symbol(
-       db_symtab_t     *symtab,
-       db_addr_t       off,
-       db_strategy_t   strategy,
-       db_expr_t       *diffp)         /* in/out */
-{
-       db_expr_t diff = *diffp;
-       register struct nlist   *symp = 0;
-       struct nlist            *sp, *ep, *cp;
-       boolean_t               first_pass = FALSE;
-
-       sp = (struct nlist *)symtab->start;
-       ep = (struct nlist *)symtab->end;
-
-       if (symtab->sorted) {
-           struct nlist target;
-
-           target.n_value = (vm_offset_t)off;
-           target.n_un.n_name = (char *) 0;
-           target.n_other = (char) 0;
-           db_qsort_limit_search((char *)&target, (char **)&sp, (char **)&ep,
-                                 sizeof(struct nlist), aout_db_compare_symbols);
-           first_pass = TRUE;
-       }
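-       /*
-        * With a sorted table, db_qsort_limit_search() has narrowed
-        * [sp, ep) to the entries near the target value; if nothing
-        * matches in that window, first_pass drives one retry over
-        * the whole table below.
-        */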
-
-    try_again:
-       for (cp = ep-1; cp >= sp; cp--) {
-           if (cp->n_un.n_name == 0)
-               continue;
-           if ((cp->n_type & N_STAB) != 0)
-               continue;
-           if (strategy == DB_STGY_XTRN && (cp->n_type & N_EXT) == 0)
-               continue;
-           if (off >= cp->n_value) {
-               if (off - cp->n_value < diff) {
-                   diff = off - cp->n_value;
-                   symp = cp;
-                   if (diff == 0 && (cp->n_type & N_EXT))
-                           break;
-               }
-               else if (off - cp->n_value == diff) {
-                   if (symp == 0)
-                       symp = cp;
-                   else if ((symp->n_type & N_EXT) == 0 &&
-                               (cp->n_type & N_EXT) != 0)
-                       symp = cp;      /* pick the external symbol */
-               }
-           }
-       }
-       if (symp == 0) {
-           if (first_pass) {
-               first_pass = FALSE;
-               sp = (struct nlist *) symtab->start;
-               goto try_again;
-           }
-           *diffp = off;
-       }
-       else {
-           *diffp = diff;
-       }
-       return ((db_sym_t)symp);
-}
-
-/*
- * Return the name and value for a symbol.
- */
-void
-aout_db_symbol_values(
-       db_sym_t        sym,
-       char            **namep,
-       db_expr_t       *valuep)
-{
-       register struct nlist *sp;
-
-       sp = (struct nlist *)sym;
-       if (namep)
-           *namep = sp->n_un.n_name;
-       if (valuep)
-           *valuep = sp->n_value;
-}
-
-#define X_DB_MAX_DIFF  8       /* maximum allowable diff at the end of line */
-extern unsigned int db_search_maxoff;  /* maximum acceptable offset */
-
-/*
- * search symbol by value
- */
-db_sym_t
-aout_db_search_by_addr(
-       db_symtab_t     *stab,
-       db_addr_t       addr,
-       char            **file,
-       char            **func,
-       int             *line,
-       db_expr_t       *diff,
-        int             *args)
-{
-       struct nlist    *sp, *cp;
-       register        struct nlist *line_sp, *func_sp, *file_sp, *line_func;
-       unsigned long   func_diff, line_diff;
-       boolean_t       found_line = FALSE;
-       struct          nlist *ep = (struct nlist *)stab->end;
-       boolean_t       first_pass = FALSE;
-
-       /*
-        * 92-May-16
-        * Added init of these two... not sure if it's correct, but
-        * can't be worse than random values....  -- jfriedl@omron.co.jp
-        */
-       func_diff = line_diff = /*HUGE*/0x0fffffff;
-
-       line_sp = func_sp = file_sp = line_func = 0;
-       *file = *func = 0;
-       *line = 0;
-       *args = -1;
-
-       sp = (struct nlist *)stab->start;
-       if (stab->sorted) {
-               struct nlist target;
-
-               target.n_value = (vm_offset_t)addr;
-               target.n_un.n_name = (char *) 0;
-               target.n_other = (char) 0;
-               db_qsort_limit_search((char *)&target, (char **)&sp,
-                                     (char **)&ep, sizeof(struct nlist),
-                                     aout_db_compare_symbols);
-               first_pass = TRUE;
-       }
-
-       for (cp = sp; cp < ep; cp++) {
-           switch(cp->n_type) {
-           case N_SLINE:
-               if (cp->n_value <= addr) {
-                   if (line_sp == 0 || line_diff >= addr - cp->n_value) {
-                       if (line_func)
-                           line_func = 0;
-                       line_sp = cp;
-                       line_diff = (unsigned long)(addr - cp->n_value);
-                   }
-               }
-               if (cp->n_value >= addr && line_sp)
-                   found_line = TRUE;
-               continue;
-           case N_FUN:
-               if ((found_line || (line_sp && line_diff < X_DB_MAX_DIFF))
-                   && line_func == 0)
-                   line_func = cp;
-               continue;
-           case N_SO:
-               if (cp->n_value > addr)
-                   continue;
-               if (file_sp == 0 || file_sp->n_value <= cp->n_value)
-                   file_sp = cp;
-               continue;
-           case N_TEXT:
-               if (aout_db_is_filename(cp->n_un.n_name)) {
-                   if (cp->n_value > addr)
-                       continue;
-                   if (file_sp == 0 || file_sp->n_value <= cp->n_value)
-                       file_sp = cp;
-               } else if (cp->n_value <= addr &&
-                        (func_sp == 0 || func_diff > addr - cp->n_value)) {
-                   func_sp = cp;
-                   func_diff = (unsigned long)(addr - cp->n_value);
-               }
-               continue;
-           case N_TEXT|N_EXT:
-               if (cp->n_value <= addr &&
-                        (func_sp == 0 || func_diff >= addr - cp->n_value)) {
-                   func_sp = cp;
-                   func_diff = (unsigned long)(addr - cp->n_value);
-                   if (func_diff == 0 && file_sp && func_sp && line_sp == 0)
-                       break;
-               }
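-               /* FALLTHROUGH into the bounds check in the default case */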
-           default:
-               if (stab->sorted) {
-                       if ((cp->n_value > addr) &&
-                           (cp->n_value - addr > db_search_maxoff))
-                               break;
-               }
-               continue;
-           }
-           break;
-       }
-       if (first_pass && (!file_sp || !line_sp || !func_sp)) {
-           first_pass = FALSE;
-           cp = sp;
-           sp = (struct nlist *)stab->start;
-           for (; cp >= sp; cp--) {
-               switch(cp->n_type) {
-               case N_SLINE:
-                   if (line_sp)
-                       found_line = TRUE;
-                   continue;
-               case N_FUN:
-                   if ((found_line || (line_sp && line_diff < X_DB_MAX_DIFF))
-                       && line_func == 0)
-                       line_func = cp;
-                   continue;
-               case N_SO:
-                   if (file_sp == 0)
-                       file_sp = cp;
-                   continue;
-               case N_TEXT:
-                   if (aout_db_is_filename(cp->n_un.n_name)) {
-                       if (file_sp == 0)
-                           file_sp = cp;
-                   } else if (func_sp == 0) {
-                       func_sp = cp;
-                       func_diff = (unsigned long)(addr - cp->n_value);
-                   }
-                   continue;
-               case N_TEXT|N_EXT:
-                   if (func_sp == 0) {
-                       func_sp = cp;
-                       func_diff = (unsigned long)(addr - cp->n_value);
-                       if (func_diff == 0 && file_sp && func_sp
-                           && line_sp == 0)
-                           break;
-                   }
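-                   /* FALLTHROUGH */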
-               default:
-                   if (line_sp && file_sp &&
-                       addr - cp->n_value > db_search_maxoff)
-                       break;
-                   continue;
-               }
-               break;
-           }
-       }
-#if 0  
-/*
- * XXX - barbou@gr.osf.org
- * I don't know if that code is useful to something, but it makes the -gline
- * option of gcc useless.
- */
-       if (line_sp) {
-           if (line_func == 0 || func_sp == 0
-               || line_func->n_value != func_sp->n_value)
-               line_sp = 0;
-       }
-#else
-       if (line_sp && !found_line) {
-               line_sp = 0;
-       }
-#endif
-       *diff = 0;
-       if (file_sp) {
-           *diff = addr - file_sp->n_value;
-           *file = file_sp->n_un.n_name;
-       }
-       if (line_sp) {
-           *diff = addr - line_sp->n_value;
-           *line = line_sp->n_desc;
-       }
-       if (func_sp) {
-           *diff = addr - func_sp->n_value;
-           *func = (func_sp->n_un.n_name[0] == '_')?
-                       func_sp->n_un.n_name + 1: func_sp->n_un.n_name;
-           if (line_func && (line_func->n_desc & 0x4000))
-               *args = line_func->n_desc & 0x3ff;
-       }
-       return((db_sym_t) func_sp);
-}
-
-/*
- * Find filename and lineno within, given the current pc.
- */
-boolean_t
-aout_db_line_at_pc(
-       db_symtab_t     *stab,
-       __unused db_sym_t       sym,
-       char            **file,
-       int             *line,
-       db_expr_t       pc)
-{
-       char            *func;
-       db_expr_t       diff;
-       boolean_t       found;
-       int             args;
-
-       found = (aout_db_search_by_addr(stab, (unsigned)pc, file, &func, line,
-                                    &diff, &args)
-                != DB_SYM_NULL);
-       return(found && func && *file);
-}
-
-/*
- * Initialization routine for a.out files.
- */
-void
-aout_db_init(void)
-{
-       aout_db_sym_init((char *) &_mh_execute_header,
-               (char *)0, "mach", (char *)0);
-}
-
-#endif /* DB_NO_AOUT */
diff --git a/osfmk/ddb/db_aout.h b/osfmk/ddb/db_aout.h
deleted file mode 100644 (file)
index 3a1ab38..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Symbol table routines for a.out format files.
- */
-
-#ifndef        _DDB_DB_AOUT_H_
-#define        _DDB_DB_AOUT_H_
-
-#include <machine/db_machdep.h>                /* data types */
-#include <ddb/db_sym.h>                        /* db_symtab_t */
-
-boolean_t aout_db_sym_init(char *, char *, const char *, char *);
-
-db_sym_t aout_db_lookup(
-       db_symtab_t     *stab,
-       char *          symstr);
-
-int aout_db_lookup_incomplete(
-       db_symtab_t     *stab,
-       char *          symstr,
-       char **         name,
-       int             *len,
-       int             *toadd);
-
-int aout_db_print_completion(
-       db_symtab_t     *stab,
-       char *          symstr);
-
-db_sym_t aout_db_search_symbol(
-       db_symtab_t     *symtab,
-       db_addr_t       off,
-       db_strategy_t   strategy,
-       db_expr_t       *diffp);                /* in/out */
-
-void aout_db_symbol_values(
-       db_sym_t        sym,
-       char            **namep,
-       db_expr_t       *valuep);
-
-db_sym_t aout_db_search_by_addr(
-       db_symtab_t     *stab,
-       db_addr_t       addr,
-       char            **file,
-       char            **func,
-       int             *line,
-       db_expr_t       *diff,
-       int             *args);
-
-boolean_t aout_db_line_at_pc(
-       db_symtab_t     *stab,
-       db_sym_t        sym,
-       char            **file,
-       int             *line,
-       db_expr_t       pc);
-
-int aout_db_qualified_print_completion(
-       db_symtab_t     *stab,
-       char            *sym);
-
-void aout_db_init(void);
-
-#endif /* !_DDB_DB_AOUT_H_ */
diff --git a/osfmk/ddb/db_break.c b/osfmk/ddb/db_break.c
deleted file mode 100644 (file)
index 38c4e23..0000000
+++ /dev/null
@@ -1,816 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Breakpoints.
- */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_break.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_cond.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_task_thread.h>
-#include <kern/thread.h>
-
-#define        NBREAKPOINTS    100
-#define NTHREAD_LIST   (NBREAKPOINTS*3)
-
-struct db_breakpoint   db_break_table[NBREAKPOINTS];
-db_breakpoint_t                db_next_free_breakpoint = &db_break_table[0];
-db_breakpoint_t                db_free_breakpoints = 0;
-db_breakpoint_t                db_breakpoint_list = 0;
-
-static struct db_thread_breakpoint     db_thread_break_list[NTHREAD_LIST];
-static db_thread_breakpoint_t          db_free_thread_break_list = 0;
-static boolean_t                       db_thread_break_init = FALSE;
-static int                             db_breakpoint_number = 0;
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-static int db_add_thread_breakpoint(
-       register db_breakpoint_t        bkpt,
-       vm_offset_t                     task_thd,
-       int                             count,
-       boolean_t                       task_bpt);
-
-static int db_delete_thread_breakpoint(
-       register db_breakpoint_t        bkpt,
-       vm_offset_t                     task_thd);
-
-static db_thread_breakpoint_t db_find_thread_breakpoint(
-       db_breakpoint_t bkpt,
-       thread_t        thr_act);
-
-static void db_force_delete_breakpoint(
-       db_breakpoint_t bkpt,
-       vm_offset_t     task_thd,
-       boolean_t       is_task);
-
-db_breakpoint_t db_breakpoint_alloc(void);
-
-void db_breakpoint_free(register db_breakpoint_t bkpt);
-
-void db_delete_breakpoint(
-       task_t          task,
-       db_addr_t       addr,
-       vm_offset_t     task_thd);
-
-void
-db_delete_all_breakpoints(
-       task_t          task);
-
-void db_list_breakpoints(void);
-
-
-
-db_breakpoint_t
-db_breakpoint_alloc(void)
-{
-       register db_breakpoint_t        bkpt;
-
-       if ((bkpt = db_free_breakpoints) != 0) {
-           db_free_breakpoints = bkpt->link;
-           return (bkpt);
-       }
-       if (db_next_free_breakpoint == &db_break_table[NBREAKPOINTS]) {
-           db_printf("All breakpoints used.\n");
-           return (0);
-       }
-       bkpt = db_next_free_breakpoint;
-       db_next_free_breakpoint++;
-
-       return (bkpt);
-}
-
-void
-db_breakpoint_free(register db_breakpoint_t bkpt)
-{
-       bkpt->link = db_free_breakpoints;
-       db_free_breakpoints = bkpt;
-}
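-/*
- * A small free-list allocator: db_breakpoint_alloc() first pops the
- * free list, then carves fresh slots out of the static db_break_table[]
- * until all NBREAKPOINTS entries have been handed out;
- * db_breakpoint_free() pushes a record back for reuse.  Sketch of the
- * usual pairing:
- *
- *     db_breakpoint_t bkpt = db_breakpoint_alloc();
- *     if (bkpt != 0) {
- *             ... use bkpt ...
- *             db_breakpoint_free(bkpt);
- *     }
- */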
-
-static int
-db_add_thread_breakpoint(
-       register db_breakpoint_t        bkpt,
-       vm_offset_t                     task_thd,
-       int                             count,
-       boolean_t                       task_bpt)
-{
-       register db_thread_breakpoint_t tp;
-
-       if (db_thread_break_init == FALSE) {
-           for (tp = db_thread_break_list; 
-               tp < &db_thread_break_list[NTHREAD_LIST-1]; tp++)
-               tp->tb_next = tp+1;
-           tp->tb_next = 0;
-           db_free_thread_break_list = db_thread_break_list;
-           db_thread_break_init = TRUE;
-       }
-       if (db_free_thread_break_list == 0)
-           return (-1);
-       tp = db_free_thread_break_list;
-       db_free_thread_break_list = tp->tb_next;
-       tp->tb_is_task = task_bpt;
-       tp->tb_task_thd = task_thd;
-       tp->tb_count = count;
-       tp->tb_init_count = count;
-       tp->tb_cond = 0;
-       tp->tb_number = ++db_breakpoint_number;
-       tp->tb_next = bkpt->threads;
-       bkpt->threads = tp;
-       return(0);
-}
-
-static int
-db_delete_thread_breakpoint(
-       register db_breakpoint_t        bkpt,
-       vm_offset_t                     task_thd)
-{
-       register db_thread_breakpoint_t tp;
-       register db_thread_breakpoint_t *tpp;
-
-       if (task_thd == 0) {
-           /* delete all the thread-breakpoints */
-
-           for (tpp = &bkpt->threads; (tp = *tpp) != 0; tpp = &tp->tb_next)
-               db_cond_free(tp);
-
-           *tpp = db_free_thread_break_list;
-           db_free_thread_break_list = bkpt->threads;
-           bkpt->threads = 0;
-           return 0;
-       } else {
-           /* delete the specified thread-breakpoint */
-
-           for (tpp = &bkpt->threads; (tp = *tpp) != 0; tpp = &tp->tb_next)
-               if (tp->tb_task_thd == task_thd) {
-                   db_cond_free(tp);
-                   *tpp = tp->tb_next;
-                   tp->tb_next = db_free_thread_break_list;
-                   db_free_thread_break_list = tp;
-                   return 0;
-               }
-
-           return -1;  /* not found */
-       }
-}
-
-static db_thread_breakpoint_t
-db_find_thread_breakpoint(
-       db_breakpoint_t bkpt,
-       thread_t        thr_act)
-{
-       register db_thread_breakpoint_t tp;
-       register task_t task =
-                       (thr_act == THREAD_NULL)
-                                       ? TASK_NULL : thr_act->task;
-
-       for (tp = bkpt->threads; tp; tp = tp->tb_next) {
-           if (tp->tb_is_task) {
-               if (tp->tb_task_thd == (vm_offset_t)task)
-                   break;
-               continue;
-           }
-           if (tp->tb_task_thd == (vm_offset_t)thr_act || tp->tb_task_thd == 0)
-               break;
-       }
-       return(tp);
-}
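-/*
- * Matching sketch: a tb_is_task entry matches on the thread's task;
- * otherwise an entry matches one specific thread, or any thread when
- * tb_task_thd == 0.
- */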
-
-db_thread_breakpoint_t
-db_find_thread_breakpoint_here(
-       task_t          task,
-       db_addr_t       addr)
-{
-       db_breakpoint_t bkpt;
-
-       bkpt = db_find_breakpoint(task, (db_addr_t)addr);
-       if (bkpt == 0)
-           return(0);
-       return(db_find_thread_breakpoint(bkpt, current_thread()));
-}
-
-db_thread_breakpoint_t
-db_find_breakpoint_number(
-       int             num,
-       db_breakpoint_t *bkptp)
-{
-       register db_thread_breakpoint_t tp;
-       register db_breakpoint_t bkpt;
-
-       for (bkpt = db_breakpoint_list; bkpt != 0; bkpt = bkpt->link) {
-           for (tp = bkpt->threads; tp; tp = tp->tb_next) {
-               if (tp->tb_number == num) {
-                   if (bkptp)
-                       *bkptp = bkpt;
-                   return(tp);
-               }
-           }
-       }
-       return(0);
-}
-
-static void
-db_force_delete_breakpoint(
-       db_breakpoint_t bkpt,
-       vm_offset_t     task_thd,
-       boolean_t       is_task)
-{
-       db_printf("deleted a stale breakpoint at ");
-       if (bkpt->task == TASK_NULL || db_lookup_task(bkpt->task) >= 0)
-          db_task_printsym(bkpt->address, DB_STGY_PROC, bkpt->task);
-       else
-          db_printf("%#X", bkpt->address);
-       if (bkpt->task)
-          db_printf(" in task %X", bkpt->task);
-       if (task_thd)
-          db_printf(" for %s %X", (is_task)? "task": "thr_act", task_thd);
-       db_printf("\n");
-       db_delete_thread_breakpoint(bkpt, task_thd);
-}
-
-void
-db_check_breakpoint_valid(void)
-{
-       register db_thread_breakpoint_t tbp, tbp_next;
-       register db_breakpoint_t bkpt, *bkptp;
-
-       bkptp = &db_breakpoint_list;
-       for (bkpt = *bkptp; bkpt; bkpt = *bkptp) {
-           if (bkpt->task != TASK_NULL) {
-               if (db_lookup_task(bkpt->task) < 0) {
-                   db_force_delete_breakpoint(bkpt, 0, FALSE);
-                   *bkptp = bkpt->link;
-                   db_breakpoint_free(bkpt);
-                   continue;
-               }
-           } else {
-               for (tbp = bkpt->threads; tbp; tbp = tbp_next) {
-                   tbp_next = tbp->tb_next;
-                   if (tbp->tb_task_thd == 0)
-                       continue;
-                   if ((tbp->tb_is_task && 
-                        db_lookup_task((task_t)(tbp->tb_task_thd)) < 0) ||
-                       (!tbp->tb_is_task && 
-                        db_lookup_act((thread_t)(tbp->tb_task_thd)) < 0)) {
-                       db_force_delete_breakpoint(bkpt, 
-                                       tbp->tb_task_thd, tbp->tb_is_task);
-                   }
-               }
-               if (bkpt->threads == 0) {
-                   db_put_task_value(bkpt->address, BKPT_SIZE,
-                                bkpt->bkpt_inst, bkpt->task);
-                   *bkptp = bkpt->link;
-                   db_breakpoint_free(bkpt);
-                   continue;
-               }
-           }
-           bkptp = &bkpt->link;
-       }
-}
-
-void
-db_set_breakpoint(
-       task_t          task,
-       db_addr_t       addr,
-       int             count,
-       thread_t        thr_act,
-       boolean_t       task_bpt)
-{
-       register db_breakpoint_t bkpt;
-       db_breakpoint_t alloc_bkpt = 0;
-       vm_offset_t task_thd;
-
-       bkpt = db_find_breakpoint(task, addr);
-       if (bkpt) {
-           if (thr_act == THREAD_NULL
-               || db_find_thread_breakpoint(bkpt, thr_act)) {
-               db_printf("Already set.\n");
-               return;
-           }
-       } else {
-           if (!DB_CHECK_ACCESS((vm_offset_t)addr, BKPT_SIZE, task)) {
-               if (task) {
-                   db_printf("Warning: non-resident page for breakpoint at %llX",
-                             (unsigned long long)addr);
-                   db_printf(" in task %lX.\n", task);
-               } else {
-                   db_printf("Cannot set breakpoint at %llX in kernel space.\n",
-                             (unsigned long long)addr);
-                   return;
-               }
-           }
-           alloc_bkpt = bkpt = db_breakpoint_alloc();
-           if (bkpt == 0) {
-               db_printf("Too many breakpoints.\n");
-               return;
-           }
-           bkpt->task = task;
-           bkpt->flags = (task && thr_act == THREAD_NULL)?
-                               (BKPT_USR_GLOBAL|BKPT_1ST_SET): 0;
-           bkpt->address = addr;
-           bkpt->threads = 0;
-       }
-       if (db_breakpoint_list == 0)
-           db_breakpoint_number = 0;
-       task_thd = (task_bpt)   ? (vm_offset_t)(thr_act->task)
-                               : (vm_offset_t)thr_act;
-       if (db_add_thread_breakpoint(bkpt, task_thd, count, task_bpt) < 0) {
-           if (alloc_bkpt)
-               db_breakpoint_free(alloc_bkpt);
-           db_printf("Too many thread_breakpoints.\n");
-       } else {
-           db_printf("set breakpoint #%x\n", db_breakpoint_number);
-           if (alloc_bkpt) {
-               bkpt->link = db_breakpoint_list;
-               db_breakpoint_list = bkpt;
-           }
-       }
-}
-
-void
-db_delete_breakpoint(
-       task_t          task,
-       db_addr_t       addr,
-       vm_offset_t     task_thd)
-{
-       register db_breakpoint_t        bkpt;
-       register db_breakpoint_t        *prev;
-
-       for (prev = &db_breakpoint_list; (bkpt = *prev) != 0;
-                                            prev = &bkpt->link) {
-           if ((bkpt->task == task
-                  || (task != TASK_NULL && (bkpt->flags & BKPT_USR_GLOBAL)))
-               && bkpt->address == addr)
-               break;
-       }
-       if (bkpt && (bkpt->flags & BKPT_SET_IN_MEM)) {
-           db_printf("cannot delete it now.\n");
-           return;
-       }
-       if (bkpt == 0
-           || db_delete_thread_breakpoint(bkpt, task_thd) < 0) {
-           db_printf("Not set.\n");
-           return;
-       }
-       if (bkpt->threads == 0) {
-           *prev = bkpt->link;
-           db_breakpoint_free(bkpt);
-       }
-}
-
-db_breakpoint_t
-db_find_breakpoint(
-       task_t          task,
-       db_addr_t       addr)
-{
-       register db_breakpoint_t        bkpt;
-
-       for (bkpt = db_breakpoint_list; bkpt != 0; bkpt = bkpt->link) {
-           if ((bkpt->task == task
-                 || (task != TASK_NULL && (bkpt->flags & BKPT_USR_GLOBAL)))
-               && bkpt->address == addr)
-               return (bkpt);
-       }
-       return (0);
-}
-
-boolean_t
-db_find_breakpoint_here(
-       task_t          task,
-       db_addr_t       addr)
-{
-       register db_breakpoint_t        bkpt;
-
-       for (bkpt = db_breakpoint_list; bkpt != 0; bkpt = bkpt->link) {
-           if ((bkpt->task == task
-                  || (task != TASK_NULL && (bkpt->flags & BKPT_USR_GLOBAL)))
-                && bkpt->address == addr)
-               return(TRUE);
-           if ((bkpt->flags & BKPT_USR_GLOBAL) == 0 &&
-                 DB_PHYS_EQ(task, (vm_offset_t)addr, bkpt->task, (vm_offset_t)bkpt->address))
-               return (TRUE);
-       }
-       return(FALSE);
-}
-
-boolean_t      db_breakpoints_inserted = TRUE;
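-/*
- * db_set_breakpoints() plants every pending breakpoint instruction and
- * db_clear_breakpoints() restores the originals; this flag keeps the
- * two paired so each word is patched at most once in each direction.
- */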
-
-void
-db_set_breakpoints(void)
-{
-       register db_breakpoint_t bkpt;
-       register task_t task;
-       db_expr_t       inst;
-       thread_t        cur_act = current_thread();
-       task_t          cur_task =
-                               (cur_act) ?
-                                       cur_act->task : TASK_NULL;
-       boolean_t       inserted = TRUE;
-
-       if (!db_breakpoints_inserted) {
-           for (bkpt = db_breakpoint_list; bkpt != 0; bkpt = bkpt->link) {
-               if (bkpt->flags & BKPT_SET_IN_MEM)
-                   continue;
-               task = bkpt->task;
-               if (bkpt->flags & BKPT_USR_GLOBAL) {
-                   if ((bkpt->flags & BKPT_1ST_SET) == 0) {
-                       if (cur_task == TASK_NULL)
-                           continue;
-                       task = cur_task;
-                   } else
-                       bkpt->flags &= ~BKPT_1ST_SET;
-               }
-               if (DB_CHECK_ACCESS((vm_offset_t)bkpt->address, BKPT_SIZE, task)) {
-                   inst = db_get_task_value(bkpt->address, BKPT_SIZE, FALSE,
-                                                               task);
-                   if (inst == BKPT_SET(inst))
-                       continue;
-                   bkpt->bkpt_inst = (vm_size_t)inst;
-                   db_put_task_value(bkpt->address,
-                               BKPT_SIZE,
-                               BKPT_SET(bkpt->bkpt_inst), task);
-                   bkpt->flags |= BKPT_SET_IN_MEM;
-               } else {
-                   inserted = FALSE;
-               }
-           }
-           db_breakpoints_inserted = inserted;
-       }
-}
-
-void
-db_clear_breakpoints(void)
-{
-       register db_breakpoint_t bkpt, *bkptp;
-       register task_t  task;
-       db_expr_t inst;
-       thread_t         cur_act = current_thread();
-       task_t   cur_task = (cur_act) ?
-                       cur_act->task: TASK_NULL;
-
-       if (db_breakpoints_inserted) {
-           bkptp = &db_breakpoint_list;
-           for (bkpt = *bkptp; bkpt; bkpt = *bkptp) {
-               task = bkpt->task;
-               if (bkpt->flags & BKPT_USR_GLOBAL) {
-                   if (cur_task == TASK_NULL) {
-                       bkptp = &bkpt->link;
-                       continue;
-                   }
-                   task = cur_task;
-               }
-               if ((bkpt->flags & BKPT_SET_IN_MEM)
-                   && DB_CHECK_ACCESS((vm_offset_t)bkpt->address, BKPT_SIZE, task)) {
-                   inst = db_get_task_value(bkpt->address, BKPT_SIZE, FALSE, 
-                                                               task);
-                   if (inst != BKPT_SET(inst)) {
-                       if (bkpt->flags & BKPT_USR_GLOBAL) {
-                           bkptp = &bkpt->link;
-                           continue;
-                       }
-                       db_force_delete_breakpoint(bkpt, 0, FALSE);
-                       *bkptp = bkpt->link;
-                       db_breakpoint_free(bkpt);
-                       continue;
-                   }
-                   db_put_task_value(bkpt->address, BKPT_SIZE,
-                                bkpt->bkpt_inst, task);
-                   bkpt->flags &= ~BKPT_SET_IN_MEM;
-               }
-               bkptp = &bkpt->link;
-           }
-           db_breakpoints_inserted = FALSE;
-       }
-}
-
-/*
- * Set a temporary breakpoint.
- * The instruction is changed immediately,
- * so the breakpoint does not have to be on the breakpoint list.
- */
-db_breakpoint_t
-db_set_temp_breakpoint(
-       task_t          task,
-       db_addr_t       addr)
-{
-       register db_breakpoint_t        bkpt;
-
-       bkpt = db_breakpoint_alloc();
-       if (bkpt == 0) {
-           db_printf("Too many breakpoints.\n");
-           return 0;
-       }
-       bkpt->task = task;
-       bkpt->address = addr;
-       bkpt->flags = BKPT_TEMP;
-       bkpt->threads = 0;
-       if (db_add_thread_breakpoint(bkpt, 0, 1, FALSE) < 0) {
-           if (bkpt)
-               db_breakpoint_free(bkpt);
-           db_printf("Too many thread_breakpoints.\n");
-           return 0;
-       }
-       bkpt->bkpt_inst = (vm_size_t)db_get_task_value(bkpt->address, BKPT_SIZE, 
-                                               FALSE, task);
-       db_put_task_value(bkpt->address, BKPT_SIZE, 
-                               BKPT_SET(bkpt->bkpt_inst), task);
-       return bkpt;
-}
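-/*
- * Typical pairing (a sketch): single-stepping code plants a temporary
- * breakpoint and removes it once it fires, e.g.
- *
- *     db_breakpoint_t b = db_set_temp_breakpoint(task, addr);
- *     ...resume until the breakpoint traps...
- *     if (b != 0)
- *             db_delete_temp_breakpoint(task, b);
- */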
-
-void
-db_delete_temp_breakpoint(
-       task_t          task,
-       db_breakpoint_t bkpt)
-{
-       db_put_task_value(bkpt->address, BKPT_SIZE, bkpt->bkpt_inst, task);
-       db_delete_thread_breakpoint(bkpt, 0);
-       db_breakpoint_free(bkpt);
-}
-
-/*
- * List breakpoints.
- */
-void
-db_list_breakpoints(void)
-{
-       register db_breakpoint_t        bkpt;
-
-       if (db_breakpoint_list == 0) {
-           db_printf("No breakpoints set\n");
-           return;
-       }
-
-       db_printf(" No  Space    Task.Act    Cnt  Address(Cond)\n");
-       for (bkpt = db_breakpoint_list;
-            bkpt != 0;
-            bkpt = bkpt->link)
-       {
-           register    db_thread_breakpoint_t tp;
-           int         task_id;
-           int         act_id;
-
-           if (bkpt->threads) {
-               for (tp = bkpt->threads; tp; tp = tp->tb_next) {
-                   db_printf("%3d  ", tp->tb_number);
-                   if (bkpt->flags & BKPT_USR_GLOBAL)
-                       db_printf("user     ");
-                   else if (bkpt->task == TASK_NULL)
-                       db_printf("kernel   ");
-                   else if ((task_id = db_lookup_task(bkpt->task)) < 0)
-                       db_printf("%0*X ", 2*sizeof(vm_offset_t), bkpt->task);
-                   else
-                       db_printf("task%-3d  ", task_id);
-                   if (tp->tb_task_thd == 0) {
-                       db_printf("all         ");
-                   } else {
-                       if (tp->tb_is_task) {
-                           task_id = db_lookup_task((task_t)(tp->tb_task_thd));
-                           if (task_id < 0)
-                               db_printf("%0*X    ", 2*sizeof(vm_offset_t),
-                                          tp->tb_task_thd);
-                           else
-                               db_printf("task%03d     ", task_id);
-                       } else {
-                           thread_t thd = (thread_t)(tp->tb_task_thd);
-                           task_id = db_lookup_task(thd->task);
-                           act_id = db_lookup_task_act(thd->task, thd);
-                           if (task_id < 0 || act_id < 0)
-                               db_printf("%0*X    ", 2*sizeof(vm_offset_t),
-                                               tp->tb_task_thd);
-                           else        
-                               db_printf("task%03d.%-3d ", task_id, act_id);
-                       }
-                   }
-                   db_printf("%3d  ", tp->tb_init_count);
-                   db_task_printsym(bkpt->address, DB_STGY_PROC, bkpt->task);
-                   if (tp->tb_cond > 0) {
-                       db_printf("(");
-                       db_cond_print(tp);
-                       db_printf(")");
-                   }
-                   db_printf("\n");
-               }
-           } else {
-               if (bkpt->task == TASK_NULL)
-                   db_printf("  ?  kernel   ");
-               else
-                   db_printf("%*X ", 2*sizeof(vm_offset_t), bkpt->task);
-               db_printf("(?)              ");
-               db_task_printsym(bkpt->address, DB_STGY_PROC, bkpt->task);
-               db_printf("\n");
-           }
-       }
-}
-
-void
-db_delete_all_breakpoints(
-       task_t          task)
-{
-       register db_breakpoint_t        bkpt;
-
-       bkpt = db_breakpoint_list;
-       while ( bkpt != 0 ) {
-               if (bkpt->task == task ||
-                   (task != TASK_NULL && (bkpt->flags & BKPT_USR_GLOBAL))) {
-                       db_delete_breakpoint(task, bkpt->address, 0);
-                       bkpt = db_breakpoint_list;
-               }
-               else
-                       bkpt = bkpt->link;
-       }
-}
-
-/* Delete breakpoint */
-void
-db_delete_cmd(void)
-{
-       register int n;
-       thread_t         thr_act;
-       vm_offset_t task_thd;
-       boolean_t user_global = FALSE;
-       boolean_t task_bpt = FALSE;
-       boolean_t user_space = FALSE;
-       boolean_t thd_bpt = FALSE;
-       db_expr_t addr;
-       int t;
-       
-       t = db_read_token();
-       if (t == tSLASH) {
-           t = db_read_token();
-           if (t != tIDENT) {
-               db_printf("Bad modifier \"%s\"\n", db_tok_string);
-               db_error(0);
-           }
-           user_global = db_option(db_tok_string, 'U');
-           user_space = (user_global)? TRUE: db_option(db_tok_string, 'u');
-           task_bpt = db_option(db_tok_string, 'T');
-           thd_bpt = db_option(db_tok_string, 't');
-           if (task_bpt && user_global)
-               db_error("Cannot specify both 'T' and 'U' option\n");
-           t = db_read_token();
-       }
-
-       if ( t == tSTAR ) {
-               db_printf("Delete ALL breakpoints\n");
-               db_delete_all_breakpoints( (task_t)task_bpt );
-               return;
-       }
-
-       if (t == tHASH) {
-           db_thread_breakpoint_t tbp;
-           db_breakpoint_t bkpt = 0;
-
-           if (db_read_token() != tNUMBER) {
-               db_printf("Bad break point number #%s\n", db_tok_string);
-               db_error(0);
-           }
-           if ((tbp = db_find_breakpoint_number((int)db_tok_number, &bkpt)) == 0) {
-               db_printf("No such break point #%d\n", db_tok_number);
-               db_error(0);
-           }
-           db_delete_breakpoint(bkpt->task, bkpt->address, tbp->tb_task_thd);
-           return;
-       }
-       db_unread_token(t);
-       if (!db_expression(&addr)) {
-           /*
-            *  We attempt to pick up the user_space indication from db_dot,
-            *  so that a plain "d" always works.
-            */
-           addr = (db_expr_t)db_dot;
-           if (!user_space && !DB_VALID_ADDRESS(addr, FALSE))
-               user_space = TRUE;
-       }
-       if (!DB_VALID_ADDRESS(addr, user_space)) {
-           db_printf("Address %#llX is not in %s space\n", (unsigned long long)addr, 
-                       (user_space)? "user": "kernel");
-           db_error(0);
-       }
-       if (thd_bpt || task_bpt) {
-           for (n = 0; db_get_next_act(&thr_act, n); n++) {
-               if (thr_act == THREAD_NULL)
-                   db_error("No active thr_act\n");
-               if (task_bpt) {
-                   if (thr_act->task == TASK_NULL)
-                       db_error("No task\n");
-                   task_thd = (vm_offset_t) (thr_act->task);
-               } else
-                   task_thd = (user_global)? 0: (vm_offset_t) thr_act;
-               db_delete_breakpoint(db_target_space(thr_act, user_space),
-                                       (db_addr_t)addr, task_thd);
-           }
-       } else {
-           db_delete_breakpoint(db_target_space(THREAD_NULL, user_space),
-                                        (db_addr_t)addr, 0);
-       }
-}
-
-/* Set breakpoint with skip count */
-#include <mach/machine/vm_param.h>
-
-void
-db_breakpoint_cmd(db_expr_t addr, __unused boolean_t have_addr, db_expr_t count,
-                 char *modif)
-{
-       register int n;
-       thread_t thr_act;
-       boolean_t user_global = db_option(modif, 'U');
-       boolean_t task_bpt = db_option(modif, 'T');
-       boolean_t user_space;
-
-       if (count == (uint64_t)-1)
-           count = 1;
-#if 0 /* CHECKME */
-       if (!task_bpt && db_option(modif,'t'))
-         task_bpt = TRUE;
-#endif
-
-       if (task_bpt && user_global)
-           db_error("Cannot specify both 'T' and 'U'\n");
-       user_space = (user_global)? TRUE: db_option(modif, 'u');
-       if (user_space && db_access_level < DB_ACCESS_CURRENT)
-           db_error("User space break point is not supported\n");
-       if ((!task_bpt || !user_space) &&
-           !DB_VALID_ADDRESS(addr, user_space)) {
-           /* if the user has explicitly specified user space,
-              do not insert a breakpoint into the kernel */
-           if (user_space)
-             db_error("Invalid user space address\n");
-           user_space = TRUE;
-           db_printf("%#llX is in user space\n", (unsigned long long)addr);
-#ifdef ppc
-           db_printf("kernel is from %#X to %#x\n", VM_MIN_KERNEL_ADDRESS, vm_last_addr);
-#else
-           db_printf("kernel is from %#X to %#x\n", VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
-#endif
-       }
-       if (db_option(modif, 't') || task_bpt) {
-           for (n = 0; db_get_next_act(&thr_act, n); n++) {
-               if (thr_act == THREAD_NULL)
-                   db_error("No active thr_act\n");
-               if (task_bpt && thr_act->task == TASK_NULL)
-                   db_error("No task\n");
-               if (db_access_level <= DB_ACCESS_CURRENT && user_space
-                        && thr_act->task != db_current_space())
-                   db_error("Cannot set break point in inactive user space\n");
-               db_set_breakpoint(db_target_space(thr_act, user_space), 
-                                       (db_addr_t)addr, (int)count,
-                                       (user_global)? THREAD_NULL: thr_act,
-                                       task_bpt);
-           }
-       } else {
-           db_set_breakpoint(db_target_space(THREAD_NULL, user_space),
-                                (db_addr_t)addr,
-                                (int)count, THREAD_NULL, FALSE);
-       }
-}
-
-/* list breakpoints */
-void
-db_listbreak_cmd(__unused db_expr_t addr, __unused boolean_t have_addr,
-                __unused db_expr_t count, __unused char *modif)
-{
-       db_list_breakpoints();
-}
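
Both the delete and break commands above fold their 'T', 'U', and thread modifiers into a single task_thd word before reaching the breakpoint tables: 0 marks a user-global breakpoint, a task pointer a task-qualified one, and a thread pointer a thread-qualified one. A minimal sketch of that encoding, assuming the types declared in db_break.h below (the helper name encode_task_thd is illustrative, not from the source):

static vm_offset_t
encode_task_thd(boolean_t user_global, boolean_t task_bpt, thread_t thr_act)
{
	if (user_global)
		return (vm_offset_t)0;              /* user-global: every thread */
	if (task_bpt)
		return (vm_offset_t)thr_act->task;  /* any thread in this task */
	return (vm_offset_t)thr_act;                /* exactly this thread */
}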
diff --git a/osfmk/ddb/db_break.h b/osfmk/ddb/db_break.h
deleted file mode 100644 (file)
index f456b25..0000000
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:47  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.18.3  1995/01/06  19:10:05  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     64bit cleanup, prototypes.
- *     [1994/10/14  03:39:52  dwm]
- *
- * Revision 1.1.18.2  1994/09/23  01:18:04  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:09:24  ezf]
- * 
- * Revision 1.1.18.1  1994/06/11  21:11:29  bolinger
- *     Merge up to NMK17.2.
- *     [1994/06/11  20:03:39  bolinger]
- * 
- * Revision 1.1.16.1  1994/04/11  09:34:32  bernadat
- *     Moved db_breakpoint struct declaration from db_break.c
- *     to here.
- *     [94/03/16            bernadat]
- * 
- * Revision 1.1.12.2  1994/03/17  22:35:24  dwm
- *     The infamous name change:  thread_activation + thread_shuttle = thread.
- *     [1994/03/17  21:25:41  dwm]
- * 
- * Revision 1.1.12.1  1994/01/12  17:50:30  dwm
- *     Coloc: initial restructuring to follow Utah model.
- *     [1994/01/12  17:13:00  dwm]
- * 
- * Revision 1.1.4.4  1993/07/27  18:26:51  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:10:59  elliston]
- * 
- * Revision 1.1.4.3  1993/06/07  22:06:31  jeffc
- *     CR9176 - ANSI C violations: trailing tokens on CPP
- *     directives, extra semicolons after decl_ ..., asm keywords
- *     [1993/06/07  18:57:06  jeffc]
- * 
- * Revision 1.1.4.2  1993/06/02  23:10:21  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:55:49  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:24:12  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.6  91/10/09  15:58:03  af
- *      Revision 2.5.3.1  91/10/05  13:05:04  jeffreyh
- *             Added db_thread_breakpoint structure, and added task and threads
- *             fields to db_breakpoint structure.  Some status flags were also
- *             added to keep track of user space break points correctly.
- *             [91/08/29            tak]
- * 
- * Revision 2.5.3.1  91/10/05  13:05:04  jeffreyh
- *     Added db_thread_breakpoint structure, and added task and threads
- *     fields to db_breakpoint structure.  Some status flags were also
- *     added to keep track of user space break points correctly.
- *     [91/08/29            tak]
- * 
- * Revision 2.5  91/05/14  15:32:35  mrt
- *     Correcting copyright
- * 
- * Revision 2.4  91/02/05  17:06:06  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:17:10  mrt]
- * 
- * Revision 2.3  90/10/25  14:43:40  rwd
- *     Added map field to breakpoints.
- *     [90/10/18            rpd]
- * 
- * Revision 2.2  90/08/27  21:50:00  dbg
- *     Modularized typedef names.
- *     [90/08/20            af]
- *     Add external definitions.
- *     [90/08/07            dbg]
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-#ifndef        _DDB_DB_BREAK_H_
-#define        _DDB_DB_BREAK_H_
-
-#include <machine/db_machdep.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <mach/boolean.h>
-
-/*
- * thread list at the same breakpoint address
- */
-struct db_thread_breakpoint {
-       vm_offset_t tb_task_thd;                /* target task or thread */
-       boolean_t tb_is_task;                   /* task qualified */
-       short    tb_number;                     /* breakpoint number */
-       short    tb_init_count;                 /* skip count (initial value) */
-       short    tb_count;                      /* current skip count */
-       short    tb_cond;                       /* break condition */
-       struct   db_thread_breakpoint *tb_next; /* next chain */
-};
-typedef struct db_thread_breakpoint *db_thread_breakpoint_t;
-
-/*
- * Breakpoint.
- */
-struct db_breakpoint {
-       task_t    task;                 /* target task */
-       db_addr_t address;              /* set here */
-       db_thread_breakpoint_t threads; /* thread */
-       int     flags;                  /* flags: */
-#define        BKPT_SINGLE_STEP        0x2     /* to simulate single step */
-#define        BKPT_TEMP               0x4     /* temporary */
-#define BKPT_USR_GLOBAL                0x8     /* global user space break point */
-#define BKPT_SET_IN_MEM                0x10    /* break point is set in memory */
-#define BKPT_1ST_SET           0x20    /* 1st time set of user global bkpt */
-       vm_size_t       bkpt_inst;      /* saved instruction at bkpt */
-       struct db_breakpoint *link;     /* link in in-use or free chain */
-};
-
-typedef struct db_breakpoint *db_breakpoint_t;
-
-
-/*
- * Prototypes for functions exported by this module.
- */
-
-db_thread_breakpoint_t db_find_thread_breakpoint_here(
-       task_t          task,
-       db_addr_t       addr);
-
-void db_check_breakpoint_valid(void);
-
-void db_set_breakpoint(
-       task_t          task,
-       db_addr_t       addr,
-       int             count,
-       thread_t        thr_act,
-       boolean_t       task_bpt);
-
-db_breakpoint_t db_find_breakpoint(
-       task_t          task,
-       db_addr_t       addr);
-
-boolean_t db_find_breakpoint_here(
-       task_t          task,
-       db_addr_t       addr);
-
-db_thread_breakpoint_t db_find_breakpoint_number(
-       int             num,
-       db_breakpoint_t *bkptp);
-
-void db_set_breakpoints(void);
-
-void db_clear_breakpoints(void);
-
-db_breakpoint_t db_set_temp_breakpoint(
-       task_t          task,
-       db_addr_t       addr);
-
-void db_delete_temp_breakpoint(
-       task_t          task,
-       db_breakpoint_t bkpt);
-
-void db_delete_cmd(void);
-
-void db_breakpoint_cmd(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_listbreak_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-#endif /* !_DDB_DB_BREAK_H_ */
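
Each db_breakpoint above carries a chain of db_thread_breakpoint records, one per qualifier set at the same address; tb_task_thd holds 0 (global), a task pointer, or a thread pointer, with tb_is_task disambiguating the last two. A hedged sketch of a chain walk under those declarations (the helper bkpt_match_for_thread is illustrative, not from the source):

static db_thread_breakpoint_t
bkpt_match_for_thread(db_breakpoint_t bkpt, thread_t thr)
{
	db_thread_breakpoint_t tbp;

	for (tbp = bkpt->threads; tbp != 0; tbp = tbp->tb_next) {
		if (tbp->tb_task_thd == 0)
			return tbp;             /* global entry */
		if (tbp->tb_is_task &&
		    tbp->tb_task_thd == (vm_offset_t)thr->task)
			return tbp;             /* task-qualified */
		if (!tbp->tb_is_task &&
		    tbp->tb_task_thd == (vm_offset_t)thr)
			return tbp;             /* thread-qualified */
	}
	return 0;                               /* no entry applies */
}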
diff --git a/osfmk/ddb/db_coff.h b/osfmk/ddb/db_coff.h
deleted file mode 100644 (file)
index 57f3b7d..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:47  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.2.1  1995/02/23  16:34:08  alanl
- *     Initial file creation.
- *     [95/02/06            sjs]
- *
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_COFF_H_
-#define        _DDB_DB_COFF_H_
-
-#define DB_NO_AOUT     1
-
-
-/*
- * Symbol table routines for COFF format files.
- */
-
-boolean_t coff_db_sym_init(
-       char *  symtab,
-       char *  esymtab,
-       const char *    name,
-       char *  task_addr);
-
-db_sym_t coff_db_lookup(
-       db_symtab_t     *stab,
-       char *          symstr);
-
-int coff_db_lookup_incomplete(
-       db_symtab_t     *stab,
-       char *          symstr,
-       char **         name,
-       int             *len,
-       int             *toadd);
-
-int coff_db_print_completion(
-       db_symtab_t     *stab,
-       char *          symstr);
-
-db_sym_t coff_db_search_symbol(
-       db_symtab_t     *symtab,
-       db_addr_t       off,
-       db_strategy_t   strategy,
-       db_expr_t       *diffp);                /* in/out */
-
-void coff_db_symbol_values(
-       db_sym_t        sym,
-       char            **namep,
-       db_expr_t       *valuep);
-
-db_sym_t coff_db_search_by_addr(
-       db_symtab_t     *stab,
-       db_addr_t       addr,
-       char            **file,
-       char            **func,
-       int             *line,
-       db_expr_t       *diff,
-       int             *args);
-
-boolean_t coff_db_line_at_pc(
-       db_symtab_t     *stab,
-       db_sym_t        sym,
-       char            **file,
-       int             *line,
-       db_expr_t       pc);
-
-int coff_db_qualified_print_completion(
-       db_symtab_t     *stab,
-       char            *sym);
-
-void coff_db_init(void);
-
-#endif /* !_DDB_DB_COFF_H_ */
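
The COFF interface above follows the usual ddb symbol-table pattern: register the table with coff_db_sym_init(), resolve a name with coff_db_lookup(), then unpack the resulting db_sym_t with coff_db_symbol_values(). A sketch of that flow, assuming DB_SYM_NULL and db_printf() from ddb/db_sym.h and ddb/db_output.h (the wrapper print_symbol_value is illustrative):

static void
print_symbol_value(db_symtab_t *stab, char *symstr)
{
	db_sym_t  sym;
	char      *name;
	db_expr_t value;

	sym = coff_db_lookup(stab, symstr);
	if (sym == DB_SYM_NULL)
		return;                 /* name not in this table */
	coff_db_symbol_values(sym, &name, &value);
	db_printf("%s = %#llx\n", name, (unsigned long long)value);
}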
diff --git a/osfmk/ddb/db_command.c b/osfmk/ddb/db_command.c
deleted file mode 100644 (file)
index 7e21b12..0000000
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Command dispatcher.
- */
-#include <norma_vm.h>
-
-#include <mach/boolean.h>
-#include <string.h>
-#include <machine/db_machdep.h>
-
-#if defined(__alpha)
-#  include <kdebug.h>
-#  if KDEBUG
-#    include <machine/kdebug.h>
-#  endif
-#endif /* defined(__alpha) */
-
-#include <ddb/db_lex.h>
-#include <ddb/db_output.h>
-#include <ddb/db_break.h>
-#include <ddb/db_command.h>
-#include <ddb/db_cond.h>
-#include <ddb/db_examine.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_macro.h>
-#include <ddb/db_print.h>
-#include <ddb/db_run.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_watch.h>
-#include <ddb/db_write_cmd.h>
-
-#include <machine/setjmp.h>
-#include <kern/thread.h>
-
-#include <kern/misc_protos.h>
-#include <vm/vm_print.h>
-#include <ipc/ipc_print.h>
-#include <kern/kern_print.h>
-#include <machine/db_machdep.h>                /* For db_stack_trace_cmd(). */
-#include <kern/zalloc.h>       /* For db_show_one_zone, db_show_all_zones. */
-#include <kern/lock.h>                 /* For db_show_all_slocks(). */
-
-#if    NORMA_VM
-#include <xmm/xmm_obj.h>
-#endif /* NORMA_VM */
-
-/*
- * Exported global variables
- */
-boolean_t      db_cmd_loop_done;
-jmp_buf_t      *db_recover = 0;
-db_addr_t      db_dot;
-db_addr_t      db_last_addr;
-db_addr_t      db_prev;
-db_addr_t      db_next;
-
-/*
- * if 'ed' style: 'dot' is set at start of last item printed,
- * and '+' points to next line.
- * Otherwise: 'dot' points to next item, '..' points to last.
- */
-boolean_t      db_ed_style = TRUE;
-
-/*
- * Results of command search.
- */
-#define        CMD_UNIQUE      0
-#define        CMD_FOUND       1
-#define        CMD_NONE        2
-#define        CMD_AMBIGUOUS   3
-#define        CMD_HELP        4
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-
-void db_command(
-       struct db_command       **last_cmdp,    /* IN_OUT */
-       db_expr_t               *last_countp,   /* IN_OUT */
-       char                    *last_modifp,   /* IN_OUT */
-       struct db_command       *cmd_table);
-
-void db_help_cmd(void);
-
-void db_fncall(void);
-
-void db_cmd_list(struct db_command *table);
-
-int db_cmd_search(
-       char *                  name,
-       struct db_command *     table,
-       struct db_command **    cmdp);  /* out */
-
-void db_command_list(
-       struct db_command       **last_cmdp,    /* IN_OUT */
-       db_expr_t               *last_countp,   /* IN_OUT */
-       char                    *last_modifp,   /* IN_OUT */
-       struct db_command       *cmd_table);
-
-/*
- * Search for command prefix.
- */
-int
-db_cmd_search(
-       char *                  name,
-       struct db_command *     table,
-       struct db_command **    cmdp)   /* out */
-{
-       struct db_command       *cmd;
-       int             result = CMD_NONE;
-
-       for (cmd = table; cmd->name != 0; cmd++) {
-           register char *lp;
-           const char *rp;
-           register int  c;
-
-           lp = name;
-           rp = cmd->name;
-           while ((c = *lp) == *rp) {
-               if (c == 0) {
-                   /* complete match */
-                   *cmdp = cmd;
-                   return (CMD_UNIQUE);
-               }
-               lp++;
-               rp++;
-           }
-           if (c == 0) {
-               /* end of name, not end of command -
-                  partial match */
-               if (result == CMD_FOUND) {
-                   result = CMD_AMBIGUOUS;
-                   /* but keep looking for a full match -
-                      this lets us match single letters */
-               }
-               else {
-                   *cmdp = cmd;
-                   result = CMD_FOUND;
-               }
-           }
-       }
-       if (result == CMD_NONE) {
-           /* check for 'help' */
-           if (!strncmp(name, "help", strlen(name)))
-               result = CMD_HELP;
-       }
-       return (result);
-}
-
-void
-db_cmd_list(struct db_command *table)
-{
-       struct db_command *new;
-       struct db_command *old;
-       struct db_command *cur;
-       unsigned int l;
-       unsigned int len;
-
-       len = 1;
-       for (cur = table; cur->name != 0; cur++)
-           if ((l = strlen(cur->name)) >= len)
-               len = l + 1;
-
-       old = (struct db_command *)0;
-       for (;;) {
-           new = (struct db_command *)0;
-           for (cur = table; cur->name != 0; cur++)
-               if ((new == (struct db_command *)0 ||
-                    strncmp(cur->name, new->name, strlen(cur->name)) < 0) &&
-                   (old == (struct db_command *)0 ||
-                    strncmp(cur->name, old->name, strlen(cur->name)) > 0))
-                   new = cur;
-           if (new == (struct db_command *)0)
-                   return;
-           db_reserve_output_position(len);
-           db_printf("%-*s", len, new->name);
-           old = new;
-       }
-}
-
-void
-db_command(
-       struct db_command       **last_cmdp,    /* IN_OUT */
-       db_expr_t               *last_countp,   /* IN_OUT */
-       char                    *last_modifp,   /* IN_OUT */
-       struct db_command       *cmd_table)
-{
-       struct db_command       *cmd;
-       int             t;
-       char            modif[TOK_STRING_SIZE];
-       char            *modifp = &modif[0];
-       db_expr_t       addr, count;
-       boolean_t       have_addr = FALSE;
-       int             result;
-
-       t = db_read_token();
-       if (t == tEOL || t == tSEMI_COLON) {
-           /* empty line repeats last command, at 'next' */
-           cmd = *last_cmdp;
-           count = *last_countp;
-           modifp = last_modifp;
-           addr = (db_expr_t)db_next;
-           have_addr = FALSE;
-           if (t == tSEMI_COLON)
-               db_unread_token(t);
-       }
-       else if (t == tEXCL) {
-           db_fncall();
-           return;
-       }
-       else if (t != tIDENT) {
-           db_printf("?\n");
-           db_flush_lex();
-           return;
-       }
-       else {
-           /*
-            * Search for command
-            */
-           while (cmd_table) {
-               result = db_cmd_search(db_tok_string,
-                                      cmd_table,
-                                      &cmd);
-               switch (result) {
-                   case CMD_NONE:
-                       if (db_exec_macro(db_tok_string) == 0)
-                           return;
-                       db_printf("No such command \"%s\"\n", db_tok_string);
-                       db_flush_lex();
-                       return;
-                   case CMD_AMBIGUOUS:
-                       db_printf("Ambiguous\n");
-                       db_flush_lex();
-                       return;
-                   case CMD_HELP:
-                       db_cmd_list(cmd_table);
-                       db_flush_lex();
-                       return;
-                   default:
-                       break;
-               }
-               if ((cmd_table = cmd->more) != 0) {
-                   t = db_read_token();
-                   if (t != tIDENT) {
-                       db_cmd_list(cmd_table);
-                       db_flush_lex();
-                       return;
-                   }
-               }
-           }
-
-           if ((cmd->flag & CS_OWN) == 0) {
-               /*
-                * Standard syntax:
-                * command [/modifier] [addr] [,count]
-                */
-               t = db_read_token();
-               if (t == tSLASH) {
-                   t = db_read_token();
-                   if (t != tIDENT) {
-                       db_printf("Bad modifier \"/%s\"\n", db_tok_string);
-                       db_flush_lex();
-                       return;
-                   }
-                   strlcpy(modif, db_tok_string, TOK_STRING_SIZE);
-               }
-               else {
-                   db_unread_token(t);
-                   modif[0] = '\0';
-               }
-
-               if (db_expression(&addr)) {
-                   db_dot = (db_addr_t) addr;
-                   db_last_addr = db_dot;
-                   have_addr = TRUE;
-               }
-               else {
-                   addr = (db_expr_t) db_dot;
-                   have_addr = FALSE;
-               }
-               t = db_read_token();
-               if (t == tCOMMA) {
-                   if (!db_expression(&count)) {
-                       db_printf("Count missing after ','\n");
-                       db_flush_lex();
-                       return;
-                   }
-               }
-               else {
-                   db_unread_token(t);
-                   count = -1;
-               }
-           }
-       }
-       if (cmd != 0) {
-           /*
-            * Execute the command.
-            */
-           (*cmd->fcn)(addr, have_addr, count, modifp);
-
-           if (cmd->flag & CS_SET_DOT) {
-               /*
-                * If command changes dot, set dot to
-                * previous address displayed (if 'ed' style).
-                */
-               if (db_ed_style) {
-                   db_dot = db_prev;
-               }
-               else {
-                   db_dot = db_next;
-               }
-           }
-           else {
-               /*
-                * If command does not change dot,
-                * set 'next' location to be the same.
-                */
-               db_next = db_dot;
-           }
-       }
-       *last_cmdp = cmd;
-       *last_countp = count;
-       strlcpy(last_modifp, modifp, TOK_STRING_SIZE);
-}
-
-void
-db_command_list(
-       struct db_command       **last_cmdp,    /* IN_OUT */
-       db_expr_t               *last_countp,   /* IN_OUT */
-       char                    *last_modifp,   /* IN_OUT */
-       struct db_command       *cmd_table)
-{
-       do {
-           db_command(last_cmdp, last_countp, last_modifp, cmd_table);
-           db_skip_to_eol();
-       } while (db_read_token() == tSEMI_COLON && db_cmd_loop_done == 0);
-}
-
-
-extern void    db_system_stats(void);
-
-struct db_command db_show_all_cmds[] = {
-       {
-               .name = "acts",
-               .fcn = db_show_all_acts,
-       },
-       {
-               .name = "spaces",
-               .fcn = db_show_all_spaces,
-       },
-       {
-               .name = "tasks",
-               .fcn = db_show_all_acts,
-       },
-       /* temporary alias for sanity preservation */
-       {
-               .name ="threads",
-               db_show_all_acts,
-       },
-       {
-               .name = "zones",
-               .fcn = db_show_all_zones,
-       },
-       {
-               .name = "vmtask",
-               .fcn = db_show_all_task_vm,
-       },
-       {
-               .name = (const char *)NULL,
-       },
-};
-
-/* XXX */
-
-extern void            db_show_thread_log(void);
-extern void            db_show_etap_log(db_expr_t, int, db_expr_t, char *);
-
-struct db_command db_show_cmds[] = {
-       {
-               .name = "all",
-               .more = db_show_all_cmds
-       },
-       {
-               .name = "registers",
-               .fcn = db_show_regs,
-       },
-       {
-               .name = "variables",
-               .fcn = db_show_variable,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "breaks",
-               .fcn = db_listbreak_cmd,
-       },
-       {
-               .name = "watches",
-               .fcn = db_listwatch_cmd,
-       },
-       {
-               .name = "task",
-               .fcn = db_show_one_task,
-       },
-       {
-               .name = "act",
-               .fcn = db_show_one_act,
-       },
-       {
-               .name = "shuttle",
-               .fcn = db_show_shuttle,
-       },
-#if 0
-       {
-               .name = "thread",
-               .fcn = db_show_one_thread,
-       },
-#endif
-       {
-               .name = "vmtask",
-               .fcn = db_show_one_task_vm,
-       },
-       {
-               .name = "macro",
-               .fcn = (db_func)db_show_macro,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "runq",
-               .fcn = (db_func)db_show_runq,
-       },
-       {
-               .name = "map",
-               .fcn = (db_func)vm_map_print,
-       },
-       {
-               .name = "object",
-               .fcn = vm_object_print,
-       },
-       {
-               .name = "page",
-               .fcn = (db_func)vm_page_print,
-       },
-       {
-               .name = "copy",
-               .fcn = (db_func)vm_map_copy_print,
-       },
-       {
-               .name = "port",
-               .fcn = (db_func)ipc_port_print,
-       },
-       {
-               .name = "pset",
-               .fcn = (db_func)ipc_pset_print,
-       },
-       {
-               .name = "kmsg",
-               .fcn = (db_func)ipc_kmsg_print,
-       },
-       {
-               .name = "msg",
-               .fcn = (db_func)ipc_msg_print,
-       },
-       {
-               .name = "ipc_port",
-               .fcn = db_show_port_id,
-       },
-#if NORMA_VM
-       {
-               .name = "xmm_obj",
-               .fcn = (db_func)xmm_obj_print,
-       },
-       {
-               .name = "xmm_reply",
-               .fcn = (db_func)xmm_reply_print,
-       },
-#endif /* NORMA_VM */
-       {
-               .name = "space",
-               .fcn = db_show_one_space,
-       },
-       {
-               .name = "system",
-               .fcn = (db_func)db_system_stats,
-       },
-       {
-               .name = "zone",
-               .fcn = db_show_one_zone,
-       },
-       {
-               .name = "lock",
-               .fcn = (db_func)db_show_one_lock,
-       },
-       {
-               .name = "simple_lock",
-               .fcn = (db_func)db_show_one_simple_lock,
-       },
-       {
-               .name = "thread_log",
-               .fcn = (db_func)db_show_thread_log,
-       },
-       {
-               .name = (const char *)NULL,
-       },
-};
-
-#define        db_switch_cpu kdb_on
-
-struct db_command db_command_table[] = {
-#if DB_MACHINE_COMMANDS
-       /* this must be the first entry, if it exists */
-       {
-               .name = "machine",
-       },
-#endif /* DB_MACHINE_COMMANDS */
-       {
-               .name = "print",
-               .fcn = (db_func)db_print_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "examine",
-               .fcn = db_examine_cmd,
-               .flag = CS_MORE|CS_SET_DOT,
-       },
-       {
-               .name = "x",
-               .fcn = db_examine_cmd,
-               .flag = CS_MORE|CS_SET_DOT,
-       },
-       {
-               .name = "xf",
-               .fcn = db_examine_forward,
-               .flag = CS_SET_DOT,
-       },
-       {
-               .name = "xb",
-               .fcn = db_examine_backward,
-               .flag = CS_SET_DOT,
-       },
-       {
-               .name = "search",
-               .fcn = (db_func)db_search_cmd,
-               .flag = CS_OWN|CS_SET_DOT,
-       },
-       {
-               .name = "set",
-               .fcn = (db_func)db_set_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "write",
-               .fcn = db_write_cmd,
-               .flag = CS_MORE|CS_SET_DOT,
-       },
-       {
-               .name = "w",
-               .fcn = db_write_cmd,
-               .flag = CS_MORE|CS_SET_DOT,
-       },
-       {
-               .name = "delete",
-               .fcn = (db_func)db_delete_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "d",
-               .fcn = (db_func)db_delete_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "break",
-               .fcn = db_breakpoint_cmd,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "dwatch",
-               .fcn = db_deletewatch_cmd,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "watch",
-               .fcn = db_watchpoint_cmd,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "step",
-               .fcn = db_single_step_cmd,
-       },
-       {
-               .name = "s",
-               .fcn = db_single_step_cmd,
-       },
-       {
-               .name = "continue",
-               .fcn = db_continue_cmd,
-       },
-       {
-               .name = "c",
-               .fcn = db_continue_cmd,
-       },
-       {
-               .name = "gdb",
-               .fcn = db_continue_gdb,
-       },
-       {
-               .name = "until",
-               .fcn = db_trace_until_call_cmd,
-       },
-
-       /* As per request of DNoveck, CR1550, leave this disabled       */
-#if 0  /* until CR1440 is fixed, to avoid toe-stubbing                 */
-       {
-               .name = "next",
-               .fcn = db_trace_until_matching_cmd,
-       },
-#endif
-       {
-               .name = "match",
-               .fcn = db_trace_until_matching_cmd,
-       },
-       {
-               .name = "trace",
-               .fcn = db_stack_trace_cmd,
-       },
-       {
-               .name = "cond",
-               .fcn = (db_func)db_cond_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "call",
-               .fcn = (db_func)db_fncall,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "macro",
-               .fcn = (db_func)db_def_macro_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "dmacro",
-               .fcn = (db_func)db_del_macro_cmd,
-               .flag = CS_OWN,
-       },
-       {
-               .name = "show",
-               .more = db_show_cmds
-       },
-       {
-               .name = "cpu",
-               .fcn = (db_func)db_switch_cpu,
-       },
-       {
-               .name = "dr",
-               .fcn = db_display_real,
-               .flag = CS_MORE|CS_SET_DOT,
-       },
-       {
-               .name = "di",
-               .fcn = db_display_iokit,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "dk",
-               .fcn = db_display_kmod,
-               .flag = CS_MORE,
-       },
-
-       {
-               .name = "reboot",
-               .fcn = (db_func)db_reboot,
-       },
-       {
-               .name = "ms",
-               .fcn = db_msr,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "cp",
-               .fcn = db_cpuid,
-               .flag = CS_MORE,
-       },
-       {
-               .name = "da",
-               .fcn = db_apic,
-               .flag = CS_MORE,
-       },
-       {
-               .name = (const char *)NULL,
-       },
-};
-
-/* This function installs the machine-dependent commands.  It must be
-   called before the debugger is enabled. */
-void db_machine_commands_install(struct db_command *ptr)
-{
-  db_command_table[0].more = ptr;
-  return;
-}
-
-
-struct db_command      *db_last_command = 0;
-db_expr_t              db_last_count = 0;
-char                   db_last_modifier[TOK_STRING_SIZE] = { '\0' };
-
-void
-db_help_cmd(void)
-{
-       struct db_command *cmd = db_command_table;
-
-       while (cmd->name != 0) {
-           db_printf("%-12s", cmd->name);
-           db_end_line();
-           cmd++;
-       }
-}
-
-int    (*ddb_display)(void);
-
-extern int db_output_line;
-extern int db_macro_level;
-
-void
-db_command_loop(void)
-{
-       jmp_buf_t db_jmpbuf;
-       jmp_buf_t *prev = db_recover;
-
-       /*
-        * Initialize 'prev' and 'next' to dot.
-        */
-       db_prev = db_dot;
-       db_next = db_dot;
-
-       if (ddb_display)
-               (*ddb_display)();
-
-       db_cmd_loop_done = 0;
-       while (!db_cmd_loop_done) {
-           (void) _setjmp(db_recover = &db_jmpbuf);
-           db_macro_level = 0;
-           if (db_print_position() != 0)
-               db_printf("\n");
-           db_output_line = 0;
-           db_indent = 0;
-           db_reset_more();
-           db_output_prompt();
-
-           (void) db_read_line("!!");
-           db_command_list(&db_last_command, &db_last_count,
-                           db_last_modifier, db_command_table);
-       }
-
-       db_recover = prev;
-}
-
-boolean_t
-db_exec_cmd_nest(
-       const char      *cmd,
-       int             size)
-{
-       struct db_lex_context lex_context;
-
-       db_cmd_loop_done = 0;
-       if (cmd) {
-           db_save_lex_context(&lex_context);
-           db_switch_input(cmd, size);
-       }
-       db_command_list(&db_last_command, &db_last_count,
-                       db_last_modifier, db_command_table);
-       if (cmd)
-           db_restore_lex_context(&lex_context);
-       return(db_cmd_loop_done == 0);
-}
-
-void
-db_error(const char *s)
-{
-       db_macro_level = 0;
-       if (db_recover) {
-           if (s > (char *)1)
-               db_printf("%s", s);
-           db_flush_lex();
-           _longjmp(db_recover, (s == (char *)1) ? 2 : 1);
-       }
-       else
-       {
-           if (s > (char *)1)
-               db_printf("%s", s);
-           panic("db_error");
-       }
-}
-
-
-/*
- * Call an arbitrary function:
- * !expr(arg,arg,arg)
- */
-void
-db_fncall(void)
-{
-       db_expr_t       fn_addr;
-#define        MAXARGS         11
-       uint32_t        args[MAXARGS];
-       db_expr_t argwork;
-       int             nargs = 0;
-       uint32_t        retval;
-       uint32_t        (*func)(uint32_t, ...);
-       int             t;
-
-       if (!db_expression(&fn_addr)) {
-           db_printf("Bad function \"%s\"\n", db_tok_string);
-           db_flush_lex();
-           return;
-       }
-       func = (uint32_t (*) (uint32_t, ...))(unsigned long)fn_addr;
-
-       t = db_read_token();
-       if (t == tLPAREN) {
-           if (db_expression(&argwork)) {
-                       args[nargs] = (uint32_t)argwork;
-                       nargs++;
-                       while ((t = db_read_token()) == tCOMMA) {
-                               if (nargs == MAXARGS) {
-                                       db_printf("Too many arguments\n");
-                                       db_flush_lex();
-                                       return;
-                               }
-                               if (!db_expression(&argwork)) {
-                                       db_printf("Argument missing\n");
-                                       db_flush_lex();
-                                       return;
-                               }
-                               args[nargs] = (uint32_t)argwork;
-                               nargs++;
-                       }
-                       db_unread_token(t);
-           }
-           if (db_read_token() != tRPAREN) {
-                       db_printf("?\n");
-                       db_flush_lex();
-                       return;
-           }
-       }
-       while (nargs < MAXARGS) {
-           args[nargs++] = 0;
-       }
-
-       retval = (*func)(args[0], args[1], args[2], args[3], args[4],
-                        args[5], args[6], args[7], args[8], args[9],
-                        args[10]);
-       db_printf(" %#n\n", retval);
-}
-
-boolean_t
-db_option(
-       const char      *modif,
-       int             option)
-{
-       const char *p;
-
-       for (p = modif; *p; p++)
-           if (*p == option)
-               return(TRUE);
-       return(FALSE);
-}
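
The dispatcher's feel comes from db_cmd_search() above: an exact name match returns CMD_UNIQUE at once, a single prefix match yields CMD_FOUND, and several prefix matches with no exact hit yield CMD_AMBIGUOUS, which is why the one-letter aliases "s", "c", "d", and "w" keep working. A standalone sketch of just that matching rule, not the deleted function itself (the toy table and names are illustrative):

#include <stdio.h>
#include <string.h>

static const char *toy[] = { "s", "search", "set", "show", "step", NULL };

/* Same contract as db_cmd_search(): an exact match wins outright,
 * otherwise count how many names the input is a prefix of. */
static const char *
classify(const char *input)
{
	int i, prefixes = 0;

	for (i = 0; toy[i] != NULL; i++) {
		if (strcmp(toy[i], input) == 0)
			return "CMD_UNIQUE";    /* exact match wins outright */
		if (strncmp(toy[i], input, strlen(input)) == 0)
			prefixes++;
	}
	if (prefixes > 1)
		return "CMD_AMBIGUOUS";
	return (prefixes == 1) ? "CMD_FOUND" : "CMD_NONE";
}

int
main(void)
{
	printf("s  -> %s\n", classify("s"));    /* CMD_UNIQUE: exact entry */
	printf("se -> %s\n", classify("se"));   /* CMD_AMBIGUOUS: search/set */
	printf("st -> %s\n", classify("st"));   /* CMD_FOUND: step */
	return 0;
}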
diff --git a/osfmk/ddb/db_command.h b/osfmk/ddb/db_command.h
deleted file mode 100644 (file)
index 102dd97..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-/*
- * Command loop declarations.
- */
-
-#ifndef        _DDB_DB_COMMAND_H_
-#define        _DDB_DB_COMMAND_H_
-
-#include <machine/db_machdep.h>
-#include <db_machine_commands.h>
-
-typedef void   (*db_func)(db_expr_t, boolean_t, db_expr_t, char *);
-
-/*
- * Command table
- */
-struct db_command {
-       const char *name;       /* command name */
-       db_func fcn;            /* function to call */
-       int     flag;           /* extra info: */
-#define        CS_OWN          0x1         /* non-standard syntax */
-#define        CS_MORE         0x2         /* standard syntax, but may have other
-                                      words at end */
-#define        CS_SET_DOT      0x100       /* set dot after command */
-       struct db_command *more;   /* another level of command */
-};
-
-
-extern db_addr_t       db_dot;         /* current location */
-extern db_addr_t       db_last_addr;   /* last explicit address typed */
-extern db_addr_t       db_prev;        /* last address examined
-                                          or written */
-extern db_addr_t       db_next;        /* next address to be examined
-                                          or written */
-
-
-/* Prototypes for functions exported by this module.
- */
-
-void db_command_loop(void);
-
-void db_machine_commands_install(struct db_command *ptr);
-
-boolean_t db_exec_cmd_nest(
-       const char      *cmd,
-       int             size);
-
-void db_error(const char *s);
-
-boolean_t db_option(
-       const char      *modif,
-       int             option);
-
-#endif /* !_DDB_DB_COMMAND_H_ */
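
A machine-dependent module fills the reserved "machine" slot with a table of this shape and registers it through db_machine_commands_install() before the debugger is enabled. A sketch under the declarations above (the command name and handler are hypothetical, not from the source):

/* Hypothetical machine-dependent handler. */
static void
my_show_tlb(__unused db_expr_t addr, __unused boolean_t have_addr,
	    __unused db_expr_t count, __unused char *modif)
{
	/* dump TLB state here */
}

static struct db_command my_machine_cmds[] = {
	{
		.name = "tlb",
		.fcn  = my_show_tlb,
		.flag = CS_MORE,
	},
	{
		.name = (const char *)NULL,
	},
};

/* registered early, before the debugger can run: */
/* db_machine_commands_install(my_machine_cmds); */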
diff --git a/osfmk/ddb/db_cond.c b/osfmk/ddb/db_cond.c
deleted file mode 100644 (file)
index 3209a22..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:47  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.2.18.1  1997/03/27  18:46:29  barbou
- *     ri-osc CR1558: enable use of breakpoint counts even when no
- *     condition given.
- *     [1995/09/20  15:24:24  bolinger]
- *     [97/02/25            barbou]
- *
- * Revision 1.2.6.2  1996/01/09  19:15:34  devrcs
- *     Change 'register c' to 'register int c'.
- *     [1995/12/01  21:42:00  jfraser]
- * 
- *     Merged '64-bit safe' changes from DEC alpha port.
- *     [1995/11/21  18:02:54  jfraser]
- * 
- * Revision 1.2.6.1  1994/09/23  01:18:27  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:09:37  ezf]
- * 
- * Revision 1.2.2.4  1993/08/11  20:37:33  elliston
- *     Add ANSI Prototypes.  CR #9523.
- *     [1993/08/11  03:32:57  elliston]
- * 
- * Revision 1.2.2.3  1993/07/27  18:26:59  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:11:12  elliston]
- * 
- * Revision 1.2.2.2  1993/06/09  02:19:53  gm
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:56:04  jeffc]
- * 
- * Revision 1.2  1993/04/19  16:01:51  devrcs
- *     Changes from mk78:
- *     Changed errant call of db_error in db_cond_cmd() to db_printf/db_error.
- *     [92/05/20            jfriedl]
- *     [93/02/02            bruel]
- * 
- * Revision 1.1  1992/09/30  02:00:58  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.2  91/10/09  15:59:09  af
- *      Revision 2.1.3.1  91/10/05  13:05:38  jeffreyh
- *             Created to support conditional break point and command execution.
- *             [91/08/29            tak]
- * 
- * Revision 2.1.3.1  91/10/05  13:05:38  jeffreyh
- *     Created to support conditional break point and command execution.
- *     [91/08/29            tak]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#include <machine/db_machdep.h>
-#include <machine/setjmp.h>
-#include <kern/misc_protos.h>
-
-#include <ddb/db_lex.h>
-#include <ddb/db_break.h>
-#include <ddb/db_command.h>
-#include <ddb/db_cond.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-
-#define DB_MAX_COND    10              /* maximum conditions to be set */
-
-int   db_ncond_free = DB_MAX_COND;                     /* number of free condition slots */
-struct db_cond {
-       int     c_size;                                 /* size of cond */
-       char    c_cond_cmd[DB_LEX_LINE_SIZE];           /* cond & cmd */
-} db_cond[DB_MAX_COND];
-
-void
-db_cond_free(db_thread_breakpoint_t bkpt)
-{
-       if (bkpt->tb_cond > 0) {
-           db_cond[bkpt->tb_cond-1].c_size = 0;
-           db_ncond_free++;
-           bkpt->tb_cond = 0;
-       }
-}
-
-extern jmp_buf_t *db_recover;
-
-boolean_t
-db_cond_check(db_thread_breakpoint_t bkpt)
-{
-       register  struct db_cond *cp;
-       db_expr_t value;
-       int       t;
-       jmp_buf_t db_jmpbuf;
-
-       if (bkpt->tb_cond <= 0) {               /* no condition */
-           if (--(bkpt->tb_count) > 0)
-               return(FALSE);
-           bkpt->tb_count = bkpt->tb_init_count;
-           return(TRUE);
-       }
-       db_dot = PC_REGS(DDB_REGS);
-       db_prev = db_dot;
-       db_next = db_dot;
-       if (_setjmp(db_recover = &db_jmpbuf)) {
-           /*
-            * in case of error, return true to enter interactive mode
-            */
-           return(TRUE);
-       }
-
-       /*
-        * switch input, and evaluate condition
-        */
-       cp = &db_cond[bkpt->tb_cond - 1];
-       db_switch_input(cp->c_cond_cmd, cp->c_size);
-       if (!db_expression(&value)) {
-           db_printf("error: condition evaluation error\n");
-           return(TRUE);
-       }
-       if (value == 0 || --(bkpt->tb_count) > 0)
-           return(FALSE);
-
-       /*
-        * execute a command list if one exists
-        */
-       bkpt->tb_count = bkpt->tb_init_count;
-       if ((t = db_read_token()) != tEOL) {
-           db_unread_token(t);
-           return(db_exec_cmd_nest(0, 0));
-       }
-       return(TRUE);
-}
-
-void
-db_cond_print(db_thread_breakpoint_t bkpt)
-{
-       register char *p, *ep;
-       register struct db_cond *cp;
-
-       if (bkpt->tb_cond <= 0)
-           return;
-       cp = &db_cond[bkpt->tb_cond-1];
-       p = cp->c_cond_cmd;
-       ep = p + cp->c_size;
-       while (p < ep) {
-           if (*p == '\n' || *p == 0)
-               break;
-           db_putchar(*p++);
-       }
-}
-
-void
-db_cond_cmd(void)
-{
-       register  int c;
-       register  struct db_cond *cp;
-       register  char *p;
-       db_expr_t value;
-       db_thread_breakpoint_t bkpt;
-
-       if (db_read_token() != tHASH || db_read_token() != tNUMBER) {
-           db_printf("#<number> expected instead of \"%s\"\n", db_tok_string);
-           db_error(0);
-           return;
-       }
-       if ((bkpt = db_find_breakpoint_number((int)db_tok_number, 0)) == 0) {
-           db_printf("No such break point #%d\n", db_tok_number);
-           db_error(0);
-           return;
-       }
-       /*
-        * if the break point already has a condition, free it first
-        */
-       if (bkpt->tb_cond > 0) {
-           cp = &db_cond[bkpt->tb_cond - 1];
-           db_cond_free(bkpt);
-       } else {
-           if (db_ncond_free <= 0) {
-               db_error("Too many conditions\n");
-               return;
-           }
-           for (cp = db_cond; cp < &db_cond[DB_MAX_COND]; cp++)
-               if (cp->c_size == 0)
-                   break;
-           if (cp >= &db_cond[DB_MAX_COND])
-               panic("bad db_cond_free");
-       }
-       for (c = db_read_char(); c == ' ' || c == '\t'; c = db_read_char());
-       for (p = cp->c_cond_cmd;
-            c >= 0 && p < &cp->c_cond_cmd[DB_LEX_LINE_SIZE];
-            c = db_read_char())
-           *p++ = c;
-       /*
-        * Switch to the saved data and call db_expression to check the
-        * condition.  If no condition is supplied, db_expression returns
-        * FALSE; the old condition has already been freed above, so there
-        * is nothing more to do.  If a condition is supplied, store it in
-        * the permanent area.
-        * Note: db_expression does not return here if the condition
-        *       expression is malformed.
-        */
-       db_switch_input(cp->c_cond_cmd, p - cp->c_cond_cmd);
-       if (!db_expression(&value)) {
-           /* since condition is already freed, do nothing */
-           db_flush_lex();
-           return;
-       }
-       db_flush_lex();
-       db_ncond_free--;
-       cp->c_size = p - cp->c_cond_cmd;
-       bkpt->tb_cond = (cp - db_cond) + 1;
-}
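
db_cond_check() above combines two gates: the optional condition expression and the skip count. The breakpoint stops execution only when the condition evaluates non-zero (or none is set) and the count, decremented on each qualifying hit, reaches zero; the count is then re-armed from tb_init_count. A compact restatement of that rule with the expression evaluation stubbed out into a parameter (should_stop is illustrative, not from the source):

static boolean_t
should_stop(db_thread_breakpoint_t bkpt, db_expr_t cond_value)
{
	if (bkpt->tb_cond > 0 && cond_value == 0)
		return FALSE;                   /* condition not met; count untouched */
	if (--(bkpt->tb_count) > 0)
		return FALSE;                   /* still skipping hits */
	bkpt->tb_count = bkpt->tb_init_count;   /* re-arm for the next round */
	return TRUE;
}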
diff --git a/osfmk/ddb/db_cond.h b/osfmk/ddb/db_cond.h
deleted file mode 100644 (file)
index 4e8c98a..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:47  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:18:37  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:09:41  ezf]
- *
- * Revision 1.1.2.3  1993/09/17  21:34:31  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:07  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:27:04  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:11:18  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_COND_H_
-#define        _DDB_DB_COND_H_
-
-#include <mach/boolean.h>
-#include <ddb/db_break.h>
-
-/* Prototypes for functions exported by this module.
- */
-
-void db_cond_free(db_thread_breakpoint_t bkpt);
-
-boolean_t db_cond_check(db_thread_breakpoint_t bkpt);
-
-void db_cond_print(db_thread_breakpoint_t bkpt);
-
-void db_cond_cmd(void);
-
-#endif /* !_DDB_DB_COND_H_ */
diff --git a/osfmk/ddb/db_examine.c b/osfmk/ddb/db_examine.c
deleted file mode 100644 (file)
index 6ed8418..0000000
+++ /dev/null
@@ -1,747 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-#include <string.h>                    /* For strlcpy() */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_output.h>
-#include <ddb/db_command.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>            /* For db_option() */
-#include <ddb/db_examine.h>
-#include <ddb/db_expr.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <mach/vm_param.h>
-
-#define db_act_to_task(thr_act)        ((thr_act)? thr_act->task: TASK_NULL)
-
-char           db_examine_format[TOK_STRING_SIZE] = "x";
-int            db_examine_count = 1;
-db_addr_t      db_examine_prev_addr = 0;
-thread_t       db_examine_act = THREAD_NULL;
-
-extern int     db_max_width;
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-int db_xcdump(
-       db_addr_t       addr,
-       int             size,
-       int             count,
-       task_t          task);
-
-int db_examine_width(
-       int size,
-       int *items,
-       int *remainder);
-
-extern char db_last_modifier[];
-
-/*
- * Examine (print) data.
- */
-void
-db_examine_cmd(db_expr_t addr, __unused boolean_t have_addr, db_expr_t count,
-              char *modif)
-{
-       thread_t        thr_act;
-
-       if (modif[0] != '\0')
-           strlcpy(db_examine_format, modif, TOK_STRING_SIZE);
-
-       if (count == (db_expr_t)-1)
-           count = 1;
-       db_examine_count = (int)count;
-       if (db_option(modif, 't')) {
-           if (modif == db_last_modifier)
-               thr_act = db_examine_act;
-           else if (!db_get_next_act(&thr_act, 0))
-               return;
-       } else
-         if (db_option(modif,'u'))
-           thr_act = current_thread();
-         else
-           thr_act = THREAD_NULL;
-
-       db_examine_act = thr_act;
-       db_examine((db_addr_t) addr, db_examine_format, (int)count, 
-                                       db_act_to_task(thr_act));
-}
-
-void
-db_examine_forward(__unused db_expr_t addr, __unused boolean_t have_addr,
-                  __unused db_expr_t count, __unused char *modif)
-{
-       db_examine(db_next, db_examine_format, db_examine_count,
-                               db_act_to_task(db_examine_act));
-}
-
-void
-db_examine_backward(__unused db_expr_t addr, __unused boolean_t have_addr,
-                   __unused db_expr_t count, __unused char *modif)
-{
-       db_examine(db_examine_prev_addr - (db_next - db_examine_prev_addr),
-                        db_examine_format, db_examine_count,
-                               db_act_to_task(db_examine_act));
-}
-
-int
-db_examine_width(
-       int size,
-       int *items,
-       int *remainder)
-{
-       int sz;
-       int entry;
-       int width;
-
-       width = size * 2 + 1;
-       sz = (db_max_width - (sizeof (void *) * 2 + 4)) / width;
-       for (entry = 1; (entry << 1) < sz; entry <<= 1)
-               continue;
-
-       sz = sizeof (void *) * 2 + 4 + entry * width;
-       while (sz + entry < db_max_width) {
-               width++;
-               sz += entry;
-       }
-       *remainder = (db_max_width - sz + 1) / 2;
-       *items = entry;
-       return width;
-}
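
db_examine_width() fixes the geometry of a dump line: each item takes size*2 hex digits plus a separating blank, the address prefix takes sizeof(void *)*2 + 4 columns, the number of items per line is rounded down to a power of two, and any leftover columns are spent widening the items and centering the line. A minimal user-space rendering of the same computation (the 80-column db_max_width and the main() driver are illustrative assumptions, not ddb code):

#include <stdio.h>

static int db_max_width = 80;   /* assumed output width */

/* Same layout rule as db_examine_width() above: round the item count
 * down to a power of two, then widen columns until the line is full. */
static int examine_width(int size, int *items, int *remainder)
{
    int width = size * 2 + 1;   /* hex digits + separating blank */
    int sz = (db_max_width - ((int)sizeof(void *) * 2 + 4)) / width;
    int entry;

    for (entry = 1; (entry << 1) < sz; entry <<= 1)
        ;
    sz = (int)sizeof(void *) * 2 + 4 + entry * width;
    while (sz + entry < db_max_width) {
        width++;
        sz += entry;
    }
    *remainder = (db_max_width - sz + 1) / 2;
    *items = entry;
    return width;
}

int main(void)
{
    int size, items, leader, width;

    for (size = 1; size <= 8; size <<= 1) {
        width = examine_width(size, &items, &leader);
        printf("size %d: %d items/line, column width %d, leader %d\n",
               size, items, width, leader);
    }
    return 0;
}
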
-
-void
-db_examine(
-       db_addr_t       addr,
-       char *          fmt,    /* format string */
-       int             count,  /* repeat count */
-       task_t          task)
-{
-       int             c;
-       db_expr_t       value;
-       int             size;
-       int             width;
-       int             leader;
-       int             items;
-       int             nitems = 0;
-       char *          fp;
-       db_addr_t       next_addr = 0;
-       int             sz;
-
-       db_examine_prev_addr = addr;
-       while (--count >= 0) {
-           fp = fmt;
-           size = sizeof(int);
-           width = db_examine_width(size, &items, &leader);
-           while ((c = *fp++) != 0) {
-               switch (c) {
-                   case 'b':
-                       size = sizeof(char);
-                       width = db_examine_width(size, &items, &leader);
-                       break;
-                   case 'h':
-                       size = sizeof(short);
-                       width = db_examine_width(size, &items, &leader);
-                       break;
-                   case 'l':
-                       size = sizeof(int);
-                       width = db_examine_width(size, &items, &leader);
-                       break;
-                   case 'q':
-                       size = sizeof(long);
-                       width = db_examine_width(size, &items, &leader);
-                       break;
-                   case 'a':   /* address */
-                   case 'A':   /* function address */
-                       /* always forces a new line */
-                       if (db_print_position() != 0)
-                           db_printf("\n");
-                       db_prev = addr;
-                       next_addr = addr + 4;
-                       db_task_printsym(addr, 
-                                       (c == 'a')?DB_STGY_ANY:DB_STGY_PROC,
-                                       task);
-                       db_printf(":\t");
-                       break;
-                   case 'm':
-                       db_next = db_xcdump(addr, size, count+1, task);
-                       return;
-                   case 't':
-                   case 'u':
-                       break;
-                   default:
-               restart:
-                       /* Reset next_addr in case we are printing in
-                          multiple formats.  */
-                       next_addr = addr;
-                       if (db_print_position() == 0) {
-                           /* If we hit a new symbol, print it */
-                           const char *        name;
-                           db_addr_t   off;
-
-                           db_find_task_sym_and_offset(addr,&name,&off,task);
-                           if (off == 0)
-                               db_printf("\r%s:\n", name);
-                           db_printf("%#lln: ", (unsigned long long)addr);
-                           for (sz = 0; sz < leader; sz++)
-                                   db_putchar(' ');
-                           db_prev = addr;
-                           nitems = items;
-                       }
-
-                       switch (c) {
-                           case 'p':   /* Addrs rendered symbolically. */
-                               if( size == sizeof(void *) )  {
-                                   const char       *symName;
-                                   db_addr_t   offset;
-
-                                   items = 1;
-                                   value = db_get_task_value( next_addr,
-                                       sizeof(db_expr_t), FALSE, task );
-                                   db_find_task_sym_and_offset( value,
-                                       &symName, &offset, task);
-                                   db_printf("\n\t*%8llX(%8llX) = %s",
-                                               (unsigned long long)next_addr, (unsigned long long)value, symName );
-                                   if( offset )  {
-                                       db_printf("+%llX", (unsigned long long)offset );
-                                   }
-                                   next_addr += size;
-                               }
-                               break;
-                           case 'r':   /* signed, current radix */
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             TRUE,task);
-                                   db_printf("%-*llr", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr, sz,
-                                                             TRUE, task);
-                                   db_printf("%-*llR", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'X':   /* unsigned hex */
-                           case 'x':   /* unsigned hex */
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             FALSE,task);
-                                   if ( c == 'X')
-                                     db_printf("%0*llX ", 2*size, (unsigned long long)value);
-                                   else
-                                     db_printf("%-*llx", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr, sz,
-                                                             FALSE, task);
-                                   if ( c == 'X')
-                                     db_printf("%0*llX ", 2*size, (unsigned long long)value);
-                                   else
-                                     db_printf("%-*llX", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'z':   /* signed hex */
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             TRUE, task);
-                                   db_printf("%-*llz", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,sz,
-                                                             TRUE,task);
-                                   db_printf("%-*llZ", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'd':   /* signed decimal */
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             TRUE,task);
-                                   db_printf("%-*lld", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr, sz,
-                                                             TRUE, task);
-                                   db_printf("%-*llD", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'U':   /* unsigned decimal */
-                           case 'u':
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             FALSE,task);
-                                   db_printf("%-*llu", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr, sz,
-                                                             FALSE, task);
-                                   db_printf("%-*llU", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'o':   /* unsigned octal */
-                               for (sz = size, next_addr = addr;
-                                    sz >= (signed)sizeof (db_expr_t);
-                                    sz -= sizeof (db_expr_t)) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr,
-                                                             sizeof (db_expr_t),
-                                                             FALSE,task);
-                                   db_printf("%-*llo", width, (unsigned long long)value);
-                                   next_addr += sizeof (db_expr_t);
-                               }
-                               if (sz > 0) {
-                                   if (nitems-- == 0) {
-                                       db_putchar('\n');
-                                       goto restart;
-                                   }
-                                   value = db_get_task_value(next_addr, sz,
-                                                             FALSE, task);
-                                   db_printf("%-*llo", width, (unsigned long long)value);
-                                   next_addr += sz;
-                               }
-                               break;
-                           case 'c':   /* character */
-                               for (sz = 0, next_addr = addr;
-                                    sz < size;
-                                    sz++, next_addr++) {
-                                   value = db_get_task_value(next_addr,1,
-                                                             FALSE,task);
-                                   if ((value >= ' ' && value <= '~') ||
-                                       value == '\n' ||
-                                       value == '\t')
-                                           db_printf("%llc", (unsigned long long)value);
-                                   else
-                                           db_printf("\\%03llo", (unsigned long long)value);
-                               }
-                               break;
-                           case 's':   /* null-terminated string */
-                               size = 0;
-                               for (;;) {
-                                   value = db_get_task_value(next_addr,1,
-                                                             FALSE,task);
-                                   next_addr += 1;
-                                   size++;
-                                   if (value == 0)
-                                       break;
-                                   if (value >= ' ' && value <= '~')
-                                       db_printf("%llc", (unsigned long long)value);
-                                   else
-                                       db_printf("\\%03llo", (unsigned long long)value);
-                               }
-                               break;
-                           case 'i':   /* instruction */
-                               next_addr = db_disasm(addr, FALSE, task);
-                               size = (int)(next_addr - addr);
-                               break;
-                           case 'I':   /* instruction, alternate form */
-                               next_addr = db_disasm(addr, TRUE, task);
-                               size = (int)(next_addr - addr);
-                               break;
-                           default:
-                               break;
-                       }
-                       if (db_print_position() != 0)
-                           db_end_line();
-                       break;
-               }
-           }
-           addr = next_addr;
-       }
-       db_next = addr;
-}
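
The loop above is driven entirely by the format string: b/h/l/q adjust the item size, a/A print the address symbolically, m hands off to the hex+ASCII dump, and the remaining letters (x, X, z, d, u, o, r, c, s, i, I) fetch successive items with db_get_task_value() and render them, restarting the line whenever nitems runs out. A minimal user-space sketch of that parse-then-fetch structure, reading from a local buffer instead of task memory (examine() and its arguments are hypothetical stand-ins):

#include <stdio.h>
#include <string.h>

/* Size letters (b/h/l/q) set the item width; 'x' dumps items in hex.
 * A plain-buffer fetch replaces db_get_task_value(). */
static void examine(const unsigned char *mem, const char *fmt, int count)
{
    size_t size = sizeof(int);
    const unsigned char *p = mem;
    const char *fp;

    while (count-- > 0) {
        for (fp = fmt; *fp; fp++) {
            switch (*fp) {
            case 'b': size = 1;             break;
            case 'h': size = sizeof(short); break;
            case 'l': size = sizeof(int);   break;
            case 'q': size = sizeof(long);  break;
            case 'x': {
                unsigned long long v = 0;
                memcpy(&v, p, size);        /* assumes little-endian */
                printf("%0*llx ", (int)(size * 2), v);
                p += size;
                break;
            }
            default:                        break;
            }
        }
    }
    putchar('\n');
}

int main(void)
{
    unsigned char buf[16] =
        { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

    examine(buf, "bx", 8);      /* roughly ddb "x/bx addr,8" */
    examine(buf, "lx", 4);      /* roughly ddb "x/lx addr,4" */
    return 0;
}
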
-
-/*
- * Print value.
- */
-char   db_print_format = 'x';
-
-void
-db_print_cmd(void)
-{
-       db_expr_t       value;
-       int             t;
-       task_t          task = TASK_NULL;
-
-       if ((t = db_read_token()) == tSLASH) {
-           if (db_read_token() != tIDENT) {
-               db_printf("Bad modifier \"/%s\"\n", db_tok_string);
-               db_error(0);
-               /* NOTREACHED */
-           }
-           if (db_tok_string[0])
-               db_print_format = db_tok_string[0];
-           if (db_option(db_tok_string, 't')) {
-               if (db_default_act)
-                   task = db_default_act->task;
-               if (db_print_format == 't')
-                  db_print_format = db_tok_string[1];
-           }
-       } else
-           db_unread_token(t);
-       
-       for ( ; ; ) {
-           t = db_read_token();
-           if (t == tSTRING) {
-               db_printf("%s", db_tok_string);
-               continue;
-           }
-           db_unread_token(t);
-           if (!db_expression(&value))
-               break;
-           switch (db_print_format) {
-           case 'a':
-           case 'A':
-               db_task_printsym((db_addr_t)value,
-                                (db_print_format == 'a') ? DB_STGY_ANY:
-                                                           DB_STGY_PROC,
-                                task);
-               break;
-           case 'r':
-               db_printf("%11llr", (unsigned long long)value);
-               break;
-           case 'X':
-               db_printf("%016llX", (unsigned long long)value);
-               break;
-           case 'x':
-               db_printf("%016llx", (unsigned long long)value);
-               break;
-           case 'z':
-               db_printf("%16llz", (unsigned long long)value);
-               break;
-           case 'd':
-               db_printf("%11lld", (unsigned long long)value);
-               break;
-           case 'u':
-               db_printf("%11llu", (unsigned long long)value);
-               break;
-           case 'o':
-               db_printf("%16llo", (unsigned long long)value);
-               break;
-           case 'c':
-               value = value & 0xFF;
-               if (value >= ' ' && value <= '~')
-                   db_printf("%llc", (unsigned long long)value);
-               else
-                   db_printf("\\%03llo", (unsigned long long)value);
-               break;
-           default:
-               db_printf("Unknown format %c\n", db_print_format);
-               db_print_format = 'x';
-               db_error(0);
-           }
-       }
-}
-
-void
-db_print_loc(
-       db_addr_t       loc,
-       task_t          task)
-{
-       db_task_printsym(loc, DB_STGY_PROC, task);
-}
-
-void
-db_print_inst(
-       db_addr_t       loc,
-       task_t          task)
-{
-       (void) db_disasm(loc, TRUE, task);
-}
-
-void
-db_print_loc_and_inst(
-       db_addr_t       loc,
-       task_t          task)
-{
-       db_task_printsym(loc, DB_STGY_PROC, task);
-       db_printf(":\t");
-       (void) db_disasm(loc, TRUE, task);
-}
-
-/*
- * Search for a value in memory.
- * Syntax: search [/bhl] addr value [mask] [,count] [thread]
- */
-void
-db_search_cmd(void)
-{
-       int             t;
-       db_addr_t       addr;
-       int             size = 0;
-       db_expr_t       value;
-       db_expr_t       mask;
-       db_addr_t       count;
-       thread_t        thr_act;
-       boolean_t       thread_flag = FALSE;
-       register char   *p;
-
-       t = db_read_token();
-       if (t == tSLASH) {
-           t = db_read_token();
-           if (t != tIDENT) {
-             bad_modifier:
-               db_printf("Bad modifier \"/%s\"\n", db_tok_string);
-               db_flush_lex();
-               return;
-           }
-
-           for (p = db_tok_string; *p; p++) {
-               switch(*p) {
-               case 'b':
-                   size = sizeof(char);
-                   break;
-               case 'h':
-                   size = sizeof(short);
-                   break;
-               case 'l':
-                   size = sizeof(long);
-                   break;
-               case 't':
-                   thread_flag = TRUE;
-                   break;
-               default:
-                   goto bad_modifier;
-               }
-           }
-       } else {
-           db_unread_token(t);
-           size = sizeof(int);
-       }
-
-       if (!db_expression((db_expr_t *) &addr)) {
-           db_printf("Address missing\n");
-           db_flush_lex();
-           return;
-       }
-
-       if (!db_expression(&value)) {
-           db_printf("Value missing\n");
-           db_flush_lex();
-           return;
-       }
-
-       if (!db_expression(&mask))
-           mask = ~0;
-
-       t = db_read_token();
-       if (t == tCOMMA) {
-           if (!db_expression((db_expr_t *) &count)) {
-               db_printf("Count missing\n");
-               db_flush_lex();
-               return;
-           }
-       } else {
-           db_unread_token(t);
-           count = -1;         /* effectively forever */
-       }
-       if (thread_flag) {
-           if (!db_get_next_act(&thr_act, 0))
-               return;
-       } else
-           thr_act = THREAD_NULL;
-
-       db_search(addr, size, value, mask, (unsigned int)count, db_act_to_task(thr_act));
-}
-
-void
-db_search(
-       db_addr_t       addr,
-       int             size,
-       db_expr_t       value,
-       db_expr_t       mask,
-       unsigned int    count,
-       task_t          task)
-{
-       while (count-- != 0) {
-               db_prev = addr;
-               if ((db_get_task_value(addr,size,FALSE,task) & mask) == value)
-                       break;
-               addr += size;
-       }
-       db_printf("0x%llx: ", (unsigned long long)addr);
-       db_next = addr;
-}
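
db_search() itself is a bounded linear scan: fetch a size-byte cell, mask it, compare against the target, and advance, with db_prev/db_next recording where the scan stopped so a repeated search resumes there. The same loop in plain user-space C (mem_search() is a hypothetical stand-in, with memcpy() replacing the db_get_task_value() fetch):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Return the first address in [start, start+len) whose size-byte value
 * matches (value & mask), stepping by size; NULL if none matches. */
static const unsigned char *
mem_search(const unsigned char *start, size_t len,
           uint64_t value, uint64_t mask, size_t size)
{
    const unsigned char *p;
    uint64_t v;

    for (p = start; p + size <= start + len; p += size) {
        v = 0;
        memcpy(&v, p, size);            /* unaligned-safe fetch */
        if ((v & mask) == (value & mask))
            return p;
    }
    return NULL;
}
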
-
-#define DB_XCDUMP_NC   16
-
-int
-db_xcdump(
-       db_addr_t       addr,
-       int             size,
-       int             count,
-       task_t          task)
-{
-       register int    i, n;
-       db_expr_t       value;
-       int             bcount;
-       db_addr_t       off;
-       const char              *name;
-       char            data[DB_XCDUMP_NC];
-
-       db_find_task_sym_and_offset(addr, &name, &off, task);
-       for (n = count*size; n > 0; n -= bcount) {
-           db_prev = addr;
-           if (off == 0) {
-               db_printf("%s:\n", name);
-               off = -1;
-           }
-           db_printf("%0*llX:%s", (int)(2*sizeof(db_addr_t)),(unsigned long long) addr,
-                                       (size != 1) ? " " : "" );
-           bcount = ((n > DB_XCDUMP_NC)? DB_XCDUMP_NC: n);
-           if (trunc_page(addr) != trunc_page(addr+bcount-1)) {
-               db_addr_t next_page_addr = trunc_page(addr+bcount-1);
-               if (!DB_CHECK_ACCESS((vm_offset_t)next_page_addr, (int)sizeof(int), task))
-                   bcount = (int)(next_page_addr - addr);
-           }
-           db_read_bytes((vm_offset_t)addr, bcount, data, task);
-           for (i = 0; i < bcount && off != 0; i += size) {
-               if (i % 4 == 0)
-                       db_printf(" ");
-               value = db_get_task_value(addr, size, FALSE, task);
-               db_printf("%0*llX ", size*2, (unsigned long long)value);
-               addr += size;
-               db_find_task_sym_and_offset(addr, &name, &off, task);
-           }
-           db_printf("%*s",
-                       ((DB_XCDUMP_NC-i)/size)*(size*2+1)+(DB_XCDUMP_NC-i)/4,
-                        "");
-           bcount = i;
-           db_printf("%s*", (size != 1)? " ": "");
-           for (i = 0; i < bcount; i++) {
-               value = data[i];
-               db_printf("%llc", (value >= ' ' && value <= '~')? (unsigned long long)value: (unsigned long long)'.');
-           }
-           db_printf("*\n");
-       }
-       return((int)addr);
-}
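
db_xcdump() prints DB_XCDUMP_NC (16) bytes per line, first as hex items and then as the same bytes rendered printable between '*' markers, shortening a line when the next page fails DB_CHECK_ACCESS and re-printing the symbol name whenever the address reaches one. A simplified user-space version of the same line format (byte-sized items only; xcdump() and its buffer argument are illustrative):

#include <ctype.h>
#include <stdio.h>

#define XCDUMP_NC 16

/* Hex + ASCII dump, XCDUMP_NC bytes per line, in the spirit of
 * db_xcdump() above (no symbol lookup, no page-boundary clamping). */
static void xcdump(const unsigned char *p, size_t len)
{
    size_t i, j, n;

    for (i = 0; i < len; i += XCDUMP_NC) {
        n = (len - i < XCDUMP_NC) ? len - i : XCDUMP_NC;
        printf("%08zx: ", i);
        for (j = 0; j < XCDUMP_NC; j++) {
            if (j % 4 == 0)
                putchar(' ');
            if (j < n)
                printf("%02X ", p[i + j]);
            else
                printf("   ");
        }
        printf(" *");
        for (j = 0; j < n; j++)
            putchar(isprint(p[i + j]) ? p[i + j] : '.');
        printf("*\n");
    }
}
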
diff --git a/osfmk/ddb/db_examine.h b/osfmk/ddb/db_examine.h
deleted file mode 100644 (file)
index 08f026d..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.7.2  1996/01/09  19:15:43  devrcs
- *     Function prototypes for db_print_loc() & db_print_inst().
- *     [1995/12/01  21:42:06  jfraser]
- *
- *     Merged '64-bit safe' changes from DEC alpha port.
- *     [1995/11/21  18:03:03  jfraser]
- *
- * Revision 1.1.7.1  1994/09/23  01:18:55  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:09:49  ezf]
- * 
- * Revision 1.1.2.4  1993/09/17  21:34:33  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:11  robert]
- * 
- * Revision 1.1.2.3  1993/08/11  22:12:10  elliston
- *     Add ANSI Prototypes.  CR #9523.
- *     [1993/08/11  03:33:11  elliston]
- * 
- * Revision 1.1.2.2  1993/07/27  18:27:12  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:11:28  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_EXAMINE_H_
-#define        _DDB_DB_EXAMINE_H_
-
-#include <machine/db_machdep.h>
-#include <kern/task.h>
-
-/* Prototypes for functions exported by this module.
- */
-
-void db_examine_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_examine_forward(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_examine_backward(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_examine(
-       db_addr_t       addr,
-       char *          fmt,    /* format string */
-       int             count,  /* repeat count */
-       task_t          task);
-
-void db_print_cmd(void);
-
-void db_print_loc(
-       db_addr_t       loc,
-       task_t          task);
-
-void
-db_print_inst(
-       db_addr_t       loc,
-       task_t          task);
-
-void db_print_loc_and_inst(
-       db_addr_t       loc,
-       task_t          task);
-
-void db_search_cmd(void);
-
-void db_search(
-       db_addr_t       addr,
-       int             size,
-       db_expr_t       value,
-       db_expr_t       mask,
-       unsigned int    count,
-       task_t          task);
-
-#endif /* !_DDB_DB_EXAMINE_H_ */
diff --git a/osfmk/ddb/db_expr.c b/osfmk/ddb/db_expr.c
deleted file mode 100644 (file)
index 9c3962b..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <ddb/db_access.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <kern/task.h>
-
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-boolean_t db_term(db_expr_t *valuep);
-boolean_t db_unary(db_expr_t *valuep);
-boolean_t db_mult_expr(db_expr_t *valuep);
-boolean_t db_add_expr(db_expr_t *valuep);
-boolean_t db_shift_expr(db_expr_t *valuep);
-boolean_t db_logical_relation_expr(db_expr_t *valuep);
-boolean_t db_logical_and_expr(db_expr_t *valuep);
-boolean_t db_logical_or_expr(db_expr_t *valuep);
-
-
-/* try to interpret unknown symbols as hexadecimal constants */
-int db_allow_unprefixed_hexa = 1;
-
-boolean_t
-db_term(db_expr_t *valuep)
-{
-       int     t;
-       boolean_t valid_symbol = FALSE;
-       boolean_t valid_hexa = FALSE;
-
-       switch(t = db_read_token()) {
-       case tIDENT:
-           if (db_value_of_name(db_tok_string, valuep)) {
-               valid_symbol = TRUE;
-           }
-           if (db_allow_unprefixed_hexa && db_radix == 16 &&
-               db_tok_string[0]) {
-               char *cp;
-               db_expr_t value;
-                   
-               value = 0;
-               valid_hexa = TRUE;
-               for (cp = db_tok_string; *cp; cp++) {
-                   if (*cp >= 'a' && *cp <= 'f') {
-                       value = value * 16 + 10 + (*cp - 'a');
-                   } else if (*cp >= 'A' && *cp <= 'F') {
-                       value = value * 16 + 10 + (*cp - 'A');
-                   } else if (*cp >= '0' && *cp <= '9') {
-                       value = value * 16 + (*cp - '0');
-                   } else {
-                       valid_hexa = FALSE;
-                       break;
-                   }
-               }
-               if (valid_hexa) {
-                   if (valid_symbol) {
-                       db_printf("Ambiguous constant %llx used as a symbol\n",
-                                 (unsigned long long)value);
-                   } else {
-                       *valuep = value;
-                   }   
-               }
-           }
-           if (!valid_symbol && !valid_hexa) {
-               db_printf("Symbol \"%s\" not found\n", db_tok_string);
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           return (TRUE);
-       case tNUMBER:
-           *valuep = /*(db_expr_t)*/db_tok_number;
-           return (TRUE);
-       case tDOT:
-           *valuep = (db_expr_t)db_dot;
-           return (TRUE);
-       case tDOTDOT:
-           *valuep = (db_expr_t)db_prev;
-           return (TRUE);
-       case tPLUS:
-           *valuep = (db_expr_t) db_next;
-           return (TRUE);
-       case tQUOTE:
-           *valuep = (db_expr_t)db_last_addr;
-           return (TRUE);
-       case tDOLLAR:
-           if (!db_get_variable(valuep))
-               return (FALSE);
-           return (TRUE);
-       case tLPAREN:
-           if (!db_expression(valuep)) {
-               db_error("Unmatched ()s\n");
-               /*NOTREACHED*/
-           }
-           t = db_read_token();
-           if (t != tRPAREN) {
-               db_printf("')' expected at \"%s...\"\n", db_tok_string);
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           return (TRUE);
-       case tSTRING:
-           {
-                   static int db_tok_offset = 0;
-                   char *sp, *cp;
-
-                   sp = (char *)db_tok_string + db_tok_offset;
-                   *valuep = *(int *)sp;
-                   for (cp = sp;
-                        *cp && cp < sp + sizeof (int);
-                        cp++);
-                   if (cp == sp + sizeof (int) && *cp) {
-                           db_tok_offset += sizeof (int);
-                           db_unread_token(t);
-                   } else {
-                           db_tok_offset = 0;
-                   }
-                   return (TRUE);
-           }
-       default:
-           db_unread_token(t);
-           return (FALSE);
-       }
-}
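
The tIDENT arm above is where db_allow_unprefixed_hexa takes effect: with the radix at 16, a token made entirely of hex digits is accepted as a constant even without an 0x prefix, while a token that is both a known symbol and valid hex draws the "Ambiguous constant" warning, with the symbol value winning. The digit scan in isolation (parse_unprefixed_hex() is a hypothetical standalone name for it):

#include <stdbool.h>

/* Accept tok as an unprefixed hex constant, as db_term() does when
 * db_allow_unprefixed_hexa is set and db_radix == 16. */
static bool parse_unprefixed_hex(const char *tok, unsigned long long *out)
{
    unsigned long long value = 0;
    const char *cp;

    if (*tok == '\0')
        return false;
    for (cp = tok; *cp; cp++) {
        if (*cp >= 'a' && *cp <= 'f')
            value = value * 16 + 10 + (*cp - 'a');
        else if (*cp >= 'A' && *cp <= 'F')
            value = value * 16 + 10 + (*cp - 'A');
        else if (*cp >= '0' && *cp <= '9')
            value = value * 16 + (*cp - '0');
        else
            return false;
    }
    *out = value;
    return true;
}
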
-
-int
-db_size_option(
-       char            *modif,
-       boolean_t       *u_option,
-       boolean_t       *t_option)
-{
-       register  char *p;
-       int       size = sizeof(int);
-
-       *u_option = FALSE;
-       *t_option = FALSE;
-       for (p = modif; *p; p++) {
-           switch(*p) {
-           case 'b':
-               size = sizeof(char);
-               break;
-           case 'h':
-               size = sizeof(short);
-               break;
-           case 'l':
-               size = sizeof(long);
-               break;
-           case 'u':
-               *u_option = TRUE;
-               break;
-           case 't':
-               *t_option = TRUE;
-               break;
-           }
-       }
-       return(size);
-}
-
-boolean_t
-db_unary(db_expr_t *valuep)
-{
-       int       t;
-       int       size;
-       boolean_t u_opt, t_opt;
-       task_t    task;
-
-       t = db_read_token();
-       if (t == tMINUS) {
-           if (!db_unary(valuep)) {
-               db_error("Expression syntax error after '-'\n");
-               /*NOTREACHED*/
-           }
-           *valuep = -*valuep;
-           return (TRUE);
-       }
-       if (t == tSTAR) {
-           /* indirection */
-           if (!db_unary(valuep)) {
-               db_error("Expression syntax error after '*'\n");
-               /*NOTREACHED*/
-           }
-           task = TASK_NULL;
-           size = sizeof(db_addr_t);
-           u_opt = FALSE;
-           t = db_read_token();
-           if (t == tIDENT && db_tok_string[0] == ':') {
-               size = db_size_option(&db_tok_string[1], &u_opt, &t_opt);
-               if (t_opt)
-                   task = db_default_task;
-           } else
-               db_unread_token(t);
-           *valuep = db_get_task_value((db_addr_t)*valuep, size, !u_opt, task);
-           return (TRUE);
-       }
-       if (t == tEXCL) {
-           if (!db_unary(valuep)) {
-               db_error("Expression syntax error after '!'\n");
-               /*NOTREACHED*/
-           }
-           *valuep = (!(*valuep));
-           return (TRUE);
-       }
-       db_unread_token(t);
-       return (db_term(valuep));
-}
-
-boolean_t
-db_mult_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-       char            c;
-
-       if (!db_unary(&lhs))
-           return (FALSE);
-
-       t = db_read_token();
-       while (t == tSTAR || t == tSLASH || t == tPCT || t == tHASH
-               || t == tBIT_AND) {
-           c = db_tok_string[0];
-           if (!db_term(&rhs)) {
-               db_printf("Expression syntax error after '%c'\n", c);
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           switch(t) {
-           case tSTAR:
-               lhs *= rhs;
-               break;
-           case tBIT_AND:
-               lhs &= rhs;
-               break;
-           default:
-               if (rhs == 0) {
-                   db_error("Divide by 0\n");
-                   /*NOTREACHED*/
-               }
-               if (t == tSLASH)
-                   lhs /= rhs;
-               else if (t == tPCT)
-                   lhs %= rhs;
-               else
-                   lhs = ((lhs+rhs-1)/rhs)*rhs;
-           }
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-boolean_t
-db_add_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-       char            c;
-
-       if (!db_mult_expr(&lhs))
-           return (FALSE);
-
-       t = db_read_token();
-       while (t == tPLUS || t == tMINUS || t == tBIT_OR) {
-           c = db_tok_string[0];
-           if (!db_mult_expr(&rhs)) {
-               db_printf("Expression syntax error after '%c'\n", c);
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           if (t == tPLUS)
-               lhs += rhs;
-           else if (t == tMINUS)
-               lhs -= rhs;
-           else
-               lhs |= rhs;
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-boolean_t
-db_shift_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-
-       if (!db_add_expr(&lhs))
-           return (FALSE);
-
-       t = db_read_token();
-       while (t == tSHIFT_L || t == tSHIFT_R) {
-           if (!db_add_expr(&rhs)) {
-               db_printf("Expression syntax error after \"%s\"\n",
-                       (t == tSHIFT_L)? "<<": ">>");
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           if ((int64_t)rhs < 0) {
-               db_error("Negative shift amount\n");
-               /*NOTREACHED*/
-           }
-           if (t == tSHIFT_L)
-               lhs <<= rhs;
-           else {
-               /* Shift right is unsigned */
-               lhs = (uint64_t) lhs >> rhs;
-           }
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-boolean_t
-db_logical_relation_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-       char            op[3];
-
-       if (!db_shift_expr(&lhs))
-           return(FALSE);
-
-       t = db_read_token();
-       while (t == tLOG_EQ || t == tLOG_NOT_EQ
-               || t == tGREATER || t == tGREATER_EQ
-               || t == tLESS || t == tLESS_EQ) {
-           op[0] = db_tok_string[0];
-           op[1] = db_tok_string[1];
-           op[2] = 0;
-           if (!db_shift_expr(&rhs)) {
-               db_printf("Expression syntax error after \"%s\"\n", op);
-               db_error(0);
-               /*NOTREACHED*/
-           }
-           switch(t) {
-           case tLOG_EQ:
-               lhs = (lhs == rhs);
-               break;
-           case tLOG_NOT_EQ:
-               lhs = (lhs != rhs);
-               break;
-           case tGREATER:
-               lhs = (lhs > rhs);
-               break;
-           case tGREATER_EQ:
-               lhs = (lhs >= rhs);
-               break;
-           case tLESS:
-               lhs = (lhs < rhs);
-               break;
-           case tLESS_EQ:
-               lhs = (lhs <= rhs);
-               break;
-           }
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-boolean_t
-db_logical_and_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-
-       if (!db_logical_relation_expr(&lhs))
-           return(FALSE);
-
-       t = db_read_token();
-       while (t == tLOG_AND) {
-           if (!db_logical_relation_expr(&rhs)) {
-               db_error("Expression syntax error after \"&&\"\n");
-               /*NOTREACHED*/
-           }
-           lhs = (lhs && rhs);
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-boolean_t
-db_logical_or_expr(db_expr_t *valuep)
-{
-       db_expr_t       lhs, rhs;
-       int             t;
-
-       if (!db_logical_and_expr(&lhs))
-           return(FALSE);
-
-       t = db_read_token();
-       while (t == tLOG_OR) {
-           if (!db_logical_and_expr(&rhs)) {
-               db_error("Expression syntax error after \"||\"\n");
-               /*NOTREACHED*/
-           }
-           lhs = (lhs || rhs);
-           t = db_read_token();
-       }
-       db_unread_token(t);
-       *valuep = lhs;
-       return (TRUE);
-}
-
-int
-db_expression(db_expr_t *valuep)
-{
-       return (db_logical_or_expr(valuep));
-}
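
db_expression() caps a textbook recursive-descent chain, with binding growing tighter down the calls db_logical_or_expr → db_logical_and_expr → db_logical_relation_expr → db_shift_expr → db_add_expr → db_mult_expr → db_unary → db_term. Two ddb-specific quirks follow from that layout: '#' is a round-up operator (lhs rounded up to the next multiple of rhs), and the bitwise operators bind like their arithmetic neighbors ('&' multiplicative, '|' additive), so a|b<<1 parses as (a|b)<<1 rather than the C reading a|(b<<1).
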
diff --git a/osfmk/ddb/db_expr.h b/osfmk/ddb/db_expr.h
deleted file mode 100644 (file)
index 080e1a6..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:19:18  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:09:57  ezf]
- *
- * Revision 1.1.2.3  1993/09/17  21:34:35  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:14  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:27:21  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:11:42  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_EXPR_H_
-#define        _DDB_DB_EXPR_H_
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-
-/* Prototypes for functions exported by this module.
- */
-
-int db_size_option(
-       char            *modif,
-       boolean_t       *u_option,
-       boolean_t       *t_option);
-
-int db_expression(db_expr_t *valuep);
-
-#endif /* !_DDB_DB_EXPR_H_ */
diff --git a/osfmk/ddb/db_ext_symtab.c b/osfmk/ddb/db_ext_symtab.c
deleted file mode 100644 (file)
index 4cb7c02..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-/*
- */
-#include <mach_kdb.h>
-#include <mach_debug.h>
-
-#include <mach/vm_map.h>
-#include <mach/vm_param.h>
-#include <mach/std_types.h>
-#include <mach/mach_types.h>
-#include <mach/host_priv_server.h>  /* prototype */
-
-#if MACH_KDB && MACH_DEBUG
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-#include <kern/host.h>
-#include <kern/task.h>
-#include <ddb/db_sym.h>
-#endif
-
-/*
- *     Loads a symbol table for an external file into the kernel debugger.
- *     The symbol table data is an array of characters.  It is assumed that
- *     the caller and the kernel debugger agree on its format.
- *     This has never been and will never be supported on Mac OS X. The only
- *     reason I don't remove it entirely is that it is an exported symbol.
- */
-kern_return_t
-host_load_symbol_table(
-       __unused host_priv_t                    host_priv,
-       __unused task_t                 task,
-       __unused char *                 name,
-       __unused pointer_t                      symtab,
-       __unused mach_msg_type_number_t symtab_count)
-{
-        return KERN_FAILURE;
-}
diff --git a/osfmk/ddb/db_input.c b/osfmk/ddb/db_input.c
deleted file mode 100644 (file)
index 650f05b..0000000
+++ /dev/null
@@ -1,821 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.3.10.2  1994/09/23  01:19:37  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:05  ezf]
- *
- * Revision 1.3.10.1  1994/06/11  21:11:48  bolinger
- *     Merge up to NMK17.2.
- *     [1994/06/11  20:01:41  bolinger]
- * 
- * Revision 1.3.8.2  1994/02/11  14:21:41  paire
- *     Added string.h header file for strlen declaration.
- *     [94/02/09            paire]
- * 
- * Revision 1.3.8.1  1994/02/08  10:57:55  bernadat
- *     Added db_auto_completion variable.
- *     [93/08/17            paire]
- * 
- *     Added support of symbol completion by typing '\t'.
- *     [93/08/14            paire]
- *     [94/02/07            bernadat]
- * 
- * Revision 1.3.2.4  1993/08/11  20:37:51  elliston
- *     Add ANSI Prototypes.  CR #9523.
- *     [1993/08/11  03:33:21  elliston]
- * 
- * Revision 1.3.2.3  1993/07/27  18:27:30  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:01  elliston]
- * 
- * Revision 1.3.2.2  1993/06/09  02:20:13  gm
- *     CR9176 - ANSI C violations: trailing tokens on CPP
- *     directives, extra semicolons after decl_ ..., asm keywords
- *     [1993/06/07  18:57:14  jeffc]
- * 
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:56:26  jeffc]
- * 
- * Revision 1.3  1993/04/19  16:02:17  devrcs
- *     Replaced ^R (redraw) with ^L [barbou@gr.osf.org]
- * 
- *     Added ^R and ^S commands for history search commands
- *     ^U does not erase end of the line anymore. (only erases
- *     from the beginning of the line to current position).
- *     [barbou@gr.osf.org]
- * 
- *     ^C now erases the entire line. [barbou@gr.osf.org]
- *     [92/12/03            bernadat]
- * 
- *     Fixed history management: Do not store repeated typed
- *     command. Null terminate current command in case it is a
- *     substring of the last command.
- *     [92/10/02            bernadat]
- * 
- * Revision 1.2  1992/11/25  01:04:24  robert
- *     integrate changes for norma_14 below
- * 
- *     Philippe Bernadat (bernadat) at gr.osf.org 02-Oct-92
- *     Fixed history management: Do not store repeated typed
- *     command. Null terminate current command in case it is a
- *     substring of the last command.
- *     [1992/11/20  00:56:07  robert]
- * 
- *     integrate changes below for norma_14
- *     [1992/11/13  19:21:34  robert]
- * 
- * Revision 1.1  1992/09/30  02:01:08  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.7.3.2  92/09/15  17:14:26  jeffreyh
- *     Fixed history code. (Only one char. out of 2 was checked to
- *     compare to last command)
- *     [barbou@gr.osf.org]
- * 
- * Revision 2.7.3.1  92/03/03  16:13:30  jeffreyh
- *     Pick up changes from TRUNK
- *     [92/02/26  10:59:36  jeffreyh]
- * 
- * Revision 2.8  92/02/19  15:07:44  elf
- *     Added delete_line (Ctrl-U).
- *     [92/02/17            kivinen]
- * 
- *     Added command line history. Ctrl-P = previous, Ctrl-N = next. If
- *     DB_HISTORY_SIZE is 0 then command history is disabled.
- *     [92/02/17            kivinen]
- * 
- * Revision 2.7  91/10/09  16:00:03  af
- *      Revision 2.6.2.1  91/10/05  13:06:12  jeffreyh
- *             Fixed incorrect db_lbuf_end setting.
- *             [91/08/29            tak]
- * 
- * Revision 2.6.2.1  91/10/05  13:06:12  jeffreyh
- *     Fixed incorrect db_lbuf_end setting.
- *     [91/08/29            tak]
- * 
- * Revision 2.6  91/07/09  23:15:49  danner
- *     Add include of machine/db_machdep.h to allow machine-specific
- *      overrides via defines.
- *     [91/07/08            danner]
- *
- * Revision 2.5  91/05/14  15:34:03  mrt
- *     Correcting copyright
- * 
- * Revision 2.4  91/02/14  14:41:53  mrt
- *     Add input line editing.
- *     [90/11/11            dbg]
- * 
- * Revision 2.3  91/02/05  17:06:32  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:18:13  mrt]
- * 
- * Revision 2.2  90/08/27  21:51:03  dbg
- *     Reduce lint.
- *     [90/08/07            dbg]
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-#include <string.h>
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <kern/misc_protos.h>
-#include <ddb/db_output.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_command.h>
-#include <ddb/db_input.h>
-#include <ddb/db_sym.h>
-
-#ifndef DB_HISTORY_SIZE
-#define DB_HISTORY_SIZE 4000
-#endif /* DB_HISTORY_SIZE */
-
-/*
- * Character input and editing.
- */
-
-/*
- * We don't track output position while editing input,
- * since input always ends with a new-line.  We just
- * reset the line position at the end.
- */
-char * db_lbuf_start;  /* start of input line buffer */
-char * db_lbuf_end;    /* end of input line buffer */
-char * db_lc;          /* current character */
-char * db_le;          /* one past last character */
-int    db_completion;  /* number of incomplete symbols matched */
-int	db_auto_completion = 10; /* number of lines to display without asking */
-#if DB_HISTORY_SIZE != 0
-char    db_history[DB_HISTORY_SIZE];   /* start of history buffer */
-int     db_history_size = DB_HISTORY_SIZE; /* size of history buffer */
-char *  db_history_curr = db_history;  /* start of current line */
-char *  db_history_last = db_history;  /* start of last line */
-char *  db_history_prev = (char *) 0;  /* start of previous line */
-int    db_hist_unmodified = 0;         /* unmodified line from history */
-int    db_hist_search = 0;             /* are we in hist search mode ? */
-char   db_hist_search_string[DB_LEX_LINE_SIZE];/* the string to look for */
-int    db_hist_ignore_dups = 0;        /* don't duplicate commands in hist */
-#endif
-       
-#define        CTRL(c)         ((c) & 0x1f)
-#define        isspace(c)      ((c) == ' ' || (c) == '\t')
-#define        BLANK           ' '
-#define        BACKUP          '\b'
-
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-void db_putstring(const char *s, int count);
-
-void db_putnchars(
-       int     c,
-       int     count);
-
-void db_delete(
-       int     n,
-       int     bwd);
-
-void db_delete_line(void);
-
-boolean_t db_hist_substring(
-       char    *string,
-       char    *substring);
-
-boolean_t db_inputchar(int c);
-
-extern jmp_buf_t       *db_recover;
-
-void
-db_putstring(const char *s, int count)
-{
-       while (--count >= 0)
-           cnputc(*s++);
-}
-
-void
-db_putnchars(
-       int     c,
-       int     count)
-{
-       while (--count >= 0)
-           cnputc(c);
-}
-
-/*
- * Delete N characters, forward or backward
- */
-#define        DEL_FWD         0
-#define        DEL_BWD         1
-void
-db_delete(
-       int     n,
-       int     bwd)
-{
-       register char *p;
-
-       if (bwd) {
-           db_lc -= n;
-           db_putnchars(BACKUP, n);
-       }
-       for (p = db_lc; p < db_le-n; p++) {
-           *p = *(p+n);
-           cnputc(*p);
-       }
-       db_putnchars(BLANK, n);
-       db_putnchars(BACKUP, db_le - db_lc);
-       db_le -= n;
-}
-
-void
-db_delete_line(void)
-{
-       db_delete(db_le - db_lc, DEL_FWD);
-       db_delete(db_lc - db_lbuf_start, DEL_BWD);
-       db_le = db_lc = db_lbuf_start;
-}
-
-#if DB_HISTORY_SIZE != 0
-#define INC_DB_CURR() \
-    do { \
-            db_history_curr++; \
-            if (db_history_curr > \
-                db_history + db_history_size - 1) \
-                    db_history_curr = db_history; \
-       } while (0)
-#define DEC_DB_CURR() \
-    do { \
-            db_history_curr--; \
-            if (db_history_curr < db_history) \
-                db_history_curr = db_history + \
-                db_history_size - 1; \
-       } while (0)
-#endif
-               
-/* returns TRUE if "substring" is a substring of "string" */
-boolean_t
-db_hist_substring(
-       char    *string,
-       char    *substring)
-{
-       register char *cp1, *cp2;
-
-       cp1 = string;
-       while (*cp1)
-               cp1++;
-       cp2 = substring;
-       while (*cp2)
-               cp2++;
-
-       while (cp2 > substring) {
-               cp1--; cp2--;
-       }
-       
-       while (cp1 >= string) {
-               register char *cp3;
-
-               cp2 = substring;
-               cp3 = cp1;
-               while (*cp2 && *cp2 == *cp3) {
-                       cp2++; cp3++;
-               }
-               if (*cp2 == '\0') {
-                       return TRUE;
-               }
-               cp1--;
-       }
-       return FALSE;
-}
-
-/* returns TRUE at end-of-line */
-boolean_t
-db_inputchar(int c)
-{
-       char *sym;
-       char *start;
-       char *restart;
-       jmp_buf_t db_jmpbuf;
-       jmp_buf_t *local_prev;
-       char *p;
-       int len;
-
-       switch(db_completion) {
-       case -1:
-           db_putchar('\n');
-           local_prev = db_recover;
-           if (_setjmp(db_recover = &db_jmpbuf) == 0 &&
-               (c == 'y' || c == ' ' || c == '\t'))
-                   db_print_completion(db_tok_string);
-           db_recover = local_prev;
-           db_completion = 0;
-           db_reset_more();
-           db_output_prompt();
-           if (db_le > db_lbuf_start) {
-                   for (start = db_lbuf_start; start < db_le; start++)
-                           db_putchar(*start);
-               db_putnchars(BACKUP, db_le - db_lc);
-           }
-           return(FALSE);
-
-       case 0:
-           break;
-
-       default:
-           if (c == '\t') {
-               db_printf("\nThere are %d possibilities. ", db_completion);
-               db_printf("Do you really wish to see them all [n] ? ");
-               db_force_whitespace();
-               db_completion = -1;
-               db_reset_more();
-               return(FALSE);
-           }
-           db_completion = 0;
-           break;
-       }
-
-       switch (c) {
-           case '\t':
-               /* symbol completion */
-               if (db_lc == db_lbuf_start || db_auto_completion == 0)
-                   break;
-               if (db_le == db_lbuf_end) {
-                   cnputc('\007');
-                   break;
-               }
-               start = db_lc - 1;
-               while (start >= db_lbuf_start &&
-                      ((*start >= 'A' && *start <= 'Z') ||
-                       (*start >= 'a' && *start <= 'z') ||
-                       (*start >= '0' && *start <= '9') ||
-                       *start == '_' || *start == ':'))
-                   start--;
-               if (start == db_lc - 1)
-                   break;
-               if (start > db_lbuf_start && *start == '$') {
-                   cnputc('\007');
-                   break;
-               }
-               sym = db_tok_string;
-               restart = ++start;
-               do {
-                   *sym++ = *start++;
-               } while (start != db_lc &&
-                        sym != db_tok_string + sizeof(db_tok_string));
-               if (sym == db_tok_string + sizeof(db_tok_string)) {
-                   cnputc('\007');
-                   break;
-               }
-               *sym = '\0';
-               db_completion = db_lookup_incomplete(db_tok_string,
-                                                    sizeof(db_tok_string));
-               if (db_completion == 0) {
-                   /* symbol unknown */
-                   cnputc('\007');
-                   break;
-               }
-
-               len = strlen(db_tok_string) - (start - restart);
-               if (db_completion == 1 &&
-                   (db_le == db_lc ||
-                    ((db_le > db_lc) && *db_lc != ' ')))
-                   len++;
-               for (p = db_le - 1; p >= db_lc; p--)
-                   *(p + len) = *p;
-               db_le += len;
-               for (sym = &db_tok_string[start - restart];
-                    *sym != '\0'; sym++)
-                   *db_lc++ = *sym;
-
-               if (db_completion == 1 || db_completion > db_auto_completion) {
-                   for (sym = &db_tok_string[start - restart];
-                        *sym != '\0'; sym++)
-                       cnputc(*sym);
-                   if (db_completion == 1) {
-                       if (db_le == db_lc ||
-                           ((db_le > db_lc) && *db_lc != ' ')) {
-                           cnputc(' ');
-                           *db_lc++ = ' ';
-                       }
-                       db_completion = 0;
-                   }
-                   db_putstring(db_lc, db_le - db_lc);
-                   db_putnchars(BACKUP, db_le - db_lc);
-               }
-
-               if (db_completion > 1) {
-                   cnputc('\007');
-                   if (db_completion <= db_auto_completion) {
-                       db_putchar('\n');
-                       db_print_completion(db_tok_string);
-                       db_completion = 0;
-                       db_reset_more();
-                       db_output_prompt();
-                       if (db_le > db_lbuf_start) {
-                           for (start = db_lbuf_start; start < db_le; start++)
-                               db_putchar(*start);
-                           db_putnchars(BACKUP, db_le - db_lc);
-                       }
-                   }
-               }
-               break;
-
-           case CTRL('b'):
-               /* back up one character */
-               if (db_lc > db_lbuf_start) {
-                   cnputc(BACKUP);
-                   db_lc--;
-               }
-               break;
-           case CTRL('f'):
-               /* forward one character */
-               if (db_lc < db_le) {
-                   cnputc(*db_lc);
-                   db_lc++;
-               }
-               break;
-           case CTRL('a'):
-               /* beginning of line */
-               while (db_lc > db_lbuf_start) {
-                   cnputc(BACKUP);
-                   db_lc--;
-               }
-               break;
-           case CTRL('e'):
-               /* end of line */
-               while (db_lc < db_le) {
-                   cnputc(*db_lc);
-                   db_lc++;
-               }
-               break;
-           case CTRL('h'):
-           case 0177:
-               /* erase previous character */
-               if (db_lc > db_lbuf_start)
-                   db_delete(1, DEL_BWD);
-               break;
-           case CTRL('d'):
-               /* erase next character */
-               if (db_lc < db_le)
-                   db_delete(1, DEL_FWD);
-               break;
-           case CTRL('k'):
-               /* delete to end of line */
-               if (db_lc < db_le)
-                   db_delete(db_le - db_lc, DEL_FWD);
-               break;
-           case CTRL('u'):
-               /* delete to beginning of line */
-               if (db_lc > db_lbuf_start)
-                   db_delete(db_lc - db_lbuf_start, DEL_BWD);
-               break;
-           case CTRL('t'):
-               /* twiddle last 2 characters */
-               if (db_lc >= db_lbuf_start + 2) {
-                   c = db_lc[-2];
-                   db_lc[-2] = db_lc[-1];
-                   db_lc[-1] = c;
-                   cnputc(BACKUP);
-                   cnputc(BACKUP);
-                   cnputc(db_lc[-2]);
-                   cnputc(db_lc[-1]);
-               }
-               break;
-           case CTRL('c'):
-           case CTRL('g'):
-               db_delete_line();
-#if DB_HISTORY_SIZE != 0
-               db_history_curr = db_history_last;
-               if (c == CTRL('g') && db_hist_search) {
-                       for (p = db_hist_search_string, db_le = db_lbuf_start;
-                            *p; ) {
-                               *db_le++ = *p++;
-                       }
-                       db_lc = db_le;
-                       *db_le = '\0';
-                       db_putstring(db_lbuf_start, db_le - db_lbuf_start);
-               }
-#endif
-               break;
-#if DB_HISTORY_SIZE != 0
-           case CTRL('r'):
-               if (db_hist_search++ == 0) {
-                       /* starting a history lookup */
-                       register char *cp1, *cp2;
-                       for (cp1 = db_lbuf_start, cp2 = db_hist_search_string;
-                            cp1 < db_le;
-                            cp1++, cp2++)
-                               *cp2 = *cp1;
-                       *cp2 = '\0';
-                       db_hist_search++;
-               }
-               /* FALL THROUGH */
-           case CTRL('p'):
-               {
-               char * old_history_curr = db_history_curr;
-
-               if (db_hist_unmodified++ == 0)
-                       db_hist_unmodified++;
-               DEC_DB_CURR();
-               while (db_history_curr != db_history_last) {
-                       DEC_DB_CURR();
-                       if (*db_history_curr == '\0') {
-                               INC_DB_CURR();
-                               if (db_hist_search <= 1) {
-                                       if (*db_history_curr == '\0')
-                                               cnputc('\007');
-                                       else
-                                               DEC_DB_CURR();
-                                       break;
-                               }
-                               if (*db_history_curr == '\0') {
-                                       cnputc('\007');
-                                       db_history_curr = old_history_curr;
-                                       DEC_DB_CURR();
-                                       break;
-                               }
-                               if (db_history_curr != db_history_last &&
-                                   db_hist_substring(db_history_curr,
-                                                     db_hist_search_string)) {
-                                       DEC_DB_CURR();
-                                       break;
-                               }
-                               DEC_DB_CURR();
-                       }
-               }
-               if (db_history_curr == db_history_last) {
-                       cnputc('\007');
-                       db_history_curr = old_history_curr;
-               } else {
-                       INC_DB_CURR();
-                       db_delete_line();
-                       for (p = db_history_curr, db_le = db_lbuf_start;
-                            *p; ) {
-                               *db_le++ = *p++;
-                               if (p == db_history + db_history_size) {
-                                       p = db_history;
-                               }
-                       }
-                       db_lc = db_le;
-                       *db_le = '\0';
-                       db_putstring(db_lbuf_start, db_le - db_lbuf_start);
-               }
-               break;
-               }
-           case CTRL('s'):
-               if (db_hist_search++ == 0) {
-                       /* starting a history lookup */
-                       register char *cp1, *cp2;
-                       for (cp1 = db_lbuf_start, cp2 = db_hist_search_string;
-                            cp1 < db_le;
-                            cp1++, cp2++)
-                               *cp2 = *cp1;
-                       *cp2 = '\0';
-                       db_hist_search++;
-               }
-               /* FALL THROUGH */
-           case CTRL('n'):
-               {
-               char *old_history_curr = db_history_curr;
-
-               if (db_hist_unmodified++ == 0)
-                       db_hist_unmodified++;
-               while (db_history_curr != db_history_last) {
-                       if (*db_history_curr == '\0') {
-                               if (db_hist_search <= 1)
-                                       break;
-                               INC_DB_CURR();
-                               if (db_history_curr != db_history_last &&
-                                   db_hist_substring(db_history_curr,
-                                                     db_hist_search_string)) {
-                                       DEC_DB_CURR();
-                                       break;
-                               }
-                               DEC_DB_CURR();
-                       }
-                       INC_DB_CURR();
-               }
-               if (db_history_curr != db_history_last) {
-                       INC_DB_CURR();
-                       if (db_history_curr != db_history_last) {
-                               db_delete_line();
-                               for (p = db_history_curr,
-                                    db_le = db_lbuf_start; *p;) {
-                                       *db_le++ = *p++;
-                                       if (p == db_history +
-                                           db_history_size) {
-                                               p = db_history;
-                                       }
-                               }
-                               db_lc = db_le;
-                               *db_le = '\0';
-                               db_putstring(db_lbuf_start,
-                                            db_le - db_lbuf_start);
-                       } else {
-                               cnputc('\007');
-                               db_history_curr = old_history_curr;
-                       }
-               } else {
-                       cnputc('\007');
-                       db_history_curr = old_history_curr;
-               }
-               break;
-               }
-#endif
-           /* refresh the command line */
-           case CTRL('l'):
-               db_putstring("^L\n", 3);
-               if (db_le > db_lbuf_start) {
-                       db_putstring(db_lbuf_start, db_le - db_lbuf_start);
-                       db_putnchars(BACKUP, db_le - db_lc);
-               }
-               break;
-           case '\n':
-           case '\r':
-#if DB_HISTORY_SIZE != 0
-               /* Check if it is the same as the previous line */
-               if (db_history_prev) {
-                       char *pc;
-
-                       /* Is it unmodified */
-                       for (p = db_history_prev, pc = db_lbuf_start;
-                            pc != db_le && *p;) {
-                               if (*p != *pc)
-                                   break;
-                               if (++p == db_history + db_history_size) {
-                                       p = db_history;
-                               }
-                               if (++pc == db_history + db_history_size) {
-                                       pc = db_history;
-                               }
-                       }
-                       if (!*p && pc == db_le) {
-                               /* Repeated previous line, not saved */
-                               db_history_curr = db_history_last;
-                               *db_le++ = c;
-                               db_hist_search = 0;
-                               db_hist_unmodified = 0;
-                               return (TRUE);
-                       }
-               }
-               if (db_le != db_lbuf_start &&
-                   (db_hist_unmodified == 0 || !db_hist_ignore_dups)) {
-                       db_history_prev = db_history_last;
-                       for (p = db_lbuf_start; p != db_le; p++) {
-                               *db_history_last++ = *p;
-                               if (db_history_last == db_history +
-                                   db_history_size) {
-                                       db_history_last = db_history;
-                               }
-                       }
-                       *db_history_last++ = '\0';
-               }
-               db_history_curr = db_history_last;
-#endif
-               *db_le++ = c;
-               db_hist_search = 0;
-               db_hist_unmodified = 0;
-               return (TRUE);
-           default:
-               if (db_le == db_lbuf_end) {
-                   cnputc('\007');
-               }
-               else if (c >= ' ' && c <= '~') {
-                   for (p = db_le; p > db_lc; p--)
-                       *p = *(p-1);
-                   *db_lc++ = c;
-                   db_le++;
-                   cnputc(c);
-                   db_putstring(db_lc, db_le - db_lc);
-                   db_putnchars(BACKUP, db_le - db_lc);
-               }
-               break;
-       }
-       if (db_hist_search)
-               db_hist_search--;
-       if (db_hist_unmodified)
-               db_hist_unmodified--;
-       return (FALSE);
-}
-
-int
-db_readline(
-       char *  lstart,
-       int     lsize)
-{
-       db_force_whitespace();  /* synch output position */
-
-       db_lbuf_start = lstart;
-       db_lbuf_end   = lstart + lsize - 1;
-       db_lc = lstart;
-       db_le = lstart;
-
-       while (!db_inputchar(cngetc()))
-           continue;
-
-       db_putchar('\n');       /* synch output position */
-
-       *db_le = 0;
-       return (db_le - db_lbuf_start);
-}
-
-void
-db_check_interrupt(void)
-{
-       register int    c;
-
-       c = cnmaygetc();
-       switch (c) {
-           case -1:            /* no character */
-               return;
-
-           case CTRL('c'):
-               db_error((char *)0);
-               /*NOTREACHED*/
-
-           case CTRL('s'):
-               do {
-                   c = cnmaygetc();
-                   if (c == CTRL('c'))
-                       db_error((char *)0);
-               } while (c != CTRL('q'));
-               break;
-
-           default:
-               /* drop on floor */
-               break;
-       }
-}
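
The command history deleted above is a ring buffer: db_history is a fixed char array holding NUL-separated commands packed end to end, and the cursors db_history_curr/db_history_last wrap around via INC_DB_CURR()/DEC_DB_CURR(). A minimal standalone sketch of that discipline (hist, hist_append, and the 64-byte size are illustrative, not the kernel's):

    #define HIST_SIZE 64                 /* tiny ring, for illustration only */

    static char hist[HIST_SIZE];         /* NUL-separated entries, packed */
    static char *hist_last = hist;       /* write cursor: one past newest entry */

    /* Wraparound step, as INC_DB_CURR()/DEC_DB_CURR() do above. */
    static char *inc_curr(char *p) { p++; return (p == hist + HIST_SIZE) ? hist : p; }
    static char *dec_curr(char *p) { return (p == hist) ? hist + HIST_SIZE - 1 : p - 1; }

    /* Append one command; when the ring fills, the oldest bytes are simply
     * overwritten, which is also how the deleted file behaves. */
    static void hist_append(const char *cmd)
    {
        while (*cmd) {
            *hist_last = *cmd++;
            hist_last = inc_curr(hist_last);
        }
        *hist_last = '\0';
        hist_last = inc_curr(hist_last);
    }

Stepping backwards from hist_last with dec_curr() until a NUL boundary recovers the previous entry; that scan is what the ^P/^N handlers above perform with DEC_DB_CURR()/INC_DB_CURR().
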
diff --git a/osfmk/ddb/db_input.h b/osfmk/ddb/db_input.h
deleted file mode 100644 (file)
index 107bfcd..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:19:48  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:10  ezf]
- *
- * Revision 1.1.2.3  1993/09/17  21:34:37  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:17  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:27:36  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:08  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_INPUT_H_
-#define        _DDB_DB_INPUT_H_
-
-/* Prototypes for functions exported by this module.
- */
-
-int db_readline(
-       char *  lstart,
-       int     lsize);
-
-void db_check_interrupt(void);
-
-#endif /* !_DDB_DB_INPUT_H_ */
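
db_input.h above exported just the two entry points. A hypothetical caller, sketched along the lines of the command loop in db_command.c (command_loop and CMD_LINE_SIZE are illustrative names, not kernel symbols):

    #include <ddb/db_input.h>

    #define CMD_LINE_SIZE 256            /* illustrative buffer size */

    static char cmd_line[CMD_LINE_SIZE];

    static void command_loop(void)
    {
        for (;;) {
            /* db_readline() line-edits and echoes one command, NUL-terminates
             * the buffer, and returns its length; db_lex.c treats a zero
             * return as end of input. */
            int n = db_readline(cmd_line, sizeof(cmd_line));
            if (n == 0)
                break;
            /* ... hand the line to the lexer / command dispatcher ... */
            db_check_interrupt();        /* poll the console for ^C and ^S/^Q */
        }
    }
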
diff --git a/osfmk/ddb/db_lex.c b/osfmk/ddb/db_lex.c
deleted file mode 100644 (file)
index 5097585..0000000
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.11.3  1996/01/09  19:15:49  devrcs
- *     Change 'register foo' to 'register int foo'.
- *     [1995/12/01  21:42:12  jfraser]
- *
- *     Merged '64-bit safe' changes from DEC alpha port.
- *     [1995/11/21  18:03:11  jfraser]
- *
- * Revision 1.1.11.2  1995/01/06  19:10:21  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     * Revision 1.1.4.6  1994/05/06  18:39:20  tmt
- *     Merged osc1.3dec/shared with osc1.3b19
- *     Merge Alpha changes into osc1.312b source code.
- *     String protos.
- *     64bit cleanup.
- *     Cleanup to quiet gcc warnings.
- *     * End1.3merge
- *     [1994/11/04  08:49:35  dwm]
- * 
- * Revision 1.1.11.1  1994/09/23  01:19:59  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:14  ezf]
- * 
- * Revision 1.1.4.4  1993/08/11  20:37:55  elliston
- *     Add ANSI Prototypes.  CR #9523.
- *     [1993/08/11  03:33:26  elliston]
- * 
- * Revision 1.1.4.3  1993/07/27  18:27:38  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:13  elliston]
- * 
- * Revision 1.1.4.2  1993/06/02  23:11:27  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:56:32  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:01:10  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.5  91/10/09  16:00:20  af
- *      Revision 2.4.3.1  91/10/05  13:06:25  jeffreyh
- *             Added relational operator tokens and string constant etc.
- *             Added input switching functions for macro and conditional command.
- *             Moved skip_to_eol() from db_command.c and added db_last_lp to print
- *               skipped input data as a warning message.
- *             Added last input repetition support to db_read_line.
- *             Changed db_lex() to always set db_tok_string for error message.
- *             [91/08/29            tak]
- * 
- * Revision 2.4.3.1  91/10/05  13:06:25  jeffreyh
- *     Added relational operator tokens and string constant etc.
- *     Added input switching functions for macro and conditional command.
- *     Moved skip_to_eol() from db_command.c and added db_last_lp to print
- *       skipped input data as a warning message.
- *     Added last input repetition support to db_read_line.
- *     Changed db_lex() to always set db_tok_string for error message.
- *     [91/08/29            tak]
- * 
- * Revision 2.4  91/05/14  15:34:23  mrt
- *     Correcting copyright
- *
- * Revision 2.3  91/02/05  17:06:36  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:18:20  mrt]
- * 
- * Revision 2.2  90/08/27  21:51:10  dbg
- *     Add 'dotdot' token.
- *     [90/08/22            dbg]
- * 
- *     Allow backslash to quote any character into an identifier.
- *     Allow colon in identifier for symbol table qualification.
- *     [90/08/16            dbg]
- *     Reduce lint.
- *     [90/08/07            dbg]
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-/*
- * Lexical analyzer.
- */
-#include <string.h>                    /* For strlcpy(), strncmp(), strlen() */
-#include <ddb/db_lex.h>
-#include <ddb/db_command.h>
-#include <ddb/db_input.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-
-char   db_line[DB_LEX_LINE_SIZE];
-char   db_last_line[DB_LEX_LINE_SIZE];
-const char *db_lp, *db_endlp;
-const char *db_last_lp;
-int    db_look_char = 0;
-db_expr_t db_look_token = 0;
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-void db_flush_line(void);
-void db_unread_char(int c);
-
-
-int
-db_read_line(const char *repeat_last)
-{
-       int     i;
-
-       i = db_readline(db_line, sizeof(db_line));
-       if (i == 0)
-           return (0); /* EOI */
-       if (repeat_last) {
-           if (strncmp(db_line, repeat_last, strlen(repeat_last)) == 0) {
-               strlcpy(db_line, db_last_line, DB_LEX_LINE_SIZE);
-               db_printf("%s", db_line);
-               i = strlen(db_line);
-           } else if (db_line[0] != '\n' && db_line[0] != 0)
-               strlcpy(db_last_line, db_line, DB_LEX_LINE_SIZE);
-       }
-       db_lp = db_line;
-       db_endlp = db_lp + i;
-       db_last_lp = db_lp;
-       db_look_char = 0;
-       db_look_token = 0;
-       return (i);
-}
-
-void
-db_flush_line(void)
-{
-       db_lp = db_line;
-       db_last_lp = db_lp;
-       db_endlp = db_line;
-}
-
-void
-db_switch_input(const char *buffer, int        size)
-{
-       db_lp = buffer;
-       db_last_lp = db_lp;
-       db_endlp = buffer + size;
-       db_look_char = 0;
-       db_look_token = 0;
-}
-
-void
-db_save_lex_context(register struct db_lex_context *lp)
-{
-       lp->l_ptr = db_lp;
-       lp->l_eptr = db_endlp;
-       lp->l_char = db_look_char;
-       lp->l_token = (int)db_look_token;
-}
-
-void
-db_restore_lex_context(register struct db_lex_context *lp)
-{
-       db_lp = lp->l_ptr;
-       db_last_lp = db_lp;
-       db_endlp = lp->l_eptr;
-       db_look_char = lp->l_char;
-       db_look_token = lp->l_token;
-}
-
-int
-db_read_char(void)
-{
-       int     c;
-
-       if (db_look_char != 0) {
-           c = db_look_char;
-           db_look_char = 0;
-       }
-       else if (db_lp >= db_endlp)
-           c = -1;
-       else 
-           c = *db_lp++;
-       return (c);
-}
-
-void
-db_unread_char(int c)
-{
-       db_look_char = c;
-}
-
-void
-db_unread_token(int t)
-{
-       db_look_token = t;
-}
-
-int
-db_read_token(void)
-{
-       int     t;
-
-       if (db_look_token) {
-           t = (int)db_look_token;
-           db_look_token = 0;
-       }
-       else {
-           db_last_lp = db_lp;
-           if (db_look_char)
-               db_last_lp--;
-           t = db_lex();
-       }
-       return (t);
-}
-
-db_expr_t db_tok_number;
-char   db_tok_string[TOK_STRING_SIZE];
-
-db_expr_t db_radix = 16;
-
-void
-db_flush_lex(void)
-{
-       db_flush_line();
-       db_look_char = 0;
-       db_look_token = 0;
-}
-
-#define        DB_DISP_SKIP    40              /* number of chars to display skip */
-
-void
-db_skip_to_eol(void)
-{
-       register int skip;
-       register int t;
-       register int n;
-       const char *p;
-
-       t = db_read_token();
-       p = db_last_lp;
-       for (skip = 0; t != tEOL && t != tSEMI_COLON && t != tEOF; skip++)
-           t = db_read_token();
-       if (t == tSEMI_COLON)
-           db_unread_token(t);
-       if (skip != 0) {
-           while (p < db_last_lp && (*p == ' ' || *p == '\t'))
-               p++;
-           db_printf("Warning: Skipped input data \"");
-           for (n = 0; n < DB_DISP_SKIP && p < db_last_lp; n++)
-               db_printf("%c", *p++);
-           if (n >= DB_DISP_SKIP)
-               db_printf("....");
-           db_printf("\"\n");
-       }
-}
-
-int
-db_lex(void)
-{
-       register char *cp;
-       register int c;
-
-       c = db_read_char();
-       while (c <= ' ' || c > '~') {
-           if (c == '\n' || c == -1)
-               return (tEOL);
-           c = db_read_char();
-       }
-
-       cp = db_tok_string;
-       *cp++ = c;
-
-       if (c >= '0' && c <= '9') {
-           /* number */
-           int r, digit;
-
-           if (c > '0')
-               r = (int)db_radix;
-           else {
-               c = db_read_char();
-               if (c == 'O' || c == 'o')
-                   r = 8;
-               else if (c == 'T' || c == 't')
-                   r = 10;
-               else if (c == 'X' || c == 'x')
-                   r = 16;
-               else {
-                   cp--;
-                   r = (int)db_radix;
-                   db_unread_char(c);
-               }
-               c = db_read_char();
-               *cp++ = c;
-           }
-           db_tok_number = 0;
-           for (;;) {
-               if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
-                   digit = c - '0';
-               else if (r == 16 && ((c >= 'A' && c <= 'F') ||
-                                    (c >= 'a' && c <= 'f'))) {
-                   if (c >= 'a')
-                       digit = c - 'a' + 10;
-                   else
-                       digit = c - 'A' + 10;
-               }
-               else
-                   break;
-               db_tok_number = db_tok_number * r + digit;
-               c = db_read_char();
-               if (cp < &db_tok_string[sizeof(db_tok_string)-1])
-                       *cp++ = c;
-           }
-           cp[-1] = 0;
-           if ((c >= '0' && c <= '9') ||
-               (c >= 'A' && c <= 'Z') ||
-               (c >= 'a' && c <= 'z') ||
-               (c == '_'))
-           {
-               db_printf("Bad character '%c' after number %s\n", 
-                               c, db_tok_string);
-               db_error(0);
-               db_flush_lex();
-               return (tEOF);
-           }
-           db_unread_char(c);
-           return (tNUMBER);
-       }
-       if ((c >= 'A' && c <= 'Z') ||
-           (c >= 'a' && c <= 'z') ||
-           c == '_' || c == '\\' || c == ':')
-       {
-           /* identifier */
-           if (c == '\\') {
-               c = db_read_char();
-               if (c == '\n' || c == -1)
-                   db_error("Bad '\\' at the end of line\n");
-               cp[-1] = c;
-           }
-           while (1) {
-               c = db_read_char();
-               if ((c >= 'A' && c <= 'Z') ||
-                   (c >= 'a' && c <= 'z') ||
-                   (c >= '0' && c <= '9') ||
-                   c == '_' || c == '\\' || c == ':' || c == '.')
-               {
-                   if (c == '\\') {
-                       c = db_read_char();
-                       if (c == '\n' || c == -1)
-                           db_error("Bad '\\' at the end of line\n");
-                   }
-                   *cp++ = c;
-                   if (cp == db_tok_string+sizeof(db_tok_string)) {
-                       db_error("String too long\n");
-                       db_flush_lex();
-                       return (tEOF);
-                   }
-                   continue;
-               }
-               else {
-                   *cp = '\0';
-                   break;
-               }
-           }
-           db_unread_char(c);
-           return (tIDENT);
-       }
-
-       *cp = 0;
-       switch (c) {
-           case '+':
-               return (tPLUS);
-           case '-':
-               return (tMINUS);
-           case '.':
-               c = db_read_char();
-               if (c == '.') {
-                   *cp++ = c;
-                   *cp = 0;
-                   return (tDOTDOT);
-               }
-               db_unread_char(c);
-               return (tDOT);
-           case '*':
-               return (tSTAR);
-           case '/':
-               return (tSLASH);
-           case '=':
-               c = db_read_char();
-               if (c == '=') {
-                   *cp++ = c;
-                   *cp = 0;
-                   return(tLOG_EQ);
-               }
-               db_unread_char(c);
-               return (tEQ);
-           case '%':
-               return (tPCT);
-           case '#':
-               return (tHASH);
-           case '(':
-               return (tLPAREN);
-           case ')':
-               return (tRPAREN);
-           case ',':
-               return (tCOMMA);
-           case '\'':
-               return (tQUOTE);
-           case '"':
-               /* string */
-               cp = db_tok_string;
-               c = db_read_char();
-               while (c != '"' && c > 0 && c != '\n') {
-                   if (cp >= &db_tok_string[sizeof(db_tok_string)-1]) {
-                       db_error("String too long\n");
-                       db_flush_lex();
-                       return (tEOF);
-                   }
-                   if (c == '\\') {
-                       c = db_read_char();
-                       switch(c) {
-                       case 'n':
-                           c = '\n'; break;
-                       case 't':
-                           c = '\t'; break;
-                       case '\\':
-                       case '"':
-                           break;
-                       default:
-                           db_printf("Bad escape sequence '\\%c'\n", c);
-                           db_error(0);
-                           db_flush_lex();
-                           return (tEOF);
-                       }
-                   }
-                   *cp++ = c;
-                   c = db_read_char();
-               }
-               *cp = 0;
-               if (c != '"') {
-                   db_error("Unterminated string constant\n");
-                   db_flush_lex();
-                   return (tEOF);
-               }
-               return (tSTRING);
-           case '$':
-               return (tDOLLAR);
-           case '!':
-               c = db_read_char();
-               if (c == '=') {
-                   *cp++ = c;
-                   *cp = 0;
-                   return(tLOG_NOT_EQ);
-               }
-               db_unread_char(c);
-               return (tEXCL);
-           case '&':
-               c = db_read_char();
-               if (c == '&') {
-                   *cp++ = c;
-                   *cp = 0;
-                   return(tLOG_AND);
-               }
-               db_unread_char(c);
-               return(tBIT_AND);
-           case '|':
-               c = db_read_char();
-               if (c == '|') {
-                   *cp++ = c;
-                   *cp = 0;
-                   return(tLOG_OR);
-               }
-               db_unread_char(c);
-               return(tBIT_OR);
-           case '<':
-               c = db_read_char();
-               *cp++ = c;
-               *cp = 0;
-               if (c == '<')
-                   return (tSHIFT_L);
-               if (c == '=')
-                   return (tLESS_EQ);
-               cp[-1] = 0;
-               db_unread_char(c);
-               return(tLESS);
-               break;
-           case '>':
-               c = db_read_char();
-               *cp++ = c;
-               *cp = 0;
-               if (c == '>')
-                   return (tSHIFT_R);
-               if (c == '=')
-                   return (tGREATER_EQ);
-               cp[-1] = 0;
-               db_unread_char(c);
-               return (tGREATER);
-               break;
-           case ';':
-               return (tSEMI_COLON);
-           case '?':
-               return (tQUESTION);
-           case -1:
-               strlcpy(db_tok_string, "<EOL>", TOK_STRING_SIZE);
-               return (tEOF);
-       }
-       db_printf("Bad character '%c'\n", c);
-       db_flush_lex();
-       return (tEOF);
-}
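
The number branch of db_lex() above encodes ddb's radix convention: a token starting with 0 may carry an o/t/x prefix forcing octal, decimal, or hex, and any other digit string is read in the current db_radix (16 by default). The same rule as a self-contained function (parse_number is an illustrative name, not a kernel symbol):

    /* Parse a ddb-style number: "0o..." octal, "0t..." decimal, "0x..." hex;
     * otherwise the digits are interpreted in default_radix. */
    static long parse_number(const char *s, int default_radix)
    {
        int r = default_radix;

        if (s[0] == '0' && (s[1] == 'o' || s[1] == 'O')) { r = 8;  s += 2; }
        else if (s[0] == '0' && (s[1] == 't' || s[1] == 'T')) { r = 10; s += 2; }
        else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { r = 16; s += 2; }

        long v = 0;
        for (; *s; s++) {
            int digit;
            if (*s >= '0' && *s <= '9')      digit = *s - '0';
            else if (*s >= 'a' && *s <= 'f') digit = *s - 'a' + 10;
            else if (*s >= 'A' && *s <= 'F') digit = *s - 'A' + 10;
            else break;
            if (digit >= r)
                break;                       /* digit not valid in this radix */
            v = v * r + digit;
        }
        return v;
    }

With the default radix of 16, "100" therefore reads as 0x100 (256), while "0t100" reads as decimal 100.
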
diff --git a/osfmk/ddb/db_lex.h b/osfmk/ddb/db_lex.h
deleted file mode 100644 (file)
index 2b32724..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.11.2  1995/01/06  19:10:24  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     64bit cleanup
- *     [1994/10/14  03:39:54  dwm]
- *
- * Revision 1.1.11.1  1994/09/23  01:20:10  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:18  ezf]
- * 
- * Revision 1.1.4.3  1993/07/27  18:27:40  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:19  elliston]
- * 
- * Revision 1.1.4.2  1993/06/02  23:11:33  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:56:37  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:24:17  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.5  91/10/09  16:00:48  af
- *      Revision 2.4.3.1  91/10/05  13:06:34  jeffreyh
- *             Added db_lex_context structure and some routine declarations
- *               for macro and conditional command.
- *             Added relational operator tokens etc. for condition expression.
- *             Changed TOK_STRING_SIZE from 120 to 64, and defined
- *               DB_LEX_LINE_SIZE as 256 which was previously embedded
- *               in db_lex.c as 120.
- *             [91/08/29            tak]
- *     Revision 2.4.1 91/07/15  09:30:00  tak
- *             Added db_lex_context for macro support
- *             Added some lexical constants to support logical expression etc.
- *             [91/05/15  13:55:00  tak]
- * 
- * Revision 2.4.3.1  91/10/05  13:06:34  jeffreyh
- *     Added db_lex_context structure and some routine declarations
- *       for macro and conditional command.
- *     Added relational operator tokens etc. for condition expression.
- *     Changed TOK_STRING_SIZE from 120 to 64, and defined
- *       DB_LEX_LINE_SIZE as 256 which was previously embedded
- *       in db_lex.c as 120.
- *     [91/08/29            tak]
- * 
- * Revision 2.4.1 91/07/15  09:30:00  tak
- *     Added db_lex_context for macro support
- *     Added some lexical constants to support logical expression etc.
- *     [91/05/15  13:55:00  tak]
- *
- * Revision 2.4  91/05/14  15:34:38  mrt
- *     Correcting copyright
- * 
- * Revision 2.3  91/02/05  17:06:41  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:18:28  mrt]
- * 
- * Revision 2.2  90/08/27  21:51:16  dbg
- *     Add 'dotdot' token.
- *     [90/08/22            dbg]
- *     Export db_flush_lex.
- *     [90/08/07            dbg]
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-/*
- * Lexical analyzer.
- */
-
-#ifndef        _DDB_DB_LEX_H_
-#define        _DDB_DB_LEX_H_
-
-#include <machine/db_machdep.h>          /* For db_expr_t */
-
-#define        TOK_STRING_SIZE         64 
-#define DB_LEX_LINE_SIZE       256
-
-struct db_lex_context {
-       int  l_char;            /* peek char */
-       int  l_token;           /* peek token */
-       const char *l_ptr;      /* line pointer */
-       const char *l_eptr;     /* line end pointer */
-};
-
-extern db_expr_t db_tok_number;
-extern char    db_tok_string[TOK_STRING_SIZE];
-extern db_expr_t db_radix;
-
-#define        tEOF            (-1)
-#define        tEOL            1
-#define        tNUMBER         2
-#define        tIDENT          3
-#define        tPLUS           4
-#define        tMINUS          5
-#define        tDOT            6
-#define        tSTAR           7
-#define        tSLASH          8
-#define        tEQ             9
-#define        tLPAREN         10
-#define        tRPAREN         11
-#define        tPCT            12
-#define        tHASH           13
-#define        tCOMMA          14
-#define        tQUOTE          15
-#define        tDOLLAR         16
-#define        tEXCL           17
-#define        tSHIFT_L        18
-#define        tSHIFT_R        19
-#define        tDOTDOT         20
-#define tSEMI_COLON    21
-#define tLOG_EQ                22
-#define tLOG_NOT_EQ    23
-#define tLESS          24
-#define tLESS_EQ       25
-#define tGREATER       26
-#define tGREATER_EQ    27
-#define tBIT_AND       28
-#define tBIT_OR                29
-#define tLOG_AND       30
-#define tLOG_OR                31
-#define tSTRING                32
-#define tQUESTION      33
-
-/* Prototypes for functions exported by this module.
- */
-int db_read_line(const char *);
-
-void db_switch_input(const char *, int);
-
-void db_save_lex_context(struct db_lex_context *lp);
-
-void db_restore_lex_context(struct db_lex_context *lp);
-
-int db_read_char(void);
-
-void db_unread_token(int t);
-
-int db_read_token(void);
-
-void db_flush_lex(void);
-
-void db_skip_to_eol(void);
-
-int db_lex(void);
-
-#endif /* !_DDB_DB_LEX_H_ */
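
struct db_lex_context above exists so a caller can park the current input line, lex a different buffer, and resume; db_exec_macro() in db_macro.c below drives this through db_exec_cmd_nest(). The pattern, sketched (run_nested is an illustrative name, not a kernel function):

    #include <ddb/db_lex.h>

    /* Execute commands from a side buffer (e.g. a macro body) without
     * losing the caller's place in the current input line. */
    static void run_nested(const char *buf, int size)
    {
        struct db_lex_context saved;

        db_save_lex_context(&saved);     /* capture line pointers + peeked char/token */
        db_switch_input(buf, size);      /* lex from the side buffer instead */
        /* ... db_read_token() / command dispatch against buf ... */
        db_restore_lex_context(&saved);  /* resume the original line */
    }
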
diff --git a/osfmk/ddb/db_macro.c b/osfmk/ddb/db_macro.c
deleted file mode 100644 (file)
index 9e9e6cf..0000000
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-#include <kern/thread.h>
-#include <string.h>                    /* For strncmp(), strlcpy() */
-
-#include <machine/db_machdep.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_macro.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-
-/*
- * debugger macro support
- */
-
-#define DB_NUSER_MACRO 10              /* max user macros */
-
-int            db_macro_free = DB_NUSER_MACRO;
-struct db_user_macro {
-       char    m_name[TOK_STRING_SIZE];
-       char    m_lbuf[DB_LEX_LINE_SIZE];
-       int     m_size;
-} db_user_macro[DB_NUSER_MACRO];
-
-int            db_macro_level = -1;
-db_expr_t      db_macro_args[DB_MACRO_LEVEL][DB_MACRO_NARGS];
-
-
-/* Prototypes for functions local to this file.
- */
-static struct db_user_macro *db_lookup_macro(char *name);
-
-
-static struct db_user_macro *
-db_lookup_macro(char *name)
-{
-       register struct db_user_macro *mp;
-
-       for (mp = db_user_macro; mp < &db_user_macro[DB_NUSER_MACRO]; mp++) {
-           if (mp->m_name[0] == 0)
-               continue;
-           if (strncmp(mp->m_name, name, TOK_STRING_SIZE) == 0)
-               return(mp);
-       }
-       return(0);
-}
-
-void
-db_def_macro_cmd(void)
-{
-       register char *p;
-       register int c;
-       register struct db_user_macro *mp, *ep;
-
-       if (db_read_token() != tIDENT) {
-           db_printf("Bad macro name \"%s\"\n", db_tok_string);
-           db_error(0);
-           /* NOTREACHED */
-       }
-       if ((mp = db_lookup_macro(db_tok_string)) == 0) {
-           if (db_macro_free <= 0)
-               db_error("Too many macros\n");
-               /* NOTREACHED */
-           ep = &db_user_macro[DB_NUSER_MACRO];
-           for (mp = db_user_macro; mp < ep && mp->m_name[0]; mp++);
-           if (mp >= ep)
-               db_error("ddb: internal error(macro)\n");
-               /* NOTREACHED */
-           db_macro_free--;
-           strlcpy(mp->m_name, db_tok_string, TOK_STRING_SIZE);
-       }
-       for (c = db_read_char(); c == ' ' || c == '\t'; c = db_read_char());
-       for (p = mp->m_lbuf; c > 0; c = db_read_char())
-           *p++ = c;
-       *p = 0;
-       mp->m_size = p - mp->m_lbuf;
-}
-
-void
-db_del_macro_cmd(void)
-{
-       struct db_user_macro *mp = NULL;
-
-       if (db_read_token() != tIDENT 
-           || (mp = db_lookup_macro(db_tok_string)) == 0) {
-           db_printf("No such macro \"%s\"\n", db_tok_string);
-           db_error(0);
-           /* NOTREACHED */
-       }
-       mp->m_name[0] = 0;
-       db_macro_free++;
-}
-
-void
-db_show_macro(void)
-{
-       register struct db_user_macro *mp;
-       int  t;
-       char *name = 0;
-
-       if ((t = db_read_token()) == tIDENT)
-           name = db_tok_string;
-       else
-           db_unread_token(t);
-       for (mp = db_user_macro; mp < &db_user_macro[DB_NUSER_MACRO]; mp++) {
-           if (mp->m_name[0] == 0)
-               continue;
-           if (name && strncmp(mp->m_name, name, TOK_STRING_SIZE))
-               continue;
-           db_printf("%s: %s", mp->m_name, mp->m_lbuf);
-       }
-}
-
-int
-db_exec_macro(char *name)
-{
-       register struct db_user_macro *mp;
-       register int n;
-
-       if ((mp = db_lookup_macro(name)) == 0)
-           return(-1);
-       if (db_macro_level+1 >= DB_MACRO_LEVEL) {
-           db_macro_level = -1;
-           db_error("Macro nesting too deep\n");
-           /* NOTREACHED */
-       }
-       for (n = 0;
-            n < DB_MACRO_NARGS && 
-            db_expression(&db_macro_args[db_macro_level+1][n]);
-            n++);
-       while (n < DB_MACRO_NARGS)
-           db_macro_args[db_macro_level+1][n++] = 0;
-       db_macro_level++;
-       db_exec_cmd_nest(mp->m_lbuf, mp->m_size);
-       db_macro_level--;
-       return(0);
-}
-
-int
-db_arg_variable(__unused struct db_variable *vp, db_expr_t *valuep, int flag,
-               db_var_aux_param_t ap)
-{
-       db_expr_t value;
-       char *name;
-       db_addr_t offset;
-
-       if (flag == DB_VAR_SHOW) {
-           value = db_macro_args[ap->hidden_level][ap->suffix[0]-1];
-           db_printf("%#lln", (unsigned long long)value);
-           db_find_xtrn_task_sym_and_offset(value, &name, &offset, TASK_NULL);
-           if (name != (char *)0 && offset <= db_maxoff && offset != value) {
-               db_printf("\t%s", name);
-               if (offset != 0)
-                   db_printf("+%#llr", (unsigned long long)offset);
-           }
-           return(0);
-       }
-
-       if (ap->level != 1 || ap->suffix[0] < 1 ||
-           ap->suffix[0] > DB_MACRO_NARGS) {
-           db_error("Bad $arg variable\n");
-           /* NOTREACHED */
-       }
-       if (flag == DB_VAR_GET)
-           *valuep = db_macro_args[db_macro_level][ap->suffix[0]-1];
-       else
-           db_macro_args[db_macro_level][ap->suffix[0]-1] = *valuep;
-       return(0);
-}
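
The file removed above implements ddb's user-defined macros: a fixed table of DB_NUSER_MACRO name/body slots, plus a stack of argument frames (db_macro_args) so $arg variables resolve at the correct depth while macros nest. A minimal user-space sketch of the same fixed-slot define/lookup scheme follows; the sizes and names are hypothetical stand-ins, not the kernel code.

#include <stdio.h>
#include <string.h>

#define NUSER_MACRO 10          /* fixed slot count, as in db_macro.c */
#define NAME_SIZE   64
#define LINE_SIZE   256

struct user_macro {
	char name[NAME_SIZE];   /* empty name marks a free slot */
	char body[LINE_SIZE];
};

static struct user_macro macros[NUSER_MACRO];

static struct user_macro *
lookup_macro(const char *name)
{
	for (int i = 0; i < NUSER_MACRO; i++) {
		if (macros[i].name[0] != '\0' &&
		    strncmp(macros[i].name, name, NAME_SIZE) == 0)
			return &macros[i];
	}
	return NULL;
}

static int
define_macro(const char *name, const char *body)
{
	struct user_macro *mp = lookup_macro(name);

	if (mp == NULL) {       /* scan for a free slot, as db_def_macro_cmd does */
		for (int i = 0; i < NUSER_MACRO && mp == NULL; i++)
			if (macros[i].name[0] == '\0')
				mp = &macros[i];
		if (mp == NULL)
			return -1;      /* "Too many macros" */
		strncpy(mp->name, name, NAME_SIZE - 1);
		mp->name[NAME_SIZE - 1] = '\0';
	}
	strncpy(mp->body, body, LINE_SIZE - 1);
	mp->body[LINE_SIZE - 1] = '\0';
	return 0;
}

int
main(void)
{
	define_macro("dumpvm", "show all task_vm");
	struct user_macro *mp = lookup_macro("dumpvm");
	if (mp != NULL)
		printf("%s: %s\n", mp->name, mp->body);
	return 0;
}

Using the empty name as the free-slot marker is why db_del_macro_cmd above only has to clear m_name[0] and bump db_macro_free.
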
diff --git a/osfmk/ddb/db_macro.h b/osfmk/ddb/db_macro.h
deleted file mode 100644 (file)
index 2a21e44..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:20:28  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:28  ezf]
- *
- * Revision 1.1.2.3  1993/09/17  21:34:39  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:20  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:27:48  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:28  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_MACRO_H_
-#define        _DDB_DB_MACRO_H_
-
-#include <machine/db_machdep.h>
-#include <ddb/db_variables.h>
-
-/* Prototypes for functions exported by this module.
- */
-void db_def_macro_cmd(void);
-
-void db_del_macro_cmd(void);
-
-void db_show_macro(void);
-
-int db_exec_macro(char *name);
-
-int db_arg_variable(
-       struct db_variable      *vp,
-       db_expr_t               *valuep,
-       int                     flag,
-       db_var_aux_param_t      ap);
-
-#endif /* !_DDB_DB_MACRO_H_ */
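
The header removed above is the whole macro surface: define, delete, show, execute, plus the $arg accessor. The -1 return from db_exec_macro is what lets ddb's command loop (in db_command.c, not part of this hunk) try an unknown command name as a macro before reporting an error. A hedged standalone sketch of that dispatch; the builtin test and macro table below are illustrative stand-ins, not the real ddb code.

#include <stdio.h>
#include <string.h>

/* Stand-in for db_exec_macro(): 0 = executed, -1 = no such macro. */
static int exec_macro(const char *name)
{
	if (strcmp(name, "dumpvm") != 0)
		return -1;
	printf("macro: %s\n", name);
	return 0;
}

/* Stand-in for a command-table lookup. */
static int is_builtin(const char *name)
{
	return strcmp(name, "show") == 0;
}

static void dispatch(const char *name)
{
	if (is_builtin(name))
		printf("builtin: %s\n", name);
	else if (exec_macro(name) < 0)  /* macro fallback failed too */
		printf("No such command or macro \"%s\"\n", name);
}

int main(void)
{
	dispatch("show");
	dispatch("dumpvm");
	dispatch("bogus");
	return 0;
}
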
diff --git a/osfmk/ddb/db_output.c b/osfmk/ddb/db_output.c
deleted file mode 100644 (file)
index 69bfeea..0000000
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Printf and character output for debugger.
- */
-
-#include <mach/boolean.h>
-#include <kern/misc_protos.h>
-#include <stdarg.h>
-#include <machine/db_machdep.h>
-#include <ddb/db_command.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_input.h>
-#include <ddb/db_output.h>
-#include <ddb/db_task_thread.h>
-
-/*
- *     Character output - tracks position in line.
- *     To do this correctly, we should know how wide
- *     the output device is - then we could zero
- *     the line position when the output device wraps
- *     around to the start of the next line.
- *
- *     Instead, we count the number of spaces printed
- *     since the last printing character so that we
- *     don't print trailing spaces.  This avoids most
- *     of the wraparounds.
- */
-
-#ifndef        DB_MAX_LINE
-#define        DB_MAX_LINE             43      /* maximum line */
-#define DB_MAX_WIDTH           132     /* maximum width */
-#endif /* DB_MAX_LINE */
-
-#define DB_MIN_MAX_WIDTH       20      /* minimum max width */
-#define DB_MIN_MAX_LINE                3       /* minimum max line */
-#define CTRL(c)                        ((c) & 0xff)
-
-int    db_output_position = 0;         /* output column */
-int    db_output_line = 0;             /* output line number */
-int    db_last_non_space = 0;          /* last non-space character */
-int    db_last_gen_return = 0;         /* last character generated return */
-int    db_auto_wrap = 1;               /* auto wrap at end of line ? */
-int    db_tab_stop_width = 8;          /* how wide are tab stops? */
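-/* NEXT_TAB(i): the first tab stop strictly after column i */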
-#define        NEXT_TAB(i) \
-       ((((i) + db_tab_stop_width) / db_tab_stop_width) * db_tab_stop_width)
-int    db_max_line = DB_MAX_LINE;      /* output max lines */
-int    db_max_width = DB_MAX_WIDTH;    /* output line width */
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-static void db_more(void);
-void db_advance_output_position(int new_output_position,
-                               int blank);
-
-
-/*
- * Force pending whitespace.
- */
-void
-db_force_whitespace(void)
-{
-       register int last_print, next_tab;
-
-       last_print = db_last_non_space;
-       while (last_print < db_output_position) {
-           next_tab = NEXT_TAB(last_print);
-           if (next_tab <= db_output_position) {
-               cnputc('\t');
-               last_print = next_tab;
-           }
-           else {
-               cnputc(' ');
-               last_print++;
-           }
-       }
-       db_last_non_space = db_output_position;
-}
-
-void
-db_reset_more(void)
-{
-       db_output_line = 0;
-}
-
-static void
-db_more(void)
-{
-       const char *p;
-       boolean_t quit_output = FALSE;
-
-       for (p = "--db_more--"; *p; p++)
-           cnputc(*p);
-       switch(cngetc()) {
-       case ' ':
-           db_output_line = 0;
-           break;
-       case 'q':
-       case CTRL('c'):
-           db_output_line = 0;
-           quit_output = TRUE;
-           break;
-       default:
-           db_output_line--;
-           break;
-       }
-       p = "\b\b\b\b\b\b\b\b\b\b\b           \b\b\b\b\b\b\b\b\b\b\b";
-       while (*p)
-           cnputc(*p++);
-       if (quit_output) {
-           db_error((char *) 0);
-           /* NOTREACHED */
-       }
-}
-
-void
-db_advance_output_position(int new_output_position,
-                          int blank)
-{
-       if (db_max_width >= DB_MIN_MAX_WIDTH 
-           && new_output_position >= db_max_width) {
-               /* auto new line */
-               if (!db_auto_wrap || blank)
-                   cnputc('\n');
-               db_output_position = 0;
-               db_last_non_space = 0;
-               db_last_gen_return = 1;
-               db_output_line++;
-       } else {
-               db_output_position = new_output_position;
-       }
-}
-
-boolean_t
-db_reserve_output_position(int increment)
-{
-       if (db_max_width >= DB_MIN_MAX_WIDTH
-           && db_output_position + increment >= db_max_width) {
-               /* auto new line */
-               if (!db_auto_wrap || db_last_non_space != db_output_position)
-                   cnputc('\n');
-               db_output_position = 0;
-               db_last_non_space = 0;
-               db_last_gen_return = 1;
-               db_output_line++;
-               return TRUE;
-       }
-       return FALSE;
-}
-
-/*
- * Output character.  Buffer whitespace.
- */
-void
-db_putchar(char c)
-{
-       if (db_max_line >= DB_MIN_MAX_LINE && db_output_line >= db_max_line-1)
-           db_more();
-       if (c > ' ' && c <= '~') {
-           /*
-            * Printing character.
-            * If we have spaces to print, print them first.
-            * Use tabs if possible.
-            */
-           db_force_whitespace();
-           cnputc(c);
-           db_last_gen_return = 0;
-           db_advance_output_position(db_output_position+1, 0);
-           db_last_non_space = db_output_position;
-       }
-       else if (c == '\n') {
-           /* Return */
-           if (db_last_gen_return) {
-               db_last_gen_return = 0;
-           } else {
-               cnputc(c);
-               db_output_position = 0;
-               db_last_non_space = 0;
-               db_output_line++;
-               db_check_interrupt();
-           }
-       }
-       else if (c == '\t') {
-           /* assume tabs every 8 positions */
-           db_advance_output_position(NEXT_TAB(db_output_position), 1);
-       }
-       else if (c == ' ') {
-           /* space */
-           db_advance_output_position(db_output_position+1, 1);
-       }
-       else if (c == '\007') {
-           /* bell */
-           cnputc(c);
-       }
-       /* other characters are assumed non-printing */
-}
-
-/*
- * Return output position
- */
-int
-db_print_position(void)
-{
-       return (db_output_position);
-}
-
-/*
- * End line if too long.
- */
-void
-db_end_line(void)
-{
-       if (db_output_position >= db_max_width-1) {
-           /* auto new line */
-           if (!db_auto_wrap)
-               cnputc('\n');
-           db_output_position = 0;
-           db_last_non_space = 0;
-           db_last_gen_return = 1;
-           db_output_line++;
-       }
-}
-
-/*
- * Printing
- */
-
-void
-db_printf(const char *fmt, ...)
-{
-       va_list listp;
-
-       va_start(listp, fmt);
-       _doprnt(fmt, &listp, db_putchar, (int)db_radix);
-       va_end(listp);
-}
-
-/* alternate name */
-
-void
-kdbprintf(const char *fmt, ...)
-{
-       va_list listp;
-
-       va_start(listp, fmt);
-       _doprnt(fmt, &listp, db_putchar, (int)db_radix);
-       va_end(listp);
-}
-
-int    db_indent = 0;
-
-/*
- * Printing (to console) with indentation.
- */
-void
-iprintf(const char *fmt, ...)
-{
-       va_list listp;
-       register int i;
-
-       for (i = db_indent; i > 0; ){
-           if (i >= 8) {
-               kdbprintf("\t");
-               i -= 8;
-           }
-           else {
-               kdbprintf(" ");
-               i--;
-           }
-       }
-
-       va_start(listp, fmt);
-       _doprnt(fmt, &listp, db_putchar, (int)db_radix);
-       va_end(listp);
-}
-
-void
-db_output_prompt(void)
-{
-       db_printf("db%s", (db_default_act) ? "t": "");
-       db_printf("{%d}", cpu_number());
-       db_printf("> ");
-}
-
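
The output layer removed above never sends trailing blanks to the console: a space or tab only advances db_output_position, and the accumulated gap is materialized by db_force_whitespace just before the next printing character, emitting a tab whenever a whole tab stop fits inside the gap. A standalone model of that buffering, with stdio standing in for cnputc and hypothetical names throughout:

#include <stdio.h>

#define TAB_STOP    8
#define NEXT_TAB(i) ((((i) + TAB_STOP) / TAB_STOP) * TAB_STOP)

static int out_pos = 0;         /* column where the next character lands */
static int last_glyph = 0;      /* column just past the last printed glyph */

/* Emit the deferred blanks between last_glyph and out_pos,
 * preferring a tab whenever a whole tab stop fits. */
static void force_whitespace(void)
{
	int col = last_glyph;

	while (col < out_pos) {
		int next = NEXT_TAB(col);
		if (next <= out_pos) {
			putchar('\t');
			col = next;
		} else {
			putchar(' ');
			col++;
		}
	}
	last_glyph = out_pos;
}

static void put(char c)
{
	if (c == ' ') {                 /* blanks are recorded, not printed */
		out_pos++;
	} else {
		force_whitespace();     /* flush the pending gap first */
		putchar(c);
		out_pos++;
		last_glyph = out_pos;
	}
}

int main(void)
{
	const char *s = "a          b";  /* the ten blanks become one tab plus spaces */
	while (*s != '\0')
		put(*s++);
	putchar('\n');                   /* blanks after 'b' would simply be dropped */
	return 0;
}
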
diff --git a/osfmk/ddb/db_output.h b/osfmk/ddb/db_output.h
deleted file mode 100644 (file)
index 3905264..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   8/90
- */
-
-/*
- * Printing routines for kernel debugger.
- */
-
-#ifndef        _DDB_DB_OUTPUT_H_
-#define        _DDB_DB_OUTPUT_H_
-
-#include <mach/boolean.h>
-
-extern int db_indent;
-
-/*
- * Prototypes for functions exported by this module.
- */
-void db_force_whitespace(void);
-void db_putchar(char c);
-int db_print_position(void);
-void db_end_line(void);
-void db_printf(const char *fmt, ...);
-void kdbprintf(const char *fmt, ...);
-void iprintf(const char *fmt, ...);
-boolean_t db_reserve_output_position(int len);
-void db_reset_more(void);
-void db_output_prompt(void);
-#endif /* !_DDB_DB_OUTPUT_H_ */
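
Of these exports, iprintf is the one whose behavior is easy to misread: it consumes db_indent columns of leading whitespace, a tab per full eight columns and spaces for the remainder, before formatting. A standalone model of it, with vprintf standing in for the kernel's _doprnt:

#include <stdarg.h>
#include <stdio.h>

static int db_indent = 0;       /* current indentation, in columns */

/* Model of iprintf(): burn down db_indent in tab-sized chunks, then format. */
static void iprintf(const char *fmt, ...)
{
	va_list ap;

	for (int i = db_indent; i > 0; ) {
		if (i >= 8) {
			putchar('\t');
			i -= 8;
		} else {
			putchar(' ');
			i--;
		}
	}
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}

int main(void)
{
	iprintf("top level\n");
	db_indent += 4;
	iprintf("indented by four columns\n");
	db_indent -= 4;
	return 0;
}
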
diff --git a/osfmk/ddb/db_print.c b/osfmk/ddb/db_print.c
deleted file mode 100644 (file)
index d773823..0000000
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Miscellaneous printing.
- */
-#include <task_swapper.h>
-
-#include <string.h>                    /* For strlen() */
-#include <mach/port.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/queue.h>
-#include <kern/processor.h>
-#include <ipc/ipc_port.h>
-#include <ipc/ipc_space.h>
-#include <ipc/ipc_pset.h>
-#include <vm/vm_print.h>               /* for db_vm() */
-
-#include <machine/db_machdep.h>
-#include <machine/thread.h>
-
-#include <ddb/db_lex.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_print.h>
-
-#if    TASK_SWAPPER
-#include <kern/task_swap.h>
-#endif /* TASK_SWAPPER */
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-
-char *db_act_stat(
-       register thread_t       thr_act,
-       char                    *status);
-
-char *db_act_swap_stat(
-       register thread_t       thr_act,
-       char                    *status);
-
-void db_print_task(
-       task_t  task,
-       int     task_id,
-       int     flag);
-
-void db_reset_print_entry(
-       void);
-
-void db_print_one_entry(
-       ipc_entry_t     entry,
-       int             index,
-       mach_port_name_t        name,
-       boolean_t       is_pset,
-       ipc_space_t     space);
-
-int db_port_iterate(
-       thread_t        thr_act,
-       boolean_t       is_pset,
-       boolean_t       do_output);
-
-ipc_port_t db_lookup_port(
-       thread_t        thr_act,
-       int             id);
-
-void db_print_act(
-       thread_t        thr_act,
-       int             act_id,
-       int             flag);
-
-void db_print_space(
-       task_t  task,
-       int     task_id,
-       int     flag);
-
-void db_print_task_vm(
-       task_t          task,
-       int             task_id,
-       boolean_t       title,
-       char            *modif);
-
-void db_system_stats(void);
-
-
-void
-db_show_regs(db_expr_t addr, boolean_t have_addr, __unused db_expr_t count,
-            char *modif)
-{
-       register struct db_variable *regp;
-       db_expr_t       value;
-       db_addr_t       offset;
-       char *          name;
-       register int    i; 
-       struct db_var_aux_param aux_param;
-       task_t          task = TASK_NULL;
-
-       aux_param.modif = modif;
-       aux_param.thr_act = THREAD_NULL;
-       if (db_option(modif, 't')) {
-           if (have_addr) {
-               if (!db_check_act_address_valid((thread_t)(unsigned long)addr))
-                   return;
-               aux_param.thr_act = (thread_t)(unsigned long)addr;
-           } else
-               aux_param.thr_act = db_default_act;
-           if (aux_param.thr_act != THREAD_NULL)
-               task = aux_param.thr_act->task;
-       }
-       for (regp = db_regs; regp < db_eregs; regp++) {
-           if (regp->max_level > 1) {
-               db_printf("bad multi-suffixed register %s\n", regp->name);
-               continue;
-           }
-           aux_param.level = regp->max_level;
-           for (i = regp->low; i <= regp->high; i++) {
-               aux_param.suffix[0] = i;
-               db_read_write_variable(regp, &value, DB_VAR_GET, &aux_param);
-               if (regp->max_level > 0)
-                   db_printf("%s%d%*s", regp->name, i, 
-                               12-strlen(regp->name)-((i<10)?1:2), "");
-               else
-                   db_printf("%-12s", regp->name);
-               db_printf("%#*llN", 2+2*sizeof(db_expr_t), (unsigned long long)value);
-               db_find_xtrn_task_sym_and_offset((db_addr_t)value, &name, 
-                                                       &offset, task);
-               if (name != 0 && offset <= db_maxoff && offset != value) {
-                   db_printf("\t%s", name);
-                   if (offset != 0)
-                               db_printf("+%#llr", (unsigned long long)offset);
-               }
-               db_printf("\n");
-           }
-       }
-}
-
-#define OPTION_LONG            0x001           /* long print option */
-#define OPTION_USER            0x002           /* print ps-like stuff */
-#define OPTION_INDENT          0x100           /* print with indent */
-#define OPTION_THREAD_TITLE    0x200           /* print thread title */
-#define OPTION_TASK_TITLE      0x400           /* print task title */
-
-#ifndef        DB_TASK_NAME
-#define DB_TASK_NAME(task)                     /* no task name */
-#define DB_TASK_NAME_TITLE     ""              /* no task name */
-#endif /* DB_TASK_NAME */
-
-#ifndef        db_act_fp_used
-#define db_act_fp_used(thr_act)        FALSE
-#endif
-
-char *
-db_act_stat(
-       register thread_t       thr_act,
-       char                    *status)
-{
-       register char *p = status;
-       
-       if (!thr_act->active) {
-               *p++ = 'D';
-               *p++ = 'y';
-               *p++ = 'i';
-               *p++ = 'n';
-               *p++ = 'g';
-               *p++ = ' ';
-       } else {
-               thread_t athread = thr_act;
-
-               *p++ = (athread->state & TH_RUN)  ? 'R' : '.';
-               *p++ = (athread->state & TH_WAIT) ? 'W' : '.';
-               *p++ = (athread->state & TH_SUSP) ? 'S' : '.';
-               *p++ = (!athread->kernel_stack) ? 'O' : '.';
-               *p++ = (athread->state & TH_UNINT) ? 'N' : '.';
-               /* show if the FPU has been used */
-               *p++ = db_act_fp_used(thr_act) ? 'F' : '.';
-       }
-       *p++ = 0;
-       return(status);
-}
-
-char *
-db_act_swap_stat(__unused thread_t thr_act, char *status)
-{
-       register char *p = status;
-       *p++ = 0;
-
-       return status;
-}
-
-const char *policy_list[] = { "TS", "RR", "??", "FF", "??", "??", "??", "BE"};
-
-void
-db_print_act(
-       thread_t        thr_act,
-       int             act_id,
-       int             flag)
-{
-       thread_t athread;
-       char status[8];
-       char swap_status[3];
-       const char *indent = "";
-       int      policy;
-
-       if (!thr_act) {
-           db_printf("db_print_act(NULL)!\n");
-           return;
-       }
-
-       athread = thr_act;
-       if (flag & OPTION_USER) {
-
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_INDENT)
-                   indent = "    ";
-               if (flag & OPTION_THREAD_TITLE) {
-                   db_printf("%s ID:   ACT     STAT  SW STACK    SHUTTLE", indent);
-                   db_printf("  SUS  PRI  WAIT_FUNC\n");
-               }
-               policy = ((athread && (athread->sched_mode == TH_MODE_TIMESHARE))? 1: 2);
-               db_printf("%s%3d%c %0*X %s %s %0*X %0*X %3d %3d/%s ",
-                   indent, act_id,
-                   (thr_act == current_thread())? '#': ':',
-                   2*sizeof(vm_offset_t), thr_act,
-                   db_act_stat(thr_act, status),
-                   db_act_swap_stat(thr_act, swap_status),
-                   2*sizeof(vm_offset_t), (athread ?athread->kernel_stack:0),
-                   2*sizeof(vm_offset_t), athread,
-                   thr_act->suspend_count,
-                   (athread ? athread->sched_pri : 999), /* XXX */
-                   policy_list[policy-1]);
-               if (athread) {
-                   /* no longer TH_SWAP, no continuation to print */
-                   if (athread->state & TH_WAIT)
-                       db_task_printsym((db_addr_t)athread->wait_event,
-                                               DB_STGY_ANY, kernel_task);
-               }
-               db_printf("\n");
-           } else {
-               if (act_id % 3 == 0) {
-                   if (flag & OPTION_INDENT)
-                       db_printf("\n    ");
-               } else
-                   db_printf(" ");
-               db_printf("%3d%c(%0*X,%s)", act_id, 
-                   (thr_act == current_thread())? '#': ':',
-                   2*sizeof(vm_offset_t), thr_act,
-                   db_act_stat(thr_act, status));
-           }
-       } else {
-           if (flag & OPTION_INDENT)
-               db_printf("            %3d (%0*X) ", act_id,
-                         2*sizeof(vm_offset_t), thr_act);
-           else
-               db_printf("(%0*X) ", 2*sizeof(vm_offset_t), thr_act);
-           if (athread) {
-               db_printf("%c%c%c%c%c",
-                       (athread->state & TH_RUN)  ? 'R' : ' ',
-                       (athread->state & TH_WAIT) ? 'W' : ' ',
-                       (athread->state & TH_SUSP) ? 'S' : ' ',
-                       (athread->state & TH_UNINT)? 'N' : ' ',
-                       db_act_fp_used(thr_act) ? 'F' : ' ');
-               if (!athread->kernel_stack) {
-                   if (athread->continuation) {
-                       db_printf("(");
-                       db_task_printsym((db_addr_t)(unsigned long)athread->continuation,
-                                               DB_STGY_ANY, kernel_task);
-                       db_printf(")");
-                   } else {
-                       db_printf("(handoff)");
-                   }
-               }
-               if (athread->state & TH_WAIT) {
-                   db_printf(" ");
-                   db_task_printsym((db_addr_t)athread->wait_event,
-                                               DB_STGY_ANY, kernel_task);
-               }
-           } else
-               db_printf("Empty");
-           db_printf("\n");
-       }
-}
-
-void
-db_print_task(
-       task_t  task,
-       int     task_id,
-       int     flag)
-{
-       thread_t thr_act;
-       int act_id;
-       char sstate;
-
-       if (flag & OPTION_USER) {
-           if (flag & OPTION_TASK_TITLE) {
-               db_printf(" ID: TASK     MAP      THD SUS PR SW %s", 
-                         DB_TASK_NAME_TITLE);
-               if ((flag & OPTION_LONG) == 0)
-                   db_printf("  ACTS");
-               db_printf("\n");
-           }
-#if    TASK_SWAPPER
-           switch ((int) task->swap_state) {
-               case TASK_SW_IN:
-                   sstate = 'I';
-                   break;
-               case TASK_SW_OUT:
-                   sstate = 'O';
-                   break;
-               case TASK_SW_GOING_OUT:
-                   sstate = 'G';
-                   break;
-               case TASK_SW_COMING_IN:
-                   sstate = 'C';
-                   break;
-               case TASK_SW_UNSWAPPABLE:
-                   sstate = 'U';
-                   break;
-               default:
-                   sstate = '?';
-                   break;
-           }
-#else  /* TASK_SWAPPER */
-           sstate = 'I';
-#endif /* TASK_SWAPPER */
-           /*** ??? fix me ***/
-           db_printf("%3d: %0*X %0*X %3d %3d %2d %c  ",
-                           task_id, 2*sizeof(vm_offset_t), task,
-                           2*sizeof(vm_offset_t), task->map,
-                           task->thread_count,
-                           task->suspend_count,
-                           task->priority,
-                           sstate);
-           DB_TASK_NAME(task);
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_TASK_TITLE)
-                   flag |= OPTION_THREAD_TITLE;
-               db_printf("\n");
-           } else if (task->thread_count <= 1)
-               flag &= ~OPTION_INDENT;
-           act_id = 0;
-           queue_iterate(&task->threads, thr_act, thread_t, task_threads) {
-               db_print_act(thr_act, act_id, flag);
-               flag &= ~OPTION_THREAD_TITLE;
-               act_id++;
-           }
-           if ((flag & OPTION_LONG) == 0)
-               db_printf("\n");
-       } else {
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_TASK_TITLE) {
-                   db_printf("    TASK        ACT\n");
-                   if (task->thread_count > 1)
-                       flag |= OPTION_THREAD_TITLE;
-               }
-           }
-           db_printf("%3d (%0*X): ", task_id, 2*sizeof(vm_offset_t), task);
-           if (task->thread_count == 0) {
-               db_printf("no threads\n");
-           } else {
-               if (task->thread_count > 1) {
-                   db_printf("%d threads: \n", task->thread_count);
-                   flag |= OPTION_INDENT;
-               } else
-                   flag &= ~OPTION_INDENT;
-               act_id = 0;
-               queue_iterate(&task->threads, thr_act,
-                             thread_t, task_threads) {
-                   db_print_act(thr_act, act_id++, flag);
-                   flag &= ~OPTION_THREAD_TITLE;
-               }
-           }
-       }
-}
-
-void
-db_print_space(task_t task, int task_id, __unused int flag)
-{
-       ipc_space_t space;
-       thread_t act = (thread_t)queue_first(&task->threads);
-       int count;
-
-       count = 0;
-       space = task->itk_space;
-       if (act)
-               count = db_port_iterate(act, FALSE, FALSE);
-       db_printf("%3d: %08x %08x %08x %sactive   %d\n",
-                 task_id, task, space, task->map,
-                 space->is_active? "":"!", count);
-}
-
-void
-db_print_task_vm(task_t task, int task_id, boolean_t title,
-                __unused char *modif)
-{
-       vm_map_t        map;
-       pmap_t          pmap;
-       vm_size_t       size;
-       long            resident;
-       long            wired;
-
-       if (title) {
-               db_printf("id     task      map     pmap  virtual  rss pg rss mem  wir pg wir mem\n");
-       }
-
-       map = task->map;
-       pmap = vm_map_pmap(map);
-
-       size = db_vm_map_total_size((unsigned long)map);
-       resident = pmap->stats.resident_count;
-       wired = pmap->stats.wired_count;
-
-       db_printf("%2d %08x %08x %08x %7dK  %6d %6dK  %6d %6dK\n",
-               task_id,
-               task,
-               map,
-               pmap,
-               size / 1024,
-               resident, (resident * PAGE_SIZE) / 1024,
-               wired, (wired * PAGE_SIZE) / 1024);
-}
-
-
-void
-db_show_one_task_vm(db_expr_t addr, boolean_t have_addr,
-                   __unused db_expr_t count, char *modif)
-{
-       thread_t        thread;
-       task_t          task;
-       int             task_id;
-
-       if (have_addr == FALSE) {
-               if ((thread = db_default_act) == THREAD_NULL) {
-                       if ((thread = current_thread()) == THREAD_NULL) {
-                               db_printf("no thread.\n");
-                               return;
-                       }
-               }
-               task = thread->task;
-       } else {
-               task = (task_t)(unsigned long)addr;
-       }
-
-       task_id = db_lookup_task(task);
-       if (task_id < 0) {
-               db_printf("0x%llx is not a task_t\n", (unsigned long long)addr);
-               return;
-       }
-
-       db_print_task_vm(task, task_id, TRUE, modif);
-}
-
-void
-db_show_all_task_vm(__unused db_expr_t addr, __unused boolean_t have_addr,
-                   __unused db_expr_t count, char *modif)
-{
-       task_t          task;
-       int             task_id;
-       boolean_t       title = TRUE;
-
-       task_id = 0;
-       queue_iterate(&tasks, task, task_t, tasks) {
-               db_print_task_vm(task, task_id, title, modif);
-               title = FALSE;
-               task_id++;
-       }
-}
-
-void
-db_show_all_acts(__unused db_expr_t addr, __unused boolean_t have_addr,
-                __unused db_expr_t count, char *modif)
-{
-       task_t task;
-       int task_id;
-       int flag;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       task_id = 0;
-       queue_iterate(&tasks, task, task_t, tasks) {
-               db_print_task(task, task_id, flag);
-               flag &= ~OPTION_TASK_TITLE;
-               task_id++;
-               if ((flag & (OPTION_LONG|OPTION_INDENT)) == OPTION_INDENT)
-                   db_printf("\n");
-       }
-}
-
-void
-db_show_one_space(db_expr_t addr, boolean_t have_addr,
-                 __unused db_expr_t count, char *modif)
-{
-       int             flag;
-       int             task_id;
-       task_t          task;
-
-       flag = OPTION_TASK_TITLE;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           task = db_current_task();
-           if (task == TASK_NULL) {
-               db_error("No task\n");
-               /*NOTREACHED*/
-           }
-       } else
-           task = (task_t)(unsigned long)addr;
-
-       if ((task_id = db_lookup_task(task)) < 0) {
-           db_printf("bad task address 0x%llx\n", (unsigned long long)addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       db_printf(" ID: TASK     SPACE    MAP               COUNT\n");
-       db_print_space(task, task_id, flag);
-}
-
-void
-db_show_all_spaces(__unused db_expr_t addr, __unused boolean_t have_addr,
-                  __unused db_expr_t count, char *modif)
-{
-       task_t task;
-       int task_id = 0;
-       int flag;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       db_printf(" ID: TASK     SPACE    MAP               COUNT\n");
-       queue_iterate(&tasks, task, task_t, tasks) {
-               db_print_space(task, task_id, flag);
-               task_id++;
-       }
-}
-
-db_addr_t
-db_task_from_space(
-       ipc_space_t     space,
-       int             *task_id)
-{
-       task_t task;
-       int tid = 0;
-
-       queue_iterate(&tasks, task, task_t, tasks) {
-               if (task->itk_space == space) {
-                       *task_id = tid;
-                       return (db_addr_t)(unsigned long)task;
-               }
-               tid++;
-       }
-       *task_id = 0;
-       return (0);
-}
-
-void
-db_show_one_act(db_expr_t addr, boolean_t have_addr, __unused db_expr_t        count,
-               char *modif)
-{
-       int             flag;
-       int             act_id;
-       thread_t                thr_act;
-
-       flag = OPTION_THREAD_TITLE;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           thr_act = current_thread();
-           if (thr_act == THREAD_NULL) {
-               db_error("No thr_act\n");
-               /*NOTREACHED*/
-           }
-       } else
-           thr_act = (thread_t)(unsigned long)addr;
-
-       if ((act_id = db_lookup_act(thr_act)) < 0) {
-           db_printf("bad thr_act address %#llX\n", (unsigned long long)addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       if (flag & OPTION_USER) {
-           db_printf("TASK%d(%0*X):\n",
-                     db_lookup_task(thr_act->task),
-                     2*sizeof(vm_offset_t), thr_act->task);
-           db_print_act(thr_act, act_id, flag);
-       } else {
-           db_printf("task %d(%0*X): thr_act    %d",
-                     db_lookup_task(thr_act->task),
-                     2*sizeof(vm_offset_t), thr_act->task, act_id);
-           db_print_act(thr_act, act_id, flag);
-       }
-       if (db_option(modif, 'i') &&
-           (thr_act->state & TH_WAIT) && 
-           thr_act->kernel_stack == 0) {
-
-           db_printf("Wait State: option 0x%x\n",
-               thr_act->ith_option);
-       }
-}
-
-void
-db_show_one_task(db_expr_t addr, boolean_t have_addr,
-                __unused db_expr_t count, char *modif)
-{
-       int             flag;
-       int             task_id;
-       task_t          task;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           task = db_current_task();
-           if (task == TASK_NULL) {
-               db_error("No task\n");
-               /*NOTREACHED*/
-           }
-       } else
-           task = (task_t)(unsigned long)addr;
-
-       if ((task_id = db_lookup_task(task)) < 0) {
-           db_printf("bad task address 0x%llX\n", (unsigned long long)addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       db_print_task(task, task_id, flag);
-}
-
-void
-db_show_shuttle(db_expr_t addr, boolean_t have_addr, __unused db_expr_t        count,
-               __unused char *modif)
-{
-       thread_t                        thread;
-
-       if (have_addr)
-           thread = (thread_t)(unsigned long)addr;
-       else {
-           thread = current_thread();
-           if (thread == THREAD_NULL) {
-               db_error("No thread\n");
-               /*NOTREACHED*/
-           }
-       }
-       db_printf("thread %x:\n", thread);
-       printf(" $task%d.%d(%x)", db_lookup_task(thread->task),
-                      db_lookup_act(thread), thread);
-       db_printf("\n");
-}
-
-int
-db_port_kmsg_count(
-       ipc_port_t      port)
-{
-       return (port->ip_messages.imq_msgcount);
-}
-
-static int db_print_ent_cnt = 0;
-
-void db_reset_print_entry(
-       void)
-{
-       db_print_ent_cnt = 0;
-}
-
-void
-db_print_one_entry(ipc_entry_t entry, int index, mach_port_name_t name,
-                  boolean_t is_pset, __unused ipc_space_t space)
-{
-       ipc_port_t aport = (ipc_port_t)entry->ie_object;
-       ipc_entry_bits_t bits;
-
-       bits = entry->ie_bits;
-       if (is_pset && !aport->ip_pset_count)
-               return;
-       if (db_print_ent_cnt && db_print_ent_cnt % 2 == 0)
-               db_printf("\n");
-       if (!name)
-               db_printf("\t%s%d[%x]",
-                       !is_pset && aport->ip_pset_count ? "pset" : "port",
-                       index,
-                       MACH_PORT_MAKE(index, IE_BITS_GEN(bits)));
-       else
-               db_printf("\t%s[%x]",
-                       !is_pset && aport->ip_pset_count ? "pset" : "port",
-                       name);
-       if (!is_pset) {
-               db_printf("(%s,%x,%d)",
-                   (bits & MACH_PORT_TYPE_RECEIVE)? "r":
-                       (bits & MACH_PORT_TYPE_SEND)? "s": "S",
-                       aport,
-                   db_port_kmsg_count(aport));
-               db_print_ent_cnt++;
-       }
-       else {
-               db_printf("(%s,%x,set_count=%d,%d)",
-                       (bits & MACH_PORT_TYPE_RECEIVE)? "r":
-                               (bits & MACH_PORT_TYPE_SEND)? "s": "S",
-                       aport,
-                       aport->ip_pset_count,
-                       db_port_kmsg_count(aport));
-               db_print_ent_cnt++;
-       }
-}
-
-int
-db_port_iterate(
-       thread_t        thr_act,
-       boolean_t       is_pset,
-       boolean_t       do_output)
-{
-       ipc_entry_t entry;
-       ipc_tree_entry_t tentry;
-       int index;
-       int size;
-       int count;
-       ipc_space_t space;
-
-       count = 0;
-       space = thr_act->task->itk_space;
-       entry = space->is_table;
-       size = space->is_table_size;
-       db_reset_print_entry();
-       for (index = 0; index < size; ++index, ++entry) {
-               if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
-                       if (do_output)
-                               db_print_one_entry(entry,
-                                       index, MACH_PORT_NULL, is_pset, space);
-                       ++count;
-               }
-       }
-       for (tentry = ipc_splay_traverse_start(&space->is_tree);
-               tentry != ITE_NULL;
-               tentry = ipc_splay_traverse_next(&space->is_tree, FALSE)) {
-               entry = &tentry->ite_entry;
-               if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
-                       if (do_output)
-                               db_print_one_entry(entry,
-                                       0, tentry->ite_name, is_pset, space);
-                       ++count;
-               }
-       }
-       return (count);
-}
-
-ipc_port_t
-db_lookup_port(
-       thread_t        thr_act,
-       int             id)
-{
-       register ipc_space_t space;
-       register ipc_entry_t entry;
-
-       if (thr_act == THREAD_NULL)
-           return(0);
-       space = thr_act->task->itk_space;
-       if (id < 0 || (unsigned)id >= space->is_table_size)
-           return(0);
-       entry = &space->is_table[id];
-       if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS)
-           return((ipc_port_t)entry->ie_object);
-       return(0);
-}
-
-void
-db_show_port_id(db_expr_t addr, boolean_t have_addr, __unused db_expr_t count,
-               char *modif)
-{
-       thread_t thr_act;
-
-       if (!have_addr) {
-           thr_act = current_thread();
-           if (thr_act == THREAD_NULL) {
-               db_error("No thr_act\n");
-               /*NOTREACHED*/
-           }
-       } else
-           thr_act = (thread_t)(unsigned long)addr;
-       if (db_lookup_act(thr_act) < 0) {
-           db_printf("Bad thr_act address 0x%llX\n", (unsigned long long)addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-       if (db_port_iterate(thr_act, db_option(modif,'s'), TRUE))
-           db_printf("\n");
-}
-
-extern void db_sched(void);
-/*
- *     Useful system state when the world has hung.
- */
-void
-db_system_stats(void)
-{
-       db_sched();
-       iprintf("\n");
-       db_vm();
-       iprintf("\n");
-       iprintf("\n");
-       db_printf("current_{thread/task} 0x%x 0x%x\n",
-                       current_thread(),current_task());
-}
-
-void db_show_one_runq(run_queue_t runq);
-
-void
-db_show_runq(__unused db_expr_t addr, __unused boolean_t have_addr,
-            __unused db_expr_t count, __unused char *modif)
-{
-       processor_t proc;
-       run_queue_t runq;
-       boolean_t showedany = FALSE;
-
-       for (proc = processor_list; proc != PROCESSOR_NULL; proc = proc->processor_list) {
-               runq = &proc->runq;
-               if (runq->count > 0) {
-                   db_printf("PROCESSOR %x IN SET %x\n", proc, proc->processor_set);
-                   db_show_one_runq(runq);
-                   showedany = TRUE;
-               }
-       }
-       if (rt_runq.count > 0) {
-               db_printf("REAL TIME\n");
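-               /* XXX 'runq' is left over from the loop above; the real-time queue itself is not passed here */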
-               db_show_one_runq(runq);
-               showedany = TRUE;
-       }
-       if (!showedany)
-           db_printf("No runnable threads\n");
-}
-
-void
-db_show_one_runq(
-       run_queue_t     runq)
-{
-       int i, task_id, thread_id;
-       queue_t q;
-       thread_t thread;
-       task_t task;
-
-       printf("PRI  TASK.ACTIVATION\n");
-       for (i = runq->highq, q = runq->queues + i; i >= 0; i--, q--) {
-           if (!queue_empty(q)) {
-               db_printf("%3d:", i);
-               queue_iterate(q, thread, thread_t, links) {
-                   task = thread->task;
-                   task_id = db_lookup_task(task);
-                   thread_id = db_lookup_task_act(task, thread);
-                   db_printf(" %d.%d", task_id, thread_id);
-               }
-               db_printf("\n");
-           }
-       }
-}
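
db_act_stat above packs thread state into a fixed-width flag string, one column per condition and '.' for a clear bit, so the columns line up across the "show all acts" listing. A standalone sketch of the same encoding; the TH_* bit values below are invented for the example, not the kernel's.

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's TH_* state bits. */
#define TH_RUN   0x01
#define TH_WAIT  0x02
#define TH_SUSP  0x04
#define TH_UNINT 0x08

/* One column per flag, '.' when the bit is clear; the swapped-stack ('O')
 * and FPU-used ('F') columns of db_act_stat are omitted for brevity. */
static char *act_stat(int state, char status[8])
{
	char *p = status;

	*p++ = (state & TH_RUN)   ? 'R' : '.';
	*p++ = (state & TH_WAIT)  ? 'W' : '.';
	*p++ = (state & TH_SUSP)  ? 'S' : '.';
	*p++ = (state & TH_UNINT) ? 'N' : '.';
	*p = '\0';
	return status;
}

int main(void)
{
	char buf[8];

	printf("%s\n", act_stat(TH_RUN, buf));              /* R... */
	printf("%s\n", act_stat(TH_WAIT | TH_UNINT, buf));  /* .W.N */
	return 0;
}
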
diff --git a/osfmk/ddb/db_print.h b/osfmk/ddb/db_print.h
deleted file mode 100644 (file)
index f02d697..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.31.1  1997/03/27  18:46:44  barbou
- *     ri-osc CR1566: Add db_show_one_thread() prototype. [dwm]
- *     [1995/08/28  15:47:07  bolinger]
- *     [97/02/25            barbou]
- *
- * Revision 1.1.16.6  1995/02/23  21:43:39  alanl
- *     Merge with DIPC2_SHARED.
- *     [1995/01/05  13:30:16  alanl]
- * 
- * Revision 1.1.21.2  1994/12/09  22:11:02  dwm
- *     mk6 CR801 - merge up from nmk18b4 to nmk18b7
- *     * Rev 1.1.16.4  1994/10/11  16:36:02  emcmanus
- *       Added db_show_shuttle() and db_show_runq() prototypes.
- *     [1994/12/09  20:36:53  dwm]
- * 
- * Revision 1.1.21.1  1994/11/10  06:06:47  dwm
- *     mk6 CR764 - s/spinlock/simple_lock/ (name change only)
- *     [1994/11/10  05:24:14  dwm]
- * 
- * Revision 1.1.16.3  1994/09/23  01:21:01  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:46  ezf]
- * 
- * Revision 1.1.16.2  1994/09/16  15:30:07  emcmanus
- *     Add prototype for db_show_subsystem.
- *     [1994/09/16  15:29:05  emcmanus]
- * 
- * Revision 1.1.16.1  1994/06/11  21:12:10  bolinger
- *     Merge up to NMK17.2.
- *     [1994/06/11  20:04:06  bolinger]
- * 
- * Revision 1.1.18.2  1994/12/06  19:43:09  alanl
- *     Intel merge, Oct 94 code drop.
- *     Added prototypes for db_show_{one,all}_task_vm
- *     [94/11/28            mmp]
- * 
- * Revision 1.1.18.1  1994/08/05  19:35:57  mmp
- *     Remove duplicate prototype for db_show_port_id.
- *     [1994/08/05  19:31:44  mmp]
- * 
- * Revision 1.1.10.3  1994/04/15  18:41:54  paire
- *     Changed db_task_from_space prototype.
- *     [94/03/31            paire]
- * 
- * Revision 1.1.10.2  1994/03/07  16:37:54  paire
- *     Added ANSI prototype for db_port_kmsg_count routine.
- *     [94/02/15            paire]
- * 
- * Revision 1.1.10.1  1994/02/08  10:58:27  bernadat
- *     Added   db_show_one_space
- *             db_show_all_spaces
- *             db_sys
- *     prototypes
- *     [94/02/07            bernadat]
- * 
- * Revision 1.1.2.3  1993/09/17  21:34:40  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:24  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:28:01  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:43  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_PRINT_H_
-#define        _DDB_DB_PRINT_H_
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-/* Prototypes for functions exported by this module.
- */
-void db_show_regs(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif);
-
-void db_show_all_acts(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_one_act(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_one_thread(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_one_task(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_shuttle(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_port_id(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_one_task_vm(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif);
-
-void db_show_all_task_vm(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif);
-
-void db_show_one_space(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_all_spaces(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_sys(void);
-
-int db_port_kmsg_count(
-       ipc_port_t      port);
-
-db_addr_t db_task_from_space(
-       ipc_space_t     space,
-       int             *task_id);
-
-void db_show_one_simple_lock(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_runq(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-void db_show_one_lock(lock_t *);
-
-#endif /* !_DDB_DB_PRINT_H_ */
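
Nearly every prototype here takes the same four arguments, and the modif string carries single-character options: db_option is in effect a membership test, which is how db_show_all_acts above turns 'u' and 'l' into OPTION_USER and OPTION_LONG. A standalone model of that flag parsing, with db_option reimplemented for illustration:

#include <stdio.h>
#include <string.h>

#define OPTION_LONG 0x001
#define OPTION_USER 0x002

/* Model of db_option(): does the modifier string contain this character? */
static int db_option(const char *modif, int c)
{
	return modif != NULL && strchr(modif, c) != NULL;
}

static int parse_flags(const char *modif)
{
	int flag = 0;

	if (db_option(modif, 'u'))      /* ps-like user view */
		flag |= OPTION_USER;
	if (db_option(modif, 'l'))      /* long listing */
		flag |= OPTION_LONG;
	return flag;
}

int main(void)
{
	printf("0x%x\n", parse_flags("ul"));    /* 0x3 */
	printf("0x%x\n", parse_flags(""));      /* 0x0 */
	return 0;
}
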
diff --git a/osfmk/ddb/db_run.c b/osfmk/ddb/db_run.c
deleted file mode 100644 (file)
index 6c7d5be..0000000
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Commands to run the process.
- */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-#include <ddb/db_lex.h>
-#include <ddb/db_break.h>
-#include <ddb/db_access.h>
-#include <ddb/db_run.h>
-#include <ddb/db_cond.h>
-#include <ddb/db_examine.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_watch.h>
-#include <kern/misc_protos.h>
-#include <kern/debug.h>
-
-#include <IOKit/IOPlatformExpert.h>
-
-boolean_t      db_sstep_print;
-int            db_loop_count;
-int            db_call_depth;
-
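-/*
- * Statistics for the current run command: instructions executed since
- * the command started, and how many of them were loads or stores.
- * db_max_inst_count bounds how many instructions run before
- * db_step_again() asks whether to continue.
- */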
-int            db_inst_count;
-int            db_last_inst_count;
-int            db_load_count;
-int            db_store_count;
-int            db_max_inst_count = 1000;
-
-#ifndef db_set_single_step
-void db_set_task_single_step(
-       register db_regs_t      *regs,
-       task_t                  task);
-#else
-#define        db_set_task_single_step(regs,task)      db_set_single_step(regs)
-#endif
-#ifndef db_clear_single_step
-void db_clear_task_single_step(
-       db_regs_t       *regs,
-       task_t          task);
-#else
-#define db_clear_task_single_step(regs,task)   db_clear_single_step(regs)
-#endif
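-
-/*
- * On machines with a hardware trace facility, db_set_single_step and
- * db_clear_single_step are machine-dependent macros and the task-qualified
- * names above reduce to them; otherwise the SOFTWARE_SSTEP implementation
- * later in this file supplies db_set_task_single_step and
- * db_clear_task_single_step.
- */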
-
-extern jmp_buf_t *db_recover;
-boolean_t db_step_again(void);
-
-static db_addr_t db_stop_pc;
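-
-/*
- * Decide whether to stop in the debugger at the current PC: returns TRUE
- * to enter the command loop, FALSE to resume execution (an invisible step,
- * a remaining step count, or a trace mode that has not yet reached its
- * goal).
- */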
-boolean_t
-db_stop_at_pc(
-       boolean_t       *is_breakpoint,
-       task_t          task,
-       task_t          space)
-{
-       register  db_thread_breakpoint_t bkpt;
-
-       db_clear_task_single_step(DDB_REGS, space);
-       db_clear_breakpoints();
-       db_clear_watchpoints();
-       db_stop_pc = PC_REGS(DDB_REGS);
-
-#ifdef FIXUP_PC_AFTER_BREAK
-       if (*is_breakpoint) {
-           /*
-            * Breakpoint trap.  Fix up the PC if the
-            * machine requires it.
-            */
-           FIXUP_PC_AFTER_BREAK
-           db_stop_pc = PC_REGS(DDB_REGS);
-       }
-#endif
-
-       /*
-        * Now check for a breakpoint at this address.
-        */
-       bkpt = db_find_thread_breakpoint_here(space, db_stop_pc);
-       if (bkpt) {
-           if (db_cond_check(bkpt)) {
-               *is_breakpoint = TRUE;
-               return (TRUE);  /* stop here */
-           }
-       }
-       *is_breakpoint = FALSE;
-
-       if (db_run_mode == STEP_INVISIBLE) {
-           db_run_mode = STEP_CONTINUE;
-           return (FALSE);     /* continue */
-       }
-       if (db_run_mode == STEP_COUNT) {
-           return (FALSE); /* continue */
-       }
-       if (db_run_mode == STEP_ONCE) {
-           if (--db_loop_count > 0) {
-               if (db_sstep_print) {
-                   db_print_loc_and_inst(db_stop_pc, task);
-               }
-               return (FALSE); /* continue */
-           }
-       }
-       if (db_run_mode == STEP_RETURN) {
-           jmp_buf_t *prev;
-           jmp_buf_t db_jmpbuf;
-           /* WARNING: the following assumes an instruction fits an int */
-           db_expr_t ins;
-
-           ins = db_get_task_value(db_stop_pc, sizeof(int), FALSE, space);
-
-           /* continue until matching return */
-
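-           /*
-            * Push a recovery point so that a fault while examining or
-            * printing the instruction longjmps back here rather than
-            * re-entering the debugger; db_recover is restored below.
-            */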
-           prev = db_recover;
-           if (_setjmp(db_recover = &db_jmpbuf) == 0) {
-               if (!inst_trap_return(ins) &&
-                   (!inst_return(ins) || --db_call_depth != 0)) {
-                       if (db_sstep_print) {
-                           if (inst_call(ins) || inst_return(ins)) {
-                               register int i;
-
-                               db_printf("[after %6d /%4d] ",
-                                         db_inst_count,
-                                         db_inst_count - db_last_inst_count);
-                               db_last_inst_count = db_inst_count;
-                               for (i = db_call_depth; --i > 0; )
-                                   db_printf("  ");
-                               db_print_loc_and_inst(db_stop_pc, task);
-                               db_printf("\n");
-                           }
-                       }
-                       if (inst_call(ins))
-                           db_call_depth++;
-                       db_recover = prev;
-                       if (db_step_again())
-                               return (FALSE); /* continue */
-               }
-           }
-           db_recover = prev;
-       }
-       if (db_run_mode == STEP_CALLT) {
-           /* WARNING: the following assumes an instruction fits an int */
-           db_expr_t ins;
-
-           ins = db_get_task_value(db_stop_pc, sizeof(int), FALSE, space);
-
-           /* continue until call or return */
-
-           if (!inst_call(ins) &&
-               !inst_return(ins) &&
-               !inst_trap_return(ins)) {
-                       if (db_step_again())
-                               return (FALSE); /* continue */
-           }
-       }
-       if (db_find_breakpoint_here(space, db_stop_pc))
-               return(FALSE);
-       db_run_mode = STEP_NONE;
-       return (TRUE);
-}
-
-void
-db_restart_at_pc(
-       boolean_t       watchpt,
-       task_t          task)
-{
-       db_addr_t pc = PC_REGS(DDB_REGS);
-#ifdef SOFTWARE_SSTEP
-       db_addr_t brpc;
-#endif
-
-
-       if ((db_run_mode == STEP_COUNT) ||
-           (db_run_mode == STEP_RETURN) ||
-           (db_run_mode == STEP_CALLT)) {
-           db_expr_t           ins;
-
-           /*
-            * We are about to execute this instruction,
-            * so count it now.
-            */
-
-           ins = db_get_task_value(pc, sizeof(int), FALSE, task);
-           db_inst_count++;
-           db_load_count += db_inst_load((unsigned long)ins);
-           db_store_count += db_inst_store((unsigned long)ins);
-#ifdef SOFTWARE_SSTEP
-           /* Account for instructions in delay slots */
-           brpc = next_instr_address(pc,1,task);
-           if ((brpc != pc) && (inst_branch(ins) || inst_call(ins))) {
-               /* Note: this assumes an instruction fits in sizeof(int) */
-               ins = db_get_task_value(brpc, sizeof(int), FALSE, task);
-               db_inst_count++;
-               db_load_count += db_inst_load(ins);
-               db_store_count += db_inst_store(ins);
-           }
-#endif /* SOFTWARE_SSTEP */
-       }
-
-       if (db_run_mode == STEP_CONTINUE) {
-           if (watchpt || db_find_breakpoint_here(task, pc)) {
-               /*
-                * Step over breakpoint/watchpoint.
-                */
-               db_run_mode = STEP_INVISIBLE;
-               db_set_task_single_step(DDB_REGS, task);
-           } else {
-               db_set_breakpoints();
-               db_set_watchpoints();
-           }
-       } else {
-           db_set_task_single_step(DDB_REGS, task);
-       }
-}
-
-/*
- * 'n' and 'u' commands might never return.
- * Limit the maximum number of steps.
- */
-
-boolean_t
-db_step_again(void)
-{
-       if (db_inst_count && !(db_inst_count%db_max_inst_count)) {
-               char c;
-               db_printf("%d instructions, continue? (y/n) ",
-                         db_inst_count);
-               c = cngetc();
-               db_printf("\n");
-               if (c == 'n')
-                       return(FALSE);
-       }
-       return(TRUE);
-}
-
-void
-db_single_step(db_regs_t *regs, __unused task_t task)
-{
-       if (db_run_mode == STEP_CONTINUE) {
-           db_run_mode = STEP_INVISIBLE;
-           db_set_task_single_step(regs, task);
-       }
-}
-
-#ifdef SOFTWARE_SSTEP
-/*
- *     Software implementation of single-stepping.
- *     If your machine does not have a trace mode
- *     similar to the vax or sun ones, you can use
- *     this implementation, done for the mips.
- *     Just define the above conditional and provide
- *     the functions/macros defined below.
- *
- * extern boolean_t
- *     inst_branch(),          returns true if the instruction might branch
- * extern unsigned
- *     branch_taken(),         returns the address the instruction might
- *                             branch to
- *     db_getreg_val();        returns the value of a user register,
- *                             as indicated in the hardware instruction
- *                             encoding, e.g. 8 for r8
- *
- * next_instr_address(pc,bd,task) returns the address of the first
- *                             instruction following the one at "pc",
- *                             which is either in the taken path of
- *                             the branch (bd==1) or not.  This is
- *                             for machines (mips) with branch delays.
- *
- *     A single-step may involve at most 2 breakpoints -
- *     one for branch-not-taken and one for branch taken.
- *     If one of these addresses does not already have a breakpoint,
- *     we allocate a breakpoint and save it here.
- *     These breakpoints are deleted on return.
- */                    
-db_breakpoint_t        db_not_taken_bkpt = 0;
-db_breakpoint_t        db_taken_bkpt = 0;
-
-db_breakpoint_t
-db_find_temp_breakpoint(
-       task_t             task,
-       db_addr_t          addr)
-{
-       if (db_taken_bkpt && (db_taken_bkpt->address == addr) &&
-           db_taken_bkpt->task == task)
-               return db_taken_bkpt;
-       if (db_not_taken_bkpt && (db_not_taken_bkpt->address == addr) &&
-           db_not_taken_bkpt->task == task)
-               return db_not_taken_bkpt;
-       return 0;
-}
-
-void
-db_set_task_single_step(
-       register db_regs_t      *regs,
-       task_t                  task)
-{
-       db_addr_t pc = PC_REGS(regs), brpc;
-       register unsigned int    inst;
-       register boolean_t       unconditional;
-
-       /*
-        *      User was stopped at pc, i.e. the instruction
-        *      at pc was not executed.
-        */
-       inst = db_get_task_value(pc, sizeof(int), FALSE, task);
-       if (inst_branch(inst) || inst_call(inst)) {
-           extern db_expr_t getreg_val();      /* XXX -- need prototype! */
-
-           brpc = branch_taken(inst, pc, getreg_val, (unsigned char*)regs);
-           if (brpc != pc) {   /* self-branches are hopeless */
-               db_taken_bkpt = db_set_temp_breakpoint(task, brpc);
-           } else
-               db_taken_bkpt = 0;
-           pc = next_instr_address(pc,1,task);
-       } else 
-           pc = next_instr_address(pc,0,task);
-       
-       /* 
-        * check if this control flow instruction is an
-        * unconditional transfer
-        */
-
-       unconditional = inst_unconditional_flow_transfer(inst);
-
-       /*
-        * We only set the sequential breakpoint if the previous instruction
-        * was not an unconditional change of flow of control.  If it was,
-        * setting a breakpoint in the next sequential location may set a
-        * breakpoint in data or in another routine, which could screw up
-        * either the program or the debugger.  (Consider, for instance,
-        * that the next sequential instruction is the start of a routine
-        * needed by the debugger.)
-        */
-       if (!unconditional && db_find_breakpoint_here(task, pc) == 0 &&
-           (db_taken_bkpt == 0 || db_taken_bkpt->address != pc)) {
-               db_not_taken_bkpt = db_set_temp_breakpoint(task, pc);
-       } else
-               db_not_taken_bkpt = 0;
-}
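-
-/*
- * Example: for a conditional branch the code above leaves up to two
- * temporary breakpoints planted -- db_taken_bkpt at the branch target and
- * db_not_taken_bkpt at the fall-through address.  After an unconditional
- * transfer only the target breakpoint remains, since a fall-through
- * breakpoint could land in data or in an unrelated routine.
- */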
-
-void
-db_clear_task_single_step(
-       db_regs_t       *regs,
-       task_t          task)
-{
-       if (db_taken_bkpt != 0) {
-           db_delete_temp_breakpoint(task, db_taken_bkpt);
-           db_taken_bkpt = 0;
-       }
-       if (db_not_taken_bkpt != 0) {
-           db_delete_temp_breakpoint(task, db_not_taken_bkpt);
-           db_not_taken_bkpt = 0;
-       }
-}
-
-#endif /* SOFTWARE_SSTEP */
-
-extern int     db_cmd_loop_done;
-
-/* single-step */
-void
-db_single_step_cmd(__unused db_expr_t addr, __unused boolean_t have_addr,
-                  db_expr_t count, char *modif)
-{
-       boolean_t       print = FALSE;
-
-       if (count == (db_expr_t)-1)
-           count = 1;
-
-       if (modif[0] == 'p')
-           print = TRUE;
-
-       db_run_mode = STEP_ONCE;
-       db_loop_count = (typeof(db_loop_count))count;
-       db_sstep_print = print;
-       db_inst_count = 0;
-       db_last_inst_count = 0;
-       db_load_count = 0;
-       db_store_count = 0;
-
-       db_cmd_loop_done = 1;
-}
-
-/* trace and print until call/return */
-void
-db_trace_until_call_cmd(__unused db_expr_t addr, __unused boolean_t have_addr,
-                       __unused db_expr_t count, char *modif)
-{
-       boolean_t       print = FALSE;
-
-       if (modif[0] == 'p')
-           print = TRUE;
-
-       db_run_mode = STEP_CALLT;
-       db_sstep_print = print;
-       db_inst_count = 0;
-       db_last_inst_count = 0;
-       db_load_count = 0;
-       db_store_count = 0;
-
-       db_cmd_loop_done = 1;
-}
-
-void
-db_trace_until_matching_cmd(__unused db_expr_t addr,
-                           __unused boolean_t have_addr,
-                           __unused db_expr_t count,
-                           char *modif)
-{
-       boolean_t       print = FALSE;
-
-       if (modif[0] == 'p')
-           print = TRUE;
-
-       db_run_mode = STEP_RETURN;
-       db_call_depth = 1;
-       db_sstep_print = print;
-       db_inst_count = 0;
-       db_last_inst_count = 0;
-       db_load_count = 0;
-       db_store_count = 0;
-
-       db_cmd_loop_done = 1;
-}
-
-/* continue */
-void
-db_continue_cmd(__unused db_expr_t addr, __unused boolean_t have_addr,
-               __unused db_expr_t count, __unused char *modif)
-{
-       /*
-        * Though "cont/c" works fairly well, it's not really robust
-        * enough to use in arbitrary situations, so disable it.
-        * (Doesn't seem cost-effective to debug and fix what ails
-        * it.)
-        */
-#if 0
-       if (modif[0] == 'c')
-           db_run_mode = STEP_COUNT;
-       else
-           db_run_mode = STEP_CONTINUE;
-#else
-       db_run_mode = STEP_CONTINUE;
-#endif
-       db_inst_count = 0;
-       db_last_inst_count = 0;
-       db_load_count = 0;
-       db_store_count = 0;
-
-       db_cmd_loop_done = 1;
-}
-
-
-/*
- * Switch to gdb
- */
-static void
-db_to_gdb(void)
-{
-       switch_debugger = 1;
-}
-
-/* gdb */
-void    
-db_continue_gdb(__unused db_expr_t addr, __unused boolean_t have_addr,
-               __unused db_expr_t count, __unused char *modif)
-{
-       db_to_gdb();
-       db_run_mode = STEP_CONTINUE;
-       db_inst_count = 0;
-       db_last_inst_count = 0;   
-       db_load_count = 0;
-       db_store_count = 0;  
-
-       db_cmd_loop_done = 1;
-}
-        
-
-boolean_t
-db_in_single_step(void)
-{
-       return(db_run_mode != STEP_NONE && db_run_mode != STEP_CONTINUE);
-}
-
diff --git a/osfmk/ddb/db_run.h b/osfmk/ddb/db_run.h
deleted file mode 100644 (file)
index c568f0d..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon 
- * the rights to redistribute these changes.
- */
-
-/*
- */
-
-#ifndef        _DDB_DB_RUN_H_
-#define        _DDB_DB_RUN_H_
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <kern/task.h>
-
-
-/* Prototypes for functions exported by this module.
- */
-
-boolean_t db_stop_at_pc(
-       boolean_t       *is_breakpoint,
-       task_t          task,
-       task_t          space);
-
-void db_restart_at_pc(
-       boolean_t       watchpt,
-       task_t          task);
-
-void db_single_step(
-       db_regs_t       *regs,
-       task_t          task);
-
-void db_single_step_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_trace_until_call_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_trace_until_matching_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_continue_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_continue_gdb(db_expr_t, boolean_t, db_expr_t, char *);
-
-boolean_t db_in_single_step(void);
-
-#endif /* !_DDB_DB_RUN_H_ */
diff --git a/osfmk/ddb/db_sym.c b/osfmk/ddb/db_sym.c
deleted file mode 100644 (file)
index 1e11805..0000000
+++ /dev/null
@@ -1,1502 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-#include <machine/db_machdep.h>
-#include <string.h>                    /* For strcpy(), strcmp() */
-#include <mach/std_types.h>
-#include <kern/misc_protos.h>          /* For printf() */
-#include <kern/kalloc.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-
-#include <vm/vm_map.h> /* vm_map_t */
-
-/*
- * Multiple symbol tables
- *
- * mach, bootstrap, name_server, default_pager, unix, 1 spare
- */
-#define        MAXNOSYMTABS    6
-
-db_symtab_t db_symtabs[MAXNOSYMTABS];
-int db_nsymtab = 0;
-
-db_symtab_t    *db_last_symtab;
-
-unsigned long  db_maxoff = 0x4000;
-extern         char end;
-unsigned long  db_maxval = (unsigned long)&end;
-natural_t      db_minval = 0x1000;
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-static char *db_qualify(
-       char            *sym,
-       register char   *symtabname);
-
-boolean_t db_eqname(
-       char            *src,
-       char            *dst,
-       unsigned        c);
-
-boolean_t db_symbol_is_ambiguous(char *name);
-
-void db_shorten_filename(char **filenamep);
-
-void qsort_swap(
-       register int    *a,
-       register int    *b,
-       register int    size);
-
-void qsort_rotate(
-       register int    *a,
-       register int    *b,
-       register int    *c,
-       register int    size);
-
-void qsort_recur(
-       char    *left,
-       char    *right,
-       int     eltsize,
-       int     (*compfun)(char *, char *));
-
-void qsort_checker(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *));
-
-void bubble_sort(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *));
-
-int no_print_completion(
-       db_symtab_t     *stab,
-       char            *symstr );
-int no_lookup_incomplete(
-       db_symtab_t     *stab,
-       char            *symstr,
-       char            **name,
-       int             *len,
-       int             *toadd);
-
-/*
- * Initialization routine for ddb.
- */
-void
-ddb_init(void)
-{
-       X_db_init();
-       db_machdep_init();
-}
-
-extern vm_map_t kernel_map;
-/*
- * Add symbol table, with given name, to list of symbol tables.
- */
-boolean_t
-db_add_symbol_table(
-       int             type,
-       char            *start,
-       char            *db_end,
-       const char      *name,
-       char            *ref,
-       char            *map_pointer,
-       unsigned long   minsym,
-       unsigned long   maxsym,
-       boolean_t       sorted)
-{
-       register db_symtab_t *st;
-
-       if (db_nsymtab >= MAXNOSYMTABS)
-           return (FALSE);
-
-       st = &db_symtabs[db_nsymtab];
-       st->type = type;
-       st->start = start;
-       st->end = db_end;
-       st->private = ref;
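-       /*
-        * Symbols that live in the kernel map, or entirely above the
-        * kernel base address, are visible from any task, so no map
-        * restriction is recorded for them.  (The bounds checks are
-        * written as unsigned subtractions compared against zero,
-        * presumably to avoid compiler warnings.)
-        */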
-       if (map_pointer == (char *)kernel_map ||
-           (VM_MIN_KERNEL_ADDRESS - VM_MAX_ADDRESS > 0 &&
-            minsym - VM_MIN_KERNEL_ADDRESS > 0))
-               st->map_pointer = 0;
-       else
-               st->map_pointer = map_pointer;
-       strlcpy(st->name, name, sizeof (st->name));
-       st->minsym = minsym;
-       st->maxsym = maxsym;
-       if (maxsym == 0)
-               st->sorted = FALSE;
-       else {
-               st->sorted = sorted;
-               if (db_maxval < maxsym + db_maxoff)
-                       db_maxval = maxsym + db_maxoff;
-       }
-       db_nsymtab++;
-
-       return (TRUE);
-}
-
-/*
- *  db_qualify("vm_map", "ux") returns "ux::vm_map".
- *
- *  Note: return value points to static data whose content is
- *  overwritten by each call... but in practice this seems okay.
- */
-static char *
-db_qualify(
-       char            *symname,
-       register char   *symtabname)
-{
-       static char     tmp[256];
-       register char   *s;
-
-       s = tmp;
-       while ((*s++ = *symtabname++)) {
-               ;
-       }
-       s[-1] = ':';
-       *s++ = ':';
-       while ((*s++ = *symname++)) {
-               ;
-       }
-       return tmp;
-}
-
-
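-/*
- * Compare symbol names for equality, optionally ignoring a leading
- * character 'c' on the source name (typically a compiler-prepended
- * underscore).
- */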
-boolean_t
-db_eqname(
-       char            *src,
-       char            *dst,
-       unsigned        c)
-{
-       if (!strcmp(src, dst))
-           return (TRUE);
-       if (src[0] == (char)c)
-           return (!strcmp(src+1,dst));
-       return (FALSE);
-}
-
-boolean_t
-db_value_of_name(
-       const char      *name,
-       db_expr_t       *valuep)
-{
-       db_sym_t        sym;
-
-       sym = db_lookup(name);
-       if (sym == DB_SYM_NULL)
-           return (FALSE);
-       db_symbol_values(0, sym, &name, valuep);
-       return (TRUE);
-}
-
-/*
- * Display list of possible completions for a symbol.
- */
-void
-db_print_completion(
-       char *symstr)
-{
-       register int i;
-       int symtab_start = 0;
-       int symtab_end = db_nsymtab;
-       register char *cp;
-
-       /*
-        * Look for, remove, and remember any symbol table specifier.
-        */
-       for (cp = symstr; *cp; cp++) {
-               if (*cp == ':' && cp[1] == ':') {
-                       *cp = '\0';
-                       for (i = 0; i < db_nsymtab; i++) {
-                               if (! strcmp(symstr, db_symtabs[i].name)) {
-                                       symtab_start = i;
-                                       symtab_end = i + 1;
-                                       break;
-                               }
-                       }
-                       *cp = ':';
-                       if (i == db_nsymtab)
-                               return;
-                       symstr = cp+2;
-               }
-       }
-
-       /*
-        * Look in the specified set of symbol tables.
-        * Return on first match.
-        */
-       for (i = symtab_start; i < symtab_end; i++) {
-               if (X_db_print_completion(&db_symtabs[i], symstr))
-                       break;
-       }
-}
-
-/*
- * Look up a (perhaps incomplete) symbol.
- * If the symbol has a qualifier (e.g., ux::vm_map),
- * then only the specified symbol table will be searched;
- * otherwise, all symbol tables will be searched.
- */
-int
-db_lookup_incomplete(
-       char *symstr,
-       int symlen)
-{
-       register int i;
-       int symtab_start = 0;
-       int symtab_end = db_nsymtab;
-       register char *cp;
-       int nsym = 0;
-       char *name = (char *)0;
-       int len;
-       int toadd;
-
-       /*
-        * Look for, remove, and remember any symbol table specifier.
-        */
-       for (cp = symstr; *cp; cp++) {
-               if (*cp == ':' && cp[1] == ':') {
-                       *cp = '\0';
-                       for (i = 0; i < db_nsymtab; i++) {
-                               if (! strcmp(symstr, db_symtabs[i].name)) {
-                                       symtab_start = i;
-                                       symtab_end = i + 1;
-                                       break;
-                               }
-                       }
-                       *cp = ':';
-                       if (i == db_nsymtab)
-                               return 0;
-                       symstr = cp+2;
-               }
-       }
-
-       /*
-        * Look in the specified set of symbol tables.
-        * Return on first match.
-        */
-       for (i = symtab_start; i < symtab_end; i++) {
-               nsym = X_db_lookup_incomplete(&db_symtabs[i], symstr,
-                                             &name, &len, &toadd);
-               if (nsym > 0) {
-                       if (toadd > 0) {
-                               len = strlen(symstr);
-                               if (len + toadd >= symlen)
-                                       return 0;
-                               bcopy(&name[len], &symstr[len], toadd);
-                               symstr[len + toadd] = '\0';
-                       }
-                       break;
-               }
-       }
-       return nsym;
-}
-
-/*
- * Look up a symbol.
- * If the symbol has a qualifier (e.g., ux::vm_map),
- * then only the specified symbol table will be searched;
- * otherwise, all symbol tables will be searched.
- */
-db_sym_t
-db_lookup(const char *symstr)
-{
-       db_sym_t sp;
-       int i;
-       int symtab_start = 0;
-       int symtab_end = db_nsymtab;
-       char *cp;
-
-       /*
-        * Look for, remove, and remember any symbol table specifier.
-        */
-       for (cp = symstr; *cp; cp++) {
-               if (*cp == ':' && cp[1] == ':') {
-                       *cp = '\0';
-                       for (i = 0; i < db_nsymtab; i++) {
-                               if (! strcmp(symstr, db_symtabs[i].name)) {
-                                       symtab_start = i;
-                                       symtab_end = i + 1;
-                                       break;
-                               }
-                       }
-                       *cp = ':';
-                       if (i == db_nsymtab)
-                               db_error("Invalid symbol table name\n");
-                       symstr = cp+2;
-               }
-       }
-
-       /*
-        * Look in the specified set of symbol tables.
-        * Return on first match.
-        */
-       for (i = symtab_start; i < symtab_end; i++) {
-               if ((sp = X_db_lookup(&db_symtabs[i], symstr))) {
-                       db_last_symtab = &db_symtabs[i];
-                       return sp;
-               }
-       }
-       return 0;
-}
-
-/*
- * Print a symbol completion
- */
-void
-db_sym_print_completion(
-       db_symtab_t *stab,
-       char *name,
-       int function,
-       char *fname,
-       int line)
-{
-       if (stab != db_symtabs)
-               db_printf("%s::", stab->name);
-       db_printf("%s", name);
-       if (function) {
-           db_putchar('(');
-           db_putchar(')');
-       }
-       if (fname) {
-           db_printf(" [static from %s", fname);
-           if (line > 0)
-               db_printf(":%d", line);
-           db_putchar(']');
-       }
-       db_putchar('\n');
-}
-
-/*
- * Common utility routine to parse a symbol string into a file name
- * and a (possibly incomplete) symbol name without a line number.
- * This routine is called from aout_db_print_completion if the object
- * dependent handler supports qualified search with a file name.
- * It parses the symbol string and calls an object dependent routine
- * with the parsed file name and symbol name.
- */
-int
-db_sym_parse_and_print_completion(
-       int             (*func)(db_symtab_t *,
-                                char *),
-       db_symtab_t     *symtab,
-       char            *symstr)
-{
-       register        char *p;
-       register int    n;
-       char            *sym_name;
-       char            *component[2];
-       int             nsym;
-
-       /*
-        * disassemble the symbol into components: [file_name:]symbol
-        */
-       component[0] = symstr;
-       component[1] = 0;
-       for (p = symstr, n = 1; *p; p++) {
-               if (*p == ':') {
-                       if (n == 2)
-                               break;
-                       *p = 0;
-                       component[n++] = p+1;
-               }
-       }
-       if (*p == 0) {
-               if (n == 1) {
-                       sym_name = component[0];
-               } else {
-                       sym_name = component[1];
-               }
-               nsym = func(symtab, sym_name);
-       } else
-               nsym = 0;
-       if (n == 2)
-               component[1][-1] = ':';
-       return nsym;
-}
-
-/*
- * Common utility routine to parse a symbol string into a file name
- * and a (possibly incomplete) symbol name without a line number.
- * This routine is called from X_db_lookup_incomplete if the object
- * dependent handler supports qualified search with a file name.
- * It parses the symbol string and calls an object dependent routine
- * with the parsed file name and symbol name.
- */
-int
-db_sym_parse_and_lookup_incomplete(
-       int             (*func)(db_symtab_t *,
-                               char *,
-                               char *,
-                               int,
-                               db_sym_t*,
-                               char **,
-                               int *),
-       db_symtab_t     *symtab,
-       char            *symstr,
-       char            **name,
-       int             *len,
-       int             *toadd)
-{
-       register        char *p;
-       register int    n;
-       char            *file_name = 0;
-       char            *sym_name = 0;
-       char            *component[2];
-       int             nsym = 0;
-
-       /*
-        * disassemble the symbol into components: [file_name:]symbol
-        */
-       component[0] = symstr;
-       component[1] = 0;
-       for (p = symstr, n = 1; *p; p++) {
-               if (*p == ':') {
-                       if (n == 2)
-                               break;
-                       *p = 0;
-                       component[n++] = p+1;
-               }
-       }
-       if (*p == 0) {
-               if (n == 1) {
-                       file_name = 0;
-                       sym_name = component[0];
-               } else {
-                       file_name = component[0];
-                       sym_name = component[1];
-               }
-               nsym = func(symtab, file_name, sym_name, 0, (db_sym_t *)0,
-                           name, len);
-               if (nsym > 0)
-                       *toadd = *len - strlen(sym_name);
-       }
-       if (n == 2)
-               component[1][-1] = ':';
-       return(nsym);
-}
-
-/*
- * Common utility routine to parse a symbol string into a file name,
- * a symbol name and a line number.
- * This routine is called from aout_db_lookup if the object dependent
- * handler supports qualified search with a file name or a line number.
- * It parses the symbol string and calls an object dependent routine
- * with the parsed file name, symbol name and line number.
- */
-db_sym_t
-db_sym_parse_and_lookup(
-       int             (*func)(db_symtab_t *, char *, char *, int,
-                               db_sym_t*, char **, int *),
-       db_symtab_t     *symtab,
-       char            *symstr)
-{
-       register        char *p;
-       register int    n;
-       int             n_name;
-       int             line_number;
-       char            *file_name = 0;
-       char            *sym_name = 0;
-       char            *component[3];
-       db_sym_t        found = DB_SYM_NULL;
-
-       /*
-        * disassemble the symbol into components:
-        *      [file_name:]symbol[:line_number]
-        */
-       component[0] = symstr;
-       component[1] = component[2] = 0;
-       for (p = symstr, n = 1; *p; p++) {
-               if (*p == ':') {
-                       if (n >= 3)
-                               break;
-                       *p = 0;
-                       component[n++] = p+1;
-               }
-       }
-       if (*p != 0)
-               goto out;
-       line_number = 0;
-       n_name = n;
-       p = component[n-1];
-       if (*p >= '0' && *p <= '9') {
-               if (n == 1)
-                       goto out;
-               for (line_number = 0; *p; p++) {
-                       if (*p < '0' || *p > '9')
-                               goto out;
-                       line_number = line_number*10 + *p - '0';
-               }
-               n_name--;
-       } else if (n >= 3)
-               goto out;
-       if (n_name == 1) {
-               for (p = component[0]; *p && *p != '.'; p++);
-               if (*p == '.') {
-                       file_name = component[0];
-                       sym_name = 0;
-               } else {
-                       file_name = 0;
-                       sym_name = component[0];
-               }
-       } else {
-               file_name = component[0];
-               sym_name = component[1];
-       }
-       (void) func(symtab, file_name, sym_name, line_number, &found,
-                  (char **)0, (int *)0);
-       
-out:
-       while (--n >= 1)
-               component[n][-1] = ':';
-       return(found);
-}
-
-/*
- * Does this symbol name appear in more than one symbol table?
- * Used by db_symbol_values to decide whether to qualify a symbol.
- */
-boolean_t db_qualify_ambiguous_names = TRUE;
-
-boolean_t
-db_symbol_is_ambiguous(char *name)
-{
-       register int    i;
-       register
-       boolean_t       found_once = FALSE;
-
-       if (!db_qualify_ambiguous_names)
-               return FALSE;
-
-       for (i = 0; i < db_nsymtab; i++) {
-               if (X_db_lookup(&db_symtabs[i], name)) {
-                       if (found_once)
-                               return TRUE;
-                       found_once = TRUE;
-               }
-       }
-       return FALSE;
-}
-
-/*
- * Find the closest symbol to val, and return its name
- * and the difference between val and the symbol found.
- */
-unsigned int db_search_maxoff = 0x4000;
-db_sym_t
-db_search_task_symbol(
-       register db_addr_t      val,
-       db_strategy_t           strategy,
-       db_addr_t               *offp,  /* better be unsigned */
-       task_t                  task)
-{
-       db_addr_t diff, newdiff;
-       register int    i;
-       db_symtab_t     *sp;
-       db_sym_t        ret = DB_SYM_NULL, sym;
-       vm_map_t        map_for_val;
-
-       if (task == TASK_NULL)
-           task = db_current_task();
-       map_for_val = (task == TASK_NULL)? VM_MAP_NULL: task->map;
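-
-       /*
-        * Search symbol tables that are map-independent or belong to this
-        * task's map; if nothing matches, the retry below clears
-        * map_for_val and searches the map-independent tables alone.
-        */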
-again:
-       newdiff = diff = -1;
-       db_last_symtab = 0;
-       for (sp = &db_symtabs[0], i = 0;
-            i < db_nsymtab;
-            sp++, i++) {
-           if ((((vm_map_t)sp->map_pointer == VM_MAP_NULL) ||
-                       ((vm_map_t)sp->map_pointer == map_for_val)) &&
-                       ((sp->maxsym == 0) ||
-                       ((val >= (db_addr_t)sp->minsym) &&
-                       (val <= (db_addr_t)sp->maxsym)))) {
-               sym = X_db_search_symbol(sp, val, strategy,
-                                               (db_expr_t *)&newdiff);
-               if (newdiff < diff) {
-                   db_last_symtab = sp;
-                   diff = newdiff;
-                   ret = sym;
-                   if (diff <= db_search_maxoff)
-                       break;
-               }
-           }
-       }
-       if (ret == DB_SYM_NULL && map_for_val != VM_MAP_NULL) {
-               map_for_val = VM_MAP_NULL;
-               goto again;
-       }
-       *offp = diff;
-       return ret;
-}
-
-/*
- * Find the closest symbol to val, and return its name
- * and the difference between val and the symbol found.
- * Also return the filename and line number if available.
- */
-db_sym_t
-db_search_task_symbol_and_line(
-       register db_addr_t      val,
-       __unused db_strategy_t  strategy,
-       db_expr_t               *offp,
-       char                    **filenamep,
-       int                     *linenump,
-       task_t                  task,
-       int                     *argsp)
-{
-       db_addr_t diff, newdiff;
-       register int    i;
-       db_symtab_t     *sp;
-       db_sym_t        ret = DB_SYM_NULL, sym;
-       vm_map_t        map_for_val;
-       char            *func;
-       char            *filename;
-       int             linenum;
-       int             args;
-
-       if (task == TASK_NULL)
-           task = db_current_task();
-       map_for_val = (task == TASK_NULL)? VM_MAP_NULL: task->map;
-       *filenamep = (char *) 0;
-       *linenump = 0;
-       *argsp = -1;
-    again:
-       filename = (char *) 0;
-       linenum = 0;
-       newdiff = diff = ~0UL;
-       db_last_symtab = 0;
-       for (sp = &db_symtabs[0], i = 0;
-            i < db_nsymtab;
-            sp++, i++) {
-           if ((((vm_map_t)sp->map_pointer == VM_MAP_NULL) ||
-                       ((vm_map_t)sp->map_pointer == map_for_val)) &&
-                       ((sp->maxsym == 0) ||
-                       ((val >= (db_addr_t)sp->minsym) &&
-                       (val <= (db_addr_t)sp->maxsym)))) {
-               
-                       sym = X_db_search_by_addr(sp, val, &filename, &func,
-                                                 &linenum, (db_expr_t *)&newdiff,
-                                                 &args);
-                       if (sym && newdiff < diff) {
-                               db_last_symtab = sp;
-                               diff = newdiff;
-                               ret = sym;
-                               *filenamep = filename;
-                               *linenump = linenum;
-                               *argsp = args;
-                               if (diff <= db_search_maxoff)
-                                       break;
-                       }
-           }
-       }
-       if (ret == DB_SYM_NULL && map_for_val != VM_MAP_NULL) {
-               map_for_val = VM_MAP_NULL;
-               goto again;
-       }
-       *offp = diff;
-       if (*filenamep)
-               db_shorten_filename(filenamep);
-       return ret;
-}
-
-/*
- * Return name and value of a symbol
- */
-void
-db_symbol_values(
-       db_symtab_t     *stab,
-       db_sym_t        sym,
-       const char              **namep,
-       db_expr_t       *valuep)
-{
-       db_expr_t       value;
-       char            *name;
-
-       if (sym == DB_SYM_NULL) {
-               *namep = 0;
-               return;
-       }
-       if (stab == 0)
-               stab = db_last_symtab;
-
-       X_db_symbol_values(stab, sym, &name, &value);
-
-       if (db_symbol_is_ambiguous(name)) {
-               *namep = db_qualify(name, db_last_symtab->name);
-       } else {
-               *namep = name;
-       }
-       if (valuep)
-               *valuep = value;
-}
-
-
-/*
- * Print the closest symbol to a value
- *
- * After matching the symbol according to the given strategy
- * we print it in the name+offset format, provided the offset
- * is small enough (i.e. smaller than db_maxoff).
- * We also attempt to print [filename:linenum] when applicable
- * (e.g. for procedure names).
- *
- * If we could not find a reasonable name+offset representation,
- * then we just print the value in hex.  Small values might get
- * bogus symbol associations, e.g. 3 might get some absolute
- * value like _INCLUDE_VERSION or something, therefore we do
- * not accept symbols whose value is zero (and use plain hex).
- */
-
-void
-db_task_printsym(
-       db_addr_t       off,
-       db_strategy_t   strategy,
-       task_t          task)
-{
-       db_expr_t       d;
-       char            *filename;
-       char            *name;
-       db_expr_t       value;
-       int             linenum;
-       db_sym_t        cursym;
-
-       if (off >= db_maxval || off < db_minval) {
-               db_printf("%#lln", (unsigned long long)off);
-               return;
-       }
-       cursym = db_search_task_symbol(off, strategy, &d, task);
-
-       db_symbol_values(0, cursym, &name, &value);
-       if (name == 0 || d >= db_maxoff || value == 0) {
-               db_printf("%#lln",(unsigned long long) off);
-               return;
-       }
-       db_printf("%s", name);
-       if (d)
-               db_printf("+%llx", (unsigned long long)d);
-       if (strategy == DB_STGY_PROC) {
-               if (db_line_at_pc(cursym, &filename, &linenum, off)) {
-                       db_printf(" [%s", filename);
-                       if (linenum > 0)
-                               db_printf(":%d", linenum);
-                       db_printf("]");
-               }
-       }
-}
-
-/*
- * Return the symbol name for a given offset and
- * change the offset to be relative to this symbol.
- * Very useful for xpr, when you want to log offsets
- * in a user-friendly way.
- */
-
-char null_sym[] = "";
-
-char *
-db_get_sym(db_expr_t *off)
-{
-       db_sym_t        cursym;
-       db_expr_t       value;
-       char            *name;
-       db_addr_t       d;
-
-       cursym = db_search_symbol(*off, DB_STGY_ANY, &d);
-       db_symbol_values(0, cursym, &name, &value);
-       if (name) 
-               *off = d;
-       else
-               name = null_sym;
-       return(name);
-}
-
-void
-db_printsym(
-       db_expr_t       off,
-       db_strategy_t   strategy)
-{
-       db_task_printsym(off, strategy, TASK_NULL);
-}
-
-int db_short_filename = 1;
-
-void
-db_shorten_filename(char **filenamep)
-{
-       char *cp, *cp_slash;
-
-       if (! *filenamep)
-               return;
-       for (cp = cp_slash = *filenamep; *cp; cp++) {
-               if (*cp == '/')
-                       cp_slash = cp;
-       }
-       if (*cp_slash == '/')
-               *filenamep = cp_slash+1;
-}
-
-int
-db_task_getlinenum(
-       db_expr_t       off,
-       task_t          task)
-{
-       db_addr_t       d;
-       char            *filename;
-       char            *name;
-       db_expr_t       value;
-       int             linenum;
-       db_sym_t        cursym;
-       db_strategy_t   strategy = DB_STGY_PROC;
-
-       if (off >= db_maxval || off < db_minval) {
-               db_printf("%#lln", (unsigned long long)off);
-               return(-1);
-       }
-       cursym = db_search_task_symbol(off, strategy, &d, task);
-
-       db_symbol_values(0, cursym, &name, &value);
-       if (name == 0 || d >= db_maxoff || value == 0) {
-               return(-1);
-       }
-       if (db_line_at_pc(cursym, &filename, &linenum, off))
-               return(linenum);
-       else
-               return(-1);
-}
-
-boolean_t
-db_line_at_pc(
-       db_sym_t        sym,
-       char            **filename,
-       int             *linenum,
-       db_expr_t       pc)
-{
-       boolean_t result;
-
-       if (db_last_symtab == 0)
-               return FALSE;
-       if (X_db_line_at_pc( db_last_symtab, sym, filename, linenum, pc)) {
-               if (db_short_filename)
-                       db_shorten_filename(filename);
-               result = TRUE;
-       } else 
-               result = FALSE;
-       return(result);
-}
-
-int qsort_check = 0;
-
-void
-db_qsort(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *))
-{
-       if (nbelts <= 0 || eltsize <= 0 || compfun == 0) {
-               printf("qsort: invalid parameters\n");
-               return;
-       }
-       qsort_recur(table, table + nbelts * eltsize, eltsize, compfun);
-
-       if (qsort_check)
-               qsort_checker(table, nbelts, eltsize, compfun);
-}
-
-void
-qsort_swap(
-       register int    *a,
-       register int    *b,
-       register int    size)
-{
-       register int temp;
-       char *aa, *bb;
-       char ctemp;
-
-       for (; size >= (signed)sizeof (int); size -= sizeof (int), a++, b++) {
-               temp = *a;
-               *a = *b;
-               *b = temp;
-       }
-       aa = (char *)a;
-       bb = (char *)b;
-       for (; size > 0; size--, aa++, bb++) {
-               ctemp = *aa;
-               *aa = *bb;
-               *bb = ctemp;
-       }
-}
-
-/* rotate the three elements to the left */
-void
-qsort_rotate(
-       register int    *a,
-       register int    *b,
-       register int    *c,
-       register int    size)
-{
-       register int temp;
-       char *aa, *bb, *cc;
-       char ctemp;
-
-       for (; size >= (signed)sizeof(int);
-                       size -= sizeof(int), a++, b++, c++) {
-               temp = *a;
-               *a = *c;
-               *c = *b;
-               *b = temp;
-       }
-       aa = (char *)a;
-       bb = (char *)b;
-       cc = (char *)c;
-       for (; size > 0; size--, aa++, bb++, cc++) {
-               ctemp = *aa;
-               *aa = *cc;
-               *cc = *bb;
-               *bb = ctemp;
-       }
-}
-
-void
-qsort_recur(
-       char    *left,
-       char    *right,
-       int     eltsize,
-       int     (*compfun)(char *, char *))
-{
-       char *i, *j;
-       char *sameleft, *sameright;
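-
-       /*
-        * Three-way partition around a middle pivot: elements equal to the
-        * pivot collect in [sameleft, sameright], smaller ones end up to the
-        * left and larger ones to the right.  We then recur on the smaller
-        * side only and loop ("goto top") on the larger, keeping the
-        * recursion depth logarithmic in the number of elements.
-        */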
-
-    top:
-       if (left + eltsize - 1 >= right) {
-               return;
-       }
-
-       /* partition element (reference for "same"ness) */
-       sameleft = left + (((right - left) / eltsize) / 2) * eltsize;
-       sameright = sameleft;
-
-       i = left;
-       j = right - eltsize;
-
-    again:
-       while (i < sameleft) {
-               int comp;
-
-               comp = (*compfun)(i, sameleft);
-               if (comp == 0) {
-                       /*
-                        * Move to the "same" partition.
-                        */
-                       /*
-                        * Shift the left part of the "same" partition to
-                        * the left, so that "same" elements stay in their
-                        * original order.
-                        */
-                       sameleft -= eltsize;
-                       qsort_swap((int *) i, (int *) sameleft, eltsize);
-               } else if (comp < 0) {
-                       /*
-                        * Stay in the "left" partition.
-                        */
-                       i += eltsize;
-               } else {
-                       /*
-                        * Should be moved to the "right" partition.
-                        * Wait until the next loop finds an appropriate
-                        * place to store this element.
-                        */
-                       break;
-               }
-       }
-
-       while (j > sameright) {
-               int comp;
-
-               comp = (*compfun)(sameright, j);
-               if (comp == 0) {
-                       /*
-                        * Move to the right of the "same" partition.
-                        */
-                       sameright += eltsize;
-                       qsort_swap((int *) sameright, (int *) j, eltsize);
-               } else if (comp > 0) {
-                       /*
-                        * Move to the "left" partition.
-                        */
-                       if (i == sameleft) {
-                               /*
-                                * Unfortunately, the "left" partition
-                                * has already been fully processed, so
-                                * we have to shift the "same" partition
-                                * to the right to free a "left" element.
-                                * This is done by moving the leftmost "same"
-                                * element to the right of the "same" partition.
-                                */
-                               sameright += eltsize;
-                               qsort_rotate((int *) sameleft, (int*) sameright,
-                                            (int *) j, eltsize);
-                               sameleft += eltsize;
-                               i = sameleft;
-                       } else {
-                               /*
-                                * Swap with the "left" partition element
-                                * waiting to be moved to the "right"
-                                * partition.
-                                */
-                               qsort_swap((int *) i, (int *) j, eltsize);
-                               j -= eltsize;
-                               /*
-                                * Go back to the 1st loop.
-                                */
-                               i += eltsize;
-                               goto again;
-                       }
-               } else {
-                       /*
-                        * Stay in the "right" partition.
-                        */
-                       j -= eltsize;
-               }
-       }
-                       
-       if (i != sameleft) {
-               /*
-                * The second loop completed (the "right" partition is OK),
-                * but we have to go back to the first loop, and deal with
-                * the element waiting for a place in the "right" partition.
-                * Let's shift the "same" zone to the left.
-                */
-               sameleft -= eltsize;
-               qsort_rotate((int *) sameright, (int *) sameleft, (int *) i,
-                            eltsize);
-               sameright -= eltsize;
-               j = sameright;
-               /*
-                * Go back to 1st loop.
-                */
-               goto again;
-       }
-
-       /*
-        * The partitions are correct now. Recur on the smallest side only.
-        */
-       if (sameleft - left >= right - (sameright + eltsize)) {
-               qsort_recur(sameright + eltsize, right, eltsize, compfun);
-               /*
-                * The "right" partition is now completely sorted.
-                * The "same" partition is OK, so...
-                * Ignore them, and start the loops again on the
-                * "left" partition.
-                */
-               right = sameleft;
-               goto top;
-       } else {
-               qsort_recur(left, sameleft, eltsize, compfun);
-               /*
-                * The "left" partition is now completely sorted.
-                * The "same" partition is OK, so ...
-                * Ignore them, and start the loops again on the
-                * "right" partition.
-                */
-               left = sameright + eltsize;
-               goto top;
-       }
-}
-
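The goto-based loops above implement a three-way, "fat pivot" partition:
elements equal to the pivot collect in the middle "same" zone and are never
revisited, and only the smaller side is handled by a recursive call while the
larger side loops back to "top".  A minimal user-space sketch of the same
idea, assuming plain int elements and hypothetical names:

    #include <stddef.h>

    static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

    /* Sort a[lo..hi).  Equal-to-pivot elements end up in the middle and
     * are excluded from further work; recursing only on the smaller side
     * bounds the stack depth at O(log n). */
    static void qsort3(int *a, size_t lo, size_t hi)
    {
        while (hi - lo > 1) {
            int pivot = a[lo + (hi - lo) / 2];
            size_t lt = lo, i = lo, gt = hi;

            while (i < gt) {                    /* three-way scan */
                if (a[i] < pivot)
                    swap_int(&a[i++], &a[lt++]);
                else if (a[i] > pivot)
                    swap_int(&a[i], &a[--gt]);
                else
                    i++;
            }
            if (lt - lo < hi - gt) {            /* recurse on smaller side */
                qsort3(a, lo, lt);
                lo = gt;                        /* iterate on larger side */
            } else {
                qsort3(a, gt, hi);
                hi = lt;
            }
        }
    }

The kernel routine reaches the same partition layout without the extra index
by rotating elements between the three zones in place.
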
-void
-qsort_checker(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *))
-{
-       char *curr, *prev, *last;
-
-       prev = table;
-       curr = prev + eltsize;
-       last = table + (nbelts * eltsize);
-
-       while (curr < last) {
-               if ((*compfun)(prev, curr) > 0) {
-                       printf("**** qsort_checker: error between %p and %p!!!\n", prev, curr);
-                       return;
-               }
-               prev = curr;
-               curr += eltsize;
-       }
-       printf("qsort_checker: OK\n");
-}
-
-int qsort_search_debug = 0;
-
-void
-db_qsort_limit_search(
-       char    *target,
-       char    **start,
-       char    **db_end,
-       int     eltsize,
-       int     (*compfun)(char *, char *))
-{
-       register char *left, *right;
-       char *oleft, *oright, *part;
-       int nbiter = 0;
-       int comp;
-
-       oleft = left = *start;
-       oright = right = *db_end;
-       part = (char *) 0;
-
-       while (left < right) {
-               nbiter++;
-               part = left + (((right - left) / eltsize) / 2) * eltsize;
-               comp = (*compfun)(target, part);
-               if (comp > 0) {
-                       oleft = left;
-                       oright = right;
-                       left = part;
-                       if (left == oleft)
-                               break;
-                       if (qsort_search_debug > 1)
-                               printf(" [ Moved left from %p to %p]\n",
-                                      oleft, left);
-               } else if (comp < 0) {
-                       oright = right;
-                       oleft = left;
-                       right = part;
-                       if (qsort_search_debug > 1)
-                               printf(" [ Moved right from %p to %p]\n",
-                                      oright, right);
-               } else {
-                       if (qsort_search_debug > 1)
-                               printf(" [ FOUND! left=%p right=%p]\n",
-                                      left, right);
-                       for (left = part;
-                            left > *start && (*compfun)(left, part) == 0;
-                            left -= eltsize);
-                       for (right = part + eltsize;
-                            right < *db_end && (*compfun)(right, part) == 0;
-                            right += eltsize);
-                       oright = right;
-                       oleft = left;
-                       break;
-               }
-       }
-       
-       if (qsort_search_debug)
-               printf("[ Limited from %p-%p to %p-%p in %d iters ]\n",
-                         *start, *db_end, oleft, oright, nbiter);
-       *start = oleft;
-       *db_end = oright;
-}
-
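db_qsort_limit_search narrows a symbol range with a plain binary search and
then widens over duplicate keys once a match is found.  A minimal sketch of
the same shape on a sorted int table (illustrative only, not the kernel
routine itself):

    #include <stddef.h>

    static void limit_search(const int *tab, size_t *lo, size_t *hi, int target)
    {
        size_t l = *lo, r = *hi;

        while (l < r) {
            size_t mid = l + (r - l) / 2;

            if (tab[mid] < target) {
                if (mid == l)               /* no further progress */
                    break;
                l = mid;
            } else if (tab[mid] > target) {
                r = mid;
            } else {
                /* Found: expand across equal neighbours, as the loops
                 * over compfun(...) == 0 do above. */
                for (l = mid; l > *lo && tab[l - 1] == target; l--)
                    ;
                for (r = mid + 1; r < *hi && tab[r] == target; r++)
                    ;
                break;
            }
        }
        *lo = l;
        *hi = r;
    }
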
-void
-bubble_sort(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *))
-{
-       boolean_t sorted;
-       char *b_end;
-       register char *p;
-
-       b_end = table + ((nbelts-1) * eltsize);
-       do {
-               sorted = TRUE;
-               for (p = table; p < b_end; p += eltsize) {
-                       if ((*compfun)(p, p + eltsize) > 0) {
-                               qsort_swap((int *) p, (int *) (p + eltsize),
-                                          eltsize);
-                               sorted = FALSE;
-                       }
-               }
-       } while (sorted == FALSE);
-
-       if (qsort_check)
-               qsort_checker(table, nbelts, eltsize, compfun);
-}
-
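All the sorters here share the ddb convention of a raw byte table plus an
element size and a comparator taking char pointers.  A hypothetical usage
sketch of that convention:

    /* Comparator for a table of longs under the char*-based convention. */
    static int cmp_long(char *a, char *b)
    {
        long la = *(long *)a, lb = *(long *)b;
        return (la > lb) - (la < lb);
    }

    /*
     *  long table[16] = { ... };
     *  bubble_sort((char *)table, 16, sizeof (long), cmp_long);
     */
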
-vm_offset_t    vm_min_inks_addr = VM_MAX_KERNEL_ADDRESS;
-
-void
-db_install_inks(
-      vm_offset_t base)
-{
-       /* save addr to demarcate kernel/inks boundary (1st time only)  */
-       if (vm_min_inks_addr == VM_MAX_KERNEL_ADDRESS) {
-               vm_min_inks_addr = base;
-               db_qualify_ambiguous_names = TRUE;
-       }
-}
-
-extern void db_clone_offsetXXX(char *, long);
-
-void
-db_clone_symtabXXX(
-       char *clonee,                   /* which symtab to clone        */
-       char *cloner,                   /* in-kernel-server name        */
-       vm_offset_t base)               /* base address of cloner       */
-{
-       db_symtab_t     *st, *st_src;
-       char *          memp;
-       vm_size_t       size;
-       long            offset;
-
-       if (db_nsymtab >= MAXNOSYMTABS) {
-           db_printf("db_clone_symtab: Too Many Symbol Tables\n");
-           return;
-       }
-
-       db_install_inks(base);
-
-       st = &db_symtabs[db_nsymtab];   /* destination symtab           */
-       if ((st_src = db_symtab_cloneeXXX(clonee)) == 0) {
-           db_printf("db_clone_symtab: clonee (%s) not found\n", clonee);
-           return;
-       }
-                                       /* alloc new symbols            */
-       size = (vm_size_t)(st_src->end - st_src->private);
-       memp = (char *)kalloc( round_page(size) );
-       if (!memp) {
-           db_printf("db_clone_symtab: no memory for symtab\n");
-           return;
-       }
-
-       *st = *st_src;                  /* bulk copy src -> dest        */
-       strlcpy(st->name, cloner, sizeof (st->name));   /* new name     */
-       st->private = memp;             /* copy symbols                 */
-       bcopy((const char *)st_src->private, st->private, size);
-       st->start = memp + sizeof(int); /* fixup pointers to symtab     */
-       st->end   = memp + *(int *)memp;
-       st->map_pointer = 0;            /* no map because kernel-loaded */
-
-       /* Offset symbols, leaving strings pointing into st_src         */
-       offset      = base - st_src->minsym;
-       st->minsym  += offset;
-       st->maxsym  += offset;
-       db_clone_offsetXXX(memp, offset);
-       db_nsymtab++;
-
-       db_printf( "[ cloned symbol table for %s: range 0x%lx to 0x%lx %s]\n",
-                 st->name, st->minsym, st->maxsym,
-                 st->sorted ? "(sorted) " : "");
-       db_maxval = (unsigned int)st->maxsym + db_maxoff;
-}
-
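The clone operation is a bulk copy plus rebasing: duplicate the descriptor,
give it private storage, and shift every address field by the difference
between the new base and the clonee's lowest symbol.  A toy sketch of the
rebasing step (names here are illustrative, not the kernel's):

    struct toy_symtab {
        char          *storage;
        unsigned long  minsym, maxsym;
    };

    static void toy_clone(struct toy_symtab *dst, const struct toy_symtab *src,
                          char *storage, unsigned long new_base)
    {
        long offset = (long)(new_base - src->minsym);

        *dst = *src;              /* bulk copy of the descriptor      */
        dst->storage = storage;   /* private backing store            */
        dst->minsym += offset;    /* rebase the address range         */
        dst->maxsym += offset;
    }

In the real routine the per-symbol values are rebased the same way by
db_clone_offsetXXX, while the strings keep pointing into the clonee.
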
-db_symtab_t *
-db_symtab_cloneeXXX(
-      char *clonee)
-{
-       db_symtab_t *st, *st_src;
-
-       st = &db_symtabs[db_nsymtab];   /* destination symtab */
-       for (st_src = &db_symtabs[0]; st_src < st; ++st_src)
-               if (!strcmp(clonee, st_src->name))
-                       break;
-       return ((st_src < st) ? st_src : 0);
-}
-
-/*
- * Switch into symbol-table specific routines
- */
-
-#if    !defined(__alpha) && !defined(INTEL860)
-#define DB_NO_COFF
-#endif
-
-#ifndef        DB_NO_AOUT
-#include <ddb/db_aout.h>
-#endif
-
-#ifndef        DB_NO_COFF
-#include <ddb/db_coff.h>
-#endif
-
-static void no_init(void)
-{
-       db_printf("Non-existent code for ddb init\n");
-}
-
-static boolean_t
-no_sym_init(__unused char *nstart, __unused char *nend, const char *name,
-           __unused char *task_addr)
-{
-       db_printf("Non-existent code for init of symtab %s\n", name);
-       return FALSE;
-}
-
-static db_sym_t
-no_lookup(__unused db_symtab_t *stab, char *symstr)
-{
-       db_printf("Bogus lookup of symbol %s\n", symstr);
-       return DB_SYM_NULL;
-}
-
-static db_sym_t
-no_search(__unused db_symtab_t *stab, db_addr_t off,
-         __unused db_strategy_t strategy, __unused db_expr_t *diffp)
-{
-       db_printf("Bogus search for offset %#llX\n", (unsigned long long)off);
-       return DB_SYM_NULL;
-}
-
-static boolean_t
-no_line_at_pc(__unused db_symtab_t *stab, __unused db_sym_t sym,
-             __unused char **file, __unused int *line, db_expr_t pc)
-{
-       db_printf("Bogus search for pc %#llX\n", (unsigned long long)pc);
-       return FALSE;
-}
-
-static void
-no_symbol_values(__unused db_sym_t sym, char **namep, db_expr_t *valuep)
-{
-       db_printf("Bogus symbol value resolution\n");
-       if (namep) *namep = NULL;
-       if (valuep) *valuep = 0;
-}
-
-static db_sym_t
-no_search_by_addr(__unused db_symtab_t *stab, db_addr_t off,
-                 __unused char **file, __unused char **func,
-                 __unused int *line, __unused db_expr_t *diffp,
-                 __unused int *args)
-{
-       db_printf("Bogus search for address %#llX\n", (unsigned long long)off);
-       return DB_SYM_NULL;
-}
-       
-int
-no_print_completion(__unused db_symtab_t *stab, __unused char *symstr)
-{
-       db_printf("Bogus print completion: not supported\n");
-       return 0;
-}
-
-int
-no_lookup_incomplete(__unused db_symtab_t *stab,
-                    __unused char *symstr, __unused char **name,
-                    __unused int *len, __unused int *toadd)
-{
-       db_printf("Bogus lookup incomplete: not supported\n");
-       return 0;
-}
-
-#define NONE   \
-       {       \
-               .init = no_init, \
-               .sym_init = no_sym_init, \
-               .lookup = no_lookup, \
-               .search_symbol = no_search, \
-               .line_at_pc = no_line_at_pc, \
-               .symbol_values = no_symbol_values, \
-               .search_by_addr = no_search_by_addr, \
-               .print_completion = no_print_completion, \
-               .lookup_incomplete = no_lookup_incomplete, \
-       }
-
-struct db_sym_switch x_db[] = {
-
-       /* BSD a.out format (really, sdb/dbx(1) symtabs) */
-#ifdef DB_NO_AOUT
-       NONE,
-#else  /* DB_NO_AOUT */
-       {
-               .init = aout_db_init,
-               .sym_init = aout_db_sym_init,
-               .lookup = aout_db_lookup,
-               .search_symbol = aout_db_search_symbol,
-               .line_at_pc = aout_db_line_at_pc,
-               .symbol_values = aout_db_symbol_values,
-               .search_by_addr = aout_db_search_by_addr,
-               .print_completion = aout_db_print_completion,
-               .lookup_incomplete = aout_db_lookup_incomplete,
-       },
-#endif /* DB_NO_AOUT */
-
-#ifdef DB_NO_COFF
-       NONE,
-#else  /* DB_NO_COFF */
-       {
-               .init = coff_db_init,
-               .sym_init = coff_db_sym_init,
-               .lookup = coff_db_lookup,
-               .search_symbol = coff_db_search_symbol,
-               .line_at_pc = coff_db_line_at_pc,
-               .symbol_values = coff_db_symbol_values,
-               .search_by_addr = coff_db_search_by_addr,
-               .print_completion = coff_db_print_completion,
-               .lookup_incomplete = coff_db_lookup_incomplete,
-       },
-#endif /* DB_NO_COFF */
-
-       /* Machdep, not inited here */
-       NONE
-};
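x_db[] is effectively a vtable in C: one struct of function pointers per
symbol-table format, indexed by the format tag, with a NONE entry of stub
functions so every slot stays safely callable.  A reduced sketch of the
pattern with hypothetical names:

    struct fmt_ops {
        const char *(*name)(void);
        int         (*parse)(const char *sym);
    };

    static const char *aout_name(void)           { return "a.out"; }
    static int         aout_parse(const char *s) { return s != 0; }

    static const char *stub_name(void)           { return "none"; }
    static int         stub_parse(const char *s) { (void)s; return 0; }

    static const struct fmt_ops fmt_table[] = {
        { .name = aout_name, .parse = aout_parse },  /* format 0 */
        { .name = stub_name, .parse = stub_parse },  /* unsupported slot */
    };

    /* Dispatch by tag: fmt_table[type].parse(str); */
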
diff --git a/osfmk/ddb/db_sym.h b/osfmk/ddb/db_sym.h
deleted file mode 100644 (file)
index 3749e75..0000000
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: Alessandro Forin, Carnegie Mellon University
- *     Date:   8/90
- */
-
-#ifndef        _DDB_DB_SYM_H_
-#define        _DDB_DB_SYM_H_
-
-#include <mach/boolean.h>
-#include <mach/machine/vm_types.h>
-#include <machine/db_machdep.h>
-#include <kern/task.h>
-
-/*
- * This module can handle multiple symbol tables,
- * of multiple types, at the same time
- */
-#define        SYMTAB_NAME_LEN 32
-
-typedef struct {
-       int             type;
-#define        SYMTAB_AOUT     0
-#define        SYMTAB_COFF     1
-#define        SYMTAB_MACHDEP  2
-       char            *start;         /* symtab location */
-       char            *end;
-       char            *private;       /* optional machdep pointer */
-       char            *map_pointer;   /* symbols are for this map only,
-                                          if not null */
-       char            name[SYMTAB_NAME_LEN];
-                                       /* symtab name */
-       unsigned long   minsym;         /* lowest symbol value */
-       unsigned long   maxsym;         /* highest symbol value */
-       boolean_t       sorted;         /* is this table sorted? */
-} db_symtab_t;
-
-extern db_symtab_t     *db_last_symtab; /* where last symbol was found */
-
-/*
- * Symbol representation is specific to the symtab style:
- * BSD compilers use dbx's nlist; other compilers might use
- * a different one.
- */
-typedef        void *          db_sym_t;       /* opaque handle on symbols */
-#define        DB_SYM_NULL     ((db_sym_t)0)
-
-/*
- * Non-stripped symbol tables will have duplicates, for instance
- * the same string could match a parameter name, a local var, a
- * global var, etc.
- * We are most concerned with the following matches.
- */
-typedef int            db_strategy_t;  /* search strategy */
-
-#define        DB_STGY_ANY     0                       /* anything goes */
-#define DB_STGY_XTRN   1                       /* only external symbols */
-#define DB_STGY_PROC   2                       /* only procedures */
-
-extern boolean_t       db_qualify_ambiguous_names;
-                                       /* if TRUE, check across symbol tables
-                                        * for multiple occurrences of a name.
-                                        * Might slow down quite a bit */
-
-extern unsigned long   db_maxoff;
-
-/* Prototypes for functions exported by this module.
- */
-extern boolean_t db_add_symbol_table(
-       int             type,
-       char            *start,
-       char            *end,
-       const char      *name,
-       char            *ref,
-       char            *map_pointer,
-       unsigned long   minsym,
-       unsigned long   maxsym,
-       boolean_t       sorted);
-
-extern void db_install_inks(
-       vm_offset_t     base);
-
-extern boolean_t db_value_of_name(
-       const char      *name,
-       db_expr_t       *valuep);
-
-extern db_sym_t db_lookup(const char *symstr);
-
-extern char * db_get_sym(
-       db_expr_t       * off);
-
-extern db_sym_t db_sym_parse_and_lookup(
-       int     (*func)(db_symtab_t *,
-                       char *,
-                       char *,
-                       int,
-                       db_sym_t*,
-                       char **,
-                       int *),
-       db_symtab_t     *symtab,
-       char            *symstr);
-
-extern int db_sym_parse_and_lookup_incomplete(
-       int     (*func)(db_symtab_t *,
-                       char *,
-                       char *,
-                       int,
-                       db_sym_t*,
-                       char **,
-                       int *),
-       db_symtab_t     *symtab,
-       char            *symstr,
-       char            **name,
-       int             *len,
-       int             *toadd);
-
-extern int db_sym_parse_and_print_completion(
-       int     (*func)(db_symtab_t *,
-                       char *),
-       db_symtab_t     *symtab,
-       char            *symstr);
-
-extern db_sym_t db_search_task_symbol(
-       db_addr_t               val,
-       db_strategy_t           strategy,
-       db_addr_t               *offp,
-       task_t                  task);
-
-extern db_sym_t db_search_task_symbol_and_line(
-       db_addr_t               val,
-       db_strategy_t           strategy,
-       db_expr_t               *offp,
-       char                    **filenamep,
-       int                     *linenump,
-       task_t                  task,
-       int                     *argsp);
-
-extern void db_symbol_values(
-       db_symtab_t     *stab,
-       db_sym_t        sym,
-       const char              **namep,
-       db_expr_t       *valuep);
-
-extern void db_task_printsym(
-       db_expr_t       off,
-       db_strategy_t   strategy,
-       task_t          task);
-
-extern void db_printsym(
-       db_expr_t       off,
-       db_strategy_t   strategy);
-
-extern boolean_t db_line_at_pc(
-       db_sym_t        sym,
-       char            **filename,
-       int             *linenum,
-       db_expr_t       pc);
-
-extern void db_qsort(
-       char    *table,
-       int     nbelts,
-       int     eltsize,
-       int     (*compfun)(char *, char *));
-
-extern void db_qsort_limit_search(
-       char    *target,
-       char    **start,
-       char    **end,
-       int     eltsize,
-       int     (*compfun)(char *, char *));
-
-extern void db_sym_print_completion(
-       db_symtab_t *stab,
-       char *name,
-       int function,
-       char *fname,
-       int line);
-
-extern void db_print_completion(
-       char *symstr);
-
-extern int db_lookup_incomplete(
-       char *symstr,
-       int symlen);
-
-extern void ddb_init(void);
-
-extern void db_machdep_init(void);
-
-extern void db_clone_symtabXXX(char *, char *, vm_offset_t);
-
-extern db_symtab_t *db_symtab_cloneeXXX(char *);
-
-extern int db_task_getlinenum( db_expr_t, task_t);
-
-/* Some convenience macros.
- */
-#define db_find_sym_and_offset(val,namep,offp) \
-       db_symbol_values(0, db_search_symbol(val,DB_STGY_ANY,offp),namep,0)
-                                       /* find name&value given approx val */
-
-#define db_find_xtrn_sym_and_offset(val,namep,offp)    \
-       db_symbol_values(0, db_search_symbol(val,DB_STGY_XTRN,offp),namep,0)
-                                       /* ditto, but no locals */
-
-#define db_find_task_sym_and_offset(val,namep,offp,task)       \
-       db_symbol_values(0, db_search_task_symbol(val,DB_STGY_ANY,offp,task),  \
-                        namep, 0)      /* find name&value given approx val */
-
-#define db_find_xtrn_task_sym_and_offset(val,namep,offp,task)  \
-       db_symbol_values(0, db_search_task_symbol(val,DB_STGY_XTRN,offp,task), \
-                        namep,0)       /* ditto, but no locals */
-
-#define db_search_symbol(val,strgy,offp)       \
-       db_search_task_symbol(val,strgy,offp,0)
-                                       /* find symbol in current task */
-
-/*
- * Symbol table switch, defines the interface
- * to symbol-table specific routines.
- */
-
-extern struct db_sym_switch {
-
-       void            (*init)(void);
-
-       boolean_t       (*sym_init)(
-                               char *start,
-                               char *end,
-                               const char *name,
-                               char *task_addr
-                               );
-
-       db_sym_t        (*lookup)(
-                               db_symtab_t *stab,
-                               char *symstr
-                               );
-       db_sym_t        (*search_symbol)(
-                               db_symtab_t *stab,
-                               db_addr_t off,
-                               db_strategy_t strategy,
-                               db_expr_t *diffp
-                               );
-
-       boolean_t       (*line_at_pc)(
-                               db_symtab_t     *stab,
-                               db_sym_t        sym,
-                               char            **file,
-                               int             *line,
-                               db_expr_t       pc
-                               );
-
-       void            (*symbol_values)(
-                               db_sym_t        sym,
-                               char            **namep,
-                               db_expr_t       *valuep
-                               );
-       db_sym_t        (*search_by_addr)(
-                               db_symtab_t     *stab,
-                               db_addr_t       off,
-                               char            **file,
-                               char            **func,
-                               int             *line,
-                               db_expr_t       *diffp,
-                               int             *args
-                               );
-
-       int             (*print_completion)(
-                               db_symtab_t     *stab,
-                               char            *symstr
-                               );
-
-       int             (*lookup_incomplete)(
-                               db_symtab_t     *stab,
-                               char            *symstr,
-                               char            **name,
-                               int             *len,
-                               int             *toadd
-                               );
-} x_db[];
-
-#ifndef        symtab_type
-#define        symtab_type(s)          SYMTAB_AOUT
-#endif
-
-#define        X_db_init()                     x_db[symtab_type(s)].init()
-#define        X_db_sym_init(s,e,n,t)          x_db[symtab_type(s)].sym_init(s,e,n,t)
-#define        X_db_lookup(s,n)                x_db[(s)->type].lookup(s,n)
-#define        X_db_search_symbol(s,o,t,d)     x_db[(s)->type].search_symbol(s,o,t,d)
-#define        X_db_line_at_pc(s,p,f,l,a)      x_db[(s)->type].line_at_pc(s,p,f,l,a)
-#define        X_db_symbol_values(s,p,n,v)     x_db[(s)->type].symbol_values(p,n,v)
-#define X_db_search_by_addr(s,a,f,c,l,d,r) \
-                       x_db[(s)->type].search_by_addr(s,a,f,c,l,d,r)
-#define        X_db_print_completion(s,p)      x_db[(s)->type].print_completion(s,p)
-#define        X_db_lookup_incomplete(s,p,n,l,t)       \
-                       x_db[(s)->type].lookup_incomplete(s,p,n,l,t)
-
-#endif /* !_DDB_DB_SYM_H_ */
diff --git a/osfmk/ddb/db_task_thread.c b/osfmk/ddb/db_task_thread.c
deleted file mode 100644 (file)
index 7e7420c..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#include <kern/kern_types.h>
-#include <kern/processor.h>
-#include <machine/db_machdep.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_sym.h>
-
-/*
- * The following constants are used to prevent an infinite loop when
- * searching a task or thread list that may be corrupted.
- */
-#define        DB_MAX_TASKID   0x10000         /* max # of tasks */
-#define DB_MAX_THREADID        0x10000         /* max # of threads in a task */
-#define DB_MAX_PSETS   0x10000         /* max # of processor sets */
-
-task_t         db_default_task = TASK_NULL;    /* default target task */
-thread_t       db_default_act = THREAD_NULL;           /* default target thr_act */
-
-
-
-/* Prototypes for functions local to this file.
- */
-task_t db_lookup_task_id(register int task_id);
-
-static thread_t db_lookup_act_id(
-       task_t   task,
-       register int thread_id);
-
-
-
-/*
- * search valid task queue, and return the queue position as the task id
- */
-int
-db_lookup_task(task_t target_task)
-{
-       register task_t task;
-       register int task_id;
-
-       task_id = 0;
-       if (queue_first(&tasks) == 0)
-               return(-1);
-       queue_iterate(&tasks, task, task_t, tasks) {
-               if (target_task == task)
-                   return(task_id);
-               if (task_id++ >= DB_MAX_TASKID)
-                   return(-1);
-       }
-       return(-1);
-}
-
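The lookup guards against a corrupted task list by bounding the number of
iterations rather than trusting the queue to terminate.  The same defensive
pattern on a plain singly linked list (illustrative types only):

    struct node { struct node *next; };

    static int bounded_index_of(struct node *head, struct node *target, int max)
    {
        int idx = 0;
        struct node *n;

        for (n = head; n != 0; n = n->next) {
            if (n == target)
                return idx;
            if (idx++ >= max)       /* longer than plausible: bail out */
                return -1;
        }
        return -1;
    }
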
-/*
- * search thread queue of the task, and return the queue position
- */
-int
-db_lookup_task_act(
-       task_t          task,
-       thread_t        target_act)
-{
-       register thread_t thr_act;
-       register int act_id;
-
-       act_id = 0;
-       if (queue_first(&task->threads) == 0)
-           return(-1);
-       queue_iterate(&task->threads, thr_act, thread_t, task_threads) {
-           if (target_act == thr_act)
-               return(act_id);
-           if (act_id++ >= DB_MAX_THREADID)
-               return(-1);
-       }
-       return(-1);
-}
-
-/*
- * search thr_act queue of every valid task, and return the queue position
- * as the thread id.
- */
-int
-db_lookup_act(thread_t target_act)
-{
-       register int act_id;
-       register task_t task;
-       register int ntask = 0;
-
-       if (queue_first(&tasks) == 0)
-               return(-1);
-       queue_iterate(&tasks, task, task_t, tasks) {
-               if (ntask++ > DB_MAX_TASKID)
-                   return(-1);
-               if (task->thread_count == 0)
-                   continue;
-               act_id = db_lookup_task_act(task, target_act);
-               if (act_id >= 0)
-                   return(act_id);
-       }
-       return(-1);
-}
-
-/*
- * check that the address is a valid thread address
- */
-int force_act_lookup = 0;
-boolean_t
-db_check_act_address_valid(thread_t thr_act)
-{
-       if (!force_act_lookup && db_lookup_act(thr_act) < 0) {
-           db_printf("Bad thr_act address %p\n", thr_act);
-           db_flush_lex();
-           return(FALSE);
-       } else
-           return(TRUE);
-}
-
-/*
- * convert task_id (queue position) to task address
- */
-task_t
-db_lookup_task_id(int task_id)
-{
-       register task_t task;
-
-       if (task_id > DB_MAX_TASKID)
-           return(TASK_NULL);
-       if (queue_first(&tasks) == 0)
-               return(TASK_NULL);
-       queue_iterate(&tasks, task, task_t, tasks) {
-               if (task_id-- <= 0)
-                       return(task);
-       }
-       return(TASK_NULL);
-}
-
-/*
- * convert (task_id, act_id) pair to thr_act address
- */
-static thread_t
-db_lookup_act_id(
-       task_t   task,
-       register int act_id)
-{
-       register thread_t thr_act;
-
-       
-       if (act_id > DB_MAX_THREADID)
-           return(THREAD_NULL);
-       if (queue_first(&task->threads) == 0)
-           return(THREAD_NULL);
-       queue_iterate(&task->threads, thr_act, thread_t, task_threads) {
-           if (act_id-- <= 0)
-               return(thr_act);
-       }
-       return(THREAD_NULL);
-}
-
-/*
- * get next parameter from a command line, and check it as a valid
- * thread address
- */
-boolean_t
-db_get_next_act(
-       thread_t        *actp,
-       int             position)
-{
-       db_expr_t       value;
-       thread_t        thr_act;
-
-       *actp = THREAD_NULL;
-       if (db_expression(&value)) {
-           thr_act = (thread_t)(unsigned long)value;
-           if (!db_check_act_address_valid(thr_act)) {
-               db_flush_lex();
-               return(FALSE);
-           }
-       } else if (position <= 0) {
-           thr_act = db_default_act;
-       } else
-           return(FALSE);
-       *actp = thr_act;
-       return(TRUE);
-}
-
-/*
- * check that the default thread is still valid
- *     (called when entering a DDB session)
- */
-void
-db_init_default_act(void)
-{
-       if (db_lookup_act(db_default_act) < 0) {
-           db_default_act = THREAD_NULL;
-           db_default_task = TASK_NULL;
-       } else
-           db_default_task = db_default_act->task;
-}
-
-/*
- * set or get the default thread, which is used when the /t or :t option
- * is specified on the command line
- */
-int
-db_set_default_act(__unused struct db_variable *vp, db_expr_t *valuep,
-                  int flag, __unused db_var_aux_param_t ap)
-{
-       thread_t        thr_act;
-       int             task_id;
-       int             act_id;
-
-       if (flag == DB_VAR_SHOW) {
-           db_printf("%#n", db_default_act);
-           task_id = db_lookup_task(db_default_task);
-           if (task_id != -1) {
-               act_id = db_lookup_act(db_default_act);
-               if (act_id != -1) {
-                   db_printf(" (task%d.%d)", task_id, act_id);
-               }
-           }
-           return(0);
-       }
-
-       if (flag != DB_VAR_SET) {
-           *valuep = (db_expr_t)(unsigned long)db_default_act;
-           return(0);
-       }
-       thr_act = (thread_t)(unsigned long)*valuep;
-       if (thr_act != THREAD_NULL && !db_check_act_address_valid(thr_act))
-           db_error(0);
-           /* NOTREACHED */
-       db_default_act = thr_act;
-       if (thr_act)
-               db_default_task = thr_act->task;
-       return(0);
-}
-
-/*
- * convert $taskXXX[.YYY] type DDB variable to task or thread address
- */
-int
-db_get_task_act(__unused struct db_variable *vp, db_expr_t *valuep, int flag,
-               db_var_aux_param_t ap)
-{
-       task_t                  task;
-       thread_t                thr_act;
-       int                     task_id;
-
-       if (flag == DB_VAR_SHOW) {
-           db_printf("%#n", db_default_task);
-           task_id = db_lookup_task(db_default_task);
-           if (task_id != -1)
-               db_printf(" (task%d)", task_id);
-           return(0);
-       }
-
-       if (flag != DB_VAR_GET) {
-           db_error("Cannot set to $task variable\n");
-           /* NOTREACHED */
-       }
-       if ((task = db_lookup_task_id(ap->suffix[0])) == TASK_NULL) {
-           db_printf("no such task($task%d)\n", ap->suffix[0]);
-           db_error(0);
-           /* NOTREACHED */
-       }
-       if (ap->level <= 1) {
-           *valuep = (db_expr_t)(unsigned long)task;
-           return(0);
-       }
-       if ((thr_act = db_lookup_act_id(task, ap->suffix[1])) == THREAD_NULL){
-           db_printf("no such thr_act($task%d.%d)\n", 
-                                       ap->suffix[0], ap->suffix[1]);
-           db_error(0);
-           /* NOTREACHED */
-       }
-       *valuep = (db_expr_t)(unsigned long)thr_act;
-       return(0);
-}
diff --git a/osfmk/ddb/db_task_thread.h b/osfmk/ddb/db_task_thread.h
deleted file mode 100644 (file)
index 12f9ac6..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#ifndef _DDB_DB_TASK_THREAD_H_
-#define _DDB_DB_TASK_THREAD_H_
-
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <ddb/db_variables.h>          /* For db_var_aux_param_t */
-
-/*
- * On behalf of kernel-loaded tasks, distinguish between current task
- * (=> symbol table) and current address space (=> where [e.g.]
- * breakpoints are set).  From ddb's perspective, kernel-loaded tasks
- * can retain their own symbol tables, but share the kernel's address
- * space.
- */
-#define db_current_task()                                              \
-               ((current_thread())? current_thread()->task: TASK_NULL)
-#define db_current_space()                                             \
-               ((current_thread())?\
-                       current_thread()->task: TASK_NULL)
-#define db_target_space(thr_act, user_space)                           \
-               ((!(user_space) || ((thr_act)))?\
-                       TASK_NULL:                                      \
-                       (thr_act)?                                      \
-                               (thr_act)->task: db_current_space())
-#define db_is_current_space(task)                                      \
-               ((task) == TASK_NULL || (task) == db_current_space())
-
-extern task_t          db_default_task;        /* default target task */
-extern thread_t        db_default_act;         /* default target thr_act */
-
-
-/* Prototypes for functions exported by this module.
- */
-
-int db_lookup_act(thread_t target_act);
-
-int db_lookup_task(task_t target_task);
-
-int db_lookup_task_act(
-       task_t          task,
-       thread_t                target_act);
-
-boolean_t db_check_act_address_valid(thread_t thr_act);
-
-boolean_t db_get_next_act(
-       thread_t                *actp,
-       int             position);
-
-void db_init_default_act(void);
-
-int db_set_default_act(
-       struct db_variable      *vp,
-       db_expr_t               *valuep,
-       int                     flag,
-       db_var_aux_param_t      ap);
-
-int db_get_task_act(
-       struct db_variable      *vp,
-       db_expr_t               *valuep,
-       int                     flag,
-       db_var_aux_param_t      ap);
-
-#endif  /* !_DDB_DB_TASK_THREAD_H_ */
diff --git a/osfmk/ddb/db_trap.c b/osfmk/ddb/db_trap.c
deleted file mode 100644 (file)
index 759649b..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Trap entry point to kernel debugger.
- */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-#include <kern/misc_protos.h>
-#include <ddb/db_access.h>
-#include <ddb/db_break.h>
-#include <ddb/db_command.h>
-#include <ddb/db_examine.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_run.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_trap.h>
-#include <machine/setjmp.h>
-
-extern jmp_buf_t *db_recover;
-
-extern int             db_inst_count;
-extern int             db_load_count;
-extern int             db_store_count;
-
-static task_t task_space;
-static task_t task;
-void
-db_task_trap(__unused int type, __unused int code, boolean_t user_space)
-{
-       jmp_buf_t db_jmpbuf;
-       jmp_buf_t *prev;
-       boolean_t       bkpt;
-       boolean_t       watchpt;
-
-       task = db_current_task();
-       task_space = db_target_space(current_thread(), user_space);
-       bkpt = IS_BREAKPOINT_TRAP(type, code);
-       watchpt = IS_WATCHPOINT_TRAP(type, code);
-
-       /*
-        * Note:  we look up PC values in an address space (task_space),
-        * but print symbols using a (task-specific) symbol table, found
-        * using task.
-        */
-
-       /* Elided since walking the thread/task lists before setting up
-        * safe recovery points is incorrect, and could
-        * potentially cause us to loop and fault indefinitely.
-        */
-#if 0  
-       db_init_default_act();
-#endif       
-       db_check_breakpoint_valid();
-
-       if (db_stop_at_pc(&bkpt, task, task_space)) {
-           if (db_inst_count) {
-               db_printf("After %d instructions (%d loads, %d stores),\n",
-                         db_inst_count, db_load_count, db_store_count);
-           }
-           if (bkpt)
-               db_printf("Breakpoint at  ");
-           else if (watchpt)
-               db_printf("Watchpoint at  ");
-           else
-               db_printf("Stopped at  ");
-           db_dot = PC_REGS(DDB_REGS);
-
-           prev = db_recover;
-           if (_setjmp(db_recover = &db_jmpbuf) == 0) {
-#if defined(__alpha)
-               db_print_loc(db_dot, task_space);
-               db_printf("\n\t");
-               db_print_inst(db_dot, task_space);
-#else /* !defined(__alpha) */
-               db_print_loc_and_inst(db_dot, task);
-#endif /* defined(__alpha) */
-           } else
-               db_printf("Trouble printing location %#llX.\n", (unsigned long long)db_dot);
-           db_recover = prev;
-
-           db_command_loop();
-       }
-
-       db_restart_at_pc(watchpt, task_space);
-}
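The setjmp dance in the middle of db_task_trap is the ddb recovery idiom:
save the active recovery point, install a local jmp_buf around code that may
fault, and restore the previous point afterwards.  A user-space analogue (in
the kernel, the fault handler performs the longjmp through db_recover):

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf *recover;

    static void print_with_recovery(void)
    {
        jmp_buf here, *prev = recover;

        recover = &here;
        if (setjmp(here) == 0)
            puts("printing something that may fault");
        else
            puts("recovered from a fault while printing");
        recover = prev;           /* always restore the outer handler */
    }
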
diff --git a/osfmk/ddb/db_trap.h b/osfmk/ddb/db_trap.h
deleted file mode 100644 (file)
index 79554c8..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:22:27  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:20  ezf]
- *
- * Revision 1.1.2.4  1993/09/17  21:34:42  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:27  robert]
- * 
- * Revision 1.1.2.3  1993/08/03  18:21:39  rod
- *     ANSI prototypes:  prototype thread_kdb_return().  CR #9523.
- *     [1993/08/03  13:06:06  rod]
- * 
- * Revision 1.1.2.2  1993/07/27  18:28:24  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:18  elliston]
- * 
- * $EndLog$
- */
-
-#ifndef        _DDB_DB_TRAP_H_
-#define        _DDB_DB_TRAP_H_
-
-#include <mach/boolean.h>
-
-
-/* Prototypes for functions exported by this module.
- */
-
-void db_task_trap(
-       int             type,
-       int             code,
-       boolean_t       user_space);
-
-void db_trap(
-       int     type,
-       int     code);
-
-/* Other exported prototypes
- */
-
-void thread_kdb_return(void);
-
-#endif /* !_DDB_DB_TRAP_H_ */
diff --git a/osfmk/ddb/db_variables.c b/osfmk/ddb/db_variables.c
deleted file mode 100644 (file)
index f30e5ca..0000000
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-#include <machine/db_machdep.h>
-#include <string.h>                    /* For strcpy() */
-
-#include <ddb/db_lex.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_macro.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-
-extern db_expr_t       db_max_width;
-extern db_expr_t       db_tab_stop_width;
-extern db_expr_t       db_max_line;
-extern db_expr_t       db_auto_wrap;
-extern db_expr_t       db_macro_level;
-extern db_expr_t       db_auto_completion;
-
-#define DB_NWORK       32              /* number of work variables */
-
-db_expr_t      db_work[DB_NWORK];      /* work variables */
-
-struct db_variable db_vars[] = {
-       {
-               .name = "maxoff",
-               .valuep = (db_expr_t*)&db_maxoff,
-       },
-       {
-               .name = "autowrap",
-               .valuep = &db_auto_wrap,
-       },
-       {
-               .name = "completion",
-               .valuep = &db_auto_completion,
-       },
-       {
-               .name = "maxwidth",
-               .valuep = &db_max_width,
-       },
-       {
-               .name = "radix",
-               .valuep = &db_radix,
-       },
-       {
-               .name = "tabstops",
-               .valuep = &db_tab_stop_width,
-       },
-       {
-               .name = "lines",
-               .valuep = &db_max_line,
-       },
-       {
-               .name = "thr_act",
-               .fcn = db_set_default_act,
-       },
-       {
-               .name = "task",
-               .fcn = db_get_task_act,
-               .min_level = 1,
-               .max_level = 2,
-               .low = -1,
-               .high = -1,
-       },
-       {
-               .name = "work",
-               .valuep = &db_work[0],
-               .min_level = 1,
-               .max_level = 1,
-               .high = DB_NWORK - 1,
-       },
-       {
-               .name = "arg",
-               .fcn = db_arg_variable,
-               .min_level = 1,
-               .max_level = 1,
-               .low = 1,
-               .high = DB_MACRO_NARGS,
-               .hidden_level = 1,
-               .hidden_low = 0,
-               .hidden_high = DB_MACRO_LEVEL - 1,
-               .hidden_levelp = (int *)&db_macro_level,
-       },
-};
-struct db_variable *db_evars = db_vars + sizeof(db_vars)/sizeof(db_vars[0]);
-
-
-
-/* Prototypes for functions local to this file.
- */
-
-static const char *db_get_suffix(const char *, short *);
-
-static boolean_t db_cmp_variable_name(struct db_variable *, const char *,
-               db_var_aux_param_t);
-
-static int db_find_variable(
-       struct db_variable      **varp,
-       db_var_aux_param_t      ap);
-
-void db_list_variable(void);
-
-static const char *
-db_get_suffix(const char *suffix, short *suffix_value)
-{
-       register int value;
-
-       for (value = 0; *suffix && *suffix != '.' && *suffix != ':'; suffix++) {
-           if (*suffix < '0' || *suffix > '9')
-               return(0);
-           value = value*10 + *suffix - '0';
-       }
-       *suffix_value = value;
-       if (*suffix == '.')
-           suffix++;
-       return(suffix);
-}
-
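db_get_suffix consumes one numeric component of a "$task3.2"-style variable
name, stopping at '.', ':' or the end of the string.  Illustrative use on a
bare string:

    /*
     *  short a, b;
     *  const char *p = db_get_suffix("3.2", &a);   a == 3, p -> "2"
     *  p = db_get_suffix(p, &b);                   b == 2, p -> ""
     */
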
-static boolean_t
-db_cmp_variable_name(struct db_variable *vp, const char *name,
-                    db_var_aux_param_t ap)
-{
-       const char *var_np, *np;
-       int level;
-       
-       for (np = name, var_np = vp->name; *var_np; ) {
-           if (*np++ != *var_np++)
-               return(FALSE);
-       }
-       for (level = 0; *np && *np != ':' && level < vp->max_level; level++){
-           if ((np = db_get_suffix(np, &ap->suffix[level])) == 0)
-               return(FALSE);
-       }
-       if ((*np && *np != ':') || level < vp->min_level
-           || (level > 0 && (ap->suffix[0] < vp->low 
-                             || (vp->high >= 0 && ap->suffix[0] > vp->high))))
-           return(FALSE);
-       strlcpy(ap->modif, (*np)? np+1: "", TOK_STRING_SIZE);
-       ap->thr_act = (db_option(ap->modif, 't')?db_default_act: THREAD_NULL);
-       ap->level = level;
-       ap->hidden_level = -1;
-       return(TRUE);
-}
-
-static int
-db_find_variable(
-       struct db_variable      **varp,
-       db_var_aux_param_t      ap)
-{
-       int     t;
-       struct db_variable *vp;
-
-       t = db_read_token();
-       if (t == tIDENT) {
-           for (vp = db_vars; vp < db_evars; vp++) {
-               if (db_cmp_variable_name(vp, db_tok_string, ap)) {
-                   *varp = vp;
-                   return (1);
-               }
-           }
-           for (vp = db_regs; vp < db_eregs; vp++) {
-               if (db_cmp_variable_name(vp, db_tok_string, ap)) {
-                   *varp = vp;
-                   return (1);
-               }
-           }
-#if defined(ALTERNATE_REGISTER_DEFS)
-           for (vp = db_altregs; vp < db_ealtregs; vp++) {
-               if (db_cmp_variable_name(vp, db_tok_string, ap)) {
-                   *varp = vp;
-                   return (1);
-               }
-           }
-#endif /* defined(ALTERNATE_REGISTER_DEFS) */
-       }
-       db_printf("Unknown variable \"$%s\"\n", db_tok_string);
-       db_error(0);
-       return (0);
-}
-
-int
-db_get_variable(db_expr_t *valuep)
-{
-       struct db_variable *vp;
-       struct db_var_aux_param aux_param;
-       char            modif[TOK_STRING_SIZE];
-
-       aux_param.modif = modif;
-       if (!db_find_variable(&vp, &aux_param))
-           return (0);
-
-       db_read_write_variable(vp, valuep, DB_VAR_GET, &aux_param);
-
-       return (1);
-}
-
-void
-db_read_write_variable(
-       struct db_variable      *vp,
-       db_expr_t               *valuep,
-       int                     rw_flag,
-       db_var_aux_param_t      ap)
-{
-       int     (*func)(struct db_variable*, db_expr_t*,int, db_var_aux_param_t)
-                       = vp->fcn;
-       struct  db_var_aux_param aux_param;
-       db_expr_t old_value;
-
-       if (ap == 0) {
-           ap = &aux_param;
-           ap->modif = NULL;
-           ap->level = 0;
-           ap->thr_act = THREAD_NULL;
-       }
-       if (rw_flag == DB_VAR_SET && vp->precious)
-               db_read_write_variable(vp, &old_value, DB_VAR_GET, ap);
-       if (func == FCN_NULL) {
-           if (rw_flag == DB_VAR_SET)
-               vp->valuep[(ap->level)? (ap->suffix[0] - vp->low): 0] = *valuep;
-           else
-               *valuep = vp->valuep[(ap->level)? (ap->suffix[0] - vp->low): 0];
-       } else
-           (*func)(vp, valuep, rw_flag, ap);
-       if (rw_flag == DB_VAR_SET && vp->precious)
-               db_printf("\t$%s:%s<%#x>\t%#8lln\t=\t%#8lln\n", vp->name,
-                         ap->modif, ap->thr_act, (unsigned long long)old_value, (unsigned long long)*valuep);
-}
-
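Each ddb variable exposes either raw storage (valuep, possibly indexed by a
suffix) or a get/set accessor function, and db_read_write_variable picks the
path.  A reduced sketch of that convention with hypothetical names:

    enum { VAR_GET, VAR_SET };

    struct var {
        long  *valuep;                           /* direct storage, or...  */
        int  (*fcn)(struct var *, long *, int);  /* ...an accessor routine */
    };

    static void var_rw(struct var *v, long *val, int rw)
    {
        if (v->fcn == 0) {
            if (rw == VAR_SET)
                *v->valuep = *val;
            else
                *val = *v->valuep;
        } else {
            (*v->fcn)(v, val, rw);
        }
    }
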
-void
-db_list_variable(void)
-{
-       register struct db_variable *new;
-       register struct db_variable *old;
-       register struct db_variable *cur;
-       unsigned int l;
-       unsigned int len;
-       short i;
-       unsigned int j;
-
-       len = 1;
-       for (cur = db_vars; cur < db_evars; cur++) {
-           if (cur->min_level > 0 || cur->max_level > 0) {
-               j = 3 * (cur->max_level - cur->min_level + 1) - 1;
-               if (cur->max_level > cur->min_level)
-                   j += 2;
-           } else
-               j = 0;
-           if ((l = strlen(cur->name) + j) >= len)
-               len = l + 1;
-       }
-
-       old = (struct db_variable *)0;
-       for (;;) {
-           new = (struct db_variable *)0;
-           for (cur = db_vars; cur < db_evars; cur++)
-               if ((new == (struct db_variable *)0 ||
-                    strcmp(cur->name, new->name) < 0) &&
-                   (old == (struct db_variable *)0 ||
-                    strcmp(cur->name, old->name) > 0))
-                   new = cur;
-           if (new == (struct db_variable *)0)
-                   return;
-           db_reserve_output_position(len);
-           db_printf(new->name);
-           j = strlen(new->name);
-           if (new->min_level > 0) {
-               db_putchar('?');
-               db_putchar('?');
-               j += 2;
-               for (i = new->min_level - 1; i > 0; i--) {
-                   db_putchar('.');
-                   db_putchar('?');
-                   db_putchar('?');
-                   j += 3;
-               }
-               if (new->max_level > new->min_level) {
-                   db_putchar('[');
-                   db_putchar('.');
-                   db_putchar('?');
-                   db_putchar('?');
-                   j += 4;
-               }
-               i = new->min_level + 1;
-           } else {
-               if (new->max_level > new->min_level) {
-                   db_putchar('[');
-                   j++;
-               }
-               i = new->min_level;
-           }
-           while (i++ < new->max_level) {
-                db_putchar('.');
-                db_putchar('?');
-                db_putchar('?');
-                j += 3;
-           }
-           if (new->max_level > new->min_level) {
-                db_putchar(']');
-                j++;
-           }
-           while (j++ < len)
-                   db_putchar(' ');
-           old = new;
-       }
-}
-
-void
-db_set_cmd(void)
-{
-       db_expr_t       value;
-       int             t;
-       struct db_variable *vp;
-       struct db_var_aux_param aux_param;
-       char            modif[TOK_STRING_SIZE];
-
-       aux_param.modif = modif;
-       t = db_read_token();
-       if (t == tIDENT && strcmp("help", db_tok_string) == 0) {
-               db_list_variable();
-               return;
-       }
-       if (t != tDOLLAR) {
-           db_error("Variable name should be prefixed with $\n");
-           return;
-       }
-       if (!db_find_variable(&vp, &aux_param)) {
-           db_error("Unknown variable\n");
-           return;
-       }
-
-       t = db_read_token();
-       if (t != tEQ)
-           db_unread_token(t);
-
-       if (!db_expression(&value)) {
-           db_error("No value\n");
-           return;
-       }
-       if ((t = db_read_token()) == tSEMI_COLON)
-           db_unread_token(t);
-       else if (t != tEOL)
-           db_error("?\n");
-
-       db_read_write_variable(vp, &value, DB_VAR_SET, &aux_param);
-}
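-
-/*
- * Usage sketch (editorial, variable hypothetical): this implements the
- * debugger's "set" command, e.g.
- *
- *     set $work0 = 0x100
- *
- * "set help" lists the known variables; the '=' is optional, since the
- * token is pushed back when it is absent.
- */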
-
-void
-db_show_one_variable(void)
-{
-       struct db_variable *cur;
-       unsigned int len;
-       unsigned int sl;
-       unsigned int slen = 0;
-       short h = 0;
-       short i;
-       unsigned short j;
-       short k;
-       short low;
-       int hidden_level = 0;
-       struct db_var_aux_param aux_param;
-       const char *p = NULL, *q;
-       char *name;
-       db_addr_t offset = 0;
-
-       for (cur = db_vars; cur < db_evars; cur++)
-           if (db_cmp_variable_name(cur, db_tok_string, &aux_param))
-               break;
-       if (cur == db_evars) {
-           for (cur = db_vars; cur < db_evars; cur++) {
-               for (q = cur->name, p = db_tok_string; *q && *p == *q; p++,q++)
-                   continue;
-               if (*q == '\0')
-                   break;
-           }
-           if (cur == db_evars) {
-               db_error("Unknown variable\n");
-               return;
-           }
-
-           for (i = 0; *p && *p != ':' && i < cur->max_level; i++, p = q)
-               if ((q = db_get_suffix(p, &aux_param.suffix[i])) == 0)
-                   break;
-           aux_param.level = i;
-           if ((*p && *p != ':') ||
-               (i > 0 && (aux_param.suffix[0] < cur->low  ||
-                          (cur->high >= 0 &&
-                           aux_param.suffix[0] > cur->high)))) {
-               db_error("Unknown variable format\n");
-               return;
-           }
-
-           strlcpy(aux_param.modif, *p ? p + 1 : "", TOK_STRING_SIZE);
-           aux_param.thr_act = (db_option(aux_param.modif, 't') ?
-                       db_default_act : THREAD_NULL);
-       }
-
-       if (cur->hidden_level)
-           if (*cur->hidden_levelp >= cur->hidden_low &&
-               *cur->hidden_levelp <= cur->hidden_high) {
-               hidden_level = 1;
-               aux_param.hidden_level = h = *cur->hidden_levelp;
-           } else {
-               hidden_level = 0;
-               aux_param.hidden_level = h = cur->hidden_low;
-               slen = 1;
-               for (k = aux_param.level > 0 ? aux_param.suffix[0] : cur->high;
-                    k > 9; k /= 10)
-                   slen++;
-           }
-       else
-           aux_param.hidden_level = -1;
-
-       if ((cur->min_level == 0 && !cur->hidden_level) || cur->high < 0)
-           j = 0;
-       else {
-           if (cur->min_level > 0) {
-               j = 1;
-               for (k = aux_param.level > 0 ?
-                    aux_param.suffix[0] : cur->high; k > 9; k /= 10)
-                   j++;
-           } else
-               j = 0;
-           if (cur->hidden_level && hidden_level == 0) {
-               j += 3;
-               for (k = aux_param.hidden_level >= 0 ?
-                    aux_param.hidden_level : cur->hidden_high; k > 9; k /= 10)
-                   j++;
-           }
-       }
-       len = strlen(cur->name) + j;
-       i = low = aux_param.level > 0 ? aux_param.suffix[0] : cur->low;
-
-       for (;;) {
-           db_printf(cur->name);
-           j = strlen(cur->name);
-           if (cur->high >= 0) {
-               if (cur->min_level > 0) {
-                   db_printf("%d", i);
-                   j++;
-                   for (k = i; k > 9; k /= 10)
-                       j++;
-               }
-               if (cur->hidden_level && hidden_level == 0) {
-                   sl = 1;
-                   for (k = i; k > 9; k /= 10)
-                       sl++;
-                   while (sl++ < slen) {
-                       db_putchar(' ');
-                       j++;
-                   }
-                   db_printf("[%d]", h);
-                   j += 3;
-                   for (k = h; k > 9; k /= 10)
-                       j++;
-               }
-           }
-
-           while (j++ < len)
-               db_putchar(' ');
-           db_putchar(':');
-           db_putchar(' ');
-
-           if (cur->fcn) {
-               aux_param.suffix[0] = i;
-               (*cur->fcn)(cur, (db_expr_t *)0, DB_VAR_SHOW, &aux_param);
-           } else {
-               db_printf("%#lln", (unsigned long long)*(cur->valuep + i));
-               db_find_xtrn_task_sym_and_offset(*(cur->valuep + i), &name,
-                                                &offset, TASK_NULL);
-               if (name != (char *)0 && offset <= db_maxoff &&
-                   offset != *(cur->valuep + i)) {
-                   db_printf("\t%s", name);
-                   if (offset != 0)
-                       db_printf("+%#llr", (unsigned long long)offset);
-               }
-           }
-           db_putchar('\n');
-           if (cur->high < 0)
-               break;
-           if (aux_param.level > 0 || i++ == cur->high) {
-               if (!cur->hidden_level ||
-                   hidden_level == 0 ||
-                   h++ == cur->hidden_high)
-                   break;
-               aux_param.hidden_level = h;
-               i = low;
-           }
-       }
-}
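-
-/*
- * Editorial note: for a suffixed variable the loop above emits one
- * line per suffix value (and per hidden level, when one is active),
- * and appends the nearest kernel symbol plus offset whenever the
- * value resolves to one within db_maxoff.
- */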
-
-void
-db_show_variable(__unused db_expr_t addr, __unused boolean_t have_addr,
-                __unused db_expr_t count, __unused char *modif)
-{
-       struct db_variable *cur;
-       unsigned int l;
-       unsigned int len;
-       unsigned int sl;
-       unsigned int slen;
-       short h = 0;
-       short i;
-       unsigned short j;
-       short k;
-       int t;
-       int t1;
-       struct db_var_aux_param aux_param;
-       char *name;
-       db_addr_t offset;
-
-       switch(t = db_read_token()) {
-       case tEOL:
-       case tEOF:
-       case tSEMI_COLON:
-           break;
-
-       case tDOLLAR:
-           t1 = db_read_token();
-           if (t1 == tIDENT) {
-               db_show_one_variable();
-               return;
-           }
-           db_error("Not a variable name after $\n");
-           db_unread_token(t);
-           return;
-
-       default:
-           db_error("Variable name should be prefixed with $\n");
-           db_unread_token(t);
-           return;
-       }
-       db_unread_token(t);
-
-       slen = len = 1;
-       for (cur = db_vars; cur < db_evars; cur++) {
-           if ((cur->min_level == 0 && !cur->hidden_level) || cur->high < 0)
-               j = 0;
-           else {
-               if (cur->min_level > 0) {
-                   j = 1;
-                   for (k = cur->high; k > 9; k /= 10)
-                       j++;
-               } else
-                   j = 0;
-               if (cur->hidden_level &&
-                   (*cur->hidden_levelp < cur->hidden_low ||
-                    *cur->hidden_levelp > cur->hidden_high)) {
-                   j += 3;
-                   for (k = cur->hidden_high; k > 9; k /= 10)
-                       j++;
-               }
-           }
-           if ((l = strlen(cur->name) + j) >= len)
-               len = l + 1;
-       }
-
-       aux_param.modif = NULL;
-       aux_param.level = 1;
-       aux_param.thr_act = THREAD_NULL;
-
-       for (cur = db_vars; cur < db_evars; cur++) {
-           i = cur->low;
-           if (cur->hidden_level) {
-               if (*cur->hidden_levelp >= cur->hidden_low &&
-                   *cur->hidden_levelp <= cur->hidden_high) {
-                   h = cur->hidden_low - 1;
-                   aux_param.hidden_level = *cur->hidden_levelp;
-               } else {
-                   h = cur->hidden_low;
-                   aux_param.hidden_level = cur->hidden_low;
-               }
-               slen = 1;
-               for (k = cur->high; k > 9; k /= 10)
-                   slen++;
-           } else
-               aux_param.hidden_level = -1;
-
-           if (cur != db_vars && cur->high >= 0 &&
-               (cur->min_level > 0 || cur->hidden_level))
-                   db_putchar('\n');
-
-           for (;;) {
-               db_printf(cur->name);
-               j = strlen(cur->name);
-               if (cur->high >= 0) {
-                   if (cur->min_level > 0) {
-                       db_printf("%d", i);
-                       j++;
-                       for (k = i; k > 9; k /= 10)
-                           j++;
-                   }
-                   if (cur->hidden_level && h >= cur->hidden_low) {
-                       sl = 1;
-                       for (k = i; k > 9; k /= 10)
-                           sl++;
-                       while (sl++ < slen) {
-                           db_putchar(' ');
-                           j++;
-                       }
-                       db_printf("[%d]", h);
-                       j += 3;
-                       for (k = h; k > 9; k /= 10)
-                           j++;
-                   }
-               }
-               while (j++ < len)
-                   db_putchar(' ');
-               db_putchar(':');
-               db_putchar(' ');
-
-               if (cur->fcn) {
-                   aux_param.suffix[0] = i;
-                   (*cur->fcn)(cur, (db_expr_t *)0, DB_VAR_SHOW, &aux_param);
-               } else {
-                   db_printf("%#lln", (unsigned long long)*(cur->valuep + i));
-                   db_find_xtrn_task_sym_and_offset(*(cur->valuep + i), &name,
-                                                    &offset, TASK_NULL);
-                   if (name != (char *)0 && offset <= db_maxoff &&
-                       offset != *(cur->valuep + i)) {
-                       db_printf("\t%s", name);
-                       if (offset != 0)
-                           db_printf("+%#llr", (unsigned long long)offset);
-                   }
-               }
-               db_putchar('\n');
-               if (cur->high < 0)
-                   break;
-               if (i++ == cur->high) {
-                   if (!cur->hidden_level || h++ == cur->hidden_high)
-                       break;
-                   aux_param.hidden_level = h;
-                   i = cur->low;
-               }
-           }
-       }
-}
-
-/*
- * Given the name of a machine register, return a pointer to its
- * variable entry, or DB_VAR_NULL if there is none.
- */
-db_variable_t
-db_find_reg_name(
-       char    *s)
-{
-       register db_variable_t  regp;
-
-       if ( s == (char *)0 )
-               return DB_VAR_NULL;
-
-       for (regp = db_regs; regp < db_eregs; regp++) {
-               if ( strcmp( s, regp->name) == 0 )
-                       return regp;
-       }
-       return DB_VAR_NULL;
-}
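-
-/*
- * Editorial example (register name hypothetical): machine-dependent
- * code can map a register name to its table entry and read it through
- * the access layer above:
- *
- *     db_variable_t regp = db_find_reg_name("pc");
- *     db_expr_t val;
- *
- *     if (regp != DB_VAR_NULL)
- *             db_read_variable(regp, &val);
- */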
diff --git a/osfmk/ddb/db_variables.h b/osfmk/ddb/db_variables.h
deleted file mode 100644 (file)
index 3ff52cf..0000000
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.2.17.5  1996/01/09  19:16:39  devrcs
- *     Define alternate register definitions.
- *     [1995/12/01  21:42:46  jfraser]
- *
- *     Merged '64-bit safe' changes from DEC alpha port.
- *     [1995/11/21  18:04:00  jfraser]
- *
- * Revision 1.2.17.4  1995/02/23  21:44:00  alanl
- *     Merged with DIPC2_SHARED.
- *     [1995/01/05  13:36:23  alanl]
- * 
- * Revision 1.2.20.2  1994/10/14  03:47:19  dwm
- *     mk6 CR668 - 1.3b26 merge
- *     64bit cleanup
- *     [1994/10/14  03:40:00  dwm]
- * 
- * Revision 1.2.17.2  1994/09/23  01:22:42  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:29  ezf]
- * 
- * Revision 1.2.17.1  1994/06/11  21:12:42  bolinger
- *     Merge up to NMK17.2.
- *     [1994/06/11  20:04:23  bolinger]
- * 
- * Revision 1.2.22.1  1994/12/06  19:43:29  alanl
- *     Intel merge, Oct 94 code drop.
- *     Define DB_VAR_NULL.
- *     Add prototype for db_find_reg_name.
- *     [94/11/23            mmp]
- * 
- * Revision 1.2.15.1  1994/02/08  10:59:16  bernadat
- *     Added db_show_one_variable & db_show_variable prototypes
- * 
- *     Got DB_MACRO_LEVEL and DB_MACRO_NARGS macros from <ddb/db_variables.h>.
- *     Added new fields (hidden_xxx) into struct db_variable and into
- *     struct db_var_aux_param.
- *     Added DB_VAR_SHOW for showing variables.
- *     [93/08/12            paire]
- *     [94/02/07            bernadat]
- * 
- * Revision 1.2.4.3  1993/07/27  18:28:29  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:26  elliston]
- * 
- * Revision 1.2.4.2  1993/06/09  02:21:06  gm
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:57:48  jeffc]
- * 
- * Revision 1.2  1993/04/19  16:03:36  devrcs
- *     New field used to display old register values with 'set' command
- *     [barbou@gr.osf.org]
- *     [92/12/03            bernadat]
- * 
- * Revision 1.1  1992/09/30  02:24:26  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.5  91/10/09  16:04:17  af
- *      Revision 2.4.3.1  91/10/05  13:08:42  jeffreyh
- *             Added suffix related field to db_variable structure.
- *             Added macro definitions of db_{read,write}_variable.
- *             [91/08/29            tak]
- * 
- * Revision 2.4.3.1  91/10/05  13:08:42  jeffreyh
- *     Added suffix related field to db_variable structure.
- *     Added macro definitions of db_{read,write}_variable.
- *     [91/08/29            tak]
- * 
- * Revision 2.4  91/05/14  15:37:12  mrt
- *     Correcting copyright
- * 
- * Revision 2.3  91/02/05  17:07:23  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:19:54  mrt]
- * 
- * Revision 2.2  90/08/27  21:53:40  dbg
- *     Modularized typedef name.  Documented the calling sequence of
- *     the (optional) access function of a variable.  Now the valuep
- *     field can be made opaque, eg be an offset that fcn() resolves.
- *     [90/08/20            af]
- * 
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-#ifndef        _DDB_DB_VARIABLES_H_
-#define        _DDB_DB_VARIABLES_H_
-
-#include <kern/thread.h>
-#include <machine/db_machdep.h>                /* For db_expr_t */
-
-
-#define DB_VAR_LEVEL   3       /* maximum number of suffix level */
-
-/*
- * auxiliary parameters passed to a variable handler
- */
-struct db_var_aux_param {
-       char            *modif;                 /* option strings, must be TOK_STRING_SIZE */
-       short           level;                  /* number of levels */
-       short           hidden_level;           /* hidden level */
-       short           suffix[DB_VAR_LEVEL];   /* suffix */
-       thread_t        thr_act;                /* target thr_act */
-};
-
-typedef struct db_var_aux_param        *db_var_aux_param_t;
-       
-
-/*
- * Debugger variables.
- */
-struct db_variable {
-       const char *name;       /* Name of variable */
-       db_expr_t *valuep;      /* pointer to value of variable */
-                               /* function to call when reading/writing */
-       int     (*fcn)(struct db_variable *,db_expr_t *,int,db_var_aux_param_t);
-       short   min_level;      /* number of minimum suffix levels */
-       short   max_level;      /* number of maximum suffix levels */
-       short   low;            /* low value of level 1 suffix */
-       short   high;           /* high value of level 1 suffix */
-       boolean_t hidden_level; /* is there a hidden suffix level ? */
-       short   hidden_low;     /* low value of hidden level */
-       short   hidden_high;    /* high value of hidden level */
-       int     *hidden_levelp; /* value of current hidden level */
-       boolean_t precious;     /* print old value when affecting ? */
-#define DB_VAR_GET     0
-#define DB_VAR_SET     1
-#define DB_VAR_SHOW    2
-};
-
-typedef struct db_variable     *db_variable_t;
-
-#define        DB_VAR_NULL     (db_variable_t)0
-
-#define        FCN_NULL        ((int (*)(struct db_variable *,                 \
-                                 db_expr_t *,                          \
-                                 int,                                  \
-                                 db_var_aux_param_t)) 0)
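-
-/*
- * Editorial sketch (entry values hypothetical): machine-dependent code
- * populates db_vars[]/db_regs[] with entries of this type.  A plain
- * variable exposing a kernel integer directly uses FCN_NULL and no
- * suffix levels:
- *
- *     extern db_expr_t db_radix;
- *     struct db_variable db_vars[] = {
- *         { "radix", &db_radix, FCN_NULL,
- *           0, 0, 0, -1, FALSE, 0, 0, (int *)0, FALSE },
- *     };
- *     struct db_variable *db_evars =
- *         db_vars + sizeof (db_vars) / sizeof (db_vars[0]);
- */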
-
-#define DB_VAR_LEVEL   3       /* maximum number of suffix level */
-#define DB_MACRO_LEVEL 5       /* max macro nesting */
-#define DB_MACRO_NARGS 10      /* max args per macro */
-
-#define db_read_variable(vp, valuep)   \
-       db_read_write_variable(vp, valuep, DB_VAR_GET, 0)
-#define db_write_variable(vp, valuep)  \
-       db_read_write_variable(vp, valuep, DB_VAR_SET, 0)
-
-
-extern struct db_variable      db_vars[];      /* debugger variables */
-extern struct db_variable      *db_evars;
-extern struct db_variable      db_regs[];      /* machine registers */
-extern struct db_variable      *db_eregs;
-
-#if defined(ALTERNATE_REGISTER_DEFS)
-
-extern struct db_variable      db_altregs[];   /* alternate machine regs */
-extern struct db_variable      *db_ealtregs;
-
-#endif /* defined(ALTERNATE_REGISTER_DEFS) */
-
-/* Prototypes for functions exported by this module.
- */
-
-int db_get_variable(db_expr_t *valuep);
-
-void db_read_write_variable(
-       struct db_variable      *vp,
-       db_expr_t               *valuep,
-       int                     rw_flag,
-       db_var_aux_param_t      ap);
-
-void db_set_cmd(void);
-
-void db_show_one_variable(void);
-
-void db_show_variable(db_expr_t, boolean_t, db_expr_t, char *);
-
-db_variable_t db_find_reg_name(char    *s);
-
-#endif /* !_DDB_DB_VARIABLES_H_ */
diff --git a/osfmk/ddb/db_watch.c b/osfmk/ddb/db_watch.c
deleted file mode 100644 (file)
index f4a7400..0000000
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: Richard P. Draves, Carnegie Mellon University
- *     Date:   10/90
- */
-
-#include <mach/boolean.h>
-#include <mach/vm_param.h>
-#include <mach/machine/vm_types.h>
-#include <mach/machine/vm_param.h>
-#include <vm/vm_map.h>
-
-#include <machine/db_machdep.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_watch.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_run.h>                        /* For db_single_step() */
-
-/*
- * Watchpoints.
- */
-
-boolean_t      db_watchpoints_inserted = TRUE;
-
-#define        NWATCHPOINTS    100
-struct db_watchpoint   db_watch_table[NWATCHPOINTS];
-db_watchpoint_t                db_next_free_watchpoint = &db_watch_table[0];
-db_watchpoint_t                db_free_watchpoints = 0;
-db_watchpoint_t                db_watchpoint_list = 0;
-
-extern vm_map_t                kernel_map;
-
-
-
-/* Prototypes for functions local to this file.  XXX -- should be static.
- */
-
-db_watchpoint_t db_watchpoint_alloc(void);
-
-void db_watchpoint_free(register db_watchpoint_t watch);
-
-void db_set_watchpoint(
-       task_t          task,
-       db_addr_t       addr,
-       vm_size_t       size);
-
-void db_delete_watchpoint(
-       task_t          task,
-       db_addr_t       addr);
-
-static int db_get_task(
-       char            *modif,
-       task_t          *taskp,
-       db_addr_t       addr);
-
-void db_list_watchpoints(void);
-
-
-
-db_watchpoint_t
-db_watchpoint_alloc(void)
-{
-       register db_watchpoint_t        watch;
-
-       if ((watch = db_free_watchpoints) != 0) {
-           db_free_watchpoints = watch->link;
-           return (watch);
-       }
-       if (db_next_free_watchpoint == &db_watch_table[NWATCHPOINTS]) {
-           db_printf("All watchpoints used.\n");
-           return (0);
-       }
-       watch = db_next_free_watchpoint;
-       db_next_free_watchpoint++;
-
-       return (watch);
-}
-
-void
-db_watchpoint_free(register db_watchpoint_t watch)
-{
-       watch->link = db_free_watchpoints;
-       db_free_watchpoints = watch;
-}
-
-void
-db_set_watchpoint(
-       task_t          task,
-       db_addr_t       addr,
-       vm_size_t       size)
-{
-       register db_watchpoint_t        watch;
-
-       /*
-        *      Should we do anything fancy with overlapping regions?
-        */
-
-       for (watch = db_watchpoint_list; watch != 0; watch = watch->link) {
-           if (watch->task == task &&
-               (watch->loaddr == addr) &&
-               (watch->hiaddr == addr+size)) {
-               db_printf("Already set.\n");
-               return;
-           }
-       }
-
-       watch = db_watchpoint_alloc();
-       if (watch == 0) {
-           db_printf("Too many watchpoints.\n");
-           return;
-       }
-
-       watch->task = task;
-       watch->loaddr = addr;
-       watch->hiaddr = addr+size;
-
-       watch->link = db_watchpoint_list;
-       db_watchpoint_list = watch;
-
-       db_watchpoints_inserted = FALSE;
-}
-
-void
-db_delete_watchpoint(
-       task_t          task,
-       db_addr_t       addr)
-{
-       register db_watchpoint_t        watch;
-       register db_watchpoint_t        *prev;
-
-       for (prev = &db_watchpoint_list; (watch = *prev) != 0;
-            prev = &watch->link) {
-           if (watch->task == task &&
-               (watch->loaddr <= addr) &&
-               (addr < watch->hiaddr)) {
-               *prev = watch->link;
-               db_watchpoint_free(watch);
-               return;
-           }
-       }
-
-       db_printf("Not set.\n");
-}
-
-void
-db_list_watchpoints(void)
-{
-       register db_watchpoint_t watch;
-       int      task_id;
-
-       if (db_watchpoint_list == 0) {
-           db_printf("No watchpoints set\n");
-           return;
-       }
-
-       db_printf("Space      Address  Size\n");
-       for (watch = db_watchpoint_list; watch != 0; watch = watch->link)  {
-           if (watch->task == TASK_NULL)
-               db_printf("kernel  ");
-           else {
-               task_id = db_lookup_task(watch->task);
-               if (task_id < 0)
-                   db_printf("%*X", 2*sizeof(vm_offset_t), watch->task);
-               else
-                   db_printf("task%-3d ", task_id);
-           }
-           db_printf("  %*X  %X\n", 2*sizeof(vm_offset_t), watch->loaddr,
-                     watch->hiaddr - watch->loaddr);
-       }
-}
-
-static int
-db_get_task(
-       char            *modif,
-       task_t          *taskp,
-       db_addr_t       addr)
-{
-       task_t          task = TASK_NULL;
-       db_expr_t       value;
-       boolean_t       user_space;
-
-       user_space = db_option(modif, 'T');
-       if (user_space) {
-           if (db_expression(&value)) {
-               task = (task_t)(unsigned long)value;
-               if (db_lookup_task(task) < 0) {
-                   db_printf("bad task address %X\n", task);
-                   return(-1);
-               }
-           } else {
-               task = db_default_task;
-               if (task == TASK_NULL) {
-                   if ((task = db_current_task()) == TASK_NULL) {
-                       db_printf("no task\n");
-                       return(-1);
-                   }
-               }
-           }
-       }
-       if (!DB_VALID_ADDRESS(addr, user_space)) {
-           db_printf("Address %#X is not in %s space\n", addr, 
-                       (user_space)? "user": "kernel");
-           return(-1);
-       }
-       *taskp = task;
-       return(0);
-}
-
-/* Delete watchpoint */
-void
-db_deletewatch_cmd(db_expr_t addr, __unused boolean_t have_addr,
-                  __unused db_expr_t count, char *modif)
-{
-       task_t          task;
-
-       if (db_get_task(modif, &task, addr) < 0)
-           return;
-       db_delete_watchpoint(task, addr);
-}
-
-/* Set watchpoint */
-void
-db_watchpoint_cmd(db_expr_t addr, __unused boolean_t have_addr,
-                 __unused db_expr_t count, char *modif)
-{
-       vm_size_t       size;
-       db_expr_t       value;
-       task_t          task;
-
-       if (db_get_task(modif, &task, addr) < 0)
-           return;
-       if (db_expression(&value))
-           size = (vm_size_t) value;
-       else
-           size = sizeof(int);
-       db_set_watchpoint(task, addr, size);
-}
-
-/* list watchpoints */
-void
-db_listwatch_cmd(__unused db_expr_t addr, __unused boolean_t have_addr,
-                __unused db_expr_t count, __unused char *modif)
-{
-       db_list_watchpoints();
-}
-
-void
-db_set_watchpoints(void)
-{
-       register db_watchpoint_t        watch;
-       vm_map_t                        map;
-
-       if (!db_watchpoints_inserted) {
-           for (watch = db_watchpoint_list; watch != 0; watch = watch->link) {
-               map = (watch->task)? watch->task->map: kernel_map;
-               pmap_protect(map->pmap,
-                            vm_map_trunc_page(watch->loaddr),
-                            vm_map_round_page(watch->hiaddr),
-                            VM_PROT_READ);
-           }
-           db_watchpoints_inserted = TRUE;
-       }
-}
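-
-/*
- * Editorial note: watchpoints are realized by write-protecting the
- * pages that contain them (pmap_protect to VM_PROT_READ above).  Any
- * store into such a page traps into the debugger, which then uses
- * db_find_watchpoint() below to decide whether the faulting address
- * lies inside a watched range or merely shares a page with one.
- */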
-
-void
-db_clear_watchpoints(void)
-{
-       db_watchpoints_inserted = FALSE;
-}
-
-boolean_t
-db_find_watchpoint(
-       vm_map_t        map,
-       db_addr_t       addr,
-       db_regs_t       *regs)
-{
-       register db_watchpoint_t watch;
-       db_watchpoint_t found = 0;
-       register task_t task_space;
-
-       task_space = (vm_map_pmap(map) == kernel_pmap)?
-               TASK_NULL: db_current_space();
-       for (watch = db_watchpoint_list; watch != 0; watch = watch->link) {
-           if (watch->task == task_space) {
-               if ((watch->loaddr <= addr) && (addr < watch->hiaddr))
-                   return (TRUE);
-               else if ((trunc_page(watch->loaddr) <= addr) &&
-                        (addr < round_page(watch->hiaddr)))
-                   found = watch;
-           }
-       }
-
-       /*
-        *      We didn't hit exactly on a watchpoint, but we are
-        *      in a protected region.  We want to single-step
-        *      and then re-protect.
-        */
-
-       if (found) {
-           db_watchpoints_inserted = FALSE;
-           db_single_step(regs, task_space);
-       }
-
-       return (FALSE);
-}
diff --git a/osfmk/ddb/db_watch.h b/osfmk/ddb/db_watch.h
deleted file mode 100644 (file)
index 6ec420e..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:23:04  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:39  ezf]
- *
- * Revision 1.1.2.4  1993/07/27  18:28:34  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:34  elliston]
- * 
- * Revision 1.1.2.3  1993/06/07  22:07:00  jeffc
- *     CR9176 - ANSI C violations: trailing tokens on CPP
- *     directives, extra semicolons after decl_ ..., asm keywords
- *     [1993/06/07  18:57:38  jeffc]
- * 
- * Revision 1.1.2.2  1993/06/02  23:13:21  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:57:59  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:24:28  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.5  91/10/09  16:04:47  af
- *      Revision 2.4.3.1  91/10/05  13:09:14  jeffreyh
- *             Changed "map" field of db_watchpoint structure to "task",
- *             and also changed parameters of function declarations.
- *             [91/08/29            tak]
- * 
- * Revision 2.4.3.1  91/10/05  13:09:14  jeffreyh
- *     Changed "map" field of db_watchpoint structure to "task",
- *     and also changed parameters of function declarations.
- *     [91/08/29            tak]
- * 
- * Revision 2.4  91/05/14  15:37:46  mrt
- *     Correcting copyright
- * 
- * Revision 2.3  91/02/05  17:07:31  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:20:09  mrt]
- * 
- * Revision 2.2  90/10/25  14:44:21  rwd
- *     Generalized the watchpoint support.
- *     [90/10/16            rwd]
- *     Created.
- *     [90/10/16            rpd]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   10/90
- */
-
-#ifndef        _DDB_DB_WATCH_H_
-#define        _DDB_DB_WATCH_H_
-
-#include <mach/machine/vm_types.h>
-#include <kern/task.h>
-#include <machine/db_machdep.h>
-
-/*
- * Watchpoint.
- */
-
-typedef struct db_watchpoint {
-       task_t    task;                 /* in this map */
-       db_addr_t loaddr;               /* from this address */
-       db_addr_t hiaddr;               /* to this address */
-       struct db_watchpoint *link;     /* link in in-use or free chain */
-} *db_watchpoint_t;
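-
-/*
- * Editorial note: the watched range is half-open -- db_set_watchpoint()
- * records loaddr = addr and hiaddr = addr + size, and a hit requires
- * loaddr <= addr < hiaddr.
- */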
-
-
-
-/* Prototypes for functions exported by this module.
- */
-
-void db_deletewatch_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_watchpoint_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_listwatch_cmd(db_expr_t, boolean_t, db_expr_t, char *);
-
-void db_clear_watchpoints(void);
-
-void db_set_watchpoints(void);
-
-boolean_t db_find_watchpoint(
-       vm_map_t        map,
-       db_addr_t       addr,
-       db_regs_t       *regs);
-
-#endif /* !_DDB_DB_WATCH_H_ */
diff --git a/osfmk/ddb/db_write_cmd.c b/osfmk/ddb/db_write_cmd.c
deleted file mode 100644 (file)
index 00b00e5..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub,  Carnegie Mellon University
- *     Date:   7/90
- */
-
-#include <mach/boolean.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-
-#include <machine/db_machdep.h>
-
-#include <ddb/db_lex.h>
-#include <ddb/db_access.h>
-#include <ddb/db_command.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_expr.h>
-#include <ddb/db_write_cmd.h>
-#include <ddb/db_output.h>                     /* For db_printf() */
-
-/*
- * Write values to memory (task or kernel address space).
- */
-void
-db_write_cmd(db_expr_t address, __unused boolean_t have_addr,
-            __unused db_expr_t count, char *modif)
-{
-       register db_addr_t      addr;
-       register db_expr_t      old_value;
-       db_expr_t       new_value;
-       register int    size;
-       boolean_t       wrote_one = FALSE;
-       boolean_t       t_opt, u_opt;
-       thread_t        thr_act;
-       task_t          task;
-
-       addr = (db_addr_t) address;
-
-       size = db_size_option(modif, &u_opt, &t_opt);
-
-       if (t_opt) 
-         {
-           if (!db_get_next_act(&thr_act, 0))
-             return;
-           task = thr_act->task;
-         }
-       else
-         task = db_current_space();
-
-       /* if user space is not explicitly specified, 
-          look in the kernel */
-       if (!u_opt)
-         task = TASK_NULL;
-
-       if (!DB_VALID_ADDRESS(addr, u_opt)) {
-         db_printf("Bad address 0x%llx\n", (unsigned long long)addr);
-         return;
-       }
-
-       while (db_expression(&new_value)) {
-           old_value = db_get_task_value(addr, size, FALSE, task);
-           db_task_printsym(addr, DB_STGY_ANY, task);
-           db_printf("\t\t%#8lln\t=\t%#8lln\n", (unsigned long long)old_value, (unsigned long long)new_value);
-           db_put_task_value(addr, size, new_value, task);
-           addr += size;
-
-           wrote_one = TRUE;
-       }
-
-       if (!wrote_one)
-           db_error("Nothing written.\n");
-
-       db_next = addr;
-       db_prev = addr - size;
-}
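-
-/*
- * Usage sketch (editorial; modifier letters assumed from
- * db_size_option()): values are written to successive cells starting
- * at the given address, printing each old and new value.  With the
- * 'u' modifier the target is the user task; otherwise the kernel.
- * A command of the form
- *
- *     write/h 0x2000 0x1234 0x5678
- *
- * would write two cells at 0x2000 and 0x2000 + size.
- */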
diff --git a/osfmk/ddb/db_write_cmd.h b/osfmk/ddb/db_write_cmd.h
deleted file mode 100644 (file)
index 1987c27..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.6.1  1994/09/23  01:23:27  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:46  ezf]
- *
- * Revision 1.1.2.3  1993/09/17  21:34:44  robert
- *     change marker to OSF_FREE_COPYRIGHT
- *     [1993/09/17  21:27:30  robert]
- * 
- * Revision 1.1.2.2  1993/07/27  18:28:41  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:41  elliston]
- * 
- * $EndLog$
- */
-#ifndef        _DDB_DB_WRITE_CMD_H_
-#define        _DDB_DB_WRITE_CMD_H_
-
-#include <machine/db_machdep.h>
-
-/* Prototypes for functions exported by this module.
- */
-void db_write_cmd(
-       db_expr_t       address,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif);
-
-#endif /* !_DDB_DB_WRITE_CMD_H_ */
diff --git a/osfmk/ddb/makedis.c b/osfmk/ddb/makedis.c
deleted file mode 100644 (file)
index a33bf21..0000000
+++ /dev/null
@@ -1,2386 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.2.1  1997/03/27  18:46:52  barbou
- *     Created.
- *     [1997/03/27  13:58:42  barbou]
- *
- * $EndLog$
- */
-
-/* makedis.c - make a disassembler. */
-
-/*
-   By Eamonn McManus <emcmanus@gr.osf.org>, April 1995.
-   Copyright 1995 by Eamonn McManus.  Non-commercial use is permitted.  */
-
-/* DESCRIPTION
-   
-   This program generates a disassembler in C from a file describing the
-   opcodes of the machine in question.  Lines in the description file are
-   either comments beginning with #, or contain three fields, with the
-   first two being terminated by space and the third containing the rest
-   of the line.  Long logical lines can be split onto several physical
-   lines by ending each one except the last with a \.  A logical line
-   can also be split immediately after a |.  Unlike \, | is considered
-   part of the logical line.  Leading spaces on continuation lines
-   following either \ or | are ignored.
-
-   Here is a concise description of the meanings of the three fields.
-   Examples later will make it clearer what they are used for.
-
-   The first field of the three is a function name.  This will produce
-   a function or array of the same name in the C output, so it should
-   not conflict with other identifiers or C keywords.  By default the
-   function named returns a string (a (char *) in C), but if the first
-   field is preceded by %, the function returns an unsigned long
-   integer.
-
-   The second field describes the arguments of the function.  It consists
-   of two parts, either but not both of which may be omitted.  The first
-   part is a string which is a bitmask describing the first argument of
-   the function.  Each character of the string represents one bit,
-   with the least significant bit being the last.  A character can be
-   0 or 1, representing that constant value, or a letter, representing
-   part of a bitfield.  A given bitfield consists of all of the
-   contiguous bits containing the same letter.  Upper and lower case
-   letters are considered different.
-
-   The second part of the second field is a list of parameters
-   describing the parameters of the function, or the parameters after
-   the first if the bitfield part was present.  The list is contained
-   in parentheses () and the individual parameters are separated by
-   commas.  Spaces are not allowed.  Each parameter name is a single
-   letter, optionally preceded by %.  The parameter is an unsigned
-   long integer if % is present, otherwise a string.  Again, upper and
-   lower case parameter names are different.
-
-   The third field describes the value of the function.  If a bitmask
-   is present in the second field and it contains constant bits (0s or
-   1s), then the third field is the value of the function only in the
-   case where its first argument contains matching values in those bit
-   positions.  There can be many different lines naming the same
-   function but with different bitpatterns.  The generated C code will
-   arrange to return the value corresponding to the pattern that
-   matches the actual first argument of the function when it is
-   called.  This argument should not have bits set in positions beyond
-   those present in the bitpattern.
-
-   It is only allowed for two different lines to name the same function
-   if there is a bitstring in the second field.  It is not allowed for
-   two such lines to specify exactly the same constant bit values.  But
-   it is allowed for a line to have all the same constant bit values as
-   another plus some extra constant values.  In this case the more
-   specific line applies when all of its constant bits match, and
-   otherwise the less specific line applies.
-
-   Apart from the contents of the bitstring, the second field must be
-   identical on every line referring to a given function, and the
-   bitstring must always be of the same length.
-
-   For string-valued functions, the third field is the string value.
-   For integer-valued functions, it is a C integer expression
-   generating the value.  In both cases there may be several special
-   values:
-
-   - A $ followed by a single letter is replaced by the value of the
-     argument or bitfield with that name.  The value of a bitfield is
-     shifted as if that bitfield were in the least-significant bit
-     position.  Thus, a single-bit field always has value 0 or 1.
-
-   - A $ followed by the name of a function and an argument list in
-     parentheses () is replaced by the value returned by the function
-     with those arguments.  An integer value cannot be inserted into a
-     string without being converted by a function, nor can a string
-     value be used in an integer expression.
-
-   - A $ followed by a bitstring enclosed in [] is replaced by the
-     value of that bitstring.  The bitstring has the same syntax as in
-     the second field, described above.  Each contiguous sequence of
-     the same repeated letter in the bitstring is replaced by the
-     value of the argument or bitfield-argument with that name,
-     shifted into the appropriate position.
-
-   - A list of strings, separated by |, enclosed in
-     {}, and followed by an integer expression enclosed in [], is
-     replaced by the string in the list whose number matches the value
-     of the expression.  The first string in the list is numbered 0.
-     If there is no string corresponding to the value of the
-     expression, the behaviour is undefined.  The strings in the list
-     may themselves contain $ or {} operations.
-
-   - A \ followed by any character is replaced by that
-     character, without regard to any meaning it may usually have.
-     This is used to obtain strings containing characters such as
-     {, $, or \.  The use of backslash to split long logical
-     lines takes precedence over this use, so \\ should not appear
-     at the end of a line.
-
-   The third field may also be a lone colon ":", in which case the
-   function is assumed to be defined externally and only a function
-   declaration (prototype) is generated.
-
-
-   EXAMPLES
-
-   Here are some examples from the description file for the Z80
-   microprocessor.  This processor has 8-bit opcodes which are
-   disassembled by a generated function "inst" which looks like this:
-
-   typedef unsigned long bits;
-   char *inst(bits code) {...}
-
-   The simplest sort of line in the description file is one that looks
-   like this:
-
-   inst    01110110        halt
-
-   The first field names the function, "inst".  The second field
-   implies that that function has exactly one argument which is an
-   integer, and that this line specifies the value of the function
-   when this integer has the binary value 01110110 (hex 0x76).  This
-   value will be the string "halt".
-
-   A more complex line is one looking like this:
-
-   inst    001aa111        {daa|cpl|scf|ccf}[$a]
-
-   This line is compatible with the previous one, because it has the
-   same number of bits and the constant bits are different.  It
-   specifies the value of inst when its argument looks like
-   001aa111, i.e., for the binary values
-   00100111,
-   00101111,
-   00110111, and
-   00111111.  The value of $a for these four values will be
-   respectively binary 00, 01, 10, 11, i.e., 0 to 3.  The
-   corresponding values of the inst function will be "daa", "cpl",
-   "scf", and "ccf".
-
-   The description defines a helper function "reg8" like this:
-
-   reg8    rrr             {b|c|d|e|h|l|(hl)|a}[$r]
-
-   This simply selects one of the eight strings between {} depending
-   on the value of the argument, which is assumed to be a three-bit
-   value.  This could just as easily have been written:
-
-   reg8    (%r)            {b|c|d|e|h|l|(hl)|a}[$r]
-
-   The generated C code is the same -- in each case makedis realises
-   that the function can be represented by an array rather than
-   compiling a C function.
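-
-   Since every element is a short constant string, the array is
-   emitted with a fixed element length, roughly:
-
-   char reg8[][5] = {"b", "c", "d", "e", "h", "l", "(hl)", "a"};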
-
-   The reg8 function is used in lines like this one:
-
-   inst    01rrrsss        ld $reg8($r),$reg8($s)
-
-   Thus if the argument to inst is
-          01010011
-   then $r is 010 (2) and $s is 011 (3).  Since reg8(2) is "d" and
-   reg8(3) is "e", the value of inst with this argument will be the
-   string "ld d,e".
-
-   Note that the opcode for "halt" given above matches this pattern,
-   but because the bitpattern for "halt" is more specific (has more
-   constant bits) it is the one chosen when the argument is 01110110.
-
-   The description also uses an external C function "hexprint" defined
-   like this:
-
-   char *hexprint(bits digits, bits n) {
-       char *p = dis_alloc(digits + 1);
-       sprintf(p, "%0*lx", (int) digits, n);
-       return p;
-   }
-
-   The value of this function is a string containing the number n
-   spelt out in hex with "digits" digits.  In the description
-   file this function is declared like this:
-
-   hexprint  (%w,%n)       :
-
-   The names of the parameters are not important in this case as long
-   as they are letters and are different from each other.
-
-   The hexprint function is used in lines like this one:
-
-   inst    11vvv111        rst $hexprint(2,$v << 3)
-
-   If the argument to inst is
-          11011111
-   then $v is 011 (3) and the arguments to hexprint are 2 and (3 << 3),
-   i.e., 0x18.  So the value of inst with this argument will be the
-   string "rst 18".
-
-   Instead of writing $v << 3, it would be possible to write
-   $[00vvv000].  For instance when $v is binary 011, this becomes
-     00011000.  The leading 0s could be omitted.
-
-   The $[...] operation is particularly useful for moving bits around.
-   For instance, the HP PA-RISC opcodes contain bits assigned to
-   apparently random parts of the instruction word.  One of the helper
-   functions in its description file looks like this:
-
-   im21l aaaaabbccddddddddddde l'$hex($[edddddddddddbbaaaaacc00000000000])
-
-   So    111110011000000000001 produces 10000000000000111111100000000000.
-
-   The $[...] operation can also be used to spell out binary constants,
-   since C has no syntax for this.
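-   For example, $[1010] in an integer expression simply denotes the
-   constant 10 (binary 1010).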
-
-
-   ...More to come...  */
-
-/* To do:
-   - More error detection, e.g., bitstring or arg not used in entry.
-   - Better error recovery -- nearly all errors are currently fatal.
-   - Clean up type handling, which is somewhat haphazard.  It works but there
-     is stuff that is surely redundant.
-   - Make generated functions void by default, with $ prefix to indicate
-     string-value.  In a void function, instead of returning a string (or
-     integer) it would be output via a user-supplied function.
-   - Further optimise and tidy generated code, e.g.: arrays of one-character
-     strings could be replaced by arrays of characters; switches with just
-     one case could be replaced by ifs.
- */
-
-#include <assert.h>
-#include <ctype.h>
-#include <sys/errno.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define MAXfunction 32         /* Max function name length. */
-#define MAXBITS 32                     /* Max bitstring length. */
-typedef unsigned long bits;
-enum type {T_ERROR, T_UNKNOWN, T_INTEGER, T_STRING};
-const char *const typename[] = {"error", "unknown", "integer", "string"};
-enum walkstringop {COUNTARRAYS, DECLAREARRAYS, COMPILEARRAYS};
-char *bitstype = "unsigned long";
-
-int maxfunctionname, maxargwidth;
-char *progname = "makedis";
-char **global_argv;
-char *filename;
-char *headerfilename;
-FILE *headerfile;
-int lineno;
-int indentation;
-int debug, dump, warnings;
-
-/* componentbits has a 1 bit for every possible number of strings we may want
-   to concatenate together at some stage.  A separate C function is compiled
-   for each such case.  */
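-/* For example, a value built by concatenating three substrings sets
-   bit 3 here, causing a three-way helper dis_concat3() to be emitted.  */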
-bits componentbits;
-
-
-struct entry;
-struct arg;
-struct string;
-struct functioncall;
-struct array;
-struct bits;
-struct bitsplice;
-
-
-int main(int argc, char **argv);
-int makedis(FILE *f, char *fname);
-struct function *findfunction(char *function);
-int parseextern(struct function *fp, FILE *f);
-struct function *makefunction(char *function);
-int parsebits(struct function *fp, char *bitstring, int nbits);
-int parseentrybits(struct entry *ep, char *bitstring, int nbits, int issplice);
-int parsecontrol(char *name, char *value);
-int parseargs(struct function *fp, FILE *f, int *cp);
-int parsestring(struct function *fp, char *str);
-enum type makestring(struct function *fp, struct string **stringlink,
-                    char **stringp, char *magic, enum type targettype);
-int parsedollar(struct function *fp, char **stringp, struct string *sp);
-int parsebitsplice(struct function *fp, char *bitstring, int nbits,
-                  struct string *sp);
-int findvariable(struct function *fp, int name, struct string *sp);
-int parsefunctioncall(struct function *fp, char *start, char **stringp,
-                     struct string *sp);
-int parsearray(struct function *fp, char **stringp, struct string *sp,
-              enum type t);
-void dumpfunctions(void);
-void dumpfunction(struct function *fp);
-void showentry(FILE *f, struct function *fp, struct entry *ep, bits highlight);
-void showbits(FILE *f, struct entry *ep, int nbits, bits highlight);
-void showargs(FILE *f, struct arg *ap, int fieldwidth);
-void showstring(FILE *f, struct string *sp);
-void showstringelement(FILE *f, struct string *sp);
-void showfunctioncall(FILE *f, struct functioncall *fcp);
-void showarray(FILE *f, struct array *ap);
-int outputfunctions(void);
-void outputidentity(FILE *f);
-int outputdeclarations(void);
-void outputconcats(void);
-void outputconcat(int n);
-void outputconcatheader(FILE *f, int n);
-void findarrays(void);
-int checkfixedlength(struct array *ap);
-int outputfunction(struct function *fp);
-void functionarray(struct function *fp);
-void functionheader(FILE *f, struct function *fp);
-int simplearray(struct array *ap);
-void compiletype(FILE *f, enum type *tp);
-int functionswitch(struct function *fp, bits mask, bits value);
-int compilestring(int assignto, struct string *sp, enum type type);
-int compilecheckedstring(int assignto, struct string *sp, enum type type);
-void compileassign(int assignto);
-void compiletemp(int tempno);
-void compiletext(char *s);
-int compileconcat(struct string *sp, enum type type);
-int compilenull(enum type type);
-int compilesimple(struct string *sp, enum type type);
-int compilearrayref(struct array *ap);
-int compilefunctioncall(struct string *sp);
-int walkstring(struct string *sp, enum walkstringop op, int tempno);
-int compilearray(struct array *ap);
-void compilesimplearray(enum type *tp, char *name, int num, struct array *ap);
-void declarearray(struct array *ap);
-void compilebitstring(struct bits *bp);
-void compilebitsplice(struct bitsplice *splicep);
-int bitcount(bits x);
-bits allbitsset(int nbits);
-void findent(FILE *f);
-void indent(void);
-void *xrealloc(char *oldp, size_t size);
-void *xmalloc(size_t size);
-void *xstrdup(char *s);
-int prematureeof(void);
-
-
-int main(int argc, char **argv) {
-    int i;
-    FILE *f;
-
-    global_argv = argv;
-    if (argc > 0)
-       progname = argv[0];
-    for (i = 1; i < argc && argv[i][0] == '-'; i++) {
-       switch (argv[i][1]) {
-       case 'h':
-           if (++i >= argc)
-               goto Usage;
-           headerfilename = argv[i]; break;
-       case 'd':
-           debug = 1; break;
-       case 'D':
-           dump = 1; break;
-       case 'w':
-           warnings = 1; break;
-       default:
-Usage:
-           fprintf(stderr, "Usage: %s [file]\n", progname);
-           return 1;
-       }
-    }
-    if (i == argc)
-       return makedis(stdin, "<stdin>");
-    if (i + 1 != argc)
-       goto Usage;
-    if ((f = fopen(argv[i], "r")) == NULL) {
-       fprintf(stderr, "%s: %s: %s\n", progname, argv[i], strerror(errno));
-       return 1;
-    }
-    return makedis(f, argv[i]);
-}
-
-
-int makedis(FILE *f, char *fname) {
-    int c, i;
-    char function[MAXfunction], bitstring[MAXBITS];
-    static char *string = NULL;
-    int stringlen = 0;
-    struct function *fp;
-
-    filename = fname;
-    lineno = 1;
-    /* Loop for every line in the description. */
-    while (1) {
-       /* Ignore initial spaces and newlines. */
-       while (isspace(c = getc(f)))
-           if (c == '\n')
-               lineno++;
-       if (c == EOF)
-           break;
-
-       /* Ignore comments.  # only allowed at start of line. */
-       if (c == '#') {
-           while ((c = getc(f)) != '\n')
-               if (c == EOF)
-                   return prematureeof();
-           lineno++;
-           continue;
-       }
-
-       /* Read function name, terminated by space. */
-       for (i = 0; i < sizeof function && !isspace(c); i++, c = getc(f)) {
-           if (c == EOF)
-               return prematureeof();
-           function[i] = c;
-       }
-       if (i >= sizeof function) {
-           fprintf(stderr, "%s: %s(%d): function name is too long: %.*s\n",
-                   progname, filename, lineno, i, function);
-           return 1;
-       }
-       function[i] = '\0';
-
-       /* Skip to next field.  */
-       while (isspace(c) && c != '\n')
-           c = getc(f);
-
-       /* If not a control statement, read bitstring and/or arguments. */
-       if (function[0] == ':')
-           fp = 0;     /* Silence gcc. */
-       else {
-           fp = makefunction(function);
-           if (fp == NULL)
-               return 1;
-
-           /* Read optional bitstring. */
-           for (i = 0; i < sizeof bitstring && isalnum(c); i++, c = getc(f)) {
-               if (c == EOF)
-                   return prematureeof();
-               bitstring[i] = c;
-           }
-           if (isalnum(c)) {
-               fprintf(stderr, "%s: %s(%d): bit string is too long: %.*s\n",
-                       progname, filename, lineno, i, bitstring);
-               return 1;
-           }
-           if (parsebits(fp, bitstring, i) != 0)
-               return 1;
-
-           /* Read optional arguments. */
-           if (parseargs(fp, f, &c) != 0)
-               return 1;
-
-           /* Skip to next field. */
-           while (isspace(c) && c != '\n')
-               c = getc(f);
-
-           /* : indicates an external (C) function. */
-           if (c == ':') {
-               if (parseextern(fp, f) != 0)
-                   return 1;
-               continue;
-           }
-       }
-
-       /* Read associated text. */
-       i = 0;
-       while (1) {
-           for ( ; c != '\n'; i++, c = getc(f)) {
-               if (c == EOF)
-                   return prematureeof();
-               if (i >= stringlen) {
-                   stringlen = stringlen * 2 + 16;
-                   string = xrealloc(string, stringlen);
-               }
-               string[i] = c;
-           }
-           lineno++;
-           if (i > 0) {
-               switch (string[i - 1]) {
-               case '\\':
-                   i--;
-                   /* Fall in... */
-               case '|':
-                   while (isspace(c = getc(f)) && c != '\n') ;
-                   continue;
-               }
-           }
-           break;
-       }
-       if (i >= stringlen) {
-           stringlen = stringlen * 2 + 16;
-           string = xrealloc(string, stringlen);
-       }
-       string[i] = '\0';
-
-       /* Parse the line just read. */
-       if (function[0] == ':') {
-           if (parsecontrol(function + 1, string) != 0)
-               return 1;
-       } else {
-           if (parsestring(fp, string) != 0)
-               return 1;
-       }
-    }
-    if (dump)
-       dumpfunctions();
-    return outputfunctions();
-}
-
-
-/* A function in the description file.  nbits and nargs are -1 until the
-   real values are known.  */
-struct function {
-    struct function *next;
-    char *name;
-    enum type type;
-    int nbits;         /* Number of bits in the bitpattern, 0 if none. */
-    int nargs;         /* Number of (x,y,...) parameters, 0 if none. */
-    char isarray;      /* Will be represented by a C array. */
-    int fixedlength;   /* If a C array, will be a char [][N] not a char *[]. */
-    struct entry *first, *last;
-                       /* Links to the value(s) supplied. */
-    struct arg *args;  /* List of (x,y,...) names and types. */
-};
-struct function *functions;
-
-
-/* Find the function with the given name.  If not found, create a structure
-   for it, fill it out with a template, and return that.  */
-struct function *findfunction(char *name) {
-    struct function *fp;
-
-    for (fp = functions; fp != NULL; fp = fp->next) {
-       if (strcmp(fp->name, name) == 0)
-           return fp;
-    }
-    if (strlen(name) > maxfunctionname)
-       maxfunctionname = strlen(name);
-    fp = xmalloc(sizeof *fp);
-    fp->next = functions;
-    functions = fp;
-    fp->name = xstrdup(name);
-    fp->type = T_UNKNOWN;
-    fp->nbits = fp->nargs = -1;                /* nbits will be set correctly later. */
-    fp->isarray = 0;
-    fp->first = fp->last = NULL;
-    return fp;
-}
-
-
-/* Parse an external (C) function declaration.  This will look something like:
-       malloc (%s) :
-   We're called just after seeing the ':'.
-   Return 0 if parsing is successful, 1 otherwise.  */
-int parseextern(struct function *fp, FILE *f) {
-    int c;
-
-    if ((c = getc(f)) != '\n') {
-       fprintf(stderr,
-               "%s: %s(%d): extern declaration should be a lone `:'\n",
-               progname, filename, lineno);
-       return 1;
-    }
-    if (fp->nbits != 0) {
-       fprintf(stderr,
-               "%s: %s(%d): extern functions should not have bitstrings\n",
-               progname, filename, lineno);
-       return 1;
-    }
-    free(fp->first);
-    fp->first = fp->last = NULL;
-    return 0;
-}
-
-
-/* A value supplied for a function (the third field in a description line).
-   In general there can be any number of such values, differing in the
-   bitpattern supplied.  The mask and value fields describe the constant
-   bits in the bitpattern: mask indicates which bits they are and value
-   indicates the values of those bits.  So this entry matches
-   ((x & mask) == value).  */
-struct entry {
-    struct entry *next;
-    bits mask, value;
-    struct bits *bits;         /* List of named bitfields. */
-    struct string *string;     /* Value of function when bitpattern matched. */
-    char done;                 /* This entry has already been compiled. */
-};
-
-
-/* We've just seen a definition of function "name".  Make a structure for it
-   if necessary, and a template entry that will describe the value given here.
-   */
-struct function *makefunction(char *name) {
-    struct function *fp;
-    struct entry *ep = xmalloc(sizeof *ep);
-    enum type type;
-
-    if (name[0] == '%') {
-       name++;
-       type = T_INTEGER;
-    } else
-       type = T_STRING;
-    fp = findfunction(name);
-    if (fp->type == T_UNKNOWN)
-       fp->type = type;
-    else if (fp->type != type) {
-       fprintf(stderr, "%s: %s(%d): function %s previously declared as %s, "
-                       "here as %s\n", progname, filename, lineno, name,
-                       typename[fp->type], typename[type]);
-       return NULL;
-    }
-    ep->next = NULL;
-    ep->bits = NULL;
-    ep->done = 0;
-    if (fp->first != NULL)
-       fp->last->next = ep;
-    else
-       fp->first = ep;
-    fp->last = ep;
-    return fp;
-}
-
-
-/* A named bitfield within the bitpattern of a function entry, or within a
-   $[...] bitsplice.  The mask covers the bitfield and the shift says how
-   many 0 bits there are after the last 1 in the mask.  */
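-/* For example, in the pattern 001aa111 the field a has mask 0x18
-   (binary 00011000) and shift 3.  */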
-struct bits {
-    struct bits *next;
-    int shift;
-    bits mask;
-    char name;
-};
-
-
-/* Parse the bitstring supplied for the given function.  nbits says how many
-   bits there are; it can legitimately be 0.  Return value is 0 on success.  */
-int parsebits(struct function *fp, char *bitstring, int nbits) {
-    if (fp->nbits < 0)
-       fp->nbits = nbits;
-    else if (fp->nbits != nbits) {
-       fprintf(stderr, "%s: %s(%d): bit string of length %d;\n",
-               progname, filename, lineno, nbits);
-       fprintf(stderr, "  function %s has bit strings of length %d\n",
-               fp->name, fp->nbits);
-       return 1;
-    }
-    return parseentrybits(fp->last, bitstring, nbits, 0);
-}
-
-
-/* Parse a bitstring that is the pattern for a function entry or that is in a
-   $[...] bitsplice.  Put the result in ep.  Return value is 0 on success.  */
-int parseentrybits(struct entry *ep, char *bitstring, int nbits, int issplice) {
-    int i, j;
-    char bit;
-    bits mask, value, entrymask;
-    struct bits *bp;
-
-    mask = value = 0;
-    for (i = 0; i < nbits; i++) {
-       bit = bitstring[nbits - 1 - i];
-       switch (bit) {
-       case '1':
-           value |= 1 << i;
-           /* Fall in... */
-       case '0':
-           mask |= 1 << i;
-           continue;
-       }
-       if (!isalpha(bit)) {
-           fprintf(stderr, "%s: %s(%d): invalid character in bitstring: %c\n",
-                   progname, filename, lineno, bit);
-           return 1;
-       }
-       if (!issplice) {
-           for (bp = ep->bits; bp != NULL; bp = bp->next) {
-               if (bp->name == bit) {
-                   fprintf(stderr,
-                           "%s: %s(%d): bitstring name %c used twice\n",
-                           progname, filename, lineno, bit);
-                   return 1;
-               }
-           }
-       }
-       entrymask = (bits) 1 << i;
-       for (j = i + 1; j < nbits && bitstring[nbits - 1 - j] == bit; j++)
-           entrymask |= (bits) 1 << j;
-       bp = xmalloc(sizeof *bp);
-       bp->shift = i;
-       bp->mask = entrymask;
-       bp->name = bit;
-       bp->next = ep->bits;
-       ep->bits = bp;
-       i = j - 1;
-    }
-    ep->mask = mask;
-    ep->value = value;
-    return 0;
-}
-
-
-/* Parse a control line.  This looks something like:
-   :bitstype unsigned int
-   in which case we will be called with name "bitstype" and
-   value "unsigned int".  */
-int parsecontrol(char *name, char *value) {
-    if (strcmp(name, "bitstype") == 0)
-       bitstype = xstrdup(value);
-    else {
-       fprintf(stderr, "%s: %s(%d): unrecognised control keyword %s\n",
-               progname, filename, lineno, name);
-       return 1;
-    }
-    return 0;
-}
-
-
-/* A parameter to a function, e.g., x in:
-   %f aaa(%x) $a + $x  */
-struct arg {
-    struct arg *next;
-    enum type type;
-    char name;
-};
-
-
-/* Parse the parameters (x,y,...) to a function and put the result in fp.
-   The entry that is being built is fp->last.  cp points to the opening
-   (; if it does not point to a ( then there are no parameters.  If
-   this is the first entry for the function, fp->nargs will be -1 and
-   we will build up an argument list.  Otherwise, fp->nargs will be
-   >= 0 and we will only check that the arguments here are consistent
-   with what went before.  Return value is 0 on success.  */
-int parseargs(struct function *fp, FILE *f, int *cp) {
-    struct arg **arglink, *ap;
-    struct bits *bp;
-    int nargs, width;
-    char name;
-    enum type t;
-
-    arglink = &fp->args;
-    width = nargs = 0;
-    if (*cp == '(') {
-       *cp = getc(f);
-       if (*cp != ')') {
-           width = 1;
-           while (1) {
-               nargs++;
-               width += 2;
-               if (fp->nargs >= 0 && nargs > fp->nargs) {
-                   fprintf(stderr,
-                           "%s: %s(%d): %d arg(s) instead of %d for %s\n",
-                           progname, filename, lineno, nargs, fp->nargs,
-                           fp->name);
-                   return 1;
-               }
-               t = T_STRING;
-               if (*cp == '%') {
-                   width++;
-                   t = T_INTEGER;
-                   *cp = getc(f);
-               }
-               name = *cp;
-               if (!isalpha(name)) {
-                   fprintf(stderr,
-                           "%s: %s(%d): argument should be letter: %c\n",
-                           progname, filename, lineno, name);
-                   return 1;
-               }
-               for (bp = fp->last->bits; bp != NULL; bp = bp->next) {
-                   if (bp->name == name) {
-                       fprintf(stderr,
-                               "%s: %s(%d): %c is a bitstring and an arg\n",
-                               progname, filename, lineno, name);
-                       return 1;
-                   }
-               }
-               if (fp->nargs >= 0) {
-                   if ((*arglink)->name != name) {
-                       fprintf(stderr,
-                               "%s: %s(%d): arg %d of %s is %c not %c\n",
-                               progname, filename, lineno, nargs, fp->name,
-                               (*arglink)->name, name);
-                       return 1;
-                   }
-                   if ((*arglink)->type != t) {
-                       fprintf(stderr,
-                               "%s: %s(%d): arg %c of %s: inconsistent type\n",
-                               progname, filename, lineno, name, fp->name);
-                       return 1;
-                   }
-               } else {
-                   for (ap = fp->args; ap != *arglink; ap = ap->next) {
-                       if (ap->name == name) {
-                           fprintf(stderr,
-                                   "%s: %s(%d): argument name %c used twice\n",
-                                   progname, filename, lineno, name);
-                           return 1;
-                       }
-                   }
-                   *arglink = xmalloc(sizeof **arglink);
-                   (*arglink)->name = name;
-                   (*arglink)->type = t;
-               }
-               arglink = &(*arglink)->next;
-               *cp = getc(f);
-               if (*cp == ')')
-                   break;
-               if (*cp != ',') {
-                   fprintf(stderr,
-                           "%s: %s(%d): bad character in argument list: %c\n"
-                           "  (arguments must be single letters)\n",
-                           progname, filename, lineno, *cp);
-                   return 1;
-               }
-               *cp = getc(f);
-           }
-       }
-       *cp = getc(f);
-    }
-    if (fp->nargs < 0) {
-       fp->nargs = nargs;
-       width += fp->nbits;
-       if (width > maxargwidth)
-           maxargwidth = width;
-    } else if (fp->nargs != nargs) {
-       fprintf(stderr, "%s: %s(%d): argument list of length %d;\n",
-               progname, filename, lineno, nargs);
-       fprintf(stderr, "  function %s has argument lists of length %d\n",
-               fp->name, fp->nargs);
-       return 1;
-    }
-    *arglink = NULL;
-    return 0;
-}
-
-
-/* Parse the string describing the value of this entry for our
-   function.  Return 0 on success.  */
-int parsestring(struct function *fp, char *str) {
-    enum type t;
-
-    t = makestring(fp, &fp->last->string, &str, NULL, fp->type);
-    if (t == T_ERROR)
-       return 1;
-    if (fp->type != t && t != T_UNKNOWN) {
-       fprintf(stderr, "%s: %s(%d): function %s has inconsistent types\n",
-               progname, filename, lineno, fp->name);
-       return 1;
-    }
-    return 0;
-}
-
-
-/* A parsed representation of the whole string describing a value of a
-   function, or certain strings within that (e.g., array indices).  This is a
-   linked list of substrings whose type is given by the type field.  */
-struct string {
-    struct string *next;
-    enum elementtype {
-       S_TEXT, S_BITSTRING, S_BITSPLICE, S_PARAMETER, S_FUNCTIONCALL, S_ARRAY
-    } type;
-    union value {      /* The fields here correspond to the enum values. */
-       char *text;                             /* plain text */
-       struct bits *bits;                      /* $x where x is a bitfield */
-       struct bitsplice *bitsplice;            /* $[...] */
-       struct arg *parameter;                  /* $x where x is a parameter */
-       struct functioncall *functioncall;      /* $func(...) */
-       struct array *array;                    /* {...}[...] */
-    } value;
-};
-
-/* The representation of a function call $func(...) in the description of a
-   function value.  */
-struct functioncall {
-    struct function *function;
-    struct stringlist *args;
-};
-
-/* The representation of an array selection {...|...}[...] in the description
-   of a function value.  tempno is used when constructing a C variable name
-   that will contain the strings or numbers in an array.  */
-struct array {
-    struct string *index;              /* what's between [...] */
-    struct stringlist *elements;       /* what's between {...} */
-    enum type type;                    /* the type of each element */
-    int tempno;        
-};
-
-/* A list of strings, being the list of arguments in a function call or the
-   list of elements of an array.  This is a linked list of linked lists.  */
-struct stringlist {
-    struct stringlist *next;
-    enum type type;
-    struct string *string;
-};
-
-
-/* The following are the only characters with special meaning at the top level
-   of parsing of a function value.  When parsing arrays or function calls,
-   other characters become special.  */
-#define MAKESTRING_MAGIC "${"/*}*/
-
-
-/* Parse a function return-value string or substring and make a struct string
-   list for it.  The string starts at *stringp and ends at a \0 or at any
-   character in the `magic' string other than { or $.  *stringp is updated
-   to point to the terminating character.  The parsed representation is put
-   at *stringlink.  `fp' is the function whose return value is being parsed.
-   `targettype' is the expected type of the result, if known.
-   The return value is the actual type.  */
-enum type makestring(struct function *fp, struct string **stringlink,
-                    char **stringp, char *magic, enum type targettype) {
-    char *p, *q;
-    struct string *sp, **firststringlink;
-    int n, components;
-    int parenlevel = 0;
-    enum type t = targettype, newt;
-
-    if (magic == NULL)
-       magic = MAKESTRING_MAGIC;
-    p = *stringp;
-    firststringlink = stringlink;
-    components = 0;
-    while (*p != '\0') {
-       sp = xmalloc(sizeof *sp);
-       q = p;
-       n = 0;
-       do {
-           if (strchr(magic, *q) != NULL) {
-               if (*q != ')' || parenlevel == 0)
-                   break;
-           }
-           switch (*q) {
-           case '(':
-               parenlevel++; break;
-           case ')':
-               parenlevel--; break;
-           case '\\':
-               if (q[1] != '\0')
-                   q++;
-               break;
-           }
-           n++;
-       } while (*++q != '\0');
-       if (n > 0) {
-           sp->type = S_TEXT;
-           sp->value.text = q = xmalloc(n + 1);
-           do {
-               if (*p == '\\')
-                   p++;
-               *q++ = *p++;
-           } while (--n > 0);
-           *q = '\0';
-           newt = t;
-       } else if (*p == '$') {
-           if (parsedollar(fp, &p, sp) != 0)
-               return T_ERROR;
-           switch (sp->type) {
-           case S_BITSTRING:
-           case S_BITSPLICE:
-               newt = T_INTEGER;
-               break;
-           case S_PARAMETER:
-               newt = sp->value.parameter->type;
-               break;
-           case S_FUNCTIONCALL:
-               newt = sp->value.functioncall->function->type;
-               break;
-           default:
-               fprintf(stderr, "makestring type %d\n", sp->type);
-               abort();
-           }
-       } else if (*p == '{'/*}*/) {
-           if (parsearray(fp, &p, sp, t) != 0)
-               return T_ERROR;
-           newt = sp->value.array->type;
-       } else {
-           free(sp);
-           break;
-       }
-       if (t == T_UNKNOWN)
-           t = newt;
-       else if (newt != T_UNKNOWN && t != newt) {
-           if (stringlink == firststringlink) {
-               fprintf(stderr, "%s: %s(%d): expected %s type:\n", progname,
-                       filename, lineno, typename[t]);
-               showstringelement(stderr, sp);
-               return T_ERROR;
-           }
-           *stringlink = NULL;
-           fprintf(stderr, "%s: %s(%d): mixed types in string:\n",
-                   progname, filename, lineno);
-           showstring(stderr, *firststringlink);
-           fprintf(stderr, " -- %s\n", typename[t]);
-           showstringelement(stderr, sp);
-           fprintf(stderr, " -- %s\n", typename[newt]);
-           return T_ERROR;
-       }
-       *stringlink = sp;
-       stringlink = &sp->next;
-       components++;
-    }
-    *stringlink = NULL;
-    *stringp = p;
-    if (components >= MAXBITS) {
-       fprintf(stderr, "%s: %s(%d): excessively complicated string\n",
-               progname, filename, lineno);
-       return T_ERROR;
-    }
-    componentbits |= (bits) 1 << components;
-    return t;
-}
-
-
-/* Parse a $ operation at **stringp and update *stringp to point past it.
-   `fp' is the function whose return value is being parsed.  The parsed
-   item will be put at *sp.  Return 0 on success, nonzero on error.  */
-int parsedollar(struct function *fp, char **stringp, struct string *sp) {
-    char *p, *start;
-
-    p = *stringp;
-    assert(*p == '$');
-    start = ++p;
-    if (*p == '[')
-       p++;
-    while (isalnum(*p) || *p == '_')
-       p++;
-    if (*start == '[') {
-       if (*p != ']') {
-           fprintf(stderr, "%s: %s(%d): missing ] or bad character in $[\n",
-                   progname, filename, lineno);
-           return 1;
-       }
-       *stringp = p + 1;
-       return parsebitsplice(fp, start + 1, p - start - 1, sp);
-    }
-    if (p == start) {
-       fprintf(stderr, "%s: %s(%d): missing identifier after $\n", progname,
-               filename, lineno);
-       return 1;
-    }
-    if (p == start + 1) {
-       if (findvariable(fp, *start, sp) != 0)
-           return 1;
-    } else {
-       if (parsefunctioncall(fp, start, &p, sp) != 0)
-           return 1;
-    }
-    *stringp = p;
-    return 0;
-}
-
-
-/* The representation of a $[...] bitsplice.  It is parsed into a
-   struct entry just as if it were a bitfield parameter, then analysed
-   into a chain of struct bitsplicebits.  These in conjunction with
-   the constant portion of the struct entry will allow the bitsplice to
-   be compiled.  Each bitsplicebits element represents either a numeric
-   argument to the current function, in which case it will be shifted
-   into place; or a bitfield name from the bitfield description of the
-   current function, in which case it will be shifted by the difference
-   between the position of the bitfield in the argument and the position
-   it occurs in the bitsplice.  `shift' indicates how much to shift left
-   the associated value; if it is negative the value is shifted right.
-   For instance, in a function like this:
-     %oh  xx00(%y)  $[yyxx]
-   the bitsplicebits for y will have shift = 2 and value.arg pointing to y,
-   and those for x will have shift = -2 and value.mask = binary 1100.
-   As an optimisation, contiguous bitfields that are also contiguous in the
-   bitsplice will be combined.  For instance:
-     %oh  xxyy00    $[0xxyy0]
-   will compile the same code as:
-     %oh  zzzz00    $[0zzzz0].
-   As another optimisation, a bitfield that occupies the entire bitstring
-   for a function will be treated like a parameter in that it will not be
-   masked in the bitsplice.  For instance:
-     %oh  xxxxxx    $[0xxxxxx0]
-   will compile the same code as:
-     %oh  (%x)      $[0xxxxxx0].  */
-struct bitsplice {
-    struct entry entry;
-    int nbits;
-    struct bitsplicebits *splice;
-};
-struct bitsplicebits {
-    struct bitsplicebits *next;
-    int shift;
-    enum elementtype type;
-    union {
-       struct arg *arg;
-       bits mask;
-    } value;
-};
-
-
-int parsebitsplice(struct function *fp, char *bitstring, int nbits,
-                  struct string *sp) {
-    struct bitsplice *splicep;
-    struct bitsplicebits *bsp, *lastbsp, **bspp;
-    struct bits *bp;
-    int shift, nfrombits, ntobits;
-    bits allbits, b;
-
-    splicep = xmalloc(sizeof *splicep);
-    splicep->nbits = nbits;
-    if (parseentrybits(&splicep->entry, bitstring, nbits, 1) != 0)
-       return 1;
-    bspp = &splicep->splice;
-    lastbsp = NULL;
-    for (bp = splicep->entry.bits; bp != NULL; bp = bp->next) {
-       if (findvariable(fp, bp->name, sp) != 0)
-           return 1;
-       shift = bp->shift;
-       if (sp->type == S_BITSTRING) {
-           nfrombits = bitcount(sp->value.bits->mask);
-           ntobits = bitcount(bp->mask);
-           if (warnings) {
-               if (nfrombits != ntobits) {
-                   fprintf(stderr, "%s: %s(%d): warning: "
-                                   "bitstring $%c %ser than its place "
-                                   "in bitsplice\n",
-                           progname, filename, lineno, bp->name,
-                           (nfrombits > ntobits) ? "bigg" : "small");
-               }
-           }
-           shift -= sp->value.bits->shift;
-
-           /* See if this bitfield can be combined with a previous contiguous
-              bitfield.  */
-           if (lastbsp != NULL && lastbsp->type == S_BITSTRING
-               && lastbsp->shift == shift) {
-               lastbsp->value.mask |= sp->value.bits->mask;
-               continue;
-           }
-       } else {
-           assert(sp->type == S_PARAMETER);
-           if (sp->value.parameter->type != T_INTEGER) {
-               fprintf(stderr,
-                       "%s: %s(%d): variable %c in $[...] should be integer\n",
-                       progname, filename, lineno, sp->value.parameter->name);
-               return 1;
-           }
-       }
-       *bspp = bsp = xmalloc(sizeof *bsp);
-       bsp->type = sp->type;
-       bsp->shift = shift;
-       if (sp->type == S_PARAMETER)
-           bsp->value.arg = sp->value.parameter;
-       else
-           bsp->value.mask = sp->value.bits->mask;
-       bspp = &bsp->next;
-       lastbsp = bsp;
-    }
-    *bspp = NULL;
-
-    /* Look for a spliced element that is the entire bitstring argument to
-       this function and therefore doesn't need to be masked.  */
-    allbits = allbitsset(fp->nbits);
-    for (bsp = splicep->splice; bsp != NULL; bsp = bsp->next) {
-       if (bsp->type == S_BITSTRING) {
-           for (b = bsp->value.mask; b != 0 && !(b & 1); b >>= 1) ;
-           if (b == allbits)
-               bsp->value.mask = 0;
-       }
-    }
-    sp->type = S_BITSPLICE;
-    sp->value.bitsplice = splicep;
-    return 0;
-}
-
-
-int findvariable(struct function *fp, int name, struct string *sp) {
-    struct bits *bp;
-    struct arg *ap;
-
-    for (bp = fp->last->bits; bp != NULL; bp = bp->next) {
-       if (bp->name == name) {
-           sp->type = S_BITSTRING;
-           sp->value.bits = bp;
-           return 0;
-       }
-    }
-    for (ap = fp->args; ap != NULL; ap = ap->next) {
-       if (ap->name == name) {
-           sp->type = S_PARAMETER;
-           sp->value.parameter = ap;
-           return 0;
-       }
-    }
-    fprintf(stderr, "%s: %s(%d): undefined parameter %c\n", progname, filename,
-           lineno, name);
-    return 1;
-}
-
-
-int parsefunctioncall(struct function *fp, char *start, char **stringp,
-                     struct string *sp) {
-    char *p;
-    struct functioncall *fcp;
-    struct stringlist **arglink, *arg;
-    enum type t;
-
-    p = *stringp;
-    if (*p != '(') {
-       fprintf(stderr, "%s: %s(%d): missing ( after function %.*s\n", progname,
-                       filename, lineno, (int)(p - start), start);
-       return 1;
-    }
-    sp->type = S_FUNCTIONCALL;
-    sp->value.functioncall = fcp = xmalloc(sizeof *fcp);
-    *p = '\0'; /* Ugly. */
-    fcp->function = findfunction(start);
-    *p = '(';
-    arglink = &fcp->args;
-    if (*++p != ')') {
-       while (1) {
-           arg = xmalloc(sizeof *arg);
-           t = makestring(fp, &arg->string, &p, MAKESTRING_MAGIC ",)",
-                          T_UNKNOWN);
-           if (t == T_ERROR)
-               return 1;
-           arg->type = t;
-           *arglink = arg;
-           arglink = &arg->next;
-           if (*p == ')')
-               break;
-           assert(*p == ',');
-           p++;
-       }
-    }
-    *arglink = NULL;
-    assert(*p == ')');
-    *stringp = p + 1;
-    return 0;
-}
-
-
-int parsearray(struct function *fp, char **stringp, struct string *sp,
-              enum type t) {
-    char *p;
-    struct array *ap;
-    struct stringlist **elementlink, *element;
-
-    p = *stringp;
-    assert(*p == '{'/*}*/);
-    sp->type = S_ARRAY;
-    sp->value.array = ap = xmalloc(sizeof *ap);
-    ap->tempno = -1;
-    elementlink = &ap->elements;
-    ap->type = t;
-    if (*++p != /*{*/'}') {
-       while (1) {
-           element = xmalloc(sizeof *element);
-           t = makestring(fp, &element->string, &p,
-                          MAKESTRING_MAGIC /*{*/"|}", t);
-           if (t == T_ERROR)
-               return 1;
-           element->type = t;
-           if (ap->type == T_UNKNOWN)
-               ap->type = t;
-           else if (t != T_UNKNOWN && ap->type != t) {
-               fprintf(stderr, "%s: %s(%d): mixed types in array:\n",
-                       progname, filename, lineno);
-               showstring(stderr, ap->elements->string);
-               fprintf(stderr, " -- %s\n", typename[ap->type]);
-               showstring(stderr, element->string);
-               fprintf(stderr, " -- %s\n", typename[t]);
-               return 1;
-           }
-           *elementlink = element;
-           elementlink = &element->next;
-           if (*p == /*{*/'}')
-               break;
-           assert(*p == '|');
-           p++;
-       }
-    }
-    *elementlink = NULL;
-    assert(*p == /*{*/'}');
-    if (*++p != '[') {
-       fprintf(stderr, "%s: %s(%d): missing [index] after array\n",
-               progname, filename, lineno);
-       return 1;
-    }
-    ++p;
-    t = makestring(fp, &ap->index, &p, MAKESTRING_MAGIC "]", T_INTEGER);
-    if (t == T_ERROR)
-       return 1;
-    if (t == T_STRING) {
-       fprintf(stderr, "%s: %s(%d): array index cannot be string:\n",
-               progname, filename, lineno);
-       showstring(stderr, ap->index);
-       return 1;
-    }
-    if (*p != ']') {
-       fprintf(stderr, "%s: %s(%d): [ without ]\n", progname, filename,
-               lineno);
-       return 1;
-    }
-    *stringp = p + 1;
-    return 0;
-}
-
-
-void dumpfunctions() {
-    struct function *fp;
-
-    for (fp = functions; fp != NULL; fp = fp->next)
-       dumpfunction(fp);
-}
-
-
-void dumpfunction(struct function *fp) {
-    struct entry *ep;
-
-    for (ep = fp->first; ep != NULL; ep = ep->next)
-       showentry(stderr, fp, ep, 0);
-}
-
-
-/* Entries are not shown exactly as they would be input, since \ would
-   need to be provided before some characters such as $ or {.  But the
-   characters "|},]" pose a problem since a \ is only needed in certain
-   contexts and is annoying otherwise.  It's not worth doing this right,
-   since it's only used for error messages.  */
-void showentry(FILE *f, struct function *fp, struct entry *ep, bits highlight) {
-    if (fp->type == T_INTEGER)
-       putc('%', f);
-    fprintf(f, "%-*s ", maxfunctionname + 1, fp->name);
-    if (fp->nbits == 0 && fp->nargs == 0)
-       fprintf(f, "%-*s", maxargwidth, "()");
-    else {
-       showbits(f, ep, fp->nbits, 0);
-       showargs(f, fp->args, maxargwidth - fp->nbits);
-    }
-    putc(' ', f);
-    showstring(f, ep->string);
-    putc('\n', f);
-    if (highlight != 0) {
-       fprintf(f, "%-*s ", maxfunctionname + 1, "");
-       showbits(f, ep, fp->nbits, highlight);
-       putc('\n', f);
-    }
-}
-
-
-void showbits(FILE *f, struct entry *ep, int nbits, bits highlight) {
-    struct bits *bp;
-    bits i, value;
-    char zero, one;
-
-    if (nbits == 0)
-       return;
-    i = 1 << (nbits - 1);
-    bp = ep->bits;
-    if (highlight) {
-       value = highlight;
-       zero = ' ';
-       one = '^';
-    } else {
-       value = ep->value;
-       zero = '0';
-       one = '1';
-    }
-    do {
-       if (highlight != 0 || (ep->mask & i)) {
-           putc((value & i) ? one : zero, f);
-           i >>= 1;
-       } else {
-           assert(bp != NULL && (bp->mask & i));
-           do {
-               putc(bp->name, f);
-               i >>= 1;
-           } while (bp->mask & i);
-           bp = bp->next;
-       }
-    } while (i != 0);
-}
-
-
-void showargs(FILE *f, struct arg *ap, int fieldwidth) {
-    int width;
-    int lastc;
-    int isint;
-
-    if (ap == NULL)
-       width = 0;
-    else {
-       width = 1;
-       lastc = '(';
-       do {
-           isint = (ap->type == T_INTEGER);
-           fprintf(f, "%c%s%c", lastc, isint ? "%" : "", ap->name);
-           width += 2 + isint;
-           ap = ap->next;
-           lastc = ',';
-       } while (ap != NULL);
-       putc(')', f);
-    }
-    fprintf(f, "%-*s", fieldwidth - width, "");
-}
-
-
-void showstring(FILE *f, struct string *sp) {
-    for ( ; sp != NULL; sp = sp->next)
-       showstringelement(f, sp);
-}
-
-
-void showstringelement(FILE *f, struct string *sp) {
-    struct bitsplice *bsp;
-
-    switch (sp->type) {
-    case S_TEXT:
-       fputs(sp->value.text, f);
-       break;
-    case S_BITSTRING:
-       fprintf(f, "$%c", sp->value.bits->name);
-       break;
-    case S_BITSPLICE:
-       fprintf(f, "$[");
-       bsp = sp->value.bitsplice;
-       showbits(f, &bsp->entry, bsp->nbits, 0);
-       fprintf(f, "]");
-       break;
-    case S_PARAMETER:
-       fprintf(f, "$%c", sp->value.parameter->name);
-       break;
-    case S_FUNCTIONCALL:
-       showfunctioncall(f, sp->value.functioncall);
-       break;
-    case S_ARRAY:
-       showarray(f, sp->value.array);
-       break;
-    default:
-       fprintf(stderr, "showstring case %d\n", sp->type);
-       abort();
-    }
-}
-
-
-void showfunctioncall(FILE *f, struct functioncall *fcp) {
-    struct stringlist *sp;
-    char *last;
-
-    fprintf(f, "$%s(", fcp->function->name);
-    last = "";
-    for (sp = fcp->args; sp != NULL; sp = sp->next) {
-       fputs(last, f);
-       last = ",";
-       showstring(f, sp->string);
-    }
-    putc(')', f);
-}
-
-
-void showarray(FILE *f, struct array *ap) {
-    struct stringlist *sp;
-    char *last;
-
-    putc('{'/*}*/, f);
-    last = "";
-    for (sp = ap->elements; sp != NULL; sp = sp->next) {
-       fputs(last, f);
-       last = "|";
-       showstring(f, sp->string);
-    }
-    fputs(/*{*/"}[", f);
-    showstring(f, ap->index);
-    putc(']', f);
-}
-
-
-const char commonpreamble[] = "\
-typedef %s bits;\n\
-\n\
-";
-
-const char concatpreamble[] = "\
-static char *dis_buf;\n\
-static int dis_bufindex, dis_buflen;\n\
-\n\
-void *dis_alloc(size_t size)\n\
-{\n\
-    void *p;\n\
-    int newindex = dis_bufindex + size;\n\
-    if (newindex > dis_buflen) {\n\
-       dis_buflen = newindex * 4;\n\
-       dis_buf = malloc(dis_buflen);\n\
-       /* We can't use realloc because there might be pointers extant into\n\
-          the old buffer.  So we waste the memory of the old buffer.  We\n\
-          should soon reach an adequate buffer size and stop leaking.  */\n\
-       if (dis_buf == 0) {\n\
-           perror(\"malloc\");\n\
-           exit(1);\n\
-       }\n\
-       dis_bufindex = 0;\n\
-    }\n\
-    p = dis_buf + dis_bufindex;\n\
-    dis_bufindex = newindex;\n\
-    return p;\n\
-}\n\
-\n\
-void dis_done()\n\
-{\n\
-    dis_bufindex = 0;\n\
-}\n\
-\n\
-";
-
-const char concatdeclarations[] = "\
-#include <string.h>\n\
-#include <stdlib.h>\n\
-#include <sys/errno.h>\n\
-\n\
-extern void *dis_realloc(void *p, size_t size); /* User-provided. */\n\
-void *dis_alloc(size_t size);\n\
-void dis_done(void);\n\
-";
-
-const char nonconcatpreamble[] = "\
-void dis_done() {}\n\
-";
-
-
-int outputfunctions() {
-    struct function *fp;
-
-    outputidentity(stdout);
-    if (headerfilename != NULL) {
-       if ((headerfile = fopen(headerfilename, "w")) == NULL) {
-           fprintf(stderr, "%s: create %s: %s\n", progname, headerfilename,
-                   strerror(errno));
-           return 1;
-       }
-       outputidentity(headerfile);
-       fprintf(headerfile, commonpreamble, bitstype);
-       printf("\n#include \"%s\"\n", headerfilename);
-    } else
-       printf(commonpreamble, bitstype);
-    findarrays();
-    if (outputdeclarations() != 0)
-       return 1;
-    outputconcats();
-    for (fp = functions; fp != NULL; fp = fp->next) {
-       if (fp->isarray)
-           functionarray(fp);
-    }
-    for (fp = functions; fp != NULL; fp = fp->next) {
-       if (fp->first != NULL && !fp->isarray) {
-           if (outputfunction(fp) != 0)
-               return 1;
-       }
-    }
-    return 0;
-}
-
-
-void outputidentity(FILE *f) {
-    char **p;
-
-    fprintf(f, "/*\n * This file was generated by:\n *");
-    for (p = global_argv; *p != NULL; p++)
-       fprintf(f, " %s", *p);
-    fprintf(f, "\n */\n\n");
-}
-
-
-int outputdeclarations() {
-    FILE *f = headerfile ? headerfile : stdout;
-    struct function *fp;
-
-    for (fp = functions; fp != NULL; fp = fp->next) {
-       if (fp->type != T_UNKNOWN) {
-           if (fp->isarray) {
-               fprintf(f, "extern ");
-               if (fp->fixedlength > 0)
-                   fprintf(f, "char %s[][%d]", fp->name, fp->fixedlength);
-               else {
-                   compiletype(f, &fp->type);
-                   fprintf(f, "%s[]", fp->name);
-               }
-           } else
-               functionheader(f, fp);
-           fprintf(f, ";\n");
-       }
-    }
-    return 0;
-}
-
-
-void outputconcats() {
-    int i;
-
-    if (componentbits & ~3) {
-       fputs(concatdeclarations, headerfile ? headerfile : stdout);
-       fputs(concatpreamble, stdout);
-    } else
-       fputs(nonconcatpreamble, stdout);
-    for (i = 2; i < MAXBITS; i++) {
-       if (componentbits & (1 << i))
-           outputconcat(i);
-    }
-}
-
-
-void outputconcat(int n) {
-    int i;
-    char *last;
-
-    assert(n > 1);
-    if (headerfile) {
-       outputconcatheader(headerfile, n);
-       fprintf(headerfile, ";\n");
-    }
-    outputconcatheader(stdout, n);
-    printf("\n{\n    void *p;\n    int len = ");
-    last = "";
-    for (i = 0; i < n; i++) {
-       printf("%sstrlen(p%d)", last, i);
-       last = " + ";
-    }
-    printf(";\n    p = dis_alloc(len + 1);\n    return ");
-    for (i = 1; i < n; i++)
-       printf("strcat(");
-    printf("strcpy(p, p0)");
-    for (i = 1; i < n; i++)
-       printf(", p%d)", i);
-    printf(";\n}\n\n");
-}
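-
-/* For n == 2, the printfs above produce:
-
-   char *dis_concat2(char *p0, char *p1)
-   {
-       void *p;
-       int len = strlen(p0) + strlen(p1);
-       p = dis_alloc(len + 1);
-       return strcat(strcpy(p, p0), p1);
-   }
-*/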
-
-
-void outputconcatheader(FILE *f, int n) {
-    int i;
-    char *last = "";
-
-    fprintf(f, "char *dis_concat%d(", n);
-    for (i = 0; i < n; i++) {
-       fprintf(f, "%schar *p%d", last, i);
-       last = ", ";
-    }
-    fprintf(f, ")");
-}
-
-
-void findarrays() {
-    struct function *fp;
-    struct entry *ep;
-    struct string *estr, *indexstr;
-    struct bits *bp;
-
-    for (fp = functions; fp != NULL; fp = fp->next) {
-       if (fp->nbits > 0 && fp->nargs > 0)
-           continue;
-       if (fp->nargs > 1)
-           continue;
-       ep = fp->first;
-       if (ep == NULL || ep->next != NULL)
-           continue;
-       estr = ep->string;
-       if (estr == NULL || estr->next != NULL || estr->type != S_ARRAY)
-           continue;
-       indexstr = estr->value.array->index;
-       if (indexstr->next != NULL)
-           continue;
-       if (fp->nbits > 0) {
-           bp = ep->bits;
-           if (bp == NULL || bp->next != NULL || bp->shift != 0)
-               continue;
-           if (bp->mask != allbitsset(fp->nbits))
-               continue;
-           if (indexstr->type != S_BITSTRING || indexstr->value.bits != bp)
-               continue;
-       } else {
-           if (indexstr->type != S_PARAMETER
-               || indexstr->value.parameter != fp->args)
-               continue;
-       }
-       if (!simplearray(estr->value.array))
-           continue;
-       fp->isarray = 1;
-       fp->fixedlength =
-           (fp->type == T_INTEGER) ? 0 : checkfixedlength(estr->value.array);
-    }
-}
-
-
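-/* For the Z80 reg8 array in the introductory comment, the longest
-   element "(hl)" makes maxlen 4; padding the seven 1-character strings
-   to char [][5] wastes 21 bytes, less than eight pointers on a 32- or
-   64-bit host, so the function below returns 5.  */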
-int checkfixedlength(struct array *ap) {
-    int len, maxlen, wasted, n;
-    struct stringlist *lp;
-
-    maxlen = 0;
-    for (lp = ap->elements; lp != NULL; lp = lp->next) {
-       if (lp->string == NULL)
-           continue;
-       assert(lp->string->type == S_TEXT);
-       len = strlen(lp->string->value.text);
-       if (len > maxlen)
-           maxlen = len;
-    }
-    for (wasted = n = 0, lp = ap->elements; lp != NULL; n++, lp = lp->next) {
-       if (lp->string == NULL)
-           continue;
-       wasted += maxlen - strlen(lp->string->value.text);
-    }
-    if (wasted < n * sizeof(char *))   /* Should be target's sizeof. */
-       return maxlen + 1;
-    return 0;
-}
-
-
-int outputfunction(struct function *fp) {
-    printf("\n");
-    functionheader(stdout, fp);
-    printf("\n{\n"/*}*/);
-    switch (functionswitch(fp, 0, 0)) {
-    case -1:
-       return 1;
-    case 0:
-       if (warnings) {
-           fprintf(stderr, "%s: warning: not all cases of %s covered\n",
-                   progname, fp->name);
-       }
-    }
-    printf(/*{*/"}\n");
-    return 0;
-}
-
-
-void functionarray(struct function *fp) {
-    struct array *ap;
-
-    ap = fp->first->string->value.array;
-    printf("\n");
-    compilesimplearray(&fp->type, fp->name, 0, ap);
-}
-
-
-void functionheader(FILE *f, struct function *fp) {
-    char *last;
-    struct arg *ap;
-
-    compiletype(f, &fp->type);
-    fprintf(f, "%s(", fp->name);
-    last = "";
-    if (fp->nbits > 0) {
-       fprintf(f, "bits code");
-       last = ", ";
-    }
-    for (ap = fp->args; ap != NULL; ap = ap->next) {
-       fprintf(f, "%s", last);
-       compiletype(f, &ap->type);
-       putc(ap->name, f);
-       last = ", ";
-    }
-    if (*last == '\0')
-       fprintf(f, "void");
-    putc(')', f);
-}
-
-
-int simplearray(struct array *ap) {
-    struct stringlist *lp;
-
-    for (lp = ap->elements; lp != NULL; lp = lp->next) {
-       if (lp->string != NULL
-           && (lp->string->next != NULL || lp->string->type != S_TEXT))
-           break;
-    }
-    return (lp == NULL);
-}
-
-
-void compiletype(FILE *f, enum type *tp) {
-    switch (*tp) {
-    case T_UNKNOWN:
-       *tp = T_STRING;
-       /* Fall in... */
-    case T_STRING:
-       fprintf(f, "char *");
-       break;
-    case T_INTEGER:
-       fprintf(f, "bits ");
-       break;
-    default:
-       fprintf(stderr, "compiletype type %d\n", *tp);
-       abort();
-    }
-}
-
-
-/* Generate code for entries in function fp whose bitstring b satisfies
-   the constraint (b & mask) == value.  Return 1 if generated switch
-   always does `return', 0 if not, -1 on error.
-   The algorithm is as follows.  Scan the eligible entries to find the
-   largest set of bits not in the passed-in mask which always have a
-   constant value (are not variable).  One `default' entry is allowed
-   all of whose bits are variable.  For each value of the constant bits,
-   generate a `switch' case and invoke the function recursively with
-   that value included in the constraint parameters.  The recursion
-   stops when no set of constant bits is found, perhaps because the
-   mask parameter has all bits set.
-   This algorithm could be improved.  Currently it will fail if there
-   are input lines "xxyy", "00xx" and "yy00", each of which is default with
-   respect to the others.  The correct behaviour would then be to select
-   a bit that is sometimes constant and deal with those cases first.
-   But this problem has not yet arisen in real life.  */
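-/* For instance, with just the four Z80 "inst" entries shown in the
-   introductory comment, only the top two bits are constant in every
-   pattern, so the outer switch tests (code & 0xc0).  Its 0x40 case
-   matches both 01110110 (halt) and 01rrrsss (ld), so the function
-   recurses with those bits fixed: the inner switch tests
-   (code & 0x3f), returns "halt" for case 0x36, and compiles the ld
-   entry as the fall-through default.  */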
-int functionswitch(struct function *fp, bits mask, bits value) {
-    struct entry *ep, *defaultcase;
-    bits allbits, constbits, missingcases;
-    int nhits, ncases, nconstbits, alwaysreturns;
-
-    indentation++;
-    allbits = allbitsset(fp->nbits);
-    constbits = allbits & ~mask;
-    if (debug) {
-       findent(stderr);
-       fprintf(stderr,
-               "functionswitch(%s): (x & 0x%lx) == 0x%lx; const == 0x%lx\n",
-               fp->name, mask, value, constbits);
-    }
-    defaultcase = NULL;
-    ncases = nhits = 0;
-    alwaysreturns = 1;
-    for (ep = fp->first; ep != NULL; ep = ep->next) {
-       /* If this is not one of the entries under consideration, skip.  */
-       if (ep->done
-           || (ep->mask & mask) != mask || (ep->value & mask) != value)
-           continue;
-       if (debug) {
-           findent(stderr);
-           showentry(stderr, fp, ep, 0);
-       }
-       /* If this entry has no constant bits in the still-variable portion,
-          it's the default.  */
-       if ((constbits & ep->mask) == 0) {
-           if (defaultcase != NULL) {
-               fprintf(stderr,
-                       "%s: function %s: unable to distinguish between:\n",
-                       progname, fp->name);
-               showentry(stderr, fp, defaultcase, 0);
-               showentry(stderr, fp, ep, 0);
-               return -1;
-           }
-           defaultcase = ep;
-           if (debug) {
-               findent(stderr);
-               fprintf(stderr, "^^ default case\n");
-           }
-       } else {
-           if (debug && (constbits & ~ep->mask)) {
-               findent(stderr);
-               fprintf(stderr, "const now 0x%lx\n", constbits & ep->mask);
-           }
-           constbits &= ep->mask;
-           nhits++;
-       }
-    }
-    if (nhits > 0) {
-       indent();
-       if (constbits == allbits)
-           printf("switch (code) {\n"/*}*/);
-       else
-           printf("switch (code & 0x%lx) {\n"/*}*/, constbits);
-       for (ep = fp->first; ep != NULL; ep = ep->next) {
-           /* If this is not one of the entries under consideration, skip.  */
-           if ((ep->mask & mask) != mask || (ep->value & mask) != value)
-               continue;
-           if (ep->done || ep == defaultcase)
-               continue;
-           ncases++;
-           indent();
-           printf("case 0x%lx:\n", ep->value & constbits);
-           switch (functionswitch(fp, mask | constbits,
-                               value | (ep->value & constbits))) {
-           case -1:
-               return -1;
-           case 0:
-               alwaysreturns = 0;
-               indentation++; indent(); indentation--;
-               printf("break;\n");
-           }
-       }
-       indent();
-       printf(/*{*/"}\n");
-    }
-    nconstbits = bitcount(constbits);
-    missingcases = ((nconstbits == MAXBITS) ? 0 : (bits) 1 << nconstbits) - ncases;
-    if (alwaysreturns) {
-       switch (missingcases) {
-       case 0:
-           if (defaultcase != NULL) {
-               fprintf(stderr, "%s: warning: redundant entry:\n", progname);
-               showentry(stderr, fp, defaultcase, 0);
-               defaultcase = NULL;
-           }
-           break;
-       case 1:
-           if (defaultcase != NULL && nconstbits != 0) {
-               fprintf(stderr,
-                       "%s: warning: variable bit(s) could be constant:\n",
-                       progname);
-               showentry(stderr, fp, defaultcase, constbits);
-               break;
-           }
-           /* Fall in... */
-       default:
-           alwaysreturns = 0;
-       }
-    }
-    if (defaultcase != NULL) {
-       /* If defaultcase has some constant bits of its own, recursion will
-          check that they have the required value.  */
-       if ((defaultcase->mask & ~mask) == 0) {
-           alwaysreturns = 1;
-           if (compilestring(-1, defaultcase->string, fp->type) != 0)
-               return -1;
-           defaultcase->done = 1;
-       } else {
-           indentation--;
-           alwaysreturns = functionswitch(fp, mask, value);
-           indentation++;
-       }
-    }
-    indentation--;
-    return alwaysreturns;
-}
-
-
-int compilestring(int assignto, struct string *sp, enum type type) {
-    int tempno;
-
-    tempno = walkstring(sp, COUNTARRAYS, assignto);
-    if (tempno > assignto) {
-       indent();
-       printf("{\n"/*}*/);
-       indentation++;
-       (void) walkstring(sp, DECLAREARRAYS, assignto);
-       if (walkstring(sp, COMPILEARRAYS, assignto) < 0)
-           return 1;
-    }
-    if (compilecheckedstring(assignto, sp, type) != 0)
-       return 1;
-    if (tempno > assignto) {
-       indentation--;
-       indent();
-       printf(/*{*/"}\n");
-    }
-    return 0;
-}
-
-
-int compilecheckedstring(int assignto, struct string *sp, enum type type) {
-    compileassign(assignto);
-    if (compileconcat(sp, type) != 0)
-       return 1;
-    printf(";\n");
-    return 0;
-}
-
-
-void compileassign(int assignto) {
-    indent();
-    if (assignto < 0)
-       printf("return ");
-    else {
-       compiletemp(assignto);
-       printf(" = ");
-    }
-}
-
-
-void compiletemp(int tempno) {
-    printf("t__%d", tempno);
-}
-
-
-void compiletext(char *s) {
-    putchar('"');
-    if (s != NULL) {
-       for ( ; *s != '\0'; s++) {
-           switch (*s) {
-           case '"':
-           case '\\':
-               putchar('\\');
-           }
-           putchar(*s);
-       }
-    }
-    putchar('"');
-}
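
compiletext() emits s as a C string literal, escaping only the two characters that need a backslash inside double quotes. For example, an input of say "x"\y comes out as "say \"x\"\\y", and a NULL pointer yields the empty literal "".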
-
-
-int compileconcat(struct string *sp, enum type type) {
-    int elements;
-    struct string *sp1;
-    char *last;
-
-    if (sp == NULL)
-       return compilenull(type);
-    if (sp->next == NULL)
-       return compilesimple(sp, type);
-    if (type != T_INTEGER) {
-       for (elements = 0, sp1 = sp; sp1 != NULL; elements++, sp1 = sp1->next) ;
-       printf("dis_concat%d(", elements);
-    }
-    last = "";
-    for (sp1 = sp; sp1 != NULL; sp1 = sp1->next) {
-       printf("%s", last);
-       if (type != T_INTEGER)
-           last = ", ";
-       if (sp1->type == S_ARRAY)
-           compilearrayref(sp1->value.array);
-       else
-           if (compilesimple(sp1, type) != 0)
-               return 1;
-    }
-    if (type != T_INTEGER)
-       printf(")");
-    return 0;
-}
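
For a multi-piece string result, compileconcat() wraps the pieces in a call to a runtime helper named by arity, dis_concatN; for T_INTEGER results it emits the pieces back to back, with no helper call and no separators. A sketch of the string case with three pieces (the operand expressions are illustrative):

    dis_concat3("lw\t", t__2[((code & 0x1f0000) >> 16)], ", 0(sp)")

The dis_concatN helpers are assumed to be supplied by whatever program links against the generated disassembler.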
-
-
-int compilenull(enum type type) {
-    if (type == T_INTEGER) {
-       fprintf(stderr, "%s: empty integer expression\n", progname);
-       return 1;
-    }
-    printf("\"\"");
-    return 0;
-}
-
-
-int compilesimple(struct string *sp, enum type type) {
-    if (sp == NULL)
-       return compilenull(type);
-    switch (sp->type) {
-    case S_TEXT:
-       if (type == T_INTEGER)
-           printf("%s", sp->value.text);
-       else
-           compiletext(sp->value.text);
-       break;
-    case S_BITSTRING:
-       compilebitstring(sp->value.bits);
-       break;
-    case S_BITSPLICE:
-       compilebitsplice(sp->value.bitsplice);
-       break;
-    case S_PARAMETER:
-       putchar(sp->value.parameter->name);
-       break;
-    case S_FUNCTIONCALL:
-       return compilefunctioncall(sp);
-    case S_ARRAY:
-       if (compilearrayref(sp->value.array) != 0)
-           return 1;
-       break;
-    default:
-       fprintf(stderr, "compilesimple case %d\n", sp->type);
-       abort();
-    }
-    return 0;
-}
-
-
-int compilearrayref(struct array *ap) {
-    compiletemp(ap->tempno);
-    if (simplearray(ap)) {
-       printf("[");
-       if (compileconcat(ap->index, T_INTEGER) != 0)
-           return 1;
-       printf("]");
-    }
-    return 0;
-}
-
-
-int compilefunctioncall(struct string *sp) {
-    struct function *fp;
-    struct stringlist *actualp;
-    struct arg *formalp;
-    char *last;
-    int nbits;
-    enum type formaltype;
-
-    assert(sp->type == S_FUNCTIONCALL);
-    fp = sp->value.functioncall->function;
-    printf("%s%c", fp->name, fp->isarray ? '[' : '(');
-    last = "";
-    nbits = fp->nbits;
-    formalp = fp->args;
-    actualp = sp->value.functioncall->args;
-    while (actualp != NULL) {
-       if (nbits > 0) {
-           nbits = 0;
-           formaltype = T_INTEGER;
-       } else {
-           if (formalp == NULL) {
-               fprintf(stderr, "%s: too many arguments to %s:\n", progname,
-                       fp->name);
-               showstring(stderr, sp);
-               putc('\n', stderr);
-               return 1;
-           }
-           formaltype = formalp->type;
-           formalp = formalp->next;
-       }
-       if (actualp->type != T_UNKNOWN && actualp->type != formaltype) {
-           fprintf(stderr, "%s: argument to %s has the wrong type:\n",
-                   progname, fp->name);
-           showstring(stderr, actualp->string);
-           putc('\n', stderr);
-           return 1;
-       }
-       printf("%s", last);
-       last = ", ";
-       if (compileconcat(actualp->string, formaltype) != 0)
-           return 1;
-       actualp = actualp->next;
-    }
-    putchar(fp->isarray ? ']' : ')');
-    return 0;
-}
-
-
-int walkstring(struct string *sp, enum walkstringop op, int tempno) {
-    struct stringlist *lp;
-    struct array *ap;
-
-    for ( ; sp != NULL; sp = sp->next) {
-       switch (sp->type) {
-       case S_ARRAY:
-           ap = sp->value.array;
-           for (lp = ap->elements; lp != NULL; lp = lp->next)
-               tempno = walkstring(lp->string, op, tempno);
-           tempno = walkstring(ap->index, op, tempno);
-           ap->tempno = ++tempno;
-           switch (op) {
-           case DECLAREARRAYS:
-               if (simplearray(ap)) {
-                   indent();
-                   printf("static ");
-                   compilesimplearray(&ap->type, NULL, tempno, ap);
-               } else
-                   declarearray(ap);
-               break;
-           case COMPILEARRAYS:
-               if (!simplearray(ap))
-                   if (compilearray(ap) != 0)
-                       return -1;
-               break;
-           default:
-               break;
-           }
-           break;
-       case S_FUNCTIONCALL:
-           for (lp = sp->value.functioncall->args; lp != NULL; lp = lp->next)
-               tempno = walkstring(lp->string, op, tempno);
-           break;
-       default:
-           break;
-       }
-    }
-    return tempno;
-}
-
-
-int compilearray(struct array *ap) {
-    struct stringlist *ep;
-    int i;
-
-    indent();
-    printf("switch (");
-    if (compileconcat(ap->index, T_INTEGER) != 0)
-       return 1;
-    printf(") {\n"/*}*/);
-    for (i = 0, ep = ap->elements; ep != NULL; i++, ep = ep->next) {
-       indent();
-       printf("case %d:\n", i);
-       indentation++;
-       if (compilecheckedstring(ap->tempno, ep->string, ap->type) != 0)
-           return 1;
-       indent();
-       printf("break;\n");
-       indentation--;
-    }
-    indent();
-    printf(/*{*/"}\n");
-    return 0;
-}
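
compilearray() lowers a non-simple array to a switch on the array's index expression, each case assigning one element to the array's temporary via compilecheckedstring(). Illustratively (the temporary number, index bits, and element values are assumed):

    switch (((code & 0x1000000) >> 24)) {
    case 0:
        t__1 = "s";
        break;
    case 1:
        t__1 = "d";
        break;
    }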
-
-
-void compilesimplearray(enum type *tp, char *name, int num, struct array *ap) {
-    struct stringlist *lp;
-    int fixedlength;
-
-    fixedlength = (*tp == T_INTEGER) ? 0 : checkfixedlength(ap);
-    if (fixedlength > 0)
-       printf("char ");
-    else
-       compiletype(stdout, tp);
-    if (name != NULL)
-       printf("%s", name);
-    else
-       compiletemp(num);
-    printf("[]");
-    if (fixedlength > 0)
-       printf("[%d]", fixedlength);
-    printf(" = {\n"/*}*/);
-    indentation++;
-    for (lp = ap->elements; lp != NULL; lp = lp->next) {
-       indent();
-       compilesimple(lp->string, lp->type);
-       printf(",\n");
-    }
-    indentation--;
-    indent();
-    printf(/*{*/"};\n");
-}
-
-
-void declarearray(struct array *ap) {
-    indent();
-    compiletype(stdout, &ap->type);
-    compiletemp(ap->tempno);
-    printf(";\n");
-}
-
-
-void compilebitstring(struct bits *bp) {
-    printf("(");
-    if (bp->shift != 0)
-       printf("(");
-    printf("code & 0x%lx", bp->mask);
-    if (bp->shift != 0)
-       printf(") >> %d", bp->shift);
-    printf(")");
-}
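
compilebitstring() emits the field-extraction expression for one bit range, applying the mask always and the shift only when nonzero: a field in bits 11..15 of the instruction word comes out as ((code & 0xf800) >> 11), while an unshifted low field is just (code & 0x1f).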
-
-
-void compilebitsplice(struct bitsplice *splicep) {
-    struct bitsplicebits *bsp;
-    char *last = "";
-
-    printf("(");
-    for (bsp = splicep->splice; bsp != NULL; bsp = bsp->next) {
-       printf("%s", last);
-       last = " | ";
-       if (bsp->type == S_PARAMETER)
-           putchar(bsp->value.arg->name);
-       else {
-           assert(bsp->type == S_BITSTRING);
-           if (bsp->value.mask == 0)
-               printf("code");
-           else
-               printf("(code & 0x%lx)", bsp->value.mask);
-       }
-       if (bsp->shift > 0)
-           printf(" << %d", bsp->shift);
-       else if (bsp->shift < 0)
-           printf(" >> %d", -bsp->shift);
-    }
-    if (splicep->entry.value != 0)
-       printf("%s0x%lx", last, splicep->entry.value);
-    printf(")");
-}
-
-
-int bitcount(bits x) {
-    int nbits;
-
-    for (nbits = 0; x != 0; x >>= 1) {
-       if (x & 1)
-           nbits++;
-    }
-    return nbits;
-}
-
-
-bits allbitsset(int nbits) {
-    return (nbits == MAXBITS) ? ~0 : (1 << nbits) - 1;
-}
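
bitcount() and allbitsset() are the usual popcount and low-mask helpers. The MAXBITS special case in allbitsset() matters: shifting a value by its full width is undefined behavior in C, so the all-bits mask must be produced as ~0 rather than (1 << MAXBITS) - 1. For example:

    assert(bitcount(0xf0) == 4);
    assert(allbitsset(4) == 0xf);   /* (1 << 4) - 1 */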
-
-
-void findent(FILE *f) {
-    int i;
-
-    for (i = 1; i < indentation; i += 2)
-       putc('\t', f);
-    if (i == indentation)
-       fputs("    ", f);
-}
-
-
-void indent() {
-    findent(stdout);
-}
-
-
-void *xrealloc(char *oldp, size_t size) {
-    void *p;
-
-    if (oldp == NULL)
-       p = malloc(size);
-    else
-       p = realloc(oldp, size);
-    if (p == NULL) {
-       fprintf(stderr, "%s: allocation of %d bytes failed: %s\n", progname,
-               (int) size, strerror(errno));
-       exit(1);
-    }
-    return p;
-}
-
-
-void *xmalloc(size_t size) {
-    return xrealloc(NULL, size);
-}
-
-
-void *xstrdup(char *s) {
-    char *p;
-    size_t i = strlen(s) + 1;
-
-    p = xmalloc(i);
-    strlcpy(p, s, i);
-    return p;
-}
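
xrealloc(), xmalloc(), and xstrdup() are exit-on-failure allocation wrappers, so callers never check for NULL. A hedged usage sketch (token is an illustrative variable, not from this file):

    struct string *sp = xmalloc(sizeof *sp);   /* exits the program on OOM */
    sp->next = NULL;
    sp->value.text = xstrdup(token);           /* caller owns the copy */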
-
-
-int prematureeof() {
-    fprintf(stderr, "%s: %s(%d): premature end of file\n", progname, filename,
-           lineno);
-    return 1;
-}
diff --git a/osfmk/ddb/nlist.h b/osfmk/ddb/nlist.h
deleted file mode 100644 (file)
index a677d77..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.11.2  1995/01/06  19:11:11  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     Add padding for alpha, make n_other unsigned,
- *     fix erroneous def of N_FN.
- *     [1994/10/14  03:40:03  dwm]
- *
- * Revision 1.1.11.1  1994/09/23  01:23:37  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:49  ezf]
- * 
- * Revision 1.1.4.3  1993/07/27  18:28:42  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:44  elliston]
- * 
- * Revision 1.1.4.2  1993/06/02  23:13:34  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:58:08  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:24:29  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.4  91/05/14  15:38:20  mrt
- *     Correcting copyright
- * 
- * Revision 2.3  91/02/05  17:07:42  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:20:26  mrt]
- * 
- * 11-Aug-88  David Golub (dbg) at Carnegie-Mellon University
- *     Added n_un, n_strx definitions for kernel debugger (from
- *     a.out.h).
- *
- */
-/* CMU_ENDHIST */
-/*
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon 
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *  nlist.h - symbol table entry  structure for an a.out file
- *  derived from FSF's a.out.gnu.h
- *
- */
-
-#ifndef _DDB_NLIST_H_
-#define _DDB_NLIST_H_
-
-struct nlist {
-       union n_un {
-           char        *n_name;        /* symbol name */
-           long        n_strx;         /* index into file string table */
-       } n_un;
-       unsigned char n_type;   /* type flag, i.e. N_TEXT etc; see below */
-       unsigned char n_other;  /* unused */
-       short   n_desc;         /* see <stab.h> */
-#if    defined(__alpha)
-       int     n_pad;          /* alignment, used to carry framesize info */
-#endif
-       vm_offset_t n_value;    /* value of this symbol (or sdb offset) */
-};
-
-/*
- * Simple values for n_type.
- */
-#define        N_UNDF  0               /* undefined */
-#define        N_ABS   2               /* absolute */
-#define        N_TEXT  4               /* text */
-#define        N_DATA  6               /* data */
-#define        N_BSS   8               /* bss */
-#define        N_FN    0x1e            /* file name symbol */
-#define        N_EXT   1               /* external bit, or'ed in */
-#define        N_TYPE  0x1e            /* mask for all the type bits */
-#define        N_STAB  0xe0            /* if any of these bits set, a SDB entry */
-
-#endif /* !_DDB_NLIST_H_ */
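
A minimal sketch of how these masks were meant to compose when classifying a symbol (symtab and i are illustrative, and the per-case handling is left as an assumption):

    struct nlist *np = &symtab[i];        /* entry from a loaded a.out image */
    if (np->n_type & N_STAB) {
        /* symbolic-debugging entry: interpret n_desc per <stab.h> */
    } else {
        int external = np->n_type & N_EXT;    /* external-linkage bit */
        switch (np->n_type & N_TYPE) {
        case N_TEXT: /* text-segment symbol */  break;
        case N_DATA: /* data-segment symbol */  break;
        case N_UNDF: /* undefined reference */  break;
        }
    }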
diff --git a/osfmk/ddb/orig/db_print.c b/osfmk/ddb/orig/db_print.c
deleted file mode 100644 (file)
index 7e91ec9..0000000
+++ /dev/null
@@ -1,1373 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.2  1998/04/29 17:35:25  mburg
- * MK7.3 merger
- *
- * Revision 1.2.85.1  1998/02/03  09:24:09  gdt
- *     Merge up to MK7.3
- *     [1998/02/03  09:10:24  gdt]
- *
- * Revision 1.2.81.1  1997/03/27  18:46:38  barbou
- *     ri-osc CR1565 - clean up db_print_act, removing old !USER code
- *     which had gotten stale (the option made little sense here anyway).
- *     Added routine db_show_one_thread() to take either act/shuttle and
- *     do something sensible. [dwm]  Also rationalize plain, /u and /l
- *     output for "show act", "show task" and "show all acts".
- *     [1995/08/28  15:47:00  bolinger]
- *     [97/02/25            barbou]
- * 
- * Revision 1.2.31.13  1996/01/09  19:16:02  devrcs
- *     Alpha kdebug Changes:
- *       Correct various header spacing to account for 64-bit addresses.
- *       Modify db_show_all_*() functions, so they can be called from kdebug.
- *       ( There's no way to call with "char *modif", so added NULL check. )
- *       Changed db_error() calls to DB_ERROR() macro, so we return on error
- *       on Alpha (we gotta return to kdebug).
- *     Changed declarations of 'register foo' to 'register int foo'.
- *     [1995/12/01  21:42:20  jfraser]
- * 
- *     Merged '64-bit safe' changes from DEC alpha port.
- *     [1995/11/21  18:03:24  jfraser]
- * 
- * Revision 1.2.31.12  1995/10/09  17:03:30  devrcs
- *     Merge forward.
- *     [1995/08/24  20:56:42  watkins]
- * 
- * Revision 1.2.59.1  1995/08/04  17:03:17  watkins
- *     Change to stack per shuttle model.
- *     [1995/07/19  20:26:13  watkins]
- * 
- * Revision 1.2.31.11  1995/09/18  19:08:49  devrcs
- *     Merge forward.
- *     [1995/08/24  20:56:42  watkins]
- * 
- * Revision 1.2.59.1  1995/08/04  17:03:17  watkins
- *     Change to stack per shuttle model.
- *     [1995/07/19  20:26:13  watkins]
- * 
- * Revision 1.2.31.10  1995/05/19  15:43:04  bernadat
- *     Fixed db_print_act for empty activations.
- *     Let thread swapping be configurable.
- *     [95/05/19            bernadat]
- * 
- * Revision 1.2.31.9  1995/05/14  18:10:25  dwm
- *     ri-osc CR1304 - merge (nmk19_latest - nmk19b1) diffs into mainline.
- *     mk6 CR938 - restore mach_msg hot path
- *     remove use of now-defunct fields in thread [mmp,dwm]
- *     [1995/05/14  17:25:05  dwm]
- * 
- * Revision 1.2.31.8  1995/04/07  18:53:00  barbou
- *     VM Merge - Task Swapper.
- *     Renamed TH_SWAPPED to TH_STACK_HANDOFF and swap_func to continuation
- *     to resolve name conflict.
- *     From kernel/kdb/kdb_mach.c:
- *     Put in changes for swapping.
- *     [1991/11/21  20:32:15  mmp]
- *     [94/07/27            barbou]
- *     [95/03/08            barbou]
- * 
- * Revision 1.2.31.7  1995/02/28  01:58:38  dwm
- *     mk6 CR1120 - Merge mk6pro_shared into cnmk_shared
- *     * Rev1.2.43.1  1995/01/27  22:01:26  bolinger
- *     * Fix ri-osc CR977:  Make "show space" and "show ipc_port" give
- *     * accurate count of ports active in IPC space.  Make "show ipc_port"
- *     * output task-visible port name.
- *     [1995/02/28  01:12:46  dwm]
- * 
- * Revision 1.2.31.6  1995/02/23  21:43:34  alanl
- *     Fix db_show_one_task_vm for thread_act_ts.
- *     [95/01/09            rwd]
- * 
- *     Merged with DIPC2_SHARED.
- *     [95/01/04            alanl]
- * 
- * Revision 1.2.31.5  1995/01/10  04:49:52  devrcs
- *     mk6 CR801 - merge up from nmk18b4 to nmk18b7
- *     Fix "sh thr/ul"; no cont. to print, fix pri/policy format.
- *     * Rev 1.2.31.4  1994/10/11  16:35:58  emcmanus
- *       Added "show runq" and "show shuttle".
- *     [1994/12/09  20:36:49  dwm]
- * 
- *     mk6 CR668 - 1.3b26 merge
- *     * Revision 1.2.8.6  1994/05/06  18:39:37  tmt
- *     Merged osc1.3dec/shared with osc1.3b19
- *     Merge Alpha changes into osc1.312b source code.
- *     64bit cleanup.
- *     * End1.3merge
- *     [1994/11/04  08:49:52  dwm]
- * 
- * Revision 1.2.31.3  1994/09/23  01:20:51  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:10:41  ezf]
- * 
- * Revision 1.2.31.2  1994/06/14  17:21:05  bolinger
- *     Merge up to NMK17.2.
- *     [1994/06/14  17:20:35  bolinger]
- * 
- * Revision 1.2.23.4  1994/04/15  18:41:31  paire
- *     Changed interface of db_task_from_space routine.
- *     [94/03/31            paire]
- * 
- * Revision 1.2.23.3  1994/03/07  16:37:48  paire
- *     Merge with Intel R1_1
- *     Change from NMK14.10 [1993/11/15  16:06:21  rwd]
- * 
- *     Enhanced pretty print routine and added db_task_from_space.
- *     Change from NMK14.10 [93/09/24            sjs]
- *     [94/02/21            paire]
- * 
- *     Exported ANSI prototype of db_port_kmsg_count routine.
- *     Added header file include for the declaration of db_norma_ipc routine.
- *     [94/02/15            paire]
- * 
- * Revision 1.2.23.2  1994/02/11  14:21:58  paire
- *     Added new vm_print.h header file for db_vm declaration.
- *     [94/02/09            paire]
- * 
- * Revision 1.2.23.1  1994/02/08  10:58:19  bernadat
- *     print out msgcount for each port in db_port_iterate
- *     Change from NORMA_MK14.6(August 93) [1993/07/27  12:35:17  mmp]
- * 
- *     Removed definition of db_maxoff (got from <ddb/db_sym.h>).
- *     [93/08/12            paire]
- * 
- *     Show ipc_space_remote msg counts only if NORMA_IPC is on
- *     [93/07/21            bernadat]
- * 
- *     Add /s option to "show ipc_port" to pick out port sets.
- *     Change from NORMA_MK14.6 [1993/02/17  16:29:54  dwm]
- *     [93/07/16            bernadat]
- *     [94/02/07            bernadat]
- * 
- * Revision 1.2.20.8  1994/06/08  19:11:15  dswartz
- *     Preemption merge.
- *     [1994/06/08  19:10:18  dswartz]
- * 
- * Revision 1.2.20.7  1994/04/30  21:28:24  bolinger
- *     Thread control ops synchronization:  now that TH_SUSP is back,
- *     enable ddb to show it when printing thread state.
- *     [1994/04/28  21:55:42  bolinger]
- * 
- * Revision 1.2.20.6  1994/03/17  22:35:31  dwm
- *     The infamous name change:  thread_activation + thread_shuttle = thread.
- *     [1994/03/17  21:25:46  dwm]
- * 
- * Revision 1.2.20.5  1994/01/26  15:43:37  bolinger
- *     Move kernel_stack from thread to activation.
- *     [1994/01/25  21:53:11  bolinger]
- * 
- * Revision 1.2.20.4  1994/01/12  17:50:44  dwm
- *     Coloc: initial restructuring to follow Utah model.
- *     [1994/01/12  17:13:12  dwm]
- * 
- * Revision 1.2.20.3  1993/11/18  18:11:47  dwm
- *     Coloc: remove continuations entirely; they are incompatible
- *     with migration, and their volume is obfuscatory.
- *     [1993/11/18  18:06:27  dwm]
- * 
- * Revision 1.2.20.2  1993/10/12  16:38:50  dwm
- *     CoLoc: neuter continuations, ifdef USE_CONTINUATIONS.
- *     [1993/10/12  16:14:46  dwm]
- * 
- * Revision 1.2.8.4  1993/08/11  20:38:06  elliston
- *     Add ANSI Prototypes.  CR #9523.
- *     [1993/08/11  03:33:51  elliston]
- * 
- * Revision 1.2.8.3  1993/07/27  18:27:55  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:12:39  elliston]
- * 
- * Revision 1.2.8.2  1993/06/09  02:20:35  gm
- *     CR9176 - ANSI C violations: trailing tokens on CPP
- *     directives, extra semicolons after decl_ ..., asm keywords
- *     [1993/06/07  18:57:22  jeffc]
- * 
- *     Removed a '#if MACH_FIXPRI' which somehow survived the purge.  CR #9131.
- *     [1993/05/11  20:56:00  dswartz]
- * 
- * Revision 1.2  1993/04/19  16:02:50  devrcs
- *     Added printout of thread scheduling policy to long form
- *     of thread display.
- *     [93/01/28            jat]
- * 
- *     Changes from mk78:
- *     Removed unused variable from db_show_regs().
- *     [92/05/16            jfriedl]
- *     Converted some db_printsyms to db_task_printsyms.
- *     [92/04/10            danner]
- *     Changed db_print_thread so that both display formats
- *     show the floating-point-used status of the thread.
- *     [92/03/16            rpd]
- *     [93/02/02            bruel]
- * 
- * Revision 1.1  1992/09/30  02:01:18  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.11.3.2  92/04/08  15:43:10  jeffreyh
- *     Added i option to show thread. This gives wait state information.
- *     [92/04/08            sjs]
- * 
- * Revision 2.11.3.1  92/03/03  16:13:34  jeffreyh
- *     Pick up changes from TRUNK
- *     [92/02/26  11:00:01  jeffreyh]
- * 
- * Revision 2.13  92/02/20  18:34:28  elf
- *     Fixed typo.
- *     [92/02/20            elf]
- * 
- * Revision 2.12  92/02/19  15:07:47  elf
- *     Added db_thread_fp_used, to avoid machine-dependent conditionals.
- *     [92/02/19            rpd]
- * 
- *     Added 'F' flag to db_thread_stat showing if the thread has a valid
- *     FPU context. Tested on i386 and pmax.
- *     [92/02/17            kivinen]
- * 
- * Revision 2.11  91/11/12  11:50:32  rvb
- *     Added OPTION_USER ("/u") to db_show_all_threads, db_show_one_thread,
- *     db_show_one_task.  Without it, we display old-style information.
- *     [91/10/31            rpd]
- * 
- * Revision 2.10  91/10/09  16:01:48  af
- *     Supported "show registers" for non current thread.
- *     Changed display format of thread and task information.
- *     Changed "show thread" to print current thread information 
- *       if no thread is specified.
- *     Added "show_one_task" for "show task" command.
- *     Added IPC port print routines for "show ipc_port" command.
- *     [91/08/29            tak]
- * 
- * Revision 2.9  91/08/03  18:17:19  jsb
- *     In db_print_thread, if the thread is swapped and there is a
- *     continuation function, print the function name in parentheses
- *     instead of '(swapped)'.
- *     [91/07/04  09:59:27  jsb]
- * 
- * Revision 2.8  91/07/31  17:30:43  dbg
- *     Revise scheduling state machine.
- *     [91/07/30  16:43:42  dbg]
- * 
- * Revision 2.7  91/07/09  23:15:57  danner
- *     Fixed a few printf that should be db_printfs. 
- *     [91/07/08            danner]
- * 
- * Revision 2.6  91/05/14  15:35:25  mrt
- *     Correcting copyright
- * 
- * Revision 2.5  91/02/05  17:06:53  mrt
- *     Changed to new Mach copyright
- *     [91/01/31  16:18:56  mrt]
- * 
- * Revision 2.4  90/10/25  14:43:54  rwd
- *     Changed db_show_regs to print unsigned.
- *     [90/10/19            rpd]
- *     Generalized the watchpoint support.
- *     [90/10/16            rwd]
- * 
- * Revision 2.3  90/09/09  23:19:52  rpd
- *     Avoid totally incorrect guesses of symbol names for small values.
- *     [90/08/30  17:39:08  af]
- * 
- * Revision 2.2  90/08/27  21:51:49  dbg
- *     Insist that 'show thread' be called with an explicit address.
- *     [90/08/22            dbg]
- * 
- *     Fix type for db_maxoff.
- *     [90/08/20            dbg]
- * 
- *     Do not dereference the "valuep" field of a variable directly,
- *     call the new db_read/write_variable functions instead.
- *     Reflected changes in symbol lookup functions.
- *     [90/08/20            af]
- *     Reduce lint.
- *     [90/08/10  14:33:44  dbg]
- * 
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     Author: David B. Golub, Carnegie Mellon University
- *     Date:   7/90
- */
-
-/*
- * Miscellaneous printing.
- */
-#include <dipc.h>
-#include <task_swapper.h>
-
-#include <string.h>                    /* For strlen() */
-#include <mach/port.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/thread_swap.h>
-#include <kern/queue.h>
-#include <ipc/ipc_port.h>
-#include <ipc/ipc_space.h>
-#include <ipc/ipc_pset.h>
-#include <vm/vm_print.h>               /* for db_vm() */
-
-#include <machine/db_machdep.h>
-#include <machine/thread.h>
-
-#include <ddb/db_lex.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>             /* For db_printf() */
-#include <ddb/db_print.h>
-
-#include <kern/sf.h>
-#include <kern/sp_mk.h>        /*** ??? fix so this can be removed ***/
-
-#if    TASK_SWAPPER
-#include <kern/task_swap.h>
-#endif /* TASK_SWAPPER */
-
-/* Prototypes for functions local to this file.  XXX -- should be static!
- */
-
-char *db_act_stat(
-       register thread_act_t   thr_act,
-       char                    *status);
-
-char *db_act_swap_stat(
-       register thread_act_t   thr_act,
-       char                    *status);
-
-void db_print_task(
-       task_t  task,
-       int     task_id,
-       int     flag);
-
-void db_reset_print_entry(
-       void);
-
-void db_print_one_entry(
-       ipc_entry_t     entry,
-       int             index,
-       mach_port_name_t        name,
-       boolean_t       is_pset);
-
-int db_port_iterate(
-       thread_act_t    thr_act,
-       boolean_t       is_pset,
-       boolean_t       do_output);
-
-ipc_port_t db_lookup_port(
-       thread_act_t    thr_act,
-       int             id);
-
-static void db_print_port_id(
-       int             id,
-       ipc_port_t      port,
-       unsigned        bits,
-       int             n);
-
-void db_print_act(
-       thread_act_t    thr_act,
-       int             act_id,
-       int             flag);
-
-void db_print_space(
-       task_t  task,
-       int     task_id,
-       int     flag);
-
-void db_print_task_vm(
-       task_t          task,
-       int             task_id,
-       boolean_t       title,
-       char            *modif);
-
-void db_system_stats(void);
-
-
-void
-db_show_regs(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif)
-{
-       register struct db_variable *regp;
-       db_expr_t       value;
-       db_addr_t       offset;
-       char *          name;
-       register int    i; 
-       struct db_var_aux_param aux_param;
-       task_t          task = TASK_NULL;
-
-       aux_param.modif = modif;
-       aux_param.thr_act = THR_ACT_NULL;
-       if (db_option(modif, 't')) {
-           if (have_addr) {
-               if (!db_check_act_address_valid((thread_act_t)addr))
-                   return;
-               aux_param.thr_act = (thread_act_t)addr;
-           } else
-               aux_param.thr_act = db_default_act;
-           if (aux_param.thr_act != THR_ACT_NULL)
-               task = aux_param.thr_act->task;
-       }
-       for (regp = db_regs; regp < db_eregs; regp++) {
-           if (regp->max_level > 1) {
-               db_printf("bad multi-suffixed register %s\n", regp->name);
-               continue;
-           }
-           aux_param.level = regp->max_level;
-           for (i = regp->low; i <= regp->high; i++) {
-               aux_param.suffix[0] = i;
-               db_read_write_variable(regp, &value, DB_VAR_GET, &aux_param);
-               if (regp->max_level > 0)
-                   db_printf("%s%d%*s", regp->name, i, 
-                               12-strlen(regp->name)-((i<10)?1:2), "");
-               else
-                   db_printf("%-12s", regp->name);
-               db_printf("%#*N", 2+2*sizeof(vm_offset_t), value);
-               db_find_xtrn_task_sym_and_offset((db_addr_t)value, &name, 
-                                                       &offset, task);
-               if (name != 0 && offset <= db_maxoff && offset != value) {
-                   db_printf("\t%s", name);
-                   if (offset != 0)
-                       db_printf("+%#r", offset);
-               }
-               db_printf("\n");
-           }
-       }
-}
-
-#define OPTION_LONG            0x001           /* long print option */
-#define OPTION_USER            0x002           /* print ps-like stuff */
-#define OPTION_INDENT          0x100           /* print with indent */
-#define OPTION_THREAD_TITLE    0x200           /* print thread title */
-#define OPTION_TASK_TITLE      0x400           /* print task title */
-
-#ifndef        DB_TASK_NAME
-#define DB_TASK_NAME(task)                     /* no task name */
-#define DB_TASK_NAME_TITLE     ""              /* no task name */
-#endif /* DB_TASK_NAME */
-
-#ifndef        db_act_fp_used
-#define db_act_fp_used(thr_act)        FALSE
-#endif
-
-char *
-db_act_stat(
-       register thread_act_t   thr_act,
-       char                    *status)
-{
-       register char *p = status;
-       
-       if (!thr_act->active) {
-               *p++ = 'D',
-               *p++ = 'y',
-               *p++ = 'i',
-               *p++ = 'n',
-               *p++ = 'g';
-               *p++ = ' ';
-       } else if (!thr_act->thread) {
-               *p++ = 'E',
-               *p++ = 'm',
-               *p++ = 'p',
-               *p++ = 't',
-               *p++ = 'y';
-               *p++ = ' ';
-       } else {
-               thread_t athread = thr_act->thread;
-
-               *p++ = (athread->state & TH_RUN)  ? 'R' : '.';
-               *p++ = (athread->state & TH_WAIT) ? 'W' : '.';
-               *p++ = (athread->state & TH_SUSP) ? 'S' : '.';
-               *p++ = (athread->state & TH_SWAPPED_OUT) ? 'O' : '.';
-               *p++ = (athread->state & TH_UNINT) ? 'N' : '.';
-               /* show if the FPU has been used */
-               *p++ = db_act_fp_used(thr_act) ? 'F' : '.';
-       }
-       *p++ = 0;
-       return(status);
-}
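
The status buffer holds six flag characters in a fixed order (run, wait, suspended, swapped out, uninterruptible, FPU used), so a runnable thread that has touched the FPU reads "R....F" and a suspended waiter ".WS..."; activations that are dying or have no thread print the literal "Dying " or "Empty " instead.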
-
-char *
-db_act_swap_stat(
-       register thread_act_t   thr_act,
-       char                    *status)
-{
-       register char *p = status;
-
-#if    THREAD_SWAPPER
-       switch (thr_act->swap_state & TH_SW_STATE) {
-           case TH_SW_UNSWAPPABLE:
-               *p++ = 'U';
-               break;
-           case TH_SW_IN:
-               *p++ = 'I';
-               break;
-           case TH_SW_GOING_OUT:
-               *p++ = 'G';
-               break;
-           case TH_SW_WANT_IN:
-               *p++ = 'W';
-               break;
-           case TH_SW_OUT:
-               *p++ = 'O';
-               break;
-           case TH_SW_COMING_IN:
-               *p++ = 'C';
-               break;
-           default:
-               *p++ = '?';
-               break;
-       }
-       *p++ = (thr_act->swap_state & TH_SW_TASK_SWAPPING) ? 'T' : '.';
-#endif /* THREAD_SWAPPER */
-       *p++ = 0;
-
-       return status;
-}
-
-char   *policy_list[] = { "TS", "RR", "??", "FF",
-                          "??", "??", "??", "BE"};
-
-void
-db_print_act(
-       thread_act_t    thr_act,
-       int             act_id,
-       int             flag)
-{
-       thread_t athread;
-       char status[8];
-       char swap_status[3];
-       char *indent = "";
-       int      policy;
-
-       if (!thr_act) {
-           db_printf("db_print_act(NULL)!\n");
-           return;
-       }
-
-       athread = thr_act->thread;
-       if (flag & OPTION_USER) {
-
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_INDENT)
-                   indent = "    ";
-               if (flag & OPTION_THREAD_TITLE) {
-                   db_printf("%s ID:   ACT     STAT  SW STACK    SHUTTLE", indent);
-                   db_printf("  SUS  PRI  WAIT_FUNC\n");
-               }
-               policy = (athread ? athread->policy : 2);
-               db_printf("%s%3d%c %0*X %s %s %0*X %0*X %3d %3d/%s ",
-                   indent, act_id,
-                   (thr_act == current_act())? '#': ':',
-                   2*sizeof(vm_offset_t), thr_act,
-                   db_act_stat(thr_act, status),
-                   db_act_swap_stat(thr_act, swap_status),
-                   2*sizeof(vm_offset_t), (athread ?athread->kernel_stack:0),
-                   2*sizeof(vm_offset_t), athread,
-                   thr_act->suspend_count,
-                   (athread ? athread->sched_pri : 999), /* XXX */
-                   policy_list[policy-1]);
-               if (athread) {
-                   /* no longer TH_SWAP, no continuation to print */
-                   if (athread->state & TH_WAIT)
-                       db_task_printsym((db_addr_t)athread->wait_event,
-                                               DB_STGY_ANY, kernel_task);
-               }
-               db_printf("\n");
-           } else {
-               if (act_id % 3 == 0) {
-                   if (flag & OPTION_INDENT)
-                       db_printf("\n    ");
-               } else
-                   db_printf(" ");
-               db_printf("%3d%c(%0*X,%s)", act_id, 
-                   (thr_act == current_act())? '#': ':',
-                   2*sizeof(vm_offset_t), thr_act,
-                   db_act_stat(thr_act, status));
-           }
-       } else {
-           if (flag & OPTION_INDENT)
-               db_printf("            %3d (%0*X) ", act_id,
-                         2*sizeof(vm_offset_t), thr_act);
-           else
-               db_printf("(%0*X) ", 2*sizeof(vm_offset_t), thr_act);
-           if (athread) {
-               db_printf("%c%c%c%c%c",
-                       (athread->state & TH_RUN)  ? 'R' : ' ',
-                       (athread->state & TH_WAIT) ? 'W' : ' ',
-                       (athread->state & TH_SUSP) ? 'S' : ' ',
-                       (athread->state & TH_UNINT)? 'N' : ' ',
-                       db_act_fp_used(thr_act) ? 'F' : ' ');
-               /* Obsolete TH_STACK_HANDOFF code, left for now; might enhance
-                * to print out safe_points instead */
-               if (athread->state & TH_STACK_HANDOFF) {
-                   if (athread->continuation) {
-                       db_printf("(");
-                       db_task_printsym((db_addr_t)athread->continuation,
-                                               DB_STGY_ANY, kernel_task);
-                       db_printf(")");
-                   } else {
-                       db_printf("(handoff)");
-                   }
-               }
-               if (athread->state & TH_WAIT) {
-                   db_printf(" ");
-                   db_task_printsym((db_addr_t)athread->wait_event,
-                                               DB_STGY_ANY, kernel_task);
-               }
-           } else
-               db_printf("Empty");
-           db_printf("\n");
-       }
-}
-
-void
-db_print_task(
-       task_t  task,
-       int     task_id,
-       int     flag)
-{
-       thread_act_t thr_act;
-       int act_id;
-       char sstate;
-
-       if (flag & OPTION_USER) {
-           if (flag & OPTION_TASK_TITLE) {
-               db_printf(" ID: TASK     MAP      THD RES SUS PR SW %s", 
-                         DB_TASK_NAME_TITLE);
-               if ((flag & OPTION_LONG) == 0)
-                   db_printf("  ACTS");
-               db_printf("\n");
-           }
-#if    TASK_SWAPPER
-           switch ((int) task->swap_state) {
-               case TASK_SW_IN:
-                   sstate = 'I';
-                   break;
-               case TASK_SW_OUT:
-                   sstate = 'O';
-                   break;
-               case TASK_SW_GOING_OUT:
-                   sstate = 'G';
-                   break;
-               case TASK_SW_COMING_IN:
-                   sstate = 'C';
-                   break;
-               case TASK_SW_UNSWAPPABLE:
-                   sstate = 'U';
-                   break;
-               default:
-                   sstate = '?';
-                   break;
-           }
-#else  /* TASK_SWAPPER */
-           sstate = 'I';
-#endif /* TASK_SWAPPER */
-           /*** ??? fix me ***/
-           db_printf("%3d: %0*X %0*X %3d %3d %3d %2d %c  ",
-                           task_id, 2*sizeof(vm_offset_t), task,
-                           2*sizeof(vm_offset_t), task->map,
-                           task->thr_act_count, task->res_act_count,
-                           task->suspend_count,
-                           ((mk_sp_attributes_t)(task->sp_attributes))->priority,
-                           sstate);
-           DB_TASK_NAME(task);
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_TASK_TITLE)
-                   flag |= OPTION_THREAD_TITLE;
-               db_printf("\n");
-           } else if (task->thr_act_count <= 1)
-               flag &= ~OPTION_INDENT;
-           act_id = 0;
-           queue_iterate(&task->thr_acts, thr_act, thread_act_t, thr_acts) {
-               db_print_act(thr_act, act_id, flag);
-               flag &= ~OPTION_THREAD_TITLE;
-               act_id++;
-           }
-           if ((flag & OPTION_LONG) == 0)
-               db_printf("\n");
-       } else {
-           if (flag & OPTION_LONG) {
-               if (flag & OPTION_TASK_TITLE) {
-                   db_printf("    TASK        ACT\n");
-                   if (task->thr_act_count > 1)
-                       flag |= OPTION_THREAD_TITLE;
-               }
-           }
-           db_printf("%3d (%0*X): ", task_id, 2*sizeof(vm_offset_t), task);
-           if (task->thr_act_count == 0) {
-               db_printf("no threads\n");
-           } else {
-               if (task->thr_act_count > 1) {
-                   db_printf("%d threads: \n", task->thr_act_count);
-                   flag |= OPTION_INDENT;
-               } else
-                   flag &= ~OPTION_INDENT;
-               act_id = 0;
-               queue_iterate(&task->thr_acts, thr_act,
-                             thread_act_t, thr_acts) {
-                   db_print_act(thr_act, act_id++, flag);
-                   flag &= ~OPTION_THREAD_TITLE;
-               }
-           }
-       }
-}
-
-void
-db_print_space(
-       task_t  task,
-       int     task_id,
-       int     flag)
-{
-       ipc_space_t space;
-       thread_act_t act = (thread_act_t)queue_first(&task->thr_acts);
-       int count;
-
-       count = 0;
-       space = task->itk_space;
-       if (act)
-               count = db_port_iterate(act, FALSE, FALSE);
-       db_printf("%3d: %08x %08x %08x %sactive   %d\n",
-                 task_id, task, space, task->map,
-                 space->is_active? "":"!", count);
-}
-
-void
-db_print_task_vm(
-       task_t          task,
-       int             task_id,
-       boolean_t       title,
-       char            *modif)
-{
-       vm_map_t        map;
-       pmap_t          pmap;
-       vm_size_t       size;
-       long            resident;
-       long            wired;
-
-       if (title) {
-               db_printf("id     task      map     pmap  virtual  rss pg rss mem  wir pg wir mem\n");
-       }
-
-       map = task->map;
-       pmap = vm_map_pmap(map);
-
-       size = db_vm_map_total_size(map);
-       resident = pmap->stats.resident_count;
-       wired = pmap->stats.wired_count;
-
-       db_printf("%2d %08x %08x %08x %7dK  %6d %6dK  %6d %6dK\n",
-               task_id,
-               task,
-               map,
-               pmap,
-               size / 1024,
-               resident, (resident * PAGE_SIZE) / 1024,
-               wired, (wired * PAGE_SIZE) / 1024);
-}
-
-
-void
-db_show_one_task_vm(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif)
-{
-       thread_act_t    thread;
-       task_t          task;
-       int             task_id;
-
-       if (have_addr == FALSE) {
-               if ((thread = db_default_act) == THR_ACT_NULL) {
-                       if ((thread = current_act()) == THR_ACT_NULL) {
-                               db_printf("no thread.\n");
-                               return;
-                       }
-               }
-               task = thread->task;
-       } else {
-               task = (task_t) addr;
-       }
-
-       task_id = db_lookup_task(task);
-       if (task_id < 0) {
-               db_printf("0x%x is not a task_t\n", addr);
-               return;
-       }
-
-       db_print_task_vm(task, task_id, TRUE, modif);
-}
-
-void
-db_show_all_task_vm(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif)
-{
-       task_t          task;
-       int             task_id;
-       boolean_t       title = TRUE;
-       processor_set_t pset;
-
-       task_id = 0;
-       queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
-               queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
-                       db_print_task_vm(task, task_id, title, modif);
-                       title = FALSE;
-                       task_id++;
-               }
-       }
-}
-
-void
-db_show_all_acts(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       task_t task;
-       int task_id;
-       int flag;
-       processor_set_t pset;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       task_id = 0;
-       queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
-           queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
-               db_print_task(task, task_id, flag);
-               flag &= ~OPTION_TASK_TITLE;
-               task_id++;
-               if ((flag & (OPTION_LONG|OPTION_INDENT)) == OPTION_INDENT)
-                   db_printf("\n");
-           }
-       }
-}
-
-void
-db_show_one_space(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       int             flag;
-       int             task_id;
-       task_t          task;
-
-       flag = OPTION_TASK_TITLE;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           task = db_current_task();
-           if (task == TASK_NULL) {
-               db_error("No task\n");
-               /*NOTREACHED*/
-           }
-       } else
-           task = (task_t) addr;
-
-       if ((task_id = db_lookup_task(task)) < 0) {
-           db_printf("bad task address 0x%x\n", addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       db_printf(" ID: TASK     SPACE    MAP               COUNT\n");
-       db_print_space(task, task_id, flag);
-}
-
-void
-db_show_all_spaces(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       task_t task;
-       int task_id = 0;
-       int flag;
-       processor_set_t pset;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       db_printf(" ID: TASK     SPACE    MAP               COUNT\n");
-       queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
-           queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
-                   db_print_space(task, task_id, flag);
-                   task_id++;
-           }
-       }
-}
-
-db_addr_t
-db_task_from_space(
-       ipc_space_t     space,
-       int             *task_id)
-{
-       task_t task;
-       int tid = 0;
-       processor_set_t pset;
-
-       queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
-           queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
-                   if (task->itk_space == space) {
-                           *task_id = tid;
-                           return (db_addr_t)task;
-                   }
-                   tid++;
-           }
-       }
-       *task_id = 0;
-       return (0);
-}
-
-void
-db_show_one_act(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       int             flag;
-       int             act_id;
-       thread_act_t            thr_act;
-
-       flag = OPTION_THREAD_TITLE;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           thr_act = current_act();
-           if (thr_act == THR_ACT_NULL) {
-               db_error("No thr_act\n");
-               /*NOTREACHED*/
-           }
-       } else
-           thr_act = (thread_act_t) addr;
-
-       if ((act_id = db_lookup_act(thr_act)) < 0) {
-           db_printf("bad thr_act address %#x\n", addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       if (flag & OPTION_USER) {
-           db_printf("TASK%d(%0*X):\n",
-                     db_lookup_task(thr_act->task),
-                     2*sizeof(vm_offset_t), thr_act->task);
-           db_print_act(thr_act, act_id, flag);
-       } else {
-           db_printf("task %d(%0*X): thr_act    %d",
-                     db_lookup_task(thr_act->task),
-                     2*sizeof(vm_offset_t), thr_act->task, act_id);
-           db_print_act(thr_act, act_id, flag);
-       }
-       if (db_option(modif, 'i') &&  thr_act->thread &&
-           (thr_act->thread->state & TH_WAIT) && 
-           thr_act->thread->kernel_stack == 0) {
-
-           db_printf("Wait State: option 0x%x\n",
-               thr_act->thread->ith_option);
-       }
-}
-
-void
-db_show_one_task(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       int             flag;
-       int             task_id;
-       task_t          task;
-
-       flag = OPTION_TASK_TITLE|OPTION_INDENT;
-       if (db_option(modif, 'u'))
-           flag |= OPTION_USER;
-       if (db_option(modif, 'l'))
-           flag |= OPTION_LONG;
-
-       if (!have_addr) {
-           task = db_current_task();
-           if (task == TASK_NULL) {
-               db_error("No task\n");
-               /*NOTREACHED*/
-           }
-       } else
-           task = (task_t) addr;
-
-       if ((task_id = db_lookup_task(task)) < 0) {
-           db_printf("bad task address 0x%x\n", addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-
-       db_print_task(task, task_id, flag);
-}
-
-void
-db_show_shuttle(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       thread_shuttle_t        shuttle;
-       thread_act_t            thr_act;
-
-       if (have_addr)
-           shuttle = (thread_shuttle_t) addr;
-       else {
-           thr_act = current_act();
-           if (thr_act == THR_ACT_NULL) {
-               db_error("No thr_act\n");
-               /*NOTREACHED*/
-           }
-           shuttle = thr_act->thread;
-           if (shuttle == THREAD_NULL) {
-               db_error("No shuttle associated with current thr_act\n");
-               /*NOTREACHED*/
-           }
-       }
-       db_printf("shuttle %x:\n", shuttle);
-       if (shuttle->top_act == THR_ACT_NULL)
-           db_printf("  no activations\n");
-       else {
-           db_printf("  activations:");
-           for (thr_act = shuttle->top_act; thr_act != THR_ACT_NULL;
-                thr_act = thr_act->lower) {
-               if (thr_act != shuttle->top_act)
-                   db_printf(" from");
-               db_printf(" $task%d.%d(%x)", db_lookup_task(thr_act->task),
-                      db_lookup_act(thr_act), thr_act);
-           }
-           db_printf("\n");
-       }
-}
-
-#define        db_pset_kmsg_count(port) \
-       (ipc_list_count((port)->ip_pset->ips_messages.imq_messages.ikmq_base))
-
-int
-db_port_kmsg_count(
-       ipc_port_t      port)
-{
-       return (port->ip_pset ? db_pset_kmsg_count(port) : port->ip_msgcount);
-}
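
The split matters because messages sent to a port that belongs to a port set queue on the set rather than on the port, so the per-port ip_msgcount is not meaningful there; db_port_kmsg_count() therefore reads the set's message queue for members and ip_msgcount otherwise, and db_print_one_entry() below leans on the same distinction.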
-
-static int db_print_ent_cnt = 0;
-
-void db_reset_print_entry(
-       void)
-{
-       db_print_ent_cnt = 0;
-}
-
-void
-db_print_one_entry(
-       ipc_entry_t     entry,
-       int             index,
-       mach_port_name_t        name,
-       boolean_t       is_pset)
-{
-       ipc_port_t aport = (ipc_port_t)entry->ie_object;
-       unsigned bits = entry->ie_bits;
-
-       if (is_pset && !aport->ip_pset)
-               return;
-       if (db_print_ent_cnt && db_print_ent_cnt % 2 == 0)
-               db_printf("\n");
-       if (!name)
-               db_printf("\t%s%d[%x]",
-                       !is_pset && aport->ip_pset ? "pset" : "port",
-                       index,
-                       MACH_PORT_MAKE(index, IE_BITS_GEN(bits)));
-       else
-               db_printf("\t%s[%x]",
-                       !is_pset && aport->ip_pset ? "pset" : "port",
-                       name);
-       if (!is_pset) {
-               db_printf("(%s,%x,%d)",
-                   (bits & MACH_PORT_TYPE_RECEIVE)? "r":
-                       (bits & MACH_PORT_TYPE_SEND)? "s": "S",
-                       aport,
-                   db_port_kmsg_count(aport));
-               db_print_ent_cnt++;
-       }
-       else {
-               db_printf("(%s,%x,set=%x,%d)",
-                       (bits & MACH_PORT_TYPE_RECEIVE)? "r":
-                               (bits & MACH_PORT_TYPE_SEND)? "s": "S",
-                       aport,
-                       aport->ip_pset,
-                       db_pset_kmsg_count(aport));
-               db_print_ent_cnt++;
-       }
-}
-
-int
-db_port_iterate(
-       thread_act_t    thr_act,
-       boolean_t       is_pset,
-       boolean_t       do_output)
-{
-       ipc_entry_t entry;
-       ipc_tree_entry_t tentry;
-       int index;
-       int size;
-       int count;
-       ipc_space_t space;
-
-       count = 0;
-       space = thr_act->task->itk_space;
-       entry = space->is_table;
-       size = space->is_table_size;
-       db_reset_print_entry();
-       for (index = 0; index < size; ++index, ++entry) {
-               if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
-                       if (do_output)
-                               db_print_one_entry(entry,
-                                       index, (mach_port_t)0, is_pset);
-                       ++count;
-               }
-       }
-       for (tentry = ipc_splay_traverse_start(&space->is_tree);
-               tentry != ITE_NULL;
-               tentry = ipc_splay_traverse_next(&space->is_tree, FALSE)) {
-               entry = &tentry->ite_entry;
-               if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
-                       if (do_output)
-                               db_print_one_entry(entry,
-                                       0, tentry->ite_name, is_pset);
-                       ++count;
-               }
-       }
-       return (count);
-}
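
db_port_iterate() walks both halves of an IPC space, the linear entry table and the splay tree of overflow entries, counting every entry that holds a port right and printing it only when do_output is set. Both modes are used in this file: db_print_space() calls db_port_iterate(act, FALSE, FALSE) purely to count rights, while db_show_port_id() passes TRUE to display them.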
-
-ipc_port_t
-db_lookup_port(
-       thread_act_t    thr_act,
-       int             id)
-{
-       register ipc_space_t space;
-       register ipc_entry_t entry;
-
-       if (thr_act == THR_ACT_NULL)
-           return(0);
-       space = thr_act->task->itk_space;
-       if (id < 0 || id >= space->is_table_size)
-           return(0);
-       entry = &space->is_table[id];
-       if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS)
-           return((ipc_port_t)entry->ie_object);
-       return(0);
-}
-
-static void
-db_print_port_id(
-       int             id,
-       ipc_port_t      port,
-       unsigned        bits,
-       int             n)
-{
-       if (n != 0 && n % 3 == 0)
-           db_printf("\n");
-       db_printf("\tport%d(%s,%x)", id,
-               (bits & MACH_PORT_TYPE_RECEIVE)? "r":
-               (bits & MACH_PORT_TYPE_SEND)? "s": "S", port);
-}
-
-void
-db_show_port_id(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       thread_act_t thr_act;
-
-       if (!have_addr) {
-           thr_act = current_act();
-           if (thr_act == THR_ACT_NULL) {
-               db_error("No thr_act\n");
-               /*NOTREACHED*/
-           }
-       } else
-           thr_act = (thread_act_t) addr;
-       if (db_lookup_act(thr_act) < 0) {
-           db_printf("Bad thr_act address 0x%x\n", addr);
-           db_error(0);
-           /*NOTREACHED*/
-       }
-       if (db_port_iterate(thr_act, db_option(modif,'s'), TRUE))
-           db_printf("\n");
-}
-
-/*
- *     Useful system state when the world has hung.
- */
-void
-db_system_stats()
-{
-       extern void     db_device(void);
-       extern void     db_sched(void);
-#if    DIPC
-       extern void     db_dipc_stats(void);
-       extern void     db_show_kkt(void);
-#endif /* DIPC */
-
-       db_sched();
-       iprintf("\n");
-       db_vm();
-       iprintf("\n");
-       db_device();
-#if    DIPC
-       iprintf("\n");
-       db_dipc_stats();
-       iprintf("\n");
-       db_show_kkt();
-#endif /* DIPC */
-       iprintf("\n");
-       db_printf("current_{thread/task} 0x%x 0x%x\n",
-                       current_thread(),current_task());
-}
-
-void db_show_one_runq(run_queue_t runq);
-
-void
-db_show_runq(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char *          modif)
-{
-       processor_set_t pset;
-       processor_t proc;
-       run_queue_t runq;
-       boolean_t showedany = FALSE;
-
-       queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
-           runq = &pset->runq;
-           if (runq->count > 0) {
-               db_printf("PROCESSOR SET %x\n", pset);
-               db_show_one_runq(runq);
-               showedany = TRUE;
-           }
-       }
-       if (!showedany)
-           db_printf("No runnable threads\n");
-}
-
-void
-db_show_one_runq(
-       run_queue_t     runq)
-{
-       int i, task_id, thr_act_id;
-       queue_t q;
-       thread_act_t thr_act;
-       thread_t thread;
-       task_t task;
-
-       printf("PRI  TASK.ACTIVATION\n");
-       for (i = runq->low, q = runq->runq + i; i < NRQS; i++, q++) {
-           if (!queue_empty(q)) {
-               db_printf("%3d:", i);
-               queue_iterate(q, thread, thread_t, links) {
-                   thr_act = thread->top_act;
-                   task = thr_act->task;
-                   task_id = db_lookup_task(task);
-                   thr_act_id = db_lookup_task_act(task, thr_act);
-                   db_printf(" %d.%d", task_id, thr_act_id);
-               }
-               db_printf("\n");
-           }
-       }
-}
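
The deleted db_port_iterate() above walked both halves of an IPC space, the linear entry table and the splay-tree overflow, counting entries that hold port rights. A minimal standalone model of the table-scan half; the mask and entry layout are stand-ins, not the kernel's real ipc_entry definitions:

/*
 * Simplified model of the table scan in the deleted db_port_iterate():
 * walk a fixed-size entry table and count entries holding port rights.
 */
#include <stdio.h>

#define MODEL_PORT_RIGHTS_MASK 0x7   /* stand-in for MACH_PORT_TYPE_PORT_RIGHTS */

struct model_entry {
    unsigned ie_bits;
};

static int count_port_rights(const struct model_entry *table, int size)
{
    int count = 0;
    for (int index = 0; index < size; ++index) {
        if (table[index].ie_bits & MODEL_PORT_RIGHTS_MASK)
            ++count;
    }
    return count;
}

int main(void)
{
    struct model_entry table[] = { {0x1}, {0x0}, {0x4}, {0x8} };
    printf("%d entries hold port rights\n", count_port_rights(table, 4));
    return 0;
}
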
diff --git a/osfmk/ddb/stab.h b/osfmk/ddb/stab.h
deleted file mode 100644 (file)
index 514ffb0..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:48  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:09  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.11.2  1995/01/06  19:11:14  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     added N_FRAME, an extension to aout symtabs
- *     for machines with non-self-describing frame formats
- *     [1994/10/14  03:40:05  dwm]
- *
- * Revision 1.1.11.1  1994/09/23  01:23:47  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:11:53  ezf]
- * 
- * Revision 1.1.4.3  1993/07/27  18:28:44  elliston
- *     Add ANSI prototypes.  CR #9523.
- *     [1993/07/27  18:13:49  elliston]
- * 
- * Revision 1.1.4.2  1993/06/02  23:13:40  jeffc
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  20:58:12  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:24:31  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.2  91/10/09  16:05:28  af
- *      Revision 2.1  91/10/05  13:02:42  jeffreyh
- *      Created.
- * 
- * Revision 2.1.1.1  91/10/05  13:03:14  jeffreyh
- *     Initial MK63 checkin
- * 
- * Revision 2.1.1.1  91/07/31  13:14:49  jeffreyh
- *     Created from BSD network release #2
- *     [91/07/31            jeffreyh]
- * 
- *
- */
-/* CMU_ENDHIST */
-/*-
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *     @(#)stab.h      5.2 (Berkeley) 4/4/91
- */
-/*
- */
-
-#ifndef        _DDB_DB_STAB_H_
-#define        _DDB_DB_STAB_H_
-
-/*
- * The following are symbols used by various debuggers and by the Pascal
- * compiler.  Each of them must have one (or more) of the bits defined by
- * the N_STAB mask set.
- */
-
-#define        N_GSYM          0x20    /* global symbol */
-#define        N_FNAME         0x22    /* F77 function name */
-#define        N_FUN           0x24    /* procedure name */
-#define        N_STSYM         0x26    /* data segment variable */
-#define        N_LCSYM         0x28    /* bss segment variable */
-#define        N_MAIN          0x2a    /* main function name */
-#define        N_PC            0x30    /* global Pascal symbol */
-#define        N_FRAME         0x34    /* stack frame descriptor */
-#define        N_RSYM          0x40    /* register variable */
-#define        N_SLINE         0x44    /* text segment line number */
-#define        N_DSLINE        0x46    /* data segment line number */
-#define        N_BSLINE        0x48    /* bss segment line number */
-#define        N_SSYM          0x60    /* structure/union element */
-#define        N_SO            0x64    /* main source file name */
-#define        N_LSYM          0x80    /* stack variable */
-#define        N_BINCL         0x82    /* include file beginning */
-#define        N_SOL           0x84    /* included source file name */
-#define        N_PSYM          0xa0    /* parameter variable */
-#define        N_EINCL         0xa2    /* include file end */
-#define        N_ENTRY         0xa4    /* alternate entry point */
-#define        N_LBRAC         0xc0    /* left bracket */
-#define        N_EXCL          0xc2    /* deleted include file */
-#define        N_RBRAC         0xe0    /* right bracket */
-#define        N_BCOMM         0xe2    /* begin common */
-#define        N_ECOMM         0xe4    /* end common */
-#define        N_ECOML         0xe8    /* end common (local name) */
-#define        N_LENG          0xfe    /* length of preceding entry */
-
-#endif /* !_DDB_DB_STAB_H_ */
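
The header above defined the classic BSD a.out stab type codes that ddb's symbol-table reader consumed; with ddb removed in this release, the codes go too. A tiny decoder over a few values copied from the header, purely illustrative:

#include <stdio.h>

static const char *stab_name(unsigned char type)
{
    switch (type) {                 /* values from the deleted stab.h */
    case 0x24: return "N_FUN   (procedure name)";
    case 0x44: return "N_SLINE (text segment line number)";
    case 0x64: return "N_SO    (main source file name)";
    case 0x80: return "N_LSYM  (stack variable)";
    default:   return "other";
    }
}

int main(void)
{
    unsigned char samples[] = { 0x24, 0x44, 0x64, 0x80, 0xfe };
    for (unsigned i = 0; i < sizeof samples; i++)
        printf("0x%02x -> %s\n", samples[i], stab_name(samples[i]));
    return 0;
}
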
index 5b2ee7b417c1871df010f3df732b5452cd357b51..21d3667eb5dfc3f9317ba36639ffe4ec67e01dc1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -63,7 +63,6 @@
 #include "default_pager_internal.h"
 #include <default_pager/default_pager_object_server.h>
 #include <kern/host.h>
-#include <kern/ledger.h>
 #include <mach/host_info.h>
 #include <mach/host_priv.h>
 #include <mach/vm_map.h>
@@ -282,22 +281,11 @@ start_def_pager( __unused char *bs_device )
 */
 /*
        MACH_PORT_FACE          security_port;
-       MACH_PORT_FACE          root_ledger_wired;
-       MACH_PORT_FACE          root_ledger_paged;
 */
        __unused static char here[] = "main";
 
 
 
-/*
-       default_pager_host_port = ipc_port_make_send(realhost.host_priv_self);
-       master_device_port = ipc_port_make_send(master_device_port);
-       root_ledger_wired = ipc_port_make_send(root_wired_ledger_port);
-       root_ledger_paged = ipc_port_make_send(root_paged_ledger_port);
-       security_port = ipc_port_make_send(realhost.host_security_self);
-*/
-
-
 #if NORMA_VM
        norma_mk = 1;
 #else
index 9046437416f84731c192a592c534a827afc56546..225ac237818046427f4a8056d392b353794423ee 100644 (file)
@@ -297,6 +297,7 @@ typedef struct backing_store        *backing_store_t;
 
 #ifdef MACH_KERNEL
 #define BS_LOCK_INIT(bs)       lck_mtx_init(&(bs)->bs_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define BS_LOCK_DESTROY(bs)    lck_mtx_destroy(&(bs)->bs_lock, &default_pager_lck_grp)
 #define BS_LOCK(bs)                    lck_mtx_lock(&(bs)->bs_lock)
 #define BS_UNLOCK(bs)          lck_mtx_unlock(&(bs)->bs_lock)
 
@@ -309,6 +310,7 @@ extern struct backing_store_list_head       backing_store_list;
 extern int     backing_store_release_trigger_disable;
 
 #define        BSL_LOCK_INIT()         lck_mtx_init(&backing_store_list.bsl_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define        BSL_LOCK_DESTROY()      lck_mtx_destroy(&backing_store_list.bsl_lock, &default_pager_lck_grp)
 #define BSL_LOCK()                     lck_mtx_lock(&backing_store_list.bsl_lock)
 #define BSL_UNLOCK()           lck_mtx_unlock(&backing_store_list.bsl_lock)
 
@@ -365,6 +367,7 @@ typedef struct paging_segment *paging_segment_t;
 #define PAGING_SEGMENT_NULL    ((paging_segment_t) 0)
 
 #define PS_LOCK_INIT(ps)       lck_mtx_init(&(ps)->ps_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define PS_LOCK_DESTROY(ps)    lck_mtx_destroy(&(ps)->ps_lock, &default_pager_lck_grp)
 #define PS_LOCK(ps)                    lck_mtx_lock(&(ps)->ps_lock)
 #define PS_UNLOCK(ps)          lck_mtx_unlock(&(ps)->ps_lock)
 
@@ -386,7 +389,8 @@ extern int  paging_segment_count;   /* number of active paging segments */
 extern int     paging_segment_max;     /* highest used paging segment index */
 extern int ps_select_array[DEFAULT_PAGER_BACKING_STORE_MAXPRI+1];
 
-#define        PSL_LOCK_INIT() lck_mtx_init(&paging_segments_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define        PSL_LOCK_INIT()         lck_mtx_init(&paging_segments_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define        PSL_LOCK_DESTROY()      lck_mtx_destroy(&paging_segments_lock, &default_pager_lck_grp)
 #define PSL_LOCK()             lck_mtx_lock(&paging_segments_lock)
 #define PSL_UNLOCK()   lck_mtx_unlock(&paging_segments_lock)
 
@@ -523,6 +527,7 @@ typedef struct vstruct_alias {
 } vstruct_alias_t;
 
 #define DPT_LOCK_INIT(lock)            lck_mtx_init(&(lock), &default_pager_lck_grp, &default_pager_lck_attr)
+#define DPT_LOCK_DESTROY(lock)         lck_mtx_destroy(&(lock), &default_pager_lck_grp)
 #define DPT_LOCK(lock)                 lck_mtx_lock(&(lock))
 #define DPT_UNLOCK(lock)               lck_mtx_unlock(&(lock))
 #define DPT_SLEEP(lock, e, i)  lck_mtx_sleep(&(lock), LCK_SLEEP_DEFAULT, (event_t)(e), i)
@@ -533,6 +538,7 @@ typedef struct vstruct_alias {
 #define VS_UNLOCK(vs)                  hw_lock_unlock(&(vs)->vs_lock)
 #define VS_MAP_LOCK_TYPE               lck_mtx_t
 #define VS_MAP_LOCK_INIT(vs)   lck_mtx_init(&(vs)->vs_map_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define VS_MAP_LOCK_DESTROY(vs)        lck_mtx_destroy(&(vs)->vs_map_lock, &default_pager_lck_grp)
 #define VS_MAP_LOCK(vs)                        lck_mtx_lock(&(vs)->vs_map_lock)
 #define VS_MAP_TRY_LOCK(vs)            lck_mtx_try_lock(&(vs)->vs_map_lock)
 #define VS_MAP_UNLOCK(vs)              lck_mtx_unlock(&(vs)->vs_map_lock)
@@ -660,6 +666,7 @@ extern lck_grp_t            default_pager_lck_grp;
 extern lck_attr_t              default_pager_lck_attr;
 
 #define VSL_LOCK_INIT()                lck_mtx_init(&vstruct_list.vsl_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define VSL_LOCK_DESTROY()     lck_mtx_destroy(&vstruct_list.vsl_lock, &default_pager_lck_grp)
 #define VSL_LOCK()                     lck_mtx_lock(&vstruct_list.vsl_lock)
 #define VSL_LOCK_TRY()         lck_mtx_try_lock(&vstruct_list.vsl_lock)
 #define VSL_UNLOCK()           lck_mtx_unlock(&vstruct_list.vsl_lock)
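
The pattern in these hunks: every *_LOCK_INIT() gains a matching *_LOCK_DESTROY(), which the .c hunks below call on teardown and error paths before freeing the owning structure. A userspace sketch of the same init/destroy pairing with pthreads; the struct shape and names are stand-ins for the kernel's backing_store:

/*
 * Every successfully initialized mutex is destroyed on the error
 * path before the owning structure is freed, mirroring the new
 * BS_LOCK_DESTROY()/PS_LOCK_DESTROY() call sites.
 */
#include <pthread.h>
#include <stdlib.h>

struct model_bs {
    pthread_mutex_t bs_lock;
    unsigned char  *bs_bmap;
};

static struct model_bs *model_bs_create(size_t bmap_size)
{
    struct model_bs *bs = malloc(sizeof *bs);
    if (bs == NULL)
        return NULL;
    pthread_mutex_init(&bs->bs_lock, NULL);      /* BS_LOCK_INIT() */
    bs->bs_bmap = malloc(bmap_size);
    if (bs->bs_bmap == NULL) {
        pthread_mutex_destroy(&bs->bs_lock);     /* BS_LOCK_DESTROY() */
        free(bs);                                /* kfree(bs, ...) */
        return NULL;
    }
    return bs;
}

int main(void)
{
    struct model_bs *bs = model_bs_create(64);
    if (bs) {
        pthread_mutex_destroy(&bs->bs_lock);
        free(bs->bs_bmap);
        free(bs);
    }
    return 0;
}
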
index 9fcf6a2bd28cf7fee065687ac262aa72003f0528..2b97a2122a0f9b7652759a5d6267008d1998bca0 100644 (file)
@@ -151,6 +151,7 @@ void vs_free_async(struct vs_async *vsa);   /* forward */
 #define VS_ASYNC_LOCK()                lck_mtx_lock(&default_pager_async_lock)
 #define VS_ASYNC_UNLOCK()      lck_mtx_unlock(&default_pager_async_lock)
 #define VS_ASYNC_LOCK_INIT()   lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define VS_ASYNC_LOCK_DESTROY()        lck_mtx_destroy(&default_pager_async_lock, &default_pager_lck_grp)
 #define VS_ASYNC_LOCK_ADDR()   (&default_pager_async_lock)
 /*
  *  Paging Space Hysteresis triggers and the target notification port
@@ -603,7 +604,10 @@ default_pager_backing_store_create(
        }
        else {
                ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
+
+               BS_LOCK_DESTROY(bs);
                kfree(bs, sizeof (struct backing_store));
+
                return KERN_RESOURCE_SHORTAGE;
        }
 
@@ -1001,6 +1005,7 @@ restart:
        /*
         * Free the backing store structure.
         */
+       BS_LOCK_DESTROY(bs);
        kfree(bs, sizeof *bs);
 
        return KERN_SUCCESS;
@@ -1110,6 +1115,7 @@ default_pager_add_segment(
        PS_LOCK_INIT(ps);
        ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
        if (!ps->ps_bmap) {
+               PS_LOCK_DESTROY(ps);
                kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
@@ -1131,6 +1137,8 @@ default_pager_add_segment(
 
        if ((error = ps_enter(ps)) != 0) {
                kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+
+               PS_LOCK_DESTROY(ps);
                kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
@@ -1876,6 +1884,8 @@ ps_vstruct_dealloc(
 
        bs_commit(- vs->vs_size);
 
+       VS_MAP_LOCK_DESTROY(vs);
+
        zfree(vstruct_zone, vs);
 }
 
@@ -1886,8 +1896,6 @@ ps_vstruct_reclaim(
        boolean_t reclaim_backing_store)
 {
        unsigned int    i, j;
-//     spl_t   s;
-       unsigned int    request_flags;
        struct vs_map   *vsmap;
        boolean_t       vsmap_all_clear, vsimap_all_clear;
        struct vm_object_fault_info fault_info;
@@ -1895,15 +1903,6 @@ ps_vstruct_reclaim(
        unsigned int    vsmap_size;
        kern_return_t   kr;
 
-       request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
-       if (reclaim_backing_store) {
-#if USE_PRECIOUS
-               request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE;
-#else  /* USE_PRECIOUS */
-               request_flags |= UPL_REQUEST_SET_DIRTY;
-#endif /* USE_PRECIOUS */
-       }
-
        VS_MAP_LOCK(vs);
 
        fault_info.cluster_size = VM_SUPER_CLUSTER;
@@ -1912,6 +1911,7 @@ ps_vstruct_reclaim(
        fault_info.lo_offset = 0;
        fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift);
        fault_info.io_sync = reclaim_backing_store;
+       fault_info.batch_pmap_op = FALSE;
 
        /*
         * If this is an indirect structure, then we walk through the valid
@@ -2937,7 +2937,17 @@ pvs_cluster_read(
                i = pages_in_cl;
        } else {
                i = 1;
-               request_flags |= UPL_NOBLOCK;
+
+               /*
+                * if the I/O cluster size == PAGE_SIZE, we don't want to set
+                * the UPL_NOBLOCK since we may be trying to recover from a
+                * previous partial pagein I/O that occurred because we were low
+                * on memory and bailed early in order to honor the UPL_NOBLOCK...
+                * since we're only asking for a single page, we can block w/o fear
+                * of tying up pages while waiting for more to become available
+                */
+               if (fault_info == NULL || ((vm_object_fault_info_t)fault_info)->cluster_size > PAGE_SIZE)
+                       request_flags |= UPL_NOBLOCK;
        }
 
 again:
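
The new comment explains the retry case: a single-page read is allowed to block because it cannot tie up a large cluster while memory is scarce. The decision reduces to a predicate on the caller's cluster size, modeled standalone below; the flag and page-size values are stand-ins for the real UPL_* constants:

#include <stdio.h>

#define MODEL_PAGE_SIZE   4096u
#define MODEL_UPL_NOBLOCK 0x1u

static unsigned request_flags_for(unsigned cluster_size, int have_fault_info)
{
    unsigned request_flags = 0;

    /* only a multi-page request (or one with no fault info) is
     * forbidden from blocking; a one-page retry may wait for memory */
    if (!have_fault_info || cluster_size > MODEL_PAGE_SIZE)
        request_flags |= MODEL_UPL_NOBLOCK;
    return request_flags;
}

int main(void)
{
    printf("8-page cluster: flags=%#x\n",
           request_flags_for(8 * MODEL_PAGE_SIZE, 1));   /* NOBLOCK set */
    printf("1-page retry:   flags=%#x\n",
           request_flags_for(MODEL_PAGE_SIZE, 1));       /* may block */
    return 0;
}
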
@@ -2975,7 +2985,8 @@ again:
                memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
                                                PAGE_SIZE, PAGE_SIZE, 
                                                &upl, NULL, &page_list_count,
-                                               request_flags);
+                                               request_flags  | UPL_SET_INTERNAL);
+               upl_range_needed(upl, 0, 1);
 
                if (clmap.cl_error)
                        upl_abort(upl, UPL_ABORT_ERROR);
@@ -3480,10 +3491,23 @@ vs_cluster_write(
                 * Ignore any non-present pages at the end of the
                 * UPL.
                 */
-               for (page_index = upl->size / vm_page_size; page_index > 0;) 
-                       if (UPL_PAGE_PRESENT(pl, --page_index))
+               for (page_index = upl->size / vm_page_size; page_index > 0;)  {
+                       if (UPL_PAGE_PRESENT(pl, --page_index)) {
+                               page_index++;
                                break;
-               num_of_pages = page_index + 1;
+                       }
+               }
+               if (page_index == 0) {
+                       /*
+                        * no pages in the UPL
+                        * abort and return
+                        */
+                       upl_abort(upl, 0);
+                       upl_deallocate(upl);
+
+                       return KERN_SUCCESS;
+               }
+               num_of_pages = page_index;
 
                base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE;
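
The restructured scan distinguishes "no pages present at all" (page_index stays 0, so the UPL is aborted and deallocated) from "last present page at index 0" (page_index is bumped to 1); previously both cases produced num_of_pages = 1 and an empty UPL could be written out. A standalone model of the corrected loop, with a plain int array standing in for UPL_PAGE_PRESENT():

#include <stdio.h>

/* returns the count of pages up to and including the last present one,
 * or 0 if no page is present at all */
static unsigned trailing_scan(const int *present, unsigned total)
{
    unsigned page_index;

    for (page_index = total; page_index > 0;) {
        if (present[--page_index]) {
            page_index++;   /* the fix: include this page in the count */
            break;
        }
    }
    return page_index;      /* 0 means "abort the UPL and return" */
}

int main(void)
{
    int pl[] = { 1, 1, 0, 1, 0, 0 };
    printf("num_of_pages = %u\n", trailing_scan(pl, 6));    /* 4 */
    int empty[] = { 0, 0, 0 };
    printf("num_of_pages = %u\n", trailing_scan(empty, 3)); /* 0 */
    return 0;
}
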
 
@@ -3601,17 +3625,6 @@ vs_cluster_write(
                                                ps_offset[seg_index] 
                                                                + seg_offset, 
                                                transfer_size, flags);
-                       } else {
-                               boolean_t empty = FALSE;
-                               upl_abort_range(upl,
-                                               first_dirty * vm_page_size, 
-                                               num_dirty   * vm_page_size,
-                                               UPL_ABORT_NOTIFY_EMPTY,
-                                               &empty);
-                               if (empty) {
-                                       assert(page_index == num_of_pages);
-                                       upl_deallocate(upl);
-                               }
                        }
                }
 
@@ -4251,6 +4264,7 @@ default_pager_add_file(
        PS_LOCK_INIT(ps);
        ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
        if (!ps->ps_bmap) {
+               PS_LOCK_DESTROY(ps);
                kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
@@ -4273,6 +4287,7 @@ default_pager_add_file(
 
        if ((error = ps_enter(ps)) != 0) {
                kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+               PS_LOCK_DESTROY(ps);
                kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
@@ -4300,7 +4315,7 @@ default_pager_add_file(
         * online but not activated (till it's needed the next time).
         */
 #if CONFIG_FREEZE
-       if (!vm_freeze_enabled)
+       if (!memorystatus_freeze_enabled)
 #endif
        {
                ps = paging_segments[EMERGENCY_PSEG_INDEX];
@@ -4482,7 +4497,7 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager,
                /* High and low water signals aren't applicable when freeze is */
                /* enabled, so release the trigger ports here and return       */
                /* KERN_FAILURE.                                               */
-               if (vm_freeze_enabled) {
+               if (memorystatus_freeze_enabled) {
                        if (IP_VALID( trigger_port )){
                                ipc_port_release_send( trigger_port );
                        }
@@ -4500,7 +4515,7 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager,
        } else if (flags ==  LO_WAT_ALERT) {
                release = max_pages_trigger_port;
 #if CONFIG_FREEZE
-               if (vm_freeze_enabled) {
+               if (memorystatus_freeze_enabled) {
                        if (IP_VALID( trigger_port )){
                                ipc_port_release_send( trigger_port );
                        }
index e122e7711d967ff179a6786e84331beb7afe3976..83c24fe6f655eac1aadefeb08d4879815cd601a2 100644 (file)
@@ -350,8 +350,8 @@ default_pager_add(
                pset = default_pager_external_set;
        }
 
-       ipc_port_make_sonce(mem_obj);
        ip_lock(mem_obj);  /* unlocked in nsrequest below */
+       ipc_port_make_sonce_locked(mem_obj);
        ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
 }
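
Minting the send-once right with ipc_port_make_sonce_locked() under the already-held port lock makes the make-right/register-notification sequence atomic instead of touching an unlocked port first. A pthread analogy of the pattern; the counters stand in for the port's send-once and notification state:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;
static int sonce_rights;     /* outstanding send-once rights */
static int nsrequest_armed;  /* no-senders request registered */

static void arm_no_senders(void)
{
    pthread_mutex_lock(&port_lock);   /* ip_lock(mem_obj) */
    sonce_rights++;                   /* ipc_port_make_sonce_locked() */
    nsrequest_armed = 1;              /* ipc_port_nsrequest() consumes it */
    pthread_mutex_unlock(&port_lock); /* nsrequest unlocks in the real code */
}

int main(void)
{
    arm_no_senders();
    printf("sonce_rights=%d armed=%d\n", sonce_rights, nsrequest_armed);
    return 0;
}
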
 
index 458d89540d9bd154ecc9eaabb19a5ea56e854acd..dda80916eb6714e141916710ee22705edc9b886c 100644 (file)
@@ -355,13 +355,15 @@ routine io_iterator_is_valid(
        out is_valid            : boolean_t
        );
 
-routine io_make_matching(
+skip;
+/* was routine io_make_matching(
            master_port         : mach_port_t;
        in  of_type             : uint32_t;
        in  options             : uint32_t;
        in  input               : io_struct_inband_t;
        out matching            : io_string_t
        );
+*/
 
 routine io_catalog_send_data(
            master_port         : mach_port_t;
@@ -695,6 +697,20 @@ routine io_connect_method_var_output(
        out var_output          : io_buf_ptr_t, physicalcopy
        );
 
+routine io_service_get_matching_service(
+           master_port         : mach_port_t;
+       in  matching            : io_string_t;
+       out service             : io_object_t
+       );
+
+routine io_service_get_matching_service_ool(
+           master_port         : mach_port_t;
+       in  matching            : io_buf_ptr_t, physicalcopy;
+        out result             : kern_return_t;
+       out service             : io_object_t
+       );
+
+
 #endif /* IOKIT */
 
 /* vim: set ft=c : */
index 05e76bc0e6d52c33a58cfeb4cd69e6204dc8da9f..b935e14c62334b9c86a8b99743eedbd376e68161 100644 (file)
 
 ipc_port_t     master_device_port;
 
+lck_grp_attr_t * dev_lck_grp_attr;
+lck_grp_t * dev_lck_grp;
+lck_attr_t * dev_lck_attr;
+lck_mtx_t iokit_obj_to_port_binding_lock;
+
 void
 device_service_create(void)
 {
@@ -92,6 +97,16 @@ device_service_create(void)
        kernel_set_special_port(host_priv_self(), HOST_IO_MASTER_PORT,
                                ipc_port_make_send(master_device_port));
 
+       /* allocate device lock group attribute and group */
+       dev_lck_grp_attr= lck_grp_attr_alloc_init();
+       dev_lck_grp = lck_grp_alloc_init("device",  dev_lck_grp_attr);
+
+       /* Allocate device lock attribute */
+       dev_lck_attr = lck_attr_alloc_init();
+
+       /* Initialize the IOKit object to port binding lock */
+       lck_mtx_init(&iokit_obj_to_port_binding_lock, dev_lck_grp, dev_lck_attr);
+
 #if 0
        ds_init();
        net_io_init();
index 5c5f8b742342c9599b3c2b537d168dccfbe6cf0d..1b6c4d4ec264f403f92d1176ab0bac82c8d46ef6 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <mach_kdb.h>
 #include <zone_debug.h>
-#include <mach_kdb.h>
-
 #include <mach/boolean.h>
 #include <mach/kern_return.h>
 #include <mach/mig_errors.h>
@@ -116,6 +113,9 @@ extern void iokit_retain_port( ipc_port_t port );
 extern void iokit_release_port( ipc_port_t port );
 extern void iokit_release_port_send( ipc_port_t port );
 
+extern void iokit_lock_port(ipc_port_t port);
+extern void iokit_unlock_port(ipc_port_t port);
+
 extern kern_return_t iokit_switch_object_port( ipc_port_t port, io_object_t obj, ipc_kobject_type_t type );
 
 /*
@@ -145,7 +145,7 @@ iokit_lookup_object_port(
        if (!IP_VALID(port))
            return (NULL);
 
-       ip_lock(port);
+       iokit_lock_port(port);
        if (ip_active(port) && (ip_kotype(port) == IKOT_IOKIT_OBJECT)) {
            obj = (io_object_t) port->ip_kobject;
            iokit_add_reference( obj );
@@ -153,7 +153,7 @@ iokit_lookup_object_port(
        else
            obj = NULL;
 
-       ip_unlock(port);
+       iokit_unlock_port(port);
 
        return( obj );
 }
@@ -167,7 +167,7 @@ iokit_lookup_connect_port(
        if (!IP_VALID(port))
            return (NULL);
 
-       ip_lock(port);
+       iokit_lock_port(port);
        if (ip_active(port) && (ip_kotype(port) == IKOT_IOKIT_CONNECT)) {
            obj = (io_object_t) port->ip_kobject;
            iokit_add_reference( obj );
@@ -175,7 +175,7 @@ iokit_lookup_connect_port(
        else
            obj = NULL;
 
-       ip_unlock(port);
+       iokit_unlock_port(port);
 
        return( obj );
 }
@@ -192,14 +192,19 @@ iokit_lookup_connect_ref(io_object_t connectRef, ipc_space_t space)
                kr = ipc_object_translate(space, CAST_MACH_PORT_TO_NAME(connectRef), MACH_PORT_RIGHT_SEND, (ipc_object_t *)&port);
 
                if (kr == KERN_SUCCESS) {
-            assert(IP_VALID(port));
-            
-            if (ip_active(port) && (ip_kotype(port) == IKOT_IOKIT_CONNECT)) {
-                obj = (io_object_t) port->ip_kobject;
-                iokit_add_reference(obj);
-            }
-            
-            ip_unlock(port);
+                       assert(IP_VALID(port));
+
+                       ip_reference(port);
+                       ip_unlock(port);
+
+                       iokit_lock_port(port);
+                       if (ip_active(port) && (ip_kotype(port) == IKOT_IOKIT_CONNECT)) {
+                               obj = (io_object_t) port->ip_kobject;
+                               iokit_add_reference(obj);
+                       }
+                       iokit_unlock_port(port);
+
+                       ip_release(port);
                }
        }
 
@@ -230,6 +235,20 @@ iokit_release_port_send( ipc_port_t port )
     ipc_port_release_send( port );
 }
 
+extern lck_mtx_t iokit_obj_to_port_binding_lock;
+
+EXTERN void
+iokit_lock_port( __unused ipc_port_t port )
+{
+    lck_mtx_lock(&iokit_obj_to_port_binding_lock);
+}
+
+EXTERN void
+iokit_unlock_port( __unused ipc_port_t port )
+{
+    lck_mtx_unlock(&iokit_obj_to_port_binding_lock);
+}
+
 /*
  * Get the port for a device.
  * Consumes a device reference; produces a naked send right.
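
The new iokit_lock_port()/iokit_unlock_port() pair trades the per-port ip_lock for one global mutex around every object-to-port binding access, so a lookup can never observe a binding while another thread is switching it (see iokit_switch_object_port below). A userspace analogy; the registry pointer stands in for ip_kobject:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t binding_lock = PTHREAD_MUTEX_INITIALIZER;
static void *bound_object;       /* models port->ip_kobject */

static void *lookup_object(void)
{
    pthread_mutex_lock(&binding_lock);    /* iokit_lock_port() */
    void *obj = bound_object;             /* reference taken while locked */
    pthread_mutex_unlock(&binding_lock);  /* iokit_unlock_port() */
    return obj;
}

static void switch_object(void *new_obj)
{
    pthread_mutex_lock(&binding_lock);    /* iokit_switch_object_port() */
    bound_object = new_obj;
    pthread_mutex_unlock(&binding_lock);
}

int main(void)
{
    int a = 1, b = 2;
    switch_object(&a);
    printf("bound: %p\n", lookup_object());
    switch_object(&b);
    printf("bound: %p\n", lookup_object());
    return 0;
}
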
@@ -298,9 +317,10 @@ iokit_alloc_object_port( io_object_t obj, ipc_kobject_type_t type )
        ipc_kobject_set( port, (ipc_kobject_t) obj, type);
 
         /* Request no-senders notifications on the port. */
-        notify = ipc_port_make_sonce( port);
         ip_lock( port);
+        notify = ipc_port_make_sonce_locked( port);
         ipc_port_nsrequest( port, 1, notify, &notify);
+       /* port unlocked */
         assert( notify == IP_NULL);
        gIOKitPortCount++;
 
@@ -326,7 +346,9 @@ iokit_destroy_object_port( ipc_port_t port )
 EXTERN kern_return_t
 iokit_switch_object_port( ipc_port_t port, io_object_t obj, ipc_kobject_type_t type )
 {
+    iokit_lock_port(port);
     ipc_kobject_set( port, (ipc_kobject_t) obj, type);
+    iokit_unlock_port(port);
 
     return( KERN_SUCCESS);
 }
@@ -388,7 +410,7 @@ iokit_no_senders( mach_no_senders_notification_t * notification )
 
     // convert a port to io_object_t.
     if( IP_VALID(port)) {
-        ip_lock(port);
+        iokit_lock_port(port);
         if( ip_active(port)) {
             obj = (io_object_t) port->ip_kobject;
            type = ip_kotype( port );
@@ -398,7 +420,7 @@ iokit_no_senders( mach_no_senders_notification_t * notification )
             else
                 obj = NULL;
        }
-        ip_unlock(port);
+        iokit_unlock_port(port);
 
         if( obj ) {
 
@@ -406,11 +428,15 @@ iokit_no_senders( mach_no_senders_notification_t * notification )
 
             if( KERN_SUCCESS != iokit_client_died( obj, port, type, &mscount ))
            {
-               /* Re-request no-senders notifications on the port. */
-               notify = ipc_port_make_sonce( port);
-               ip_lock( port);
-               ipc_port_nsrequest( port, mscount + 1, notify, &notify);
-               assert( notify == IP_NULL);
+               /* Re-request no-senders notifications on the port (if still active) */
+               ip_lock(port);
+               if (ip_active(port)) {
+                       notify = ipc_port_make_sonce_locked(port);
+                       ipc_port_nsrequest( port, mscount + 1, notify, &notify);
+                       /* port unlocked */
+                       if ( notify != IP_NULL)
+                               ipc_port_release_sonce(notify);
+               }
            }
             iokit_remove_reference( obj );
         }
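
The hardened re-request path mints the send-once right only while the port is verifiably still active, and releases any previous notification right that nsrequest hands back rather than leaking it. A model of that logic; the structure and helpers are stand-ins for the Mach port machinery:

#include <stdio.h>

struct model_port {
    int active;        /* ip_active(port) */
    int old_notify;    /* a previously registered notification right */
};

static void release_sonce(void)
{
    printf("released stale send-once right\n");  /* ipc_port_release_sonce() */
}

static void rearm_no_senders(struct model_port *p)
{
    /* ip_lock(port) taken here */
    if (p->active) {
        int notify = 1;                 /* ipc_port_make_sonce_locked() */
        /* nsrequest consumes `notify` and returns any prior right */
        int returned = p->old_notify;
        p->old_notify = notify;
        if (returned)                   /* notify != IP_NULL */
            release_sonce();
    }
    /* inactive port: no right is minted, so nothing can leak */
}

int main(void)
{
    struct model_port live = { 1, 1 };  /* active, stale request present */
    struct model_port dead = { 0, 0 };
    rearm_no_senders(&live);
    rearm_no_senders(&dead);
    return 0;
}
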
@@ -478,6 +504,9 @@ kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t p
        case kIOMapCopybackCache:
            flags = VM_WIMG_COPYBACK;
            break;
+       case kIOMapCopybackInnerCache:
+           flags = VM_WIMG_INNERWBACK;
+           break;
     }
 
     pmap_set_cache_attributes(pagenum, flags);
@@ -540,7 +569,7 @@ kern_return_t IOProtectCacheMode(vm_map_t __unused map, mach_vm_address_t __unus
     {
        ppnum_t ppnum = pmap_find_phys(pmap, va + off);
        if (ppnum)
-           pmap_enter(pmap, va + off, ppnum, prot, flags, TRUE);
+           pmap_enter(pmap, va + off, ppnum, prot, VM_PROT_NONE, flags, TRUE);
     }
 
     return (KERN_SUCCESS);
index 105edff0f74f4b150a3eb6b8fd248287161c5c2b..4956a3b406597690845b7edbcb18f26e22bbf2cb 100644 (file)
@@ -187,6 +187,7 @@ strcmp(
  *      comparison runs for at most "n" characters.
  */
 
+// ARM implementation in ../arm/strncmp.s
 int
 strncmp(
         const char *s1,
@@ -285,6 +286,7 @@ strcpy(
  *      to the "to" string.
  */
 
+// ARM implementation in ../arm/strncpy.s
 char *
 strncpy(
        char *s1, 
@@ -378,6 +380,8 @@ atoi_term(
  * outputs:
  *     length of s or max; whichever is smaller
  */
+
+// ARM implementation in ../arm/strnlen.s
 size_t 
 strnlen(const char *s, size_t max) {
        const char *es = s + max, *p = s;
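
These comments note that ARM targets override the portable C routines with the assembly in ../arm/ relative to this file; the C fallback's contract for strnlen() is min(strlen(s), max). A quick standalone check of that contract:

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>

int main(void)
{
    printf("%zu\n", strnlen("kernel", 3));   /* 3: capped at max */
    printf("%zu\n", strnlen("kernel", 16));  /* 6: actual length */
    return 0;
}
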
@@ -484,6 +488,8 @@ strlcat(char *dst, const char *src, size_t siz)
  * will be copied.  Always NUL terminates (unless siz == 0).
  * Returns strlen(src); if retval >= siz, truncation occurred.
  */
+
+// ARM implementation in ../arm/strlcpy.s
 size_t
 strlcpy(char *dst, const char *src, size_t siz)
 {
@@ -565,4 +571,3 @@ strprefix(register const char *s1, register const char *s2)
         }       
         return (1);
 }
-
index abe5ffe08a960f7e7ebdc6c678ddff7affc157a0..0c0a650e1b9a974bdab2c01071762fc0a1c19c96 100644 (file)
@@ -53,7 +53,7 @@ gssd_mach  999;
 
 serverprefix svc_;
 
-routine mach_gss_init_sec_context(
+Routine mach_gss_init_sec_context(
        server                  : mach_port_t;
        in  mech                : gssd_mechtype;
        in  intoken             : gssd_byte_buffer;
@@ -64,6 +64,7 @@ routine mach_gss_init_sec_context(
        in  gssd_flags          : uint32_t;
        inout context           : gssd_ctx;
        inout cred_handle       : gssd_cred;
+       ServerAuditToken atoken : audit_token_t;
        out  ret_flags          : uint32_t;
        out  key                : gssd_byte_buffer, dealloc;
        out outtoken            : gssd_byte_buffer, dealloc;
@@ -78,6 +79,7 @@ routine mach_gss_accept_sec_context(
        in  gssd_flags          : uint32_t;
        inout context           : gssd_ctx;
        inout cred_handle       : gssd_cred;
+       ServerAuditToken atoken : audit_token_t;
        out flags               : uint32_t;
        out uid                 : uint32_t;
        out gids                : gssd_gid_list;
@@ -93,7 +95,8 @@ simpleroutine mach_gss_log_error(
        in  uid                 : uint32_t;
        in  source              : gssd_string;
        in  major_stat          : uint32_t;
-       in  minor_stat          : uint32_t
+       in  minor_stat          : uint32_t;
+       ServerAuditToken atoken : audit_token_t
 );
 
 routine mach_gss_init_sec_context_v2(
@@ -109,6 +112,7 @@ routine mach_gss_init_sec_context_v2(
        inout gssd_flags        : uint32_t;
        inout context           : gssd_ctx;
        inout cred_handle       : gssd_cred;
+       ServerAuditToken atoken : audit_token_t;
        out  ret_flags          : uint32_t;
        out  key                : gssd_byte_buffer, dealloc;
        out outtoken            : gssd_byte_buffer, dealloc;
@@ -125,6 +129,7 @@ routine mach_gss_accept_sec_context_v2(
        inout gssd_flags        : uint32_t;
        inout context           : gssd_ctx;
        inout cred_handle       : gssd_cred;
+       ServerAuditToken atoken : audit_token_t;
        out flags               : uint32_t;
        out uid                 : uint32_t;
        out gids                : gssd_gid_list;
@@ -139,6 +144,7 @@ routine mach_gss_hold_cred(
        in  mech                : gssd_mechtype;
        in  nt                  : gssd_nametype;
        in  princ               : gssd_byte_buffer;
+       ServerAuditToken atoken : audit_token_t;
        out major_stat          : uint32_t;
        out minor_stat          : uint32_t
 );
@@ -148,6 +154,15 @@ routine mach_gss_unhold_cred(
        in  mech                : gssd_mechtype;
        in  nt                  : gssd_nametype;
        in  princ               : gssd_byte_buffer;
+       ServerAuditToken atoken : audit_token_t;
        out major_stat          : uint32_t;
        out minor_stat          : uint32_t
 );
+
+routine mach_gss_lookup(
+       server                  : mach_port_t;
+       in  uid                 : uint32_t;
+       in  asid                : int32_t;
+       ServerAuditToken atoken : audit_token_t;
+       out gssd_session_port   : mach_port_t
+);
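
The ServerAuditToken arguments added throughout let the gssd server derive the caller's identity from a kernel-filled token instead of trusting the message payload. A sketch of consuming such a token; the struct is a stand-in for Mach's audit_token_t, and the field positions (val[1] = euid, val[5] = pid) are an assumption about the common layout, not taken from this diff:

#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint32_t val[8];   /* stand-in for audit_token_t */
} model_audit_token_t;

static void check_caller(const model_audit_token_t *atoken)
{
    uint32_t euid = atoken->val[1];   /* assumed field order */
    uint32_t pid  = atoken->val[5];
    printf("request from pid %u (euid %u)\n", pid, euid);
}

int main(void)
{
    model_audit_token_t t = { { 501, 501, 20, 501, 20, 1234, 100005, 1 } };
    check_caller(&t);
    return 0;
}
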
index e3bde951ac57540d8a643030f80f0f0fc8139666..6015ea89c565bc4cf9530aebce7da668d6b27656 100644 (file)
 #define MAX_DISPLAY_STR 128
 #define MAX_PRINC_STR 1024
 
-typedef enum gssd_mechtype { GSSD_NO_MECH = -1, GSSD_KRB5_MECH = 0,
-                               GSSD_SPNEGO_MECH, GSSD_NTLM_MECH } gssd_mechtype;
-typedef enum gssd_nametype { GSSD_STRING_NAME = 0, GSSD_EXPORT, 
-                               GSSD_ANONYMOUS, GSSD_HOSTBASED, GSSD_USER, GSSD_MACHINE_UID,
-                               GSSD_STRING_UID, GSSD_KRB5_PRINCIPAL, GSSD_KRB5_REFERRAL, 
-                               GSSD_NTLM_PRINCIPAL, GSSD_NTLM_BLOB} gssd_nametype;
+typedef enum gssd_mechtype {
+       GSSD_NO_MECH = -1,
+       GSSD_KRB5_MECH = 0,
+       GSSD_SPNEGO_MECH,
+       GSSD_NTLM_MECH,
+       GSSD_IAKERB_MECH
+} gssd_mechtype;
+
+typedef enum gssd_nametype {
+       GSSD_STRING_NAME = 0,
+       GSSD_EXPORT, 
+       GSSD_ANONYMOUS,
+       GSSD_HOSTBASED,
+       GSSD_USER,
+       GSSD_MACHINE_UID,
+       GSSD_STRING_UID,
+       GSSD_KRB5_PRINCIPAL,
+       GSSD_KRB5_REFERRAL, 
+       GSSD_NTLM_PRINCIPAL,
+       GSSD_NTLM_BLOB,
+       GSSD_UUID
+} gssd_nametype;
+
 typedef char *gssd_string;
 typedef char *gssd_dstring;
 typedef uint8_t *gssd_byte_buffer;
index 44b99157982c7ea0a3d35a5b1b371ff69ea009c2..8a95f3f5334aa56e6c0b0e17be73e3a728842fc0 100644 (file)
@@ -67,7 +67,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/i386/vm_param.h>
 
 #include <i386/pmCPU.h>
 #include <architecture/i386/pio.h> /* inb() */
 #include <pexpert/i386/boot.h>
-#if    MACH_KDB
-#include <ddb/db_aout.h>
-#endif /* MACH_KDB */
 
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <pexpert/i386/efi.h>
 
 #include <kern/thread.h>
+#include <kern/sched.h>
 #include <mach-o/loader.h>
 #include <mach-o/nlist.h>
 
 
 static void machine_conf(void);
 
-extern int             default_preemption_rate;
 extern int             max_unsafe_quanta;
 extern int             max_poll_quanta;
 extern unsigned int    panic_is_inited;
@@ -151,12 +147,6 @@ volatile int panic_double_fault_cpu = -1;
 #error unsupported architecture
 #endif
 
-#ifdef __LP64__
-typedef struct nlist_64 kernel_nlist_t;
-#else
-typedef struct nlist kernel_nlist_t;
-#endif
-
 typedef struct _cframe_t {
     struct _cframe_t   *prev;
     uintptr_t          caller;
@@ -205,30 +195,6 @@ machine_startup(void)
 #endif
        hw_lock_init(&pbtlock);         /* initialize print backtrace lock */
 
-#if    MACH_KDB
-       /*
-        * Initialize KDB
-        */
-#if    DB_MACHINE_COMMANDS
-       db_machine_commands_install(ppc_db_commands);
-#endif /* DB_MACHINE_COMMANDS */
-       ddb_init();
-
-       if (boot_arg & DB_KDB)
-               current_debugger = KDB_CUR_DB;
-
-       /*
-        * Cause a breakpoint trap to the debugger before proceeding
-        * any further if the proper option bit was specified in
-        * the boot flags.
-        */
-       if (halt_in_debugger && (current_debugger == KDB_CUR_DB)) {
-               Debugger("inline call to debugger(machine_startup)");
-               halt_in_debugger = 0;
-               active_debugger =1;
-       }
-#endif /* MACH_KDB */
-
        if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
                default_preemption_rate = boot_arg;
        }
@@ -690,6 +656,11 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
 void
 machine_init(void)
 {
+#if __x86_64__
+       /* Now with VM up, switch to dynamically allocated cpu data */
+       cpu_data_realloc();
+#endif
+
         /* Ensure panic buffer is initialized. */
         debug_log_init();
 
@@ -805,12 +776,20 @@ machine_halt_cpu(void) {
         * writing, this is routine is chained through AppleSMC->
         * AppleACPIPlatform
         */
-
        if (PE_halt_restart)
                (*PE_halt_restart)(kPERestartCPU);
        pmCPUHalt(PM_HALT_DEBUG);
 }
 
+void
+DebuggerWithContext(
+       __unused unsigned int   reason,
+       __unused void           *ctx,
+       const char              *message)
+{
+       Debugger(message);
+}
+
 void
 Debugger(
        const char      *message)
index f9fd283bce1f433e268af5c6a6c07e612bfa071d..ddbb4b8ec3acd7ed665bb86793579b0fd200f6ec 100644 (file)
@@ -36,9 +36,8 @@
  *     Author: Bill Angell, Apple
  *     Date:   10/auht-five
  *
- *     Random diagnostics
+ *     Random diagnostics, augmented Derek Kumar 2011
  *
- *     Try to keep the x86 selectors in-sync with the ppc selectors.
  *
  */
 
 #include <i386/pmCPU.h>
 #include <i386/tsc.h>
 #include <mach/i386/syscall_sw.h>
-
-extern uint64_t lastNapClear;
+#include <kern/kalloc.h>
 
 diagWork        dgWork;
-uint64_t        lastNapClear = 0ULL;
 uint64_t        lastRuptClear = 0ULL;
 
-
 int 
 diagCall64(x86_saved_state_t * state)
 {
@@ -96,6 +92,7 @@ diagCall64(x86_saved_state_t * state)
 
        switch (selector) {     /* Select the routine */
        case dgRuptStat:        /* Suck Interruption statistics */
+               (void) ml_set_interrupts_enabled(TRUE);
                data = regs->rsi; /* Get the number of processors */
 
                if (data == 0) { /* If no location is specified for data, clear all
@@ -133,82 +130,30 @@ diagCall64(x86_saved_state_t * state)
                        curpos = curpos + (256 * sizeof(uint32_t) + 8); /* Point to next out put
                                                                         * slot */
                }
+               return 1;
                break;
-
-       default:                /* Handle invalid ones */
-               return 0;       /* Return an exception */
-
+#if    DEBUG
+       case dgGzallocTest:
+       {
+               (void) ml_set_interrupts_enabled(TRUE);
+               unsigned *ptr = (unsigned *)kalloc(1024);
+               kfree(ptr, 1024);
+               *ptr = 0x42;
        }
+               break;
+#endif
 
-       return 1;               /* Normal non-ast check return */
-}
-
-
-int 
-diagCall(x86_saved_state_t * state)
-{
-       uint32_t        stk, curpos, i, j;
-       uint32_t        selector, data;
-       int             err;
-       uint64_t        currNap, durNap;
-       x86_saved_state32_t     *regs;
-
-       assert(is_saved_state32(state));
-       regs = saved_state32(state);
-
-       if (!(dgWork.dgFlags & enaDiagSCs))
-               return 0;       /* If not enabled, cause an exception */
-
-       stk = regs->uesp;       /* Point to the stack */
-       err = copyin((user_addr_t) (stk + 4), (char *) &selector, sizeof(uint32_t));    /* Get the selector */
-       if (err) {
-               return 0;       /* Failed to fetch stack */
+#if    defined(__x86_64__)             
+       case    dgPermCheck:
+       {
+               (void) ml_set_interrupts_enabled(TRUE);
+               return pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
        }
-       switch (selector) {     /* Select the routine */
-       case dgRuptStat:        /* Suck Interruption statistics */
-
-               err = copyin((user_addr_t) (stk + 8), (char *) &data, sizeof(uint32_t));        /* Get the selector */
-
-               if (data == 0) {/* If number of processors is 0, clear all
-                                * counts */
-                       for (i = 0; i < real_ncpus; i++) {      /* Cycle through
-                                                                * processors */
-                               for (j = 0; j < 256; j++)
-                                       cpu_data_ptr[i]->cpu_hwIntCnt[j] = 0;
-                       }
-
-                       lastRuptClear = mach_absolute_time();   /* Get the time of clear */
-                       return 1;       /* Normal return */
-               }
-
-               (void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus)); /* Copy out number of
-                                                                                * processors */
-
-               currNap = mach_absolute_time(); /* Get the time now */
-               durNap = currNap - lastRuptClear;       /* Get the last interval
-                                                        * duration */
-               if (durNap == 0)
-                       durNap = 1;     /* This is a very short time, make it
-                                        * bigger */
-
-               curpos = (uint32_t)(data + sizeof(real_ncpus)); /* Point to the next
-                                                        * available spot */
-
-               for (i = 0; i < real_ncpus; i++) {      /* Move 'em all out */
-                       (void) copyout((char *) &durNap, curpos, 8);    /* Copy out the time
-                                                                        * since last clear */
-                       (void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t));    /* Copy out interrupt
-                                                                                                                        * data for this
-                                                                                                                        * processor */
-                       curpos = (uint32_t)(curpos + (256 * sizeof(uint32_t) + 8));     /* Point to next out put
-                                                                        * slot */
-               }
-
-               break;
+               break;
+#endif /* __x86_64__*/
 
        default:                /* Handle invalid ones */
                return 0;       /* Return an exception */
-
        }
 
        return 1;               /* Normal non-ast check return */
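
With the 32-bit diagCall() removed, diagCall64() is the single entry point, and it preserves the dgRuptStat copy-out layout: real_ncpus first, then per CPU an 8-byte interval followed by 256 32-bit interrupt counters. A standalone offset calculation over that layout (no kernel calls, sizes inferred from the code above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned ncpus = 4;
    size_t percpu = 8 + 256 * sizeof(uint32_t);   /* interval + counters */
    for (unsigned i = 0; i < ncpus; i++) {
        size_t off = sizeof(unsigned int) /* real_ncpus */ + i * percpu;
        printf("cpu %u data at offset %zu\n", i, off);
    }
    return 0;
}
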
index f5281c604722af70ce3eb452312e7e73e4222c13..3a2ffc17a1703b9d4cf21b45e82c9bac6dc32a1e 100644 (file)
@@ -35,7 +35,6 @@
 /*
  *     Here are the Diagnostic interface interfaces
  *     Lovingly crafted by Bill Angell using traditional methods
- *     Keep selectors in sync with the PPC version where possible.     
  */
 #ifdef KERNEL_PRIVATE
 
@@ -46,7 +45,6 @@
 #error This file is not useful on non-Intel
 #endif
 
-int diagCall(x86_saved_state_t *regs);
 int diagCall64(x86_saved_state_t *regs);
 
 #define diagSCnum 0x00006000
@@ -68,7 +66,7 @@ int diagCall64(x86_saved_state_t *regs);
 #define dgCPNull 14
 #define dgPerfMon 15
 #define dgMapPage 16
-#define dgScom 17
+#define dgGzallocTest 17
 #define dgBind 18
 #define dgPproc 19
 #define dgAcntg 20
@@ -77,28 +75,20 @@ int diagCall64(x86_saved_state_t *regs);
 #define        dgWar 23
 #define dgNapStat 24
 #define dgRuptStat 25
-
+#define        dgPermCheck 26
 
 typedef struct diagWork {                      /* Diagnostic work area */
 
        unsigned int dgLock;                    /* Lock if needed */
        unsigned int dgFlags;                   /* Flags */
 #define enaExpTrace 0x00000001
-#define enaExpTraceb 31
 #define enaUsrFCall 0x00000002
-#define enaUsrFCallb 30
 #define enaUsrPhyMp 0x00000004
-#define enaUsrPhyMpb 29
 #define enaDiagSCs  0x00000008
-#define enaDiagSCsb  28
 #define enaDiagDM  0x00000010
-#define enaDiagSDMb  27
 #define enaDiagEM  0x00000020
-#define enaDiagEMb  26
 #define enaDiagTrap  0x00000040
-#define enaDiagTrapb  25
 #define enaNotifyEM  0x00000080
-#define enaNotifyEMb  24
        
        unsigned int dgMisc0;
        unsigned int dgMisc1;
index a51f8ae925a06437752f58865c512cb4235299d1..a3a0d524bc1a23200954d92580196590b3d8dbd0 100644 (file)
 #include <gprof.h>
 #endif /* _KERNEL */
 
-#ifdef MACH_KERNEL
-#include <mach_kdb.h>
-#else  /* !MACH_KERNEL */
-#define        MACH_KDB 0
-#endif /* !MACH_KERNEL */
-
-
 #if    defined(MACH_KERNEL) || defined(_KERNEL)
 #include <gprof.h>
 #endif /* MACH_KERNEL || _KERNEL */
 #define Lgmemload(lab,reg)     movl Lgotoff(lab),reg
 #define Lgmemstore(reg,lab,tmp)        movl reg,Lgotoff(lab)
 
-#ifdef ASSEMBLER
-#if    MACH_KDB
-#include <ddb/stab.h>
-/*
- * This pseudo-assembler line is added so that there will be at least
- *     one N_SO entry in the symbol stable to define the current file name.
- */
-#endif /* MACH_KDB */
-
-#else /* NOT ASSEMBLER */
-
+#ifndef ASSEMBLER
 /* These defines are here for .c files that wish to reference global symbols
  * within __asm__ statements. 
  */
index 08afac97a326152831f56688252b968190984c9e..0f9213e7d63c4d7bbf29c16059999990dc39eaf5 100644 (file)
@@ -28,6 +28,7 @@
 
 /* Helper macros for 64-bit mode switching */
 
+#if __i386__
 /*
  * Long jump to 64-bit space from 32-bit compatibility mode.
  */
        .code32                                 ;\
 4:
 
+#else
+
+/*
+ * Long jump to 64-bit space from 32-bit compatibility mode.
+ * Effected, in fact, by a long return ..
+ *  - we push the 64-bit kernel code selector KERNEL64_CS
+ *  - call .+1 to get EIP on stack
+ *  - adjust return address after lret
+ *  - lret to return to next instruction but 64-bit mode.
+ */
+#define        ENTER_64BIT_MODE()                      \
+       push    $KERNEL64_CS                    ;\
+       call    1f                              ;\
+1:     addl    $(2f-1b), (%esp)                ;\
+       lret                                    ;\
+2:     .code64
+
+/*
+ * Long jump to 32-bit compatibility mode from 64-bit space.
+ * Effected by long return similar to ENTER_64BIT_MODE.
+ */
+#define ENTER_COMPAT_MODE()                    \
+       call    3f                              ;\
+3:     addq    $(4f-3b), (%rsp)                ;\
+       movl    $KERNEL32_CS, 4(%rsp)           ;\
+       lret                                    ;\
+4:     .code32
+
+#endif
index 57b222a14e7ad9bd2c131dab61a0865900e28c35..7ae3c29f1b7dd76a1ae7fac70a25f94c7fd67a0d 100644 (file)
@@ -146,6 +146,24 @@ thread_userstack(
        return (KERN_SUCCESS);
 }
 
+/*
+ * thread_userstackdefault:
+ *
+ * Return the default stack location for the
+ * thread, if otherwise unknown.
+ */
+kern_return_t
+thread_userstackdefault(
+       thread_t thread,
+       mach_vm_offset_t *default_user_stack)
+{
+       if (thread_is_64bit(thread)) {
+               *default_user_stack = VM_USRSTACK64;
+       } else {
+               *default_user_stack = VM_USRSTACK32;
+       }
+       return (KERN_SUCCESS);
+}
 
 kern_return_t
 thread_entrypoint(
@@ -229,7 +247,7 @@ machdep_syscall(x86_saved_state_t *state)
        int                     args[machdep_call_count];
        int                     trapno;
        int                     nargs;
-       machdep_call_t          *entry;
+       const machdep_call_t    *entry;
        x86_saved_state32_t     *regs;
 
        assert(is_saved_state32(state));
@@ -311,7 +329,7 @@ void
 machdep_syscall64(x86_saved_state_t *state)
 {
        int                     trapno;
-       machdep_call_t          *entry;
+       const machdep_call_t    *entry;
        x86_saved_state64_t     *regs;
 
        assert(is_saved_state64(state));
@@ -391,11 +409,24 @@ mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call
        case 2: args->arg2 = args32[1];
        case 1: args->arg1 = args32[0];
        }
-       if (call_number == 90) {
+       if (call_number == 10) {
+               /* munge the mach_vm_size_t for  mach_vm_allocate() */
+               args->arg3 = (((uint64_t)(args32[2])) | ((((uint64_t)(args32[3]))<<32)));
+               args->arg4 = args32[4];
+       } else if (call_number == 12) {
+               /* munge the mach_vm_address_t and mach_vm_size_t for mach_vm_deallocate() */
+               args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
+               args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32)));
+       } else if (call_number == 14) {
+               /* munge the mach_vm_address_t and mach_vm_size_t for  mach_vm_protect() */
+               args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
+               args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32)));
+               args->arg4 = args32[5];
+               args->arg5 = args32[6];
+       } else if (call_number == 90) {
                /* munge_l for mach_wait_until_trap() */
                args->arg1 = (((uint64_t)(args32[0])) | ((((uint64_t)(args32[1]))<<32)));
-       }
-       if (call_number == 93) {
+       } else if (call_number == 93) {
                /* munge_wl for mk_timer_arm_trap() */
                args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
        }
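
mach_call_arg_munger32() rebuilds each 64-bit argument from two consecutive 32-bit stack words, low word first; the new cases extend this to the mach_vm_allocate/deallocate/protect traps (call numbers 10, 12, 14). The reassembly itself, checked standalone:

#include <inttypes.h>
#include <stdio.h>

static uint64_t munge64(uint32_t lo, uint32_t hi)
{
    return ((uint64_t)lo) | (((uint64_t)hi) << 32);
}

int main(void)
{
    /* e.g. a mach_vm_size_t of 0x0000000100002000 arrives as two words */
    uint32_t args32[] = { 0x00002000, 0x00000001 };
    printf("0x%016" PRIx64 "\n", munge64(args32[0], args32[1]));
    return 0;
}
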
@@ -460,15 +491,19 @@ mach_call_munger(x86_saved_state_t *state)
 #ifdef MACH_BSD
        mach_kauth_cred_uthread_update();
 #endif
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
-                       args.arg1, args.arg2, args.arg3, args.arg4, 0);
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
+               args.arg1, args.arg2, args.arg3, args.arg4, 0);
 
        retval = mach_call(&args);
 
        DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
-                       retval, 0, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
+               retval, 0, 0, 0, 0);
+
        regs->eax = retval;
 
        throttle_lowpri_io(TRUE);
@@ -497,10 +532,9 @@ mach_call_munger64(x86_saved_state_t *state)
                "mach_call_munger64: code=%d(%s)\n",
                call_number, mach_syscall_name_table[call_number]);
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
-                                          (call_number)) | DBG_FUNC_START,
-                             regs->rdi, regs->rsi,
-                             regs->rdx, regs->r10, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+               MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START,
+               regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);
        
        if (call_number < 0 || call_number >= mach_trap_count) {
                i386_exception(EXC_SYSCALL, regs->rax, 1);
@@ -535,9 +569,9 @@ mach_call_munger64(x86_saved_state_t *state)
        
        DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,
-                                          (call_number)) | DBG_FUNC_END,
-                             regs->rax, 0, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+               MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, 
+               regs->rax, 0, 0, 0, 0);
 
        throttle_lowpri_io(TRUE);
 
index 375abc7c1d95353f1f66f662d91c380ce9e7ca01..7076ff53376932e7731cb771ae1c5e79ccbfdc6e 100644 (file)
@@ -74,9 +74,14 @@ extern       commpage_descriptor*    commpage_64_routines[];
 
 extern vm_map_t        commpage32_map; // the shared submap, set up in vm init
 extern vm_map_t        commpage64_map; // the shared submap, set up in vm init
+extern vm_map_t        commpage_text32_map;    // the shared submap, set up in vm init
+extern vm_map_t        commpage_text64_map;    // the shared submap, set up in vm init
+
 
 char   *commPagePtr32 = NULL;          // virtual addr in kernel map of 32-bit commpage
 char   *commPagePtr64 = NULL;          // ...and of 64-bit commpage
+char   *commPageTextPtr32 = NULL;              // virtual addr in kernel map of 32-bit commpage
+char   *commPageTextPtr64 = NULL;              // ...and of 64-bit commpage
 uint32_t     _cpu_capabilities = 0;          // define the capability vector
 
 int    noVMX = 0;              /* if true, do not set kHasAltivec in ppc _cpu_capabilities */
@@ -105,22 +110,24 @@ decl_simple_lock_data(static,commpage_active_cpus_lock);
 static  void*
 commpage_allocate( 
        vm_map_t        submap,                 // commpage32_map or commpage64_map
-       size_t          area_used )             // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
+       size_t          area_used,              // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
+       vm_prot_t       uperm)
 {
        vm_offset_t     kernel_addr = 0;        // address of commpage in kernel map
        vm_offset_t     zero = 0;
        vm_size_t       size = area_used;       // size actually populated
        vm_map_entry_t  entry;
        ipc_port_t      handle;
+       kern_return_t   kr;
 
        if (submap == NULL)
                panic("commpage submap is null");
 
-       if (vm_map(kernel_map,&kernel_addr,area_used,0,VM_FLAGS_ANYWHERE,NULL,0,FALSE,VM_PROT_ALL,VM_PROT_ALL,VM_INHERIT_NONE))
-               panic("cannot allocate commpage");
+       if ((kr = vm_map(kernel_map,&kernel_addr,area_used,0,VM_FLAGS_ANYWHERE,NULL,0,FALSE,VM_PROT_ALL,VM_PROT_ALL,VM_INHERIT_NONE)))
+               panic("cannot allocate commpage %d", kr);
 
-       if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+area_used,VM_PROT_DEFAULT,FALSE))
-               panic("cannot wire commpage");
+       if ((kr = vm_map_wire(kernel_map,kernel_addr,kernel_addr+area_used,VM_PROT_DEFAULT,FALSE)))
+               panic("cannot wire commpage: %d", kr);
 
        /* 
         * Now that the object is created and wired into the kernel map, mark it so that no delay
@@ -130,19 +137,19 @@ commpage_allocate(
         *
         * JMM - What we really need is a way to create it like this in the first place.
         */
-       if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map)
-               panic("cannot find commpage entry");
+       if (!(kr = vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry)) || entry->is_sub_map)
+               panic("cannot find commpage entry %d", kr);
        entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
 
-       if (mach_make_memory_entry( kernel_map,         // target map
+       if ((kr = mach_make_memory_entry( kernel_map,           // target map
                                    &size,              // size 
                                    kernel_addr,        // offset (address in kernel map)
-                                   VM_PROT_ALL,        // map it RWX
+                                   uperm,      // protections as specified
                                    &handle,            // this is the object handle we get
-                                   NULL ))             // parent_entry (what is this?)
-               panic("cannot make entry for commpage");
+                                   NULL )))            // parent_entry (what is this?)
+               panic("cannot make entry for commpage %d", kr);
 
-       if (vm_map_64(  submap,                         // target map (shared submap)
+       if ((kr = vm_map_64(    submap,                         // target map (shared submap)
                        &zero,                          // address (map into 1st page in submap)
                        area_used,                      // size
                        0,                              // mask
@@ -150,19 +157,18 @@ commpage_allocate(
                        handle,                         // port is the memory entry we just made
                        0,                              // offset (map 1st page in memory entry)
                        FALSE,                          // copy
-                       VM_PROT_READ|VM_PROT_EXECUTE,   // cur_protection (R-only in user map)
-                       VM_PROT_READ|VM_PROT_EXECUTE,   // max_protection
-                       VM_INHERIT_SHARE ))             // inheritance
-               panic("cannot map commpage");
+                       uperm,   // cur_protection (as specified by caller)
+                       uperm,   // max_protection
+                       VM_INHERIT_SHARE )))             // inheritance
+               panic("cannot map commpage %d", kr);
 
        ipc_port_release(handle);
-       
-       // Initialize the text section of the commpage with INT3
-       char *commpage_ptr = (char*)(intptr_t)kernel_addr;
-       vm_size_t i;
-       for( i = _COMM_PAGE_TEXT_START - _COMM_PAGE_START_ADDRESS; i < size; i++ )
-               // This is the hex for the X86 opcode INT3
-               commpage_ptr[i] = 0xCC;
+       /* Make the kernel mapping non-executable. This cannot be done
+        * at the time of map entry creation as mach_make_memory_entry
+        * cannot handle disjoint permissions at this time.
+        */
+       kr = vm_protect(kernel_map, kernel_addr, area_used, FALSE, VM_PROT_READ | VM_PROT_WRITE);
+       assert (kr == KERN_SUCCESS);
 
        return (void*)(intptr_t)kernel_addr;                     // return address in kernel map
 }
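
The reworked commpage_allocate() leaves one physical region with two views: the kernel-map alias is read/write (and, after the new vm_protect() call, never executable), while the user submap mapping receives exactly the caller's uperm. A rough user-space analogue of the dual-mapping idea, sketched with POSIX shared memory; the name /commpage_demo is made up, and hardened systems may refuse the PROT_EXEC mapping:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
        const size_t len = 4096;
        int fd = shm_open("/commpage_demo", O_CREAT | O_RDWR, 0600);
        if (fd < 0 || ftruncate(fd, len) != 0)
                return 1;

        /* "Kernel-side" view: read/write, never executable. */
        unsigned char *wview = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_SHARED, fd, 0);
        /* "User-side" view: read/execute only, like uperm = R|X. */
        unsigned char *xview = mmap(NULL, len, PROT_READ | PROT_EXEC,
            MAP_SHARED, fd, 0);
        if (wview == MAP_FAILED || xview == MAP_FAILED)
                return 1;

        memset(wview, 0xCC, len);       /* populate through the writable alias */
        printf("write view %p, exec view %p, first byte 0x%02x\n",
            (void *)wview, (void *)xview, xview[0]);

        shm_unlink("/commpage_demo");
        return 0;
}
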
@@ -331,21 +337,20 @@ commpage_populate_one(
        char **         kernAddressPtr, // &commPagePtr32 or &commPagePtr64
        size_t          area_used,      // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
        commpage_address_t base_offset, // will become commPageBaseOffset
-       commpage_descriptor** commpage_routines, // list of routine ptrs for this commpage
        commpage_time_data** time_data, // &time_data32 or &time_data64
-       const char*     signature )     // "commpage 32-bit" or "commpage 64-bit"
+       const char*     signature,      // "commpage 32-bit" or "commpage 64-bit"
+       vm_prot_t       uperm)
 {
        uint8_t c1;
        short   c2;
        int         c4;
        uint64_t c8;
        uint32_t        cfamily;
-       commpage_descriptor **rd;
        short   version = _COMM_PAGE_THIS_VERSION;
 
        next = 0;
        cur_routine = 0;
-       commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used );
+       commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used, uperm );
        *kernAddressPtr = commPagePtr;                          // save address either in commPagePtr32 or 64
        commPageBaseOffset = base_offset;
 
@@ -380,12 +385,6 @@ commpage_populate_one(
        cfamily = cpuid_info()->cpuid_cpufamily;
        commpage_stuff(_COMM_PAGE_CPUFAMILY, &cfamily, 4);
 
-       for( rd = commpage_routines; *rd != NULL ; rd++ )
-               commpage_stuff_routine(*rd);
-
-       if (!matched)
-               panic("commpage no match on last routine");
-
        if (next > _COMM_PAGE_END)
                panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr);
 
@@ -408,9 +407,9 @@ commpage_populate( void )
                                &commPagePtr32,
                                _COMM_PAGE32_AREA_USED,
                                _COMM_PAGE32_BASE_ADDRESS,
-                               commpage_32_routines, 
                                &time_data32,
-                               "commpage 32-bit");
+                               "commpage 32-bit",
+                               VM_PROT_READ);
 #ifndef __LP64__
        pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, 
                           _COMM_PAGE32_AREA_USED/INTEL_PGBYTES);
@@ -422,9 +421,9 @@ commpage_populate( void )
                                        &commPagePtr64,
                                        _COMM_PAGE64_AREA_USED,
                                        _COMM_PAGE32_START_ADDRESS, /* commpage addresses are relative to 32-bit commpage placement */
-                                       commpage_64_routines, 
                                        &time_data64,
-                                       "commpage 64-bit");
+                                       "commpage 64-bit",
+                                       VM_PROT_READ);
 #ifndef __LP64__
                pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS, 
                                   _COMM_PAGE64_AREA_USED/INTEL_PGBYTES);
@@ -437,6 +436,63 @@ commpage_populate( void )
        rtc_nanotime_init_commpage();
 }
 
+/* Fill in the common routines during kernel initialization. 
+ * This is called before user-mode code is running.
+ */
+void commpage_text_populate( void ){
+       commpage_descriptor **rd;
+
+       next = 0;
+       cur_routine = 0;
+       commPagePtr = (char *) commpage_allocate(commpage_text32_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
+       commPageTextPtr32 = commPagePtr;
+
+       char *cptr = commPagePtr;
+       int i = 0;
+       for (; i < _COMM_PAGE_TEXT_AREA_USED; i++) {
+               cptr[i] = 0xCC;
+       }
+
+       commPageBaseOffset = _COMM_PAGE_TEXT_START;
+       for (rd = commpage_32_routines; *rd != NULL; rd++) {
+               commpage_stuff_routine(*rd);
+       }
+       if (!matched)
+               panic("commpage_text no match for last routine");
+
+#ifndef __LP64__
+       pmap_commpage32_init((vm_offset_t) commPageTextPtr32, _COMM_PAGE_TEXT_START,
+                          _COMM_PAGE_TEXT_AREA_USED/INTEL_PGBYTES);
+#endif
+
+       if (_cpu_capabilities & k64Bit) {
+               next = 0;
+               cur_routine = 0;
+               commPagePtr = (char *) commpage_allocate(commpage_text64_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
+               commPageTextPtr64 = commPagePtr;
+
+               cptr = commPagePtr;
+               for (i = 0; i < _COMM_PAGE_TEXT_AREA_USED; i++) {
+                       cptr[i] = 0xCC;
+               }
+
+               for (rd = commpage_64_routines; *rd != NULL; rd++) {
+                       commpage_stuff_routine(*rd);
+               }
+
+#ifndef __LP64__
+               pmap_commpage64_init((vm_offset_t) commPageTextPtr64, _COMM_PAGE_TEXT_START,
+                                  _COMM_PAGE_TEXT_AREA_USED/INTEL_PGBYTES);
+#endif
+       }
+
+       if (!matched)
+               panic("commpage_text no match for last routine");
+
+       if (next > _COMM_PAGE_TEXT_END)
+               panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr);
+}
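
commpage_text_populate() pre-fills each text page with 0xCC, the one-byte x86 INT3 breakpoint opcode, before copying the routines in; a stray jump into an unpopulated slot then traps immediately instead of executing leftover bytes. A trivial sketch of the poison-then-populate idea:

#include <stdio.h>
#include <string.h>

#define TEXT_AREA_USED 4096
#define INT3 0xCC       /* x86 one-byte breakpoint opcode */

int
main(void)
{
        unsigned char page[TEXT_AREA_USED];

        /* Poison the whole page first; real routines are copied over it
         * afterwards.  Any byte never overwritten stays INT3 and traps
         * if it is ever executed.
         */
        memset(page, INT3, sizeof(page));
        printf("page[0]=0x%02x page[%d]=0x%02x\n",
            page[0], TEXT_AREA_USED - 1, page[TEXT_AREA_USED - 1]);
        return 0;
}
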
 
 /* Update commpage nanotime information.  Note that we interleave
  * setting the 32- and 64-bit commpages, in order to keep nanotime more
@@ -618,13 +674,16 @@ commpage_update_active_cpus(void)
        simple_unlock(&commpage_active_cpus_lock);
 }
 
+extern user32_addr_t commpage_text32_location;
+extern user64_addr_t commpage_text64_location;
 
 /* Check to see if a given address is in the Preemption Free Zone (PFZ) */
 
 uint32_t
 commpage_is_in_pfz32(uint32_t addr32)
 {
-       if ( (addr32 >= _COMM_PAGE_PFZ_START) && (addr32 < _COMM_PAGE_PFZ_END)) {
+       if ( (addr32 >= (commpage_text32_location + _COMM_TEXT_PFZ_START_OFFSET)) 
+               && (addr32 < (commpage_text32_location+_COMM_TEXT_PFZ_END_OFFSET))) {
                return 1;
        }
        else
@@ -634,8 +693,8 @@ commpage_is_in_pfz32(uint32_t addr32)
 uint32_t
 commpage_is_in_pfz64(addr64_t addr64)
 {
-       if ( (addr64 >= _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_START))
-            && (addr64 <  _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_END))) {
+       if ( (addr64 >= (commpage_text64_location + _COMM_TEXT_PFZ_START_OFFSET))
+            && (addr64 <  (commpage_text64_location + _COMM_TEXT_PFZ_END_OFFSET))) {
                return 1;
        }
        else
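
With the commpage text region now slid per boot, the PFZ membership tests above reduce to base-plus-offset range checks. A sketch using the offsets defined in cpu_capabilities.h; is_in_pfz and the sample base address are illustrative, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define PFZ_START_OFFSET 0xc00  /* _COMM_TEXT_PFZ_START_OFFSET */
#define PFZ_END_OFFSET   0xfff  /* _COMM_TEXT_PFZ_END_OFFSET */

/* With a slid commpage, "is this PC in the PFZ?" becomes a range test
 * against base + fixed offsets rather than compile-time absolute
 * addresses.
 */
static int
is_in_pfz(uint64_t text_base, uint64_t pc)
{
        return pc >= text_base + PFZ_START_OFFSET &&
               pc <  text_base + PFZ_END_OFFSET;
}

int
main(void)
{
        uint64_t base = 0x00007ffffff2a000ULL;  /* hypothetical slid base */

        printf("%d %d\n",
            is_in_pfz(base, base + 0xc40),      /* inside  -> 1 */
            is_in_pfz(base, base + 0x200));     /* outside -> 0 */
        return 0;
}
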
index fa2bbf82c483b9e98b15998e977ecc7a587463f8..e994ae94569d66f993c4197557a72a5e6bfdf852 100644 (file)
@@ -56,6 +56,9 @@
  * But we still must take a spinlock to serialize, and in case of page faults.
  */
 
+/* Work around 10062261 with a dummy non-local symbol */
+fifo_queue_dummy_symbol:       
+
 /*
  *     typedef volatile struct {
  *             void    *opaque1;  <-- ptr to first queue element or null
index 1794228ff9194252878cd48c502ca2ea52ce4a14..a7226180bc88af4947c097f5b77bd46b65406ef4 100644 (file)
@@ -82,6 +82,8 @@
  * are located in the PFZ.
  */
 
+/* Work around 10062261 with a dummy non-local symbol */
+pthreads_dummy_symbol:
 
 /* Internal routine to handle pthread mutex lock operation.  This is in the PFZ.
  *     %edi == ptr to LVAL/UVAL pair
index eee6a8173eb72cffecb8e8e79ba9099b1411fcb4..3cf464e3414fb2a878c35fe889053867eebfebce 100644 (file)
@@ -96,20 +96,20 @@ int _NumCPUs( void )
  * Because Mach VM cannot map the last page of an address space, we don't use it.
  */
  
-#define        _COMM_PAGE32_AREA_LENGTH        ( 2 * 4096 )                            /* reserved length of entire comm area */
+#define        _COMM_PAGE32_AREA_LENGTH        ( 1 * 4096 )                            /* reserved length of entire comm area */
 #define _COMM_PAGE32_BASE_ADDRESS      ( 0xffff0000 )                          /* base address of allocated memory */
 #define _COMM_PAGE32_START_ADDRESS     ( _COMM_PAGE32_BASE_ADDRESS )   /* address traditional commpage code starts on */
-#define _COMM_PAGE32_AREA_USED         ( 2 * 4096 )                            /* this is the amt actually allocated */
+#define _COMM_PAGE32_AREA_USED         ( 1 * 4096 )                            /* this is the amt actually allocated */
 #define _COMM_PAGE32_SIGS_OFFSET       0x8000                                      /* offset to routine signatures */
 
-#define        _COMM_PAGE64_AREA_LENGTH        ( 2 * 1024 * 1024 )                     /* reserved length of entire comm area (2MB) */
+#define        _COMM_PAGE64_AREA_LENGTH        ( 1 * 4096 )                    /* reserved length of entire comm area */
 #ifdef __ASSEMBLER__
 #define _COMM_PAGE64_BASE_ADDRESS      ( 0x00007fffffe00000 )   /* base address of allocated memory */
 #else /* __ASSEMBLER__ */
 #define _COMM_PAGE64_BASE_ADDRESS      ( 0x00007fffffe00000ULL )   /* base address of allocated memory */
 #endif /* __ASSEMBLER__ */
 #define _COMM_PAGE64_START_ADDRESS     ( _COMM_PAGE64_BASE_ADDRESS )   /* address traditional commpage code starts on */
-#define _COMM_PAGE64_AREA_USED         ( 2 * 4096 )                            /* this is the amt actually populated */
+#define _COMM_PAGE64_AREA_USED         ( 1 * 4096 )                            /* this is the amt actually populated */
 
 /* no need for an Objective-C area on Intel */
 #define _COMM_PAGE32_OBJC_SIZE         0ULL
@@ -183,6 +183,8 @@ int _NumCPUs( void )
 #define _COMM_PAGE_GTOD_NS_BASE                (_COMM_PAGE_START_ADDRESS+0x070)        /* used by gettimeofday() */
 #define _COMM_PAGE_GTOD_SEC_BASE       (_COMM_PAGE_START_ADDRESS+0x078)        /* used by gettimeofday() */
 
+#define _COMM_PAGE_END                 (_COMM_PAGE_START_ADDRESS+0xfff)        /* end of common page */
+
 /* Warning: kernel commpage.h has a matching c typedef for the following.  They must be kept in sync.  */
 /* These offsets are from _COMM_PAGE_TIME_DATA_START */
 
@@ -199,22 +201,40 @@ int _NumCPUs( void )
  /* When new jump table entries are added, corresponding symbols should be added below        */
  /* New slots should be allocated with at least 16-byte alignment. Some like bcopy require    */
  /* 32-byte alignment, and should be aligned as such in the assembly source before they are relocated */
-#define _COMM_PAGE_TEXT_START           (_COMM_PAGE_START_ADDRESS+0x080)    /* start of text section */
+#define _COMM_PAGE_TEXT_START          (_COMM_PAGE_START_ADDRESS+0x1000)
+#define _COMM_PAGE32_TEXT_START         (_COMM_PAGE32_BASE_ADDRESS+0x1000)    /* start of text section */
+#define _COMM_PAGE64_TEXT_START                (_COMM_PAGE64_BASE_ADDRESS+0x1000)
+#define _COMM_PAGE_TEXT_AREA_USED      ( 1 * 4096 )
+#define _COMM_PAGE_TEXT_AREA_LENGTH    ( 1 * 4096 )
+#define _PFZ32_SLIDE_RANGE             ( 14 ) /* pages between 0xfffff000 and _COMM_PAGE32_TEXT_START */
+#define _PFZ64_SLIDE_RANGE             ( 510 ) /* pages between 0x00007ffffffff000 and _COMM_PAGE64_TEXT_START */
+
+/* Start offsets in the commpage text region for each jump table entry;
+ * the names are shortened to _COMM_TEXT_[label]_OFFSET.
+ */
 
-#define _COMM_PAGE_PREEMPT             (_COMM_PAGE_START_ADDRESS+0x5a0)        /* used by PFZ code */
-#define _COMM_PAGE_BACKOFF             (_COMM_PAGE_START_ADDRESS+0x1600)       /* called from PFZ */
+#define _COMM_TEXT_PREEMPT_OFFSET              (0x5a0) /* called from within the PFZ */
+#define _COMM_TEXT_BACKOFF_OFFSET              (0x600) /* called from PFZ */
+#define _COMM_TEXT_PFZ_START_OFFSET            (0xc00) /* offset for Preemption Free Zone */
+#define _COMM_TEXT_PFZ_ENQUEUE_OFFSET          (0xc00) /* internal FIFO enqueue */
+#define _COMM_TEXT_PFZ_DEQUEUE_OFFSET          (0xc80) /* internal FIFO dequeue */
+#define _COMM_TEXT_PFZ_MUTEX_LOCK_OFFSET       (0xd00) /* internal pthread_mutex_lock() */
+#define _COMM_TEXT_UNUSED_OFFSET               (0xd80) /* end of routines in text page */
+#define _COMM_TEXT_PFZ_END_OFFSET              (0xfff) /* offset for end of PFZ */
 
-#define _COMM_PAGE_PFZ_START           (_COMM_PAGE_START_ADDRESS+0x1c00)       /* start of Preemption Free Zone */
 
-#define _COMM_PAGE_PFZ_ENQUEUE         (_COMM_PAGE_START_ADDRESS+0x1c00)       /* internal routine for FIFO enqueue */
-#define _COMM_PAGE_PFZ_DEQUEUE         (_COMM_PAGE_START_ADDRESS+0x1c80)       /* internal routine for FIFO dequeue */
-#define        _COMM_PAGE_PFZ_MUTEX_LOCK       (_COMM_PAGE_START_ADDRESS+0x1d00)       /* internal routine for pthread_mutex_lock() */
+#define _COMM_PAGE_PREEMPT             (_COMM_PAGE_TEXT_START+_COMM_TEXT_PREEMPT_OFFSET)
+#define _COMM_PAGE_BACKOFF             (_COMM_PAGE_TEXT_START+_COMM_TEXT_BACKOFF_OFFSET)       
 
-#define        _COMM_PAGE_UNUSED6              (_COMM_PAGE_START_ADDRESS+0x1d80)       /* unused space for PFZ code up to 0x1fff */
+#define _COMM_PAGE_PFZ_START           (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_START_OFFSET)
 
-#define _COMM_PAGE_PFZ_END             (_COMM_PAGE_START_ADDRESS+0x1fff)       /* end of Preemption Free Zone */
+#define _COMM_PAGE_PFZ_ENQUEUE         (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_ENQUEUE_OFFSET)
+#define _COMM_PAGE_PFZ_DEQUEUE         (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_DEQUEUE_OFFSET)
+#define        _COMM_PAGE_PFZ_MUTEX_LOCK       (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_MUTEX_LOCK_OFFSET)
 
-#define _COMM_PAGE_END                 (_COMM_PAGE_START_ADDRESS+0x1fff)       /* end of common page - insert new stuff here */
+#define        _COMM_PAGE_UNUSED6              (_COMM_PAGE_TEXT_START+_COMM_TEXT_UNUSED_OFFSET)        
+#define _COMM_PAGE_PFZ_END             (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_END_OFFSET)
+#define _COMM_PAGE_TEXT_END            (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_END_OFFSET) /* end of common text page */
 
 /* _COMM_PAGE_COMPARE_AND_SWAP{32,64}B are not used on x86 and are
 * maintained here for source compatibility.  These will be removed at
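
Shrinking the data commpage to a single page and moving the routines into a separate one-page text region is what makes the text mapping slidable: _PFZ32_SLIDE_RANGE and _PFZ64_SLIDE_RANGE count the page-aligned positions available between the fixed text start and the top of each address space. A toy rendering of the 32-bit arithmetic (how the kernel actually chooses and applies the slide is outside this hunk):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE              0x1000
#define COMM_PAGE32_TEXT_START 0xffff1000u      /* base + 0x1000, per above */
#define PFZ32_SLIDE_RANGE      14               /* pages up to 0xfffff000 */

/* The text page can sit at the fixed start or be slid up by a whole
 * number of pages, giving PFZ32_SLIDE_RANGE possible positions.
 */
int
main(void)
{
        unsigned slide = (unsigned)(rand() % PFZ32_SLIDE_RANGE);
        uint32_t text_base = COMM_PAGE32_TEXT_START + slide * PAGE_SIZE;

        printf("slide=%u pages, 32-bit commpage text at 0x%08x\n",
            slide, (unsigned)text_base);
        return 0;
}
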
index 22de8b2b002ea429d3f43ac15f919ee0faef4ec0..3473fbd3ac0bba2b84f8f2823d2b8603b002094a 100644 (file)
@@ -75,9 +75,6 @@ typedef struct rtclock_timer {
 
 typedef struct {
        struct i386_tss         *cdi_ktss;
-#if    MACH_KDB
-       struct i386_tss         *cdi_dbtss;
-#endif /* MACH_KDB */
        struct __attribute__((packed)) {
                uint16_t size;
                struct fake_descriptor *ptr;
@@ -97,9 +94,6 @@ typedef enum {
 
 typedef struct {
        struct x86_64_tss               *cdi_ktss;
-#if    MACH_KDB
-       struct x86_64_tss               *cdi_dbtss;
-#endif /* MACH_KDB */
        struct __attribute__((packed)) {
                uint16_t size;
                void *ptr;
@@ -194,15 +188,6 @@ typedef struct cpu_data
        struct fake_descriptor  *cpu_ldtp;
        cpu_desc_index_t        cpu_desc_index;
        int                     cpu_ldt;
-#ifdef MACH_KDB
-       /* XXX Untested: */
-       int                     cpu_db_pass_thru;
-       vm_offset_t             cpu_db_stacks;
-       void                    *cpu_kdb_saved_state;
-       spl_t                   cpu_kdb_saved_ipl;
-       int                     cpu_kdb_is_slave;
-       int                     cpu_kdb_active;
-#endif /* MACH_KDB */
        boolean_t               cpu_iflag;
        boolean_t               cpu_boot_complete;
        int                     cpu_hibernate;
@@ -235,8 +220,10 @@ typedef struct cpu_data
                                                           * validity flag.
                                                           */
        pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
+#if    CONFIG_COUNTERS
        thread_t                csw_old_thread;
        thread_t                csw_new_thread;
+#endif /* CONFIG_COUNTERS */
 #if    defined(__x86_64__)
        uint32_t                cpu_pmap_pcid_enabled;
        pcid_t                  cpu_active_pcid;
@@ -409,5 +396,6 @@ cpu_datap(int cpu)
 }
 
 extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
+extern void cpu_data_realloc(void);
 
 #endif /* I386_CPU_DATA */
index a29bfda2692f2e96f2161a9e94cf4458f9a63265..c36eb89b6def29ccb5dd0de4c247d2f95d212bb1 100644 (file)
@@ -64,10 +64,10 @@ decl_simple_lock_data(, x86_topo_lock);
 static struct cpu_cache {
        int     level;  int     type;
 } cpu_caches [LCACHE_MAX] = {
-       [L1D] { 1,      CPU_CACHE_TYPE_DATA },
-       [L1I] { 1,      CPU_CACHE_TYPE_INST },
-       [L2U] { 2,      CPU_CACHE_TYPE_UNIF },
-       [L3U] { 3,      CPU_CACHE_TYPE_UNIF },
+       [L1D] = { 1,    CPU_CACHE_TYPE_DATA },
+       [L1I] = { 1,    CPU_CACHE_TYPE_INST },
+       [L2U] = { 2,    CPU_CACHE_TYPE_UNIF },
+       [L3U] = { 3,    CPU_CACHE_TYPE_UNIF },
 };
 
 static boolean_t
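
The cpu_caches hunk is a pure syntax repair: '[L1D] { ... }' without the '=' is an obsolete GNU extension, and C99 designated initializers require '[L1D] = { ... }'. A self-contained example; plain ints stand in for the CPU_CACHE_TYPE_* constants:

#include <stdio.h>

enum { L1D, L1I, L2U, L3U, LCACHE_MAX };

struct cpu_cache {
        int level;
        int type;
};

/* C99 designated initializers: entries may appear in any order and
 * unnamed slots are zero-filled.  The old "[L1D] { ... }" spelling
 * (no '=') is a GNU extension that newer compilers reject.
 */
static struct cpu_cache cpu_caches[LCACHE_MAX] = {
        [L3U] = { 3, 2 },
        [L1D] = { 1, 0 },
        [L1I] = { 1, 1 },
        [L2U] = { 2, 2 },
};

int
main(void)
{
        for (int i = 0; i < LCACHE_MAX; i++)
                printf("cache %d: level %d type %d\n",
                    i, cpu_caches[i].level, cpu_caches[i].type);
        return 0;
}
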
index 8050d75d661d7a57b919f5320e03807c47cb2d3c..abb0b94ed82c23590b89d9c53218b8ea253b3951 100644 (file)
  * @OSF_COPYRIGHT@
  */
 #include <platforms.h>
-#include <mach_kdb.h>
 #include <vm/vm_page.h>
 #include <pexpert/pexpert.h>
 
 #include <i386/cpuid.h>
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif
 
 static boolean_t       cpuid_dbg
 #if DEBUG
@@ -633,7 +622,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
        DBG(" features            : 0x%016llx\n", info_p->cpuid_features);
        DBG(" extfeatures         : 0x%016llx\n", info_p->cpuid_extfeatures);
        DBG(" logical_per_package : %d\n", info_p->cpuid_logical_per_package);
-        DBG(" microcode_version   : 0x%08x\n", info_p->cpuid_microcode_version);
+       DBG(" microcode_version   : 0x%08x\n", info_p->cpuid_microcode_version);
 
        /* Fold in the Invariant TSC feature bit, if present */
        if (info_p->cpuid_max_ext >= 0x80000007) {
@@ -1089,49 +1078,70 @@ cpuid_leaf7_features(void)
 {
        return cpuid_info()->cpuid_leaf7_features;
 }
 
-#if MACH_KDB
+static i386_vmm_info_t *_cpuid_vmm_infop = NULL;
+static i386_vmm_info_t _cpuid_vmm_info;
 
-/*
- *     Display the cpuid
- * *           
- *     cp
- */
-void 
-db_cpuid(__unused db_expr_t addr,
-        __unused int have_addr,
-        __unused db_expr_t count,
-        __unused char *modif)
+static void
+cpuid_init_vmm_info(i386_vmm_info_t *info_p)
 {
+       uint32_t        reg[4];
+       uint32_t        max_vmm_leaf;
 
-       uint32_t        i, mid;
-       uint32_t        cpid[4];
+       bzero(info_p, sizeof(*info_p));
 
-       do_cpuid(0, cpid);      /* Get the first cpuid which is the number of
-                                * basic ids */
-       db_printf("%08X - %08X %08X %08X %08X\n",
-               0, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]);
+       if (!cpuid_vmm_present())
+               return;
 
-       mid = cpid[eax];        /* Set the number */
-       for (i = 1; i <= mid; i++) {    /* Dump 'em out */
-               do_cpuid(i, cpid);      /* Get the next */
-               db_printf("%08X - %08X %08X %08X %08X\n",
-                       i, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]);
+       DBG("cpuid_init_vmm_info(%p)\n", info_p);
+
+       /* do cpuid 0x40000000 to get VMM vendor */
+       cpuid_fn(0x40000000, reg);
+       max_vmm_leaf = reg[eax];
+       bcopy((char *)&reg[ebx], &info_p->cpuid_vmm_vendor[0], 4);
+       bcopy((char *)&reg[ecx], &info_p->cpuid_vmm_vendor[4], 4);
+       bcopy((char *)&reg[edx], &info_p->cpuid_vmm_vendor[8], 4);
+       info_p->cpuid_vmm_vendor[12] = '\0';
+
+       if (0 == strcmp(info_p->cpuid_vmm_vendor, CPUID_VMM_ID_VMWARE)) {
+               /* VMware identification string: kb.vmware.com/kb/1009458 */
+               info_p->cpuid_vmm_family = CPUID_VMM_FAMILY_VMWARE;
+       } else {
+               info_p->cpuid_vmm_family = CPUID_VMM_FAMILY_UNKNOWN;
        }
-       db_printf("\n");
-
-       do_cpuid(0x80000000, cpid);     /* Get the first extended cpuid which
-                                        * is the number of extended ids */
-       db_printf("%08X - %08X %08X %08X %08X\n",
-               0x80000000, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]);
-
-       mid = cpid[eax];        /* Set the number */
-       for (i = 0x80000001; i <= mid; i++) {   /* Dump 'em out */
-               do_cpuid(i, cpid);      /* Get the next */
-               db_printf("%08X - %08X %08X %08X %08X\n",
-                       i, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]);
+
+       /* VMM generic leaves: https://lkml.org/lkml/2008/10/1/246 */
+       if (max_vmm_leaf >= 0x40000010) {
+               cpuid_fn(0x40000010, reg);
+               
+               info_p->cpuid_vmm_tsc_frequency = reg[eax];
+               info_p->cpuid_vmm_bus_frequency = reg[ebx];
        }
+
+       DBG(" vmm_vendor          : %s\n", info_p->cpuid_vmm_vendor);
+       DBG(" vmm_family          : %u\n", info_p->cpuid_vmm_family);
+       DBG(" vmm_bus_frequency   : %u\n", info_p->cpuid_vmm_bus_frequency);
+       DBG(" vmm_tsc_frequency   : %u\n", info_p->cpuid_vmm_tsc_frequency);
 }
 
-#endif
+boolean_t
+cpuid_vmm_present(void)
+{
+       return (cpuid_features() & CPUID_FEATURE_VMM) ? TRUE : FALSE;
+}
+
+i386_vmm_info_t *
+cpuid_vmm_info(void)
+{
+       if (_cpuid_vmm_infop == NULL) {
+               cpuid_init_vmm_info(&_cpuid_vmm_info);
+               _cpuid_vmm_infop = &_cpuid_vmm_info;
+       }
+       return _cpuid_vmm_infop;
+}
+
+uint32_t
+cpuid_vmm_family(void)
+{
+       return cpuid_vmm_info()->cpuid_vmm_family;
+}
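
cpuid_init_vmm_info() follows the de-facto hypervisor CPUID interface: bit 31 of CPUID.1:ECX advertises a hypervisor, and leaf 0x40000000 returns the maximum VMM leaf in EAX plus a 12-byte vendor string across EBX, ECX, and EDX. A user-space sketch of the same probe, assuming x86 and GCC/Clang inline asm:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Build for i386/x86_64 only; cpuid_fn mirrors the kernel helper's shape. */
static void
cpuid_fn(uint32_t leaf, uint32_t reg[4])
{
        __asm__ volatile ("cpuid"
            : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
            : "a"(leaf), "c"(0));
}

int
main(void)
{
        uint32_t reg[4];
        char vendor[13];

        cpuid_fn(1, reg);
        if (!(reg[2] & (1u << 31))) {           /* CPUID_FEATURE_VMM */
                printf("no hypervisor present\n");
                return 0;
        }

        cpuid_fn(0x40000000, reg);              /* VMM vendor leaf */
        memcpy(vendor + 0, &reg[1], 4);         /* ebx */
        memcpy(vendor + 4, &reg[2], 4);         /* ecx */
        memcpy(vendor + 8, &reg[3], 4);         /* edx */
        vendor[12] = '\0';
        printf("hypervisor vendor: %s (max leaf 0x%x)\n",
            vendor, (unsigned)reg[0]);
        return 0;
}
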
index bc7fae0190910c8b4da45f4faaa7a7a1bb2ae93a..1bc3e29277f82362e5a4528ed9729fa2cc1a89b3 100644 (file)
@@ -44,6 +44,8 @@
 #define        CPUID_VID_INTEL         "GenuineIntel"
 #define        CPUID_VID_AMD           "AuthenticAMD"
 
+#define CPUID_VMM_ID_VMWARE    "VMwareVMware"
+
 #define CPUID_STRING_UNKNOWN    "Unknown CPU Typ"
 
 #define _Bit(n)                        (1ULL << n)
 #define CPUID_FEATURE_OSXSAVE   _HBit(27) /* XGETBV/XSETBV instructions */
 #define CPUID_FEATURE_AVX1_0   _HBit(28) /* AVX 1.0 instructions */
 #define CPUID_FEATURE_VMM       _HBit(31) /* VMM (Hypervisor) present */
-#define CPUID_FEATURE_RDRAND   _HBit(29) /* RDRAND instruction */
-#define CPUID_FEATURE_F16C     _HBit(30) /* Float16 convert instructions */
+#define CPUID_FEATURE_SEGLIM64  _HBit(11) /* 64-bit segment limit checking */
+#define CPUID_FEATURE_PCID      _HBit(17) /* ASID-PCID support */
+#define CPUID_FEATURE_TSCTMR    _HBit(24) /* TSC deadline timer */
+#define CPUID_FEATURE_AVX1_0   _HBit(28) /* AVX 1.0 instructions */
+#define CPUID_FEATURE_F16C     _HBit(29) /* Float16 convert instructions */
+#define CPUID_FEATURE_RDRAND   _HBit(30) /* RDRAND instruction */
 
 /*
  * Leaf 7, subleaf 0 additional features.
 #define CPUID_MODEL_IVYBRIDGE  0x3A
 
 
+#define CPUID_VMM_FAMILY_UNKNOWN       0x0
+#define CPUID_VMM_FAMILY_VMWARE                0x1
+
 #ifndef ASSEMBLER
 #include <stdint.h>
 #include <mach/mach_types.h>
@@ -337,6 +346,15 @@ typedef struct {
        uint32_t                cpuid_leaf7_features;
 } i386_cpu_info_t;
 
+#ifdef MACH_KERNEL_PRIVATE
+typedef struct {
+       char            cpuid_vmm_vendor[16];
+       uint32_t        cpuid_vmm_family;
+       uint32_t        cpuid_vmm_bus_frequency;
+       uint32_t        cpuid_vmm_tsc_frequency;
+} i386_vmm_info_t;
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -364,6 +382,12 @@ extern i386_cpu_info_t     *cpuid_info(void);
 
 extern void            cpuid_set_info(void);
 
+#ifdef MACH_KERNEL_PRIVATE
+extern boolean_t       cpuid_vmm_present(void);
+extern i386_vmm_info_t *cpuid_vmm_info(void);
+extern uint32_t                cpuid_vmm_family(void);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
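
The feature macros pack both CPUID.1 result registers into a single 64-bit word: _Bit(n) selects an EDX bit in the low half and _HBit(n) an ECX bit in the high half (assuming _HBit(n) is 1ULL << ((n)+32), mirroring _Bit(n) above). A sketch of the packing and the single-mask feature test it enables:

#include <stdint.h>
#include <stdio.h>

#define _Bit(n)   (1ULL << (n))
#define _HBit(n)  (1ULL << ((n) + 32))  /* assumed definition */

#define CPUID_FEATURE_VMM     _HBit(31)
#define CPUID_FEATURE_RDRAND  _HBit(30)

/* EDX occupies the low 32 bits of the combined feature word and ECX the
 * high 32 bits, so one uint64_t answers "is feature X present" for
 * either register.
 */
static uint64_t
pack_features(uint32_t edx, uint32_t ecx)
{
        return (uint64_t)edx | ((uint64_t)ecx << 32);
}

int
main(void)
{
        uint64_t features = pack_features(0, 1u << 31); /* ECX bit 31 set */

        printf("VMM %s, RDRAND %s\n",
            (features & CPUID_FEATURE_VMM) ? "yes" : "no",
            (features & CPUID_FEATURE_RDRAND) ? "yes" : "no");
        return 0;
}
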
diff --git a/osfmk/i386/db_disasm.c b/osfmk/i386/db_disasm.c
deleted file mode 100644 (file)
index c68d652..0000000
+++ /dev/null
@@ -1,1826 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:36  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:37  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.2.8.3  1996/07/31  09:43:35  paire
- *     Merged with nmk20b7_shared (1.2.11.1)
- *     [96/06/10            paire]
- *
- * Revision 1.2.11.1  1996/05/14  13:49:36  paire
- *     Added support for new cmpxchg8b, cpuid, rdtsc, rdwmr, rsm and wrmsr
- *     Pentium instructions
- *     [95/11/23            paire]
- * 
- * Revision 1.2.8.2  1994/09/23  01:50:45  ezf
- *     change marker to not FREE
- *     [1994/09/22  21:21:17  ezf]
- * 
- * Revision 1.2.8.1  1994/09/16  15:26:28  emcmanus
- *     Only skip over GAS-inserted NOPs after branches if they are really
- *     NOPs; this depends at least on assembler options.
- *     [1994/09/16  15:26:03  emcmanus]
- * 
- * Revision 1.2.6.3  1994/02/19  15:40:34  bolinger
- *     For load/store counting, mark all varieties of "call" as writing
- *     memory.
- *     [1994/02/15  20:25:18  bolinger]
- * 
- * Revision 1.2.6.2  1994/02/14  21:46:49  dwm
- *     Warning repair
- *     [1994/02/14  21:46:14  dwm]
- * 
- * Revision 1.2.6.1  1994/02/12  23:26:05  bolinger
- *     Implement load/store counting for ddb "until" command.
- *     [1994/02/12  03:34:55  bolinger]
- * 
- * Revision 1.2.2.3  1993/08/09  19:39:21  dswartz
- *     Add ANSI prototypes - CR#9523
- *     [1993/08/06  17:44:13  dswartz]
- * 
- * Revision 1.2.2.2  1993/06/09  02:27:29  gm
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  21:03:54  jeffc]
- * 
- * Revision 1.2  1993/04/19  16:12:57  devrcs
- *     Print file names and lineno on branch instructions.
- *     [barbou@gr.osf.org]
- *     [92/12/03            bernadat]
- * 
- * Revision 1.1  1992/09/30  02:02:19  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.5.3.1  92/03/03  16:14:27  jeffreyh
- *     Pick up changes from TRUNK
- *     [92/02/26  11:05:06  jeffreyh]
- * 
- * Revision 2.6  92/01/03  20:05:00  dbg
- *     Add a switch to disassemble 16-bit code.
- *     Fix spelling of 'lods' opcodes.
- *     [91/10/30            dbg]
- * 
- * Revision 2.5  91/10/09  16:05:58  af
- *     Supported disassemble of non current task by passing task parameter.
- *     [91/08/29            tak]
- * 
- * Revision 2.4  91/05/14  16:05:04  mrt
- *     Correcting copyright
- * 
- * Revision 2.3  91/02/05  17:11:03  mrt
- *     Changed to new Mach copyright
- *     [91/02/01  17:31:03  mrt]
- * 
- * Revision 2.2  90/08/27  21:55:56  dbg
- *     Fix register operand for move to/from control/test/debug
- *     register instructions.  Add i486 instructions.
- *     [90/08/27            dbg]
- * 
- *     Import db_sym.h.  Print instruction displacements in
- *     current radix (signed).  Change calling sequence of
- *     db_disasm.
- *     [90/08/21            dbg]
- *     Fix includes.
- *     [90/08/08            dbg]
- *     Created.
- *     [90/07/25            dbg]
- * 
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-/*
- * Instruction disassembler.
- */
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_output.h>
-
-#include <kern/task.h>
-#include <kern/misc_protos.h>
-
-struct i_addr {
-       int             is_reg; /* if reg, reg number is in 'disp' */
-       int             disp;
-       char *          base;
-       char *          index;
-       int             ss;
-};
-
-/* Forward */
-
-extern db_addr_t       db_read_address(
-                               db_addr_t       loc,
-                               int             short_addr,
-                               int             regmodrm,
-                               struct i_addr   * addrp,
-                               task_t          task);
-extern void            db_print_address(
-                               char *          seg,
-                               int             size,
-                               struct i_addr   *addrp,
-                               task_t          task);
-extern db_addr_t       db_disasm_esc(
-                               db_addr_t       loc,
-                               int             inst,
-                               int             short_addr,
-                               int             size,
-                               char *          seg,
-                               task_t          task);
-
-/*
- * Switch to disassemble 16-bit code.
- */
-boolean_t      db_disasm_16 = FALSE;
-
-/*
- * Size attributes
- */
-#define        BYTE    0
-#define        WORD    1
-#define        LONG    2
-#define        QUAD    3
-#define        SNGL    4
-#define        DBLR    5
-#define        EXTR    6
-#define        SDEP    7
-#define        NONE    8
-
-/*
- * Addressing modes
- */
-#define        E       1                       /* general effective address */
-#define        Eind    2                       /* indirect address (jump, call) */
-#define        Ew      3                       /* address, word size */
-#define        Eb      4                       /* address, byte size */
-#define        R       5                       /* register, in 'reg' field */
-#define        Rw      6                       /* word register, in 'reg' field */
-#define        Ri      7                       /* register in instruction */
-#define        S       8                       /* segment reg, in 'reg' field */
-#define        Si      9                       /* segment reg, in instruction */
-#define        A       10                      /* accumulator */
-#define        BX      11                      /* (bx) */
-#define        CL      12                      /* cl, for shifts */
-#define        DX      13                      /* dx, for IO */
-#define        SI      14                      /* si */
-#define        DI      15                      /* di */
-#define        CR      16                      /* control register */
-#define        DR      17                      /* debug register */
-#define        TR      18                      /* test register */
-#define        I       19                      /* immediate, unsigned */
-#define        Is      20                      /* immediate, signed */
-#define        Ib      21                      /* byte immediate, unsigned */
-#define        Ibs     22                      /* byte immediate, signed */
-#define        Iw      23                      /* word immediate, unsigned */
-#define        Il      24                      /* long immediate */
-#define        O       25                      /* direct address */
-#define        Db      26                      /* byte displacement from EIP */
-#define        Dl      27                      /* long displacement from EIP */
-#define        o1      28                      /* constant 1 */
-#define        o3      29                      /* constant 3 */
-#define        OS      30                      /* immediate offset/segment */
-#define        ST      31                      /* FP stack top */
-#define        STI     32                      /* FP stack */
-#define        X       33                      /* extended FP op */
-#define        XA      34                      /* for 'fstcw %ax' */
-
-struct inst {
-       char *  i_name;                 /* name */
-       short   i_has_modrm;            /* has regmodrm byte */
-       short   i_size;                 /* operand size */
-       int     i_mode;                 /* addressing modes */
-       char *  i_extra;                /* pointer to extra opcode table */
-};
-
-#define        op1(x)          (x)
-#define        op2(x,y)        ((x)|((y)<<8))
-#define        op3(x,y,z)      ((x)|((y)<<8)|((z)<<16))
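
Before its deletion, the disassembler's tables encoded up to three operand addressing modes per instruction in one int, eight bits per mode, via op1/op2/op3; decoding simply shifts them back out. A minimal sketch of that packing:

#include <stdio.h>

/* Same packing scheme as the macros above: one mode per byte. */
#define op1(x)       (x)
#define op2(x, y)    ((x) | ((y) << 8))
#define op3(x, y, z) ((x) | ((y) << 8) | ((z) << 16))

int
main(void)
{
        int mode = op3(21, 1, 5);       /* e.g. Ib, E, R from the tables above */

        for (int i = 0; i < 3; i++)
                printf("operand %d: mode %d\n", i, (mode >> (8 * i)) & 0xff);
        return 0;
}
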
-
-struct finst {
-       char *  f_name;                 /* name for memory instruction */
-       int     f_size;                 /* size for memory instruction */
-       int     f_rrmode;               /* mode for rr instruction */
-       char *  f_rrname;               /* name for rr instruction
-                                          (or pointer to table) */
-};
-
-char * db_Grp6[] = {
-       "sldt",
-       "str",
-       "lldt",
-       "ltr",
-       "verr",
-       "verw",
-       "",
-       ""
-};
-
-char * db_Grp7[] = {
-       "sgdt",
-       "sidt",
-       "lgdt",
-       "lidt",
-       "smsw",
-       "",
-       "lmsw",
-       "invlpg"
-};
-
-char * db_Grp8[] = {
-       "",
-       "",
-       "",
-       "",
-       "bt",
-       "bts",
-       "btr",
-       "btc"
-};
-
-struct inst db_inst_0f0x[] = {
-/*00*/ { "",      TRUE,  NONE,  op1(Ew),     (char *)db_Grp6 },
-/*01*/ { "",      TRUE,  NONE,  op1(Ew),     (char *)db_Grp7 },
-/*02*/ { "lar",   TRUE,  LONG,  op2(E,R),    0 },
-/*03*/ { "lsl",   TRUE,  LONG,  op2(E,R),    0 },
-/*04*/ { "",      FALSE, NONE,  0,           0 },
-/*05*/ { "",      FALSE, NONE,  0,           0 },
-/*06*/ { "clts",  FALSE, NONE,  0,           0 },
-/*07*/ { "",      FALSE, NONE,  0,           0 },
-
-/*08*/ { "invd",  FALSE, NONE,  0,           0 },
-/*09*/ { "wbinvd",FALSE, NONE,  0,           0 },
-/*0a*/ { "",      FALSE, NONE,  0,           0 },
-/*0b*/ { "",      FALSE, NONE,  0,           0 },
-/*0c*/ { "",      FALSE, NONE,  0,           0 },
-/*0d*/ { "",      FALSE, NONE,  0,           0 },
-/*0e*/ { "",      FALSE, NONE,  0,           0 },
-/*0f*/ { "",      FALSE, NONE,  0,           0 },
-};
-
-struct inst    db_inst_0f2x[] = {
-/*20*/ { "mov",   TRUE,  LONG,  op2(CR,E),   0 }, /* use E for reg */
-/*21*/ { "mov",   TRUE,  LONG,  op2(DR,E),   0 }, /* since mod == 11 */
-/*22*/ { "mov",   TRUE,  LONG,  op2(E,CR),   0 },
-/*23*/ { "mov",   TRUE,  LONG,  op2(E,DR),   0 },
-/*24*/ { "mov",   TRUE,  LONG,  op2(TR,E),   0 },
-/*25*/ { "",      FALSE, NONE,  0,           0 },
-/*26*/ { "mov",   TRUE,  LONG,  op2(E,TR),   0 },
-/*27*/ { "",      FALSE, NONE,  0,           0 },
-
-/*28*/ { "",      FALSE, NONE,  0,           0 },
-/*29*/ { "",      FALSE, NONE,  0,           0 },
-/*2a*/ { "",      FALSE, NONE,  0,           0 },
-/*2b*/ { "",      FALSE, NONE,  0,           0 },
-/*2c*/ { "",      FALSE, NONE,  0,           0 },
-/*2d*/ { "",      FALSE, NONE,  0,           0 },
-/*2e*/ { "",      FALSE, NONE,  0,           0 },
-/*2f*/ { "",      FALSE, NONE,  0,           0 },
-};
-struct inst    db_inst_0f3x[] = {
-/*30*/ { "rdtsc", FALSE, NONE,  0,           0 },
-/*31*/ { "rdmsr", FALSE, NONE,  0,           0 },
-/*32*/ { "wrmsr", FALSE, NONE,  0,           0 },
-/*33*/ { "",      FALSE, NONE,  0,           0 },
-/*34*/ { "",      FALSE, NONE,  0,           0 },
-/*35*/ { "",      FALSE, NONE,  0,           0 },
-/*36*/ { "",      FALSE, NONE,  0,           0 },
-/*37*/ { "",      FALSE, NONE,  0,           0 },
-
-/*38*/ { "",      FALSE, NONE,  0,           0 },
-/*39*/ { "",      FALSE, NONE,  0,           0 },
-/*3a*/ { "",      FALSE, NONE,  0,           0 },
-/*3b*/ { "",      FALSE, NONE,  0,           0 },
-/*3c*/ { "",      FALSE, NONE,  0,           0 },
-/*3d*/ { "",      FALSE, NONE,  0,           0 },
-/*3e*/ { "",      FALSE, NONE,  0,           0 },
-/*3f*/ { "",      FALSE, NONE,  0,           0 },
-};
-
-struct inst    db_inst_0f8x[] = {
-/*80*/ { "jo",    FALSE, NONE,  op1(Dl),     0 },
-/*81*/ { "jno",   FALSE, NONE,  op1(Dl),     0 },
-/*82*/ { "jb",    FALSE, NONE,  op1(Dl),     0 },
-/*83*/ { "jnb",   FALSE, NONE,  op1(Dl),     0 },
-/*84*/ { "jz",    FALSE, NONE,  op1(Dl),     0 },
-/*85*/ { "jnz",   FALSE, NONE,  op1(Dl),     0 },
-/*86*/ { "jbe",   FALSE, NONE,  op1(Dl),     0 },
-/*87*/ { "jnbe",  FALSE, NONE,  op1(Dl),     0 },
-
-/*88*/ { "js",    FALSE, NONE,  op1(Dl),     0 },
-/*89*/ { "jns",   FALSE, NONE,  op1(Dl),     0 },
-/*8a*/ { "jp",    FALSE, NONE,  op1(Dl),     0 },
-/*8b*/ { "jnp",   FALSE, NONE,  op1(Dl),     0 },
-/*8c*/ { "jl",    FALSE, NONE,  op1(Dl),     0 },
-/*8d*/ { "jnl",   FALSE, NONE,  op1(Dl),     0 },
-/*8e*/ { "jle",   FALSE, NONE,  op1(Dl),     0 },
-/*8f*/ { "jnle",  FALSE, NONE,  op1(Dl),     0 },
-};
-
-struct inst    db_inst_0f9x[] = {
-/*90*/ { "seto",  TRUE,  NONE,  op1(Eb),     0 },
-/*91*/ { "setno", TRUE,  NONE,  op1(Eb),     0 },
-/*92*/ { "setb",  TRUE,  NONE,  op1(Eb),     0 },
-/*93*/ { "setnb", TRUE,  NONE,  op1(Eb),     0 },
-/*94*/ { "setz",  TRUE,  NONE,  op1(Eb),     0 },
-/*95*/ { "setnz", TRUE,  NONE,  op1(Eb),     0 },
-/*96*/ { "setbe", TRUE,  NONE,  op1(Eb),     0 },
-/*97*/ { "setnbe",TRUE,  NONE,  op1(Eb),     0 },
-
-/*98*/ { "sets",  TRUE,  NONE,  op1(Eb),     0 },
-/*99*/ { "setns", TRUE,  NONE,  op1(Eb),     0 },
-/*9a*/ { "setp",  TRUE,  NONE,  op1(Eb),     0 },
-/*9b*/ { "setnp", TRUE,  NONE,  op1(Eb),     0 },
-/*9c*/ { "setl",  TRUE,  NONE,  op1(Eb),     0 },
-/*9d*/ { "setnl", TRUE,  NONE,  op1(Eb),     0 },
-/*9e*/ { "setle", TRUE,  NONE,  op1(Eb),     0 },
-/*9f*/ { "setnle",TRUE,  NONE,  op1(Eb),     0 },
-};
-
-struct inst    db_inst_0fax[] = {
-/*a0*/ { "push",  FALSE, NONE,  op1(Si),     0 },
-/*a1*/ { "pop",   FALSE, NONE,  op1(Si),     0 },
-/*a2*/ { "cpuid", FALSE, NONE,  0,           0 },
-/*a3*/ { "bt",    TRUE,  LONG,  op2(E,R),    0 },
-/*a4*/ { "shld",  TRUE,  LONG,  op3(Ib,E,R), 0 },
-/*a5*/ { "shld",  TRUE,  LONG,  op3(CL,E,R), 0 },
-/*a6*/ { "",      FALSE, NONE,  0,           0 },
-/*a7*/ { "",      FALSE, NONE,  0,           0 },
-
-/*a8*/ { "push",  FALSE, NONE,  op1(Si),     0 },
-/*a9*/ { "pop",   FALSE, NONE,  op1(Si),     0 },
-/*aa*/ { "rsm",   FALSE, NONE,  0,           0 },
-/*ab*/ { "bts",   TRUE,  LONG,  op2(E,R),    0 },
-/*ac*/ { "shrd",  TRUE,  LONG,  op3(Ib,E,R), 0 },
-/*ad*/ { "shrd",  TRUE,  LONG,  op3(CL,E,R), 0 },
-/*a6*/ { "",      FALSE, NONE,  0,           0 },
-/*a7*/ { "imul",  TRUE,  LONG,  op2(E,R),    0 },
-};
-
-struct inst    db_inst_0fbx[] = {
-/*b0*/ { "",      FALSE, NONE,  0,           0 },
-/*b1*/ { "",      FALSE, NONE,  0,           0 },
-/*b2*/ { "lss",   TRUE,  LONG,  op2(E, R),   0 },
-/*b3*/ { "bts",   TRUE,  LONG,  op2(R, E),   0 },
-/*b4*/ { "lfs",   TRUE,  LONG,  op2(E, R),   0 },
-/*b5*/ { "lgs",   TRUE,  LONG,  op2(E, R),   0 },
-/*b6*/ { "movzb", TRUE,  LONG,  op2(E, R),   0 },
-/*b7*/ { "movzw", TRUE,  LONG,  op2(E, R),   0 },
-
-/*b8*/ { "",      FALSE, NONE,  0,           0 },
-/*b9*/ { "",      FALSE, NONE,  0,           0 },
-/*ba*/ { "",      TRUE,  LONG,  op2(Is, E),  (char *)db_Grp8 },
-/*bb*/ { "btc",   TRUE,  LONG,  op2(R, E),   0 },
-/*bc*/ { "bsf",   TRUE,  LONG,  op2(E, R),   0 },
-/*bd*/ { "bsr",   TRUE,  LONG,  op2(E, R),   0 },
-/*be*/ { "movsb", TRUE,  LONG,  op2(E, R),   0 },
-/*bf*/ { "movsw", TRUE,  LONG,  op2(E, R),   0 },
-};
-
-struct inst    db_inst_0fcx[] = {
-/*c0*/ { "xadd",  TRUE,  BYTE,  op2(R, E),   0 },
-/*c1*/ { "xadd",  TRUE,  LONG,  op2(R, E),   0 },
-/*c2*/ { "",      FALSE, NONE,  0,           0 },
-/*c3*/ { "",      FALSE, NONE,  0,           0 },
-/*c4*/ { "",      FALSE, NONE,  0,           0 },
-/*c5*/ { "",      FALSE, NONE,  0,           0 },
-/*c6*/ { "",      FALSE, NONE,  0,           0 },
-/*c7*/ { "cmpxchg8b", FALSE, NONE, op1(E),   0 },
-/*c8*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*c9*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*ca*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*cb*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*cc*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*cd*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*ce*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-/*cf*/ { "bswap", FALSE, LONG,  op1(Ri),     0 },
-};
-
-struct inst    db_inst_0fdx[] = {
-/*c0*/ { "cmpxchg",TRUE, BYTE,  op2(R, E),   0 },
-/*c1*/ { "cmpxchg",TRUE, LONG,  op2(R, E),   0 },
-/*c2*/ { "",      FALSE, NONE,  0,           0 },
-/*c3*/ { "",      FALSE, NONE,  0,           0 },
-/*c4*/ { "",      FALSE, NONE,  0,           0 },
-/*c5*/ { "",      FALSE, NONE,  0,           0 },
-/*c6*/ { "",      FALSE, NONE,  0,           0 },
-/*c7*/ { "",      FALSE, NONE,  0,           0 },
-/*c8*/ { "",      FALSE, NONE,  0,           0 },
-/*c9*/ { "",      FALSE, NONE,  0,           0 },
-/*ca*/ { "",      FALSE, NONE,  0,           0 },
-/*cb*/ { "",      FALSE, NONE,  0,           0 },
-/*cc*/ { "",      FALSE, NONE,  0,           0 },
-/*cd*/ { "",      FALSE, NONE,  0,           0 },
-/*ce*/ { "",      FALSE, NONE,  0,           0 },
-/*cf*/ { "",      FALSE, NONE,  0,           0 },
-};
-
-struct inst *db_inst_0f[] = {
-       db_inst_0f0x,
-       0,
-       db_inst_0f2x,
-       db_inst_0f3x,
-       0,
-       0,
-       0,
-       0,
-       db_inst_0f8x,
-       db_inst_0f9x,
-       db_inst_0fax,
-       db_inst_0fbx,
-       db_inst_0fcx,
-       db_inst_0fdx,
-       0,
-       0
-};
-
-char * db_Esc92[] = {
-       "fnop", "",     "",     "",     "",     "",     "",     ""
-};
-char * db_Esc93[] = {
-       "",     "",     "",     "",     "",     "",     "",     ""
-};
-char * db_Esc94[] = {
-       "fchs", "fabs", "",     "",     "ftst", "fxam", "",     ""
-};
-char * db_Esc95[] = {
-       "fld1", "fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz",""
-};
-char * db_Esc96[] = {
-       "f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp",
-       "fincstp"
-};
-char * db_Esc97[] = {
-       "fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"
-};
-
-char * db_Esca4[] = {
-       "",     "fucompp","",   "",     "",     "",     "",     ""
-};
-
-char * db_Escb4[] = {
-       "",     "",     "fnclex","fninit","",   "",     "",     ""
-};
-
-char * db_Esce3[] = {
-       "",     "fcompp","",    "",     "",     "",     "",     ""
-};
-
-char * db_Escf4[] = {
-       "fnstsw","",    "",     "",     "",     "",     "",     ""
-};
-
-struct finst db_Esc8[] = {
-/*0*/  { "fadd",   SNGL,  op2(STI,ST), 0 },
-/*1*/  { "fmul",   SNGL,  op2(STI,ST), 0 },
-/*2*/  { "fcom",   SNGL,  op2(STI,ST), 0 },
-/*3*/  { "fcomp",  SNGL,  op2(STI,ST), 0 },
-/*4*/  { "fsub",   SNGL,  op2(STI,ST), 0 },
-/*5*/  { "fsubr",  SNGL,  op2(STI,ST), 0 },
-/*6*/  { "fdiv",   SNGL,  op2(STI,ST), 0 },
-/*7*/  { "fdivr",  SNGL,  op2(STI,ST), 0 },
-};
-
-struct finst db_Esc9[] = {
-/*0*/  { "fld",    SNGL,  op1(STI),    0 },
-/*1*/  { "",       NONE,  op1(STI),    "fxch" },
-/*2*/  { "fst",    SNGL,  op1(X),      (char *)db_Esc92 },
-/*3*/  { "fstp",   SNGL,  op1(X),      (char *)db_Esc93 },
-/*4*/  { "fldenv", NONE,  op1(X),      (char *)db_Esc94 },
-/*5*/  { "fldcw",  NONE,  op1(X),      (char *)db_Esc95 },
-/*6*/  { "fnstenv",NONE,  op1(X),      (char *)db_Esc96 },
-/*7*/  { "fnstcw", NONE,  op1(X),      (char *)db_Esc97 },
-};
-
-struct finst db_Esca[] = {
-/*0*/  { "fiadd",  WORD,  0,           0 },
-/*1*/  { "fimul",  WORD,  0,           0 },
-/*2*/  { "ficom",  WORD,  0,           0 },
-/*3*/  { "ficomp", WORD,  0,           0 },
-/*4*/  { "fisub",  WORD,  op1(X),      (char *)db_Esca4 },
-/*5*/  { "fisubr", WORD,  0,           0 },
-/*6*/  { "fidiv",  WORD,  0,           0 },
-/*7*/  { "fidivr", WORD,  0,           0 }
-};
-
-struct finst db_Escb[] = {
-/*0*/  { "fild",   WORD,  0,           0 },
-/*1*/  { "",       NONE,  0,           0 },
-/*2*/  { "fist",   WORD,  0,           0 },
-/*3*/  { "fistp",  WORD,  0,           0 },
-/*4*/  { "",       WORD,  op1(X),      (char *)db_Escb4 },
-/*5*/  { "fld",    EXTR,  0,           0 },
-/*6*/  { "",       WORD,  0,           0 },
-/*7*/  { "fstp",   EXTR,  0,           0 },
-};
-
-struct finst db_Escc[] = {
-/*0*/  { "fadd",   DBLR,  op2(ST,STI), 0 },
-/*1*/  { "fmul",   DBLR,  op2(ST,STI), 0 },
-/*2*/  { "fcom",   DBLR,  op2(ST,STI), 0 },
-/*3*/  { "fcomp",  DBLR,  op2(ST,STI), 0 },
-/*4*/  { "fsub",   DBLR,  op2(ST,STI), "fsubr" },
-/*5*/  { "fsubr",  DBLR,  op2(ST,STI), "fsub" },
-/*6*/  { "fdiv",   DBLR,  op2(ST,STI), "fdivr" },
-/*7*/  { "fdivr",  DBLR,  op2(ST,STI), "fdiv" },
-};
-
-struct finst db_Escd[] = {
-/*0*/  { "fld",    DBLR,  op1(STI),    "ffree" },
-/*1*/  { "",       NONE,  0,           0 },
-/*2*/  { "fst",    DBLR,  op1(STI),    0 },
-/*3*/  { "fstp",   DBLR,  op1(STI),    0 },
-/*4*/  { "frstor", NONE,  op1(STI),    "fucom" },
-/*5*/  { "",       NONE,  op1(STI),    "fucomp" },
-/*6*/  { "fnsave", NONE,  0,           0 },
-/*7*/  { "fnstsw", NONE,  0,           0 },
-};
-
-struct finst db_Esce[] = {
-/*0*/  { "fiadd",  LONG,  op2(ST,STI), "faddp" },
-/*1*/  { "fimul",  LONG,  op2(ST,STI), "fmulp" },
-/*2*/  { "ficom",  LONG,  0,           0 },
-/*3*/  { "ficomp", LONG,  op1(X),      (char *)db_Esce3 },
-/*4*/  { "fisub",  LONG,  op2(ST,STI), "fsubrp" },
-/*5*/  { "fisubr", LONG,  op2(ST,STI), "fsubp" },
-/*6*/  { "fidiv",  LONG,  op2(ST,STI), "fdivrp" },
-/*7*/  { "fidivr", LONG,  op2(ST,STI), "fdivp" },
-};
-
-struct finst db_Escf[] = {
-/*0*/  { "fild",   LONG,  0,           0 },
-/*1*/  { "",       LONG,  0,           0 },
-/*2*/  { "fist",   LONG,  0,           0 },
-/*3*/  { "fistp",  LONG,  0,           0 },
-/*4*/  { "fbld",   NONE,  op1(XA),     (char *)db_Escf4 },
-/*5*/  { "fld",    QUAD,  0,           0 },
-/*6*/  { "fbstp",  NONE,  0,           0 },
-/*7*/  { "fstp",   QUAD,  0,           0 },
-};
-
-struct finst *db_Esc_inst[] = {
-       db_Esc8, db_Esc9, db_Esca, db_Escb,
-       db_Escc, db_Escd, db_Esce, db_Escf
-};
-
-char * db_Grp1[] = {
-       "add",
-       "or",
-       "adc",
-       "sbb",
-       "and",
-       "sub",
-       "xor",
-       "cmp"
-};
-
-char * db_Grp2[] = {
-       "rol",
-       "ror",
-       "rcl",
-       "rcr",
-       "shl",
-       "shr",
-       "shl",
-       "sar"
-};
-
-struct inst db_Grp3[] = {
-       { "test",  TRUE, NONE, op2(I,E), 0 },
-       { "test",  TRUE, NONE, op2(I,E), 0 },
-       { "not",   TRUE, NONE, op1(E),   0 },
-       { "neg",   TRUE, NONE, op1(E),   0 },
-       { "mul",   TRUE, NONE, op2(E,A), 0 },
-       { "imul",  TRUE, NONE, op2(E,A), 0 },
-       { "div",   TRUE, NONE, op2(E,A), 0 },
-       { "idiv",  TRUE, NONE, op2(E,A), 0 },
-};
-
-struct inst    db_Grp4[] = {
-       { "inc",   TRUE, BYTE, op1(E),   0 },
-       { "dec",   TRUE, BYTE, op1(E),   0 },
-       { "",      TRUE, NONE, 0,        0 },
-       { "",      TRUE, NONE, 0,        0 },
-       { "",      TRUE, NONE, 0,        0 },
-       { "",      TRUE, NONE, 0,        0 },
-       { "",      TRUE, NONE, 0,        0 },
-       { "",      TRUE, NONE, 0,        0 }
-};
-
-struct inst    db_Grp5[] = {
-       { "inc",   TRUE, LONG, op1(E),   0 },
-       { "dec",   TRUE, LONG, op1(E),   0 },
-       { "call",  TRUE, NONE, op1(Eind),0 },
-       { "lcall", TRUE, NONE, op1(Eind),0 },
-       { "jmp",   TRUE, NONE, op1(Eind),0 },
-       { "ljmp",  TRUE, NONE, op1(Eind),0 },
-       { "push",  TRUE, LONG, op1(E),   0 },
-       { "",      TRUE, NONE, 0,        0 }
-};
-
-struct inst db_inst_table[256] = {
-/*00*/ { "add",   TRUE,  BYTE,  op2(R, E),  0 },
-/*01*/ { "add",   TRUE,  LONG,  op2(R, E),  0 },
-/*02*/ { "add",   TRUE,  BYTE,  op2(E, R),  0 },
-/*03*/ { "add",   TRUE,  LONG,  op2(E, R),  0 },
-/*04*/ { "add",   FALSE, BYTE,  op2(Is, A), 0 },
-/*05*/ { "add",   FALSE, LONG,  op2(Is, A), 0 },
-/*06*/ { "push",  FALSE, NONE,  op1(Si),    0 },
-/*07*/ { "pop",   FALSE, NONE,  op1(Si),    0 },
-
-/*08*/ { "or",    TRUE,  BYTE,  op2(R, E),  0 },
-/*09*/ { "or",    TRUE,  LONG,  op2(R, E),  0 },
-/*0a*/ { "or",    TRUE,  BYTE,  op2(E, R),  0 },
-/*0b*/ { "or",    TRUE,  LONG,  op2(E, R),  0 },
-/*0c*/ { "or",    FALSE, BYTE,  op2(I, A),  0 },
-/*0d*/ { "or",    FALSE, LONG,  op2(I, A),  0 },
-/*0e*/ { "push",  FALSE, NONE,  op1(Si),    0 },
-/*0f*/ { "",      FALSE, NONE,  0,          0 },
-
-/*10*/ { "adc",   TRUE,  BYTE,  op2(R, E),  0 },
-/*11*/ { "adc",   TRUE,  LONG,  op2(R, E),  0 },
-/*12*/ { "adc",   TRUE,  BYTE,  op2(E, R),  0 },
-/*13*/ { "adc",   TRUE,  LONG,  op2(E, R),  0 },
-/*14*/ { "adc",   FALSE, BYTE,  op2(Is, A), 0 },
-/*15*/ { "adc",   FALSE, LONG,  op2(Is, A), 0 },
-/*16*/ { "push",  FALSE, NONE,  op1(Si),    0 },
-/*17*/ { "pop",   FALSE, NONE,  op1(Si),    0 },
-
-/*18*/ { "sbb",   TRUE,  BYTE,  op2(R, E),  0 },
-/*19*/ { "sbb",   TRUE,  LONG,  op2(R, E),  0 },
-/*1a*/ { "sbb",   TRUE,  BYTE,  op2(E, R),  0 },
-/*1b*/ { "sbb",   TRUE,  LONG,  op2(E, R),  0 },
-/*1c*/ { "sbb",   FALSE, BYTE,  op2(Is, A), 0 },
-/*1d*/ { "sbb",   FALSE, LONG,  op2(Is, A), 0 },
-/*1e*/ { "push",  FALSE, NONE,  op1(Si),    0 },
-/*1f*/ { "pop",   FALSE, NONE,  op1(Si),    0 },
-
-/*20*/ { "and",   TRUE,  BYTE,  op2(R, E),  0 },
-/*21*/ { "and",   TRUE,  LONG,  op2(R, E),  0 },
-/*22*/ { "and",   TRUE,  BYTE,  op2(E, R),  0 },
-/*23*/ { "and",   TRUE,  LONG,  op2(E, R),  0 },
-/*24*/ { "and",   FALSE, BYTE,  op2(I, A),  0 },
-/*25*/ { "and",   FALSE, LONG,  op2(I, A),  0 },
-/*26*/ { "",      FALSE, NONE,  0,          0 },
-/*27*/ { "aaa",   FALSE, NONE,  0,          0 },
-
-/*28*/ { "sub",   TRUE,  BYTE,  op2(R, E),  0 },
-/*29*/ { "sub",   TRUE,  LONG,  op2(R, E),  0 },
-/*2a*/ { "sub",   TRUE,  BYTE,  op2(E, R),  0 },
-/*2b*/ { "sub",   TRUE,  LONG,  op2(E, R),  0 },
-/*2c*/ { "sub",   FALSE, BYTE,  op2(Is, A), 0 },
-/*2d*/ { "sub",   FALSE, LONG,  op2(Is, A), 0 },
-/*2e*/ { "",      FALSE, NONE,  0,          0 },
-/*2f*/ { "das",   FALSE, NONE,  0,          0 },
-
-/*30*/ { "xor",   TRUE,  BYTE,  op2(R, E),  0 },
-/*31*/ { "xor",   TRUE,  LONG,  op2(R, E),  0 },
-/*32*/ { "xor",   TRUE,  BYTE,  op2(E, R),  0 },
-/*33*/ { "xor",   TRUE,  LONG,  op2(E, R),  0 },
-/*34*/ { "xor",   FALSE, BYTE,  op2(I, A),  0 },
-/*35*/ { "xor",   FALSE, LONG,  op2(I, A),  0 },
-/*36*/ { "",      FALSE, NONE,  0,          0 },
-/*37*/ { "daa",   FALSE, NONE,  0,          0 },
-
-/*38*/ { "cmp",   TRUE,  BYTE,  op2(R, E),  0 },
-/*39*/ { "cmp",   TRUE,  LONG,  op2(R, E),  0 },
-/*3a*/ { "cmp",   TRUE,  BYTE,  op2(E, R),  0 },
-/*3b*/ { "cmp",   TRUE,  LONG,  op2(E, R),  0 },
-/*3c*/ { "cmp",   FALSE, BYTE,  op2(Is, A), 0 },
-/*3d*/ { "cmp",   FALSE, LONG,  op2(Is, A), 0 },
-/*3e*/ { "",      FALSE, NONE,  0,          0 },
-/*3f*/ { "aas",   FALSE, NONE,  0,          0 },
-
-/*40*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*41*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*42*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*43*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*44*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*45*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*46*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-/*47*/ { "inc",   FALSE, LONG,  op1(Ri),    0 },
-
-/*48*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*49*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4a*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4b*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4c*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4d*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4e*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-/*4f*/ { "dec",   FALSE, LONG,  op1(Ri),    0 },
-
-/*50*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*51*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*52*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*53*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*54*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*55*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*56*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-/*57*/ { "push",  FALSE, LONG,  op1(Ri),    0 },
-
-/*58*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*59*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5a*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5b*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5c*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5d*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5e*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-/*5f*/ { "pop",   FALSE, LONG,  op1(Ri),    0 },
-
-/*60*/ { "pusha", FALSE, LONG,  0,          0 },
-/*61*/ { "popa",  FALSE, LONG,  0,          0 },
-/*62*/  { "bound", TRUE,  LONG,  op2(E, R),  0 },
-/*63*/ { "arpl",  TRUE,  NONE,  op2(Ew,Rw), 0 },
-
-/*64*/ { "",      FALSE, NONE,  0,          0 },
-/*65*/ { "",      FALSE, NONE,  0,          0 },
-/*66*/ { "",      FALSE, NONE,  0,          0 },
-/*67*/ { "",      FALSE, NONE,  0,          0 },
-
-/*68*/ { "push",  FALSE, LONG,  op1(I),     0 },
-/*69*/  { "imul",  TRUE,  LONG,  op3(I,E,R), 0 },
-/*6a*/ { "push",  FALSE, LONG,  op1(Ib),    0 },
-/*6b*/  { "imul",  TRUE,  LONG,  op3(Ibs,E,R),0 },
-/*6c*/ { "ins",   FALSE, BYTE,  op2(DX, DI), 0 },
-/*6d*/ { "ins",   FALSE, LONG,  op2(DX, DI), 0 },
-/*6e*/ { "outs",  FALSE, BYTE,  op2(SI, DX), 0 },
-/*6f*/ { "outs",  FALSE, LONG,  op2(SI, DX), 0 },
-
-/*70*/ { "jo",    FALSE, NONE,  op1(Db),     0 },
-/*71*/ { "jno",   FALSE, NONE,  op1(Db),     0 },
-/*72*/ { "jb",    FALSE, NONE,  op1(Db),     0 },
-/*73*/ { "jnb",   FALSE, NONE,  op1(Db),     0 },
-/*74*/ { "jz",    FALSE, NONE,  op1(Db),     0 },
-/*75*/ { "jnz",   FALSE, NONE,  op1(Db),     0 },
-/*76*/ { "jbe",   FALSE, NONE,  op1(Db),     0 },
-/*77*/ { "jnbe",  FALSE, NONE,  op1(Db),     0 },
-
-/*78*/ { "js",    FALSE, NONE,  op1(Db),     0 },
-/*79*/ { "jns",   FALSE, NONE,  op1(Db),     0 },
-/*7a*/ { "jp",    FALSE, NONE,  op1(Db),     0 },
-/*7b*/ { "jnp",   FALSE, NONE,  op1(Db),     0 },
-/*7c*/ { "jl",    FALSE, NONE,  op1(Db),     0 },
-/*7d*/ { "jnl",   FALSE, NONE,  op1(Db),     0 },
-/*7e*/ { "jle",   FALSE, NONE,  op1(Db),     0 },
-/*7f*/ { "jnle",  FALSE, NONE,  op1(Db),     0 },
-
-/*80*/  { "",     TRUE,  BYTE,  op2(I, E),   (char *)db_Grp1 },
-/*81*/  { "",     TRUE,  LONG,  op2(I, E),   (char *)db_Grp1 },
-/*82*/  { "",     TRUE,  BYTE,  op2(Is,E),   (char *)db_Grp1 },
-/*83*/  { "",     TRUE,  LONG,  op2(Ibs,E),  (char *)db_Grp1 },
-/*84*/ { "test",  TRUE,  BYTE,  op2(R, E),   0 },
-/*85*/ { "test",  TRUE,  LONG,  op2(R, E),   0 },
-/*86*/ { "xchg",  TRUE,  BYTE,  op2(R, E),   0 },
-/*87*/ { "xchg",  TRUE,  LONG,  op2(R, E),   0 },
-
-/*88*/ { "mov",   TRUE,  BYTE,  op2(R, E),   0 },
-/*89*/ { "mov",   TRUE,  LONG,  op2(R, E),   0 },
-/*8a*/ { "mov",   TRUE,  BYTE,  op2(E, R),   0 },
-/*8b*/ { "mov",   TRUE,  LONG,  op2(E, R),   0 },
-/*8c*/  { "mov",   TRUE,  NONE,  op2(S, Ew),  0 },
-/*8d*/ { "lea",   TRUE,  LONG,  op2(E, R),   0 },
-/*8e*/ { "mov",   TRUE,  NONE,  op2(Ew, S),  0 },
-/*8f*/ { "pop",   TRUE,  LONG,  op1(E),      0 },
-
-/*90*/ { "nop",   FALSE, NONE,  0,           0 },
-/*91*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*92*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*93*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*94*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*95*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*96*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-/*97*/ { "xchg",  FALSE, LONG,  op2(A, Ri),  0 },
-
-/*98*/ { "cbw",   FALSE, SDEP,  0,           "cwde" }, /* cbw/cwde */
-/*99*/ { "cwd",   FALSE, SDEP,  0,           "cdq"  }, /* cwd/cdq */
-/*9a*/ { "lcall", FALSE, NONE,  op1(OS),     0 },
-/*9b*/ { "wait",  FALSE, NONE,  0,           0 },
-/*9c*/ { "pushf", FALSE, LONG,  0,           0 },
-/*9d*/ { "popf",  FALSE, LONG,  0,           0 },
-/*9e*/ { "sahf",  FALSE, NONE,  0,           0 },
-/*9f*/ { "lahf",  FALSE, NONE,  0,           0 },
-
-/*a0*/ { "mov",   FALSE, BYTE,  op2(O, A),   0 },
-/*a1*/ { "mov",   FALSE, LONG,  op2(O, A),   0 },
-/*a2*/ { "mov",   FALSE, BYTE,  op2(A, O),   0 },
-/*a3*/ { "mov",   FALSE, LONG,  op2(A, O),   0 },
-/*a4*/ { "movs",  FALSE, BYTE,  op2(SI,DI),  0 },
-/*a5*/ { "movs",  FALSE, LONG,  op2(SI,DI),  0 },
-/*a6*/ { "cmps",  FALSE, BYTE,  op2(SI,DI),  0 },
-/*a7*/ { "cmps",  FALSE, LONG,  op2(SI,DI),  0 },
-
-/*a8*/ { "test",  FALSE, BYTE,  op2(I, A),   0 },
-/*a9*/ { "test",  FALSE, LONG,  op2(I, A),   0 },
-/*aa*/ { "stos",  FALSE, BYTE,  op1(DI),     0 },
-/*ab*/ { "stos",  FALSE, LONG,  op1(DI),     0 },
-/*ac*/ { "lods",  FALSE, BYTE,  op1(SI),     0 },
-/*ad*/ { "lods",  FALSE, LONG,  op1(SI),     0 },
-/*ae*/ { "scas",  FALSE, BYTE,  op1(SI),     0 },
-/*af*/ { "scas",  FALSE, LONG,  op1(SI),     0 },
-
-/*b0*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b1*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b2*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b3*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b4*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b5*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b6*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-/*b7*/ { "mov",   FALSE, BYTE,  op2(I, Ri),  0 },
-
-/*b8*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*b9*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*ba*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*bb*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*bc*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*bd*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*be*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-/*bf*/ { "mov",   FALSE, LONG,  op2(I, Ri),  0 },
-
-/*c0*/ { "",      TRUE,  BYTE,  op2(Ib, E),  (char *)db_Grp2 },
-/*c1*/ { "",      TRUE,  LONG,  op2(Ib, E),  (char *)db_Grp2 },
-/*c2*/ { "ret",   FALSE, NONE,  op1(Iw),     0 },
-/*c3*/ { "ret",   FALSE, NONE,  0,           0 },
-/*c4*/ { "les",   TRUE,  LONG,  op2(E, R),   0 },
-/*c5*/ { "lds",   TRUE,  LONG,  op2(E, R),   0 },
-/*c6*/ { "mov",   TRUE,  BYTE,  op2(I, E),   0 },
-/*c7*/ { "mov",   TRUE,  LONG,  op2(I, E),   0 },
-
-/*c8*/ { "enter", FALSE, NONE,  op2(Ib, Iw), 0 },
-/*c9*/ { "leave", FALSE, NONE,  0,           0 },
-/*ca*/ { "lret",  FALSE, NONE,  op1(Iw),     0 },
-/*cb*/ { "lret",  FALSE, NONE,  0,           0 },
-/*cc*/ { "int",   FALSE, NONE,  op1(o3),     0 },
-/*cd*/ { "int",   FALSE, NONE,  op1(Ib),     0 },
-/*ce*/ { "into",  FALSE, NONE,  0,           0 },
-/*cf*/ { "iret",  FALSE, NONE,  0,           0 },
-
-/*d0*/ { "",      TRUE,  BYTE,  op2(o1, E),  (char *)db_Grp2 },
-/*d1*/ { "",      TRUE,  LONG,  op2(o1, E),  (char *)db_Grp2 },
-/*d2*/ { "",      TRUE,  BYTE,  op2(CL, E),  (char *)db_Grp2 },
-/*d3*/ { "",      TRUE,  LONG,  op2(CL, E),  (char *)db_Grp2 },
-/*d4*/ { "aam",   TRUE,  NONE,  0,           0 },
-/*d5*/ { "aad",   TRUE,  NONE,  0,           0 },
-/*d6*/ { "",      FALSE, NONE,  0,           0 },
-/*d7*/ { "xlat",  FALSE, BYTE,  op1(BX),     0 },
-
-/*d8*/  { "",      TRUE,  NONE,  0,          (char *)db_Esc8 },
-/*d9*/  { "",      TRUE,  NONE,  0,          (char *)db_Esc9 },
-/*da*/  { "",      TRUE,  NONE,  0,          (char *)db_Esca },
-/*db*/  { "",      TRUE,  NONE,  0,          (char *)db_Escb },
-/*dc*/  { "",      TRUE,  NONE,  0,          (char *)db_Escc },
-/*dd*/  { "",      TRUE,  NONE,  0,          (char *)db_Escd },
-/*de*/  { "",      TRUE,  NONE,  0,          (char *)db_Esce },
-/*df*/  { "",      TRUE,  NONE,  0,          (char *)db_Escf },
-
-/*e0*/ { "loopne",FALSE, NONE,  op1(Db),     0 },
-/*e1*/ { "loope", FALSE, NONE,  op1(Db),     0 },
-/*e2*/ { "loop",  FALSE, NONE,  op1(Db),     0 },
-/*e3*/ { "jcxz",  FALSE, SDEP,  op1(Db),     "jecxz" },
-/*e4*/ { "in",    FALSE, BYTE,  op2(Ib, A),  0 },
-/*e5*/ { "in",    FALSE, LONG,  op2(Ib, A) , 0 },
-/*e6*/ { "out",   FALSE, BYTE,  op2(A, Ib),  0 },
-/*e7*/ { "out",   FALSE, LONG,  op2(A, Ib) , 0 },
-
-/*e8*/ { "call",  FALSE, NONE,  op1(Dl),     0 },
-/*e9*/ { "jmp",   FALSE, NONE,  op1(Dl),     0 },
-/*ea*/ { "ljmp",  FALSE, NONE,  op1(OS),     0 },
-/*eb*/ { "jmp",   FALSE, NONE,  op1(Db),     0 },
-/*ec*/ { "in",    FALSE, BYTE,  op2(DX, A),  0 },
-/*ed*/ { "in",    FALSE, LONG,  op2(DX, A) , 0 },
-/*ee*/ { "out",   FALSE, BYTE,  op2(A, DX),  0 },
-/*ef*/ { "out",   FALSE, LONG,  op2(A, DX) , 0 },
-
-/*f0*/ { "",      FALSE, NONE,  0,          0 },
-/*f1*/ { "",      FALSE, NONE,  0,          0 },
-/*f2*/ { "",      FALSE, NONE,  0,          0 },
-/*f3*/ { "",      FALSE, NONE,  0,          0 },
-/*f4*/ { "hlt",   FALSE, NONE,  0,          0 },
-/*f5*/ { "cmc",   FALSE, NONE,  0,          0 },
-/*f6*/ { "",      TRUE,  BYTE,  0,          (char *)db_Grp3 },
-/*f7*/ { "",      TRUE,  LONG,  0,          (char *)db_Grp3 },
-
-/*f8*/ { "clc",   FALSE, NONE,  0,          0 },
-/*f9*/ { "stc",   FALSE, NONE,  0,          0 },
-/*fa*/ { "cli",   FALSE, NONE,  0,          0 },
-/*fb*/ { "sti",   FALSE, NONE,  0,          0 },
-/*fc*/ { "cld",   FALSE, NONE,  0,          0 },
-/*fd*/ { "std",   FALSE, NONE,  0,          0 },
-/*fe*/ { "",      TRUE,  NONE,  0,          (char *)db_Grp4 },
-/*ff*/ { "",      TRUE,  NONE,  0,          (char *)db_Grp5 },
-};
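
Each row of the 256-entry table above gives, in order: the mnemonic (i_name), whether a ModR/M byte follows (i_has_modrm), the operand size class (i_size), the operand modes packed by op1/op2/op3 (i_mode), and an extra pointer (i_extra) that redirects group opcodes (db_Grp*) and floating-point escapes (db_Esc*) to their sub-tables; db_disasm() below reads the fields under exactly these names.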
-
-struct inst    db_bad_inst =
-       { "???",   FALSE, NONE,  0,           0 };
-
-#define        f_mod(byte)     ((byte)>>6)
-#define        f_reg(byte)     (((byte)>>3)&0x7)
-#define        f_rm(byte)      ((byte)&0x7)
-
-#define        sib_ss(byte)    ((byte)>>6)
-#define        sib_index(byte) (((byte)>>3)&0x7)
-#define        sib_base(byte)  ((byte)&0x7)
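
These extractors follow the standard x86 ModR/M layout (mod in bits 7:6, reg in bits 5:3, r/m in bits 2:0) and the matching SIB layout (scale, index, base). A minimal standalone sketch of one decode, using the same shifts and masks (the byte value is illustrative only):

    #include <stdio.h>

    /* Sketch: decode ModR/M byte 0x5d (binary 01 011 101) exactly as
     * f_mod/f_reg/f_rm above would. */
    int main(void)
    {
            unsigned char b = 0x5d;
            printf("mod=%d reg=%d rm=%d\n",
                b >> 6, (b >> 3) & 0x7, b & 0x7);   /* mod=1 reg=3 rm=5 */
            return 0;
    }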
-
-char * db_index_reg_16[8] = {
-       "%bx,%si",
-       "%bx,%di",
-       "%bp,%si",
-       "%bp,%di",
-       "%si",
-       "%di",
-       "%bp",
-       "%bx"
-};
-
-char * db_reg[3][8] = {
-       "%al",  "%cl",  "%dl",  "%bl",  "%ah",  "%ch",  "%dh",  "%bh",
-       "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
-       "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi"
-};
-
-char * db_seg_reg[8] = {
-       "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", ""
-};
-
-/*
- * lengths for size attributes
- */
-int db_lengths[] = {
-       1,      /* BYTE */
-       2,      /* WORD */
-       4,      /* LONG */
-       8,      /* QUAD */
-       4,      /* SNGL */
-       8,      /* DBLR */
-       10,     /* EXTR */
-};
-
-#define        get_value_inc(result, loc, size, is_signed, task) \
-       result = db_get_task_value((loc), (size), (is_signed), (task)); \
-       (loc) += (size);
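
get_value_inc expands to two statements, a fetch followed by a cursor advance, so a call such as get_value_inc(imm, loc, 2, FALSE, task) behaves as this literal expansion of the macro above:

    imm = db_get_task_value(loc, 2, FALSE, task);   /* fetch two bytes   */
    loc += 2;                                       /* advance past them */

Because the expansion is not wrapped in do { ... } while (0), it must be braced when used as the body of a conditional, as every call site below does.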
-
-/*
- * Read address at location and return updated location.
- */
-db_addr_t
-db_read_address(
-       db_addr_t       loc,
-       int             short_addr,
-       int             regmodrm,
-       struct i_addr   *addrp,         /* out */
-       task_t          task)
-{
-       int             mod, rm, sib, index, ss, disp;
-
-       mod = f_mod(regmodrm);
-       rm  = f_rm(regmodrm);
-
-       if (mod == 3) {
-           addrp->is_reg = TRUE;
-           addrp->disp = rm;
-           return (loc);
-       }
-       addrp->is_reg = FALSE;
-       addrp->index = 0;
-
-       if (short_addr) {
-           addrp->index = 0;
-           addrp->ss = 0;
-           switch (mod) {
-               case 0:
-                   if (rm == 6) {
-                       get_value_inc(disp, loc, 2, TRUE, task);
-                       addrp->disp = disp;
-                       addrp->base = 0;
-                   }
-                   else {
-                       addrp->disp = 0;
-                       addrp->base = db_index_reg_16[rm];
-                   }
-                   break;
-               case 1:
-                   get_value_inc(disp, loc, 1, TRUE, task);
-                   addrp->disp = disp;
-                   addrp->base = db_index_reg_16[rm];
-                   break;
-               case 2:
-                   get_value_inc(disp, loc, 2, TRUE, task);
-                   addrp->disp = disp;
-                   addrp->base = db_index_reg_16[rm];
-                   break;
-           }
-       }
-       else {
-           if (mod != 3 && rm == 4) {
-               get_value_inc(sib, loc, 1, FALSE, task);
-               rm = sib_base(sib);
-               index = sib_index(sib);
-               if (index != 4)
-                   addrp->index = db_reg[LONG][index];
-               addrp->ss = sib_ss(sib);
-           }
-
-           switch (mod) {
-               case 0:
-                   if (rm == 5) {
-                       get_value_inc(addrp->disp, loc, 4, FALSE, task);
-                       addrp->base = 0;
-                   }
-                   else {
-                       addrp->disp = 0;
-                       addrp->base = db_reg[LONG][rm];
-                   }
-                   break;
-
-               case 1:
-                   get_value_inc(disp, loc, 1, TRUE, task);
-                   addrp->disp = disp;
-                   addrp->base = db_reg[LONG][rm];
-                   break;
-
-               case 2:
-                   get_value_inc(disp, loc, 4, FALSE, task);
-                   addrp->disp = disp;
-                   addrp->base = db_reg[LONG][rm];
-                   break;
-           }
-       }
-       return (loc);
-}
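
As a worked example of the 32-bit path (byte values are illustrative, not from the original source): given the post-opcode bytes 4c 91 08, mod=1 with rm=4 selects a SIB byte plus an 8-bit displacement, and db_print_address() below renders the operand as 0x8(%ecx,%edx,4). A standalone sketch of the same field arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned char modrm = 0x4c, sib = 0x91; /* mod=1 reg=1 rm=4 */
            signed char disp = 0x08;
            printf("disp=%d base=%d index=%d scale=%d\n",
                disp,
                sib & 0x7,          /* base  1 -> %ecx    */
                (sib >> 3) & 0x7,   /* index 2 -> %edx    */
                1 << (sib >> 6));   /* ss    2 -> scale 4 */
            return 0;
    }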
-
-void
-db_print_address(
-       char *          seg,
-       int             size,
-       struct i_addr   *addrp,
-       task_t          task)
-{
-       if (addrp->is_reg) {
-           db_printf("%s", db_reg[size][addrp->disp]);
-           return;
-       }
-
-       if (seg) {
-           db_printf("%s:", seg);
-       }
-
-       if (addrp->base != 0 || addrp->index != 0) {
-           db_printf("%#n", addrp->disp);
-           db_printf("(");
-           if (addrp->base)
-               db_printf("%s", addrp->base);
-           if (addrp->index)
-               db_printf(",%s,%d", addrp->index, 1<<addrp->ss);
-           db_printf(")");
-       } else
-           db_task_printsym((db_addr_t)addrp->disp, DB_STGY_ANY, task);
-}
-
-/*
- * Disassemble floating-point ("escape") instruction
- * and return updated location.
- */
-db_addr_t
-db_disasm_esc(
-       db_addr_t       loc,
-       int             inst,
-       int             short_addr,
-       int             size,
-       char *          seg,
-       task_t          task)
-{
-       int             regmodrm;
-       struct finst    *fp;
-       int             mod;
-       struct i_addr   address;
-       char *          name;
-
-       get_value_inc(regmodrm, loc, 1, FALSE, task);
-       fp = &db_Esc_inst[inst - 0xd8][f_reg(regmodrm)];
-       mod = f_mod(regmodrm);
-       if (mod != 3) {
-           /*
-            * Normal address modes.
-            */
-           loc = db_read_address(loc, short_addr, regmodrm, &address, task);
-           db_printf(fp->f_name);
-           switch(fp->f_size) {
-               case SNGL:
-                   db_printf("s");
-                   break;
-               case DBLR:
-                   db_printf("l");
-                   break;
-               case EXTR:
-                   db_printf("t");
-                   break;
-               case WORD:
-                   db_printf("s");
-                   break;
-               case LONG:
-                   db_printf("l");
-                   break;
-               case QUAD:
-                   db_printf("q");
-                   break;
-               default:
-                   break;
-           }
-           db_printf("\t");
-           db_print_address(seg, BYTE, &address, task);
-       }
-       else {
-           /*
-            * 'reg-reg' - special formats
-            */
-           switch (fp->f_rrmode) {
-               case op2(ST,STI):
-                   name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
-                   db_printf("%s\t%%st,%%st(%d)",name,f_rm(regmodrm));
-                   break;
-               case op2(STI,ST):
-                   name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
-                   db_printf("%s\t%%st(%d),%%st",name, f_rm(regmodrm));
-                   break;
-               case op1(STI):
-                   name = (fp->f_rrname) ? fp->f_rrname : fp->f_name;
-                   db_printf("%s\t%%st(%d)",name, f_rm(regmodrm));
-                   break;
-               case op1(X):
-                   db_printf("%s", ((char **)fp->f_rrname)[f_rm(regmodrm)]);
-                   break;
-               case op1(XA):
-                   db_printf("%s\t%%ax",
-                                ((char **)fp->f_rrname)[f_rm(regmodrm)]);
-                   break;
-               default:
-                   db_printf("<bad instruction>");
-                   break;
-           }
-       }
-
-       return (loc);
-}
-
-/*
- * Disassemble instruction at 'loc'.  'altfmt' specifies an
- * (optional) alternate format.  Return address of start of
- * next instruction.
- */
-db_addr_t
-db_disasm(
-       db_addr_t       loc,
-       boolean_t       altfmt,
-       task_t          task)
-{
-       int     inst;
-       int     size;
-       int     short_addr;
-       char *  seg;
-       struct inst *   ip;
-       char *  i_name;
-       int     i_size;
-       int     i_mode;
-       int     regmodrm;
-       boolean_t       first;
-       int     displ;
-       int     prefix;
-       int     imm;
-       int     imm2;
-       int     len;
-       struct i_addr   address;
-       char    *filename;
-       int     linenum;
-
-       get_value_inc(inst, loc, 1, FALSE, task);
-       if (db_disasm_16) {
-           short_addr = TRUE;
-           size = WORD;
-       }
-       else {
-           short_addr = FALSE;
-           size = LONG;
-       }
-       seg = 0;
-
-       /*
-        * Get prefixes
-        */
-       prefix = TRUE;
-       do {
-           switch (inst) {
-               case 0x66:              /* data16 */
-                   if (size == LONG)
-                       size = WORD;
-                   else
-                       size = LONG;
-                   break;
-               case 0x67:
-                   short_addr = !short_addr;
-                   break;
-               case 0x26:
-                   seg = "%es";
-                   break;
-               case 0x36:
-                   seg = "%ss";
-                   break;
-               case 0x2e:
-                   seg = "%cs";
-                   break;
-               case 0x3e:
-                   seg = "%ds";
-                   break;
-               case 0x64:
-                   seg = "%fs";
-                   break;
-               case 0x65:
-                   seg = "%gs";
-                   break;
-               case 0xf0:
-                   db_printf("lock ");
-                   break;
-               case 0xf2:
-                   db_printf("repne ");
-                   break;
-               case 0xf3:
-                   db_printf("repe "); /* XXX repe VS rep */
-                   break;
-               default:
-                   prefix = FALSE;
-                   break;
-           }
-           if (prefix) {
-               get_value_inc(inst, loc, 1, FALSE, task);
-           }
-       } while (prefix);
-
-       if (inst >= 0xd8 && inst <= 0xdf) {
-           loc = db_disasm_esc(loc, inst, short_addr, size, seg, task);
-           db_printf("\n");
-           return (loc);
-       }
-
-       if (inst == 0x0f) {
-           get_value_inc(inst, loc, 1, FALSE, task);
-           ip = db_inst_0f[inst>>4];
-           if (ip == 0) {
-               ip = &db_bad_inst;
-           }
-           else {
-               ip = &ip[inst&0xf];
-           }
-       }
-       else
-           ip = &db_inst_table[inst];
-
-       if (ip->i_has_modrm) {
-           get_value_inc(regmodrm, loc, 1, FALSE, task);
-           loc = db_read_address(loc, short_addr, regmodrm, &address, task);
-       }
-
-       i_name = ip->i_name;
-       i_size = ip->i_size;
-       i_mode = ip->i_mode;
-
-       if (ip->i_extra == (char *)db_Grp1 ||
-           ip->i_extra == (char *)db_Grp2 ||
-           ip->i_extra == (char *)db_Grp6 ||
-           ip->i_extra == (char *)db_Grp7 ||
-           ip->i_extra == (char *)db_Grp8) {
-           i_name = ((char **)ip->i_extra)[f_reg(regmodrm)];
-       }
-       else if (ip->i_extra == (char *)db_Grp3) {
-           ip = (struct inst *)ip->i_extra;
-           ip = &ip[f_reg(regmodrm)];
-           i_name = ip->i_name;
-           i_mode = ip->i_mode;
-       }
-       else if (ip->i_extra == (char *)db_Grp4 ||
-                ip->i_extra == (char *)db_Grp5) {
-           ip = (struct inst *)ip->i_extra;
-           ip = &ip[f_reg(regmodrm)];
-           i_name = ip->i_name;
-           i_mode = ip->i_mode;
-           i_size = ip->i_size;
-       }
-
-       if (i_size == SDEP) {
-           if (size == WORD)
-               db_printf(i_name);
-           else
-               db_printf(ip->i_extra);
-       }
-       else {
-           db_printf(i_name);
-           if (i_size != NONE) {
-               if (i_size == BYTE) {
-                   db_printf("b");
-                   size = BYTE;
-               }
-               else if (i_size == WORD) {
-                   db_printf("w");
-                   size = WORD;
-               }
-               else if (size == WORD)
-                   db_printf("w");
-               else
-                   db_printf("l");
-           }
-       }
-       db_printf("\t");
-       for (first = TRUE;
-            i_mode != 0;
-            i_mode >>= 8, first = FALSE)
-       {
-           if (!first)
-               db_printf(",");
-
-           switch (i_mode & 0xFF) {
-
-               case E:
-                   db_print_address(seg, size, &address, task);
-                   break;
-
-               case Eind:
-                   db_printf("*");
-                   db_print_address(seg, size, &address, task);
-                   break;
-
-               case Ew:
-                   db_print_address(seg, WORD, &address, task);
-                   break;
-
-               case Eb:
-                   db_print_address(seg, BYTE, &address, task);
-                   break;
-
-               case R:
-                   db_printf("%s", db_reg[size][f_reg(regmodrm)]);
-                   break;
-
-               case Rw:
-                   db_printf("%s", db_reg[WORD][f_reg(regmodrm)]);
-                   break;
-
-               case Ri:
-                   db_printf("%s", db_reg[size][f_rm(inst)]);
-                   break;
-
-               case S:
-                   db_printf("%s", db_seg_reg[f_reg(regmodrm)]);
-                   break;
-
-               case Si:
-                   db_printf("%s", db_seg_reg[f_reg(inst)]);
-                   break;
-
-               case A:
-                   db_printf("%s", db_reg[size][0]);   /* acc */
-                   break;
-
-               case BX:
-                   if (seg)
-                       db_printf("%s:", seg);
-                   db_printf("(%s)", short_addr ? "%bx" : "%ebx");
-                   break;
-
-               case CL:
-                   db_printf("%%cl");
-                   break;
-
-               case DX:
-                   db_printf("%%dx");
-                   break;
-
-               case SI:
-                   if (seg)
-                       db_printf("%s:", seg);
-                   db_printf("(%s)", short_addr ? "%si" : "%esi");
-                   break;
-
-               case DI:
-                   db_printf("%%es:(%s)", short_addr ? "%di" : "%edi");
-                   break;
-
-               case CR:
-                   db_printf("%%cr%d", f_reg(regmodrm));
-                   break;
-
-               case DR:
-                   db_printf("%%dr%d", f_reg(regmodrm));
-                   break;
-
-               case TR:
-                   db_printf("%%tr%d", f_reg(regmodrm));
-                   break;
-
-               case I:
-                   len = db_lengths[size];
-                   get_value_inc(imm, loc, len, FALSE, task);/* unsigned */
-                   db_printf("$%#n", imm);
-                   break;
-
-               case Is:
-                   len = db_lengths[size];
-                   get_value_inc(imm, loc, len, TRUE, task);   /* signed */
-                   db_printf("$%#r", imm);
-                   break;
-
-               case Ib:
-                   get_value_inc(imm, loc, 1, FALSE, task);    /* unsigned */
-                   db_printf("$%#n", imm);
-                   break;
-
-               case Ibs:
-                   get_value_inc(imm, loc, 1, TRUE, task);     /* signed */
-                   db_printf("$%#r", imm);
-                   break;
-
-               case Iw:
-                   get_value_inc(imm, loc, 2, FALSE, task);    /* unsigned */
-                   db_printf("$%#n", imm);
-                   break;
-
-               case Il:
-                   get_value_inc(imm, loc, 4, FALSE, task);
-                   db_printf("$%#n", imm);
-                   break;
-
-               case O:
-                   if (short_addr) {
-                       get_value_inc(displ, loc, 2, TRUE, task);
-                   }
-                   else {
-                       get_value_inc(displ, loc, 4, TRUE, task);
-                   }
-                   if (seg)
-                       db_printf("%s:%#r",seg, displ);
-                   else
-                       db_task_printsym((db_addr_t)displ, DB_STGY_ANY, task);
-                   break;
-
-               case Db:
-                   get_value_inc(displ, loc, 1, TRUE, task);
-                   if (short_addr) {
-                       /* offset only affects low 16 bits */
-                       displ = (loc & 0xffff0000)
-                             | ((loc + displ) & 0xffff);
-                   }
-                   else
-                       displ = displ + loc;
-                   db_task_printsym((db_addr_t)displ,DB_STGY_ANY,task);
-                   if (db_line_at_pc(0, &filename, &linenum, displ)) {
-                       db_printf(" [%s", filename);
-                       if (linenum > 0)
-                           db_printf(":%d", linenum);
-                       db_printf("]");
-                   }
-                   break;
-
-               case Dl:
-                   if (short_addr) {
-                       get_value_inc(displ, loc, 2, TRUE, task);
-                       /* offset only affects low 16 bits */
-                       displ = (loc & 0xffff0000)
-                             | ((loc + displ) & 0xffff);
-                   }
-                   else {
-                       get_value_inc(displ, loc, 4, TRUE, task);
-                       displ = displ + loc;
-                   }
-                   db_task_printsym((db_addr_t)displ, DB_STGY_ANY, task);
-                   if (db_line_at_pc(0, &filename, &linenum, displ)) {
-                       db_printf(" [%s", filename);
-                       if (linenum > 0)
-                           db_printf(":%d", linenum);
-                       db_printf("]");
-                   }
-                   break;
-
-               case o1:
-                   db_printf("$1");
-                   break;
-
-               case o3:
-                   db_printf("$3");
-                   break;
-
-               case OS:
-                   if (short_addr) {
-                       get_value_inc(imm, loc, 2, FALSE, task); /* offset */
-                   }
-                   else {
-                       get_value_inc(imm, loc, 4, FALSE, task); /* offset */
-                   }
-                   get_value_inc(imm2, loc, 2, FALSE, task);   /* segment */
-                   db_printf("$%#n,%#n", imm2, imm);
-                   break;
-           }
-       }
-
-       if (altfmt == 0 && !db_disasm_16) {
-           if (inst == 0xe9 || inst == 0xeb) { /* jmp, Dl or Db */
-               /*
-                * GAS pads to longword boundary after unconditional jumps.
-                */
-               while (loc & (4-1)) {
-                   get_value_inc(inst, loc, 1, FALSE, task);
-                   if (inst != 0x90) { /* not a nop: back up and stop */
-                       loc--;
-                       break;
-                   }
-               }
-           }
-       }
-       db_printf("\n");
-       return (loc);
-}
-
-/*
- * Classify instructions by whether they read or write memory.
- */
-
-#define        DBLS_LOAD       0x01    /* instruction reads from memory */
-#define        DBLS_STORE      0x02    /* instruction writes to memory */
-
-#define DBLS_MODRM     0x10    /* instruction uses mod r/m byte */
-#define        DBLS_SECOND     0x20    /* instruction does two operations */
-#define        DBLS_ESCAPE     0x40    /* escape to two-byte opcodes */
-#define DBLS_SWREG     0x80    /* need to switch on reg bits of mod r/m */
-
-#define DBLS_MODS      0xf0
-#define DBLS_LMASK     (DBLS_MODS|DBLS_LOAD)
-#define DBLS_SMASK     (DBLS_MODS|DBLS_STORE)
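
Each byte of the two tables below packs these flags for one opcode. The very first entry, 0x12 for opcode 0x00 (add r/m8,r8), is DBLS_MODRM|DBLS_STORE: the instruction writes memory exactly when the mod field of its ModR/M byte is not 3. A standalone sketch of that test (values illustrative):

    #include <stdio.h>

    #define DBLS_STORE 0x02                 /* mirrors the define above */

    int main(void)
    {
            unsigned char bits = 0x12;      /* table entry for opcode 0x00 */
            unsigned char modrm = 0x03;     /* mod=0: memory destination   */
            printf("stores=%d\n",
                (bits & DBLS_STORE) && ((modrm & 0xc0) != 0xc0));   /* 1 */
            return 0;
    }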
-
-char db_ldstrtab[] = {
-       0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x02, 0x01,
-               0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x02, 0x40,
-       0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x02, 0x01,
-               0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x02, 0x01,
-       0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00,
-               0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00,
-       0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00,
-               0x12, 0x12, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00,
-
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-       0x02, 0x01, 0x21, 0x13, 0x00, 0x00, 0x00, 0x00,
-               0x02, 0x11, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-       0x12, 0x12, 0x00, 0x12, 0x11, 0x11, 0x13, 0x13,
-               0x12, 0x12, 0x11, 0x11, 0x12, 0x00, 0x11, 0x03,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x02, 0x00, 0x02, 0x01, 0x00, 0x00,
-       0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x21, 0x21,
-               0x00, 0x00, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-       0x13, 0x13, 0x00, 0x00, 0x01, 0x01, 0x12, 0x12,
-               0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x13, 0x13, 0x13, 0x13, 0x00, 0x00, 0x00, 0x01,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x13,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x13,
-};
-
-unsigned char db_ldstrtab0f[] = {
-       0x80, 0x80, 0x11, 0x11, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12,
-               0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12,
-       0x02, 0x01, 0x00, 0x11, 0x13, 0x13, 0x00, 0x00,
-               0x02, 0x01, 0x12, 0x13, 0x13, 0x13, 0x00, 0x11,
-       0x00, 0x00, 0x01, 0x13, 0x01, 0x01, 0x11, 0x11,
-               0x00, 0x00, 0x80, 0x13, 0x13, 0x13, 0x11, 0x11,
-
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-int db_inst_swreg(boolean_t, unsigned long, unsigned char);
-
-/*
- * Given four bytes of instruction (packed into an integer, not an
- * array of characters), compute whether the instruction reads
- * memory.
- */
-int
-db_inst_load(
-       unsigned long insw)
-{
-       unsigned char insb, bits;
-
-       insb = insw & 0xff;
-       insw >>= 8;
-       bits = db_ldstrtab[insb];
-       if (!(bits & DBLS_LOAD))
-               return (0);
-       while (1) {
-               switch (bits & DBLS_MODS) {
-               case 0:
-                       return (1);     
-               case DBLS_MODRM:
-                       insb = insw & 0xff;
-                       return ((insb & 0xc0) != 0xc0);
-               case DBLS_SECOND|DBLS_MODRM:
-                       insb = insw & 0xff;
-                       return ((insb & 0xc0) != 0xc0 ? 2 : 0);
-               case DBLS_SECOND:
-                       return (2);
-               case DBLS_ESCAPE:
-                       insb = insw & 0xff;
-                       insw >>= 8;
-                       bits = db_ldstrtab0f[insb];
-                       break;
-               case DBLS_SWREG:
-                       return (db_inst_swreg(TRUE, insw, insb));
-               default:
-                       panic ("db_inst_load: unknown mod bits");
-               }
-       }
-}
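
As a usage sketch (byte values illustrative): for the two instruction bytes 8b 03, i.e. mov (%ebx),%eax, packed low byte first into an unsigned long, db_inst_load(0x038b) looks up db_ldstrtab[0x8b] == 0x11 (DBLS_MODRM|DBLS_LOAD), finds mod != 3 in the following byte 0x03, and returns 1.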
-
-/*
- * Given four bytes of instruction (packed into an integer, not an
- * array of characters), compute whether the instruction writes
- * memory.
- */
-int
-db_inst_store(
-       unsigned long insw)
-{
-       unsigned char insb, bits;
-
-       insb = insw & 0xff;
-       insw >>= 8;
-       bits = db_ldstrtab[insb];
-       if (!(bits & DBLS_STORE))
-               return (0);
-       while (1) {
-               switch (bits & DBLS_MODS) {
-               case 0:
-                       return (1);     
-               case DBLS_MODRM:
-                       insb = insw & 0xff;
-                       return ((insb & 0xc0) != 0xc0);
-               case DBLS_SECOND|DBLS_MODRM:
-                       insb = insw & 0xff;
-                       return ((insb & 0xc0) != 0xc0 ? 2 : 0);
-               case DBLS_SECOND:
-                       return (2);
-               case DBLS_ESCAPE:
-                       insb = insw & 0xff;
-                       insw >>= 8;
-                       bits = db_ldstrtab0f[insb];
-                       break;
-               case DBLS_SWREG:
-                       return (db_inst_swreg(FALSE, insw, insb));
-               default:
-                       panic ("db_inst_store: unknown mod bits");
-               }
-       }
-}
-
-/*
- * Parse a mod r/m byte to see whether an extended opcode reads
- * or writes memory.
- */
-int
-db_inst_swreg(
-       boolean_t isload,
-       unsigned long insw,
-       unsigned char insb)
-{
-       unsigned char modrm = insw & 0xff;
-
-       switch (insb) {
-       case 0x00:
-               switch (modrm & 0x38) {
-               case 0x00:
-               case 0x08:
-               case 0x10:
-               case 0x18:
-                       return ((modrm & 0xc0) != 0xc0);
-               }
-               break;
-       case 0x01:
-               switch (modrm & 0x38) {
-               case 0x00:
-               case 0x08:
-               case 0x10:
-               case 0x18:
-                       return ((modrm & 0xc0) != 0xc0 ? 2 : 0);
-               case 0x20:
-               case 0x30:
-                       return ((modrm & 0xc0) != 0xc0);
-               }
-               break;
-       case 0xba:
-               if (isload)
-                       return ((modrm & 0xc0) != 0xc0);
-               switch (modrm & 0x38) {
-               case 0x28:
-               case 0x30:
-               case 0x38:
-                       return ((modrm & 0xc0) != 0xc0);
-               }
-               break;
-       }
-       return (0);
-}
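
Here insb is the second opcode byte after an 0x0f escape: 0x00 covers the sldt/str/lldt/ltr group (reg fields 0-3), 0x01 covers sgdt/sidt/lgdt/lidt plus smsw/lmsw (reg fields 4 and 6), and 0xba covers the bt/bts/btr/btc group, of which only bts/btr/btc (reg fields 5-7) can store.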
diff --git a/osfmk/i386/db_gcc_aout.c b/osfmk/i386/db_gcc_aout.c
deleted file mode 100644 (file)
index 508146b..0000000
+++ /dev/null
@@ -1,687 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * COPYRIGHT NOTICE
- * 
- * Copyright (c) 1990, 1991, 1992, 1993 Open Software Foundation, Inc. 
- * 
- * Permission is hereby granted to use, copy, modify and freely distribute
- * the software in this file and its documentation for any purpose without
- * fee, provided that the above copyright notice appears in all copies and
- * that both the copyright notice and this permission notice appear in
- * supporting documentation.  Further, provided that the name of Open
- * Software Foundation, Inc. ("OSF") not be used in advertising or
- * publicity pertaining to distribution of the software without prior
- * written permission from OSF.  OSF makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:36  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:37  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.2.2.3  1994/01/28  17:23:00  chasb
- *     Expand Copyrights
- *     [1994/01/27  19:40:16  chasb]
- *
- * Revision 1.2.2.2  1993/06/09  02:27:36  gm
- *     Added to OSF/1 R1.3 from NMK15.0.
- *     [1993/06/02  21:04:03  jeffc]
- * 
- * Revision 1.2  1993/04/19  16:13:10  devrcs
- *     pick up file_io.h from bootstrap directory
- *     [1993/02/27  15:01:09  david]
- * 
- *     Added new arguments and a missing one to db_add_symbol_table
- *     [barbou@gr.osf.org]
- *     [92/12/03            bernadat]
- * 
- *     Added gcc symbol table handling based on db_aout.c (Revision 2.4)
- *     [91/07/31            tak]
- * 
- * Revision 1.1  1992/09/30  02:02:23  robert
- *     Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.1  91/07/31  13:13:51  jeffreyh
- * Created.
- * 
- * 31-Jul-91  Jeffrey Heller (tak) at Open Software Foundation
- *     Added gcc symbol table handling based on db_aout.c (Revision 2.4)
- *
- */
-/* CMU_ENDHIST */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- * Symbol table routines for a.out format files.
- */
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>                /* data types */
-#include <ddb/db_sym.h>
-
-#ifdef DB_GCC_AOUT
-
-#include <ddb/nlist.h>                 /* a.out symbol table */
-#include <i386/stab.h>
-
-/*
- * An a.out symbol table as loaded into the kernel debugger:
- *
- * symtab      -> size of symbol entries, in bytes
- * sp          -> first symbol entry
- *                ...
- * ep          -> last symbol entry + 1
- * strtab      == start of string table
- *                size of string table in bytes,
- *                including this word
- *             -> strings
- */
-
-/*
- * Find pointers to the start and end of the symbol entries,
- * given a pointer to the start of the symbol table.
- */
-#define        db_get_aout_symtab(symtab, sp, ep) \
-       (sp = (struct nlist *)((symtab) + 1), \
-        ep = (struct nlist *)((char *)sp + *(symtab)))
-
-X_db_sym_init(symtab, esymtab, name)
-       int *   symtab;         /* pointer to start of symbol table */
-       char *  esymtab;        /* pointer to end of string table,
-                                  for checking - rounded up to integer
-                                  boundary */
-       char *  name;
-{
-       register struct nlist   *sym_start, *sym_end;
-       register struct nlist   *sp;
-       register char * strtab;
-       register int    strlen;
-
-       db_get_aout_symtab(symtab, sym_start, sym_end);
-
-       strtab = (char *)sym_end;
-       strlen = *(int *)strtab;
-
-       if (strtab + ((strlen + sizeof(int) - 1) & ~(sizeof(int)-1))
-           != esymtab)
-       {
-           db_printf("[ %s symbol table not valid ]\n", name);
-           return;
-       }
-
-       db_printf("[ preserving %#x bytes of %s symbol table ]\n",
-               esymtab - (char *)symtab, name);
-
-       for (sp = sym_start; sp < sym_end; sp++) {
-           register int strx;
-           strx = sp->n_un.n_strx;
-           if (strx != 0) {
-               if (strx > strlen) {
-                   db_printf("Bad string table index (%#x)\n", strx);
-                   sp->n_un.n_name = 0;
-                   continue;
-               }
-               sp->n_un.n_name = strtab + strx;
-           }
-       }
-
-       db_add_symbol_table(sym_start, sym_end, name, (char *)symtab,
-                           0, 0, 0, FALSE);
-}
-
-/*
- * Check whether a name looks like a file name (matches the xxxx.x pattern).
- */
-boolean_t
-X_db_is_filename(name)
-       register char *name;
-{
-       while (*name) {
-           if (*name == '.') {
-               if (name[1])
-                   return(TRUE);
-           }
-           name++;
-       }
-       return(FALSE);
-}
-
-/*
- * special name comparison routine with a name in the symbol table entry
- */
-boolean_t
-X_db_eq_name(sp, name)
-       struct nlist *sp;
-       char *name;
-{
-       register char *s1, *s2;
-
-       s1 = sp->n_un.n_name;
-       s2 = name;
-       if (*s1 == '_' && *s2 && *s2 != '_')
-           s1++;
-       while (*s2) {
-           if (*s1++ != *s2++) {
-               /*
-                * check .c .o file name comparison case
-                */
-               if (*s2 == 0 && sp->n_un.n_name <= s1 - 2 
-                       && s1[-2] == '.' && s1[-1] == 'o')
-                   return(TRUE);
-               return(FALSE);
-           }
-       }
-       /*
-        * do special check for
-        *     xxx:yyy for N_FUN
-        *     xxx.ttt for N_DATA and N_BSS
-        */
-       return(*s1 == 0 || (*s1 == ':' && sp->n_type == N_FUN) || 
-               (*s1 == '.' && (sp->n_type == N_DATA || sp->n_type == N_BSS)));
-}
-
-/*
- * search a symbol table with name and type
- *     fp(in,out): last found text file name symbol entry
- */
-struct nlist *
-X_db_search_name(sp, ep, name, type, fp)
-       register struct nlist *sp;
-       struct nlist    *ep;
-       char            *name;
-       int             type;
-       struct nlist    **fp;
-{
-       struct nlist    *file_sp = *fp;
-       struct nlist    *found_sp = 0;
-
-       for ( ; sp < ep; sp++) {
-           if (sp->n_type == N_TEXT && X_db_is_filename(sp->n_un.n_name))
-               *fp = sp;
-           if (type) {
-               if (sp->n_type == type) {
-                   if (X_db_eq_name(sp, name))
-                       return(sp);
-               }
-               if (sp->n_type == N_SO)
-                   *fp = sp;
-               continue;
-           }
-           if (sp->n_type & N_STAB)
-               continue;
-           if (sp->n_un.n_name && X_db_eq_name(sp, name)) {
-               /*
-                * In the case of a search qualified by a file,
-                * return the match immediately after a quick check.
-                * Otherwise, prefer an external symbol.
-                */
-               if (file_sp) {
-                   if ((file_sp == *fp) || (sp->n_type & N_EXT))
-                       return(sp);
-               } else if (sp->n_type & N_EXT)
-                   return(sp);
-               else
-                   found_sp = sp;
-           }
-       }
-       return(found_sp);
-}
-
-/*
- * search a symbol with file, func and line qualification
- */
-struct nlist *
-X_db_qualified_search(stab, file, sym, line)
-       db_symtab_t     *stab;
-       char            *file;
-       char            *sym;
-       int             line;
-{
-       register struct nlist *sp = (struct nlist *)stab->start;
-       struct nlist    *ep = (struct nlist *)stab->end;
-       struct nlist    *fp = 0;
-       struct nlist    *found_sp;
-       unsigned        func_top;
-       boolean_t       in_file;
-
-       if (file == 0 && sym == 0)
-           return(0);
-       if (file) {
-           if ((sp = X_db_search_name(sp, ep, file, N_TEXT, &fp)) == 0)
-               return(0);
-       }
-       if (sym) {
-           sp = X_db_search_name(sp, ep, sym, (line > 0)? N_FUN: 0, &fp);
-           if (sp == 0)
-               return(0);
-       }
-       if (line > 0) {
-           if (file && !X_db_eq_name(fp, file))
-               return(0);
-           found_sp = 0;
-           if (sp->n_type == N_FUN) {
-               /*
-                * qualified by function name:
-                *     search backward, because the line number entries
-                *     for the function sit above it in this case.
-                */
-               func_top = sp->n_value;
-               for (sp--; sp >= (struct nlist *)stab->start; sp--) {
-                   if (sp->n_type != N_SLINE)
-                       continue;
-                   if (sp->n_value < func_top)
-                       break;
-                   if (sp->n_desc <= line) {
-                       if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
-                           found_sp = sp;
-                       if (sp->n_desc == line)
-                           break;
-                   }
-               }
-               if (sp->n_type != N_SLINE || sp->n_value < func_top)
-                   return(0);
-           } else {
-               /*
-                * qualified by only file name
-                *    search forward in this case
-                */
-               in_file = TRUE;
-               for (sp++; sp < ep; sp++) {
-                   if (sp->n_type == N_TEXT 
-                       && X_db_is_filename(sp->n_un.n_name))
-                       break;          /* enter into another file */
-                   if (sp->n_type == N_SOL) {
-                       in_file = X_db_eq_name(sp, file);
-                       continue;
-                   }
-                   if (!in_file || sp->n_type != N_SLINE)
-                       continue;
-                   if (sp->n_desc <= line) {
-                       if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
-                           found_sp = sp;
-                       if (sp->n_desc == line)
-                           break;
-                   }
-               }
-           }
-           sp = found_sp;
-       }
-       return(sp);
-}
-
-/*
- * lookup symbol by name
- */
-db_sym_t
-X_db_lookup(stab, symstr)
-       db_symtab_t     *stab;
-       char *          symstr;
-{
-       register        char *p;
-       register        n;
-       int             n_name;
-       int             line_number;
-       char            *file_name = 0;
-       char            *sym_name = 0;
-       char            *component[3];
-       struct nlist    *found = 0;
-
-       /*
-        * split the string into components:   [file_name:]symbol[:line_number]
-        */
-       component[0] = symstr;
-       component[1] = component[2] = 0;
-       for (p = symstr, n = 1; *p; p++) {
-           if (*p == ':') {
-               if (n >= 3)
-                       break;
-               *p = 0;
-               component[n++] = p+1;
-           }
-       }
-       if (*p != 0)
-           goto out;
-       line_number = 0;
-       n_name = n;
-       p = component[n-1];
-       if (*p >= '0' && *p <= '9') {
-           if (n == 1)
-               goto out;
-           for (line_number = 0; *p; p++) {
-               if (*p < '0' || *p > '9')
-                   goto out;
-               line_number = line_number*10 + *p - '0';
-           }
-           n_name--;
-       } else if (n >= 3)
-           goto out;
-       if (n_name == 1) {
-           if (X_db_is_filename(component[0])) {
-               file_name = component[0];
-               sym_name = 0;
-           } else {
-               file_name = 0;
-               sym_name = component[0];
-           }
-       } else {
-           file_name = component[0];
-           sym_name = component[1];
-       }
-       found = X_db_qualified_search(stab, file_name, sym_name, line_number);
-       
-out:
-       while (--n > 1)
-           component[n][-1] = ':';
-       return((db_sym_t) found);
-}
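
The accepted forms are thus 'symbol', 'file.c', 'file.c:symbol', 'symbol:line', and 'file.c:symbol:line' (names illustrative); anything else falls through to the out: label, which restores the ':' separators that were nulled during splitting and returns 0.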
-
-db_sym_t
-X_db_search_symbol(symtab, off, strategy, diffp)
-       db_symtab_t *   symtab;
-       register
-       db_addr_t       off;
-       db_strategy_t   strategy;
-       db_expr_t       *diffp;         /* in/out */
-{
-       register unsigned int   diff = *diffp;
-       register struct nlist   *symp = 0;
-       register struct nlist   *sp, *ep;
-
-       sp = (struct nlist *)symtab->start;
-       ep = (struct nlist *)symtab->end;
-
-       for (; sp < ep; sp++) {
-           if (sp->n_un.n_name == 0)
-               continue;
-           if ((sp->n_type & N_STAB) != 0)
-               continue;
-           if (off >= sp->n_value) {
-               if (off - sp->n_value < diff) {
-                   diff = off - sp->n_value;
-                   symp = sp;
-                   if (diff == 0 && (sp->n_type & N_EXT))
-                       break;
-               }
-               else if (off - sp->n_value == diff) {
-                   if (symp == 0)
-                       symp = sp;
-                   else if ((symp->n_type & N_EXT) == 0 &&
-                               (sp->n_type & N_EXT) != 0)
-                       symp = sp;      /* pick the external symbol */
-               }
-           }
-       }
-       if (symp == 0) {
-           *diffp = off;
-       }
-       else {
-           *diffp = diff;
-       }
-       return ((db_sym_t)symp);
-}
-
-/*
- * Return the name and value for a symbol.
- */
-void
-X_db_symbol_values(sym, namep, valuep)
-       db_sym_t        sym;
-       char            **namep;
-       db_expr_t       *valuep;
-{
-       register struct nlist *sp;
-
-       sp = (struct nlist *)sym;
-       if (namep)
-           *namep = sp->n_un.n_name;
-       if (valuep)
-           *valuep = sp->n_value;
-}
-
-#define X_DB_MAX_DIFF  8       /* maximum allowable diff at the end of line */
-
-/*
- * search symbol by value
- */
-X_db_search_by_addr(stab, addr, file, func, line, diff)
-       db_symtab_t     *stab;
-       register        unsigned addr;
-       char            **file;
-       char            **func;
-       int             *line;
-       unsigned        *diff;
-{
-       register        struct nlist *sp;
-       register        struct nlist *line_sp, *func_sp, *file_sp, *line_func;
-       register        func_diff, line_diff;
-       boolean_t       found_line = FALSE;
-       struct          nlist *ep = (struct nlist *)stab->end;
-
-       line_sp = func_sp = file_sp = line_func = 0;
-       *file = *func = 0;
-       *line = 0;
-       for (sp = (struct nlist *)stab->start; sp < ep; sp++) {
-           switch(sp->n_type) {
-           case N_SLINE:
-               if (sp->n_value <= addr) {
-                   if (line_sp == 0 || line_diff >= addr - sp->n_value) {
-                       if (line_func)
-                           line_func = 0;
-                       line_sp = sp;
-                       line_diff = addr - sp->n_value;
-                   }
-               }
-               if (sp->n_value >= addr && line_sp)
-                   found_line = TRUE;
-               continue;
-           case N_FUN:
-               if ((found_line || (line_sp && line_diff < X_DB_MAX_DIFF))
-                   && line_func == 0)
-                   line_func = sp;
-               continue;
-           case N_TEXT:
-               if (X_db_is_filename(sp->n_un.n_name)) {
-                   if (sp->n_value > addr)
-                       continue;
-                   if (file_sp == 0 || file_sp->n_value < sp->n_value)
-                       file_sp = sp;
-               } else if (sp->n_value <= addr &&
-                        (func_sp == 0 || func_diff > addr - sp->n_value)) {
-                   func_sp = sp;
-                   func_diff = addr - sp->n_value;
-               }
-               continue;
-           case N_TEXT|N_EXT:
-               if (sp->n_value <= addr &&
-                        (func_sp == 0 || func_diff >= addr - sp->n_value)) {
-                   func_sp = sp;
-                   func_diff = addr - sp->n_value;
-                   if (func_diff == 0 && file_sp && func_sp)
-                       break;
-               }
-           default:
-               continue;
-           }
-           break;
-       }
-       if (line_sp) {
-           if (line_func == 0 || func_sp == 0
-               || line_func->n_value != func_sp->n_value)
-               line_sp = 0;
-       }
-       if (file_sp) {
-           *diff = addr - file_sp->n_value;
-           *file = file_sp->n_un.n_name;
-       }
-       if (func_sp) {
-           *diff = addr - func_sp->n_value;
-           *func = (func_sp->n_un.n_name[0] == '_')?
-                       func_sp->n_un.n_name + 1: func_sp->n_un.n_name;
-       }
-       if (line_sp) {
-           *diff = addr - line_sp->n_value;
-           *line = line_sp->n_desc;
-       }
-       return(file_sp || func_sp || line_sp);
-}
-
-/* ARGSUSED */
-boolean_t
-X_db_line_at_pc(stab, sym, file, line, pc)
-       db_symtab_t     *stab;
-       db_sym_t        sym;
-       char            **file;
-       int             *line;
-       db_expr_t       pc;
-{
-       char            *func;
-       unsigned        diff;
-       boolean_t       found;
-
-       found = X_db_search_by_addr(stab,(unsigned)pc,file,&func,line,&diff);
-       return(found && func && *file);
-}
-
-/*
- * Initialization routine for a.out files.
- */
-void
-kdb_init(void)
-{
-       extern char     *esym;
-       extern int      end;
-
-       if (esym > (char *)&end) {
-           X_db_sym_init((int *)&end, esym, "mach");
-       }
-}
-
-/*
- * Read symbol table from file.
- * (should be somewhere else)
- */
-#include <bootstrap/file_io.h>
-#include <vm/vm_kern.h>
-
-void
-read_symtab_from_file(fp, symtab_name)
-       struct file     *fp;
-       char *          symtab_name;
-{
-       vm_size_t       resid;
-       kern_return_t   result;
-       vm_offset_t     symoff;
-       vm_size_t       symsize;
-       vm_offset_t     stroff;
-       vm_size_t       strsize;
-       vm_size_t       table_size;
-       vm_offset_t     symtab;
-
-       if (!get_symtab(fp, &symoff, &symsize)) {
-           /* nothing has been read yet, so "result" is not valid here */
-           boot_printf("[ error reading %s file header ]\n",
-                       symtab_name);
-           return;
-       }
-
-       stroff = symoff + symsize;
-       result = read_file(fp, (vm_offset_t)stroff,
-                       (vm_offset_t)&strsize, sizeof(strsize), &resid);
-       if (result || resid) {
-           boot_printf("[ no valid symbol table present for %s ]\n",
-                       symtab_name);
-           return;
-       }
-
-       table_size = sizeof(int) + symsize + strsize;
-       table_size = (table_size + sizeof(int)-1) & ~(sizeof(int)-1);
-
-       result = kmem_alloc_kobject(kernel_map, &symtab, table_size);
-       if (result) {
-           boot_printf("[ error %d allocating space for %s symbol table ]\n",
-                       result, symtab_name);
-           return;
-       }
-
-       *(int *)symtab = symsize;
-
-       result = read_file(fp, symoff,
-                       symtab + sizeof(int), symsize, &resid);
-       if (result || resid) {
-           boot_printf("[ error %d reading %s symbol table ]\n",
-                       result, symtab_name);
-           return;
-       }
-
-       result = read_file(fp, stroff,
-                       symtab + sizeof(int) + symsize, strsize, &resid);
-       if (result || resid) {
-           boot_printf("[ error %d reading %s string table ]\n",
-                       result, symtab_name);
-           return;
-       }
-
-       X_db_sym_init((int *)symtab,
-                       (char *)(symtab + table_size),
-                       symtab_name);
-       
-}
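-
-/*
- * A minimal standalone sketch of the blob layout consumed by
- * X_db_sym_init() above: a leading int holding the symbol-table size,
- * the nlist entries, then the string table, rounded up to int
- * alignment.  (Illustration only; symtab_blob_size() is a
- * hypothetical helper, not part of this file.)
- */
-static vm_size_t
-symtab_blob_size(vm_size_t symsize, vm_size_t strsize)
-{
-       vm_size_t table_size = sizeof(int) + symsize + strsize;
-
-       /* round up to the next multiple of sizeof(int) */
-       return (table_size + sizeof(int) - 1) & ~(sizeof(int) - 1);
-}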
-
-#endif /* DB_GCC_AOUT */
diff --git a/osfmk/i386/db_interface.c b/osfmk/i386/db_interface.c
deleted file mode 100644 (file)
index 9e76b54..0000000
+++ /dev/null
@@ -1,1027 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-/*
- * Interface to new debugger.
- */
-#include <platforms.h>
-#include <time_stamp.h>
-#include <mach_mp_debug.h>
-#include <mach_ldebug.h>
-#include <kern/spl.h>
-#include <kern/cpu_number.h>
-#include <kern/kern_types.h>
-#include <kern/misc_protos.h>
-#include <vm/pmap.h>
-
-#include <i386/thread.h>
-#include <i386/db_machdep.h>
-#include <i386/seg.h>
-#include <i386/trap.h>
-#include <i386/setjmp.h>
-#include <i386/pmap.h>
-#include <i386/misc_protos.h>
-#include <i386/mp.h>
-#include <i386/machine_cpu.h>
-
-#include <mach/vm_param.h>
-#include <vm/vm_map.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-
-#include <ddb/db_command.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_run.h>
-#include <ddb/db_trap.h>
-#include <ddb/db_output.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_break.h>
-#include <ddb/db_watch.h>
-
-#include <i386/cpu_data.h>
-
-int     db_active = 0;
-x86_saved_state32_t    *i386_last_saved_statep;
-x86_saved_state32_t    i386_nested_saved_state;
-unsigned i386_last_kdb_sp;
-db_regs_t      ddb_regs;       /* register state */
-
-extern thread_t db_default_act;
-extern pt_entry_t *DMAP1;
-extern caddr_t DADDR1;
-
-#if    MACH_MP_DEBUG
-extern int masked_state_cnt[];
-#endif /* MACH_MP_DEBUG */
-
-/*
- *     Enter KDB through a keyboard trap.
- *     We show the registers as of the keyboard interrupt
- *     instead of those at its call to KDB.
- */
-struct int_regs {
-       int     gs;
-       int     fs;
-       int     edi;
-       int     esi;
-       int     ebp;
-       int     ebx;
-       x86_saved_state32_t *is;
-};
-
-extern char *  trap_type[];
-extern int     TRAP_TYPES;
-
-/* Forward */
-
-extern void    kdbprinttrap(
-                       int                     type,
-                       int                     code,
-                       int                     *pc,
-                       int                     sp);
-extern void    kdb_kentry(
-                       struct int_regs         *int_regs);
-extern int     db_user_to_kernel_address(
-                       task_t                  task,
-                       vm_offset_t             addr,
-                       unsigned                *kaddr,
-                       int                     flag);
-extern void    db_write_bytes_user_space(
-                       vm_offset_t             addr,
-                       int                     size,
-                       char                    *data,
-                       task_t                  task);
-extern int     db_search_null(
-                       task_t                  task,
-                       unsigned                *svaddr,
-                       unsigned                evaddr,
-                       unsigned                *skaddr,
-                       int                     flag);
-extern int     kdb_enter(int);
-extern void    kdb_leave(void);
-extern void    lock_kdb(void);
-extern void    unlock_kdb(void);
-
-/*
- *  kdb_trap - field a TRACE or BPT trap
- */
-
-
-extern jmp_buf_t *db_recover;
-
-/*
- * Translate the state saved in a task state segment into an
- * exception frame.  Since we "know" we always want the state
- * in a ktss, we hard-wire that in, rather than indexing the gdt
- * with tss_sel to derive a pointer to the desired tss.
- */
-
-/*
- * Code used to synchronize kdb among all cpus, one active at a time, switch
- * from one to another using cpu #cpu
- */
-
-decl_simple_lock_data(, kdb_lock)      /* kdb lock                     */
-
-#define        db_simple_lock_init(l, e)       hw_lock_init(&((l)->interlock))
-#define        db_simple_lock_try(l)           hw_lock_try(&((l)->interlock))
-#define        db_simple_unlock(l)             hw_lock_unlock(&((l)->interlock))
-
-int                    kdb_cpu = -1;   /* current cpu running kdb      */
-int                    kdb_debug = 1;
-volatile unsigned int  cpus_holding_bkpts;     /* counter for number of cpus
-                                                * holding breakpoints
-                                                */
-extern boolean_t       db_breakpoints_inserted;
-
-void
-db_tss_to_frame(
-       int tss_sel,
-       x86_saved_state32_t *regs)
-{
-       int mycpu = cpu_number();
-       struct i386_tss *tss;
-
-       tss = cpu_datap(mycpu)->cpu_desc_index.cdi_ktss;        /* XXX */
-
-       /*
-        * ddb will overwrite whatever's in esp, so put esp0 elsewhere, too.
-        */
-       regs->cr2 = tss->esp0;
-       regs->efl = tss->eflags;
-       regs->eip = tss->eip;
-       regs->trapno = tss->ss0;        /* XXX */
-       regs->err = tss->esp0;  /* XXX */
-       regs->eax = tss->eax;
-       regs->ecx = tss->ecx;
-       regs->edx = tss->edx;
-       regs->ebx = tss->ebx;
-       regs->uesp = tss->esp;
-       regs->ebp = tss->ebp;
-       regs->esi = tss->esi;
-       regs->edi = tss->edi;
-       regs->es = tss->es;
-       regs->ss = tss->ss;
-       regs->cs = tss->cs;
-       regs->ds = tss->ds;
-       regs->fs = tss->fs;
-       regs->gs = tss->gs;
-}
-
-/*
- * Compose a call to the debugger from the saved state in regs.  (No
- * reason not to do this in C.)
- */
-boolean_t
-db_trap_from_asm(
-       x86_saved_state32_t *regs)
-{
-       int     code;
-       int     type;
-
-       type = regs->trapno;
-       code = regs->err;
-       return (kdb_trap(type, code, regs));
-}
-
-int
-kdb_trap(
-       int                     type,
-       int                     code,
-       x86_saved_state32_t     *regs)
-{
-       extern char             etext;
-       boolean_t               trap_from_user;
-       spl_t                   s;
-       int                     previous_console_device;
-
-       s = splhigh();
-
-       previous_console_device = switch_to_serial_console();
-
-       db_printf("kdb_trap(): type %d, code %d, regs->eip 0x%x\n", type, code, regs->eip);
-       switch (type) {
-           case T_DEBUG:       /* single_step */
-           {
-               extern int dr_addr[];
-               int addr;
-               uint32_t status;
-
-               __asm__ volatile ("movl %%dr6, %0" : "=r" (status));
-
-               if (status & 0xf) {     /* hmm hdw break */
-                       addr =  status & 0x8 ? dr_addr[3] :
-                               status & 0x4 ? dr_addr[2] :
-                               status & 0x2 ? dr_addr[1] :
-                                              dr_addr[0];
-                       regs->efl |= EFL_RF;
-                       db_single_step_cmd(addr, 0, 1, "p");
-               }
-           }
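-           /* FALLTHROUGH: after a hardware-breakpoint single step,
-            * drop into the ordinary breakpoint handling below */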
-           case T_INT3:        /* breakpoint */
-           case T_WATCHPOINT:  /* watchpoint */
-           case -1:    /* keyboard interrupt */
-               break;
-
-           default:
-               if (db_recover) {
-                   i386_nested_saved_state = *regs;
-                   db_printf("Caught ");
-                   if (type < 0 || type >= TRAP_TYPES)
-                       db_printf("type %d", type);
-                   else
-                       db_printf("%s", trap_type[type]);
-                   db_printf(" trap, code = %x, pc = %x\n",
-                             code, regs->eip);
-                   splx(s);
-                   db_error("");
-                   /*NOTREACHED*/
-               }
-               kdbprinttrap(type, code, (int *)&regs->eip, regs->uesp);
-       }
-
-       disable_preemption();
-
-       current_cpu_datap()->cpu_kdb_saved_ipl = s;
-       current_cpu_datap()->cpu_kdb_saved_state = regs;
-
-       i386_last_saved_statep = regs;
-       i386_last_kdb_sp = (unsigned) &type;
-
-       if (!kdb_enter(regs->eip))
-               goto kdb_exit;
-
-       /*  Should switch to kdb's own stack here. */
-
-       if (!IS_USER_TRAP(regs, &etext)) {
-               bzero((char *)&ddb_regs, sizeof (ddb_regs));
-               *(struct x86_saved_state32_from_kernel *)&ddb_regs =
-                       *(struct x86_saved_state32_from_kernel *)regs;
-               trap_from_user = FALSE;
-       }
-       else {
-               ddb_regs = *regs;
-               trap_from_user = TRUE;
-       }
-       if (!trap_from_user) {
-           /*
-            * Kernel mode - esp and ss not saved
-            */
-           ddb_regs.uesp = (int)&regs->uesp;   /* kernel stack pointer */
-           ddb_regs.ss   = KERNEL_DS;
-       }
-
-       db_active++;
-       db_task_trap(type, code, trap_from_user);
-       db_active--;
-
-       regs->eip    = ddb_regs.eip;
-       regs->efl    = ddb_regs.efl;
-       regs->eax    = ddb_regs.eax;
-       regs->ecx    = ddb_regs.ecx;
-       regs->edx    = ddb_regs.edx;
-       regs->ebx    = ddb_regs.ebx;
-
-       if (trap_from_user) {
-           /*
-            * user mode - saved esp and ss valid
-            */
-           regs->uesp = ddb_regs.uesp;         /* user stack pointer */
-           regs->ss   = ddb_regs.ss & 0xffff;  /* user stack segment */
-       }
-
-       regs->ebp    = ddb_regs.ebp;
-       regs->esi    = ddb_regs.esi;
-       regs->edi    = ddb_regs.edi;
-       regs->es     = ddb_regs.es & 0xffff;
-       regs->cs     = ddb_regs.cs & 0xffff;
-       regs->ds     = ddb_regs.ds & 0xffff;
-       regs->fs     = ddb_regs.fs & 0xffff;
-       regs->gs     = ddb_regs.gs & 0xffff;
-
-       if ((type == T_INT3) &&
-           (db_get_task_value(regs->eip,
-                              BKPT_SIZE,
-                              FALSE,
-                              db_target_space(current_thread(),
-                                              trap_from_user))
-                             == BKPT_INST))
-           regs->eip += BKPT_SIZE;
-       
-       switch_to_old_console(previous_console_device);
-kdb_exit:
-       kdb_leave();
-
-       current_cpu_datap()->cpu_kdb_saved_state = 0;
-
-       enable_preemption();
-
-       splx(s);
-
-       /* Allow the trap to continue to the upper layers of exception
-        * handling if it was not a debugging trap.
-        */
-
-       if (trap_from_user && type != T_DEBUG && type != T_INT3 
-               && type != T_WATCHPOINT)
-               return 0;
-       else
-               return (1);
-}
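-
-/*
- * Sketch of how kdb_trap()'s return value is meant to be consumed by
- * the trap dispatcher (hypothetical caller; the real dispatch wiring
- * is outside this file): nonzero means the debugger owned the trap,
- * zero means a user-mode, non-debugging trap that should continue
- * down the normal exception path.
- */
-static boolean_t
-debugger_owned_trap(int type, int code, x86_saved_state32_t *regs)
-{
-       return kdb_trap(type, code, regs) ? TRUE : FALSE;
-}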
-
-/*
- *     Enter KDB through a keyboard trap.
- *     We show the registers as of the keyboard interrupt
- *     instead of those at its call to KDB.
- */
-
-spl_t kdb_oldspl;
-
-void
-kdb_kentry(
-       struct int_regs *int_regs)
-{
-       extern char etext;
-       boolean_t trap_from_user;
-       x86_saved_state32_t *is = int_regs->is;
-       x86_saved_state32_t regs;
-       spl_t s;
-
-       s = splhigh();
-       kdb_oldspl = s;
-
-       if (IS_USER_TRAP(is, &etext))
-       {
-           regs.uesp = ((int *)(is+1))[0];
-           regs.ss   = ((int *)(is+1))[1];
-       }
-       else {
-           regs.ss  = KERNEL_DS;
-           regs.uesp= (int)(is+1);
-       }
-       regs.efl = is->efl;
-       regs.cs  = is->cs;
-       regs.eip = is->eip;
-       regs.eax = is->eax;
-       regs.ecx = is->ecx;
-       regs.edx = is->edx;
-       regs.ebx = int_regs->ebx;
-       regs.ebp = int_regs->ebp;
-       regs.esi = int_regs->esi;
-       regs.edi = int_regs->edi;
-       regs.ds  = is->ds;
-       regs.es  = is->es;
-       regs.fs  = int_regs->fs;
-       regs.gs  = int_regs->gs;
-
-       disable_preemption();
-
-       current_cpu_datap()->cpu_kdb_saved_state = &regs;
-
-       if (!kdb_enter(regs.eip))
-               goto kdb_exit;
-
-       bcopy((char *)&regs, (char *)&ddb_regs, sizeof (ddb_regs));
-       trap_from_user = IS_USER_TRAP(&ddb_regs, &etext);
-
-       db_active++;
-       db_task_trap(-1, 0, trap_from_user);
-       db_active--;
-
-       if (trap_from_user) {
-           ((int *)(is+1))[0] = ddb_regs.uesp;
-           ((int *)(is+1))[1] = ddb_regs.ss & 0xffff;
-       }
-       is->efl = ddb_regs.efl;
-       is->cs  = ddb_regs.cs & 0xffff;
-       is->eip = ddb_regs.eip;
-       is->eax = ddb_regs.eax;
-       is->ecx = ddb_regs.ecx;
-       is->edx = ddb_regs.edx;
-       int_regs->ebx = ddb_regs.ebx;
-       int_regs->ebp = ddb_regs.ebp;
-       int_regs->esi = ddb_regs.esi;
-       int_regs->edi = ddb_regs.edi;
-       is->ds  = ddb_regs.ds & 0xffff;
-       is->es  = ddb_regs.es & 0xffff;
-       int_regs->fs = ddb_regs.fs & 0xffff;
-       int_regs->gs = ddb_regs.gs & 0xffff;
-
-kdb_exit:
-       kdb_leave();
-       current_cpu_datap()->cpu_kdb_saved_state = 0;
-
-       enable_preemption();
-
-       splx(s);
-}
-
-/*
- * Print trap reason.
- */
-
-void
-kdbprinttrap(
-       int     type,
-       int     code,
-       int     *pc,
-       int     sp)
-{
-       printf("kernel: ");
-       if (type < 0 || type >= TRAP_TYPES)
-           db_printf("type %d", type);
-       else
-           db_printf("%s", trap_type[type]);
-       db_printf(" trap, code=%x eip@%x = %x esp=%x\n",
-                 code, pc, *(int *)pc, sp);
-       db_run_mode = STEP_CONTINUE;
-}
-
-int
-db_user_to_kernel_address(
-       task_t          task,
-       vm_offset_t     addr,
-       unsigned        *kaddr,
-       int             flag)
-{
-       register pt_entry_t *ptp;
-       vm_offset_t src;
-
-       /*
-        * must not be preempted while using the pte pointer passed
-        * back, since it's been mapped through a per-cpu window
-        */
-       mp_disable_preemption();
-
-       ptp = pmap_pte(task->map->pmap, (vm_map_offset_t)addr);
-       if (ptp == PT_ENTRY_NULL || (*ptp & INTEL_PTE_VALID) == 0) {
-           if (flag) {
-               db_printf("\nno memory is assigned to address %08x\n", addr);
-               db_error(0);
-               /* NOTREACHED */
-           }
-           mp_enable_preemption();
-           return(-1);
-       }
-       src = (vm_offset_t)pte_to_pa(*ptp);
-       mp_enable_preemption();
-
-       *(int *) DMAP1 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | 
-         INTEL_PTE_REF | INTEL_PTE_MOD;
-#if defined(I386_CPU)
-       if (cpu_class == CPUCLASS_386) {
-               invltlb();
-       } else
-#endif
-       {
-               invlpg((u_int)DADDR1);
-       }
-
-       *kaddr = (unsigned)DADDR1 + (addr & PAGE_MASK);
-
-       return(0);
-}
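-
-/*
- * The per-cpu window above remaps the page frame, so only the page
- * offset of the user address survives the translation.  A sketch of
- * just that arithmetic (db_window_address is hypothetical; window_va
- * stands in for DADDR1, and PAGE_MASK is the usual page size - 1):
- */
-static unsigned
-db_window_address(unsigned window_va, vm_offset_t uaddr)
-{
-       /* same byte within the page, seen through the kernel window */
-       return window_va + (uaddr & PAGE_MASK);
-}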
-       
-/*
- * Read bytes from kernel address space for debugger.
- */
-
-void
-db_read_bytes(
-       vm_offset_t     addr,
-       int             size,
-       char            *data,
-       task_t          task)
-{
-       register char   *src;
-       register int    n;
-       unsigned        kern_addr;
-
-       src = (char *)addr;
-       if (task == kernel_task || task == TASK_NULL) {
-           while (--size >= 0) {
-               if (addr++ > VM_MAX_KERNEL_ADDRESS) {
-                   db_printf("\nbad address %x\n", addr);
-                   db_error(0);
-                   /* NOTREACHED */
-               }
-               *data++ = *src++;
-           }
-           return;
-       }
-       while (size > 0) {
-           if (db_user_to_kernel_address(task, addr, &kern_addr, 1) < 0)
-               return;
-           src = (char *)kern_addr;
-           n = intel_trunc_page(addr+INTEL_PGBYTES) - addr;
-           if (n > size)
-               n = size;
-           size -= n;
-           addr += n;
-           while (--n >= 0)
-               *data++ = *src++;
-       }
-}
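-
-/*
- * The per-page chunking in db_read_bytes() leans on one line of
- * arithmetic: intel_trunc_page(addr + INTEL_PGBYTES) - addr is the
- * byte count from addr to the end of its page.  A self-contained
- * sketch with 4K pages (hypothetical helper, for illustration):
- *
- *     bytes_left_in_page(0x1ff0) == 0x10
- *     bytes_left_in_page(0x2000) == 0x1000
- */
-static unsigned
-bytes_left_in_page(unsigned addr)
-{
-       return ((addr + 4096u) & ~4095u) - addr;
-}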
-
-/*
- * Write bytes to kernel address space for debugger.
- */
-
-void
-db_write_bytes(
-       vm_offset_t     addr,
-       int             size,
-       char            *data,
-       task_t          task)
-{
-       register char   *dst;
-
-       register pt_entry_t *ptep0 = 0;
-       pt_entry_t      oldmap0 = 0;
-       vm_offset_t     addr1;
-       register pt_entry_t *ptep1 = 0;
-       pt_entry_t      oldmap1 = 0;
-       extern char     etext;
-
-       if (task && task != kernel_task) {
-           db_write_bytes_user_space(addr, size, data, task);
-           return;
-       }
-
-           
-       if (addr >= VM_MIN_KERNEL_LOADED_ADDRESS) {
-               db_write_bytes_user_space(addr, size, data, kernel_task);
-               return;
-       }
-
-       if (addr >= VM_MIN_KERNEL_ADDRESS &&
-           addr <= (vm_offset_t)&etext)
-       {
-           ptep0 = pmap_pte(kernel_pmap, (vm_map_offset_t)addr);
-           oldmap0 = *ptep0;
-           *ptep0 |= INTEL_PTE_WRITE;
-
-           addr1 = i386_trunc_page(addr + size - 1);
-           if (i386_trunc_page(addr) != addr1) {
-               /* data crosses a page boundary */
-
-               ptep1 = pmap_pte(kernel_pmap, (vm_map_offset_t)addr1);
-               oldmap1 = *ptep1;
-               *ptep1 |= INTEL_PTE_WRITE;
-           }
-           flush_tlb();
-       } 
-
-       dst = (char *)addr;
-
-       while (--size >= 0) {
-           if (addr++ > VM_MAX_KERNEL_ADDRESS) {
-               db_printf("\nbad address %x\n", addr);
-               db_error(0);
-               /* NOTREACHED */
-           }
-           *dst++ = *data++;
-       }
-
-       if (ptep0) {
-           *ptep0 = oldmap0;
-           if (ptep1) {
-               *ptep1 = oldmap1;
-           }
-           flush_tlb();
-       }
-}
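-
-/*
- * The typical client of db_write_bytes() here is breakpoint patching
- * in kernel text, which is why it briefly sets INTEL_PTE_WRITE on the
- * covered page(s) and restores the old mappings afterwards.  A sketch
- * (plant_breakpoint is hypothetical; 0xcc is the int3 opcode,
- * BKPT_INST in db_machdep.h):
- */
-static void
-plant_breakpoint(vm_offset_t text_addr)
-{
-       char bkpt = (char)0xcc;         /* BKPT_INST: int3 */
-
-       db_write_bytes(text_addr, 1, &bkpt, kernel_task);
-}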
-       
-void
-db_write_bytes_user_space(
-       vm_offset_t     addr,
-       int             size,
-       char            *data,
-       task_t          task)
-{
-       register char   *dst;
-       register int    n;
-       unsigned        kern_addr;
-
-       while (size > 0) {
-           if (db_user_to_kernel_address(task, addr, &kern_addr, 1) < 0)
-               return;
-           dst = (char *)kern_addr;
-           n = intel_trunc_page(addr+INTEL_PGBYTES) - addr;
-           if (n > size)
-               n = size;
-           size -= n;
-           addr += n;
-           while (--n >= 0)
-               *dst++ = *data++;
-       }
-}
-
-boolean_t
-db_check_access(
-       vm_offset_t     addr,
-       int             size,
-       task_t          task)
-{
-       register int    n;
-       unsigned        kern_addr;
-
-       if (task == kernel_task || task == TASK_NULL) {
-           /* TASK_NULL is treated as the kernel task */
-           if (kernel_task == TASK_NULL)
-               return(TRUE);
-           task = kernel_task;
-       }
-       while (size > 0) {
-           if (db_user_to_kernel_address(task, addr, &kern_addr, 0) < 0)
-               return(FALSE);
-           n = intel_trunc_page(addr+INTEL_PGBYTES) - addr;
-           if (n > size)
-               n = size;
-           size -= n;
-           addr += n;
-       }
-       return(TRUE);
-}
-
-boolean_t
-db_phys_eq(
-       task_t          task1,
-       vm_offset_t     addr1,
-       task_t          task2,
-       vm_offset_t     addr2)
-{
-       unsigned        kern_addr1, kern_addr2;
-
-       if ((addr1 & (INTEL_PGBYTES-1)) != (addr2 & (INTEL_PGBYTES-1)))
-           return(FALSE);
-       if (task1 == TASK_NULL) {
-           if (current_thread() == THREAD_NULL)
-               return(FALSE);
-           task1 = current_thread()->task;
-       }
-       if (db_user_to_kernel_address(task1, addr1, &kern_addr1, 0) < 0 ||
-               db_user_to_kernel_address(task2, addr2, &kern_addr2, 0) < 0)
-           return(FALSE);
-       return(kern_addr1 == kern_addr2);
-}
-
-#define DB_USER_STACK_ADDR             (VM_MIN_KERNEL_ADDRESS)
-#define DB_NAME_SEARCH_LIMIT           (DB_USER_STACK_ADDR-(INTEL_PGBYTES*3))
-
-int
-db_search_null(
-       task_t          task,
-       unsigned        *svaddr,
-       unsigned        evaddr,
-       unsigned        *skaddr,
-       int             flag)
-{
-       register unsigned vaddr;
-       register unsigned *kaddr;
-
-       kaddr = (unsigned *)*skaddr;
-       for (vaddr = *svaddr; vaddr > evaddr; vaddr -= sizeof(unsigned)) {
-           if (vaddr % INTEL_PGBYTES == 0) {
-               vaddr -= sizeof(unsigned);
-               if (db_user_to_kernel_address(task, vaddr, skaddr, 0) < 0)
-                   return(-1);
-               kaddr = (unsigned *)*skaddr;
-           } else {
-               vaddr -= sizeof(unsigned);
-               kaddr--;
-           }
-           if ((*kaddr == 0) ^ (flag  == 0)) {
-               *svaddr = vaddr;
-               *skaddr = (unsigned)kaddr;
-               return(0);
-           }
-       }
-       return(-1);
-}
-
-void
-db_task_name(
-       task_t          task)
-{
-       register char *p;
-       register int n;
-       unsigned vaddr, kaddr;
-
-       vaddr = DB_USER_STACK_ADDR;
-       kaddr = 0;
-
-       /*
-        * skip nulls at the end
-        */
-       if (db_search_null(task, &vaddr, DB_NAME_SEARCH_LIMIT, &kaddr, 0) < 0) {
-           db_printf(DB_NULL_TASK_NAME);
-           return;
-       }
-       /*
-        * search start of args
-        */
-       if (db_search_null(task, &vaddr, DB_NAME_SEARCH_LIMIT, &kaddr, 1) < 0) {
-           db_printf(DB_NULL_TASK_NAME);
-           return;
-       }
-
-       n = DB_TASK_NAME_LEN-1;
-       p = (char *)kaddr + sizeof(unsigned);
-       for (vaddr += sizeof(int); vaddr < DB_USER_STACK_ADDR && n > 0; 
-                                                       vaddr++, p++, n--) {
-           if (vaddr % INTEL_PGBYTES == 0) {
-               (void)db_user_to_kernel_address(task, vaddr, &kaddr, 0);
-               p = (char*)kaddr;
-           }
-           db_printf("%c", (*p < ' ' || *p > '~')? ' ': *p);
-       }
-       while (n-- >= 0)        /* compare with >= 0 for one more space */
-           db_printf(" ");
-}
-
-void
-db_machdep_init(void)
-{
-       int c;
-
-       db_simple_lock_init(&kdb_lock, 0);
-#if MACH_KDB /* this only works for legacy 32-bit machines */
-       for (c = 0; c < real_ncpus; ++c) {
-               if (c == master_cpu) {
-                       master_dbtss.esp0 = (int)(db_task_stack_store +
-                               (INTSTACK_SIZE * (c + 1)) - sizeof (natural_t));
-                       master_dbtss.esp = master_dbtss.esp0;
-                       master_dbtss.eip = (int)&db_task_start;
-                       /*
-                        * The TSS for the debugging task on each slave CPU
-                        * is set up in cpu_desc_init().
-                        */
-               }
-       }
-#endif
-}
-
-/*
- * Called when entering kdb:
- * Takes the kdb lock. If we were called remotely (slave state) we just
- * wait for kdb_cpu to be equal to cpu_number(). Otherwise enter kdb if
- * not active on another cpu.
- * If db_pass_thru[cpu_number()] > 0, then kdb can't stop now.
- */
-
-int
-kdb_enter(int pc)
-{
-       int my_cpu;
-       int retval;
-
-       disable_preemption();
-
-       my_cpu = cpu_number();
-
-       if (current_cpu_datap()->cpu_db_pass_thru) {
-               retval = 0;
-               goto kdb_exit;
-       }
-
-       current_cpu_datap()->cpu_kdb_active++;
-
-       lock_kdb();
-
-       db_printf("kdb_enter(): cpu_number %d, kdb_cpu %d\n", my_cpu, kdb_cpu);
-       
-       if (db_breakpoints_inserted)
-               cpus_holding_bkpts++;
-
-       if (kdb_cpu == -1 && !current_cpu_datap()->cpu_kdb_is_slave) {
-               kdb_cpu = my_cpu;
-               db_printf("Signaling other processors..\n");
-               remote_kdb();   /* stop other cpus */
-               retval = 1;
-       } else if (kdb_cpu == my_cpu) 
-               retval = 1;
-       else
-               retval = 0;
-
-kdb_exit:
-       enable_preemption();
-
-       return (retval);
-}
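-
-/*
- * kdb_enter() and kdb_leave() always bracket a debugger entry; the
- * return value only decides whether this cpu gets to run the command
- * loop.  The pattern used by kdb_trap() and kdb_kentry() above, in
- * isolation (hypothetical wrapper, for illustration):
- */
-static void
-debugger_entry_pattern(int type, int code, boolean_t trap_from_user,
-                      x86_saved_state32_t *regs)
-{
-       if (kdb_enter(regs->eip)) {
-               /* this cpu owns kdb_cpu: run the ddb command loop */
-               db_task_trap(type, code, trap_from_user);
-       }
-       kdb_leave();    /* paired even when entry was refused */
-}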
-
-void
-kdb_leave(void)
-{
-       int my_cpu;
-       boolean_t       wait = FALSE;
-
-       disable_preemption();
-
-       my_cpu = cpu_number();
-
-       if (db_run_mode == STEP_CONTINUE) {
-               wait = TRUE;
-               kdb_cpu = -1;
-       }
-       if (db_breakpoints_inserted)
-               cpus_holding_bkpts--;
-       if (current_cpu_datap()->cpu_kdb_is_slave)
-               current_cpu_datap()->cpu_kdb_is_slave--;
-       if (kdb_debug)
-               db_printf("kdb_leave: cpu %d, kdb_cpu %d, run_mode %d pc %x (%x) holds %d\n",
-                         my_cpu, kdb_cpu, db_run_mode,
-                         ddb_regs.eip, *(int *)ddb_regs.eip,
-                         cpus_holding_bkpts);
-       clear_kdb_intr();
-       unlock_kdb();
-       current_cpu_datap()->cpu_kdb_active--;
-
-       mp_kdb_exit();
-
-       enable_preemption();
-
-       if (wait) {
-               while (cpus_holding_bkpts)
-                       ;       /* spin until no cpu holds breakpoints */
-       }
-}
-
-void
-lock_kdb(void)
-{
-       int             my_cpu;
-
-       disable_preemption();
-
-       my_cpu = cpu_number();
-
-       for(;;) {
-               if (kdb_cpu != -1 && kdb_cpu != my_cpu) {
-                       continue;
-               }
-               if (db_simple_lock_try(&kdb_lock)) {
-                       if (kdb_cpu == -1 || kdb_cpu == my_cpu)
-                               break;
-                       db_simple_unlock(&kdb_lock);
-               }
-       } 
-
-       enable_preemption();
-}
-
-#if    TIME_STAMP
-extern unsigned old_time_stamp;
-#endif /* TIME_STAMP */
-
-void
-unlock_kdb(void)
-{
-       db_simple_unlock(&kdb_lock);
-#if    TIME_STAMP
-       old_time_stamp = 0;
-#endif /* TIME_STAMP */
-}
-
-
-#ifdef __STDC__
-#define KDB_SAVE(type, name) extern type name; type name##_save = name
-#define KDB_RESTORE(name) name = name##_save
-#else  /* __STDC__ */
-#define KDB_SAVE(type, name) extern type name; type name/**/_save = name
-#define KDB_RESTORE(name) name = name/**/_save
-#endif /* __STDC__ */
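-
-/*
- * Both variants paste a "_save" suffix onto the variable name: ## is
- * ANSI token pasting, and the empty comment is the classic pre-ANSI
- * splice.  For example,
- *
- *     KDB_SAVE(int, db_run_mode);
- * expands to
- *     extern int db_run_mode; int db_run_mode_save = db_run_mode;
- * and
- *     KDB_RESTORE(db_run_mode);
- * expands to
- *     db_run_mode = db_run_mode_save;
- */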
-
-#define KDB_SAVE_CTXT() \
-       KDB_SAVE(int, db_run_mode); \
-       KDB_SAVE(boolean_t, db_sstep_print); \
-       KDB_SAVE(int, db_loop_count); \
-       KDB_SAVE(int, db_call_depth); \
-       KDB_SAVE(int, db_inst_count); \
-       KDB_SAVE(int, db_last_inst_count); \
-       KDB_SAVE(int, db_load_count); \
-       KDB_SAVE(int, db_store_count); \
-       KDB_SAVE(boolean_t, db_cmd_loop_done); \
-       KDB_SAVE(jmp_buf_t *, db_recover); \
-       KDB_SAVE(db_addr_t, db_dot); \
-       KDB_SAVE(db_addr_t, db_last_addr); \
-       KDB_SAVE(db_addr_t, db_prev); \
-       KDB_SAVE(db_addr_t, db_next); \
-       KDB_SAVE(db_regs_t, ddb_regs); 
-
-#define KDB_RESTORE_CTXT() \
-       KDB_RESTORE(db_run_mode); \
-       KDB_RESTORE(db_sstep_print); \
-       KDB_RESTORE(db_loop_count); \
-       KDB_RESTORE(db_call_depth); \
-       KDB_RESTORE(db_inst_count); \
-       KDB_RESTORE(db_last_inst_count); \
-       KDB_RESTORE(db_load_count); \
-       KDB_RESTORE(db_store_count); \
-       KDB_RESTORE(db_cmd_loop_done); \
-       KDB_RESTORE(db_recover); \
-       KDB_RESTORE(db_dot); \
-       KDB_RESTORE(db_last_addr); \
-       KDB_RESTORE(db_prev); \
-       KDB_RESTORE(db_next); \
-       KDB_RESTORE(ddb_regs); 
-
-/*
- * switch to another cpu
- */
-
-void
-kdb_on(
-       int             cpu)
-{
-       KDB_SAVE_CTXT();
-       if (cpu < 0 || cpu >= real_ncpus || !cpu_datap(cpu)->cpu_kdb_active)
-               return;
-       db_set_breakpoints();
-       db_set_watchpoints();
-       kdb_cpu = cpu;
-       unlock_kdb();
-       lock_kdb();
-       db_clear_breakpoints();
-       db_clear_watchpoints();
-       KDB_RESTORE_CTXT();
-       if (kdb_cpu == -1)  {/* someone continued */
-               kdb_cpu = cpu_number();
-               db_continue_cmd(0, 0, 0, "");
-       }
-}
-
-/*
- * system reboot
- */
-
-extern void kdp_machine_reboot(void);
-
-void db_reboot(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif)
-{
-       kdp_machine_reboot();
-}
diff --git a/osfmk/i386/db_machdep.h b/osfmk/i386/db_machdep.h
deleted file mode 100644 (file)
index e57dfca..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#ifndef        _I386_DB_MACHDEP_H_
-#define        _I386_DB_MACHDEP_H_
-
-/*
- * Machine-dependent defines for new kernel debugger.
- */
-
-#include <kern/kern_types.h>
-#include <mach/i386/vm_types.h>
-#include <mach/i386/vm_param.h>
-#ifdef __i386__
-#include <i386/thread.h>               /* for thread_status */
-#include <i386/eflags.h>
-#include <i386/trap.h>
-#include <i386/pmCPU.h>
-#endif
-
-typedef        addr64_t        db_addr_t;      /* address - unsigned */
-typedef        uint64_t        db_expr_t;      /* expression */
-
-#ifdef __i386__
-typedef struct x86_saved_state32 db_regs_t;
-extern db_regs_t       ddb_regs;       /* register state */
-#define        DDB_REGS        (&ddb_regs)
-extern int     db_active;      /* ddb is active */
-
-#define        PC_REGS(regs)   ((db_addr_t)(regs)->eip)
-
-#define        BKPT_INST       0xcc            /* breakpoint instruction */
-#define        BKPT_SIZE       (1)             /* size of breakpoint inst */
-#define        BKPT_SET(inst)  (BKPT_INST)
-
-#define        FIXUP_PC_AFTER_BREAK    ddb_regs.eip -= 1;
-
-#define        db_clear_single_step(regs)      ((regs)->efl &= ~EFL_TF)
-#define        db_set_single_step(regs)        ((regs)->efl |=  EFL_TF)
-
-#define        IS_BREAKPOINT_TRAP(type, code)  ((type) == T_INT3)
-#define IS_WATCHPOINT_TRAP(type, code) ((type) == T_WATCHPOINT)
-
-#define        I_CALL          0xe8
-#define        I_CALLI         0xff
-#define        I_RET           0xc3
-#define        I_IRET          0xcf
-
-#define        inst_trap_return(ins)   (((ins)&0xff) == I_IRET)
-#define        inst_return(ins)        (((ins)&0xff) == I_RET)
-#define        inst_call(ins)          (((ins)&0xff) == I_CALL || \
-                                (((ins)&0xff) == I_CALLI && \
-                                 ((ins)&0x3800) == 0x1000))
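-
-/*
- * Decoding note for inst_call(): in the indirect form, byte 0 of
- * "ins" is the 0xff opcode and byte 1 is the ModRM byte, whose reg
- * field (bits 11..13 of "ins") must be /2 == 010 for "call" -- hence
- * the 0x3800 mask and the 0x1000 value.  Worked example: "call *%eax"
- * encodes as ff d0, fetched as ins == 0xd0ff, and
- * (0xd0ff & 0x3800) == 0x1000, so inst_call(0xd0ff) is true.
- */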
-
-int db_inst_load(unsigned long);
-int db_inst_store(unsigned long);
-
-/* access capability and access macros */
-
-#define DB_ACCESS_LEVEL                2       /* access any space */
-#define DB_CHECK_ACCESS(addr,size,task)                                \
-       db_check_access(addr,size,task)
-#define DB_PHYS_EQ(task1,addr1,task2,addr2)                    \
-       db_phys_eq(task1,addr1,task2,addr2)
-#define DB_VALID_KERN_ADDR(addr)               (1)
-#define DB_VALID_ADDRESS(addr,user)                            \
-       ((!(user) && DB_VALID_KERN_ADDR(addr)) ||               \
-        ((user) && (addr) < VM_MAX_ADDRESS))
-
-/*
- * Given pointer to i386_saved_state, determine if it represents
- * a thread executing in user space.
- */
-#define IS_USER_TRAP(regs, etext)      (((regs)->cs & 3) != 0)
-
-extern boolean_t       db_check_access(
-                               vm_offset_t     addr,
-                               int             size,
-                               task_t          task);
-extern boolean_t       db_phys_eq(
-                               task_t          task1,
-                               vm_offset_t     addr1,
-                               task_t          task2,
-                               vm_offset_t     addr2);
-extern db_addr_t       db_disasm(
-                               db_addr_t       loc,
-                               boolean_t       altfmt,
-                               task_t          task);
-extern void            db_read_bytes(
-                               vm_offset_t     addr,
-                               int             size,
-                               char            *data,
-                               task_t          task);
-extern void            db_write_bytes(
-                               vm_offset_t     addr,
-                               int             size,
-                               char            *data,
-                               task_t          task);
-extern void            db_stack_trace_cmd(
-                               db_expr_t       addr,
-                               boolean_t       have_addr,
-                               db_expr_t       count,
-                               char            *modif);
-extern void            db_reboot(
-                               db_expr_t       addr,
-                               boolean_t       have_addr,
-                               db_expr_t       count,
-                               char            *modif);
-
-extern void db_display_kmod(db_expr_t addr, boolean_t have_addr,
-                           db_expr_t count, char *modif);
-extern void db_display_real(db_expr_t addr, boolean_t have_addr,
-                           db_expr_t count, char *modif);
-extern void db_display_iokit(db_expr_t addr, boolean_t have_addr,
-                            db_expr_t count, char * modif);
-extern void db_cpuid(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-                    char *modif);
-extern void db_msr(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-                  char *modif);
-extern void db_apic(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-                   char *modif);
-
-/* macros for printing OS server dependent task name */
-
-#define DB_TASK_NAME(task)     db_task_name(task)
-#define DB_TASK_NAME_TITLE     "COMMAND                "
-#define DB_TASK_NAME_LEN       23
-#define DB_NULL_TASK_NAME      "?                      "
-
-extern void            db_task_name(
-                               task_t                  task);
-
-/* macro for checking if a thread has used floating-point */
-
-#define db_act_fp_used(act)    (act && act->machine.ifps)
-
-extern void            db_tss_to_frame(
-                               int                     tss_sel,
-                               x86_saved_state32_t     *regs);
-extern int             kdb_trap(
-                               int                     type,
-                               int                     code,
-                               x86_saved_state32_t     *regs);
-extern boolean_t       db_trap_from_asm(
-                               x86_saved_state32_t *regs);
-extern void            kdb_on(
-                               int                     cpu);
-
-#if MACH_KDB
-extern void db_chkpmgr(void);
-#endif /* MACH_KDB */
-extern void db_pmgr(db_expr_t addr, int have_addr, db_expr_t count, char * modif);
-extern void db_nap(db_expr_t addr, int have_addr, db_expr_t count, char * modif);
-#endif /* __i386__ */
-
-#endif /* _I386_DB_MACHDEP_H_ */
diff --git a/osfmk/i386/db_trace.c b/osfmk/i386/db_trace.c
deleted file mode 100644 (file)
index 136418e..0000000
+++ /dev/null
@@ -1,876 +0,0 @@
-/*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#include <string.h>
-
-#include <mach/boolean.h>
-#include <vm/vm_map.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-
-#include <machine/asm.h>
-#include <machine/db_machdep.h>
-#include <machine/setjmp.h>
-#include <mach/machine.h>
-#include <mach/kmod.h>
-
-#include <i386/mp.h>
-#include <i386/pio.h>
-#include <i386/cpuid.h>
-#include <i386/proc_reg.h>
-#include <i386/machine_routines.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_output.h>
-
-extern jmp_buf_t *db_recover;
-struct x86_kernel_state ddb_null_kregs;
-extern kmod_info_t *kmod;
-
-
-/*
- * Stack trace.
- */
-
-#define        INKERNELSTACK(va, th) 1
-
-#define DB_NUMARGS_MAX  5
-
-struct i386_frame {
-       struct i386_frame       *f_frame;
-       int                     f_retaddr;
-       int                     f_arg0;
-};
-
-#define        TRAP            1
-#define        INTERRUPT       2
-#define SYSCALL                3
-
-db_addr_t      db_user_trap_symbol_value = 0;
-db_addr_t      db_kernel_trap_symbol_value = 0;
-db_addr_t      db_interrupt_symbol_value = 0;
-db_addr_t      db_return_to_iret_symbol_value = 0;
-db_addr_t      db_syscall_symbol_value = 0;
-boolean_t      db_trace_symbols_found = FALSE;
-
-struct i386_kregs {
-       char    *name;
-       unsigned int offset;
-} i386_kregs[] = {
-       { "ebx", (unsigned int)(&((struct x86_kernel_state *)0)->k_ebx) },
-       { "esp", (unsigned int)(&((struct x86_kernel_state *)0)->k_esp) },
-       { "ebp", (unsigned int)(&((struct x86_kernel_state *)0)->k_ebp) },
-       { "edi", (unsigned int)(&((struct x86_kernel_state *)0)->k_edi) },
-       { "esi", (unsigned int)(&((struct x86_kernel_state *)0)->k_esi) },
-       { "eip", (unsigned int)(&((struct x86_kernel_state *)0)->k_eip) },
-       { 0 }
-};
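-
-/*
- * The (unsigned int)(&((struct x86_kernel_state *)0)->k_ebx) idiom
- * above is a hand-rolled offsetof(): taking a member's address off a
- * null base pointer yields the member's byte offset.  The equivalent
- * modern spelling of one entry would be:
- *
- *     { "ebx", offsetof(struct x86_kernel_state, k_ebx) },
- */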
-
-/* Forward */
-
-extern unsigned int *  db_lookup_i386_kreg(
-                       char                    *name,
-                       int                     *kregp);
-extern int     db_i386_reg_value(
-                       struct db_variable      * vp,
-                       db_expr_t               * val,
-                       int                     flag,
-                       db_var_aux_param_t      ap);
-extern void    db_find_trace_symbols(void);
-extern int     db_numargs(
-                       struct i386_frame       *fp,
-                       task_t                  task);
-extern void    db_nextframe(
-                       struct i386_frame       **lfp,
-                       struct i386_frame       **fp,
-                       db_addr_t               *ip,
-                       int                     frame_type,
-                       thread_t                thr_act);
-extern int     _setjmp(
-                       jmp_buf_t               * jb);
-
-/*
- * Machine register set.
- */
-struct db_variable db_regs[] = {
-       { "cs", (unsigned int *)&ddb_regs.cs,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "ds", (unsigned int *)&ddb_regs.ds,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "es", (unsigned int *)&ddb_regs.es,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "fs", (unsigned int *)&ddb_regs.fs,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "gs", (unsigned int *)&ddb_regs.gs,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "ss", (unsigned int *)&ddb_regs.ss,  db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "eax",(unsigned int *)&ddb_regs.eax, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "ecx",(unsigned int *)&ddb_regs.ecx, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "edx",(unsigned int *)&ddb_regs.edx, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "ebx",(unsigned int *)&ddb_regs.ebx, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "esp",(unsigned int *)&ddb_regs.uesp,db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "ebp",(unsigned int *)&ddb_regs.ebp, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "esi",(unsigned int *)&ddb_regs.esi, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "edi",(unsigned int *)&ddb_regs.edi, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "eip",(unsigned int *)&ddb_regs.eip, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 },
-       { "efl",(unsigned int *)&ddb_regs.efl, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }
-};
-struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
-
-unsigned int *
-db_lookup_i386_kreg(
-       char    *name,
-       int     *kregp)
-{
-       register struct i386_kregs *kp;
-
-       for (kp = i386_kregs; kp->name; kp++) {
-           if (strcmp(name, kp->name) == 0)
-               return((unsigned int *)((int)kregp + kp->offset));
-       }
-       return(0);
-}
-       
-int
-db_i386_reg_value(
-       struct  db_variable     *vp,
-       db_expr_t               *valuep,
-       int                     flag,
-       db_var_aux_param_t      ap)
-{
-       extern char             etext;
-       unsigned int            *dp = 0;
-       db_expr_t               null_reg = 0;
-       register thread_t       thr_act = ap->thr_act;
-
-       if (db_option(ap->modif, 'u')) {
-           if (thr_act == THREAD_NULL) {
-               if ((thr_act = current_thread()) == THREAD_NULL)
-                   db_error("no user registers\n");
-           }
-           if (thr_act == current_thread()) {
-               if (IS_USER_TRAP(&ddb_regs, &etext))
-                   dp = vp->valuep;
-           }
-       } else {
-           if (thr_act == THREAD_NULL || thr_act == current_thread()) {
-               dp = vp->valuep;
-           } else {
-             if (thr_act &&
-             (thr_act->continuation != THREAD_CONTINUE_NULL) &&
-             thr_act->kernel_stack) {
-               int cpu;
-
-               for (cpu = 0; cpu < real_ncpus; cpu++) {
-                   if (cpu_datap(cpu)->cpu_running == TRUE &&
-                       cpu_datap(cpu)->cpu_active_thread == thr_act && cpu_datap(cpu)->cpu_kdb_saved_state) {
-                       dp = (unsigned int *) (((unsigned int)cpu_datap(cpu)->cpu_kdb_saved_state) +
-                                     (((unsigned int) vp->valuep) -
-                                      (unsigned int) &ddb_regs));
-                       break;
-                   }
-               }
-               if (dp == 0 && thr_act)
-                   dp = db_lookup_i386_kreg(vp->name,
-                        (unsigned int *)(STACK_IKS(thr_act->kernel_stack)));
-               if (dp == 0)
-                   dp = &null_reg;
-             } else if (thr_act &&
-             (thr_act->continuation != THREAD_CONTINUE_NULL)) {
-               /* only EIP is valid  */
-               if (vp->valuep == (unsigned int *) &ddb_regs.eip) {
-                   dp = (unsigned int *)(&thr_act->continuation);
-               } else {
-                   dp = &null_reg;
-               }
-             }
-           }
-       }
-       if (dp == 0) {
-           int cpu;
-
-           if (!db_option(ap->modif, 'u')) {
-               for (cpu = 0; cpu < real_ncpus; cpu++) {
-                   if (cpu_datap(cpu)->cpu_running == TRUE &&
-                       cpu_datap(cpu)->cpu_active_thread == thr_act && cpu_datap(cpu)->cpu_kdb_saved_state) {
-                           dp = (unsigned int *) (((unsigned int)cpu_datap(cpu)->cpu_kdb_saved_state) +
-                                         (((unsigned int) vp->valuep) -
-                                          (unsigned int) &ddb_regs));
-                           break;
-                   }
-               }
-           }
-           if (dp == 0) {
-               if (!thr_act)
-                   db_error("no pcb\n");
-               dp = (unsigned int *)((unsigned int)(thr_act->machine.iss) + 
-                            ((unsigned int)vp->valuep - (unsigned int)&ddb_regs));
-           }
-       }
-       if (flag == DB_VAR_SET)
-           *dp = *valuep;
-       else
-           *valuep = *dp;
-       return(0);
-}
-
-void
-db_find_trace_symbols(void)
-{
-       db_expr_t       value;
-       boolean_t       found_some;
-
-       found_some = FALSE;
-       if (db_value_of_name(CC_SYM_PREFIX "user_trap", &value)) {
-           db_user_trap_symbol_value = (db_addr_t) value;
-           found_some = TRUE;
-       }
-       if (db_value_of_name(CC_SYM_PREFIX "kernel_trap", &value)) {
-           db_kernel_trap_symbol_value = (db_addr_t) value;
-           found_some = TRUE;
-       }
-       if (db_value_of_name(CC_SYM_PREFIX "interrupt", &value)) {
-           db_interrupt_symbol_value = (db_addr_t) value;
-           found_some = TRUE;
-       }
-       if (db_value_of_name(CC_SYM_PREFIX "return_to_iret", &value)) {
-           db_return_to_iret_symbol_value = (db_addr_t) value;
-           found_some = TRUE;
-       }
-       if (db_value_of_name(CC_SYM_PREFIX "syscall", &value)) {
-           db_syscall_symbol_value = (db_addr_t) value;
-           found_some = TRUE;
-       }
-       if (found_some) 
-           db_trace_symbols_found = TRUE;
-}
-
-/*
- * Figure out how many arguments were passed into the frame at "fp".
- */
-int db_numargs_default = 5;
-
-int
-db_numargs(
-       struct i386_frame       *fp,
-       task_t                  task)
-{
-       int     *argp;
-       int     inst;
-       int     args;
-       extern char     etext;
-
-       argp = (int *)db_get_task_value((int)&fp->f_retaddr, 4, FALSE, task);
-       if (argp < (int *)VM_MIN_KERNEL_ADDRESS || (char *)argp > &etext)
-           args = db_numargs_default;
-       else if (!DB_CHECK_ACCESS((int)argp, 4, task))
-           args = db_numargs_default;
-       else {
-           inst = db_get_task_value((int)argp, 4, FALSE, task);
-           if ((inst & 0xff) == 0x59)  /* popl %ecx */
-               args = 1;
-           else if ((inst & 0xffff) == 0xc483) /* addl %n, %esp */
-               args = ((inst >> 16) & 0xff) / 4;
-           else
-               args = db_numargs_default;
-       }
-       return (args);
-}
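-
-/*
- * Worked example of the heuristic above: with cdecl calling
- * conventions the caller pops its own arguments right after the call,
- * so the instruction at the return address gives the count away:
- *
- *     pushl $2; pushl $1; call _foo; addl $8, %esp
- *
- * "addl $8, %esp" encodes as 83 c4 08, fetched as inst == 0x08c483;
- * (inst & 0xffff) == 0xc483 matches, and ((inst >> 16) & 0xff) / 4
- * == 2 arguments.  A lone "popl %ecx" (0x59) similarly signals a
- * single 4-byte argument being popped.
- */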
-
-struct interrupt_frame {
-       struct i386_frame *if_frame;    /* point to next frame */
-       int               if_retaddr;   /* return address to _interrupt */
-       int               if_unit;      /* unit number */
-       int               if_spl;       /* saved spl */
-       int               if_iretaddr;  /* _return_to_{iret,iret_i} */
-       int               if_edx;       /* old sp(iret) or saved edx(iret_i) */
-       int               if_ecx;       /* saved ecx(iret_i) */
-       int               if_eax;       /* saved eax(iret_i) */
-       int               if_eip;       /* saved eip(iret_i) */
-       int               if_cs;        /* saved cs(iret_i) */
-       int               if_efl;       /* saved efl(iret_i) */
-};
-
-extern const char *trap_type[];
-extern int     TRAP_TYPES;
-
-/* 
- * Figure out the next frame up in the call stack.  
- * For trap(), we print the address of the faulting instruction and 
- *   proceed with the calling frame.  We return the ip that faulted.
- *   If the trap was caused by jumping through a bogus pointer, then
- *   the next line in the backtrace will list some random function as 
- *   being called.  It should get the argument list correct, though.  
- *   It might be possible to dig out from the next frame up the name
- *   of the function that faulted, but that could get hairy.
- */
-void
-db_nextframe(
-       struct i386_frame       **lfp,          /* in/out */
-       struct i386_frame       **fp,           /* in/out */
-       db_addr_t               *ip,            /* out */
-       int                     frame_type,     /* in */
-       thread_t                thr_act)        /* in */
-{
-       x86_saved_state32_t     *iss32;
-       struct interrupt_frame *ifp;
-       task_t task = (thr_act != THREAD_NULL)? thr_act->task: TASK_NULL;
-
-       switch(frame_type) {
-       case TRAP:
-               /*
-                * We know that trap() has 1 argument and we know that
-                * it is a (struct x86_saved_state32_t *).
-                */
-               iss32 = (x86_saved_state32_t *)
-                       db_get_task_value((int)&((*fp)->f_arg0),4,FALSE,task);
-
-               if (iss32->trapno >= 0 && iss32->trapno < TRAP_TYPES) {
-                       db_printf(">>>>> %s trap at ",
-                                       trap_type[iss32->trapno]);
-               } else {
-                       db_printf(">>>>> trap (number %d) at ",
-                                       iss32->trapno & 0xffff);
-               }
-               db_task_printsym(iss32->eip, DB_STGY_PROC, task);
-               db_printf(" <<<<<\n");
-               *fp = (struct i386_frame *)iss32->ebp;
-               *ip = (db_addr_t)iss32->eip;
-               break;
-
-       case INTERRUPT:
-               if (*lfp == 0) {
-                       db_printf(">>>>> interrupt <<<<<\n");
-                       goto miss_frame;
-               }
-               db_printf(">>>>> interrupt at "); 
-               ifp = (struct interrupt_frame *)(*lfp);
-               *fp = ifp->if_frame;
-               if (ifp->if_iretaddr == db_return_to_iret_symbol_value) {
-                       *ip = ((x86_saved_state32_t *)ifp->if_edx)->eip;
-               } else
-                       *ip = (db_addr_t)ifp->if_eip;
-               db_task_printsym(*ip, DB_STGY_PROC, task);
-               db_printf(" <<<<<\n");
-               break;
-
-       case SYSCALL:
-               if (thr_act != THREAD_NULL) {
-                       iss32 = (x86_saved_state32_t *)thr_act->machine.iss;
-
-                       *ip = (db_addr_t)(iss32->eip);
-                       *fp = (struct i386_frame *)(iss32->ebp);
-               }
-               break;
-
-       default:        /* fall through for unknown cases */
-miss_frame:
-               *ip = (db_addr_t)
-                       db_get_task_value((int)&(*fp)->f_retaddr, 4, FALSE, task);
-               *lfp = *fp;
-               *fp = (struct i386_frame *)
-                       db_get_task_value((int)&(*fp)->f_frame, 4, FALSE, task);
-               break;
-       }
-}
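
The default case above is plain frame-pointer walking: each saved %ebp links to the caller's frame, with the return address stored just above it. A minimal user-space sketch of that walk:

    #include <stdio.h>

    struct frame { struct frame *next; void *retaddr; };

    static void walk_sketch(struct frame *fp, int limit)
    {
        while (fp != NULL && limit-- > 0) {
            printf("fp=%p ret=%p\n", (void *)fp, fp->retaddr);
            fp = fp->next;               /* follow the saved caller %ebp */
        }
    }
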
-
-void
-db_stack_trace_cmd(
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       db_expr_t       count,
-       char            *modif)
-{
-       struct i386_frame *frame, *lastframe;
-        x86_saved_state32_t    *iss32;
-       int             *argp;
-       db_addr_t       callpc, lastcallpc;
-       int             frame_type;
-       boolean_t       kernel_only = TRUE;
-       boolean_t       trace_thread = FALSE;
-       boolean_t       trace_all_threads = FALSE;
-       int             thcount = 0;
-       char            *filename;
-       int             linenum;
-       task_t          task;
-       thread_t        th, top_act;
-       int             user_frame;
-       int             frame_count;
-       jmp_buf_t       *prev;
-       jmp_buf_t       db_jmp_buf;
-       queue_entry_t   act_list;
-
-       if (!db_trace_symbols_found)
-           db_find_trace_symbols();
-
-       {
-           register char *cp = modif;
-           register char c;
-
-           while ((c = *cp++) != 0) {
-               if (c == 't')
-                   trace_thread = TRUE;
-               if (c == 'T') {
-                   trace_all_threads = TRUE;
-                   trace_thread = TRUE;
-               }
-               if (c == 'u')
-                   kernel_only = FALSE;
-           }
-       }
-
-       if (trace_all_threads) {
-           if (!have_addr && !trace_thread) {
-               have_addr = TRUE;
-               trace_thread = TRUE;
-               act_list = &(current_task()->threads);
-               addr = (db_expr_t) queue_first(act_list);
-           } else if (trace_thread) {
-               if (have_addr) {
-                   if (!db_check_act_address_valid((thread_t)addr)) {
-                       if (db_lookup_task((task_t)addr) == -1)
-                           return;
-                       act_list = &(((task_t)addr)->threads);
-                       addr = (db_expr_t) queue_first(act_list);
-                   } else {
-                       act_list = &(((thread_t)addr)->task->threads);
-                       thcount = db_lookup_task_act(((thread_t)addr)->task,
-                                                       (thread_t)addr);
-                   }
-               } else {
-                   th = db_default_act;
-                   if (th == THREAD_NULL)
-                       th = current_thread();
-                   if (th == THREAD_NULL) {
-                       db_printf("no active thr_act\n");
-                       return;
-                   }
-                   have_addr = TRUE;
-                   act_list = &th->task->threads;
-                   addr = (db_expr_t) queue_first(act_list);
-               }
-           }
-       }
-
-       if (count == -1)
-           count = 65535;
-
-next_thread:
-       top_act = THREAD_NULL;
-
-       user_frame = 0;
-       frame_count = count;
-
-       if (!have_addr && !trace_thread) {
-           frame = (struct i386_frame *)ddb_regs.ebp;
-           callpc = (db_addr_t)ddb_regs.eip;
-           th = current_thread();
-           task = (th != THREAD_NULL)? th->task: TASK_NULL;
-           db_printf("thread 0x%x, current_thread() is 0x%x, ebp is 0x%x, eip is 0x%x\n", th, current_thread(), ddb_regs.ebp, ddb_regs.eip);
-       } else if (trace_thread) {
-           if (have_addr) {
-               th = (thread_t) addr;
-               if (!db_check_act_address_valid(th)) {
-                       return;
-               }
-           } else {
-               th = db_default_act;
-               if (th == THREAD_NULL)
-                  th = current_thread();
-               if (th == THREAD_NULL) {
-                  db_printf("no active thread\n");
-                  return;
-               }
-           }
-           if (trace_all_threads)
-               db_printf("---------- Thread 0x%x (#%d of %d) ----------\n",
-               addr, thcount, th->task->thread_count);
-
-       next_activation:
-           user_frame = 0;
-//         kprintf("th is %x, current_thread() is %x, ddb_regs.ebp is %x ddb_regs.eip is %x\n", th, current_thread(), ddb_regs.ebp, ddb_regs.eip);
-           task = th->task;
-           if (th == current_thread()) {
-               frame = (struct i386_frame *)ddb_regs.ebp;
-               callpc = (db_addr_t)ddb_regs.eip;
-           } else {
-               if (!th) {
-                   db_printf("thread has no shuttle\n");
-
-                   goto thread_done;
-               }
-               else if ( (th->continuation != THREAD_CONTINUE_NULL) || 
-                         th->kernel_stack == 0) {
-
-                   db_printf("Continuation ");
-                   db_task_printsym((db_expr_t)th->continuation,
-                                                       DB_STGY_PROC, task);
-                   db_printf("\n");
-
-                   iss32 = (x86_saved_state32_t *)th->machine.iss;
-
-                       frame = (struct i386_frame *) (iss32->ebp);
-                       callpc = (db_addr_t) (iss32->eip);
-
-               } else {
-                   int cpu;
-
-                   for (cpu = 0; cpu < real_ncpus; cpu++) {
-                       if (cpu_datap(cpu)->cpu_running == TRUE &&
-                           cpu_datap(cpu)->cpu_active_thread == th &&
-                           cpu_datap(cpu)->cpu_kdb_saved_state) {
-                           break;
-                       }
-                   }
-                   if (top_act != THREAD_NULL) {
-                           /*
-                            * Trying to get the backtrace of an activation
-                            * which is not the top_most one in the RPC chain:
-                            * use the activation's pcb.
-                            */
-                           iss32 = (x86_saved_state32_t *)th->machine.iss;
-
-                                   frame = (struct i386_frame *) (iss32->ebp);
-                                   callpc = (db_addr_t) (iss32->eip);
-                   } else {
-                           if (cpu == real_ncpus) {
-                           register struct x86_kernel_state *iks;
-                           int r;
-
-                           iks = STACK_IKS(th->kernel_stack);
-                           prev = db_recover;
-                           if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-                               frame = (struct i386_frame *) (iks->k_ebp);
-                               callpc = (db_addr_t) (iks->k_eip);
-                           } else {
-                               /*
-                                * The kernel stack has probably been
-                                * paged out (swapped out activation).
-                                */
-                               db_recover = prev;
-                               if (r == 2)     /* 'q' from db_more() */
-                                   db_error(0);
-                               db_printf("<kernel stack (0x%x) error "
-                                         "(probably swapped out)>\n",
-                                         iks);
-                               goto thread_done;
-                           }
-                           db_recover = prev;
-                       } else {
-                           db_printf(">>>>> active on cpu %d <<<<<\n",
-                                     cpu);
-
-                           iss32 = (x86_saved_state32_t *)cpu_datap(cpu)->cpu_kdb_saved_state;
-
-                                   frame = (struct i386_frame *) (iss32->ebp);
-                                   callpc = (db_addr_t) (iss32->eip);
-                           }
-                       }
-                   }
-               }
-       } else {
-           frame = (struct i386_frame *)addr;
-           th = (db_default_act)? db_default_act: current_thread();
-           task = (th != THREAD_NULL)? th->task: TASK_NULL;
-           callpc = (db_addr_t)db_get_task_value((int)&frame->f_retaddr,
-                                                 4, 
-                                                 FALSE, 
-                                                 (user_frame) ? task : 0);
-       }
-
-       if (!INKERNELSTACK((unsigned)frame, th)) {
-           db_printf(">>>>> user space <<<<<\n");
-           if (kernel_only)
-               goto thread_done;
-           user_frame++;
-       }
-
-       lastframe = 0;
-       lastcallpc = (db_addr_t) 0;
-       while (frame_count-- && frame != 0) {
-           int narg = DB_NUMARGS_MAX;
-           char *      name;
-           db_expr_t   offset;
-           db_addr_t call_func = 0;
-           int r;
-           db_addr_t   off;
-           
-           db_symbol_values(NULL,
-                            db_search_task_symbol_and_line(
-                                       callpc,
-                                       DB_STGY_XTRN, 
-                                       &offset,
-                                       &filename,
-                                       &linenum,
-                                       (user_frame) ? task : 0,
-                                       &narg),
-                            &name, (db_expr_t *)&call_func);
-           if ( name == NULL) {
-                   db_find_task_sym_and_offset(callpc, 
-                   &name, &off, (user_frame) ? task : 0);
-                   offset = (db_expr_t) off;
-               }
-
-           if (user_frame == 0) {
-               if (call_func && (call_func == db_user_trap_symbol_value ||
-                   call_func == db_kernel_trap_symbol_value)) {
-                   frame_type = TRAP;
-                   narg = 1;
-               } else if (call_func &&
-                   call_func == db_interrupt_symbol_value) {
-                   frame_type = INTERRUPT;
-                   goto next_frame;
-               } else if (call_func && call_func == db_syscall_symbol_value) {
-                   frame_type = SYSCALL;
-                   goto next_frame;
-               } else {
-                   frame_type = 0;
-                   prev = db_recover;
-                   if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-                       if (narg < 0)
-                           narg = db_numargs(frame,
-                                             (user_frame) ? task : 0);
-                       db_recover = prev;
-                   } else {
-                       db_recover = prev;
-                       goto thread_done;
-                   }
-               }
-           } else {
-               frame_type = 0;
-               prev = db_recover;
-               if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-                   if (narg < 0)
-                       narg = db_numargs(frame,
-                                         (user_frame) ? task : 0);
-                   db_recover = prev;
-               } else {
-                   db_recover = prev;
-                   goto thread_done;
-               }
-           }
-
-           if (name == 0 || offset > db_maxoff) {
-               db_printf("0x%x 0x%x(", frame, callpc);
-               offset = 0;
-           } else
-               db_printf("0x%x %s(", frame, name);
-
-           argp = &frame->f_arg0;
-           while (narg > 0) {
-               int value;
-
-               prev = db_recover;
-               if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-                   value = db_get_task_value((int)argp,
-                                             4,
-                                             FALSE,
-                                             (user_frame) ? task : 0);
-               } else {
-                   db_recover = prev;
-                   if (r == 2)         /* 'q' from db_more() */
-                       db_error(0);
-                   db_printf("... <stack error>)");
-                   if (offset)
-                       db_printf("+%x", offset);
-                   if (filename) {
-                       db_printf(" [%s", filename);
-                       if (linenum > 0)
-                           db_printf(":%d", linenum);
-                       db_printf("]");
-                   }
-                   db_printf("\n");
-                   goto thread_done;
-               }
-               db_recover = prev;
-               db_printf("%x", value);
-               argp++;
-               if (--narg != 0)
-                   db_printf(",");
-           }
-           if (narg < 0)
-               db_printf("...");
-           db_printf(")");
-           if (offset) {
-               db_printf("+%x", offset);
-            }
-           if (filename) {
-               db_printf(" [%s", filename);
-               if (linenum > 0)
-                   db_printf(":%d", linenum);
-               db_printf("]");
-           }
-           db_printf("\n");
-
-next_frame:
-           lastcallpc = callpc;
-           db_nextframe(&lastframe, &frame, &callpc, frame_type,
-                        (user_frame) ? th : THREAD_NULL);
-
-           if (frame == 0) {
-               if (th->task_threads.prev != THREAD_NULL) {
-                   if (top_act == THREAD_NULL)
-                       top_act = th;
-                   th = th->task_threads.prev;
-                   db_printf(">>>>> next activation 0x%x ($task%d.%d) <<<<<\n",
-                             th,
-                             db_lookup_task(th->task),
-                             db_lookup_task_act(th->task, th));
-                   goto next_activation;
-               }
-               /* end of chain */
-               break;
-           }
-           if (!INKERNELSTACK(lastframe, th) ||
-               !INKERNELSTACK((unsigned)frame, th))
-               user_frame++;
-           if (user_frame == 1) {
-               db_printf(">>>>> user space <<<<<\n");
-               if (kernel_only)
-                   break;
-           }
-           if (frame <= lastframe) {
-               if ((INKERNELSTACK(lastframe, th) &&
-                       !INKERNELSTACK(frame, th)))
-                   continue;
-               db_printf("Bad frame pointer: 0x%x\n", frame);
-               break;
-           }
-       }
-
-thread_done:
-       if (trace_all_threads) {
-           if (top_act != THREAD_NULL)
-               th = top_act;
-           th = (thread_t) queue_next(&th->task_threads);
-           if (! queue_end(act_list, (queue_entry_t) th)) {
-               db_printf("\n");
-               addr = (db_expr_t) th;
-               thcount++;
-               goto next_thread;
-
-           }
-       }
-}
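
Throughout this routine, db_recover implements a poor-man's exception handler: a trap during a guarded dereference longjmps back to the _setjmp site, so the debugger survives bad pointers and swapped-out stacks. A self-contained sketch of the same pattern, where recover_fault() stands in for the kernel trap handler's recovery call:

    #include <setjmp.h>

    static jmp_buf *recover;                 /* analogue of db_recover */

    /* called from a (hypothetical) trap handler on a bad access */
    void recover_fault(void) { if (recover) longjmp(*recover, 1); }

    static int guarded_read(const int *p, int *out)
    {
        jmp_buf buf;
        jmp_buf *prev = recover;             /* save the outer handler */
        recover = &buf;
        if (setjmp(buf) == 0) {
            *out = *p;                       /* may fault -> longjmp */
            recover = prev;
            return 0;                        /* success */
        }
        recover = prev;                      /* restore on the fault path too */
        return -1;
    }
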
-
-extern mach_vm_size_t kdp_machine_vm_read(mach_vm_address_t, caddr_t, mach_vm_size_t);
-extern boolean_t kdp_trans_off;
-/*
- *             Print out 256 bytes of real storage
- *             
- *             dr [entaddr]
- */
-void
-db_display_real(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-               char *modif)
-{
-       int                             i;
-       unsigned int xbuf[8];
-       unsigned read_result = 0;
-/* Print 256 bytes */
-       for(i=0; i<8; i++) {
-
-/*
- * Do a physical read using kdp_machine_vm_read(), rather than replicating the same
- * facility
- */
-               kdp_trans_off = 1;
-               read_result = kdp_machine_vm_read(addr, &xbuf[0], 32);
-               kdp_trans_off = 0;
-
-               if (read_result != 32)
-                       db_printf("Unable to read address\n");
-               else
-                       db_printf("%016llX   %08X %08X %08X %08X  %08X %08X %08X %08X\n", addr, /* Print a line */
-                           xbuf[0], xbuf[1], xbuf[2], xbuf[3], 
-                           xbuf[4], xbuf[5], xbuf[6], xbuf[7]);
-               addr = addr + 0x00000020;                                                       /* Point to next address */
-       }
-       db_next = addr;
-}
-
-/*
- *     Displays all of the kmods in the system.
- *
- *             dk
- */
-void 
-db_display_kmod(__unused db_expr_t addr, __unused boolean_t have_addr,
-               __unused db_expr_t count, __unused char *modif)
-{
-
-       kmod_info_t    *kmd;
-       unsigned int    strt, end;
-
-       kmd = kmod;             /* Start at the start */
-
-       db_printf("info      addr      start    - end       name ver\n");
-
-       while (kmd) {           /* Dump 'em all */
-               strt = (unsigned int) kmd->address + kmd->hdr_size;
-               end = (unsigned int) kmd->address + kmd->size;
-               db_printf("%08X  %08X  %08X - %08X: %s, %s\n",
-                       kmd, kmd->address, strt, end, kmd->name, kmd->version);
-               kmd = kmd->next;
-       }
-}
-
-void
-db_display_iokit(__unused db_expr_t addr, __unused boolean_t have_addr,
-               __unused db_expr_t count, __unused char *modif)
-{
-}
index c196f8b9f4eaea01af197029fdc269a4af5ec5af..c834962ef86b39b60b3cabec10bab3eeed815f8a 100644 (file)
@@ -91,9 +91,11 @@ etimer_intr(int              user_mode,
                 */
                latency = (int32_t) (abstime - MAX(mytimer->deadline,
                                                   mytimer->when_set));
-               KERNEL_DEBUG_CONSTANT(
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        DECR_TRAP_LATENCY | DBG_FUNC_NONE,
-                       -latency, rip, user_mode, 0, 0);
+                       -latency,
+                       ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
+                       user_mode, 0, 0);
 
                mytimer->has_expired = TRUE;    /* Remember that we popped */
                mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
@@ -106,11 +108,11 @@ etimer_intr(int           user_mode,
 
        /* is it time for power management state change? */
        if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
-               KERNEL_DEBUG_CONSTANT(
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        DECR_PM_DEADLINE | DBG_FUNC_START,
                        0, 0, 0, 0, 0);
                pmCPUDeadline(pp);
-               KERNEL_DEBUG_CONSTANT(
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        DECR_PM_DEADLINE | DBG_FUNC_END,
                        0, 0, 0, 0, 0);
        }
@@ -180,7 +182,7 @@ etimer_resync_deadlines(void)
 
        /* Record non-PM deadline for latency tool */
        if (deadline != pmdeadline) {
-               KERNEL_DEBUG_CONSTANT(
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        DECR_SET_DEADLINE | DBG_FUNC_NONE,
                        decr, 2,
                        deadline, (uint32_t)(deadline >> 32), 0);
@@ -227,7 +229,7 @@ mpqueue_head_t *
 timer_queue_assign(
     uint64_t        deadline)
 {
-       cpu_data_t              *cdp = current_cpu_datap();
+       cpu_data_t                      *cdp = current_cpu_datap();
        mpqueue_head_t          *queue;
 
        if (cdp->cpu_running) {
@@ -239,7 +241,7 @@ timer_queue_assign(
        else
                queue = &cpu_datap(master_cpu)->rtclock_timer.queue;
 
-    return queue;
+    return (queue);
 }
 
 void
@@ -260,7 +262,7 @@ timer_queue_cancel(
  * deadline so that its timer queue can be moved to another processor.
  * This target processor should be the least idle (most busy) --
  * currently this is the primary processor for the calling thread's package.
- * Locking restrictions demand that the target cpu must be the boot cpu. 
+ * Locking restrictions demand that the target cpu must be the boot cpu.
  */
 uint32_t
 etimer_queue_migrate(int target_cpu)
@@ -273,7 +275,7 @@ etimer_queue_migrate(int target_cpu)
        assert(target_cpu != cdp->cpu_number);
        assert(target_cpu == master_cpu);
 
-       KERNEL_DEBUG_CONSTANT(
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                DECR_TIMER_MIGRATE | DBG_FUNC_START,
                target_cpu,
                cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >>32),
@@ -297,7 +299,7 @@ etimer_queue_migrate(int target_cpu)
                setPop(EndOfAllTime);
        }
  
-       KERNEL_DEBUG_CONSTANT(
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                DECR_TIMER_MIGRATE | DBG_FUNC_END,
                target_cpu, ntimers_moved, 0, 0, 0);
 
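
In the etimer hunks above, kernel instruction pointers are now unslid before being fed to the trace stream, so kdebug output no longer reveals the KASLR slide. A sketch of the idea, taking vm_kernel_slide as the boot-time slide; the real VM_KERNEL_UNSLIDE additionally checks that the pointer is actually a slid kernel address before subtracting:

    extern unsigned long vm_kernel_slide;      /* established at boot */

    static unsigned long unslide_sketch(unsigned long ip, int user_mode)
    {
        /* user PCs carry no kernel layout information: log them raw */
        return user_mode ? ip : ip - vm_kernel_slide;
    }
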
index 478eb2b4e257ffb03b8688b9b08b274cf2193a9f..84f860b5ba988adf5794eac10602c9608df91e84 100644 (file)
@@ -415,13 +415,11 @@ fpu_module_init(void)
                          64 * fp_register_state_size,
                          "x86 fpsave state");
 
-#if    ZONE_DEBUG
        /* To maintain the required alignment, disable
         * zone debugging for this zone as that appends
         * 16 bytes to each element.
         */
-       zone_debug_disable(ifps_zone);
-#endif 
+       zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);
        /* Determine MXCSR reserved bits and configure initial FPU state*/
        configure_mxcsr_capability_mask(&initial_fp_state);
 }
@@ -1014,7 +1012,7 @@ fp_setvalid(boolean_t value) {
        }
 }
 
-__private_extern__ boolean_t
+boolean_t
 ml_fpu_avx_enabled(void) {
        return (fpu_YMM_present == TRUE);
 }
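
The fpu hunk replaces "turn zone debugging off" with an explicit alignment requirement on the fpsave-state zone: zone debugging appends 16 bytes to each element, which would break the 16-byte (fxsave) or 64-byte (xsave) alignment the hardware demands of the save area. The invariant, sketched in user space (832 is an arbitrary stand-in for fp_register_state_size):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
        /* stand-in for a zalloc() from ifps_zone */
        void *fpstate = aligned_alloc(64, 832);
        assert(fpstate && ((uintptr_t)fpstate & 63) == 0);  /* xsave-safe */
        free(fpstate);
        return 0;
    }
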
index 7677f24887ce90f691fd77dead3eb7df259ac742..ae40e4f014e5a72a1f2a77531c7844a6683ecee9 100644 (file)
  */
 #include <i386/seg.h>
 
-struct real_descriptor master_gdt[GDTSZ] __attribute__ ((section("__INITGDT,__data")))= {
+struct real_descriptor master_gdt[GDTSZ]
+#if __x86_64__
+       __attribute__((section("__HIB,__desc")))
+#else
+       __attribute__((section("__INITGDT,__DATA")))
+#endif
+       __attribute__((aligned(CPU_CACHE_SIZE))) = {
        [SEL_TO_INDEX(KERNEL32_CS)] = MAKE_REAL_DESCRIPTOR(     /* kernel 32-bit code */ 
                0,
                0xfffff,
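
Above, master_gdt moves into the hibernation-resident __HIB segment on x86_64 and gains explicit cache-line alignment; the mechanism is ordinary section and aligned attributes. A sketch with illustrative names:

    /* Place an object in a named Mach-O segment,section and force
     * its alignment; the array and sizes here are illustrative. */
    struct descriptor { unsigned long long raw; };

    struct descriptor example_gdt[8]
        __attribute__((section("__HIB,__desc")))
        __attribute__((aligned(64))) = {
            { 0 },
    };
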
index bb77d38a288660e9a3966909264ed3e829fb5d48..0c7f1f59562ad026e63b52bf9aee9153727a4102 100644 (file)
@@ -55,9 +55,7 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 #include <mach_ldebug.h>
-#include <stat_time.h>
 
 /*
  * Pass field offsets to assembly code.
@@ -369,10 +367,6 @@ main(
        DECLARE("INTEL_PTE_INVALID",    INTEL_PTE_INVALID);
        DECLARE("NPGPTD", NPGPTD);
 #if defined(__x86_64__)
-       DECLARE("INITPT_SEG_BASE",INITPT_SEG_BASE);
-       DECLARE("INITGDT_SEG_BASE",INITGDT_SEG_BASE);
-       DECLARE("SLEEP_SEG_BASE",SLEEP_SEG_BASE);
-       DECLARE("PROT_MODE_GDT_SIZE",PROT_MODE_GDT_SIZE);
        DECLARE("KERNEL_PML4_INDEX",KERNEL_PML4_INDEX);
 #endif
        DECLARE("IDTSZ",        IDTSZ);
@@ -390,9 +384,6 @@ main(
 #ifdef __i386__
        DECLARE("DF_TSS",       DF_TSS);
        DECLARE("MC_TSS",       MC_TSS);
-#if    MACH_KDB
-       DECLARE("DEBUG_TSS",    DEBUG_TSS);
-#endif /* MACH_KDB */
        DECLARE("CPU_DATA_GS",  CPU_DATA_GS);
 #endif /* __i386__ */
        DECLARE("SYSENTER_CS",  SYSENTER_CS);
@@ -515,17 +506,11 @@ main(
                offsetof(cpu_data_t *, cpu_tlb_invalid_global));
 #endif /* x86_64 */
        DECLARE("enaExpTrace",  enaExpTrace);
-       DECLARE("enaExpTraceb", enaExpTraceb);
        DECLARE("enaUsrFCall",  enaUsrFCall);
-       DECLARE("enaUsrFCallb", enaUsrFCallb);
        DECLARE("enaUsrPhyMp",  enaUsrPhyMp);
-       DECLARE("enaUsrPhyMpb", enaUsrPhyMpb);
        DECLARE("enaDiagSCs",   enaDiagSCs);
-       DECLARE("enaDiagSCsb",  enaDiagSCsb);
        DECLARE("enaDiagEM",    enaDiagEM);
-       DECLARE("enaDiagEMb",   enaDiagEMb);
        DECLARE("enaNotifyEM",  enaNotifyEM);
-       DECLARE("enaNotifyEMb", enaNotifyEMb);
        DECLARE("dgLock",               offsetof(struct diagWork *, dgLock));
        DECLARE("dgFlags",              offsetof(struct diagWork *, dgFlags));
        DECLARE("dgMisc1",              offsetof(struct diagWork *, dgMisc1));
@@ -592,13 +577,11 @@ main(
        DECLARE("TIMER_HIGH",           offsetof(struct timer *, high_bits));
        DECLARE("TIMER_HIGHCHK",        offsetof(struct timer *, high_bits_check));     
 #endif
-#if !STAT_TIME
        DECLARE("TIMER_TSTAMP",
                offsetof(struct timer *, tstamp));
 
        DECLARE("THREAD_TIMER",
                offsetof(struct processor *, processor_data.thread_timer));
-#endif
        DECLARE("KERNEL_TIMER",
                offsetof(struct processor *, processor_data.kernel_timer));
        DECLARE("SYSTEM_TIMER",
index 47a5b9c7a791267d886a00e217726e1a5622a058..f04a56c4afa57e940763b867014ff75465160c69 100644 (file)
@@ -31,7 +31,7 @@
 
 #include <i386/pal_hibernate.h>
 
-extern pd_entry_t BootstrapPTD[2048];
+extern pd_entry_t BootPTD[2048];
 
 // src is virtually mapped, not page aligned, 
 // dst is a physical 4k page aligned ptr, len is one 4K page
@@ -82,9 +82,9 @@ pal_hib_map(uintptr_t virt, uint64_t phys)
     index = (virt >> I386_LPGSHIFT);
     virt += (uintptr_t)(phys & I386_LPGMASK);
     phys  = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS  | INTEL_PTE_VALID | INTEL_PTE_WRITE);
-    BootstrapPTD[index] = phys;
+    BootPTD[index] = phys;
     invlpg(virt);
-    BootstrapPTD[index + 1] = (phys + I386_LPGBYTES);
+    BootPTD[index + 1] = (phys + I386_LPGBYTES);
     invlpg(virt + I386_LPGBYTES);
 
     return (virt);
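
pal_hib_map's arithmetic above builds a 2MB large-page mapping: clear the low 21 bits of the physical address, then set the page-size, valid, and writable bits. Isolated, with the standard x86 bit positions:

    #include <stdint.h>

    #define LPGMASK   ((1ULL << 21) - 1)   /* 2MB large-page offset mask */
    #define PTE_VALID (1ULL << 0)
    #define PTE_WRITE (1ULL << 1)
    #define PTE_PS    (1ULL << 7)          /* "page size": map 2MB here */

    static uint64_t large_page_pde(uint64_t phys)
    {
        return (phys & ~LPGMASK) | PTE_PS | PTE_VALID | PTE_WRITE;
    }
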
index 994ba06b5489bb1a5e13e824c151a4ac915cb745..f8fd9832d37caad91825283a26378f9c5ea7cc15 100644 (file)
 #include <i386/cpu_topology.h>
 #include <i386/cpu_threads.h>
 #include <pexpert/device_tree.h>
-#if    MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif /* MACH_KDB */
 
 /* Decimal powers: */
 #define kilo (1000ULL)
@@ -296,10 +286,6 @@ hpet_init(void)
        hpet2bus = tmrCvt(hpetCvtt2n, busFCvtn2t);
        DBG(" CVT: HPET to BUS = %08X.%08X\n",
            (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
-
-#if MACH_KDB
-       db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
-#endif
 }
 
 /*
@@ -484,64 +470,3 @@ rdHPET(void)
 
        return (((uint64_t) high) << 32) | low;
 }
-
-#if MACH_KDB
-
-#define HI32(x)        ((uint32_t)(((x) >> 32) & 0xFFFFFFFF))
-#define LO32(x)        ((uint32_t)((x) & 0xFFFFFFFF))
-
-/*
- *     Displays HPET memory mapped area
- *     hp
- */
-void 
-db_hpet(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif)
-{
-
-       db_display_hpet((hpetReg_t *) hpetArea);        /* Dump out the HPET
-                                                        * stuff */
-       return;
-}
-
-void
-db_display_hpet(hpetReg_t *hpt)
-{
-       uint64_t        cmain;
-
-       cmain = hpt->MAIN_CNT;  /* Get the main timer */
-
-       /* General capabilities */
-       db_printf("  GCAP_ID = %08X.%08X\n",
-                 HI32(hpt->GCAP_ID), LO32(hpt->GCAP_ID));
-       /* General configuration */
-       db_printf(" GEN_CONF = %08X.%08X\n",
-                 HI32(hpt->GEN_CONF), LO32(hpt->GEN_CONF));
-       /* General Interrupt status */
-       db_printf("GINTR_STA = %08X.%08X\n",
-                 HI32(hpt->GINTR_STA), LO32(hpt->GINTR_STA));
-       /* Main counter */
-       db_printf(" MAIN_CNT = %08X.%08X\n",
-                 HI32(cmain), LO32(cmain));
-       /* Timer 0 config and cap */
-       db_printf("TIM0_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM0_CONF), LO32(hpt->TIM0_CONF));
-       /* Timer 0 comparator */
-       db_printf("TIM0_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM0_COMP), LO32(hpt->TIM0_COMP));
-       /* Timer 1 config and cap */
-       db_printf("TIM0_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM1_CONF), LO32(hpt->TIM1_CONF));
-       /* Timer 1 comparator */
-       db_printf("TIM1_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM1_COMP), LO32(hpt->TIM1_COMP));
-       /* Timer 2 config and cap */
-       db_printf("TIM2_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM2_CONF), LO32(hpt->TIM2_CONF));
-       /* Timer 2 comparator */
-       db_printf("TIM2_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM2_COMP), LO32(hpt->TIM2_COMP));
-
-       db_printf("\nHPET Frequency = %d.%05dMHz\n",
-         (uint32_t) (hpetFreq / 1000000), (uint32_t) (hpetFreq % 1000000));
-}
-#endif
index 560a88ffc65d907e25b0715d435caa1075c1021a..39102c926d377a5ca424f03bc8f4707f82dab733 100644 (file)
@@ -55,7 +55,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/i386/vm_param.h>
 
 #include <i386/locks.h> /* LcksOpts */
 #ifdef __i386__
 #include <i386/cpu_capabilities.h>
-#if    MACH_KDB
-#include <machine/db_machdep.h>
-#endif
 #endif
 #if DEBUG
 #include <machine/pal_routines.h>
 #else
 #define DBG(x...)
 #endif
-#if    MACH_KDB
-#include <ddb/db_aout.h>
-#endif /* MACH_KDB */
 
 int                    debug_task;
 
@@ -128,14 +121,15 @@ extern const char version[];
 extern const char      version_variant[];
 extern int             nx_enabled;
 
-#ifdef __x86_64__
-extern void            *low_eintstack;
-#endif
+uint64_t               physmap_base, physmap_max;
 
-void                   *KPTphys;
+pd_entry_t             *KPTphys;
 pd_entry_t             *IdlePTD;
 #ifdef __i386__
 pd_entry_t             *IdlePDPT64;
+#else
+pdpt_entry_t           *IdlePDPT;
+pml4_entry_t           *IdlePML4;
 #endif
 
 char *physfree;
@@ -200,8 +194,11 @@ x86_64_post_sleep(uint64_t new_cr3)
 // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
 // the PCI hole (which is less 4GB but not more).
 
-// Compile-time guard:
-extern int maxphymapsupported[NPHYSMAP <= PTE_PER_PAGE ? 1 : -1];
+/* Compile-time guard: NPHYSMAP is capped to 256GiB, accounting for
+ * randomisation
+ */
+extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];
+
 static void
 physmap_init(void)
 {
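
The maxphymapsupported declaration a few lines above is a pre-C11 static assertion: when the condition fails, the array size evaluates to -1 and the translation unit refuses to compile. The idiom in isolation:

    /* A negative array size is a compile error, so this declaration
     * doubles as a static assertion (C11 spells it _Static_assert). */
    #define STATIC_GUARD(cond, name) extern int name[(cond) ? 1 : -1]

    STATIC_GUARD(sizeof(long) >= 4, long_is_at_least_32_bits);
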
@@ -210,31 +207,71 @@ physmap_init(void)
                pt_entry_t entries[PTE_PER_PAGE];
        } * physmapL2 = ALLOCPAGES(NPHYSMAP);
 
-       uintptr_t i;
-       for(i=0;i<NPHYSMAP;i++) {
-               physmapL3[i] = ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
+       uint64_t i;
+       uint8_t phys_random_L3 = ml_early_random() & 0xFF;
+
+       /* We assume NX support. Mark all levels of the PHYSMAP NX
+        * to avoid granting executability via a single bit flip.
+        */
+       assert(cpuid_extfeatures() & CPUID_EXTFEATURE_XD);
+
+       for(i = 0; i < NPHYSMAP; i++) {
+               physmapL3[i + phys_random_L3] =
+                               ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
                                | INTEL_PTE_VALID
+                               | INTEL_PTE_NX
                                | INTEL_PTE_WRITE;
-               uintptr_t j;
-               for(j=0;j<PTE_PER_PAGE;j++) {
-                       physmapL2[i].entries[j] = (((i*PTE_PER_PAGE+j)<<PDSHIFT)
+
+               uint64_t j;
+               for(j = 0; j < PTE_PER_PAGE; j++) {
+                       physmapL2[i].entries[j] =
+                           ((i * PTE_PER_PAGE + j) << PDSHIFT)
                                                        | INTEL_PTE_PS
                                                        | INTEL_PTE_VALID
-                                                       | INTEL_PTE_WRITE);
+                                                       | INTEL_PTE_NX
+                                                       | INTEL_PTE_WRITE;
                }
        }
 
-       IdlePML4[KERNEL_PHYSMAP_INDEX] = ((uintptr_t)ID_MAP_VTOP(physmapL3))
-                                               | INTEL_PTE_VALID
-                                               | INTEL_PTE_WRITE;
-       if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
-               IdlePML4[KERNEL_PHYSMAP_INDEX] |= INTEL_PTE_NX;
-       }
+       IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
+                                       ((uintptr_t)ID_MAP_VTOP(physmapL3))
+                                       | INTEL_PTE_VALID
+                                       | INTEL_PTE_NX
+                                       | INTEL_PTE_WRITE;
+
+       physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
+       physmap_max = physmap_base + NPHYSMAP * GB;
+       DBG("Physical address map base: 0x%qx\n", physmap_base);
+       DBG("Physical map idlepml4[%d]: 0x%llx\n",
+               KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
+}
 
-       DBG("physical map idlepml4[%d]: 0x%llx\n",
-               KERNEL_PHYSMAP_INDEX, IdlePML4[KERNEL_PHYSMAP_INDEX]);
+static void
+descriptor_alias_init()
+{
+       vm_offset_t     master_gdt_phys;
+       vm_offset_t     master_gdt_alias_phys;
+       vm_offset_t     master_idt_phys;
+       vm_offset_t     master_idt_alias_phys;
+
+       assert(((vm_offset_t)master_gdt & PAGE_MASK) == 0);
+       assert(((vm_offset_t)master_idt64 & PAGE_MASK) == 0);
+
+       master_gdt_phys       = (vm_offset_t) ID_MAP_VTOP(master_gdt);
+       master_idt_phys       = (vm_offset_t) ID_MAP_VTOP(master_idt64);
+       master_gdt_alias_phys = (vm_offset_t) ID_MAP_VTOP(MASTER_GDT_ALIAS);
+       master_idt_alias_phys = (vm_offset_t) ID_MAP_VTOP(MASTER_IDT_ALIAS);
+       
+       DBG("master_gdt_phys:       %p\n", (void *) master_gdt_phys);
+       DBG("master_idt_phys:       %p\n", (void *) master_idt_phys);
+       DBG("master_gdt_alias_phys: %p\n", (void *) master_gdt_alias_phys);
+       DBG("master_idt_alias_phys: %p\n", (void *) master_idt_alias_phys);
+
+       KPTphys[atop_kernel(master_gdt_alias_phys)] = master_gdt_phys |
+               INTEL_PTE_VALID | INTEL_PTE_NX | INTEL_PTE_WRITE;
+       KPTphys[atop_kernel(master_idt_alias_phys)] = master_idt_phys |
+               INTEL_PTE_VALID | INTEL_PTE_NX; /* read-only */
 }
-#endif
 
 static void
 Idle_PTs_init(void)
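
physmap_base above is composed from page-table indices, with the L3 slot randomized at boot so the physical aperture's virtual base is unpredictable. A sketch of the KVADDR-style composition for 4-level x86-64 paging; bit 47 must be sign-extended to keep the result canonical:

    #include <stdint.h>

    static uint64_t kvaddr_sketch(unsigned pml4, unsigned pdpt,
                                  unsigned pde, unsigned pte)
    {
        uint64_t v = ((uint64_t)pml4 << 39) | ((uint64_t)pdpt << 30) |
                     ((uint64_t)pde  << 21) | ((uint64_t)pte  << 12);
        if (v & (1ULL << 47))
            v |= 0xFFFF000000000000ULL;    /* canonical sign-extension */
        return v;
    }
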
@@ -242,37 +279,75 @@ Idle_PTs_init(void)
        /* Allocate the "idle" kernel page tables: */
        KPTphys  = ALLOCPAGES(NKPT);            /* level 1 */
        IdlePTD  = ALLOCPAGES(NPGPTD);          /* level 2 */
+       IdlePDPT = ALLOCPAGES(1);               /* level 3 */
+       IdlePML4 = ALLOCPAGES(1);               /* level 4 */
+
+       // Fill the lowest level with everything up to physfree
+       fillkpt(KPTphys,
+               INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
+
+       /* IdlePTD */
+       fillkpt(IdlePTD,
+               INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
+
+       // IdlePDPT entries
+       fillkpt(IdlePDPT,
+               INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);
+
+       // IdlePML4 single entry for kernel space.
+       fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
+               INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);
+       
+       postcode(VSTART_PHYSMAP_INIT);
 
-#ifdef __x86_64__
        physmap_init();
-#else
+
+       postcode(VSTART_DESC_ALIAS_INIT);
+
+       descriptor_alias_init();
+
+       postcode(VSTART_SET_CR3);
+
+       // Switch to the page tables..
+       set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
+
+}
+
+#else /* __x86_64__ */
+
+static void
+Idle_PTs_init(void)
+{
+       /* Allocate the "idle" kernel page tables: */
+       KPTphys  = ALLOCPAGES(NKPT);            /* level 1 */
+       IdlePTD  = ALLOCPAGES(NPGPTD);          /* level 2 */
+
        IdlePDPT64 = ALLOCPAGES(1);
 
        // Recursive mapping of PTEs
        fillkpt(IdlePTD, INTEL_PTE_WRITE, (uintptr_t)IdlePTD, PTDPTDI, NPGPTD);
        // commpage
        fillkpt(IdlePTD, INTEL_PTE_WRITE|INTEL_PTE_USER, (uintptr_t)ALLOCPAGES(1), _COMM_PAGE32_BASE_ADDRESS >> PDESHIFT,1);
-#endif
+
        // Fill the lowest level with everything up to physfree
        fillkpt(KPTphys,
-                       INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
+               INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));
 
        // Rewrite the 2nd-lowest level  to point to pages of KPTphys.
        // This was previously filled statically by idle_pt.c, and thus
        // must be done after the KPTphys fill since IdlePTD is in use
        fillkpt(IdlePTD,
-                       INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
+               INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);
 
        // IdlePDPT entries
-#ifdef __i386__
        fillkpt(IdlePDPT, 0, (uintptr_t)IdlePTD, 0, NPGPTD);
-#else
-       fillkpt(IdlePDPT, INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);
-#endif
+
+       postcode(VSTART_SET_CR3);
 
        // Flush the TLB now we're done rewriting the page tables..
        set_cr3_raw(get_cr3_raw());
 }
+#endif
 
 /*
  * vstart() is called in the natural mode (64bit for K64, 32 for K32)
@@ -294,7 +369,7 @@ vstart(vm_offset_t boot_args_start)
 {
        boolean_t       is_boot_cpu = !(boot_args_start == 0);
        int             cpu;
-       uint32_t lphysfree;
+       uint32_t        lphysfree;
 
        postcode(VSTART_ENTRY);
 
@@ -320,14 +395,8 @@ vstart(vm_offset_t boot_args_start)
                        kernelBootArgs, 
                        &kernelBootArgs->ksize,
                        &kernelBootArgs->kaddr);
-#ifdef __x86_64__
-               /* enable NX/XD, boot processor */
-               if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
-                       wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
-                       DBG("vstart() NX/XD enabled\n");
-               }
-#endif
-               postcode(PSTART_PAGE_TABLES);
+
+               postcode(VSTART_IDLE_PTS_INIT);
 
                Idle_PTs_init();
 
@@ -348,17 +417,16 @@ vstart(vm_offset_t boot_args_start)
                PE_init_platform(FALSE, kernelBootArgs);
                postcode(PE_INIT_PLATFORM_D);
        } else {
+#ifdef __x86_64__
+               /* Switch to kernel's page tables (from the Boot PTs) */
+               set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
+#endif
                /* Find our logical cpu number */
                cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
                DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
-#ifdef __x86_64__
-               if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
-                       wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
-                       DBG("vstart() NX/XD enabled, non-boot\n");
-               }
-#endif
        }
 
+       postcode(VSTART_CPU_DESC_INIT);
 #ifdef __x86_64__
        if(is_boot_cpu)
                cpu_desc_init64(cpu_datap(cpu));
@@ -368,16 +436,12 @@ vstart(vm_offset_t boot_args_start)
                cpu_desc_init(cpu_datap(cpu));
        cpu_desc_load(cpu_datap(cpu));
 #endif
+       postcode(VSTART_CPU_MODE_INIT);
        if (is_boot_cpu)
                cpu_mode_init(current_cpu_datap()); /* cpu_mode_init() will be
                                                     * invoked on the APs
                                                     * via i386_init_slave()
                                                     */
-#ifdef __x86_64__
-       /* Done with identity mapping */
-       IdlePML4[0] = 0;
-#endif
-
        postcode(VSTART_EXIT);
 #ifdef __i386__
        if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
@@ -391,26 +455,9 @@ vstart(vm_offset_t boot_args_start)
                i386_init_slave();
        /*NOTREACHED*/
 #else
-       /* We need to switch to a new per-cpu stack, but we must do this atomically with
-        * the call to ensure the compiler doesn't assume anything about the stack before
-        * e.g. tail-call optimisations
-        */
-       if (is_boot_cpu)
-       {
-               asm volatile(
-                               "mov %1, %%rdi;"
-                               "mov %0, %%rsp;"
-                               "call _i386_init;"      : : "r" 
-                               (cpu_datap(cpu)->cpu_int_stack_top), "r" (boot_args_start));
-       }
-       else
-       {
-               asm volatile(
-                               "mov %0, %%rsp;"
-                               "call _i386_init_slave;"        : : "r" 
-                               (cpu_datap(cpu)->cpu_int_stack_top));
-       }
-       /*NOTREACHED*/
+       x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
+                                    : (uintptr_t) i386_init_slave,
+                        cpu_datap(cpu)->cpu_int_stack_top);
 #endif
 }
 
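
The deleted inline assembly above existed because the switch to the per-cpu stack and the call into i386_init must happen as one indivisible sequence: if the compiler were free to schedule code between them, it could still reference the old stack. The replacement x86_init_wrapper presumably centralizes the same trick; an illustrative analogue (x86-64 AT&T syntax, fn must not return):

    static inline void __attribute__((noreturn))
    call_on_stack(void (*fn)(void), void *stack_top)
    {
        /* load the new stack pointer and call fn in a single asm so no
         * compiler-generated code runs on the old stack in between */
        __asm__ volatile("mov %0, %%rsp\n\t"
                         "call *%1"
                         :: "r"(stack_top), "r"(fn) : "memory");
        __builtin_unreachable();
    }
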
@@ -555,6 +602,7 @@ do_init_slave(boolean_t fast_restart)
                assert(!ml_get_interrupts_enabled());
   
                cpu_mode_init(current_cpu_datap());
+               pmap_cpu_init();
   
 #if CONFIG_MCA
                mca_cpu_init();
@@ -587,14 +635,6 @@ do_init_slave(boolean_t fast_restart)
 
        cpu_thread_init();      /* not strictly necessary */
 
-#ifdef __x86_64__
-       /* Re-zero the identity-map for the idle PT's. This MUST be done before 
-        * cpu_running is set so that other slaves can set up their own
-        * identity-map */
-       if (!fast_restart)
-           IdlePML4[0] = 0;
-#endif
-
        cpu_init();     /* Sets cpu_running which starter cpu waits for */ 
 
        slave_main(init_param);
index 0f7bdba3a57ad65b81a968764df74180b58904ff..6b34073c68bc7d3b9b733bf813d26c0d206d3606 100644 (file)
@@ -54,8 +54,7 @@
  *     When performance isn't the only concern, it's
  *     nice to build stack frames...
  */
-#define        BUILD_STACK_FRAMES   (GPROF || \
-                               ((MACH_LDEBUG) && MACH_KDB))
+#define        BUILD_STACK_FRAMES   (GPROF)
 
 #if    BUILD_STACK_FRAMES
 
@@ -360,7 +359,7 @@ LEAF_ENTRY(hw_lock_init)
 
 
 /*
- *     void hw_lock_byte_init(uint8_t *)
+ *     void hw_lock_byte_init(volatile uint8_t *)
  *
  *     Initialize a hardware byte lock.
  */
@@ -454,7 +453,6 @@ LEAF_ENTRY(hw_lock_to)
 
        lfence
        rdtsc                           /* read cyclecount into %edx:%eax */
-       lfence
        addl    %ecx,%eax               /* fetch and timeout */
        adcl    $0,%edx                 /* add carry */
        mov     %edx,%ecx
@@ -464,7 +462,6 @@ LEAF_ENTRY(hw_lock_to)
        push    %r9
        lfence
        rdtsc                           /* read cyclecount into %edx:%eax */
-       lfence
        shlq    $32, %rdx
        orq     %rdx, %rax              /* load 64-bit quantity into %rax */
        addq    %rax, %rsi              /* %rsi is the timeout expiry */
@@ -498,7 +495,6 @@ LEAF_ENTRY(hw_lock_to)
        mov     %edx,%edi               /* Save %edx */
        lfence
        rdtsc                           /* cyclecount into %edx:%eax */
-       lfence
        xchg    %edx,%edi               /* cyclecount into %edi:%eax */
        cmpl    %ecx,%edi               /* compare high-order 32-bits */
        jb      4b                      /* continue spinning if less, or */
@@ -510,7 +506,6 @@ LEAF_ENTRY(hw_lock_to)
 #else
        lfence
        rdtsc                           /* cyclecount into %edx:%eax */
-       lfence
        shlq    $32, %rdx
        orq     %rdx, %rax              /* load 64-bit quantity into %rax */
        cmpq    %rsi, %rax              /* compare to timeout */
@@ -708,7 +703,7 @@ Entry(lck_rw_try_lock_shared)
        LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point)
        ret
     /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
 #endif
        movl    $1, %eax                        /* return TRUE */
        ret
@@ -784,7 +779,7 @@ Entry(lck_rw_lock_exclusive)
        LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point)
        ret
     /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
 #endif
        ret
 2:
@@ -828,7 +823,7 @@ Entry(lck_rw_try_lock_exclusive)
        LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point)
        ret
     /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER)
 #endif
        movl    $1, %eax                        /* return TRUE */
        ret
@@ -889,7 +884,7 @@ Entry(lck_rw_lock_shared_to_exclusive)
        LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point)
        ret
     /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, LCK_RW_REGISTER)
 #endif
        movl    $1, %eax                        /* return success */
        ret
@@ -1419,14 +1414,14 @@ mutex_interlock_destroyed_str:
  * lck_mtx_convert_spin()
  */
 NONLEAF_ENTRY(lck_mtx_lock_spin_always)
-       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
-       jmp     Llmls_avoid_check
-
+       LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
+       jmp     Llmls_avoid_check
+       
 NONLEAF_ENTRY(lck_mtx_lock_spin)
        LOAD_LMTX_REG(B_ARG0)           /* fetch lock pointer */
 
        CHECK_PREEMPTION_LEVEL()
-Llmls_avoid_check:     
+Llmls_avoid_check:
        mov     M_STATE(LMTX_REG), LMTX_C_REG32
        test    $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32  /* is the interlock or mutex held */
        jnz     Llmls_slow
@@ -2265,27 +2260,42 @@ LEAF_ENTRY(bit_unlock)
  * Atomic primitives, prototyped in kern/simple_lock.h
  */
 LEAF_ENTRY(hw_atomic_add)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
        movl    %esi, %eax              /* Load addend */
-       lock
-       xaddl   %eax, (%rdi)            /* Atomic exchange and add */
+       lock    xaddl %eax, (%rdi)              /* Atomic exchange and add */
        addl    %esi, %eax              /* Calculate result */
        LEAF_RET
 
 LEAF_ENTRY(hw_atomic_sub)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
        negl    %esi
        movl    %esi, %eax
-       lock
-       xaddl   %eax, (%rdi)            /* Atomic exchange and add */
+       lock    xaddl %eax, (%rdi)              /* Atomic exchange and add */
        addl    %esi, %eax              /* Calculate result */
        LEAF_RET
 
 LEAF_ENTRY(hw_atomic_or)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
        movl    (%rdi), %eax
 1:
        movl    %esi, %edx              /* Load mask */
        orl     %eax, %edx
-       lock
-       cmpxchgl        %edx, (%rdi)    /* Atomic CAS */
+       lock    cmpxchgl %edx, (%rdi)   /* Atomic CAS */
        jne     1b
        movl    %edx, %eax              /* Result */
        LEAF_RET
@@ -2295,18 +2305,29 @@ LEAF_ENTRY(hw_atomic_or)
  */
 
 LEAF_ENTRY(hw_atomic_or_noret)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
        lock
        orl     %esi, (%rdi)            /* Atomic OR */
        LEAF_RET
 
 
 LEAF_ENTRY(hw_atomic_and)
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
        movl    (%rdi), %eax
 1:
        movl    %esi, %edx              /* Load mask */
        andl    %eax, %edx
-       lock
-       cmpxchgl        %edx, (%rdi)    /* Atomic CAS */
+       lock    cmpxchgl %edx, (%rdi)   /* Atomic CAS */
        jne     1b
        movl    %edx, %eax              /* Result */
        LEAF_RET
@@ -2316,8 +2337,13 @@ LEAF_ENTRY(hw_atomic_and)
  */
 
 LEAF_ENTRY(hw_atomic_and_noret)
-       lock
-       andl    %esi, (%rdi)            /* Atomic OR */
+#if    MACH_LDEBUG
+       test    $3, %rdi
+       jz      1f
+       ud2
+1:
+#endif 
+       lock    andl    %esi, (%rdi)            /* Atomic AND */
        LEAF_RET
 
 #endif /* !__i386 __ */
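
The hw_atomic_* hunks above add a MACH_LDEBUG-only alignment check (a misaligned operand now hits ud2 rather than silently taking a split-lock penalty) and fold the lock prefix onto its instruction. For reference, hw_atomic_add's contract, returning the new value, expressed in C11 atomics with the same debug check:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static uint32_t hw_atomic_add_sketch(_Atomic uint32_t *p, uint32_t delta)
    {
        assert(((uintptr_t)p & 3) == 0);       /* the ud2 check, in C */
        /* xaddl leaves the old value in %eax; addl computes the new one */
        return atomic_fetch_add(p, delta) + delta;
    }
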
index 1e571cd59cbcfc414091eca166a9536822c7abc6..97fa060129d80fa8e735222fd6ed1b13356b0570 100644 (file)
@@ -32,7 +32,9 @@
 
 #ifdef __APPLE_API_PRIVATE
 
-/* The kernel is linked at VM_MIN_KERNEL_ADDRESS + 0x100000 */
+/*
+ * The kernel better be statically linked at VM_MIN_KERNEL_ADDRESS + 0x100000
+ */
 #define I386_KERNEL_IMAGE_BASE_PAGE    0x100
 
 #if defined(__i386__)
index 866dfa1fb1ace6562e2f4af626756c4199a74145..9a9735c5c5b0a4ae8f95189286285f131da50b8e 100644 (file)
@@ -55,7 +55,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/i386/vm_param.h>
 
 #include <mach/thread_status.h>
 #include <pexpert/i386/efi.h>
 #include <i386/i386_lowmem.h>
+#ifdef __x86_64__
+#include <x86_64/lowglobals.h>
+#else
 #include <i386/lowglobals.h>
+#endif
 #include <i386/pal_routines.h>
 
 #include <mach-o/loader.h>
 #include <libkern/kernel_mach_header.h>
 
+
 vm_size_t      mem_size = 0; 
 pmap_paddr_t   first_avail = 0;/* first after page tables */
 
 uint64_t       max_mem;        /* Size of physical memory (bytes), adjusted by maxmem */
 uint64_t        mem_actual;
-uint64_t       sane_size = 0;  /* Memory size to use for defaults calculations */
+uint64_t       sane_size = 0;  /* Memory size for defaults calculations */
+
+/*
+ * KASLR parameters
+ */
+ppnum_t                vm_kernel_base_page;
+vm_offset_t    vm_kernel_base;
+vm_offset_t    vm_kernel_top;
+vm_offset_t    vm_kernel_stext;
+vm_offset_t    vm_kernel_etext;
+vm_offset_t    vm_kernel_slide;
 
 #define MAXLORESERVE   (32 * 1024 * 1024)
 
@@ -112,21 +126,23 @@ vm_offset_t       virtual_avail, virtual_end;
 static pmap_paddr_t    avail_remaining;
 vm_offset_t     static_memory_end = 0;
 
-vm_offset_t    sHIB, eHIB, stext, etext, sdata, edata, end;
+vm_offset_t    sHIB, eHIB, stext, etext, sdata, edata, sconstdata, econstdata, end;
 
 /*
  * _mh_execute_header is the mach_header for the currently executing kernel
  */
-void *sectTEXTB; unsigned long sectSizeTEXT;
-void *sectDATAB; unsigned long sectSizeDATA;
-void *sectOBJCB; unsigned long sectSizeOBJC;
-void *sectLINKB; unsigned long sectSizeLINK;
-void *sectPRELINKB; unsigned long sectSizePRELINK;
-void *sectHIBB; unsigned long sectSizeHIB;
-void *sectINITPTB; unsigned long sectSizeINITPT;
+vm_offset_t segTEXTB; unsigned long segSizeTEXT;
+vm_offset_t segDATAB; unsigned long segSizeDATA;
+vm_offset_t segLINKB; unsigned long segSizeLINK;
+vm_offset_t segPRELINKB; unsigned long segSizePRELINK;
+vm_offset_t segHIBB; unsigned long segSizeHIB;
+vm_offset_t sectCONSTB; unsigned long sectSizeConst;
 
-kernel_segment_command_t *segTEXT;
-kernel_section_t *cursectTEXT, *lastsectTEXT;
+boolean_t doconstro_override = FALSE;
+
+static kernel_segment_command_t *segTEXT, *segDATA;
+static kernel_section_t *cursectTEXT, *lastsectTEXT;
+static kernel_section_t *sectDCONST;
 
 extern uint64_t firmware_Conventional_bytes;
 extern uint64_t firmware_RuntimeServices_bytes;
@@ -138,8 +154,19 @@ extern uint64_t firmware_Unusable_bytes;
 extern uint64_t firmware_other_bytes;
 uint64_t firmware_MMIO_bytes;
 
+/*
+ * Linker magic to establish the highest address in the kernel.
+ * This is replicated from libsa which marks last_kernel_symbol
+ * but that's not visible from here in osfmk.
+ */
+__asm__(".zerofill __LAST, __last, _kernel_top, 0");
+extern void    *kernel_top;
+
 #if    DEBUG
 #define        PRINT_PMAP_MEMORY_TABLE
+#define DBG(x...)       kprintf(x)
+#else
+#define DBG(x...)
 #endif /* DEBUG */
 /*
  * Basic VM initialization.
@@ -164,64 +191,124 @@ i386_vm_init(uint64_t    maxmem,
        uint32_t  mbuf_reserve = 0;
        boolean_t mbuf_override = FALSE;
        boolean_t coalescing_permitted;
-#if DEBUG
-       kprintf("Boot args revision: %d version: %d",
-               args->Revision, args->Version);
-       kprintf("  commandline: \"");
-       for(i=0; i<BOOT_LINE_LENGTH; i++)
-               kprintf("%c", args->CommandLine[i]);
-       kprintf("\"\n");
-#endif
+       vm_kernel_base_page = i386_btop(args->kaddr);
+#ifdef __x86_64__
+       vm_offset_t base_address;
+       vm_offset_t static_base_address;
+
+       /*
+        * Establish the KASLR parameters.
+        */
+       static_base_address = ml_static_ptovirt(KERNEL_BASE_OFFSET);
+       base_address        = ml_static_ptovirt(args->kaddr);
+       vm_kernel_slide     = base_address - static_base_address;
+       if (args->kslide) {
+               kprintf("KASLR slide: 0x%016lx dynamic\n", vm_kernel_slide);
+               if (vm_kernel_slide != ((vm_offset_t)args->kslide))
+                       panic("Kernel base inconsistent with slide - rebased?");
+       } else {
+               /* No slide relative to on-disk symbols */
+               kprintf("KASLR slide: 0x%016lx static and ignored\n",
+                       vm_kernel_slide);
+               vm_kernel_slide = 0;
+       }
 
+       /*
+        * Zero out local relocations to avoid confusing kxld.
+        * TODO: might be better to move this code to OSKext::initialize
+        */
+       if (_mh_execute_header.flags & MH_PIE) {
+               struct load_command *loadcmd;
+               uint32_t cmd;
+
+               loadcmd = (struct load_command *)((uintptr_t)&_mh_execute_header +
+                                                 sizeof (_mh_execute_header));
+
+               for (cmd = 0; cmd < _mh_execute_header.ncmds; cmd++) {
+                       if (loadcmd->cmd == LC_DYSYMTAB) {
+                               struct dysymtab_command *dysymtab;
+
+                               dysymtab = (struct dysymtab_command *)loadcmd;
+                               dysymtab->nlocrel = 0;
+                               dysymtab->locreloff = 0;
+                               kprintf("Hiding local relocations\n");
+                               break;
+                       }
+                       loadcmd = (struct load_command *)((uintptr_t)loadcmd + loadcmd->cmdsize);
+               }
+       }
+
+#endif // __x86_64__
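
The relocation-hiding hunk above is a standard Mach-O load-command walk. A
self-contained user-space sketch of the same traversal follows; here
_dyld_get_image_header() stands in for _mh_execute_header, and the sketch only
prints the LC_DYSYMTAB fields rather than zeroing them:

#include <stdio.h>
#include <stdint.h>
#include <mach-o/loader.h>
#include <mach-o/dyld.h>

int main(void)
{
	const struct mach_header *mh = _dyld_get_image_header(0);
	uintptr_t p = (uintptr_t)mh + (mh->magic == MH_MAGIC_64 ?
	    sizeof(struct mach_header_64) : sizeof(struct mach_header));
	uint32_t i;

	for (i = 0; i < mh->ncmds; i++) {
		const struct load_command *lc = (const struct load_command *)p;
		if (lc->cmd == LC_DYSYMTAB) {
			const struct dysymtab_command *d =
			    (const struct dysymtab_command *)lc;
			printf("LC_DYSYMTAB: %u local relocs at offset 0x%x\n",
			    d->nlocrel, d->locreloff);
			break;
		}
		p += lc->cmdsize;	/* advance to the next load command */
	}
	return 0;
}
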
+        
        /*
         * Now retrieve addresses for end, edata, and etext 
         * from MACH-O headers.
         */
-
-       sectTEXTB = (void *) getsegdatafromheader(
-               &_mh_execute_header, "__TEXT", &sectSizeTEXT);
-       sectDATAB = (void *) getsegdatafromheader(
-               &_mh_execute_header, "__DATA", &sectSizeDATA);
-       sectOBJCB = (void *) getsegdatafromheader(
-               &_mh_execute_header, "__OBJC", &sectSizeOBJC);
-       sectLINKB = (void *) getsegdatafromheader(
-               &_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
-       sectHIBB = (void *)getsegdatafromheader(
-               &_mh_execute_header, "__HIB", &sectSizeHIB);
-       sectINITPTB = (void *)getsegdatafromheader(
-               &_mh_execute_header, "__INITPT", &sectSizeINITPT);
-       sectPRELINKB = (void *) getsegdatafromheader(
-               &_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);
-
-       segTEXT = getsegbynamefromheader(&_mh_execute_header, "__TEXT");
+       segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                       "__TEXT", &segSizeTEXT);
+       segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                       "__DATA", &segSizeDATA);
+       segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                       "__LINKEDIT", &segSizeLINK);
+       segHIBB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                       "__HIB", &segSizeHIB);
+       segPRELINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                       "__PRELINK_TEXT", &segSizePRELINK);
+       segTEXT = getsegbynamefromheader(&_mh_execute_header,
+                                       "__TEXT");
+       segDATA = getsegbynamefromheader(&_mh_execute_header,
+                                       "__DATA");
+       sectDCONST = getsectbynamefromheader(&_mh_execute_header,
+                                       "__DATA", "__const");
        cursectTEXT = lastsectTEXT = firstsect(segTEXT);
        /* Discover the last TEXT section within the TEXT segment */
        while ((cursectTEXT = nextsect(segTEXT, cursectTEXT)) != NULL) {
                lastsectTEXT = cursectTEXT;
        }
 
-       sHIB  = (vm_offset_t) sectHIBB;
-       eHIB  = (vm_offset_t) sectHIBB + sectSizeHIB;
+       sHIB  = segHIBB;
+       eHIB  = segHIBB + segSizeHIB;
 	/* Zero-padded from eHIB to stext if text is 2M-aligned */
-       stext = (vm_offset_t) sectTEXTB;
+       stext = segTEXTB;
+#ifdef __x86_64__
+       lowGlo.lgStext = stext;
+#endif
        etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size);
        /* Zero-padded from etext to sdata if text is 2M-aligned */
-       sdata = (vm_offset_t) sectDATAB;
-       edata = (vm_offset_t) sectDATAB + sectSizeDATA;
-
-#if DEBUG
-       kprintf("sectTEXTB    = %p\n", sectTEXTB);
-       kprintf("sectDATAB    = %p\n", sectDATAB);
-       kprintf("sectOBJCB    = %p\n", sectOBJCB);
-       kprintf("sectLINKB    = %p\n", sectLINKB);
-       kprintf("sectHIBB     = %p\n", sectHIBB);
-       kprintf("sectPRELINKB = %p\n", sectPRELINKB);
-       kprintf("eHIB         = %p\n", (void *) eHIB);
-       kprintf("stext        = %p\n", (void *) stext);
-       kprintf("etext        = %p\n", (void *) etext);
-       kprintf("sdata        = %p\n", (void *) sdata);
-       kprintf("edata        = %p\n", (void *) edata);
-#endif
+       sdata = segDATAB;
+       edata = segDATAB + segSizeDATA;
+
+       sectCONSTB = (vm_offset_t) sectDCONST->addr;
+       sectSizeConst = sectDCONST->size;
+       sconstdata = sectCONSTB;
+       econstdata = sectCONSTB + sectSizeConst;
+
+       if (sectSizeConst & PAGE_MASK) {
+               kernel_section_t *ns = nextsect(segDATA, sectDCONST);
+               if (ns && !(ns->addr & PAGE_MASK))
+                       doconstro_override = TRUE;
+       } else
+               doconstro_override = TRUE;
+
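
The test above decides whether __DATA,__const may later be remapped read-only:
that is only safe when no writable section shares the const section's last
page. A self-contained sketch of the check, assuming 4 KiB pages and
illustrative addresses:

#include <stdio.h>
#include <stdint.h>

#define PAGE_MASK_SKETCH 0xfffULL	/* 4 KiB pages assumed */

/* Returns 1 when __const either ends exactly on a page boundary or the
 * following section starts on one, i.e. its final page holds no
 * writable data. */
static int const_can_be_readonly(uint64_t const_size, uint64_t next_addr)
{
	if ((const_size & PAGE_MASK_SKETCH) == 0)
		return 1;
	return (next_addr & PAGE_MASK_SKETCH) == 0;
}

int main(void)
{
	printf("%d\n", const_can_be_readonly(0x3000, 0x100004000));  /* 1 */
	printf("%d\n", const_can_be_readonly(0x2800, 0x100003000));  /* 1 */
	printf("%d\n", const_can_be_readonly(0x2800, 0x100002800));  /* 0 */
	return 0;
}
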
+       DBG("segTEXTB    = %p\n", (void *) segTEXTB);
+       DBG("segDATAB    = %p\n", (void *) segDATAB);
+       DBG("segLINKB    = %p\n", (void *) segLINKB);
+       DBG("segHIBB     = %p\n", (void *) segHIBB);
+       DBG("segPRELINKB = %p\n", (void *) segPRELINKB);
+       DBG("sHIB        = %p\n", (void *) sHIB);
+       DBG("eHIB        = %p\n", (void *) eHIB);
+       DBG("stext       = %p\n", (void *) stext);
+       DBG("etext       = %p\n", (void *) etext);
+       DBG("sdata       = %p\n", (void *) sdata);
+       DBG("edata       = %p\n", (void *) edata);
+       DBG("sconstdata  = %p\n", (void *) sconstdata);
+       DBG("econstdata  = %p\n", (void *) econstdata);
+       DBG("kernel_top  = %p\n", (void *) &kernel_top);
+
+       vm_kernel_base  = sHIB;
+       vm_kernel_top   = (vm_offset_t) &kernel_top;
+       vm_kernel_stext = stext;
+       vm_kernel_etext = etext;
 
        vm_set_page_size();
 
@@ -328,10 +415,10 @@ i386_vm_init(uint64_t     maxmem,
                        break;
                }
 
-#if DEBUG
-               kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
-                       i, mptr->Type, pmap_type, base, top);
-#endif
+               DBG("EFI region %d: type %u/%d, base 0x%x, top 0x%x %s\n",
+                   i, mptr->Type, pmap_type, base, top,
+                   (mptr->Attribute&EFI_MEMORY_KERN_RESERVED)? "RESERVED" :
+                   (mptr->Attribute&EFI_MEMORY_RUNTIME)? "RUNTIME" : "");
 
                if (maxpg) {
                        if (base >= maxpg)
@@ -384,7 +471,7 @@ i386_vm_init(uint64_t       maxmem,
 
 
                                if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
-                                   (top < I386_KERNEL_IMAGE_BASE_PAGE)) {
+                                   (top < vm_kernel_base_page)) {
                                        pmptr->alloc = pmptr->base;
                                        pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                                }
@@ -518,7 +605,7 @@ i386_vm_init(uint64_t       maxmem,
        if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
                ppnum_t discarded_pages  = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
                ppnum_t highest_pn = 0;
-               ppnum_t cur_alloc  = 0;
+               ppnum_t cur_end  = 0;
                uint64_t        pages_to_use;
                unsigned        cur_region = 0;
 
@@ -532,15 +619,15 @@ i386_vm_init(uint64_t     maxmem,
                pages_to_use = avail_remaining;
 
                while (cur_region < pmap_memory_region_count && pages_to_use) {
-                       for (cur_alloc = pmap_memory_regions[cur_region].alloc;
-                            cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use;
-                            cur_alloc++) {
-                               if (cur_alloc > highest_pn)
-                                       highest_pn = cur_alloc;
+                       for (cur_end = pmap_memory_regions[cur_region].base;
+                            cur_end < pmap_memory_regions[cur_region].end && pages_to_use;
+                            cur_end++) {
+                               if (cur_end > highest_pn)
+                                       highest_pn = cur_end;
                                pages_to_use--;
                        }
                        if (pages_to_use == 0)
-                               pmap_memory_regions[cur_region].end = cur_alloc;
+                               pmap_memory_regions[cur_region].end = cur_end;
 
                        cur_region++;
                }
index 663375acf4eb0397a6634cfc07c39e22a295abc8..4110c212ef841e2cb7621e5d3bf9f2ce17a7bda3 100644 (file)
 #include <i386/pmap.h>
 
 #define PML4_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE)
-pml4_entry_t   IdlePML4[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
-#ifdef __x86_64__
-               [  0]
-                       = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | PML4_PROT),
-#if KERNEL_PML4_INDEX != 0
-               [KERNEL_PML4_INDEX]
-                       = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | PML4_PROT),
-#endif
-#endif
-       };
+pml4_entry_t   IdlePML4[PTE_PER_PAGE]
+               __attribute__((section("__INITPT, __data"))) = {
+};
 
-#if defined(__x86_64__)
-#define PDPT_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE)
-#elif defined(__i386__)
 #define PDPT_PROT (INTEL_PTE_VALID)
-#endif
-pdpt_entry_t   IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
-               [0] = ((uint64_t)(INITPT_SEG_BASE + 2*PAGE_SIZE) | PDPT_PROT), 
-               [1] = ((uint64_t)(INITPT_SEG_BASE + 3*PAGE_SIZE) | PDPT_PROT), 
-               [2] = ((uint64_t)(INITPT_SEG_BASE + 4*PAGE_SIZE) | PDPT_PROT), 
-               [3] = ((uint64_t)(INITPT_SEG_BASE + 5*PAGE_SIZE) | PDPT_PROT), 
-       };
+pdpt_entry_t   IdlePDPT[PTE_PER_PAGE]
+               __attribute__((section("__INITPT, __data"))) = {
+       [0] = ((uint64_t)(INITPT_SEG_BASE + 2*PAGE_SIZE) | PDPT_PROT), 
+       [1] = ((uint64_t)(INITPT_SEG_BASE + 3*PAGE_SIZE) | PDPT_PROT), 
+       [2] = ((uint64_t)(INITPT_SEG_BASE + 4*PAGE_SIZE) | PDPT_PROT), 
+       [3] = ((uint64_t)(INITPT_SEG_BASE + 5*PAGE_SIZE) | PDPT_PROT), 
+};
 
 #if NPGPTD != 4
 #error Please update idle_pt.c to reflect the new value of NPGPTD
@@ -74,7 +64,8 @@ pdpt_entry_t  IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data")))
 
 #define FOR_0_TO_2047(x) L11(x,2047)
 
-pd_entry_t             BootstrapPTD[2048] __attribute__((section("__INITPT, __data"))) = {
+pd_entry_t     BootPTD[2048]
+               __attribute__((section("__INITPT, __data"))) = {
        FOR_0_TO_2047(ID_MAP_2MEG)
 };
 #endif /* MACHINE_BOOTSTRAPPTD */
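
The FOR_0_TO_2047(ID_MAP_2MEG) expansion above statically builds 2048
page-directory entries that identity-map the first 4 GiB with 2 MiB pages. A
runtime sketch of an equivalent fill (the PTE bit values here are assumed for
illustration, not taken from this file):

#include <stdint.h>

#define PTE_VALID 0x001ULL	/* present */
#define PTE_WRITE 0x002ULL	/* writable */
#define PTE_PS    0x080ULL	/* 2 MiB page-size bit */

static uint64_t boot_ptd_sketch[2048];

/* Entry i maps virtual address i*2MiB to the same physical address:
 * 2048 entries x 2 MiB = 4 GiB identity-mapped. */
static void identity_map_4gb(void)
{
	uint64_t i;
	for (i = 0; i < 2048; i++)
		boot_ptd_sketch[i] = (i << 21) | PTE_PS | PTE_WRITE | PTE_VALID;
}

int main(void)
{
	identity_map_4gb();
	return 0;
}
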
index 362b783a463621d8a23b5397088d4dc1cee7e5ac..d565563646077cb120799129132cc5dadbadc766 100644 (file)
@@ -57,7 +57,6 @@
  */
 #include <i386/asm.h>
 #include <assym.s>
-#include <mach_kdb.h>
 #include <i386/eflags.h>
 #include <i386/trap.h>
 #include <i386/rtclock_asm.h>
@@ -76,8 +75,6 @@
 #define        LO_UNIX_SCALL           EXT(lo_unix_scall32)
 #define        LO_MACH_SCALL           EXT(lo_mach_scall32)
 #define        LO_MDEP_SCALL           EXT(lo_mdep_scall32)
-#define        LO_DIAG_SCALL           EXT(lo_diag_scall32)
-
 
 #define HI_DATA(lo_addr)       ( (EXT(lo_addr) - EXT(hi_remap_data)) + HIGH_IDT_BASE )
 #define HI_TEXT(lo_text)       ( (EXT(lo_text) - EXT(hi_remap_text)) + HIGH_MEM_BASE )
@@ -155,8 +152,6 @@ Entry(name)                         ;\
  * Extra-special interrupt code.  Note that no offset may be
  * specified in a task gate descriptor, so name is ignored.
  */
-#define        EXCEP_TASK(n,name)  \
-       IDT_BASE_ENTRY_TG(0,DEBUG_TSS,K_TASK_GATE)
 
 /* Double-fault fatal handler */
 #define DF_FATAL_TASK(n,name)  \
@@ -208,19 +203,11 @@ EXCEP_USR(0x04,t_into)
 EXCEP_USR(0x05,t_bounds)
 EXCEPTION(0x06,t_invop)
 EXCEPTION(0x07,t_nofpu)
-#if    MACH_KDB
-EXCEP_TASK(0x08,db_task_dbl_fault)
-#else
 DF_FATAL_TASK(0x08,df_task_start)
-#endif
 EXCEPTION(0x09,a_fpu_over)
 EXCEPTION(0x0a,a_inv_tss)
 EXCEP_SPC(0x0b,hi_segnp)
-#if    MACH_KDB
-EXCEP_TASK(0x0c,db_task_stk_fault)
-#else
 EXCEP_ERR(0x0c,t_stack_fault)
-#endif
 EXCEP_SPC(0x0d,hi_gen_prot)
 EXCEP_SPC(0x0e,hi_page_fault)
 EXCEPTION(0x0f,t_trap_0f)
@@ -346,8 +333,7 @@ EXCEP_USR(0x7f, t_dtrace_ret)
 EXCEP_SPC_USR(0x80,hi_unix_scall)
 EXCEP_SPC_USR(0x81,hi_mach_scall)
 EXCEP_SPC_USR(0x82,hi_mdep_scall)
-EXCEP_SPC_USR(0x83,hi_diag_scall)
-
+INTERRUPT(0x83)
 INTERRUPT(0x84)
 INTERRUPT(0x85)
 INTERRUPT(0x86)
@@ -606,14 +592,6 @@ Entry(hi_mdep_scall)
        jmp     enter_lohandler
 
        
-Entry(hi_diag_scall)
-	pushl	%eax			// Save selector
-        pushl   $0                      // Clear trap number slot
-        pusha                           // save the general registers
-       movl    $(LO_DIAG_SCALL),%ebx   // Get the function down low to transfer to
-       jmp     enter_lohandler                 // Leap to it...
-
-       
 /*
  * sysenter entry point
  * Requires user code to set up:
@@ -1222,32 +1200,6 @@ Entry(lo_mdep_scall32)
         */
 
 
-Entry(lo_diag_scall32)
-       TIME_TRAP_UENTRY
-
-       movl    %gs:CPU_KERNEL_STACK,%edi
-       xchgl   %edi,%esp                       /* switch to kernel stack */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
-       movl    TH_TASK(%ecx),%ebx              /* point to current task  */
-
-       /* Check for active vtimers in the current task */
-       TASK_VTIMER_CHECK(%ebx, %ecx)
-
-       pushl   %edi                    /* push pbc stack for later */
-
-       CCALL1(diagCall, %edi)          // Call diagnostics
-       
-       cli                             // Disable interruptions just in case
-       popl    %esp                    // Get back the original stack
-       cmpl    $0,%eax                 // What kind of return is this?
-       jne     EXT(return_to_user)     // Normal return, do not check asts...
-
-       CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
-               // pass what would be the diag syscall
-               // error return - cause an exception
-       /* no return */
-       
-
 LEXT(return_to_user)
        TIME_TRAP_UEXIT
        jmp     ret_to_user
@@ -1267,69 +1219,3 @@ Entry(df_task_start)
 Entry(mc_task_start)
        CCALL1(panic_machine_check32, $(T_MACHINE_CHECK))
        hlt
-
-#if MACH_KDB
-#include <i386/lapic.h>
-#define CX(addr,reg)   addr(,reg,4)
-#if    0
-/*
- * Note that the per-fault entry points are not currently
- * functional.  The only way to make them work would be to
- * set up separate TSS's for each fault type, which doesn't
- * currently seem worthwhile.  (The offset part of a task
- * gate is always ignored.)  So all faults that task switch
- * currently resume at db_task_start.
- */
-/*
- * Double fault (Murphy's point) - error code (0) on stack
- */
-Entry(db_task_dbl_fault)
-       popl    %eax
-       movl    $(T_DOUBLE_FAULT),%ebx
-       jmp     db_task_start
-/*
- * Segment not present - error code on stack
- */
-Entry(db_task_seg_np)
-       popl    %eax
-       movl    $(T_SEGMENT_NOT_PRESENT),%ebx
-       jmp     db_task_start
-/*
- * Stack fault - error code on (current) stack
- */
-Entry(db_task_stk_fault)
-       popl    %eax
-       movl    $(T_STACK_FAULT),%ebx
-       jmp     db_task_start
-/*
- * General protection fault - error code on stack
- */
-Entry(db_task_gen_prot)
-       popl    %eax
-       movl    $(T_GENERAL_PROTECTION),%ebx
-       jmp     db_task_start
-#endif /* 0 */
-/*
- * The entry point where execution resumes after last-ditch debugger task
- * switch.
- */
-Entry(db_task_start)
-       movl    %esp,%edx
-       subl    $(ISS32_SIZE),%edx
-       movl    %edx,%esp               /* allocate x86_saved_state on stack */
-       movl    %eax,R32_ERR(%esp)
-       movl    %ebx,R32_TRAPNO(%esp)
-       pushl   %edx
-       CPU_NUMBER(%edx)
-       movl    CX(EXT(master_dbtss),%edx),%edx
-       movl    TSS_LINK(%edx),%eax
-       pushl   %eax                    /* pass along selector of previous TSS */
-       call    EXT(db_tss_to_frame)
-       popl    %eax                    /* get rid of TSS selector */
-       call    EXT(db_trap_from_asm)
-       addl    $0x4,%esp
-       /*
-        * And now...?
-        */
-       iret                            /* ha, ha, ha... */
-#endif /* MACH_KDB */
index 4d91cb82f4673f0a26eb47cfeb55b4c64682ed4b..fd488ebd91fd42ef166c14d5a3446380b7b34b0e 100644 (file)
@@ -28,7 +28,6 @@
 #include <i386/asm.h>
 #include <i386/asm64.h>
 #include <assym.s>
-#include <mach_kdb.h>
 #include <i386/eflags.h>
 #include <i386/trap.h>
 #include <i386/rtclock_asm.h>
@@ -48,7 +47,6 @@
 #define        LO_UNIX_SCALL           EXT(lo_unix_scall)
 #define        LO_MACH_SCALL           EXT(lo_mach_scall)
 #define        LO_MDEP_SCALL           EXT(lo_mdep_scall)
-#define        LO_DIAG_SCALL           EXT(lo_diag_scall)
 #define        LO_DOUBLE_FAULT         EXT(lo_df64)
 #define        LO_MACHINE_CHECK        EXT(lo_mc64)
 
@@ -162,19 +160,11 @@ EXCEP64_USR(0x04,t64_into)
 EXCEP64_USR(0x05,t64_bounds)
 EXCEPTION64(0x06,t64_invop)
 EXCEPTION64(0x07,t64_nofpu)
-#if    MACH_KDB
-EXCEP64_IST(0x08,db_task_dbl_fault64,1)
-#else
 EXCEP64_IST(0x08,hi64_double_fault,1)
-#endif
 EXCEPTION64(0x09,a64_fpu_over)
 EXCEPTION64(0x0a,a64_inv_tss)
 EXCEP64_SPC(0x0b,hi64_segnp)
-#if    MACH_KDB
-EXCEP64_IST(0x0c,db_task_stk_fault64,1)
-#else
 EXCEP64_SPC(0x0c,hi64_stack_fault)
-#endif
 EXCEP64_SPC(0x0d,hi64_gen_prot)
 EXCEP64_SPC(0x0e, hi64_page_fault)
 EXCEPTION64(0x0f,t64_trap_0f)
@@ -300,8 +290,7 @@ EXCEP64_USR(0x7f, t64_dtrace_ret)
 EXCEP64_SPC_USR(0x80,hi64_unix_scall)
 EXCEP64_SPC_USR(0x81,hi64_mach_scall)
 EXCEP64_SPC_USR(0x82,hi64_mdep_scall)
-EXCEP64_SPC_USR(0x83,hi64_diag_scall)
-
+INTERRUPT64(0x83)
 INTERRUPT64(0x84)
 INTERRUPT64(0x85)
 INTERRUPT64(0x86)
@@ -616,7 +605,7 @@ EXT(ret32_set_gs):
 
        add     $(ISC32_OFFSET)+8+8+8, %rsp     /* pop compat frame +
                                                   trapno, trapfn and error */  
-        cmp    $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
+        cmpl   $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
                                        /* test for fast entry/exit */
         je      L_fast_exit
 EXT(ret32_iret):
@@ -630,7 +619,7 @@ L_fast_exit:
        pop     %rcx                    /* user return esp */
        .code32
        sti                             /* interrupts enabled after sysexit */
-       sysexit                         /* 32-bit sysexit */
+       .byte 0x0f,0x35                 /* 32-bit sysexit */
        .code64
 
 L_64bit_return:
@@ -731,14 +720,6 @@ L_mdep_scall_continue:
        jmp     L_32bit_enter_check
 
        
-Entry(hi64_diag_scall)
-       swapgs                          /* switch to kernel gs (cpu_data) */
-L_diag_scall_continue:
-       push    %rax                    /* save system call number */
-       push    $(LO_DIAG_SCALL)
-       push    $(DIAG_INT)
-       jmp     L_32bit_enter_check
-
 Entry(hi64_syscall)
        swapgs                          /* Kapow! get per-cpu data area */
 L_syscall_continue:
@@ -1605,34 +1586,6 @@ Entry(lo_mdep_scall)
         * always returns through thread_exception_return
         */
 
-
-Entry(lo_diag_scall)
-       TIME_TRAP_UENTRY
-
-       movl    %gs:CPU_KERNEL_STACK,%edi
-       xchgl   %edi,%esp                       /* switch to kernel stack */
-       movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
-       movl    TH_TASK(%ecx),%ebx              /* point to current task  */
-
-       /* Check for active vtimers in the current task */
-       TASK_VTIMER_CHECK(%ebx, %ecx)
-
-       pushl   %edi                    /* push pbc stack for later */
-
-       CCALL1(diagCall, %edi)          // Call diagnostics
-       
-       cli                             // Disable interruptions just in case
-       cmpl    $0,%eax                 // What kind of return is this?
-       je      1f                      // - branch if bad (zero)
-       popl    %esp                    // Get back the original stack
-       jmp     return_to_user          // Normal return, do not check asts...
-1:
-       CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
-               // pass what would be the diag syscall
-               // error return - cause an exception
-       /* no return */
-
-
 return_to_user:
        TIME_TRAP_UEXIT
        jmp     ret_to_user
index 973fc395adf1a0b4b090845e8fc10fa45a6dc78b..a0275828c9e6375d819817118d2a23b51583de05 100644 (file)
@@ -64,7 +64,6 @@
  */
 #include <i386/tss.h>
 #include <i386/pmap.h>
-#include <mach_kdb.h>
 
 #ifdef __i386__
 struct i386_tss        master_ktss
@@ -194,41 +193,4 @@ struct i386_tss    master_mctss
                                           so no bitmap */
 };
 
-#if    MACH_KDB
-
-struct i386_tss        master_dbtss
-       __attribute__ ((section ("__DESC, master_dbtss")))
-       __attribute__ ((aligned (4096))) = {
-       0,                              /* back link */
-       0,                              /* esp0 */
-       KERNEL_DS,                      /* ss0 */
-       0,                              /* esp1 */
-       0,                              /* ss1 */
-       0,                              /* esp2 */
-       0,                              /* ss2 */
-       (int) IdlePDPT,         /* cr3 */
-       0,                              /* eip */
-       0,                              /* eflags */
-       0,                              /* eax */
-       0,                              /* ecx */
-       0,                              /* edx */
-       0,                              /* ebx */
-       0,                              /* esp */
-       0,                              /* ebp */
-       0,                              /* esi */
-       0,                              /* edi */
-       KERNEL_DS,                      /* es */
-       KERNEL32_CS,                    /* cs */
-       KERNEL_DS,                      /* ss */
-       KERNEL_DS,                      /* ds */
-       KERNEL_DS,                      /* fs */
-       KERNEL_DS,                      /* gs */
-       KERNEL_LDT,                     /* ldt */
-       0,                              /* trace_trap */
-       0x0FFF                          /* IO bitmap offset -
-                                          beyond end of TSS segment,
-                                          so no bitmap */
-};
-
-#endif /* MACH_KDB */
-#endif
+#endif /* __i386__ */
index 9d6f53abbb8dcfe197c872e2e9a85351972f0dfe..378f4d7ebf6aad444c5b7999f654c571ba02537d 100644 (file)
@@ -215,8 +215,8 @@ typedef uint32_t lapic_timer_count_t;
  */
 
 #define LAPIC_PERFCNT_INTERRUPT                0xF
-#define LAPIC_TIMER_INTERRUPT          0xE
-#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD
+#define LAPIC_INTERPROCESSOR_INTERRUPT 0xE
+#define LAPIC_TIMER_INTERRUPT          0xD
 #define LAPIC_THERMAL_INTERRUPT                0xC
 #define LAPIC_ERROR_INTERRUPT          0xB
 #define LAPIC_SPURIOUS_INTERRUPT       0xA
index 7142be2690c8759f4a0ac6100d019ee94b7fa8f4..3e699197423e1c9107f53a6dc73bccb2e9d2dbc6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <pmc/pmc.h>
 #endif
 
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#endif
-
 #include <sys/kdebug.h>
 
 #if    MP_DEBUG
@@ -132,13 +128,19 @@ legacy_init(void)
                panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
        }
        vm_map_unlock(kernel_map);
-/* Map in the local APIC non-cacheable, as recommended by Intel
- * in section 8.4.1 of the "System Programming Guide".
- */
+
+       /*
+        * Map in the local APIC non-cacheable, as recommended by Intel
+        * in section 8.4.1 of the "System Programming Guide".
+        * In fact, this is redundant because EFI will have assigned an
+        * MTRR physical range containing the local APIC's MMIO space as
+        * UC and this will override the default PAT setting.
+        */
        pmap_enter(pmap_kernel(),
                        lapic_vbase,
                        (ppnum_t) i386_btop(lapic_pbase),
                        VM_PROT_READ|VM_PROT_WRITE,
+                       VM_PROT_NONE,
                        VM_WIMG_IO,
                        TRUE);
 }
@@ -258,7 +260,6 @@ static const char *TMR_str[] = {
        "Periodic",
        "TSC-Deadline",
        "Illegal"
-       "Illegal"
 };
 
 void
@@ -359,26 +360,6 @@ lapic_dump(void)
        kprintf("\n");
 }
 
-#if MACH_KDB
-/*
- *     Displays apic junk
- *
- *     da
- */
-void 
-db_apic(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
-{
-
-       lapic_dump();
-
-       return;
-}
-
-#endif
-
 boolean_t
 lapic_probe(void)
 {
@@ -550,7 +531,6 @@ lapic_config_timer(
 /*
  * Configure TSC-deadline timer mode. The lapic interrupt is always unmasked.
  */
-__private_extern__
 void
 lapic_config_tsc_deadline_timer(void)
 {
@@ -582,7 +562,6 @@ lapic_set_timer_fast(
        LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
 }
 
-__private_extern__
 void
 lapic_set_tsc_deadline_timer(uint64_t deadline)
 {
@@ -590,7 +569,6 @@ lapic_set_tsc_deadline_timer(uint64_t deadline)
        wrmsr64(MSR_IA32_TSC_DEADLINE, deadline);
 }
 
-__private_extern__
 uint64_t
 lapic_get_tsc_deadline_timer(void)
 {
@@ -917,3 +895,4 @@ lapic_disable_timer(void)
                lvt_timer = LAPIC_READ(LVT_TIMER);
        }
 }
+
index a0409d25749863cf14c194be36c1aae33bf31e2f..065dfecea7bc5f19ec58c0bc50e55e7d1c1ba803 100644 (file)
@@ -123,9 +123,9 @@ extern void         lck_mtx_unlock_wakeup_x86(lck_mtx_t *mutex, int prior_lock_state);
 extern void            lck_mtx_lock_mark_destroyed(lck_mtx_t *mutex);
 extern int             lck_mtx_lock_grab_mutex(lck_mtx_t *mutex);
 
-extern void            hw_lock_byte_init(uint8_t *lock_byte);
-extern void            hw_lock_byte_lock(uint8_t *lock_byte);
-extern void            hw_lock_byte_unlock(uint8_t *lock_byte);
+extern void            hw_lock_byte_init(volatile uint8_t *lock_byte);
+extern void            hw_lock_byte_lock(volatile uint8_t *lock_byte);
+extern void            hw_lock_byte_unlock(volatile uint8_t *lock_byte);
 
 typedef struct {
        unsigned int            type;
@@ -191,7 +191,7 @@ typedef struct __lck_mtx_ext_t__    lck_mtx_ext_t;
 #pragma pack(1)                /* Make sure the structure stays as we defined it */
 typedef struct _lck_rw_t_internal_ {
        volatile uint16_t       lck_rw_shared_count;    /* No. of accepted readers */
-       uint8_t                 lck_rw_interlock;       /* Interlock byte */
+       volatile uint8_t        lck_rw_interlock;       /* Interlock byte */
        volatile uint8_t
                                lck_rw_priv_excl:1,     /* Writers prioritized if set */
                                lck_rw_want_upgrade:1,  /* Read-to-write upgrade waiting */
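
The volatile qualifiers added above matter for spin paths: without them a
compiler may hoist the load of the lock byte out of a polling loop and spin
forever on a stale register copy. A minimal sketch of the hazard:

#include <stdint.h>

/* With a plain "uint8_t *", the optimizer may read *lock_byte once and
 * loop on the cached value; volatile forces a fresh load on every
 * iteration. (Real acquisition additionally needs an atomic
 * test-and-set, which the xnu assembly provides.) */
static void spin_until_unlocked(volatile uint8_t *lock_byte)
{
	while (*lock_byte != 0)
		;	/* poll until the holder stores zero */
}
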
index 048dc704dd89fb0cbf1439a884221c5a114cab87..a274e0e40769719402b19ef4a892514fe018eb42 100644 (file)
@@ -61,7 +61,6 @@
  *     Locking primitives implementation
  */
 
-#include <mach_kdb.h>
 #include <mach_ldebug.h>
 
 #include <kern/lock.h>
 #include <kern/debug.h>
 #include <string.h>
 
-#if    MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif /* MACH_KDB */
 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
 #include <machine/machine_cpu.h>
 #include <i386/mp.h>
@@ -123,12 +116,6 @@ unsigned int LcksOpts=0;
 
 /* Forwards */
 
-#if    MACH_KDB
-void   db_print_simple_lock(
-                       simple_lock_t   addr);
-#endif /* MACH_KDB */
-
-
 #if    USLOCK_DEBUG
 /*
  *     Perform simple lock checks.
@@ -2102,66 +2089,3 @@ lck_mtx_lock_wait_x86 (
        }
 #endif
 }
-
-
-#if    MACH_KDB
-
-void
-db_show_one_lock(
-       lock_t  *lock)
-{
-       db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
-                 lock->lck_rw_shared_count,
-                 lock->lck_rw_want_upgrade ? "" : "!",
-                 lock->lck_rw_want_write ? "" : "!");
-       db_printf("%swaiting, %scan_sleep\n", 
-                 (lock->lck_r_waiting || lock->lck_w_waiting) ? "" : "!", 
-                 lock->lck_rw_can_sleep ? "" : "!");
-       db_printf("Interlock:\n");
-       db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->lck_rw_interlock)),
-                       TRUE, (db_expr_t)0, (char *)0);
-}
-
-/*
- * Routines to print out simple_locks and mutexes in a nicely-formatted
- * fashion.
- */
-
-const char *simple_lock_labels =       "ENTRY    ILK THREAD   DURATION CALLER";
-
-void
-db_show_one_simple_lock (
-       db_expr_t       addr,
-       boolean_t       have_addr,
-       __unused db_expr_t      count,
-       __unused char           * modif)
-{
-       simple_lock_t   saddr = (simple_lock_t) ((vm_offset_t) addr);
-
-       if (saddr == (simple_lock_t)0 || !have_addr) {
-               db_error ("No simple_lock\n");
-       }
-#if    USLOCK_DEBUG
-       else if (saddr->lock_type != USLOCK_TAG)
-               db_error ("Not a simple_lock\n");
-#endif /* USLOCK_DEBUG */
-
-       db_printf ("%s\n", simple_lock_labels);
-       db_print_simple_lock (saddr);
-}
-
-void
-db_print_simple_lock (
-       simple_lock_t   addr)
-{
-
-       db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
-#if    USLOCK_DEBUG
-       db_printf (" %08x", addr->debug.lock_thread);
-       db_printf (" %08x ", addr->debug.duration[1]);
-       db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
-#endif /* USLOCK_DEBUG */
-       db_printf ("\n");
-}
-
-#endif /* MACH_KDB */
index 6e8e3d3a21d9b72f0ee157dc7f83f9526e78f276..911439764da709173b53329b9c47bb4bd501a47a 100644 (file)
 
 #include <mach_rt.h>
 #include <platforms.h>
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
 #include <mach_kdp.h>
-#include <stat_time.h>
 #include <mach_assert.h>
 
 #include <sys/errno.h>
@@ -391,18 +388,6 @@ rdmsr_fail:
        RECOVERY_SECTION
        RECOVER_TABLE_END
 
-       .data
-dr_msk:
-       .long   ~0x000f0003
-       .long   ~0x00f0000c
-       .long   ~0x0f000030
-       .long   ~0xf00000c0
-ENTRY(dr_addr)
-       .long   0,0,0,0
-       .long   0,0,0,0
-
-       .text
-
 /*
  * ffs(mask)
  */
index ee59d599a0b307b13cbe5a9088a117bc6df7ae89..fe0a6aabd1e0eaa42ab43f529b2cb3bd312a088e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -641,7 +641,7 @@ flush_dcache64(addr64_t addr, unsigned count, int phys)
                dcache_incoherent_io_flush64(addr, count);
        }
        else {
-               uint32_t  linesize = cpuid_info()->cache_linesize;
+               uint64_t  linesize = cpuid_info()->cache_linesize;
                addr64_t  bound = (addr + count + linesize - 1) & ~(linesize - 1);
                __mfence();
                while (addr < bound) {
@@ -710,6 +710,20 @@ kdp_register_callout(void)
 }
 #endif
 
+/*
+ * Return a uniformly distributed 64-bit random number.
+ *
+ * This interface should have minimal dependencies on kernel
+ * services, and thus be available very early in the life
+ * of the kernel.  But as a result, it may not be very random
+ * on all platforms.
+ */
+uint64_t
+early_random(void)
+{
+       return (ml_early_random());
+}
+
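
Purely to illustrate the "minimal dependencies, possibly not very random"
trade-off described in the comment above, an early entropy source can be as
simple as mixing the timestamp counter. This sketch is not the xnu
implementation (the i386 ml_early_random() stub later in this diff simply
returns zero):

#include <stdio.h>
#include <stdint.h>

static inline uint64_t rdtsc_sketch(void)
{
	uint32_t lo, hi;
	__asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

/* A splitmix64-style finalizer over the TSC: cheap, dependency-free,
 * and only as unpredictable as the boot-time cycle counter. */
static uint64_t early_random_sketch(void)
{
	uint64_t x = rdtsc_sketch();
	x ^= x >> 33;
	x *= 0xff51afd7ed558ccdULL;
	x ^= x >> 33;
	return x;
}

int main(void)
{
	printf("0x%016llx\n", (unsigned long long)early_random_sketch());
	return 0;
}
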
 #if !CONFIG_VMX
 int host_vmxon(boolean_t exclusive __unused)
 {
index 45b74dd108db629243ce4710f682c3799fa16ab7..c1b8a2e1852bde777ffe0c807f368a297c7e3286 100644 (file)
@@ -56,7 +56,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 #include <mach_kdp.h>
 
 #include <i386/asm.h>
index 9152b874178c6e883399fb12e5ae63d4c5643482..454eb1b02b5d2ffca1cb387ab6ea3982cbac056b 100644 (file)
@@ -42,7 +42,7 @@
 
 extern kern_return_t   kern_invalid(void);
 
-machdep_call_t         machdep_call_table[] = {
+const machdep_call_t           machdep_call_table[] = {
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
@@ -51,7 +51,7 @@ machdep_call_t                machdep_call_table[] = {
        MACHDEP_BSD_CALL_ROUTINE(i386_set_ldt,3),
        MACHDEP_BSD_CALL_ROUTINE(i386_get_ldt,3),
 };
-machdep_call_t         machdep_call_table64[] = {
+const machdep_call_t           machdep_call_table64[] = {
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
        MACHDEP_CALL_ROUTINE(kern_invalid,0),
index 63cbf08cb0aab7b24b0e30c7414abed8467073e8..3b6d9fbe99768c4f252eae02928d3197fcb88ba1 100644 (file)
@@ -63,8 +63,8 @@ typedef struct {
     int                                bsd_style;
 } machdep_call_t;
 
-extern machdep_call_t          machdep_call_table[];
-extern machdep_call_t          machdep_call_table64[];
+extern const machdep_call_t            machdep_call_table[];
+extern const machdep_call_t            machdep_call_table64[];
 
 extern int                     machdep_call_count;
 
index 77681d3405fd67c7a2a1a8f17caacbd8e44d1b5e..64b0c9b8d41b8dd295c4102cd866ef4a4aeab9a8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <i386/machine_check.h>
 #include <i386/proc_reg.h>
 
+/*
+ * At the time of the machine-check exception, all hardware-threads panic.
+ * Each thread saves the state of its MCA registers to its per-cpu data area.
+ *
+ * State reporting is serialized so one thread dumps all valid state for all
+ * threads to the panic log. This may entail spinning waiting for other
+ * threads to complete saving state to memory. A timeout applies to this wait
+ * -- in particular, a 3-strikes timeout may prevent a thread from taking
+ * part in the affair.
+ */
+
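
The deadline-bounded wait described above appears concretely in mca_dump()
further down this hunk. A self-contained sketch of the pattern, with
clock_gettime() standing in for mach_absolute_time() and a plain flag array
for the per-cpu saved state:

#include <stdint.h>
#include <stdbool.h>
#include <time.h>

#define NCPUS_SKETCH 8
static volatile bool saved[NCPUS_SKETCH];

static uint64_t now_ns(void)	/* stand-in for mach_absolute_time() */
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Wait for every CPU to post its state, but never past the deadline:
 * a wedged thread must not be able to hang the panic dump. */
static void wait_for_all_saves(uint64_t timeout_ns)
{
	uint64_t deadline = now_ns() + timeout_ns;
	unsigned i = 0;

	while (now_ns() < deadline && i < NCPUS_SKETCH) {
		if (!saved[i])
			continue;	/* re-poll this CPU until the deadline */
		i++;
	}
}

int main(void)
{
	unsigned i;
	for (i = 0; i < NCPUS_SKETCH; i++)
		saved[i] = true;
	wait_for_all_saves(1000000ULL);	/* 1 ms budget */
	return 0;
}
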
 #define IF(bool,str)   ((bool) ? (str) : "")
 
 static boolean_t       mca_initialized = FALSE;
@@ -60,6 +71,8 @@ typedef struct {
 } mca_mci_bank_t;
 
 typedef struct mca_state {
+       boolean_t               mca_is_saved;
+       boolean_t               mca_is_valid;   /* some state is valid */
        ia32_mcg_ctl_t          mca_mcg_ctl;
        ia32_mcg_status_t       mca_mcg_status;
        mca_mci_bank_t          mca_error_bank[0];
@@ -206,6 +219,7 @@ mca_save_state(mca_state_t *mca_state)
                                        rdmsr64(IA32_MCi_MISC(i)) : 0ULL;       
                bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv)?
                                        rdmsr64(IA32_MCi_ADDR(i)) : 0ULL;       
+               mca_state->mca_is_valid = TRUE;
        } 
 
        /*
@@ -213,7 +227,9 @@ mca_save_state(mca_state_t *mca_state)
         * and don't care about races
         */
        if (x86_package()->mca_state == NULL)
-               x86_package()->mca_state = mca_state;
+                       x86_package()->mca_state = mca_state;
+
+       mca_state->mca_is_saved = TRUE;
 }
 
 void
@@ -358,9 +374,9 @@ mca_dump_bank_mc8(mca_state_t *state, int i)
 
 static const char *mca_threshold_status[] = {
        [THRESHOLD_STATUS_NO_TRACKING] =        "No tracking",
-       [THRESHOLD_STATUS_GREEN] =      "Green",
-       [THRESHOLD_STATUS_YELLOW] =     "Yellow",
-       [THRESHOLD_STATUS_RESERVED] =   "Reserved"
+       [THRESHOLD_STATUS_GREEN] =              "Green",
+       [THRESHOLD_STATUS_YELLOW] =             "Yellow",
+       [THRESHOLD_STATUS_RESERVED] =           "Reserved"
 };
 
 static void
@@ -423,41 +439,24 @@ mca_dump_bank(mca_state_t *state, int i)
 }
 
 static void
-mca_dump_error_banks(mca_state_t *state)
+mca_cpu_dump_error_banks(mca_state_t *state)
 {
        unsigned int            i;
 
+       if (!state->mca_is_valid)
+               return;
+
        kdb_printf("MCA error-reporting registers:\n");
        for (i = 0; i < mca_error_bank_count; i++ ) {
-               if (i == 8) {
+               if (i == 8 && state == x86_package()->mca_state) {
                        /*
                         * Fatal Memory Error
                         */
 
-                       /* Dump MC8 for local package */
+                       /* Dump MC8 for this package */
                        kdb_printf(" Package %d logged:\n",
                                   x86_package()->ppkg_num);
                        mca_dump_bank_mc8(state, 8);
-
-                       /* If there's other packages, report their MC8s */
-                       x86_pkg_t       *pkg;
-                       uint64_t        deadline;
-                       for (pkg = x86_pkgs; pkg != NULL; pkg = pkg->next) {
-                               if (pkg == x86_package())
-                                       continue;
-                               deadline = mach_absolute_time() + LockTimeOut;
-                               while  (pkg->mca_state == NULL &&
-                                       mach_absolute_time() < deadline)
-                                       cpu_pause();
-                               if (pkg->mca_state) {
-                                       kdb_printf(" Package %d logged:\n",
-                                                  pkg->ppkg_num);
-                                       mca_dump_bank_mc8(pkg->mca_state, 8);
-                               } else {
-                                       kdb_printf(" Package %d timed out!\n",
-                                                  pkg->ppkg_num);
-                               }
-                       }
                        continue;
                }
                mca_dump_bank(state, i);
@@ -467,8 +466,9 @@ mca_dump_error_banks(mca_state_t *state)
 void
 mca_dump(void)
 {
-       ia32_mcg_status_t       status;
-       mca_state_t             *mca_state = current_cpu_datap()->cpu_mca_state;
+       mca_state_t     *mca_state = current_cpu_datap()->cpu_mca_state;
+       uint64_t        deadline;
+       unsigned int    i = 0;
 
        /*
         * Capture local MCA registers to per-cpu data.
@@ -476,8 +476,7 @@ mca_dump(void)
        mca_save_state(mca_state);
 
        /*
-        * Serialize in case of multiple simultaneous machine-checks.
-        * Only the first caller is allowed to dump MCA registers,
+        * Serialize: the first caller controls dumping MCA registers,
         * other threads spin meantime.
         */
        simple_lock(&mca_lock);
@@ -490,12 +489,24 @@ mca_dump(void)
        mca_dump_state = DUMPING;
        simple_unlock(&mca_lock);
 
+       /*
+        * Wait for all other hardware threads to save their state.
+        * Or timeout.
+        */
+       deadline = mach_absolute_time() + LockTimeOut;
+       while (mach_absolute_time() < deadline && i < real_ncpus) {
+               if (!cpu_datap(i)->cpu_mca_state->mca_is_saved) {
+                       cpu_pause();
+                       continue;
+               }
+               i += 1;
+       }
+
        /*
         * Report machine-check capabilities:
         */
        kdb_printf(
-               "Machine-check capabilities (cpu %d) 0x%016qx:\n",
-               cpu_number(), ia32_mcg_cap.u64);
+               "Machine-check capabilities 0x%016qx:\n", ia32_mcg_cap.u64);
 
        mca_report_cpu_info();
 
@@ -512,19 +523,32 @@ mca_dump(void)
                        " %d extended MSRs present\n", mca_extended_MSRs_count);
  
        /*
-        * Report machine-check status:
+        * Dump all processor state:
         */
-       status.u64 = rdmsr64(IA32_MCG_STATUS);
-       kdb_printf(
-               "Machine-check status 0x%016qx:\n%s%s%s", status.u64,
-               IF(status.bits.ripv, " restart IP valid\n"),
-               IF(status.bits.eipv, " error IP valid\n"),
-               IF(status.bits.mcip, " machine-check in progress\n"));
+       for (i = 0; i < real_ncpus; i++) {
+               mca_state_t             *mcsp = cpu_datap(i)->cpu_mca_state;
+               ia32_mcg_status_t       status;
+
+               kdb_printf("Processor %d: ", i);
+               if (mcsp == NULL ||
+                   mcsp->mca_is_saved == FALSE ||
+                   mcsp->mca_mcg_status.u64 == 0) {
+                       kdb_printf("no machine-check status reported\n");
+                       continue;
+               }
+               if (!mcsp->mca_is_valid) {
+                       kdb_printf("no valid machine-check state\n");
+                       continue;
+               }
+               status = mcsp->mca_mcg_status;
+               kdb_printf(
+                       "machine-check status 0x%016qx:\n%s%s%s", status.u64,
+                       IF(status.bits.ripv, " restart IP valid\n"),
+                       IF(status.bits.eipv, " error IP valid\n"),
+                       IF(status.bits.mcip, " machine-check in progress\n"));
 
-       /*
-        * Dump error-reporting registers:
-        */
-       mca_dump_error_banks(mca_state);
+               mca_cpu_dump_error_banks(mcsp);
+       }
 
        /*
         * Dump any extended machine state:
@@ -539,3 +563,15 @@ mca_dump(void)
        /* Update state to release any other threads. */
        mca_dump_state = DUMPED;
 }
+
+
+extern void mca_exception_panic(void);
+extern void mtrr_lapic_cached(void);
+void mca_exception_panic(void)
+{
+#if DEBUG
+       mtrr_lapic_cached();
+#else
+       kprintf("mca_exception_panic() requires DEBUG build\n");
+#endif
+}
index b7d3f559a65ea3b08fe1cc27db33a0225a154f7c..22eae015943d27947ef6bd4f438510ba4da5d3b3 100644 (file)
 #include <i386/proc_reg.h>
 #include <mach/vm_param.h>
 #include <i386/pmap.h>
+#include <i386/pmap_internal.h>
 #include <i386/misc_protos.h>
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif
 
 #if DEBUG
 #define DBG(x...)      kprintf("DBG: " x)
@@ -73,6 +64,7 @@ unsigned int  LockTimeOut;
 unsigned int   LockTimeOutTSC;
 unsigned int   MutexSpin;
 uint64_t       LastDebuggerEntryAllowance;
+uint64_t       delay_spin_threshold;
 
 extern uint64_t panic_restart_timeout;
 
@@ -129,13 +121,13 @@ ml_static_mfree(
 {
        addr64_t vaddr_cur;
        ppnum_t ppn;
-
+       uint32_t freed_pages = 0;
        assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
 
        assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
 
        for (vaddr_cur = vaddr;
-            vaddr_cur < round_page_64(vaddr+size);
+            vaddr_cur < round_page_64(vaddr+size);
             vaddr_cur += PAGE_SIZE) {
                ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
                if (ppn != (vm_offset_t)NULL) {
@@ -146,10 +138,18 @@ ml_static_mfree(
                                        kernel_pmap->stats.resident_count;
                        }
                        pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE);
-                       vm_page_create(ppn,(ppn+1));
-                       vm_page_wire_count--;
+                       assert(pmap_valid_page(ppn));
+
+                       if (IS_MANAGED_PAGE(ppn)) {
+                               vm_page_create(ppn,(ppn+1));
+                               vm_page_wire_count--;
+                               freed_pages++;
+                       }
                }
        }
+#if    DEBUG   
+       kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+#endif
 }
 
 
@@ -553,6 +553,22 @@ ml_init_lock_timeout(void)
        interrupt_latency_tracker_setup();
 }
 
+/*
+ * Threshold above which we should attempt to block
+ * instead of spinning for clock_delay_until().
+ */
+void
+ml_init_delay_spin_threshold(void)
+{
+       nanoseconds_to_absolutetime(10ULL * NSEC_PER_USEC, &delay_spin_threshold);
+}
+
+boolean_t
+ml_delay_should_spin(uint64_t interval)
+{
+       return (interval < delay_spin_threshold) ? TRUE : FALSE;
+}
+
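
A sketch of the intended caller shape, assumed for illustration (the matching
clock_delay_until() changes are not in this file; block_for() is a
hypothetical stand-in, not a real xnu primitive):

#include <stdint.h>
#include <stdbool.h>

extern uint64_t mach_absolute_time(void);	/* real xnu interface */
extern void     cpu_pause(void);		/* real xnu interface */
extern bool     ml_delay_should_spin(uint64_t);	/* prototype simplified */
extern void     block_for(uint64_t);		/* hypothetical stand-in */

static void delay_sketch(uint64_t interval)
{
	if (ml_delay_should_spin(interval)) {
		/* Short wait: spinning beats paying for a context switch. */
		uint64_t deadline = mach_absolute_time() + interval;
		while (mach_absolute_time() < deadline)
			cpu_pause();
	} else {
		block_for(interval);	/* long wait: sleep instead */
	}
}
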
 /*
  * This is called from the machine-independent routine cpu_up()
  * to perform machine-dependent info updates. Defer to cpu_thread_init().
@@ -683,45 +699,3 @@ kernel_preempt_check(void)
 boolean_t machine_timeout_suspended(void) {
        return (virtualized || pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity());
 }
-
-#if MACH_KDB
-
-/*
- *     Display the global msrs
- * *           
- *     ms
- */
-void 
-db_msr(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
-{
-
-       uint32_t        i, msrlow, msrhigh;
-
-       /* Try all of the first 4096 msrs */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow);
-               }
-       }
-
-       /* Try all of the 4096 msrs at 0x0C000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0x0C000000 | i, msrhigh, msrlow);
-               }
-       }
-
-       /* Try all of the 4096 msrs at 0xC0000000 */
-       for (i = 0; i < 4096; i++) {
-               if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) {
-                       db_printf("%08X - %08X.%08X\n",
-                               0xC0000000 | i, msrhigh, msrlow);
-               }
-       }
-}
-
-#endif
index 42f77f6c42aa95b961c3960ec234989a8d525753..65e28b742c796dba0513e9a49e83f471a58e9546 100644 (file)
@@ -80,6 +80,9 @@ void ml_install_interrupt_handler(
 
 void ml_get_timebase(unsigned long long *timestamp);
 void ml_init_lock_timeout(void); 
+void ml_init_delay_spin_threshold(void);
+
+boolean_t ml_delay_should_spin(uint64_t interval);
 
 vm_offset_t
 ml_static_ptovirt(
@@ -303,7 +306,9 @@ void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/);
 __END_DECLS
 
 #ifdef XNU_KERNEL_PRIVATE
+
 boolean_t ml_fpu_avx_enabled(void);
+
 void interrupt_latency_tracker_setup(void);
 void interrupt_reset_latency_stats(void);
 void interrupt_populate_latency_stats(char *, unsigned);
index 0e3d9fb68505a8e76a9e959d9e6237fb82a49d84..c4176cdf53fbc010c29bc51ab8491aacb1356aa0 100644 (file)
@@ -309,3 +309,6 @@ Entry(call_continuation)
        call    EXT(thread_terminate)
 
 
+Entry(ml_early_random)
+       xor     %eax, %eax
+       ret
index c05d69bef9b45903276e27f94e6be3ee1fa49323..3c2d93ffd30fe4f4315abb342a0cf803117893f9 100644 (file)
@@ -229,6 +229,31 @@ machine_task_get_state(task_t task,
        }
 }
 
+/*
+ * This is called when a task is terminated, and also on exec().
+ * Clear machine-dependent state that is stored on the task.
+ */
+void
+machine_task_terminate(task_t task)
+{
+       if (task) {
+               user_ldt_t user_ldt;
+               void *task_debug;
+
+               user_ldt = task->i386_ldt;
+               if (user_ldt != 0) {
+                       task->i386_ldt = 0;
+                       user_ldt_free(user_ldt);
+               }
+
+               task_debug = task->task_debug;
+               if (task_debug != NULL) {
+                       task->task_debug = NULL;
+                       zfree(ids_zone, task_debug);
+               }        
+       }
+}
+
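
One small ordering detail above is worth noting: each field is cleared before
its memory is released. A generic sketch of that detach-then-free pattern (the
concurrency rationale is an assumption here, not stated in the change):

#include <stdlib.h>

struct holder {
	void *resource;
};

/* Detach the pointer before releasing it, so any concurrent reader of
 * holder->resource observes NULL rather than a dangling pointer. */
static void detach_and_free(struct holder *h)
{
	void *p = h->resource;
	if (p != NULL) {
		h->resource = NULL;	/* publish NULL first */
		free(p);		/* then release the old value */
	}
}
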
 /*
  * Set initial default state on a thread as stored in the MACHINE_TASK data.
  * Note: currently only debug state is supported.
index 474733e96de174fbdeb99af8135e9b223db96481..4186c623b5a3430a99f1f62794f5455bd948009f 100644 (file)
 struct boot_args;
 struct cpu_data;
 
+extern boolean_t virtualized;
+
 extern void            vstart(vm_offset_t);
 extern void            i386_init(void);
-
+extern void            x86_init_wrapper(uintptr_t, uintptr_t) __attribute__((noreturn));
 extern void            i386_vm_init(
                                uint64_t,
                                boolean_t,
@@ -171,4 +173,5 @@ extern void act_machine_switch_pcb(thread_t old, thread_t new);
 #define FULL_SLAVE_INIT        (NULL)
 #define FAST_SLAVE_INIT        ((void *)(uintptr_t)1)
 
+uint64_t ml_early_random(void);
 #endif /* _I386_MISC_PROTOS_H_ */
index f4221f9640c3f09d136682933cbe0118f2fb51d2..b66399d2d7e2d66bd76703d4f505792bb9349874 100644 (file)
@@ -29,7 +29,6 @@
  */
 
 #include <mach_rt.h>
-#include <mach_kdb.h>
 #include <mach_kdp.h>
 #include <mach_ldebug.h>
 #include <gprof.h>
 #include <chud/chud_xnu_private.h>
 
 #include <sys/kdebug.h>
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif
 
 #if    MP_DEBUG
 #define PAUSE          delay(1000000)
 void           slave_boot_init(void);
 void           i386_cpu_IPI(int cpu);
 
-#if MACH_KDB
-static void    mp_kdb_wait(void);
-volatile boolean_t     mp_kdb_trap = FALSE;
-volatile long  mp_kdb_ncpus = 0;
-#endif
-
 static void    mp_kdp_wait(boolean_t flush, boolean_t isNMI);
 static void    mp_rendezvous_action(void);
 static void    mp_broadcast_action(void);
@@ -246,8 +229,12 @@ smp_init(void)
        mp_cpus_call_cpu_init();
 
        if (PE_parse_boot_argn("TSC_sync_margin",
-                               &TSC_sync_margin, sizeof(TSC_sync_margin)))
+                                       &TSC_sync_margin, sizeof(TSC_sync_margin))) {
                kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
+       } else if (cpuid_vmm_present()) {
+               kprintf("TSC sync margin disabled\n");
+               TSC_sync_margin = 0;
+       }
        smp_initialized = TRUE;
 
        cpu_prewarm_init();
@@ -394,12 +381,6 @@ start_cpu(void *arg)
        }
 }
 
-extern char    prot_mode_gdt[];
-extern char    slave_boot_base[];
-extern char real_mode_bootstrap_base[];
-extern char real_mode_bootstrap_end[];
-extern char    slave_boot_end[];
-
 kern_return_t
 intel_startCPU(
        int     slot_num)
@@ -476,9 +457,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
 {
        int             my_cpu;
        volatile int    *my_word;
-#if    MACH_KDB && MACH_ASSERT
-       int             i=100;
-#endif /* MACH_KDB && MACH_ASSERT */
 
        SCHED_STATS_IPI(current_processor());
 
@@ -492,10 +470,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
        cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
 
        do {
-#if    MACH_KDB && MACH_ASSERT
-               if (i-- <= 0)
-                   Debugger("cpu_signal_handler: signals did not clear");
-#endif /* MACH_KDB && MACH_ASSERT */
 #if    MACH_KDP
                if (i_bit(MP_KDP, my_word)) {
                        DBGLOG(cpu_handle,my_cpu,MP_KDP);
@@ -521,14 +495,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
                        DBGLOG(cpu_handle,my_cpu,MP_AST);
                        i_bit_clear(MP_AST, my_word);
                        ast_check(cpu_to_processor(my_cpu));
-#if    MACH_KDB
-               } else if (i_bit(MP_KDB, my_word)) {
-
-                       i_bit_clear(MP_KDB, my_word);
-                       current_cpu_datap()->cpu_kdb_is_slave++;
-                       mp_kdb_wait();
-                       current_cpu_datap()->cpu_kdb_is_slave--;
-#endif /* MACH_KDB */
                } else if (i_bit(MP_RENDEZVOUS, my_word)) {
                        DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
                        i_bit_clear(MP_RENDEZVOUS, my_word);
@@ -894,79 +860,95 @@ typedef struct {
        void            *arg1;                  /* routine's 2nd arg */
        volatile long   *countp;                /* completion counter */
 } mp_call_t;
-       
+
+
+typedef struct {
+       queue_head_t            queue;
+       decl_simple_lock_data(, lock);
+} mp_call_queue_t;
 #define MP_CPUS_CALL_BUFS_PER_CPU      MAX_CPUS
-static queue_head_t    mp_cpus_call_freelist;
-static queue_head_t    mp_cpus_call_queue[MAX_CPUS];
-/*
- * The free list and the per-cpu call queues are protected by the following
- * lock which is taken wil interrupts disabled.
- */
-decl_simple_lock_data(,mp_cpus_call_lock);
+static mp_call_queue_t mp_cpus_call_freelist;
+static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
 
 static inline boolean_t
-mp_call_lock(void)
+mp_call_head_lock(mp_call_queue_t *cqp)
 {
        boolean_t       intrs_enabled;
 
        intrs_enabled = ml_set_interrupts_enabled(FALSE);
-       simple_lock(&mp_cpus_call_lock);
+       simple_lock(&cqp->lock);
 
        return intrs_enabled;
 }
 
 static inline boolean_t
-mp_call_is_locked(void)
+mp_call_head_is_locked(mp_call_queue_t *cqp)
 {
        return !ml_get_interrupts_enabled() &&
-               hw_lock_held((hw_lock_t)&mp_cpus_call_lock);
+               hw_lock_held((hw_lock_t)&cqp->lock);
 }
 
 static inline void
-mp_call_unlock(boolean_t intrs_enabled)
+mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
 {
-       simple_unlock(&mp_cpus_call_lock);
+       simple_unlock(&cqp->lock);
        ml_set_interrupts_enabled(intrs_enabled);
 }
 
 static inline mp_call_t *
 mp_call_alloc(void)
 {
-       mp_call_t       *callp;
+       mp_call_t       *callp = NULL;
+       boolean_t       intrs_enabled;
+       mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
+       intrs_enabled = mp_call_head_lock(cqp);
+       if (!queue_empty(&cqp->queue))
+               queue_remove_first(&cqp->queue, callp, typeof(callp), link);
+       mp_call_head_unlock(cqp, intrs_enabled);
 
-       assert(mp_call_is_locked());
-       if (queue_empty(&mp_cpus_call_freelist))
-               return NULL;
-       queue_remove_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
        return callp;
 }
 
 static inline void
 mp_call_free(mp_call_t *callp)
 {
-       assert(mp_call_is_locked());
-       queue_enter_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
+       boolean_t       intrs_enabled;
+       mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
+       intrs_enabled = mp_call_head_lock(cqp);
+       queue_enter_first(&cqp->queue, callp, typeof(callp), link);
+       mp_call_head_unlock(cqp, intrs_enabled);
 }
 
 static inline mp_call_t *
-mp_call_dequeue(queue_t call_queue)
+mp_call_dequeue_locked(mp_call_queue_t *cqp)
 {
-       mp_call_t       *callp;
+       mp_call_t       *callp = NULL;
 
-       assert(mp_call_is_locked());
-       if (queue_empty(call_queue))
-               return NULL;
-       queue_remove_first(call_queue, callp, typeof(callp), link);
+       assert(mp_call_head_is_locked(cqp));
+       if (!queue_empty(&cqp->queue))
+               queue_remove_first(&cqp->queue, callp, typeof(callp), link);
        return callp;
 }
 
+static inline void
+mp_call_enqueue_locked(
+       mp_call_queue_t *cqp,
+       mp_call_t       *callp)
+{
+       queue_enter(&cqp->queue, callp, typeof(callp), link);
+}
+
 /* Called on the boot processor to initialize global structures */
 static void
 mp_cpus_call_init(void)
 {
+       mp_call_queue_t *cqp = &mp_cpus_call_freelist;
+
        DBG("mp_cpus_call_init()\n");
-       simple_lock_init(&mp_cpus_call_lock, 0);
-       queue_init(&mp_cpus_call_freelist);
+       simple_lock_init(&cqp->lock, 0);
+       queue_init(&cqp->queue);
 }
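
The refactor above replaces the single global mp_cpus_call_lock with a lock
embedded in each queue, shrinking the critical sections and removing cross-CPU
contention on the freelist and per-cpu queues. A self-contained sketch of the
per-queue-lock pattern (pthread mutexes stand in for simple locks; all names
are illustrative):

#include <pthread.h>
#include <stddef.h>

struct call_node {
	struct call_node *next;
	void (*fn)(void *);
	void *arg;
};

struct call_queue {
	struct call_node *head;
	pthread_mutex_t   lock;		/* per-queue, not one global lock */
};

static struct call_queue q = { NULL, PTHREAD_MUTEX_INITIALIZER };

static void cq_push(struct call_queue *cq, struct call_node *n)
{
	pthread_mutex_lock(&cq->lock);	/* contends only on this queue */
	n->next = cq->head;
	cq->head = n;
	pthread_mutex_unlock(&cq->lock);
}

static struct call_node *cq_pop(struct call_queue *cq)
{
	pthread_mutex_lock(&cq->lock);
	struct call_node *n = cq->head;
	if (n != NULL)
		cq->head = n->next;
	pthread_mutex_unlock(&cq->lock);
	return n;
}
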
 
 /*
@@ -977,19 +959,18 @@ mp_cpus_call_init(void)
 static void
 mp_cpus_call_cpu_init(void)
 {
-       boolean_t       intrs_enabled;
        int             i;
+       mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()];
        mp_call_t       *callp;
 
-       if (mp_cpus_call_queue[cpu_number()].next != NULL)
+       if (cqp->queue.next != NULL)
                return; /* restart/wake case: called already */
 
-       queue_init(&mp_cpus_call_queue[cpu_number()]);
+       simple_lock_init(&cqp->lock, 0);
+       queue_init(&cqp->queue);
        for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
                callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
-               intrs_enabled = mp_call_lock();
                mp_call_free(callp);
-               mp_call_unlock(intrs_enabled);
        }
 
        DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
@@ -1002,56 +983,30 @@ mp_cpus_call_cpu_init(void)
 static void
 mp_cpus_call_action(void)
 {
-       queue_t         cpu_head;
+       mp_call_queue_t *cqp;
        boolean_t       intrs_enabled;
        mp_call_t       *callp;
        mp_call_t       call;
 
        assert(!ml_get_interrupts_enabled());
-       cpu_head = &mp_cpus_call_queue[cpu_number()];
-       intrs_enabled = mp_call_lock();
-       while ((callp = mp_call_dequeue(cpu_head)) != NULL) {
+       cqp = &mp_cpus_call_head[cpu_number()];
+       intrs_enabled = mp_call_head_lock(cqp);
+       while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
                /* Copy call request to the stack to free buffer */
                call = *callp;
                mp_call_free(callp);
                if (call.func != NULL) {
-                       mp_call_unlock(intrs_enabled);
+                       mp_call_head_unlock(cqp, intrs_enabled);
                        KERNEL_DEBUG_CONSTANT(
                                TRACE_MP_CPUS_CALL_ACTION,
                                call.func, call.arg0, call.arg1, call.countp, 0);
                        call.func(call.arg0, call.arg1);
-                       (void) mp_call_lock();
+                       (void) mp_call_head_lock(cqp);
                }
                if (call.countp != NULL)
                        atomic_incl(call.countp, 1);
        }
-       mp_call_unlock(intrs_enabled);
-}
-
-static boolean_t
-mp_call_queue(
-       int             cpu, 
-        void           (*action_func)(void *, void *),
-        void           *arg0,
-        void           *arg1,
-       volatile long   *countp)
-{
-       queue_t         cpu_head = &mp_cpus_call_queue[cpu];
-       mp_call_t       *callp;
-
-       assert(mp_call_is_locked());
-       callp = mp_call_alloc();
-       if (callp == NULL)
-               return FALSE;
-
-       callp->func = action_func;
-       callp->arg0 = arg0;
-       callp->arg1 = arg1;
-       callp->countp = countp;
-
-       queue_enter(cpu_head, callp, typeof(callp), link);
-
-       return TRUE;
+       mp_call_head_unlock(cqp, intrs_enabled);
 }
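These per-CPU queues back the mp_cpus_call() interface whose hunks follow. A hedged usage sketch, assuming the mp.h prototype cpu_t mp_cpus_call(cpumask_t, mp_sync_t, void (*)(void *), void *); the function names in the example are hypothetical:

	/* Hypothetical caller: run do_work() on every other CPU and,
	 * in SYNC mode, wait until all of them have executed it. */
	static void
	do_work(void *arg)
	{
		/* Runs on each target CPU at interrupt context. */
		(void) arg;
	}

	static void
	example_broadcast(void *arg)
	{
		cpu_t	ncpus_called;

		ncpus_called = mp_cpus_call(CPUMASK_OTHERS, SYNC, do_work, arg);
		/* SYNC: all targeted CPUs have run do_work() by now. */
		(void) ncpus_called;
	}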
 
 /*
@@ -1085,19 +1040,19 @@ mp_cpus_call(
 }
 
 static void
-mp_cpus_call_wait(boolean_t intrs_enabled,
-                 long mp_cpus_signals,
-                 volatile long *mp_cpus_calls)
+mp_cpus_call_wait(boolean_t    intrs_enabled,
+                 long          mp_cpus_signals,
+                 volatile long *mp_cpus_calls)
 {
-       queue_t         cpu_head;
+       mp_call_queue_t         *cqp;
 
-       cpu_head = &mp_cpus_call_queue[cpu_number()];
+       cqp = &mp_cpus_call_head[cpu_number()];
 
        while (*mp_cpus_calls < mp_cpus_signals) {
                if (!intrs_enabled) {
-                       if (!queue_empty(cpu_head))
+                       /* Sniffing w/o locking */
+                       if (!queue_empty(&cqp->queue))
                                mp_cpus_call_action();
-
                        handle_pending_TLB_flushes();
                }
                cpu_pause();
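The wait loop above sniffs the queue without taking its lock; the race is benign because an enqueue missed on one iteration is observed on the next, and draining our own queue (plus pending TLB shootdowns) while spinning prevents two CPUs that target each other with interrupts masked from deadlocking. The idiom, distilled into a hypothetical helper using the names from this diff:

	/* Distilled idiom: spin until 'done', but while interrupts are
	 * masked keep servicing inbound cross-calls and TLB shootdowns
	 * so a peer spinning on us can make progress too.  The unlocked
	 * queue_empty() sniff is racy by design. */
	static void
	spin_politely(volatile long *done, boolean_t intrs_enabled,
		      mp_call_queue_t *cqp)
	{
		while (!*done) {
			if (!intrs_enabled) {
				if (!queue_empty(&cqp->queue))	/* racy, benign */
					mp_cpus_call_action();
				handle_pending_TLB_flushes();
			}
			cpu_pause();
		}
	}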
@@ -1124,7 +1079,7 @@ mp_cpus_call1(
 
        KERNEL_DEBUG_CONSTANT(
                TRACE_MP_CPUS_CALL | DBG_FUNC_START,
-               cpus, mode, action_func, arg0, arg1);
+               cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);
 
        if (!smp_initialized) {
                if ((cpus & CPUMASK_SELF) == 0)
@@ -1158,7 +1113,8 @@ mp_cpus_call1(
                        if (mode == SYNC && action_func != NULL) {
                                KERNEL_DEBUG_CONSTANT(
                                        TRACE_MP_CPUS_CALL_LOCAL,
-                                       action_func, arg0, arg1, 0, 0);
+                                       VM_KERNEL_UNSLIDE(action_func),
+                                       arg0, arg1, 0, 0);
                                action_func(arg0, arg1);
                        }
                } else {
@@ -1166,41 +1122,52 @@ mp_cpus_call1(
                         * Here to queue a call to cpu and IPI.
                         * Spinning for request buffer unless NOSYNC.
                         */
+                       mp_call_t       *callp = NULL;
+                       mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
+
                queue_call:
-                       intrs_enabled = mp_call_lock();
+                       if (callp == NULL)
+                               callp = mp_call_alloc();
+                       intrs_enabled = mp_call_head_lock(cqp);
                        if (!cpu_datap(cpu)->cpu_running) {
-                               mp_call_unlock(intrs_enabled);
+                               mp_call_head_unlock(cqp, intrs_enabled);
                                continue;
                        }
                        if (mode == NOSYNC) {
-                               if (!mp_call_queue(cpu, action_func, arg0, arg1,
-                                                  NULL)) {
+                               if (callp == NULL) {
                                        cpus_notcalled |= cpu_to_cpumask(cpu);
-                                       mp_call_unlock(intrs_enabled);
+                                       mp_call_head_unlock(cqp, intrs_enabled);
                                        KERNEL_DEBUG_CONSTANT(
                                                TRACE_MP_CPUS_CALL_NOBUF,
                                                cpu, 0, 0, 0, 0);
                                        continue;
                                }
+                               callp->countp = NULL;
                        } else {
-                               if (!mp_call_queue(cpu, action_func, arg0, arg1,
-                                                     &mp_cpus_calls)) {
-                                       mp_call_unlock(intrs_enabled);
+                               if (callp == NULL) {
+                                       mp_call_head_unlock(cqp, intrs_enabled);
                                        KERNEL_DEBUG_CONSTANT(
                                                TRACE_MP_CPUS_CALL_NOBUF,
                                                cpu, 0, 0, 0, 0);
                                        if (!intrs_enabled) {
-                                               mp_cpus_call_action();
+                                               /* Sniffing w/o locking */
+                                               if (!queue_empty(&cqp->queue))
+                                                       mp_cpus_call_action();
                                                handle_pending_TLB_flushes();
                                        }
                                        cpu_pause();
                                        goto queue_call;
                                }
+                               callp->countp = &mp_cpus_calls;
                        }
+                       callp->func = action_func;
+                       callp->arg0 = arg0;
+                       callp->arg1 = arg1;
+                       mp_call_enqueue_locked(cqp, callp);
                        mp_cpus_signals++;
                        cpus_called |= cpu_to_cpumask(cpu);
                        i386_signal_cpu(cpu, MP_CALL, ASYNC);
-                       mp_call_unlock(intrs_enabled);
+                       mp_call_head_unlock(cqp, intrs_enabled);
                        if (mode == SYNC) {
                                mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
                        }
@@ -1211,7 +1178,7 @@ mp_cpus_call1(
        if (mode != SYNC && call_self ) {
                KERNEL_DEBUG_CONSTANT(
                        TRACE_MP_CPUS_CALL_LOCAL,
-                       action_func, arg0, arg1, 0, 0);
+                       VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
                if (action_func != NULL) {
                        ml_set_interrupts_enabled(FALSE);
                        action_func(arg0, arg1);
@@ -1423,7 +1390,10 @@ mp_kdp_enter(void)
                 * "unsafe-to-interrupt" points such as the trampolines,
                 * but neither do we want to lose state by waiting too long.
                 */
-               tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000);
+               tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);
+
+               if (virtualized)
+                       tsc_timeout = ~0ULL;
 
                while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
                        /*
@@ -1454,7 +1424,7 @@ mp_kdp_enter(void)
                        cpu_NMI_interrupt(cpu);
                }
 
-       DBG("mp_kdp_enter() %lu processors done %s\n",
+       DBG("mp_kdp_enter() %u processors done %s\n",
            (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
        
        postcode(MP_KDP_ENTER);
@@ -1596,104 +1566,6 @@ cause_ast_check(
        }
 }
 
-#if MACH_KDB
-/*
- * invoke kdb on slave processors 
- */
-
-void
-remote_kdb(void)
-{
-       unsigned int    my_cpu = cpu_number();
-       unsigned int    cpu;
-       int kdb_ncpus;
-       uint64_t tsc_timeout = 0;
-       
-       mp_kdb_trap = TRUE;
-       mp_kdb_ncpus = 1;
-       for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
-               if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
-                       continue;
-               kdb_ncpus++;
-               i386_signal_cpu(cpu, MP_KDB, ASYNC);
-       }
-       DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus);
-
-       tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000);
-
-       while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) {
-               /*
-                * a TLB shootdown request may be pending... this would result in the requesting
-                * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
-                * Process it, so it can now enter mp_kdp_wait()
-                */
-               handle_pending_TLB_flushes();
-
-               cpu_pause();
-       }
-       DBG("mp_kdp_enter() %lu processors done %s\n",
-               mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
-}
-
-static void
-mp_kdb_wait(void)
-{
-       DBG("mp_kdb_wait()\n");
-
-       /* If an I/O port has been specified as a debugging aid, issue a read */
-       panic_io_port_read();
-
-       atomic_incl(&mp_kdb_ncpus, 1);
-       while (mp_kdb_trap) {
-               /*
-                * a TLB shootdown request may be pending... this would result in the requesting
-                * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
-                * Process it, so it can now enter mp_kdp_wait()
-                */
-               handle_pending_TLB_flushes();
-
-               cpu_pause();
-       }
-       atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
-       DBG("mp_kdb_wait() done\n");
-}
-
-/*
- * Clear kdb interrupt
- */
-
-void
-clear_kdb_intr(void)
-{
-       mp_disable_preemption();
-       i_bit_clear(MP_KDB, &current_cpu_datap()->cpu_signals);
-       mp_enable_preemption();
-}
-
-void
-mp_kdb_exit(void)
-{
-       DBG("mp_kdb_exit()\n");
-       atomic_decl((volatile long *)&mp_kdb_ncpus, 1);
-       mp_kdb_trap = FALSE;
-       __asm__ volatile("mfence");
-
-       while (mp_kdb_ncpus > 0) {
-               /*
-                * a TLB shootdown request may be pending... this would result in the requesting
-                * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
-                * Process it, so it can now enter mp_kdp_wait()
-                */
-               handle_pending_TLB_flushes();
-
-               cpu_pause();
-       }
-
-       DBG("mp_kdb_exit() done\n");
-}
-
-#endif /* MACH_KDB */
-
 void
 slave_machine_init(void *param)
 {
@@ -1718,54 +1590,6 @@ int cpu_number(void)
        return get_cpu_number();
 }
 
-#if    MACH_KDB
-#include <ddb/db_output.h>
-
-#define TRAP_DEBUG 0 /* Must match interrupt.s and spl.s */
-
-
-#if    TRAP_DEBUG
-#define MTRAPS 100
-struct mp_trap_hist_struct {
-       unsigned char type;
-       unsigned char data[5];
-} trap_hist[MTRAPS], *cur_trap_hist = trap_hist,
-    *max_trap_hist = &trap_hist[MTRAPS];
-
-void db_trap_hist(void);
-
-/*
- * SPL:
- *     1: new spl
- *     2: old spl
- *     3: new tpr
- *     4: old tpr
- * INT:
- *     1: int vec
- *     2: old spl
- *     3: new spl
- *     4: post eoi tpr
- *     5: exit tpr
- */
-
-void
-db_trap_hist(void)
-{
-       int i,j;
-       for(i=0;i<MTRAPS;i++)
-           if (trap_hist[i].type == 1 || trap_hist[i].type == 2) {
-                   db_printf("%s%s",
-                             (&trap_hist[i]>=cur_trap_hist)?"*":" ",
-                             (trap_hist[i].type == 1)?"SPL":"INT");
-                   for(j=0;j<5;j++)
-                       db_printf(" %02x", trap_hist[i].data[j]);
-                   db_printf("\n");
-           }
-               
-}
-#endif /* TRAP_DEBUG */
-#endif /* MACH_KDB */
-
 static void
 cpu_prewarm_init()
 {
index 6974ef256cc67ddeabeeb67c01b8556873fc4fd7..faa84df7d18a53fc7c4cbcc39e6e0e34dabdb238 100644 (file)
@@ -114,9 +114,6 @@ extern      void    mp_kdp_enter(void);
 extern void    mp_kdp_exit(void);
 
 extern boolean_t       mp_recent_debugger_activity(void);
-#if MACH_KDB
-extern void mp_kdb_exit(void);
-#endif
 
 /*
  * All cpu rendezvous:
index 2421dc7348ebd3c0296d5c8869a639a2ce405465..fd4003f20e591037421863b7539a0b0957df1c2e 100644 (file)
 #include <i386/misc_protos.h>
 #include <i386/mp.h>
 #include <i386/pmap.h>
+#if defined(__i386__)
+#include <i386/pmap_internal.h>
+#endif /* i386 */
 #if CONFIG_MCA
 #include <i386/machine_check.h>
 #endif
 
 #include <kern/misc_protos.h>
 
-#include <mach_kdb.h>
-
 #ifdef __x86_64__
 #define K_INTR_GATE (ACC_P|ACC_PL_K|ACC_INTR_GATE)
 #define U_INTR_GATE (ACC_P|ACC_PL_U|ACC_INTR_GATE)
        },
 
 #define USER_TRAP_SPC USER_TRAP
-                        
 
 // Declare the table using the macros we just set up
-struct fake_descriptor64 master_idt64[IDTSZ] __attribute__ ((aligned (4096))) = {
+struct fake_descriptor64 master_idt64[IDTSZ]
+       __attribute__ ((section("__HIB,__desc")))
+       __attribute__ ((aligned(PAGE_SIZE))) = {
 #include "../x86_64/idt_table.h"
 };
 #endif
@@ -163,7 +165,7 @@ struct fake_descriptor64 master_idt64[IDTSZ] __attribute__ ((aligned (4096))) =
 /*
  * First cpu`s interrupt stack.
  */
-extern uint32_t                low_intstack[]; /* bottom */
+extern uint32_t                low_intstack[];         /* bottom */
 extern uint32_t                low_eintstack[];        /* top */
 
 /*
@@ -243,12 +245,14 @@ struct fake_descriptor cpudata_desc_pattern = {
        ACC_P|ACC_PL_K|ACC_DATA_W
 };
 
+#if    NCOPY_WINDOWS > 0
 struct fake_descriptor userwindow_desc_pattern = {
        (unsigned int) 0,
        ((NBPDE * NCOPY_WINDOWS) / PAGE_SIZE) - 1,
        SZ_32 | SZ_G,
        ACC_P|ACC_PL_U|ACC_DATA_W
 };
+#endif
 
 struct fake_descriptor physwindow_desc_pattern = {
        (unsigned int) 0,
@@ -433,14 +437,6 @@ cpu_desc_init(cpu_data_t *cdp)
                *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] =
                        temp_fake_desc;
 
-#if MACH_KDB
-               temp_fake_desc = tss_desc_pattern;
-               temp_fake_desc.offset = (vm_offset_t) &master_dbtss;
-               fix_desc(&temp_fake_desc, 1);
-               *(struct fake_descriptor *) &master_gdt[sel_idx(DEBUG_TSS)] =
-                       temp_fake_desc;
-#endif
-
                temp_fake_desc = cpudata_desc_pattern;
                temp_fake_desc.offset = (vm_offset_t) &cpu_data_master;
                fix_desc(&temp_fake_desc, 1);
@@ -508,13 +504,6 @@ cpu_desc_init(cpu_data_t *cdp)
                bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt));
                bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt));
                bzero((char *)&cdt->ktss, sizeof(struct i386_tss)); 
-#if    MACH_KDB
-               cdi->cdi_dbtss = (struct i386_tss *) (cpu_hi_desc +
-                               offsetof(cpu_desc_table_t, dbtss));
-               bcopy((char *)&master_dbtss,
-                               (char *)&cdt->dbtss,
-                               sizeof(struct i386_tss));
-#endif /* MACH_KDB */
 
                /*
                 * Fix up the entries in the GDT to point to
@@ -535,17 +524,6 @@ cpu_desc_init(cpu_data_t *cdp)
                cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp;
                fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1);
 
-#if    MACH_KDB /* this only works for legacy 32-bit machines */
-               cdt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
-               cdt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) cdi->cdi_dbtss;
-               fix_desc(&cdt->gdt[sel_idx(DEBUG_TSS)], 1);
-
-               cdt->dbtss.esp0 = (int)(db_task_stack_store +
-                               (INTSTACK_SIZE * (cdp->cpu_number + 1)) - sizeof (natural_t));
-               cdt->dbtss.esp = cdt->dbtss.esp0;
-               cdt->dbtss.eip = (int)&db_task_start;
-#endif /* MACH_KDB */
-
                cdt->ktss.ss0 = KERNEL_DS;
                cdt->ktss.io_bit_map_offset = 0x0FFF;   /* no IO bitmap */
 
@@ -568,11 +546,15 @@ cpu_desc_init64(cpu_data_t *cdp)
                 */
                cdi->cdi_ktss = (void *)&master_ktss64;
                cdi->cdi_sstk = (vm_offset_t) &master_sstk.top;
+#if __x86_64__
+               cdi->cdi_gdt.ptr  = (void *)MASTER_GDT_ALIAS;
+               cdi->cdi_idt.ptr  = (void *)MASTER_IDT_ALIAS;
+#else
                cdi->cdi_gdt.ptr  = (void *)master_gdt;
                cdi->cdi_idt.ptr  = (void *)master_idt64;
+#endif
                cdi->cdi_ldt  = (struct fake_descriptor *) master_ldt;
 
-
                /* Replace the expanded LDTs and TSS slots in the GDT */
                kernel_ldt_desc64.offset64 = UBER64(&master_ldt);
                *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] =
@@ -592,7 +574,11 @@ cpu_desc_init64(cpu_data_t *cdp)
                /*
                 * Set the double-fault stack as IST1 in the 64-bit TSS
                 */
+#if __x86_64__
+               master_ktss64.ist1 = (uintptr_t) low_eintstack;
+#else
                master_ktss64.ist1 = UBER64((uintptr_t) df_task_stack_end);
+#endif
 
        } else {
                cpu_desc_table64_t      *cdt = (cpu_desc_table64_t *) cdp->cpu_desc_tablep;
@@ -601,8 +587,12 @@ cpu_desc_init64(cpu_data_t *cdp)
                 * heap (cpu_desc_table). 
                 * LDT descriptors are mapped into a separate area.
                 */
-               cdi->cdi_gdt.ptr  = (struct fake_descriptor *)cdt->gdt;
+#if __x86_64__
+               cdi->cdi_idt.ptr  = (void *)MASTER_IDT_ALIAS;
+#else
                cdi->cdi_idt.ptr  = (void *)cdt->idt;
+#endif
+               cdi->cdi_gdt.ptr  = (struct fake_descriptor *)cdt->gdt;
                cdi->cdi_ktss = (void *)&cdt->ktss;
                cdi->cdi_sstk = (vm_offset_t)&cdt->sstk.top;
                cdi->cdi_ldt  = cdp->cpu_ldtp;
@@ -610,7 +600,9 @@ cpu_desc_init64(cpu_data_t *cdp)
                /*
                 * Copy the tables
                 */
+#if !__x86_64__
                bcopy((char *)master_idt64, (char *)cdt->idt, sizeof(master_idt64));
+#endif
                bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt));
                bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt));
                bcopy((char *)&master_ktss64, (char *)&cdt->ktss, sizeof(struct x86_64_tss));
@@ -662,8 +654,8 @@ cpu_desc_load(cpu_data_t *cdp)
        cdi->cdi_idt.size = 0x1000 + cdp->cpu_number;
        cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1;
 
-       lgdt((unsigned long *) &cdi->cdi_gdt);
-       lidt((unsigned long *) &cdi->cdi_idt);
+       lgdt((uintptr_t *) &cdi->cdi_gdt);
+       lidt((uintptr_t *) &cdi->cdi_idt);
        lldt(KERNEL_LDT);
 
        set_tr(KERNEL_TSS);
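The lgdt/lidt cast change above merely tracks an LP64-clean prototype; either way the operand is the architectural pseudo-descriptor, whose packed limit/base layout the cdi_gdt and cdi_idt size/ptr pairs mirror. For reference, a sketch from the Intel SDM (the struct name is ours; xnu's own type lives in the descriptor headers):

	#pragma pack(1)
	typedef struct {
		uint16_t	size;	/* limit: bytes in table minus one */
		uintptr_t	ptr;	/* linear base address of the table */
	} pseudo_descriptor_sketch_t;
	#pragma pack()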
@@ -703,19 +695,18 @@ cpu_desc_load64(cpu_data_t *cdp)
        /* Load the GDT, LDT, IDT and TSS */
        cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1;
        cdi->cdi_idt.size = 0x1000 + cdp->cpu_number;
-       lgdt((unsigned long *) &cdi->cdi_gdt);
-       lidt((unsigned long *) &cdi->cdi_idt);
+       lgdt((uintptr_t *) &cdi->cdi_gdt);
+       lidt((uintptr_t *) &cdi->cdi_idt);
        lldt(KERNEL_LDT);
        set_tr(KERNEL_TSS);
 
-       /* Stuff the pre-cpu data area into the MSR and swapgs to activate */
-       wrmsr64(MSR_IA32_KERNEL_GS_BASE, (unsigned long)cdp);
+       /* Stuff the kernel per-cpu data area address into the MSRs */
+       wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp);
+       wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp);
+
 #if GPROF // Hack to enable mcount to work on K64
        __asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(KERNEL_DS)));
 #endif
-       swapgs();
-
-       cpu_mode_init(cdp);
 #endif
 }
 
@@ -784,7 +775,7 @@ cpu_data_alloc(boolean_t is_boot_cpu)
 
        if (is_boot_cpu) {
                assert(real_ncpus == 1);
-               cdp = &cpu_data_master;
+               cdp = cpu_datap(0);
                if (cdp->cpu_processor == NULL) {
                        simple_lock_init(&ncpus_lock, 0);
                        cdp->cpu_processor = cpu_processor_alloc(TRUE);
@@ -826,6 +817,7 @@ cpu_data_alloc(boolean_t is_boot_cpu)
         * Allocate descriptor table:
         * Size depends on cpu mode.
         */
+
        ret = kmem_alloc(kernel_map, 
                         (vm_offset_t *) &cdp->cpu_desc_tablep,
                         cdp->cpu_is64bit ? sizeof(cpu_desc_table64_t)
@@ -1040,7 +1032,7 @@ cpu_physwindow_init(int cpu)
                 * pte pointer we're interested in actually
                 * exists in the page table
                 */
-               pmap_expand(kernel_pmap, phys_window);
+               pmap_expand(kernel_pmap, phys_window, PMAP_EXPAND_OPTIONS_NONE);
 
                cdp->cpu_physwindow_base = phys_window;
                cdp->cpu_physwindow_ptep = vtopte(phys_window);
@@ -1072,8 +1064,50 @@ cpu_mode_init(cpu_data_t *cdp)
 #else
        fast_syscall_init64(cdp);
 #endif
-
-       /* Call for per-cpu pmap mode initialization */
-       pmap_cpu_init();
 }
 
+#if __x86_64__
+/*
+ * Allocate a new interrupt stack for the boot processor from the
+ * heap rather than continue to use the statically allocated space.
+ * Also switch to a dynamically allocated cpu data area.
+ */
+void
+cpu_data_realloc(void)
+{
+       int             ret;
+       vm_offset_t     stack;
+       cpu_data_t      *cdp;
+       boolean_t       istate;
+
+       ret = kmem_alloc(kernel_map, &stack, INTSTACK_SIZE);
+       if (ret != KERN_SUCCESS) {
+               panic("cpu_data_realloc() stack alloc, ret=%d\n", ret);
+       }
+       bzero((void*) stack, INTSTACK_SIZE);
+       stack += INTSTACK_SIZE;
+
+       ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t));
+       if (ret != KERN_SUCCESS) {
+               panic("cpu_data_realloc() cpu data alloc, ret=%d\n", ret);
+       }
+
+       /* Copy old contents into new area and make fix-ups */
+       bcopy((void *) &cpu_data_master, (void*) cdp, sizeof(cpu_data_t));
+       cdp->cpu_this = cdp;
+       cdp->cpu_int_stack_top = stack;
+       timer_call_initialize_queue(&cdp->rtclock_timer.queue);
+
+       kprintf("Reallocated master cpu data: %p, interrupt stack top: %p\n",
+               (void *) cdp, (void *) stack);
+
+       /*
+        * With interrupts disabled, commit the new areas.
+        */
+       istate = ml_set_interrupts_enabled(FALSE);
+       cpu_data_ptr[0] = cdp;
+       wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp);
+       wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp);
+       (void) ml_set_interrupts_enabled(istate);
+}
+#endif /* __x86_64__ */
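cpu_data_realloc() writes both MSR_IA32_GS_BASE and MSR_IA32_KERNEL_GS_BASE because per-cpu data is reached through %gs-relative loads, and either base can be live depending on swapgs state. A sketch of the accessor pattern these MSR writes keep working, modeled on xnu's %gs-relative CPU_DATA_GET-style macros (the function name here is hypothetical):

	#include <stddef.h>

	/* Hypothetical restatement: load cpu_data_t.cpu_this through the
	 * GS segment base, which the MSR writes above just re-pointed at
	 * the reallocated area. */
	static inline cpu_data_t *
	current_cpu_datap_sketch(void)
	{
		cpu_data_t *cdp;

		__asm__ volatile ("mov %%gs:%P1, %0"
				  : "=r" (cdp)
				  : "i" (offsetof(cpu_data_t, cpu_this)));
		return cdp;
	}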
index 97b04c9cb2b06f1f1041c4068cdfcad7c76deace..3b8ef7ea1607c6454f280be4d761171c6349656c 100644 (file)
@@ -60,7 +60,6 @@
 #ifndef        _I386_MP_DESC_H_
 #define        _I386_MP_DESC_H_
 
-#include <mach_kdb.h>
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
@@ -80,19 +79,18 @@ __BEGIN_DECLS
 /*
  * The descriptor tables are together in a structure
  * allocated one per processor (except for the boot processor).
- * Note that dbtss could be conditionalized on MACH_KDB, but
- * doing so increases misconfiguration risk.
  */
 typedef struct cpu_desc_table {
        struct fake_descriptor  idt[IDTSZ] __attribute__ ((aligned (16)));
        struct fake_descriptor  gdt[GDTSZ] __attribute__ ((aligned (16)));
        struct i386_tss         ktss       __attribute__ ((aligned (16)));
-       struct i386_tss         dbtss      __attribute__ ((aligned (16)));
        struct sysenter_stack   sstk;
 } cpu_desc_table_t;
 
 typedef struct cpu_desc_table64 {
+#if !__x86_64__
        struct fake_descriptor64 idt[IDTSZ]      __attribute__ ((aligned (16)));
+#endif
        struct fake_descriptor  gdt[GDTSZ]       __attribute__ ((aligned (16)));
        struct x86_64_tss       ktss             __attribute__ ((aligned (16)));
        struct sysenter_stack   sstk             __attribute__ ((aligned (16)));
@@ -103,7 +101,6 @@ typedef struct cpu_desc_table64 {
 #define        current_idt()   (current_cpu_datap()->cpu_desc_index.cdi_idt.ptr)
 #define        current_ldt()   (current_cpu_datap()->cpu_desc_index.cdi_ldt)
 #define        current_ktss()  (current_cpu_datap()->cpu_desc_index.cdi_ktss)
-#define        current_dbtss() (current_cpu_datap()->cpu_desc_index.cdi_dbtss)
 #define        current_sstk()  (current_cpu_datap()->cpu_desc_index.cdi_sstk)
 
 #define        current_ktss64() ((struct x86_64_tss *) current_ktss())
index 73d3b1ca09b559008be38feb08050c18b5f4c4fc..ea379978071846753499f7ffbe1160ea711bca07 100644 (file)
@@ -110,17 +110,5 @@ i386_cpu_IPI(int cpu)
        }
 #endif /* MP_DEBUG */
 
-#if MACH_KDB
-#ifdef MP_DEBUG
-       if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {      /* (BRINGUP) */
-               if(kdb_cpu != cpu_number()) {
-                       trappedalready = 1;
-                       panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", 
-                               cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
-               }
-       }
-#endif /* MP_DEBUG */
-#endif
-
        lapic_send_ipi(cpu, LAPIC_VECTOR(INTERPROCESSOR));
 }
index 63a19c6a217e31054be144faed4444a38c3a2a00..0978551b2e0e76c77961733cbcdbb17c0b209edd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -34,6 +34,7 @@
 #include <i386/mp.h>
 #include <i386/proc_reg.h>
 #include <i386/mtrr.h>
+#include <i386/machine_check.h>
 
 struct mtrr_var_range {
        uint64_t  base;         /* in IA32_MTRR_PHYSBASE format */
@@ -62,6 +63,7 @@ decl_simple_lock_data(static, mtrr_lock);
 #define MTRR_LOCK()    simple_lock(&mtrr_lock);
 #define MTRR_UNLOCK()  simple_unlock(&mtrr_lock);
 
+//#define MTRR_DEBUG 1
 #if    MTRR_DEBUG
 #define DBG(x...)      kprintf(x)
 #else
@@ -692,3 +694,62 @@ pat_init(void)
        }
        ml_set_interrupts_enabled(istate);
 }
+
+#if DEBUG
+void
+mtrr_lapic_cached(void);
+void
+mtrr_lapic_cached(void)
+{
+       boolean_t       istate;
+       uint32_t        lo;
+       uint32_t        hi;
+       uint64_t        lapic_pbase;
+       uint64_t        base;
+       uint64_t        length;
+       uint32_t        type;
+       unsigned int    i;
+
+       /* Find the local APIC physical base address */
+       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+       lapic_pbase = (lo &  MSR_IA32_APIC_BASE_BASE);
+
+       DBG("mtrr_lapic_cached() on cpu %d, lapic_pbase: 0x%016llx\n",
+           get_cpu_number(), lapic_pbase);
+
+       istate = ml_set_interrupts_enabled(FALSE);
+
+       /*
+        * Search for the variable range MTRR mapping the lapic.
+        * Flip its type to WC and return.
+        */
+       for (i = 0; i < mtrr_state.var_count; i++) {
+               if (!(mtrr_state.var_range[i].mask & IA32_MTRR_PHYMASK_VALID))
+                       continue;
+               base = mtrr_state.var_range[i].base & IA32_MTRR_PHYSBASE_MASK;
+               type = (uint32_t)(mtrr_state.var_range[i].base & IA32_MTRR_PHYSBASE_TYPE);
+               length = MASK_TO_LEN(mtrr_state.var_range[i].mask);
+               DBG("%d: base: 0x%016llx size: 0x%016llx type: %d\n",
+                    i, base, length, type);
+               if (base <= lapic_pbase &&
+                   lapic_pbase <= base + length - PAGE_SIZE) {
+                       DBG("mtrr_lapic_cached() matched var: %d\n", i);
+                       mtrr_state.var_range[i].base &= ~IA32_MTRR_PHYSBASE_TYPE;
+                       mtrr_state.var_range[i].base |= MTRR_TYPE_WRITECOMBINE;
+                       mtrr_update_cpu();
+                       ml_set_interrupts_enabled(istate);
+                       return;
+               }
+       }
+
+       /*
+        * In case we didn't find a covering variable range,
+        * we slam WC into the default memory type.
+        */
+       mtrr_state.MTRRdefType = MTRR_TYPE_WRITECOMBINE;
+
+       mtrr_update_cpu(); 
+
+       ml_set_interrupts_enabled(istate);
+
+       return;
+}
+#endif /* DEBUG */
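For readers decoding the variable-range arithmetic above: each range is a PHYSBASE/PHYSMASK MSR pair, and MASK_TO_LEN recovers the range length from the mask. A worked sketch of the decoding, assuming 36 physical address bits for concreteness:

	#include <stdint.h>

	/* Sketch: recover (base, length, type) from a variable-range MTRR
	 * PHYSBASE/PHYSMASK pair; 36 physical address bits assumed. */
	static void
	mtrr_decode_sketch(uint64_t physbase, uint64_t physmask,
			   uint64_t *base, uint64_t *length, uint32_t *type)
	{
		const uint64_t addr_mask = ((1ULL << 36) - 1) & ~0xFFFULL;

		*base   = physbase & addr_mask;		/* 4K-aligned range base  */
		*type   = (uint32_t)(physbase & 0xFF);	/* e.g. 1 = write-combine */
		/* Inverting the mask over the physical-address width yields
		 * length - 1 (low 12 bits come back as ones), hence the +1. */
		*length = (~(physmask & addr_mask) & ((1ULL << 36) - 1)) + 1;
	}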
index a1fefe4e501d85fd1ecf6237aa1b5e61aea154a2..13d48fbe4bbd21b5145a58d968a0b97d16f3c9c7 100644 (file)
@@ -36,7 +36,7 @@
 #define IMAGE_AREA     (BITMAP_AREA    - HIB_MAP_SIZE)
 #define IMAGE2_AREA    (IMAGE_AREA     - HIB_MAP_SIZE)
 
-#define HIB_BASE sectINITPTB
+#define HIB_BASE segHIBB
 #define HIB_ENTRYPOINT acpi_wake_prot_entry
 
 uintptr_t pal_hib_map(uintptr_t v, uint64_t p);
index 34e5bd0a57536adb6493fa20b7e8d2fea500581f..cb83084e63e61166d45991fcf740adb462a6f44a 100644 (file)
@@ -64,12 +64,7 @@ struct pal_apic_table *apic_table = NULL;
 
 decl_simple_lock_data(static , pal_efi_lock);
 #ifdef __x86_64__
-#define PML4_PROT      (INTEL_PTE_VALID | INTEL_PTE_WRITE)
-#define INIT_PDPT_BASE (INITPT_SEG_BASE + PAGE_SIZE)
-static pml4_entry_t IDPML4[PTE_PER_PAGE] __attribute__ ((aligned (4096))) = {
-       [0]                 = (uint64_t)(INIT_PDPT_BASE | PML4_PROT),
-       [KERNEL_PML4_INDEX] = (uint64_t)(INIT_PDPT_BASE | PML4_PROT),
-};
+static pml4_entry_t IDPML4[PTE_PER_PAGE] __attribute__ ((aligned (4096)));
 uint64_t       pal_efi_saved_cr0;
 uint64_t       pal_efi_saved_cr3;
 #endif
@@ -168,6 +163,14 @@ pal_efi_call_in_64bit_mode(uint64_t func,
         return KERN_NOT_SUPPORTED;
     }
 
+    if (func < VM_MIN_KERNEL_ADDRESS) {
+        /*
+         * EFI Runtime Services must be mapped in our address
+         * space at an appropriate location.
+         */
+        return KERN_INVALID_ADDRESS;
+    }
+
     _pal_efi_call_in_64bit_mode_asm(func,
                                     efi_reg,
                                     stack_contents,
@@ -245,6 +248,8 @@ pal_efi_call_in_32bit_mode(uint32_t func,
     MARK_CPU_IDLE(cpu_number());
     pal_efi_saved_cr3 = get_cr3_raw();
     pal_efi_saved_cr0 = get_cr0();
+    IDPML4[KERNEL_PML4_INDEX] = IdlePML4[KERNEL_PML4_INDEX];
+    IDPML4[0]                = IdlePML4[KERNEL_PML4_INDEX];
     clear_ts();
     set_cr3_raw((uint64_t) ID_MAP_VTOP(IDPML4));
     
index caf0c68dbad7aa92530c5a86d6de32f70e6d6418..a70348b338a455d04a316972325957b119c3ea3f 100644 (file)
@@ -155,24 +155,9 @@ set_thread_state64(thread_t thread, x86_thread_state64_t *ts);
 static inline void
 machine_pmc_cswitch(thread_t /* old */, thread_t /* new */);
 
-static inline boolean_t
-machine_thread_pmc_eligible(thread_t);
-
 static inline void
 pmc_swi(thread_t /* old */, thread_t /*new */);
 
-static inline boolean_t
-machine_thread_pmc_eligible(thread_t t) {
-       /*
-        * NOTE: Task-level reservations are propagated to child threads via
-        * thread_create_internal.  Any mutation of task reservations forces a
-        * recalculate of t_chud (for the pmc flag) for all threads in that task.
-        * Consequently, we can simply check the current thread's flag against
-        * THREAD_PMC_FLAG.  If the result is non-zero, we SWI for a PMC switch.
-        */
-       return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE;
-}
-
 static inline void
 pmc_swi(thread_t old, thread_t new) {
        current_cpu_datap()->csw_old_thread = old;
@@ -182,7 +167,7 @@ pmc_swi(thread_t old, thread_t new) {
 
 static inline void
 machine_pmc_cswitch(thread_t old, thread_t new) {
-       if (machine_thread_pmc_eligible(old) || machine_thread_pmc_eligible(new)) {
+       if (pmc_thread_eligible(old) || pmc_thread_eligible(new)) {
                pmc_swi(old, new);
        }
 }
@@ -1675,27 +1660,6 @@ machine_set_current_thread(thread_t thread)
        current_cpu_datap()->cpu_active_thread = thread;
 }
 
-/*
- * This is called when a task is terminated, and also on exec().
- * Clear machine-dependent state that is stored on the task.
- */
-void
-machine_thread_terminate_self(void)
-{
-       task_t self_task = current_task();
-       if (self_task) {
-           user_ldt_t user_ldt = self_task->i386_ldt;
-           if (user_ldt != 0) {
-               self_task->i386_ldt = 0;
-               user_ldt_free(user_ldt);
-           }
-
-           if (self_task->task_debug != NULL) {
-               zfree(ids_zone, self_task->task_debug);
-               self_task->task_debug = NULL;
-           }    
-       }
-}
 
 /*
  * Perform machine-dependent per-thread initializations
@@ -2130,3 +2094,11 @@ copy_debug_state64(
        target->dr6 = src->dr6;
        target->dr7 = src->dr7;
 }
+
+boolean_t is_useraddr64_canonical(uint64_t addr64);
+
+boolean_t
+is_useraddr64_canonical(uint64_t addr64)
+{
+       return IS_USERADDR64_CANONICAL(addr64);
+}
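The wrapper above exposes the IS_USERADDR64_CANONICAL test. Architecturally, on a 48-bit-VA implementation a user-half address is canonical iff bit 47 and every bit above it are clear; a one-line sketch (the macro's real definition lives elsewhere in the pmap headers):

	/* Sketch: user-half canonical check for 48-bit implementations. */
	#define IS_USERADDR64_CANONICAL_SKETCH(a)	(((uint64_t)(a) >> 47) == 0)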
index bfbb48d4ba6243ce5119ae37cf2d14004edc8dd5..4db5983c11c1285f9a056de55aa9ebee8917c908 100644 (file)
@@ -244,9 +244,6 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b
        src = (void *) ((uintptr_t)src_map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK));
        dst = (void *) ((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK));
 #elif defined(__x86_64__)
-       src = PHYSMAP_PTOV(src64);
-       dst = PHYSMAP_PTOV(dst64);
-
        addr64_t debug_pa = 0;
 
        /* If either destination or source are outside the
@@ -256,10 +253,15 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b
 
        if (physmap_enclosed(src64) == FALSE) {
                src = (void *)(debugger_window_kva | (src64 & INTEL_OFFMASK));
+               dst = PHYSMAP_PTOV(dst64);
                debug_pa = src64 & PG_FRAME;
        } else if (physmap_enclosed(dst64) == FALSE) {
+               src = PHYSMAP_PTOV(src64);
                dst = (void *)(debugger_window_kva | (dst64 & INTEL_OFFMASK));
                debug_pa = dst64 & PG_FRAME;
+       } else {
+               src = PHYSMAP_PTOV(src64);
+               dst = PHYSMAP_PTOV(dst64);
        }
        /* DRK: debugger only routine, we don't bother checking for an
         * identical mapping.
index 1f064b614dfb000a4de194581fe78789457afd69..b22749df79e3ce64935e3ba068baf6815d4611f2 100644 (file)
@@ -66,7 +66,6 @@ static boolean_t      earlyTopology           = FALSE;
 static uint64_t                earlyMaxBusDelay        = DELAY_UNSET;
 static uint64_t                earlyMaxIntDelay        = DELAY_UNSET;
 
-
 /*
  * Initialize the Cstate change code.
  */
@@ -97,8 +96,8 @@ machine_idle(void)
        /*
         * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
         * were called prior to the CPU PM kext being registered.  We do
-        * this here since we know at this point since it'll be at idle
-        * where the decision using these values will be used.
+        * this here since we know at this point the values will be first
+        * used, since idle is where the decisions using these values are made.
         */
        if (earlyMaxBusDelay != DELAY_UNSET)
            ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
index c31ab2d8aa1f10a1131874c06eca5473d962309f..2e0594487a4f2068f8e700a0922aa0b898879465 100644 (file)
@@ -89,7 +89,6 @@
  */
 
 #include <string.h>
-#include <mach_kdb.h>
 #include <mach_ldebug.h>
 
 #include <libkern/OSAtomic.h>
 #include <kern/thread.h>
 #include <kern/zalloc.h>
 #include <kern/queue.h>
+#include <kern/ledger.h>
 
 #include <kern/lock.h>
 #include <kern/kalloc.h>
 #include <i386/acpi.h>
 #include <i386/pmap_internal.h>
 
-#if    MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif /* MACH_KDB */
-
 #include <vm/vm_protos.h>
 
 #include <i386/mp.h>
@@ -227,53 +220,9 @@ unsigned int       last_managed_page = 0;
 
 uint64_t pde_mapped_size;
 
-/*
- *     Locking and TLB invalidation
- */
+const boolean_t        pmap_disable_kheap_nx = TRUE;
+const boolean_t        pmap_disable_kstack_nx = TRUE;
 
-/*
- *     Locking Protocols: (changed 2/2007 JK)
- *
- *     There are two structures in the pmap module that need locking:
- *     the pmaps themselves, and the per-page pv_lists (which are locked
- *     by locking the pv_lock_table entry that corresponds to the pv_head
- *     for the list in question.)  Most routines want to lock a pmap and
- *     then do operations in it that require pv_list locking -- however
- *     pmap_remove_all and pmap_copy_on_write operate on a physical page
- *     basis and want to do the locking in the reverse order, i.e. lock
- *     a pv_list and then go through all the pmaps referenced by that list.
- *
- *      The system wide pmap lock has been removed. Now, paths take a lock
- *      on the pmap before changing its 'shape' and the reverse order lockers
- *      (coming in by phys ppn) take a lock on the corresponding pv and then
- *      retest to be sure nothing changed during the window before they locked
- *      and can then run up/down the pv lists holding the list lock. This also
- *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
- *      previously.
- */
-
-
-/*
- * PV locking
- */
-
-#define LOCK_PVH(index)                {       \
-    mp_disable_preemption();           \
-    lock_pvh_pai(index);               \
-}
-
-#define UNLOCK_PVH(index)  {      \
-    unlock_pvh_pai(index);        \
-    mp_enable_preemption();       \
-}
-
-/*
- * PV hash locking
- */
-
-#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
-
-#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
 
 #if    USLOCK_DEBUG
 extern int     max_lock_loops;
@@ -322,18 +271,6 @@ int                pt_fake_zone_index = -1;
 
 extern         long    NMIPI_acks;
 
-static inline void
-PMAP_ZINFO_SALLOC(vm_size_t bytes)
-{
-       current_thread()->tkm_shared.alloc += bytes;
-}
-
-static inline void
-PMAP_ZINFO_SFREE(vm_size_t bytes)
-{
-       current_thread()->tkm_shared.free += (bytes);
-}
-
 addr64_t       kernel64_cr3;
 boolean_t      no_shared_cr3 = FALSE;  /* -no_shared_cr3 boot arg */
 
@@ -557,7 +494,7 @@ pmap_map(
        ps = PAGE_SIZE;
        while (start_addr < end_addr) {
                pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
-                          (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
+                          (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, FALSE);
                virt += ps;
                start_addr += ps;
        }
@@ -644,9 +581,6 @@ pmap_init_high_shared(void)
 
        vm_offset_t haddr;
        spl_t s;
-#if MACH_KDB
-       struct i386_tss *ttss;
-#endif
 
        cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index;
 
@@ -696,17 +630,6 @@ pmap_init_high_shared(void)
        fix_desc(&temp_fake_desc, 1);
        *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc;
        kprintf("KTSS: 0x%x, ",haddr);
-#if MACH_KDB
-       /* remap dbtss up high and put new high addr into gdt */
-       haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
-                                       (vm_offset_t) &master_dbtss, 1);
-       temp_fake_desc = tss_desc_pattern;
-       temp_fake_desc.offset = (vm_offset_t) haddr;
-       fix_desc(&temp_fake_desc, 1);
-       *(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc;
-       ttss = (struct i386_tss *)haddr;
-       kprintf("DBTSS: 0x%x, ",haddr);
-#endif /* MACH_KDB */
 
        /* remap dftss up high and put new high addr into gdt */
        haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
@@ -742,7 +665,7 @@ pmap_bootstrap(
        boolean_t               IA32e)
 {
        vm_offset_t     va;
-       int i;
+       unsigned i;
        pdpt_entry_t *pdpt;
        spl_t s;
 
@@ -1133,7 +1056,7 @@ pmap_lowmem_finalize(void)
        /*
         * Update wired memory statistics for early boot pages
         */
-       PMAP_ZINFO_PALLOC(bootstrap_wired_pages * PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);
 
        /*
         * Free all pages in pmap regions below the base:
@@ -1375,11 +1298,12 @@ pmap_is_empty(
  */
 pmap_t
 pmap_create(
-           vm_map_size_t       sz,
-           boolean_t           is_64bit)
+           ledger_t            ledger,
+           vm_map_size_t                       sz,
+           boolean_t                           is_64bit)
 {
-       pmap_t                  p;
-       int             i;
+       pmap_t          p;
+       unsigned        i;
        vm_offset_t     va;
        vm_size_t       size;
        pdpt_entry_t    *pdpt;
@@ -1410,6 +1334,8 @@ pmap_create(
        p->stats.resident_count = 0;
        p->stats.resident_max = 0;
        p->stats.wired_count = 0;
+       ledger_reference(ledger);
+       p->ledger = ledger;
        p->ref_count = 1;
        p->nx_enabled = 1;
        p->pm_shared = FALSE;
@@ -1437,7 +1363,7 @@ pmap_create(
                va = (vm_offset_t)p->dirbase;
                p->pdirbase = kvtophys(va);
 
-               PMAP_ZINFO_SALLOC(NBPTD);
+               PMAP_ZINFO_SALLOC(p,NBPTD);
 
                template = INTEL_PTE_VALID;
                for (i = 0; i< NPGPTD; i++, pdpt++ ) {
@@ -1463,7 +1389,7 @@ pmap_create(
 
                OSAddAtomic(1,  &inuse_ptepages_count);
                OSAddAtomic64(1,  &alloc_ptepages_count);
-               PMAP_ZINFO_SALLOC(PAGE_SIZE);
+               PMAP_ZINFO_SALLOC(p, PAGE_SIZE);
 
                /* allocate the vm_objs to hold the pdpt, pde and pte pages */
 
@@ -1485,7 +1411,7 @@ pmap_create(
                if (!is_64bit) {
                        while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
                                splx(s);
-                               pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
+                               pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */
                                s = splhigh();
                        }
                        pmap_store_pte(pdp, high_shared_pde);
@@ -1559,7 +1485,7 @@ pmap_set_4GB_pagezero(pmap_t p)
        PMAP_LOCK(p);
        while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
                PMAP_UNLOCK(p);
-               pmap_expand_pml4(p, 0x0);
+               pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(p);
        }
        kern_pdptp = kernel_pmap->pm_pdpt;
@@ -1585,8 +1511,10 @@ pmap_clear_4GB_pagezero(pmap_t p)
        p->pm_task_map = TASK_MAP_64BIT;
 
        istate = ml_set_interrupts_enabled(FALSE);
+
        if (current_cpu_datap()->cpu_task_map == TASK_MAP_64BIT_SHARED)
-         current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
+               current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
+
        pmap_load_kernel_cr3();
 
        user_pdptp = pmap64_pdpt(p, 0x0);
@@ -1665,10 +1593,10 @@ pmap_destroy(
         */
        if (!cpu_64bit) {
                OSAddAtomic(-p->pm_obj->resident_page_count,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(p->pm_obj->resident_page_count * PAGE_SIZE);
+               PMAP_ZINFO_PFREE(p, p->pm_obj->resident_page_count * PAGE_SIZE);
 
                kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
-               PMAP_ZINFO_SFREE(NBPTD);
+               PMAP_ZINFO_SFREE(p, NBPTD);
 
                zfree(pdpt_zone, (void *)p->pm_hold);
 
@@ -1679,7 +1607,7 @@ pmap_destroy(
 
                /* free 64 bit mode structs */
                kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
-               PMAP_ZINFO_SFREE(PAGE_SIZE);
+               PMAP_ZINFO_SFREE(p, PAGE_SIZE);
 
                inuse_ptepages += p->pm_obj_pml4->resident_page_count;
                vm_object_deallocate(p->pm_obj_pml4);
@@ -1691,8 +1619,9 @@ pmap_destroy(
                vm_object_deallocate(p->pm_obj);
 
                OSAddAtomic(-(inuse_ptepages+1),  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(inuse_ptepages * PAGE_SIZE);
+               PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE);
        }
+       ledger_dereference(p->ledger);
 
        zfree(pmap_zone, p);
 
@@ -1785,34 +1714,33 @@ pmap_protect(
 
                while (spte < epte) {
 
-                   if (*spte & INTEL_PTE_VALID) {
-                     
-                       if (prot & VM_PROT_WRITE)
-                           pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_WRITE));
-                       else
-                           pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_WRITE));
+                       if (*spte & INTEL_PTE_VALID) {
+                               if (prot & VM_PROT_WRITE)
+                                       pmap_update_pte(spte, 0, INTEL_PTE_WRITE);
+                               else
+                                       pmap_update_pte(spte, INTEL_PTE_WRITE, 0);
 
-                       if (set_NX == TRUE)
-                           pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_NX));
-                       else
-                           pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_NX));
+                               if (set_NX == TRUE)
+                                       pmap_update_pte(spte,0, INTEL_PTE_NX);
+                               else
+                                       pmap_update_pte(spte, INTEL_PTE_NX, 0);
 
-                       num_found++;
-                   }
-                   spte++;
+                               num_found++;
+                       }
+                       spte++;
                }
            }
            sva = lva;
        }
        if (num_found)
        {
-           PMAP_UPDATE_TLBS(map, orig_sva, eva);
+               PMAP_UPDATE_TLBS(map, orig_sva, eva);
        }
 
        PMAP_UNLOCK(map);
 
        PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
+           0, 0, 0, 0, 0);
 
 }
 
@@ -1830,7 +1758,7 @@ pmap_map_block(
     uint32_t page;
 
     for (page = 0; page < size; page++) {
-       pmap_enter(pmap, va, pa, prot, attr, TRUE);
+       pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
        va += PAGE_SIZE;
        pa++;
     }
@@ -1863,10 +1791,11 @@ pmap_extract(
        return (paddr);
 }
 
-void
+kern_return_t
 pmap_expand_pml4(
                 pmap_t map,
-                vm_map_offset_t vaddr)
+                vm_map_offset_t vaddr,
+                __unused unsigned int options)
 {
        register vm_page_t      m;
        register pmap_paddr_t   pa;
@@ -1907,7 +1836,7 @@ pmap_expand_pml4(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj_pml4);
@@ -1923,8 +1852,8 @@ pmap_expand_pml4(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
        pmap_set_noencrypt(pn);
 
@@ -1949,14 +1878,11 @@ pmap_expand_pml4(
 
        PMAP_UNLOCK(map);
 
-       return;
-
+       return KERN_SUCCESS;
 }
 
-void
-pmap_expand_pdpt(
-                pmap_t map,
-                vm_map_offset_t vaddr)
+kern_return_t
+pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, __unused unsigned int options)
 {
        register vm_page_t      m;
        register pmap_paddr_t   pa;
@@ -1970,7 +1896,7 @@ pmap_expand_pdpt(
        spl = splhigh();
        while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
                splx(spl);
-               pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
+               pmap_expand_pml4(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pdpt entry */
                spl = splhigh();
        }
        splx(spl);
@@ -2000,7 +1926,7 @@ pmap_expand_pdpt(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj_pdpt);
@@ -2016,8 +1942,8 @@ pmap_expand_pdpt(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
        pmap_set_noencrypt(pn);
 
@@ -2042,8 +1968,7 @@ pmap_expand_pdpt(
 
        PMAP_UNLOCK(map);
 
-       return;
-
+       return KERN_SUCCESS;
 }
 
 
@@ -2063,10 +1988,11 @@ pmap_expand_pdpt(
  *     has been expanded enough.
  *     (We won't loop forever, since page tables aren't shrunk.)
  */
-void
+kern_return_t
 pmap_expand(
        pmap_t          map,
-       vm_map_offset_t vaddr)
+       vm_map_offset_t vaddr,
+       __unused unsigned int options)
 {
        pt_entry_t              *pdp;
        register vm_page_t      m;
@@ -2084,7 +2010,7 @@ pmap_expand(
                spl = splhigh();
                while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
                        splx(spl);
-                       pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
+                       pmap_expand_pdpt(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */
                        spl = splhigh();
                }
                splx(spl);
@@ -2115,7 +2041,7 @@ pmap_expand(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj);
@@ -2132,8 +2058,8 @@ pmap_expand(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
        pmap_set_noencrypt(pn);
 
@@ -2162,7 +2088,7 @@ pmap_expand(
 
        PMAP_UNLOCK(map);
 
-       return;
+       return KERN_SUCCESS;
 }
 
 
@@ -2287,7 +2213,7 @@ pmap_collect(
                        VM_PAGE_FREE(m);
 
                        OSAddAtomic(-1,  &inuse_ptepages_count);
-                       PMAP_ZINFO_PFREE(PAGE_SIZE);
+                       PMAP_ZINFO_PFREE(map, PAGE_SIZE);
                    }
 
                    PMAP_LOCK(p);
@@ -2379,105 +2305,6 @@ kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
 }
 #endif /* CONFIG_DTRACE */
 
-#if    MACH_KDB
-
-/* show phys page mappings and attributes */
-
-extern void    db_show_page(pmap_paddr_t pa);
-
-#if 0
-void
-db_show_page(pmap_paddr_t pa)
-{
-       pv_entry_t      pv_h;
-       int             pai;
-       char            attr;
-       
-       pai = pa_index(pa);
-       pv_h = pai_to_pvh(pai);
-
-       attr = pmap_phys_attributes[pai];
-       printf("phys page %llx ", pa);
-       if (attr & PHYS_MODIFIED)
-               printf("modified, ");
-       if (attr & PHYS_REFERENCED)
-               printf("referenced, ");
-       if (pv_h->pmap || pv_h->next)
-               printf(" mapped at\n");
-       else
-               printf(" not mapped\n");
-       for (; pv_h; pv_h = pv_h->next)
-               if (pv_h->pmap)
-                       printf("%llx in pmap %p\n", pv_h->va, pv_h->pmap);
-}
-#endif
-
-#endif /* MACH_KDB */
-
-#if    MACH_KDB
-#if 0
-void db_kvtophys(vm_offset_t);
-void db_show_vaddrs(pt_entry_t  *);
-
-/*
- *     print out the results of kvtophys(arg)
- */
-void
-db_kvtophys(
-       vm_offset_t     vaddr)
-{
-       db_printf("0x%qx", kvtophys(vaddr));
-}
-
-/*
- *     Walk the pages tables.
- */
-void
-db_show_vaddrs(
-       pt_entry_t      *dirbase)
-{
-       pt_entry_t      *ptep, *pdep, tmp;
-       unsigned int    x, y, pdecnt, ptecnt;
-
-       if (dirbase == 0) {
-               dirbase = kernel_pmap->dirbase;
-       }
-       if (dirbase == 0) {
-               db_printf("need a dirbase...\n");
-               return;
-       }
-       dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
-
-       db_printf("dirbase: 0x%x\n", dirbase);
-
-       pdecnt = ptecnt = 0;
-       pdep = &dirbase[0];
-       for (y = 0; y < NPDEPG; y++, pdep++) {
-               if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
-                       continue;
-               }
-               pdecnt++;
-               ptep = (pt_entry_t *) ((unsigned long)(*pdep) & ~INTEL_OFFMASK);
-               db_printf("dir[%4d]: 0x%x\n", y, *pdep);
-               for (x = 0; x < NPTEPG; x++, ptep++) {
-                       if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
-                               continue;
-                       }
-                       ptecnt++;
-                       db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
-                               x,
-                               *ptep,
-                               (y << 22) | (x << 12),
-                               *ptep & ~INTEL_OFFMASK);
-               }
-       }
-
-       db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
-
-}
-#endif
-#endif /* MACH_KDB */
-
 #include <mach_vm_debug.h>
 #if    MACH_VM_DEBUG
 #include <vm/vm_debug.h>
@@ -2620,7 +2447,7 @@ pmap_cpu_alloc(boolean_t is_boot_cpu)
                  spl_t s;
                        s = splhigh();
                        while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
-                               pmap_expand(kernel_pmap, (vm_map_offset_t)address);
+                               pmap_expand(kernel_pmap, (vm_map_offset_t)address, PMAP_EXPAND_OPTIONS_NONE);
                        * (int *) pte = 0; 
                        cp->mapwindow[i].prv_CADDR = (caddr_t) address;
                        cp->mapwindow[i].prv_CMAP = pte;
@@ -2929,7 +2756,7 @@ dump_4GB_pdpt(pmap_t p)
        spl = splhigh();
        while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
                splx(spl);
-               pmap_expand_pml4(p, 0x0);
+               pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE);
                spl = splhigh();
        }
        kern_pdptp = kernel_pmap->pm_pdpt;
index a168562c90c89fb766386611fcc58c1ff3089262..5cc91b6e252431552db92b5cde3bc88ea4288ee7 100644 (file)
 
 #endif /* ASSEMBLER */
 
-#define NPGPTD          4
-#define PDESHIFT        21
-#define PTEMASK         0x1ff
-#define PTEINDX         3
-
-#define PTESHIFT        12
+#define NPGPTD          4ULL
+#define PDESHIFT        21ULL
+#define PTEMASK         0x1ffULL
+#define PTEINDX         3ULL
 
+#define PTESHIFT        12ULL
 
+#ifdef __i386__
 #define INITPT_SEG_BASE  0x100000
-#define INITGDT_SEG_BASE 0x106000
-#define SLEEP_SEG_BASE   0x107000
+#endif
 
 #ifdef __x86_64__
 #define LOW_4GB_MASK   ((vm_offset_t)0x00000000FFFFFFFFUL)
 #define NBPTD           (NPGPTD << PAGE_SHIFT)
 #define NPDEPTD         (NBPTD / (sizeof (pd_entry_t)))
 #define NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NBPDE           (1 << PDESHIFT)
+#define NBPDE           (1ULL << PDESHIFT)
 #define PDEMASK         (NBPDE - 1)
 
 #define PTE_PER_PAGE   512 /* number of PTE's per page on any level */
@@ -153,7 +152,7 @@ typedef uint64_t        pdpt_entry_t;
 #define NPDPTPG         (PAGE_SIZE/(sizeof (pdpt_entry_t)))
 #define PDPTSHIFT       30
 #define PDPTPGSHIFT     9
-#define NBPDPT          (1 << PDPTSHIFT)
+#define NBPDPT          (1ULL << PDPTSHIFT)
 #define PDPTMASK        (NBPDPT-1)
 #define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
 
@@ -161,7 +160,7 @@ typedef uint64_t        pd_entry_t;
 #define NPDPG           (PAGE_SIZE/(sizeof (pd_entry_t)))
 #define PDSHIFT         21
 #define PDPGSHIFT       9
-#define NBPD            (1 << PDSHIFT)
+#define NBPD            (1ULL << PDSHIFT)
 #define PDMASK          (NBPD-1)
 #define PD_ENTRY_NULL   ((pd_entry_t *) 0)
 
@@ -169,7 +168,7 @@ typedef uint64_t        pt_entry_t;
 #define NPTPG           (PAGE_SIZE/(sizeof (pt_entry_t)))
 #define PTSHIFT         12
 #define PTPGSHIFT       9
-#define NBPT            (1 << PTSHIFT)
+#define NBPT            (1ULL << PTSHIFT)
 #define PTMASK          (NBPT-1)
 #define PT_ENTRY_NULL  ((pt_entry_t *) 0)
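The ULL suffixes added above matter because these constants feed 64-bit address arithmetic: without them a product such as index * (1 << PDESHIFT) is computed at int width and overflows. A self-contained illustration with hypothetical values:

	#include <stdint.h>
	#include <stdio.h>

	/* With NBPDE as (1 << 21) the multiply below happens at int width
	 * and overflows for idx >= 2048; with (1ULL << 21) the index is
	 * promoted to 64 bits before the multiply. */
	int
	main(void)
	{
		int      idx  = 4096;			/* large directory index  */
		uint64_t bad  = idx * (1 << 21);	/* int overflow: UB, wraps */
		uint64_t good = idx * (1ULL << 21);	/* 0x200000000 as intended */

		printf("bad=0x%llx good=0x%llx\n",
		       (unsigned long long)bad, (unsigned long long)good);
		return 0;
	}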
 
@@ -234,58 +233,6 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
 #endif
 }
 
-/*
- * Atomic 64-bit compare and exchange of a page table entry.
- */
-static inline boolean_t
-pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
-{
-       boolean_t               ret;
-
-#ifdef __i386__
-       /*
-        * Load the old value into %edx:%eax
-        * Load the new value into %ecx:%ebx
-        * Compare-exchange-8bytes at address entryp (loaded in %edi)
-        * If the compare succeeds, the new value is stored, return TRUE.
-        * Otherwise, no swap is made, return FALSE.
-        */
-       asm volatile(
-               "       lock; cmpxchg8b (%1)    \n\t"
-               "       setz    %%al            \n\t"
-               "       movzbl  %%al,%0"
-               : "=a" (ret)
-               : "D" (entryp),
-                 "a" ((uint32_t)old),
-                 "d" ((uint32_t)(old >> 32)),
-                 "b" ((uint32_t)new),
-                 "c" ((uint32_t)(new >> 32))
-               : "memory");
-#else
-       /*
-        * Load the old value into %rax
-        * Load the new value into another register
-        * Compare-exchange-quad at address entryp
-        * If the compare succeeds, the new value is stored, return TRUE.
-        * Otherwise, no swap is made, return FALSE.
-        */
-       asm volatile(
-               "       lock; cmpxchgq %2,(%3)  \n\t"
-               "       setz    %%al            \n\t"
-               "       movzbl  %%al,%0"
-               : "=a" (ret)
-               : "a" (old),
-                 "r" (new),
-                 "r" (entryp)
-               : "memory");
-#endif
-       return ret;
-}
-
-#define pmap_update_pte(entryp, old, new) \
-       while (!pmap_cmpx_pte((entryp), (old), (new)))
-
-
 /* in 64 bit spaces, the number of each type of page in the page tables */
 #define NPML4PGS        (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
 #define NPDPTPGS        (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
@@ -302,14 +249,15 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
 #define KERNEL_UBER_BASE       (0ULL - NBPML4)
 #define KERNEL_UBER_BASE_HI32  ((uint32_t)(KERNEL_UBER_BASE >> 32))
 #else
-#define KERNEL_PML4_INDEX      511
+#define KERNEL_PML4_INDEX              511
 #define KERNEL_KEXTS_INDEX     510     /* Home of KEXTs - the basement */
-#define KERNEL_PHYSMAP_INDEX   509     /* virtual to physical map */ 
+#define KERNEL_PHYSMAP_PML4_INDEX      509     /* virtual to physical map */ 
 #define KERNEL_BASE            (0ULL - NBPML4)
 #define KERNEL_BASEMENT                (KERNEL_BASE - NBPML4)
 #endif
 
 #define        VM_WIMG_COPYBACK        VM_MEM_COHERENT
+#define        VM_WIMG_COPYBACKLW      VM_WIMG_COPYBACK
 #define        VM_WIMG_DEFAULT         VM_MEM_COHERENT
 /* ?? intel ?? */
 #define VM_WIMG_IO             (VM_MEM_COHERENT |      \
@@ -317,7 +265,7 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
 #define VM_WIMG_WTHRU          (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
 /* write combining mode, aka store gather */
 #define VM_WIMG_WCOMB          (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) 
-
+#define        VM_WIMG_INNERWBACK      VM_MEM_COHERENT
 /*
  * Pte related macros
  */
@@ -426,19 +374,19 @@ enum  high_fixed_addresses {
  *     without using the bit fields).
  */
 
-#define INTEL_PTE_VALID                0x00000001
-#define INTEL_PTE_WRITE                0x00000002
-#define INTEL_PTE_RW           0x00000002
-#define INTEL_PTE_USER         0x00000004
-#define INTEL_PTE_WTHRU                0x00000008
-#define INTEL_PTE_NCACHE       0x00000010
-#define INTEL_PTE_REF          0x00000020
-#define INTEL_PTE_MOD          0x00000040
-#define INTEL_PTE_PS           0x00000080
-#define INTEL_PTE_PTA          0x00000080
-#define INTEL_PTE_GLOBAL       0x00000100
-#define INTEL_PTE_WIRED                0x00000200
-#define INTEL_PDPTE_NESTED     0x00000400
+#define INTEL_PTE_VALID                0x00000001ULL
+#define INTEL_PTE_WRITE                0x00000002ULL
+#define INTEL_PTE_RW           0x00000002ULL
+#define INTEL_PTE_USER         0x00000004ULL
+#define INTEL_PTE_WTHRU                0x00000008ULL
+#define INTEL_PTE_NCACHE       0x00000010ULL
+#define INTEL_PTE_REF          0x00000020ULL
+#define INTEL_PTE_MOD          0x00000040ULL
+#define INTEL_PTE_PS           0x00000080ULL
+#define INTEL_PTE_PTA          0x00000080ULL
+#define INTEL_PTE_GLOBAL       0x00000100ULL
+#define INTEL_PTE_WIRED                0x00000200ULL
+#define INTEL_PDPTE_NESTED     0x00000400ULL
 #define INTEL_PTE_PFN          PG_FRAME
 
 #define INTEL_PTE_NX           (1ULL << 63)
@@ -477,14 +425,16 @@ extern pt_entry_t PTmap[], APTmap[], Upte;
 extern pd_entry_t      PTD[], APTD[], PTDpde[], APTDpde[], Upde;
 extern pmap_paddr_t    lo_kernel_cr3;
 extern pdpt_entry_t    *IdlePDPT64;
+extern pdpt_entry_t    IdlePDPT[];
+extern pml4_entry_t    IdlePML4[];
 #else
 extern pt_entry_t      *PTmap;
+extern pdpt_entry_t    *IdlePDPT;
+extern pml4_entry_t    *IdlePML4;
 #endif
 extern boolean_t       no_shared_cr3;
 extern addr64_t                kernel64_cr3;
 extern pd_entry_t      *IdlePTD;       /* physical addr of "Idle" state PTD */
-extern pdpt_entry_t    IdlePDPT[];
-extern pml4_entry_t    IdlePML4[];
 
 extern uint64_t                pmap_pv_hashlist_walks;
 extern uint64_t                pmap_pv_hashlist_cnts;
@@ -503,25 +453,46 @@ extern uint32_t           pmap_kernel_text_ps;
 #define        vtopte(va)      (PTmap + i386_btop((vm_offset_t)va))
 #endif
 
+
 #ifdef __x86_64__
 #define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
 
-#define PHYSMAP_BASE   KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0)
+extern uint64_t physmap_base, physmap_max;
+
 #define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
-#define PHYSMAP_PTOV(x)        ((void *)(((uint64_t)(x)) + PHYSMAP_BASE))
 
 static inline boolean_t physmap_enclosed(addr64_t a) {
        return (a < (NPHYSMAP * GB));
 }
-#endif
+
+static inline void * PHYSMAP_PTOV_check(void *paddr) {
+       uint64_t pvaddr = (uint64_t)paddr + physmap_base;
+
+       if (__improbable(pvaddr >= physmap_max))
+               panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
+                     pvaddr, physmap_base, physmap_max);
+
+       return (void *)pvaddr;
+}
+
+#define PHYSMAP_PTOV(x)        (PHYSMAP_PTOV_check((void*) (x)))
+
+/*
+ * For KASLR, we alias the master processor's IDT and GDT at fixed
+ * virtual addresses to defeat SIDT/SGDT address leakage.
+ */
+#define MASTER_IDT_ALIAS       (VM_MIN_KERNEL_ADDRESS + 0x0000)
+#define MASTER_GDT_ALIAS       (VM_MIN_KERNEL_ADDRESS + 0x1000)
+
+/*
+ * The low global vector page is mapped at a fixed alias also.
+ */
+#define LOWGLOBAL_ALIAS                (VM_MIN_KERNEL_ADDRESS + 0x2000)
+
+#endif /*__x86_64__ */
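With KASLR, the physmap is no longer anchored at a compile-time PHYSMAP_BASE: its bounds now come from the runtime variables physmap_base and physmap_max, and every translation is funneled through PHYSMAP_PTOV_check(), which panics on an out-of-window address. A minimal usage sketch, not from this commit (the pml4 pointer is assumed to reference a valid, present PML4 entry):

        /* sketch: walk one level down from a PML4 entry via the physmap alias */
        pml4_entry_t newpf = *pml4 & PG_FRAME;          /* physical frame of the PDPT page */
        pdpt_entry_t *pdptp = (pdpt_entry_t *)PHYSMAP_PTOV(newpf);
        /* PHYSMAP_PTOV_check() panics if newpf + physmap_base >= physmap_max */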
 
 typedef        volatile long   cpu_set;        /* set of CPUs - must be <= 32 */
                                        /* changed by other processors */
-struct md_page {
-  int pv_list_count;
-  TAILQ_HEAD(,pv_entry)  pv_list;
-};
-
 #include <vm/vm_page.h>
 
 /*
@@ -551,6 +522,7 @@ struct pmap {
        struct pmap_statistics  stats;  /* map statistics */
        int             ref_count;      /* reference count */
         int            nx_enabled;
+       ledger_t        ledger;         /* ledger tracking phys mappings */
 };
 
 
@@ -639,9 +611,10 @@ extern void                pmap_update_interrupt(void);
 extern addr64_t                (kvtophys)(
                                vm_offset_t     addr);
 
-extern void            pmap_expand(
+extern kern_return_t   pmap_expand(
                                pmap_t          pmap,
-                               vm_map_offset_t addr);
+                               vm_map_offset_t addr,
+                               unsigned int options);
 #if    !defined(__x86_64__)
 extern pt_entry_t      *pmap_pte(
                                struct pmap     *pmap,
@@ -932,6 +905,8 @@ extern boolean_t pmap_is_empty(pmap_t               pmap,
 
 #define MACHINE_BOOTSTRAPPTD   1       /* Static bootstrap page-tables */
 
+kern_return_t
+pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
 
 #endif /* ASSEMBLER */
 
index 576b9c089858cb919a2f582581816e45049c2592..abe1e24a3cf44e3fd388e0ac36e118f20a1a1c0f 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <vm/pmap.h>
+#include <kern/ledger.h>
 #include <i386/pmap_internal.h>
 
+
 /*
  *     Each entry in the pv_head_table is locked by a bit in the
  *     pv_lock_table.  The lock bits are accessed by the physical
index 37757f19183aaa2b7552ca864a5db31121365999..22bf95cece3bda35d0c0a3a7c6ae2a237df3c0f9 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-#include <vm/pmap.h>
-#include <sys/kdebug.h>
 
+#ifndef        _I386_PMAP_INTERNAL_
+#define _I386_PMAP_INTERNAL_
 #ifdef MACH_KERNEL_PRIVATE
 
+#include <vm/pmap.h>
+#include <sys/kdebug.h>
+#include <kern/ledger.h>
+
 /*
  * pmap locking
  */
@@ -61,13 +65,15 @@ extern      boolean_t       pmap_trace;
 #define PMAP_TRACE_CONSTANT(x,a,b,c,d,e)                               \
        KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e);                             \
 
-void           pmap_expand_pml4(
+kern_return_t  pmap_expand_pml4(
                        pmap_t          map,
-                       vm_map_offset_t v);
+                       vm_map_offset_t v,
+                       unsigned int options);
 
-void           pmap_expand_pdpt(
+kern_return_t  pmap_expand_pdpt(
                        pmap_t          map,
-                       vm_map_offset_t v);
+                       vm_map_offset_t v,
+                       unsigned int options);
 
 void           phys_attribute_set(
                        ppnum_t         phys,
@@ -273,20 +279,19 @@ extern pv_rooted_entry_t pv_head_table;   /* array of entries, one per page */
 extern event_t mapping_replenish_event;
 
 static inline void     PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
-
+       pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
        simple_lock(&pv_hashed_free_list_lock);
        /* If the kernel reserved pool is low, let non-kernel mappings allocate
         * synchronously, possibly subject to a throttle.
         */
-       if ((pv_hashed_kern_free_count >= pv_hashed_kern_low_water_mark) &&
-           (*pvh_ep = pv_hashed_free_list) != 0) {
+       if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
                pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
                pv_hashed_free_count--;
        }
 
        simple_unlock(&pv_hashed_free_list_lock);
 
-       if (pv_hashed_free_count < pv_hashed_low_water_mark) {
+       if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
                if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
                        thread_wakeup(&mapping_replenish_event);
        }
@@ -303,6 +308,7 @@ static inline void  PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry
 extern unsigned pmap_kern_reserve_alloc_stat;
 
 static inline void     PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
+       pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
        simple_lock(&pv_hashed_kern_free_list_lock);
 
        if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
@@ -373,6 +379,13 @@ static inline void pmap_pv_throttle(__unused pmap_t p) {
 #define        PHYS_PTA        INTEL_PTE_PTA
 #define        PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
 
+extern const boolean_t pmap_disable_kheap_nx;
+extern const boolean_t pmap_disable_kstack_nx;
+
+#define PMAP_EXPAND_OPTIONS_NONE (0x0)
+#define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
+#define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)
+
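These option bits pair with the signature change, later in this commit, that makes pmap_expand(), pmap_expand_pdpt() and pmap_expand_pml4() return a kern_return_t: PMAP_EXPAND_OPTIONS_NOWAIT lets a caller fail instead of sleeping for a free page, and PMAP_EXPAND_OPTIONS_NOENTER grows the paging hierarchy without installing a translation. A hedged sketch of the non-blocking path (the specific error code, presumably KERN_RESOURCE_SHORTAGE, is not shown in this diff):

        /* sketch: expand without sleeping; propagate failure to the caller */
        kern_return_t kr = pmap_expand(map, vaddr, PMAP_EXPAND_OPTIONS_NOWAIT);
        if (kr != KERN_SUCCESS)
                return kr;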
 /*
  *     Amount of virtual memory mapped by one
  *     page-directory entry.
@@ -422,7 +435,7 @@ static inline void pmap_pv_throttle(__unused pmap_t p) {
 extern uint64_t pde_mapped_size;
 
 extern char            *pmap_phys_attributes;
-extern unsigned int    last_managed_page;
+extern ppnum_t         last_managed_page;
 
 extern ppnum_t lowest_lo;
 extern ppnum_t lowest_hi;
@@ -613,7 +626,7 @@ pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corru
 
 static inline pmap_pagetable_corruption_action_t
 pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
-       pmap_pv_assertion_t     action = PMAP_ACTION_ASSERT;
+       pmap_pagetable_corruption_action_t      action = PMAP_ACTION_ASSERT;
        pmap_pagetable_corruption_t     suppress_reason = PTE_VALID;
        ppnum_t                 suppress_ppn = 0;
        pt_entry_t cpte = *ptep;
@@ -650,7 +663,7 @@ pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *
                        action = PMAP_ACTION_RETRY;
                        goto pmap_cpc_exit;
                }
-       } while((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink)) != pv_h);
+       } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));
 
        /* Discover root entries with a Hamming
         * distance of 1 from the supplied
@@ -732,12 +745,12 @@ pmap_pv_remove_retry:
        pvh_e = PV_HASHED_ENTRY_NULL;
        pv_h = pai_to_pvh(ppn_to_pai(ppn));
 
-       if (pv_h->pmap == PMAP_NULL) {
+       if (__improbable(pv_h->pmap == PMAP_NULL)) {
                pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
                if (pac == PMAP_ACTION_IGNORE)
                        goto pmap_pv_remove_exit;
                else if (pac == PMAP_ACTION_ASSERT)
-                       panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): null pv_list!", pmap, vaddr, ppn, *pte);
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte);
                else if (pac == PMAP_ACTION_RETRY_RELOCK) {
                        LOCK_PVH(ppn_to_pai(*ppnp));
                        pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
@@ -790,8 +803,8 @@ pmap_pv_remove_retry:
                LOCK_PV_HASH(pvhash_idx);
                pprevh = pvhash(pvhash_idx);
                if (PV_HASHED_ENTRY_NULL == *pprevh) {
-                       panic("pmap_pv_remove(%p,0x%llx,0x%x): empty hash",
-                             pmap, vaddr, ppn);
+                       panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash",
+                           pmap, vaddr, ppn, *pte, pte);
                }
                pvh_e = *pprevh;
                pmap_pv_hashlist_walks++;
@@ -810,7 +823,7 @@ pmap_pv_remove_retry:
                        pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
 
                        if (pac == PMAP_ACTION_ASSERT)
-                               panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, pv_h->pmap, pv_h->va);
+                               panic("pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, pv_h->va);
                        else {
                                UNLOCK_PV_HASH(pvhash_idx);
                                if (pac == PMAP_ACTION_RETRY_RELOCK) {
@@ -841,31 +854,45 @@ pmap_pv_remove_exit:
 
 extern int     pt_fake_zone_index;
 static inline void
-PMAP_ZINFO_PALLOC(vm_size_t bytes)
+PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
 {
        thread_t thr = current_thread();
        task_t task;
        zinfo_usage_t zinfo;
 
-       thr->tkm_private.alloc += bytes;
+       pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
+
        if (pt_fake_zone_index != -1 && 
            (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                OSAddAtomic64(bytes, (int64_t *)&zinfo[pt_fake_zone_index].alloc);
 }
 
 static inline void
-PMAP_ZINFO_PFREE(vm_size_t bytes)
+PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
 {
        thread_t thr = current_thread();
        task_t task;
        zinfo_usage_t zinfo;
 
-       thr->tkm_private.free += bytes;
+       pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
+
        if (pt_fake_zone_index != -1 && 
            (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                OSAddAtomic64(bytes, (int64_t *)&zinfo[pt_fake_zone_index].free);
 }
 
+static inline void
+PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
+{
+       pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
+}
+
+static inline void
+PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
+{
+       pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
+}
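The ZINFO helpers now take the pmap so page-table memory is charged against the owning task's ledger (task_ledgers.tkm_private for private, tkm_shared for shared allocations) instead of a per-thread counter. A short sketch of the symmetric credit/debit pairing, mirroring the call sites later in this commit ("map" stands for whichever pmap owns the page):

        /* sketch: account a page-table page over its lifetime */
        OSAddAtomic(+1, &inuse_ptepages_count);
        PMAP_ZINFO_PALLOC(map, PAGE_SIZE);      /* credits task_ledgers.tkm_private */
        /* ... page in use ... */
        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);       /* debits the same ledger on free */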
+
 extern boolean_t       pmap_initialized;/* Has pmap_init completed? */
 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
 
@@ -893,6 +920,70 @@ void               phys_attribute_clear(
 #endif
 void   pmap_pcid_configure(void);
 
+
+/*
+ * Atomic 64-bit compare and exchange of a page table entry.
+ */
+static inline boolean_t
+pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
+{
+       boolean_t               ret;
+
+#ifdef __i386__
+       /*
+        * Load the old value into %edx:%eax
+        * Load the new value into %ecx:%ebx
+        * Compare-exchange-8bytes at address entryp (loaded in %edi)
+        * If the compare succeeds, the new value is stored, return TRUE.
+        * Otherwise, no swap is made, return FALSE.
+        */
+       asm volatile(
+               "       lock; cmpxchg8b (%1)    \n\t"
+               "       setz    %%al            \n\t"
+               "       movzbl  %%al,%0"
+               : "=a" (ret)
+               : "D" (entryp),
+                 "a" ((uint32_t)old),
+                 "d" ((uint32_t)(old >> 32)),
+                 "b" ((uint32_t)new),
+                 "c" ((uint32_t)(new >> 32))
+               : "memory");
+#else
+       /*
+        * Load the old value into %rax
+        * Load the new value into another register
+        * Compare-exchange-quad at address entryp
+        * If the compare succeeds, the new value is stored, return TRUE.
+        * Otherwise, no swap is made, return FALSE.
+        */
+       asm volatile(
+               "       lock; cmpxchgq %2,(%3)  \n\t"
+               "       setz    %%al            \n\t"
+               "       movzbl  %%al,%0"
+               : "=a" (ret)
+               : "a" (old),
+                 "r" (new),
+                 "r" (entryp)
+               : "memory");
+#endif
+       return ret;
+}
+
+extern uint32_t pmap_update_clear_pte_count;
+
+static inline void pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits) {
+       pt_entry_t npte, opte;
+       do {
+               opte = *mptep;
+               if (__improbable(opte == 0)) {
+                       pmap_update_clear_pte_count++;
+                       break;
+               }
+               npte = opte & ~(pclear_bits);
+               npte |= pset_bits;
+       }       while (!pmap_cmpx_pte(mptep, opte, npte));
+}
+
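Note the contract change: the old pmap_update_pte() took an expected old value and a complete replacement, while this version takes clear/set bit masks, loops internally on pmap_cmpx_pte(), and bails out (counting the event) if the PTE has been zeroed from under it. Representative call sites from later in this commit:

        pmap_update_pte(pte, INTEL_PTE_VALID, 0);       /* invalidate a mapping */
        pmap_update_pte(pte, 0, INTEL_PTE_WIRED);       /* wire a mapping down */
        pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes); /* change cacheability */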
 #if    defined(__x86_64__)
 /*
  * The single pml4 page per pmap is allocated at pmap create time and exists
@@ -903,6 +994,11 @@ static inline
 pml4_entry_t *
 pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
 {
+       if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
+               (vaddr < 0xFFFF800000000000ULL))) {
+               return (NULL);
+       }
+
 #if    PMAP_ASSERT
        return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
 #else
@@ -919,12 +1015,6 @@ pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
        pml4_entry_t    newpf;
        pml4_entry_t    *pml4;
 
-       assert(pmap);
-       if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
-           (vaddr < 0xFFFF800000000000ULL)) {
-               return (0);
-       }
-
        pml4 = pmap64_pml4(pmap, vaddr);
        if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
                newpf = *pml4 & PG_FRAME;
@@ -942,12 +1032,6 @@ pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
        pdpt_entry_t    newpf;
        pdpt_entry_t    *pdpt;
 
-       assert(pmap);
-       if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
-           (vaddr < 0xFFFF800000000000ULL)) {
-               return (0);
-       }
-
        pdpt = pmap64_pdpt(pmap, vaddr);
 
        if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
@@ -963,7 +1047,6 @@ pmap_pde(pmap_t m, vm_map_offset_t v)
 {
        pd_entry_t     *pde;
 
-       assert(m);
        pde = pmap64_pde(m, v);
 
        return pde;
@@ -983,7 +1066,7 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
        pd_entry_t      newpf;
 
        assert(pmap);
-       pde = pmap_pde(pmap, vaddr);
+       pde = pmap64_pde(pmap, vaddr);
 
        if (pde && ((*pde & INTEL_PTE_VALID))) {
                if (*pde & INTEL_PTE_PS) 
@@ -995,4 +1078,11 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
        return (NULL);
 }
 #endif
+#if    DEBUG
+#define DPRINTF(x...)  kprintf(x)
+#else
+#define DPRINTF(x...)
+#endif
+
 #endif /* MACH_KERNEL_PRIVATE */
+#endif /* _I386_PMAP_INTERNAL_ */
index 9061d73cf06267a719e7ce84547649e3509d8451..f400bc280a62a2d1d565f882d9d6a0e6d0f94622 100644 (file)
@@ -27,6 +27,7 @@
  */
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
+#include <kern/ledger.h>
 #include <i386/pmap_internal.h>
 
 void           pmap_remove_range(
@@ -35,6 +36,8 @@ void          pmap_remove_range(
                        pt_entry_t      *spte,
                        pt_entry_t      *epte);
 
+uint32_t pmap_update_clear_pte_count;
+
 /*
  * The Intel platform can nest pmaps at the PDE level, so nesting is done NBPDE
  * (i.e. 2MB) at a time, on an NBPDE boundary.
@@ -105,7 +108,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 
                        while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                                PMAP_UNLOCK(subord);
-                               pmap_expand_pdpt(subord, nvaddr);
+                               pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(subord);
                                npde = pmap64_pdpt(subord, nvaddr);
                        }
@@ -118,7 +121,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 
                        while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                                PMAP_UNLOCK(subord);
-                               pmap_expand(subord, nvaddr);
+                               pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(subord);
                                npde = pmap_pde(subord, nvaddr);
                        }
@@ -144,7 +147,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
                        pde = pmap64_pdpt(grand, vaddr);
                        if (0 == pde) {
                                PMAP_UNLOCK(grand);
-                               pmap_expand_pml4(grand, vaddr);
+                               pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(grand);
                                pde = pmap64_pdpt(grand, vaddr);
                        }
@@ -163,7 +166,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
                        pde = pmap_pde(grand, vaddr);
                        if ((0 == pde) && cpu_64bit) {
                                PMAP_UNLOCK(grand);
-                               pmap_expand_pdpt(grand, vaddr);
+                               pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(grand);
                                pde = pmap_pde(grand, vaddr);
                        }
@@ -362,7 +365,7 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
                                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
 
                        nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
-                       pmap_update_pte(ptep, *ptep, (*ptep & ~PHYS_CACHEABILITY_MASK) | attributes);
+                       pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
                        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                        pvh_e = nexth;
                } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
@@ -395,18 +398,34 @@ void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
  *     or lose information.  That is, this routine must actually
  *     insert this page into the given map NOW.
  */
+
 void
 pmap_enter(
        register pmap_t         pmap,
        vm_map_offset_t         vaddr,
        ppnum_t                 pn,
        vm_prot_t               prot,
+       vm_prot_t               fault_type,
        unsigned int            flags,
        boolean_t               wired)
+{
+       (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE);
+}
+
+kern_return_t
+pmap_enter_options(
+       register pmap_t         pmap,
+       vm_map_offset_t         vaddr,
+       ppnum_t                 pn,
+       vm_prot_t               prot,
+       __unused vm_prot_t      fault_type,
+       unsigned int            flags,
+       boolean_t               wired,
+       unsigned int            options)
 {
        pt_entry_t              *pte;
        pv_rooted_entry_t       pv_h;
-       int                     pai;
+       ppnum_t                 pai;
        pv_hashed_entry_t       pvh_e;
        pv_hashed_entry_t       pvh_new;
        pt_entry_t              template;
@@ -421,25 +440,35 @@ pmap_enter(
        vm_object_t             delpage_pm_obj = NULL;
        int                     delpage_pde_index = 0;
        pt_entry_t              old_pte;
+       kern_return_t           kr_expand;
 
        pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
 
        if (pmap == PMAP_NULL)
-               return;
+               return KERN_INVALID_ARGUMENT;
+
+       /* N.B. We can be supplied a zero page frame in the NOENTER case; the
+        * page frame value is unused in that scenario.
+        */
+       assert(pn != vm_page_fictitious_addr);
+
        if (pn == vm_page_guard_addr)
-               return;
+               return KERN_INVALID_ARGUMENT;
 
        PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  pmap,
-                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
-                  pn, prot);
+           pmap,
+           (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
+           pn, prot);
 
        if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
                set_NX = FALSE;
        else
                set_NX = TRUE;
 
+       if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
+               set_NX = FALSE;
+       }
+
        /*
         *      Must allocate a new pvlist entry while we're unlocked;
         *      zalloc may cause pageout (which will lock the pmap system).
@@ -463,7 +492,9 @@ Retry:
                while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
                        /* need room for another pde entry */
                        PMAP_UNLOCK(pmap);
-                       pmap_expand_pdpt(pmap, vaddr);
+                       kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
+                       if (kr_expand != KERN_SUCCESS)
+                               return kr_expand;
                        PMAP_LOCK(pmap);
                }
        } else {
@@ -473,10 +504,16 @@ Retry:
                         * going to grow pde level page(s)
                         */
                        PMAP_UNLOCK(pmap);
-                       pmap_expand(pmap, vaddr);
+                       kr_expand = pmap_expand(pmap, vaddr, options);
+                       if (kr_expand != KERN_SUCCESS)
+                               return kr_expand;
                        PMAP_LOCK(pmap);
                }
        }
+       if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
+               PMAP_UNLOCK(pmap);
+               return KERN_SUCCESS;
+       }
 
        if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
                /*
@@ -540,14 +577,15 @@ Retry:
 
                if (wired) {
                        template |= INTEL_PTE_WIRED;
-                       if (!iswired(old_attributes))
-                               OSAddAtomic(+1,
-                                       &pmap->stats.wired_count);
+                       if (!iswired(old_attributes))  {
+                               OSAddAtomic(+1, &pmap->stats.wired_count);
+                               pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+                       }
                } else {
                        if (iswired(old_attributes)) {
                                assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
                if (superpage)          /* this path can not be used */
@@ -557,8 +595,11 @@ Retry:
                    ((old_attributes ^ template) != INTEL_PTE_WIRED);
 
                /* store modified PTE and preserve RC bits */
-               pmap_update_pte(pte, *pte,
-                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
+               pt_entry_t npte, opte;
+               do {
+                       opte = *pte;
+                       npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
+               } while (!pmap_cmpx_pte(pte, opte, npte));
                if (old_pa_locked) {
                        UNLOCK_PVH(pai);
                        old_pa_locked = FALSE;
@@ -588,7 +629,7 @@ Retry:
                 */
 
                /* invalidate the PTE */
-               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
+               pmap_update_pte(pte, INTEL_PTE_VALID, 0);
                /* propagate invalidate everywhere */
                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                /* remember reference and change */
@@ -599,14 +640,14 @@ Retry:
 
                if (IS_MANAGED_PAGE(pai)) {
                        pmap_assert(old_pa_locked == TRUE);
+                       pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
                        assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,
-                               &pmap->stats.resident_count);
-
+                       OSAddAtomic(-1, &pmap->stats.resident_count);
                        if (iswired(*pte)) {
                                assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem,
+                                   PAGE_SIZE);
                        }
                        pmap_phys_attributes[pai] |= oattr;
 
@@ -627,8 +668,8 @@ Retry:
 
                        if (iswired(*pte)) {
                                assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
        }
@@ -708,6 +749,7 @@ Retry:
                 * only count the mapping
                 * for 'managed memory'
                 */
+               pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &pmap->stats.resident_count);
                if (pmap->stats.resident_count > pmap->stats.resident_max) {
                        pmap->stats.resident_max = pmap->stats.resident_count;
@@ -716,6 +758,7 @@ Retry:
                /* Account for early mappings created before "managed pages"
                 * are determined. Consider consulting the available DRAM map.
                 */
+               pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &pmap->stats.resident_count);
        }
        /*
@@ -746,6 +789,7 @@ Retry:
        if (wired) {
                template |= INTEL_PTE_WIRED;
                OSAddAtomic(+1,  & pmap->stats.wired_count);
+               pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
        }
        if (superpage)
                template |= INTEL_PTE_PS;
@@ -781,10 +825,11 @@ Done:
                vm_object_unlock(delpage_pm_obj);
                VM_PAGE_FREE(m);
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
+               PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
        }
 
        PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+       return KERN_SUCCESS;
 }
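Because pmap_enter_options() returns KERN_SUCCESS right after the expansion step when PMAP_EXPAND_OPTIONS_NOENTER is set, it doubles as a way to pre-build the paging hierarchy for an address without mapping anything. A hedged sketch (the argument values are illustrative; per the N.B. above, pn may be 0 in this mode):

        /* sketch: populate page tables for vaddr, install no translation */
        kr = pmap_enter_options(map, vaddr, 0, VM_PROT_READ, VM_PROT_NONE,
                0, FALSE, PMAP_EXPAND_OPTIONS_NOENTER);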
 
 /*
@@ -812,7 +857,7 @@ pmap_remove_range(
        pv_hashed_entry_t       pvh_e;
        int                     pvh_cnt = 0;
        int                     num_removed, num_unwired, num_found, num_invalid;
-       int                     pai;
+       ppnum_t                 pai;
        pmap_paddr_t            pa;
        vm_map_offset_t         vaddr;
 
@@ -861,8 +906,8 @@ pmap_remove_range(
                if ((p & INTEL_PTE_VALID) == 0)
                        num_invalid++;
 
-               /* invalidate the PTE */ 
-               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
+               /* invalidate the PTE */
+               pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
        }
 
        if (num_found == 0) {
@@ -933,6 +978,7 @@ update_counts:
        if (pmap->stats.resident_count < num_removed)
                panic("pmap_remove_range: resident_count");
 #endif
+       pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
        assert(pmap->stats.resident_count >= num_removed);
        OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
 
@@ -942,6 +988,7 @@ update_counts:
 #endif
        assert(pmap->stats.wired_count >= num_unwired);
        OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
+       pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
 
        return;
 }
@@ -1144,11 +1191,12 @@ pmap_page_protect(
                        /*
                         * Remove the mapping, collecting dirty bits.
                         */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
+                       pmap_update_pte(pte, INTEL_PTE_VALID, 0);
 
                        /* Remove per-pmap wired count */
                        if (iswired(*pte)) {
                                OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
 
                        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
@@ -1160,9 +1208,9 @@ pmap_page_protect(
                        if (pmap->stats.resident_count < 1)
                                panic("pmap_page_protect: resident_count");
 #endif
+                       pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
                        assert(pmap->stats.resident_count >= 1);
                        OSAddAtomic(-1,  &pmap->stats.resident_count);
-
                        /*
                         * Deal with the pv_rooted_entry.
                         */
@@ -1190,8 +1238,7 @@ pmap_page_protect(
                         */
                        pmap_phys_attributes[pai] |=
                            *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
+                       pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
                        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
                }
                pvh_e = nexth;
@@ -1265,7 +1312,7 @@ phys_attribute_clear(
        /*
         * Walk down PV list, clearing all modify or reference bits.
         * We do not have to lock the pv_list because we have
-        * the entire pmap system locked.
+        * the per-pmap lock.
         */
        if (pv_h->pmap != PMAP_NULL) {
                /*
@@ -1285,8 +1332,7 @@ phys_attribute_clear(
                          */
                        pte = pmap_pte(pmap, va);
                        attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-
-                       pmap_update_pte(pte, *pte, (*pte & ~bits));
+                       pmap_update_pte(pte, bits, 0);
                        /* Ensure all processors using this translation
                         * invalidate this TLB entry. The invalidation *must*
                         * follow the PTE update, to ensure that the TLB
@@ -1415,8 +1461,9 @@ pmap_change_wiring(
                /*
                 * wiring down mapping
                 */
+               pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &map->stats.wired_count);
-               pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
+               pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
        }
        else if (!wired && iswired(*pte)) {
                /*
@@ -1424,7 +1471,8 @@ pmap_change_wiring(
                 */
                assert(map->stats.wired_count >= 1);
                OSAddAtomic(-1,  &map->stats.wired_count);
-               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
+               pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
+               pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
        }
 
        PMAP_UNLOCK(map);
@@ -1459,6 +1507,12 @@ pmap_map_bd(
                if (!(flags & (VM_MEM_GUARDED)))
                        template |= INTEL_PTE_PTA;
        }
+
+#if    defined(__x86_64__)
+       if ((prot & VM_PROT_EXECUTE) == 0)
+               template |= INTEL_PTE_NX;
+#endif
+
        if (prot & VM_PROT_WRITE)
                template |= INTEL_PTE_WRITE;
 
index 498a88143bf794300c40713a6cfb0c976af8c08e..9440fcbd02c1bb75c61a6192e2f8edfa77417f1d 100644 (file)
  * The following postcodes are defined for stages of early startup:
  */
 
-#define        _PSTART_ENTRY                   0xFF
-#define        _PSTART_RELOC                   0xFE
-#define        PSTART_ENTRY                    0xFD
-#define PSTART_PAGE_TABLES             0xFC
-#if defined(__x86_64__)
-#define PSTART_BEFORE_ID_MAP           0xFB
-#else
-#define PSTART_BEFORE_PAGING           0xFB
-#endif
-#define VSTART_ENTRY                   0xFA
-#define VSTART_STACK_SWITCH            0xF9
-#define VSTART_BEFORE_PAGING           0xF8
-#define VSTART_EXIT                    0xF7
-#define        I386_INIT_ENTRY                 0xF6
-#define        CPU_INIT_D                      0xF5
-#define        PE_INIT_PLATFORM_D              0xF4
-
-#define        SLAVE_RSTART_ENTRY              0xEF
-#define        SLAVE_REAL_TO_PROT_ENTRY        0xEE
-#define        SLAVE_REAL_TO_PROT_EXIT         0xED
-#define        SLAVE_STARTPROG_ENTRY           0xEC
-#define        SLAVE_STARTPROG_EXIT            0xEB
-#define        SLAVE_PSTART_ENTRY              0xEA
-#define        SLAVE_PSTART_EXIT               0xE9
-#if defined(__i386__)
-#define        SLAVE_VSTART_ENTRY              0xE8
-#define        SLAVE_VSTART_DESC_INIT          0xE7
-#define        SLAVE_VSTART_STACK_SWITCH       0xE6
-#define        SLAVE_VSTART_EXIT               0xE5
-#endif
-#define        I386_INIT_SLAVE                 0xE4
+#define        PSTART_ENTRY                    0xFF
+#define PSTART_REBASE                  0xFE
+#define PSTART_BEFORE_PAGING           0xFE
+#define        PSTART_VSTART                   0xFD
+#define VSTART_ENTRY                   0xFC
+#define VSTART_IDLE_PTS_INIT           0xFB
+#define VSTART_PHYSMAP_INIT            0xFA
+#define VSTART_DESC_ALIAS_INIT         0xF9
+#define VSTART_SET_CR3                 0xF8
+#define VSTART_CPU_DESC_INIT           0xF7
+#define VSTART_CPU_MODE_INIT           0xF6
+#define VSTART_EXIT                    0xF5
+#define        I386_INIT_ENTRY                 0xF4
+#define        CPU_INIT_D                      0xF3
+#define        PE_INIT_PLATFORM_D              0xF2
+
+#define        SLAVE_STARTPROG_ENTRY           0xEF
+#define        SLAVE_PSTART                    0xEE
+#define        I386_INIT_SLAVE                 0xED
 
 #define        PANIC_DOUBLE_FAULT              0xDF    /* Double Fault exception */
 #define        PANIC_MACHINE_CHECK             0xDE    /* Machine-Check */
index 05dd961f12163ca07edd036996f982c7f43e7072..755be1c694db2720b9e6dd437e5e8ea91b53b834 100644 (file)
 
 #define        PMAP_PCID_PRESERVE (1ULL << 63)
 #define        PMAP_PCID_MASK (0xFFF)
+
+#define RDRAND_RAX     .byte 0x48, 0x0f, 0xc7, 0xf0
+
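RDRAND_RAX hand-encodes rdrand %rax (REX.W + 0F C7 with ModRM /6, rm=rax) for assemblers that predate the mnemonic. A hypothetical C-level consumer, not part of this commit, shown only to illustrate the instruction's CF-based success convention:

        /* hypothetical sketch: CF=1 means %rax holds 64 fresh random bits */
        static inline int rdrand64(uint64_t *v)
        {
                uint8_t ok;
                __asm__ volatile(
                        ".byte 0x48, 0x0f, 0xc7, 0xf0\n\t"      /* rdrand %rax */
                        "setc   %1"
                        : "=a" (*v), "=qm" (ok) : : "cc");
                return ok;      /* 0 = no entropy available, caller should retry */
        }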
 #ifndef        ASSEMBLER
 
 #include <sys/cdefs.h>
index d9de631857d8ec803eb5ce0bb84b1d5cc7980ee3..8a5f8c667b5beec375c59a05e56cae9a347505a3 100644 (file)
@@ -40,7 +40,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/mach_types.h>
 
@@ -378,6 +377,7 @@ rtclock_init(void)
                rtc_timer_init();
                clock_timebase_init();
                ml_init_lock_timeout();
+               ml_init_delay_spin_threshold();
        }
 
        /* Set fixed configuration for lapic timers */
index 4d17039a2c5bd0328a339fa8f2998db02fcc9222..cf24293beebaa22a8aa046fc86fd4f8bb8c9c134 100644 (file)
@@ -30,7 +30,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/mach_types.h>
 
index df191c5d16fc88a57132975ed39808a8a182c60d..94af4521faebf0d9bd95a5626a6918ea5ed3ffc3 100644 (file)
  */
 #ifndef        _I386_SEG_H_
 #define        _I386_SEG_H_
-#ifdef MACH_KERNEL
-#include <mach_kdb.h>
-#else
-#define        MACH_KDB 0
-#endif /* MACH_KERNEL */
 #ifndef __ASSEMBLER__
 #include <stdint.h>
 #include <mach/vm_types.h>
@@ -99,15 +94,7 @@ selector_to_sel(uint16_t selector)
 #define        LDTSZ_MIN       SEL_TO_INDEX(USER_SETTABLE)
                                        /* kernel ldt entries */
 
-#if    MACH_KDB
-#define        GDTSZ           20
-#else
 #define        GDTSZ           19
-#endif
-
-#ifdef __x86_64__
-#define PROT_MODE_GDT_SIZE 48 /* size of prot_mode_gdt in bytes */
-#endif
 
 /*
  * Interrupt table is always 256 entries long.
@@ -219,12 +206,6 @@ extern char                        mc_task_stack_end[];
 extern struct i386_tss         master_mctss;
 extern void                    mc_task_start(void);
 
-#if    MACH_KDB
-extern char                    db_task_stack_store[];
-extern struct i386_tss         master_dbtss;
-extern void                    db_task_start(void);
-#endif /* MACH_KDB */
-
 __END_DECLS
 
 #endif /*__ASSEMBLER__*/
@@ -355,10 +336,6 @@ __END_DECLS
 #define        SYSENTER_DS     KERNEL64_SS     /* sysenter kernel data segment */
 #endif
 
-#if    MACH_KDB
-#define        DEBUG_TSS       0x90            /* 18:  debug TSS (uniprocessor) */
-#endif
-
 #ifdef __x86_64__
 /*
  * 64-bit kernel LDT descriptors
index 5472ffde37d1182ac0ee30e8cbfd8bddf49cb650..c8a9040388d120a4da276da9be8a7e37cb76ae85 100644 (file)
@@ -124,18 +124,6 @@ EXT(mc_task_stack):
        .globl  EXT(mc_task_stack_end)
 EXT(mc_task_stack_end):
 
-#if    MACH_KDB
-/*
- * Stack for last-ditch debugger task for each processor.
- */
-       .align  12
-       .globl  EXT(db_task_stack_store)
-EXT(db_task_stack_store):
-       .space  (INTSTACK_SIZE*MAX_CPUS)
-
-#endif /* MACH_KDB */
-
-
 /*
  * BSP CPU start here.
  *     eax points to kernbootstruct
@@ -153,18 +141,18 @@ LEXT(_start)
        mov             %eax, %ebp              /* Move kernbootstruct to ebp */
        mov             %eax, %ebx              /* get pointer to kernbootstruct */
 
-       mov     $EXT(low_eintstack),%esp                        /* switch to the bootup stack */
+       mov     $EXT(low_eintstack),%esp        /* switch to the bootup stack */
 
        POSTCODE(PSTART_ENTRY)
 
-       lgdt    EXT(gdtptr)                                     /* load GDT */
+       lgdt    EXT(gdtptr)                     /* load GDT */
 
-       mov     $(KERNEL_DS),%ax                                /* set kernel data segment */
+       mov     $(KERNEL_DS),%ax                /* set kernel data segment */
        mov     %ax, %ds
        mov     %ax, %es
        mov     %ax, %ss
-       xor     %ax, %ax                                                /* fs must be zeroed; */
-       mov     %ax, %fs                                                /* some bootstrappers don`t do this */
+       xor     %ax, %ax                        /* fs must be zeroed; */
+       mov     %ax, %fs                        /* some bootstrappers don`t do this */
        mov     %ax, %gs
        cld
 
@@ -173,10 +161,10 @@ LEXT(_start)
        call .-1
 
 paging:
-       andl    $0xfffffff0, %esp                               /* align stack */
+       andl    $0xfffffff0, %esp               /* align stack */
        subl    $0xc, %esp
-       pushl   %ebp                                            /* push boot args addr */
-       xorl    %ebp, %ebp                                      /* zero frame pointer */
+       pushl   %ebp                            /* push boot args addr */
+       xorl    %ebp, %ebp                      /* zero frame pointer */
        
        POSTCODE(PSTART_BEFORE_PAGING)
 
@@ -185,14 +173,16 @@ paging:
  */
        movl    $EXT(IdlePDPT), %eax            /* CR3 */
        movl    %eax, %cr3
-       movl    %cr4, %eax                                      /* PAE */
+       movl    %cr4, %eax                      /* PAE */
        orl     $(CR4_PAE), %eax
        movl    %eax, %cr4
-       movl    %cr0,%eax                                       /* paging */
+       movl    %cr0,%eax                       /* paging */
        orl     $(CR0_PG|CR0_WP),%eax
        movl    %eax,%cr0
+
+       POSTCODE(PSTART_VSTART)
        
-       call    EXT(vstart)                                     /* run C code */
+       call    EXT(vstart)                     /* run C code */
        /*NOTREACHED*/
        hlt
 
@@ -292,7 +282,7 @@ LEXT(hibernate_machine_entrypoint)
        /* set up the page tables to use BootstrapPTD 
         * as done in idle_pt.c, but this must be done programmatically */
        mov $EXT(IdlePDPT), %eax
-       mov $EXT(BootstrapPTD) + (INTEL_PTE_VALID), %ecx
+       mov $EXT(BootPTD) + (INTEL_PTE_VALID), %ecx
        mov $0x0, %edx
        mov     %ecx, (0*8+0)(%eax)
        mov %edx, (0*8+4)(%eax)
index bcabe2829f10f14ab82f3e0007741bc7188b0703..95a9dd66465bd0c679c6918a2f50337ce940aa84 100644 (file)
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
-
 #include <i386/asm.h>
 #include <i386/asm64.h>
 #include <i386/proc_reg.h>
 #include <i386/postcode.h>
+#include <i386/vmx/vmx_asm.h>
 #include <assym.s>
 
        .data
@@ -213,3 +212,45 @@ Entry(xrstor64o)
        .byte   0x29
        ENTER_COMPAT_MODE()
        ret
+
+#if CONFIG_VMX
+
+/*
+ *     __vmxon -- Enter VMX Operation
+ *     int __vmxon(addr64_t v);
+ */
+Entry(__vmxon)
+       FRAME
+       
+       ENTER_64BIT_MODE()
+       mov     $(VMX_FAIL_INVALID), %ecx
+       mov     $(VMX_FAIL_VALID), %edx
+       mov     $(VMX_SUCCEED), %eax
+       vmxon   8(%rbp)         /* physical addr passed on stack */
+       cmovcl  %ecx, %eax      /* CF = 1, ZF = 0 */
+       cmovzl  %edx, %eax      /* CF = 0, ZF = 1 */
+       ENTER_COMPAT_MODE()
+
+       EMARF
+       ret
+
+/*
+ *     __vmxoff -- Leave VMX Operation
+ *     int __vmxoff(void);
+ */
+Entry(__vmxoff)
+       FRAME
+       
+       ENTER_64BIT_MODE()
+       mov     $(VMX_FAIL_INVALID), %ecx
+       mov     $(VMX_FAIL_VALID), %edx
+       mov     $(VMX_SUCCEED), %eax
+       vmxoff
+       cmovcl  %ecx, %eax      /* CF = 1, ZF = 0 */
+       cmovzl  %edx, %eax      /* CF = 0, ZF = 1 */
+       ENTER_COMPAT_MODE()
+
+       EMARF
+       ret
+
+#endif /* CONFIG_VMX */
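Both stubs translate the VMX status flags into an integer per Intel's convention: CF=1 is VMfailInvalid, ZF=1 is VMfailValid, and neither set means success. A hedged caller sketch (constants from vmx_asm.h; vmxon_region_pa is an illustrative name for the 4KB-aligned physical address of the VMXON region):

        /* sketch only */
        extern int __vmxon(addr64_t v);
        extern int __vmxoff(void);

        if (__vmxon(vmxon_region_pa) != VMX_SUCCEED)
                panic("VMXON failed");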
index b445882cd96e38e2e938d70adaca1bb7d9a888eb..b4f69f74143b69b21cf1ad83796a8794c122323f 100644 (file)
@@ -188,6 +188,7 @@ cpu_IA32e_disable(cpu_data_t *cdp)
 #endif
 
 #if DEBUG
+extern void dump_regs64(void);
 extern void dump_gdt(void *);
 extern void dump_ldt(void *);
 extern void dump_idt(void *);
@@ -349,4 +350,49 @@ dump_tss(void *tssp)
                kprintf("%p: 0x%08x\n", ip+0, *(ip+0));
        }
 }
+
+#if defined(__x86_64__)
+void dump_regs64(void)
+{
+
+#define SNAP_REG(reg)                                          \
+       uint64_t        reg;                                    \
+       __asm__ volatile("mov %%" #reg ", %0" : "=m" (reg))
+
+#define KPRINT_REG(reg)                                                \
+       kprintf("%3s: %p\n", #reg, (void *) reg)
+
+       SNAP_REG(rsp);
+       SNAP_REG(rbp);
+       SNAP_REG(rax);
+       SNAP_REG(rbx);
+       SNAP_REG(rcx);
+       SNAP_REG(rdx);
+       SNAP_REG(rsi);
+       SNAP_REG(rdi);
+       SNAP_REG(r8);
+       SNAP_REG(r9);
+       SNAP_REG(r10);
+       SNAP_REG(r11);
+       SNAP_REG(r12);
+       SNAP_REG(r13);
+       SNAP_REG(r14);
+
+       KPRINT_REG(rsp);
+       KPRINT_REG(rbp);
+       KPRINT_REG(rax);
+       KPRINT_REG(rbx);
+       KPRINT_REG(rcx);
+       KPRINT_REG(rdx);
+       KPRINT_REG(rsi);
+       KPRINT_REG(rdi);
+       KPRINT_REG(r8);
+       KPRINT_REG(r9);
+       KPRINT_REG(r10);
+       KPRINT_REG(r11);
+       KPRINT_REG(r12);
+       KPRINT_REG(r13);
+       KPRINT_REG(r14);
+}
+#endif /* __x86_64__ */
 #endif /* DEBUG */
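For readability: SNAP_REG relies on preprocessor stringification, so each invocation declares a local named after the register and captures the live register into it. SNAP_REG(rsp), for instance, expands to exactly:

        uint64_t        rsp;
        __asm__ volatile("mov %%rsp, %0" : "=m" (rsp));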
index 2a77aedf3903c551fdcaff9f03c186177587369c..a2a805a033dad5eb33d79d621aadfa4403ddf150 100644 (file)
@@ -60,8 +60,6 @@
 * Hardware trap/fault handler.
  */
 
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
 #include <mach_kdp.h>
 #include <mach_ldebug.h>
 
 
 #include <sys/kdebug.h>
 
-#if    MACH_KGDB
-#include <kgdb/kgdb_defs.h>
-#endif /* MACH_KGDB */
-
-#if    MACH_KDB
-#include <debug.h>
-#include <ddb/db_watch.h>
-#include <ddb/db_run.h>
-#include <ddb/db_break.h>
-#include <ddb/db_trap.h>
-#endif /* MACH_KDB */
-
 #include <string.h>
 
 #include <i386/postcode.h>
@@ -167,7 +153,7 @@ thread_syscall_return(
                            == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT);
                if (kdebug_enable && is_mach) {
                        /* Mach trap */
-                       KERNEL_DEBUG_CONSTANT(
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                              MACHDBG_CODE(DBG_MACH_EXCP_SC,code)|DBG_FUNC_END,
                              ret, 0, 0, 0, 0);
                }
@@ -191,7 +177,7 @@ thread_syscall_return(
                is_mach = (code < 0);
                if (kdebug_enable && is_mach) {
                        /* Mach trap */
-                       KERNEL_DEBUG_CONSTANT(
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                              MACHDBG_CODE(DBG_MACH_EXCP_SC,-code)|DBG_FUNC_END,
                              ret, 0, 0, 0, 0);
                }
@@ -214,44 +200,6 @@ thread_syscall_return(
 }
 
 
-#if    MACH_KDB
-boolean_t      debug_all_traps_with_kdb = FALSE;
-extern struct db_watchpoint *db_watchpoint_list;
-extern boolean_t db_watchpoints_inserted;
-extern boolean_t db_breakpoints_inserted;
-
-void
-thread_kdb_return(void)
-{
-       thread_t                thr_act = current_thread();
-       x86_saved_state_t       *iss = USER_STATE(thr_act);
-
-       pal_register_cache_state(thr_act, DIRTY);
-
-        if (is_saved_state64(iss)) {
-               x86_saved_state64_t     *regs;
-               
-               regs = saved_state64(iss);
-
-               if (kdb_trap(regs->isf.trapno, (int)regs->isf.err, (void *)regs)) {
-                       thread_exception_return();
-                       /*NOTREACHED*/
-               }
-
-       } else {
-               x86_saved_state32_t     *regs;
-               
-               regs = saved_state32(iss);
-
-               if (kdb_trap(regs->trapno, regs->err, (void *)regs)) {
-                       thread_exception_return();
-                       /*NOTREACHED*/
-               }
-       }
-}
-
-#endif /* MACH_KDB */
-
 static inline void
 user_page_fault_continue(
                         kern_return_t  kr)
@@ -259,61 +207,24 @@ user_page_fault_continue(
        thread_t        thread = current_thread();
        user_addr_t     vaddr;
 
-#if    MACH_KDB
-       x86_saved_state_t *regs = USER_STATE(thread);
-       int             err;
-       int             trapno;
-
-       assert((is_saved_state32(regs) && !thread_is_64bit(thread)) ||
-              (is_saved_state64(regs) &&  thread_is_64bit(thread)));
-#endif
-
-        if (thread_is_64bit(thread)) {
-               x86_saved_state64_t     *uregs;
+       if (thread_is_64bit(thread)) {
+               x86_saved_state64_t     *uregs;
 
                uregs = USER_REGS64(thread);
 
-#if    MACH_KDB
-               trapno = uregs->isf.trapno;
-               err = (int)uregs->isf.err;
-#endif
                vaddr = (user_addr_t)uregs->cr2;
        } else {
                x86_saved_state32_t     *uregs;
 
                uregs = USER_REGS32(thread);
 
-#if    MACH_KDB
-               trapno = uregs->trapno;
-               err = uregs->err;
-#endif
                vaddr = uregs->cr2;
        }
 
        if (__probable((kr == KERN_SUCCESS) || (kr == KERN_ABORTED))) {
-#if    MACH_KDB
-               if (!db_breakpoints_inserted) {
-                       db_set_breakpoints();
-               }
-               if (db_watchpoint_list &&
-                   db_watchpoints_inserted &&
-                   (err & T_PF_WRITE) &&
-                   db_find_watchpoint(thread->map,
-                                      (vm_offset_t)vaddr,
-                                      saved_state32(regs)))
-                       kdb_trap(T_WATCHPOINT, 0, saved_state32(regs));
-#endif /* MACH_KDB */
-               thread_exception_return();
-               /*NOTREACHED*/
-       }
-
-#if    MACH_KDB
-       if (debug_all_traps_with_kdb &&
-           kdb_trap(trapno, err, saved_state32(regs))) {
                thread_exception_return();
                /*NOTREACHED*/
        }
-#endif /* MACH_KDB */
 
        /* PAL debug hook */
        pal_dbg_page_fault( thread, vaddr, kr );
@@ -442,7 +353,8 @@ interrupt(x86_saved_state_t *state)
        boolean_t       user_mode = FALSE;
        int             ipl;
        int             cnum = cpu_number();
-
+       int             itype = 0;
+       
        if (is_saved_state64(state) == TRUE) {
                x86_saved_state64_t     *state64;
 
@@ -465,14 +377,23 @@ interrupt(x86_saved_state_t *state)
                interrupt_num = state32->trapno;
        }
 
-       KERNEL_DEBUG_CONSTANT(
+       if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_INTERPROCESSOR_INTERRUPT))
+               itype = 1;
+       else if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT))
+               itype = 2;
+       else
+               itype = 3;
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
-               interrupt_num, rip, user_mode, 0, 0);
+               interrupt_num,
+               (user_mode ? rip : VM_KERNEL_UNSLIDE(rip)),
+               user_mode, itype, 0);
 
        SCHED_STATS_INTERRUPT(current_processor());
 
        ipl = get_preemption_level();
-
+       
        /*
         * Handle local APIC interrupts
         * else call platform expert for devices.
@@ -484,7 +405,8 @@ interrupt(x86_saved_state_t *state)
                panic("Preemption level altered by interrupt vector 0x%x: initial 0x%x, final: 0x%x\n", interrupt_num, ipl, get_preemption_level());
        }
 
-       KERNEL_DEBUG_CONSTANT(
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
                interrupt_num, 0, 0, 0, 0);
 
@@ -514,7 +436,7 @@ interrupt(x86_saved_state_t *state)
                        kernel_stack_depth_max = (vm_offset_t)depth;
                        KERNEL_DEBUG_CONSTANT(
                                MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH),
-                               (long) depth, (long) rip, 0, 0, 0);
+                               (long) depth, (long) VM_KERNEL_UNSLIDE(rip), 0, 0, 0);
                }
        }
 }
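The other recurring change is VM_KERNEL_UNSLIDE: kernel instruction pointers logged to kdebug are rebased to their pre-KASLR addresses so trace buffers do not leak the boot-time slide, while user-mode addresses pass through untouched. Roughly (a sketch of the macro's shape; vm_kernel_slide/base/top are the KASLR globals this commit introduces elsewhere):

	#define VM_KERNEL_IS_SLID(_o)					\
		(((vm_offset_t)(_o) >= vm_kernel_base) &&		\
		 ((vm_offset_t)(_o) <  vm_kernel_top))

	#define VM_KERNEL_UNSLIDE(_v)					\
		(VM_KERNEL_IS_SLID(_v) ?				\
		 (vm_offset_t)(_v) - vm_kernel_slide :			\
		 (vm_offset_t)(_v))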
@@ -562,9 +484,6 @@ kernel_trap(
        int                     fault_in_copy_window = -1;
 #endif
        int                     is_user = 0;
-#if MACH_KDB
-       pt_entry_t              *pte;
-#endif /* MACH_KDB */
        
        thread = current_thread();
 
@@ -639,8 +558,9 @@ kernel_trap(
        if (__improbable(T_PREEMPT == type)) {
                ast_taken(AST_PREEMPTION, FALSE);
 
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
-                                     0, 0, 0, kern_ip, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
+                       0, 0, 0, VM_KERNEL_UNSLIDE(kern_ip), 0);
                return;
        }
        
@@ -713,10 +633,11 @@ kernel_trap(
 #endif
                }
        }
-
-       KERNEL_DEBUG_CONSTANT(
+       user_addr_t     kd_vaddr = is_user ? vaddr : VM_KERNEL_UNSLIDE(vaddr);  
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
-               (unsigned)(vaddr >> 32), (unsigned)vaddr, is_user, kern_ip, 0);
+               (unsigned)(kd_vaddr >> 32), (unsigned)kd_vaddr, is_user,
+               VM_KERNEL_UNSLIDE(kern_ip), 0);
 
 
        (void) ml_set_interrupts_enabled(intr);
@@ -760,24 +681,6 @@ kernel_trap(
 #endif
            case T_PAGE_FAULT:
 
-#if    MACH_KDB
-               /*
-                * Check for watchpoint on kernel static data.
-                * vm_fault would fail in this case 
-                */
-               if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted &&
-                   (code & T_PF_WRITE) && vaddr < vm_map_max(map) &&
-                   ((*(pte = pmap_pte(kernel_pmap, (vm_map_offset_t)vaddr))) & INTEL_PTE_WRITE) == 0) {
-                       pmap_store_pte(
-                               pte,
-                               *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE);
-                       /* XXX need invltlb here? */
-
-                       result = KERN_SUCCESS;
-                       goto look_for_watchpoints;
-               }
-#endif /* MACH_KDB */
-
 #if CONFIG_DTRACE
                if (thread != THREAD_NULL && thread->options & TH_OPT_DTRACE) { /* Executing under dtrace_probe? */
                        if (dtrace_tally_fault(vaddr)) { /* Should a fault under dtrace be ignored? */
@@ -790,7 +693,6 @@ kernel_trap(
                        }
                }
 #endif /* CONFIG_DTRACE */
-
                
                prot = VM_PROT_READ;
 
@@ -807,18 +709,6 @@ kernel_trap(
                                  FALSE, 
                                  THREAD_UNINT, NULL, 0);
 
-#if    MACH_KDB
-               if (result == KERN_SUCCESS) {
-                       /*
-                        * Look for watchpoints
-                        */
-look_for_watchpoints:
-                       if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && (code & T_PF_WRITE) &&
-                           db_find_watchpoint(map, vaddr, saved_state))
-                               kdb_trap(T_WATCHPOINT, 0, saved_state);
-               }
-#endif /* MACH_KDB */
-
                if (result == KERN_SUCCESS) {
 #if NCOPY_WINDOWS > 0
                        if (fault_in_copy_window != -1) {
@@ -879,30 +769,14 @@ debugger_entry:
                 * access through the debugger.
                 */
                sync_iss_to_iks(state);
-#if MACH_KDB
-restart_debugger:
-#endif /* MACH_KDB */          
 #if  MACH_KDP
-                if (current_debugger != KDB_CUR_DB) {
+               if (current_debugger != KDB_CUR_DB) {
                        if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr))
                                return;
-               } else {
-#endif /* MACH_KDP */
-#if MACH_KDB
-                       if (kdb_trap(type, code, saved_state)) {
-                               if (switch_debugger) {
-                                       current_debugger = KDP_CUR_DB;
-                                       switch_debugger = 0;
-                                       goto restart_debugger;
-                               }
-                               return;
-                       }
-#endif /* MACH_KDB */
-#if MACH_KDP
                }
 #endif
        }
-       __asm__ volatile("cli":::"cc");
+       pal_cli();
        panic_trap(saved_state);
        /*
         * NO RETURN
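The open-coded cli becomes pal_cli(), the platform-abstraction-layer wrapper, so a paravirtualized PAL can interpose on interrupt masking. On native hardware the wrapper is presumably just the instruction, something like:

	/* Assumed native definition; see the pal_* headers. */
	static inline void
	pal_cli(void)
	{
		__asm__ volatile ("cli");
	}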
@@ -952,11 +826,12 @@ panic_trap(x86_saved_state32_t *regs)
              "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
              "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
              "EFL: 0x%08x, EIP: 0x%08x, CS:  0x%08x, DS:  0x%08x\n"
-             "Error code: 0x%08x\n",
+             "Error code: 0x%08x%s\n",
              regs->eip, regs->trapno, trapname, cr0, cr2, cr3, cr4,
              regs->eax,regs->ebx,regs->ecx,regs->edx,
              regs->cr2,regs->ebp,regs->esi,regs->edi,
-             regs->efl,regs->eip,regs->cs & 0xFFFF, regs->ds & 0xFFFF, regs->err);
+             regs->efl,regs->eip,regs->cs & 0xFFFF, regs->ds & 0xFFFF, regs->err,
+             virtualized ? " VMM" : "");
        /*
         * This next statement is not executed,
         * but it's needed to stop the compiler using tail call optimization
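All the panic formats in this commit gain a " VMM" suffix when xnu runs as a guest. The `virtualized` flag is not defined in this hunk; it is presumably keyed off the CPUID hypervisor-present bit that the new cpuid_vmm_present() machinery (see tsc.c below) also consults:

	/* Assumption: CPUID.1:ECX bit 31, named CPUID_FEATURE_VMM in
	 * xnu's cpuid.h, reports "running under a hypervisor". */
	boolean_t virtualized = (cpuid_features() & CPUID_FEATURE_VMM) != 0;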
@@ -972,7 +847,7 @@ panic_trap(x86_saved_state64_t *regs)
 {
        const char      *trapname = "Unknown";
        pal_cr_t        cr0, cr2, cr3, cr4;
-       boolean_t       potential_smep_fault = FALSE;
+       boolean_t       potential_smep_fault = FALSE, potential_kernel_NX_fault = FALSE;
 
        pal_get_control_registers( &cr0, &cr2, &cr3, &cr4 );
        assert(ml_get_interrupts_enabled() == FALSE);
@@ -991,8 +866,12 @@ panic_trap(x86_saved_state64_t *regs)
        if (regs->isf.trapno < TRAP_TYPES)
                trapname = trap_type[regs->isf.trapno];
 
-       if ((regs->isf.trapno == T_PAGE_FAULT) && (regs->isf.err == (T_PF_PROT | T_PF_EXECUTE)) && (pmap_smep_enabled) && (regs->isf.rip == regs->cr2) && (regs->isf.rip < VM_MAX_USER_PAGE_ADDRESS)) {
-               potential_smep_fault = TRUE;
+       if ((regs->isf.trapno == T_PAGE_FAULT) && (regs->isf.err == (T_PF_PROT | T_PF_EXECUTE)) && (regs->isf.rip == regs->cr2)) {
+               if (pmap_smep_enabled && (regs->isf.rip < VM_MAX_USER_PAGE_ADDRESS)) {
+                       potential_smep_fault = TRUE;
+               } else if (regs->isf.rip >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
+                       potential_kernel_NX_fault = TRUE;
+               }
        }
 
 #undef panic
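The heuristic being refined here: a T_PAGE_FAULT whose error code is exactly T_PF_PROT | T_PF_EXECUTE (a protection violation on an instruction fetch, standard x86 error-code bits 0 and 4) with RIP equal to CR2 means the CPU refused to execute from the very page RIP points at. If RIP is a user address and SMEP is enabled, the kernel branched into user memory; if RIP lies in the kernel/kext range, it branched into non-executable kernel memory. As a stand-alone sketch of the same classification:

	enum exec_fault { EF_NONE, EF_SMEP, EF_KERNEL_NX };

	static enum exec_fault
	classify_exec_fault(uint64_t trapno, uint64_t err,
	    uint64_t rip, uint64_t cr2, boolean_t smep_on)
	{
		if (trapno != T_PAGE_FAULT ||
		    err != (T_PF_PROT | T_PF_EXECUTE) || rip != cr2)
			return EF_NONE;
		if (smep_on && rip < VM_MAX_USER_PAGE_ADDRESS)
			return EF_SMEP;		/* fetch from a user page */
		if (rip >= VM_MIN_KERNEL_AND_KEXT_ADDRESS)
			return EF_KERNEL_NX;	/* fetch from NX kernel page */
		return EF_NONE;
	}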
@@ -1003,7 +882,7 @@ panic_trap(x86_saved_state64_t *regs)
              "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
              "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
              "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-             "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x%s\n",
+             "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s\n",
              regs->isf.rip, regs->isf.trapno, trapname,
              cr0, cr2, cr3, cr4,
              regs->rax, regs->rbx, regs->rcx, regs->rdx,
@@ -1012,7 +891,9 @@ panic_trap(x86_saved_state64_t *regs)
              regs->r12, regs->r13, regs->r14, regs->r15,
              regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,
              regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu,
-             potential_smep_fault ? " SMEP/NX fault" : "");
+             virtualized ? " VMM" : "",
+             potential_kernel_NX_fault ? " Kernel NX fault" : "",
+             potential_smep_fault ? " SMEP/User NX fault" : "");
        /*
         * This next statement is not executed,
         * but it's needed to stop the compiler using tail call optimization
@@ -1086,7 +967,7 @@ user_trap(
 
        pal_sti();
 
-       KERNEL_DEBUG_CONSTANT(
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
                (MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE,
                (unsigned)(vaddr>>32), (unsigned)vaddr,
                (unsigned)(rip>>32), (unsigned)rip, 0);
@@ -1269,14 +1150,6 @@ user_trap(
                break;
 
            default:
-#if     MACH_KGDB
-               Debugger("Unanticipated user trap");
-               return;
-#endif  /* MACH_KGDB */
-#if    MACH_KDB
-               if (kdb_trap(type, err, saved_state32(saved_state)))
-                   return;
-#endif /* MACH_KDB */
                panic("Unexpected user trap, type %d", type);
                return;
        }
@@ -1337,39 +1210,6 @@ i386_exception(
 }
 
 
-#if    MACH_KDB
-
-extern void    db_i386_state(x86_saved_state32_t *regs);
-
-#include <ddb/db_output.h>
-
-void 
-db_i386_state(
-       x86_saved_state32_t *regs)
-{
-       db_printf("eip  %8x\n", regs->eip);
-       db_printf("trap %8x\n", regs->trapno);
-       db_printf("err  %8x\n", regs->err);
-       db_printf("efl  %8x\n", regs->efl);
-       db_printf("ebp  %8x\n", regs->ebp);
-       db_printf("esp  %8x\n", regs->cr2);
-       db_printf("uesp %8x\n", regs->uesp);
-       db_printf("cs   %8x\n", regs->cs & 0xff);
-       db_printf("ds   %8x\n", regs->ds & 0xff);
-       db_printf("es   %8x\n", regs->es & 0xff);
-       db_printf("fs   %8x\n", regs->fs & 0xff);
-       db_printf("gs   %8x\n", regs->gs & 0xff);
-       db_printf("ss   %8x\n", regs->ss & 0xff);
-       db_printf("eax  %8x\n", regs->eax);
-       db_printf("ebx  %8x\n", regs->ebx);
-       db_printf("ecx  %8x\n", regs->ecx);
-       db_printf("edx  %8x\n", regs->edx);
-       db_printf("esi  %8x\n", regs->esi);
-       db_printf("edi  %8x\n", regs->edi);
-}
-
-#endif /* MACH_KDB */
-
 /* Synchronize a thread's i386_kernel_state (if any) with the given
  * i386_saved_state_t obtained from the trap/IPI handler; called in
  * kernel_trap() prior to entering the debugger, and when receiving
index 26a9cbf07578cd8b88acca04ca861b7cabd37024..8c5ea335080f52b661429892b52f50df97279b3f 100644
@@ -104,6 +104,7 @@ void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
 
 extern volatile int panic_double_fault_cpu;
 
+
 #if defined(__x86_64__) && DEBUG
 /*
  * K64 debug - fatal handler for debug code in the trap vectors.
@@ -161,13 +162,13 @@ panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t
              "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
              "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
              "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
-             "EFL: 0x%08x, EIP: 0x%08x\n",
+             "EFL: 0x%08x, EIP: 0x%08x%s\n",
                  msg,
              my_ktss->eip, code,
              (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
              my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
              my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
-             my_ktss->eflags, my_ktss->eip);
+             my_ktss->eflags, my_ktss->eip, virtualized ? " VMM" : "");
 }
 
 /*
@@ -233,7 +234,7 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
                      "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
                      "R8:  0x%016qx, R9:  0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
                      "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
-                     "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx\n",
+                     "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx%s\n",
                          msg,
                      ss64p->isf.trapno, ss64p->isf.err,
                      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
@@ -241,7 +242,8 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
                      ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
                      ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
                      ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
-                     ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2);
+                     ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2,
+                         virtualized ? " VMM" : "");
        } else {
                x86_saved_state32_t     *ss32p = saved_state32(sp);
                panic("%s at 0x%08x, trapno:0x%x, err:0x%x,"
@@ -249,13 +251,13 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
                      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
                      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
                      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
-                     "EFL: 0x%08x, EIP: 0x%08x\n",
+                     "EFL: 0x%08x, EIP: 0x%08x%s\n",
                      msg,
                          ss32p->eip, ss32p->trapno, ss32p->err,
                      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
                      ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
                      ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
-                     ss32p->efl, ss32p->eip);
+                     ss32p->efl, ss32p->eip, virtualized ? " VMM" : "");
        }
 #else
        x86_saved_state64_t *regs = saved_state64(sp);
@@ -266,7 +268,7 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
              "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
              "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
              "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-             "Error code: 0x%016llx\n",
+             "Error code: 0x%016llx%s\n",
              msg,
                  regs->isf.rip,
              get_cr0(), get_cr2(), get_cr3_raw(), get_cr4(),
@@ -275,7 +277,7 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole
              regs->r8,  regs->r9,  regs->r10, regs->r11,
              regs->r12, regs->r13, regs->r14, regs->r15,
              regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,  regs->isf.ss & 0xFFFF,
-             regs->isf.err);
+             regs->isf.err, virtualized ? " VMM" : "");
 #endif
 }
 
index b4bf3dfbd30fb09130801ce1de1742a8114885ac..02b41779c99eb2596b727701c75aded41eb4791d 100644
@@ -36,7 +36,6 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
 
 #include <mach/mach_types.h>
 
@@ -133,9 +132,33 @@ EFI_FSB_frequency(void)
 void
 tsc_init(void)
 {
-       uint64_t        busFCvtInt = 0;
        boolean_t       N_by_2_bus_ratio = FALSE;
 
+       if (cpuid_vmm_present()) {
+               kprintf("VMM vendor %u TSC frequency %u KHz bus frequency %u KHz\n",
+                               cpuid_vmm_info()->cpuid_vmm_family,
+                               cpuid_vmm_info()->cpuid_vmm_tsc_frequency,
+                               cpuid_vmm_info()->cpuid_vmm_bus_frequency);
+
+               if (cpuid_vmm_info()->cpuid_vmm_tsc_frequency &&
+                       cpuid_vmm_info()->cpuid_vmm_bus_frequency) {
+
+                       busFreq = (uint64_t)cpuid_vmm_info()->cpuid_vmm_bus_frequency * kilo;
+                       busFCvtt2n = ((1 * Giga) << 32) / busFreq;
+                       busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
+                       
+                       tscFreq = (uint64_t)cpuid_vmm_info()->cpuid_vmm_tsc_frequency * kilo;
+                       tscFCvtt2n = ((1 * Giga) << 32) / tscFreq;
+                       tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
+                       
+                       tscGranularity = tscFreq / busFreq;
+                       
+                       bus2tsc = tmrCvt(busFCvtt2n, tscFCvtn2t);
+
+                       return;
+               }
+       }
+
        /*
         * Get the FSB frequency and conversion factors from EFI.
         */
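The conversion factors are 32.32 fixed-point: t2n = (10^9 << 32) / freq_hz makes nanoseconds = (ticks * t2n) >> 32, and n2t = ~0ULL / t2n approximates the reciprocal. Since cpuid_vmm_info() reports frequencies in kHz, the `* kilo` scaling recovers Hz first. A worked example with hypothetical numbers (ignoring the overflow care that tmrCvt() takes for large counts):

	uint64_t tsc_hz = 2400000000ULL;			/* 2.4 GHz */
	uint64_t t2n    = (1000000000ULL << 32) / tsc_hz;	/* ~0.4167 in 32.32 */
	uint64_t n2t    = 0xFFFFFFFFFFFFFFFFULL / t2n;		/* ~2.4 in 32.32 */
	uint64_t ticks  = 2400000ULL;				/* 1 ms of TSC */
	uint64_t ns     = (ticks * t2n) >> 32;			/* ~1000000 ns */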
@@ -146,7 +169,6 @@ tsc_init(void)
        case CPUFAMILY_INTEL_SANDYBRIDGE:
        case CPUFAMILY_INTEL_WESTMERE:
        case CPUFAMILY_INTEL_NEHALEM: {
-               uint64_t cpu_mhz;
                uint64_t msr_flex_ratio;
                uint64_t msr_platform_info;
 
@@ -170,8 +192,6 @@ tsc_init(void)
                if (busFreq == 0)
                    busFreq = BASE_NHM_CLOCK_SOURCE;
 
-               cpu_mhz = tscGranularity * BASE_NHM_CLOCK_SOURCE;
-
                break;
             }
        default: {
@@ -186,19 +206,16 @@ tsc_init(void)
        if (busFreq != 0) {
                busFCvtt2n = ((1 * Giga) << 32) / busFreq;
                busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
-               busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq); 
        } else {
                panic("tsc_init: EFI not supported!\n");
        }
 
-       kprintf(" BUS: Frequency = %6d.%04dMHz, "
-               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, "
-               "cvtInt = %08X.%08X\n",
+       kprintf(" BUS: Frequency = %6d.%06dMHz, "
+               "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx\n",
                (uint32_t)(busFreq / Mega),
                (uint32_t)(busFreq % Mega), 
                (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
-               (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t,
-               (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt);
+               (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t);
 
        /*
         * Get the TSC increment.  The TSC is incremented by this
@@ -206,8 +223,12 @@ tsc_init(void)
         * to and from nano-seconds.
         * The tsc granularity is also called the "bus ratio". If the N/2 bit
         * is set this indicates the bus ratio is 0.5 more than this - i.e.
-        * that the true bus ratio is (2*tscGranularity + 1)/2.
+        * that the true bus ratio is (2*tscGranularity + 1)/2. If we cannot
+        * determine the TSC conversion, assume it ticks at the bus frequency.
         */
+       if (tscGranularity == 0)
+               tscGranularity = 1;
+
        if (N_by_2_bus_ratio)
                tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
        else
@@ -216,8 +237,8 @@ tsc_init(void)
        tscFreq = ((1 * Giga)  << 32) / tscFCvtt2n;
        tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
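With the N/2 bit set, the true bus ratio is tscGranularity + 1/2, which is where the `* 2 / (1 + 2*tscGranularity)` comes from; the new gran == 0 guard makes an undetermined ratio degrade to "TSC ticks at bus frequency" rather than divide by zero:

	/* N/2 case:  tscFreq     = busFreq * (gran + 1/2)
	 * therefore: tscFCvtt2n  = busFCvtt2n / (gran + 1/2)
	 *                        = busFCvtt2n * 2 / (2*gran + 1)	*/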
 
-       kprintf(" TSC: Frequency = %6d.%04dMHz, "
-               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
+       kprintf(" TSC: Frequency = %6d.%06dMHz, "
+               "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx, gran = %lld%s\n",
                (uint32_t)(tscFreq / Mega),
                (uint32_t)(tscFreq % Mega), 
                (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
index c295f6b03c957920c8001bdde5cebd9e78215755..51a18b29dcd7454005dd8b3b1550cb15403ca2ec 100644
  
 #ifndef _I386_VMX_ASM_H_
 #define _I386_VMX_ASM_H_
-#include <mach/machine/vm_types.h>
-#include <mach/boolean.h>
-#include <kern/assert.h>
-#include <i386/eflags.h>
-#include <i386/seg.h>
 
 #define VMX_FAIL_INVALID       -1
 #define VMX_FAIL_VALID         -2
 #define VMX_SUCCEED                    0
 
-__attribute__((always_inline)) static inline void enter_64bit_mode(void) {
-       __asm__ __volatile__ (
-               ".byte   0xea    /* far jump longmode */        \n\t"
-               ".long   1f                                     \n\t"
-               ".word   %P0                                    \n\t"
-               ".code64                                        \n\t"
-               "1:"
-               :: "i" (KERNEL64_CS)
-       );
-}
-__attribute__((always_inline)) static inline void enter_compat_mode(void) {
-       asm(
-               "ljmp    *4f                                    \n\t"
-       "4:                                                     \n\t"
-               ".long   5f                                     \n\t"
-               ".word   %P0                                    \n\t"
-               ".code32                                        \n\t"
-       "5:"
-               :: "i" (KERNEL32_CS)
-       );
-}
-
-#define __VMXOFF(res)                                          \
-       __asm__ __volatile__ (                                  \
-               "vmxoff         \n\t"                           \
-               "cmovcl %2, %0  \n\t"   /* CF = 1, ZF = 0 */    \
-               "cmovzl %3, %0"         /* CF = 0, ZF = 1 */    \
-               : "=&r" (res)                           \
-               : "0" (VMX_SUCCEED),                            \
-                 "r" (VMX_FAIL_INVALID),                       \
-                 "r" (VMX_FAIL_VALID)                          \
-               : "memory", "cc"                                \
-       )
-
-#define __VMXON(addr, res)                                     \
-       __asm__ __volatile__ (                                  \
-               "vmxon %4       \n\t"                           \
-               "cmovcl %2, %0  \n\t"   /* CF = 1, ZF = 0 */    \
-               "cmovzl %3, %0"         /* CF = 0, ZF = 1 */    \
-               : "=&r" (res)                                   \
-               : "0" (VMX_SUCCEED),                            \
-                 "r" (VMX_FAIL_INVALID),                       \
-                 "r" (VMX_FAIL_VALID),                         \
-                 "m" (*addr)                                   \
-               : "memory", "cc"                                \
-       );
-
-
-/*
- *     __vmxoff -- Leave VMX Operation
- *
- */
-static inline int
-__vmxoff(void)
-{
-       int result;
-#if defined (__x86_64__)
-       __VMXOFF(result);
-#else
-       if (ml_is64bit()) {
-               /* don't put anything between these lines! */
-               enter_64bit_mode();
-               __VMXOFF(result);
-               enter_compat_mode();
-       } else {
-               __VMXOFF(result);
-       }
-#endif
-       return result;
-}
-
-/*
- *     __vmxon -- Enter VMX Operation
- *
- */
- static inline int
-__vmxon(addr64_t *v)
- {
-       int result;
-#if defined (__x86_64__)
-       __VMXON(v, result);
-#else
-       if (ml_is64bit()) {
-               /* don't put anything between these lines! */
-               enter_64bit_mode();
-               __VMXON(v, result);
-               enter_compat_mode();
-       } else {
-               __VMXON(v, result);
-       }
-#endif
-       return result;
-}
-
 /*
  * VMX Capability Registers (VCR)
  *
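The deleted macros encode Intel's VMX status convention: after VMXON/VMXOFF, CF=1 means VMfailInvalid and ZF=1 means VMfailValid, which the cmovc/cmovz pair folds into one int. The replacement __vmxon/__vmxoff externs (declared in vmx_cpu.h below, with the bodies presumably moved to assembly) report the same tri-state; the flag decoding in plain C:

	/* Sketch of the EFLAGS-to-result mapping used above. */
	static inline int
	vmx_result(int cf, int zf)
	{
		if (cf)
			return VMX_FAIL_INVALID;	/* CF=1: VMfailInvalid */
		if (zf)
			return VMX_FAIL_VALID;		/* ZF=1: VMfailValid */
		return VMX_SUCCEED;
	}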
index 22cebe2d874cdfa4bf0566c481687cc50db0eb16..76b4c0ee3ef061c5554b13b2898ed4f2ee838a36 100644
@@ -211,7 +211,14 @@ vmx_on(void *arg __unused)
        assert(vmx_is_cr0_valid(&cpu->specs));
        assert(vmx_is_cr4_valid(&cpu->specs));
        
-       if ((result = __vmxon(&vmxon_region_paddr)) != VMX_SUCCEED) {
+#if defined(__i386__)
+       if (!cpu_mode_is64bit())
+               result = VMX_FAIL_INVALID; /* Not supported in legacy mode */
+       else
+#endif
+       result = __vmxon(vmxon_region_paddr);
+
+       if (result != VMX_SUCCEED) {
                panic("vmx_on: unexpected return %d from __vmxon()", result);
        }
 }
@@ -226,7 +233,14 @@ vmx_off(void *arg __unused)
        int result;
        
        /* Tell the CPU to release the VMXON region */
-       if ((result = __vmxoff()) != VMX_SUCCEED) {
+#if defined(__i386__)
+       if (!cpu_mode_is64bit())
+               result = VMX_FAIL_INVALID; /* Not supported in legacy mode */
+       else
+#endif
+       result = __vmxoff();
+
+       if (result != VMX_SUCCEED) {
                panic("vmx_off: unexpected return %d from __vmxoff()", result);
        }
 }
index 255ba421c33ebf3d38b5dbb8cd238fc6eba22c7b..bb9f5ad51c65fd5be392ab1fc3b5e672fe876eee 100644
@@ -93,4 +93,16 @@ void vmx_get_specs(void);
 void vmx_resume(void);
 void vmx_suspend(void);
 
+/*
+ *     __vmxoff -- Leave VMX Operation
+ *
+ */
+extern int __vmxoff(void);
+
+/*
+ *     __vmxon -- Enter VMX Operation
+ *
+ */
+extern int __vmxon(addr64_t v);
+
 #endif /* _I386_VMX_CPU_H_ */
index 5956602399f409423ba56efd98bd8f3481d79cff..e14c8d5e9ef9930455010cbb259cce6bb934bac8 100644
@@ -63,7 +63,6 @@
  *     Primitive functions to manipulate translation entries.
  */
 
-#include <mach_kdb.h>
 #include <mach_debug.h>
 
 #include <mach/kern_return.h>
 #include <kern/sched_prim.h>
 #include <kern/zalloc.h>
 #include <kern/misc_protos.h>
-#if MACH_KDB
-#include <kern/task.h>
-#endif
 #include <ipc/port.h>
 #include <ipc/ipc_entry.h>
 #include <ipc/ipc_space.h>
-#include <ipc/ipc_splay.h>
 #include <ipc/ipc_object.h>
 #include <ipc/ipc_hash.h>
 #include <ipc/ipc_table.h>
 #include <ipc/ipc_port.h>
 #include <string.h>
 
-zone_t ipc_tree_entry_zone;
-
-
-
-/*
- * Forward declarations
- */
-boolean_t ipc_entry_tree_collision(
-       ipc_space_t             space,
-       mach_port_name_t        name);
-
-/*
- *     Routine:        ipc_entry_tree_collision
- *     Purpose:
- *             Checks if "name" collides with an allocated name
- *             in the space's tree.  That is, returns TRUE
- *             if the splay tree contains a name with the same
- *             index as "name".
- *     Conditions:
- *             The space is locked (read or write) and active.
- */
-
-boolean_t
-ipc_entry_tree_collision(
-       ipc_space_t             space,
-       mach_port_name_t        name)
-{
-       mach_port_index_t index;
-       mach_port_name_t lower, upper;
-
-       assert(space->is_active);
-
-       /*
-        *      Check if we collide with the next smaller name
-        *      or the next larger name.
-        */
-
-       ipc_splay_tree_bounds(&space->is_tree, name, &lower, &upper);
-
-       index = MACH_PORT_INDEX(name);
-       return (((lower != (mach_port_name_t)~0) && 
-                (MACH_PORT_INDEX(lower) == index)) ||
-               ((upper != 0) && (MACH_PORT_INDEX(upper) == index)));
-}
-
 /*
  *     Routine:        ipc_entry_lookup
  *     Purpose:
@@ -147,44 +97,18 @@ ipc_entry_lookup(
        mach_port_index_t index;
        ipc_entry_t entry;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
                        
        index = MACH_PORT_INDEX(name);
-       /*
-        * If space is fast, we assume no splay tree and name within table
-        * bounds, but still check generation numbers (if enabled) and
-        * look for null entries.
-        */
-       if (is_fast_space(space)) {
-               entry = &space->is_table[index];
+       if (index < space->is_table_size) {
+               entry = &space->is_table[index];
                if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
                    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-                       entry = IE_NULL;
+                       entry = IE_NULL;                
        }
-       else
-       if (index < space->is_table_size) {
-               entry = &space->is_table[index];
-               if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name))
-                       if (entry->ie_bits & IE_BITS_COLLISION) {
-                               assert(space->is_tree_total > 0);
-                               goto tree_lookup;
-                       } else
-                               entry = IE_NULL;
-               else if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-                       entry = IE_NULL;
-       } else if (space->is_tree_total == 0)
-               entry = IE_NULL;
        else {
-           tree_lookup:
-               entry = (ipc_entry_t)
-                               ipc_splay_tree_lookup(&space->is_tree, name);
-               /* with sub-space introduction, an entry may appear in      */
-               /* the splay tree and yet not show rights for this subspace */
-               if(entry != IE_NULL)  {
-                       if(!(IE_BITS_TYPE(entry->ie_bits)))
-                               entry = IE_NULL; 
-               }
+               entry = IE_NULL;
        }
 
        assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits));
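With the splay tree gone, a lookup is a bounds check plus a generation compare. A Mach port name packs both pieces: the low byte is a generation counter and the remaining bits are the table index, so reusing a freed slot bumps the generation and invalidates any stale name. Illustratively, using the macros from osfmk/mach/port.h:

	mach_port_name_t  name = 0x1103;
	mach_port_index_t idx  = MACH_PORT_INDEX(name);	/* 0x11: slot 17 */
	mach_port_gen_t   gen  = MACH_PORT_GEN(name);	/* generation 3, kept
							   shifted to line up
							   with ie_bits */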
@@ -213,7 +137,7 @@ ipc_entry_get(
        mach_port_index_t first_free;
        ipc_entry_t free_entry;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        {
                table = space->is_table;
@@ -222,6 +146,7 @@ ipc_entry_get(
                if (first_free == 0)
                        return KERN_NO_SPACE;
 
+               assert(first_free < space->is_table_size);
                free_entry = &table[first_free];
                table->ie_next = free_entry->ie_next;
        }
@@ -280,7 +205,7 @@ ipc_entry_alloc(
        is_write_lock(space);
 
        for (;;) {
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_write_unlock(space);
                        return KERN_INVALID_TASK;
                }
@@ -308,6 +233,7 @@ ipc_entry_alloc(
  *             KERN_SUCCESS            Allocated a new entry.
  *             KERN_INVALID_TASK       The space is dead.
  *             KERN_RESOURCE_SHORTAGE  Couldn't allocate memory.
+ *             KERN_FAILURE            Couldn't allocate requested name.
  */
 
 kern_return_t
@@ -318,7 +244,6 @@ ipc_entry_alloc_name(
 {
        mach_port_index_t index = MACH_PORT_INDEX(name);
        mach_port_gen_t gen = MACH_PORT_GEN(name);
-       ipc_tree_entry_t tentry = ITE_NULL;
 
        assert(MACH_PORT_VALID(name));
 
@@ -327,12 +252,9 @@ ipc_entry_alloc_name(
 
        for (;;) {
                ipc_entry_t entry;
-               ipc_tree_entry_t tentry2;
-               ipc_table_size_t its;
 
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_write_unlock(space);
-                       if (tentry) ite_free(tentry);
                        return KERN_INVALID_TASK;
                }
 
@@ -352,18 +274,27 @@ ipc_entry_alloc_name(
                        entry = &table[index];
 
                        if (index == 0) {
+                               /* case #1 - the entry is reserved */
                                assert(!IE_BITS_TYPE(entry->ie_bits));
                                assert(!IE_BITS_GEN(entry->ie_bits));
+                               is_write_unlock(space);                         
+                               return KERN_FAILURE;
                        } else if (IE_BITS_TYPE(entry->ie_bits)) {
                                if (IE_BITS_GEN(entry->ie_bits) == gen) {
+                                       /* case #2 -- the entry is in use, for the same name */
                                        *entryp = entry;
-                                       assert(!tentry);
                                        return KERN_SUCCESS;
+                               } else {
+                                       /* case #3 -- the entry is in use, for a different name. */
+                                       /* Collisions are not allowed */
+                                       is_write_unlock(space);                                 
+                                       return KERN_FAILURE;
                                }
                        } else {
                                mach_port_index_t free_index, next_index;
 
                                /*
+                                *      case #4 -- the entry is free
                                 *      Rip the entry out of the free list.
                                 */
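The loop that computes free_index/next_index sits in the unchanged context elided between these hunks: the free list threads through ie_next with table[0].ie_next as its head, and claiming a specific index means finding its predecessor on that chain. Conceptually (a sketch, not the verbatim elided code):

	/* The slot was verified free (no type bits), so it must be on
	 * the chain; walk until its predecessor is found. */
	for (free_index = 0;
	     (next_index = table[free_index].ie_next) != index;
	     free_index = next_index)
		continue;
	/* then unlink, as below */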
 
@@ -375,123 +306,36 @@ ipc_entry_alloc_name(
 
                                table[free_index].ie_next =
                                        table[next_index].ie_next;
+                               
+                               /* mark the previous entry modified - reconstructing the name */
+                               ipc_entry_modified(space, 
+                                                  MACH_PORT_MAKE(free_index, 
+                                                       IE_BITS_GEN(table[free_index].ie_bits)),
+                                                  &table[free_index]);
 
                                entry->ie_bits = gen;
                                entry->ie_request = IE_REQ_NONE;
                                *entryp = entry;
 
                                assert(entry->ie_object == IO_NULL);
-                               if (is_fast_space(space))
-                                       assert(!tentry);
-                               else if (tentry)
-                                       ite_free(tentry);
                                return KERN_SUCCESS;
                        }
                }
 
                /*
-                * In a fast space, ipc_entry_alloc_name may be
-                * used only to add a right to a port name already
-                * known in this space.
-                */
-               if (is_fast_space(space)) {
-                       is_write_unlock(space);
-                       assert(!tentry);
-                       return KERN_FAILURE;
-               }
-
-               /*
-                *      Before trying to allocate any memory,
-                *      check if the entry already exists in the tree.
-                *      This avoids spurious resource errors.
-                *      The splay tree makes a subsequent lookup/insert
-                *      of the same name cheap, so this costs little.
+                *      We grow the table so that the name
+                *      index fits in the array space.
+                *      Because the space will be unlocked,
+                *      we must restart.
                 */
-
-               if ((space->is_tree_total > 0) &&
-                   ((tentry2 = ipc_splay_tree_lookup(&space->is_tree, name))
-                    != ITE_NULL)) {
-                       assert(tentry2->ite_space == space);
-                       assert(IE_BITS_TYPE(tentry2->ite_bits));
-
-                       *entryp = &tentry2->ite_entry;
-                       if (tentry) ite_free(tentry);
-                       return KERN_SUCCESS;
-               }
-
-               its = space->is_table_next;
-
-               /*
-                *      Check if the table should be grown.
-                *
-                *      Note that if space->is_table_size == its->its_size,
-                *      then we won't ever try to grow the table.
-                *
-                *      Note that we are optimistically assuming that name
-                *      doesn't collide with any existing names.  (So if
-                *      it were entered into the tree, is_tree_small would
-                *      be incremented.)  This is OK, because even in that
-                *      case, we don't lose memory by growing the table.
-                */
-               if ((space->is_table_size <= index) &&
-                   (index < its->its_size) &&
-                   (((its->its_size - space->is_table_size) *
-                     sizeof(struct ipc_entry)) <
-                    ((space->is_tree_small + 1) *
-                     sizeof(struct ipc_tree_entry)))) {
-                       kern_return_t kr;
-
-                       /*
-                        *      Can save space by growing the table.
-                        *      Because the space will be unlocked,
-                        *      we must restart.
-                        */
-
-                       kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
-                       assert(kr != KERN_NO_SPACE);
-                       if (kr != KERN_SUCCESS) {
-                               /* space is unlocked */
-                               if (tentry) ite_free(tentry);
-                               return kr;
-                       }
-
-                       continue;
-               }
-
-               /*
-                *      If a splay-tree entry was allocated previously,
-                *      go ahead and insert it into the tree.
-                */
-
-               if (tentry != ITE_NULL) {
-
-                       space->is_tree_total++;
-
-                       if (index < space->is_table_size) {
-                               entry = &space->is_table[index];
-                               entry->ie_bits |= IE_BITS_COLLISION;
-                       } else if ((index < its->its_size) &&
-                                  !ipc_entry_tree_collision(space, name))
-                               space->is_tree_small++;
-
-                       ipc_splay_tree_insert(&space->is_tree, name, tentry);
-                       tentry->ite_bits = 0;
-                       tentry->ite_request = 0;
-                       tentry->ite_object = IO_NULL;
-                       tentry->ite_space = space;
-                       *entryp = &tentry->ite_entry;
-                       return KERN_SUCCESS;
+                kern_return_t kr;
+               kr = ipc_entry_grow_table(space, index);
+               assert(kr != KERN_NO_SPACE);
+               if (kr != KERN_SUCCESS) {
+                       /* space is unlocked */
+                       return kr;
                }
-
-               /*
-                *      Allocate a tree entry and try again.
-                */
-
-               is_write_unlock(space);
-               tentry = ite_alloc();
-               if (tentry == ITE_NULL)
-                       return KERN_RESOURCE_SHORTAGE;
-               is_write_lock(space);
+               continue;
        }
 }
 
@@ -514,7 +358,7 @@ ipc_entry_dealloc(
        ipc_entry_num_t size;
        mach_port_index_t index;
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(entry->ie_object == IO_NULL);
        assert(entry->ie_request == IE_REQ_NONE);
 
@@ -527,113 +371,78 @@ ipc_entry_dealloc(
        table = space->is_table;
        size = space->is_table_size;
 
-       if (is_fast_space(space)) {
-               assert(index < size);
-               assert(entry == &table[index]);
+       if ((index < size) && (entry == &table[index])) {
                assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
-               assert(!(entry->ie_bits & IE_BITS_COLLISION));
                entry->ie_bits &= IE_BITS_GEN_MASK;
                entry->ie_next = table->ie_next;
                table->ie_next = index;
-               return;
-       }
-
-
-       if ((index < size) && (entry == &table[index])) {
-               assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
-
-               if (entry->ie_bits & IE_BITS_COLLISION) {
-                       struct ipc_splay_tree small, collisions;
-                       ipc_tree_entry_t tentry;
-                       mach_port_name_t tname;
-                       boolean_t pick;
-                       ipc_object_t obj;
-
-                       /* must move an entry from tree to table */
-
-                       ipc_splay_tree_split(&space->is_tree,
-                                            MACH_PORT_MAKE(index+1, 0),
-                                            &collisions);
-                       ipc_splay_tree_split(&collisions,
-                                            MACH_PORT_MAKE(index, 0),
-                                            &small);
-
-                       pick = ipc_splay_tree_pick(&collisions,
-                                                  &tname, &tentry);
-                       assert(pick);
-                       assert(MACH_PORT_INDEX(tname) == index);
-
-                       entry->ie_object = obj = tentry->ite_object;
-                       entry->ie_bits = tentry->ite_bits|MACH_PORT_GEN(tname);
-                       entry->ie_request = tentry->ite_request;
-
-                       assert(tentry->ite_space == space);
-
-                       if (IE_BITS_TYPE(tentry->ite_bits)==MACH_PORT_TYPE_SEND) {
-                               ipc_hash_global_delete(space, obj,
-                                                      tname, tentry);
-                               ipc_hash_local_insert(space, obj,
-                                                     index, entry);
-                       }
-
-                       ipc_splay_tree_delete(&collisions, tname, tentry);
-
-                       assert(space->is_tree_total > 0);
-                       space->is_tree_total--;
-
-                       /* check if collision bit should still be on */
-
-                       pick = ipc_splay_tree_pick(&collisions,
-                                                  &tname, &tentry);
-                       if (pick) {
-                               entry->ie_bits |= IE_BITS_COLLISION;
-                               ipc_splay_tree_join(&space->is_tree,
-                                                   &collisions);
-                       }
-
-                       ipc_splay_tree_join(&space->is_tree, &small);
-
-               } else {
-                       entry->ie_bits &= IE_BITS_GEN_MASK;
-                       entry->ie_next = table->ie_next;
-                       table->ie_next = index;
-               }
-
        } else {
-               ipc_tree_entry_t tentry = (ipc_tree_entry_t) entry;
-
-               assert(tentry->ite_space == space);
-
-               ipc_splay_tree_delete(&space->is_tree, name, tentry);
+               /*
+                * Nothing to do.  The entry does not match
+                * so there is nothing to deallocate.
+                */
+                assert(index < size);
+               assert(entry == &table[index]);
+               assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
+       }
+       ipc_entry_modified(space, name, entry);
+}
 
-               assert(space->is_tree_total > 0);
-               space->is_tree_total--;
+/*
+ *     Routine:        ipc_entry_modified
+ *     Purpose:
+ *             Note that an entry was modified in a space.
+ *     Conditions:
+ *             Assumes exclusive write access to the space,
+ *             either through a write lock or being the cleaner
+ *             on an inactive space.
+ */
 
-               if (index < size) {
-                       ipc_entry_t ientry = &table[index];
+void
+ipc_entry_modified(
+       ipc_space_t             space,
+       mach_port_name_t        name,
+       __assert_only ipc_entry_t entry)
+{
+       ipc_entry_t table;
+       ipc_entry_num_t size;
+       mach_port_index_t index;
 
-                       assert(ientry->ie_bits & IE_BITS_COLLISION);
-                       
-                       if (!ipc_entry_tree_collision(space, name))
-                               ientry->ie_bits &= ~IE_BITS_COLLISION;
+       index = MACH_PORT_INDEX(name);
+       table = space->is_table;
+       size = space->is_table_size;
 
-               } else if ((index < space->is_table_next->its_size) &&
-                          !ipc_entry_tree_collision(space, name)) {
+       assert(index < size);
+       assert(entry == &table[index]);
 
-                       assert(space->is_tree_small > 0);
+       assert(space->is_low_mod <= size);
+       assert(space->is_high_mod < size);
 
-                       space->is_tree_small--;
-               }
-       }
+       if (index < space->is_low_mod)
+               space->is_low_mod = index;
+       if (index > space->is_high_mod)
+               space->is_high_mod = index;
 }
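is_low_mod/is_high_mod track a dirty window over the table: every mutation widens [is_low_mod, is_high_mod], and the grow path re-copies only that range on each rescan pass. An empty window is encoded as low > high, so the reset looks like:

	/* Reset to "nothing modified" (low strictly above high): */
	space->is_low_mod  = space->is_table_size;
	space->is_high_mod = 0;

	/* ... after copying without the lock: */
	if (space->is_low_mod <= space->is_high_mod) {
		/* entries moved underneath us; rescan that range */
	}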
 
+#define IPC_ENTRY_GROW_STATS 1
+#if IPC_ENTRY_GROW_STATS
+static uint64_t ipc_entry_grow_count = 0;
+static uint64_t ipc_entry_grow_rescan = 0;
+static uint64_t ipc_entry_grow_rescan_max = 0;
+static uint64_t ipc_entry_grow_rescan_entries = 0;
+static uint64_t ipc_entry_grow_rescan_entries_max = 0;
+static uint64_t        ipc_entry_grow_freelist_entries = 0;
+static uint64_t        ipc_entry_grow_freelist_entries_max = 0;
+#endif
+
 /*
  *     Routine:        ipc_entry_grow_table
  *     Purpose:
  *             Grows the table in a space.
  *     Conditions:
  *             The space must be write-locked and active on entry.
- *             If successful, it is also returned locked.
+ *             If successful, the space is also returned locked.
+ *             On failure, the space is returned unlocked.
  *             Allocates memory.
  *     Returns:
  *             KERN_SUCCESS            Grew the table.
@@ -650,342 +459,233 @@ ipc_entry_grow_table(
 {
        ipc_entry_num_t osize, size, nsize, psize;
 
-       do {
-               boolean_t         reallocated=FALSE;
-       
-               ipc_entry_t otable, table;
-               ipc_table_size_t oits, its, nits;
-               mach_port_index_t i, free_index;
-
-               assert(space->is_active);
+       ipc_entry_t otable, table;
+       ipc_table_size_t oits, its, nits;
+       mach_port_index_t i, free_index;
+       mach_port_index_t low_mod, hi_mod;
+       ipc_table_index_t sanity;
+#if IPC_ENTRY_GROW_STATS
+       uint64_t rescan_count = 0;
+#endif
+       assert(is_active(space));
 
-               if (space->is_growing) {
-                       /*
-                        *      Somebody else is growing the table.
-                        *      We just wait for them to finish.
-                        */
+       if (is_growing(space)) {
+               /*
+                *      Somebody else is growing the table.
+                *      We just wait for them to finish.
+                */
 
-                       is_write_sleep(space);
-                       return KERN_SUCCESS;
-               }
+               is_write_sleep(space);
+               return KERN_SUCCESS;
+       }
 
-               otable = space->is_table;
+       otable = space->is_table;
                
-               its = space->is_table_next;
-               size = its->its_size;
+       its = space->is_table_next;
+       size = its->its_size;
                
-               /*
-                * Since is_table_next points to the next natural size
-                * we can identify the current size entry.
-                */
-               oits = its - 1;
-               osize = oits->its_size;
+       /*
+        * Since is_table_next points to the next natural size
+        * we can identify the current size entry.
+        */
+       oits = its - 1;
+       osize = oits->its_size;
                
-               /*
-                * If there is no target size, then the new size is simply
-                * specified by is_table_next.  If there is a target
-                * size, then search for the next entry.
-                */
-               if (target_size != ITS_SIZE_NONE) {
-                       if (target_size <= osize) {
-                               is_write_unlock(space);
-                               return KERN_SUCCESS;
-                       }
-
-                       psize = osize;
-                       while ((psize != size) && (target_size > size)) {
-                               psize = size;
-                               its++;
-                               size = its->its_size;
-                       }
-                       if (psize == size) {
-                               is_write_unlock(space);
-                               return KERN_NO_SPACE;
-                       }
+       /*
+        * If there is no target size, then the new size is simply
+        * specified by is_table_next.  If there is a target
+        * size, then search for the next entry.
+        */
+       if (target_size != ITS_SIZE_NONE) {
+               if (target_size <= osize) {
+                       /* the space is locked */                       
+                       return KERN_SUCCESS;
                }
 
-               if (osize == size) {
+               psize = osize;
+               while ((psize != size) && (target_size > size)) {
+                       psize = size;
+                       its++;
+                       size = its->its_size;
+               }
+               if (psize == size) {
                        is_write_unlock(space);
                        return KERN_NO_SPACE;
                }
-               nits = its + 1;
-               nsize = nits->its_size;
-
-               assert((osize < size) && (size <= nsize));
-
-               /*
-                *      OK, we'll attempt to grow the table.
-                *      The realloc requires that the old table
-                *      remain in existence.
-                */
+       }
 
-               space->is_growing = TRUE;
+       if (osize == size) {
                is_write_unlock(space);
+               return KERN_NO_SPACE;
+       }
+       nits = its + 1;
+       nsize = nits->its_size;
+       assert((osize < size) && (size <= nsize));
 
-               if (it_entries_reallocable(oits)) {
-                       table = it_entries_realloc(oits, otable, its);
-                       reallocated=TRUE;
-               }
-               else {
-                       table = it_entries_alloc(its);
-               }
+       /*
+        * We'll attempt to grow the table.
+        *
+        * Because we will be copying without the space lock, reset
+        * the lowest_mod index to just beyond the end of the current
+        * table.  Modification of entries (other than hashes) will
+        * bump this downward, and we only have to reprocess entries
+        * above that mark.  Eventually, we'll get done.
+        */
+       is_start_growing(space);
+       space->is_low_mod = osize;
+       space->is_high_mod = 0;
+#if IPC_ENTRY_GROW_STATS
+       ipc_entry_grow_count++;
+#endif
+       is_write_unlock(space);
 
+       table = it_entries_alloc(its);
+       if (table == IE_NULL) {
                is_write_lock(space);
-               space->is_growing = FALSE;
-
-               /*
-                *      We need to do a wakeup on the space,
-                *      to rouse waiting threads.  We defer
-                *      this until the space is unlocked,
-                *      because we don't want them to spin.
-                */
-
-               if (table == IE_NULL) {
-                       is_write_unlock(space);
-                       thread_wakeup((event_t) space);
-                       return KERN_RESOURCE_SHORTAGE;
-               }
-
-               if (!space->is_active) {
-                       /*
-                        *      The space died while it was unlocked.
-                        */
-
-                       is_write_unlock(space);
-                       thread_wakeup((event_t) space);
-                       it_entries_free(its, table);
-                       is_write_lock(space);
-                       return KERN_SUCCESS;
-               }
+               is_done_growing(space);
+               is_write_unlock(space);
+               thread_wakeup((event_t) space);
+               return KERN_RESOURCE_SHORTAGE;
+       }
 
-               assert(space->is_table == otable);
-               assert((space->is_table_next == its) ||
-                      (target_size != ITS_SIZE_NONE));
-               assert(space->is_table_size == osize);
+       /* initialize new entries (free chain in backwards order) */
+       for (i = osize; i < size; i++) {
+               table[i].ie_object = IO_NULL;
+               table[i].ie_bits = IE_BITS_GEN_MASK;
+               table[i].ie_index = 0;
+               table[i].ie_next = i + 1;
+       }
+       table[size-1].ie_next = 0;
 
-               space->is_table = table;
-               space->is_table_size = size;
-               space->is_table_next = nits;
+       /* clear out old entries in new table */
+       memset((void *)table, 0, osize * sizeof(*table));
 
-               /*
-                *      If we did a realloc, it remapped the data.
-                *      Otherwise we copy by hand first.  Then we have
-                *      to zero the new part and the old local hash
-                *   values.
-                */
-               if (!reallocated) 
-                       (void) memcpy((void *) table, (const void *) otable,
-                             osize * (sizeof(struct ipc_entry)));
+       low_mod = 0;
+       hi_mod = osize - 1;
+ rescan:       
+       /*
+        * Within the range of the table that changed, determine what we
+        * have to take action on. For each entry, take a snapshot of the
+        * corresponding entry in the old table (so it won't change
+        * during this iteration). The snapshot may not be self-consistent
+        * (if we caught it in the middle of being changed), so be very
+        * cautious with the values.
+        */
+       for (i = low_mod; i <= hi_mod; i++) {
+               ipc_entry_t entry = &table[i];
+               struct ipc_entry osnap = otable[i]; 
 
-               for (i = 0; i < osize; i++)
-                       table[i].ie_index = 0;
+               if (entry->ie_object != osnap.ie_object ||
+                   IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap.ie_bits)) {
+                       
+                       if (entry->ie_object != IO_NULL &&
+                           IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
+                               ipc_hash_table_delete(table, size, entry->ie_object, i, entry);
 
-               (void) memset((void *) (table + osize) , 0,
-                       ((size - osize) * (sizeof(struct ipc_entry))));
+                       entry->ie_object = osnap.ie_object;
+                       entry->ie_bits = osnap.ie_bits;
+                       entry->ie_request = osnap.ie_request; /* or ie_next */
 
-               /*
-                *      Put old entries into the reverse hash table.
-                */
-               for (i = 0; i < osize; i++) {
-                       ipc_entry_t entry = &table[i];
-
-                       if (IE_BITS_TYPE(entry->ie_bits)==MACH_PORT_TYPE_SEND) {
-                               ipc_hash_local_insert(space, entry->ie_object,
-                                                     i, entry);
-                       }
+                       if (entry->ie_object != IO_NULL &&
+                           IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
+                               ipc_hash_table_insert(table, size, entry->ie_object, i, entry);
+               } else {
+                       assert(entry->ie_object == osnap.ie_object);
+                       entry->ie_bits = osnap.ie_bits;
+                       entry->ie_request = osnap.ie_request; /* or ie_next */
                }
 
-               /*
-                *      If there are entries in the splay tree,
-                *      then we have work to do:
-                *              1) transfer entries to the table
-                *              2) update is_tree_small
-                */
-               assert(!is_fast_space(space) || space->is_tree_total == 0);
-               if (space->is_tree_total > 0) {
-                       mach_port_index_t index;
-                       boolean_t delete;
-                       struct ipc_splay_tree ignore;
-                       struct ipc_splay_tree move;
-                       struct ipc_splay_tree small;
-                       ipc_entry_num_t nosmall;
-                       ipc_tree_entry_t tentry;
-
-                       /*
-                        *      The splay tree divides into four regions,
-                        *      based on the index of the entries:
-                        *              1) 0 <= index < osize
-                        *              2) osize <= index < size
-                        *              3) size <= index < nsize
-                        *              4) nsize <= index
-                        *
-                        *      Entries in the first part are ignored.
-                        *      Entries in the second part, that don't
-                        *      collide, are moved into the table.
-                        *      Entries in the third part, that don't
-                        *      collide, are counted for is_tree_small.
-                        *      Entries in the fourth part are ignored.
-                        */
-
-                       ipc_splay_tree_split(&space->is_tree,
-                                            MACH_PORT_MAKE(nsize, 0),
-                                            &small);
-                       ipc_splay_tree_split(&small,
-                                            MACH_PORT_MAKE(size, 0),
-                                            &move);
-                       ipc_splay_tree_split(&move,
-                                            MACH_PORT_MAKE(osize, 0),
-                                            &ignore);
-
-                       /* move entries into the table */
-
-                       for (tentry = ipc_splay_traverse_start(&move);
-                            tentry != ITE_NULL;
-                            tentry = ipc_splay_traverse_next(&move, delete)) {
-
-                               mach_port_name_t name;
-                               mach_port_gen_t gen;
-                               mach_port_type_t type;
-                               ipc_entry_bits_t bits;
-                               ipc_object_t obj;
-                               ipc_entry_t entry;
-
-                               name = tentry->ite_name;
-                               gen = MACH_PORT_GEN(name);
-                               index = MACH_PORT_INDEX(name);
-
-                               assert(tentry->ite_space == space);
-                               assert((osize <= index) && (index < size));
-
-                               entry = &table[index];
-                               bits = entry->ie_bits;
-                               if (IE_BITS_TYPE(bits)) {
-                                       assert(IE_BITS_GEN(bits) != gen);
-                                       entry->ie_bits |= IE_BITS_COLLISION;
-                                       delete = FALSE;
-                                       continue;
-                               }
-                               
-                               bits = tentry->ite_bits;
-                               type = IE_BITS_TYPE(bits);
-                               assert(type != MACH_PORT_TYPE_NONE);
-
-                               entry->ie_bits = bits | gen;
-                               entry->ie_request = tentry->ite_request;
-                               entry->ie_object = obj = tentry->ite_object;
-
-                               if (type == MACH_PORT_TYPE_SEND) {
-                                       ipc_hash_global_delete(space, obj,
-                                                              name, tentry);
-                                       ipc_hash_local_insert(space, obj,
-                                                             index, entry);
-                               }
-                               space->is_tree_total--;
-                               delete = TRUE;
-                       }
-                       ipc_splay_traverse_finish(&move);
-
-                       /* count entries for is_tree_small */
-
-                       nosmall = 0; index = 0;
-                       for (tentry = ipc_splay_traverse_start(&small);
-                            tentry != ITE_NULL;
-                            tentry = ipc_splay_traverse_next(&small, FALSE)) {
-                               mach_port_index_t nindex;
-
-                               nindex = MACH_PORT_INDEX(tentry->ite_name);
-
-                               if (nindex != index) {
-                                       nosmall++;
-                                       index = nindex;
-                               }
-                       }
-                       ipc_splay_traverse_finish(&small);
-
-                       assert(nosmall <= (nsize - size));
-                       assert(nosmall <= space->is_tree_total);
-                       space->is_tree_small = nosmall;
+       }
+       table[0].ie_next = otable[0].ie_next;  /* always rebase the freelist */
 
-                       /* put the splay tree back together */
+       /*
+        * Find the end of the freelist (it should be short).  But be
+        * careful: the list items can change while the space is unlocked,
+        * so only follow links through truly free entries (stopping short
+        * in those cases is harmless, because we will rescan).
+        */
+       free_index = 0;
+       for (sanity = 0; sanity < osize; sanity++) {
+               if (table[free_index].ie_object != IPC_OBJECT_NULL)
+                       break;
+               i = table[free_index].ie_next;
+               if (i == 0 || i >= osize)
+                       break;
+               free_index = i;
+       }
+#if IPC_ENTRY_GROW_STATS
+       ipc_entry_grow_freelist_entries += sanity;
+       if (sanity > ipc_entry_grow_freelist_entries_max)
+               ipc_entry_grow_freelist_entries_max = sanity;
+#endif
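
The freelist being repaired here is threaded through the entries themselves:
table[0].ie_next holds the index of the first free entry, each free entry
chains onward through its own ie_next, and an index of 0 terminates the
list.  An illustrative sketch only (not part of this change; field names
follow the ipc_entry definitions in this commit):

	/* Walk the entry freelist head to tail; assumes exclusive access. */
	mach_port_index_t idx;

	for (idx = table[0].ie_next; idx != 0; idx = table[idx].ie_next)
		assert(table[idx].ie_object == IO_NULL); /* free entries hold no object */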
+               
+       is_write_lock(space);
 
-                       ipc_splay_tree_join(&space->is_tree, &small);
-                       ipc_splay_tree_join(&space->is_tree, &move);
-                       ipc_splay_tree_join(&space->is_tree, &ignore);
-               }
+       /*
+        *      We need to do a wakeup on the space,
+        *      to rouse waiting threads.  We defer
+        *      this until the space is unlocked,
+        *      because we don't want them to spin.
+        */
 
+       if (!is_active(space)) {
                /*
-                *      Add entries in the new part which still aren't used
-                *      to the free list.  Add them in reverse order,
-                *      and set the generation number to -1, so that
-                *      early allocations produce "natural" names.
+                *      The space died while it was unlocked.
                 */
 
-               free_index = table[0].ie_next;
-               for (i = size-1; i >= osize; --i) {
-                       ipc_entry_t entry = &table[i];
-
-                       if (entry->ie_bits == 0) {
-                               entry->ie_bits = IE_BITS_GEN_MASK;
-                               entry->ie_next = free_index;
-                               free_index = i;
-                       }
-               }
-               table[0].ie_next = free_index;
-
-               /*
-                *      Now we need to free the old table.
-                *      If the space dies or grows while unlocked,
-                *      then we can quit here.
-                */
+               is_done_growing(space);
                is_write_unlock(space);
                thread_wakeup((event_t) space);
-
-               it_entries_free(oits, otable);
+               it_entries_free(its, table);
                is_write_lock(space);
-               if (!space->is_active || (space->is_table_next != nits))
-                       return KERN_SUCCESS;
+               return KERN_SUCCESS;
+       }
 
-               /*
-                *      We might have moved enough entries from
-                *      the splay tree into the table that
-                *      the table can be profitably grown again.
-                *
-                *      Note that if size == nsize, then
-                *      space->is_tree_small == 0.
-                */
-       } while ((space->is_tree_small > 0) &&
-                (((nsize - size) * sizeof(struct ipc_entry)) <
-                 (space->is_tree_small * sizeof(struct ipc_tree_entry))));
+       /* If the space changed while unlocked, go back and process the changes */
+       if (space->is_low_mod < osize) {
+               assert(space->is_high_mod > 0);
+               low_mod = space->is_low_mod;
+               space->is_low_mod = osize;
+               hi_mod = space->is_high_mod;
+               space->is_high_mod = 0;
+               is_write_unlock(space);
+#if IPC_ENTRY_GROW_STATS
+               rescan_count++;
+               if (rescan_count > ipc_entry_grow_rescan_max)
+                       ipc_entry_grow_rescan_max = rescan_count;
+
+               ipc_entry_grow_rescan++;
+               ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1;
+               if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max)
+                       ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1;
+#endif
+               goto rescan;
+       }
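
The rescan above depends on ipc_entry_modified() (declared in ipc_entry.h
below) maintaining a per-space dirty window while the grow path runs
unlocked.  A minimal sketch of that bookkeeping, assuming only the
is_low_mod and is_high_mod fields used above (the committed routine takes
an entry as well):

	/* Widen the space's modified window to cover one entry index.
	 * Called with the space write-locked; illustrative only. */
	static void
	entry_modified_sketch(ipc_space_t space, mach_port_name_t name)
	{
		mach_port_index_t index = MACH_PORT_INDEX(name);

		if (index < space->is_low_mod)
			space->is_low_mod = index;
		if (index > space->is_high_mod)
			space->is_high_mod = index;
	}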
 
-       return KERN_SUCCESS;
-}
+       /* link new free entries onto the rest of the freelist */
+       assert(table[free_index].ie_next == 0 &&
+              table[free_index].ie_object == IO_NULL);
+       table[free_index].ie_next = osize;
 
+       assert(space->is_table == otable);
+       assert((space->is_table_next == its) ||
+           (target_size != ITS_SIZE_NONE));
+       assert(space->is_table_size == osize);
 
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#define        printf  kdbprintf 
+       space->is_table = table;
+       space->is_table_size = size;
+       space->is_table_next = nits;
 
-ipc_entry_t    db_ipc_object_by_name(
-                       task_t                  task,
-                       mach_port_name_t        name);
+       is_done_growing(space);
+       is_write_unlock(space);
 
+       thread_wakeup((event_t) space);
 
-ipc_entry_t
-db_ipc_object_by_name(
-       task_t          task,
-       mach_port_name_t        name)
-{
-        ipc_space_t space = task->itk_space;
-        ipc_entry_t entry;
-        entry = ipc_entry_lookup(space, name);
-        if(entry != IE_NULL) {
-                iprintf("(task 0x%x, name 0x%x) ==> object 0x%x\n",
-                       task, name, entry->ie_object);
-                return (ipc_entry_t) entry->ie_object;
-        }
-        return entry;
+       /*
+        *      Now we need to free the old table.
+        */
+       it_entries_free(oits, otable);
+       is_write_lock(space);
+
+       return KERN_SUCCESS;
 }
-#endif /* MACH_KDB */
index 14d7d1846393bb29a68387c73fa7b3c7bba1244a..592a31c7332d93a7cb5947335620f8a8fc3edfaf 100644 (file)
@@ -80,9 +80,6 @@
  *     Spaces hold capabilities for ipc_object_t's.
  *     Each ipc_entry_t records a capability.  Most capabilities have
  *     small names, and the entries are elements of a table.
- *     Capabilities can have large names, and a splay tree holds
- *     those entries.  The cutoff point between the table and the tree
- *     is adjusted dynamically to minimize memory consumption.
  *
  *     The ie_index field of entries in the table implements
 *     an ordered hash table with open addressing and linear probing.
 struct ipc_entry {
        struct ipc_object *ie_object;
        ipc_entry_bits_t ie_bits;
+       mach_port_index_t ie_index;
        union {
                mach_port_index_t next;         /* next in freelist, or...  */
                ipc_table_index_t request;      /* dead name request notify */
        } index;
-       union {
-               mach_port_index_t table;
-               struct ipc_tree_entry *tree;
-       } hash;
 };
 
 #define        ie_request      index.request
 #define        ie_next         index.next
-#define        ie_index        hash.table
 
 #define IE_REQ_NONE            0               /* no request */
 
@@ -122,9 +115,6 @@ struct ipc_entry {
 #define        IE_BITS_TYPE_MASK       0x001f0000      /* 5 bits of capability type */
 #define        IE_BITS_TYPE(bits)      ((bits) & IE_BITS_TYPE_MASK)
 
-#define        IE_BITS_COLLISION       0x00800000      /* 1 bit for collisions */
-
-
 #ifndef NO_PORT_GEN
 #define        IE_BITS_GEN_MASK        0xff000000      /* 8 bits for generation */
 #define        IE_BITS_GEN(bits)       ((bits) & IE_BITS_GEN_MASK)
@@ -140,24 +130,6 @@ struct ipc_entry {
 
 #define        IE_BITS_RIGHT_MASK      0x007fffff      /* relevant to the right */
 
-struct ipc_tree_entry {
-       struct ipc_entry ite_entry;
-       mach_port_name_t ite_name;
-       struct ipc_space *ite_space;
-       struct ipc_tree_entry *ite_lchild;
-       struct ipc_tree_entry *ite_rchild;
-};
-
-#define        ite_bits        ite_entry.ie_bits
-#define        ite_object      ite_entry.ie_object
-#define        ite_request     ite_entry.ie_request
-#define        ite_next        ite_entry.hash.tree
-
-extern zone_t ipc_tree_entry_zone;
-
-#define ite_alloc()    ((ipc_tree_entry_t) zalloc(ipc_tree_entry_zone))
-#define        ite_free(ite)   zfree(ipc_tree_entry_zone, (ite))
-
 /*
  * Exported interfaces
  */
@@ -191,6 +163,12 @@ extern void ipc_entry_dealloc(
        mach_port_name_t        name,
        ipc_entry_t             entry);
 
+/* Mark an entry modified in a space */
+extern void ipc_entry_modified(
+       ipc_space_t             space,
+       mach_port_name_t        name,
+       ipc_entry_t             entry);
+
 /* Grow the table in a space */
 extern kern_return_t ipc_entry_grow_table(
        ipc_space_t             space,
index 2f43a63cf7b8aa5637c2d93f61590c9e6c1c9944..87f4fc6c3baa94b4b6dc11547a8274c550173b53 100644 (file)
  * Forward declarations 
  */
 
-/* Lookup (space, obj) in global hash table */
-boolean_t ipc_hash_global_lookup(
-       ipc_space_t             space,
-       ipc_object_t            obj,
-       mach_port_name_t        *namep,
-       ipc_tree_entry_t        *entryp);
-
-/* Insert an entry into the global reverse hash table */
-void ipc_hash_global_insert(
-       ipc_space_t             space,
-       ipc_object_t            obj,
-       mach_port_name_t        name,
-       ipc_tree_entry_t        entry);
-
 /* Delete an entry from the local reverse hash table */
 void ipc_hash_local_delete(
        ipc_space_t             space,
@@ -124,16 +110,7 @@ ipc_hash_lookup(
        mach_port_name_t        *namep,
        ipc_entry_t             *entryp)
 {
-       boolean_t       rv;
-
-       rv = ipc_hash_local_lookup(space, obj, namep, entryp);
-       if (!rv) {
-               assert(!is_fast_space(space) || space->is_tree_hash == 0);
-               if (space->is_tree_hash > 0)
-                       rv = ipc_hash_global_lookup(space, obj, namep,
-                               (ipc_tree_entry_t *) entryp);
-       }
-       return (rv);
+       return ipc_hash_table_lookup(space->is_table, space->is_table_size, obj, namep, entryp);
 }
 
 /*
@@ -155,14 +132,7 @@ ipc_hash_insert(
        mach_port_index_t index;
 
        index = MACH_PORT_INDEX(name);
-       if ((index < space->is_table_size) &&
-           (entry == &space->is_table[index]))
-               ipc_hash_local_insert(space, obj, index, entry);
-       else {
-               assert(!is_fast_space(space));
-               ipc_hash_global_insert(space, obj, name,
-                                      (ipc_tree_entry_t) entry);
-       }
+       ipc_hash_table_insert(space->is_table, space->is_table_size, obj, index, entry);
 }
 
 /*
@@ -183,184 +153,7 @@ ipc_hash_delete(
        mach_port_index_t index;
 
        index = MACH_PORT_INDEX(name);
-       if ((index < space->is_table_size) &&
-           (entry == &space->is_table[index]))
-               ipc_hash_local_delete(space, obj, index, entry);
-       else {
-               assert(!is_fast_space(space));
-               ipc_hash_global_delete(space, obj, name,
-                                      (ipc_tree_entry_t) entry);
-       }
-}
-
-/*
- *     The global reverse hash table holds splay tree entries.
- *     It is a simple open-chaining hash table with singly-linked buckets.
- *     Each bucket is locked separately, with an exclusive lock.
- *     Within each bucket, move-to-front is used.
- */
-
-typedef natural_t ipc_hash_index_t;
-
-ipc_hash_index_t ipc_hash_global_size;
-ipc_hash_index_t ipc_hash_global_mask;
-
-#define IH_GLOBAL_HASH(space, obj)                                     \
-       (((((ipc_hash_index_t) ((vm_offset_t)space)) >> 4) +            \
-         (((ipc_hash_index_t) ((vm_offset_t)obj)) >> 6)) &             \
-        ipc_hash_global_mask)
-
-typedef struct ipc_hash_global_bucket {
-       decl_lck_mtx_data(,     ihgb_lock_data)
-       ipc_tree_entry_t        ihgb_head;
-} *ipc_hash_global_bucket_t;
-
-#define        IHGB_NULL       ((ipc_hash_global_bucket_t) 0)
-
-#define        ihgb_lock_init(ihgb)    lck_mtx_init(&(ihgb)->ihgb_lock_data, &ipc_lck_grp, &ipc_lck_attr)
-#define        ihgb_lock(ihgb)                 lck_mtx_lock(&(ihgb)->ihgb_lock_data)
-#define        ihgb_unlock(ihgb)               lck_mtx_unlock(&(ihgb)->ihgb_lock_data)
-
-ipc_hash_global_bucket_t ipc_hash_global_table;
-
-/*
- *     Routine:        ipc_hash_global_lookup
- *     Purpose:
- *             Converts (space, obj) -> (name, entry).
- *             Looks in the global table, for splay tree entries.
- *             Returns TRUE if an entry was found.
- *     Conditions:
- *             The space must be locked (read or write) throughout.
- */
-
-boolean_t
-ipc_hash_global_lookup(
-       ipc_space_t                     space,
-       ipc_object_t                    obj,
-       mach_port_name_t                *namep,
-       ipc_tree_entry_t                *entryp)
-{
-       ipc_hash_global_bucket_t bucket;
-       ipc_tree_entry_t this, *last;
-
-       assert(space != IS_NULL);
-       assert(obj != IO_NULL);
-
-       assert(!is_fast_space(space));
-       bucket = &ipc_hash_global_table[IH_GLOBAL_HASH(space, obj)];
-       ihgb_lock(bucket);
-
-       if ((this = bucket->ihgb_head) != ITE_NULL) {
-               if ((this->ite_object == obj) &&
-                   (this->ite_space == space)) {
-                       /* found it at front; no need to move */
-
-                       *namep = this->ite_name;
-                       *entryp = this;
-               } else for (last = &this->ite_next;
-                           (this = *last) != ITE_NULL;
-                           last = &this->ite_next) {
-                       if ((this->ite_object == obj) &&
-                           (this->ite_space == space)) {
-                               /* found it; move to front */
-
-                               *last = this->ite_next;
-                               this->ite_next = bucket->ihgb_head;
-                               bucket->ihgb_head = this;
-
-                               *namep = this->ite_name;
-                               *entryp = this;
-                               break;
-                       }
-               }
-       }
-
-       ihgb_unlock(bucket);
-       return this != ITE_NULL;
-}
-
-/*
- *     Routine:        ipc_hash_global_insert
- *     Purpose:
- *             Inserts an entry into the global reverse hash table.
- *     Conditions:
- *             The space must be write-locked.
- */
-
-void
-ipc_hash_global_insert(
-       ipc_space_t                             space,
-       ipc_object_t                            obj,
-       __assert_only mach_port_name_t  name,
-       ipc_tree_entry_t                        entry)
-{
-       ipc_hash_global_bucket_t bucket;
-
-       assert(!is_fast_space(space));
-       assert(entry->ite_name == name);
-       assert(space != IS_NULL);
-       assert(entry->ite_space == space);
-       assert(obj != IO_NULL);
-       assert(entry->ite_object == obj);
-
-       space->is_tree_hash++;
-       assert(space->is_tree_hash <= space->is_tree_total);
-
-       bucket = &ipc_hash_global_table[IH_GLOBAL_HASH(space, obj)];
-       ihgb_lock(bucket);
-
-       /* insert at front of bucket */
-
-       entry->ite_next = bucket->ihgb_head;
-       bucket->ihgb_head = entry;
-
-       ihgb_unlock(bucket);
-}
-
-/*
- *     Routine:        ipc_hash_global_delete
- *     Purpose:
- *             Deletes an entry from the global reverse hash table.
- *     Conditions:
- *             The space must be write-locked.
- */
-
-void
-ipc_hash_global_delete(
-       ipc_space_t                             space,
-       ipc_object_t                            obj,
-       __assert_only mach_port_name_t  name,
-       ipc_tree_entry_t                        entry)
-{
-       ipc_hash_global_bucket_t bucket;
-       ipc_tree_entry_t this, *last;
-
-       assert(!is_fast_space(space));
-       assert(entry->ite_name == name);
-       assert(space != IS_NULL);
-       assert(entry->ite_space == space);
-       assert(obj != IO_NULL);
-       assert(entry->ite_object == obj);
-
-       assert(space->is_tree_hash > 0);
-       space->is_tree_hash--;
-
-       bucket = &ipc_hash_global_table[IH_GLOBAL_HASH(space, obj)];
-       ihgb_lock(bucket);
-
-       for (last = &bucket->ihgb_head;
-            (this = *last) != ITE_NULL;
-            last = &this->ite_next) {
-               if (this == entry) {
-                       /* found it; remove from bucket */
-
-                       *last = this->ite_next;
-                       break;
-               }
-       }
-       assert(this != ITE_NULL);
-
-       ihgb_unlock(bucket);
+       ipc_hash_table_delete(space->is_table, space->is_table_size, obj, index, entry);
 }
 
 /*
@@ -393,36 +186,30 @@ ipc_hash_global_delete(
  *     So possibly a small win; probably nothing significant.
  */
 
-#define        IH_LOCAL_HASH(obj, size)                                \
+#define        IH_TABLE_HASH(obj, size)                                \
                ((mach_port_index_t)((((uintptr_t) (obj)) >> 6) % (size)))
 
 /*
- *     Routine:        ipc_hash_local_lookup
+ *     Routine:        ipc_hash_table_lookup
  *     Purpose:
- *             Converts (space, obj) -> (name, entry).
- *             Looks in the space's local table, for table entries.
- *             Returns TRUE if an entry was found.
+ *             Converts (table, obj) -> (name, entry).
  *     Conditions:
- *             The space must be locked (read or write) throughout.
+ *             Must have read consistency on the table.
  */
 
 boolean_t
-ipc_hash_local_lookup(
-       ipc_space_t             space,
+ipc_hash_table_lookup(
+       ipc_entry_t             table,
+       ipc_entry_num_t         size,
        ipc_object_t            obj,
        mach_port_name_t        *namep,
        ipc_entry_t             *entryp)
 {
-       ipc_entry_t table;
-       ipc_entry_num_t size;
        mach_port_index_t hindex, index;
 
-       assert(space != IS_NULL);
        assert(obj != IO_NULL);
 
-       table = space->is_table;
-       size = space->is_table_size;
-       hindex = IH_LOCAL_HASH(obj, size);
+       hindex = IH_TABLE_HASH(obj, size);
 
        /*
         *      Ideally, table[hindex].ie_index is the name we want.
@@ -432,8 +219,10 @@ ipc_hash_local_lookup(
         */
 
        while ((index = table[hindex].ie_index) != 0) {
-               ipc_entry_t entry = &table[index];
+               ipc_entry_t entry;
 
+               assert(index < size);
+               entry = &table[index];
                if (entry->ie_object == obj) {
                        *entryp = entry;
                        *namep = MACH_PORT_MAKE(index,
@@ -449,7 +238,7 @@ ipc_hash_local_lookup(
 }
 
 /*
- *     Routine:        ipc_hash_local_insert
+ *     Routine:        ipc_hash_table_insert
  *     Purpose:
 *             Inserts an entry into the table's reverse hash.
  *     Conditions:
@@ -457,23 +246,19 @@ ipc_hash_local_lookup(
  */
 
 void
-ipc_hash_local_insert(
-       ipc_space_t                     space,
+ipc_hash_table_insert(
+       ipc_entry_t                     table,
+       ipc_entry_num_t                 size,
        ipc_object_t                    obj,
        mach_port_index_t               index,
        __assert_only ipc_entry_t       entry)
 {
-       ipc_entry_t table;
-       ipc_entry_num_t size;
        mach_port_index_t hindex;
 
        assert(index != 0);
-       assert(space != IS_NULL);
        assert(obj != IO_NULL);
 
-       table = space->is_table;
-       size = space->is_table_size;
-       hindex = IH_LOCAL_HASH(obj, size);
+       hindex = IH_TABLE_HASH(obj, size);
 
        assert(entry == &table[index]);
        assert(entry->ie_object == obj);
@@ -493,31 +278,27 @@ ipc_hash_local_insert(
 }
 
 /*
- *     Routine:        ipc_hash_local_delete
+ *     Routine:        ipc_hash_table_delete
  *     Purpose:
- *             Deletes an entry from the space's reverse hash table.
+ *             Deletes an entry from the table's reverse hash.
  *     Conditions:
- *             The space must be write-locked.
+ *             Exclusive access to the table.
  */
 
 void
-ipc_hash_local_delete(
-       ipc_space_t                     space,
+ipc_hash_table_delete(
+       ipc_entry_t                     table,
+       ipc_entry_num_t                 size,
        ipc_object_t                    obj,
        mach_port_index_t               index,
        __assert_only ipc_entry_t       entry)
 {
-       ipc_entry_t table;
-       ipc_entry_num_t size;
        mach_port_index_t hindex, dindex;
 
        assert(index != MACH_PORT_NULL);
-       assert(space != IS_NULL);
        assert(obj != IO_NULL);
 
-       table = space->is_table;
-       size = space->is_table_size;
-       hindex = IH_LOCAL_HASH(obj, size);
+       hindex = IH_TABLE_HASH(obj, size);
 
        assert(entry == &table[index]);
        assert(entry->ie_object == obj);
@@ -571,7 +352,7 @@ ipc_hash_local_delete(
 
                        tobj = table[index].ie_object;
                        assert(tobj != IO_NULL);
-                       tindex = IH_LOCAL_HASH(tobj, size);
+                       tindex = IH_TABLE_HASH(tobj, size);
 
                        if ((dindex < hindex) ?
                            ((dindex < tindex) && (tindex <= hindex)) :
@@ -583,112 +364,3 @@ ipc_hash_local_delete(
        }
 }
 
-/*
- *     Routine:        ipc_hash_init
- *     Purpose:
- *             Initialize the reverse hash table implementation.
- */
-
-void
-ipc_hash_init(void)
-{
-       ipc_hash_index_t i;
-
-       /* if not configured, initialize ipc_hash_global_size */
-
-       if (ipc_hash_global_size == 0) {
-               ipc_hash_global_size = ipc_tree_entry_max >> 8;
-               if (ipc_hash_global_size < 32)
-                       ipc_hash_global_size = 32;
-       }
-
-       /* make sure it is a power of two */
-
-       ipc_hash_global_mask = ipc_hash_global_size - 1;
-       if ((ipc_hash_global_size & ipc_hash_global_mask) != 0) {
-               natural_t bit;
-
-               /* round up to closest power of two */
-
-               for (bit = 1;; bit <<= 1) {
-                       ipc_hash_global_mask |= bit;
-                       ipc_hash_global_size = ipc_hash_global_mask + 1;
-
-                       if ((ipc_hash_global_size & ipc_hash_global_mask) == 0)
-                               break;
-               }
-       }
-
-       /* allocate ipc_hash_global_table */
-
-       ipc_hash_global_table = (ipc_hash_global_bucket_t)
-               kalloc((vm_size_t) (ipc_hash_global_size *
-                                   sizeof(struct ipc_hash_global_bucket)));
-       assert(ipc_hash_global_table != IHGB_NULL);
-
-       /* and initialize it */
-
-       for (i = 0; i < ipc_hash_global_size; i++) {
-               ipc_hash_global_bucket_t bucket;
-
-               bucket = &ipc_hash_global_table[i];
-               ihgb_lock_init(bucket);
-               bucket->ihgb_head = ITE_NULL;
-       }
-}
-
-#if    MACH_IPC_DEBUG
-
-/*
- *     Routine:        ipc_hash_size
- *     Purpose:
- *             Return the size of the global reverse hash table.
- */
-natural_t
-ipc_hash_size(void)
-{
-       return ipc_hash_global_size;
-}
-
-/*
- *     Routine:        ipc_hash_info
- *     Purpose:
- *             Return information about the global reverse hash table.
- *             Fills the buffer with as much information as possible
- *             and returns the desired size of the buffer.
- *     Conditions:
- *             Nothing locked.  The caller should provide
- *             possibly-pageable memory.
- */
-
-
-ipc_hash_index_t
-ipc_hash_info(
-       hash_info_bucket_t      *info,
-       natural_t               count)
-{
-       ipc_hash_index_t i;
-
-       if (ipc_hash_global_size < count)
-               count = ipc_hash_global_size;
-
-       for (i = 0; i < count; i++) {
-               ipc_hash_global_bucket_t bucket = &ipc_hash_global_table[i];
-               unsigned int bucket_count = 0;
-               ipc_tree_entry_t entry;
-
-               ihgb_lock(bucket);
-               for (entry = bucket->ihgb_head;
-                    entry != ITE_NULL;
-                    entry = entry->ite_next)
-                       bucket_count++;
-               ihgb_unlock(bucket);
-
-               /* don't touch pageable memory while holding locks */
-               info[i].hib_count = bucket_count;
-       }
-
-       return ipc_hash_global_size;
-}
-
-#endif /* MACH_IPC_DEBUG */
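
The table-level routines above implement the reverse (object -> name) hash
with open addressing: IH_TABLE_HASH picks a home bucket from the object
pointer (shifted right 6 bits, reduced mod the table size), and collisions
probe linearly through the ie_index slots.  A hedged restatement of the
lookup with the wraparound made explicit (the hunk above elides it):

	/* Illustrative probe loop for ipc_hash_table_lookup; returns the
	 * entry index holding `obj`, or 0 if it is not hashed. */
	static mach_port_index_t
	reverse_hash_probe(ipc_entry_t table, ipc_entry_num_t size, ipc_object_t obj)
	{
		mach_port_index_t hindex = IH_TABLE_HASH(obj, size);
		mach_port_index_t index;

		while ((index = table[hindex].ie_index) != 0) {
			if (table[index].ie_object == obj)
				return index;
			if (++hindex == size)	/* linear probe, wrap at the end */
				hindex = 0;
		}
		return 0;
	}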
index 987e54013a82b989c476b2636912296dcf731322..b0249f0fb94bed28298ba1827a0e9416a5fda770 100644 (file)
@@ -98,40 +98,39 @@ extern void ipc_hash_delete(
 
 /*
  *     For use by functions that know what they're doing:
- *     the global primitives, for splay tree entries,
- *     and the local primitives, for table entries.
+ *     the table-level primitives operate directly on an entry table.
  */
 
-/* Delete an entry from the global reverse hash table */
-extern void ipc_hash_global_delete(
-       ipc_space_t             space,
-       ipc_object_t            obj,
-       mach_port_name_t        name,
-       ipc_tree_entry_t        entry);
-
 /* Lookup (table, obj) in the table's reverse hash */
-extern boolean_t ipc_hash_local_lookup(
-       ipc_space_t             space,
+extern boolean_t ipc_hash_table_lookup(
+       ipc_entry_t             table,
+       ipc_entry_num_t         size,
        ipc_object_t            obj,
        mach_port_name_t        *namep,
        ipc_entry_t             *entryp);
 
 /* Insert an entry into the table's reverse hash */
-extern void ipc_hash_local_insert(
-       ipc_space_t             space,
+extern void ipc_hash_table_insert(
+       ipc_entry_t             table,
+       ipc_entry_num_t         size,
        ipc_object_t            obj,
        mach_port_index_t       index,
        ipc_entry_t             entry);
 
-/* Initialize the reverse hash table implementation */
-extern void ipc_hash_init(void) __attribute__((section("__TEXT, initcode")));
+/* Delete an entry from the appropriate reverse hash table */
+extern void ipc_hash_table_delete(
+       ipc_entry_t             table,
+       ipc_entry_num_t         size,
+       ipc_object_t            obj,
+       mach_port_name_t        name,
+       ipc_entry_t             entry);
 
 #include <mach_ipc_debug.h>
 
 #if    MACH_IPC_DEBUG
 
 #include <mach_debug/hash_info.h>
-extern natural_t ipc_hash_size(void);
+
 extern natural_t ipc_hash_info(
        hash_info_bucket_t      *info,
        natural_t count);
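
A hedged usage sketch for the renamed primitives, mirroring the wrappers in
ipc_hash.c above (the space-level ipc_hash_lookup simply forwards its
space's table and size):

	mach_port_name_t name;
	ipc_entry_t entry;

	if (ipc_hash_table_lookup(space->is_table, space->is_table_size,
	                          obj, &name, &entry)) {
		/* name/entry now identify the existing send right for obj */
	}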
index cc0970e3226d7e696d9a70edf6e8e538a1201ed0..11dd9dafcbe809841a7e1d305c7d3f113f07a7af 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -74,6 +74,7 @@
 #include <mach_rt.h>
 
 #include <mach/port.h>
+#include <mach/message.h>
 #include <mach/kern_return.h>
 
 #include <kern/kern_types.h>
 vm_map_t ipc_kernel_map;
 vm_size_t ipc_kernel_map_size = 1024 * 1024;
 
+/* values to limit physical copying of out-of-line memory descriptors */
 vm_map_t ipc_kernel_copy_map;
 #define IPC_KERNEL_COPY_MAP_SIZE (8 * 1024 * 1024)
 vm_size_t ipc_kernel_copy_map_size = IPC_KERNEL_COPY_MAP_SIZE;
-vm_size_t ipc_kmsg_max_vm_space = (IPC_KERNEL_COPY_MAP_SIZE * 7)/8;
+vm_size_t ipc_kmsg_max_vm_space = ((IPC_KERNEL_COPY_MAP_SIZE * 7) / 8);
+
+/*
+ * Values to limit inline message body handling.
+ * Chosen to avoid copyin/copyout limits, even after accounting for
+ * maximum descriptor expansion.
+ */
+#define IPC_KMSG_MAX_SPACE (64 * 1024 * 1024) /* keep in sync with COPYSIZELIMIT_PANIC */
+vm_size_t ipc_kmsg_max_body_space = ((IPC_KMSG_MAX_SPACE * 3)/4 - MAX_TRAILER_SIZE);
 
 int ipc_space_max;
-int ipc_tree_entry_max;
 int ipc_port_max;
 int ipc_pset_max;
 
@@ -142,7 +150,6 @@ ipc_bootstrap(void)
        
        ipc_port_multiple_lock_init();
 
-       ipc_port_timestamp_lock_init();
        ipc_port_timestamp_data = 0;
 
        /* all IPC zones should be exhaustible */
@@ -153,13 +160,6 @@ ipc_bootstrap(void)
                               "ipc spaces");
        zone_change(ipc_space_zone, Z_NOENCRYPT, TRUE);
 
-       ipc_tree_entry_zone =
-               zinit(sizeof(struct ipc_tree_entry),
-                       ipc_tree_entry_max * sizeof(struct ipc_tree_entry),
-                       sizeof(struct ipc_tree_entry),
-                       "ipc tree entries");
-       zone_change(ipc_tree_entry_zone, Z_NOENCRYPT, TRUE);
-
        /*
         * populate all port(set) zones
         */
@@ -217,7 +217,7 @@ ipc_bootstrap(void)
 #endif
        mig_init();
        ipc_table_init();
-       ipc_hash_init();
+
        semaphore_init();
        lock_set_init();
        mk_timer_init();
index b590d8a3e89ae90ca6161a35e21c755c8d154b68..36fd8976b57e05355cd5e34ecaa1e51a7ef5ac2c 100644 (file)
 #define _IPC_IPC_INIT_H_
 
 extern int ipc_space_max;
-extern int ipc_tree_entry_max;
 extern int ipc_port_max;
 extern int ipc_pset_max;
 
index 167d42145944b075292e94151d7ec23c50ec7d6a..f45d1deab6b7c2e34f8c6d48f6079be28008a111 100644 (file)
@@ -484,7 +484,9 @@ ipc_msg_print_untyped64(
 #endif  /* !DEBUG_MSGS_K64 */
 
 extern vm_map_t                ipc_kernel_copy_map;
+extern vm_size_t       ipc_kmsg_max_space;
 extern vm_size_t       ipc_kmsg_max_vm_space;
+extern vm_size_t       ipc_kmsg_max_body_space;
 extern vm_size_t       msg_ool_size_small;
 
 #define MSG_OOL_SIZE_SMALL     msg_ool_size_small
@@ -591,10 +593,17 @@ ipc_kmsg_alloc(
         * data backwards.
         */
        mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE;
+
+       /* compare against implementation upper limit for the body */
+       if (size > ipc_kmsg_max_body_space)
+               return IKM_NULL;
+
        if (size > sizeof(mach_msg_base_t)) {
                mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) /
                                           sizeof(mach_msg_ool_descriptor32_t)) *
                                           DESC_SIZE_ADJUSTMENT);
+
+               /* make sure expansion won't cause wrap */
                if (msg_and_trailer_size > MACH_MSG_SIZE_MAX - max_desc)
                        return IKM_NULL;
 
@@ -602,9 +611,7 @@ ipc_kmsg_alloc(
        } else
                max_expanded_size = msg_and_trailer_size;
 
-       if (max_expanded_size > ikm_less_overhead(MACH_MSG_SIZE_MAX))
-               return IKM_NULL;
-       else if (max_expanded_size < IKM_SAVED_MSG_SIZE)
+       if (max_expanded_size < IKM_SAVED_MSG_SIZE)
                max_expanded_size = IKM_SAVED_MSG_SIZE;         /* round up for ikm_cache */
 
        if (max_expanded_size == IKM_SAVED_MSG_SIZE) {
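
For the expansion bound above, a hedged worked example: each 32-bit OOL
descriptor in the body may grow by DESC_SIZE_ADJUSTMENT bytes when widened
to its kernel layout, so the worst case assumes the entire body past the
base header is packed with such descriptors:

	/* Illustrative only; the constants are defined elsewhere in ipc_kmsg.c. */
	mach_msg_size_t ndesc = (size - sizeof(mach_msg_base_t))
	                        / sizeof(mach_msg_ool_descriptor32_t);
	mach_msg_size_t growth = ndesc * DESC_SIZE_ADJUSTMENT;
	/* the check above rejects sizes where adding `growth` could wrap
	 * MACH_MSG_SIZE_MAX */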
@@ -674,9 +681,11 @@ ipc_kmsg_free(
                if (ip_active(port) && (port->ip_premsg == kmsg)) {
                        assert(IP_PREALLOC(port));
                        ip_unlock(port);
+                       ip_release(port);
                        return;
                }
-               ip_check_unlock(port);  /* May be last reference */
+               ip_unlock(port);
+               ip_release(port); /* May be last reference */
        }
 
        /*
@@ -1104,6 +1113,7 @@ ipc_kmsg_prealloc(mach_msg_size_t size)
  *             MACH_MSG_SUCCESS        Acquired a message buffer.
  *             MACH_SEND_MSG_TOO_SMALL Message smaller than a header.
  *             MACH_SEND_MSG_TOO_SMALL Message size not long-word multiple.
+ *             MACH_SEND_TOO_LARGE     Message too large to ever be sent.
  *             MACH_SEND_NO_BUFFER     Couldn't allocate a message buffer.
  *             MACH_SEND_INVALID_DATA  Couldn't copy message data.
  */
@@ -1124,7 +1134,7 @@ ipc_kmsg_get(
        if ((size < sizeof(mach_msg_legacy_header_t)) || (size & 3))
                return MACH_SEND_MSG_TOO_SMALL;
 
-       if (size > MACH_MSG_SIZE_MAX - MAX_TRAILER_SIZE)
+       if (size > ipc_kmsg_max_body_space)
                return MACH_SEND_TOO_LARGE;
 
        if(size == sizeof(mach_msg_legacy_header_t))
@@ -1250,7 +1260,7 @@ ipc_kmsg_get_from_kernel(
        ipc_port_t      dest_port;
 
        assert(size >= sizeof(mach_msg_header_t));
-//     assert((size & 3) == 0);
+       assert((size & 3) == 0);
 
        dest_port = (ipc_port_t)msg->msgh_remote_port;
 
@@ -1395,9 +1405,8 @@ ipc_kmsg_send(
                 *      in an infinite loop trying to deliver
                 *      a send-once notification.
                 */
-
+               ip_unlock(port);
                ip_release(port);
-               ip_check_unlock(port);
                kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL;
                ipc_kmsg_destroy(kmsg);
                return MACH_MSG_SUCCESS;
@@ -1575,8 +1584,15 @@ ipc_kmsg_copyin_header(
        mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
        mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
        ipc_object_t dest_port, reply_port;
-       ipc_port_t dest_soright, reply_soright;
        ipc_entry_t dest_entry, reply_entry;
+       ipc_port_t dest_soright, reply_soright;
+       ipc_port_t release_port = IP_NULL;
+
+       queue_head_t links_data;
+       queue_t links = &links_data;
+       wait_queue_link_t wql;
+
+       queue_init(links);
 
        if ((mbits != msg->msgh_bits) ||
            (!MACH_MSG_TYPE_PORT_ANY_SEND(dest_type)) ||
@@ -1588,7 +1604,7 @@ ipc_kmsg_copyin_header(
        reply_soright = IP_NULL; /* in case we go to invalid dest early */
 
        is_write_lock(space);
-       if (!space->is_active)
+       if (!is_active(space))
                goto invalid_dest;
 
        if (!MACH_PORT_VALID(dest_name))
@@ -1665,7 +1681,9 @@ ipc_kmsg_copyin_header(
                           (reply_type == MACH_MSG_TYPE_MAKE_SEND_ONCE)) {
                        kr = ipc_right_copyin(space, name, dest_entry,
                                              dest_type, FALSE,
-                                             &dest_port, &dest_soright);
+                                             &dest_port, &dest_soright,
+                                             &release_port,
+                                             links);
                        if (kr != KERN_SUCCESS)
                                goto invalid_dest;
 
@@ -1684,7 +1702,9 @@ ipc_kmsg_copyin_header(
 
                        kr = ipc_right_copyin(space, name, reply_entry,
                                              reply_type, TRUE,
-                                             &reply_port, &reply_soright);
+                                             &reply_port, &reply_soright,
+                                             &release_port,
+                                             links);
 
                        assert(kr == KERN_SUCCESS);
                        assert(reply_port == dest_port);
@@ -1699,7 +1719,9 @@ ipc_kmsg_copyin_header(
 
                        kr = ipc_right_copyin(space, name, dest_entry,
                                              dest_type, FALSE,
-                                             &dest_port, &dest_soright);
+                                             &dest_port, &dest_soright,
+                                             &release_port,
+                                             links);
                        if (kr != KERN_SUCCESS)
                                goto invalid_dest;
 
@@ -1724,7 +1746,8 @@ ipc_kmsg_copyin_header(
                         */
 
                        kr = ipc_right_copyin_two(space, name, dest_entry,
-                                                 &dest_port, &dest_soright);
+                                                 &dest_port, &dest_soright,
+                                                 &release_port);
                        if (kr != KERN_SUCCESS)
                                goto invalid_dest;
 
@@ -1751,7 +1774,9 @@ ipc_kmsg_copyin_header(
 
                        kr = ipc_right_copyin(space, name, dest_entry,
                                              MACH_MSG_TYPE_MOVE_SEND, FALSE,
-                                             &dest_port, &soright);
+                                             &dest_port, &soright,
+                                             &release_port,
+                                             links);
                        if (kr != KERN_SUCCESS)
                                goto invalid_dest;
 
@@ -1791,7 +1816,9 @@ ipc_kmsg_copyin_header(
 
                kr = ipc_right_copyin(space, dest_name, dest_entry,
                                      dest_type, FALSE,
-                                     &dest_port, &dest_soright);
+                                     &dest_port, &dest_soright,
+                                     &release_port,
+                                     links);
                if (kr != KERN_SUCCESS)
                        goto invalid_dest;
 
@@ -1856,7 +1883,9 @@ ipc_kmsg_copyin_header(
 
                kr = ipc_right_copyin(space, dest_name, dest_entry,
                                      dest_type, FALSE,
-                                     &dest_port, &dest_soright);
+                                     &dest_port, &dest_soright,
+                                     &release_port,
+                                     links);
                if (kr != KERN_SUCCESS)
                        goto invalid_dest;
 
@@ -1864,8 +1893,9 @@ ipc_kmsg_copyin_header(
 
                kr = ipc_right_copyin(space, reply_name, reply_entry,
                                      reply_type, TRUE,
-                                     &reply_port, &reply_soright);
-
+                                     &reply_port, &reply_soright,
+                                     &release_port,
+                                     links);
                assert(kr == KERN_SUCCESS);
 
                /* the entries might need to be deallocated */
@@ -1915,16 +1945,43 @@ ipc_kmsg_copyin_header(
        msg->msgh_remote_port = (ipc_port_t)dest_port;
        msg->msgh_local_port = (ipc_port_t)reply_port;
 
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
+       if (release_port != IP_NULL)
+               ip_release(release_port);
+
        return MACH_MSG_SUCCESS;
 
 invalid_reply:
        is_write_unlock(space);
+
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
+       if (release_port != IP_NULL)
+               ip_release(release_port);
+
        return MACH_SEND_INVALID_REPLY;
 
 invalid_dest:
        is_write_unlock(space);
+
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
+       if (release_port != IP_NULL)
+               ip_release(release_port);
+
        if (reply_soright != IP_NULL)
                ipc_notify_port_deleted(reply_soright, reply_name);
+
        return MACH_SEND_INVALID_DEST;
 }
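
A pattern running through the copyin changes above: cleanup that used to
happen inline under the space lock (freeing wait-queue links, dropping a
possibly-final port reference via ip_check_unlock) is now batched and
performed only after is_write_unlock(space).  A minimal sketch of the
idiom, reusing the names from the code above:

	queue_head_t links_data;
	queue_t links = &links_data;
	wait_queue_link_t wql;
	ipc_port_t release_port = IP_NULL;

	queue_init(links);
	is_write_lock(space);
	/* ... rights copyin may enqueue links and set release_port ... */
	is_write_unlock(space);

	while (!queue_empty(links)) {           /* free links unlocked */
		wql = (wait_queue_link_t) dequeue(links);
		wait_queue_link_free(wql);
	}
	if (release_port != IP_NULL)            /* may be the last reference */
		ip_release(release_port);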
 
@@ -2872,6 +2929,7 @@ ipc_kmsg_copyout_header(
        mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
        mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
        ipc_port_t reply = (ipc_port_t) msg->msgh_local_port;
+       ipc_port_t release_port = IP_NULL;
        mach_port_name_t dest_name, reply_name;
 
        if (IP_VALID(reply)) {
@@ -2886,7 +2944,7 @@ ipc_kmsg_copyout_header(
                is_write_lock(space);
 
                for (;;) {
-                       if (!space->is_active) {
+                       if (!is_active(space)) {
                                is_write_unlock(space);
                                return (MACH_RCV_HEADER_ERROR|
                                        MACH_MSG_IPC_SPACE);
@@ -2903,12 +2961,11 @@ ipc_kmsg_copyout_header(
 
                        ip_lock(reply);
                        if (!ip_active(reply)) {
-                               ip_release(reply);
-                               ip_check_unlock(reply);
-
+                               ip_unlock(reply);
                                ip_lock(dest);
                                is_write_unlock(space);
 
+                               release_port = reply;
                                reply = IP_DEAD;
                                reply_name = MACH_PORT_DEAD;
                                goto copyout_dest;
@@ -2957,7 +3014,7 @@ ipc_kmsg_copyout_header(
                 */
 
                is_read_lock(space);
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_read_unlock(space);
                        return MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_SPACE;
                }
@@ -3025,8 +3082,8 @@ ipc_kmsg_copyout_header(
                ipc_port_timestamp_t timestamp;
 
                timestamp = dest->ip_timestamp;
+               ip_unlock(dest);
                ip_release(dest);
-               ip_check_unlock(dest);
 
                if (IP_VALID(reply)) {
                        ip_lock(reply);
@@ -3042,7 +3099,10 @@ ipc_kmsg_copyout_header(
        }
 
        if (IP_VALID(reply))
-               ipc_port_release(reply);
+               ip_release(reply);
+
+       if (IP_VALID(release_port))
+               ip_release(release_port);
 
        msg->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) |
                          MACH_MSGH_BITS(reply_type, dest_type));
@@ -3153,7 +3213,7 @@ mach_msg_descriptor_t *
 ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descriptor_t *user_dsc, int is_64bit, vm_map_t map, mach_msg_return_t *mr)
 {
     vm_map_copy_t                      copy;
-    mach_vm_offset_t           rcv_addr;
+    vm_map_address_t                   rcv_addr;
     mach_msg_copy_options_t            copy_options;
     mach_msg_size_t                    size;
     mach_msg_descriptor_type_t dsc_type;
@@ -3693,8 +3753,8 @@ ipc_kmsg_copyout_dest(
                ipc_object_copyout_dest(space, dest, dest_type, &dest_name);
                /* dest is unlocked */
        } else {
+               io_unlock(dest);
                io_release(dest);
-               io_check_unlock(dest);
                dest_name = MACH_PORT_DEAD;
        }
 
@@ -3911,8 +3971,8 @@ ipc_kmsg_copyout_to_kernel(
                ipc_object_copyout_dest(space, dest, dest_type, &dest_name);
                /* dest is unlocked */
        } else {
+               io_unlock(dest);
                io_release(dest);
-               io_check_unlock(dest);
                dest_name = MACH_PORT_DEAD;
        }
 
@@ -3949,8 +4009,8 @@ ipc_kmsg_copyout_to_kernel_legacy(
                ipc_object_copyout_dest(space, dest, dest_type, &dest_name);
                /* dest is unlocked */
        } else {
+               io_unlock(dest);
                io_release(dest);
-               io_check_unlock(dest);
                dest_name = MACH_PORT_DEAD;
        }
 
@@ -4044,272 +4104,81 @@ ipc_kmsg_copyout_to_kernel_legacy(
 }
 #endif /* IKM_SUPPORT_LEGACY */
 
-
-#include <mach_kdb.h>
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-#include <ipc/ipc_print.h>
-/*
- * Forward declarations
- */
-void ipc_msg_print_untyped(
-       mach_msg_body_t         *body);
-
-const char * ipc_type_name(
-       int             type_name,
-       boolean_t       received);
-
-const char *
-msgh_bit_decode(
-       mach_msg_bits_t bit);
-
-const char *
-mm_copy_options_string(
-       mach_msg_copy_options_t option);
-
-void db_print_msg_uid(mach_msg_header_t *);
-
-
-const char *
-ipc_type_name(
-       int             type_name,
-       boolean_t       received)
-{
-       switch (type_name) {
-               case MACH_MSG_TYPE_PORT_NAME:
-               return "port_name";
-               
-               case MACH_MSG_TYPE_MOVE_RECEIVE:
-               if (received) {
-                       return "port_receive";
-               } else {
-                       return "move_receive";
-               }
-               
-               case MACH_MSG_TYPE_MOVE_SEND:
-               if (received) {
-                       return "port_send";
-               } else {
-                       return "move_send";
-               }
-               
-               case MACH_MSG_TYPE_MOVE_SEND_ONCE:
-               if (received) {
-                       return "port_send_once";
-               } else {
-                       return "move_send_once";
-               }
-               
-               case MACH_MSG_TYPE_COPY_SEND:
-               return "copy_send";
-               
-               case MACH_MSG_TYPE_MAKE_SEND:
-               return "make_send";
-               
-               case MACH_MSG_TYPE_MAKE_SEND_ONCE:
-               return "make_send_once";
-               
-               default:
-               return (char *) 0;
-       }
-}
-               
-void
-ipc_print_type_name(
-       int     type_name)
-{
-       const char *name = ipc_type_name(type_name, TRUE);
-       if (name) {
-               printf("%s", name);
-       } else {
-               printf("type%d", type_name);
-       }
-}
-
-/*
- * ipc_kmsg_print      [ debug ]
- */
-void
-ipc_kmsg_print(
-       ipc_kmsg_t      kmsg)
-{
-       iprintf("kmsg=0x%x\n", kmsg);
-       iprintf("ikm_next=0x%x, prev=0x%x, size=%d",
-               kmsg->ikm_next,
-               kmsg->ikm_prev,
-               kmsg->ikm_size);
-       printf("\n");
-       ipc_msg_print(kmsg->ikm_header);
-}
-
-const char *
-msgh_bit_decode(
-       mach_msg_bits_t bit)
-{
-       switch (bit) {
-           case MACH_MSGH_BITS_COMPLEX:        return "complex";
-           case MACH_MSGH_BITS_CIRCULAR:       return "circular";
-           default:                            return (char *) 0;
-       }
-}
-
-/*
- * ipc_msg_print       [ debug ]
- */
-void
-ipc_msg_print(
-       mach_msg_header_t       *msgh)
+mach_msg_trailer_size_t
+ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, 
+               mach_msg_option_t option, thread_t thread, 
+               mach_port_seqno_t seqno, boolean_t minimal_trailer,
+               mach_vm_offset_t context)
 {
-       mach_msg_bits_t mbits;
-       unsigned int    bit, i;
-       const char      *bit_name;
-       int             needs_comma;
+       mach_msg_max_trailer_t *trailer;
 
-       mbits = msgh->msgh_bits;
-       iprintf("msgh_bits=0x%x:  l=0x%x,r=0x%x\n",
-               mbits,
-               MACH_MSGH_BITS_LOCAL(msgh->msgh_bits),
-               MACH_MSGH_BITS_REMOTE(msgh->msgh_bits));
+       (void)thread;
+       trailer = (mach_msg_max_trailer_t *)
+               ((vm_offset_t)kmsg->ikm_header +
+                round_msg(kmsg->ikm_header->msgh_size));
 
-       mbits = MACH_MSGH_BITS_OTHER(mbits) & MACH_MSGH_BITS_USED;
-       db_indent += 2;
-       if (mbits)
-               iprintf("decoded bits:  ");
-       needs_comma = 0;
-       for (i = 0, bit = 1; i < sizeof(mbits) * 8; ++i, bit <<= 1) {
-               if ((mbits & bit) == 0)
-                       continue;
-               bit_name = msgh_bit_decode((mach_msg_bits_t)bit);
-               if (bit_name)
-                       printf("%s%s", needs_comma ? "," : "", bit_name);
-               else
-                       printf("%sunknown(0x%x),", needs_comma ? "," : "", bit);
-               ++needs_comma;
-       }
-       if (msgh->msgh_bits & ~MACH_MSGH_BITS_USED) {
-               printf("%sunused=0x%x,", needs_comma ? "," : "",
-                      msgh->msgh_bits & ~MACH_MSGH_BITS_USED);
+       if (!(option & MACH_RCV_TRAILER_MASK)) {
+               return trailer->msgh_trailer_size;
        }
-       printf("\n");
-       db_indent -= 2;
 
-       needs_comma = 1;
-       if (msgh->msgh_remote_port) {
-               iprintf("remote=0x%x(", msgh->msgh_remote_port);
-               ipc_print_type_name(MACH_MSGH_BITS_REMOTE(msgh->msgh_bits));
-               printf(")");
-       } else {
-               iprintf("remote=null");
-       }
+       trailer->msgh_seqno = seqno;
+       trailer->msgh_context = context;
+       trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(thread_is_64bit(thread), option);
 
-       if (msgh->msgh_local_port) {
-               printf("%slocal=%p(", needs_comma ? "," : "",
-                      msgh->msgh_local_port);
-               ipc_print_type_name(MACH_MSGH_BITS_LOCAL(msgh->msgh_bits));
-               printf(")\n");
-       } else {
-               printf("local=null\n");
+       if (minimal_trailer) { 
+               goto done;
        }
 
-       iprintf("msgh_id=%d, size=%d\n",
-               msgh->msgh_id,
-               msgh->msgh_size);
-
-       if (mbits & MACH_MSGH_BITS_COMPLEX) {   
-               ipc_msg_print_untyped((mach_msg_body_t *) (msgh + 1));
+       if (MACH_RCV_TRAILER_ELEMENTS(option) >= 
+                       MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AV)){
+#if CONFIG_MACF_MACH
+               if (kmsg->ikm_sender != NULL &&
+                               IP_VALID(kmsg->ikm_header->msgh_remote_port) &&
+                               mac_port_check_method(kmsg->ikm_sender,
+                                       &kmsg->ikm_sender->maclabel,
+                                       &kmsg->ikm_header->msgh_remote_port->ip_label,
+                                       kmsg->ikm_header->msgh_id) == 0)
+                       trailer->msgh_ad = 1;
+               else
+#endif
+                       trailer->msgh_ad = 0;
        }
-}
 
+       /*
+        * The ipc_kmsg_t holds a reference to the label of a label
+        * handle, not the port. We must get a reference to the port
+        * and a send right to copyout to the receiver.
+        */
 
-const char *
-mm_copy_options_string(
-       mach_msg_copy_options_t option)
-{
-       const char      *name;
-
-       switch (option) {
-           case MACH_MSG_PHYSICAL_COPY:
-               name = "PHYSICAL";
-               break;
-           case MACH_MSG_VIRTUAL_COPY:
-               name = "VIRTUAL";
-               break;
-           case MACH_MSG_OVERWRITE:
-               name = "OVERWRITE";
-               break;
-           case MACH_MSG_ALLOCATE:
-               name = "ALLOCATE";
-               break;
-           case MACH_MSG_KALLOC_COPY_T:
-               name = "KALLOC_COPY_T";
-               break;
-           default:
-               name = "unknown";
-               break;
+       if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_LABELS)) {
+#if CONFIG_MACF_MACH
+               if (kmsg->ikm_sender != NULL) {
+                       ipc_labelh_t  lh = kmsg->ikm_sender->label;
+                       kern_return_t kr;
+
+                       ip_lock(lh->lh_port);
+                       lh->lh_port->ip_mscount++;
+                       lh->lh_port->ip_srights++;
+                       ip_reference(lh->lh_port);
+                       ip_unlock(lh->lh_port);
+
+                       kr = ipc_object_copyout(space, (ipc_object_t)lh->lh_port,
+                                       MACH_MSG_TYPE_PORT_SEND, 0,
+                                       &trailer->msgh_labels.sender);
+                       if (kr != KERN_SUCCESS) {
+                               ip_release(lh->lh_port);
+                               trailer->msgh_labels.sender = 0;
+                       }
+               } else {
+                       trailer->msgh_labels.sender = 0;
+               }
+#else
+               (void)space;
+               trailer->msgh_labels.sender = 0;
+#endif
        }
-       return name;
-}
 
-void
-ipc_msg_print_untyped(
-       mach_msg_body_t         *body)
-{
-    mach_msg_descriptor_t      *saddr, *send;
-    mach_msg_descriptor_type_t type;
-
-    iprintf("%d descriptors %d: \n", body->msgh_descriptor_count);
 
-    saddr = (mach_msg_descriptor_t *) (body + 1);
-    send = saddr + body->msgh_descriptor_count;
-
-    for ( ; saddr < send; saddr++ ) {
-       
-       type = saddr->type.type;
-
-       switch (type) {
-           
-           case MACH_MSG_PORT_DESCRIPTOR: {
-               mach_msg_port_descriptor_t *dsc;
-
-               dsc = &saddr->port;
-               iprintf("-- PORT name = 0x%x disp = ", dsc->name);
-               ipc_print_type_name(dsc->disposition);
-               printf("\n");
-               break;
-           }
-           case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
-           case MACH_MSG_OOL_DESCRIPTOR: {
-               mach_msg_ool_descriptor_t *dsc;
-               
-               dsc = &saddr->out_of_line;
-               iprintf("-- OOL%s addr = 0x%x size = 0x%x copy = %s %s\n",
-                       type == MACH_MSG_OOL_DESCRIPTOR ? "" : " VOLATILE",
-                       dsc->address, dsc->size,
-                       mm_copy_options_string(dsc->copy),
-                       dsc->deallocate ? "DEALLOC" : "");
-               break;
-           } 
-           case MACH_MSG_OOL_PORTS_DESCRIPTOR : {
-               mach_msg_ool_ports_descriptor_t *dsc;
-
-               dsc = &saddr->ool_ports;
-
-               iprintf("-- OOL_PORTS addr = 0x%x count = 0x%x ",
-                         dsc->address, dsc->count);
-               printf("disp = ");
-               ipc_print_type_name(dsc->disposition);
-               printf(" copy = %s %s\n",
-                      mm_copy_options_string(dsc->copy),
-                      dsc->deallocate ? "DEALLOC" : "");
-               break;
-           }
-
-           default: {
-               iprintf("-- UNKNOWN DESCRIPTOR 0x%x\n", type);
-               break;
-           }
-       }
-    }
+done:
+       return trailer->msgh_trailer_size;
 }
-#endif /* MACH_KDB */
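The branch added above copies out a send right to the sender's label-handle port whenever the receiver asked for the labels trailer. As a rough userland illustration (the option macros and trailer type are from mach/message.h; the buffer layout and error handling are assumptions, not part of this commit):

    #include <mach/mach.h>

    typedef struct {
            mach_msg_header_t      header;
            char                   body[64];   /* assumed payload room */
            mach_msg_mac_trailer_t trailer;    /* space for the largest trailer */
    } rcv_msg_t;

    kern_return_t
    receive_with_labels(mach_port_t rcv_port)
    {
            rcv_msg_t msg;
            kern_return_t kr;

            kr = mach_msg(&msg.header,
                    MACH_RCV_MSG |
                    MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) |
                    MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_LABELS),
                    0, sizeof(msg), rcv_port,
                    MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
            /* The kernel appends the trailer at header + round_msg(msgh_size);
             * reading msg.trailer directly is a simplification that only
             * holds when the body is exactly 64 bytes.  On success,
             * msgh_labels.sender names a send right to the sender's label
             * handle, or 0 if no label could be copied out. */
            return kr;
    }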
index 6fb07b6dd07a3ce321ed6de5e24a79330195fd49..cb35797377f95d7c93b434a811e99d3bf1bb559d 100644 (file)
@@ -144,7 +144,6 @@ MACRO_END
 #define ikm_prealloc_clear_inuse(kmsg, port)                           \
 MACRO_BEGIN                                                            \
        (kmsg)->ikm_prealloc = IP_NULL;                                 \
-       ip_release(port);                                               \
 MACRO_END
 
 #define        ikm_init(kmsg, size)                                            \
@@ -408,5 +407,12 @@ extern void ipc_kmsg_free_scatter(
         mach_msg_body_t        *slist,
         mach_msg_size_t                slist_size);
 
+
+extern mach_msg_trailer_size_t
+ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, 
+               mach_msg_option_t option, thread_t thread, 
+               mach_port_seqno_t seqno, boolean_t minimal_trailer,
+               mach_vm_offset_t context);
+
 #endif /* _IPC_IPC_KMSG_H_ */
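With the prototype now exported here, receive paths can finalize the trailer themselves. A hedged call-site sketch (kmsg, space, option, and seqno are assumed to come from the enclosing receive code; only the signature is from this header):

    mach_msg_trailer_size_t tsize;

    tsize = ipc_kmsg_add_trailer(kmsg, space, option, current_thread(),
                                 seqno,
                                 FALSE, /* not minimal: honor requested elements */
                                 0 /* no kernel-supplied context */);
    /* tsize is folded into the size reported back to the receiver. */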
 
index 934eaf7e435e378e634fb7f5cd602ebd64e0398d..9b14d07d5eb1e219d52d1874dd84b76e6723d06a 100644 (file)
@@ -234,6 +234,7 @@ labelh_destroy(ipc_port_t port)
        ipc_labelh_t lh = (ipc_labelh_t) port->ip_kobject;
 
        mac_task_label_destroy(&lh->lh_label);
+       lh_lock_destroy(lh);
        zfree(ipc_labelh_zone, (vm_offset_t)lh);
 }
 #else
index 5eba16ca36552ed00f75e834459e7e4c4ba060fb..7126ff162b754a5a0d873b6c3ba9c3500b7e462f 100644 (file)
@@ -89,6 +89,7 @@ MACRO_END
 extern zone_t ipc_labelh_zone;
 
 #define lh_lock_init(lh)       lck_mtx_init(&(lh)->lh_lock_data, &ipc_lck_grp, &ipc_lck_attr)
+#define lh_lock_destroy(lh)    lck_mtx_destroy(&(lh)->lh_lock_data, &ipc_lck_grp)
 #define lh_lock(lh)                    lck_mtx_lock(&(lh)->lh_lock_data)
 #define lh_unlock(lh)          lck_mtx_unlock(&(lh)->lh_lock_data)
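The new destroy macro completes the lock's lifecycle, letting the mutex return its resources to ipc_lck_grp. Minimal pairing sketch (call sites assumed):

    lh_lock_init(lh);      /* when the label handle is allocated        */
    /* ... lh_lock()/lh_unlock() over the handle's lifetime ...         */
    lh_lock_destroy(lh);   /* in labelh_destroy(), just before zfree()  */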
 
index 406b5ae932c7bb768c3b608bf2c154828402813b..569c6fb0b63a464c0c304272216e534225034505 100644 (file)
@@ -159,13 +159,14 @@ ipc_mqueue_member(
 
 kern_return_t
 ipc_mqueue_remove(
-       ipc_mqueue_t     mqueue,
-       ipc_mqueue_t     set_mqueue)
+       ipc_mqueue_t      mqueue,
+       ipc_mqueue_t      set_mqueue,
+       wait_queue_link_t *wqlp)
 {
        wait_queue_t     mq_waitq = &mqueue->imq_wait_queue;
        wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue;
 
-       return wait_queue_unlink(mq_waitq, set_waitq);
+       return wait_queue_unlink_nofree(mq_waitq, set_waitq, wqlp);
 }
 
 /*
@@ -177,11 +178,12 @@ ipc_mqueue_remove(
  */
 void
 ipc_mqueue_remove_from_all(
-       ipc_mqueue_t    mqueue)
+       ipc_mqueue_t    mqueue,
+       queue_t         links)
 {
        wait_queue_t    mq_waitq = &mqueue->imq_wait_queue;
 
-       wait_queue_unlink_all(mq_waitq);
+       wait_queue_unlink_all_nofree(mq_waitq, links);
        return;
 }
 
@@ -194,11 +196,12 @@ ipc_mqueue_remove_from_all(
  */
 void
 ipc_mqueue_remove_all(
-       ipc_mqueue_t    mqueue)
+       ipc_mqueue_t    mqueue,
+       queue_t         links)
 {
        wait_queue_set_t        mq_setq = &mqueue->imq_set_queue;
 
-       wait_queue_set_unlink_all(mq_setq);
+       wait_queue_set_unlink_all_nofree(mq_setq, links);
        return;
 }
 
@@ -217,7 +220,8 @@ ipc_mqueue_remove_all(
 kern_return_t
 ipc_mqueue_add(
        ipc_mqueue_t     port_mqueue,
-       ipc_mqueue_t     set_mqueue)
+       ipc_mqueue_t     set_mqueue,
+       wait_queue_link_t wql)
 {
        wait_queue_t     port_waitq = &port_mqueue->imq_wait_queue;
        wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue;
@@ -226,7 +230,7 @@ ipc_mqueue_add(
        kern_return_t    kr;
        spl_t            s;
 
-       kr = wait_queue_link(port_waitq, set_waitq);
+       kr = wait_queue_link_noalloc(port_waitq, set_waitq, wql);
        if (kr != KERN_SUCCESS)
                return kr;
 
@@ -278,7 +282,7 @@ ipc_mqueue_add(
                         */
                        msize = ipc_kmsg_copyout_size(kmsg, th->map);
                        if (th->ith_msize <
-                                       (msize + REQUESTED_TRAILER_SIZE(th->ith_option))) {
+                                       (msize + REQUESTED_TRAILER_SIZE(thread_is_64bit(th), th->ith_option))) {
                                th->ith_state = MACH_RCV_TOO_LARGE;
                                th->ith_msize = msize;
                                if (th->ith_option & MACH_RCV_LARGE) {
@@ -539,7 +543,7 @@ ipc_mqueue_post(
                 */
                msize = ipc_kmsg_copyout_size(kmsg, receiver->map);
                if (receiver->ith_msize <
-                               (msize + REQUESTED_TRAILER_SIZE(receiver->ith_option))) {
+                               (msize + REQUESTED_TRAILER_SIZE(thread_is_64bit(receiver), receiver->ith_option))) {
                        receiver->ith_msize = msize;
                        receiver->ith_state = MACH_RCV_TOO_LARGE;
                } else {
@@ -917,7 +921,7 @@ ipc_mqueue_select_on_thread(
         * (and size needed).
         */
        rcv_size = ipc_kmsg_copyout_size(kmsg, thread->map);
-       if (rcv_size + REQUESTED_TRAILER_SIZE(option) > max_size) {
+       if (rcv_size + REQUESTED_TRAILER_SIZE(thread_is_64bit(thread), option) > max_size) {
                mr = MACH_RCV_TOO_LARGE;
                if (option & MACH_RCV_LARGE) {
                        thread->ith_receiver_name = mqueue->imq_receiver_name;
@@ -1136,7 +1140,7 @@ ipc_mqueue_copyin(
        ipc_mqueue_t mqueue;
 
        is_read_lock(space);
-       if (!space->is_active) {
+       if (!is_active(space)) {
                is_read_unlock(space);
                return MACH_RCV_INVALID_NAME;
        }
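Taken together, the ipc_mqueue changes move wait-queue link allocation and freeing out from under the IPC locks: add consumes a caller-supplied link, and the remove variants hand links back rather than freeing them inline. A sketch of the resulting convention (wait_queue_link_allocate()/wait_queue_link_free() are the kernel's own helpers; this exact call site is assumed):

    wait_queue_link_t wql;
    kern_return_t kr;

    wql = wait_queue_link_allocate();          /* may block; no IPC locks held */
    kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, wql);
    if (kr != KERN_SUCCESS)
            wait_queue_link_free(wql);         /* link was never consumed */

    /* Removal now returns the link instead of freeing it: */
    kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages, &wql);
    if (kr == KERN_SUCCESS)
            wait_queue_link_free(wql);         /* freed outside the locks */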
index c8a3f7a2eeb0809f415310cf1d4289c3c66a701e..f452f70796d0df31bf2546e3205c03a784bb2af4 100644 (file)
@@ -150,7 +150,8 @@ extern void ipc_mqueue_changed(
 /* Add the specific mqueue as a member of the set */
 extern kern_return_t ipc_mqueue_add(
        ipc_mqueue_t            mqueue,
-       ipc_mqueue_t            set_mqueue);
+       ipc_mqueue_t            set_mqueue,
+       wait_queue_link_t       wql);
 
 /* Check to see if mqueue is member of set_mqueue */
 extern boolean_t ipc_mqueue_member(
@@ -160,15 +161,18 @@ extern boolean_t ipc_mqueue_member(
 /* Remove an mqueue from a specific set */
 extern kern_return_t ipc_mqueue_remove(
        ipc_mqueue_t            mqueue,
-       ipc_mqueue_t            set_mqueue);
+       ipc_mqueue_t            set_mqueue,
+       wait_queue_link_t       *wqlp);
 
 /* Remove an mqueue from all sets */
 extern void ipc_mqueue_remove_from_all(
-       ipc_mqueue_t            mqueue);
+       ipc_mqueue_t            mqueue,
+       queue_t                 links);
 
 /* Remove all the members of the specified set */
 extern void ipc_mqueue_remove_all(
-       ipc_mqueue_t            mqueue);
+       ipc_mqueue_t            mqueue,
+       queue_t                 links);
 
 /* Send a message to a port */
 extern mach_msg_return_t ipc_mqueue_send(
index 176e80ec827375fef40e9aca3739f29b9af2043d..35f9224f0f60b9c586bdcad026858c87a81095ea 100644 (file)
@@ -108,10 +108,7 @@ void
 ipc_object_reference(
        ipc_object_t    object)
 {
-       io_lock(object);
-       assert(object->io_references > 0);
        io_reference(object);
-       io_unlock(object);
 }
 
 /*
@@ -124,10 +121,7 @@ void
 ipc_object_release(
        ipc_object_t    object)
 {
-       io_lock(object);
-       assert(object->io_references > 0);
        io_release(object);
-       io_check_unlock(object);
 }
 
 /*
@@ -263,7 +257,7 @@ ipc_object_alloc_dead(
 
        assert(entry->ie_object == IO_NULL);
        entry->ie_bits |= MACH_PORT_TYPE_DEAD_NAME | 1;
-       
+       ipc_entry_modified(space, *namep, entry);
        is_write_unlock(space);
        return KERN_SUCCESS;
 }
@@ -301,7 +295,7 @@ ipc_object_alloc_dead_name(
 
        assert(entry->ie_object == IO_NULL);
        entry->ie_bits |= MACH_PORT_TYPE_DEAD_NAME | 1;
-
+       ipc_entry_modified(space, name, entry);
        is_write_unlock(space);
        return KERN_SUCCESS;
 }
@@ -366,6 +360,7 @@ ipc_object_alloc(
 
        entry->ie_bits |= type | urefs;
        entry->ie_object = object;
+       ipc_entry_modified(space, *namep, entry);
 
        io_lock(object);
        is_write_unlock(space);
@@ -441,6 +436,7 @@ ipc_object_alloc_name(
 
        entry->ie_bits |= type | urefs;
        entry->ie_object = object;
+       ipc_entry_modified(space, name, entry);
 
        io_lock(object);
        is_write_unlock(space);
@@ -506,7 +502,13 @@ ipc_object_copyin(
 {
        ipc_entry_t entry;
        ipc_port_t soright;
+       ipc_port_t release_port;
        kern_return_t kr;
+       queue_head_t links_data;
+       queue_t links = &links_data;
+       wait_queue_link_t wql;
+
+       queue_init(links);
 
        /*
         *      Could first try a read lock when doing
@@ -519,13 +521,24 @@ ipc_object_copyin(
                return kr;
        /* space is write-locked and active */
 
+       release_port = IP_NULL;
        kr = ipc_right_copyin(space, name, entry,
                              msgt_name, TRUE,
-                             objectp, &soright);
+                             objectp, &soright,
+                             &release_port,
+                             links);
        if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
                ipc_entry_dealloc(space, name, entry);
        is_write_unlock(space);
 
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
+       if (release_port != IP_NULL)
+               ip_release(release_port);
+
        if ((kr == KERN_SUCCESS) && (soright != IP_NULL))
                ipc_notify_port_deleted(soright, name);
 
@@ -752,7 +765,7 @@ ipc_object_copyout(
        is_write_lock(space);
 
        for (;;) {
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_write_unlock(space);
                        return KERN_INVALID_TASK;
                }
@@ -1075,82 +1088,3 @@ io_free(
        io_lock_destroy(object);
        zfree(ipc_object_zones[otype], object);
 }
-
-#include <mach_kdb.h>
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-
-#define        printf  kdbprintf 
-
-/*
- *     Routine:        ipc_object_print
- *     Purpose:
- *             Pretty-print an object for kdb.
- */
-
-const char *ikot_print_array[IKOT_MAX_TYPE] = {
-       "(NONE)             ",
-       "(THREAD)           ",
-       "(TASK)             ",
-       "(HOST)             ",
-       "(HOST_PRIV)        ",
-       "(PROCESSOR)        ",
-       "(PSET)             ",
-       "(PSET_NAME)        ",
-       "(TIMER)            ",
-       "(PAGER_REQUEST)    ",
-       "(DEVICE)           ",  /* 10 */
-       "(XMM_OBJECT)       ",
-       "(XMM_PAGER)        ",
-       "(XMM_KERNEL)       ",
-       "(XMM_REPLY)        ",
-       "(NOTDEF 15)        ",
-       "(NOTDEF 16)        ",
-       "(HOST_SECURITY)    ",
-       "(LEDGER)           ",
-       "(MASTER_DEVICE)    ",
-       "(ACTIVATION)       ",  /* 20 */
-       "(SUBSYSTEM)        ",
-       "(IO_DONE_QUEUE)    ",
-       "(SEMAPHORE)        ",
-       "(LOCK_SET)         ",
-       "(CLOCK)            ",
-       "(CLOCK_CTRL)       ",  /* 26 */
-       "(IOKIT_SPARE)      ",  /* 27 */
-       "(NAMED_MEM_ENTRY)  ",  /* 28 */
-       "(IOKIT_CONNECT)    ",
-       "(IOKIT_OBJECT)     ",  /* 30 */
-       "(UPL)              ",
-       "(MEM_OBJ_CONTROL)  ",
-       "(AU_SESSIONPORT)   ",  /* 33 */
-       "(FILEPORT)", /* 34 */
-#if CONFIG_MACF_MACH
-       "(LABELH)           ",
-#endif
-/*
- * Add new entries here.
- * Please keep in sync with kern/ipc_kobject.h
- */
-       "(UNKNOWN)          "   /* magic catchall       */
-};
-
-void
-ipc_object_print(
-       ipc_object_t    object)
-{
-       int kotype;
-
-       iprintf("%s", io_active(object) ? "active" : "dead");
-       printf(", refs=%d", object->io_references);
-       printf(", otype=%d", io_otype(object));
-       kotype = io_kotype(object);
-       if (kotype >= 0 && kotype < IKOT_MAX_TYPE)
-               printf(", kotype=%d %s\n", io_kotype(object),
-                      ikot_print_array[kotype]);
-       else
-               printf(", kotype=0x%x %s\n", io_kotype(object),
-                      ikot_print_array[IKOT_UNKNOWN]);
-}
-
-#endif /* MACH_KDB */
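The same discipline shows up in ipc_object_copyin above: because ip_release() can now free a port outright, reference drops and link frees are queued up and performed only after the space lock is released. The pattern, restated as a sketch:

    queue_head_t links_data;
    queue_t links = &links_data;
    ipc_port_t release_port = IP_NULL;
    wait_queue_link_t wql;

    queue_init(links);
    /* ... under is_write_lock(space): mutate rights, collecting
     * unlinked wait-queue links on `links` and at most one port
     * to drop in `release_port` ... */
    is_write_unlock(space);

    while (!queue_empty(links)) {               /* cleanup, locks dropped */
            wql = (wait_queue_link_t) dequeue(links);
            wait_queue_link_free(wql);
    }
    if (release_port != IP_NULL)
            ip_release(release_port);           /* may free the port */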
index a813b29bf5ba2d6acc1b9c2d9f946976d7ca24b0..05822d0faef4dc44f3689e0177268144ddb5b33c 100644 (file)
@@ -73,7 +73,6 @@
 #define _IPC_IPC_OBJECT_H_
 
 #include <mach_rt.h>
-#include <mach_kdb.h>
 
 #include <mach/kern_return.h>
 #include <mach/message.h>
@@ -81,6 +80,7 @@
 #include <kern/macro_help.h>
 #include <kern/zalloc.h>
 #include <ipc/ipc_types.h>
+#include <libkern/OSAtomic.h>
 
 typedef natural_t ipc_object_refs_t;   /* for ipc/ipc_object.h         */
 typedef natural_t ipc_object_bits_t;
@@ -100,7 +100,7 @@ typedef natural_t ipc_object_type_t;
 struct ipc_object {
        ipc_object_bits_t io_bits;
        ipc_object_refs_t io_references;
-       decl_lck_mtx_data(,     io_lock_data)
+       lck_spin_t      io_lock_data;
 };
 
 /*
@@ -165,27 +165,18 @@ extern void       io_free(
  * (ipc_port and ipc_pset).
  */
 #define io_lock_init(io) \
-       lck_mtx_init(&(io)->io_lock_data, &ipc_lck_grp, &ipc_lck_attr)
+       lck_spin_init(&(io)->io_lock_data, &ipc_lck_grp, &ipc_lck_attr)
 #define io_lock_destroy(io) \
-       lck_mtx_destroy(&(io)->io_lock_data, &ipc_lck_grp)
+       lck_spin_destroy(&(io)->io_lock_data, &ipc_lck_grp)
 #define        io_lock(io) \
-       lck_mtx_lock(&(io)->io_lock_data)
+       lck_spin_lock(&(io)->io_lock_data)
 #define        io_lock_try(io) \
-       lck_mtx_try_lock(&(io)->io_lock_data)
+       lck_spin_try_lock(&(io)->io_lock_data)
 #define        io_unlock(io) \
-       lck_mtx_unlock(&(io)->io_lock_data)
+       lck_spin_unlock(&(io)->io_lock_data)
 
 #define _VOLATILE_ volatile
 
-#define io_check_unlock(io)                                            \
-MACRO_BEGIN                                                            \
-       _VOLATILE_ ipc_object_refs_t _refs = (io)->io_references;       \
-                                                                       \
-       io_unlock(io);                                                  \
-       if (_refs == 0)                                                 \
-               io_free(io_otype(io), io);                              \
-MACRO_END
-
 /* Sanity check the ref count.  If it is 0, we may be doubly zfreeing.
  * If it is larger than max int, it has been corrupted, probably by being
  * modified into an address (this is architecture dependent, but it's
@@ -198,18 +189,24 @@ MACRO_END
 #define IO_MAX_REFERENCES                                              \
        (unsigned)(~0 ^ (1 << (sizeof(int)*BYTE_SIZE - 1)))
 
-#define        io_reference(io)                                                \
-MACRO_BEGIN                                                            \
-       assert((io)->io_references < IO_MAX_REFERENCES);                \
-       (io)->io_references++;                                          \
-MACRO_END
-
-#define        io_release(io)                                                  \
-MACRO_BEGIN                                                            \
-       assert((io)->io_references > 0 &&                               \
-                   (io)->io_references <= IO_MAX_REFERENCES);          \
-       (io)->io_references--;                                          \
-MACRO_END
+static inline void
+io_reference(ipc_object_t io) {
+       assert((io)->io_references > 0 &&
+           (io)->io_references < IO_MAX_REFERENCES);
+       OSIncrementAtomic(&((io)->io_references));
+}
+
+
+static inline void
+io_release(ipc_object_t io) {
+       assert((io)->io_references > 0 &&
+           (io)->io_references < IO_MAX_REFERENCES);
+       /* OSDecrementAtomic returns the pre-decrement value;
+        * 1 means we just dropped the last reference. */
+       if ( 1 == OSDecrementAtomic(&((io)->io_references))) {
+               /* Free the object */
+               io_free(io_otype((io)), (io));
+       }
+}
 
 /*   
  * Retrieve a label for use in a kernel call that takes a security
@@ -334,12 +331,4 @@ extern kern_return_t ipc_object_rename(
        mach_port_name_t        oname,
        mach_port_name_t        nname);
 
-#if    MACH_KDB
-/* Pretty-print an ipc object */
-
-extern void ipc_object_print(
-       ipc_object_t    object);
-
-#endif /* MACH_KDB */
-
 #endif /* _IPC_IPC_OBJECT_H_ */
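Two rules fall out of the inline replacements above: reference counts are maintained atomically with no object lock held, and the decrement that observes a pre-decrement value of 1 frees the object on the spot. The lifetime contract, as a minimal sketch:

    io_reference(io);      /* atomic increment; no lock required          */
    /* ... use the object; if its lock was taken, drop it first ...       */
    io_unlock(io);
    io_release(io);        /* atomic decrement; frees the object at zero  */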
index 0ece0705ce6bd1dad68cd14a22a2c60eb0fe0113..d76463f64e07b24995af9c353dd54fa67250ac00 100644 (file)
@@ -70,7 +70,6 @@
  */
 
 #include <norma_vm.h>
-#include <mach_kdb.h>
 #include <zone_debug.h>
 #include <mach_assert.h>
 
 #include <ipc/ipc_kmsg.h>
 #include <ipc/ipc_mqueue.h>
 #include <ipc/ipc_notify.h>
-#include <ipc/ipc_print.h>
 #include <ipc/ipc_table.h>
 
 #include <security/mac_mach_internal.h>
 
-#if    MACH_KDB
-#include <machine/db_machdep.h>
-#include <ddb/db_command.h>
-#include <ddb/db_expr.h>
-#endif /* MACH_KDB */
-
 #include <string.h>
 
 decl_lck_mtx_data(,    ipc_port_multiple_lock_data)
-decl_lck_mtx_data(,    ipc_port_timestamp_lock_data)
 lck_mtx_ext_t  ipc_port_multiple_lock_data_ext;
-lck_mtx_ext_t  ipc_port_timestamp_lock_data_ext;
 ipc_port_timestamp_t   ipc_port_timestamp_data;
 int ipc_portbt;
 
 #if    MACH_ASSERT
 void   ipc_port_init_debug(
-               ipc_port_t      port);
+               ipc_port_t      port,
+               natural_t       *callstack,
+               unsigned int    callstack_max);
+
+void   ipc_port_callstack_init_debug(
+               natural_t       *callstack,
+               unsigned int    callstack_max);
+       
 #endif /* MACH_ASSERT */
 
-#if    MACH_KDB && ZONE_DEBUG
-/* Forwards */
-void   print_type_ports(unsigned, unsigned);
-void   print_ports(void);
-#endif /* MACH_KDB && ZONE_DEBUG */
+void
+ipc_port_release(ipc_port_t port)
+{
+       ip_release(port);
+}
+
+void
+ipc_port_reference(ipc_port_t port)
+{
+       ip_reference(port);
+}
 
 /*
  *     Routine:        ipc_port_timestamp
@@ -129,13 +132,7 @@ void       print_ports(void);
 ipc_port_timestamp_t
 ipc_port_timestamp(void)
 {
-       ipc_port_timestamp_t timestamp;
-
-       ipc_port_timestamp_lock();
-       timestamp = ipc_port_timestamp_data++;
-       ipc_port_timestamp_unlock();
-
-       return timestamp;
+       return OSIncrementAtomic(&ipc_port_timestamp_data);
 }
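OSIncrementAtomic returns the value held before the increment, so the lock-free version preserves the old fetch-then-bump semantics without serializing callers:

    /* old: lock; t = data++; unlock  --  new, same result: */
    ipc_port_timestamp_t t = OSIncrementAtomic(&ipc_port_timestamp_data);
    /* every caller observes a distinct t, modulo counter wrap */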
 
 /*
@@ -249,12 +246,11 @@ ipc_port_request_grow(
 
        if ((its->its_size == 0) ||
            ((ntable = it_requests_alloc(its)) == IPR_NULL)) {
-               ipc_port_release(port);
+               ip_release(port);
                return KERN_RESOURCE_SHORTAGE;
        }
 
        ip_lock(port);
-       ip_release(port);
 
        /*
         *      Check that port is still active and that nobody else
@@ -302,12 +298,14 @@ ipc_port_request_grow(
                ntable->ipr_size = its;
                port->ip_requests = ntable;
                ip_unlock(port);
+               ip_release(port);
 
                if (otable != IPR_NULL) {
                        it_requests_free(oits, otable);
                }
        } else {
-               ip_check_unlock(port);
+               ip_unlock(port);
+               ip_release(port);
                it_requests_free(its, ntable);
        }
 
@@ -378,8 +376,6 @@ ipc_port_request_type(
 
                        if (!IPR_SOR_SPARMED(ipr->ipr_soright)) {
                                type |= MACH_PORT_TYPE_SPREQUEST_DELAYED;
-                       } else {
-                               assert(port->ip_sprequests == TRUE);
                        }
                }
        }
@@ -501,7 +497,8 @@ ipc_port_nsrequest(
 
 void
 ipc_port_clear_receiver(
-       ipc_port_t      port)
+       ipc_port_t      port,
+       queue_t         links)
 {
        spl_t           s;
 
@@ -511,7 +508,7 @@ ipc_port_clear_receiver(
         * pull ourselves from any sets.
         */
        if (port->ip_pset_count != 0) {
-               ipc_pset_remove_from_all(port);
+               ipc_pset_remove_from_all(port, links);
                assert(port->ip_pset_count == 0);
        }
 
@@ -558,10 +555,6 @@ ipc_port_init(
        port->ip_premsg = IKM_NULL;
        port->ip_context = 0;
 
-#if    MACH_ASSERT
-       ipc_port_init_debug(port);
-#endif /* MACH_ASSERT */
-
        ipc_mqueue_init(&port->ip_messages, FALSE /* set */);
 }
 
@@ -589,6 +582,11 @@ ipc_port_alloc(
        mach_port_name_t name;
        kern_return_t kr;
 
+#if     MACH_ASSERT
+       natural_t buf[IP_CALLSTACK_MAX];
+       ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
+#endif /* MACH_ASSERT */
+           
        kr = ipc_object_alloc(space, IOT_PORT,
                              MACH_PORT_TYPE_RECEIVE, 0,
                              &name, (ipc_object_t *) &port);
@@ -599,6 +597,10 @@ ipc_port_alloc(
 
        ipc_port_init(port, space, name);
 
+#if     MACH_ASSERT
+       ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
+#endif  /* MACH_ASSERT */
+
 #if CONFIG_MACF_MACH
        task_t issuer = current_task();
        tasklabel_lock2 (issuer, space->is_task);
@@ -636,6 +638,11 @@ ipc_port_alloc_name(
        ipc_port_t port;
        kern_return_t kr;
 
+#if     MACH_ASSERT
+       natural_t buf[IP_CALLSTACK_MAX];
+       ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
+#endif /* MACH_ASSERT */       
+
        kr = ipc_object_alloc_name(space, IOT_PORT,
                                   MACH_PORT_TYPE_RECEIVE, 0,
                                   name, (ipc_object_t *) &port);
@@ -646,6 +653,10 @@ ipc_port_alloc_name(
 
        ipc_port_init(port, space, name);
 
+#if     MACH_ASSERT
+       ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
+#endif  /* MACH_ASSERT */      
+
 #if CONFIG_MACF_MACH
        task_t issuer = current_task();
        tasklabel_lock2 (issuer, space->is_task);
@@ -815,13 +826,21 @@ ipc_port_destroy(
         * like a normal buffer.
         */
        if (IP_PREALLOC(port)) {
+               ipc_port_t inuse_port;
+
                kmsg = port->ip_premsg;
                assert(kmsg != IKM_NULL);
+               inuse_port = ikm_prealloc_inuse_port(kmsg);
                IP_CLEAR_PREALLOC(port, kmsg);
-               if (!ikm_prealloc_inuse(kmsg))
+               ip_unlock(port);
+               if (inuse_port != IP_NULL) {
+                       assert(inuse_port == port);
+               } else {
                        ipc_kmsg_free(kmsg);
+               }
+       } else {
+               ip_unlock(port);
        }
-       ip_unlock(port);
 
        /* throw away no-senders request */
        nsrequest = port->ip_nsrequest;
@@ -837,7 +856,7 @@ ipc_port_destroy(
 
        ipc_kobject_destroy(port);
 
-       ipc_port_release(port); /* consume caller's ref */
+       ip_release(port); /* consume caller's ref */
 }
 
 /*
@@ -1001,7 +1020,7 @@ ipc_port_lookup_notify(
        ipc_port_t port;
        ipc_entry_t entry;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        entry = ipc_entry_lookup(space, name);
        if (entry == IE_NULL)
@@ -1160,10 +1179,10 @@ ipc_port_release_send(
                return;
 
        ip_lock(port);
-       ip_release(port);
 
        if (!ip_active(port)) {
-               ip_check_unlock(port);
+               ip_unlock(port);
+               ip_release(port);
                return;
        }
 
@@ -1175,9 +1194,30 @@ ipc_port_release_send(
                port->ip_nsrequest = IP_NULL;
                mscount = port->ip_mscount;
                ip_unlock(port);
+               ip_release(port);
                ipc_notify_no_senders(nsrequest, mscount);
-       } else
+       } else {
                ip_unlock(port);
+               ip_release(port);
+       }
+}
+
+/*
+ *     Routine:        ipc_port_make_sonce_locked
+ *     Purpose:
+ *             Make a naked send-once right from a receive right.
+ *     Conditions:
+ *             The port is locked and active.
+ */
+
+ipc_port_t
+ipc_port_make_sonce_locked(
+       ipc_port_t      port)
+{
+       assert(ip_active(port));
+       port->ip_sorights++;
+       ip_reference(port);
+       return port;
 }
 
 /*
@@ -1185,7 +1225,7 @@ ipc_port_release_send(
  *     Purpose:
  *             Make a naked send-once right from a receive right.
  *     Conditions:
- *             The port is not locked but it is active.
+ *             The port is not locked.
  */
 
 ipc_port_t
@@ -1196,12 +1236,14 @@ ipc_port_make_sonce(
                return port;
 
        ip_lock(port);
-       assert(ip_active(port));
-       port->ip_sorights++;
-       ip_reference(port);
+       if (ip_active(port)) {
+               port->ip_sorights++;
+               ip_reference(port);
+               ip_unlock(port);
+               return port;
+       }
        ip_unlock(port);
-
-       return port;
+       return IP_DEAD;
 }
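ipc_port_make_sonce() can now fail: a port that died between lookup and lock yields IP_DEAD rather than tripping the old assertion. Callers therefore need a validity check (sketch; the error handling is an assumption):

    ipc_port_t sonce = ipc_port_make_sonce(port);
    if (!IP_VALID(sonce)) {
            /* port was destroyed in the meantime */
            return;        /* or report an error, per call site */
    }
    /* ... arm the notification with the send-once right ... */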
 
 /*
@@ -1231,14 +1273,8 @@ ipc_port_release_sonce(
 
        port->ip_sorights--;
 
-       ip_release(port);
-
-       if (!ip_active(port)) {
-               ip_check_unlock(port);
-               return;
-       }
-
        ip_unlock(port);
+       ip_release(port);
 }
 
 /*
@@ -1267,7 +1303,7 @@ ipc_port_release_receive(
        ipc_port_destroy(port); /* consumes ref, unlocks */
 
        if (dest != IP_NULL)
-               ipc_port_release(dest);
+               ip_release(dest);
 }
 
 /*
@@ -1290,6 +1326,11 @@ ipc_port_alloc_special(
        if (port == IP_NULL)
                return IP_NULL;
 
+#if     MACH_ASSERT
+       natural_t buf[IP_CALLSTACK_MAX];
+       ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
+#endif /* MACH_ASSERT */       
+
        bzero((char *)port, sizeof(*port));
        io_lock_init(&port->ip_object);
        port->ip_references = 1;
@@ -1297,6 +1338,10 @@ ipc_port_alloc_special(
 
        ipc_port_init(port, space, 1);
 
+#if     MACH_ASSERT
+       ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
+#endif  /* MACH_ASSERT */              
+
 #if CONFIG_MACF_MACH
        /* Currently, ipc_port_alloc_special is used for two things:
         * - Reply ports for messages from the kernel
@@ -1387,8 +1432,7 @@ ipc_port_finalize(
  *     deallocation is intercepted via io_free.
  */
 queue_head_t   port_alloc_queue;
-decl_lck_mtx_data(,port_alloc_queue_lock)
-lck_mtx_ext_t  port_alloc_queue_lock_ext;
+lck_spin_t     port_alloc_queue_lock;
 
 unsigned long  port_count = 0;
 unsigned long  port_count_warning = 20000;
@@ -1412,7 +1456,8 @@ void
 ipc_port_debug_init(void)
 {
        queue_init(&port_alloc_queue);
-       lck_mtx_init_ext(&port_alloc_queue_lock, &port_alloc_queue_lock_ext, &ipc_lck_grp, &ipc_lck_attr);
+
+       lck_spin_init(&port_alloc_queue_lock, &ipc_lck_grp, &ipc_lck_attr);
 
        if (!PE_parse_boot_argn("ipc_portbt", &ipc_portbt, sizeof (ipc_portbt)))
                ipc_portbt = 0;
@@ -1428,16 +1473,18 @@ extern int proc_pid(struct proc*);
  */
 void
 ipc_port_init_debug(
-       ipc_port_t      port)
+       ipc_port_t      port,
+       natural_t       *callstack,
+       unsigned int    callstack_max)
 {
        unsigned int    i;
 
        port->ip_thread = current_thread();
        port->ip_timetrack = port_timestamp++;
-       for (i = 0; i < IP_CALLSTACK_MAX; ++i)
-               port->ip_callstack[i] = 0;
+       for (i = 0; i < callstack_max; ++i)
+               port->ip_callstack[i] = callstack[i];   
        for (i = 0; i < IP_NSPARES; ++i)
-               port->ip_spares[i] = 0;
+               port->ip_spares[i] = 0; 
 
 #ifdef MACH_BSD
        task_t task = current_task();
@@ -1448,24 +1495,39 @@ ipc_port_init_debug(
        }
 #endif /* MACH_BSD */
 
-       /*
-        *      Machine-dependent routine to fill in an
-        *      array with up to IP_CALLSTACK_MAX levels
-        *      of return pc information.
-        */
-       if (ipc_portbt)
-               machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX);
-
 #if 0
-       lck_mtx_lock(&port_alloc_queue_lock);
+       lck_spin_lock(&port_alloc_queue_lock);
        ++port_count;
        if (port_count_warning > 0 && port_count >= port_count_warning)
                assert(port_count < port_count_warning);
        queue_enter(&port_alloc_queue, port, ipc_port_t, ip_port_links);
-       lck_mtx_unlock(&port_alloc_queue_lock);
+       lck_spin_unlock(&port_alloc_queue_lock);
 #endif
 }
 
+/*
+ *     Routine:        ipc_port_callstack_init_debug
+ *     Purpose:
+ *             Calls the machine-dependent routine to
+ *             fill in an array with up to IP_CALLSTACK_MAX
+ *             levels of return pc information
+ *     Conditions:
+ *             May block (via copyin)
+ */
+void
+ipc_port_callstack_init_debug(
+       natural_t       *callstack,
+       unsigned int    callstack_max)
+{
+       unsigned int    i;
+
+       /* guarantee the callstack is initialized */
+       for (i=0; i < callstack_max; i++)
+               callstack[i] = 0;       
+
+       if (ipc_portbt)
+               machine_callstack(callstack, callstack_max);
+}
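Splitting the capture out of ipc_port_init_debug() lets the possibly blocking machine_callstack() run before any allocation or locking, which is exactly how the alloc paths above use it:

    /* Pattern from ipc_port_alloc()/ipc_port_alloc_name() above: */
    natural_t buf[IP_CALLSTACK_MAX];
    ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);  /* no locks held */
    /* ... ipc_object_alloc() + ipc_port_init() ... */
    ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);      /* stamp the port */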
 
 /*
  *     Remove a port from the queue of allocated ports.
@@ -1483,672 +1545,13 @@ void
 ipc_port_track_dealloc(
        ipc_port_t              port)
 {
-       lck_mtx_lock(&port_alloc_queue_lock);
+       lck_spin_lock(&port_alloc_queue_lock);
        assert(port_count > 0);
        --port_count;
        queue_remove(&port_alloc_queue, port, ipc_port_t, ip_port_links);
-       lck_mtx_unlock(&port_alloc_queue_lock);
+       lck_spin_unlock(&port_alloc_queue_lock);
 }
 #endif
 
 
 #endif /* MACH_ASSERT */
-
-
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-#include <ddb/db_print.h>
-
-#define        printf  kdbprintf
-
-int
-db_port_queue_print(
-       ipc_port_t      port);
-
-/*
- *     Routine:        ipc_port_print
- *     Purpose:
- *             Pretty-print a port for kdb.
- */
-int    ipc_port_print_long = 0;        /* set for more detail */
-
-void
-ipc_port_print(
-       ipc_port_t              port,
-       __unused boolean_t      have_addr,
-       __unused db_expr_t      count,
-       char                    *modif)
-{
-       db_addr_t       task;
-       int             task_id;
-       int             nmsgs;
-       int             verbose = 0;
-#if    MACH_ASSERT
-       int             i, needs_db_indent, items_printed;
-#endif /* MACH_ASSERT */
-       
-       if (db_option(modif, 'l') || db_option(modif, 'v'))
-               ++verbose;
-
-       printf("port 0x%x\n", port);
-
-       db_indent += 2;
-
-       ipc_object_print(&port->ip_object);
-
-       if (ipc_port_print_long) {
-               printf("\n");
-       }
-
-       if (!ip_active(port)) {
-               iprintf("timestamp=0x%x", port->ip_timestamp);
-       } else if (port->ip_receiver_name == MACH_PORT_NULL) {
-               iprintf("destination=0x%x (", port->ip_destination);
-               if (port->ip_destination != MACH_PORT_NULL &&
-                   (task = db_task_from_space(port->ip_destination->
-                                              ip_receiver, &task_id)))
-                       printf("task%d at 0x%x", task_id, task);
-               else
-                       printf("unknown");
-               printf(")");
-       } else {
-               iprintf("receiver=0x%x (", port->ip_receiver);
-               if (port->ip_receiver == ipc_space_kernel)
-                       printf("kernel");
-               else if (port->ip_receiver == ipc_space_reply)
-                       printf("reply");
-               else if (port->ip_receiver == default_pager_space)
-                       printf("default_pager");
-               else if ((task = db_task_from_space(port->ip_receiver, &task_id)) != (db_addr_t)0)
-                       printf("task%d at 0x%x", task_id, task);
-               else
-                       printf("unknown");
-               printf(")");
-       }
-       printf(", receiver_name=0x%x\n", port->ip_receiver_name);
-
-       iprintf("mscount=%d", port->ip_mscount);
-       printf(", srights=%d", port->ip_srights);
-       printf(", sorights=%d\n", port->ip_sorights);
-
-       iprintf("nsrequest=0x%x", port->ip_nsrequest);
-       printf(", pdrequest=0x%x", port->ip_pdrequest);
-       printf(", requests=0x%x\n", port->ip_requests);
-
-       iprintf("pset_count=0x%x", port->ip_pset_count);
-       printf(", seqno=%d", port->ip_messages.imq_seqno);
-       printf(", msgcount=%d", port->ip_messages.imq_msgcount);
-       printf(", qlimit=%d\n", port->ip_messages.imq_qlimit);
-
-       iprintf("kmsgs=0x%x", port->ip_messages.imq_messages.ikmq_base);
-       printf(", rcvrs queue=0x%x", port->ip_messages.imq_wait_queue);
-       printf(", kobj=0x%x\n", port->ip_kobject);
-
-       iprintf("premsg=0x%x", port->ip_premsg);
-
-#if    MACH_ASSERT
-       /* don't bother printing callstack or queue links */
-       iprintf("ip_thread=0x%x, ip_timetrack=0x%x\n",
-               port->ip_thread, port->ip_timetrack);
-       items_printed = 0;
-       needs_db_indent = 1;
-       for (i = 0; i < IP_NSPARES; ++i) {
-               if (port->ip_spares[i] != 0) {
-                       if (needs_db_indent) {
-                               iprintf("");
-                               needs_db_indent = 0;
-                       }
-                       printf("%sip_spares[%d] = %d",
-                              items_printed ? ", " : "", i, 
-                              port->ip_spares[i]);
-                       if (++items_printed >= 4) {
-                               needs_db_indent = 1;
-                               printf("\n");
-                               items_printed = 0;
-                       }
-               }
-       }
-#endif /* MACH_ASSERT */
-
-       if (verbose) {
-               iprintf("kmsg queue contents:\n");
-               db_indent += 2;
-               nmsgs = db_port_queue_print(port);
-               db_indent -= 2;
-               iprintf("...total kmsgs:  %d\n", nmsgs);
-       }
-
-       db_indent -=2;
-}
-
-ipc_port_t
-ipc_name_to_data(
-       task_t                  task,
-       mach_port_name_t        name)
-{
-       ipc_space_t     space;
-       ipc_entry_t     entry;
-
-       if (task == TASK_NULL) {
-               db_printf("port_name_to_data: task is null\n");
-               return (0);
-       }
-       if ((space = task->itk_space) == 0) {
-               db_printf("port_name_to_data: task->itk_space is null\n");
-               return (0);
-       }
-       if (!space->is_active) {
-               db_printf("port_name_to_data: task->itk_space not active\n");
-               return (0);
-       }
-       if ((entry = ipc_entry_lookup(space, name)) == 0) {
-               db_printf("port_name_to_data: lookup yields zero\n");
-               return (0);
-       }
-       return ((ipc_port_t)entry->ie_object);
-}
-
-#if    ZONE_DEBUG
-void
-print_type_ports(type, dead)
-       unsigned type;
-       unsigned dead;
-{
-       ipc_port_t port;
-       int n;
-
-       n = 0;
-       for (port = (ipc_port_t)first_element(ipc_object_zones[IOT_PORT]);
-            port;
-            port = (ipc_port_t)next_element(ipc_object_zones[IOT_PORT], 
-                                            port))
-               if (ip_kotype(port) == type &&
-                   (!dead || !ip_active(port))) {
-                       if (++n % 5)
-                               printf("0x%x\t", port);
-                       else
-                               printf("0x%x\n", port);
-               }
-       if (n % 5)
-               printf("\n");
-}
-
-void
-print_ports(void)
-{
-       ipc_port_t port;
-       int total_port_count;
-       int space_null_count;
-       int space_kernel_count;
-       int space_reply_count;
-       int space_pager_count;
-       int space_other_count;
-
-       struct {
-               int total_count;
-               int dead_count;
-       } port_types[IKOT_MAX_TYPE];
-
-       total_port_count = 0;
-
-       bzero((char *)&port_types[0], sizeof(port_types));
-       space_null_count = 0;
-       space_kernel_count = 0;
-       space_reply_count = 0;
-       space_pager_count = 0;
-       space_other_count = 0;
-
-       for (port = (ipc_port_t)first_element(ipc_object_zones[IOT_PORT]);
-            port;
-            port = (ipc_port_t)next_element(ipc_object_zones[IOT_PORT], 
-                                            port)) {
-               total_port_count++;
-               if (ip_kotype(port) >= IKOT_MAX_TYPE) {
-                       port_types[IKOT_UNKNOWN].total_count++;
-                       if (!io_active(&port->ip_object))
-                               port_types[IKOT_UNKNOWN].dead_count++;
-               } else {
-                       port_types[ip_kotype(port)].total_count++;
-                       if (!io_active(&port->ip_object))
-                               port_types[ip_kotype(port)].dead_count++;
-               }
-
-               if (!port->ip_receiver)
-                       space_null_count++;
-               else if (port->ip_receiver == ipc_space_kernel)
-                       space_kernel_count++;
-               else if (port->ip_receiver == ipc_space_reply)
-                       space_reply_count++;
-               else if (port->ip_receiver == default_pager_space)
-                       space_pager_count++;
-               else
-                       space_other_count++;
-       }
-       printf("\n%7d   total ports\n\n", total_port_count);
-
-#define PRINT_ONE_PORT_TYPE(name) \
-       printf("%7d     %s", port_types[IKOT_##name].total_count, # name); \
-       if (port_types[IKOT_##name].dead_count) \
-            printf(" (%d dead ports)", port_types[IKOT_##name].dead_count);\
-       printf("\n");
-
-       PRINT_ONE_PORT_TYPE(NONE);
-       PRINT_ONE_PORT_TYPE(THREAD);
-       PRINT_ONE_PORT_TYPE(TASK);
-       PRINT_ONE_PORT_TYPE(HOST);
-       PRINT_ONE_PORT_TYPE(HOST_PRIV);
-       PRINT_ONE_PORT_TYPE(PROCESSOR);
-       PRINT_ONE_PORT_TYPE(PSET);
-       PRINT_ONE_PORT_TYPE(PSET_NAME);
-       PRINT_ONE_PORT_TYPE(TIMER);
-       PRINT_ONE_PORT_TYPE(PAGING_REQUEST);
-       PRINT_ONE_PORT_TYPE(MIG);
-       PRINT_ONE_PORT_TYPE(MEMORY_OBJECT);
-       PRINT_ONE_PORT_TYPE(XMM_PAGER);
-       PRINT_ONE_PORT_TYPE(XMM_KERNEL);
-       PRINT_ONE_PORT_TYPE(XMM_REPLY);
-       PRINT_ONE_PORT_TYPE(UND_REPLY);
-       PRINT_ONE_PORT_TYPE(HOST_NOTIFY);
-       PRINT_ONE_PORT_TYPE(HOST_SECURITY);
-       PRINT_ONE_PORT_TYPE(LEDGER);
-       PRINT_ONE_PORT_TYPE(MASTER_DEVICE);
-       PRINT_ONE_PORT_TYPE(TASK_NAME);
-       PRINT_ONE_PORT_TYPE(SUBSYSTEM);
-       PRINT_ONE_PORT_TYPE(IO_DONE_QUEUE);
-       PRINT_ONE_PORT_TYPE(SEMAPHORE);
-       PRINT_ONE_PORT_TYPE(LOCK_SET);
-       PRINT_ONE_PORT_TYPE(CLOCK);
-       PRINT_ONE_PORT_TYPE(CLOCK_CTRL);
-       PRINT_ONE_PORT_TYPE(IOKIT_SPARE);
-       PRINT_ONE_PORT_TYPE(NAMED_ENTRY);
-       PRINT_ONE_PORT_TYPE(IOKIT_CONNECT);
-       PRINT_ONE_PORT_TYPE(IOKIT_OBJECT);
-       PRINT_ONE_PORT_TYPE(UPL);
-       PRINT_ONE_PORT_TYPE(MEM_OBJ_CONTROL);
-
-       PRINT_ONE_PORT_TYPE(UNKNOWN);
-       printf("\nipc_space:\n\n");
-       printf("NULL    KERNEL  REPLY   PAGER   OTHER\n");
-       printf("%d      %d      %d      %d      %d\n",
-              space_null_count,
-              space_kernel_count,
-              space_reply_count,
-              space_pager_count,
-              space_other_count
-       );
-}
-
-#endif /* ZONE_DEBUG */
-
-
-/*
- *     Print out all the kmsgs in a queue.  Aggregate kmsgs with
- *     identical message ids into a single entry.  Count up the
- *     amount of inline and out-of-line data consumed by each
- *     and every kmsg.
- *
- */
-
-#define        KMSG_MATCH_FIELD(kmsg)  (kmsg->ikm_header->msgh_id)
-#define        DKQP_LONG(kmsg) FALSE
-const char     *dkqp_long_format = "(%3d) <%10d> 0x%x   %10d %10d\n";
-const char     *dkqp_format = "(%3d) <%10d> 0x%x   %10d %10d\n";
-
-int
-db_kmsg_queue_print(
-       ipc_kmsg_t      kmsg);
-int
-db_kmsg_queue_print(
-       ipc_kmsg_t      kmsg)
-{
-       ipc_kmsg_t      ikmsg, first_kmsg;
-       register int    icount;
-       mach_msg_id_t   cur_id;
-       unsigned int    inline_total, ool_total;
-       int             nmsgs;
-
-       iprintf("Count      msgh_id  kmsg addr inline bytes   ool bytes\n");
-       inline_total = ool_total = (vm_size_t) 0;
-       cur_id = KMSG_MATCH_FIELD(kmsg);
-       for (icount = 0, nmsgs = 0, first_kmsg = ikmsg = kmsg;
-            kmsg != IKM_NULL && (kmsg != first_kmsg || nmsgs == 0);
-            kmsg = kmsg->ikm_next) {
-               ++nmsgs;
-               if (!(KMSG_MATCH_FIELD(kmsg) == cur_id)) {
-                       iprintf(DKQP_LONG(kmsg) ? dkqp_long_format:dkqp_format,
-                               icount, cur_id, ikmsg, inline_total,ool_total);
-                       cur_id = KMSG_MATCH_FIELD(kmsg);
-                       icount = 1;
-                       ikmsg = kmsg;
-                       inline_total = ool_total = 0;
-               } else {
-                       icount++;
-               }
-               if (DKQP_LONG(kmsg))
-                       inline_total += kmsg->ikm_size;
-               else
-                       inline_total += kmsg->ikm_header->msgh_size;
-       }
-       iprintf(DKQP_LONG(kmsg) ? dkqp_long_format : dkqp_format,
-               icount, cur_id, ikmsg, inline_total, ool_total);
-       return nmsgs;
-}
-
-
-/*
- *     Process all of the messages on a port - prints out the
- *     number of occurences of each message type, and the first
- *     kmsg with a particular msgh_id.
- */
-int
-db_port_queue_print(
-       ipc_port_t      port)
-{
-       ipc_kmsg_t      kmsg;
-
-       if (ipc_kmsg_queue_empty(&port->ip_messages.imq_messages))
-               return 0;
-       kmsg = ipc_kmsg_queue_first(&port->ip_messages.imq_messages);
-       return db_kmsg_queue_print(kmsg);
-}
-
-
-#if    MACH_ASSERT
-#include <ddb/db_sym.h>
-#include <ddb/db_access.h>
-
-#define        FUNC_NULL       ((void (*)) 0)
-#define        MAX_REFS        5               /* bins for tracking ref counts */
-
-/*
- *     Translate port's cache of call stack pointers
- *     into symbolic names.
- */
-void
-db_port_stack_trace(
-       ipc_port_t      port)
-{
-       unsigned int    i;
-
-       for (i = 0; i < IP_CALLSTACK_MAX; ++i) {
-               iprintf("[%d] 0x%x\t", i, port->ip_callstack[i]);
-               if (port->ip_callstack[i] != 0 &&
-                   DB_VALID_KERN_ADDR(port->ip_callstack[i]))
-                       db_printsym(port->ip_callstack[i], DB_STGY_PROC);
-               printf("\n");
-       }
-}
-
-
-typedef struct port_item {
-       unsigned long   item;
-       unsigned long   count;
-} port_item;
-
-
-#define        ITEM_MAX        400
-typedef struct port_track {
-       const char      *name;
-       unsigned long   max;
-       unsigned long   warning;
-       port_item       items[ITEM_MAX];
-} port_track;
-
-port_track     port_callers;           /* match against calling addresses */
-port_track     port_threads;           /* match against allocating threads */
-port_track     port_spaces;            /* match against ipc spaces */
-
-void           port_track_init(
-                       port_track      *trackp,
-                       const char      *name);
-void           port_item_add(
-                       port_track      *trackp,
-                       unsigned long   item);
-void           port_track_sort(
-                       port_track      *trackp);
-void           port_track_print(
-                       port_track      *trackp,
-                       void            (*func)(port_item *));
-void           port_callers_print(
-                       port_item       *p);
-
-void
-port_track_init(
-       port_track      *trackp,
-       const char      *name)
-{
-       port_item       *i;
-
-       trackp->max = trackp->warning = 0;
-       trackp->name = name;
-       for (i = trackp->items; i < trackp->items + ITEM_MAX; ++i)
-               i->item = i->count = 0;
-}
-
-
-void
-port_item_add(
-       port_track      *trackp,
-       unsigned long   item)
-{
-       port_item       *limit, *i;
-
-       limit = trackp->items + trackp->max;
-       for (i = trackp->items; i < limit; ++i)
-               if (i->item == item) {
-                       i->count++;
-                       return;
-               }
-       if (trackp->max >= ITEM_MAX) {
-               if (trackp->warning++ == 0)
-                       iprintf("%s:  no room\n", trackp->name);
-               return;
-       }
-       i->item = item;
-       i->count = 1;
-       trackp->max++;
-}
-
-
-/*
- *     Simple (and slow) bubble sort.
- */
-void
-port_track_sort(
-       port_track      *trackp)
-{
-       port_item       *limit, *p;
-       port_item       temp;
-       boolean_t       unsorted;
-
-       limit = trackp->items + trackp->max - 1;
-       do {
-               unsorted = FALSE;
-               for (p = trackp->items; p < limit - 1; ++p) {
-                       if (p->count < (p+1)->count) {
-                               temp = *p;
-                               *p = *(p+1);
-                               *(p+1) = temp;
-                               unsorted = TRUE;
-                       }
-               }
-       } while (unsorted == TRUE);
-}
-
-
-void
-port_track_print(
-       port_track      *trackp,
-       void            (*func)(port_item *))
-{
-       port_item       *limit, *p;
-
-       limit = trackp->items + trackp->max;
-       iprintf("%s:\n", trackp->name);
-       for (p = trackp->items; p < limit; ++p) {
-               if (func != FUNC_NULL)
-                       (*func)(p);
-               else
-                       iprintf("0x%x\t%8d\n", p->item, p->count);
-       }
-}
-
-
-void
-port_callers_print(
-       port_item       *p)
-{
-       iprintf("0x%x\t%8d\t", p->item, p->count);
-       db_printsym(p->item, DB_STGY_PROC);
-       printf("\n");
-}
-
-
-/*
- *     Show all ports with a given reference count.
- */
-void
-db_ref(
-       int             refs)
-{
-       db_port_walk(1, 1, 1, refs);
-}
-
-
-/*
- *     Examine all currently allocated ports.
- *     Options:
- *             verbose         display suspicious ports
- *             display         print out each port encountered
- *             ref_search      restrict examination to ports with
- *                             a specified reference count
- *             ref_target      reference count for ref_search
- */
-int
-db_port_walk(
-       unsigned int    verbose,
-       unsigned int    display,
-       unsigned int    ref_search,
-       unsigned int    ref_target)
-{
-       ipc_port_t      port;
-       unsigned int    ref_overflow, refs, i, ref_inactive_overflow;
-       unsigned int    no_receiver, no_match;
-       unsigned int    ref_counts[MAX_REFS];
-       unsigned int    inactive[MAX_REFS];
-       unsigned int    ipc_ports = 0;
-
-       iprintf("Allocated port count is %d\n", port_count);
-       no_receiver = no_match = ref_overflow = 0;
-       ref_inactive_overflow = 0;
-       for (i = 0; i < MAX_REFS; ++i) {
-               ref_counts[i] = 0;
-               inactive[i] = 0;
-       }
-       port_track_init(&port_callers, "port callers");
-       port_track_init(&port_threads, "port threads");
-       port_track_init(&port_spaces, "port spaces");
-       if (ref_search)
-               iprintf("Walking ports of ref_count=%d.\n", ref_target);
-       else
-               iprintf("Walking all ports.\n");
-
-       queue_iterate(&port_alloc_queue, port, ipc_port_t, ip_port_links) {
-               const char *port_type;
-
-               port_type = " IPC port";
-               if (ip_active(port))
-                 ipc_ports++;
-
-               refs = port->ip_references;
-               if (ref_search && refs != ref_target)
-                       continue;
-
-               if (refs >= MAX_REFS) {
-                       if (ip_active(port))
-                               ++ref_overflow;
-                       else
-                               ++ref_inactive_overflow;
-               } else {
-                       if (refs == 0 && verbose)
-                               iprintf("%s 0x%x has ref count of zero!\n",
-                                       port_type, port);
-                       if (ip_active(port))
-                               ref_counts[refs]++;
-                       else
-                               inactive[refs]++;
-               }
-               port_item_add(&port_threads, (unsigned long) port->ip_thread);
-               for (i = 0; i < IP_CALLSTACK_MAX; ++i) {
-                       if (port->ip_callstack[i] != 0 &&
-                           DB_VALID_KERN_ADDR(port->ip_callstack[i]))
-                               port_item_add(&port_callers,
-                                             port->ip_callstack[i]);
-               }
-               if (!ip_active(port)) {
-                       if (verbose)
-                               iprintf("%s 0x%x, inactive, refcnt %d\n",
-                                       port_type, port, refs);
-                       continue;
-               }
-
-               if (port->ip_receiver_name == MACH_PORT_NULL) {
-                       iprintf("%s  0x%x, no receiver, refcnt %d\n",
-                               port, refs);
-                       ++no_receiver;
-                       continue;
-               }
-               if (port->ip_receiver == ipc_space_kernel ||
-                   port->ip_receiver == ipc_space_reply ||
-                   ipc_entry_lookup(port->ip_receiver,
-                                       port->ip_receiver_name) 
-                                       != IE_NULL) {
-                       port_item_add(&port_spaces,
-                                     (unsigned long)port->ip_receiver);
-                       if (display) {
-                               iprintf( "%s 0x%x time 0x%x ref_cnt %d\n",
-                                               port_type, port,
-                                               port->ip_timetrack, refs);
-                       }
-                       continue;
-               }
-               iprintf("%s 0x%x, rcvr 0x%x, name 0x%x, ref %d, no match\n",
-                               port_type, port, port->ip_receiver,
-                               port->ip_receiver_name, refs);
-               ++no_match;
-       }
-       iprintf("Active port type summary:\n");
-       iprintf("\tlocal  IPC %6d\n", ipc_ports);
-       iprintf("summary:\tcallers %d threads %d spaces %d\n",
-               port_callers.max, port_threads.max, port_spaces.max);
-
-       iprintf("\tref_counts:\n");
-       for (i = 0; i < MAX_REFS; ++i)
-               iprintf("\t  ref_counts[%d] = %d\n", i, ref_counts[i]);
-
-       iprintf("\t%d ports w/o receivers, %d w/o matches\n",
-               no_receiver, no_match);
-
-       iprintf("\tinactives:");
-       if ( ref_inactive_overflow || inactive[0] || inactive[1] ||
-            inactive[2] || inactive[3] || inactive[4] )
-               printf(" [0]=%d [1]=%d [2]=%d [3]=%d [4]=%d [5+]=%d\n",
-                       inactive[0], inactive[1], inactive[2],
-                       inactive[3], inactive[4], ref_inactive_overflow);
-       else
-               printf(" No inactive ports.\n");
-
-       port_track_sort(&port_spaces);
-       port_track_print(&port_spaces, FUNC_NULL);
-       port_track_sort(&port_threads);
-       port_track_print(&port_threads, FUNC_NULL);
-       port_track_sort(&port_callers);
-       port_track_print(&port_callers, port_callers_print);
-       return 0;
-}
-
-
-#endif /* MACH_ASSERT */
-
-#endif /* MACH_KDB */
index 34aab79d821ce4288c45b219f79221bc1d5cbe5f..100195f7b0ae24de76f42a0115d53bab051fa023 100644 (file)
@@ -186,7 +186,6 @@ struct ipc_port {
 #define        ip_lock(port)           io_lock(&(port)->ip_object)
 #define        ip_lock_try(port)       io_lock_try(&(port)->ip_object)
 #define        ip_unlock(port)         io_unlock(&(port)->ip_object)
-#define        ip_check_unlock(port)   io_check_unlock(&(port)->ip_object)
 
 #define        ip_reference(port)      io_reference(&(port)->ip_object)
 #define        ip_release(port)        io_release(&(port)->ip_object)
@@ -259,6 +258,7 @@ extern lck_attr_t   ipc_lck_attr;
  *     when it is taken.
  */
 
+#if 1
 decl_lck_mtx_data(extern,ipc_port_multiple_lock_data)
 extern lck_mtx_ext_t   ipc_port_multiple_lock_data_ext;
 
@@ -270,6 +270,18 @@ extern lck_mtx_ext_t       ipc_port_multiple_lock_data_ext;
 
 #define        ipc_port_multiple_unlock()                                      \
                lck_mtx_unlock(&ipc_port_multiple_lock_data)
+#else
+lck_spin_t ipc_port_multiple_lock_data;
+
+#define        ipc_port_multiple_lock_init()                                   \
+               lck_spin_init(&ipc_port_multiple_lock_data, &ipc_lck_grp, &ipc_lck_attr)
+
+#define        ipc_port_multiple_lock()                                        \
+               lck_spin_lock(&ipc_port_multiple_lock_data)
+
+#define        ipc_port_multiple_unlock()                                      \
+               lck_spin_unlock(&ipc_port_multiple_lock_data)
+#endif
 
 /*
  *     The port timestamp facility provides timestamps
@@ -277,20 +289,8 @@ extern lck_mtx_ext_t       ipc_port_multiple_lock_data_ext;
  *     mach_port_names with port death.
  */
 
-decl_lck_mtx_data(extern,ipc_port_timestamp_lock_data)
-extern lck_mtx_ext_t   ipc_port_timestamp_lock_data_ext;
-
 extern ipc_port_timestamp_t ipc_port_timestamp_data;
 
-#define        ipc_port_timestamp_lock_init()                                  \
-               lck_mtx_init_ext(&ipc_port_timestamp_lock_data, &ipc_port_timestamp_lock_data_ext, &ipc_lck_grp, &ipc_lck_attr)
-
-#define        ipc_port_timestamp_lock()                                       \
-               lck_mtx_lock(&ipc_port_timestamp_lock_data)
-
-#define        ipc_port_timestamp_unlock()                                     \
-               lck_mtx_unlock(&ipc_port_timestamp_lock_data)
-
 /* Retrieve a port timestamp value */
 extern ipc_port_timestamp_t ipc_port_timestamp(void);
 
@@ -385,7 +385,8 @@ MACRO_END
 
 /* Prepare a receive right for transmission/destruction */
 extern void ipc_port_clear_receiver(
-       ipc_port_t              port);
+       ipc_port_t              port,
+       queue_t                 links);
 
 /* Initialize a newly-allocated port */
 extern void ipc_port_init(
@@ -454,10 +455,20 @@ extern mach_port_name_t ipc_port_copyout_send(
 extern void ipc_port_release_send(
        ipc_port_t      port);
 
+extern void ipc_port_reference(
+       ipc_port_t port);
+
+extern void ipc_port_release(
+       ipc_port_t port);
+
 #endif /* KERNEL_PRIVATE */
 
 #if MACH_KERNEL_PRIVATE
 
+/* Make a naked send-once right from a locked and active receive right */
+extern ipc_port_t ipc_port_make_sonce_locked(
+       ipc_port_t      port);
+
 /* Make a naked send-once right from a receive right */
 extern ipc_port_t ipc_port_make_sonce(
        ipc_port_t      port);
@@ -502,12 +513,6 @@ extern void ipc_port_debug_init(void);
 #define        ipc_port_dealloc_reply(port)    \
                ipc_port_dealloc_special((port), ipc_space_reply)
 
-#define        ipc_port_reference(port)        \
-               ipc_object_reference(&(port)->ip_object)
-
-#define        ipc_port_release(port)          \
-               ipc_object_release(&(port)->ip_object)
-
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* _IPC_IPC_PORT_H_ */
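With ip_check_unlock() gone from this header, the replacement idiom seen throughout ipc_port.c is unlock first, then release, because ip_release() may free the port immediately:

    ip_lock(port);
    /* ... final use of the port ... */
    ip_unlock(port);
    ip_release(port);      /* may free the port here; no further use allowed */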
diff --git a/osfmk/ipc/ipc_print.h b/osfmk/ipc/ipc_print.h
deleted file mode 100644 (file)
index 8365a1e..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _IPC_PRINT_H_
-#define        _IPC_PRINT_H_
-
-#if     MACH_KDB
-
-#include <mach_kdb.h>
-
-#include <mach/mach_types.h>
-#include <mach/message.h>
-#include <ipc/ipc_types.h>
-#include <ddb/db_expr.h>
-
-extern void ipc_pset_print(
-                       ipc_pset_t      pset);
-
-extern void ipc_port_print(
-                       ipc_port_t      port,
-                       boolean_t       have_addr,
-                       db_expr_t       count,
-                       char            *modif);
-
-extern void    ipc_kmsg_print(
-                       ipc_kmsg_t      kmsg);
-
-extern void    ipc_msg_print(
-               mach_msg_header_t       *msgh);
-
-extern ipc_port_t ipc_name_to_data(
-                       task_t                  task,
-                       mach_port_name_t        name);
-
-#endif  /* MACH_KDB */
-
-#endif /* IPC_PRINT_H */
diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c
index 4625271196fe73e095675dc250f5aef0f5d25b81..e2e6a65987c24f2dc13efbf5a226f0dafcb47293 100644 (file)
@@ -72,7 +72,6 @@
 #include <ipc/ipc_right.h>
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_port.h>
-#include <ipc/ipc_print.h>
 
 #include <kern/kern_types.h>
 #include <kern/spl.h>
@@ -186,15 +185,16 @@ ipc_pset_member(
 
 kern_return_t
 ipc_pset_add(
-       ipc_pset_t      pset,
-       ipc_port_t      port)
+       ipc_pset_t        pset,
+       ipc_port_t        port,
+       wait_queue_link_t wql)
 {
        kern_return_t kr;
 
        assert(ips_active(pset));
        assert(ip_active(port));
        
-       kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages);
+       kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, wql);
 
        if (kr == KERN_SUCCESS)
                port->ip_pset_count++;
@@ -216,8 +216,9 @@ ipc_pset_add(
 
 kern_return_t
 ipc_pset_remove(
-       ipc_pset_t      pset,
-       ipc_port_t      port)
+       ipc_pset_t        pset,
+       ipc_port_t        port,
+       wait_queue_link_t *wqlp)
 {
        kern_return_t kr;
 
@@ -226,7 +227,7 @@ ipc_pset_remove(
        if (port->ip_pset_count == 0)
                return KERN_NOT_IN_SET;
 
-       kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);
+       kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages, wqlp);
 
        if (kr == KERN_SUCCESS)
                port->ip_pset_count--;
@@ -244,7 +245,8 @@ ipc_pset_remove(
 
 kern_return_t
 ipc_pset_remove_from_all(
-       ipc_port_t      port)
+       ipc_port_t      port,
+       queue_t         links)
 {
        assert(ip_active(port));
        
@@ -254,7 +256,7 @@ ipc_pset_remove_from_all(
        /* 
         * Remove the port's mqueue from all sets
         */
-       ipc_mqueue_remove_from_all(&port->ip_messages);
+       ipc_mqueue_remove_from_all(&port->ip_messages, links);
        port->ip_pset_count = 0;
        return KERN_SUCCESS;
 }
@@ -275,6 +277,11 @@ ipc_pset_destroy(
        ipc_pset_t      pset)
 {
        spl_t           s;
+       queue_head_t link_data;
+       queue_t links = &link_data;
+       wait_queue_link_t wql;
+
+       queue_init(links);
 
        assert(ips_active(pset));
 
@@ -283,7 +290,7 @@ ipc_pset_destroy(
        /*
         * remove all the member message queues
         */
-       ipc_mqueue_remove_all(&pset->ips_messages);
+       ipc_mqueue_remove_all(&pset->ips_messages, links);
        
        /*
         * Set all waiters on the portset running to
@@ -295,8 +302,14 @@ ipc_pset_destroy(
        imq_unlock(&pset->ips_messages);
        splx(s);
 
-       ips_release(pset);      /* consume the ref our caller gave us */
-       ips_check_unlock(pset);
+       ips_unlock(pset);
+       ips_release(pset);       /* consume the ref our caller gave us */
+
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
 }
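
Note the new shape of ipc_pset_destroy: member links are unhooked and queued up while the set is locked, but wait_queue_link_free runs only after the lock (and spl) has been dropped. A self-contained user-space model of that collect-then-free idiom (hypothetical names):

        #include <stdlib.h>
        #include <pthread.h>

        struct wq_link {
                struct wq_link *next;
        };

        /* Unhook every member link while the lock is held, but defer the
         * free()s until after unlock, as ipc_pset_destroy does above. */
        static void set_destroy(pthread_mutex_t *lock, struct wq_link **members)
        {
                struct wq_link *free_list = NULL;

                pthread_mutex_lock(lock);
                while (*members != NULL) {        /* unhook under the lock */
                        struct wq_link *l = *members;
                        *members = l->next;
                        l->next = free_list;
                        free_list = l;
                }
                pthread_mutex_unlock(lock);

                while (free_list != NULL) {       /* free once unlocked */
                        struct wq_link *l = free_list;
                        free_list = l->next;
                        free(l);
                }
        }
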
 
 /* Kqueue EVFILT_MACHPORT support */
@@ -321,6 +334,7 @@ filt_machportattach(
         struct knote *kn)
 {
         mach_port_name_t        name = (mach_port_name_t)kn->kn_kevent.ident;
+       wait_queue_link_t       wql = wait_queue_link_allocate();
         ipc_pset_t              pset = IPS_NULL;
         int                     result = ENOSYS;
         kern_return_t           kr;
@@ -329,24 +343,28 @@ filt_machportattach(
                                   MACH_PORT_RIGHT_PORT_SET,
                                   (ipc_object_t *)&pset);
         if (kr != KERN_SUCCESS) {
-                result = (kr == KERN_INVALID_NAME ? ENOENT : ENOTSUP);
-                goto done;
+               wait_queue_link_free(wql);
+                return (kr == KERN_INVALID_NAME ? ENOENT : ENOTSUP);
         }
         /* We've got a lock on pset */
 
-       /* keep a reference for the knote */
-       kn->kn_ptr.p_pset = pset; 
-       ips_reference(pset);
-
        /* 
         * Bind the portset wait queue directly to knote/kqueue.
         * This allows us to just use wait_queue foo to effect a wakeup,
         * rather than having to call knote() from the Mach code on each
         * message.
         */
-       result = knote_link_wait_queue(kn, &pset->ips_messages.imq_wait_queue);
+       result = knote_link_wait_queue(kn, &pset->ips_messages.imq_wait_queue, wql);
+       if (result == 0) {
+               /* keep a reference for the knote */
+               kn->kn_ptr.p_pset = pset; 
+               ips_reference(pset);
+               ips_unlock(pset);
+               return 0;
+       }
+
        ips_unlock(pset);
-done:
+       wait_queue_link_free(wql);
        return result;
 }
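
filt_machportattach now allocates its wait_queue_link_t before the lookup takes any locks, and every failure path hands the link back with wait_queue_link_free; the allocation can block, so it cannot happen once the pset is locked. A compact user-space sketch of that allocate-first shape (names hypothetical):

        #include <errno.h>
        #include <stdlib.h>
        #include <pthread.h>

        struct wq_link { struct wq_link *next; };
        struct pset {
                pthread_mutex_t lock;
                int active;
                struct wq_link *links;
        };

        static int pset_attach(struct pset *ps, struct wq_link **out)
        {
                struct wq_link *l = malloc(sizeof(*l)); /* may block: do first */
                if (l == NULL)
                        return ENOMEM;

                pthread_mutex_lock(&ps->lock);
                if (!ps->active) {
                        pthread_mutex_unlock(&ps->lock);
                        free(l);        /* failure path frees the prealloc */
                        return ENOENT;
                }
                l->next = ps->links;    /* link in under the lock */
                ps->links = l;
                pthread_mutex_unlock(&ps->lock);

                *out = l;
                return 0;
        }
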
 
@@ -355,16 +373,19 @@ filt_machportdetach(
         struct knote *kn)
 {
         ipc_pset_t              pset = kn->kn_ptr.p_pset;
+       wait_queue_link_t       wql = WAIT_QUEUE_LINK_NULL;
 
        /*
         * Unlink the portset wait queue from knote/kqueue,
         * and release our reference on the portset.
         */
        ips_lock(pset);
-       knote_unlink_wait_queue(kn, &pset->ips_messages.imq_wait_queue);
-       ips_release(kn->kn_ptr.p_pset);
-       kn->kn_ptr.p_pset = IPS_NULL; 
-       ips_check_unlock(pset);
+       (void)knote_unlink_wait_queue(kn, &pset->ips_messages.imq_wait_queue, &wql);
+       kn->kn_ptr.p_pset = IPS_NULL;
+       ips_unlock(pset);
+       ips_release(pset);
+       if (wql != WAIT_QUEUE_LINK_NULL)
+               wait_queue_link_free(wql);
 }
 
 static int
@@ -393,8 +414,9 @@ filt_machport(
        if (kr != KERN_SUCCESS || pset != kn->kn_ptr.p_pset || !ips_active(pset)) {
                kn->kn_data = 0;
                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
-               if (pset != IPS_NULL)
-                       ips_check_unlock(pset);
+               if (pset != IPS_NULL) {
+                       ips_unlock(pset);
+               }
                return(1);
         }
 
@@ -448,7 +470,7 @@ filt_machport(
         * portset and return zero.
         */
        if (self->ith_state == MACH_RCV_TIMED_OUT) {
-               ipc_pset_release(pset);
+               ips_release(pset);
                return 0;
        }
 
@@ -461,7 +483,7 @@ filt_machport(
                assert(self->ith_state == MACH_RCV_TOO_LARGE);
                assert(self->ith_kmsg == IKM_NULL);
                kn->kn_data = self->ith_receiver_name;
-               ipc_pset_release(pset);
+               ips_release(pset);
                return 1;
        }
 
@@ -523,55 +545,3 @@ filt_machportpeek(struct knote *kn)
 
        return (ipc_mqueue_peek(set_mq));
 }
-
-
-#include <mach_kdb.h>
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-
-#define        printf  kdbprintf
-
-int
-ipc_list_count(
-       struct ipc_kmsg *base)
-{
-       register int count = 0;
-
-       if (base) {
-               struct ipc_kmsg *kmsg = base;
-
-               ++count;
-               while (kmsg && kmsg->ikm_next != base
-                           && kmsg->ikm_next != IKM_BOGUS){
-                       kmsg = kmsg->ikm_next;
-                       ++count;
-               }
-       }
-       return(count);
-}
-
-/*
- *     Routine:        ipc_pset_print
- *     Purpose:
- *             Pretty-print a port set for kdb.
- */
-void
-ipc_pset_print(
-       ipc_pset_t      pset)
-{
-       printf("pset 0x%x\n", pset);
-
-       db_indent += 2;
-
-       ipc_object_print(&pset->ips_object);
-       iprintf("local_name = 0x%x\n", pset->ips_local_name);
-       iprintf("%d kmsgs => 0x%x",
-               ipc_list_count(pset->ips_messages.imq_messages.ikmq_base),
-               pset->ips_messages.imq_messages.ikmq_base);
-       printf(",rcvrs queue= 0x%x\n", &pset->ips_messages.imq_wait_queue);
-
-       db_indent -=2;
-}
-
-#endif /* MACH_KDB */
diff --git a/osfmk/ipc/ipc_pset.h b/osfmk/ipc/ipc_pset.h
index 26c1f26c01f4ee83dae3aaf6c80efc86a372fc7d..5dd8af59352531915f46d43043e84127d1bfac32 100644 (file)
@@ -74,8 +74,6 @@
 #include <ipc/ipc_object.h>
 #include <ipc/ipc_mqueue.h>
 
-#include <mach_kdb.h>
-
 struct ipc_pset {
 
        /*
@@ -93,7 +91,6 @@ struct ipc_pset {
 #define        ips_lock(pset)          io_lock(&(pset)->ips_object)
 #define        ips_lock_try(pset)      io_lock_try(&(pset)->ips_object)
 #define        ips_unlock(pset)        io_unlock(&(pset)->ips_object)
-#define        ips_check_unlock(pset)  io_check_unlock(&(pset)->ips_object)
 #define        ips_reference(pset)     io_reference(&(pset)->ips_object)
 #define        ips_release(pset)       io_release(&(pset)->ips_object)
 
@@ -112,7 +109,8 @@ extern kern_return_t ipc_pset_alloc_name(
 /* Add a port to a port set */
 extern kern_return_t ipc_pset_add(
        ipc_pset_t      pset,
-       ipc_port_t      port);
+       ipc_port_t      port,
+       wait_queue_link_t wql);
 
 /* determine if port is a member of set */
 extern boolean_t ipc_pset_member(
@@ -122,25 +120,16 @@ extern boolean_t ipc_pset_member(
 /* Remove a port from a port set */
 extern kern_return_t ipc_pset_remove(
        ipc_pset_t      pset,
-       ipc_port_t      port);
+       ipc_port_t      port,
+       wait_queue_link_t *wqlp);
 
 /* Remove a port from all its current port sets */
 extern kern_return_t ipc_pset_remove_from_all(
-       ipc_port_t      port);
+       ipc_port_t      port,
+       queue_t         links);
 
 /* Destroy a port_set */
 extern void ipc_pset_destroy(
        ipc_pset_t      pset);
 
-#define        ipc_pset_reference(pset)        \
-               ipc_object_reference(&(pset)->ips_object)
-
-#define        ipc_pset_release(pset)          \
-               ipc_object_release(&(pset)->ips_object)
-
-
-#if    MACH_KDB
-int ipc_list_count(struct ipc_kmsg *base);
-#endif /* MACH_KDB */
-
 #endif /* _IPC_IPC_PSET_H_ */
diff --git a/osfmk/ipc/ipc_right.c b/osfmk/ipc/ipc_right.c
index d3db278b84362eb964dab7afe0864ef69644c97b..46d7f1ec7e821df8f6f3a6e26583c55c741faf6f 100644 (file)
@@ -112,7 +112,7 @@ ipc_right_lookup_write(
 
        is_write_lock(space);
 
-       if (!space->is_active) {
+       if (!is_active(space)) {
                is_write_unlock(space);
                return KERN_INVALID_TASK;
        }
@@ -154,7 +154,7 @@ ipc_right_lookup_two_write(
 
        is_write_lock(space);
 
-       if (!space->is_active) {
+       if (!is_active(space)) {
                is_write_unlock(space);
                return KERN_INVALID_TASK;
        }
@@ -197,7 +197,7 @@ ipc_right_reverse(
 
        /* would switch on io_otype to handle multiple types of object */
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(io_otype(object) == IOT_PORT);
 
        port = (ipc_port_t) object;
@@ -273,6 +273,8 @@ ipc_right_request_alloc(
        kern_return_t kr;
 
        for (;;) {
+               ipc_port_t port = IP_NULL;
+
                kr = ipc_right_lookup_write(space, name, &entry);
                if (kr != KERN_SUCCESS)
                        return kr;
@@ -291,7 +293,6 @@ ipc_right_request_alloc(
                /* see if the entry is of proper type for requests */
                if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
                        ipc_port_request_index_t new_request;
-                       ipc_port_t port;
 
                        port = (ipc_port_t) entry->ie_object;
                        assert(port != IP_NULL);
@@ -305,6 +306,7 @@ ipc_right_request_alloc(
                                                previous = ipc_port_request_cancel(port, name, prev_request);
                                        ip_unlock(port);
                                        entry->ie_request = IE_REQ_NONE;
+                                       ipc_entry_modified(space, name, entry);
                                        is_write_unlock(space);
                                        break;
                                }
@@ -320,6 +322,7 @@ ipc_right_request_alloc(
                                                previous = ipc_port_request_cancel(port, name, prev_request);
                                        ip_unlock(port);
                                        entry->ie_request = IE_REQ_NONE;
+                                       ipc_entry_modified(space, name, entry);
                                        is_write_unlock(space);
 
                                        ipc_notify_send_possible(notify, name);
@@ -352,6 +355,7 @@ ipc_right_request_alloc(
                                assert(new_request != IE_REQ_NONE);
                                ip_unlock(port);
                                entry->ie_request = new_request;
+                               ipc_entry_modified(space, name, entry);
                                is_write_unlock(space);
                                break;
                        }
@@ -368,18 +372,28 @@ ipc_right_request_alloc(
 
                        if (MACH_PORT_UREFS_OVERFLOW(urefs, 1)) {
                                is_write_unlock(space);
+                               if (port != IP_NULL)
+                                       ip_release(port);
                                return KERN_UREFS_OVERFLOW;
                        }
 
                        (entry->ie_bits)++; /* increment urefs */
+                       ipc_entry_modified(space, name, entry);
                        is_write_unlock(space);
 
+                       if (port != IP_NULL)
+                               ip_release(port);
+
                        ipc_notify_dead_name(notify, name);
                        previous = IP_NULL;
                        break;
                }
 
                is_write_unlock(space);
+
+               if (port != IP_NULL)
+                       ip_release(port);
+
                if (entry->ie_bits & MACH_PORT_TYPE_PORT_OR_DEAD)
                        return KERN_INVALID_ARGUMENT;
                else
@@ -417,6 +431,7 @@ ipc_right_request_cancel(
 
        previous = ipc_port_request_cancel(port, name, entry->ie_request);
        entry->ie_request = IE_REQ_NONE;
+       ipc_entry_modified(space, name, entry);
        return previous;
 }
 
@@ -451,8 +466,10 @@ ipc_right_inuse(
  *     Conditions:
  *             The space is write-locked; the port is not locked.
  *             If returns FALSE, the port is also locked and active.
- *             Otherwise, entry is converted to a dead name, freeing
- *             a reference to port.
+ *             Otherwise, entry is converted to a dead name.
+ *
+ *             Caller is responsible for a reference to port if it
+ *             had died (returns TRUE).
  */
 
 boolean_t
@@ -464,7 +481,7 @@ ipc_right_check(
 {
        ipc_entry_bits_t bits;
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(port == (ipc_port_t) entry->ie_object);
 
        ip_lock(port);
@@ -518,16 +535,14 @@ ipc_right_check(
        }
        entry->ie_bits = bits;
        entry->ie_object = IO_NULL;
-
-       ipc_port_release(port);
-
+       ipc_entry_modified(space, name, entry);
        return TRUE;
 }
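
The substantive change to ipc_right_check: it no longer releases the port itself when it converts the entry to a dead name; per the updated Conditions comment, the reference stays with the caller, to be dropped only after the space is unlocked. That is why callers throughout this file gain a local port pointer (or a *releasep out-parameter) and an ip_release(port) after is_write_unlock(space). A user-space model of handing a reference out of the critical section (hypothetical names):

        #include <stdbool.h>
        #include <stddef.h>
        #include <pthread.h>
        #include <stdatomic.h>

        struct obj { _Atomic int refs; bool alive; };
        struct entry { struct obj *object; bool dead; };

        static void obj_release(struct obj *o)
        {
                if (atomic_fetch_sub(&o->refs, 1) == 1)
                        ; /* last reference: reclaim the object here */
        }

        /* If the object died, convert the entry to a "dead name" and hand
         * its reference back through *releasep for the caller to drop. */
        static bool entry_check(struct entry *e, struct obj **releasep)
        {
                *releasep = NULL;
                if (e->object->alive)
                        return false;
                e->dead = true;
                *releasep = e->object;
                e->object = NULL;
                return true;
        }

        static pthread_mutex_t space_lock = PTHREAD_MUTEX_INITIALIZER;

        static void caller(struct entry *e)
        {
                struct obj *release = NULL;

                pthread_mutex_lock(&space_lock);
                (void)entry_check(e, &release);
                /* ... dead-name or live-object handling under the lock ... */
                pthread_mutex_unlock(&space_lock);

                if (release != NULL)
                        obj_release(release); /* never under space_lock */
        }
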
 
 /*
- *     Routine:        ipc_right_clean
+ *     Routine:        ipc_right_terminate
  *     Purpose:
- *             Cleans up an entry in a dead space.
+ *             Cleans up an entry in a terminated space.
  *             The entry isn't deallocated or removed
  *             from reverse hash tables.
  *     Conditions:
@@ -535,7 +550,7 @@ ipc_right_check(
  */
 
 void
-ipc_right_clean(
+ipc_right_terminate(
        ipc_space_t             space,
        mach_port_name_t        name,
        ipc_entry_t             entry)
@@ -546,7 +561,7 @@ ipc_right_clean(
        bits = entry->ie_bits;
        type = IE_BITS_TYPE(bits);
 
-       assert(!space->is_active);
+       assert(!is_active(space));
 
        /*
         *      IE_BITS_COMPAT/ipc_right_dncancel doesn't have this
@@ -571,7 +586,6 @@ ipc_right_clean(
 
                ips_lock(pset);
                assert(ips_active(pset));
-
                ipc_pset_destroy(pset); /* consumes ref, unlocks */
                break;
            }
@@ -589,8 +603,8 @@ ipc_right_clean(
                ip_lock(port);
 
                if (!ip_active(port)) {
+                       ip_unlock(port);
                        ip_release(port);
-                       ip_check_unlock(port);
                        break;
                }
 
@@ -610,11 +624,21 @@ ipc_right_clean(
                }
 
                if (type & MACH_PORT_TYPE_RECEIVE) {
+                       wait_queue_link_t wql;
+                       queue_head_t links_data;
+                       queue_t links = &links_data;
+
                        assert(port->ip_receiver_name == name);
                        assert(port->ip_receiver == space);
 
-                       ipc_port_clear_receiver(port);
+                       queue_init(links);
+                       ipc_port_clear_receiver(port, links);
                        ipc_port_destroy(port); /* consumes our ref, unlocks */
+                       while(!queue_empty(links)) {
+                               wql = (wait_queue_link_t) dequeue(links);
+                               wait_queue_link_free(wql);
+                       }
+
                } else if (type & MACH_PORT_TYPE_SEND_ONCE) {
                        assert(port->ip_sorights > 0);
                        ip_unlock(port);
@@ -623,8 +647,8 @@ ipc_right_clean(
                } else {
                        assert(port->ip_receiver != space);
 
-                       ip_release(port);
-                       ip_unlock(port); /* port is active */
+                       ip_unlock(port);
+                       ip_release(port);                       
                }
 
                if (nsrequest != IP_NULL)
@@ -636,7 +660,7 @@ ipc_right_clean(
            }
 
            default:
-               panic("ipc_right_clean: strange type - 0x%x", type);
+               panic("ipc_right_terminate: strange type - 0x%x", type);
        }
 }
 
@@ -645,7 +669,7 @@ ipc_right_clean(
  *     Purpose:
  *             Destroys an entry in a space.
  *     Conditions:
- *             The space is write-locked.
+ *             The space is write-locked (returns unlocked).
  *             The space must be active.
  *     Returns:
  *             KERN_SUCCESS            The entry was destroyed.
@@ -664,7 +688,7 @@ ipc_right_destroy(
        entry->ie_bits &= ~IE_BITS_TYPE_MASK;
        type = IE_BITS_TYPE(bits);
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        switch (type) {
            case MACH_PORT_TYPE_DEAD_NAME:
@@ -672,6 +696,7 @@ ipc_right_destroy(
                assert(entry->ie_object == IO_NULL);
 
                ipc_entry_dealloc(space, name, entry);
+               is_write_unlock(space);
                break;
 
            case MACH_PORT_TYPE_PORT_SET: {
@@ -684,8 +709,9 @@ ipc_right_destroy(
                ipc_entry_dealloc(space, name, entry);
 
                ips_lock(pset);
-               assert(ips_active(pset));
+               is_write_unlock(space);
 
+               assert(ips_active(pset));
                ipc_pset_destroy(pset); /* consumes ref, unlocks */
                break;
            }
@@ -709,13 +735,12 @@ ipc_right_destroy(
 
                if (!ip_active(port)) {
                        assert((type & MACH_PORT_TYPE_RECEIVE) == 0);
-                       ip_release(port);
-                       ip_check_unlock(port);
-
+                       ip_unlock(port);
                        entry->ie_request = IE_REQ_NONE;
                        entry->ie_object = IO_NULL;
                        ipc_entry_dealloc(space, name, entry);
-
+                       is_write_unlock(space);
+                       ip_release(port);
                        break;
                }
 
@@ -723,6 +748,7 @@ ipc_right_destroy(
 
                entry->ie_object = IO_NULL;
                ipc_entry_dealloc(space, name, entry);
+               is_write_unlock(space);
 
                if (type & MACH_PORT_TYPE_SEND) {
                        assert(port->ip_srights > 0);
@@ -736,11 +762,21 @@ ipc_right_destroy(
                }
 
                if (type & MACH_PORT_TYPE_RECEIVE) {
+                       queue_head_t links_data;
+                       queue_t links = &links_data;
+                       wait_queue_link_t wql;
+
                        assert(ip_active(port));
                        assert(port->ip_receiver == space);
 
-                       ipc_port_clear_receiver(port);
+                       queue_init(links);
+                       ipc_port_clear_receiver(port, links);
                        ipc_port_destroy(port); /* consumes our ref, unlocks */
+                       while(!queue_empty(links)) {
+                               wql = (wait_queue_link_t) dequeue(links);
+                               wait_queue_link_free(wql);
+                       }
+
                } else if (type & MACH_PORT_TYPE_SEND_ONCE) {
                        assert(port->ip_sorights > 0);
                        ip_unlock(port);
@@ -749,8 +785,8 @@ ipc_right_destroy(
                } else {
                        assert(port->ip_receiver != space);
 
-                       ip_release(port);
                        ip_unlock(port);
+                       ip_release(port);
                }
 
                if (nsrequest != IP_NULL)
@@ -788,7 +824,7 @@ ipc_right_dealloc(
        mach_port_name_t        name,
        ipc_entry_t             entry)
 {
-
+       ipc_port_t port = IP_NULL;
        ipc_entry_bits_t bits;
        mach_port_type_t type;
 
@@ -796,7 +832,7 @@ ipc_right_dealloc(
        type = IE_BITS_TYPE(bits);
 
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        switch (type) {
            case MACH_PORT_TYPE_DEAD_NAME: {
@@ -808,16 +844,20 @@ ipc_right_dealloc(
 
                if (IE_BITS_UREFS(bits) == 1) {
                        ipc_entry_dealloc(space, name, entry);
-               }
-               else
+               } else {
                        entry->ie_bits = bits-1; /* decrement urefs */
-
+                       ipc_entry_modified(space, name, entry);
+               }
                is_write_unlock(space);
+
+               /* release any port that got converted to dead name below */
+               if (port != IP_NULL)
+                       ip_release(port);
                break;
            }
 
            case MACH_PORT_TYPE_SEND_ONCE: {
-               ipc_port_t port, request;
+               ipc_port_t request;
 
                assert(IE_BITS_UREFS(bits) == 1);
 
@@ -828,7 +868,7 @@ ipc_right_dealloc(
 
                        bits = entry->ie_bits;
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
-                       goto dead_name;
+                       goto dead_name;     /* it will release port */
                }
                /* port is locked and active */
 
@@ -850,7 +890,6 @@ ipc_right_dealloc(
            }
 
            case MACH_PORT_TYPE_SEND: {
-               ipc_port_t port;
                ipc_port_t request = IP_NULL;
                ipc_port_t nsrequest = IP_NULL;
                mach_port_mscount_t mscount =  0;
@@ -864,7 +903,7 @@ ipc_right_dealloc(
                if (ipc_right_check(space, port, name, entry)) {
                        bits = entry->ie_bits;
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
-                       goto dead_name;
+                       goto dead_name;     /* it will release port */
                }
                /* port is locked and active */
 
@@ -884,16 +923,19 @@ ipc_right_dealloc(
                        ipc_hash_delete(space, (ipc_object_t) port,
                                        name, entry);
 
-                       ip_release(port);
+                       ip_unlock(port);
                        entry->ie_object = IO_NULL;
                        ipc_entry_dealloc(space, name, entry);
+                       is_write_unlock(space);
+                       ip_release(port);
 
-               } else
+               } else {
+                       ip_unlock(port);                        
                        entry->ie_bits = bits-1; /* decrement urefs */
-
-               /* even if dropped a ref, port is active */
-               ip_unlock(port);
-               is_write_unlock(space);
+                       ipc_entry_modified(space, name, entry);
+                       is_write_unlock(space);
+               }
+               
 
                if (nsrequest != IP_NULL)
                        ipc_notify_no_senders(nsrequest, mscount);
@@ -904,7 +946,6 @@ ipc_right_dealloc(
            }
 
            case MACH_PORT_TYPE_SEND_RECEIVE: {
-               ipc_port_t port;
                ipc_port_t nsrequest = IP_NULL;
                mach_port_mscount_t mscount = 0;
 
@@ -934,6 +975,8 @@ ipc_right_dealloc(
                        entry->ie_bits = bits-1; /* decrement urefs */
 
                ip_unlock(port);
+
+               ipc_entry_modified(space, name, entry);
                is_write_unlock(space);
 
                if (nsrequest != IP_NULL)
@@ -972,6 +1015,7 @@ ipc_right_delta(
        mach_port_right_t       right,
        mach_port_delta_t       delta)
 {
+       ipc_port_t port = IP_NULL;
        ipc_entry_bits_t bits;
        
        bits = entry->ie_bits;
@@ -985,7 +1029,7 @@ ipc_right_delta(
  *     we postpone doing so when we are holding the space lock.
  */
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(right < MACH_PORT_RIGHT_NUMBER);
 
        /* Rights-specific restrictions and operations. */
@@ -1010,12 +1054,9 @@ ipc_right_delta(
                pset = (ipc_pset_t) entry->ie_object;
                assert(pset != IPS_NULL);
 
-
-
                entry->ie_object = IO_NULL;
                ipc_entry_dealloc(space, name, entry);
 
-
                ips_lock(pset);
                assert(ips_active(pset));
                is_write_unlock(space);
@@ -1025,8 +1066,10 @@ ipc_right_delta(
            }
 
            case MACH_PORT_RIGHT_RECEIVE: {
-               ipc_port_t port;
                ipc_port_t request = IP_NULL;
+               queue_head_t links_data;
+               queue_t links = &links_data;
+               wait_queue_link_t wql;
 
                if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
                        goto invalid_right;
@@ -1069,6 +1112,7 @@ ipc_right_delta(
                                 * right and enter the remaining send right
                                 * into the hash table.
                                 */
+                               ipc_entry_modified(space, name, entry);
                                entry->ie_bits &= ~MACH_PORT_TYPE_RECEIVE;
                                ipc_hash_insert(space, (ipc_object_t) port,
                                    name, entry);
@@ -1089,6 +1133,7 @@ ipc_right_delta(
                                }
                                entry->ie_bits = bits;
                                entry->ie_object = IO_NULL;
+                               ipc_entry_modified(space, name, entry);
                        }
                } else {
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_RECEIVE);
@@ -1101,8 +1146,13 @@ ipc_right_delta(
                }
                is_write_unlock(space);
 
-               ipc_port_clear_receiver(port);
+               queue_init(links);
+               ipc_port_clear_receiver(port, links);
                ipc_port_destroy(port); /* consumes ref, unlocks */
+               while(!queue_empty(links)) {
+                       wql = (wait_queue_link_t) dequeue(links);
+                       wait_queue_link_free(wql);
+               }
 
                if (request != IP_NULL)
                        ipc_notify_port_deleted(request, name);
@@ -1110,7 +1160,7 @@ ipc_right_delta(
            }
 
            case MACH_PORT_RIGHT_SEND_ONCE: {
-               ipc_port_t port, request;
+               ipc_port_t request;
 
                if ((bits & MACH_PORT_TYPE_SEND_ONCE) == 0)
                        goto invalid_right;
@@ -1158,7 +1208,6 @@ ipc_right_delta(
                mach_port_urefs_t urefs;
 
                if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
-                       ipc_port_t port;
 
                        port = (ipc_port_t) entry->ie_object;
                        assert(port != IP_NULL);
@@ -1166,6 +1215,7 @@ ipc_right_delta(
                        if (!ipc_right_check(space, port, name, entry)) {
                                /* port is locked and active */
                                ip_unlock(port);
+                               port = IP_NULL;
                                goto invalid_right;
                        }
                        bits = entry->ie_bits;
@@ -1185,10 +1235,10 @@ ipc_right_delta(
 
                if ((urefs + delta) == 0) {
                        ipc_entry_dealloc(space, name, entry);
-               }
-               else
+               } else {
                        entry->ie_bits = bits + delta;
-
+                       ipc_entry_modified(space, name, entry);
+               }
                is_write_unlock(space);
 
                break;
@@ -1196,7 +1246,6 @@ ipc_right_delta(
 
            case MACH_PORT_RIGHT_SEND: {
                mach_port_urefs_t urefs;
-               ipc_port_t port;
                ipc_port_t request = IP_NULL;
                ipc_port_t nsrequest = IP_NULL;
                mach_port_mscount_t mscount = 0;
@@ -1239,11 +1288,13 @@ ipc_right_delta(
                        if (bits & MACH_PORT_TYPE_RECEIVE) {
                                assert(port->ip_receiver_name == name);
                                assert(port->ip_receiver == space);
+                               ip_unlock(port);                                
                                assert(IE_BITS_TYPE(bits) ==
                                                MACH_PORT_TYPE_SEND_RECEIVE);
 
                                entry->ie_bits = bits &~ (IE_BITS_UREFS_MASK|
                                                       MACH_PORT_TYPE_SEND);
+                               ipc_entry_modified(space, name, entry);
                        } else {
                                assert(IE_BITS_TYPE(bits) ==
                                                MACH_PORT_TYPE_SEND);
@@ -1253,16 +1304,18 @@ ipc_right_delta(
                                ipc_hash_delete(space, (ipc_object_t) port,
                                                name, entry);
 
+                               ip_unlock(port);
                                ip_release(port);
 
                                entry->ie_object = IO_NULL;
                                ipc_entry_dealloc(space, name, entry);
                        }
-               } else
+               } else {
+                       ip_unlock(port);
                        entry->ie_bits = bits + delta;
+                       ipc_entry_modified(space, name, entry);
+               }
 
-               /* even if dropped a ref, port is active */
-               ip_unlock(port);
                is_write_unlock(space);
 
                if (nsrequest != IP_NULL)
@@ -1285,6 +1338,8 @@ ipc_right_delta(
 
     invalid_right:
        is_write_unlock(space);
+       if (port != IP_NULL)
+               ip_release(port);
        return KERN_INVALID_RIGHT;
 
     invalid_value:
@@ -1301,10 +1356,10 @@ ipc_right_delta(
  *     Purpose:
  *             Retrieves information about the right.
  *     Conditions:
- *             The space is write-locked, and is unlocked upon return
- *             if the call is unsuccessful.  The space must be active.
+ *             The space is active and write-locked.
+ *             The space is unlocked upon return.
  *     Returns:
- *             KERN_SUCCESS            Retrieved info; space still locked.
+ *             KERN_SUCCESS            Retrieved info
  */
 
 kern_return_t
@@ -1333,6 +1388,7 @@ ipc_right_info(
                        type |= ipc_port_request_type(port, name, request);
                        ip_unlock(port);
                }
+               is_write_unlock(space);
 
        } else if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
                /*
@@ -1344,10 +1400,15 @@ ipc_right_info(
                        if (request != IE_REQ_NONE)
                                type |= ipc_port_request_type(port, name, request);
                        ip_unlock(port);
+                       is_write_unlock(space);
                } else {
                        bits = entry->ie_bits;
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
+                       is_write_unlock(space);
+                       ip_release(port);
                }
+       } else {
+               is_write_unlock(space);
        }
 
        type |= IE_BITS_TYPE(bits);
@@ -1380,7 +1441,7 @@ ipc_right_copyin_check(
 #endif
 
        bits= entry->ie_bits;
-       assert(space->is_active);
+       assert(is_active(space));
 
        switch (msgt_name) {
            case MACH_MSG_TYPE_MAKE_SEND:
@@ -1525,21 +1586,25 @@ ipc_right_copyin(
        mach_msg_type_name_t    msgt_name,
        boolean_t               deadok,
        ipc_object_t            *objectp,
-       ipc_port_t              *sorightp)
+       ipc_port_t              *sorightp,
+       ipc_port_t              *releasep,
+       queue_t                 links)
 {
        ipc_entry_bits_t bits;
+       ipc_port_t port;
 #if CONFIG_MACF_MACH
        task_t self = current_task();
        int    rc;
 #endif
        
+       *releasep = IP_NULL;
+
        bits = entry->ie_bits;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        switch (msgt_name) {
            case MACH_MSG_TYPE_MAKE_SEND: {
-               ipc_port_t port;
 
                if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
                        goto invalid_right;
@@ -1573,7 +1638,6 @@ ipc_right_copyin(
            }
 
            case MACH_MSG_TYPE_MAKE_SEND_ONCE: {
-               ipc_port_t port;
 
                if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
                        goto invalid_right;
@@ -1606,7 +1670,6 @@ ipc_right_copyin(
            }
 
            case MACH_MSG_TYPE_MOVE_RECEIVE: {
-               ipc_port_t port;
                ipc_port_t request = IP_NULL;
 
                if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
@@ -1649,9 +1712,9 @@ ipc_right_copyin(
                        entry->ie_object = IO_NULL;
                }
                entry->ie_bits = bits &~ MACH_PORT_TYPE_RECEIVE;
+               ipc_entry_modified(space, name, entry);
 
-               ipc_port_clear_receiver(port);
-
+               ipc_port_clear_receiver(port, links);
                port->ip_receiver_name = MACH_PORT_NULL;
                port->ip_destination = IP_NULL;
                ip_unlock(port);
@@ -1662,7 +1725,6 @@ ipc_right_copyin(
            }
 
            case MACH_MSG_TYPE_COPY_SEND: {
-               ipc_port_t port;
 
                if (bits & MACH_PORT_TYPE_DEAD_NAME)
                        goto copy_dead;
@@ -1679,6 +1741,7 @@ ipc_right_copyin(
 
                if (ipc_right_check(space, port, name, entry)) {
                        bits = entry->ie_bits;
+                       *releasep = port;
                        goto copy_dead;
                }
                /* port is locked and active */
@@ -1713,7 +1776,6 @@ ipc_right_copyin(
            }
 
            case MACH_MSG_TYPE_MOVE_SEND: {
-               ipc_port_t port;
                ipc_port_t request = IP_NULL;
 
                if (bits & MACH_PORT_TYPE_DEAD_NAME)
@@ -1731,6 +1793,7 @@ ipc_right_copyin(
 
                if (ipc_right_check(space, port, name, entry)) {
                        bits = entry->ie_bits;
+                       *releasep = port;
                        goto move_dead;
                }
                /* port is locked and active */
@@ -1781,7 +1844,7 @@ ipc_right_copyin(
                        ip_reference(port);
                        entry->ie_bits = bits-1; /* decrement urefs */
                }
-
+               ipc_entry_modified(space, name, entry);
                ip_unlock(port);
 
                *objectp = (ipc_object_t) port;
@@ -1790,7 +1853,6 @@ ipc_right_copyin(
            }
 
            case MACH_MSG_TYPE_MOVE_SEND_ONCE: {
-               ipc_port_t port;
                ipc_port_t request;
 
                if (bits & MACH_PORT_TYPE_DEAD_NAME)
@@ -1841,7 +1903,7 @@ ipc_right_copyin(
                entry->ie_object = IO_NULL;
                entry->ie_bits = bits &~
                        (IE_BITS_UREFS_MASK | MACH_PORT_TYPE_SEND_ONCE);
-
+               ipc_entry_modified(space, name, entry);
                *objectp = (ipc_object_t) port;
                *sorightp = request;
                break;
@@ -1880,7 +1942,7 @@ ipc_right_copyin(
                bits &= ~MACH_PORT_TYPE_DEAD_NAME;
        }
        entry->ie_bits = bits-1; /* decrement urefs */
-
+       ipc_entry_modified(space, name, entry);
        *objectp = IO_DEAD;
        *sorightp = IP_NULL;
        return KERN_SUCCESS;
@@ -1910,7 +1972,7 @@ ipc_right_copyin_undo(
 
        bits = entry->ie_bits;
 
-       assert(space->is_active);
+       assert(is_active(space));
 
        assert((msgt_name == MACH_MSG_TYPE_MOVE_SEND) ||
               (msgt_name == MACH_MSG_TYPE_COPY_SEND) ||
@@ -1961,11 +2023,11 @@ ipc_right_copyin_undo(
                                       name, entry);
                /* object is dead so it is not locked */
        }
-
+       ipc_entry_modified(space, name, entry);
        /* release the reference acquired by copyin */
 
        if (object != IO_DEAD)
-               ipc_object_release(object);
+               io_release(object);
 }
 
 /*
@@ -1988,7 +2050,8 @@ ipc_right_copyin_two(
        mach_port_name_t        name,
        ipc_entry_t             entry,
        ipc_object_t            *objectp,
-       ipc_port_t              *sorightp)
+       ipc_port_t              *sorightp,
+       ipc_port_t              *releasep)
 {
        ipc_entry_bits_t bits;
        mach_port_urefs_t urefs;
@@ -1999,7 +2062,9 @@ ipc_right_copyin_two(
        int    rc;
 #endif
 
-       assert(space->is_active);
+       *releasep = IP_NULL;
+
+       assert(is_active(space));
 
        bits = entry->ie_bits;
 
@@ -2014,6 +2079,7 @@ ipc_right_copyin_two(
        assert(port != IP_NULL);
 
        if (ipc_right_check(space, port, name, entry)) {
+               *releasep = port;
                goto invalid_right;
        }
        /* port is locked and active */
@@ -2059,6 +2125,8 @@ ipc_right_copyin_two(
                ip_reference(port);
                entry->ie_bits = bits-2; /* decrement urefs */
        }
+       ipc_entry_modified(space, name, entry);
+
        ip_unlock(port);
 
        *objectp = (ipc_object_t) port;
@@ -2140,6 +2208,7 @@ ipc_right_copyout(
                ip_unlock(port);
 
                entry->ie_bits = bits | (MACH_PORT_TYPE_SEND_ONCE | 1);
+               ipc_entry_modified(space, name, entry);
                break;
 
            case MACH_MSG_TYPE_PORT_SEND:
@@ -2171,25 +2240,26 @@ ipc_right_copyout(
                                        /* leave urefs pegged to maximum */
 
                                        port->ip_srights--;
-                                       ip_release(port);
                                        ip_unlock(port);
+                                       ip_release(port);
                                        return KERN_SUCCESS;
                                }
 
                                ip_unlock(port);
                                return KERN_UREFS_OVERFLOW;
                        }
-
                        port->ip_srights--;
-                       ip_release(port);
                        ip_unlock(port);
+                       ip_release(port);
+                       
                } else if (bits & MACH_PORT_TYPE_RECEIVE) {
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_RECEIVE);
                        assert(IE_BITS_UREFS(bits) == 0);
 
                        /* transfer send right to entry */
-                       ip_release(port);
                        ip_unlock(port);
+                       ip_release(port);
+                       
                } else {
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_NONE);
                        assert(IE_BITS_UREFS(bits) == 0);
@@ -2204,6 +2274,7 @@ ipc_right_copyout(
                }
 
                entry->ie_bits = (bits | MACH_PORT_TYPE_SEND) + 1;
+               ipc_entry_modified(space, name, entry);
                break;
 
            case MACH_MSG_TYPE_PORT_RECEIVE: {
@@ -2237,8 +2308,8 @@ ipc_right_copyout(
                        assert(IE_BITS_UREFS(bits) > 0);
                        assert(port->ip_srights > 0);
 
-                       ip_release(port);
                        ip_unlock(port);
+                       ip_release(port);
 
                        /* entry is locked holding ref, so can use port */
 
@@ -2252,9 +2323,10 @@ ipc_right_copyout(
                        ip_unlock(port);
                }
                entry->ie_bits = bits | MACH_PORT_TYPE_RECEIVE;
+               ipc_entry_modified(space, name, entry);
 
                if (dest != IP_NULL)
-                       ipc_port_release(dest);
+                       ip_release(dest);
                break;
            }
 
@@ -2289,8 +2361,9 @@ ipc_right_rename(
        ipc_port_request_index_t request = oentry->ie_request;
        ipc_entry_bits_t bits = oentry->ie_bits;
        ipc_object_t object = oentry->ie_object;
+       ipc_port_t release_port = IP_NULL;
 
-       assert(space->is_active);
+       assert(is_active(space));
        assert(oname != nname);
 
        /*
@@ -2311,6 +2384,7 @@ ipc_right_rename(
                        request = IE_REQ_NONE;
                        object = IO_NULL;
                        bits = oentry->ie_bits;
+                       release_port = port;
                        assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
                        assert(oentry->ie_request == IE_REQ_NONE);
                } else {
@@ -2387,7 +2461,11 @@ ipc_right_rename(
        assert(oentry->ie_request == IE_REQ_NONE);
        oentry->ie_object = IO_NULL;
        ipc_entry_dealloc(space, oname, oentry);
+       ipc_entry_modified(space, nname, nentry);
        is_write_unlock(space);
 
+       if (release_port != IP_NULL)
+               ip_release(release_port);
+
        return KERN_SUCCESS;
 }
diff --git a/osfmk/ipc/ipc_right.h b/osfmk/ipc/ipc_right.h
index 8b12cd89593eb94d8de5f2bc6422a15480dca13b..dfbfd232e2d67437de7a1e1c4fced3808c7d8965 100644 (file)
@@ -129,7 +129,7 @@ extern boolean_t ipc_right_check(
        ipc_entry_t             entry);
 
 /* Clean up an entry in a dead space */
-extern void ipc_right_clean(
+extern void ipc_right_terminate(
        ipc_space_t             space,
        mach_port_name_t        name,
        ipc_entry_t             entry);
@@ -177,7 +177,9 @@ extern kern_return_t ipc_right_copyin(
        mach_msg_type_name_t    msgt_name,
        boolean_t               deadok,
        ipc_object_t            *objectp,
-       ipc_port_t              *sorightp);
+       ipc_port_t              *sorightp,
+       ipc_port_t              *releasep,
+       queue_t                 links);
 
 /* Undo the effects of an ipc_right_copyin */
 extern void ipc_right_copyin_undo(
@@ -194,7 +196,8 @@ extern kern_return_t ipc_right_copyin_two(
        mach_port_name_t        name,
        ipc_entry_t             entry,
        ipc_object_t            *objectp,
-       ipc_port_t              *sorightp);
+       ipc_port_t              *sorightp,
+       ipc_port_t              *releasep);
 
 /* Copyout a capability to a space */
 extern kern_return_t ipc_right_copyout(
diff --git a/osfmk/ipc/ipc_space.c b/osfmk/ipc/ipc_space.c
index 1aaecc5941cedb3a1e840c9be63ce6bdbd757dda..803ab7321c2d92eaa61ddf06ab9146a7fa2088ae 100644 (file)
@@ -69,8 +69,6 @@
  *     Functions to manipulate IPC capability spaces.
  */
 
-#include <mach_kdb.h>
-
 #include <mach/boolean.h>
 #include <mach/kern_return.h>
 #include <mach/port.h>
@@ -79,7 +77,6 @@
 #include <kern/zalloc.h>
 #include <ipc/port.h>
 #include <ipc/ipc_entry.h>
-#include <ipc/ipc_splay.h>
 #include <ipc/ipc_object.h>
 #include <ipc/ipc_hash.h>
 #include <ipc/ipc_table.h>
 zone_t ipc_space_zone;
 ipc_space_t ipc_space_kernel;
 ipc_space_t ipc_space_reply;
-#if    MACH_KDB
-ipc_space_t default_pager_space;
-#endif /* MACH_KDB */
 
 /*
  *     Routine:        ipc_space_reference
  *     Routine:        ipc_space_release
  *     Purpose:
- *             Function versions of the IPC space macros.
- *             The "is_" cover macros can be defined to use the
- *             macros or the functions, as desired.
+ *             Function versions of the IPC space inline reference.
  */
 
 void
 ipc_space_reference(
        ipc_space_t     space)
 {
-       ipc_space_reference_macro(space);
+       is_reference(space);
 }
 
 void
 ipc_space_release(
        ipc_space_t     space)
 {
-       ipc_space_release_macro(space);
+       is_release(space);
 }
 
 /*
@@ -169,21 +161,14 @@ ipc_space_create(
        }
        table[new_size-1].ie_next = 0;
 
-       is_ref_lock_init(space);
-       space->is_references = 2;
-
        is_lock_init(space);
-       space->is_active = TRUE;
-       space->is_growing = FALSE;
-       space->is_table = table;
+       space->is_bits = 2; /* 2 refs, active, not growing */
        space->is_table_size = new_size;
+       space->is_table = table;
        space->is_table_next = initial+1;
-
-       ipc_splay_tree_init(&space->is_tree);
-       space->is_tree_total = 0;
-       space->is_tree_small = 0;
-       space->is_tree_hash = 0;
        space->is_task = NULL;
+       space->is_low_mod = new_size;
+       space->is_high_mod = 0;
 
        *spacep = space;
        return KERN_SUCCESS;
@@ -214,12 +199,8 @@ ipc_space_create_special(
        if (space == IS_NULL)
                return KERN_RESOURCE_SHORTAGE;
 
-       is_ref_lock_init(space);
-       space->is_references = 1;
-
        is_lock_init(space);
-       space->is_active = FALSE;
-
+       space->is_bits = IS_INACTIVE | 1; /* 1 ref, not active, not growing */
        *spacep = space;
        return KERN_SUCCESS;
 }
@@ -235,7 +216,6 @@ void
 ipc_space_clean(
        ipc_space_t space)
 {
-       ipc_tree_entry_t tentry;
        ipc_entry_t table;
        ipc_entry_num_t size;
        mach_port_index_t index;
@@ -245,11 +225,12 @@ ipc_space_clean(
         *      we must wait until they finish and figure
         *      out the space died.
         */
+ retry:
        is_write_lock(space);
-       while (space->is_growing)
+       while (is_growing(space))
                is_write_sleep(space);
 
-       if (!space->is_active) {
+       if (!is_active(space)) {
                is_write_unlock(space);
                return;
        }
@@ -257,10 +238,6 @@ ipc_space_clean(
        /*
         *      Now we can futz with it since we have the write lock.
         */
-#if    MACH_KDB
-       if (space == default_pager_space)
-               default_pager_space = IS_NULL;
-#endif /* MACH_KDB */
 
        table = space->is_table;
        size = space->is_table_size;
@@ -273,40 +250,23 @@ ipc_space_clean(
                if (type != MACH_PORT_TYPE_NONE) {
                        mach_port_name_t name = MACH_PORT_MAKE(index,
                                                IE_BITS_GEN(entry->ie_bits));
-                       ipc_right_destroy(space, name, entry);
+                       ipc_right_destroy(space, name, entry); /* unlocks space */
+                       goto retry;
                }
        }
 
-       /*
+        /*
         * JMM - Now the table is cleaned out.  We don't bother shrinking the
         * size of the table at this point, but we probably should if it is
-        * really large.  Lets just clean up the splay tree.
+        * really large.
         */
- start_splay:
-       for (tentry = ipc_splay_traverse_start(&space->is_tree);
-            tentry != ITE_NULL;
-            tentry = ipc_splay_traverse_next(&space->is_tree, TRUE)) {
-               mach_port_type_t type;
-               mach_port_name_t name = tentry->ite_name;
-
-               type = IE_BITS_TYPE(tentry->ite_bits);
-               /*
-                * If it is a real right, then destroy it.  This will have the
-                * side effect of removing it from the splay, so start over.
-                */
-               if(type != MACH_PORT_TYPE_NONE) {
-                       ipc_splay_traverse_finish(&space->is_tree);
-                       ipc_right_destroy(space, name, &tentry->ite_entry);
-                       goto start_splay;
-               }
-       }
-       ipc_splay_traverse_finish(&space->is_tree);
+       
        is_write_unlock(space);
 }
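
Because ipc_right_destroy now returns with the space unlocked (see its revised Conditions above), ipc_space_clean can no longer sweep the table in a single locked pass: each destroyed right costs it the lock, so it jumps back to retry and rescans from the top. A small model of the rescan idiom:

        #include <pthread.h>

        #define TABLE_SIZE 128

        struct entry { int type; };

        static pthread_mutex_t space_lock = PTHREAD_MUTEX_INITIALIZER;
        static struct entry table[TABLE_SIZE];

        static void destroy_right(struct entry *e)
        {
                e->type = 0;
                pthread_mutex_unlock(&space_lock); /* callee drops the lock */
        }

        static void space_clean(void)
        {
        retry:
                pthread_mutex_lock(&space_lock);
                for (int i = 0; i < TABLE_SIZE; i++) {
                        if (table[i].type != 0) {
                                destroy_right(&table[i]); /* unlocks */
                                goto retry;               /* rescan from top */
                        }
                }
                pthread_mutex_unlock(&space_lock);
        }
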
 
 
 /*
- *     Routine:        ipc_space_destroy
+ *     Routine:        ipc_space_terminate
  *     Purpose:
  *             Marks the space as dead and cleans up the entries.
  *             Does nothing if the space is already dead.
@@ -315,11 +275,9 @@ ipc_space_clean(
  */
 
 void
-ipc_space_destroy(
+ipc_space_terminate(
        ipc_space_t     space)
 {
-       boolean_t active;
-       ipc_tree_entry_t tentry;
        ipc_entry_t table;
        ipc_entry_num_t size;
        mach_port_index_t index;
@@ -327,31 +285,26 @@ ipc_space_destroy(
        assert(space != IS_NULL);
 
        is_write_lock(space);
-       active = space->is_active;
-       space->is_active = FALSE;
-       is_write_unlock(space);
-
-       if (!active)
+       if (!is_active(space)) {
+               is_write_unlock(space);
                return;
-
+       }
+       is_mark_inactive(space);
 
        /*
         *      If somebody is trying to grow the table,
         *      we must wait until they finish and figure
         *      out the space died.
         */
-       is_read_lock(space);
-       while (space->is_growing)
-               is_read_sleep(space);
+       while (is_growing(space))
+               is_write_sleep(space);
+
+       is_write_unlock(space);
+
 
-       is_read_unlock(space);
        /*
         *      Now we can futz with it unlocked.
         */
-#if    MACH_KDB
-       if (space == default_pager_space)
-               default_pager_space = IS_NULL;
-#endif /* MACH_KDB */
 
        table = space->is_table;
        size = space->is_table_size;
@@ -366,30 +319,13 @@ ipc_space_destroy(
 
                        name = MACH_PORT_MAKE(index,
                                              IE_BITS_GEN(entry->ie_bits));
-                       ipc_right_clean(space, name, entry);
+                       ipc_right_terminate(space, name, entry);
                }
        }
 
        it_entries_free(space->is_table_next-1, table);
        space->is_table_size = 0;
 
-       for (tentry = ipc_splay_traverse_start(&space->is_tree);
-            tentry != ITE_NULL;
-            tentry = ipc_splay_traverse_next(&space->is_tree, TRUE)) {
-               mach_port_type_t type;
-               mach_port_name_t name = tentry->ite_name;
-
-               type = IE_BITS_TYPE(tentry->ite_bits);
-               assert(type != MACH_PORT_TYPE_NONE);
-
-               ipc_right_clean(space, name, &tentry->ite_entry);
-
-               if(type == MACH_PORT_TYPE_SEND)
-                       ipc_hash_global_delete(space, tentry->ite_object,
-                                              name, tentry);
-       }
-       ipc_splay_traverse_finish(&space->is_tree);
-
        /*
         *      Because the space is now dead,
         *      we must release the "active" reference for it.
diff --git a/osfmk/ipc/ipc_space.h b/osfmk/ipc/ipc_space.h
index 39c2e45a4cf6f45936c882cc57e5b83f635bedee..2f9edeb47aa9fb124c15b2e2f539363efc6ea1f9 100644 (file)
 
 #ifdef __APPLE_API_PRIVATE
 #if MACH_KERNEL_PRIVATE
-#include <mach_kdb.h>
 #include <kern/macro_help.h>
 #include <kern/kern_types.h>
 #include <kern/lock.h>
 #include <kern/task.h>
 #include <kern/zalloc.h>
 #include <ipc/ipc_entry.h>
-#include <ipc/ipc_splay.h>
 #include <ipc/ipc_types.h>
 
+#include <libkern/OSAtomic.h>
+
 /*
  *     Every task has a space of IPC capabilities.
  *     IPC operations like send and receive use this space.
  *     IPC kernel calls manipulate the space of the target task.
  *
  *     Every space has a non-NULL is_table with is_table_size entries.
- *     A space may have a NULL is_tree.  is_tree_small records the
- *     number of entries in the tree that, if the table were to grow
- *     to the next larger size, would move from the tree to the table.
- *
- *     is_growing marks when the table is in the process of growing.
- *     When the table is growing, it can't be freed or grown by another
- *     thread, because of krealloc/kmem_realloc's requirements.
  *
+ *     Only one thread can be growing the space at a time.  Others
+ *     that need it grown wait for the first.  We do almost all the
+ *     work with the space unlocked, so lookups proceed pretty much
+ *     unaffected while the grow operation is underway.
  */
 
 typedef natural_t ipc_space_refs_t;
+#define IS_REFS_MAX    0x0fffffff
+#define IS_INACTIVE    0x40000000      /* space is inactive */
+#define IS_GROWING     0x20000000      /* space is growing */
 
 struct ipc_space {
-       decl_lck_mtx_data(,is_ref_lock_data)
-       ipc_space_refs_t is_references;
-
-       decl_lck_mtx_data(,is_lock_data)
-       boolean_t is_active;            /* is the space alive? */
-       boolean_t is_growing;           /* is the space growing? */
-       ipc_entry_t is_table;           /* an array of entries */
+       lck_spin_t      is_lock_data;
+       ipc_space_refs_t is_bits;       /* holds refs, active, growing */
        ipc_entry_num_t is_table_size;  /* current size of table */
-       struct ipc_table_size *is_table_next; /* info for larger table */
-       struct ipc_splay_tree is_tree;  /* a splay tree of entries */
-       ipc_entry_num_t is_tree_total;  /* number of entries in the tree */
-       ipc_entry_num_t is_tree_small;  /* # of small entries in the tree */
-       ipc_entry_num_t is_tree_hash;   /* # of hashed entries in the tree */
-       boolean_t is_fast;              /* for is_fast_space() */
-
+       ipc_entry_t is_table;           /* an array of entries */
        task_t is_task;                 /* associated task */
+       struct ipc_table_size *is_table_next; /* info for larger table */
+       ipc_entry_num_t is_low_mod;     /* lowest modified entry during growth */
+       ipc_entry_num_t is_high_mod;    /* highest modified entry during growth */
 };
 
 #define        IS_NULL                 ((ipc_space_t) 0)
 
+#define is_active(is)          (((is)->is_bits & IS_INACTIVE) != IS_INACTIVE)
+
+static inline void 
+is_mark_inactive(ipc_space_t is)
+{
+       assert(is_active(is));
+       OSBitOrAtomic(IS_INACTIVE, &is->is_bits);
+}
+
+#define is_growing(is)         (((is)->is_bits & IS_GROWING) == IS_GROWING)
+
+static inline void
+is_start_growing(ipc_space_t is)
+{
+       assert(!is_growing(is));
+       OSBitOrAtomic(IS_GROWING, &is->is_bits);
+}
+
+static inline void
+is_done_growing(ipc_space_t is)        
+{
+       assert(is_growing(is));
+       OSBitAndAtomic(~IS_GROWING, &is->is_bits);
+}
+
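
The comment above describes the new single-grower protocol only in prose. The following is a minimal sketch, not the kernel's actual grow path (which lives in ipc_entry.c), of how a caller could use the IS_GROWING bit together with the sleep/wakeup macros declared in this header; the helper name and control flow are illustrative assumptions.

    /* Sketch: grow the space's table, or wait out a grow in progress. */
    static kern_return_t
    space_grow_sketch(ipc_space_t space)
    {
            is_write_lock(space);
            if (is_growing(space)) {
                    /* Another thread is growing: is_write_sleep() drops
                     * the lock, sleeps, and retakes it when we wake. */
                    is_write_sleep(space);
                    is_write_unlock(space);
                    return KERN_SUCCESS;    /* caller retries its lookup */
            }
            is_start_growing(space);
            is_write_unlock(space);

            /* ... allocate and populate the larger table unlocked ... */

            is_write_lock(space);
            is_done_growing(space);
            is_write_unlock(space);
            thread_wakeup((event_t) space); /* wake any waiting growers */
            return KERN_SUCCESS;
    }
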
 extern zone_t ipc_space_zone;
 
 #define is_alloc()             ((ipc_space_t) zalloc(ipc_space_zone))
@@ -141,62 +158,52 @@ extern ipc_space_t ipc_space_reply;
 #if    DIPC
 extern ipc_space_t ipc_space_remote;
 #endif /* DIPC */
-#if    DIPC || MACH_KDB
+#if    DIPC
 extern ipc_space_t default_pager_space;
-#endif /* DIPC || MACH_KDB */
-
-#define is_fast_space(is)      ((is)->is_fast)
-
-#define        is_ref_lock_init(is)    lck_mtx_init(&(is)->is_ref_lock_data, &ipc_lck_grp, &ipc_lck_attr)
-#define        is_ref_lock_destroy(is) lck_mtx_destroy(&(is)->is_ref_lock_data, &ipc_lck_grp)
-
-#define        ipc_space_reference_macro(is)                                   \
-MACRO_BEGIN                                                            \
-       lck_mtx_lock(&(is)->is_ref_lock_data);                          \
-       assert((is)->is_references > 0);                                \
-       (is)->is_references++;                                          \
-       lck_mtx_unlock(&(is)->is_ref_lock_data);                                \
-MACRO_END
-
-#define        ipc_space_release_macro(is)                                     \
-MACRO_BEGIN                                                            \
-       ipc_space_refs_t _refs;                                         \
-                                                                       \
-       lck_mtx_lock(&(is)->is_ref_lock_data);                          \
-       assert((is)->is_references > 0);                                \
-       _refs = --(is)->is_references;                                  \
-       lck_mtx_unlock(&(is)->is_ref_lock_data);                                \
-                                                                       \
-       if (_refs == 0) {                                               \
-               is_lock_destroy(is);                                    \
-               is_ref_lock_destroy(is);                                \
-               is_free(is);                                            \
-       }                                                               \
-MACRO_END
-
-#define        is_lock_init(is)        lck_mtx_init(&(is)->is_lock_data, &ipc_lck_grp, &ipc_lck_attr)
-#define        is_lock_destroy(is)     lck_mtx_destroy(&(is)->is_lock_data, &ipc_lck_grp)
-
-#define        is_read_lock(is)        lck_mtx_lock(&(is)->is_lock_data)
-#define is_read_unlock(is)     lck_mtx_unlock(&(is)->is_lock_data)
-#define is_read_sleep(is)      lck_mtx_sleep(&(is)->is_lock_data,      \
+#endif /* DIPC */
+
+extern lck_grp_t       ipc_lck_grp;
+extern lck_attr_t      ipc_lck_attr;
+
+#define        is_lock_init(is)        lck_spin_init(&(is)->is_lock_data, &ipc_lck_grp, &ipc_lck_attr)
+#define        is_lock_destroy(is)     lck_spin_destroy(&(is)->is_lock_data, &ipc_lck_grp)
+
+#define        is_read_lock(is)        lck_spin_lock(&(is)->is_lock_data)
+#define is_read_unlock(is)     lck_spin_unlock(&(is)->is_lock_data)
+#define is_read_sleep(is)      lck_spin_sleep(&(is)->is_lock_data,     \
                                                        LCK_SLEEP_DEFAULT,                                      \
                                                        (event_t)(is),                                          \
                                                        THREAD_UNINT)
 
-#define        is_write_lock(is)       lck_mtx_lock(&(is)->is_lock_data)
-#define        is_write_lock_try(is)   lck_mtx_try_lock(&(is)->is_lock_data)
-#define is_write_unlock(is)    lck_mtx_unlock(&(is)->is_lock_data)
-#define is_write_sleep(is)     lck_mtx_sleep(&(is)->is_lock_data,      \
+#define        is_write_lock(is)       lck_spin_lock(&(is)->is_lock_data)
+#define        is_write_lock_try(is)   lck_spin_try_lock(&(is)->is_lock_data)
+#define is_write_unlock(is)    lck_spin_unlock(&(is)->is_lock_data)
+#define is_write_sleep(is)     lck_spin_sleep(&(is)->is_lock_data,     \
                                                        LCK_SLEEP_DEFAULT,                                      \
                                                        (event_t)(is),                                          \
                                                        THREAD_UNINT)
 
-#define        is_reference(is)        ipc_space_reference(is)
-#define        is_release(is)          ipc_space_release(is)
+#define is_refs(is)            ((is)->is_bits & IS_REFS_MAX)
+
+static inline void
+is_reference(ipc_space_t is)
+{
+       assert(is_refs(is) > 0 && is_refs(is) < IS_REFS_MAX);
+       OSIncrementAtomic(&(is->is_bits));
+}
+
 
-#define        is_write_to_read_lock(is)
+static inline void
+is_release(ipc_space_t is)
+{
+       assert(is_refs(is) > 0);
 
+       /* If we just released the last reference */
+       if ((OSDecrementAtomic(&(is->is_bits)) & IS_REFS_MAX) == 1) {
+               is_lock_destroy(is);
+               is_free(is);
+       }
+}
+       
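
The reference count, the inactive flag, and the growing flag now share a single 32-bit word updated with whole-word atomics; correctness depends on the count staying below IS_REFS_MAX so an increment can never carry into the flag bits. Below is a minimal user-space analog of that packing, with GCC __atomic builtins standing in for the OSAtomic calls; everything here is illustrative, not kernel code.

    #include <assert.h>
    #include <stdint.h>

    #define REFS_MAX   0x0fffffffu      /* low 28 bits: reference count */
    #define F_INACTIVE 0x40000000u      /* high bits: state flags */
    #define F_GROWING  0x20000000u

    static uint32_t bits = 1;           /* starts with one reference */

    static void take_ref(void)
    {
            assert((bits & REFS_MAX) < REFS_MAX);   /* no carry into flags */
            __atomic_fetch_add(&bits, 1, __ATOMIC_RELAXED);
    }

    static int drop_ref(void)           /* returns 1 on the last release */
    {
            /* fetch_sub returns the old value, like OSDecrementAtomic */
            return (__atomic_fetch_sub(&bits, 1, __ATOMIC_RELAXED)
                    & REFS_MAX) == 1;
    }
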
 #define        current_space_fast()    (current_task_fast()->itk_space)
 #define current_space()                (current_space_fast())
 
@@ -210,7 +217,7 @@ extern kern_return_t ipc_space_create(
        ipc_space_t             *spacep);
 
 /* Mark a space as dead and clean up the entries */
-extern void ipc_space_destroy(
+extern void ipc_space_terminate(
        ipc_space_t     space);
 
 /* Clean up the entries - but leave the space alive */
diff --git a/osfmk/ipc/ipc_splay.c b/osfmk/ipc/ipc_splay.c
deleted file mode 100644 (file)
index e0fec76..0000000
+++ /dev/null
@@ -1,950 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     File:   ipc/ipc_splay.c
- *     Author: Rich Draves
- *     Date:   1989
- *
- *     Primitive splay tree operations.
- */
-
-#include <mach/port.h>
-#include <kern/assert.h>
-#include <kern/macro_help.h>
-#include <ipc/ipc_entry.h>
-#include <ipc/ipc_splay.h>
-
-/*
- *     Splay trees are self-adjusting binary search trees.
- *     They have the following attractive properties:
- *             1) Space efficient; only two pointers per entry.
- *             2) Robust performance; amortized O(log n) per operation.
- *             3) Recursion not needed.
- *     This makes them a good fall-back data structure for those
- *     entries that don't fit into the lookup table.
- *
- *     The paper by Sleator and Tarjan, JACM v. 32, no. 3, pp. 652-686,
- *     describes the splaying operation.  ipc_splay_prim_lookup
- *     and ipc_splay_prim_assemble implement the top-down splay
- *     described on p. 669.
- *
- *     The tree is stored in an unassembled form.  If ist_root is null,
- *     then the tree has no entries.  Otherwise, ist_name records
- *     the value used for the last lookup.  ist_root points to the
- *     middle tree obtained from the top-down splay.  ist_ltree and
- *     ist_rtree point to left and right subtrees, whose entries
- *     are all smaller (larger) than those in the middle tree.
- *     ist_ltreep and ist_rtreep are pointers to fields in the
- *     left and right subtrees.  ist_ltreep points to the rchild field
- *     of the largest entry in ltree, and ist_rtreep points to the
- *     lchild field of the smallest entry in rtree.  The pointed-to
- *     fields aren't initialized.  If the left (right) subtree is null,
- *     then ist_ltreep (ist_rtreep) points to the ist_ltree (ist_rtree)
- *     field in the splay structure itself.
- *
- *     The primary advantage of the unassembled form is that repeated
- *     unsuccessful lookups are efficient.  In particular, an unsuccessful
- *     lookup followed by an insert only requires one splaying operation.
- *
- *     The traversal algorithm works via pointer inversion.
- *     When descending down the tree, child pointers are reversed
- *     to point back to the parent entry.  When ascending,
- *     the pointers are restored to their original value.
- *
- *     The biggest potential problem with the splay tree implementation
- *     is that the operations, even lookup, require an exclusive lock.
- *     If IPC spaces are protected with exclusive locks, then
- *     the splay tree doesn't require its own lock, and ist_lock/ist_unlock
- *     needn't do anything.  If IPC spaces are protected with read/write
- *     locks then ist_lock/ist_unlock should provide exclusive access.
- *
- *     If it becomes important to let lookups run in parallel,
- *     or if the restructuring makes lookups too expensive, then
- *     there is hope.  Use a read/write lock on the splay tree.
- *     Keep track of the number of entries in the tree.  When doing
- *     a lookup, first try a non-restructuring lookup with a read lock held,
- *     with a bound (based on log of size of the tree) on the number of
- *     entries to traverse.  If the lookup runs up against the bound,
- *     then take a write lock and do a reorganizing lookup.
- *     This way, if lookups only access roughly balanced parts
- *     of the tree, then lookups run in parallel and do no restructuring.
- *
- *     The traversal algorithm currently requires an exclusive lock.
- *     If that is a problem, the tree could be changed from an lchild/rchild
- *     representation to a leftmost child/right sibling representation.
- *     In conjunction with non-restructuring lookups, this would let
- *     lookups and traversals all run in parallel.  But this representation
- *     is more complicated and would slow down the operations.
- */
-
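
For readers who do not have the Sleator-Tarjan paper at hand, here is a self-contained top-down splay in its classic textbook form, with keys reduced to plain ints. Unlike ipc_splay_prim_lookup it reassembles the three trees before returning, rather than keeping the unassembled form described above; the names are hypothetical and this is a sketch, not kernel code.

    struct node { int key; struct node *l, *r; };

    /* Top-down splay: returns the tree re-rooted at key, or at the
     * last node traversed if key is absent (cf. p. 669 of the paper). */
    static struct node *
    splay(int key, struct node *t)
    {
            struct node N, *l, *r, *y;

            if (t == NULL)
                    return NULL;
            N.l = N.r = NULL;
            l = r = &N;
            for (;;) {
                    if (key < t->key) {
                            if (t->l == NULL)
                                    break;
                            if (key < t->l->key) {          /* rotate right */
                                    y = t->l; t->l = y->r; y->r = t; t = y;
                                    if (t->l == NULL)
                                            break;
                            }
                            r->l = t; r = t; t = t->l;      /* link right */
                    } else if (key > t->key) {
                            if (t->r == NULL)
                                    break;
                            if (key > t->r->key) {          /* rotate left */
                                    y = t->r; t->r = y->l; y->l = t; t = y;
                                    if (t->r == NULL)
                                            break;
                            }
                            l->r = t; l = t; t = t->r;      /* link left */
                    } else
                            break;
            }
            l->r = t->l; r->l = t->r;                       /* assemble */
            t->l = N.r; t->r = N.l;
            return t;
    }
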
-/*
- *     Boundary values to hand to ipc_splay_prim_lookup:
- */
-
-#define        MACH_PORT_SMALLEST      ((mach_port_name_t) 0)
-#define MACH_PORT_LARGEST      ((mach_port_name_t) ~0)
-
-/*
- *     Routine:        ipc_splay_prim_lookup
- *     Purpose:
- *             Searches for the node labeled name in the splay tree.
- *             Returns three nodes (treep, ltreep, rtreep) and
- *             two pointers to nodes (ltreepp, rtreepp).
- *
- *             ipc_splay_prim_lookup splits the supplied tree into
- *             three subtrees, left, middle, and right, returned
- *             in ltreep, treep, and rtreep.
- *
- *             If name is present in the tree, then it is at
- *             the root of the middle tree.  Otherwise, the root
- *             of the middle tree is the last node traversed.
- *
- *             ipc_splay_prim_lookup returns a pointer into
- *             the left subtree, to the rchild field of its
- *             largest node, in ltreepp.  It returns a pointer
- *             into the right subtree, to the lchild field of its
- *             smallest node, in rtreepp.
- */
-
-static void
-ipc_splay_prim_lookup(
-       mach_port_name_t        name,
-       ipc_tree_entry_t        tree,
-       ipc_tree_entry_t        *treep,
-       ipc_tree_entry_t        *ltreep,
-       ipc_tree_entry_t        **ltreepp,
-       ipc_tree_entry_t        *rtreep,
-       ipc_tree_entry_t        **rtreepp)
-{
-       mach_port_name_t tname;                 /* temp name */
-       ipc_tree_entry_t lchild, rchild;        /* temp child pointers */
-
-       assert(tree != ITE_NULL);
-
-#define        link_left                                       \
-MACRO_BEGIN                                            \
-       *ltreep = tree;                                 \
-       ltreep = &tree->ite_rchild;                     \
-       tree = *ltreep;                                 \
-MACRO_END
-
-#define        link_right                                      \
-MACRO_BEGIN                                            \
-       *rtreep = tree;                                 \
-       rtreep = &tree->ite_lchild;                     \
-       tree = *rtreep;                                 \
-MACRO_END
-
-#define rotate_left                                    \
-MACRO_BEGIN                                            \
-       ipc_tree_entry_t temp = tree;                   \
-                                                       \
-       tree = temp->ite_rchild;                        \
-       temp->ite_rchild = tree->ite_lchild;            \
-       tree->ite_lchild = temp;                        \
-MACRO_END
-
-#define rotate_right                                   \
-MACRO_BEGIN                                            \
-       ipc_tree_entry_t temp = tree;                   \
-                                                       \
-       tree = temp->ite_lchild;                        \
-       temp->ite_lchild = tree->ite_rchild;            \
-       tree->ite_rchild = temp;                        \
-MACRO_END
-
-       while (name != (tname = tree->ite_name)) {
-               if (name < tname) {
-                       /* descend to left */
-
-                       lchild = tree->ite_lchild;
-                       if (lchild == ITE_NULL)
-                               break;
-                       tname = lchild->ite_name;
-
-                       if ((name < tname) &&
-                           (lchild->ite_lchild != ITE_NULL))
-                               rotate_right;
-                       link_right;
-                       if ((name > tname) &&
-                           (lchild->ite_rchild != ITE_NULL))
-                               link_left;
-               } else {
-                       /* descend to right */
-
-                       rchild = tree->ite_rchild;
-                       if (rchild == ITE_NULL)
-                               break;
-                       tname = rchild->ite_name;
-
-                       if ((name > tname) &&
-                           (rchild->ite_rchild != ITE_NULL))
-                               rotate_left;
-                       link_left;
-                       if ((name < tname) &&
-                           (rchild->ite_lchild != ITE_NULL))
-                               link_right;
-               }
-
-               assert(tree != ITE_NULL);
-       }
-
-       *treep = tree;
-       *ltreepp = ltreep;
-       *rtreepp = rtreep;
-
-#undef link_left
-#undef link_right
-#undef rotate_left
-#undef rotate_right
-}
-
-/*
- *     Routine:        ipc_splay_prim_assemble
- *     Purpose:
- *             Assembles the results of ipc_splay_prim_lookup
- *             into a splay tree with the found node at the root.
- *
- *             ltree and rtree are by-reference so storing
- *             through ltreep and rtreep can change them.
- */
-
-static void
-ipc_splay_prim_assemble(
-       ipc_tree_entry_t        tree,
-       ipc_tree_entry_t        *ltree,
-       ipc_tree_entry_t        *ltreep,
-       ipc_tree_entry_t        *rtree,
-       ipc_tree_entry_t        *rtreep)
-{
-       assert(tree != ITE_NULL);
-
-       *ltreep = tree->ite_lchild;
-       *rtreep = tree->ite_rchild;
-
-       tree->ite_lchild = *ltree;
-       tree->ite_rchild = *rtree;
-}
-
-/*
- *     Routine:        ipc_splay_tree_init
- *     Purpose:
- *             Initialize a raw splay tree for use.
- */
-
-void
-ipc_splay_tree_init(
-       ipc_splay_tree_t        splay)
-{
-       splay->ist_root = ITE_NULL;
-}
-
-/*
- *     Routine:        ipc_splay_tree_pick
- *     Purpose:
- *             Picks and returns a random entry in a splay tree.
- *             Returns FALSE if the splay tree is empty.
- */
-
-boolean_t
-ipc_splay_tree_pick(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        *namep,
-       ipc_tree_entry_t        *entryp)
-{
-       ipc_tree_entry_t root;
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       if (root != ITE_NULL) {
-               *namep = root->ite_name;
-               *entryp = root;
-       }
-
-       ist_unlock(splay);
-
-       return root != ITE_NULL;
-}
-
-/*
- *     Routine:        ipc_splay_tree_lookup
- *     Purpose:
- *             Finds an entry in a splay tree.
- *             Returns ITE_NULL if not found.
- */
-
-ipc_tree_entry_t
-ipc_splay_tree_lookup(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name)
-{
-       ipc_tree_entry_t root;
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       if (root != ITE_NULL) {
-               if (splay->ist_name != name) {
-                       ipc_splay_prim_assemble(root,
-                               &splay->ist_ltree, splay->ist_ltreep,
-                               &splay->ist_rtree, splay->ist_rtreep);
-                       ipc_splay_prim_lookup(name, root, &root,
-                               &splay->ist_ltree, &splay->ist_ltreep,
-                               &splay->ist_rtree, &splay->ist_rtreep);
-                       splay->ist_name = name;
-                       splay->ist_root = root;
-               }
-
-               if (name != root->ite_name)
-                       root = ITE_NULL;
-       }
-
-       ist_unlock(splay);
-
-       return root;
-}
-
-/*
- *     Routine:        ipc_splay_tree_insert
- *     Purpose:
- *             Inserts a new entry into a splay tree.
- *             The caller supplies a new entry.
- *             The name can't already be present in the tree.
- */
-
-void
-ipc_splay_tree_insert(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       ipc_tree_entry_t        entry)
-{
-       ipc_tree_entry_t root;
-
-       assert(entry != ITE_NULL);
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       if (root == ITE_NULL) {
-               entry->ite_lchild = ITE_NULL;
-               entry->ite_rchild = ITE_NULL;
-       } else {
-               if (splay->ist_name != name) {
-                       ipc_splay_prim_assemble(root,
-                               &splay->ist_ltree, splay->ist_ltreep,
-                               &splay->ist_rtree, splay->ist_rtreep);
-                       ipc_splay_prim_lookup(name, root, &root,
-                               &splay->ist_ltree, &splay->ist_ltreep,
-                               &splay->ist_rtree, &splay->ist_rtreep);
-               }
-
-               assert(root->ite_name != name);
-
-               if (name < root->ite_name) {
-                       assert(root->ite_lchild == ITE_NULL);
-
-                       *splay->ist_ltreep = ITE_NULL;
-                       *splay->ist_rtreep = root;
-               } else {
-                       assert(root->ite_rchild == ITE_NULL);
-
-                       *splay->ist_ltreep = root;
-                       *splay->ist_rtreep = ITE_NULL;
-               }
-
-               entry->ite_lchild = splay->ist_ltree;
-               entry->ite_rchild = splay->ist_rtree;
-       }
-
-       entry->ite_name = name;
-       splay->ist_root = entry;
-       splay->ist_name = name;
-       splay->ist_ltreep = &splay->ist_ltree;
-       splay->ist_rtreep = &splay->ist_rtree;
-
-       ist_unlock(splay);
-}
-
-/*
- *     Routine:        ipc_splay_tree_delete
- *     Purpose:
- *             Deletes an entry from a splay tree.
- *             The name must be present in the tree.
- *             Frees the entry.
- *
- *             The "entry" argument isn't currently used.
- *             Other implementations might want it, though.
- */
-
-void
-ipc_splay_tree_delete(
-       ipc_splay_tree_t                        splay,
-       mach_port_name_t                        name,
-       __assert_only ipc_tree_entry_t  entry)
-{
-       ipc_tree_entry_t root, saved;
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       assert(root != ITE_NULL);
-
-       if (splay->ist_name != name) {
-               ipc_splay_prim_assemble(root,
-                       &splay->ist_ltree, splay->ist_ltreep,
-                       &splay->ist_rtree, splay->ist_rtreep);
-               ipc_splay_prim_lookup(name, root, &root,
-                       &splay->ist_ltree, &splay->ist_ltreep,
-                       &splay->ist_rtree, &splay->ist_rtreep);
-       }
-
-       assert(root->ite_name == name);
-       assert(root == entry);
-
-       *splay->ist_ltreep = root->ite_lchild;
-       *splay->ist_rtreep = root->ite_rchild;
-       ite_free(root);
-
-       root = splay->ist_ltree;
-       saved = splay->ist_rtree;
-
-       if (root == ITE_NULL)
-               root = saved;
-       else if (saved != ITE_NULL) {
-               /*
-                *      Find the largest node in the left subtree, and splay it
-                *      to the root.  Then add the saved right subtree.
-                */
-
-               ipc_splay_prim_lookup(MACH_PORT_LARGEST, root, &root,
-                       &splay->ist_ltree, &splay->ist_ltreep,
-                       &splay->ist_rtree, &splay->ist_rtreep);
-               ipc_splay_prim_assemble(root,
-                       &splay->ist_ltree, splay->ist_ltreep,
-                       &splay->ist_rtree, splay->ist_rtreep);
-
-               assert(root->ite_rchild == ITE_NULL);
-               root->ite_rchild = saved;
-       }
-
-       splay->ist_root = root;
-       if (root != ITE_NULL) {
-               splay->ist_name = root->ite_name;
-               splay->ist_ltreep = &splay->ist_ltree;
-               splay->ist_rtreep = &splay->ist_rtree;
-       }
-
-       ist_unlock(splay);
-}
-
-/*
- *     Routine:        ipc_splay_tree_split
- *     Purpose:
- *             Split a splay tree.  Puts all entries smaller than "name"
- *             into a new tree, "small".
- *
- *             Doesn't do locking on "small", because nobody else
- *             should be fiddling with the uninitialized tree.
- */
-
-void
-ipc_splay_tree_split(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       ipc_splay_tree_t        small)
-{
-       ipc_tree_entry_t root;
-
-       ipc_splay_tree_init(small);
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       if (root != ITE_NULL) {
-               /* lookup name, to get it (or last traversed) to the top */
-
-               if (splay->ist_name != name) {
-                       ipc_splay_prim_assemble(root,
-                               &splay->ist_ltree, splay->ist_ltreep,
-                               &splay->ist_rtree, splay->ist_rtreep);
-                       ipc_splay_prim_lookup(name, root, &root,
-                               &splay->ist_ltree, &splay->ist_ltreep,
-                               &splay->ist_rtree, &splay->ist_rtreep);
-               }
-
-               if (root->ite_name < name) {
-                       /* root goes into small */
-
-                       *splay->ist_ltreep = root->ite_lchild;
-                       *splay->ist_rtreep = ITE_NULL;
-                       root->ite_lchild = splay->ist_ltree;
-                       assert(root->ite_rchild == ITE_NULL);
-
-                       small->ist_root = root;
-                       small->ist_name = root->ite_name;
-                       small->ist_ltreep = &small->ist_ltree;
-                       small->ist_rtreep = &small->ist_rtree;
-
-                       /* rtree goes into splay */
-
-                       root = splay->ist_rtree;
-                       splay->ist_root = root;
-                       if (root != ITE_NULL) {
-                               splay->ist_name = root->ite_name;
-                               splay->ist_ltreep = &splay->ist_ltree;
-                               splay->ist_rtreep = &splay->ist_rtree;
-                       }
-               } else {
-                       /* root stays in splay */
-
-                       *splay->ist_ltreep = root->ite_lchild;
-                       root->ite_lchild = ITE_NULL;
-
-                       splay->ist_root = root;
-                       splay->ist_name = name;
-                       splay->ist_ltreep = &splay->ist_ltree;
-
-                       /* ltree goes into small */
-
-                       root = splay->ist_ltree;
-                       small->ist_root = root;
-                       if (root != ITE_NULL) {
-                               small->ist_name = root->ite_name;
-                               small->ist_ltreep = &small->ist_ltree;
-                               small->ist_rtreep = &small->ist_rtree;
-                       }
-               }               
-       }
-
-       ist_unlock(splay);
-}
-
-/*
- *     Routine:        ipc_splay_tree_join
- *     Purpose:
- *             Joins two splay trees.  Merges the entries in "small",
- *             which must all be smaller than the entries in "splay",
- *             into "splay".
- */
-
-void
-ipc_splay_tree_join(
-       ipc_splay_tree_t        splay,
-       ipc_splay_tree_t        small)
-{
-       ipc_tree_entry_t sroot;
-
-       /* pull entries out of small */
-
-       ist_lock(small);
-
-       sroot = small->ist_root;
-       if (sroot != ITE_NULL) {
-               ipc_splay_prim_assemble(sroot,
-                       &small->ist_ltree, small->ist_ltreep,
-                       &small->ist_rtree, small->ist_rtreep);
-               small->ist_root = ITE_NULL;
-       }
-
-       ist_unlock(small);
-
-       /* put entries, if any, into splay */
-
-       if (sroot != ITE_NULL) {
-               ipc_tree_entry_t root;
-
-               ist_lock(splay);
-
-               root = splay->ist_root;
-               if (root == ITE_NULL) {
-                       root = sroot;
-               } else {
-                       /* get smallest entry in splay tree to top */
-
-                       if (splay->ist_name != MACH_PORT_SMALLEST) {
-                               ipc_splay_prim_assemble(root,
-                                       &splay->ist_ltree, splay->ist_ltreep,
-                                       &splay->ist_rtree, splay->ist_rtreep);
-                               ipc_splay_prim_lookup(MACH_PORT_SMALLEST,
-                                       root, &root,
-                                       &splay->ist_ltree, &splay->ist_ltreep,
-                                       &splay->ist_rtree, &splay->ist_rtreep);
-                       }
-
-                       ipc_splay_prim_assemble(root,
-                               &splay->ist_ltree, splay->ist_ltreep,
-                               &splay->ist_rtree, splay->ist_rtreep);
-
-                       assert(root->ite_lchild == ITE_NULL);
-                       assert(sroot->ite_name < root->ite_name);
-                       root->ite_lchild = sroot;
-               }
-
-               splay->ist_root = root;
-               splay->ist_name = root->ite_name;
-               splay->ist_ltreep = &splay->ist_ltree;
-               splay->ist_rtreep = &splay->ist_rtree;
-
-               ist_unlock(splay);
-       }
-}
-
-/*
- *     Routine:        ipc_splay_tree_bounds
- *     Purpose:
- *             Given a name, returns the largest value present
- *             in the tree that is smaller than or equal to the name,
- *             or ~0 if no such value exists.  Similarly, returns
- *             the smallest value present that is greater than or
- *             equal to the name, or 0 if no such value exists.
- *
- *             Hence, if
- *             lower = upper, then lower = name = upper
- *                             and name is present in the tree
- *             lower = ~0 and upper = 0,
- *                             then the tree is empty
- *             lower = ~0 and upper > 0, then name < upper
- *                             and upper is smallest value in tree
- *             lower < ~0 and upper = 0, then lower < name
- *                             and lower is largest value in tree
- *             lower < ~0 and upper > 0, then lower < name < upper
- *                             and they are tight bounds on name
- *
- *             (Note MACH_PORT_SMALLEST = 0 and MACH_PORT_LARGEST = ~0.)
- */
-
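
A worked example of those cases, purely illustrative: for a tree holding the names {3, 7},

    ipc_splay_tree_bounds(t, 5, &lo, &hi);  /* lo = 3,  hi = 7: tight bounds  */
    ipc_splay_tree_bounds(t, 7, &lo, &hi);  /* lo = 7,  hi = 7: name present  */
    ipc_splay_tree_bounds(t, 2, &lo, &hi);  /* lo = ~0, hi = 3: below smallest */
    ipc_splay_tree_bounds(t, 9, &lo, &hi);  /* lo = 7,  hi = 0: above largest  */
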
-void
-ipc_splay_tree_bounds(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       mach_port_name_t        *lowerp, 
-       mach_port_name_t        *upperp)
-{
-       ipc_tree_entry_t root;
-
-       ist_lock(splay);
-
-       root = splay->ist_root;
-       if (root == ITE_NULL) {
-               *lowerp = MACH_PORT_LARGEST;
-               *upperp = MACH_PORT_SMALLEST;
-       } else {
-               mach_port_name_t rname;
-
-               if (splay->ist_name != name) {
-                       ipc_splay_prim_assemble(root,
-                               &splay->ist_ltree, splay->ist_ltreep,
-                               &splay->ist_rtree, splay->ist_rtreep);
-                       ipc_splay_prim_lookup(name, root, &root,
-                               &splay->ist_ltree, &splay->ist_ltreep,
-                               &splay->ist_rtree, &splay->ist_rtreep);
-                       splay->ist_name = name;
-                       splay->ist_root = root;
-               }
-
-               rname = root->ite_name;
-
-               /*
-                *      OK, it's a hack.  We convert the ltreep and rtreep
-                *      pointers back into real entry pointers,
-                *      so we can pick the names out of the entries.
-                */
-
-               if (rname <= name)
-                       *lowerp = rname;
-               else if (splay->ist_ltreep == &splay->ist_ltree)
-                       *lowerp = MACH_PORT_LARGEST;
-               else {
-                       ipc_tree_entry_t entry;
-
-                       entry = (ipc_tree_entry_t)
-                               ((char *)splay->ist_ltreep -
-                                ((char *)&root->ite_rchild -
-                                 (char *)root));
-                       *lowerp = entry->ite_name;
-               }
-
-               if (rname >= name)
-                       *upperp = rname;
-               else if (splay->ist_rtreep == &splay->ist_rtree)
-                       *upperp = MACH_PORT_SMALLEST;
-               else {
-                       ipc_tree_entry_t entry;
-
-                       entry = (ipc_tree_entry_t)
-                               ((char *)splay->ist_rtreep -
-                                ((char *)&root->ite_lchild -
-                                 (char *)root));
-                       *upperp = entry->ite_name;
-               }
-       }
-
-       ist_unlock(splay);
-}
-
-/*
- *     Routine:        ipc_splay_traverse_start
- *     Routine:        ipc_splay_traverse_next
- *     Routine:        ipc_splay_traverse_finish
- *     Purpose:
- *             Perform a symmetric order traversal of a splay tree.
- *     Usage:
- *             for (entry = ipc_splay_traverse_start(splay);
- *                  entry != ITE_NULL;
- *                  entry = ipc_splay_traverse_next(splay, delete)) {
- *                     do something with entry
- *             }
- *             ipc_splay_traverse_finish(splay);
- *
- *             If "delete" is TRUE, then the current entry
- *             is removed from the tree and deallocated.
- *
- *             During the traversal, the splay tree is locked.
- */
-
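
As a concrete use of the protocol just described, a helper that tears down an entire tree would look like the hypothetical sketch below; passing TRUE makes the traversal itself free each entry.

    static void
    splay_destroy_all_sketch(ipc_splay_tree_t splay)
    {
            ipc_tree_entry_t entry;

            for (entry = ipc_splay_traverse_start(splay);
                 entry != ITE_NULL;
                 entry = ipc_splay_traverse_next(splay, TRUE)) {
                    /* entry is freed by the traversal after this body;
                     * drop anything it references before continuing */
            }
            ipc_splay_traverse_finish(splay);
    }
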
-ipc_tree_entry_t
-ipc_splay_traverse_start(
-       ipc_splay_tree_t        splay)
-{
-       ipc_tree_entry_t current, parent;
-
-       ist_lock(splay);
-
-       current = splay->ist_root;
-       if (current != ITE_NULL) {
-               ipc_splay_prim_assemble(current,
-                       &splay->ist_ltree, splay->ist_ltreep,
-                       &splay->ist_rtree, splay->ist_rtreep);
-
-               parent = ITE_NULL;
-
-               while (current->ite_lchild != ITE_NULL) {
-                       ipc_tree_entry_t next;
-
-                       next = current->ite_lchild;
-                       current->ite_lchild = parent;
-                       parent = current;
-                       current = next;
-               }
-
-               splay->ist_ltree = current;
-               splay->ist_rtree = parent;
-       }
-
-       return current;
-}
-
-ipc_tree_entry_t
-ipc_splay_traverse_next(
-       ipc_splay_tree_t        splay,
-       boolean_t               delete)
-{
-       ipc_tree_entry_t current, parent;
-
-       /* pick up where traverse_entry left off */
-
-       current = splay->ist_ltree;
-       parent = splay->ist_rtree;
-       assert(current != ITE_NULL);
-
-       if (!delete)
-               goto traverse_right;
-
-       /* we must delete current and patch the tree */
-
-       if (current->ite_lchild == ITE_NULL) {
-               if (current->ite_rchild == ITE_NULL) {
-                       /* like traverse_back, but with deletion */
-
-                       if (parent == ITE_NULL) {
-                               ite_free(current);
-
-                               splay->ist_root = ITE_NULL;
-                               return ITE_NULL;
-                       }
-
-                       if (current->ite_name < parent->ite_name) {
-                               ite_free(current);
-
-                               current = parent;
-                               parent = current->ite_lchild;
-                               current->ite_lchild = ITE_NULL;
-                               goto traverse_entry;
-                       } else {
-                               ite_free(current);
-
-                               current = parent;
-                               parent = current->ite_rchild;
-                               current->ite_rchild = ITE_NULL;
-                               goto traverse_back;
-                       }
-               } else {
-                       ipc_tree_entry_t prev;
-
-                       prev = current;
-                       current = current->ite_rchild;
-                       ite_free(prev);
-                       goto traverse_left;
-               }
-       } else {
-               if (current->ite_rchild == ITE_NULL) {
-                       ipc_tree_entry_t prev;
-
-                       prev = current;
-                       current = current->ite_lchild;
-                       ite_free(prev);
-                       goto traverse_back;
-               } else {
-                       ipc_tree_entry_t prev;
-                       ipc_tree_entry_t ltree, rtree;
-                       ipc_tree_entry_t *ltreep, *rtreep;
-
-                       /* replace current with largest of left children */
-
-                       prev = current;
-                       ipc_splay_prim_lookup(MACH_PORT_LARGEST,
-                               current->ite_lchild, &current,
-                               &ltree, &ltreep, &rtree, &rtreep);
-                       ipc_splay_prim_assemble(current,
-                               &ltree, ltreep, &rtree, rtreep);
-
-                       assert(current->ite_rchild == ITE_NULL);
-                       current->ite_rchild = prev->ite_rchild;
-                       ite_free(prev);
-                       goto traverse_right;
-               }
-       }
-       /*NOTREACHED*/
-
-       /*
-        *      A state machine:  for each entry, we
-        *              1) traverse left subtree
-        *              2) traverse the entry
-        *              3) traverse right subtree
-        *              4) traverse back to parent
-        */
-
-    traverse_left:
-       if (current->ite_lchild != ITE_NULL) {
-               ipc_tree_entry_t next;
-
-               next = current->ite_lchild;
-               current->ite_lchild = parent;
-               parent = current;
-               current = next;
-               goto traverse_left;
-       }
-
-    traverse_entry:
-       splay->ist_ltree = current;
-       splay->ist_rtree = parent;
-       return current;
-
-    traverse_right:
-       if (current->ite_rchild != ITE_NULL) {
-               ipc_tree_entry_t next;
-
-               next = current->ite_rchild;
-               current->ite_rchild = parent;
-               parent = current;
-               current = next;
-               goto traverse_left;
-       }
-
-    traverse_back:
-       if (parent == ITE_NULL) {
-               splay->ist_root = current;
-               return ITE_NULL;
-       }
-
-       if (current->ite_name < parent->ite_name) {
-               ipc_tree_entry_t prev;
-
-               prev = current;
-               current = parent;
-               parent = current->ite_lchild;
-               current->ite_lchild = prev;
-               goto traverse_entry;
-       } else {
-               ipc_tree_entry_t prev;
-
-               prev = current;
-               current = parent;
-               parent = current->ite_rchild;
-               current->ite_rchild = prev;
-               goto traverse_back;
-       }
-}
-
-void
-ipc_splay_traverse_finish(
-       ipc_splay_tree_t        splay)
-{
-       ipc_tree_entry_t root;
-
-       root = splay->ist_root;
-       if (root != ITE_NULL) {
-               splay->ist_name = root->ite_name;
-               splay->ist_ltreep = &splay->ist_ltree;
-               splay->ist_rtreep = &splay->ist_rtree;
-       }
-
-       ist_unlock(splay);
-}
-
diff --git a/osfmk/ipc/ipc_splay.h b/osfmk/ipc/ipc_splay.h
deleted file mode 100644 (file)
index 03cfe0d..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     File:   ipc/ipc_splay.h
- *     Author: Rich Draves
- *     Date:   1989
- *
- *     Declarations of primitive splay tree operations.
- */
-
-#ifndef        _IPC_IPC_SPLAY_H_
-#define _IPC_IPC_SPLAY_H_
-
-#include <mach/port.h>
-#include <kern/assert.h>
-#include <kern/macro_help.h>
-#include <ipc/ipc_entry.h>
-
-typedef struct ipc_splay_tree {
-       mach_port_name_t ist_name;      /* name used in last lookup */
-       ipc_tree_entry_t ist_root;      /* root of middle tree */
-       ipc_tree_entry_t ist_ltree;     /* root of left tree */
-       ipc_tree_entry_t *ist_ltreep;   /* pointer into left tree */
-       ipc_tree_entry_t ist_rtree;     /* root of right tree */
-       ipc_tree_entry_t *ist_rtreep;   /* pointer into right tree */
-} *ipc_splay_tree_t;
-
-#define        ist_lock(splay)         /* no locking */
-#define ist_unlock(splay)      /* no locking */
-
-/* Initialize a raw splay tree */
-extern void ipc_splay_tree_init(
-       ipc_splay_tree_t        splay);
-
-/* Pick a random entry in a splay tree */
-extern boolean_t ipc_splay_tree_pick(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        *namep,
-       ipc_tree_entry_t        *entryp);
-
-/* Find an entry in a splay tree */
-extern ipc_tree_entry_t ipc_splay_tree_lookup(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name);
-
-/* Insert a new entry into a splay tree */
-extern void ipc_splay_tree_insert(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       ipc_tree_entry_t        entry);
-
-/* Delete an entry from a splay tree */
-extern void ipc_splay_tree_delete(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       ipc_tree_entry_t        entry);
-
-/* Split a splay tree */
-extern void ipc_splay_tree_split(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       ipc_splay_tree_t        entry);
-
-/* Join two splay trees */
-extern void ipc_splay_tree_join(
-       ipc_splay_tree_t        splay,
-       ipc_splay_tree_t        small);
-
-/* Do a bounded splay tree lookup */
-extern void ipc_splay_tree_bounds(
-       ipc_splay_tree_t        splay,
-       mach_port_name_t        name,
-       mach_port_name_t        *lowerp, 
-       mach_port_name_t        *upperp);
-
-/* Initialize a symmetric order traversal of a splay tree */
-extern ipc_tree_entry_t ipc_splay_traverse_start(
-       ipc_splay_tree_t        splay);
-
-/* Return the next entry in a symmetric order traversal of a splay tree */
-extern ipc_tree_entry_t ipc_splay_traverse_next(
-       ipc_splay_tree_t        splay,
-       boolean_t               delete);
-
-/* Terminate a symmetric order traversal of a splay tree */
-extern void ipc_splay_traverse_finish(
-       ipc_splay_tree_t        splay);
-
-#endif /* _IPC_IPC_SPLAY_H_ */
index 4e19f884480c1eda1a7a3d9ae553ef5f4035ce1e..1add1f5b371e5c2538e5f982e034b253349bab3b 100644 (file)
@@ -187,33 +187,6 @@ ipc_table_alloc(
        return (void *)table;
 }
 
-/*
- *     Routine:        ipc_table_realloc
- *     Purpose:
- *             Reallocate a big table.
- *
- *             The new table remaps the old table,
- *             so copying is not necessary.
- *     Conditions:
- *             Only works for page-size or bigger tables.
- *             May block.
- */
-
-void *
-ipc_table_realloc(
-       vm_size_t       old_size,
-       void *          old_table,
-       vm_size_t       new_size)
-{
-       vm_offset_t new_table;
-
-       if (kmem_realloc(kalloc_map,
-                        (vm_offset_t) old_table, old_size,
-                        &new_table, new_size) != KERN_SUCCESS)
-               new_table = 0;
-
-       return (void *)new_table;
-}
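
The remapping trick the deleted routine relied on can be illustrated in user space with Linux's mremap(2), which likewise moves page mappings instead of copying bytes. This analog is purely illustrative and is not part of the XNU sources:

    #define _GNU_SOURCE
    #include <stddef.h>
    #include <sys/mman.h>

    /* Grow a page-multiple allocation without copying, by remapping.
     * Returns NULL on failure, as ipc_table_realloc did. */
    static void *
    table_remap_grow(void *old_table, size_t old_size, size_t new_size)
    {
            void *p = mremap(old_table, old_size, new_size, MREMAP_MAYMOVE);
            return (p == MAP_FAILED) ? NULL : p;
    }
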
 
 /*
  *     Routine:        ipc_table_free
index fee56f77871659c82ede0302a4e93bf579d6b132..a310197e004707bf4c78bea4b69b014414841eae 100644 (file)
@@ -112,56 +112,30 @@ extern ipc_table_size_t ipc_table_requests;
 extern void ipc_table_init(void) __attribute__((section("__TEXT, initcode")));
 
 /*
- *     Note that ipc_table_alloc, ipc_table_realloc, and ipc_table_free
- *     all potentially use the VM system.  Hence simple locks can't
+ *     Note that ipc_table_alloc and ipc_table_free
+ *     potentially use the VM system.  Hence simple locks can't
  *     be held across them.
- *
- *     We can't use a copying realloc, because the realloc happens
- *     with the data unlocked.  ipc_table_realloc remaps the data,
- *     so it is OK.
  */
 
 /* Allocate a table */
 extern void * ipc_table_alloc(
        vm_size_t       size);
 
-/* Reallocate a big table */
-extern void * ipc_table_realloc(
-       vm_size_t       old_size,
-       void *          old_table,
-       vm_size_t       new_size);
-
 /* Free a table */
 extern void ipc_table_free(
        vm_size_t       size,
        void *          table);
 
-#define it_entries_reallocable(its)                                    \
-       ((its)->its_size * sizeof(struct ipc_entry) >= PAGE_SIZE)
-
 #define        it_entries_alloc(its)                                           \
        ((ipc_entry_t)                                                  \
-       ipc_table_alloc(it_entries_reallocable(its) ?                   \
-           round_page((its)->its_size * sizeof(struct ipc_entry)) :    \
-           (its)->its_size * sizeof(struct ipc_entry)                  \
-       ))
-
-#define        it_entries_realloc(its, table, nits)                            \
-       ((ipc_entry_t)                                                  \
-       ipc_table_realloc(                                              \
-           round_page((its)->its_size * sizeof(struct ipc_entry)),     \
-           (void *)(table),                                    \
-           round_page((nits)->its_size * sizeof(struct ipc_entry))     \
-       ))
+        ipc_table_alloc((its)->its_size * sizeof(struct ipc_entry)))
 
 #define        it_entries_free(its, table)                                     \
-       ipc_table_free(it_entries_reallocable(its) ?                    \
-           round_page((its)->its_size * sizeof(struct ipc_entry)) :    \
-           (its)->its_size * sizeof(struct ipc_entry),                 \
-           (void *)(table)                                     \
-       )
+       ipc_table_free((its)->its_size * sizeof(struct ipc_entry),      \
+                      (void *)(table))
+
 
-#define        it_requests_alloc(its)                                  \
+#define        it_requests_alloc(its)                                          \
        ((ipc_port_request_t)                                           \
         ipc_table_alloc((its)->its_size *                              \
                         sizeof(struct ipc_port_request)))
index 5857e5ecf7cf170b262ce56cb5048393a177797b..b601a2e69fd2deb44c10426b54dec17e5b91881d 100644 (file)
@@ -55,16 +55,16 @@ typedef mach_port_name_t mach_port_index_t;         /* index values */
 typedef mach_port_name_t mach_port_gen_t;              /* generation numbers */
 
 typedef struct ipc_entry *ipc_entry_t;
-typedef struct ipc_tree_entry *ipc_tree_entry_t;
+
 typedef struct ipc_table_size *ipc_table_size_t;
 typedef struct ipc_port_request *ipc_port_request_t;
 typedef struct ipc_pset *ipc_pset_t;
 typedef struct ipc_kmsg *ipc_kmsg_t;
 
 #define        IE_NULL ((ipc_entry_t) 0)
-#define        ITE_NULL        ((ipc_tree_entry_t) 0)
+
 #define        ITS_NULL        ((ipc_table_size_t) 0)
-#define        ITS_SIZE_NONE   ((ipc_table_elems_t) -1)
+#define ITS_SIZE_NONE  ((ipc_table_elems_t) -1)
 #define        IPR_NULL        ((ipc_port_request_t) 0)
 #define        IPS_NULL        ((ipc_pset_t) 0)
 #define        IKM_NULL        ((ipc_kmsg_t) 0)
index f255df6f5943672e682d96ad109139a735c12453..87b7329d1a6a32e7a0337756d1ca0ee3fb8f3134 100644 (file)
@@ -137,67 +137,6 @@ mach_port_get_srights(
 }
 #endif /* MACH_IPC_DEBUG */
 
-/*
- *     Routine:        host_ipc_hash_info
- *     Purpose:
- *             Return information about the global reverse hash table.
- *     Conditions:
- *             Nothing locked.  Obeys CountInOut protocol.
- *     Returns:
- *             KERN_SUCCESS            Returned information.
- *             KERN_INVALID_HOST       The host is null.
- *             KERN_RESOURCE_SHORTAGE  Couldn't allocate memory.
- */
-
-#if !MACH_IPC_DEBUG
-kern_return_t
-host_ipc_hash_info(
-       __unused host_t                 host,
-       __unused hash_info_bucket_array_t       *infop,
-       __unused mach_msg_type_number_t         *countp)
-{
-        return KERN_FAILURE;
-}
-#else
-kern_return_t
-host_ipc_hash_info(
-       host_t                                  host,
-       hash_info_bucket_array_t                *infop,
-       mach_msg_type_number_t          *countp)
-{
-       vm_map_copy_t copy;
-       vm_offset_t addr;
-       vm_size_t size;
-       hash_info_bucket_t *info;
-       natural_t count;
-       kern_return_t kr;
-
-       if (host == HOST_NULL)
-               return KERN_INVALID_HOST;
-
-       /* start with in-line data */
-
-       count = ipc_hash_size();
-       size = round_page(count * sizeof(hash_info_bucket_t));
-       kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size);
-       if (kr != KERN_SUCCESS)
-               return KERN_RESOURCE_SHORTAGE;
-
-       info = (hash_info_bucket_t *) addr;
-       count = ipc_hash_info(info, count);
-
-       if (size > count * sizeof(hash_info_bucket_t))
-               bzero((char *)&info[count], size - count * sizeof(hash_info_bucket_t));
-
-       kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, 
-                          (vm_map_size_t)size, TRUE, &copy);
-       assert(kr == KERN_SUCCESS);
-
-       *infop = (hash_info_bucket_t *) copy;
-       *countp = count;
-       return KERN_SUCCESS;
-}
-#endif /* MACH_IPC_DEBUG */
 
 /*
  *     Routine:        mach_port_space_info
@@ -231,16 +170,12 @@ mach_port_space_info(
        ipc_info_space_t                *infop,
        ipc_info_name_array_t           *tablep,
        mach_msg_type_number_t          *tableCntp,
-       ipc_info_tree_name_array_t      *treep,
-       mach_msg_type_number_t          *treeCntp)
+       __unused ipc_info_tree_name_array_t     *treep,
+       __unused mach_msg_type_number_t         *treeCntp)
 {
        ipc_info_name_t *table_info;
        vm_offset_t table_addr;
        vm_size_t table_size, table_size_needed;
-       ipc_info_tree_name_t *tree_info;
-       vm_offset_t tree_addr;
-       vm_size_t tree_size, tree_size_needed;
-       ipc_tree_entry_t tentry;
        ipc_entry_t table;
        ipc_entry_num_t tsize;
        mach_port_index_t index;
@@ -254,28 +189,21 @@ mach_port_space_info(
        /* start with in-line memory */
 
        table_size = 0;
-       tree_size = 0;
 
        for (;;) {
                is_read_lock(space);
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_read_unlock(space);
                        if (table_size != 0)
                                kmem_free(ipc_kernel_map,
                                          table_addr, table_size);
-                       if (tree_size != 0)
-                               kmem_free(ipc_kernel_map,
-                                         tree_addr, tree_size);
                        return KERN_INVALID_TASK;
                }
 
                table_size_needed = round_page(space->is_table_size
                                               * sizeof(ipc_info_name_t));
-               tree_size_needed = round_page(space->is_tree_total
-                                             * sizeof(ipc_info_tree_name_t));
 
-               if ((table_size_needed == table_size) &&
-                   (tree_size_needed == tree_size))
+               if (table_size_needed == table_size)
                        break;
 
                is_read_unlock(space);
@@ -285,23 +213,11 @@ mach_port_space_info(
                                kmem_free(ipc_kernel_map, table_addr, table_size);
                        kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size_needed);
                        if (kr != KERN_SUCCESS) {
-                               if (tree_size != 0)
-                                       kmem_free(ipc_kernel_map, tree_addr, tree_size);
                                return KERN_RESOURCE_SHORTAGE;
                        }
                        table_size = table_size_needed;
                }
-               if (tree_size != tree_size_needed) {
-                       if (tree_size != 0)
-                               kmem_free(ipc_kernel_map, tree_addr, tree_size);
-                       kr = kmem_alloc(ipc_kernel_map, &tree_addr, tree_size_needed);
-                       if (kr != KERN_SUCCESS) {
-                               if (table_size != 0)
-                                       kmem_free(ipc_kernel_map, table_addr, table_size);
-                               return KERN_RESOURCE_SHORTAGE;
-                       }
-                       tree_size = tree_size_needed;
-               }
+
        }
        /* space is read-locked and active; we have enough wired memory */
 
@@ -309,9 +225,6 @@ mach_port_space_info(
        infop->iis_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD);
        infop->iis_table_size = space->is_table_size;
        infop->iis_table_next = space->is_table_next->its_size;
-       infop->iis_tree_size = space->is_tree_total;
-       infop->iis_tree_small = space->is_tree_small;
-       infop->iis_tree_hash = space->is_tree_hash;
 
        /* walk the table for this space */
        table = space->is_table;
@@ -324,7 +237,6 @@ mach_port_space_info(
 
                bits = entry->ie_bits;
                iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits));
-               iin->iin_collision = (bits & IE_BITS_COLLISION) ? TRUE : FALSE;
                iin->iin_type = IE_BITS_TYPE(bits);
                if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
                    entry->ie_request != IE_REQ_NONE) {
@@ -342,48 +254,6 @@ mach_port_space_info(
                iin->iin_hash = entry->ie_index;
        }
 
-       /* walk the splay tree for this space */
-       tree_info = (ipc_info_tree_name_array_t)tree_addr;
-       for (tentry = ipc_splay_traverse_start(&space->is_tree), index = 0;
-            tentry != ITE_NULL;
-            tentry = ipc_splay_traverse_next(&space->is_tree, FALSE)) {
-               ipc_info_tree_name_t *iitn = &tree_info[index++];
-               ipc_info_name_t *iin = &iitn->iitn_name;
-               ipc_entry_t entry = &tentry->ite_entry;
-               ipc_entry_bits_t bits = entry->ie_bits;
-
-               assert(IE_BITS_TYPE(bits) != MACH_PORT_TYPE_NONE);
-
-               iin->iin_name = tentry->ite_name;
-               iin->iin_collision = (bits & IE_BITS_COLLISION) ? TRUE : FALSE;
-               iin->iin_type = IE_BITS_TYPE(bits);
-               if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
-                   entry->ie_request != IE_REQ_NONE) {
-                       ipc_port_t port = (ipc_port_t) entry->ie_object;
-
-                       assert(IP_VALID(port));
-                       ip_lock(port);
-                       iin->iin_type |= ipc_port_request_type(port, iin->iin_name, entry->ie_request);
-                       ip_unlock(port);
-               }
-
-               iin->iin_urefs = IE_BITS_UREFS(bits);
-               iin->iin_object = (natural_t)(uintptr_t)entry->ie_object;
-               iin->iin_next = entry->ie_next;
-               iin->iin_hash = entry->ie_index;
-
-               if (tentry->ite_lchild == ITE_NULL)
-                       iitn->iitn_lchild = MACH_PORT_NULL;
-               else
-                       iitn->iitn_lchild = tentry->ite_lchild->ite_name;
-
-               if (tentry->ite_rchild == ITE_NULL)
-                       iitn->iitn_rchild = MACH_PORT_NULL;
-               else
-                       iitn->iitn_rchild = tentry->ite_rchild->ite_name;
-
-       }
-       ipc_splay_traverse_finish(&space->is_tree);
        is_read_unlock(space);
 
        /* prepare the table out-of-line data for return */
@@ -405,24 +275,9 @@ mach_port_space_info(
                *tableCntp = 0;
        }
 
-       /* prepare the tree out-of-line data for return */
-       if (tree_size > 0) {
-               if (tree_size > infop->iis_tree_size * sizeof(ipc_info_tree_name_t))
-                       bzero((char *)&tree_info[infop->iis_tree_size],
-                             tree_size - infop->iis_tree_size * sizeof(ipc_info_tree_name_t));
-
-               kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(tree_addr),
-                                  vm_map_round_page(tree_addr + tree_size), FALSE);
-               assert(kr == KERN_SUCCESS);
-               kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)tree_addr, 
-                                  (vm_map_size_t)tree_size, TRUE, &copy);
-               assert(kr == KERN_SUCCESS);
-               *treep = (ipc_info_tree_name_t *)copy;
-               *treeCntp = infop->iis_tree_size;
-       } else {
-               *treep = (ipc_info_tree_name_t *)0;
-               *treeCntp = 0;
-       }
+       /* splay tree is obsolete, no work to do... */
+       *treep = (ipc_info_tree_name_t *)0;
+       *treeCntp = 0;
        return KERN_SUCCESS;
 }
 #endif /* MACH_IPC_DEBUG */
@@ -537,6 +392,7 @@ mach_port_kobject(
        ipc_entry_t entry;
        ipc_port_t port;
        kern_return_t kr;
+       mach_vm_address_t kaddr;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -563,10 +419,15 @@ mach_port_kobject(
        }
 
        *typep = (unsigned int) ip_kotype(port);
-       *addrp = (mach_vm_address_t)port->ip_kobject;
+       kaddr = (mach_vm_address_t)port->ip_kobject;
        ip_unlock(port);
-       return KERN_SUCCESS;
 
+       if (0 != kaddr && is_ipc_kobject(*typep))
+               *addrp = VM_KERNEL_ADDRPERM(VM_KERNEL_UNSLIDE(kaddr));
+       else
+               *addrp = 0;
+
+       return KERN_SUCCESS;
 }
 #endif /* MACH_IPC_DEBUG */
 /*
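
Note on the mach_port_kobject hunk above: the kobject pointer is no longer handed back raw. It is first unslid (so debug output is comparable across KASLR'd boots) and then permuted with a boot-random cookie (so the heap address itself is never disclosed), and non-kobject ports now report 0. A minimal sketch of that idiom follows; the names below are illustrative stand-ins, not the kernel's actual macro definitions.

#include <stdint.h>

static uintptr_t vm_kernel_slide;	/* assumed: KASLR slide chosen at boot */
static uintptr_t vm_kernel_addrperm;	/* assumed: random cookie chosen at boot */
static uintptr_t kernel_base, kernel_top;	/* assumed: slid kernel image range */

/* Remove the slide only for addresses inside the slid kernel image. */
static uintptr_t
example_unslide(uintptr_t addr)
{
	if (addr >= kernel_base && addr < kernel_top)
		return addr - vm_kernel_slide;
	return addr;
}

/* Perturb the (unslid) address; zero stays zero so callers can still test it. */
static uintptr_t
example_addrperm(uintptr_t addr)
{
	return addr == 0 ? 0 : addr + vm_kernel_addrperm;
}
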
diff --git a/osfmk/ipc/mach_kernelrpc.c b/osfmk/ipc/mach_kernelrpc.c
new file mode 100644 (file)
index 0000000..7524496
--- /dev/null
+++ b/osfmk/ipc/mach_kernelrpc.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/mach_traps.h>
+#include <mach/mach_vm_server.h>
+#include <mach/mach_port_server.h>
+#include <mach/vm_map.h>
+#include <kern/task.h>
+#include <kern/ipc_tt.h>
+#include <vm/vm_protos.h>
+
+int
+_kernelrpc_mach_vm_allocate_trap(struct _kernelrpc_mach_vm_allocate_trap_args *args)
+{
+       mach_vm_offset_t addr;
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       if (copyin(args->addr, (char *)&addr, sizeof (addr)))
+               goto done;
+
+       rv = mach_vm_allocate(task->map, &addr, args->size, args->flags);
+       if (rv == KERN_SUCCESS)
+               rv = copyout(&addr, args->addr, sizeof (addr));
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_vm_deallocate_trap(struct _kernelrpc_mach_vm_deallocate_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_vm_deallocate(task->map, args->address, args->size);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_vm_protect_trap(struct _kernelrpc_mach_vm_protect_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_vm_protect(task->map, args->address, args->size,
+           args->set_maximum, args->new_protection);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_allocate_trap(struct _kernelrpc_mach_port_allocate_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       mach_port_name_t name;
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_allocate(task->itk_space, args->right, &name);
+       if (rv == KERN_SUCCESS)
+               rv = copyout(&name, args->name, sizeof (name));
+
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_destroy_trap(struct _kernelrpc_mach_port_destroy_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_destroy(task->itk_space, args->name);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_deallocate_trap(struct _kernelrpc_mach_port_deallocate_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_deallocate(task->itk_space, args->name);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_mod_refs_trap(struct _kernelrpc_mach_port_mod_refs_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_mod_refs(task->itk_space, args->name, args->right, args->delta);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+
+int
+_kernelrpc_mach_port_move_member_trap(struct _kernelrpc_mach_port_move_member_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_move_member(task->itk_space, args->member, args->after);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_insert_right_trap(struct _kernelrpc_mach_port_insert_right_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       ipc_port_t port;
+       mach_msg_type_name_t disp;
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = ipc_object_copyin(task->itk_space, args->poly, args->polyPoly,
+           (ipc_object_t *)&port);
+       if (rv != KERN_SUCCESS)
+               goto done;
+       disp = ipc_object_copyin_type(args->polyPoly);
+
+       rv = mach_port_insert_right(task->itk_space, args->name, port, disp);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+int
+_kernelrpc_mach_port_insert_member_trap(struct _kernelrpc_mach_port_insert_member_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_insert_member(task->itk_space, args->name, args->pset);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
+
+
+int
+_kernelrpc_mach_port_extract_member_trap(struct _kernelrpc_mach_port_extract_member_args *args)
+{
+       task_t task = port_name_to_task(args->target);
+       int rv = MACH_SEND_INVALID_DEST;
+
+       if (task != current_task())
+               goto done;
+
+       rv = mach_port_extract_member(task->itk_space, args->name, args->pset);
+       
+done:
+       if (task)
+               task_deallocate(task);
+       return (rv);
+}
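
Every trap in this new file follows one shape: resolve the target port name to a task, refuse unless it is the caller's own task (returning MACH_SEND_INVALID_DEST otherwise), call the corresponding in-kernel service routine, then drop the task reference. The user-space calling convention is unchanged; below is a small sketch of ordinary Mach calls that fast paths like these can back, with no claim about exactly which libsystem version routes them through the traps.

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 0;
	mach_port_name_t name = MACH_PORT_NULL;
	kern_return_t kr;

	/* Self-directed VM call; can be served by _kernelrpc_mach_vm_allocate_trap. */
	kr = mach_vm_allocate(mach_task_self(), &addr, 4096, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return 1;

	/* Self-directed port call; can be served by _kernelrpc_mach_port_allocate_trap. */
	kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &name);
	printf("vm at 0x%llx, port 0x%x (kr=%d)\n",
	    (unsigned long long)addr, (unsigned int)name, kr);

	mach_port_destroy(mach_task_self(), name);
	mach_vm_deallocate(mach_task_self(), addr, 4096);
	return 0;
}
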
index b83ef81913aba960cce03f710de08b1f946a3d2e..2b4c67eb224daff2c1246557924726805dc98b71 100644 (file)
@@ -261,7 +261,7 @@ mach_msg_receive_results(void)
 {
        thread_t          self = current_thread();
        ipc_space_t       space = current_space();
-       vm_map_t          map = current_map();
+       vm_map_t          map = current_map();
 
        ipc_object_t      object = self->ith_object;
        mach_msg_return_t mr = self->ith_state;
@@ -269,10 +269,9 @@ mach_msg_receive_results(void)
        mach_msg_option_t option = self->ith_option;
        ipc_kmsg_t        kmsg = self->ith_kmsg;
        mach_port_seqno_t seqno = self->ith_seqno;
+       mach_msg_trailer_size_t trailer_size;
 
-       mach_msg_max_trailer_t *trailer;
-
-       ipc_object_release(object);
+       io_release(object);
 
        if (mr != MACH_MSG_SUCCESS) {
 
@@ -298,67 +297,8 @@ mach_msg_receive_results(void)
          goto out;
        }
 
-       trailer = (mach_msg_max_trailer_t *)
-                       ((vm_offset_t)kmsg->ikm_header +
-                       round_msg(kmsg->ikm_header->msgh_size));
-       if (option & MACH_RCV_TRAILER_MASK) {
-               trailer->msgh_seqno = seqno;
-               trailer->msgh_context = 
-                       kmsg->ikm_header->msgh_remote_port->ip_context;
-               trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
-
-               if (MACH_RCV_TRAILER_ELEMENTS(option) >= 
-                               MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AV)){
-#if CONFIG_MACF_MACH
-                 if (kmsg->ikm_sender != NULL &&
-                   IP_VALID(kmsg->ikm_header->msgh_remote_port) &&
-                   mac_port_check_method(kmsg->ikm_sender,
-                   &kmsg->ikm_sender->maclabel,
-                   &kmsg->ikm_header->msgh_remote_port->ip_label,
-                   kmsg->ikm_header->msgh_id) == 0)
-                     trailer->msgh_ad = 1;
-                 else
-#endif
-                     trailer->msgh_ad = 0;
-               }
-
-               /*
-                * The ipc_kmsg_t holds a reference to the label of a label
-                * handle, not the port. We must get a reference to the port
-                * and a send right to copyout to the receiver.
-                */
-
-               if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_LABELS)) {
-#if CONFIG_MACF_MACH
-                 if (kmsg->ikm_sender != NULL) {
-                   ipc_labelh_t  lh = kmsg->ikm_sender->label;
-                   kern_return_t kr;
-
-                   ip_lock(lh->lh_port);
-                   lh->lh_port->ip_mscount++;
-                   lh->lh_port->ip_srights++;
-                   ip_reference(lh->lh_port);
-                   ip_unlock(lh->lh_port);
-
-                   kr = ipc_object_copyout(space, (ipc_object_t)lh->lh_port,
-                                           MACH_MSG_TYPE_PORT_SEND, 0,
-                                           &trailer->msgh_labels.sender);
-                   if (kr != KERN_SUCCESS) {
-                     ip_lock(lh->lh_port);
-                     ip_release(lh->lh_port);
-                     ip_check_unlock(lh->lh_port);
-
-                     trailer->msgh_labels.sender = 0;
-                   }
-                 } else {
-                   trailer->msgh_labels.sender = 0;
-                 }
-#else
-                   trailer->msgh_labels.sender = 0;
-#endif
-               }
-       }
-
+       trailer_size = ipc_kmsg_add_trailer(kmsg, space, option, self, seqno, FALSE, 
+                       kmsg->ikm_header->msgh_remote_port->ip_context);
        /*
         * If MACH_RCV_OVERWRITE was specified, try to get the scatter
         * list and verify it against the contents of the message.  If
@@ -379,7 +319,7 @@ mach_msg_receive_results(void)
        if (mr != MACH_MSG_SUCCESS) {
                if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) {
                        if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size +
-                          trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA)
+                          trailer_size) == MACH_RCV_INVALID_DATA)
                                mr = MACH_RCV_INVALID_DATA;
                } 
                else {
@@ -392,7 +332,7 @@ mach_msg_receive_results(void)
        mr = ipc_kmsg_put(msg_addr,
                          kmsg,
                          kmsg->ikm_header->msgh_size + 
-                         trailer->msgh_trailer_size);
+                         trailer_size);
  out:
        return mr;
 }
@@ -577,8 +517,9 @@ msg_receive_error(
        mach_port_seqno_t       seqno,
        ipc_space_t             space)
 {
-       mach_msg_max_trailer_t *trailer;
        mach_vm_address_t       context;
+       mach_msg_trailer_size_t trailer_size;
+       mach_msg_max_trailer_t  *trailer;
 
        context = kmsg->ikm_header->msgh_remote_port->ip_context;
 
@@ -598,17 +539,16 @@ msg_receive_error(
        bcopy(  (char *)&trailer_template, 
                (char *)trailer, 
                sizeof(trailer_template));
-       if (option & MACH_RCV_TRAILER_MASK) {
-               trailer->msgh_context = context; 
-               trailer->msgh_seqno = seqno;
-               trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
-       }
+
+       trailer_size = ipc_kmsg_add_trailer(kmsg, space, 
+                       option, current_thread(), seqno,
+                       TRUE, context);
 
        /*
         * Copy the message to user space
         */
        if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size +
-                       trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA)
+                       trailer_size) == MACH_RCV_INVALID_DATA)
                return(MACH_RCV_INVALID_DATA);
        else 
                return(MACH_MSG_SUCCESS);
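
Both receive paths above now delegate trailer construction to ipc_kmsg_add_trailer() instead of filling the trailer fields inline. What goes into the trailer is still selected by the receiver's mach_msg() options; a hedged user-space sketch requesting an audit trailer on receive:

#include <mach/mach.h>

/* Buffer with room for a bodyless message plus the largest trailer. */
typedef struct {
	mach_msg_header_t	header;
	mach_msg_max_trailer_t	trailer;	/* filled in by the kernel */
} rcv_msg_t;

kern_return_t
receive_with_audit_trailer(mach_port_t port)
{
	rcv_msg_t msg;

	return mach_msg(&msg.header,
	    MACH_RCV_MSG |
	    MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) |
	    MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AUDIT),
	    0, sizeof(msg), port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
}
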
index adfc70bcb3caa556f36d4d764f9b6e429077614f..18c7648928d7eadb67faee3bbe15defedfd3d599 100644 (file)
@@ -221,7 +221,6 @@ mach_port_names(
        mach_port_type_t        **typesp,
        mach_msg_type_number_t  *typesCnt)
 {
-       ipc_tree_entry_t tentry;
        ipc_entry_t table;
        ipc_entry_num_t tsize;
        mach_port_index_t index;
@@ -250,7 +249,7 @@ mach_port_names(
                vm_size_t size_needed;
 
                is_read_lock(space);
-               if (!space->is_active) {
+               if (!is_active(space)) {
                        is_read_unlock(space);
                        if (size != 0) {
                                kmem_free(ipc_kernel_map, addr1, size);
@@ -260,8 +259,7 @@ mach_port_names(
                }
 
                /* upper bound on number of names in the space */
-
-               bound = space->is_table_size + space->is_tree_total;
+               bound = space->is_table_size;
                size_needed = round_page(bound * sizeof(mach_port_name_t));
 
                if (size_needed <= size)
@@ -330,17 +328,6 @@ mach_port_names(
                }
        }
 
-       for (tentry = ipc_splay_traverse_start(&space->is_tree);
-           tentry != ITE_NULL;
-           tentry = ipc_splay_traverse_next(&space->is_tree, FALSE)) {
-               ipc_entry_t entry = &tentry->ite_entry;
-               mach_port_name_t name = tentry->ite_name;
-
-               assert(IE_BITS_TYPE(tentry->ite_bits) != MACH_PORT_TYPE_NONE);
-               mach_port_names_helper(timestamp, entry, name, names,
-                                      types, &actual);
-       }
-       ipc_splay_traverse_finish(&space->is_tree);
        is_read_unlock(space);
 
        if (actual == 0) {
@@ -441,17 +428,16 @@ mach_port_type(
        kr = ipc_right_lookup_write(space, name, &entry);
        if (kr != KERN_SUCCESS)
                return kr;
-       /* space is write-locked and active */
 
+       /* space is write-locked and active */
        kr = ipc_right_info(space, name, entry, typep, &urefs);
-       if (kr == KERN_SUCCESS)
-               is_write_unlock(space);
+       /* space is unlocked */
+
 #if 1
         /* JMM - workaround rdar://problem/9121297 (CF being too picky on these bits). */
         *typep &= ~(MACH_PORT_TYPE_SPREQUEST | MACH_PORT_TYPE_SPREQUEST_DELAYED);
 #endif
 
-       /* space is unlocked */
        return kr;
 }
 
@@ -470,26 +456,21 @@ mach_port_type(
  *             KERN_INVALID_VALUE      The nname isn't a legal name.
  *             KERN_NAME_EXISTS        The nname already denotes a right.
  *             KERN_RESOURCE_SHORTAGE  Couldn't allocate memory.
+ *
+ *      This interface is obsolete and always returns
+ *      KERN_NOT_SUPPORTED.
  */
 
 kern_return_t
 mach_port_rename(
-       ipc_space_t             space,
-       mach_port_name_t        oname,
-       mach_port_name_t        nname)
+       __unused ipc_space_t            space,
+       __unused mach_port_name_t       oname,
+       __unused mach_port_name_t       nname)
 {
-       if (space == IS_NULL)
-               return KERN_INVALID_TASK;
-
-       if (!MACH_PORT_VALID(oname))
-               return KERN_INVALID_NAME;
-
-       if (!MACH_PORT_VALID(nname))
-               return KERN_INVALID_VALUE;
-
-       return ipc_object_rename(space, oname, nname);
+       return KERN_NOT_SUPPORTED;
 }
 
+
 /*
  *     Routine:        mach_port_allocate_name [kernel call]
  *     Purpose:
@@ -650,8 +631,6 @@ mach_port_allocate_full(
        if (qosp->name) {
                if (!MACH_PORT_VALID (*namep))
                        return (KERN_INVALID_VALUE);
-               if (is_fast_space (space))
-                       return (KERN_FAILURE);
        }
 
        if (qosp->prealloc) {
@@ -750,8 +729,7 @@ mach_port_destroy(
                return kr;
        /* space is write-locked and active */
 
-       kr = ipc_right_destroy(space, name, entry); 
-       is_write_unlock(space);
+       kr = ipc_right_destroy(space, name, entry); /* unlocks space */
        return kr;
 }
 
@@ -843,12 +821,13 @@ mach_port_get_refs(
        kr = ipc_right_lookup_write(space, name, &entry);
        if (kr != KERN_SUCCESS)
                return kr;
+
        /* space is write-locked and active */
+       kr = ipc_right_info(space, name, entry, &type, &urefs);
+       /* space is unlocked */
 
-       kr = ipc_right_info(space, name, entry, &type, &urefs); /* unlocks */
        if (kr != KERN_SUCCESS)
-               return kr;      /* space is unlocked */
-       is_write_unlock(space);
+               return kr;      
 
        if (type & MACH_PORT_TYPE(right))
                switch (right) {
@@ -1027,7 +1006,7 @@ kern_return_t
 mach_port_get_context(
        ipc_space_t             space,
        mach_port_name_t        name,
-       mach_vm_address_t       *context)
+       mach_vm_address_t       *context)
 {
        ipc_port_t port;
        kern_return_t kr;
@@ -1068,7 +1047,7 @@ kern_return_t
 mach_port_set_context(
        ipc_space_t             space,
        mach_port_name_t        name,
-       mach_vm_address_t       context)
+       mach_vm_address_t       context)
 {
        ipc_port_t port;
        kern_return_t kr;
@@ -1093,6 +1072,9 @@ mach_port_set_context(
 
 /*
  *     Routine:        mach_port_gst_helper
+ *     Conditions:
+ *             port space is locked for both the receive right and pset
+ *             under observation.
  *     Purpose:
  *             A helper function for mach_port_get_set_status.
  */
@@ -1108,15 +1090,14 @@ mach_port_gst_helper(
        mach_port_name_t name;
 
        assert(port != IP_NULL);
-
-       ip_lock(port);
+       /*
+        * The space lock is held by the calling function,
+        * hence it is OK to read name without the port lock.
+        */
        assert(ip_active(port));
-
        name = port->ip_receiver_name;
        assert(name != MACH_PORT_NULL);
 
-       ip_unlock(port);
-
        if (ipc_pset_member(pset, port)) {
                ipc_entry_num_t actual = *actualp;
 
@@ -1167,7 +1148,6 @@ mach_port_get_set_status(
        size = PAGE_SIZE;       /* initial guess */
 
        for (;;) {
-               ipc_tree_entry_t tentry;
                ipc_entry_t entry, table;
                ipc_entry_num_t tsize;
                mach_port_index_t index;
@@ -1220,21 +1200,6 @@ mach_port_get_set_status(
                        }
                }
 
-               for (tentry = ipc_splay_traverse_start(&space->is_tree);
-                   tentry != ITE_NULL;
-                   tentry = ipc_splay_traverse_next(&space->is_tree,FALSE)) {
-                       ipc_entry_bits_t bits = tentry->ite_bits;
-
-                       assert(IE_BITS_TYPE(bits) != MACH_PORT_TYPE_NONE);
-
-                       if (bits & MACH_PORT_TYPE_RECEIVE) {
-                           ipc_port_t port = (ipc_port_t) tentry->ite_object;
-
-                           mach_port_gst_helper(pset, port, maxnames,
-                                                names, &actual);
-                       }
-               }
-               ipc_splay_traverse_finish(&space->is_tree);
                is_read_unlock(space);
 
                if (actual <= maxnames)
@@ -1311,6 +1276,9 @@ mach_port_move_member(
        ipc_port_t port;
        ipc_pset_t nset;
        kern_return_t kr;
+       wait_queue_link_t wql;
+       queue_head_t links_data;
+       queue_t links = &links_data;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -1320,15 +1288,22 @@ mach_port_move_member(
 
        if (after == MACH_PORT_DEAD)
                return KERN_INVALID_RIGHT;
+       else if (after == MACH_PORT_NULL)
+               wql = WAIT_QUEUE_LINK_NULL;
+       else
+               wql = wait_queue_link_allocate();
+
+       queue_init(links);
 
        kr = ipc_right_lookup_read(space, member, &entry);
        if (kr != KERN_SUCCESS)
-               return kr;
+               goto done;
        /* space is read-locked and active */
 
        if ((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0) {
                is_read_unlock(space);
-               return KERN_INVALID_RIGHT;
+               kr = KERN_INVALID_RIGHT;
+               goto done;
        }
 
        port = (ipc_port_t) entry->ie_object;
@@ -1340,27 +1315,38 @@ mach_port_move_member(
                entry = ipc_entry_lookup(space, after);
                if (entry == IE_NULL) {
                        is_read_unlock(space);
-                       return KERN_INVALID_NAME;
+                       kr = KERN_INVALID_NAME;
+                       goto done;
                }
 
                if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) {
                        is_read_unlock(space);
-                       return KERN_INVALID_RIGHT;
+                       kr = KERN_INVALID_RIGHT;
+                       goto done;
                }
 
                nset = (ipc_pset_t) entry->ie_object;
                assert(nset != IPS_NULL);
        }
        ip_lock(port);
-       ipc_pset_remove_from_all(port);
+       ipc_pset_remove_from_all(port, links);
 
        if (nset != IPS_NULL) {
                ips_lock(nset);
-               kr = ipc_pset_add(nset, port);
+               kr = ipc_pset_add(nset, port, wql);
                ips_unlock(nset);
        }
        ip_unlock(port);
        is_read_unlock(space);
+
+ done:
+       if (kr != KERN_SUCCESS && wql != WAIT_QUEUE_LINK_NULL)
+               wait_queue_link_free(wql);
+       while(!queue_empty(links)) {
+               wql = (wait_queue_link_t) dequeue(links);
+               wait_queue_link_free(wql);
+       }
+
        return kr;
 }
 
@@ -1811,6 +1797,7 @@ mach_port_insert_member(
        ipc_object_t obj;
        ipc_object_t psobj;
        kern_return_t kr;
+       wait_queue_link_t wql;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -1818,19 +1805,26 @@ mach_port_insert_member(
        if (!MACH_PORT_VALID(name) || !MACH_PORT_VALID(psname))
                return KERN_INVALID_RIGHT;
 
+       wql = wait_queue_link_allocate();
+
        kr = ipc_object_translate_two(space, 
                                      name, MACH_PORT_RIGHT_RECEIVE, &obj,
                                      psname, MACH_PORT_RIGHT_PORT_SET, &psobj);
        if (kr != KERN_SUCCESS)
-               return kr;
+               goto done;
 
        /* obj and psobj are locked (and were locked in that order) */
        assert(psobj != IO_NULL);
        assert(obj != IO_NULL);
 
-       kr = ipc_pset_add((ipc_pset_t)psobj, (ipc_port_t)obj);
+       kr = ipc_pset_add((ipc_pset_t)psobj, (ipc_port_t)obj, wql);
        io_unlock(psobj);
        io_unlock(obj);
+
+ done:
+       if (kr != KERN_SUCCESS)
+               wait_queue_link_free(wql);
+
        return kr;
 }
 
@@ -1861,6 +1855,7 @@ mach_port_extract_member(
        ipc_object_t psobj;
        ipc_object_t obj;
        kern_return_t kr;
+       wait_queue_link_t wql = WAIT_QUEUE_LINK_NULL;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -1878,9 +1873,13 @@ mach_port_extract_member(
        assert(psobj != IO_NULL);
        assert(obj != IO_NULL);
 
-       kr = ipc_pset_remove((ipc_pset_t)psobj, (ipc_port_t)obj);
+       kr = ipc_pset_remove((ipc_pset_t)psobj, (ipc_port_t)obj, &wql);
        io_unlock(psobj);
        io_unlock(obj);
+
+       if (wql != WAIT_QUEUE_LINK_NULL)
+               wait_queue_link_free(wql);
+
        return kr;
 }
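
mach_port_move_member, mach_port_insert_member and mach_port_extract_member above now take a wait_queue_link that is allocated before any locks are held (allocation can block) and freed on failure paths, or after unlinking, once the locks are dropped. A generic sketch of that preallocate-outside-the-lock pattern, with illustrative names rather than kernel APIs:

#include <pthread.h>
#include <stdlib.h>

/* Illustrative types, not kernel APIs. */
struct link { struct link *next; };
struct set  { pthread_mutex_t lock; struct link *head; };

int
set_insert(struct set *s, int (*may_insert)(struct set *))
{
	struct link *l = malloc(sizeof(*l));	/* blocking work, done first */
	int inserted = 0;

	if (l == NULL)
		return -1;

	pthread_mutex_lock(&s->lock);
	if (may_insert(s)) {			/* decision made under the lock */
		l->next = s->head;
		s->head = l;
		inserted = 1;
	}
	pthread_mutex_unlock(&s->lock);

	if (!inserted)
		free(l);			/* nothing consumed the link */
	return inserted ? 0 : -1;
}
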
 
@@ -1898,7 +1897,7 @@ task_set_port_space(
        
        is_write_lock(space);
 
-       if (!space->is_active) {
+       if (!is_active(space)) {
                is_write_unlock(space);
                return KERN_INVALID_TASK;
        }
@@ -1937,6 +1936,7 @@ mach_get_label(
        dead = ipc_right_check(space, port, name, entry);
        if (dead) {
                is_write_unlock(space);
+               ip_release(port);
                return KERN_INVALID_RIGHT;
        }
        /* port is now locked */
@@ -1980,6 +1980,7 @@ mach_get_label_text(
        labelstr_t              outlabel)
 {
        ipc_entry_t entry;
+       ipc_port_t port;
        kern_return_t kr;
        struct label *l;
        int dead;
@@ -1994,10 +1995,11 @@ mach_get_label_text(
        if (kr != KERN_SUCCESS)
                return kr;
 
-       dead = ipc_right_check(space, (ipc_port_t) entry->ie_object, name,
-           entry);
+       port = (ipc_port_t)entry->ie_object;
+       dead = ipc_right_check(space, port, name, entry);
        if (dead) {
                is_write_unlock(space);
+               ip_release(port);
                return KERN_INVALID_RIGHT;
        }
        /* object (port) is now locked */
index 3382d9243306a5f2d79a748006b647738b35946d..03e7bb5266e8b032716cf61aa504375410891df4 100644 (file)
@@ -15,10 +15,13 @@ DATAFILES = \
                kdp_callout.h \
                kdp_en_debugger.h
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${DATAFILES} kdp_dyld.h
 
-EXPORT_MI_DIR = kdp
+INSTALL_KF_MI_LIST = ${DATAFILES}
+
+INSTALL_KF_MI_LCL_LIST = ${DATAFILES}
 
+EXPORT_MI_DIR = kdp
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 7eb3459ac6a329d484c0cc7fe26503f77c8d1c68..ec8e73b4e1701d6a35e94fdd38c7e5a3e83e6732 100644 (file)
@@ -27,6 +27,7 @@
  */
 
 #include <mach/mach_types.h>
+#include <mach/vm_param.h>
 #include <sys/appleapiopts.h>
 #include <kern/debug.h>
 #include <uuid/uuid.h>
@@ -46,6 +47,9 @@
 #include <kern/clock.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_pageout.h>
+
+extern int count_busy_buffers(void);   /* must track with declaration in bsd/sys/buf_internal.h */
 
 #define DO_ALIGN       1       /* align all packet data accesses */
 
@@ -1068,19 +1072,35 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) {
 
 
 static void
-kdp_mem_snapshot(struct mem_snapshot *mem_snap)
+kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
 {
-  mem_snap->snapshot_magic = STACKSHOT_MEM_SNAPSHOT_MAGIC;
-  mem_snap->free_pages = vm_page_free_count;
-  mem_snap->active_pages = vm_page_active_count;
-  mem_snap->inactive_pages = vm_page_inactive_count;
-  mem_snap->purgeable_pages = vm_page_purgeable_count;
-  mem_snap->wired_pages = vm_page_wire_count;
-  mem_snap->speculative_pages = vm_page_speculative_count;
-  mem_snap->throttled_pages = vm_page_throttled_count;
+  unsigned int pages_reclaimed;
+  unsigned int pages_wanted;
+  kern_return_t kErr;
+
+  memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
+  memio_snap->free_pages = vm_page_free_count;
+  memio_snap->active_pages = vm_page_active_count;
+  memio_snap->inactive_pages = vm_page_inactive_count;
+  memio_snap->purgeable_pages = vm_page_purgeable_count;
+  memio_snap->wired_pages = vm_page_wire_count;
+  memio_snap->speculative_pages = vm_page_speculative_count;
+  memio_snap->throttled_pages = vm_page_throttled_count;
+  memio_snap->busy_buffer_count = count_busy_buffers();
+  kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
+  if ( ! kErr ) {
+       memio_snap->pages_wanted = (uint32_t)pages_wanted;
+       memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
+       memio_snap->pages_wanted_reclaimed_valid = 1;
+  } else {
+       memio_snap->pages_wanted = 0;
+       memio_snap->pages_reclaimed = 0;
+       memio_snap->pages_wanted_reclaimed_valid = 0;
+  }
 }
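
kdp_mem_and_io_snapshot above reports the pager-pressure numbers together with an explicit validity bit rather than overloading zero values, so consumers can tell "zero pages wanted" apart from "the monitor call failed". A small sketch of that convention, with illustrative names:

#include <stdint.h>

struct pressure_sample {
	uint32_t pages_wanted;
	uint32_t pages_reclaimed;
	uint8_t  valid;
};

static struct pressure_sample
sample_pressure(int (*monitor)(uint32_t *wanted, uint32_t *reclaimed))
{
	struct pressure_sample s = { 0, 0, 0 };

	if (monitor(&s.pages_wanted, &s.pages_reclaimed) == 0)
		s.valid = 1;		/* numbers are meaningful only when set */
	return s;
}
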
 
 
+
 /* 
  * Method for grabbing timer values safely, in the sense that no infinite loop will occur 
  * Certain flavors of the timer_grab function, which would seem to be the thing to use,   
@@ -1126,12 +1146,12 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
        boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
 
        if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
-         if(tracepos + sizeof(struct mem_snapshot) > tracebound) {
+         if(tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) {
            error = -1;
            goto error_exit;
          }
-         kdp_mem_snapshot((struct mem_snapshot *)tracepos);
-         tracepos += sizeof(struct mem_snapshot);
+         kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)tracepos);
+         tracepos += sizeof(struct mem_and_io_snapshot);
        }
 
 walk_list:
@@ -1165,14 +1185,14 @@ walk_list:
                        if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) {
                                // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
                                if (task64) {
-                                       struct dyld_all_image_infos64 task_image_infos;
-                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos64))) {
+                                       struct user64_dyld_all_image_infos task_image_infos;
+                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) {
                                                uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
                                                uuid_info_addr = task_image_infos.uuidArray;
                                        }
                                } else {
-                                       struct dyld_all_image_infos task_image_infos;
-                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos))) {
+                                       struct user32_dyld_all_image_infos task_image_infos;
+                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) {
                                                uuid_info_count = task_image_infos.uuidArrayCount;
                                                uuid_info_addr = task_image_infos.uuidArray;
                                        }
@@ -1205,6 +1225,8 @@ walk_list:
                                task_snap->ss_flags |= kUser64_p;
                        if (!task->active) 
                                task_snap->ss_flags |= kTerminatedSnapshot;
+                       if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended;
+                       if(task->frozen) task_snap->ss_flags |= kFrozen;
 
                        task_snap->suspend_count = task->suspend_count;
                        task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0;
@@ -1217,7 +1239,7 @@ walk_list:
                        tracepos += sizeof(struct task_snapshot);
 
                        if (task_pid > 0 && uuid_info_count > 0) {
-                               uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct dyld_uuid_info64) : sizeof(struct dyld_uuid_info));
+                               uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
                                uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
 
                                if (tracepos + uuid_info_array_size > tracebound) {
@@ -1234,6 +1256,8 @@ walk_list:
                        }
 
                        queue_iterate(&task->threads, thread, thread_t, task_threads){
+                               uint64_t tval;
+
                                if ((thread == NULL) || (ml_nofault_copy((vm_offset_t) thread, (vm_offset_t) &cthread, sizeof(struct thread)) != sizeof(struct thread)))
                                        goto error_exit;
 
@@ -1245,10 +1269,19 @@ walk_list:
                                tsnap = (thread_snapshot_t) tracepos;
                                tsnap->thread_id = thread_tid(thread);
                                tsnap->state = thread->state;
-                               tsnap->wait_event = thread->wait_event;
-                               tsnap->continuation = (uint64_t) (uintptr_t) thread->continuation;
-                               tsnap->user_time = safe_grab_timer_value(&thread->user_timer);
-                               tsnap->system_time = safe_grab_timer_value(&thread->system_timer);
+                               tsnap->sched_pri = thread->sched_pri;
+                               tsnap->sched_flags = thread->sched_flags;
+                               tsnap->wait_event = VM_KERNEL_UNSLIDE(thread->wait_event);
+                               tsnap->continuation = VM_KERNEL_UNSLIDE(thread->continuation);
+                               tval = safe_grab_timer_value(&thread->user_timer);
+                               tsnap->user_time = tval;
+                               tval = safe_grab_timer_value(&thread->system_timer);
+                               if (thread->precise_user_kernel_time) {
+                                       tsnap->system_time = tval;
+                               } else {
+                                       tsnap->user_time += tval;
+                                       tsnap->system_time = 0;
+                               }
                                tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
                                tracepos += sizeof(struct thread_snapshot);
                                tsnap->ss_flags = 0;
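
The thread snapshot above folds system time into user time when the thread lacks precise user/kernel accounting, so the reported total stays correct even when the split between the two timers is unreliable. A small sketch of that attribution:

#include <stdint.h>

static void
attribute_times(uint64_t user_timer, uint64_t system_timer, int precise,
    uint64_t *user_time, uint64_t *system_time)
{
	*user_time = user_timer;
	if (precise) {
		*system_time = system_timer;
	} else {
		/* split is unreliable: report everything as user time */
		*user_time += system_timer;
		*system_time = 0;
	}
}
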
index 910565f2e0831c89103fe1d890c71ae5a9a662d7..18a27c4fb631474db25c5731db6ad9dc0057d303 100644 (file)
 
 /* From dyld/include/dyld_images.h */
 
-struct dyld_uuid_info {
+struct user32_dyld_uuid_info {
        user32_addr_t   imageLoadAddress;       /* base address image is mapped into */
        uuid_t                  imageUUID;                      /* UUID of image */
 };
 
-struct dyld_uuid_info64 {
+struct user64_dyld_uuid_info {
        user64_addr_t   imageLoadAddress;       /* base address image is mapped into */
        uuid_t                  imageUUID;                      /* UUID of image */
 };
 
+struct user32_dyld_image_info {
+       user32_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       user32_addr_t   imageFilePath;          /* path dyld used to load the image */
+       user32_ulong_t  imageFileModDate;       /* time_t of image file */
+};
+
+struct user64_dyld_image_info {
+       user64_addr_t   imageLoadAddress;       /* base address image is mapped into */
+       user64_addr_t   imageFilePath;          /* path dyld used to load the image */
+       user64_ulong_t  imageFileModDate;       /* time_t of image file */
+};
+
 // FIXME: dyld is in C++, and some of the fields in dyld_all_image_infos are C++ 
 // native booleans.  There must be a better way...
 typedef uint8_t bool;
 
-struct dyld_all_image_infos {
+struct user32_dyld_all_image_infos {
        uint32_t                                        version;
        uint32_t                                        infoArrayCount;
        user32_addr_t                           infoArray;
@@ -63,9 +75,10 @@ struct dyld_all_image_infos {
        user32_addr_t                           systemOrderFlag;
        user32_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
        user32_addr_t                           uuidArray;
+       user32_addr_t                           dyldAllImageInfosAddress;
 };
 
-struct dyld_all_image_infos64 {
+struct user64_dyld_all_image_infos {
        uint32_t                                        version;
        uint32_t                                        infoArrayCount;
        user64_addr_t                           infoArray;
@@ -81,4 +94,5 @@ struct dyld_all_image_infos64 {
        user64_addr_t                           systemOrderFlag;
        user64_size_t                           uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
        user64_addr_t                           uuidArray;
-};
\ No newline at end of file
+       user64_addr_t                           dyldAllImageInfosAddress;
+};
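
The renamed user32_/user64_ mirrors make the in-task sizes of dyld's structures explicit, so stackshot can copy the right number of bytes for either pointer width. A sketch using the struct declarations above and an assumed task_is_64bit flag:

#include <stddef.h>

/* Pick the copy size to match the target task's pointer width. */
size_t
all_image_infos_size(int task_is_64bit)
{
	return task_is_64bit
	    ? sizeof(struct user64_dyld_all_image_infos)
	    : sizeof(struct user32_dyld_all_image_infos);
}
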
index caa07dfa5b53129959e630827bae555199742a5b..4415e10f67ca7aaab9d7c587cec8e766f6c5a57a 100644 (file)
@@ -35,7 +35,6 @@
  * Kernel Debugging Protocol UDP implementation.
  */
 
-#include <mach_kdb.h>
 #include <mach/boolean.h>
 #include <mach/mach_types.h>
 #include <mach/exception_types.h>
@@ -302,7 +301,6 @@ inline static void kdp_receive_data(void *packet, unsigned int *len,
 }
 
 
-
 void kdp_register_link(kdp_link_t link, kdp_mode_t mode)
 {
         kdp_en_linkstatus = link;
@@ -322,12 +320,7 @@ kdp_register_send_receive(
 {
        unsigned int    debug = 0;
 
-       debug_log_init();
-
-       kdp_timer_callout_init();
-
        PE_parse_boot_argn("debug", &debug, sizeof (debug));
-       kdp_crashdump_feature_mask = htonl(kdp_crashdump_feature_mask);
 
 
        if (!debug)
@@ -1144,6 +1137,7 @@ kdp_connection_wait(void)
         }
             
        printf("\nWaiting for remote debugger connection.\n");
+       kprintf("\nWaiting for remote debugger connection.\n");
 
 
        if (reattach_wait == 0) {
@@ -1153,9 +1147,6 @@ kdp_connection_wait(void)
                        printf("------------    ----\n");
                        printf("continue....    'c'\n");
                        printf("reboot......    'r'\n");
-#if MACH_KDB
-                       printf("enter kdb...    'k'\n");
-#endif
                }
        } else
                reattach_wait = 0;
@@ -1175,15 +1166,7 @@ kdp_connection_wait(void)
                                        printf("Rebooting...\n");
                                        kdp_machine_reboot();
                                        break;
-#if MACH_KDB
-                               case 'k':
-                                       printf("calling kdb...\n");
-                                       if (kdp_call_kdb())
-                                               return;
-                                       else
-                                               printf("not implemented...\n");
-#endif
-                                       default:
+                               default:
                                        break;
                                }
                        }
@@ -1218,6 +1201,7 @@ kdp_connection_wait(void)
        if (current_debugger == KDP_CUR_DB)
                active_debugger=1;
        printf("Connected to remote debugger.\n");
+       kprintf("Connected to remote debugger.\n");
 }
 
 static void
@@ -1280,13 +1264,16 @@ kdp_raise_exception(
 )
 {
     int                        index;
+    unsigned int       initial_not_in_kdp = not_in_kdp;
 
+    not_in_kdp = 0;
     /* Was a system trace requested ? */
     if (kdp_snapshot && (!panic_active()) && (panic_caller == 0)) {
            stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
            stack_snapshot_buf, stack_snapshot_bufsize,
            stack_snapshot_flags, stack_snapshot_dispatch_offset, 
                &stack_snapshot_bytes_traced);
+           not_in_kdp = initial_not_in_kdp;
            return;
     }
 
@@ -1396,6 +1383,9 @@ kdp_raise_exception(
 exit_raise_exception:
     if (kdp_en_setmode)  
         (*kdp_en_setmode)(FALSE); /* link cleanup */
+
+    not_in_kdp = initial_not_in_kdp;
+
     enable_preemption();
 }
 
@@ -1419,13 +1409,13 @@ create_panic_header(unsigned int request, const char *corename,
        struct ether_header     *eh;
        struct corehdr          *coreh;
        const char              *mode = "octet";
-       char                    modelen  = strlen(mode);
+       char                    modelen  = strlen(mode) + 1;
 
        size_t                  fmask_size = sizeof(KDP_FEATURE_MASK_STRING) + sizeof(kdp_crashdump_feature_mask);
 
        pkt.off = sizeof (struct ether_header);
        pkt.len = (unsigned int)(length + ((request == KDP_WRQ) ? modelen + fmask_size : 0) + 
-           (corename ? strlen(corename): 0) + sizeof(struct corehdr));
+       (corename ? (strlen(corename) + 1 ): 0) + sizeof(struct corehdr));
 
 #if DO_ALIGN
        bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui));
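
The `+ 1` adjustments in create_panic_header above account for the NUL terminators that a TFTP write request carries on the wire (RFC 1350); previously the mode and corename lengths omitted them. A sketch of the resulting payload sizing:

#include <string.h>
#include <stddef.h>

/* RFC 1350 WRQ: | opcode(2) | filename | 0 | mode | 0 | — the terminating
 * NULs are part of the packet, hence the strlen(...) + 1 terms. */
size_t
wrq_payload_size(const char *filename, const char *mode)
{
	return 2 + strlen(filename) + 1 + strlen(mode) + 1;
}
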
@@ -1902,7 +1892,6 @@ kdp_panic_dump(void)
        }
 
        flag_panic_dump_in_progress = TRUE;
-       not_in_kdp = 0;
 
        if (pkt.input)
                kdp_panic("kdp_panic_dump: unexpected pending input packet");
@@ -2024,7 +2013,6 @@ abort_panic_transfer(void)
 {
        flag_panic_dump_in_progress = FALSE;
        flag_dont_abort_panic_dump  = FALSE;
-       not_in_kdp = 1;
        panic_block = 0;
 }
 
@@ -2120,11 +2108,26 @@ kdp_init(void)
                strlcat(kdp_kernelversion_string, kernel_uuid, sizeof(kdp_kernelversion_string));
        }
 
+#if defined(__x86_64__) || defined(__arm__)
+       debug_log_init();
+
+       if (vm_kernel_slide) {
+               char    KASLR_stext[19];
+               strlcat(kdp_kernelversion_string, "; stext=", sizeof(kdp_kernelversion_string));
+               snprintf(KASLR_stext, sizeof(KASLR_stext), "%p", (void *) vm_kernel_stext);
+               strlcat(kdp_kernelversion_string, KASLR_stext, sizeof(kdp_kernelversion_string));
+       }
+#endif
+
        if (debug_boot_arg & DB_REBOOT_POST_CORE)
                kdp_flag |= REBOOT_POST_CORE;
 #if    defined(__x86_64__)     
        kdp_machine_init();
 #endif
+
+       kdp_timer_callout_init();
+       kdp_crashdump_feature_mask = htonl(kdp_crashdump_feature_mask);
+
 #if CONFIG_SERIAL_KDP
        char kdpname[80];
        struct in_addr ipaddr;
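
The 19-byte KASLR_stext buffer above is sized for a 64-bit pointer rendered by "%p": "0x", up to 16 hex digits, and the NUL that snprintf appends, 2 + 16 + 1 = 19. A sketch, assuming the kernel's 0x-prefixed "%p" formatting (the sample address is illustrative):

#include <stdio.h>

static void
format_stext_example(void)
{
	char buf[2 + 16 + 1];	/* "0x" + 16 hex digits + NUL = 19 */

	snprintf(buf, sizeof(buf), "%p", (void *)0xffffff8000200000ULL);
	printf("%s\n", buf);
}
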
index 8beb2959ed7dd0333c84238868da29935398fc7e..ca38128a4b643bd8dcdc68d0474727d2e2fd3f04 100644 (file)
@@ -290,6 +290,7 @@ void
 kdp_machine_reboot(void)
 {
        printf("Attempting system restart...");
+       kprintf("Attempting system restart...");
        /* Call the platform specific restart*/
        if (PE_halt_restart)
                (*PE_halt_restart)(kPERestartCPU);
index 5633c73b9ead13e4b3b510f293683b0778b07b97..58f614d067772633b75121045d14c10d7330a2e0 100644 (file)
 #include <mach/vm_param.h>
 #include <libsa/types.h>
 
-#include <vm/vm_map.h>
-#include <i386/pmap.h>
-
 #include <kdp/kdp_core.h>
 #include <kdp/kdp_internal.h>
+#include <kdp/ml/i386/kdp_x86_common.h>
 #include <mach-o/loader.h>
-#include <mach/vm_map.h>
-#include <mach/vm_statistics.h>
 #include <mach/thread_status.h>
 #include <i386/thread.h>
 
-#include <vm/vm_protos.h>
-#include <vm/vm_kern.h>
-
-int    kern_dump(void);
 int    kdp_dump_trap(int type, x86_saved_state32_t *regs);
 
-typedef struct {
-       int     flavor;                 /* the number for this flavor */
-       mach_msg_type_number_t  count;  /* count of ints in this flavor */
-} mythread_state_flavor_t;
-
-static mythread_state_flavor_t thread_flavor_array [] = { 
+static const x86_state_hdr_t thread_flavor_array [] = { 
        {x86_THREAD_STATE32, x86_THREAD_STATE32_COUNT}
 };
 
-static int kdp_mynum_flavors = 1;
-static int MAX_TSTATE_FLAVORS = 1;
+size_t
+kern_collectth_state_size(void)
+{
+       unsigned int i;
+       size_t tstate_size = 0;
 
-typedef struct {
-       vm_offset_t header; 
-       int  hoffset;
-       mythread_state_flavor_t *flavors;
-       int tstate_size;
-} tir_t;
+       for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++)
+               tstate_size += sizeof(x86_state_hdr_t) +
+                   (thread_flavor_array[i].count * sizeof(int));
 
-char command_buffer[512];
+       return tstate_size;
+}
 
-static void
-kern_collectth_state(thread_t thread, tir_t *t)
+void
+kern_collectth_state(thread_t thread, void *buffer, size_t size)
 {
-       vm_offset_t     header;
-       int  hoffset, i ;
-       mythread_state_flavor_t *flavors;
+       size_t                  hoffset;
+       unsigned int    i;
        struct thread_command   *tc;
+
        /*
         *      Fill in thread command structure.
         */
-       header = t->header;
-       hoffset = t->hoffset;
-       flavors = t->flavors;
+       hoffset = 0;
        
-       tc = (struct thread_command *) (header + hoffset);
+       if (hoffset + sizeof(struct thread_command) > size)
+               return;
+
+       tc = (struct thread_command *) ((uintptr_t)buffer + hoffset);
        tc->cmd = LC_THREAD;
-       tc->cmdsize = sizeof(struct thread_command) + t->tstate_size;
+       tc->cmdsize = sizeof(struct thread_command) + kern_collectth_state_size();
        hoffset += sizeof(struct thread_command);
        /*
         * Follow with a struct thread_state_flavor and
         * the appropriate thread state struct for each
         * thread state flavor.
         */
-       for (i = 0; i < kdp_mynum_flavors; i++) {
-               *(mythread_state_flavor_t *)(header+hoffset) =
-                   flavors[i];
-               hoffset += sizeof(mythread_state_flavor_t);
+       for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++) {
+
+               if (hoffset + sizeof(x86_state_hdr_t) > size)
+                       return;
+
+               *(x86_state_hdr_t *)((uintptr_t)buffer + hoffset) =
+                   thread_flavor_array[i];
+               hoffset += sizeof(x86_state_hdr_t);
+
+
+               if (hoffset + thread_flavor_array[i].count*sizeof(int) > size)
+                       return;
+
                /* Locate and obtain the non-volatile register context
                 * for this kernel thread. This should ideally be
                 * encapsulated in machine_thread_get_kern_state()
                 * but that routine appears to have been co-opted
                 * by CHUD to obtain pre-interrupt state.
                 */
-               if (flavors[i].flavor == x86_THREAD_STATE32) {
-                       x86_thread_state32_t *tstate = (x86_thread_state32_t *) (header + hoffset);
+               if (thread_flavor_array[i].flavor == x86_THREAD_STATE32) {
+                       x86_thread_state32_t *tstate = (x86_thread_state32_t *) ((uintptr_t)buffer + hoffset);
                        vm_offset_t kstack;
+
                        bzero(tstate, x86_THREAD_STATE32_COUNT * sizeof(int));
                        if ((kstack = thread->kernel_stack) != 0){
                                struct x86_kernel_state *iks = STACK_IKS(kstack);
@@ -113,16 +112,15 @@ kern_collectth_state(thread_t thread, tir_t *t)
                                tstate->edi = iks->k_edi;
                                tstate->esi = iks->k_esi;
                                tstate->eip = iks->k_eip;
+                       }
+               } else {
+                       void *tstate = (void *)((uintptr_t)buffer + hoffset);
+
+                       bzero(tstate, thread_flavor_array[i].count*sizeof(int));
                }
-               }
-               else if (machine_thread_get_kern_state(thread,
-                       flavors[i].flavor, (thread_state_t) (header+hoffset),
-                       &flavors[i].count) != KERN_SUCCESS)
-                       printf ("Failure in machine_thread_get_kern_state()\n");
-               hoffset += flavors[i].count*sizeof(int);
-       }
 
-       t->hoffset = hoffset;
+               hoffset += thread_flavor_array[i].count*sizeof(int);
+       }
 }
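
A worked size check for the LC_THREAD layout written above, assuming 32-bit ints and the single x86_THREAD_STATE32 flavor with 16 32-bit registers (i.e. x86_THREAD_STATE32_COUNT == 16):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int
main(void)
{
	size_t state_hdr = 2 * sizeof(uint32_t);		/* flavor + count     =  8 */
	size_t tstate    = state_hdr + 16 * sizeof(int);	/* + registers        = 72 */
	size_t cmdsize   = 2 * sizeof(uint32_t) + tstate;	/* thread_command hdr = 80 */

	/* kern_collectth_state_size() -> 72; tc->cmdsize -> 80 */
	assert(tstate == 72 && cmdsize == 80);
	return 0;
}
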
 
 /* Intended to be called from the kernel trap handler if an unrecoverable fault
@@ -146,224 +144,3 @@ kdp_dump_trap(
        kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state);
        return( 0 );
 }
-
-int
-kern_dump(void)
-{
-       vm_map_t        map;
-       unsigned int    thread_count, segment_count;
-       unsigned int    command_size = 0, header_size = 0, tstate_size = 0;
-
-       uint64_t        hoffset = 0, foffset = 0, nfoffset = 0, max_header_size;
-       vm_offset_t     header, txstart;                                        
-       vm_address_t    vmoffset;                                               
-
-       struct mach_header      *mh;
-       struct segment_command  *sc;
-       vm_size_t       size;
-       vm_prot_t       prot = 0;
-       vm_prot_t       maxprot = 0;
-       mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
-       vm_size_t       nflavors;
-       vm_size_t       i;
-       uint32_t        nesting_depth = 0;
-       kern_return_t   kret = 0;
-       struct vm_region_submap_info_64 vbr;
-       mach_msg_type_number_t  vbrcount  = 0;
-       tir_t tir1;
-
-       int error = 0;
-       int panic_error = 0;
-
-       map = kernel_map;
-
-       thread_count = 1;
-       segment_count = get_vmmap_entries(map); 
-  
-       printf("Kernel map has %d entries\n", segment_count);
-
-       nflavors = kdp_mynum_flavors;
-       bcopy((char *)thread_flavor_array,(char *) flavors,sizeof(thread_flavor_array));
-
-       for (i = 0; i < nflavors; i++)
-               tstate_size += sizeof(mythread_state_flavor_t) +
-                   (flavors[i].count * sizeof(int));
-
-       command_size = (segment_count) *
-           sizeof(struct segment_command) +
-           thread_count * sizeof(struct thread_command) +
-           tstate_size * thread_count;
-
-       header_size = command_size + sizeof(struct mach_header);
-       header = (vm_offset_t) command_buffer;
-       
-       /*
-        *      Set up Mach-O header for currently executing 32 bit kernel.
-        */
-       printf ("Generated Mach-O header size was %d\n", header_size);
-
-       mh = (struct mach_header *) header;
-       mh->magic = MH_MAGIC;
-       mh->cputype = cpu_type();
-       mh->cpusubtype = cpu_subtype();
-       mh->filetype = MH_CORE;
-       mh->ncmds = segment_count + thread_count;
-       mh->sizeofcmds = command_size;
-       mh->flags = 0;
-
-       hoffset = sizeof(struct mach_header);   /* offset into header */
-       foffset = round_page_32(header_size);   /* offset into file */
-       /* Padding */
-       if ((foffset - header_size) < (4*sizeof(struct segment_command))) {
-               foffset += ((4*sizeof(struct segment_command)) - (foffset-header_size)); 
-       }
-
-       max_header_size = foffset;
-
-       vmoffset = VM_MIN_KERNEL_ADDRESS;               /* offset into VM */
-
-       /* Transmit the Mach-O MH_CORE header, and seek forward past the 
-        * area reserved for the segment and thread commands 
-        * to begin data transmission 
-        */
-
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       } 
-
-       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct mach_header), (caddr_t) mh) < 0)) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-       printf ("Transmitting kernel state:\n");
-
-       while ((segment_count > 0) || (kret == KERN_SUCCESS)) {
-               while (1) {
-
-                       /*
-                        *      Get region information for next region.
-                        */
-
-                       vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
-                       if((kret = vm_region_recurse_64(map, 
-                                   &vmoffset, &size, &nesting_depth, 
-                                   (vm_region_recurse_info_t)&vbr,
-                                   &vbrcount)) != KERN_SUCCESS) {
-                               break;
-                       }
-
-                       if(vbr.is_submap) {
-                               nesting_depth++;
-                               continue;
-                       } else {
-                               break;
-                       }
-               }
-
-               if(kret != KERN_SUCCESS)
-                       break;
-
-               prot = vbr.protection;
-               maxprot = vbr.max_protection;
-               /*
-                *      Fill in segment command structure.
-                */
-    
-               if (hoffset > max_header_size)
-                       break;
-               sc = (struct segment_command *) (header);
-               sc->cmd = LC_SEGMENT;
-               sc->cmdsize = sizeof(struct segment_command);
-               sc->segname[0] = 0;
-               sc->vmaddr = vmoffset;
-               sc->vmsize = size;
-               sc->fileoff = (uint32_t) foffset;                               
-               sc->filesize = size;
-               sc->maxprot = maxprot;
-               sc->initprot = prot;
-               sc->nsects = 0;
-
-               if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-                       printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-                       error = panic_error;
-                       goto out;
-               } 
-    
-               if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct segment_command) , (caddr_t) sc)) < 0) {
-                       printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-                       error = panic_error;
-                       goto out;
-               }
-
-               /* Do not transmit memory tagged VM_MEMORY_IOKIT - instead,
-                * seek past that region on the server - this creates a
-                * hole in the file.
-                */
-
-               if ((vbr.user_tag != VM_MEMORY_IOKIT)) {
-      
-                       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
-                               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-                               error = panic_error;
-                               goto out;
-                       }
-
-                       txstart = vmoffset;
-
-                       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, size, (caddr_t) txstart)) < 0)      {
-                               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-                               error = panic_error;
-                               goto out;
-                       }
-               }
-
-               hoffset += sizeof(struct segment_command);
-               foffset += size;
-               vmoffset += size;
-               segment_count--;
-       }
-       tir1.header = header;
-       tir1.hoffset = 0;
-       tir1.flavors = flavors;
-       tir1.tstate_size = tstate_size;
-
-       /* Now send out the LC_THREAD load command, with the thread information
-        * for the current activation.
-        * Note that the corefile can contain LC_SEGMENT commands with file
-        * offsets that point past the edge of the corefile, in the event that
-        * the last N VM regions were all I/O mapped or otherwise
-        * non-transferable memory,  not followed by a normal VM region;
-        * i.e. there will be no hole that reaches to the end of the core file.
-        */
-       kern_collectth_state (current_thread(), &tir1);
-
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-  
-       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-    
-       /* last packet */
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0) {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-out:
-       return (error);
-}
index 6016e48353b44f3bc058da235f1243f57014f47e..b1c4669bacb11cd3b9fc0ba3f5a589666c38dc52 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <vm/vm_map.h>
 #include <i386/pmap.h>
+#include <i386/pmap_internal.h> /* pmap_pde */
 #include <i386/mp.h>
 #include <i386/misc_protos.h>
 #include <i386/pio.h>
 #include <i386/pmap_internal.h>
 
 #include <kdp/kdp_internal.h>
+#include <kdp/kdp_core.h>
+#include <kdp/ml/i386/kdp_x86_common.h>
 #include <mach/vm_map.h>
 
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
 
 #include <machine/pal_routines.h>
+#include <libkern/kernel_mach_header.h>
 
 // #define KDP_VM_READ_DEBUG 1
 // #define KDP_VM_WRITE_DEBUG 1
@@ -56,6 +60,13 @@ boolean_t kdp_trans_off;
 
 static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
 
+int kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
+                                                                                          vm_map_offset_t end,
+                                                                                          void *context);
+int kern_dump_pmap_traverse_send_callback(vm_map_offset_t start,
+                                                                                 vm_map_offset_t end,
+                                                                                 void *context);
+
 pmap_t kdp_pmap = 0;
 
 static addr64_t
@@ -377,6 +388,290 @@ kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, caddr_t data, uint16_t lcpu)
        return KDPERR_NO_ERROR;
 }
 
+int
+pmap_traverse_present_mappings(pmap_t pmap,
+                                                          vm_map_offset_t start,
+                                                          vm_map_offset_t end,
+                                                          pmap_traverse_callback callback,
+                                                          void *context)
+{
+       int ret = KERN_SUCCESS;
+       vm_map_offset_t vcurstart, vcur;
+       boolean_t lastvavalid = FALSE;
+
+       /* Assumes pmap is locked, or being called from the kernel debugger */
+       
+       if (start > end) {
+               return (KERN_INVALID_ARGUMENT);
+       }
+
+       if (start & PAGE_MASK_64) {
+               return (KERN_INVALID_ARGUMENT);
+       }
+
+       for (vcur = vcurstart = start; (ret == KERN_SUCCESS) && (vcur < end); ) {
+               ppnum_t ppn = pmap_find_phys(pmap, vcur);
+
+               if (ppn != 0 && !pmap_valid_page(ppn)) {
+                       /* not something we want */
+                       ppn = 0;
+               }
+
+               if (ppn != 0) {
+                       if (!lastvavalid) {
+                               /* Start of a new virtual region */
+                               vcurstart = vcur;
+                               lastvavalid = TRUE;
+                       }
+               } else {
+                       if (lastvavalid) {
+                               /* end of a virtual region */
+                               
+                               ret = callback(vcurstart, vcur, context);
+
+                               lastvavalid = FALSE;
+                       }
+
+                       /* Try to skip by 2MB if possible */
+                       if (((vcur & PDMASK) == 0) && cpu_64bit) {
+                               pd_entry_t *pde;
+
+                               pde = pmap_pde(pmap, vcur);
+                               if (0 == pde || ((*pde & INTEL_PTE_VALID) == 0)) {
+                                       /* Make sure we wouldn't overflow */
+                                       if (vcur < (end - NBPD)) {
+                                               vcur += NBPD;
+                                               continue;
+                                       }
+                               }
+                       }
+               }
+               
+               vcur += PAGE_SIZE_64;
+       }
+       
+       if ((ret == KERN_SUCCESS)
+               && lastvavalid) {
+               /* send previous run */
+
+               ret = callback(vcurstart, vcur, context);
+       }
+       return (ret);
+}
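
Concretely: if pages are present at 0x1000-0x2fff and at 0x5000-0x5fff with a hole between them, the callback fires twice, first with (0x1000, 0x3000) and then with (0x5000, 0x6000); end addresses are exclusive. On 64-bit, spans covered by an empty 2MB page directory are skipped NBPD bytes at a time rather than page by page.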
+
+struct kern_dump_preflight_context {
+       uint32_t        region_count;
+       uint64_t        dumpable_bytes;
+};
+
+struct kern_dump_send_context {
+       uint64_t        hoffset;
+       uint64_t        foffset;
+       uint64_t        header_size;
+};
+
+int
+kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
+                                                                                  vm_map_offset_t end,
+                                                                                  void *context)
+{
+       struct kern_dump_preflight_context *kdc = (struct kern_dump_preflight_context *)context;
+       int ret = KERN_SUCCESS;
+
+       kdc->region_count++;
+       kdc->dumpable_bytes += (end - start);
+
+       return (ret);
+}
+
+int
+kern_dump_pmap_traverse_send_callback(vm_map_offset_t start,
+                                                                         vm_map_offset_t end,
+                                                                         void *context)
+{
+       struct kern_dump_send_context *kdc = (struct kern_dump_send_context *)context;
+       int ret = KERN_SUCCESS;
+       kernel_segment_command_t sc;
+       vm_size_t size = (vm_size_t)(end - start);
+
+       if (kdc->hoffset + sizeof(sc) > kdc->header_size) {
+               return (KERN_NO_SPACE);
+       }
+
+       /*
+        *      Fill in segment command structure.
+        */
+    
+       sc.cmd = LC_SEGMENT_KERNEL;
+       sc.cmdsize = sizeof(kernel_segment_command_t);
+       sc.segname[0] = 0;
+       sc.vmaddr = (vm_address_t)start;
+       sc.vmsize = size;
+       sc.fileoff = (vm_address_t)kdc->foffset;
+       sc.filesize = size;
+       sc.maxprot = VM_PROT_READ;
+       sc.initprot = VM_PROT_READ;
+       sc.nsects = 0;
+       sc.flags = 0;
+
+       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(kdc->hoffset) , &kdc->hoffset)) < 0) { 
+               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+               goto out;
+       } 
+    
+       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(kernel_segment_command_t) , (caddr_t) &sc)) < 0) {
+               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
+               goto out;
+       }
+       
+       kdc->hoffset += sizeof(kernel_segment_command_t);
+
+       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(kdc->foffset) , &kdc->foffset)) < 0) {
+               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+               goto out;
+       }
+               
+       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, (unsigned int)size, (caddr_t)(uintptr_t)start)) < 0)        {
+               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
+               goto out;
+       }
+       
+       kdc->foffset += size;
+
+out:
+       return (ret);
+}
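
In effect the callback maintains two independent cursors into the core file: hoffset walks the load-command area at the front, while foffset walks the page-aligned data area behind it. A minimal sketch of the per-region advance, assuming LP64 (where kernel_segment_command_t is struct segment_command_64) and a hypothetical region_size:

#include <stdint.h>
#include <mach-o/loader.h>

/* Sketch only: advance both cursors for one dumped region, mirroring the
 * callback above. */
static void
advance_cursors(uint64_t *hoffset, uint64_t *foffset, uint64_t region_size)
{
        *hoffset += sizeof(struct segment_command_64);  /* the LC_SEGMENT_KERNEL command just sent */
        *foffset += region_size;                        /* the raw pages just sent */
}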
+
+int
+kern_dump(void)
+{
+       int                     ret;
+       struct kern_dump_preflight_context kdc_preflight;
+       struct kern_dump_send_context kdc_send;
+       uint32_t        segment_count;
+       size_t          command_size = 0, header_size = 0, tstate_size = 0;
+       uint64_t        hoffset = 0, foffset = 0;
+       kernel_mach_header_t    mh;
+
+
+       kdc_preflight.region_count = 0;
+       kdc_preflight.dumpable_bytes = 0;
+
+       ret = pmap_traverse_present_mappings(kernel_pmap,
+                                                                                VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+                                                                                VM_MAX_KERNEL_ADDRESS,
+                                                                                kern_dump_pmap_traverse_preflight_callback,
+                                                                                &kdc_preflight);
+       if (ret) {
+               printf("pmap traversal failed: %d\n", ret);
+               return (ret);
+       }
+
+       printf("Kernel dump region count: %u\n", kdc_preflight.region_count);
+       printf("Kernel dump byte count: %llu\n", kdc_preflight.dumpable_bytes);
+                       
+       segment_count = kdc_preflight.region_count;
+
+       tstate_size = sizeof(struct thread_command) + kern_collectth_state_size();
+
+       command_size = segment_count * sizeof(kernel_segment_command_t) +
+                               tstate_size;
+
+       header_size = command_size + sizeof(kernel_mach_header_t);
+
+       /*
+        *      Set up Mach-O header for currently executing kernel.
+        */
+       printf ("Generated Mach-O header size was %lu\n", header_size);
+
+       mh.magic = _mh_execute_header.magic;
+       mh.cputype = _mh_execute_header.cputype;
+       mh.cpusubtype = _mh_execute_header.cpusubtype;
+       mh.filetype = MH_CORE;
+       mh.ncmds = segment_count + 1 /* thread */;
+       mh.sizeofcmds = (uint32_t)command_size;
+       mh.flags = 0;
+#if defined(__LP64__)
+       mh.reserved = 0;
+#endif
+
+       hoffset = 0;    /* offset into header */
+       foffset = (uint32_t)round_page(header_size);    /* offset into file */
+
+       /* Transmit the Mach-O MH_CORE header, and seek forward past the 
+        * area reserved for the segment and thread commands 
+        * to begin data transmission 
+        */
+       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
+               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+               goto out;
+       } 
+       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(kernel_mach_header_t), (caddr_t) &mh)) < 0) {
+               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
+               goto out;
+       }
+
+       hoffset += sizeof(kernel_mach_header_t);
+
+       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
+               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+               goto out;
+       }
+
+       printf ("Transmitting kernel state, please wait: ");
+
+       kdc_send.hoffset = hoffset;
+       kdc_send.foffset = foffset;
+       kdc_send.header_size = header_size;
+
+       ret = pmap_traverse_present_mappings(kernel_pmap,
+                                                                                VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+                                                                                VM_MAX_KERNEL_ADDRESS,
+                                                                                kern_dump_pmap_traverse_send_callback,
+                                                                                &kdc_send);
+       if (ret) {
+               kprintf("pmap traversal failed: %d\n", ret);
+               return (ret);
+       }
+
+       /* Reload mutated offsets */
+       hoffset = kdc_send.hoffset;
+       foffset = kdc_send.foffset;
+
+       /*
+        * Now send out the LC_THREAD load command, with the thread information
+        * for the current activation.
+        */
+       if (tstate_size > 0) {
+               char tstate[tstate_size];
+
+               kern_collectth_state (current_thread(), tstate, tstate_size);
+
+               if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset), &hoffset)) < 0) { 
+                       printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+                       goto out;
+               }
+               
+               if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, tstate_size, tstate)) < 0) {
+                       printf ("kdp_send_crashdump_data failed with error %d\n", ret);
+                       goto out;
+               }
+
+               hoffset += tstate_size;
+       }
+
+       /* last packet */
+       if ((ret = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0)
+       {
+               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
+               goto out;
+       }       
+
+out:
+       return (ret);
+}
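
As a back-of-the-envelope check of the sizing above, assuming x86_64 structure sizes (32-byte mach_header_64, 72-byte segment_command_64, 8-byte thread_command, 8-byte x86_state_hdr_t, and the usual 42 ints of x86_THREAD_STATE64) and a hypothetical 1000 regions:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t segment_count = 1000;                        /* hypothetical region count */
        size_t   tstate_size   = 8 + 8 + 42 * sizeof(int);    /* thread_command + state hdr + regs = 184 */
        size_t   command_size  = (size_t)segment_count * 72 + tstate_size;
        size_t   header_size   = command_size + 32;

        /* 1000 regions give a 72216-byte header; the segment data then
         * starts at 73728, the next 4 KB page boundary. */
        printf("header %zu, data at %zu\n", header_size,
            (header_size + 4095) & ~(size_t)4095);
        return 0;
}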
+
+
 pt_entry_t *debugger_ptep;
 vm_map_offset_t debugger_window_kva;
 
@@ -405,7 +700,8 @@ kdp_machine_init(void) {
        debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva);
 
        if (debugger_ptep == NULL) {
-               pmap_expand(kernel_pmap, debugger_window_kva);
+               pmap_expand(kernel_pmap, debugger_window_kva, PMAP_EXPAND_OPTIONS_NONE);
                debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva);
        }
 }
+
diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.h b/osfmk/kdp/ml/i386/kdp_x86_common.h
new file mode 100644 (file)
index 0000000..8c1a7ce
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KDP_X86_COMMON_H_
+#define _KDP_X86_COMMON_H_
+
+#include <libsa/types.h>
+#include <mach/machine/vm_types.h>
+#include <i386/pmap.h>
+
+/*
+ * Attempt to discover all virtually contiguous ranges in a pmap
+ * that have valid mappings to DRAM (not MMIO device memory for example).
+ * Results are returned via a callback. If the callback returns an error,
+ * traversal is aborted.
+ */
+typedef int (*pmap_traverse_callback)(vm_map_offset_t start,
+                                                                         vm_map_offset_t end,
+                                                                         void *context);
+
+extern int pmap_traverse_present_mappings(pmap_t pmap,
+                                                                                 vm_map_offset_t start,
+                                                                                 vm_map_offset_t end,
+                                                                                 pmap_traverse_callback callback,
+                                                                                 void *context);
+
+
+extern int kern_dump(void);
+extern size_t kern_collectth_state_size(void);
+extern void kern_collectth_state(thread_t thread, void *buffer, size_t size);
+
+#endif /* _KDP_X86_COMMON_H_ */
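
A minimal sketch of driving this interface, modeled on the preflight pass in kdp_x86_common.c; the tally struct and callback name below are hypothetical:

#include <mach/kern_return.h>
#include <kdp/ml/i386/kdp_x86_common.h>

struct tally {
        uint32_t ranges;
        uint64_t bytes;
};

static int
tally_cb(vm_map_offset_t start, vm_map_offset_t end, void *context)
{
        struct tally *t = (struct tally *)context;
        t->ranges++;
        t->bytes += (end - start);
        return KERN_SUCCESS;    /* anything else aborts the traversal */
}

/* From debugger context, with the pmap quiesced:
 *      struct tally t = { 0, 0 };
 *      pmap_traverse_present_mappings(kernel_pmap,
 *          VM_MIN_KERNEL_AND_KEXT_ADDRESS, VM_MAX_KERNEL_ADDRESS,
 *          tally_cb, &t);
 */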
index d7e071569c08ade6e13b56b213ab7a6da42c8058..d8587db3dd4f89c3898faa1d19d50df11710d908 100644 (file)
@@ -606,6 +606,7 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf
        else {
                stackptr = STACK_IKS(thread->kernel_stack)->k_rbp;
                init_rip = STACK_IKS(thread->kernel_stack)->k_rip;
+               init_rip = VM_KERNEL_UNSLIDE(init_rip);
                kdp_pmap = 0;
        }
 
@@ -635,6 +636,9 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf
                if (machine_read64(stackptr + RETURN_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) {
                        break;
                }
+               if (!user_p)
+                       *tracebuf = VM_KERNEL_UNSLIDE(*tracebuf);
+
                tracebuf++;
 
                prevsp = stackptr;
index a76167621e6fed423dd2622427d6fab550a51e54..2cd1c5cbaa3b11eb99962ea25b2460d219f5c637 100644 (file)
 #include <mach/vm_param.h>
 #include <libsa/types.h>
 
-#include <vm/vm_map.h>
-#include <i386/pmap.h>
-
 #include <kdp/kdp_core.h>
 #include <kdp/kdp_internal.h>
+#include <kdp/ml/i386/kdp_x86_common.h>
 #include <mach-o/loader.h>
-#include <mach/vm_map.h>
-#include <mach/mach_vm.h> 
-#include <mach/vm_statistics.h>
 #include <mach/thread_status.h>
 #include <i386/thread.h>
 
-#include <vm/vm_protos.h>
-#include <vm/vm_kern.h>
-
-int    kern_dump(void);
 int    kdp_dump_trap(int type, x86_saved_state64_t *regs);
 
-typedef struct {
-       int     flavor;                 /* the number for this flavor */
-       mach_msg_type_number_t  count;  /* count of ints in this flavor */
-} mythread_state_flavor_t;
-
-static mythread_state_flavor_t thread_flavor_array [] = { 
+static const x86_state_hdr_t thread_flavor_array [] = { 
        {x86_THREAD_STATE64, x86_THREAD_STATE64_COUNT}
 };
 
-static int kdp_mynum_flavors = 1;
-static int MAX_TSTATE_FLAVORS = 1;
+size_t
+kern_collectth_state_size(void)
+{
+       unsigned int i;
+       size_t tstate_size = 0;
 
-typedef struct {
-       vm_offset_t header; 
-       int  hoffset;
-       mythread_state_flavor_t *flavors;
-       int tstate_size;
-} tir_t;
+       for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++)
+               tstate_size += sizeof(x86_state_hdr_t) +
+                   (thread_flavor_array[i].count * sizeof(int));
 
-char command_buffer[512];
+       return tstate_size;
+}
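
With the single x86_THREAD_STATE64 flavor above, and assuming the usual 21-register, 168-byte x86_thread_state64_t, this returns 8 + 168 = 176 bytes; kern_dump() then adds the 8-byte struct thread_command in front of it, for a 184-byte LC_THREAD payload.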
 
-static void
-kern_collectth_state(thread_t thread, tir_t *t)
+void
+kern_collectth_state(thread_t thread, void *buffer, size_t size)
 {
-       vm_offset_t     header;
-       int  hoffset, i ;
-       mythread_state_flavor_t *flavors;
+       size_t                  hoffset;
+       unsigned int    i;
        struct thread_command   *tc;
+
        /*
         *      Fill in thread command structure.
         */
-       header = t->header;
-       hoffset = t->hoffset;
-       flavors = t->flavors;
+       hoffset = 0;
        
-       tc = (struct thread_command *) (header + hoffset);
+       if (hoffset + sizeof(struct thread_command) > size)
+               return;
+
+       tc = (struct thread_command *) ((uintptr_t)buffer + hoffset);
        tc->cmd = LC_THREAD;
-       tc->cmdsize = (uint32_t)sizeof(struct thread_command) + t->tstate_size;
-       hoffset += (uint32_t)sizeof(struct thread_command);
+       tc->cmdsize = (uint32_t)(sizeof(struct thread_command) + kern_collectth_state_size());
+       hoffset += sizeof(struct thread_command);
        /*
         * Follow with a struct thread_state_flavor and
         * the appropriate thread state struct for each
         * thread state flavor.
         */
-       for (i = 0; i < kdp_mynum_flavors; i++) {
-               *(mythread_state_flavor_t *)(header+hoffset) =
-                   flavors[i];
-               hoffset += (uint32_t)sizeof(mythread_state_flavor_t);
+       for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++) {
+
+               if (hoffset + sizeof(x86_state_hdr_t) > size)
+                       return;
+
+               *(x86_state_hdr_t *)((uintptr_t)buffer + hoffset) =
+                   thread_flavor_array[i];
+               hoffset += sizeof(x86_state_hdr_t);
+
+
+               if (hoffset + thread_flavor_array[i].count*sizeof(int) > size)
+                       return;
+
                /* Locate and obtain the non-volatile register context
                 * for this kernel thread. This should ideally be
                 * encapsulated in machine_thread_get_kern_state()
                 * but that routine appears to have been co-opted
                 * by CHUD to obtain pre-interrupt state.
                 */
-               if (flavors[i].flavor == x86_THREAD_STATE64) {
-                       x86_thread_state64_t *tstate = (x86_thread_state64_t *) (header + hoffset);
+               if (thread_flavor_array[i].flavor == x86_THREAD_STATE64) {
+                       x86_thread_state64_t *tstate = (x86_thread_state64_t *) ((uintptr_t)buffer + hoffset);
                        vm_offset_t kstack;
                        x86_saved_state64_t *cpstate = current_cpu_datap()->cpu_fatal_trap_state;
+
                        bzero(tstate, x86_THREAD_STATE64_COUNT * sizeof(int));
                        if ((current_thread() == thread) && (cpstate != NULL)) {
                                tstate->rax = cpstate->rax;
@@ -140,15 +138,14 @@ kern_collectth_state(thread_t thread, tir_t *t)
                                tstate->r15 = iks->k_r15;
                                tstate->rip = iks->k_rip;
                        }
+               } else {
+                       void *tstate = (void *)((uintptr_t)buffer + hoffset);
+
+                       bzero(tstate, thread_flavor_array[i].count*sizeof(int));
                }
-               else if (machine_thread_get_kern_state(thread,
-                       flavors[i].flavor, (thread_state_t) (header+hoffset),
-                       &flavors[i].count) != KERN_SUCCESS)
-                       printf ("Failure in machine_thread_get_kern_state()\n");
-               hoffset += (uint32_t)(flavors[i].count*sizeof(int));
-       }
 
-       t->hoffset = hoffset;
+               hoffset += thread_flavor_array[i].count*sizeof(int);
+       }
 }
 
 /* Intended to be called from the kernel trap handler if an unrecoverable fault
@@ -172,225 +169,3 @@ kdp_dump_trap(
        kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state);
        return( 0 );
 }
-
-int
-kern_dump(void)
-{
-       vm_map_t        map;
-       unsigned int    thread_count, segment_count;
-       unsigned int    command_size = 0, header_size = 0, tstate_size = 0;
-       uint64_t        hoffset = 0, foffset = 0, nfoffset = 0;
-       unsigned int    max_header_size = 0;
-       vm_offset_t     header, txstart;
-       vm_map_offset_t vmoffset;
-       struct mach_header_64           *mh64;
-       struct segment_command_64       *sc64;
-       mach_vm_size_t  size = 0;
-       vm_prot_t       prot = 0;
-       vm_prot_t       maxprot = 0;
-       mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
-       vm_size_t       nflavors;
-       vm_size_t       i;
-       uint32_t        nesting_depth = 0;
-       kern_return_t   kret = 0;
-       struct vm_region_submap_info_64 vbr;
-       mach_msg_type_number_t  vbrcount  = 0;
-       tir_t tir1;
-
-       int error = 0;
-       int panic_error = 0;
-
-       map = kernel_map;
-
-       thread_count = 1;
-       segment_count = get_vmmap_entries(map); 
-  
-       printf("Kernel map has %d entries\n", segment_count);
-
-       nflavors = kdp_mynum_flavors;
-       bcopy((char *)thread_flavor_array,(char *) flavors,sizeof(thread_flavor_array));
-
-       for (i = 0; i < nflavors; i++)
-               tstate_size += (uint32_t)(sizeof(mythread_state_flavor_t) +
-                   (flavors[i].count * sizeof(int)));
-
-       command_size = (uint32_t)((segment_count) *
-           sizeof(struct segment_command_64) +
-           thread_count * sizeof(struct thread_command) +
-           tstate_size * thread_count);
-
-       header_size = command_size + (uint32_t)sizeof(struct mach_header_64);
-       header = (vm_offset_t) command_buffer;
-       
-       /*
-        *      Set up Mach-O header for currently executing 32 bit kernel.
-        */
-       printf ("Generated Mach-O header size was %d\n", header_size);
-
-       mh64 = (struct mach_header_64 *) header;
-       mh64->magic = MH_MAGIC_64;
-       mh64->cputype = cpu_type();
-       mh64->cpusubtype = cpu_subtype();
-       mh64->filetype = MH_CORE;
-       mh64->ncmds = segment_count + thread_count;
-       mh64->sizeofcmds = command_size;
-       mh64->flags = 0;
-       mh64->reserved = 0;
-
-       hoffset = sizeof(struct mach_header_64);        /* offset into header */
-       foffset = (uint32_t)round_page(header_size);    /* offset into file */
-       /* Padding */
-       if ((foffset - header_size) < (4*sizeof(struct segment_command_64))) {
-               foffset += (uint32_t)((4*sizeof(struct segment_command_64)) - (foffset-header_size)); 
-       }
-
-       max_header_size = (unsigned int)foffset;
-
-       vmoffset = vm_map_min(map);
-
-       /* Transmit the Mach-O MH_CORE header, and seek forward past the 
-        * area reserved for the segment and thread commands 
-        * to begin data transmission 
-        */
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       } 
-
-       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct mach_header_64), (caddr_t) mh64) < 0)) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-       printf ("Transmitting kernel state, please wait: ");
-
-       while ((segment_count > 0) || (kret == KERN_SUCCESS)){
-
-               while (1) {
-
-                       /*
-                        *      Get region information for next region.
-                        */
-
-                       vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
-                       if((kret = mach_vm_region_recurse(map, 
-                                   &vmoffset, &size, &nesting_depth, 
-                                   (vm_region_recurse_info_t)&vbr,
-                                   &vbrcount)) != KERN_SUCCESS) {
-                               break;
-                       }
-
-                       if(vbr.is_submap) {
-                               nesting_depth++;
-                               continue;
-                       } else {
-                               break;
-                       }
-               }
-
-               if(kret != KERN_SUCCESS)
-                       break;
-
-               prot = vbr.protection;
-               maxprot = vbr.max_protection;
-
-               /*
-                *      Fill in segment command structure.
-                */
-    
-               if (hoffset > max_header_size)
-                       break;
-               sc64 = (struct segment_command_64 *) (header);
-               sc64->cmd = LC_SEGMENT_64;
-               sc64->cmdsize = sizeof(struct segment_command_64);
-               sc64->segname[0] = 0;
-               sc64->vmaddr = vmoffset;
-               sc64->vmsize = size;
-               sc64->fileoff = foffset;
-               sc64->filesize = size;
-               sc64->maxprot = maxprot;
-               sc64->initprot = prot;
-               sc64->nsects = 0;
-
-               if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-                       printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-                       error = panic_error;
-                       goto out;
-               } 
-    
-               if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct segment_command_64) , (caddr_t) sc64)) < 0) {
-                       printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-                       error = panic_error;
-                       goto out;
-               }
-
-               /* Do not transmit memory tagged VM_MEMORY_IOKIT - instead,
-                * seek past that region on the server - this creates a
-                * hole in the file.
-                */
-
-               if ((vbr.user_tag != VM_MEMORY_IOKIT)) {
-
-                       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
-                               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-                               error = panic_error;
-                               goto out;
-                       }
-
-                       txstart = vmoffset;
-
-                       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, (unsigned int)size, (caddr_t) txstart)) < 0)        {
-                               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-                               error = panic_error;
-                               goto out;
-                       }
-               }
-
-               hoffset += (unsigned int)sizeof(struct segment_command_64);
-               foffset += (unsigned int)size;
-               vmoffset += size;
-               segment_count--;
-       }
-       tir1.header = header;
-       tir1.hoffset = 0;
-       tir1.flavors = flavors;
-       tir1.tstate_size = tstate_size;
-
-       /* Now send out the LC_THREAD load command, with the thread information
-        * for the current activation.
-        * Note that the corefile can contain LC_SEGMENT commands with file
-        * offsets that point past the edge of the corefile, in the event that
-        * the last N VM regions were all I/O mapped or otherwise
-        * non-transferable memory,  not followed by a normal VM region;
-        * i.e. there will be no hole that reaches to the end of the core file.
-        */
-       kern_collectth_state (current_thread(), &tir1);
-
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-  
-       if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-    
-       /* last packet */
-       if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0)
-       {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-               error = panic_error;
-               goto out;
-       }
-out:
-       return (error);
-}
index cf8f5539e94692819aa259123d8fd6d5f77b3506..846b95682914047e53fda10594f90d9c30db773b 100644 (file)
@@ -24,6 +24,7 @@ EXPORT_ONLY_FILES = \
        kalloc.h \
        kext_alloc.h \
        kern_types.h \
+       ledger.h \
        lock.h \
        locks.h \
        host.h \
index eb509545972c7819e9a21e75ca932325016bfc8c..002482dea8946f0bbd1892efb9ceae5d391073ac 100644 (file)
@@ -528,7 +528,10 @@ affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
         */
        queue_iterate(&aspc->aspc_affinities,
                         aset, affinity_set_t, aset_affinities) {
-               set_occupancy[aset->aset_num]++;
+               if(aset->aset_num < num_cpu_asets)
+                       set_occupancy[aset->aset_num]++;
+               else
+                       panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
        }
 
        /*
index e7b895598ecbaf01560420bece5f61b29d5e2e37..9f6757a6bd1335517c83f750ee26fadbc22dc951 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <kern/processor.h>
 #include <kern/spl.h>
 #include <kern/wait_queue.h>
+#include <kern/ledger.h>
 #include <mach/policy.h>
 #include <machine/trap.h> // for CHUD AST hook
 #include <machine/pal_routines.h>
-
+#include <security/mac_mach_internal.h> // for MACF AST hook
 
 volatile perfASTCallback perfASTHook;
 
@@ -91,6 +92,8 @@ ast_init(void)
 {
 }
 
+extern void chudxnu_thread_ast(thread_t); // XXX this should probably be in a header...
+
 /*
  * Called at splsched.
  */
@@ -157,13 +160,35 @@ ast_taken(
                                bsd_ast(thread);
                        }
 #endif
-
+#if CONFIG_MACF
+                       /*
+                        * Handle MACF hook.
+                        */
+                       if (reasons & AST_MACF) {
+                               thread_ast_clear(thread, AST_MACF);
+                               mac_thread_userret(thread);
+                       }
+#endif
                        /* 
                         * Thread APC hook.
                         */
                        if (reasons & AST_APC)
                                act_execute_returnhandlers();
 
+                       if (reasons & AST_LEDGER) {
+                               thread_ast_clear(thread, AST_LEDGER);
+                               ledger_ast(thread);
+                       }
+
+                       /*
+                        * Kernel Profiling Hook
+                        */
+                       if (reasons & AST_KPERF)
+                       {
+                               thread_ast_clear(thread, AST_KPERF);
+                               chudxnu_thread_ast(thread);
+                       }
+
                        ml_set_interrupts_enabled(FALSE);
 
                        /* 
index b6f42e4ec36da25ff3e2d4df830d0a133a3dd1a4..713c6158cfcaa28c4638189ae6ab0e5055674cb6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -89,12 +89,16 @@ typedef uint32_t            ast_t;
 #define AST_HANDOFF            0x08
 #define AST_YIELD              0x10
 #define AST_APC                        0x20    /* migration APC hook */
+#define AST_LEDGER             0x40
+
 /*
  * JMM - This is here temporarily. AST_BSD is used to simulate a
  * general purpose mechanism for setting asynchronous procedure calls
  * from the outside.
  */
 #define AST_BSD                        0x80
+#define AST_KPERF              0x100   /* kernel profiling */
+#define        AST_MACF                0x200   /* MACF user ret pending */
 
 #define AST_NONE               0x00
 #define AST_ALL                        (~AST_NONE)
@@ -140,7 +144,7 @@ extern ast_t        *ast_pending(void);
 #define MACHINE_AST_PER_THREAD  0
 #endif
 
-#define AST_PER_THREAD (AST_APC | AST_BSD | MACHINE_AST_PER_THREAD)
+#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER)
 /*
  *     ast_pending(), ast_on(), ast_off(), ast_context(), and ast_propagate()
  *     assume splsched.
index 7e8ee9c3007bfa9299511f5b106b5f7e3396a427..9d3bd7ab9dd5489585485f883908cc24b05ad189 100644 (file)
@@ -86,11 +86,11 @@ audit_session_mksend(struct auditinfo_addr *aia_p, ipc_port_t *sessionport)
 
                audit_session_aiaref(aia_p);
 
-               /* Need a send-once right for the target of the notification */
-               notifyport = ipc_port_make_sonce(port);
 
-               /* Request a no-senders notification (at the new make-send threshold) */
                ip_lock(port);
+               /* Need a send-once right for the target of the notification */
+               notifyport = ipc_port_make_sonce_locked(port);
+               /* Request a no-senders notification (at the new make-send threshold) */
                ipc_port_nsrequest(port, port->ip_mscount, notifyport, &notifyport);
                /* port unlocked */
 
@@ -175,9 +175,7 @@ audit_session_nosenders(mach_msg_header_t *msg)
         * request, re-arm the notification with the new threshold.
         */
        if (port->ip_mscount > notification->not_count) {
-               ip_unlock(port);
-               notifyport = ipc_port_make_sonce(port);
-               ip_lock(port);
+               notifyport = ipc_port_make_sonce_locked(port);
                ipc_port_nsrequest(port, port->ip_mscount, notifyport, &notifyport);
                /* port unlocked */
 
index 07de86ef02ce61427bcf37915dc069c542dbd094..ea5f9e13912bee4c7fcb7c081cb1bc0484bb2354 100644 (file)
@@ -43,7 +43,6 @@
 #include <vm/vm_protos.h> /* last */
 
 #undef thread_should_halt
-#undef ipc_port_release
 
 /* BSD KERN COMPONENT INTERFACE */
 
@@ -54,10 +53,8 @@ extern unsigned int not_in_kdp; /* Skip acquiring locks if we're in kdp */
 thread_t get_firstthread(task_t);
 int get_task_userstop(task_t);
 int get_thread_userstop(thread_t);
-boolean_t thread_should_abort(thread_t);
 boolean_t current_thread_aborted(void);
 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
-void ipc_port_release(ipc_port_t);
 kern_return_t get_signalact(task_t , thread_t *, int);
 int get_vmsubmap_entries(vm_map_t, vm_object_offset_t, vm_object_offset_t);
 void syscall_exit_funnelcheck(void);
@@ -218,6 +215,11 @@ check_actforsig(
        return (result);
 }
 
+ledger_t  get_task_ledger(task_t t)
+{
+       return(t->ledger);
+}
+
 /*
  * This is only safe to call from a thread executing in
  * in the task's context or if the task is locked  Otherwise,
@@ -302,8 +304,9 @@ swap_task_map(task_t task, thread_t thread, vm_map_t map, boolean_t doswitch)
        mp_disable_preemption();
        old_map = task->map;
        thread->map = task->map = map;
-       if (doswitch)
+       if (doswitch) {
                pmap_switch(map->pmap);
+       }
        mp_enable_preemption();
        task_unlock(task);
 
@@ -458,6 +461,26 @@ get_thread_userstop(
        return(th->user_stop_count);
 }
 
+/*
+ *
+ */
+boolean_t
+get_task_pidsuspended(
+       task_t task)
+{
+    return (task->pidsuspended);
+}
+
+/*
+ *
+ */
+boolean_t 
+get_task_frozen(
+       task_t task)
+{
+    return (task->frozen);   
+}
+
 /*
  *
  */
@@ -519,12 +542,6 @@ task_act_iterate_wth_args(
        task_unlock(task);
 }
 
-void
-ipc_port_release(
-       ipc_port_t port)
-{
-       ipc_object_release(&(port)->ip_object);
-}
 
 void
 astbsd_on(void)
@@ -567,6 +584,10 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
 
        queue_iterate(&task->threads, thread, thread_t, task_threads) {
                uint64_t    tval;
+               spl_t x;
+
+               x = splsched();
+               thread_lock(thread);
 
                if ((thread->state & TH_RUN) == TH_RUN)
                        numrunning++;
@@ -576,11 +597,21 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
                tinfo.total_user += tval;
 
                tval = timer_grab(&thread->system_timer);
-               tinfo.threads_system += tval;
-               tinfo.total_system += tval;
+
+               if (thread->precise_user_kernel_time) {
+                       tinfo.threads_system += tval;
+                       tinfo.total_system += tval;
+               } else {
+                       /* system_timer may represent either sys or user */
+                       tinfo.threads_user += tval;
+                       tinfo.total_user += tval;
+               }
 
                syscalls_unix += thread->syscalls_unix;
                syscalls_mach += thread->syscalls_mach;
+
+               thread_unlock(thread);
+               splx(x);
        }
 
        ptinfo->pti_total_system = tinfo.total_system;
@@ -604,19 +635,21 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
 }
 
 int 
-fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_internal * ptinfo, void * vpp, int *vidp)
+fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_threadinfo_internal * ptinfo, void * vpp, int *vidp)
 {
        thread_t  thact;
        int err=0;
        mach_msg_type_number_t count;
        thread_basic_info_data_t basic_info;
        kern_return_t kret;
+       uint64_t addr = 0;
 
        task_lock(task);
 
        for (thact  = (thread_t)queue_first(&task->threads);
                        !queue_end(&task->threads, (queue_entry_t)thact); ) {
-               if (thact->machine.cthread_self == thaddr)
+               addr = (thuniqueid==0)?thact->machine.cthread_self: thact->thread_id;
+               if (addr == thaddr)
                {
                
                        count = THREAD_BASIC_INFO_COUNT;
@@ -624,14 +657,9 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_interna
                                err = 1;
                                goto out;       
                        }
-#if 0
-                       ptinfo->pth_user_time = timer_grab(&basic_info.user_time);
-                       ptinfo->pth_system_time = timer_grab(&basic_info.system_time);
-#else
                        ptinfo->pth_user_time = ((basic_info.user_time.seconds * NSEC_PER_SEC) + (basic_info.user_time.microseconds * NSEC_PER_USEC));
                        ptinfo->pth_system_time = ((basic_info.system_time.seconds * NSEC_PER_SEC) + (basic_info.system_time.microseconds * NSEC_PER_USEC));
 
-#endif
                        ptinfo->pth_cpu_usage = basic_info.cpu_usage;
                        ptinfo->pth_policy = basic_info.policy;
                        ptinfo->pth_run_state = basic_info.run_state;
index e9c487ad64f211dea31495f1d384e08e05144965..c7aaa6faf743290b9a0727ffd120815607a4e5d0 100644 (file)
@@ -126,6 +126,9 @@ static thread_call_data_t   calend_wakecall;
 
 extern void    IOKitResetTime(void);
 
+void _clock_delay_until_deadline(uint64_t              interval,
+                                                                uint64_t               deadline);
+
 static uint64_t                clock_boottime;                         /* Seconds boottime epoch */
 
 #define TIME_ADD(rsecs, secs, rfrac, frac, unit)       \
@@ -773,6 +776,15 @@ mach_wait_until_continue(
        /*NOTREACHED*/
 }
 
+/*
+ * mach_wait_until_trap: Suspend execution of calling thread until the specified time has passed
+ *
+ * Parameters:    args->deadline          Absolute time (mach_absolute_time() units) to wait until
+ *
+ * Returns:        0                      Success
+ *                !0                      Failure
+ *
+ */
 kern_return_t
 mach_wait_until_trap(
        struct mach_wait_until_trap_args        *args)
@@ -796,27 +808,44 @@ clock_delay_until(
        if (now >= deadline)
                return;
 
-       if (    (deadline - now) < (8 * sched_cswtime)  ||
+       _clock_delay_until_deadline(deadline - now, deadline);
+}
+
+/*
+ * Preserve the original precise interval that the client
+ * requested for comparison to the spin threshold.
+ */
+void
+_clock_delay_until_deadline(
+       uint64_t                interval,
+       uint64_t                deadline)
+{
+
+       if (interval == 0)
+               return;
+
+       if (    ml_delay_should_spin(interval)  ||
                        get_preemption_level() != 0                             ||
-                       ml_get_interrupts_enabled() == FALSE    )
+                       ml_get_interrupts_enabled() == FALSE    ) {
                machine_delay_until(deadline);
-       else {
-               assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime);
+       } else {
+               assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
 
                thread_block(THREAD_CONTINUE_NULL);
        }
 }
 
+
 void
 delay_for_interval(
        uint32_t                interval,
        uint32_t                scale_factor)
 {
-       uint64_t                end;
+       uint64_t                abstime;
 
-       clock_interval_to_deadline(interval, scale_factor, &end);
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
 
-       clock_delay_until(end);
+       _clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
 }
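
A minimal usage sketch, assuming kernel context and NSEC_PER_USEC from <mach/clock_types.h>: the precise interval the caller asked for (here 50 microseconds), rather than a recomputed deadline delta, is what ml_delay_should_spin() now gets to judge.

#include <mach/clock_types.h>
#include <kern/clock.h>

static void
settle_delay(void)
{
        /* Spin or block for 50 us, per ml_delay_should_spin(). */
        delay_for_interval(50, NSEC_PER_USEC);
}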
 
 void
index d456198dbe605abfeee40e099e6afcddd74c4778..ed8218d17d8a2196836ffd4182299aa37dbb0661 100644 (file)
@@ -120,17 +120,6 @@ extern void                        clock_gettimeofday_set_commpage(
 extern void                    machine_delay_until(
                                                uint64_t                deadline);
 
-#include <stat_time.h>
-
-#if    STAT_TIME || GPROF
-
-extern void            hertz_tick(
-                                       natural_t               ticks,
-                                       boolean_t               usermode,       /* executing user code */
-                                       natural_t               pc);
-
-#endif /* STAT_TIME */
-
 extern uint32_t                hz_tick_interval;
 
 extern void            absolutetime_to_nanotime(
@@ -267,10 +256,12 @@ extern void             nanoseconds_to_absolutetime(
        }                                                                                       \
   } while (0)
 
+#include <Availability.h>
 
-extern mach_timespec_t clock_get_system_value(void);
+/* Use mach_absolute_time() */
+extern mach_timespec_t clock_get_system_value(void) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_NA);
 
-extern mach_timespec_t clock_get_calendar_value(void);
+extern mach_timespec_t clock_get_calendar_value(void) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_NA);
 
 #else  /* __LP64__ */
 
index 1dd1aee281c1fc2a990b8688d5b51c55ac52f818..b89774897c23216fa7baadc80df1900a0227a8ab 100644 (file)
@@ -55,8 +55,6 @@
  */
 
 #include <mach_assert.h>
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
 #include <mach_kdp.h>
 
 #include <kern/cpu_number.h>
@@ -123,6 +121,9 @@ unsigned int debug_buf_size = sizeof(debug_buf);
 static char model_name[64];
 /* uuid_string_t */ char kernel_uuid[37]; 
 
+static spl_t panic_prologue(const char *str);
+static void panic_epilogue(spl_t s);
+
 struct pasc {
   unsigned a: 7;
   unsigned b: 7;
@@ -227,13 +228,10 @@ void _consume_panic_args(int a __unused, ...)
     panic("panic");
 }
 
-void
-panic(const char *str, ...)
+static spl_t
+panic_prologue(const char *str)
 {
-       va_list listp;
        spl_t   s;
-       thread_t thread;
-       wait_queue_t wq;
 
        if (kdebug_enable) {
                ml_set_interrupts_enabled(TRUE);
@@ -255,21 +253,14 @@ panic(const char *str, ...)
 
        panic_safe();
 
-       thread = current_thread();              /* Get failing thread */
-       wq = thread->wait_queue;                /* Save the old value */
-       thread->wait_queue = NULL;              /* Clear the wait so we do not get double panics when we try locks */
-
        if( logPanicDataToScreen )
                disable_debug_output = FALSE;
                
        debug_mode = TRUE;
 
-       /* panic_caller is initialized to 0.  If set, don't change it */
-       if ( ! panic_caller )
-               panic_caller = (unsigned long)(char *)__builtin_return_address(0);
-       
 restart:
        PANIC_LOCK();
+
        if (panicstr) {
                if (cpu_number() != paniccpu) {
                        PANIC_UNLOCK();
@@ -294,26 +285,19 @@ restart:
        panicwait = 1;
 
        PANIC_UNLOCK();
-       kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller);
-       if (str) {
-               va_start(listp, str);
-               _doprnt(str, &listp, consdebug_putc, 0);
-               va_end(listp);
-       }
-       kdb_printf("\n");
+       return(s);
+}
 
-       /*
-        * Release panicwait indicator so that other cpus may call Debugger().
-        */
-       panicwait = 0;
-       Debugger("panic");
+
+static void
+panic_epilogue(spl_t   s)
+{
        /*
         * Release panicstr so that we can handle normally other panics.
         */
        PANIC_LOCK();
        panicstr = (char *)0;
        PANIC_UNLOCK();
-       thread->wait_queue = wq;        /* Restore the wait queue */
 
        if (return_on_panic) {
                panic_normal();
@@ -321,12 +305,65 @@ restart:
                splx(s);
                return;
        }
-
        kdb_printf("panic: We are hanging here...\n");
        panic_stop();
        /* NOTREACHED */
 }
 
+void
+panic(const char *str, ...)
+{
+       va_list listp;
+       spl_t   s;
+
+       /* panic_caller is initialized to 0.  If set, don't change it */
+       if ( ! panic_caller )
+               panic_caller = (unsigned long)(char *)__builtin_return_address(0);
+       
+       s = panic_prologue(str);
+       kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller);
+       if (str) {
+               va_start(listp, str);
+               _doprnt(str, &listp, consdebug_putc, 0);
+               va_end(listp);
+       }
+       kdb_printf("\n");
+
+       /*
+        * Release panicwait indicator so that other cpus may call Debugger().
+        */
+       panicwait = 0;
+       Debugger("panic");
+       panic_epilogue(s);
+}
+
+void
+panic_context(unsigned int reason, void *ctx, const char *str, ...)
+{
+       va_list listp;
+       spl_t   s;
+
+       /* panic_caller is initialized to 0.  If set, don't change it */
+       if ( ! panic_caller )
+               panic_caller = (unsigned long)(char *)__builtin_return_address(0);
+       
+       s = panic_prologue(str);
+       kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller);
+       if (str) {
+               va_start(listp, str);
+               _doprnt(str, &listp, consdebug_putc, 0);
+               va_end(listp);
+       }
+       kdb_printf("\n");
+
+       /*
+        * Release panicwait indicator so that other cpus may call Debugger().
+        */
+       panicwait = 0;
+       DebuggerWithContext(reason, ctx, "panic");
+       panic_epilogue(s);
+}
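
A hypothetical caller, sketched for illustration; the reason code 0 and the register-state argument are placeholders. The variadic tail keeps the same printf contract as panic(), while reason and ctx travel through to DebuggerWithContext():

#include <i386/thread.h>

static void
fault_panic(x86_saved_state64_t *regs)
{
        panic_context(0, (void *)regs, "unrecoverable fault, rip=0x%llx",
            (unsigned long long)regs->isf.rip);
}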
+
 void
 log(__unused int level, char *fmt, ...)
 {
@@ -446,6 +483,15 @@ static void panic_display_kernel_uuid(void) {
                kdb_printf("Kernel UUID: %s\n", tmp_kernel_uuid);
 }
 
+static void panic_display_kernel_aslr(void) {
+#if    defined(__x86_64__)
+       if (vm_kernel_slide) {
+               kdb_printf("Kernel slide:     0x%016lx\n", vm_kernel_slide);
+               kdb_printf("Kernel text base: %p\n", (void *) vm_kernel_stext);
+       }
+#endif
+}
+
 static void panic_display_uptime(void) {
        uint64_t        uptime;
        absolutetime_to_nanoseconds(mach_absolute_time(), &uptime);
@@ -469,6 +515,7 @@ __private_extern__ void panic_display_system_configuration(void) {
                    (osversion[0] != 0) ? osversion : "Not yet set");
                kdb_printf("\nKernel version:\n%s\n",version);
                panic_display_kernel_uuid();
+               panic_display_kernel_aslr();
                panic_display_pal_info();
                panic_display_model_name();
                panic_display_uptime();
index 66702bc161d028efb3c7ac3d82962ea18c10687a..c773ca6233fe2c4f78e3b314a9c536a8a0dd60e6 100644 (file)
@@ -45,6 +45,8 @@ struct thread_snapshot {
        uint64_t                user_time;
        uint64_t                system_time;
        int32_t                 state;
+       int32_t                 sched_pri;   // scheduled (current) priority
+       int32_t                 sched_flags; // scheduler flags
        char                    ss_flags;
 } __attribute__ ((packed));
 
@@ -69,7 +71,7 @@ struct task_snapshot {
 } __attribute__ ((packed));
 
 
-struct mem_snapshot {
+struct mem_and_io_snapshot {
        uint32_t        snapshot_magic;
        uint32_t        free_pages;
        uint32_t        active_pages;
@@ -78,24 +80,33 @@ struct mem_snapshot {
        uint32_t        wired_pages;
        uint32_t        speculative_pages;
        uint32_t        throttled_pages;
+       int                     busy_buffer_count;
+       uint32_t        pages_wanted;
+       uint32_t        pages_reclaimed;
+       uint8_t         pages_wanted_reclaimed_valid; // did mach_vm_pressure_monitor succeed?
 } __attribute__((packed));
 
+
 enum {
        kUser64_p = 0x1,
        kKernel64_p = 0x2,
        kHasDispatchSerial = 0x4,
-       kTerminatedSnapshot = 0x8
+       kTerminatedSnapshot = 0x8,
+       kPidSuspended = 0x10,  // true for suspended task
+       kFrozen = 0x20         // true for hibernated task (along with pidsuspended)
 };
 
+#define VM_PRESSURE_TIME_WINDOW 5 /* seconds */
+
 enum {
        STACKSHOT_GET_DQ = 0x1,
        STACKSHOT_SAVE_LOADINFO = 0x2,
        STACKSHOT_GET_GLOBAL_MEM_STATS = 0x4
 };
 
-#define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface
-#define STACKSHOT_TASK_SNAPSHOT_MAGIC 0xdecafbad
-#define STACKSHOT_MEM_SNAPSHOT_MAGIC  0xabcddcba
+#define STACKSHOT_THREAD_SNAPSHOT_MAGIC        0xfeedface
+#define STACKSHOT_TASK_SNAPSHOT_MAGIC          0xdecafbad
+#define STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC    0xbfcabcde
 
 #endif /* __APPLE_API_UNSTABLE */
 #endif /* __APPLE_API_PRIVATE */
@@ -204,7 +215,7 @@ extern int debug_kprint_current_process(const char **namep);
        } while (0)
 #else /* !DEBUG */
 #define DEBUG_KPRINT_SYSCALL_PREDICATE_INTERNAL(mask, namep) (0)
-#define DEBUG_KPRINT_SYSCALL_MASK(mask, fmt, args...) do { } while(0)
+#define DEBUG_KPRINT_SYSCALL_MASK(mask, fmt, args...) do { } while (0) /* kprintf(fmt, args) */
 #endif /* !DEBUG */
 
 enum {
@@ -233,6 +244,7 @@ extern void panic(const char *string, ...) __printflike(1,2);
 
 #if KERNEL_PRIVATE
 void _consume_panic_args(int, ...);
+void panic_context(unsigned int reason, void *ctx, const char *string, ...);
 #endif
 
 #if CONFIG_NO_PANIC_STRINGS
index 27082522fc49c5610578ccb7b2cca258aeb323e1..8df6e268bb0dc373e61ab82ca10ed34d5660efb6 100644 (file)
@@ -56,8 +56,6 @@
 /*
  */
 
-#include <mach_kdb.h>
-
 #include <mach/mach_types.h>
 #include <mach/boolean.h>
 #include <mach/kern_return.h>
 #include <kern/host.h>
 #include <kern/misc_protos.h>
 #include <string.h>
-
-#if    MACH_KDB
-#include <ddb/db_trap.h>
-#endif /* MACH_KDB */
-
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-
-#if iPSC386 || iPSC860
-boolean_t debug_user_with_kdb = TRUE;
-#else
-boolean_t debug_user_with_kdb = FALSE;
-#endif
-
-#endif /* MACH_KDB */
+#include <pexpert/pexpert.h>
 
 unsigned long c_thr_exc_raise = 0;
 unsigned long c_thr_exc_raise_state = 0;
@@ -328,13 +311,11 @@ exception_triage(
 
        assert(exception != EXC_RPC_ALERT);
 
-       if (exception == KERN_SUCCESS)
-               panic("exception");
+       thread = current_thread();
 
        /*
         * Try to raise the exception at the activation level.
         */
-       thread = current_thread();
        mutex = &thread->mutex;
        excp = &thread->exc_actions[exception];
        kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex);
@@ -365,22 +346,10 @@ exception_triage(
         * Nobody handled it, terminate the task.
         */
 
-#if    MACH_KDB
-       if (debug_user_with_kdb) {
-               /*
-                *      Debug the exception with kdb.
-                *      If kdb handles the exception,
-                *      then thread_kdb_return won't return.
-                */
-               db_printf("No exception server, calling kdb...\n");
-               thread_kdb_return();
-       }
-#endif /* MACH_KDB */
-
        (void) task_terminate(task);
 
 out:
-       if (exception != EXC_CRASH)
+       if ((exception != EXC_CRASH) && (exception != EXC_RESOURCE))
                thread_exception_return();
        return;
 }
@@ -413,11 +382,11 @@ bsd_exception(
 
 
 /*
- * Raise an EXC_CRASH exception on the dying task.
+ * Raise an exception on a task.
  * This should tell launchd to launch Crash Reporter for this task.
  */
-kern_return_t abnormal_exit_notify(mach_exception_data_type_t exccode, 
-               mach_exception_data_type_t excsubcode)
+kern_return_t task_exception_notify(exception_type_t exception,
+       mach_exception_data_type_t exccode, mach_exception_data_type_t excsubcode)
 {
        mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
        wait_interrupt_t                wsave;
@@ -426,7 +395,7 @@ kern_return_t abnormal_exit_notify(mach_exception_data_type_t exccode,
        code[1] = excsubcode;
 
        wsave = thread_interrupt_level(THREAD_UNINT);
-       exception_triage(EXC_CRASH, code, EXCEPTION_CODE_MAX);
+       exception_triage(exception, code, EXCEPTION_CODE_MAX);
        (void) thread_interrupt_level(wsave);
        return (KERN_SUCCESS);
 }
index 78e659a97fce8b63e9351e1ef74f0b67bcc14cd0..a35895d8b94d11c7c73a9e1472977aa1c8ece6a4 100644 (file)
@@ -58,7 +58,7 @@ extern void exception_triage(
 extern kern_return_t sys_perf_notify(thread_t thread, int pid);
 
 /* Notify crash reporter */
-extern kern_return_t abnormal_exit_notify(mach_exception_data_type_t code, 
-               mach_exception_data_type_t subcode);
+extern kern_return_t task_exception_notify(exception_type_t exception,
+       mach_exception_data_type_t code, mach_exception_data_type_t subcode);
 
 #endif /* _KERN_EXCEPTION_H_ */
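
Note: with the rename, EXC_CRASH is no longer baked into the notification
path; callers pass the exception type explicitly, which is what lets the new
EXC_RESOURCE handling in exception_triage() reuse the same plumbing. A sketch
of the equivalent of the removed call (the wrapper name is hypothetical):

    /* Equivalent of the old abnormal_exit_notify(code, subcode): */
    static kern_return_t
    example_notify_crash(mach_exception_data_type_t code,
        mach_exception_data_type_t subcode)
    {
            return task_exception_notify(EXC_CRASH, code, subcode);
    }
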
diff --git a/osfmk/kern/gzalloc.c b/osfmk/kern/gzalloc.c
new file mode 100644 (file)
index 0000000..10b315d
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ *     File:   kern/gzalloc.c
+ *     Author: Derek Kumar
+ *
+ *     "Guard mode" zone allocator, used to trap use-after-free errors,
+ *     overruns, underruns, mismatched allocations/frees, uninitialized
+ *     zone element use, timing dependent races etc.
+ *
+ *     The allocator is configured by these boot-args:
+ *     gzalloc_size=<size>: target all zones with elements of <size> bytes
+ *     gzalloc_min=<size>: target zones with elements >= size
+ *     gzalloc_max=<size>: target zones with elements <= size
+ *     gzalloc_min and gzalloc_max can be specified together to target a
+ *     range of sizes
+ *     gzalloc_fc_size=<size>: number of zone elements (each effectively a
+ *     page multiple in size) to retain in the free VA cache. The cache is
+ *     evicted (backing pages and VA released) in least-recently-freed order.
+ *     Larger free VA caches allow for a longer window of opportunity to trap
+ *     delayed use-after-free operations, but use more memory.
+ *     -gzalloc_wp: Write protect, rather than unmap, freed allocations
+ *     lingering in the free VA cache. Useful to disambiguate between
+ *     read-after-frees/read overruns and writes. Also permits direct inspection
+ *     of the freed element in the cache via the kernel debugger. As each
+ *     element has a "header" (trailer in underflow detection mode), the zone
+ *     of origin of the element can be easily determined in this mode.
+ *     -gzalloc_uf_mode: Underflow detection mode, where the guard page
+ *     adjoining each element is placed *before* the element page rather than
+ *     after. The element is also located at the top of the page, rather than
+ *     abutting the bottom as with the standard overflow detection mode.
+ *     -gzalloc_noconsistency: disable consistency checks that flag mismatched
+ *     frees, corruptions of the header/trailer signatures etc.
+ *     -nogzalloc_mode: Disables the guard mode allocator. The DEBUG kernel
+ *     enables the guard allocator for zones sized 8K-16K (if present) by
+ *     default; this option disables that behaviour.
+ */
+
+#include <zone_debug.h>
+#include <zone_alias_addr.h>
+
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#include <mach/kern_return.h>
+#include <mach/machine/vm_types.h>
+#include <mach_debug/zone_info.h>
+#include <mach/vm_map.h>
+
+#include <kern/kern_types.h>
+#include <kern/assert.h>
+#include <kern/sched.h>
+#include <kern/locks.h>
+#include <kern/misc_protos.h>
+#include <kern/zalloc.h>
+#include <kern/kalloc.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <pexpert/pexpert.h>
+
+#include <machine/machparam.h>
+
+#include <libkern/OSDebug.h>
+#include <libkern/OSAtomic.h>
+#include <sys/kdebug.h>
+
+extern boolean_t vm_kernel_ready, kmem_ready;
+boolean_t gzalloc_mode = FALSE;
+uint32_t pdzalloc_count, pdzfree_count;
+
+#define        GZALLOC_MIN_DEFAULT (1024)
+#define GZDEADZONE ((zone_t) 0xDEAD201E)
+#define GZALLOC_SIGNATURE (0xABADCAFE)
+#define GZALLOC_RESERVE_SIZE_DEFAULT (2 * 1024 * 1024)
+#define GZFC_DEFAULT_SIZE (1024)
+
+char gzalloc_fill_pattern = 0x67; /* 'g' */
+
+uint32_t gzalloc_min = ~0U;
+uint32_t gzalloc_max = 0;
+uint32_t gzalloc_size = 0;
+uint64_t gzalloc_allocated, gzalloc_freed, gzalloc_early_alloc, gzalloc_early_free, gzalloc_wasted;
+boolean_t gzalloc_uf_mode = FALSE, gzalloc_consistency_checks = TRUE;
+vm_prot_t gzalloc_prot = VM_PROT_NONE;
+uint32_t gzalloc_guard = KMA_GUARD_LAST;
+uint32_t gzfc_size = GZFC_DEFAULT_SIZE;
+
+vm_map_t gzalloc_map;
+vm_offset_t gzalloc_map_min, gzalloc_map_max;
+vm_offset_t gzalloc_reserve;
+vm_size_t gzalloc_reserve_size;
+
+typedef struct gzalloc_header {
+       zone_t gzone;
+       uint32_t  gzsize;
+       uint32_t  gzsig;
+} gzhdr_t;
+
+#define GZHEADER_SIZE (sizeof(gzhdr_t))
+
+extern zone_t vm_page_zone;
+
+void gzalloc_reconfigure(__unused zone_t z) {
+       /* Nothing for now */
+}
+
+boolean_t gzalloc_enabled(void) {
+       return gzalloc_mode;
+}
+
+void gzalloc_zone_init(zone_t z) {
+       if (gzalloc_mode) {
+               bzero(&z->gz, sizeof(z->gz));
+
+               if (gzfc_size && (z->elem_size >= gzalloc_min) && (z->elem_size <= gzalloc_max) && (z->gzalloc_exempt == FALSE)) {
+                       vm_size_t gzfcsz = round_page(sizeof(*z->gz.gzfc) * gzfc_size);
+
+                       /* If the VM/kmem systems aren't yet configured, carve
+                        * out the free element cache structure directly from the
+                        * gzalloc_reserve supplied by the pmap layer.
+                        */
+                       if (!kmem_ready) {
+                               if (gzalloc_reserve_size < gzfcsz)
+                                       panic("gzalloc reserve exhausted");
+
+                               z->gz.gzfc = (vm_offset_t *)gzalloc_reserve;
+                               gzalloc_reserve += gzfcsz;
+                               gzalloc_reserve_size -= gzfcsz;
+                       } else {
+                               kern_return_t kr;
+
+                               if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT)) != KERN_SUCCESS) {
+                                       panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
+                               }
+                       }
+                       bzero((void *)z->gz.gzfc, gzfcsz);
+               }
+       }
+}
+
+void gzalloc_configure(void) {
+       char temp_buf[16];
+
+       if (PE_parse_boot_argn("-gzalloc_mode", temp_buf, sizeof (temp_buf))) {
+               gzalloc_mode = TRUE;
+               gzalloc_min = GZALLOC_MIN_DEFAULT;
+#if    ZONE_DEBUG              
+               gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
+#endif
+               gzalloc_max = ~0U;
+       }
+
+       if (PE_parse_boot_argn("gzalloc_min", &gzalloc_min, sizeof(gzalloc_min))) {
+#if    ZONE_DEBUG              
+               gzalloc_min += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
+#endif
+               gzalloc_mode = TRUE;
+               gzalloc_max = ~0U;
+       }
+
+       if (PE_parse_boot_argn("gzalloc_max", &gzalloc_max, sizeof(gzalloc_max))) {
+#if    ZONE_DEBUG              
+               gzalloc_max += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
+#endif
+               gzalloc_mode = TRUE;
+               if (gzalloc_min == ~0U)
+                       gzalloc_min = 0;
+       }
+
+       if (PE_parse_boot_argn("gzalloc_size", &gzalloc_size, sizeof(gzalloc_size))) {
+#if    ZONE_DEBUG              
+               gzalloc_size += (typeof(gzalloc_min))ZONE_DEBUG_OFFSET;
+#endif
+               gzalloc_min = gzalloc_max = gzalloc_size;
+               gzalloc_mode = TRUE;
+       }
+
+       (void)PE_parse_boot_argn("gzalloc_fc_size", &gzfc_size, sizeof(gzfc_size));
+
+       if (PE_parse_boot_argn("-gzalloc_wp", temp_buf, sizeof (temp_buf))) {
+               gzalloc_prot = VM_PROT_READ;
+       }
+
+       if (PE_parse_boot_argn("-gzalloc_uf_mode", temp_buf, sizeof (temp_buf))) {
+               gzalloc_uf_mode = TRUE;
+               gzalloc_guard = KMA_GUARD_FIRST;
+       }
+
+       if (PE_parse_boot_argn("-gzalloc_noconsistency", temp_buf, sizeof (temp_buf))) {
+               gzalloc_consistency_checks = FALSE;
+       }
+#if    DEBUG
+       if (gzalloc_mode == FALSE) {
+               gzalloc_min = 8192;
+               gzalloc_max = 16384;
+               gzalloc_prot = VM_PROT_READ;
+               gzalloc_mode = TRUE;
+       }
+#endif
+       if (PE_parse_boot_argn("-nogzalloc_mode", temp_buf, sizeof (temp_buf)))
+               gzalloc_mode = FALSE;
+
+       if (gzalloc_mode) {
+               gzalloc_reserve_size = GZALLOC_RESERVE_SIZE_DEFAULT;
+               gzalloc_reserve = (vm_offset_t) pmap_steal_memory(gzalloc_reserve_size);
+       }
+}
+
+void gzalloc_init(vm_size_t max_zonemap_size) {
+       kern_return_t retval;
+
+       if (gzalloc_mode) {
+               retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size << 2),
+                   FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
+                   &gzalloc_map);
+       
+               if (retval != KERN_SUCCESS)
+                       panic("zone_init: kmem_suballoc(gzalloc) failed");
+               gzalloc_map_max = gzalloc_map_min + (max_zonemap_size << 2);
+       }
+}
+
+vm_offset_t
+gzalloc_alloc(zone_t zone, boolean_t canblock) {
+       vm_offset_t addr = 0;
+
+       if (__improbable(gzalloc_mode &&
+               (((zone->elem_size >= gzalloc_min) &&
+                   (zone->elem_size <= gzalloc_max))) &&
+               (zone->gzalloc_exempt == 0))) {
+
+               if (get_preemption_level() != 0) {
+                       if (canblock == TRUE) {
+                               pdzalloc_count++;
+                       } else {
+                               return 0;
+                       }
+               }
+
+               vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
+               vm_offset_t residue = rounded_size - zone->elem_size;
+               vm_offset_t gzaddr = 0;
+               gzhdr_t *gzh;
+
+               if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
+                       /* Early allocations are supplied directly from the
+                        * reserve.
+                        */
+                       if (gzalloc_reserve_size < rounded_size)
+                               panic("gzalloc reserve exhausted");
+                       gzaddr = gzalloc_reserve;
+                       /* No guard page for these early allocations, just
+                        * waste an additional page.
+                        */
+                       gzalloc_reserve += rounded_size + PAGE_SIZE;
+                       gzalloc_reserve_size -= rounded_size + PAGE_SIZE;
+                       OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
+               }
+               else {
+                       kern_return_t kr = kernel_memory_allocate(gzalloc_map,
+                           &gzaddr, rounded_size + (1*PAGE_SIZE),
+                           0, KMA_KOBJECT | gzalloc_guard);
+                       if (kr != KERN_SUCCESS)
+                               panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
+
+               }
+
+               if (gzalloc_uf_mode) {
+                       gzaddr += PAGE_SIZE;
+                       /* The "header" becomes a "footer" in underflow
+                        * mode.
+                        */
+                       gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
+                       addr = gzaddr;
+               } else {
+                       gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
+                       addr = (gzaddr + residue);
+               }
+
+               /* Fill with a pattern on allocation to trap uninitialized
+                * data use. Since the element size may be "rounded up"
+                * by higher layers such as the kalloc layer, this may
+                * also identify overruns between the originally requested
+                * size and the rounded size via visual inspection.
+                * TBD: plumb through the originally requested size,
+                * prior to rounding by kalloc/IOMalloc etc.
+                * We also add a signature and the zone of origin in a header
+                * prefixed to the allocation.
+                */
+               memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);
+
+               gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
+               gzh->gzsize = (uint32_t) zone->elem_size;
+               gzh->gzsig = GZALLOC_SIGNATURE;
+
+               lock_zone(zone);
+               zone->count++;
+               zone->sum_count++;
+               zone->cur_size += rounded_size;
+               unlock_zone(zone);
+
+               OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
+               OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
+       }
+       return addr;
+}
+
+boolean_t gzalloc_free(zone_t zone, void *addr) {
+       boolean_t gzfreed = FALSE;
+       kern_return_t kr;
+
+       if (__improbable(gzalloc_mode &&
+               (((zone->elem_size >= gzalloc_min) &&
+                   (zone->elem_size <= gzalloc_max))) &&
+               (zone->gzalloc_exempt == 0))) {
+               gzhdr_t *gzh;
+               vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
+               vm_offset_t residue = rounded_size - zone->elem_size;
+               vm_offset_t saddr;
+               vm_offset_t free_addr = 0;
+
+               if (gzalloc_uf_mode) {
+                       gzh = (gzhdr_t *)((vm_offset_t)addr + zone->elem_size);
+                       saddr = (vm_offset_t) addr - PAGE_SIZE;
+               } else {
+                       gzh = (gzhdr_t *)((vm_offset_t)addr - GZHEADER_SIZE);
+                       saddr = ((vm_offset_t)addr) - residue;
+               }
+
+               assert((saddr & PAGE_MASK) == 0);
+
+               if (gzalloc_consistency_checks) {
+                       if (gzh->gzsig != GZALLOC_SIGNATURE) {
+                               panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, gzh->gzsig);
+                       }
+
+                       if (gzh->gzone != zone && (gzh->gzone != GZDEADZONE))
+                               panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, gzh->gzone, (void *)addr);
+                       /* Partially redundant given the zone check, but may flag header corruption */
+                       if (gzh->gzsize != zone->elem_size) {
+                               panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, gzh->gzsize, (uint32_t) zone->elem_size, (void *)addr);
+                       }
+               }
+
+               if (!kmem_ready || gzh->gzone == GZDEADZONE) {
+                       /* For now, just leak frees of early allocations
+                        * performed before kmem is fully configured.
+                        * They don't seem to get freed currently;
+                        * consider ml_static_mfree in the future.
+                        */
+                       OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_free);
+                       return TRUE;
+               }
+
+               if (get_preemption_level() != 0) {
+                       pdzfree_count++;
+               }
+
+               if (gzfc_size) {
+                       /* Either write protect or unmap the newly freed
+                        * allocation
+                        */
+                       kr = vm_map_protect(
+                               gzalloc_map,
+                               saddr,
+                               saddr + rounded_size + (1 * PAGE_SIZE),
+                               gzalloc_prot,
+                               FALSE);
+                       if (kr != KERN_SUCCESS)
+                               panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)saddr, kr);
+               } else {
+                       free_addr = saddr;
+               }
+
+               lock_zone(zone);
+
+               /* Insert newly freed element into the protected free element
+                * cache, and rotate out the LRU element.
+                */
+               if (gzfc_size) {
+                       if (zone->gz.gzfc_index >= gzfc_size) {
+                               zone->gz.gzfc_index = 0;
+                       }
+                       free_addr = zone->gz.gzfc[zone->gz.gzfc_index];
+                       zone->gz.gzfc[zone->gz.gzfc_index++] = saddr;
+               }
+
+               if (free_addr) {
+                       zone->count--;
+                       zone->cur_size -= rounded_size;
+               }
+
+               unlock_zone(zone);
+
+               if (free_addr) {
+                       kr = vm_map_remove(
+                               gzalloc_map,
+                               free_addr,
+                               free_addr + rounded_size + (1 * PAGE_SIZE),
+                               VM_MAP_REMOVE_KUNWIRE);
+                       if (kr != KERN_SUCCESS)
+                               panic("gzfree: vm_map_remove: %p, 0x%x", (void *)free_addr, kr);
+
+                       OSAddAtomic64((SInt32)rounded_size, &gzalloc_freed);
+                       OSAddAtomic64(-((SInt32) (rounded_size - zone->elem_size)), &gzalloc_wasted);
+               }
+
+               gzfreed = TRUE;
+       }
+       return gzfreed;
+}
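
Note: booting with e.g. gzalloc_min=512 gzalloc_max=4096 -gzalloc_wp
(arguments documented in the file header above) guards that element size
range and write-protects frees. The address arithmetic in gzalloc_alloc()
places each element at the end of its page run so the KMA_GUARD_LAST page
faults on the first overrunning byte; a standalone sketch of that layout
(the page size and the 16-byte LP64 header size are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define EX_PAGE_SIZE 4096u
    #define EX_GZHEADER  16u  /* sizeof(gzhdr_t) on LP64: pointer + 2 x uint32_t */

    int main(void)
    {
            uint32_t elem_size = 1024;
            /* rounded_size = round_page(elem_size + GZHEADER_SIZE) */
            uint32_t rounded = (elem_size + EX_GZHEADER + EX_PAGE_SIZE - 1) &
                ~(EX_PAGE_SIZE - 1);
            uint32_t residue = rounded - elem_size;

            /* element abuts the guard page; header sits just below it */
            printf("element base+0x%x..0x%x, header at base+0x%x, guard at base+0x%x\n",
                residue, rounded, residue - EX_GZHEADER, rounded);
            return 0;
    }
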
index 90542a946691c06abbc3f5a2433e0211167a5a9a..dc55ce790528b48f9fead704f62c23f512cd94df 100644 (file)
@@ -411,7 +411,12 @@ MACRO_END
                        timer_t         idle_state;
 
                        GET_TICKS_VALUE(processor, CPU_STATE_USER, user_state);
-                       GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state);
+                       if (precise_user_kernel_time) {
+                               GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state);
+                       } else {
+                               /* system_state may represent either sys or user */
+                               GET_TICKS_VALUE(processor, CPU_STATE_USER, system_state);
+                       }
 
                        idle_state = &PROCESSOR_DATA(processor, idle_state);
                        idle_temp = *idle_state;
@@ -427,6 +432,7 @@ MACRO_END
                        }
                }
                simple_unlock(&processor_list_lock);
+
                *count = HOST_CPU_LOAD_INFO_COUNT;
 
                return (KERN_SUCCESS);
index 8963abea6daef13a9b7aa8782513360a21903911..89b8f9e68f807f38fc4984c81dfcafa2ebc14fa9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,7 +93,6 @@
 #include <mach/host_security_server.h>
 #include <mach/clock_server.h>
 #include <mach/clock_priv_server.h>
-#include <mach/ledger_server.h>
 #include <mach/lock_set_server.h>
 #include <default_pager/default_pager_object_server.h>
 #include <mach/memory_object_server.h>
@@ -189,7 +188,6 @@ const struct mig_subsystem *mig_e[] = {
         (const struct mig_subsystem *)&is_iokit_subsystem,
         (const struct mig_subsystem *)&memory_object_name_subsystem,
        (const struct mig_subsystem *)&lock_set_subsystem,
-       (const struct mig_subsystem *)&ledger_subsystem,
        (const struct mig_subsystem *)&task_subsystem,
        (const struct mig_subsystem *)&thread_act_subsystem,
 #if VM32_SUPPORT
@@ -317,6 +315,13 @@ ipc_kobject_server(
 #define        InP     ((mach_msg_header_t *) request->ikm_header)
 #define        OutP    ((mig_reply_error_t *) reply->ikm_header)
 
+           /* 
+            * MIG should really ensure no data leakage -
+            * but until it does, pessimistically zero the
+            * whole reply buffer.
+            */
+           bzero((void *)OutP, reply_size);
+
            OutP->NDR = NDR_record;
            OutP->Head.msgh_size = sizeof(mig_reply_error_t);
 
@@ -324,6 +329,7 @@ ipc_kobject_server(
                MACH_MSGH_BITS(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0);
            OutP->Head.msgh_remote_port = InP->msgh_local_port;
            OutP->Head.msgh_local_port  = MACH_PORT_NULL;
+           OutP->Head.msgh_reserved = (mach_msg_size_t)InP->msgh_id; /* useful for debug */
            OutP->Head.msgh_id = InP->msgh_id + 100;
 
 #undef InP
@@ -590,68 +596,3 @@ ipc_kobject_notify(
                 return FALSE;
         }
 }
-
-
-
-#include <mach_kdb.h>
-#if    MACH_COUNTERS && MACH_KDB
-
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-
-#define printf  kdbprintf
-
-extern void kobjserver_stats(void);
-extern void bucket_stats_print(mig_hash_t *bucket);
-
-extern void kobjserver_stats_clear(void);
-
-
-void
-kobjserver_stats_clear(void)
-{
-       int i;
-       for (i = 0; i < MAX_MIG_ENTRIES; i++) {
-               mig_buckets[i].callcount = 0;
-       }
-}
-
-void
-kobjserver_stats(void)
-{
-    register unsigned int i, n = sizeof(mig_e)/sizeof(struct mig_subsystem);
-    register unsigned int howmany;
-    register mach_msg_id_t j, pos, nentry, range;
-       
-    db_printf("Kobject server call counts:\n");
-    for (i = 0; i < n; i++) {
-       db_printf("  ");
-       db_printsym((vm_offset_t)mig_e[i], DB_STGY_ANY);
-       db_printf(":\n");
-       range = mig_e[i]->end - mig_e[i]->start;
-       if (!mig_e[i]->start || range < 0) continue;
-
-       for  (j = 0; j < range; j++) {
-           nentry = j + mig_e[i]->start;       
-           for (pos = MIG_HASH(nentry) % MAX_MIG_ENTRIES, howmany = 1;
-                mig_buckets[pos].num;
-                pos++, pos = pos % MAX_MIG_ENTRIES, howmany++) {
-                   if (mig_buckets[pos].num == nentry)
-                       bucket_stats_print(&mig_buckets[pos]);
-           }
-       }
-    }
-}
-
-void
-bucket_stats_print(mig_hash_t *bucket)
-{
-       if (bucket->callcount) {
-               db_printf("    ");
-               db_printsym((vm_offset_t)bucket->routine, DB_STGY_ANY);
-               db_printf(" (%d):\t%d\n", bucket->num, bucket->callcount);
-       }
-}
-
-
-#endif /* MACH_COUNTERS && MACH_KDB */
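
Note: the pessimistic bzero() of the reply buffer closes an information
leak — any pad bytes or fields a MIG routine forgets to set would otherwise
carry stale kernel memory back to the sender. A toy illustration of the
failure mode (struct and names are invented for the example):

    #include <stdio.h>
    #include <string.h>

    struct toy_reply { int code; char pad[12]; };  /* pad mimics MIG padding */

    static void serve(struct toy_reply *r)
    {
            memset(r, 0, sizeof *r);  /* the new bzero(OutP, reply_size) step */
            r->code = 42;             /* handler fills only what it knows */
    }

    int main(void)
    {
            struct toy_reply r;
            serve(&r);
            printf("code=%d pad[0]=%d\n", r.code, r.pad[0]);  /* pad provably zero */
            return 0;
    }
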
index 1b2a9163d188aa9358a297079be03db7f11c4fbd..5120736756816c260d8a0cd024bfe06e44ae4d68 100644 (file)
@@ -277,7 +277,7 @@ mach_msg_rpc_from_kernel_body(
        kmsg->ikm_header->msgh_bits |=
                MACH_MSGH_BITS(0, MACH_MSG_TYPE_MAKE_SEND_ONCE);
 
-       ipc_port_reference(reply);
+       ip_reference(reply);
 
 #if IKM_SUPPORT_LEGACY
     if(legacy)
@@ -303,12 +303,12 @@ mach_msg_rpc_from_kernel_body(
                ip_lock(reply);
                if ( !ip_active(reply)) {
                        ip_unlock(reply);
-                       ipc_port_release(reply);
+                       ip_release(reply);
                        return MACH_RCV_PORT_DIED;
                }
                if (!self->active) {
                        ip_unlock(reply);
-                       ipc_port_release(reply);
+                       ip_release(reply);
                        return MACH_RCV_INTERRUPTED;
                }
 
@@ -336,11 +336,11 @@ mach_msg_rpc_from_kernel_body(
                assert(mr == MACH_RCV_INTERRUPTED);
 
                if (self->handlers) {
-                       ipc_port_release(reply);
+                       ip_release(reply);
                        return(mr);
                }
        }
-       ipc_port_release(reply);
+       ip_release(reply);
 
        /* 
         * Check to see how much of the message/trailer can be received.
@@ -419,7 +419,7 @@ mach_msg_overwrite(
        ipc_kmsg_t kmsg;
        mach_port_seqno_t seqno;
        mach_msg_return_t mr;
-       mach_msg_max_trailer_t *trailer;
+       mach_msg_trailer_size_t trailer_size;
 
        if (option & MACH_SEND_MSG) {
                mach_msg_size_t msg_and_trailer_size;
@@ -489,22 +489,17 @@ mach_msg_overwrite(
                        kmsg = self->ith_kmsg;
                        seqno = self->ith_seqno;
 
-                       ipc_object_release(object);
+                       io_release(object);
 
                } while (mr == MACH_RCV_INTERRUPTED);
                if (mr != MACH_MSG_SUCCESS)
                        return mr;
 
-               trailer = (mach_msg_max_trailer_t *) 
-                   ((vm_offset_t)kmsg->ikm_header + kmsg->ikm_header->msgh_size);
-               if (option & MACH_RCV_TRAILER_MASK) {
-                       trailer->msgh_seqno = seqno;
-                       trailer->msgh_context = 
-                               kmsg->ikm_header->msgh_remote_port->ip_context;
-                       trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
-               }
 
-               if (rcv_size < (kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size)) {
+               trailer_size = ipc_kmsg_add_trailer(kmsg, space, option, current_thread(), seqno, TRUE,
+                               kmsg->ikm_header->msgh_remote_port->ip_context);
+
+               if (rcv_size < (kmsg->ikm_header->msgh_size + trailer_size)) {
                        ipc_kmsg_copyout_dest(kmsg, space);
                        (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, sizeof *msg);
                        ipc_kmsg_free(kmsg);
@@ -515,7 +510,7 @@ mach_msg_overwrite(
                if (mr != MACH_MSG_SUCCESS) {
                        if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) {
                                ipc_kmsg_put_to_kernel(msg, kmsg,
-                                               kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size);
+                                               kmsg->ikm_header->msgh_size + trailer_size);
                        } else {
                                ipc_kmsg_copyout_dest(kmsg, space);
                                (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, sizeof *msg);
@@ -526,7 +521,7 @@ mach_msg_overwrite(
                }
 
                (void) memcpy((void *) msg, (const void *) kmsg->ikm_header,
-                             kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size);
+                             kmsg->ikm_header->msgh_size + trailer_size);
                ipc_kmsg_free(kmsg);
        }
 
index 06d3ae97e1e9468df72c141efbcbfc5c666b8bbe..ff7326b4b2eeb89cc88e0f3f93f23c321005b44a 100644 (file)
@@ -50,7 +50,8 @@
 #ifdef _MIG_TRACE_PARAMETERS_
 
 #define __BeforeRcvCallTrace(msgid,arg1,arg2,arg3,arg4)                                      \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_START,                   \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_START,   \
                              (unsigned int)(arg1),                                   \
                              (unsigned int)(arg2),                                   \
                              (unsigned int)(arg3),                                   \
@@ -58,7 +59,8 @@
                              (unsigned int)(0));
 
 #define __AfterRcvCallTrace(msgid,arg1,arg2,arg3,arg4)                               \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_END,                     \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_END,     \
                              (unsigned int)(arg1),                                   \
                              (unsigned int)(arg2),                                   \
                              (unsigned int)(arg3),                                   \
@@ -66,7 +68,8 @@
                              (unsigned int)(0));
 
 #define __BeforeSimpleCallTrace(msgid,arg1,arg2,arg3,arg4)                           \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_START,                   \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_START,   \
                              (unsigned int)(arg1),                                   \
                              (unsigned int)(arg2),                                   \
                              (unsigned int)(arg3),                                   \
@@ -74,7 +77,8 @@
                              (unsigned int)(0));
 
 #define __AfterSimpleCallTrace(msgid,arg1,arg2,arg3,arg4)                            \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_END,                     \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                     KDBG_MIGCODE(msgid) | DBG_FUNC_END,     \
                              (unsigned int)(arg1),                                   \
                              (unsigned int)(arg2),                                   \
                              (unsigned int)(arg3),                                   \
@@ -84,7 +88,8 @@
 #else /* !_MIG_TRACE_PARAMETERS_ */
 
 #define        __BeforeRcvRpc(msgid, _NAME_)                                                 \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_START,                   \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_START,   \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
@@ -92,7 +97,8 @@
                              (unsigned int)(0));
 
 #define        __AfterRcvRpc(msgid, _NAME_)                                                  \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_END,                     \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                     KDBG_MIGCODE(msgid) | DBG_FUNC_END,     \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
 
 
 #define        __BeforeRcvSimple(msgid, _NAME_)                                              \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_START,                   \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_START,   \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0));
 
 #define        __AfterRcvSimple(msgid, _NAME_)                                               \
-       KERNEL_DEBUG_CONSTANT(KDBG_MIGCODE(msgid) | DBG_FUNC_END,                     \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             KDBG_MIGCODE(msgid) | DBG_FUNC_END,     \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
 #endif /* !_MIG_TRACE_PARAMETERS_ */
 
 #define _MIG_MSGID_INVALID(msgid)                                                    \
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_MSGID_INVALID, (msgid)),          \
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,          \
+                             MACHDBG_CODE(DBG_MACH_MSGID_INVALID, (msgid)),  \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
                              (unsigned int)(0),                                      \
index 547abeaad580bbf60fe33b1f8f8c31e749a560f4..6690db41088b1b79bdd62f858a1743c176ff97e7 100644 (file)
@@ -62,8 +62,8 @@ fileport_alloc(struct fileglob *fg)
        }
 
        ipc_kobject_set(fileport, (ipc_kobject_t)fg, IKOT_FILEPORT);
-       notifyport = ipc_port_make_sonce(fileport);
        ip_lock(fileport); /* unlocked by ipc_port_nsrequest */
+       notifyport = ipc_port_make_sonce_locked(fileport);
        ipc_port_nsrequest(fileport, 1, notifyport, &notifyport);
 
        sendport = ipc_port_make_send(fileport);
index 019dacd6be708c58bd8b2ec0b7753801e44b0071..61013e3999cad9e0359934b0e9003b03031c6784 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -144,7 +144,6 @@ ipc_task_init(
        task->itk_nself = nport;
        task->itk_sself = ipc_port_make_send(kport);
        task->itk_space = space;
-       space->is_fast = FALSE;
 
 #if CONFIG_MACF_MACH
        if (parent)
@@ -326,9 +325,6 @@ ipc_task_terminate(
                if (IP_VALID(task->itk_registered[i]))
                        ipc_port_release_send(task->itk_registered[i]);
 
-       ipc_port_release_send(task->wired_ledger_port);
-       ipc_port_release_send(task->paged_ledger_port);
-
        /* destroy the kernel ports */
        ipc_port_dealloc_kernel(kport);
        ipc_port_dealloc_kernel(nport);
@@ -858,22 +854,10 @@ task_get_special_port(
                port = ipc_port_copy_send(task->itk_bootstrap);
                break;
 
-           case TASK_WIRED_LEDGER_PORT:
-               port = ipc_port_copy_send(task->wired_ledger_port);
-               break;
-
-           case TASK_PAGED_LEDGER_PORT:
-               port = ipc_port_copy_send(task->paged_ledger_port);
-               break;
-                    
            case TASK_SEATBELT_PORT:
                port = ipc_port_copy_send(task->itk_seatbelt);
                break;
 
-           case TASK_GSSD_PORT:
-               port = ipc_port_copy_send(task->itk_gssd);
-               break;
-                       
            case TASK_ACCESS_PORT:
                port = ipc_port_copy_send(task->itk_task_access);
                break;
@@ -929,22 +913,10 @@ task_set_special_port(
                whichp = &task->itk_bootstrap;
                break;
 
-           case TASK_WIRED_LEDGER_PORT:
-               whichp = &task->wired_ledger_port;
-               break;
-
-           case TASK_PAGED_LEDGER_PORT:
-               whichp = &task->paged_ledger_port;
-               break;
-                    
            case TASK_SEATBELT_PORT:
                whichp = &task->itk_seatbelt;
                break;
 
-           case TASK_GSSD_PORT:
-               whichp = &task->itk_gssd;
-               break;
-               
            case TASK_ACCESS_PORT:
                whichp = &task->itk_task_access;
                break;
index 4a3a9ac1c28b5e798ddc48b4f6de34db09ffbecd..7c1384c41675edc69fd7c46579c4891d74725326 100644 (file)
@@ -149,18 +149,6 @@ extern thread_t    convert_port_to_thread(
 extern thread_t        port_name_to_thread(
        mach_port_name_t        port_name);
 
-/* Convert from a task to a port */
-extern ipc_port_t convert_task_to_port(
-       task_t                  task);
-
-/* Convert from a task name to a port */
-extern ipc_port_t convert_task_name_to_port(
-       task_name_t             task_name);
-
-/* Convert from a thread to a port */
-extern ipc_port_t      convert_thread_to_port(
-       thread_t                thread);
-
 /* Deallocate a space ref produced by convert_port_to_space */
 extern void space_deallocate(
        ipc_space_t             space);
index f84a19956c713721be4a4fbc48a30e6893d65e8e..a5febf94c7fa14740329e8676b38760f76700d6e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,6 +73,7 @@
 #include <kern/zalloc.h>
 #include <kern/kalloc.h>
 #include <kern/lock.h>
+#include <kern/ledger.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_map.h>
@@ -114,7 +115,8 @@ KALLOC_ZINFO_SALLOC(vm_size_t bytes)
        task_t task;
        zinfo_usage_t zinfo;
 
-       thr->tkm_shared.alloc += bytes;
+       ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes);
+
        if (kalloc_fake_zone_index != -1 && 
            (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                zinfo[kalloc_fake_zone_index].alloc += bytes;
@@ -127,7 +129,8 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
        task_t task;
        zinfo_usage_t zinfo;
 
-       thr->tkm_shared.free += bytes;
+       ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes);
+
        if (kalloc_fake_zone_index != -1 && 
            (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                zinfo[kalloc_fake_zone_index].free += bytes;
@@ -135,32 +138,147 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
 
 /*
  *     All allocations of size less than kalloc_max are rounded to the
- *     next highest power of 2.  This allocator is built on top of
+ *     next nearest sized zone.  This allocator is built on top of
  *     the zone allocator.  A zone is created for each potential size
  *     that we are willing to get in small blocks.
  *
  *     We assume that kalloc_max is not greater than 64K;
- *     thus 16 is a safe array size for k_zone and k_zone_name.
  *
  *     Note that kalloc_max is somewhat confusingly named.
  *     It represents the first power of two for which no zone exists.
  *     kalloc_max_prerounded is the smallest allocation size, before
  *     rounding, for which no zone exists.
- *  Also if the allocation size is more than kalloc_kernmap_size 
- *  then allocate from kernel map rather than kalloc_map.
+ *
+ *     Also if the allocation size is more than kalloc_kernmap_size 
+ *     then allocate from kernel map rather than kalloc_map.
+ */
+
+#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
+
+/*
+ * "Legacy" aka "power-of-2" backing zones with 16-byte minimum
+ * size and alignment.  Users of this profile would probably
+ * benefit from some tuning.
+ */
+
+#define K_ZONE_SIZES                   \
+       16,                             \
+       32,                             \
+/* 6 */        64,                             \
+       128,                            \
+       256,                            \
+/* 9 */        512,                            \
+       1024,                           \
+       2048,                           \
+/* C */        4096
+
+
+#define K_ZONE_NAMES                   \
+       "kalloc.16",                    \
+       "kalloc.32",                    \
+/* 6 */        "kalloc.64",                    \
+       "kalloc.128",                   \
+       "kalloc.256",                   \
+/* 9 */        "kalloc.512",                   \
+       "kalloc.1024",                  \
+       "kalloc.2048",                  \
+/* C */        "kalloc.4096"
+
+#define K_ZONE_MAXIMA                  \
+       1024,                           \
+       4096,                           \
+/* 6 */        4096,                           \
+       4096,                           \
+       4096,                           \
+/* 9 */        1024,                           \
+       1024,                           \
+       1024,                           \
+/* C */        1024
+
+#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
+
+/*
+ * Tweaked for ARM (and x64) in 04/2011
  */
 
-int first_k_zone = -1;
-struct zone *k_zone[16];
-static const char *k_zone_name[16] = {
-       "kalloc.1",             "kalloc.2",
-       "kalloc.4",             "kalloc.8",
-       "kalloc.16",            "kalloc.32",
-       "kalloc.64",            "kalloc.128",
-       "kalloc.256",           "kalloc.512",
-       "kalloc.1024",          "kalloc.2048",
-       "kalloc.4096",          "kalloc.8192",
-       "kalloc.16384",         "kalloc.32768"
+#define K_ZONE_SIZES                   \
+/* 3 */        8,                              \
+       16,     24,                     \
+       32,     40,     48,             \
+/* 6 */        64,     88,     112,            \
+       128,    192,                    \
+       256,    384,                    \
+/* 9 */        512,    768,                    \
+       1024,   1536,                   \
+       2048,   3072,                   \
+       4096,   6144
+
+#define K_ZONE_NAMES                   \
+/* 3 */        "kalloc.8",                     \
+       "kalloc.16",    "kalloc.24",    \
+       "kalloc.32",    "kalloc.40",    "kalloc.48",    \
+/* 6 */        "kalloc.64",    "kalloc.88",    "kalloc.112",   \
+       "kalloc.128",   "kalloc.192",   \
+       "kalloc.256",   "kalloc.384",   \
+/* 9 */        "kalloc.512",   "kalloc.768",   \
+       "kalloc.1024",  "kalloc.1536",  \
+       "kalloc.2048",  "kalloc.3072",  \
+       "kalloc.4096",  "kalloc.6144"
+
+#define        K_ZONE_MAXIMA                   \
+/* 3 */        1024,                           \
+       1024,   1024,                   \
+       4096,   4096,   4096,           \
+/* 6 */        4096,   4096,   4096,           \
+       4096,   4096,                   \
+       4096,   4096,                   \
+/* 9 */        1024,   1024,                   \
+       1024,   1024,                   \
+       1024,   1024,                   \
+/* C */        1024,   64
+
+#else
+#error missing zone size parameters for kalloc
+#endif
+
+#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
+
+static const int k_zone_size[] = {
+       K_ZONE_SIZES,
+       8192,
+       16384,
+/* F */        32768
+};
+
+#define N_K_ZONE       (sizeof (k_zone_size) / sizeof (k_zone_size[0]))
+
+/*
+ * Many kalloc() allocations are for small structures containing a few
+ * pointers and longs - the k_zone_dlut[] direct lookup table, indexed by
+ * size normalized to the minimum alignment, finds the right zone index
+ * for them in one dereference.
+ */
+
+#define INDEX_ZDLUT(size)      \
+                       (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
+#define N_K_ZDLUT      (2048 / KALLOC_MINALIGN)
+                               /* covers sizes [0 .. 2048 - KALLOC_MINALIGN] */
+#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN)
+
+static int8_t k_zone_dlut[N_K_ZDLUT];  /* table of indices into k_zone[] */
+
+/*
+ * If there's no hit in the DLUT, then start searching from k_zindex_start.
+ */
+static int k_zindex_start;
+
+static zone_t k_zone[N_K_ZONE];
+
+static const char *k_zone_name[N_K_ZONE] = {
+       K_ZONE_NAMES,
+       "kalloc.8192",
+       "kalloc.16384",
+/* F */        "kalloc.32768"
 };
 
 /*
@@ -169,25 +287,15 @@ static const char *k_zone_name[16] = {
  *  based on need, rather than just guessing; it also
 *  means it's patchable in case you're wrong!
  */
-unsigned long k_zone_max[16] = {
-      1024,            /*      1 Byte  */
-      1024,            /*      2 Byte  */
-      1024,            /*      4 Byte  */
-      1024,            /*      8 Byte  */
-      1024,            /*     16 Byte  */
-      4096,            /*     32 Byte  */
-      4096,            /*     64 Byte  */
-      4096,            /*    128 Byte  */
-      4096,            /*    256 Byte  */
-      1024,            /*    512 Byte  */
-      1024,            /*   1024 Byte  */
-      1024,            /*   2048 Byte  */
-      1024,            /*   4096 Byte  */
-      4096,            /*   8192 Byte  */
-      64,              /*  16384 Byte  */
-      64,              /*  32768 Byte  */
+unsigned int k_zone_max[N_K_ZONE] = {
+       K_ZONE_MAXIMA,
+       4096,
+       64,
+/* F */        64
 };
 
+/* #define KALLOC_DEBUG                1 */
+
 /* forward declarations */
 void * kalloc_canblock(
                vm_size_t       size,
@@ -277,25 +385,101 @@ kalloc_init(
         *      for the allocation, as we aren't sure how the memory
         *      will be handled.
         */
-       for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) {
-               if (size < KALLOC_MINSIZE) {
-                       k_zone[i] = NULL;
-                       continue;
-               }
-               if (size == KALLOC_MINSIZE) {
-                       first_k_zone = i;
-               }
+       for (i = 0; (size = k_zone_size[i]) < kalloc_max; i++) {
                k_zone[i] = zinit(size, k_zone_max[i] * size, size,
                                  k_zone_name[i]);
                zone_change(k_zone[i], Z_CALLERACCT, FALSE);
        }
+
+       /*
+        * Build the Direct LookUp Table for small allocations
+        */
+       for (i = 0, size = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) {
+               int zindex = 0;
+
+               while ((vm_size_t)k_zone_size[zindex] < size)
+                       zindex++;
+
+               if (i == N_K_ZDLUT) {
+                       k_zindex_start = zindex;
+                       break;
+               }
+               k_zone_dlut[i] = (int8_t)zindex;
+       }
+
+#ifdef KALLOC_DEBUG
+       printf("kalloc_init: k_zindex_start %d\n", k_zindex_start);
+
+       /*
+        * Do a quick synthesis to see how well/badly we can
+        * find-a-zone for a given size.
+        * Useful when debugging/tweaking the array of zone sizes.
+        * Cache misses probably more critical than compare-branches!
+        */
+       for (i = 0; i < (int)N_K_ZONE; i++) {
+               vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1;
+               int compare = 0;
+               int zindex;
+
+               if (testsize < MAX_SIZE_ZDLUT) {
+                       compare += 1;   /* 'if' (T) */
+
+                       long dindex = INDEX_ZDLUT(testsize);
+                       zindex = (int)k_zone_dlut[dindex];
+
+               } else if (testsize < kalloc_max_prerounded) {
+
+                       compare += 2;   /* 'if' (F), 'if' (T) */
+
+                       zindex = k_zindex_start;
+                       while ((vm_size_t)k_zone_size[zindex] < testsize) {
+                               zindex++;
+                               compare++;      /* 'while' (T) */
+                       }
+                       compare++;      /* 'while' (F) */
+               } else
+                       break;  /* not zone-backed */
+
+               zone_t z = k_zone[zindex];
+               printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
+                   (unsigned long)testsize, z->zone_name, compare,
+                   compare == 1 ? "" : "s");
+       }
+#endif
        kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL);
        lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL);
        OSMalloc_init();
 #ifdef MUTEX_ZONE      
        lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
 #endif 
+}
 
+/*
+ * Given an allocation size, return the kalloc zone it belongs to.
+ * Direct LookUp Table variant.
+ */
+static __inline zone_t
+get_zone_dlut(vm_size_t size)
+{
+       long dindex = INDEX_ZDLUT(size);
+       int zindex = (int)k_zone_dlut[dindex];
+       return (k_zone[zindex]);
+}
+
+/* As above, but linear search k_zone_size[] for the next zone that fits. */
+
+static __inline zone_t
+get_zone_search(vm_size_t size, int zindex)
+{
+       assert(size < kalloc_max_prerounded);
+
+       while ((vm_size_t)k_zone_size[zindex] < size)
+               zindex++;
+
+       assert((unsigned)zindex < N_K_ZONE &&
+           (vm_size_t)k_zone_size[zindex] < kalloc_max);
+
+       return (k_zone[zindex]);
 }
 
 void *
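
Note: the direct lookup table replaces the old shift-and-count loop — any
request below MAX_SIZE_ZDLUT resolves to its backing zone with one table
load. A standalone sketch of the same indexing arithmetic, using a subset of
the 16-byte-minimum zone sizes listed above (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define EX_MINALIGN 16
    #define EX_INDEX_ZDLUT(size) (((size) + EX_MINALIGN - 1) / EX_MINALIGN)

    int main(void)
    {
            static const int k_zone_size[] =
                { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
            int8_t dlut[2048 / EX_MINALIGN];  /* sizes 0..2032; larger
                                                 requests take the search path */
            int zindex = 0;

            /* Build the table the way kalloc_init() does: for each
             * normalized size, record the first zone that fits. */
            for (int i = 0, size = 0; i < (int)sizeof(dlut);
                i++, size += EX_MINALIGN) {
                    while (k_zone_size[zindex] < size)
                            zindex++;
                    dlut[i] = (int8_t)zindex;
            }

            /* One dereference maps a request to its zone */
            int req = 100;
            printf("kalloc(%d) -> kalloc.%d\n", req,
                k_zone_size[(int)dlut[EX_INDEX_ZDLUT(req)]]);
            return 0;
    }
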
@@ -303,17 +487,19 @@ kalloc_canblock(
                vm_size_t       size,
                boolean_t       canblock)
 {
-       register int zindex;
-       register vm_size_t allocsize;
-       vm_map_t alloc_map = VM_MAP_NULL;
-
-       /*
-        * If size is too large for a zone, then use kmem_alloc.
-        * (We use kmem_alloc instead of kmem_alloc_kobject so that
-        * krealloc can use kmem_realloc.)
-        */
-
-       if (size >= kalloc_max_prerounded) {
+       zone_t z;
+
+       if (size < MAX_SIZE_ZDLUT)
+               z = get_zone_dlut(size);
+       else if (size < kalloc_max_prerounded)
+               z = get_zone_search(size, k_zindex_start);
+       else {
+               /*
+                * If size is too large for a zone, then use kmem_alloc.
+                * (We use kmem_alloc instead of kmem_alloc_kobject so that
+                * krealloc can use kmem_realloc.)
+                */
+               vm_map_t alloc_map;
                void *addr;
 
                /* kmem_alloc could block so we return if noblock */
@@ -357,19 +543,13 @@ kalloc_canblock(
                }
                return(addr);
        }
-
-       /* compute the size of the block that we will actually allocate */
-
-       allocsize = KALLOC_MINSIZE;
-       zindex = first_k_zone;
-       while (allocsize < size) {
-               allocsize <<= 1;
-               zindex++;
-       }
-
-       /* allocate from the appropriate zone */
-       assert(allocsize < kalloc_max);
-       return(zalloc_canblock(k_zone[zindex], canblock));
+#ifdef KALLOC_DEBUG
+       if (size > z->elem_size)
+               panic("%s: z %p (%s) but requested size %lu", __func__,
+                   z, z->zone_name, (unsigned long)size);
+#endif
+       assert(size <= z->elem_size);
+       return (zalloc_canblock(z, canblock));
 }
 
 void *
@@ -386,164 +566,6 @@ kalloc_noblock(
        return( kalloc_canblock(size, FALSE) );
 }
 
-
-void
-krealloc(
-       void            **addrp,
-       vm_size_t       old_size,
-       vm_size_t       new_size,
-       simple_lock_t   lock)
-{
-       register int zindex;
-       register vm_size_t allocsize;
-       void *naddr;
-       vm_map_t alloc_map = VM_MAP_NULL;
-
-       /* can only be used for increasing allocation size */
-
-       assert(new_size > old_size);
-
-       /* if old_size is zero, then we are simply allocating */
-
-       if (old_size == 0) {
-               simple_unlock(lock);
-               naddr = kalloc(new_size);
-               simple_lock(lock);
-               *addrp = naddr;
-               return;
-       }
-
-       /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */
-
-       if (old_size >= kalloc_max_prerounded) {
-               if (old_size >=  kalloc_kernmap_size) 
-                       alloc_map = kernel_map;
-               else
-                       alloc_map = kalloc_map;
-
-               old_size = round_page(old_size);
-               new_size = round_page(new_size);
-               if (new_size > old_size) {
-
-                       if (KERN_SUCCESS != kmem_realloc(alloc_map, 
-                           (vm_offset_t)*addrp, old_size,
-                           (vm_offset_t *)&naddr, new_size))
-                               panic("krealloc: kmem_realloc");
-
-                       simple_lock(lock);
-                       *addrp = (void *) naddr;
-
-                       /* kmem_realloc() doesn't free old page range. */
-                       kmem_free(alloc_map, (vm_offset_t)*addrp, old_size);
-
-                       kalloc_large_total += (new_size - old_size);
-                       kalloc_large_sum += (new_size - old_size);
-
-                       if (kalloc_large_total > kalloc_large_max)
-                               kalloc_large_max = kalloc_large_total;
-
-               }
-               return;
-       }
-
-       /* compute the size of the block that we actually allocated */
-
-       allocsize = KALLOC_MINSIZE;
-       zindex = first_k_zone;
-       while (allocsize < old_size) {
-               allocsize <<= 1;
-               zindex++;
-       }
-
-       /* if new size fits in old block, then return */
-
-       if (new_size <= allocsize) {
-               return;
-       }
-
-       /* if new size does not fit in zone, kmem_alloc it, else zalloc it */
-
-       simple_unlock(lock);
-       if (new_size >= kalloc_max_prerounded) {
-               if (new_size >=  kalloc_kernmap_size) 
-                       alloc_map = kernel_map;
-               else
-                       alloc_map = kalloc_map;
-               if (KERN_SUCCESS != kmem_alloc(alloc_map, 
-                   (vm_offset_t *)&naddr, new_size)) {
-                       panic("krealloc: kmem_alloc");
-                       simple_lock(lock);
-                       *addrp = NULL;
-                       return;
-               }
-               kalloc_spin_lock();
-
-               kalloc_large_inuse++;
-               kalloc_large_sum += new_size;
-               kalloc_large_total += new_size;
-
-               if (kalloc_large_total > kalloc_large_max)
-                       kalloc_large_max = kalloc_large_total;
-
-               kalloc_unlock();
-
-               KALLOC_ZINFO_SALLOC(new_size);
-       } else {
-               register int new_zindex;
-
-               allocsize <<= 1;
-               new_zindex = zindex + 1;
-               while (allocsize < new_size) {
-                       allocsize <<= 1;
-                       new_zindex++;
-               }
-               naddr = zalloc(k_zone[new_zindex]);
-       }
-       simple_lock(lock);
-
-       /* copy existing data */
-
-       bcopy((const char *)*addrp, (char *)naddr, old_size);
-
-       /* free old block, and return */
-
-       zfree(k_zone[zindex], *addrp);
-
-       /* set up new address */
-
-       *addrp = (void *) naddr;
-}
-
-
-void *
-kget(
-       vm_size_t       size)
-{
-       register int zindex;
-       register vm_size_t allocsize;
-
-       /* size must not be too large for a zone */
-
-       if (size >= kalloc_max_prerounded) {
-               /* This will never work, so we might as well panic */
-               panic("kget");
-       }
-
-       /* compute the size of the block that we will actually allocate */
-
-       allocsize = KALLOC_MINSIZE;
-       zindex = first_k_zone;
-       while (allocsize < size) {
-               allocsize <<= 1;
-               zindex++;
-       }
-
-       /* allocate from the appropriate zone */
-
-       assert(allocsize < kalloc_max);
-       return(zget(k_zone[zindex]));
-}
-
 volatile SInt32 kfree_nop_count = 0;
 
 void
@@ -551,19 +573,23 @@ kfree(
        void            *data,
        vm_size_t       size)
 {
-       register int zindex;
-       register vm_size_t freesize;
-       vm_map_t alloc_map = kernel_map;
+       zone_t z;
+
+       if (size < MAX_SIZE_ZDLUT)
+               z = get_zone_dlut(size);
+       else if (size < kalloc_max_prerounded)
+               z = get_zone_search(size, k_zindex_start);
+       else {
+               /* if size was too large for a zone, then use kmem_free */
 
-       /* if size was too large for a zone, then use kmem_free */
+               vm_map_t alloc_map = kernel_map;
 
-       if (size >= kalloc_max_prerounded) {
                if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max))
                        alloc_map = kalloc_map;
                if (size > kalloc_largest_allocated) {
                                /*
                                 * work around double FREEs of small MALLOCs
-                                * this use to end up being a nop
+                                * this used to end up being a nop
                                 * since the pointer being freed from an
                                 * alloc backed by the zalloc world could
                                 * never show up in the kalloc_map... however,
@@ -574,7 +600,7 @@ kfree(
                                 * the underlying allocation... that pointer ends up 
                                 * looking like a really big size on the 2nd FREE and
                                 * pushes the kfree into the kernel_map...  we
-                                * end up removing a ton of virutal space before we panic
+                                * end up removing a ton of virtual space before we panic
                                 * this check causes us to ignore the kfree for a size
                                 * that must be 'bogus'... note that it might not be due
                                 * to the above scenario, but it would still be wrong and
@@ -597,19 +623,14 @@ kfree(
                return;
        }
 
-       /* compute the size of the block that we actually allocated from */
-
-       freesize = KALLOC_MINSIZE;
-       zindex = first_k_zone;
-       while (freesize < size) {
-               freesize <<= 1;
-               zindex++;
-       }
-
        /* free to the appropriate zone */
-
-       assert(freesize < kalloc_max);
-       zfree(k_zone[zindex], data);
+#ifdef KALLOC_DEBUG
+       if (size > z->elem_size)
+               panic("%s: z %p (%s) but requested size %lu", __func__,
+                   z, z->zone_name, (unsigned long)size);
+#endif
+       assert(size <= z->elem_size);
+       zfree(z, data);
 }
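/*
 * Editor's note: kalloc() and kfree() both derive the zone purely from
 * the size argument, so callers must free with the size they allocated.
 * A minimal sketch:
 */
static void
kalloc_pairing_sketch(void)
{
	void *p = kalloc(96);		/* served from the zone that covers 96 bytes */

	if (p != NULL)
		kfree(p, 96);		/* same size -> same zone; passing a
					 * different size could select the
					 * wrong zone, as the comment above
					 * about bogus sizes warns */
}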
 
 #ifdef MACH_BSD
@@ -617,21 +638,10 @@ zone_t
 kalloc_zone(
        vm_size_t       size)
 {
-       register int zindex = 0;
-       register vm_size_t allocsize;
-
-       /* compute the size of the block that we will actually allocate */
-
-       allocsize = size;
-       if (size <= kalloc_max) {
-               allocsize = KALLOC_MINSIZE;
-               zindex = first_k_zone;
-               while (allocsize < size) {
-                       allocsize <<= 1;
-                       zindex++;
-               }
-               return (k_zone[zindex]);
-       }
+       if (size < MAX_SIZE_ZDLUT)
+               return (get_zone_dlut(size));
+       if (size <= kalloc_max)
+               return (get_zone_search(size, k_zindex_start));
        return (ZONE_NULL);
 }
 #endif
@@ -705,7 +715,7 @@ OSMalloc_Tagref(
         OSMallocTag            tag)
 {
        if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) 
-               panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state);
+               panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
 
        (void)hw_atomic_add(&tag->OSMT_refcnt, 1);
 }
@@ -715,7 +725,7 @@ OSMalloc_Tagrele(
         OSMallocTag            tag)
 {
        if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID))
-               panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state);
+               panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
 
        if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
                if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
@@ -724,7 +734,7 @@ OSMalloc_Tagrele(
                        OSMalloc_tag_unlock();
                        kfree((void*)tag, sizeof(*tag));
                } else
-                       panic("OSMalloc_Tagrele(): refcnt 0\n");
+                       panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name);
        }
 }
 
@@ -733,7 +743,7 @@ OSMalloc_Tagfree(
         OSMallocTag            tag)
 {
        if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state))
-               panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state);
+               panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state);
 
        if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
                OSMalloc_tag_spin_lock();
@@ -813,7 +823,7 @@ OSFree(
            && (size & ~PAGE_MASK)) {
                kmem_free(kernel_map, (vm_offset_t)addr, size);
        } else
-               kfree((void*)addr, size);
+               kfree((void *)addr, size);
 
        OSMalloc_Tagrele(tag);
 }
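/*
 * Editor's sketch of the tag lifecycle these routines police (the API
 * lives in libkern/OSMalloc.h; the bundle-style name is illustrative):
 */
static void
osmalloc_lifecycle_sketch(void)
{
	OSMallocTag tag = OSMalloc_Tagalloc("com.example.driver", OSMT_DEFAULT);
	void *buf = OSMalloc(1024, tag);	/* takes a tag reference */

	if (buf != NULL)
		OSFree(buf, 1024, tag);		/* drops it via OSMalloc_Tagrele() */
	OSMalloc_Tagfree(tag);			/* marks the tag released */
}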
index 9fcb07edc63c7b0302b14a46b2875ba973f0eed3..77b8cd3be0c1eb70205bf37804e7b2f337d536a5 100644 (file)
@@ -68,8 +68,6 @@ extern void *kalloc(vm_size_t size);
 
 extern void *kalloc_noblock(vm_size_t  size);
 
-extern void *kget(vm_size_t    size);
-
 extern void kfree(void         *data,
                  vm_size_t     size);
 
@@ -79,15 +77,8 @@ __END_DECLS
 
 #include <kern/lock.h>
 
-#define KALLOC_MINSIZE         16
-
 extern void            kalloc_init(void) __attribute__((section("__TEXT, initcode")));
 
-extern void            krealloc(void           **addrp,
-                                vm_size_t      old_size,
-                                vm_size_t      new_size,
-                                simple_lock_t  lock);
-
 extern void            kalloc_fake_zone_init( int );
 
 extern void            kalloc_fake_zone_info(
diff --git a/osfmk/kern/kern_print.h b/osfmk/kern/kern_print.h
deleted file mode 100644 (file)
index 8362110..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:34  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:55  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.5.1  1995/01/06  19:47:13  devrcs
- *     mk6 CR668 - 1.3b26 merge
- *     new file for mk6
- *     [1994/10/12  22:19:25  dwm]
- *
- * Revision 1.1.2.1  1993/11/22  20:14:46  jeffc
- *     Modularized declarations of ddb print functions.
- *     [1993/11/22  19:03:03  jeffc]
- * 
- * $EndLog$
- */
-
-#ifndef        KERN_PRINT_H_
-#define        KERN_PRINT_H_
-
-#include <ddb/db_expr.h>
-
-extern void db_show_all_slocks(void);
-
-extern void db_show_one_zone(db_expr_t, boolean_t, db_expr_t, char *);
-
-extern void db_show_all_zones(db_expr_t, boolean_t, db_expr_t, char *);
-
-#endif /* KERN_PRINT_H_ */
index 1d3aea127e0a62347e1c7ccb734dd9d0eb57b525..c44446335b4927881861b30280b91409077bb7c1 100644 (file)
 #include <mach-o/loader.h>
 #include <libkern/kernel_mach_header.h>
 
+#define KASLR_IOREG_DEBUG 0
+
 
 vm_map_t g_kext_map = 0;
+#if KASLR_IOREG_DEBUG
+mach_vm_offset_t kext_alloc_base = 0;
+mach_vm_offset_t kext_alloc_max = 0;
+#else
 static mach_vm_offset_t kext_alloc_base = 0;
 static mach_vm_offset_t kext_alloc_max = 0;
+#if CONFIG_KEXT_BASEMENT
+static mach_vm_offset_t kext_post_boot_base = 0;
+#endif
+#endif
 
 /*
  * On x86_64 systems, kernel extension text must remain within 2GB of the
@@ -52,9 +62,10 @@ static mach_vm_offset_t kext_alloc_max = 0;
 void 
 kext_alloc_init(void)
 {
-#if __x86_64__
+#if CONFIG_KEXT_BASEMENT
     kern_return_t rval = 0;
     kernel_segment_command_t *text = NULL;
+    kernel_segment_command_t *prelinkTextSegment = NULL;
     mach_vm_offset_t text_end, text_start;
     mach_vm_size_t text_size;
     mach_vm_size_t kext_alloc_size;
@@ -72,9 +83,21 @@ kext_alloc_init(void)
     kext_alloc_base = KEXT_ALLOC_BASE(text_end);
     kext_alloc_size = KEXT_ALLOC_SIZE(text_size);
     kext_alloc_max = kext_alloc_base + kext_alloc_size;
+    
+    /* Post boot kext allocation will start after the prelinked kexts */
+    prelinkTextSegment = getsegbyname("__PRELINK_TEXT");
+    if (prelinkTextSegment) {
+        /* use kext_post_boot_base to start allocations past all the prelinked 
+         * kexts
+         */
+        kext_post_boot_base = 
+            vm_map_round_page(kext_alloc_base + prelinkTextSegment->vmsize);
+    }
+    else {
+        kext_post_boot_base = kext_alloc_base;
+    }
 
-    /* Allocate the subblock of the kernel map */
-
+    /* Allocate the sub-block of the kernel map */
     rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, 
                         kext_alloc_size, /* pageable */ TRUE,
                         VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE,
@@ -91,33 +114,65 @@ kext_alloc_init(void)
            kernel_map->min_offset = kext_alloc_base;
     }
 
-    printf("kext submap [0x%llx - 0x%llx], kernel text [0x%llx - 0x%llx]\n",
-          kext_alloc_base, kext_alloc_max, text->vmaddr,
-          text->vmaddr + text->vmsize);
+    printf("kext submap [0x%lx - 0x%lx], kernel text [0x%lx - 0x%lx]\n",
+          VM_KERNEL_UNSLIDE(kext_alloc_base),
+          VM_KERNEL_UNSLIDE(kext_alloc_max),
+          VM_KERNEL_UNSLIDE(text->vmaddr),
+          VM_KERNEL_UNSLIDE(text->vmaddr + text->vmsize));
+
 #else
     g_kext_map = kernel_map;
     kext_alloc_base = VM_MIN_KERNEL_ADDRESS;
     kext_alloc_max = VM_MAX_KERNEL_ADDRESS;
-#endif /* __x86_64__ */
+#endif /* CONFIG_KEXT_BASEMENT */
 }
 
 kern_return_t
 kext_alloc(vm_offset_t *_addr, vm_size_t size, boolean_t fixed)
 {
     kern_return_t rval = 0;
+#if CONFIG_KEXT_BASEMENT
+    mach_vm_offset_t addr = (fixed) ? *_addr : kext_post_boot_base;
+#else
     mach_vm_offset_t addr = (fixed) ? *_addr : kext_alloc_base;
+#endif
     int flags = (fixed) ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE;
  
-    /* Allocate the kext virtual memory */
+#if CONFIG_KEXT_BASEMENT
+    /* Allocate the kext virtual memory
+     * 10608884 - use mach_vm_map since we want VM_FLAGS_ANYWHERE allocations
+     * placed past kext_post_boot_base (when possible).  mach_vm_allocate
+     * always starts its search at the bottom of the map, no matter what
+     * address you pass in.  We want non-fixed (post-boot) kext allocations
+     * to start looking for free space just past where the prelinked kexts
+     * have loaded.
+     */
+    rval = mach_vm_map(g_kext_map, 
+                       &addr, 
+                       size, 
+                       0,
+                       flags,
+                       MACH_PORT_NULL,
+                       0,
+                       TRUE,
+                       VM_PROT_DEFAULT,
+                       VM_PROT_ALL,
+                       VM_INHERIT_DEFAULT);
+    if (rval != KERN_SUCCESS) {
+        printf("mach_vm_map failed - %d\n", rval);
+        goto finish;
+    }
+#else
     rval = mach_vm_allocate(g_kext_map, &addr, size, flags);
     if (rval != KERN_SUCCESS) {
         printf("vm_allocate failed - %d\n", rval);
         goto finish;
     }
+#endif
 
     /* Check that the memory is reachable by kernel text */
     if ((addr + size) > kext_alloc_max) {
         kext_free((vm_offset_t)addr, size);
+        rval = KERN_INVALID_ADDRESS;
         goto finish;
     }
 
index c97771d04a3a0288e2f9b8329651c5dbfce499b9..cf1a7aa02763bee878f4576f2de1d954ec85d5e4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 /*
  * @OSF_COPYRIGHT@
  */
-/*
- * 8/13/93
- * 
- * This is a half-hearted attempt at providing the parts of the
- * ledger facility to satisfy the ledger interfaces.
- *
- * This implementation basically leaves the (dysfunctional) ledgers
- * unfunctional and are mearly here to satisfy the Mach spec interface
- * reqirements.
- */
-
-#include <mach/mach_types.h>
-#include <mach/message.h>
-#include <mach/port.h>
-#include <mach/ledger_server.h>
 
-#include <kern/mach_param.h>
-#include <kern/misc_protos.h>
 #include <kern/lock.h>
-#include <kern/ipc_kobject.h>
-#include <kern/host.h>
 #include <kern/ledger.h>
 #include <kern/kalloc.h>
+#include <kern/task.h>
 
-#include <ipc/ipc_space.h>
-#include <ipc/ipc_port.h>
+#include <kern/processor.h>
+#include <kern/machine.h>
+#include <kern/queue.h>
+#include <sys/errno.h>
 
-ledger_t       root_wired_ledger;
-ledger_t       root_paged_ledger;
+#include <libkern/OSAtomic.h>
+#include <mach/mach_types.h>
 
+/*
+ * Ledger entry flags. Bits in second nibble (masked by 0xF0) are used for
+ * ledger actions (LEDGER_ACTION_BLOCK, etc).
+ */
+#define        ENTRY_ACTIVE            0x0001  /* entry is active if set */
+#define        WAKE_NEEDED             0x0100  /* one or more threads are asleep */
+#define        WAKE_INPROGRESS         0x0200  /* the wait queue is being processed */
+#define        REFILL_SCHEDULED        0x0400  /* a refill timer has been set */
+#define        REFILL_INPROGRESS       0x0800  /* the ledger is being refilled */
+#define        CALLED_BACK             0x1000  /* callback has already been called */
 
-/* Utility routine to handle entries to a ledger */
-kern_return_t
-ledger_enter(
-            ledger_t           ledger,
-            ledger_item_t      amount)
+/* Determine whether a ledger entry exists, has been initialized, and is active */
+#define        ENTRY_VALID(l, e)                                       \
+       (((l) != NULL) && ((e) >= 0) && ((e) < (l)->l_size) &&  \
+       (((l)->l_entries[e].le_flags & ENTRY_ACTIVE) == ENTRY_ACTIVE))
+
+#ifdef LEDGER_DEBUG
+int ledger_debug = 0;
+
+#define ASSERT(a) assert(a)
+#define        lprintf(a) if (ledger_debug) {                                  \
+       printf("%lld  ", abstime_to_nsecs(mach_absolute_time() / 1000000)); \
+       printf a ;                                                      \
+}
+#else
+#define        lprintf(a)
+#define        ASSERT(a)
+#endif
+
+struct ledger_callback {
+       ledger_callback_t       lc_func;
+       const void              *lc_param0;
+       const void              *lc_param1;
+};
+
+struct entry_template {
+       char                    et_key[LEDGER_NAME_MAX];
+       char                    et_group[LEDGER_NAME_MAX];
+       char                    et_units[LEDGER_NAME_MAX];
+       uint32_t                et_flags;
+       struct ledger_callback  *et_callback;
+};
+
+lck_grp_t ledger_lck_grp;
+
+/*
+ * Modifying the reference count, table size, or table contents requires
+ * holding the lt_lock.  Modifying the table address requires both lt_lock
+ * and setting the inuse bit.  This means that the lt_entries field can be
+ * safely dereferenced if you hold either the lock or the inuse bit.  The
+ * inuse bit exists solely to allow us to swap in a new, larger entries
+ * table without requiring a full lock to be acquired on each lookup.
+ * Accordingly, the inuse bit should never be held for longer than it takes
+ * to extract a value from the table - i.e., 2 or 3 memory references.
+ */
+struct ledger_template {
+       const char              *lt_name;
+       int                     lt_refs;
+       int                     lt_cnt;
+       int                     lt_table_size;
+       volatile uint32_t       lt_inuse;
+       lck_mtx_t               lt_lock;
+       struct entry_template   *lt_entries;
+};
+
+#define template_lock(template)                lck_mtx_lock(&(template)->lt_lock)
+#define template_unlock(template)      lck_mtx_unlock(&(template)->lt_lock)
+
+#define TEMPLATE_INUSE(s, t) {                                         \
+       s = splsched();                                         \
+       while (OSCompareAndSwap(0, 1, &((t)->lt_inuse)))        \
+               ;                                               \
+}
+
+#define TEMPLATE_IDLE(s, t) {                                  \
+       (t)->lt_inuse = 0;                                      \
+       splx(s);                                                \
+}
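/*
 * Editor's note: the two halves of the lt_inuse protocol, condensed.
 * A reader owns the inuse bit only while copying one value out of the
 * table; the grower in ledger_entry_add() below additionally holds
 * lt_lock, so at most one writer ever spins for the bit.  Once the
 * writer's TEMPLATE_INUSE() returns, no reader can still be looking at
 * the old table, which is why the swapped-out table can be kfree()d.
 */
static struct ledger_callback *
template_read_sketch(ledger_template_t template, int idx)
{
	struct ledger_callback *cb;
	spl_t s;

	TEMPLATE_INUSE(s, template);	/* spin until we own the bit */
	cb = template->lt_entries[idx].et_callback;
	TEMPLATE_IDLE(s, template);	/* held for 2-3 references, as required */
	return (cb);
}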
+
+/*
+ * The explicit alignment is to ensure that atomic operations don't panic
+ * on ARM.
+ */
+struct ledger_entry {
+       volatile uint32_t               le_flags;
+        ledger_amount_t                        le_limit;
+        volatile ledger_amount_t       le_credit __attribute__((aligned(8)));
+        volatile ledger_amount_t       le_debit __attribute__((aligned(8)));
+       /*
+        * XXX - the following two fields can go away if we move all of
+        * the refill logic into process policy
+        */
+       uint64_t                        le_refill_period;
+       uint64_t                        le_last_refill;
+} __attribute__((aligned(8)));
+
+struct ledger {
+       int                     l_id;
+       struct ledger_template  *l_template;
+       int                     l_refs;
+       int                     l_size;
+       struct ledger_entry     *l_entries;
+};
+
+static int ledger_cnt = 0;
+/* ledger ast helper functions */
+static uint32_t ledger_check_needblock(ledger_t l, uint64_t now);
+static kern_return_t ledger_perform_blocking(ledger_t l);
+static uint32_t flag_set(volatile uint32_t *flags, uint32_t bit);
+static uint32_t flag_clear(volatile uint32_t *flags, uint32_t bit);
+
+#if 0
+static void
+debug_callback(const void *p0, __unused const void *p1)
 {
-       if (ledger == LEDGER_NULL)
-               return KERN_SUCCESS;
+       printf("ledger: resource exhausted [%s] for task %p\n",
+           (const char *)p0, p1);
+}
+#endif
 
-       /* Need to lock the ledger */
-       ledger_lock(ledger);
-       
-       if (amount > 0) {
-               if (ledger->ledger_limit != LEDGER_ITEM_INFINITY &&
-                   ledger->ledger_balance + amount > ledger->ledger_limit) {
-                       /* XXX this is where you do BAD things */
-                       printf("Ledger limit exceeded ! ledger=%p lim=%d balance=%d\n",
-                              ledger, ledger->ledger_limit,
-                              ledger->ledger_balance);
-                       ledger_unlock(ledger);
-                       return(KERN_RESOURCE_SHORTAGE);
+/************************************/
+
+static uint64_t
+abstime_to_nsecs(uint64_t abstime)
+{
+       uint64_t nsecs;
+
+       absolutetime_to_nanoseconds(abstime, &nsecs);
+       return (nsecs);
+}
+
+static uint64_t
+nsecs_to_abstime(uint64_t nsecs)
+{
+       uint64_t abstime;
+
+       nanoseconds_to_absolutetime(nsecs, &abstime);
+       return (abstime);
+}
+
+void
+ledger_init(void)
+{
+        lck_grp_init(&ledger_lck_grp, "ledger", LCK_GRP_ATTR_NULL);
+}
+
+ledger_template_t
+ledger_template_create(const char *name)
+{
+       ledger_template_t template;
+
+       template = (ledger_template_t)kalloc(sizeof (*template));
+       if (template == NULL)
+               return (NULL);
+
+       template->lt_name = name;
+       template->lt_refs = 1;
+       template->lt_cnt = 0;
+       template->lt_table_size = 1;
+       template->lt_inuse = 0;
+       lck_mtx_init(&template->lt_lock, &ledger_lck_grp, LCK_ATTR_NULL);
+
+       template->lt_entries = (struct entry_template *)
+           kalloc(sizeof (struct entry_template) * template->lt_table_size);
+       if (template->lt_entries == NULL) {
+               kfree(template, sizeof (*template));
+               template = NULL;
+       }
+
+       return (template);
+}
+
+void
+ledger_template_dereference(ledger_template_t template)
+{
+       template_lock(template);
+       template->lt_refs--;
+       template_unlock(template);
+
+       if (template->lt_refs == 0)
+               kfree(template, sizeof (*template));
+}
+
+/*
+ * Add a new entry to the list of entries in a ledger template. There is
+ * currently no mechanism to remove an entry.  Implementing such a mechanism
+ * would require us to maintain per-entry reference counts, which we would
+ * prefer to avoid if possible.
+ */
+int
+ledger_entry_add(ledger_template_t template, const char *key,
+    const char *group, const char *units)
+{
+       int idx;
+       struct entry_template *et;
+
+       if ((key == NULL) || (strlen(key) >= LEDGER_NAME_MAX))
+               return (-1);
+
+       template_lock(template);
+
+       /* If the table is full, attempt to double its size */
+       if (template->lt_cnt == template->lt_table_size) {
+               struct entry_template *new_entries, *old_entries;
+               int old_cnt, old_sz;
+               spl_t s;
+
+               old_cnt = template->lt_table_size;
+               old_sz = (int)(old_cnt * sizeof (struct entry_template));
+               new_entries = kalloc(old_sz * 2);
+               if (new_entries == NULL) {
+                       template_unlock(template);
+                       return (-1);
                }
-               if ((ledger->ledger_balance + amount) 
-                       < LEDGER_ITEM_INFINITY)
-                       ledger->ledger_balance += amount;
-               else
-                       ledger->ledger_balance = LEDGER_ITEM_INFINITY;
+               memcpy(new_entries, template->lt_entries, old_sz);
+               memset(((char *)new_entries) + old_sz, 0, old_sz);
+               template->lt_table_size = old_cnt * 2;
+
+               old_entries = template->lt_entries;
+
+               TEMPLATE_INUSE(s, template);
+               template->lt_entries = new_entries;
+               TEMPLATE_IDLE(s, template);
+
+               kfree(old_entries, old_sz);
        }
-       else if (amount) {
-               if (ledger->ledger_balance + amount > 0)
-                       ledger->ledger_balance += amount;
-               else
-                       ledger->ledger_balance = 0;
+
+       et = &template->lt_entries[template->lt_cnt];
+       strlcpy(et->et_key, key, LEDGER_NAME_MAX);
+       strlcpy(et->et_group, group, LEDGER_NAME_MAX);
+       strlcpy(et->et_units, units, LEDGER_NAME_MAX);
+       et->et_flags = ENTRY_ACTIVE;
+       et->et_callback = NULL;
+
+       idx = template->lt_cnt++;
+       template_unlock(template);
+
+       return (idx);
+}
+
+
+kern_return_t
+ledger_entry_setactive(ledger_t ledger, int entry)
+{
+       struct ledger_entry *le;
+
+       if ((ledger == NULL)  || (entry < 0) || (entry >= ledger->l_size))
+               return (KERN_INVALID_ARGUMENT);
+
+       le = &ledger->l_entries[entry];
+       if ((le->le_flags & ENTRY_ACTIVE) == 0) {
+               flag_set(&le->le_flags, ENTRY_ACTIVE);
        }
-       ledger_unlock(ledger);
-       return(KERN_SUCCESS);
+       return (KERN_SUCCESS);
 }
 
-/* Utility routine to create a new ledger */
-static ledger_t
-ledger_allocate(
-               ledger_item_t   limit,
-               ledger_t        ledger_ledger,
-               ledger_t        ledger_parent)
+
+int
+ledger_key_lookup(ledger_template_t template, const char *key)
 {
-       ledger_t        ledger;
+       int idx;
+
+       template_lock(template);
+       for (idx = 0; idx < template->lt_cnt; idx++)
+               if (template->lt_entries[idx].et_key &&
+                   (strcmp(key, template->lt_entries[idx].et_key) == 0))
+                       break;
 
-       ledger = (ledger_t)kalloc(sizeof(ledger_data_t));
-       if (ledger == LEDGER_NULL)
-               return(LEDGER_NULL);
+       if (idx >= template->lt_cnt)
+               idx = -1;
+       template_unlock(template);
+
+       return (idx);
+}
 
-       ledger->ledger_self = ipc_port_alloc_kernel();
-       if (ledger->ledger_self == IP_NULL) {
-               kfree(ledger, sizeof(ledger_data_t));
-               return(LEDGER_NULL);
+/*
+ * Create a new ledger based on the specified template.  As part of the
+ * ledger creation we need to allocate space for a table of ledger entries.
+ * The size of the table is based on the size of the template at the time
+ * the ledger is created.  If additional entries are added to the template
+ * after the ledger is created, they will not be tracked in this ledger.
+ */
+ledger_t
+ledger_instantiate(ledger_template_t template, int entry_type)
+{
+       ledger_t ledger;
+       size_t sz;
+       int i;
+
+       ledger = (ledger_t)kalloc(sizeof (struct ledger));
+       if (ledger == NULL)
+               return (LEDGER_NULL);
+
+       ledger->l_template = template;
+       ledger->l_id = ledger_cnt++;
+       ledger->l_refs = 1;
+
+       template_lock(template);
+       template->lt_refs++;
+       ledger->l_size = template->lt_cnt;
+       template_unlock(template);
+
+       sz = ledger->l_size * sizeof (struct ledger_entry);
+       ledger->l_entries = kalloc(sz);
+       if (sz && (ledger->l_entries == NULL)) {
+               ledger_template_dereference(template);
+               kfree(ledger, sizeof(struct ledger));
+               return (LEDGER_NULL);
        }
 
-       ledger_lock_init(ledger);
-       ledger->ledger_limit = limit;
-       ledger->ledger_balance = 0;
-       ledger->ledger_service_port = MACH_PORT_NULL;
-       ledger->ledger_ledger = ledger_ledger;
-       ledger->ledger_parent = ledger_parent;
-       ipc_kobject_set(ledger->ledger_self, (ipc_kobject_t)ledger,
-                       IKOT_LEDGER);
+       template_lock(template);
+       assert(ledger->l_size <= template->lt_cnt);
+       for (i = 0; i < ledger->l_size; i++) {
+               struct ledger_entry *le = &ledger->l_entries[i];
+               struct entry_template *et = &template->lt_entries[i];
 
-       return(ledger);
+               le->le_flags = et->et_flags;
+               /* make the entry inactive by clearing the active bit */
+               if (entry_type == LEDGER_CREATE_INACTIVE_ENTRIES)
+                       flag_clear(&le->le_flags, ENTRY_ACTIVE);
+               /*
+                * If the template has a callback, this entry is opted
+                * in by default.
+                */
+               if (et->et_callback != NULL)
+                       flag_set(&le->le_flags, LEDGER_ACTION_CALLBACK);
+               le->le_credit = 0;
+               le->le_debit = 0;
+               le->le_limit = LEDGER_LIMIT_INFINITY;
+               le->le_refill_period = 0;
+       }
+       template_unlock(template);
+
+       return (ledger);
 }
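/*
 * Editor's sketch: typical construction flow, using only the routines
 * in this file (entry names are illustrative):
 */
static void
ledger_usage_sketch(void)
{
	ledger_template_t t = ledger_template_create("example");
	ledger_t l;
	int idx;

	if (t == NULL)
		return;
	idx = ledger_entry_add(t, "cpu_time", "sched", "ns");
	if (idx < 0)
		return;
	l = ledger_instantiate(t, LEDGER_CREATE_INACTIVE_ENTRIES);
	if (l == LEDGER_NULL)
		return;
	ledger_entry_setactive(l, idx);	/* opt the entry in */
	ledger_credit(l, idx, 1000);	/* balance = credit - debit */
	ledger_dereference(l);		/* last ref frees the ledger */
}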
 
-/* Utility routine to destroy a ledger */
-static void
-ledger_deallocate(
-                 ledger_t      ledger)
+static uint32_t
+flag_set(volatile uint32_t *flags, uint32_t bit)
 {
-       /* XXX can be many send rights (copies) of this */
-       ipc_port_dealloc_kernel(ledger->ledger_self);
+       return (OSBitOrAtomic(bit, flags));
+}
 
-       /* XXX release send right on service port */
-       kfree(ledger, sizeof(*ledger));
+static uint32_t
+flag_clear(volatile uint32_t *flags, uint32_t bit)
+{
+       return (OSBitAndAtomic(~bit, flags));
+}
+
+/*
+ * Take a reference on a ledger
+ */
+kern_return_t
+ledger_reference(ledger_t ledger)
+{
+       if (!LEDGER_VALID(ledger))
+               return (KERN_INVALID_ARGUMENT);
+       OSIncrementAtomic(&ledger->l_refs);
+       return (KERN_SUCCESS);
 }
 
+int
+ledger_reference_count(ledger_t ledger)
+{
+       if (!LEDGER_VALID(ledger))
+               return (-1);
+
+       return (ledger->l_refs);
+}
 
 /*
- * Inititalize the ledger facility
+ * Remove a reference on a ledger.  If this is the last reference,
+ * deallocate the unused ledger.
  */
-void ledger_init(void)
+kern_return_t
+ledger_dereference(ledger_t ledger)
 {
-       /*
-        * Allocate the root ledgers; wired and paged.
-        */
-       root_wired_ledger = ledger_allocate(LEDGER_ITEM_INFINITY,
-                                           LEDGER_NULL, LEDGER_NULL);
-       if (root_wired_ledger == LEDGER_NULL)
-               panic("can't allocate root (wired) ledger");
-       ipc_port_make_send(root_wired_ledger->ledger_self);
+       int v;
+
+       if (!LEDGER_VALID(ledger))
+               return (KERN_INVALID_ARGUMENT);
+
+       v = OSDecrementAtomic(&ledger->l_refs);
+       ASSERT(v >= 1);
 
-       root_paged_ledger = ledger_allocate(LEDGER_ITEM_INFINITY,
-                                           LEDGER_NULL, LEDGER_NULL);
-       if (root_paged_ledger == LEDGER_NULL)
-               panic("can't allocate root (paged) ledger");
-       ipc_port_make_send(root_paged_ledger->ledger_self);
+       /* Just released the last reference.  Free it. */
+       if (v == 1) {
+               kfree(ledger->l_entries,
+                   ledger->l_size * sizeof (struct ledger_entry));
+               kfree(ledger, sizeof (*ledger));
+       }
+
+       return (KERN_SUCCESS);
+}
+
+/*
+ * Determine whether an entry has exceeded its limit.
+ */
+static inline int
+limit_exceeded(struct ledger_entry *le)
+{
+       ledger_amount_t balance;
+
+       balance = le->le_credit - le->le_debit;
+       if ((le->le_limit <= 0) && (balance < le->le_limit))
+               return (1);
+
+       if ((le->le_limit > 0) && (balance > le->le_limit))
+               return (1);
+       return (0);
+}
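/*
 * Editor's worked examples: a positive limit is a quota (exceeded from
 * above), a non-positive limit is a deficit floor (exceeded from below):
 *
 *	le_limit = 100: credit 150, debit 30 -> balance  120 > 100, exceeded
 *	le_limit = -50: credit  10, debit 70 -> balance  -60 < -50, exceeded
 *	le_limit = 100: credit  90, debit 20 -> balance   70, within the limit
 */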
+
+static inline struct ledger_callback *
+entry_get_callback(ledger_t ledger, int entry)
+{
+       struct ledger_callback *callback;
+       spl_t s;
+
+       TEMPLATE_INUSE(s, ledger->l_template);
+       callback = ledger->l_template->lt_entries[entry].et_callback;
+       TEMPLATE_IDLE(s, ledger->l_template);
+
+       return (callback);
+}
+
+/*
+ * If the ledger value is positive, wake up anybody waiting on it.
+ */
+static inline void
+ledger_limit_entry_wakeup(struct ledger_entry *le)
+{
+       uint32_t flags;
+
+       if (!limit_exceeded(le)) {
+               flags = flag_clear(&le->le_flags, CALLED_BACK);
+
+               while (le->le_flags & WAKE_NEEDED) {
+                       flag_clear(&le->le_flags, WAKE_NEEDED);
+                       thread_wakeup((event_t)le);
+               }
+       }
 }
 
 /*
- *     Create a subordinate ledger
+ * Refill the coffers.
  */
-kern_return_t ledger_create(
-                           ledger_t parent_ledger,
-                           ledger_t ledger_ledger,
-                           ledger_t *new_ledger,
-                           ledger_item_t transfer)
+static void
+ledger_refill(uint64_t now, ledger_t ledger, int entry)
 {
-       if (parent_ledger == LEDGER_NULL)
-               return(KERN_INVALID_ARGUMENT);
+       uint64_t elapsed, period, periods;
+       struct ledger_entry *le;
+       ledger_amount_t balance, due;
+       int cnt;
 
-       if (ledger_ledger == LEDGER_NULL)
-               return(KERN_INVALID_LEDGER);
+       le = &ledger->l_entries[entry];
 
        /*
-        * Allocate a new ledger and change the ledger_ledger for
-        * its space.
+        * If another thread is handling the refill already, we're not
+        * needed.  Just sit here for a few cycles while the other thread
+        * finishes updating the balance.  If it takes too long, just return
+        * and we'll block again.
         */
-       ledger_lock(ledger_ledger);
-       if ((ledger_ledger->ledger_limit != LEDGER_ITEM_INFINITY) &&
-           (ledger_ledger->ledger_balance + sizeof(ledger_data_t) >
-            ledger_ledger->ledger_limit)) {
-               ledger_unlock(ledger_ledger);
-               return(KERN_RESOURCE_SHORTAGE);
+       if (flag_set(&le->le_flags, REFILL_INPROGRESS) & REFILL_INPROGRESS) {
+               cnt = 0;
+               while (cnt++ < 100 && (le->le_flags & REFILL_INPROGRESS))
+                       ;
+               return;
        }
 
-       *new_ledger = ledger_allocate(LEDGER_ITEM_INFINITY, ledger_ledger, parent_ledger);
-       if (*new_ledger == LEDGER_NULL) {
-               ledger_unlock(ledger_ledger);
-               return(KERN_RESOURCE_SHORTAGE);
+       /*
+        * See how many refill periods have passed since we last
+        * did a refill.
+        */
+       period = le->le_refill_period;
+       elapsed = now - le->le_last_refill;
+       if ((period == 0) || (elapsed < period)) {
+               flag_clear(&le->le_flags, REFILL_INPROGRESS);
+               return;
        }
-       
+
+       /*
+        * Optimize for the most common case of only one or two
+        * periods elapsing.
+        */
+       periods = 0;
+       while ((periods < 2) && (elapsed > 0)) {
+               periods++;
+               elapsed -= period;
+       }
+
+       /*
+        * OK, it's been a long time.  Do a divide to figure out
+        * how long.
+        */
+       if (elapsed > 0)
+               periods = (now - le->le_last_refill) / period;
+
+       balance = le->le_credit - le->le_debit;
+       due = periods * le->le_limit;
+       if (balance - due < 0)
+               due = balance;
+       OSAddAtomic64(due, &le->le_debit);
+
        /*
-        * Now transfer the limit for the new ledger from the parent
+        * If we've completely refilled the pool, set the refill time to now.
+        * Otherwise set it to the time at which it last should have been
+        * fully refilled.
         */
-       ledger_lock(parent_ledger);
-       if (parent_ledger->ledger_limit != LEDGER_ITEM_INFINITY) {
-               /* Would the existing balance exceed the new limit ? */
-               if (parent_ledger->ledger_limit - transfer < parent_ledger->ledger_balance) {
-                       ledger_unlock(parent_ledger);
-                       ledger_unlock(ledger_ledger);
-                       return(KERN_RESOURCE_SHORTAGE);
+       if (balance == due)
+               le->le_last_refill = now;
+       else
+               le->le_last_refill += (le->le_refill_period * periods);
+
+       flag_clear(&le->le_flags, REFILL_INPROGRESS);
+
+       lprintf(("Refill %lld %lld->%lld\n", periods, balance, balance - due));
+       if (!limit_exceeded(le))
+               ledger_limit_entry_wakeup(le);
+}
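/*
 * Editor's worked example (hypothetical figures, all in abstime units):
 * le_limit = 2ms of CPU per le_refill_period = 10ms, last refill 35ms
 * ago.  The divide yields periods = 3, so due = 3 * 2ms = 6ms is added
 * to le_debit, lowering the balance.  If the balance was only 5ms, due
 * is clamped to 5ms and the pool is considered fully refilled
 * (le_last_refill = now); with a balance of 8ms, 6ms is forgiven and
 * le_last_refill advances by exactly three periods so no credit is lost.
 */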
+
+static void
+ledger_check_new_balance(ledger_t ledger, int entry)
+{
+       struct ledger_entry *le;
+       uint64_t now;
+
+       le = &ledger->l_entries[entry];
+
+       /* Check to see whether we're due a refill */
+       if (le->le_refill_period) {
+               now = mach_absolute_time();
+               if ((now - le->le_last_refill) > le->le_refill_period)
+                       ledger_refill(now, ledger, entry);
+       }
+
+       if (limit_exceeded(le)) {
+               /*
+                * We've exceeded the limit for this entry.  There
+                * are several possible ways to handle it.  We can block,
+                * we can execute a callback, or we can ignore it.  In
+                * either of the first two cases, we want to set the AST
+                * flag so we can take the appropriate action just before
+                * leaving the kernel.  The one caveat is that if we have
+                * already called the callback, we don't want to do it
+                * again until it gets rearmed.
+                */
+               if ((le->le_flags & LEDGER_ACTION_BLOCK) ||
+                   (!(le->le_flags & CALLED_BACK) &&
+                   entry_get_callback(ledger, entry))) {
+                       set_astledger(current_thread());
                }
-               if (parent_ledger->ledger_limit - transfer > 0)
-                       parent_ledger->ledger_limit -= transfer;
-               else
-                       parent_ledger->ledger_limit = 0;
+       } else {
+               /*
+                * The balance on the account is below the limit.  If
+                * there are any threads blocked on this entry, now would
+                * be a good time to wake them up.
+                */
+               if (le->le_flags & WAKE_NEEDED)
+                       ledger_limit_entry_wakeup(le);
        }
-       (*new_ledger)->ledger_limit = transfer;
+}
 
-       /* Charge the ledger against the ledger_ledger */
-       ledger_ledger->ledger_balance += (ledger_item_t)sizeof(ledger_data_t);
-       ledger_unlock(parent_ledger);
+/*
+ * Add value to an entry in a ledger.
+ */
+kern_return_t
+ledger_credit(ledger_t ledger, int entry, ledger_amount_t amount)
+{
+       ledger_amount_t old, new;
+       struct ledger_entry *le;
 
-       ledger_unlock(ledger_ledger);
-       
-       return(KERN_SUCCESS);
+       if (!ENTRY_VALID(ledger, entry) || (amount < 0))
+               return (KERN_INVALID_VALUE);
+
+       if (amount == 0)
+               return (KERN_SUCCESS);
+
+       le = &ledger->l_entries[entry];
+
+       old = OSAddAtomic64(amount, &le->le_credit);
+       new = old + amount;
+       lprintf(("%p Credit %lld->%lld\n", current_thread(), old, new));
+       ledger_check_new_balance(ledger, entry);
+
+       return (KERN_SUCCESS);
 }
 
+
 /*
- *     Destroy a ledger
+ * Adjust the limit of a limited resource.  This does not affect the
+ * current balance, so the change doesn't affect the thread until the
+ * next refill.
  */
-kern_return_t ledger_terminate(
-                              ledger_t ledger)
+kern_return_t
+ledger_set_limit(ledger_t ledger, int entry, ledger_amount_t limit)
 {
-       if (ledger == LEDGER_NULL)
-               return(KERN_INVALID_ARGUMENT);
-       
-       /* You can't deallocate kernel ledgers */
-       if (ledger == root_wired_ledger ||
-           ledger == root_paged_ledger)
-               return(KERN_INVALID_LEDGER);
+       struct ledger_entry *le;
 
-       /* Lock the ledger */
-       ledger_lock(ledger);
-       
-       /* the parent ledger gets back the limit */
-       ledger_lock(ledger->ledger_parent);
-       if (ledger->ledger_parent->ledger_limit != LEDGER_ITEM_INFINITY) {
-               assert((natural_t)(ledger->ledger_parent->ledger_limit +
-                                 ledger->ledger_limit) <
-                      LEDGER_ITEM_INFINITY);
-               ledger->ledger_parent->ledger_limit += ledger->ledger_limit;
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_VALUE);
+
+       lprintf(("ledger_set_limit: %x\n", (uint32_t)limit));
+       le = &ledger->l_entries[entry];
+       le->le_limit = limit;
+       le->le_last_refill = 0;
+       flag_clear(&le->le_flags, CALLED_BACK);
+       ledger_limit_entry_wakeup(le);
+
+       return (KERN_SUCCESS);
+}
+
+/*
+ * Add a callback to be executed when the resource goes into deficit
+ */
+kern_return_t
+ledger_set_callback(ledger_template_t template, int entry,
+   ledger_callback_t func, const void *param0, const void *param1)
+{
+       struct entry_template *et;
+       struct ledger_callback *old_cb, *new_cb;
+
+       if ((entry < 0) || (entry >= template->lt_cnt))
+               return (KERN_INVALID_VALUE);
+
+       if (func) {
+               new_cb = (struct ledger_callback *)kalloc(sizeof (*new_cb));
+               new_cb->lc_func = func;
+               new_cb->lc_param0 = param0;
+               new_cb->lc_param1 = param1;
+       } else {
+               new_cb = NULL;
        }
-       ledger_unlock(ledger->ledger_parent);
 
-       /*
-        * XXX The spec says that you have to destroy all objects that
-        * have been created with this ledger. Nice work eh? For now
-        * Transfer the balance to the parent and let it worry about
-        * it.
-        */
-       /* XXX the parent ledger inherits the debt ?? */
-       (void) ledger_enter(ledger->ledger_parent, ledger->ledger_balance);
-       
-       /* adjust the balance of the creation ledger */
-       (void) ledger_enter(ledger->ledger_ledger, (ledger_item_t)-sizeof(*ledger));
+       template_lock(template);
+       et = &template->lt_entries[entry];
+       old_cb = et->et_callback;
+       et->et_callback = new_cb;
+       template_unlock(template);
+       if (old_cb)
+               kfree(old_cb, sizeof (*old_cb));
 
-       /* delete the ledger */
-       ledger_deallocate(ledger);
+       return (KERN_SUCCESS);
+}
 
-       return(KERN_SUCCESS);
+/*
+ * Disable callback notification for a specific ledger entry.
+ *
+ * Otherwise, if the ledger's template specifies a callback function
+ * (via ledger_set_callback()), it will be invoked when the resource
+ * goes into deficit.
+ */
+kern_return_t
+ledger_disable_callback(ledger_t ledger, int entry)
+{
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_VALUE);
+
+       flag_clear(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK);
+       return (KERN_SUCCESS);
 }
 
 /*
- *     Return the ledger limit and balance
+ * Clear the called_back flag, indicating that we want to be notified
+ * again when the limit is next exceeded.
  */
-kern_return_t ledger_read(
-                         ledger_t ledger,
-                         ledger_item_t *balance,
-                         ledger_item_t *limit)
+kern_return_t
+ledger_reset_callback(ledger_t ledger, int entry)
 {
-       if (ledger == LEDGER_NULL)
-               return(KERN_INVALID_ARGUMENT);
-       
-       ledger_lock(ledger);
-       *balance = ledger->ledger_balance;
-       *limit = ledger->ledger_limit;
-       ledger_unlock(ledger);
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_VALUE);
 
-       return(KERN_SUCCESS);
+       flag_clear(&ledger->l_entries[entry].le_flags, CALLED_BACK);
+       return (KERN_SUCCESS);
 }
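/*
 * Editor's sketch of the one-shot notification pattern (handler and
 * parameters are hypothetical): CALLED_BACK suppresses repeats until
 * the owner rearms the entry.
 */
static void
deficit_handler_sketch(const void *param0, __unused const void *param1)
{
	/* e.g. throttle or log the task named by param0 */
}

static void
rearm_sketch(ledger_template_t t, ledger_t l, int idx)
{
	ledger_set_callback(t, idx, deficit_handler_sketch, "cpu_time", NULL);
	/* ... the handler fires once when the entry goes into deficit ... */
	ledger_reset_callback(l, idx);	/* clear CALLED_BACK to be notified again */
}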
 
 /*
- *     Transfer resources from a parent ledger to a child
+ * Adjust the automatic refill period.
  */
-kern_return_t ledger_transfer(
-                             ledger_t parent_ledger,
-                             ledger_t child_ledger,
-                             ledger_item_t transfer)
+kern_return_t
+ledger_set_period(ledger_t ledger, int entry, uint64_t period)
 {
-#define abs(v) ((v) > 0)?(v):-(v)
-       
-       ledger_t src, dest;
-       ledger_item_t amount = abs(transfer);
-       
-       if (parent_ledger == LEDGER_NULL)
-               return(KERN_INVALID_ARGUMENT);
+       struct ledger_entry *le;
 
-       if (child_ledger == LEDGER_NULL)
-               return(KERN_INVALID_ARGUMENT);
+       lprintf(("ledger_set_period: %llx\n", period));
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_VALUE);
 
-       /* Must be different ledgers */
-       if (parent_ledger == child_ledger)
-               return(KERN_INVALID_ARGUMENT);
+       le = &ledger->l_entries[entry];
+       le->le_refill_period = nsecs_to_abstime(period);
 
-       if (transfer == 0)
-               return(KERN_SUCCESS);
-       
-       ledger_lock(child_ledger);
-       ledger_lock(parent_ledger);
-
-       /* XXX Should be the parent you created it from ?? */
-       if (parent_ledger != child_ledger->ledger_parent) {
-               ledger_unlock(parent_ledger);
-               ledger_unlock(child_ledger);
-               return(KERN_INVALID_LEDGER);
+       return (KERN_SUCCESS);
+}
+
+kern_return_t
+ledger_set_action(ledger_t ledger, int entry, int action)
+{
+       lprintf(("ledger_set_action: %d\n", action));
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_VALUE);
+
+       flag_set(&ledger->l_entries[entry].le_flags, action);
+       return (KERN_SUCCESS);
+}
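/*
 * Editor's sketch: combining the three knobs to express "at most 50ms
 * of CPU per 100ms window, blocking on overrun".  The entry index is
 * hypothetical, and note the asymmetry: ledger_set_period() converts
 * from nanoseconds itself, while the limit for a time-based entry is
 * assumed to be supplied already in abstime units.
 */
static void
throttle_sketch(ledger_t l, int cpu_time)
{
	ledger_set_limit(l, cpu_time, nsecs_to_abstime(50000000ULL));	/* 50ms */
	ledger_set_period(l, cpu_time, 100000000ULL);			/* 100ms in ns */
	ledger_set_action(l, cpu_time, LEDGER_ACTION_BLOCK);
}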
+
+void
+set_astledger(thread_t thread)
+{
+       spl_t s = splsched();
+
+       if (thread == current_thread()) {
+               thread_ast_set(thread, AST_LEDGER);
+               ast_propagate(thread->ast);
+       } else {
+               processor_t p;
+
+               thread_lock(thread);
+               thread_ast_set(thread, AST_LEDGER);
+               p = thread->last_processor;
+               if ((p != PROCESSOR_NULL) && (p->state == PROCESSOR_RUNNING) &&
+                  (p->active_thread == thread))
+                       cause_ast_check(p);
+               thread_unlock(thread);
        }
+       
+       splx(s);
+}
+
+kern_return_t
+ledger_debit(ledger_t ledger, int entry, ledger_amount_t amount)
+{
+       struct ledger_entry *le;
+       ledger_amount_t old, new;
+
+       if (!ENTRY_VALID(ledger, entry) || (amount < 0))
+               return (KERN_INVALID_ARGUMENT);
+
+       if (amount == 0)
+               return (KERN_SUCCESS);
+
+       le = &ledger->l_entries[entry];
+
+       old = OSAddAtomic64(amount, &le->le_debit);
+       new = old + amount;
+
+       lprintf(("%p Debit %lld->%lld\n", thread, old, new));
+       ledger_check_new_balance(ledger, entry);
+       return (KERN_SUCCESS);
 
-       if (transfer > 0) {
-               dest = child_ledger;
-               src = parent_ledger;
+}
+
+void
+ledger_ast(thread_t thread)
+{
+       struct ledger *l = thread->t_ledger;
+       struct ledger  *thl = thread->t_threadledger;
+       uint32_t block;
+       uint64_t now;
+       kern_return_t ret;
+       task_t task = thread->task;
+
+       lprintf(("Ledger AST for %p\n", thread));
+
+       ASSERT(task != NULL);
+       ASSERT(thread == current_thread());
+
+top:
+       /*
+        * Make sure this thread is up to date with regard to any task-wide per-thread
+        * CPU limit.
+        */
+       if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) &&
+           ((thread->options & TH_OPT_PROC_CPULIMIT) == 0) ) {
+               /*
+                * Task has a per-thread CPU limit on it, and this thread
+                * needs it applied.
+                */
+               thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, task->rusage_cpu_perthr_percentage,
+                       task->rusage_cpu_perthr_interval);
+               assert((thread->options & TH_OPT_PROC_CPULIMIT) != 0);
+       } else if (((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) &&
+                   (thread->options & TH_OPT_PROC_CPULIMIT)) {
+               /*
+                * Task no longer has a per-thread CPU limit; remove this thread's
+                * corresponding CPU limit.
+                */
+               thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, 0, 0);
+               assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
        }
-       else {
-               src = child_ledger;
-               dest = parent_ledger;
+
+       /*
+        * If the task or thread is being terminated, let's just get on with it
+        */
+       if ((l == NULL) || !task->active || task->halting || !thread->active)
+               return;
+       
+       /*
+        * Examine all entries in deficit to see which might be eligible for
+        * an automatic refill, which require callbacks to be issued, and
+        * which require blocking.
+        */
+       block = 0;
+       now = mach_absolute_time();
+
+       if (LEDGER_VALID(thl)) {
+               block |= ledger_check_needblock(thl, now);
        }
+       block |= ledger_check_needblock(l, now);
 
-       if (src->ledger_limit != LEDGER_ITEM_INFINITY) {
-               /* Would the existing balance exceed the new limit ? */
-               if (src->ledger_limit - amount < src->ledger_balance) {
-                       ledger_unlock(parent_ledger);
-                       ledger_unlock(child_ledger);
-                       return(KERN_RESOURCE_SHORTAGE);
+       /*
+        * If we are supposed to block on the availability of one or more
+        * resources, find the first entry in deficit for which we should wait.
+        * Schedule a refill if necessary and then sleep until the resource
+        * becomes available.
+        */
+       if (block) {
+               if (LEDGER_VALID(thl)) {
+                       ret = ledger_perform_blocking(thl);
+                       if (ret != KERN_SUCCESS)
+                               goto top;
                }
-               if (src->ledger_limit - amount > 0)
-                       src->ledger_limit -= amount;
-               else
-                       src->ledger_limit = 0;
-       }
+               ret = ledger_perform_blocking(l);
+               if (ret != KERN_SUCCESS)
+                       goto top;
+       } /* block */
+}
 
-       if (dest->ledger_limit != LEDGER_ITEM_INFINITY) {
-               if ((natural_t)(dest->ledger_limit + amount) 
-                       < LEDGER_ITEM_INFINITY)
-                       dest->ledger_limit += amount;
-               else
-                       dest->ledger_limit = (LEDGER_ITEM_INFINITY - 1);
+static uint32_t
+ledger_check_needblock(ledger_t l, uint64_t now)
+{
+       int i;
+       uint32_t flags, block = 0;
+       struct ledger_entry *le;
+       struct ledger_callback *lc;
+
+
+       for (i = 0; i < l->l_size; i++) {
+               le = &l->l_entries[i];
+               if (limit_exceeded(le) == FALSE)
+                       continue;
+
+               /* Check for refill eligibility */
+               if (le->le_refill_period) {
+                       if ((le->le_last_refill + le->le_refill_period) > now) {
+                               ledger_refill(now, l, i);
+                               if (limit_exceeded(le) == FALSE)
+                                       continue;
+                       }
+               }
+
+               if (le->le_flags & LEDGER_ACTION_BLOCK)
+                       block = 1;
+               if ((le->le_flags & LEDGER_ACTION_CALLBACK) == 0)
+                       continue;
+               lc = entry_get_callback(l, i);
+               assert(lc != NULL);
+               flags = flag_set(&le->le_flags, CALLED_BACK);
+               /* Callback has already been called */
+               if (flags & CALLED_BACK)
+                       continue;
+               lc->lc_func(lc->lc_param0, lc->lc_param1);
        }
+       return(block);
+}
 
-       ledger_unlock(parent_ledger);
-       ledger_unlock(child_ledger);
-       
+
+/* return KERN_SUCCESS to continue, KERN_FAILURE to restart */
+static kern_return_t
+ledger_perform_blocking(ledger_t l)
+{
+       int i;
+       kern_return_t ret;
+       struct ledger_entry *le;
+
+       for (i = 0; i < l->l_size; i++) {
+               le = &l->l_entries[i];
+               if ((!limit_exceeded(le)) ||
+                   ((le->le_flags & LEDGER_ACTION_BLOCK) == 0))
+                       continue;
+
+               /* Prepare to sleep until the resource is refilled */
+               ret = assert_wait_deadline(le, TRUE,
+                   le->le_last_refill + le->le_refill_period);
+               if (ret != THREAD_WAITING)
+                       return(KERN_SUCCESS);
+
+               /* Mark that somebody is waiting on this entry */
+               flag_set(&le->le_flags, WAKE_NEEDED);
+
+               ret = thread_block_reason(THREAD_CONTINUE_NULL, NULL,
+                   AST_LEDGER);
+               if (ret != THREAD_AWAKENED)
+                       return(KERN_SUCCESS);
+
+               /*
+                * The world may have changed while we were asleep.
+                * Some other resource we need may have gone into
+                * deficit.  Or maybe we're supposed to die now.
+                * Go back to the top and reevaluate.
+                */
+               return(KERN_FAILURE);
+       }
        return(KERN_SUCCESS);
-#undef abs
 }
 
-/*
- *     Routine:        convert_port_to_ledger
- *     Purpose:
- *             Convert from a port to a ledger.
- *             Doesn't consume the port ref; the ledger produced may be null.
- *     Conditions:
- *             Nothing locked.
- */
 
-ledger_t
-convert_port_to_ledger(
-                      ipc_port_t port)
+kern_return_t
+ledger_get_entries(ledger_t ledger, int entry, ledger_amount_t *credit,
+    ledger_amount_t *debit)
+{
+       struct ledger_entry *le;
+
+       if (!ENTRY_VALID(ledger, entry))
+               return (KERN_INVALID_ARGUMENT);
+
+       le = &ledger->l_entries[entry];
+
+       *credit = le->le_credit;
+       *debit = le->le_debit;
+
+       return (KERN_SUCCESS);
+}
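
ledger_get_entries() exposes only the raw running totals; deriving the balance (credit minus debit, the same convention as lei_balance below) is left to the caller. A small illustrative wrapper:

	static ledger_amount_t
	entry_balance(ledger_t ledger, int entry)
	{
		ledger_amount_t credit, debit;

		if (ledger_get_entries(ledger, entry, &credit, &debit) != KERN_SUCCESS)
			return (0);
		return (credit - debit);
	}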
+
+int
+ledger_template_info(void **buf, int *len)
 {
-       ledger_t ledger = LEDGER_NULL;
+       struct ledger_template_info *lti;
+       struct entry_template *et;
+       int i;
+       ledger_t l;
 
-       if (IP_VALID(port)) {
-               ip_lock(port);
-               if (ip_active(port) &&
-                   (ip_kotype(port) == IKOT_LEDGER))
-                       ledger = (ledger_t) port->ip_kobject;
-               ip_unlock(port);
+       /*
+        * Since all tasks share a ledger template, we'll just use the
+        * caller's as the source.
+        */
+       l = current_task()->ledger;
+       if ((*len < 0) || (l == NULL))
+               return (EINVAL);
+
+       if (*len > l->l_size)
+                *len = l->l_size;
+       lti = kalloc((*len) * sizeof (struct ledger_template_info));
+       if (lti == NULL)
+               return (ENOMEM);
+       *buf = lti;
+
+       template_lock(l->l_template);
+       et = l->l_template->lt_entries;
+
+       for (i = 0; i < *len; i++) {
+               memset(lti, 0, sizeof (*lti));
+               strlcpy(lti->lti_name, et->et_key, LEDGER_NAME_MAX);
+               strlcpy(lti->lti_group, et->et_group, LEDGER_NAME_MAX);
+               strlcpy(lti->lti_units, et->et_units, LEDGER_NAME_MAX);
+               et++;
+               lti++;
        }
+       template_unlock(l->l_template);
 
-       return ledger;
+       return (0);
 }
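
Note that *len is in/out: on entry it bounds how many records the caller will accept, on exit it has been clamped to the template size and reports how many records were actually written, which is also the count to use when freeing the kalloc'd buffer. A sketch of a call, with an arbitrary initial bound:

	void *buf;
	int len = 64;	/* caller's upper bound, chosen arbitrarily */

	if (ledger_template_info(&buf, &len) == 0) {
		/* buf now holds len struct ledger_template_info records;
		 * release with kfree(buf, len * sizeof (struct ledger_template_info)) */
	}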
 
-/*
- *     Routine:        convert_ledger_to_port
- *     Purpose:
- *             Convert from a ledger to a port.
- *             Produces a naked send right which may be invalid.
- *     Conditions:
- *             Nothing locked.
- */
+int
+ledger_entry_info(task_t task, void **buf, int *len)
+{
+       struct ledger_entry_info *lei;
+       struct ledger_entry *le;
+       uint64_t now = mach_absolute_time();
+       int i;
+       ledger_t l;
+
+       if ((*len < 0) || ((l = task->ledger) == NULL))
+               return (EINVAL);
 
-ipc_port_t
-convert_ledger_to_port(
-                      ledger_t ledger)
+       if (*len > l->l_size)
+                *len = l->l_size;
+       lei = kalloc((*len) * sizeof (struct ledger_entry_info));
+       if (lei == NULL)
+               return (ENOMEM);
+       *buf = lei;
+
+       le = l->l_entries;
+
+       for (i = 0; i < *len; i++) {
+               memset(lei, 0, sizeof (*lei));
+               lei->lei_limit = le->le_limit;
+               lei->lei_credit = le->le_credit;
+               lei->lei_debit = le->le_debit;
+               lei->lei_balance = lei->lei_credit - lei->lei_debit;
+               lei->lei_refill_period =
+                       abstime_to_nsecs(le->le_refill_period);
+               lei->lei_last_refill =
+                       abstime_to_nsecs(now - le->le_last_refill);
+               le++;
+               lei++;
+       }
+
+       return (0);
+}
+
+int
+ledger_info(task_t task, struct ledger_info *info)
 {
-       ipc_port_t port;
+       ledger_t l;
+
+       if ((l = task->ledger) == NULL)
+               return (ENOENT);
 
-       if (ledger == LEDGER_NULL)
-               return IP_NULL;
+       memset(info, 0, sizeof (*info));
 
-       port = ipc_port_make_send(ledger->ledger_self);
-       return port;
+       strlcpy(info->li_name, l->l_template->lt_name, LEDGER_NAME_MAX);
+       info->li_id = l->l_id;
+       info->li_entries = l->l_size;
+       return (0);
 }
 
-/*
- * Copy a ledger
- */
-ipc_port_t
-ledger_copy(
-           ledger_t ledger)
+#ifdef LEDGER_DEBUG
+int
+ledger_limit(task_t task, struct ledger_limit_args *args)
 {
-       if (ledger == LEDGER_NULL)
-               return IP_NULL;
+       ledger_t l;
+       int64_t limit;
+       int idx;
+
+       if ((l = task->ledger) == NULL)
+               return (EINVAL);
+
+       idx = ledger_key_lookup(l->l_template, args->lla_name);
+       if ((idx < 0) || (idx >= l->l_size))
+               return (EINVAL);
+
+       /*
+        * XXX - this doesn't really seem like the right place to have
+        * a context-sensitive conversion of userspace units into kernel
+        * units.  For now I'll handwave and say that the ledger() system
+        * call isn't meant for civilians to use - they should be using
+        * the process policy interfaces.
+        */
+       if (idx == task_ledgers.cpu_time) {
+               int64_t nsecs;
+
+               if (args->lla_refill_period) {
+                       /*
+                        * If a refill is scheduled, then the limit is 
+                        * specified as a percentage of one CPU.  The
+                        * syscall specifies the refill period in terms of
+                        * milliseconds, so we need to convert to nsecs.
+                        */
+                       args->lla_refill_period *= 1000000;
+                       nsecs = args->lla_limit *
+                           (args->lla_refill_period / 100);
+                       lprintf(("CPU limited to %lld nsecs per second\n",
+                           nsecs));
+               } else {
+                       /*
+                        * If no refill is scheduled, then this is a
+                        * fixed amount of CPU time (in nsecs) that can
+                        * be consumed.
+                        */
+                       nsecs = args->lla_limit;
+                       lprintf(("CPU limited to %lld nsecs\n", nsecs));
+               }
+               limit = nsecs_to_abstime(nsecs);
+       } else {
+               limit = args->lla_limit;
+               lprintf(("%s limited to %lld\n", args->lla_name, limit));
+       }
+
+       if (args->lla_refill_period > 0)
+               ledger_set_period(l, idx, args->lla_refill_period);
 
-       return(ipc_port_copy_send(ledger->ledger_self));
+       ledger_set_limit(l, idx, limit);
+       flag_set(&l->l_entries[idx].le_flags, LEDGER_ACTION_BLOCK);
+       return (0);
 }
+#endif
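
To make the percentage encoding concrete: lla_limit = 50 with lla_refill_period = 1000 (ms) yields refill_period = 1000 * 1000000 = 1,000,000,000 ns and nsecs = 50 * (1,000,000,000 / 100) = 500,000,000 ns, i.e. half a second of CPU per one-second refill window -- 50% of one CPU, exactly as the comment promises -- which nsecs_to_abstime() then converts before the limit is installed.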
index fe0fa2c04a474d5411e715e2ab2573d547abb132..98278168663443cd6f417b2e3b776b03755cf10a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * @OSF_COPYRIGHT@
  */
 
-#ifdef MACH_KERNEL_PRIVATE
-
 #ifndef _KERN_LEDGER_H_
 #define _KERN_LEDGER_H_
 
-#include <mach/mach_types.h>
+#define        LEDGER_INFO             0
+#define        LEDGER_ENTRY_INFO       1
+#define        LEDGER_TEMPLATE_INFO    2
+#define        LEDGER_LIMIT            3
 
-#include <kern/kern_types.h>
-#include <kern/lock.h>
-#include <ipc/ipc_types.h>
+#define        LEDGER_NAME_MAX 32
 
-struct ledger {
-        ipc_port_t     ledger_self;
-        ipc_port_t     ledger_service_port;
-        ledger_item_t  ledger_balance;
-        ledger_item_t  ledger_limit;
-        struct ledger  *ledger_ledger;
-        struct ledger  *ledger_parent;
-       decl_simple_lock_data(,lock)
+struct ledger_info {
+       char    li_name[LEDGER_NAME_MAX];
+       int64_t li_id;
+       int64_t li_entries;
 };
 
-typedef struct ledger ledger_data_t;
-
-#define ledger_lock(ledger)    simple_lock(&(ledger)->lock)
-#define ledger_unlock(ledger)  simple_unlock(&(ledger)->lock)
-#define        ledger_lock_init(ledger) \
-       simple_lock_init(&(ledger)->lock, 0)
-
-extern ledger_t        root_wired_ledger;
-extern ledger_t        root_paged_ledger;
-
-#define root_wired_ledger_port root_wired_ledger->ledger_self
-#define root_paged_ledger_port root_paged_ledger->ledger_self
-
-extern void ledger_init(void) __attribute__((section("__TEXT, initcode")));
-
-extern ipc_port_t ledger_copy(ledger_t);
+struct ledger_template_info {
+       char            lti_name[LEDGER_NAME_MAX];
+       char            lti_group[LEDGER_NAME_MAX];
+       char            lti_units[LEDGER_NAME_MAX];
+};
 
-extern kern_return_t ledger_enter(ledger_t, ledger_item_t);
+struct ledger_entry_info {
+        int64_t                lei_balance;
+        int64_t                lei_credit;
+        int64_t                lei_debit;
+        uint64_t       lei_limit;
+       uint64_t        lei_refill_period;      /* In milliseconds */
+       uint64_t        lei_last_refill;        /* Time since last refill */
+};
 
-extern ledger_t convert_port_to_ledger(ipc_port_t);
+struct ledger_limit_args {
+       char            lla_name[LEDGER_NAME_MAX];
+        uint64_t       lla_limit;
+        uint64_t       lla_refill_period;
+};
 
-extern ipc_port_t convert_ledger_to_port(ledger_t);
+#ifdef KERNEL_PRIVATE
+
+typedef struct ledger_template *ledger_template_t;
+
+#define        LEDGER_VALID(ledger)    (ledger != LEDGER_NULL)
+
+/* Action to take when a ledger goes into deficit */
+#define        LEDGER_ACTION_IGNORE    0x0000
+#define        LEDGER_ACTION_BLOCK     0x0010
+#define        LEDGER_ACTION_EXCEPTION 0x0020
+#define        LEDGER_ACTION_CALLBACK  0x0040
+#define        LEDGER_ACTION_MASK      0x00f0
+
+typedef void (*ledger_callback_t)(const void * param0, const void *param1);
+
+extern void ledger_init(void);
+
+extern ledger_template_t ledger_template_create(const char *name);
+extern void ledger_template_dereference(ledger_template_t template);
+extern int ledger_entry_add(ledger_template_t template, const char *key,
+    const char *group, const char *units);
+extern kern_return_t ledger_set_callback(ledger_template_t template, int entry,
+       ledger_callback_t callback, const void *param0, const void *param1);
+extern int ledger_key_lookup(ledger_template_t template, const char *key);
+
+/* value of entry type */
+#define        LEDGER_CREATE_ACTIVE_ENTRIES    0
+#define        LEDGER_CREATE_INACTIVE_ENTRIES  1
+extern ledger_t ledger_instantiate(ledger_template_t template, int entry_type);
+extern kern_return_t ledger_disable_callback(ledger_t ledger, int entry);
+extern kern_return_t ledger_reset_callback(ledger_t ledger, int entry);
+extern kern_return_t ledger_set_limit(ledger_t ledger, int entry,
+       ledger_amount_t limit);
+extern kern_return_t ledger_set_action(ledger_t ledger, int entry, int action);
+extern kern_return_t ledger_set_period(ledger_t ledger, int entry,
+    uint64_t period);
+extern kern_return_t ledger_entry_setactive(ledger_t ledger, int entry);
+extern kern_return_t ledger_credit(ledger_t ledger, int entry,
+       ledger_amount_t amount);
+extern kern_return_t ledger_debit(ledger_t ledger, int entry,
+       ledger_amount_t amount);
+extern kern_return_t ledger_get_entries(ledger_t ledger, int entry,
+       ledger_amount_t *credit, ledger_amount_t *debit);
+
+extern void ledger_ast(thread_t thread);
+extern void set_astledger(thread_t thread);
+
+extern int ledger_reference_count(ledger_t ledger);
+extern kern_return_t ledger_reference(ledger_t ledger);
+extern kern_return_t ledger_dereference(ledger_t ledger);
+
+/* Per-pmap ledger operations */
+#define        pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
+#define        pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
+
+/* Support for ledger() syscall */
+#ifdef LEDGER_DEBUG
+extern int ledger_limit(task_t task, struct ledger_limit_args *args);
+#endif
+extern int ledger_info(task_t task, struct ledger_info *info);
+extern int ledger_entry_info(task_t task, void **buf, int *len);
+extern int ledger_template_info(void **buf, int *len);
+
+#endif /* KERNEL_PRIVATE */
 
 #endif /* _KERN_LEDGER_H_ */
-
-#endif /* MACH_KERNEL_PRIVATE */
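
Taken together, the new header describes a create/describe/instantiate lifecycle. A minimal sketch under KERNEL_PRIVATE; the template name, entry key, and numeric values are invented, error handling is omitted, and time units follow the entry's time base (mach absolute time, which equals nanoseconds on x86):

	#include <kern/ledger.h>

	static ledger_t
	demo_ledger(void)
	{
		ledger_template_t t;
		ledger_t l;
		int idx;

		/* Describe the accounting schema once... */
		t = ledger_template_create("demo");
		idx = ledger_entry_add(t, "cpu_time", "sched", "ns");

		/* ...then instantiate it per accounting domain. */
		l = ledger_instantiate(t, LEDGER_CREATE_ACTIVE_ENTRIES);
		ledger_set_limit(l, idx, 500000000);		/* invented limit */
		ledger_set_period(l, idx, 2000000000);		/* invented refill period */
		ledger_set_action(l, idx, LEDGER_ACTION_BLOCK);	/* sleep while in deficit */
		return (l);
	}

Consumption is then reported with ledger_credit()/ledger_debit(); thread_quantum_expire() and thread_dispatch() later in this commit do exactly that for task_ledgers.cpu_time.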
index 07b9924a12f4acb7e918126238643dd9ce70b85b..7ee5a2f4ea5172be3456cd10aa28fe761414f14e 100644 (file)
@@ -53,7 +53,6 @@
  * any improvements or extensions that they make and grant Carnegie Mellon
  * the rights to redistribute these changes.
  */
-#include <mach_kdb.h>
 #include <mach_ldebug.h>
 #include <debug.h>
 
@@ -366,6 +365,7 @@ void
 lck_attr_setdefault(
        lck_attr_t      *attr)
 {
+#if   __i386__ || __x86_64__
 #if     !DEBUG
        if (LcksOpts & enaLkDeb)
                attr->lck_attr_val =  LCK_ATTR_DEBUG;
@@ -374,6 +374,9 @@ lck_attr_setdefault(
 #else
        attr->lck_attr_val =  LCK_ATTR_DEBUG;
 #endif /* !DEBUG */
+#else
+#error Unknown architecture.
+#endif /* __i386__ || __x86_64__ */
 }
 
 
diff --git a/osfmk/kern/mach_clock.c b/osfmk/kern/mach_clock.c
deleted file mode 100644 (file)
index 7798552..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     File:   clock_prim.c
- *     Author: Avadis Tevanian, Jr.
- *     Date:   1986
- *
- *     Clock primitives.
- */
-#include <gprof.h>
-
-#include <mach/boolean.h>
-#include <mach/machine.h>
-#include <mach/time_value.h>
-#include <mach/vm_param.h>
-#include <mach/vm_prot.h>
-#include <kern/clock.h>
-#include <kern/cpu_number.h>
-#include <kern/host.h>
-#include <kern/lock.h>
-#include <kern/mach_param.h>
-#include <kern/misc_protos.h>
-#include <kern/processor.h>
-#include <kern/sched.h>
-#include <kern/sched_prim.h>
-#include <kern/thread.h>
-
-#include <profiling/profile-mk.h>
-
-#if GPROF
-static void prof_tick(boolean_t usermode, natural_t pc);
-#endif
-
-#if STAT_TIME || GPROF
-/*
- * Hertz rate clock interrupt servicing. Used to update processor
- * statistics and perform kernel profiling.
- */
-void
-hertz_tick(
-#if GPROF
-       __unused natural_t      ticks,
-#else
-       natural_t               ticks,
-#endif
-       boolean_t               usermode,
-#if GPROF
-       natural_t               pc)
-#else
-       __unused natural_t              pc)
-#endif
-{
-       processor_t             processor = current_processor();
-       thread_t                thread = current_thread();
-       timer_t                 state;
-
-       if (usermode) {
-               TIMER_BUMP(&thread->user_timer, ticks);
-
-               state = &PROCESSOR_DATA(processor, user_state);
-       }
-       else {
-               /* If this thread is idling, do not charge that time as system time */
-               if ((thread->state & TH_IDLE) == 0) {
-                       TIMER_BUMP(&thread->system_timer, ticks);
-               }
-        
-               if (processor->state == PROCESSOR_IDLE)
-                       state = &PROCESSOR_DATA(processor, idle_state);
-               else
-                       state = &PROCESSOR_DATA(processor, system_state);
-       }
-
-       TIMER_BUMP(state, ticks);
-
-#if GPROF
-       prof_tick(usermode, pc);
-#endif /* GPROF */
-}
-
-#endif /* STAT_TIME */
-
-#if GPROF
-
-static void
-prof_tick(
-       boolean_t       usermode,
-       natural_t       pc)
-{
-       struct profile_vars     *pv;
-       prof_uptrint_t          s;
-
-       pv = PROFILE_VARS(cpu_number());
-
-       if (usermode) {
-               if (pv->active)
-                       PROF_CNT_INC(pv->stats.user_ticks);
-       }
-       else {
-               if (pv->active) {
-                       if (current_processor()->state == CPU_STATE_IDLE)
-                               PROF_CNT_INC(pv->stats.idle_ticks);
-                       else
-                               PROF_CNT_INC(pv->stats.kernel_ticks);
-
-                       if ((prof_uptrint_t)pc < _profile_vars.profil_info.lowpc)
-                               PROF_CNT_INC(pv->stats.too_low);
-                       else {
-                               s = (prof_uptrint_t)pc - _profile_vars.profil_info.lowpc;
-                               if (s < pv->profil_info.text_len) {
-                                       LHISTCOUNTER *ptr = (LHISTCOUNTER *) pv->profil_buf;
-                                       LPROF_CNT_INC(ptr[s / HISTFRACTION]);
-                               }
-                               else
-                                       PROF_CNT_INC(pv->stats.too_high);
-                       }
-               }
-       }
-}
-
-#endif /* GPROF */
index 44b21a9dad54c8d1e229faf8d0d0a177576219d7..a89e08fc30e4bea532e3e56539ba08f645dddad7 100644 (file)
@@ -83,8 +83,6 @@ extern int thread_max, task_threadmax, task_max;
 #define SET_MAX                (task_max + (thread_max * 2) + 200)
                                        /* Max number of port sets */
 
-#define        ITE_MAX         (1 << 16)       /* Max number of splay tree entries */
-
 #define        SPACE_MAX       (task_max + 5)  /* Max number of IPC spaces */
 
 #define SEMAPHORE_MAX   (PORT_MAX >> 1)        /* Maximum number of semaphores */
index f7fb46b3c5f1db9a62cda1d0b3bf3b1845736e06..fa9a5725160106c8a38d5690570ef64ac7e4edbf 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -112,6 +112,9 @@ extern int copyoutmsg(
 extern void inval_copy_windows(thread_t);
 extern void copy_window_fault(thread_t, vm_map_t, int);
 
+extern int copyin_validate(const user_addr_t, uintptr_t, vm_size_t);
+extern int copyout_validate(uintptr_t, const user_addr_t, vm_size_t);
+
 extern int sscanf(const char *input, const char *fmt, ...) __scanflike(2,3);
 
 /* sprintf() is being deprecated. Please use snprintf() instead. */ 
@@ -131,8 +134,6 @@ int     _consume_printf_args(int, ...);
 #endif
 #endif
 
-extern void dbugprintf(const char *format, ...) __printflike(1,2);
-
 extern int kdb_printf(const char *format, ...) __printflike(1,2);
 
 extern int kdb_log(const char *format, ...) __printflike(1,2);
@@ -195,6 +196,11 @@ extern void bootstrap_create(void);
 extern void Debugger(
                const char      * message);
 
+extern void DebuggerWithContext(
+               unsigned int    reason,
+               void            *ctx,
+               const char      *message);
+
 extern void delay(
                int             n);
 
@@ -215,4 +221,6 @@ user_addr_t get_useraddr(void);
 /* symbol lookup */
 struct kmod_info_t;
 
+extern uint64_t early_random(void);
+
 #endif /* _MISC_PROTOS_H_ */
index d8e86124badd6d1704314414a3cd69ed1961138d..83ec87bb12fc4e455e18dfa8b79362dcd2dfcb0e 100644 (file)
@@ -131,12 +131,20 @@ thread_policy_common(
                if (priority < MINPRI)
                        priority = MINPRI;
 
-               thread->importance = priority - thread->task_priority;
-
 #if CONFIG_EMBEDDED
+               if ((thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON)  &&
+                       (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL)) {
+                       thread->saved_importance = priority - thread->task_priority;
+                       priority = MAXPRI_THROTTLE;
+               } else  {
+                       thread->importance = priority - thread->task_priority;
+               }
                /* No one can have a base priority less than MAXPRI_THROTTLE */
                if (priority < MAXPRI_THROTTLE) 
                        priority = MAXPRI_THROTTLE;
+#else  /* CONFIG_EMBEDDED */
+               thread->importance = priority - thread->task_priority;
+
 #endif /* CONFIG_EMBEDDED */
 
                set_priority(thread, priority);
index 6bbb1b1949487c956539f1f878cb91b1b0c415f9..f0dd81cc9aa304022bde55e367e83be4d12933ed 100644 (file)
@@ -81,7 +81,7 @@ mk_timer_create_trap(
        }
 
        simple_lock_init(&timer->lock, 0);
-       call_entry_setup(&timer->call_entry, mk_timer_expire, timer);
+       thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
        timer->is_armed = timer->is_dead = FALSE;
        timer->active = 0;
 
@@ -190,6 +190,18 @@ mk_timer_expire(
        simple_unlock(&timer->lock);
 }
 
+/*
+ * mk_timer_destroy_trap: Destroy the Mach port associated with a timer
+ *
+ * Parameters:  args                     User argument descriptor (see below)
+ *
+ * Indirect:     args->name               Mach port name
+ *
+ *
+ * Returns:        0                      Success
+ *                !0                      Not success           
+ *
+ */
 kern_return_t
 mk_timer_destroy_trap(
        struct mk_timer_destroy_trap_args *args)
@@ -215,6 +227,19 @@ mk_timer_destroy_trap(
        return (result);
 }
 
+/*
+ * mk_timer_arm_trap: Start (arm) a timer
+ *
+ * Parameters:  args                     User argument descriptor (see below)
+ *
+ * Indirect:     args->name               Mach port name
+ *               args->expire_time        Time when timer expires
+ *
+ *
+ * Returns:        0                      Success
+ *                !0                      Not success           
+ *
+ */
 kern_return_t
 mk_timer_arm_trap(
        struct mk_timer_arm_trap_args *args)
@@ -254,6 +279,19 @@ mk_timer_arm_trap(
        return (result);
 }
 
+/*
+ * mk_timer_cancel_trap: Cancel a timer
+ *
+ * Parameters:  args                     User argument descriptor (see below)
+ *
+ * Indirect:     args->name               Mach port name
+ *               args->result_time        The armed time of the cancelled timer (return value)
+ *
+ *
+ * Returns:        0                      Success
+ *                !0                      Not success           
+ *
+ */
 kern_return_t
 mk_timer_cancel_trap(
        struct mk_timer_cancel_trap_args *args)
@@ -278,7 +316,7 @@ mk_timer_cancel_trap(
                ip_unlock(port);
 
                if (timer->is_armed) {
-                       armed_time = timer->call_entry.deadline;
+                       armed_time = timer->call_entry.tc_call.deadline;
                        if (thread_call_cancel(&timer->call_entry))
                                timer->active--;
                        timer->is_armed = FALSE;
index c67d74ba9a561b6f2ae9242e797c32e9d95f7c11..adcad213327752b3b92543a6ea9e1e8f5ab79d15 100644 (file)
 #ifdef MACH_KERNEL_PRIVATE
 #include <mach/mach_types.h>
 
-#include <kern/call_entry.h>
+#include <kern/thread_call.h>
 
 struct mk_timer {
-       decl_simple_lock_data(,lock)
-       call_entry_data_t       call_entry;
+       decl_simple_lock_data(,lock);
+       thread_call_data_t              call_entry;
        uint32_t                        is_dead:1,
                                                is_armed:1;
        int                                     active;
index 730be5c8191213fafd7c198d9980a2584aa0e811..88813d84431488966834aebc2a04180f667b83e9 100644 (file)
  */
 
 #include <debug.h>
-#include <mach_kdb.h>
 #include <mach_kdp.h>
 #include <platforms.h>
 #include <mach/boolean.h>
@@ -765,24 +764,6 @@ cons_putc_locked(
                cnputc(c);
 }
 
-#if    MACH_KDB
-extern void db_putchar(char c);
-#endif
-
-void
-dbugprintf(__unused const char *fmt, ...)
-{
-
-#if    MACH_KDB
-       va_list listp;
-
-       va_start(listp, fmt);
-       _doprnt(fmt, &listp, db_putchar, 16);
-       va_end(listp);
-#endif
-       return;
-}
-
 int
 printf(const char *fmt, ...)
 {
index 3273a4f6c57f64639a2e5a0108ce54ea20db643b..a7fa6ea44a8dcccf550b37b83391e0f741874a7c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,6 +73,7 @@
 #include <kern/spl.h>
 #include <kern/thread.h>
 #include <kern/processor.h>
+#include <kern/ledger.h>
 #include <machine/machparam.h>
 
 /*
@@ -94,6 +95,16 @@ thread_quantum_expire(
 
        SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor);
 
+       /*
+        * We bill CPU time to both the individual thread and its task.
+        *
+        * Because this balance adjustment could potentially attempt to wake this very
+        * thread, we must credit the ledger before taking the thread lock. The ledger
+        * pointers are only manipulated by the thread itself at the ast boundary.
+        */
+       ledger_credit(thread->t_ledger, task_ledgers.cpu_time, thread->current_quantum);
+       ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, thread->current_quantum);
+
        thread_lock(thread);
 
        /*
@@ -101,19 +112,18 @@ thread_quantum_expire(
         * continue without re-entering the scheduler, so update this now.
         */
        thread->last_run_time = processor->quantum_end;
-       
+
        /*
         *      Check for fail-safe trip.
         */
-       if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) && 
-           !(thread->sched_flags & TH_SFLAG_PROMOTED) &&
-           !(thread->options & TH_OPT_SYSTEM_CRITICAL)) {
-               uint64_t new_computation;
-
-               new_computation = processor->quantum_end - thread->computation_epoch;
-               new_computation += thread->computation_metered;
-               if (new_computation > max_unsafe_computation) {
-
+       if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) && 
+           !(thread->sched_flags & TH_SFLAG_PROMOTED) &&
+           !(thread->options & TH_OPT_SYSTEM_CRITICAL)) {
+               uint64_t new_computation;
+  
+               new_computation = processor->quantum_end - thread->computation_epoch;
+               new_computation += thread->computation_metered;
+               if (new_computation > max_unsafe_computation) {
                        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_FAILSAFE)|DBG_FUNC_NONE,
                                        (uintptr_t)thread->sched_pri, (uintptr_t)thread->sched_mode, 0, 0, 0);
 
@@ -158,7 +168,23 @@ thread_quantum_expire(
        thread_quantum_init(thread);
        thread->last_quantum_refill_time = processor->quantum_end;
 
-       processor->quantum_end += thread->current_quantum;
+       /* Reload precise timing global policy to thread-local policy */
+       thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
+
+       /*
+        * Since non-precise user/kernel time doesn't update the state/thread timer
+        * during privilege transitions, synthesize an event now.
+        */
+       if (!thread->precise_user_kernel_time) {
+               timer_switch(PROCESSOR_DATA(processor, current_state),
+                                        processor->quantum_end,
+                                        PROCESSOR_DATA(processor, current_state));
+               timer_switch(PROCESSOR_DATA(processor, thread_timer),
+                                        processor->quantum_end,
+                                        PROCESSOR_DATA(processor, thread_timer));
+       }
+
+       processor->quantum_end = mach_absolute_time() + thread->current_quantum;
        timer_call_enter1(&processor->quantum_timer, thread,
            processor->quantum_end, TIMER_CALL_CRITICAL);
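
Note the change in how the next quantum deadline is computed: the old code advanced it relatively (quantum_end += current_quantum), so if the expiration was processed, say, 3 ms late on a 10 ms quantum, the next deadline landed only 7 ms away. Re-anchoring at mach_absolute_time() + current_quantum gives the incoming thread its full quantum no matter how late the timer fired.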
 
@@ -449,6 +475,50 @@ update_priority(
                thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
        }
 
+#if CONFIG_EMBEDDED
+       /* Check for pending throttle transitions, and safely switch queues */
+       if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_MASK) {
+                       boolean_t               removed = thread_run_queue_remove(thread);
+
+                       if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION) {
+                               if (thread->sched_mode == TH_MODE_REALTIME) {
+                                       thread->saved_mode = thread->sched_mode;
+                                       thread->sched_mode = TH_MODE_TIMESHARE;
+
+                                       if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+                                               sched_share_incr();
+                               } else {
+                                       /*
+                                        * It's possible that this is a realtime thread that has
+                                        * already tripped the failsafe, in which case saved_mode
+                                        * is already set correctly.
+                                        */
+                                       if (!(thread->sched_flags & TH_SFLAG_FAILSAFE)) {
+                                               thread->saved_mode = thread->sched_mode;
+                                       }
+                                       thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
+                               }
+                               thread->sched_flags |= TH_SFLAG_THROTTLED;
+
+                       } else {
+                               if ((thread->sched_mode == TH_MODE_TIMESHARE)
+                                       && (thread->saved_mode == TH_MODE_REALTIME)) {
+                                       if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+                                               sched_share_decr();
+                               }
+
+                               thread->sched_mode = thread->saved_mode;
+                               thread->saved_mode = TH_MODE_NONE;
+                               thread->sched_flags &= ~TH_SFLAG_THROTTLED;
+                       }
+
+                       thread->sched_flags &= ~(TH_SFLAG_PENDING_THROTTLE_MASK);
+
+                       if (removed)
+                               thread_setrun(thread, SCHED_TAILQ);
+       }
+#endif
+
        /*
         *      Recompute scheduled priority if appropriate.
         */
index b0771351f96e6b18374d70f3bba7e44aa4b49ac6..23a5496119f7ea63cce73147b87f3c0cd5b8bdc6 100644 (file)
@@ -332,34 +332,41 @@ processor_info(
 
        case PROCESSOR_CPU_LOAD_INFO:
        {
-               register processor_cpu_load_info_t      cpu_load_info;
+               processor_cpu_load_info_t       cpu_load_info;
+               timer_data_t    idle_temp;
+               timer_t         idle_state;
 
                if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
                        return (KERN_FAILURE);
 
                cpu_load_info = (processor_cpu_load_info_t) info;
-               cpu_load_info->cpu_ticks[CPU_STATE_USER] =
+               if (precise_user_kernel_time) {
+                       cpu_load_info->cpu_ticks[CPU_STATE_USER] =
                                                        (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval);
-               cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
+                       cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
                                                        (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval);
-               {
-               timer_data_t    idle_temp;
-               timer_t         idle_state;
+               } else {
+                       uint64_t tval = timer_grab(&PROCESSOR_DATA(processor, user_state)) +
+                               timer_grab(&PROCESSOR_DATA(processor, system_state));
+
+                       cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
+                       cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
+               }
 
                idle_state = &PROCESSOR_DATA(processor, idle_state);
                idle_temp = *idle_state;
 
                if (PROCESSOR_DATA(processor, current_state) != idle_state ||
-                   timer_grab(&idle_temp) != timer_grab(idle_state))
+                   timer_grab(&idle_temp) != timer_grab(idle_state)) {
                        cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                                                        (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval);
-               else {
+               } else {
                        timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp);
                                
                        cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                                (uint32_t)(timer_grab(&idle_temp) / hz_tick_interval);
                }
-               }
+
                cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
 
            *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
@@ -525,6 +532,9 @@ processor_get_assignment(
 {
        int state;
 
+       if (processor == PROCESSOR_NULL)
+               return(KERN_INVALID_ARGUMENT);
+
        state = processor->state;
        if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE)
                return(KERN_FAILURE);
index 836b55293137ffbfd6b9d538cc9ca680c84d5963..2202f812d133eb9c837e848537949acd868dba3e 100644 (file)
@@ -614,8 +614,12 @@ MACRO_END
  */
 struct mpqueue_head {
        struct queue_entry      head;           /* header for queue */
+#if defined(__i386__) || defined(__x86_64__)
        lck_mtx_t               lock_data;
        lck_mtx_ext_t           lock_data_ext;
+#else
+       lck_spin_t              lock_data;
+#endif
 };
 
 typedef struct mpqueue_head    mpqueue_head_t;
index 9532f4095c8e4d3725f400c110af4237251f04d8..8f87afad2d659fc18c4e81ce0de5a48130d62f4b 100644 (file)
@@ -67,8 +67,6 @@
 #ifndef        _KERN_SCHED_H_
 #define _KERN_SCHED_H_
 
-#include <stat_time.h>
-
 #include <mach/policy.h>
 #include <kern/kern_types.h>
 #include <kern/queue.h>
@@ -288,7 +286,8 @@ extern uint32_t default_timeshare_constraint;
 
 extern uint32_t        max_rt_quantum, min_rt_quantum;
 
-extern uint32_t        sched_cswtime;
+extern int default_preemption_rate;
+extern int default_bg_preemption_rate;
 
 #if defined(CONFIG_SCHED_TRADITIONAL)
 
@@ -319,6 +318,9 @@ extern void         compute_memory_pressure(
 extern void            compute_zone_gc_throttle(
                                        void                    *arg);
 
+extern void            compute_pageout_gc_throttle(
+                                       void                    *arg);
+
 extern void            compute_pmap_gc_throttle(
                                        void                    *arg);
 
index 5db6219373f3f0236f410a7cb5ae04edaa20f41b..d2a2ce6cb98bb191691de4e79edb7ba229ec1696 100644 (file)
@@ -104,7 +104,8 @@ static struct sched_average {
        { compute_averunnable, &sched_nrun, 5, 0 },
        { compute_stack_target, NULL, 5, 1 },
        { compute_memory_pressure, NULL, 1, 0 },
-       { compute_zone_gc_throttle, NULL, 1, 0 },
+       { compute_zone_gc_throttle, NULL, 60, 0 },
+       { compute_pageout_gc_throttle, NULL, 1, 0 },
        { compute_pmap_gc_throttle, NULL, 60, 0 },
        { NULL, NULL, 0, 0 }
 };
index 1eca4aaacef8808e5151337ba95527788dca8f07..ccde4a094bd018ae5e92749bb3db8bc79a7f476e 100644 (file)
@@ -551,9 +551,15 @@ static ast_t
 sched_fixedpriority_processor_csw_check(processor_t processor)
 {
        run_queue_t             runq;
-       
+       boolean_t               has_higher;
+
        runq = runq_for_processor(processor);
-       if (runq->highq > processor->current_pri) {
+       if (first_timeslice(processor)) {
+               has_higher = (runq->highq > processor->current_pri);
+       } else {
+               has_higher = (runq->highq >= processor->current_pri);
+       }
+       if (has_higher) {
                if (runq->urgency > 0)
                        return (AST_PREEMPT | AST_URGENT);
 
@@ -647,6 +653,61 @@ sched_fixedpriority_update_priority(thread_t       thread)
 
        }
        
+#if CONFIG_EMBEDDED
+       /* Check for pending throttle transitions, and safely switch queues */
+       if ((thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_MASK) && (thread->bound_processor == PROCESSOR_NULL)) {
+                       boolean_t               removed = thread_run_queue_remove(thread);
+
+                       if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION) {
+                               if (thread->sched_mode == TH_MODE_REALTIME) {
+                                       thread->saved_mode = thread->sched_mode;
+                                       thread->sched_mode = TH_MODE_TIMESHARE;
+
+                                       if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+                                               sched_share_incr();
+                               } else {
+                                       /*
+                                        * It's possible that this is a realtime thread that has
+                                        * already tripped the failsafe, in which case it should not
+                                        * degrade further.
+                                        */
+                                       if (!(thread->sched_flags & TH_SFLAG_FAILSAFE)) {
+
+                                               thread->saved_mode = thread->sched_mode;
+
+                                               if (thread->sched_mode == TH_MODE_TIMESHARE) {
+                                                       thread->sched_mode = TH_MODE_FAIRSHARE;
+                                               }
+                                       }
+                               }
+                               thread->sched_flags |= TH_SFLAG_THROTTLED;
+
+                               KERNEL_DEBUG_CONSTANT(
+                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), 0xFFFFFFFF, 0, 0, 0);
+
+                       } else {
+                               if ((thread->sched_mode == TH_MODE_TIMESHARE)
+                                       && (thread->saved_mode == TH_MODE_REALTIME)) {
+                                       if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+                                               sched_share_decr();
+                               }
+
+                               thread->sched_mode = thread->saved_mode;
+                               thread->saved_mode = TH_MODE_NONE;
+                               thread->sched_flags &= ~TH_SFLAG_THROTTLED;
+
+                               KERNEL_DEBUG_CONSTANT1(
+                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_EXIT) | DBG_FUNC_NONE, 0, 0, 0, 0, thread_tid(thread));
+
+                       }
+
+                       thread->sched_flags &= ~(TH_SFLAG_PENDING_THROTTLE_MASK);
+
+                       if (removed)
+                               thread_setrun(thread, SCHED_TAILQ);
+       }
+#endif
+
        /*
         *      Check for fail-safe release.
         */
index d27b29e87b7e2c2e5c8f784a6d48964dbe3ec901..0c4d1a3d0a8ca25442e881deafb25ea80c2911fa 100644 (file)
@@ -231,7 +231,6 @@ const struct sched_dispatch_table sched_grrr_dispatch = {
        TRUE /* direct_dispatch_to_idle_processors */
 };
 
-extern int     default_preemption_rate;
 extern int     max_unsafe_quanta;
 
 static uint32_t grrr_quantum_us;
index d7b959249aabd874f45413858fbcd9df123ee79c..5f4803119455b6a24e442ad9217f5fb42717f26a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,9 +65,6 @@
  */
 
 #include <debug.h>
-#include <mach_kdb.h>
-
-#include <ddb/db_output.h>
 
 #include <mach/mach_types.h>
 #include <mach/machine.h>
@@ -98,6 +95,7 @@
 #include <kern/task.h>
 #include <kern/thread.h>
 #include <kern/wait_queue.h>
+#include <kern/ledger.h>
 
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
@@ -122,6 +120,9 @@ decl_simple_lock_data(static,fs_lock);
 #define                DEFAULT_PREEMPTION_RATE         100             /* (1/s) */
 int                    default_preemption_rate = DEFAULT_PREEMPTION_RATE;
 
+#define                DEFAULT_BG_PREEMPTION_RATE      400             /* (1/s) */
+int                    default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
+
 #define                MAX_UNSAFE_QUANTA                       800
 int                    max_unsafe_quanta = MAX_UNSAFE_QUANTA;
 
@@ -140,8 +141,10 @@ uint64_t   sched_safe_duration;
 
 uint32_t       std_quantum;
 uint32_t       min_std_quantum;
+uint32_t       bg_quantum;
 
 uint32_t       std_quantum_us;
+uint32_t       bg_quantum_us;
 
 #endif /* CONFIG_SCHED_TRADITIONAL */
 
@@ -152,8 +155,6 @@ uint32_t    default_timeshare_constraint;
 uint32_t       max_rt_quantum;
 uint32_t       min_rt_quantum;
 
-uint32_t       sched_cswtime;
-
 #if defined(CONFIG_SCHED_TRADITIONAL)
 
 unsigned       sched_tick;
@@ -594,6 +595,12 @@ sched_traditional_init(void)
 
        printf("standard timeslicing quantum is %d us\n", std_quantum_us);
 
+       if (default_bg_preemption_rate < 1)
+               default_bg_preemption_rate = DEFAULT_BG_PREEMPTION_RATE;
+       bg_quantum_us = (1000 * 1000) / default_bg_preemption_rate;
+
+       printf("standard background quantum is %d us\n", bg_quantum_us);
+
        load_shift_init();
        preempt_pri_init();
        sched_tick = 0;
@@ -616,6 +623,12 @@ sched_traditional_timebase_init(void)
        assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
        min_std_quantum = (uint32_t)abstime;
 
+       /* quantum for background tasks */
+       clock_interval_to_absolutetime_interval(
+                                                       bg_quantum_us, NSEC_PER_USEC, &abstime);
+       assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+       bg_quantum = (uint32_t)abstime;
+
        /* scheduler tick interval */
        clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
                                                                                                        NSEC_PER_USEC, &abstime);
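
With the defaults in this commit, default_preemption_rate = 100 gives std_quantum_us = (1000 * 1000) / 100 = 10000 us (the traditional 10 ms timeslice), and default_bg_preemption_rate = 400 gives bg_quantum_us = (1000 * 1000) / 400 = 2500 us, so a background-throttled thread gets at most a quarter of a standard quantum before it becomes preemptible.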
@@ -911,9 +924,12 @@ thread_unblock(
        thread->computation_metered = 0;
        thread->reason = AST_NONE;
 
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
-                                       (uintptr_t)thread_tid(thread), thread->sched_pri, 0, 0, 0);
+       /* Event should only be triggered if thread is not already running */
+       if (result == FALSE) {
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
+                       (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, 0, 0);
+       }
 
        DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
 
@@ -985,7 +1001,8 @@ thread_mark_wait_locked(
                        (!at_safe_point &&
                                (thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {
 
-               DTRACE_SCHED(sleep);
+               if ( !(thread->state & TH_TERMINATE))
+                       DTRACE_SCHED(sleep);
 
                thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
                thread->at_safe_point = at_safe_point;
@@ -1062,6 +1079,10 @@ assert_wait(
 
        assert(event != NO_EVENT);
 
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+               VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
+
        index = wait_hash(event);
        wq = &wait_queues[index];
        return wait_queue_assert_wait(wq, event, interruptible, 0);
@@ -1088,6 +1109,11 @@ assert_wait_timeout(
        thread_lock(thread);
 
        clock_interval_to_deadline(interval, scale_factor, &deadline);
+       
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+       
        wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
                                                                                                        interruptible, deadline, thread);
 
@@ -1116,6 +1142,10 @@ assert_wait_deadline(
        wait_queue_lock(wqueue);
        thread_lock(thread);
 
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+
        wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
                                                                                                        interruptible, deadline, thread);
 
@@ -1325,6 +1355,16 @@ thread_unstop(
        splx(s);
 }
 
+/*
+ * Thread locked, returns the same way
+ */
+static inline boolean_t
+thread_isoncpu(thread_t thread)
+{
+       processor_t processor = thread->last_processor;
+
+       return ((processor != PROCESSOR_NULL) && (processor->active_thread == thread));
+}
 /*
  * thread_wait:
  *
@@ -1333,19 +1373,32 @@ thread_unstop(
  */
 void
 thread_wait(
-       thread_t                thread)
+       thread_t        thread,
+       boolean_t       until_not_runnable)
 {
        wait_result_t   wresult;
-       spl_t                   s = splsched();
+       boolean_t       oncpu;
+       processor_t     processor;
+       spl_t           s = splsched();
 
        wake_lock(thread);
        thread_lock(thread);
 
-       while (thread->state & TH_RUN) {
-               processor_t             processor = thread->last_processor;
+       /*
+        * Wait until not running on a CPU.  If stronger requirement
+        * desired, wait until not runnable.  Assumption: if thread is
+        * on CPU, then TH_RUN is set, so we're not waiting in any case
+        * where the original, pure "TH_RUN" check would have let us 
+        * finish.
+        */
+       while ((oncpu = thread_isoncpu(thread)) || 
+                       (until_not_runnable && (thread->state & TH_RUN))) {
 
-               if (processor != PROCESSOR_NULL && processor->active_thread == thread)
+               if (oncpu) {
+                       assert(thread->state & TH_RUN);
+                       processor = thread->last_processor;
                        cause_ast_check(processor);
+               }
 
                thread->wake_active = TRUE;
                thread_unlock(thread);
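
thread_wait() thus now offers two strengths of waiting:

	thread_wait(thread, FALSE);	/* wait only until the thread is off CPU */
	thread_wait(thread, TRUE);	/* wait until not runnable: the old TH_RUN semantics */

Callers that only need the target off-processor (for instance, so its saved context is stable) can take the weaker form and avoid waiting out an arbitrarily long stay on the run queue.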
@@ -1481,7 +1534,7 @@ thread_wakeup_prim_internal(
        if (one_thread)
                return (wait_queue_wakeup_one(wq, event, result, priority));
        else
-               return (wait_queue_wakeup_all(wq, event, result));
+           return (wait_queue_wakeup_all(wq, event, result));
 }
 
 /*
@@ -1766,6 +1819,9 @@ thread_select_idle(
        processor->current_pri = IDLEPRI;
        processor->current_thmode = TH_MODE_NONE;
 
+       /* Reload precise timing global policy to thread-local policy */
+       thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
+       
        thread_unlock(thread);
 
        /*
@@ -1982,6 +2038,9 @@ thread_invoke(
        assert(thread_runnable(thread));
 #endif
 
+       /* Reload precise timing global policy to thread-local policy */
+       thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
+       
        /*
         * Allow time constraint threads to hang onto
         * a stack.
@@ -2025,9 +2084,20 @@ thread_invoke(
                        self->last_run_time = processor->last_dispatch;
                        thread_timer_event(processor->last_dispatch, &thread->system_timer);
                        PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
+
+                       /*
+                        * Since non-precise user/kernel time doesn't update the state timer
+                        * during privilege transitions, synthesize an event now.
+                        */
+                       if (!thread->precise_user_kernel_time) {
+                               timer_switch(PROCESSOR_DATA(processor, current_state),
+                                                        processor->last_dispatch,
+                                                        PROCESSOR_DATA(processor, current_state));
+                       }
        
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
-                                                                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
+                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
                        if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
                                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
@@ -2062,8 +2132,9 @@ thread_invoke(
                        counter(++c_thread_invoke_same);
                        thread_unlock(self);
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
-                                                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
+                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
                        self->continuation = self->parameter = NULL;
 
@@ -2092,8 +2163,9 @@ need_stack:
                        counter(++c_thread_invoke_same);
                        thread_unlock(self);
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
-                                                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
+                               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
                        return (TRUE);
                }
@@ -2126,8 +2198,20 @@ need_stack:
        thread_timer_event(processor->last_dispatch, &thread->system_timer);
        PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
-                                                       self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+       /*
+        * Since non-precise user/kernel time doesn't update the state timer
+        * during privilege transitions, synthesize an event now.
+        */
+       if (!thread->precise_user_kernel_time) {
+               timer_switch(PROCESSOR_DATA(processor, current_state),
+                                        processor->last_dispatch,
+                                        PROCESSOR_DATA(processor, current_state));
+       }
+       
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
+               self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
        if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
@@ -2143,7 +2227,9 @@ need_stack:
         * and address space if required.  We will next run
         * as a result of a subsequent context switch.
         */
+       assert(continuation == self->continuation);
        thread = machine_switch_context(self, continuation, thread);
+       assert(self == current_thread());
        TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
 
        DTRACE_SCHED(on__cpu);
@@ -2192,15 +2278,35 @@ thread_dispatch(
                        stack_free(thread);
 
                if (!(thread->state & TH_IDLE)) {
+                       int64_t consumed;
+                       int64_t remainder = 0;
+
+                       if (processor->quantum_end > processor->last_dispatch)
+                               remainder = processor->quantum_end -
+                                   processor->last_dispatch;
+
+                       consumed = thread->current_quantum - remainder;
+
+			if ((thread->reason & AST_LEDGER) == 0) {
+				/*
+				 * Bill CPU time to both the individual thread
+				 * and the task.
+				 */
+				ledger_credit(thread->t_ledger,
+				    task_ledgers.cpu_time, consumed);
+				ledger_credit(thread->t_threadledger,
+				    thread_ledgers.cpu_time, consumed);
+			}
+
                        wake_lock(thread);
                        thread_lock(thread);
 
                        /*
                         *      Compute remainder of current quantum.
                         */
-                       if (    first_timeslice(processor)                                                      &&
-                                       processor->quantum_end > processor->last_dispatch               )
-                               thread->current_quantum = (uint32_t)(processor->quantum_end - processor->last_dispatch);
+                       if (first_timeslice(processor) &&
+                           processor->quantum_end > processor->last_dispatch)
+                               thread->current_quantum = (uint32_t)remainder;
                        else
                                thread->current_quantum = 0;
 
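
A worked example of the billing above, in round numbers: with a 10 ms quantum, a thread that blocks while quantum_end is still 4 ms in the future leaves remainder = 4 ms, so consumed = 10 - 4 = 6 ms is credited to the thread and task cpu_time ledgers; a thread dispatched after quantum_end has passed leaves remainder = 0 and is billed the full quantum. The AST_LEDGER test presumably skips the credit when this dispatch was itself forced by ledger enforcement, which has already accounted for that time.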
@@ -2222,7 +2327,7 @@ thread_dispatch(
                                 */
                                if (thread->current_quantum < min_std_quantum) {
                                        thread->reason |= AST_QUANTUM;
-                                       thread->current_quantum += std_quantum;
+                                       thread->current_quantum += SCHED(initial_quantum_size)(thread);
                                }
 #endif
                        }
@@ -2253,7 +2358,15 @@ thread_dispatch(
 
                                thread->reason = AST_NONE;
 
-                               thread_unlock(thread);
+                               if (thread->wake_active) {
+                                       thread->wake_active = FALSE;
+                                       thread_unlock(thread);
+
+                                       thread_wakeup(&thread->wake_active);
+                               }
+                               else
+                                       thread_unlock(thread);
+
                                wake_unlock(thread);
                        }
                        else {
@@ -2382,13 +2495,10 @@ thread_block_reason(
        self->continuation = continuation;
        self->parameter = parameter;
 
-       if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) {
-               uint32_t        bt[8];
-
-               OSBacktrace((void **)&bt[0], 8);
-
-               KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_START, bt[0], bt[1], bt[2], bt[3], 0);
-               KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_END, bt[4], bt[5], bt[6], bt[7], 0);
+       if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) {              
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK), 
+                       reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0);
        }
 
        do {
@@ -2471,9 +2581,9 @@ void
 thread_continue(
        register thread_t       thread)
 {
-       register thread_t                       self = current_thread();
+       register thread_t               self = current_thread();
        register thread_continue_t      continuation;
-       register void                           *parameter;
+       register void                   *parameter;
 
        DTRACE_SCHED(on__cpu);
 
@@ -2506,9 +2616,12 @@ thread_quantum_init(thread_t thread)
 
 #if defined(CONFIG_SCHED_TRADITIONAL)
 static uint32_t
-sched_traditional_initial_quantum_size(thread_t thread __unused)
+sched_traditional_initial_quantum_size(thread_t thread)
 {
-       return std_quantum;
+       if ((thread == THREAD_NULL) || thread->priority > MAXPRI_THROTTLE)
+               return std_quantum;
+       else
+               return bg_quantum;
 }
 
 static sched_mode_t
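
Two hunks cooperate here: thread_dispatch() above now tops up an expired quantum through the per-scheduler SCHED(initial_quantum_size) hook instead of the fixed std_quantum, and the traditional scheduler's implementation of that hook hands threads at or below the throttle priority the shorter bg_quantum. A minimal sketch of the selection; MAXPRI_THROTTLE's value and both quanta below are placeholders, not the configured kernel values:

        #include <stddef.h>
        #include <stdint.h>

        #define MAXPRI_THROTTLE 4               /* placeholder; config-defined in the kernel */

        static uint32_t std_quantum = 10000;    /* placeholder abstime units */
        static uint32_t bg_quantum  = 2500;     /* placeholder abstime units */

        struct thread { int priority; };

        /* Mirrors sched_traditional_initial_quantum_size() above: background-
         * priority threads draw from the shorter bg_quantum. */
        static uint32_t initial_quantum_size(struct thread *thread)
        {
                if (thread == NULL || thread->priority > MAXPRI_THROTTLE)
                        return std_quantum;
                return bg_quantum;
        }
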
@@ -2851,7 +2964,7 @@ realtime_setrun(
                int prstate = processor->state;
                if (processor == current_processor())
                        ast_on(AST_PREEMPT | AST_URGENT);
-               else if ((prstate == PROCESSOR_DISPATCHING)  || (prstate == PROCESSOR_IDLE))
+               else if ((prstate == PROCESSOR_IDLE)  || (prstate == PROCESSOR_DISPATCHING))
                        machine_signal_idle(processor);
                else
                        cause_ast_check(processor);
@@ -3021,11 +3134,17 @@ static ast_t
 processor_csw_check(processor_t processor)
 {
        run_queue_t             runq;
+       boolean_t               has_higher;
 
        assert(processor->active_thread != NULL);
        
        runq = runq_for_processor(processor);
-       if (runq->highq > processor->current_pri) {
+       if (first_timeslice(processor)) {
+               has_higher = (runq->highq > processor->current_pri);
+       } else {
+               has_higher = (runq->highq >= processor->current_pri);
+       }
+       if (has_higher) {
                if (runq->urgency > 0)
                        return (AST_PREEMPT | AST_URGENT);
                
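
The new has_higher test encodes the round-robin rule: within its first timeslice the running thread yields only to strictly higher priority; once its quantum has expired, an equal-priority peer on the run queue is enough to trigger preemption. Restated as a predicate:

        #include <stdbool.h>

        /* Condensed restatement of the check in processor_csw_check() above. */
        static bool should_preempt(int runq_highq, int current_pri, bool first_timeslice)
        {
                return first_timeslice ? (runq_highq >  current_pri)   /* strict     */
                                       : (runq_highq >= current_pri);  /* allow ties */
        }
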
@@ -3529,24 +3648,18 @@ csw_check(
        processor_t             processor)
 {
        ast_t                   result = AST_NONE;
+       thread_t                thread = processor->active_thread;
 
        if (first_timeslice(processor)) {
                if (rt_runq.count > 0)
                        return (AST_PREEMPT | AST_URGENT);
-
-               result |= SCHED(processor_csw_check)(processor);
-               if (result & AST_URGENT)
-                       return result;
        }
        else {
                if (rt_runq.count > 0 && BASEPRI_RTQUEUES >= processor->current_pri)
                        return (AST_PREEMPT | AST_URGENT);
-
-               result |= SCHED(processor_csw_check)(processor);
-               if (result & AST_URGENT)
-                       return result;
        }
 
+       result = SCHED(processor_csw_check)(processor);
        if (result != AST_NONE)
                return (result);
 
@@ -3556,7 +3669,7 @@ csw_check(
        if (machine_processor_is_inactive(processor))
                return (AST_PREEMPT);
 
-       if (processor->active_thread->state & TH_SUSP)
+       if (thread->state & TH_SUSP)
                return (AST_PREEMPT);
 
        return (AST_NONE);
@@ -3911,8 +4024,9 @@ processor_idle(
        int                                     state;
        (void)splsched();
 
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, 
+               (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
 
        SCHED_STATS_CPU_IDLE_START(processor);
 
@@ -3962,16 +4076,18 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 
+                               (uintptr_t)thread_tid(thread), state, 0, 0, 0);
 
                        return (THREAD_NULL);
                }
 
                pset_unlock(pset);
 
-               KERNEL_DEBUG_CONSTANT(
-                                     MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 
+                       (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
                        
                return (new_thread);
        }
@@ -4003,8 +4119,9 @@ processor_idle(
                        thread_setrun(new_thread, SCHED_HEADQ);
                        thread_unlock(new_thread);
 
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 
+                               (uintptr_t)thread_tid(thread), state, 0, 0, 0);
                
                        return (THREAD_NULL);
                }
@@ -4012,8 +4129,9 @@ processor_idle(
 
        pset_unlock(pset);
 
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, 
+               (uintptr_t)thread_tid(thread), state, 0, 0, 0);
                
        return (THREAD_NULL);
 }
@@ -4087,15 +4205,14 @@ sched_startup(void)
        thread_deallocate(thread);
 
        /*
-        * Yield to the sched_init_thread while it times
-        * a series of context switches back.  It stores
-        * the baseline value in sched_cswtime.
+        * Yield to the sched_init_thread once, to
+        * initialize our own thread after being switched
+        * back to.
         *
         * The current thread is the only other thread
         * active at this point.
         */
-       while (sched_cswtime == 0)
-               thread_block(THREAD_CONTINUE_NULL);
+       thread_block(THREAD_CONTINUE_NULL);
 }
 
 #if defined(CONFIG_SCHED_TRADITIONAL)
@@ -4139,67 +4256,10 @@ sched_traditional_tick_continue(void)
 
 #endif /* CONFIG_SCHED_TRADITIONAL */
 
-static uint32_t
-time_individual_cswitch(void)
-{
-       uint32_t switches = 0;
-       uint64_t newtime, starttime;
-
-       /* Wait for absolute time to increase. */
-       starttime = mach_absolute_time();
-       do {
-               newtime = mach_absolute_time();
-       } while (newtime == starttime);
-
-       /* Measure one or more context switches until time increases again.
-        * This ensures we get non-zero timings even if absolute time
-        * increases very infrequently compared to CPU clock. */
-       starttime = newtime;
-       do {
-               thread_block(THREAD_CONTINUE_NULL);
-               newtime = mach_absolute_time();
-               ++switches;
-       } while (newtime == starttime);
-       /* Round up. */
-       return (uint32_t) ((newtime - starttime + switches - 1) / switches);
-}
-
-/*
- * Time a series of context switches to determine
- * a baseline.  Toss the high and low and return
- * the one-way value.
- */
-static uint32_t
-time_cswitch(void)
-{
-       uint32_t        new, hi, low, accum;
-       int                     i, tries = 7, denom;
-
-       accum = hi = low = 0;
-       for (i = 0; i < tries; ++i) {
-               new = time_individual_cswitch();
-
-               if (i == 0)
-                       accum = hi = low = new;
-               else {
-                       if (new < low)
-                               low = new;
-                       else
-                       if (new > hi)
-                               hi = new;
-                       accum += new;
-               }
-       }
-       /* Round up. */
-       denom = 2 * (tries - 2);
-       return (accum - hi - low + denom - 1) / denom;
-}
-
 void
 sched_init_thread(void (*continuation)(void))
 {
-       sched_cswtime = time_cswitch();
-       assert(sched_cswtime > 0);
+       thread_block(THREAD_CONTINUE_NULL);
 
        continuation();
 
@@ -4446,35 +4506,3 @@ thread_runnable(
        return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
 }
 #endif /* DEBUG */
-
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#define        printf          kdbprintf
-void                   db_sched(void);
-
-void
-db_sched(void)
-{
-       iprintf("Scheduling Statistics:\n");
-       db_indent += 2;
-       iprintf("Thread invocations:  csw %d same %d\n",
-               c_thread_invoke_csw, c_thread_invoke_same);
-#if    MACH_COUNTERS
-       iprintf("Thread block:  calls %d\n",
-               c_thread_block_calls);
-       iprintf("Idle thread:\n\thandoff %d block %d\n",
-               c_idle_thread_handoff,
-               c_idle_thread_block);
-       iprintf("Sched thread blocks:  %d\n", c_sched_thread_block);
-#endif /* MACH_COUNTERS */
-       db_indent -= 2;
-}
-
-#include <ddb/db_output.h>
-void           db_show_thread_log(void);
-
-void
-db_show_thread_log(void)
-{
-}
-#endif /* MACH_KDB */
index 0f89239aeebb6851c08817c293aa7413481484ad..c22ba7efd70b36fdebfd2c67b1c158b63b10395d 100644 (file)
@@ -93,7 +93,8 @@ extern void                   thread_unstop(
 
 /* Wait for a thread to stop running */
 extern void                    thread_wait(
-                                               thread_t        thread);
+                                               thread_t        thread,
+                                               boolean_t       until_not_runnable);
 
 /* Unblock thread on wake up */
 extern boolean_t       thread_unblock(
@@ -385,23 +386,22 @@ extern kern_return_t      thread_wakeup_prim(
                                                        boolean_t                       one_thread,
                                                        wait_result_t                   result);
 
-#ifdef MACH_KERNEL_PRIVATE
-extern kern_return_t   thread_wakeup_prim_internal(
-                                                       event_t                         event,
+extern kern_return_t    thread_wakeup_prim_internal(
+                                                       event_t                         event,
                                                        boolean_t                       one_thread,
                                                        wait_result_t                   result,
                                                        int                             priority);
-#endif
+
 
 #define thread_wakeup(x)                                       \
-                       thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
+                       thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
 #define thread_wakeup_with_result(x, z)                \
-                       thread_wakeup_prim((x), FALSE, (z))
+                       thread_wakeup_prim((x), FALSE, (z))
 #define thread_wakeup_one(x)                           \
-                       thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
+                       thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
 
 #ifdef MACH_KERNEL_PRIVATE
-#define thread_wakeup_one_with_pri(x, pri)                             \
+#define thread_wakeup_one_with_pri(x, pri)                              \
                        thread_wakeup_prim_internal((x), TRUE, THREAD_AWAKENED, pri)
 #endif
 
index b638479e9f0e85053df3f16fe36b44931c055d2c..948803887e4b8e71d852a7c9d81c996d4f8c5a99 100644 (file)
@@ -82,9 +82,7 @@ mach_get_task_label(
        kr = ipc_object_copyout(space, (ipc_object_t) lh->lh_port,
            MACH_MSG_TYPE_PORT_SEND, 0, outlabel);
        if (kr != KERN_SUCCESS) {
-               ip_lock(lh->lh_port);
                ip_release(lh->lh_port);
-               ip_check_unlock(lh->lh_port);
                *outlabel = MACH_PORT_NULL;
        }
   
@@ -236,14 +234,16 @@ mac_port_check_service_obj(
                return kr;
        }
 
-       dead = ipc_right_check(space, (ipc_port_t) entry->ie_object, obj, entry);
+       objp = entry->ie_object;
+       port = (ipc_port_t)objp;
+       dead = ipc_right_check(space, port, obj, entry);
        if (dead) {
                is_write_unlock(space);
+               ip_release(port);
                mac_task_label_destroy(&subjl);
                return KERN_INVALID_RIGHT;
        }
 
-       objp = entry->ie_object;
        io_lock (objp);
        is_write_unlock (space);
 
index 6b5ea83027170c69662cc9d3642a1667334e37ed..9906b8b3a9a0364dd4d6574f89d864fe8b15b44f 100644 (file)
@@ -39,6 +39,7 @@
 #include <kern/thread.h>
 #include <kern/zalloc.h>
 #include <kern/kalloc.h>
+#include <kern/ledger.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -85,7 +86,8 @@ STACK_ZINFO_PALLOC(thread_t thread)
        task_t task;
        zinfo_usage_t zinfo;
 
-       thread->tkm_private.alloc += kernel_stack_size;
+       ledger_credit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
+
        if (stack_fake_zone_index != -1 &&
            (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                OSAddAtomic64(kernel_stack_size,
@@ -98,7 +100,8 @@ STACK_ZINFO_PFREE(thread_t thread)
        task_t task;
        zinfo_usage_t zinfo;
 
-       thread->tkm_private.free += kernel_stack_size;
+       ledger_debit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
+
        if (stack_fake_zone_index != -1 &&
            (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
                OSAddAtomic64(kernel_stack_size, 
@@ -108,8 +111,9 @@ STACK_ZINFO_PFREE(thread_t thread)
 static inline void
 STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
 {
-       from->tkm_private.free += kernel_stack_size;
-       to->tkm_private.alloc += kernel_stack_size;
+       ledger_debit(from->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
+       ledger_credit(to->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
+
        if (stack_fake_zone_index != -1) {
                task_t task;
                zinfo_usage_t zinfo;
@@ -213,7 +217,7 @@ stack_alloc_internal(void)
                if (kernel_memory_allocate(kernel_map, &stack,
                                           kernel_stack_size + (2*PAGE_SIZE),
                                           stack_addr_mask,
-                                          KMA_KOBJECT | guard_flags)
+                                          KMA_KSTACK | KMA_KOBJECT | guard_flags)
                    != KERN_SUCCESS)
                        panic("stack_alloc: kernel_memory_allocate");
 
index cb31f783cd17eb9d1561caf06fc77e4d3f3fce3d..b20629ffa82b832546b1bc4ddd130745f49188e6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -225,13 +225,11 @@ kernel_bootstrap(void)
        kernel_bootstrap_kprintf("calling clock_init\n");
        clock_init();
 
+       ledger_init();
 
        /*
         *      Initialize the IPC, task, and thread subsystems.
         */
-       kernel_bootstrap_kprintf("calling ledger_init\n");
-       ledger_init();
-
        kernel_bootstrap_kprintf("calling task_init\n");
        task_init();
 
@@ -249,13 +247,6 @@ kernel_bootstrap(void)
        thread->state = TH_RUN;
        thread_deallocate(thread);
 
-       /* transfer statistics from init thread to kernel */
-       thread_t init_thread = current_thread();
-       kernel_task->tkm_private.alloc = init_thread->tkm_private.alloc;
-       kernel_task->tkm_private.free = init_thread->tkm_private.free;
-       kernel_task->tkm_shared.alloc = init_thread->tkm_shared.alloc;
-       kernel_task->tkm_shared.free = init_thread->tkm_shared.free;
-
        kernel_bootstrap_kprintf("calling load_context - done\n");
        load_context(thread);
        /*NOTREACHED*/
@@ -263,6 +254,8 @@ kernel_bootstrap(void)
 
 int kth_started = 0;
 
+vm_offset_t vm_kernel_addrperm;
+
 /*
  * Now running in a thread.  Kick off other services,
  * invoke user bootstrap, enter pageout loop.
@@ -383,11 +376,21 @@ kernel_bootstrap_thread(void)
         */
        vm_shared_region_init();
        vm_commpage_init();
+       vm_commpage_text_init();
 
 #if CONFIG_MACF
        mac_policy_initmach();
 #endif
 
+       /*
+        * Initialize the global used for permuting kernel
+        * addresses that may be exported to userland as tokens
+        * using VM_KERNEL_ADDRPERM(). Force the random number
+        * to be odd to avoid mapping a non-zero
+        * word-aligned address to zero via addition.
+        */
+       vm_kernel_addrperm = (vm_offset_t)early_random() | 1;
+
        /*
         *      Start the user bootstrap.
         */
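
VM_KERNEL_ADDRPERM() itself is defined elsewhere in this change; per the comment above, the scheme is plain addition of a boot-time random constant, with the low bit forced to 1 so that no non-zero word-aligned (hence even) kernel address can sum to zero and be mistaken for a NULL token. A minimal model; the macro body below is an assumption for illustration, not the kernel's definition:

        #include <stdint.h>

        typedef uintptr_t vm_offset_t;

        static vm_offset_t vm_kernel_addrperm;

        /* Assumed shape of the permutation: NULL stays NULL, every other
         * address is offset by the secret odd constant before export. */
        #define VM_KERNEL_ADDRPERM(addr) \
                (((vm_offset_t)(addr) == 0) ? 0 : (vm_offset_t)(addr) + vm_kernel_addrperm)

        void addrperm_init(uint64_t early_random_value)
        {
                /* Odd constant + even (word-aligned) address is never zero. */
                vm_kernel_addrperm = (vm_offset_t)early_random_value | 1;
        }
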
@@ -496,7 +499,7 @@ load_context(
         * should never occur since the thread is expected
         * to have reserved stack.
         */
-       load_context_kprintf("stack %x, stackptr %x\n"
+       load_context_kprintf("thread %p, stack %x, stackptr %x\n", thread,
                             thread->kernel_stack, thread->machine.kstackptr);
        if (!thread->kernel_stack) {
                load_context_kprintf("calling stack_alloc_try\n");
@@ -560,7 +563,6 @@ scale_setup()
        bsd_scale_setup(scale);
        
        ipc_space_max = SPACE_MAX;
-       ipc_tree_entry_max = ITE_MAX;
        ipc_port_max = PORT_MAX;
        ipc_pset_max = SET_MAX;
        semaphore_max = SEMAPHORE_MAX;
index b69958ad777b5b09139e02817bedff773065c749..5a06b28f8cbb05c98822864f9054725ef2262ecb 100644 (file)
@@ -51,6 +51,7 @@
 
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
+#include <libkern/OSAtomic.h>
 
 /*
  *     Ulock ownership MACROS
@@ -838,9 +839,7 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id)
 void
 lock_set_reference(lock_set_t lock_set)
 {
-       lock_set_lock(lock_set);
-       lock_set->ref_count++;
-       lock_set_unlock(lock_set);
+       OSIncrementAtomic(&((lock_set)->ref_count));
 }
 
 /*
@@ -852,14 +851,9 @@ lock_set_reference(lock_set_t lock_set)
 void
 lock_set_dereference(lock_set_t lock_set)
 {
-       int     ref_count;
        int     size;
 
-       lock_set_lock(lock_set);
-       ref_count = --(lock_set->ref_count);
-       lock_set_unlock(lock_set);
-
-       if (ref_count == 0) {
+       if (1 == OSDecrementAtomic(&((lock_set)->ref_count))) {
                ipc_port_dealloc_kernel(lock_set->port);
                size = (int)(sizeof(struct lock_set) +
                        (sizeof(struct ulock) * (lock_set->n_ulocks - 1)));
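
The rewrite above collapses a lock/modify/unlock sequence into one atomic operation. The subtlety is that OSDecrementAtomic() returns the value held before the decrement, so comparing the result against 1 detects the caller that dropped the last reference. The same idiom, with C11 atomics standing in for the libkern primitives:

        #include <stdatomic.h>
        #include <stdbool.h>

        typedef struct { atomic_int ref_count; } refcounted_t;

        static void retain(refcounted_t *r)
        {
                atomic_fetch_add(&r->ref_count, 1);     /* OSIncrementAtomic */
        }

        static bool release(refcounted_t *r)
        {
                /* atomic_fetch_sub, like OSDecrementAtomic, returns the PRE-
                 * decrement value: 1 means this was the final reference. */
                if (atomic_fetch_sub(&r->ref_count, 1) == 1) {
                        /* owner path: deallocate port, free the structure, etc. */
                        return true;
                }
                return false;
        }
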
index 80ffb819907bbf5a054d401cdc8e84eab63b6d89..687387b3cad02afe2dd420ea619f9d7e00e45ed7 100644 (file)
@@ -58,6 +58,8 @@
 #include <kern/zalloc.h>
 #include <kern/mach_param.h>
 
+#include <libkern/OSAtomic.h>
+
 static unsigned int semaphore_event;
 #define SEMAPHORE_EVENT CAST_EVENT64_T(&semaphore_event)
 
@@ -179,7 +181,12 @@ semaphore_create(
        }
 
        s->count = value;
-       s->ref_count = (task == kernel_task) ? 1 : 2;
+
+       /*
+        * One reference for caller, one for port, and one for owner
+        * task (if not the kernel itself).
+        */
+       s->ref_count = (task == kernel_task) ? 2 : 3;
 
        /*
         *  Create and initialize the semaphore port
@@ -1060,9 +1067,21 @@ semaphore_dereference(
        if (semaphore != NULL) {
                ref_count = hw_atomic_sub(&semaphore->ref_count, 1);
 
+               if (ref_count == 1) {
+                       ipc_port_t port = semaphore->port;
+
+                       if (IP_VALID(port) && 
+                           OSCompareAndSwapPtr(port, IP_NULL, &semaphore->port)) {
+                               /*
+                                * We get to disassociate the port from the sema and
+                                * drop the port's reference on the sema.
+                                */
+                               ipc_port_dealloc_kernel(port);
+                               ref_count = hw_atomic_sub(&semaphore->ref_count, 1);
+                       }
+               }
                if (ref_count == 0) {
                        assert(wait_queue_empty(&semaphore->wait_queue));
-                       ipc_port_dealloc_kernel(semaphore->port);
                        zfree(semaphore_zone, semaphore);
                }
        }
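
The reworked dereference breaks the circular reference between a semaphore and its port: when the count falls to 1, the one surviving reference is the port's own, so whichever thread wins the OSCompareAndSwapPtr() gets sole ownership of the teardown, destroys the port, and drops that final reference. A sketch of the race-free shape, with C11 atomics standing in for hw_atomic_sub/OSCompareAndSwapPtr and the kernel calls left as comments:

        #include <stdatomic.h>
        #include <stddef.h>

        typedef struct port port_t;                     /* opaque stand-in */
        typedef struct {
                atomic_uint       ref_count;
                _Atomic(port_t *) port;
        } sema_t;

        static void sema_dereference(sema_t *s)
        {
                /* hw_atomic_sub returns the post-decrement value. */
                unsigned ref = atomic_fetch_sub(&s->ref_count, 1) - 1;

                if (ref == 1) {
                        port_t *port = atomic_load(&s->port);
                        /* Exactly one caller wins the swap to NULL. */
                        if (port != NULL &&
                            atomic_compare_exchange_strong(&s->port, &port, NULL)) {
                                /* ipc_port_dealloc_kernel(port) in the real code */
                                ref = atomic_fetch_sub(&s->ref_count, 1) - 1;
                        }
                }
                if (ref == 0) {
                        /* zfree(semaphore_zone, s) in the real code */
                }
        }
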
index 3b1f0edafef601456c8d481ccd57432361b731cd..89fc63b1bda826058287590789a518a0165d84bd 100644 (file)
@@ -254,7 +254,7 @@ thread_switch(
                        ip_unlock(port);
 
                        thread = convert_port_to_thread(port);
-                       ipc_port_release(port);
+                       ip_release(port);
 
                        if (thread == self) {
                                (void)thread_deallocate_internal(thread);
index 7dc2d61fdd658f02437787e149823c20f27ca360..9abedea98eb33fd83f125dd4bd6a4ea0e1ed0ff6 100644 (file)
@@ -60,6 +60,7 @@
 #include <mach/mach_traps.h>
 
 #include <kern/syscall_sw.h>
+#include <sys/munge.h>
 
 /* Forwards */
 
  *
  * WARNING:    Don't use numbers 0 through -9.  They (along with
  *             the positive numbers) are reserved for Unix.
+ *
+ * WARNING:    The 'arg_count' parameter in the list below is poorly named.
+ *             It doesn't refer to the number of arguments the trap takes -
+ *             it actually refers to the number of 32-bit words that need
+ *             to be copied in from userspace.  The munging of words to trap
+ *             arguments is done in mach_call_munger().
  */
 
 int kern_invalid_debug = 0;
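
Concretely: a 32-bit process passes _kernelrpc_mach_vm_allocate_trap a port name (one word), a user address (one word), a 64-bit size (two words), and flags (one word), hence arg_count 5 for a 4-parameter trap, with munge_wwlw naming the word-word-long-word shape. A hypothetical sketch of what such a munger does; the real routines are declared in sys/munge.h, may work in place, and munge_dddd in the table below is the 64-bit-process counterpart:

        #include <stdint.h>

        /* Hypothetical "wwlw" munger: expand five 32-bit words copied in from
         * a 32-bit user process into four 64-bit trap arguments. Layout and
         * endianness here are illustrative only. */
        static void munge_wwlw_sketch(const uint32_t in[5], uint64_t out[4])
        {
                out[0] = in[0];                            /* w: port name    */
                out[1] = in[1];                            /* w: address word */
                out[2] = ((uint64_t)in[3] << 32) | in[2];  /* l: 64-bit size  */
                out[3] = in[4];                            /* w: flags        */
        }
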
@@ -91,7 +98,7 @@ int kern_invalid_debug = 0;
 #include <kern/clock.h>
 #include <mach/mk_timer.h>
 
-mach_trap_t    mach_trap_table[MACH_TRAP_TABLE_COUNT] = {
+const mach_trap_t      mach_trap_table[MACH_TRAP_TABLE_COUNT] = {
 /* 0 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 1 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 2 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
@@ -102,20 +109,20 @@ mach_trap_t       mach_trap_table[MACH_TRAP_TABLE_COUNT] = {
 /* 7 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 8 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 9 */                MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 10 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
+/* 10 */       MACH_TRAP(_kernelrpc_mach_vm_allocate_trap, 5, munge_wwlw, munge_dddd),
 /* 11 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 12 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
+/* 12 */       MACH_TRAP(_kernelrpc_mach_vm_deallocate_trap, 5, munge_wll, munge_ddd),
 /* 13 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 14 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
+/* 14 */       MACH_TRAP(_kernelrpc_mach_vm_protect_trap, 7, munge_wllww, munge_ddddd),
 /* 15 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 16 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 17 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 18 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 19 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 20 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 21 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 22 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
-/* 23 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
+/* 16 */       MACH_TRAP(_kernelrpc_mach_port_allocate_trap, 3, munge_www, munge_ddd),
+/* 17 */       MACH_TRAP(_kernelrpc_mach_port_destroy_trap, 2, munge_ww, munge_dd),
+/* 18 */       MACH_TRAP(_kernelrpc_mach_port_deallocate_trap, 2, munge_ww, munge_dd),
+/* 19 */       MACH_TRAP(_kernelrpc_mach_port_mod_refs_trap, 4, munge_wwww, munge_dddd),
+/* 20 */       MACH_TRAP(_kernelrpc_mach_port_move_member_trap, 3, munge_www, munge_ddd),
+/* 21 */       MACH_TRAP(_kernelrpc_mach_port_insert_right_trap, 4, munge_wwww, munge_dddd),
+/* 22 */       MACH_TRAP(_kernelrpc_mach_port_insert_member_trap, 3, munge_www, munge_ddd),
+/* 23 */       MACH_TRAP(_kernelrpc_mach_port_extract_member_trap, 3, munge_www, munge_ddd),
 /* 24 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 25 */       MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 26 */       MACH_TRAP(mach_reply_port, 0, NULL, NULL),
@@ -241,20 +248,20 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = {
 /* 7 */                "kern_invalid",
 /* 8 */                "kern_invalid",
 /* 9 */                "kern_invalid",
-/* 10 */       "kern_invalid",
+/* 10 */       "_kernelrpc_mach_vm_allocate_trap",
 /* 11 */       "kern_invalid",
-/* 12 */       "kern_invalid",
+/* 12 */       "_kernelrpc_mach_vm_deallocate_trap",
 /* 13 */       "kern_invalid",
-/* 14 */       "kern_invalid",
+/* 14 */       "_kernelrpc_mach_vm_protect_trap",
 /* 15 */       "kern_invalid",
-/* 16 */       "kern_invalid",
-/* 17 */       "kern_invalid",
-/* 18 */       "kern_invalid",
-/* 19 */       "kern_invalid",
-/* 20 */       "kern_invalid",
-/* 21 */       "kern_invalid",
-/* 22 */       "kern_invalid",
-/* 23 */       "kern_invalid",
+/* 16 */       "_kernelrpc_mach_port_allocate_trap",
+/* 17 */       "_kernelrpc_mach_port_destroy_trap",
+/* 18 */       "_kernelrpc_mach_port_deallocate_trap",
+/* 19 */       "_kernelrpc_mach_port_mod_refs_trap",
+/* 20 */       "_kernelrpc_mach_port_move_member_trap",
+/* 21 */       "_kernelrpc_mach_port_insert_right_trap",
+/* 22 */       "_kernelrpc_mach_port_insert_member_trap",
+/* 23 */       "_kernelrpc_mach_port_extract_member_trap",
 /* 24 */       "kern_invalid",
 /* 25 */       "kern_invalid",
 /* 26 */       "mach_reply_port",
index d186546d580435555241f50155a66daebebf1ca5..879b9cd5c0cf828fc77d9c260dbbac1f96e76732 100644 (file)
@@ -70,11 +70,7 @@ typedef      void    mach_munge_t(const void *, void *);
 
 typedef struct {
        int                     mach_trap_arg_count;
-       int                     (*mach_trap_function)(void);
-#if 0 /* no active architectures use mungers for mach traps */
-       mach_munge_t            *mach_trap_arg_munge32; /* system call arguments for 32-bit */
-       mach_munge_t            *mach_trap_arg_munge64; /* system call arguments for 64-bit */
-#endif
+       kern_return_t           (*mach_trap_function)(void *);
 #if    MACH_ASSERT
        const char*             mach_trap_name;
 #endif /* MACH_ASSERT */
@@ -83,16 +79,16 @@ typedef struct {
 #define MACH_TRAP_TABLE_COUNT   128
 
 
-extern mach_trap_t             mach_trap_table[];
+extern const mach_trap_t       mach_trap_table[];
 extern int                     mach_trap_count;
 
 #if defined(__i386__) || defined(__x86_64__)
 #if    !MACH_ASSERT
 #define        MACH_TRAP(name, arg_count, munge32, munge64)    \
-               { (arg_count), (int (*)(void)) (name)  }
+       { (arg_count), (kern_return_t (*)(void *)) (name)  }
 #else
 #define MACH_TRAP(name, arg_count, munge32, munge64)           \
-               { (arg_count), (int (*)(void)) (name), #name }
+       { (arg_count), (kern_return_t (*)(void *)) (name), #name }
 #endif /* !MACH_ASSERT */
 #else  /* !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) */
 #error Unsupported architecture
index 985f3c144ce52a97db9da68dea07b2f14e439c09..96a00ff5a6ec6b6c5a06ed6c856fe20d3e1e32e7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -86,7 +86,6 @@
  * Copyright (c) 2005 SPARTA, Inc.
  */
 
-#include <mach_kdb.h>
 #include <fast_tas.h>
 #include <platforms.h>
 
 #include <kern/processor.h>
 #include <kern/sched_prim.h>   /* for thread_wakeup */
 #include <kern/ipc_tt.h>
-#include <kern/ledger.h>
 #include <kern/host.h>
 #include <kern/clock.h>
 #include <kern/timer.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_protos.h>
 
-#if    MACH_KDB
-#include <ddb/db_sym.h>
-#endif /* MACH_KDB */
-
 /*
  * Exported interfaces
  */
@@ -156,10 +150,18 @@ zone_t                    task_zone;
 lck_attr_t      task_lck_attr;
 lck_grp_t       task_lck_grp;
 lck_grp_attr_t  task_lck_grp_attr;
+#if CONFIG_EMBEDDED
+lck_mtx_t      task_watch_mtx;
+#endif /* CONFIG_EMBEDDED */
 
 zinfo_usage_store_t tasks_tkm_private;
 zinfo_usage_store_t tasks_tkm_shared;
 
+static ledger_template_t task_ledger_template = NULL;
+struct _task_ledger_indices task_ledgers = {-1, -1, -1, -1, -1};
+void init_task_ledgers(void);
+
+
 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 
 /* externs for BSD kernel */
@@ -170,7 +172,8 @@ extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 void           task_hold_locked(
                        task_t          task);
 void           task_wait_locked(
-                       task_t          task);
+                       task_t          task,
+                       boolean_t       until_not_runnable);
 void           task_release_locked(
                        task_t          task);
 void           task_free(
@@ -178,11 +181,6 @@ void               task_free(
 void           task_synchronizer_destroy_all(
                        task_t          task);
 
-kern_return_t  task_set_ledger(
-                       task_t          task,
-                       ledger_t        wired,
-                       ledger_t        paged);
-
 int check_for_tasksuspend(
                        task_t task);
 
@@ -268,6 +266,9 @@ task_init(void)
        lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
        lck_attr_setdefault(&task_lck_attr);
        lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
+#if CONFIG_EMBEDDED
+       lck_mtx_init(&task_watch_mtx, &task_lck_grp, &task_lck_attr);
+#endif /* CONFIG_EMBEDDED */
 
        task_zone = zinit(
                        sizeof(struct task),
@@ -277,6 +278,8 @@ task_init(void)
 
        zone_change(task_zone, Z_NOENCRYPT, TRUE);
 
+       init_task_ledgers();
+
        /*
         * Create the kernel task as the first task.
         */
@@ -289,6 +292,7 @@ task_init(void)
 
        vm_map_deallocate(kernel_task->map);
        kernel_task->map = kernel_map;
+
 }
 
 /*
@@ -347,6 +351,36 @@ host_security_create_task_token(
        return(KERN_FAILURE);
 }
 
+void
+init_task_ledgers(void)
+{
+       ledger_template_t t;
+       
+       assert(task_ledger_template == NULL);
+       assert(kernel_task == TASK_NULL);
+
+       if ((t = ledger_template_create("Per-task ledger")) == NULL)
+               panic("couldn't create task ledger template");
+
+       task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
+       task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
+           "physmem", "bytes");
+       task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
+           "bytes");
+       task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
+           "bytes");
+       task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
+           "bytes");
+
+       if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
+           (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
+           (task_ledgers.wired_mem < 0)) {
+               panic("couldn't create entries for task ledger template");
+       }
+
+       task_ledger_template = t;
+}
+
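
The template is built once, before the kernel task exists; every task_create_internal() then instantiates it, and the indices saved in task_ledgers address entries in each instance. A toy model of the template/instance split (not the kernel API, whose actual calls appear in the surrounding hunks):

        #include <stdint.h>
        #include <stdio.h>

        #define MAX_ENTRIES 8

        typedef struct { int nentries; const char *names[MAX_ENTRIES]; } template_t;
        typedef struct { int64_t balance[MAX_ENTRIES]; } ledger_model_t;

        /* ledger_entry_add(): fix an entry in the template, return its index. */
        static int entry_add(template_t *t, const char *name)
        {
                if (t->nentries == MAX_ENTRIES)
                        return -1;
                t->names[t->nentries] = name;
                return t->nentries++;           /* stored in task_ledgers.* */
        }

        int main(void)
        {
                template_t tmpl = { 0, { NULL } };
                int cpu_time = entry_add(&tmpl, "cpu_time");

                ledger_model_t task_ledger = { { 0 } };    /* ledger_instantiate()  */
                task_ledger.balance[cpu_time] += 8000000;  /* ledger_credit(), 8 ms */
                printf("%s = %lld\n", tmpl.names[cpu_time],
                    (long long)task_ledger.balance[cpu_time]);
                return 0;
        }
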
 kern_return_t
 task_create_internal(
        task_t          parent_task,
@@ -356,6 +390,7 @@ task_create_internal(
 {
        task_t                  new_task;
        vm_shared_region_t      shared_region;
+       ledger_t                ledger = NULL;
 
        new_task = (task_t) zalloc(task_zone);
 
@@ -365,13 +400,22 @@ task_create_internal(
        /* one ref for just being alive; one for our caller */
        new_task->ref_count = 2;
 
+       /* allocate with active entries */
+       assert(task_ledger_template != NULL);
+       if ((ledger = ledger_instantiate(task_ledger_template,
+                       LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
+               zfree(task_zone, new_task);
+               return(KERN_RESOURCE_SHORTAGE);
+       }
+       new_task->ledger = ledger;
+
        /* if inherit_memory is true, parent_task MUST not be NULL */
        if (inherit_memory)
-               new_task->map = vm_map_fork(parent_task->map);
+               new_task->map = vm_map_fork(ledger, parent_task->map);
        else
-               new_task->map = vm_map_create(pmap_create(0, is_64bit),
-                                       (vm_map_offset_t)(VM_MIN_ADDRESS),
-                                       (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
+               new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
+                               (vm_map_offset_t)(VM_MIN_ADDRESS),
+                               (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
 
        /* Inherit memlock limit from parent */
        if (parent_task)
@@ -399,11 +443,6 @@ task_create_internal(
        new_task->taskFeatures[0] = 0;                          /* Init task features */
        new_task->taskFeatures[1] = 0;                          /* Init task features */
 
-       new_task->tkm_private.alloc = 0;
-       new_task->tkm_private.free = 0;
-       new_task->tkm_shared.alloc = 0;
-       new_task->tkm_shared.free = 0;
-
        zinfo_task_init(new_task);
 
 #ifdef MACH_BSD
@@ -441,6 +480,21 @@ task_create_internal(
        new_task->t_chud = 0U;
 #endif
 
+       new_task->pidsuspended = FALSE;
+       new_task->frozen = FALSE;
+       new_task->rusage_cpu_flags = 0;
+       new_task->rusage_cpu_percentage = 0;
+       new_task->rusage_cpu_interval = 0;
+       new_task->rusage_cpu_deadline = 0;
+       new_task->rusage_cpu_callt = NULL;
+       new_task->proc_terminate = 0;
+#if CONFIG_EMBEDDED
+       queue_init(&new_task->task_watchers);
+       new_task->appstate = TASK_APPSTATE_ACTIVE;
+       new_task->num_taskwatchers  = 0;
+       new_task->watchapplying  = 0;
+#endif /* CONFIG_EMBEDDED */
+
        if (parent_task != TASK_NULL) {
                new_task->sec_token = parent_task->sec_token;
                new_task->audit_token = parent_task->audit_token;
@@ -449,10 +503,6 @@ task_create_internal(
                shared_region = vm_shared_region_get(parent_task);
                vm_shared_region_set(new_task, shared_region);
 
-               new_task->wired_ledger_port = ledger_copy(
-                       convert_port_to_ledger(parent_task->wired_ledger_port));
-               new_task->paged_ledger_port = ledger_copy(
-                       convert_port_to_ledger(parent_task->paged_ledger_port));
                if(task_has_64BitAddr(parent_task))
                        task_set_64BitAddr(new_task);
                new_task->all_image_info_addr = parent_task->all_image_info_addr;
@@ -468,20 +518,18 @@ task_create_internal(
                new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
                new_task->policystate = parent_task->policystate;
                /* inherit the self action state */
-               new_task->actionstate = parent_task->actionstate;
+               new_task->appliedstate = parent_task->appliedstate;
                new_task->ext_policystate = parent_task->ext_policystate;
 #if NOTYET
                /* till the child lifecycle is cleared do not inherit external action */
-               new_task->ext_actionstate = parent_task->ext_actionstate;
+               new_task->ext_appliedstate = parent_task->ext_appliedstate;
 #else
-               new_task->ext_actionstate = default_task_null_policy;
+               new_task->ext_appliedstate = default_task_null_policy;
 #endif
        }
        else {
                new_task->sec_token = KERNEL_SECURITY_TOKEN;
                new_task->audit_token = KERNEL_AUDIT_TOKEN;
-               new_task->wired_ledger_port = ledger_copy(root_wired_ledger);
-               new_task->paged_ledger_port = ledger_copy(root_paged_ledger);
 #ifdef __LP64__
                if(is_64bit)
                        task_set_64BitAddr(new_task);
@@ -492,8 +540,8 @@ task_create_internal(
                new_task->pset_hint = PROCESSOR_SET_NULL;
                new_task->policystate = default_task_proc_policy;
                new_task->ext_policystate = default_task_proc_policy;
-               new_task->actionstate = default_task_null_policy;
-               new_task->ext_actionstate = default_task_null_policy;
+               new_task->appliedstate = default_task_null_policy;
+               new_task->ext_appliedstate = default_task_null_policy;
        }
 
        if (kernel_task == TASK_NULL) {
@@ -530,6 +578,8 @@ void
 task_deallocate(
        task_t          task)
 {
+       ledger_amount_t credit, debit;
+
        if (task == TASK_NULL)
            return;
 
@@ -540,6 +590,13 @@ task_deallocate(
        queue_remove(&terminated_tasks, task, task_t, tasks);
        lck_mtx_unlock(&tasks_threads_lock);
 
+       /*
+        *      Give the machine dependent code a chance
+        *      to perform cleanup before ripping apart
+        *      the task.
+        */
+       machine_task_terminate(task);
+
        ipc_task_terminate(task);
 
        if (task->affinity_space)
@@ -553,10 +610,18 @@ task_deallocate(
 #if CONFIG_MACF_MACH
        labelh_release(task->label);
 #endif
-       OSAddAtomic64(task->tkm_private.alloc, (int64_t *)&tasks_tkm_private.alloc);
-       OSAddAtomic64(task->tkm_private.free, (int64_t *)&tasks_tkm_private.free);
-       OSAddAtomic64(task->tkm_shared.alloc, (int64_t *)&tasks_tkm_shared.alloc);
-       OSAddAtomic64(task->tkm_shared.free, (int64_t *)&tasks_tkm_shared.free);
+
+       if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
+           &debit)) {
+               OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
+               OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
+       }
+       if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
+           &debit)) {
+               OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
+               OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
+       }
+       ledger_dereference(task->ledger);
        zinfo_task_free(task);
        zfree(task_zone, task);
 }
@@ -665,15 +730,14 @@ task_terminate_internal(
                        thread_terminate_internal(thread);
        }
 
+       task_unlock(task);
+
+#if CONFIG_EMBEDDED
        /*
-        *      Give the machine dependent code a chance
-        *      to perform cleanup before ripping apart
-        *      the task.
+        * remove all task watchers 
         */
-       if (self_task == task)
-               machine_thread_terminate_self();
-
-       task_unlock(task);
+       task_removewatchers(task);
+#endif /* CONFIG_EMBEDDED */
 
        /*
         *      Destroy all synchronizers owned by the task.
@@ -683,7 +747,7 @@ task_terminate_internal(
        /*
         *      Destroy the IPC space, leaving just a reference for it.
         */
-       ipc_space_destroy(task->itk_space);
+       ipc_space_terminate(task->itk_space);
 
        if (vm_map_has_4GB_pagezero(task->map))
                vm_map_clear_4GB_pagezero(task->map);
@@ -800,20 +864,10 @@ task_complete_halt(task_t task)
        assert(task->halting);
        assert(task == current_task());
 
-       /*
-        *      Give the machine dependent code a chance
-        *      to perform cleanup of task-level resources
-        *      associated with the current thread before
-        *      ripping apart the task.
-        *
-        *      This must be done with the task locked.
-        */
-       machine_thread_terminate_self();
-
        /*
         *      Wait for the other threads to get shut down.
         *      When the last other thread is reaped, we'll be
-        *      worken up.
+        *      woken up.
         */
        if (task->thread_count > 1) {
                assert_wait((event_t)&task->halting, THREAD_UNINT);
@@ -823,6 +877,14 @@ task_complete_halt(task_t task)
                task_unlock(task);
        }
 
+       /*
+        *      Give the machine dependent code a chance
+        *      to perform cleanup of task-level resources
+        *      associated with the current thread before
+        *      ripping apart the task.
+        */
+       machine_task_terminate(task);
+
        /*
         *      Destroy all synchronizers owned by the task.
         */
@@ -906,6 +968,28 @@ task_hold(
        return (KERN_SUCCESS);
 }
 
+kern_return_t
+task_wait(
+               task_t          task,
+               boolean_t       until_not_runnable)
+{
+       if (task == TASK_NULL)
+               return (KERN_INVALID_ARGUMENT);
+
+       task_lock(task);
+
+       if (!task->active) {
+               task_unlock(task);
+
+               return (KERN_FAILURE);
+       }
+
+       task_wait_locked(task, until_not_runnable);
+       task_unlock(task);
+
+       return (KERN_SUCCESS);
+}
+
 /*
  *     task_wait_locked:
  *
@@ -916,7 +1000,8 @@ task_hold(
  */
 void
 task_wait_locked(
-       register task_t         task)
+       register task_t         task,
+       boolean_t               until_not_runnable)
 {
        register thread_t       thread, self;
 
@@ -932,7 +1017,7 @@ task_wait_locked(
         */
        queue_iterate(&task->threads, thread, thread_t, task_threads) {
                if (thread != self)
-                       thread_wait(thread);
+                       thread_wait(thread, until_not_runnable);
        }
 }
 
@@ -1100,26 +1185,11 @@ task_threads(
        return (KERN_SUCCESS);
 }
 
-/*
- *     task_suspend:
- *
- *     Implement a user-level suspension on a task.
- *
- * Conditions:
- *     The caller holds a reference to the task
- */
-kern_return_t
-task_suspend(
+static kern_return_t
+place_task_hold    (
        register task_t         task)
-{
-       if (task == TASK_NULL || task == kernel_task)
-               return (KERN_INVALID_ARGUMENT);
-
-       task_lock(task);
-
+{    
        if (!task->active) {
-               task_unlock(task);
-
                return (KERN_FAILURE);
        }
 
@@ -1128,8 +1198,6 @@ task_suspend(
                 *      If the stop count was positive, the task is
                 *      already stopped and we can exit.
                 */
-               task_unlock(task);
-
                return (KERN_SUCCESS);
        }
 
@@ -1140,11 +1208,71 @@ task_suspend(
         * to stop executing user code.
         */
        task_hold_locked(task);
-       task_wait_locked(task);
+       task_wait_locked(task, TRUE);
+       
+       return (KERN_SUCCESS);
+}
+
+static kern_return_t
+release_task_hold    (
+       register task_t         task,
+       boolean_t           pidresume)
+{
+       register boolean_t release = FALSE;
+    
+       if (!task->active) {
+               return (KERN_FAILURE);
+       }
+       
+       if (pidresume) {
+           if (task->pidsuspended == FALSE) {
+            return (KERN_FAILURE);
+           }
+           task->pidsuspended = FALSE;
+       }
+
+    if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
+               if (--task->user_stop_count == 0) {
+                       release = TRUE;
+               }
+       }
+       else {
+               return (KERN_FAILURE);
+       }
+
+       /*
+        *      Release the task if necessary.
+        */
+       if (release)
+               task_release_locked(task);
+               
+    return (KERN_SUCCESS);
+}
+
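
Taken together, place_task_hold() and release_task_hold() let Mach task_suspend()/task_resume() and the new pidsuspend/pidresume path share one user_stop_count: the pidsuspended flag marks that one outstanding hold belongs to the pid-level path, and a plain resume is not allowed to consume it. A compact model of the counting rules (not kernel code; success and release are folded into one boolean):

        #include <stdbool.h>

        typedef struct {
                int  user_stop_count;   /* outstanding holds */
                bool pidsuspended;      /* one hold owned by pidsuspend */
                bool active;
        } task_model_t;

        /* Returns true when this call dropped the last hold and the task's
         * threads should be released. */
        static bool release_hold(task_model_t *t, bool pidresume)
        {
                if (!t->active)
                        return false;
                if (pidresume) {
                        if (!t->pidsuspended)
                                return false;           /* never pid-suspended */
                        t->pidsuspended = false;
                }
                /* A plain task_resume() may not take the hold pidsuspend owns. */
                if (t->user_stop_count <= (t->pidsuspended ? 1 : 0))
                        return false;
                return (--t->user_stop_count == 0);
        }
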
+/*
+ *     task_suspend:
+ *
+ *     Implement a user-level suspension on a task.
+ *
+ * Conditions:
+ *     The caller holds a reference to the task
+ */
+kern_return_t
+task_suspend(
+       register task_t         task)
+{
+       kern_return_t    kr;
+       
+       if (task == TASK_NULL || task == kernel_task)
+               return (KERN_INVALID_ARGUMENT);
+
+       task_lock(task);
+
+       kr = place_task_hold(task);
 
        task_unlock(task);
 
-       return (KERN_SUCCESS);
+       return (kr);
 }
 
 /*
@@ -1158,39 +1286,107 @@ kern_return_t
 task_resume(
        register task_t task)
 {
-       register boolean_t      release = FALSE;
+       kern_return_t    kr;
 
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
        task_lock(task);
 
-       if (!task->active) {
-               task_unlock(task);
+       kr = release_task_hold(task, FALSE);
 
-               return (KERN_FAILURE);
-       }
+       task_unlock(task);
 
-       if (task->user_stop_count > 0) {
-               if (--task->user_stop_count == 0) {
-                       release = TRUE;
-               }
+       return (kr);
+}
+
+kern_return_t
+task_pidsuspend_locked(task_t task)
+{
+       kern_return_t kr;
+
+       if (task->pidsuspended) {
+               kr = KERN_FAILURE;
+               goto out;
        }
-       else {
-               task_unlock(task);
 
-               return (KERN_FAILURE);
+       task->pidsuspended = TRUE;
+
+       kr = place_task_hold(task);
+       if (kr != KERN_SUCCESS) {
+               task->pidsuspended = FALSE;
        }
+out:
+       return(kr);
+}
 
-       /*
-        *      Release the task if necessary.
-        */
-       if (release)
-               task_release_locked(task);
+
+/*
+ *     task_pidsuspend:
+ *
+ *     Suspends a task by placing a hold on its threads.
+ *
+ * Conditions:
+ *     The caller holds a reference to the task
+ */
+kern_return_t
+task_pidsuspend(
+       register task_t         task)
+{
+       kern_return_t    kr;
+    
+       if (task == TASK_NULL || task == kernel_task)
+               return (KERN_INVALID_ARGUMENT);
+
+       task_lock(task);
+
+       kr = task_pidsuspend_locked(task);
 
        task_unlock(task);
 
-       return (KERN_SUCCESS);
+       return (kr);
+}
+
+/* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
+#define THAW_ON_RESUME 1
+
+/*
+ *     task_pidresume:
+ *             Resumes a previously suspended task.
+ *             
+ * Conditions:
+ *             The caller holds a reference to the task
+ */
+kern_return_t 
+task_pidresume(
+       register task_t task)
+{
+       kern_return_t    kr;
+#if (CONFIG_FREEZE && THAW_ON_RESUME)
+    boolean_t frozen;
+#endif
+
+       if (task == TASK_NULL || task == kernel_task)
+               return (KERN_INVALID_ARGUMENT);
+
+       task_lock(task);
+       
+#if (CONFIG_FREEZE && THAW_ON_RESUME)
+    frozen = task->frozen;
+    task->frozen = FALSE;
+#endif
+
+       kr = release_task_hold(task, TRUE);
+
+       task_unlock(task);
+
+#if (CONFIG_FREEZE && THAW_ON_RESUME)
+       if ((kr == KERN_SUCCESS) && (frozen == TRUE)) {
+           kr = vm_map_thaw(task->map);
+       }
+#endif
+
+       return (kr);
 }
 
 #if CONFIG_FREEZE
@@ -1198,7 +1394,7 @@ task_resume(
 /*
  *     task_freeze:
  *
- *     Freeze a currently suspended task.
+ *     Freeze a task.
  *
  * Conditions:
  *     The caller holds a reference to the task
@@ -1210,19 +1406,35 @@ task_freeze(
        uint32_t           *wired_count,
        uint32_t           *clean_count,
        uint32_t           *dirty_count,
+       uint32_t           dirty_budget,
        boolean_t          *shared,
        boolean_t          walk_only)
 {
+       kern_return_t kr;
+    
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
+       task_lock(task);
+
+       if (task->frozen) {
+           task_unlock(task);
+           return (KERN_FAILURE);
+       }
+
+    if (walk_only == FALSE) {
+           task->frozen = TRUE;
+    }
+
+       task_unlock(task);
+
        if (walk_only) {
-               vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, shared);          
+               kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);               
        } else {
-               vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, shared);
+               kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
        }
 
-       return (KERN_SUCCESS);
+       return (kr);
 }
 
 /*
@@ -1237,12 +1449,25 @@ kern_return_t
 task_thaw(
        register task_t         task)
 {
+       kern_return_t kr;
+    
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
-       vm_map_thaw(task->map);
+       task_lock(task);
+       
+       if (!task->frozen) {
+           task_unlock(task);
+           return (KERN_FAILURE);
+       }
+
+       task->frozen = FALSE;
 
-       return (KERN_SUCCESS);
+       task_unlock(task);
+
+       kr = vm_map_thaw(task->map);
+
+       return (kr);
 }
 
 #endif /* CONFIG_FREEZE */
@@ -1279,32 +1504,6 @@ host_security_set_task_token(
         return(kr);
 }
 
-/*
- * Utility routine to set a ledger
- */
-kern_return_t
-task_set_ledger(
-        task_t         task,
-        ledger_t       wired,
-        ledger_t       paged)
-{
-       if (task == TASK_NULL)
-               return(KERN_INVALID_ARGUMENT);
-
-        task_lock(task);
-        if (wired) {
-                ipc_port_release_send(task->wired_ledger_port);
-                task->wired_ledger_port = ledger_copy(wired);
-        }                
-        if (paged) {
-                ipc_port_release_send(task->paged_ledger_port);
-                task->paged_ledger_port = ledger_copy(paged);
-        }                
-        task_unlock(task);
-
-        return(KERN_SUCCESS);
-}
-
 /*
  * This routine was added, pretty much exclusively, for registering the
  * RPC glue vector for in-kernel short circuited tasks.  Rather than
@@ -1435,6 +1634,51 @@ task_info(
                break;
        }
 
+       case MACH_TASK_BASIC_INFO:
+       {
+               mach_task_basic_info_t  basic_info;
+               vm_map_t                map;
+               clock_sec_t             secs;
+               clock_usec_t            usecs;
+
+               if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
+                   error = KERN_INVALID_ARGUMENT;
+                   break;
+               }
+
+               basic_info = (mach_task_basic_info_t)task_info_out;
+
+               map = (task == kernel_task) ? kernel_map : task->map;
+
+               basic_info->virtual_size  = map->size;
+
+               basic_info->resident_size =
+                   (mach_vm_size_t)(pmap_resident_count(map->pmap));
+               basic_info->resident_size *= PAGE_SIZE_64;
+
+               basic_info->resident_size_max =
+                   (mach_vm_size_t)(pmap_resident_max(map->pmap));
+               basic_info->resident_size_max *= PAGE_SIZE_64;
+
+               basic_info->policy = ((task != kernel_task) ? 
+                                     POLICY_TIMESHARE : POLICY_RR);
+
+               basic_info->suspend_count = task->user_stop_count;
+
+               absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
+               basic_info->user_time.seconds = 
+                   (typeof(basic_info->user_time.seconds))secs;
+               basic_info->user_time.microseconds = usecs;
+
+               absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
+               basic_info->system_time.seconds =
+                   (typeof(basic_info->system_time.seconds))secs;
+               basic_info->system_time.microseconds = usecs;
+
+               *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
+               break;
+       }
+
        case TASK_THREAD_TIMES_INFO:
        {
                register task_thread_times_info_t       times_info;
@@ -1485,14 +1729,27 @@ task_info(
 
                queue_iterate(&task->threads, thread, thread_t, task_threads) {
                        uint64_t        tval;
+                       spl_t           x;
+
+                       x = splsched();
+                       thread_lock(thread);
 
                        tval = timer_grab(&thread->user_timer);
                        info->threads_user += tval;
                        info->total_user += tval;
 
                        tval = timer_grab(&thread->system_timer);
-                       info->threads_system += tval;
-                       info->total_system += tval;
+                       if (thread->precise_user_kernel_time) {
+                               info->threads_system += tval;
+                               info->total_system += tval;
+                       } else {
+                               /* system_timer may represent either sys or user */
+                               info->threads_user += tval;
+                               info->total_user += tval;
+                       }
+
+                       thread_unlock(thread);
+                       splx(x);
                }
 
 
@@ -1561,7 +1818,7 @@ task_info(
        case TASK_KERNELMEMORY_INFO:
        {
                task_kernelmemory_info_t        tkm_info;
-               thread_t                        thread;
+               ledger_amount_t                 credit, debit;
 
                if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
                   error = KERN_INVALID_ARGUMENT;
@@ -1569,6 +1826,10 @@ task_info(
                }
 
                tkm_info = (task_kernelmemory_info_t) task_info_out;
+               tkm_info->total_palloc = 0;
+               tkm_info->total_pfree = 0;
+               tkm_info->total_salloc = 0;
+               tkm_info->total_sfree = 0;
 
                if (task == kernel_task) {
                        /*
@@ -1581,41 +1842,37 @@ task_info(
                        /* start by accounting for all the terminated tasks against the kernel */
                        tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
                        tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
-                       tkm_info->total_salloc = 0;
-                       tkm_info->total_sfree = 0;
 
                        /* count all other task/thread shared alloc/free against the kernel */
                        lck_mtx_lock(&tasks_threads_lock);
+
+                       /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
                        queue_iterate(&tasks, task, task_t, tasks) {
                                if (task == kernel_task) {
-                                       tkm_info->total_palloc += task->tkm_private.alloc;
-                                       tkm_info->total_pfree += task->tkm_private.free;
+                                       if (ledger_get_entries(task->ledger,
+                                           task_ledgers.tkm_private, &credit,
+                                           &debit) == KERN_SUCCESS) {
+                                               tkm_info->total_palloc += credit;
+                                               tkm_info->total_pfree += debit;
+                                       }
                                }
-                               tkm_info->total_palloc += task->tkm_shared.alloc;
-                               tkm_info->total_pfree += task->tkm_shared.free;
-                       }
-                       queue_iterate(&threads, thread, thread_t, threads) {
-                               if (thread->task == kernel_task) {
-                                       tkm_info->total_palloc += thread->tkm_private.alloc;
-                                       tkm_info->total_pfree += thread->tkm_private.free;
+                               if (!ledger_get_entries(task->ledger,
+                                   task_ledgers.tkm_shared, &credit, &debit)) {
+                                       tkm_info->total_palloc += credit;
+                                       tkm_info->total_pfree += debit;
                                }
-                               tkm_info->total_palloc += thread->tkm_shared.alloc;
-                               tkm_info->total_pfree += thread->tkm_shared.free;
                        }
                        lck_mtx_unlock(&tasks_threads_lock);
                } else {
-                       /* account for all the terminated threads in the process */
-                       tkm_info->total_palloc = task->tkm_private.alloc;
-                       tkm_info->total_pfree = task->tkm_private.free;
-                       tkm_info->total_salloc = task->tkm_shared.alloc;
-                       tkm_info->total_sfree = task->tkm_shared.free;
-
-                       /* then add in all the running threads */
-                       queue_iterate(&task->threads, thread, thread_t, task_threads) {
-                               tkm_info->total_palloc += thread->tkm_private.alloc;
-                               tkm_info->total_pfree += thread->tkm_private.free;
-                               tkm_info->total_salloc += thread->tkm_shared.alloc;
-                               tkm_info->total_sfree += thread->tkm_shared.free;
+                       if (!ledger_get_entries(task->ledger,
+                           task_ledgers.tkm_private, &credit, &debit)) {
+                               tkm_info->total_palloc = credit;
+                               tkm_info->total_pfree = debit;
+                       }
+                       if (!ledger_get_entries(task->ledger,
+                           task_ledgers.tkm_shared, &credit, &debit)) {
+                               tkm_info->total_salloc = credit;
+                               tkm_info->total_sfree = debit;
                        }
                        task_unlock(task);
                }
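
The rewritten branches read the statistics out of the task ledger rather
than the removed tkm_private/tkm_shared counters. A sketch of the read
pattern, assuming the ledger_get_entries() interface used above, where
credit and debit map to the alloc/free totals they feed:

    ledger_amount_t credit, debit;

    if (ledger_get_entries(task->ledger, task_ledgers.tkm_private,
        &credit, &debit) == KERN_SUCCESS) {
            int64_t outstanding = credit - debit;  /* bytes still allocated */
    }
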
@@ -1785,6 +2042,7 @@ task_vtimer_set(
        integer_t       which)
 {
        thread_t        thread;
+       spl_t           x;
 
        /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
 
@@ -1796,21 +2054,36 @@ task_vtimer_set(
 
        case TASK_VTIMER_USER:
                queue_iterate(&task->threads, thread, thread_t, task_threads) {
-                       thread->vtimer_user_save = timer_grab(&thread->user_timer);
+                       x = splsched();
+                       thread_lock(thread);
+                       if (thread->precise_user_kernel_time)
+                               thread->vtimer_user_save = timer_grab(&thread->user_timer);
+                       else
+                               thread->vtimer_user_save = timer_grab(&thread->system_timer);
+                       thread_unlock(thread);
+                       splx(x);
                }
                break;
 
        case TASK_VTIMER_PROF:
                queue_iterate(&task->threads, thread, thread_t, task_threads) {
+                       x = splsched();
+                       thread_lock(thread);
                        thread->vtimer_prof_save = timer_grab(&thread->user_timer);
                        thread->vtimer_prof_save += timer_grab(&thread->system_timer);
+                       thread_unlock(thread);
+                       splx(x);
                }
                break;
 
        case TASK_VTIMER_RLIM:
                queue_iterate(&task->threads, thread, thread_t, task_threads) {
+                       x = splsched();
+                       thread_lock(thread);
                        thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
                        thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
+                       thread_unlock(thread);
+                       splx(x);
                }
                break;
        }
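
task_vtimer_set() now takes each thread lock while snapshotting the timers
into the vtimer_*_save fields; task_vtimer_update() (next hunk) later
reports the delta against that snapshot. A hedged sketch of the pairing,
with the update signature inferred from the code below:

    uint32_t usecs = 0;

    task_vtimer_set(task, TASK_VTIMER_USER);             /* snapshot totals */
    /* ... the task's threads run for a while ... */
    task_vtimer_update(task, TASK_VTIMER_USER, &usecs);  /* microseconds since */
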
@@ -1853,8 +2126,13 @@ __unused
        switch (which) {
 
        case TASK_VTIMER_USER:
-               tdelt = (uint32_t)timer_delta(&thread->user_timer,
+               if (thread->precise_user_kernel_time) {
+                       tdelt = (uint32_t)timer_delta(&thread->user_timer,
+                                                               &thread->vtimer_user_save);
+               } else {
+                       tdelt = (uint32_t)timer_delta(&thread->system_timer,
                                                                &thread->vtimer_user_save);
+               }
                absolutetime_to_microtime(tdelt, &secs, microsecs);
                break;
 
@@ -2137,9 +2415,9 @@ task_findtid(task_t task, uint64_t tid)
 
        queue_iterate(&task->threads, thread, thread_t, task_threads) {
                        if (thread->thread_id == tid)
-                               break;
+                               return(thread);
        }
-       return(thread);
+       return(THREAD_NULL);
 }
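
This closes a real hole: when the loop completed without a match, the old
code returned the iteration cursor, which at that point is the queue head
cast to thread_t, not a valid thread. Callers can now test for a miss
directly (task and tid are placeholders here):

    thread_t th = task_findtid(task, tid);
    if (th == THREAD_NULL)
            return ESRCH;  /* no thread with that id in this task */
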
 
 
index af0482aca50663d9fad807a7713af538928d0fd5..7d2c981ce121f40d3c0f1d22f862a42916e787a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -155,39 +155,40 @@ The bit defns of the policy states
 
 /* Hardware disk access attributes, bit different as it should reflect IOPOL_XXX */
 #define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NONE       0x00
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS 0x01
 #define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL     0x01
 #define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_PASSIVE    0x02
 #define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE   0x03
-#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_DEFAULT    TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY    0x04
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_DEFAULT    TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS
 
-/* Hardware disk access attributes */
+/* Hardware GPU access attributes */
 #define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE                0x00
-#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NORMAL      0x00
 #define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS  0x00
 #define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS    0x01
-#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT     0x00
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT     TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS
 
 /* Hardware Network access attributes */
 #define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NONE                0x00
-#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NORMAL      0x00
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS  0x00
 #define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_THROTTLE    0x01
-#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_DEFAULT     0x00
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_DEFAULT     TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS
 
 /* Hardware CPU access attributes */
 #define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NONE                0x00
-#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NORMAL      0x00
-#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ALL         0x00
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS  0x00
 #define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ONE         0x01
 #define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_LLCACHE     0x02
-#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_DEFAULT     0x00
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_DEFAULT     TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS
 
 /* Resource usage/low resource attributes */
 #define TASK_POLICY_RESOURCE_ATTRIBUTE_NONE            0x00
 #define TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE                0x01
 #define TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND                 0x02
 #define TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE       0x03
-#define TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY          0x04
-#define TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT         0x00
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ       0x04
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC      0x05
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT         TASK_POLICY_RESOURCE_ATTRIBUTE_NONE
 
 #endif /* XNU_KERNEL_PRIVATE */
 
@@ -211,10 +212,24 @@ typedef struct process_policy {
                  hw_bg:8;      /* Darwin Background Policy */
 } process_policy_t;
 
+#if CONFIG_EMBEDDED
+
+typedef struct task_watcher {
+       queue_chain_t   tw_links;               /* queueing of threads */
+       task_t          tw_task;        /* task that is being watched */
+       thread_t        tw_thread;      /* thread that is watching the watch_task */
+       int             tw_state;       /* the current app state of the thread */
+       int             tw_importance;  /* importance prior to backgrounding */
+} task_watch_t;
+
+extern lck_mtx_t task_watch_mtx;
+
+#endif /* CONFIG_EMBEDDED */
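
A hedged sketch of how one of these watcher entries might be populated
before being queued on a task's task_watchers list; the actual add path
(add_taskwatch_locked()) lives in task_policy.c, TASK_APPSTATE_ACTIVE is
defined later in this header, and watched_task/watching_thread are
placeholder names:

    task_watch_t *twp = kalloc(sizeof(task_watch_t));

    twp->tw_task = watched_task;                      /* task being watched */
    twp->tw_thread = watching_thread;                 /* thread doing the watching */
    twp->tw_state = TASK_APPSTATE_ACTIVE;             /* its current app state */
    twp->tw_importance = watching_thread->importance; /* saved for later restore */
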
+
 #include <kern/thread.h>
 
 extern process_policy_t default_task_proc_policy;      /* init value for the process policy attributes */
-extern process_policy_t default_task_null_policy;              /* none as the value for the process policy attributes */
+extern process_policy_t default_task_null_policy;      /* none as the value for the process policy attributes */
 
 struct task {
        /* Synchronization/destruction information */
@@ -280,9 +295,8 @@ struct task {
        int             semaphores_owned;       /* number of semaphores owned */
        int             lock_sets_owned;        /* number of lock sets owned  */
 
-       /* Ledgers */
-       struct ipc_port *wired_ledger_port;
-       struct ipc_port *paged_ledger_port;
+       ledger_t        ledger;
+
        unsigned int    priv_flags;                     /* privilege resource flags */
 #define VM_BACKING_STORE_PRIV  0x1
 
@@ -295,12 +309,10 @@ struct task {
         integer_t messages_received;   /* messages received counter */
         integer_t syscalls_mach;       /* mach system call counter */
         integer_t syscalls_unix;       /* unix system call counter */
-       uint32_t  c_switch;                        /* total context switches */
-       uint32_t  p_switch;                        /* total processor switches */
-       uint32_t  ps_switch;               /* total pset switches */
+       uint32_t  c_switch;                        /* total context switches */
+       uint32_t  p_switch;                        /* total processor switches */
+       uint32_t  ps_switch;               /* total pset switches */
 
-       zinfo_usage_store_t tkm_private;/* private kmem alloc/free stats (reaped threads) */
-       zinfo_usage_store_t tkm_shared; /* shared kmem alloc/free stats (reaped threads) */
        zinfo_usage_t tkm_zinfo;        /* per-task, per-zone usage statistics */
 
 #ifdef  MACH_BSD 
@@ -328,14 +340,28 @@ struct task {
        uint32_t t_chud;                /* CHUD flags, used for Shark */
 #endif
 
-       process_policy_t ext_actionstate;       /* externally applied actions */
+       boolean_t pidsuspended; /* pid_suspend called; no threads can execute */
+       boolean_t frozen;       /* frozen; private resident pages committed to swap */
+       process_policy_t ext_appliedstate;      /* externally applied actions */
        process_policy_t ext_policystate;       /* externally defined process policy states*/
-       process_policy_t actionstate;           /* self applied acions */
+       process_policy_t appliedstate;          /* self applied actions */
        process_policy_t policystate;           /* process wide policy states */
-
-       uint64_t rsu_controldata[TASK_POLICY_RESOURCE_USAGE_COUNT];
+       uint8_t  rusage_cpu_flags;
+       uint8_t  rusage_cpu_percentage;         /* Task-wide CPU limit percentage */
+       uint64_t rusage_cpu_interval;           /* Task-wide CPU limit interval */
+       uint8_t  rusage_cpu_perthr_percentage;  /* Per-thread CPU limit percentage */
+       uint64_t rusage_cpu_perthr_interval;    /* Per-thread CPU limit interval */
+       uint64_t rusage_cpu_deadline;
+       thread_call_t rusage_cpu_callt;
+#if CONFIG_EMBEDDED
+       uint32_t        appstate;               /* the current appstate */
+       queue_head_t    task_watchers;          /* app state watcher threads */
+       int     num_taskwatchers;
+       int             watchapplying;
+#endif /* CONFIG_EMBEDDED */
 
        vm_extmod_statistics_data_t     extmod_statistics;
+       natural_t       proc_terminate; /* the process is marked for proc_terminate */
 };
 
 #define task_lock(task)                lck_mtx_lock(&(task)->lock)
@@ -404,10 +430,25 @@ __BEGIN_DECLS
 extern kern_return_t   task_hold(
                                                        task_t          task);
 
+/* Wait for task to stop running, either just to get off CPU or to cease being runnable */
+extern kern_return_t   task_wait(
+                                                       task_t          task,
+                                                       boolean_t       until_not_runnable);
+
 /* Release hold on all threads in a task */
 extern kern_return_t   task_release(
                                                        task_t          task);
 
+/* Suspends a task by placing a hold on its threads */
+extern kern_return_t    task_pidsuspend(
+                                                       task_t          task);
+extern kern_return_t    task_pidsuspend_locked(
+                                                       task_t          task);
+
+/* Resumes a previously paused task */
+extern kern_return_t    task_pidresume(
+                                                       task_t          task);
+
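
A minimal kernel-side sketch of the intended pairing, using only the
declarations above (task is a placeholder):

    if (task_pidsuspend(task) == KERN_SUCCESS) {
            /* every thread in the task is held and off CPU */
            /* ... inspect or freeze the task ... */
            task_pidresume(task);
    }
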
 #if CONFIG_FREEZE
 
 /* Freeze a task's resident pages */
@@ -417,6 +458,7 @@ extern kern_return_t        task_freeze(
                                                        uint32_t        *wired_count,
                                                        uint32_t        *clean_count,
                                                        uint32_t        *dirty_count,
+                                                       uint32_t        dirty_budget,
                                                        boolean_t       *shared,
                                                        boolean_t       walk_only);
 
@@ -505,6 +547,16 @@ extern kern_return_t machine_task_set_state(
                                        thread_state_t state, 
                                        mach_msg_type_number_t state_count);
 
+extern void machine_task_terminate(task_t task);
+
+struct _task_ledger_indices {
+       int cpu_time;
+       int tkm_private;
+       int tkm_shared;
+       int phys_mem;
+       int wired_mem;
+};
+extern struct _task_ledger_indices task_ledgers;
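
The members are slots in the per-task ledger template. A hedged sketch of a
generic accessor built on them and on ledger_get_entries(); the helper
itself is hypothetical, not part of this header:

    /* hypothetical helper: net balance of one ledger entry */
    static int64_t
    task_ledger_balance(task_t task, int entry)
    {
            ledger_amount_t credit = 0, debit = 0;

            if (ledger_get_entries(task->ledger, entry,
                &credit, &debit) != KERN_SUCCESS)
                    return 0;
            return (int64_t)(credit - debit);
    }

    /* e.g. task_ledger_balance(task, task_ledgers.phys_mem) */
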
 
 int proc_get_task_bg_policy(task_t task);
 int proc_get_thread_bg_policy(task_t task, uint64_t tid);
@@ -513,9 +565,9 @@ int proc_get_selfthread_isbackground(void);
 
 int proc_get_darwinbgstate(task_t, uint32_t *);
 int proc_set_bgtaskpolicy(task_t task, int intval);
-int proc_set1_bgtaskpolicy(task_t task, int intval);
+int proc_set_and_apply_bgtaskpolicy(task_t task, int intval);
 int proc_set_bgthreadpolicy(task_t task, uint64_t tid, int val);
-int proc_set1_bgthreadpolicy(task_t task, uint64_t tid, int val);
+int proc_set_and_apply_bgthreadpolicy(task_t task, uint64_t tid, int val);
 
 int proc_add_bgtaskpolicy(task_t task, int val);
 int proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val);
@@ -524,7 +576,6 @@ int proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val);
 
 int proc_apply_bgtaskpolicy(task_t task);
 int proc_apply_bgtaskpolicy_external(task_t task);
-int proc_apply_bgtaskpolicy_internal(task_t task);
 int proc_apply_bgthreadpolicy(task_t task, uint64_t tid);
 int proc_apply_bgtask_selfpolicy(void);
 int proc_apply_bgthread_selfpolicy(void);
@@ -534,6 +585,7 @@ int proc_restore_bgtaskpolicy(task_t task);
 int proc_restore_bgthreadpolicy(task_t task, uint64_t tid);
 int proc_restore_bgthread_selfpolicy(void);
 int proc_restore_workq_bgthreadpolicy(thread_t);
+void proc_task_remove_throttle(task_t task);
 
 /* hw access routines */
 int proc_apply_task_diskacc(task_t task, int policy);
@@ -541,6 +593,7 @@ int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy);
 int proc_apply_thread_selfdiskacc(int policy);
 int proc_get_task_disacc(task_t task);
 int proc_get_task_selfdiskacc(void);
+int proc_get_diskacc(thread_t thread);
 int proc_get_thread_selfdiskacc(void);
 int proc_denyinherit_policy(task_t task);
 int proc_denyselfset_policy(task_t task);
@@ -550,21 +603,32 @@ int proc_apply_task_gpuacc(task_t task, int prio);
 
 int proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep);
 int proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline);
+int proc_clear_task_ruse_cpu(task_t task);
 thread_t task_findtid(task_t, uint64_t);
 
+#define TASK_RUSECPU_FLAGS_PROC_LIMIT  0x1
+#define TASK_RUSECPU_FLAGS_PERTHR_LIMIT        0x2
+#define TASK_RUSECPU_FLAGS_DEADLINE            0x4
+
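
A hedged sketch of imposing a task-wide CPU limit through the interface
declared above; the action constant and the interval units are assumptions,
not something this header states:

    uint64_t interval = 0;  /* placeholder; units are not documented here */

    int err = proc_set_task_ruse_cpu(task,
        TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE,  /* assumed action when the limit trips */
        50,                                       /* percentage of CPU */
        interval,                                 /* observation interval */
        0);                                       /* no deadline */
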
 #define PROC_POLICY_OSX_APPTYPE_NONE           0
+#if CONFIG_EMBEDDED
+#define PROC_POLICY_IOS_RESV1_APPTYPE          1
+#define PROC_POLICY_IOS_APPLE_DAEMON           2
+#define PROC_POLICY_IOS_APPTYPE                        3
+#define PROC_POLICY_IOS_NONUITYPE              4
+#else
 #define PROC_POLICY_OSX_APPTYPE_TAL            1
 #define PROC_POLICY_OSX_APPTYPE_WIDGET         2
 #define PROC_POLICY_OSX_APPTYPE_DBCLIENT       2       /* Not a bug, just rename of widget */
-#define PROC_POLICY_IOS_APPTYPE                        3
-#define PROC_POLICY_IOS_NONUITYPE              4
+#endif
 
-void proc_set_task_apptype(task_t, int);
+void proc_set_task_apptype(task_t task, int type, thread_t thread);
 int proc_disable_task_apptype(task_t task, int policy_subtype);
 int proc_enable_task_apptype(task_t task, int policy_subtype);
 
-/* resource handle callback */
-int task_action_cpuusage(task_t);
+#if CONFIG_EMBEDDED
+extern int proc_setthread_saved_importance(thread_t thread, int importance);
+#endif
 
 /* BSD call back functions */
 extern int proc_apply_resource_actions(void * p, int type, int action);
@@ -574,6 +638,22 @@ extern int task_restore_resource_actions(task_t task, int type);
 extern void proc_apply_task_networkbg(void * bsd_info);
 extern void proc_restore_task_networkbg(void * bsd_info);
 extern void proc_set_task_networkbg(void * bsd_info, int setbg);
+extern int task_clear_cpuusage(task_t task);
+
+#if CONFIG_EMBEDDED
+#define TASK_APPSTATE_NONE              0
+#define TASK_APPSTATE_ACTIVE            1
+#define TASK_APPSTATE_BACKGROUND        2
+#define TASK_APPSTATE_NONUI             3
+#define TASK_APPSTATE_INACTIVE          4
+
+extern int proc_lf_getappstate(task_t task);
+extern int proc_lf_setappstate(task_t task, int state);
+extern int proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind);
+extern void thead_remove_taskwatch(thread_t thread);
+extern void task_removewatchers(task_t task);
+#endif /* CONFIG_EMBEDDED */
+
 #endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -581,6 +661,16 @@ extern void proc_set_task_networkbg(void * bsd_info, int setbg);
 extern void    *get_bsdtask_info(task_t);
 extern void    *get_bsdthreadtask_info(thread_t);
 extern vm_map_t get_task_map(task_t);
+extern ledger_t        get_task_ledger(task_t);
+
+extern boolean_t get_task_pidsuspended(task_t);
+extern boolean_t get_task_frozen(task_t);
+
+/* Convert from a task to a port */
+extern ipc_port_t convert_task_to_port(task_t);
+
+/* Convert from a task name to a port */
+extern ipc_port_t convert_task_name_to_port(task_name_t);
 
 #endif /* KERNEL_PRIVATE */
 
index e8f9bc628e102057b4e7be30c5a7c0bff069ca45..2cbb2daa2d0500c0bf9301e2ba11695f5b0a6303 100644 (file)
 #include <sys/errno.h>
 #include <sys/resource.h>
 #include <machine/limits.h>
+#include <kern/ledger.h>
+#include <kern/thread_call.h>
+#if CONFIG_EMBEDDED
+#include <kern/kalloc.h>
+#include <sys/errno.h>
+#endif /* CONFIG_EMBEDDED */
+#include <sys/kdebug.h>
+
+#if CONFIG_MEMORYSTATUS
+extern void memorystatus_on_suspend(int pid);
+extern void memorystatus_on_resume(int pid);
+#endif
 
-static int proc_apply_bgtaskpolicy_locked(task_t task, int, int);
-static int proc_restore_bgtaskpolicy_locked(task_t, int, int, int);
+static int proc_apply_bgtaskpolicy_internal(task_t, int, int);
+static int proc_restore_bgtaskpolicy_internal(task_t, int, int, int);
 static int task_get_cpuusage(task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep);
-static int task_set_cpuusage(task_t task, uint32_t percentage, uint64_t interval, uint64_t deadline);
+int task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope);
+static int task_clear_cpuusage_locked(task_t task);
 static int task_apply_resource_actions(task_t task, int type);
+static void task_priority(task_t task, integer_t priority, integer_t max_priority);
+static kern_return_t task_role_default_handler(task_t task, task_role_t role);
+void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
 static int proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset);
-static void restore_bgthreadpolicy_locked(thread_t thread, int selfset);
+static void restore_bgthreadpolicy_locked(thread_t thread, int selfset, int importance);
+static int proc_get_task_selfdiskacc_internal(task_t task, thread_t thread);
+extern void unthrottle_thread(void * uthread);
+
+#if CONFIG_EMBEDDED
+static void set_thread_appbg(thread_t thread, int setbg,int importance);
+static void apply_bgthreadpolicy_external(thread_t thread);
+static void add_taskwatch_locked(task_t task, task_watch_t * twp);
+static void remove_taskwatch_locked(task_t task, task_watch_t * twp);
+static void task_watch_lock(void);
+static void task_watch_unlock(void);
+static void apply_appstate_watchers(task_t task, int setbg);
+void proc_apply_task_networkbg_internal(void *, thread_t);
+void proc_restore_task_networkbg_internal(void *, thread_t);
+int proc_pid(void * proc);
+
+typedef struct thread_watchlist {
+       thread_t thread;        /* thread being worked on for taskwatch action */
+       int     importance;     /* importance to be restored if thread is being made active */
+} thread_watchlist_t;
+
+#endif /* CONFIG_EMBEDDED */
+
 
 process_policy_t default_task_proc_policy = {0,
                                             0,
@@ -54,8 +92,8 @@ process_policy_t default_task_proc_policy = {0,
                                            TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
                                            TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
                                            0,
-                                           TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ALL,
-                                           TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NORMAL,
+                                           TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS,
+                                           TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS,
                                            TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS,
                                            TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL,
                                            TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL
@@ -79,11 +117,44 @@ process_policy_t default_task_null_policy = {0,
                                            };
                        
 
-static void
-task_priority(
-       task_t                  task,
-       integer_t               priority,
-       integer_t               max_priority);
+
+/*
+ * This routine should always be called with the task lock held.
+ * It handles the default behavior for TASK_FOREGROUND_APPLICATION
+ * and TASK_BACKGROUND_APPLICATION on a task with no special app type.
+ */
+static kern_return_t
+task_role_default_handler(task_t task, task_role_t role)
+{
+       kern_return_t result = KERN_SUCCESS;
+
+       switch (task->role) {
+               case TASK_FOREGROUND_APPLICATION:
+               case TASK_BACKGROUND_APPLICATION:
+               case TASK_UNSPECIFIED:
+                       /* if there is no process-wide backgrounding in effect ... */
+                       if ((task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) &&
+                               (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) {
+                                       task_priority(task,
+                                               ((role == TASK_FOREGROUND_APPLICATION)?
+                                               BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
+                                               task->max_priority);
+                       }
+                       task->role = role;
+                       break;
+
+               case TASK_CONTROL_APPLICATION:
+               case TASK_RENICED:
+                       /* else fail silently */
+                       break;
+
+               default:
+                       result = KERN_INVALID_ARGUMENT;
+                       break;
+       }
+       return(result);
+}
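
For reference, the role handled here arrives through the standard
TASK_CATEGORY_POLICY flavor; a minimal caller sketch:

    task_category_policy_data_t info = { .role = TASK_FOREGROUND_APPLICATION };

    task_policy_set(mach_task_self(), TASK_CATEGORY_POLICY,
        (task_policy_t)&info, TASK_CATEGORY_POLICY_COUNT);
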
+
 
 kern_return_t
 task_policy_set(
@@ -115,99 +186,66 @@ task_policy_set(
 #endif
 
                task_lock(task);
-               if (    info->role == TASK_FOREGROUND_APPLICATION ||
-                               info->role == TASK_BACKGROUND_APPLICATION) {
+               switch(info->role) {
+                       case TASK_FOREGROUND_APPLICATION : {
+                               if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) {
+                                       result = task_role_default_handler(task, info->role);
+                               } else {
+                                       switch (task->ext_appliedstate.apptype) {
 #if !CONFIG_EMBEDDED
-                       if (task->ext_actionstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) {
-                               switch (info->role) {
-                                       case TASK_FOREGROUND_APPLICATION:
-                                               switch (task->ext_actionstate.apptype) {
-                                                       case PROC_POLICY_OSX_APPTYPE_TAL:
-                                                               /* Move the app to foreground with no DarwinBG */
-                                                               proc_restore_bgtaskpolicy_locked(task, 1, 1, BASEPRI_FOREGROUND);
-                                                               bsdinfo = task->bsd_info;
-                                                               setbg = 0;
-                                                               break;
-
-                                                       case PROC_POLICY_OSX_APPTYPE_DBCLIENT: 
-                                                               /* reset the apptype so enforcement on background/foregound */
-                                                               task->ext_actionstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
-                                                               /* Internal application and make it foreground pri */
-                                                               proc_restore_bgtaskpolicy_locked(task, 1, 0, BASEPRI_FOREGROUND);
-                                                               bsdinfo = task->bsd_info;
-                                                               setbg = 0;
-                                                               break;
-
-                                                       default:
-                                                               /* the app types cannot be in CONTROL, GRAPHICS STATE, so it will de default state here */
-                                                               task_priority(task,
-                                                                       ((info->role == TASK_FOREGROUND_APPLICATION)?
-                                                                       BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
-                                                                       task->max_priority);
-                                                               break;
-                                       }
-                                       task->role = TASK_FOREGROUND_APPLICATION;
-                                       break;
-
-                                       case TASK_BACKGROUND_APPLICATION:
-                                               switch (task->ext_actionstate.apptype) {
-                                                       case PROC_POLICY_OSX_APPTYPE_TAL:
-                                                               /* TAL apps will get Darwin backgrounded if not already set */
-                                                               if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
-                                                                       /* external application of Darwin BG */
-                                                                       proc_apply_bgtaskpolicy_locked(task, 1, 1);
-                                                                       bsdinfo = task->bsd_info;
-                                                                       setbg = 1;
-                                                               }
-                                                               break;
-
-                                                       default:
-                                                               task_priority(task,
-                                                                       ((info->role == TASK_FOREGROUND_APPLICATION)?
-                                                                       BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
-                                                                       task->max_priority);
-                                                               break;
-                                               }
-                                               task->role = TASK_BACKGROUND_APPLICATION;
-                                               break;
-
-                                       default:
-                                               /* do nothing */
-                                               break;
-
-                               } /* switch info->role */
-                       } else   { /* apptype != PROC_POLICY_OSX_APPTYPE_NONE */
+                                               case PROC_POLICY_OSX_APPTYPE_TAL:
+                                                       /* Move the app to foreground with no DarwinBG */
+                                                       proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_FOREGROUND);
+                                                       bsdinfo = task->bsd_info;
+                                                       setbg = 0;
+                                                       break;
+
+                                               case PROC_POLICY_OSX_APPTYPE_DBCLIENT: 
+                                                       /* reset the apptype so enforcement applies on background/foreground */
+                                                       task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                                                       /* Internal application and make it foreground pri */
+                                                       proc_restore_bgtaskpolicy_internal(task, 1, 0, BASEPRI_FOREGROUND);
+                                                       bsdinfo = task->bsd_info;
+                                                       setbg = 0;
+                                                       break;
 #endif /* !CONFIG_EMBEDDED */
-                       switch (task->role) {
-
-                       case TASK_FOREGROUND_APPLICATION:
-                       case TASK_BACKGROUND_APPLICATION:
-                       case TASK_UNSPECIFIED:
-                               /* if there are no process wide backgrounding ... */
-                               if ((task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) &&
-                                       (task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) {
-                                               task_priority(task,
-                                                       ((info->role == TASK_FOREGROUND_APPLICATION)?
-                                                       BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
-                                                       task->max_priority);
-                               }
-                               task->role = info->role;
-                               break;
 
-                       case TASK_CONTROL_APPLICATION:
-                       case TASK_RENICED:
-                               /* else fail silently */
-                               break;
+                                               default:
+                                                       /* the app types cannot be in CONTROL or GRAPHICS state, so it takes the default state here */
+                                                       task_priority(task, BASEPRI_FOREGROUND, task->max_priority);
+                                                       break;
 
-                       default:
-                               result = KERN_INVALID_ARGUMENT;
-                               break;
+                                       } /* switch (task->ext_appliedstate.apptype) */
+                                       task->role = TASK_FOREGROUND_APPLICATION;
+                               }
                        }
+                       break;
+
+                       case TASK_BACKGROUND_APPLICATION : {
+                               if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) {
+                                       result = task_role_default_handler(task, info->role);
+                               } else  { /* apptype != PROC_POLICY_OSX_APPTYPE_NONE */
+                                       switch (task->ext_appliedstate.apptype) {
 #if !CONFIG_EMBEDDED
-               } /* apptype != PROC_POLICY_OSX_APPTYPE_NONE */
+                                               case PROC_POLICY_OSX_APPTYPE_TAL:
+                                                        /* TAL apps will get Darwin backgrounded if not already set */
+                                                       if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+                                                               proc_apply_bgtaskpolicy_internal(task, 1, 1);
+                                                               bsdinfo = task->bsd_info;
+                                                               setbg = 1;
+                                                       }
+                                                       break;
 #endif /* !CONFIG_EMBEDDED */
+                                               default:
+                                                       task_priority(task, BASEPRI_BACKGROUND, task->max_priority);
+                                                       break;
+                                       } /* switch (task->ext_appliedstate.apptype) */
+                                       task->role = TASK_BACKGROUND_APPLICATION;
+                               }
+                       }
+                       break;
 
-               } else if (info->role == TASK_CONTROL_APPLICATION) {
+               case TASK_CONTROL_APPLICATION: 
                        if (task != current_task()||
                                        task->sec_token.val[0] != 0)
                                result = KERN_INVALID_ARGUMENT;
@@ -215,7 +253,9 @@ task_policy_set(
                                task_priority(task, BASEPRI_CONTROL, task->max_priority);
                                task->role = info->role;
                        }
-               } else if (info->role == TASK_GRAPHICS_SERVER) {
+                       break;
+
+               case TASK_GRAPHICS_SERVER:
                        if (task != current_task() ||
                                        task->sec_token.val[0] != 0)
                                result = KERN_INVALID_ARGUMENT;
@@ -223,24 +263,16 @@ task_policy_set(
                                task_priority(task, MAXPRI_RESERVED - 3, MAXPRI_RESERVED);
                                task->role = info->role;
                        }
-               } else
-#if CONFIG_EMBEDDED
-               if (info->role == TASK_THROTTLE_APPLICATION) {
-                       task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
-                       task->role = info->role;
-               } else if (info->role == TASK_DEFAULT_APPLICATION || info->role == TASK_NONUI_APPLICATION)
-               {
-                       task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
-                       task->role = info->role;
-               } else
-#else /* CONFIG_EMBEDDED */
-               if (info->role == TASK_DEFAULT_APPLICATION)
-               {
+                       break;
+               case TASK_DEFAULT_APPLICATION:
                        task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
                        task->role = info->role;
-               } else
-#endif /* CONFIG_EMBEDDED */
+                       break;
+
+               default :
                        result = KERN_INVALID_ARGUMENT;
+                       break;
+               } /* switch (info->role) */
 
                task_unlock(task);
 
@@ -378,18 +410,22 @@ proc_get_task_bg_policy(task_t task)
 int 
 proc_get_thread_bg_policy(task_t task, uint64_t tid)
 {
+       int selfset = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
        int val = 0;
 
-       if (tid == self->thread_id)  {
-               val = self->policystate.hw_bg;
-       } else {
+       if (tid == self->thread_id)
+               selfset = 1;
+       
+       if (selfset == 0)  {
                task_lock(task);
                thread = task_findtid(task, tid);
                if (thread != NULL)
                        val = thread->ext_policystate.hw_bg;
                task_unlock(task);
+       } else {
+               val = self->policystate.hw_bg;
        }
 
        return(val);
@@ -401,10 +437,10 @@ proc_get_self_isbackground(void)
        task_t task = current_task();
        thread_t thread = current_thread();
 
-       if ((task->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
-               (task->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
-               (thread->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
-               (thread->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
+       if ((task->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+               (task->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+               (thread->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+               (thread->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
                        return(1);
        else
                return(0);      
@@ -415,8 +451,8 @@ int proc_get_selfthread_isbackground(void)
 {
        thread_t thread = current_thread();
 
-       if ((thread->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
-               (thread->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
+       if ((thread->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+               (thread->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
                        return(1);
        else
                return(0);      
@@ -447,17 +483,32 @@ proc_set_bgtaskpolicy(task_t task, int intval)
        return(0);
 }
 
-/* set and apply as well */
-int proc_set1_bgtaskpolicy(task_t task, int prio)
+/* set and apply as well; handles reset of NONUI due to the setprio() task app state implementation side effect */
+int 
+proc_set_and_apply_bgtaskpolicy(task_t task, int prio)
 {
        int error = 0;
 
        if (prio == PRIO_DARWIN_BG) {
                error = proc_set_bgtaskpolicy(task, TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL);
-               if (error == 0)
+               if (error == 0) {
                        error = proc_apply_bgtaskpolicy(task);
+#if CONFIG_EMBEDDED
+                       /* XXX: till SB uses newer SPIs */
+                       apply_appstate_watchers(task, 1);
+#endif /* CONFIG_EMBEDDED */
+               }
        } else {
                error = proc_restore_bgtaskpolicy(task);
+               if (error == 0) {
+                       /* since the prior non-UI implementation was overloaded with bg state, it needs to be reset */
+                       error = proc_apply_task_gpuacc(task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT);
+#if CONFIG_EMBEDDED
+                       /* XXX: till SB uses newer SPIs */
+                       apply_appstate_watchers(task, 0);
+#endif /* CONFIG_EMBEDDED */
+               }
+               
        }
 
        return(error);
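
A hedged sketch of the two call shapes, e.g. from a
setpriority(PRIO_DARWIN_PROCESS)-style path; the surrounding variables are
placeholders:

    error = proc_set_and_apply_bgtaskpolicy(task, PRIO_DARWIN_BG);  /* background */
    /* ... */
    error = proc_set_and_apply_bgtaskpolicy(task, 0);               /* restore */
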
@@ -467,19 +518,23 @@ int proc_set1_bgtaskpolicy(task_t task, int prio)
 int 
 proc_set_bgthreadpolicy(task_t task, uint64_t tid, int prio)
 {
+       int selfset = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
        int reset;
 
        if (prio == 0)
                reset = 1;
+       if (tid == self->thread_id)
+               selfset = 1;
+
        task_lock(task);
-       if (tid == self->thread_id) {
-               self->policystate.hw_bg = prio;
-       } else {
+       if (selfset == 0)  {
                thread = task_findtid(task, tid);
                if (thread != NULL)
                        thread->ext_policystate.hw_bg = prio;
+       } else {
+               self->policystate.hw_bg = prio;
        }
                
        task_unlock(task);
@@ -488,7 +543,7 @@ proc_set_bgthreadpolicy(task_t task, uint64_t tid, int prio)
 }
 
 int 
-proc_set1_bgthreadpolicy(task_t task, uint64_t tid, int prio)
+proc_set_and_apply_bgthreadpolicy(task_t task, uint64_t tid, int prio)
 {
        int error = 0;
 
@@ -526,19 +581,23 @@ proc_add_bgtaskpolicy(task_t task, int val)
 int 
 proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val)
 {
+       int selfset = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
        int reset;
 
        if (val == 0)
                reset = 1;
+       if (tid == self->thread_id)
+               selfset = 1;
+
        task_lock(task);
-       if (tid == self->thread_id) {
-               self->policystate.hw_bg |= val;
-       } else {
+       if (selfset == 0)  {
                thread = task_findtid(task, tid);
                if (thread != NULL)
                        thread->ext_policystate.hw_bg |= val;
+       } else {
+               self->policystate.hw_bg |= val;
        }
                
        task_unlock(task);
@@ -569,19 +628,23 @@ proc_remove_bgtaskpolicy(task_t task, int intval)
 int 
 proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val)
 {
+       int selfset = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
        int reset;
 
        if (val == 0)
                reset = 1;
+       if (tid == self->thread_id)
+               selfset = 1;
+
        task_lock(task);
-       if (tid == self->thread_id) {
-               self->policystate.hw_bg &= ~val;
-       } else {
+       if (selfset == 0)  {
                thread = task_findtid(task, tid);
                if (thread != NULL)
                        thread->ext_policystate.hw_bg &= ~val;
+       } else {
+               self->policystate.hw_bg &= ~val;
        }
                
        task_unlock(task);
@@ -602,50 +665,47 @@ proc_apply_bgtaskpolicy(task_t task)
 
        if (task == current_task())
                external = 0;
-
-       return(proc_apply_bgtaskpolicy_locked(task, 0, external));
+       return(proc_apply_bgtaskpolicy_internal(task, 0, external));
 }
 
-int
+int 
 proc_apply_bgtaskpolicy_external(task_t task)
 {
-       return(proc_apply_bgtaskpolicy_locked(task, 0, 1));
-
-}
-
-int
-proc_apply_bgtaskpolicy_internal(task_t task)
-{
-       return(proc_apply_bgtaskpolicy_locked(task, 0, 0));
+       return(proc_apply_bgtaskpolicy_internal(task, 0, 1));
 }
 
-
 static int
-proc_apply_bgtaskpolicy_locked(task_t task, int locked, int external)
+proc_apply_bgtaskpolicy_internal(task_t task, int locked, int external)
 {
+
        if (locked == 0)
                task_lock(task);
 
+       /* if the process is exiting, there is nothing to do */
+       if (task->proc_terminate != 0)
+               goto out;
+
        if (external != 0) {
                /* already set? */
-               if (task->ext_actionstate.hw_bg != task->ext_policystate.hw_bg) {
-                       task->ext_actionstate.hw_bg = task->ext_policystate.hw_bg;
+               if (task->ext_appliedstate.hw_bg != task->ext_policystate.hw_bg) {
+                       task->ext_appliedstate.hw_bg = task->ext_policystate.hw_bg;
                        task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
                        /* background state applied */
                }
        } else {
-               if (task->actionstate.hw_bg != task->policystate.hw_bg) {
-                       task->actionstate.hw_bg = task->policystate.hw_bg;
+               if (task->appliedstate.hw_bg != task->policystate.hw_bg) {
+                       task->appliedstate.hw_bg = task->policystate.hw_bg;
                        task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
                }
        }
+out:
        if (locked == 0)
                task_unlock(task);
        return(0);
 }
 
-/* apply the self backgrounding even if the thread is not current thread/task(timer threads) */
-int
+/* apply the self backgrounding even if the thread is not the current thread */
+int 
 proc_apply_workq_bgthreadpolicy(thread_t thread)
 {
        int error;
@@ -657,7 +717,7 @@ proc_apply_workq_bgthreadpolicy(thread_t thread)
                /* apply the background as selfset internal one */
                error = proc_apply_bgthreadpolicy_locked(thread, 1);
                task_unlock(wqtask);
-       } else  
+       } else
                error = ESRCH;
 
        return(error);
@@ -666,56 +726,66 @@ proc_apply_workq_bgthreadpolicy(thread_t thread)
 int 
 proc_apply_bgthreadpolicy(task_t task, uint64_t tid)
 {
+       int selfset = 0, error = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
-       int selfset = 0, error = 0;
        task_t localtask = TASK_NULL;
 
        if (tid == self->thread_id) {
                selfset = 1;
                localtask = current_task();
-       } else {
+       } else
                localtask = task;
-       }
 
        task_lock(localtask);
-       if (selfset != 0) {
+       if (selfset != 0)  {
                thread = self;
        } else {
-               thread = task_findtid(task, tid);
+               thread = task_findtid(localtask, tid);
        }
 
        error = proc_apply_bgthreadpolicy_locked(thread, selfset);
-       task_unlock(localtask);
 
+       task_unlock(localtask);
+       
        return(error);
 }
 
-static int
+static int 
 proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset)
 {
        int set = 0;
        thread_precedence_policy_data_t policy;
 
+
        if (thread != NULL) {
-               if (selfset != 0) {
+               /* if the process is exiting, there is nothing to do */
+               if (thread->task->proc_terminate != 0)
+                       goto out;
+
+               if (selfset != 0)  {
                        /* internal application */
-                       if (thread->actionstate.hw_bg != thread->policystate.hw_bg) {
-                               thread->actionstate.hw_bg = thread->policystate.hw_bg;
-                               if (thread->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) 
+                       if (thread->appliedstate.hw_bg != thread->policystate.hw_bg) {
+                               thread->appliedstate.hw_bg = thread->policystate.hw_bg;
+                               if (thread->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) 
                                        set = 1;
                
                        }
                } else {
                        /* external application */
-                       if (thread->ext_actionstate.hw_bg != thread->ext_policystate.hw_bg) {
-                               thread->ext_actionstate.hw_bg = thread->ext_policystate.hw_bg;
-                               if (thread->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+                       if (thread->ext_appliedstate.hw_bg != thread->ext_policystate.hw_bg) {
+                               thread->ext_appliedstate.hw_bg = thread->ext_policystate.hw_bg;
+                               if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
                                        set = 1;
                        }
                }
                        
                if (set != 0) {
+#if CONFIG_EMBEDDED
+                       if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+                               thread->saved_importance = thread->importance;
+                       }
+#endif /* CONFIG_EMBEDDED */
                        /* set thread priority (we did not save previous value) */
                        policy.importance = INT_MIN;
                                
@@ -724,12 +794,45 @@ proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset)
                                                    THREAD_PRECEDENCE_POLICY_COUNT );
 
                }
-       } else  
+       } else
                return(ESRCH);
-               
+
+out:
        return(0);
 }
 
+#if CONFIG_EMBEDDED
+/* set external application of background */
+static void 
+apply_bgthreadpolicy_external(thread_t thread)
+{
+       int set = 0;
+       thread_precedence_policy_data_t policy;
+
+       /* if the process is exiting, there is nothing to do */
+       if (thread->task->proc_terminate != 0)
+               return;
+
+       thread->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
+
+       if (thread->ext_appliedstate.hw_bg != thread->ext_policystate.hw_bg) {
+               thread->ext_appliedstate.hw_bg = thread->ext_policystate.hw_bg;
+               if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+                       set = 1;
+       }
+
+       if (set != 0) {
+               /* set thread priority (we did not save previous value) */
+               policy.importance = INT_MIN;
+
+               thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+                                                   (thread_policy_t)&policy,
+                                                   THREAD_PRECEDENCE_POLICY_COUNT );
+       }
+
+}
+#endif /* CONFIG_EMBEDDED */
+
 int
 proc_apply_bgthread_selfpolicy(void)
 {
@@ -742,39 +845,41 @@ proc_restore_bgtaskpolicy(task_t task)
 {
        int external = 1;
 
-       if (current_task() == task) 
+       if (current_task() == task)
                external = 0;
-       return(proc_restore_bgtaskpolicy_locked(task, 0, external,  BASEPRI_DEFAULT));
+       return(proc_restore_bgtaskpolicy_internal(task, 0, external, BASEPRI_DEFAULT));
 }
 
 static int
-proc_restore_bgtaskpolicy_locked(task_t task, int locked, int external, int pri)
+proc_restore_bgtaskpolicy_internal(task_t task, int locked, int external, int pri)
 {
        if (locked == 0)
                task_lock(task);
 
+       /* if the process is exiting, there is nothing to do */
+       if (task->proc_terminate != 0)
+               goto out;
+
        if (external != 0) {
-               task->ext_actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+               task->ext_appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
                /* self BG in flight? */
-               if (task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+               if (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
                        task_priority(task, pri, MAXPRI_USER);
 #if CONFIG_EMBEDDED
-                       /* non embedded users need role for policy reapplication */
                        task->role = TASK_DEFAULT_APPLICATION;
 #endif /* CONFIG_EMBEDDED */
                }
         } else {
-               task->actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+               task->appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
                /* external BG in flight? */
-               if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+               if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
                        task_priority(task, pri, MAXPRI_USER);
 #if CONFIG_EMBEDDED
-                       /* non embedded users need role for policy reapplication */
                        task->role = TASK_DEFAULT_APPLICATION;
 #endif /* CONFIG_EMBEDDED */
                }
        }
-
+out:
        if (locked == 0)
                task_unlock(task);
 
@@ -782,17 +887,25 @@ proc_restore_bgtaskpolicy_locked(task_t task, int locked, int external, int pri)
 }
 
 /* restore the self backgrounding even if the thread is not the current thread */
-int
+int 
 proc_restore_workq_bgthreadpolicy(thread_t thread)
 {
        int error = 0;
        task_t wqtask = TASK_NULL;
+       int importance = 0;
 
        if (thread != THREAD_NULL) {
                wqtask = thread->task;
                task_lock(wqtask);
                /* remove the background and restore the default importance, as a self (internal) removal */
-               restore_bgthreadpolicy_locked(thread, 1);
+#if CONFIG_EMBEDDED
+               if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+                       /* restore the previously saved importance */
+                       importance = thread->saved_importance;
+                       thread->saved_importance = 0;
+               }
+#endif /* CONFIG_EMBEDDED */
+               restore_bgthreadpolicy_locked(thread, 1, importance);
                task_unlock(wqtask);
        } else
                error = ESRCH;
@@ -800,31 +913,41 @@ proc_restore_workq_bgthreadpolicy(thread_t thread)
        return(error);
 }
 
-int proc_restore_bgthread_selfpolicy(void)
+int 
+proc_restore_bgthread_selfpolicy(void)
 {
        return(proc_restore_bgthreadpolicy(current_task(), thread_tid(current_thread())));
-
 }
 
-
 int 
 proc_restore_bgthreadpolicy(task_t task, uint64_t tid)
 {
+
        int selfset = 0;
        thread_t self = current_thread();
        thread_t thread = THREAD_NULL;
+       int importance = 0;
 
-       task_lock(task);
-       if (tid == self->thread_id) {
-               thread = self;
+       if (tid == self->thread_id)
                selfset = 1;
-       } else {
+
+       task_lock(task);
+       if (selfset == 0)  {
                thread = task_findtid(task, tid);
+       } else {
+               thread = self;
        }
 
-       if (thread != NULL)
-               restore_bgthreadpolicy_locked(thread, selfset);
-
+       if (thread != NULL) {
+#if CONFIG_EMBEDDED
+               if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+                       /* restore the previously saved importance */
+                       importance = thread->saved_importance;
+                       thread->saved_importance = 0;
+               }
+#endif /* CONFIG_EMBEDDED */
+               restore_bgthreadpolicy_locked(thread, selfset, importance);
+       }
        task_unlock(task);
 
        if (thread != NULL)
@@ -834,28 +957,32 @@ proc_restore_bgthreadpolicy(task_t task, uint64_t tid)
 }
 
 static void
-restore_bgthreadpolicy_locked(thread_t thread, int selfset)
+restore_bgthreadpolicy_locked(thread_t thread, int selfset, int importance)
 {
        thread_precedence_policy_data_t policy;
        int reset = 0;
 
        if (thread != NULL) {
-               if (selfset != 0) {
-                       thread->actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+               /* if the process is exiting, there is nothing to do */
+               if (thread->task->proc_terminate != 0)
+                       return;
+
+               if (selfset != 0)  {
+                       thread->appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
                        /* external BG in flight? */
-                       if (thread->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+                       if (thread->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
                                        reset = 1;
                
                } else {
-                       thread->ext_actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+                       thread->ext_appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
                        /* self BG in flight? */
-                       if (thread->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+                       if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
                                        reset = 1;
                }
                        
                if (reset != 0) {
                        /* reset thread priority (we did not save previous value) */
-                       policy.importance = 0;
+                       policy.importance = importance;
                        thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
                                                    (thread_policy_t)&policy,
                                                    THREAD_PRECEDENCE_POLICY_COUNT );
@@ -864,23 +991,41 @@ restore_bgthreadpolicy_locked(thread_t thread, int selfset)
 }
 
 void 
-proc_set_task_apptype(task_t task, int type)
+#if CONFIG_EMBEDDED
+proc_set_task_apptype(task_t task, int type, thread_t thread)
+#else
+proc_set_task_apptype(task_t task, int type, __unused thread_t thread)
+#endif
 {
+#if CONFIG_EMBEDDED
+       thread_t th = THREAD_NULL;
+#endif /* CONFIG_EMBEDDED */
+
        switch (type) {
-               case PROC_POLICY_OSX_APPTYPE_TAL:
+#if CONFIG_EMBEDDED
+               case PROC_POLICY_IOS_RESV1_APPTYPE:
                        task->ext_policystate.apptype = type;
                        task->policystate.apptype = type;
                        proc_apply_bgtaskpolicy_external(task);
                        /* indicate that BG is set and next foreground needs to reset */
-                       task->ext_actionstate.apptype = type;
+                       task->ext_appliedstate.apptype = type;
                        break;
 
-               case PROC_POLICY_OSX_APPTYPE_DBCLIENT:
+               case PROC_POLICY_IOS_APPLE_DAEMON:
                        task->ext_policystate.apptype = type;
                        task->policystate.apptype = type;
-                       proc_apply_bgtaskpolicy_internal(task);
-                       /* indicate that BG is set and next foreground needs to reset */
-                       task->ext_actionstate.apptype = type;
+                       task->ext_appliedstate.apptype = type;
+                       /* posix_spawn will already have created the thread, so background it */
+                       if (thread == NULL)
+                               th = current_thread();
+                       else
+                               th = thread;
+                       if (th->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL) {
+                               /* apply self backgrounding if not already set */
+                               task_lock(th->task);
+                               proc_apply_bgthreadpolicy_locked(th, 1);
+                               task_unlock(th->task);
+                       }
                        break;
        
                case PROC_POLICY_IOS_APPTYPE:
@@ -891,9 +1036,25 @@ proc_set_task_apptype(task_t task, int type)
                        task->ext_policystate.apptype = type;
                        task->policystate.apptype = type;
                        /* set to deny access to gpu */
-                       task->ext_actionstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                       task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
                        task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
                        break;
+#else /* CONFIG_EMBEDDED */
+               case PROC_POLICY_OSX_APPTYPE_TAL:
+                       task->ext_policystate.apptype = type;
+                       task->policystate.apptype = type;
+                       proc_apply_bgtaskpolicy_external(task);
+                       /* indicate that BG is set and next foreground needs to reset */
+                       task->ext_appliedstate.apptype = type;
+                       break;
+
+               case PROC_POLICY_OSX_APPTYPE_DBCLIENT:
+                       task->ext_policystate.apptype = type;
+                       task->policystate.apptype = type;
+                       proc_apply_bgtaskpolicy_internal(task, 0, 0);
+                       break;
+       
+#endif /* CONFIG_EMBEDDED */
 
                default:
                        break;
@@ -903,16 +1064,22 @@ proc_set_task_apptype(task_t task, int type)
 /* update the darwin background action state in the flags field for libproc */
 #define PROC_FLAG_DARWINBG      0x8000  /* process in darwin background */
 #define PROC_FLAG_EXT_DARWINBG  0x10000 /* process in darwin background - external enforcement */
+#define PROC_FLAG_IOS_APPLEDAEMON  0x20000 /* process is apple ios daemon */
 
 int
 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
 {
-       if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
+       if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
                *flagsp |= PROC_FLAG_EXT_DARWINBG;
        }
-       if (task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
+       if (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
                *flagsp |= PROC_FLAG_DARWINBG;
        }
+#if CONFIG_EMBEDDED
+       if (task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+               *flagsp |= PROC_FLAG_IOS_APPLEDAEMON;
+       }
+#endif /* CONFIG_EMBEDDED */
                
        return(0);
 }
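
The three PROC_FLAG_* bits above are exported to user space through the libproc BSD-info path. A minimal sketch of a userland consumer, assuming the flags are mirrored in <sys/proc_info.h> and surface in the pbi_flags word returned by proc_pidinfo(PROC_PIDTBSDINFO):

        #include <sys/types.h>
        #include <sys/proc_info.h>
        #include <libproc.h>

        /* Sketch only: assumes PROC_FLAG_DARWINBG and PROC_FLAG_EXT_DARWINBG
         * are visible to user space.  Returns 1 if pid is darwin-backgrounded,
         * 0 if not, -1 on lookup failure. */
        static int
        pid_is_darwinbg(pid_t pid)
        {
                struct proc_bsdinfo pbi;

                if (proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &pbi,
                    sizeof(pbi)) != (int)sizeof(pbi))
                        return (-1);
                return ((pbi.pbi_flags &
                    (PROC_FLAG_DARWINBG | PROC_FLAG_EXT_DARWINBG)) != 0);
        }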
@@ -925,81 +1092,132 @@ proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
 int 
 proc_get_task_disacc(task_t task)
 {
-       if ((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+#if CONFIG_EMBEDDED
+       if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (task->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(task->ext_actionstate.hw_disk);
-       if ((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+#else /* CONFIG_EMBEDDED */
+       if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) {
+               /* if it is a TAL or DBClient and not self throttled, return Utility */
+               if ((task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_TAL) || (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) {
+                       /* any explicit DBG (self) setting must still be honored */
+                       if ((task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE) &&
+                               ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE)!= 0) &&
+                               (task->appliedstate.hw_disk !=  TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE)) {
+                               return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY);
+                       }  else
+                               return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+                } else 
+                       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+       }
+#endif /* CONFIG_EMBEDDED */
+       if (task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(task->ext_appliedstate.hw_disk);
+       if ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (task->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(task->actionstate.hw_disk);
-       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+       if (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(task->appliedstate.hw_disk);
+       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS);
 }
 
 int
-proc_get_task_selfdiskacc(void)
+proc_get_task_selfdiskacc_internal(task_t task, thread_t thread)
 {
-       task_t task = current_task();
-       thread_t thread= current_thread();
-
+       /* if the task is marked for proc_terminate, no throttling for it */
+       if (task->proc_terminate != 0)
+               goto out;
        /* 
         * As per defined iopolicysys behavior, thread trumps task. 
         * Do we need to follow that for external enforcements of BG or hw access?
         * Status quo for now.
         */
-       if((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+               
+       if((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (thread->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(thread->ext_actionstate.hw_disk);
-       if((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+       if (thread->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(thread->ext_appliedstate.hw_disk);
+       if((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (thread->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(thread->actionstate.hw_disk);
+       if (thread->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(thread->appliedstate.hw_disk);
 
-       if ((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+#if CONFIG_EMBEDDED
+       if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (task->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(task->ext_actionstate.hw_disk);
-       if ((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+#else /* CONFIG_EMBEDDED */
+       if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) {
+               /* if it is a TAL or DBClient and not self throttled, return Utility */
+               if ((task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_TAL) || (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) {
+                       /* any explicit DBG (self) setting must still be honored */
+                       if ((task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE) &&
+                               ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE)!= 0) &&
+                               (task->appliedstate.hw_disk !=  TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE)) {
+                               return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY);
+                       }  else
+                               return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+                } else 
+                       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+       }
+#endif /* CONFIG_EMBEDDED */
+       if (task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(task->ext_appliedstate.hw_disk);
+       if ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (task->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(task->actionstate.hw_disk);
-       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+       if (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(task->appliedstate.hw_disk);
+out:
+       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS);
+}
+
+
+int
+proc_get_task_selfdiskacc(void)
+{
+       return(proc_get_task_selfdiskacc_internal(current_task(), current_thread()));
+}
+
+
+int
+proc_get_diskacc(thread_t thread)
+{
+       return(proc_get_task_selfdiskacc_internal(thread->task, thread));
 }
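
The accessors above all walk the same precedence chain (thread external state, thread self state, task external state, task self state), and the first throttling answer wins. On OSX the TAL/DBCLIENT special case softens the result. A worked example of that case, with field values illustrative:

        /*
         * Worked example of the TAL/DBCLIENT branch in proc_get_task_disacc()
         * and proc_get_task_selfdiskacc_internal():
         *
         *   ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_TAL
         *   ext_appliedstate.hw_bg & DISKTHROTTLE != 0 (externally backgrounded)
         *   ext_appliedstate.hw_disk != THROTTLE       (no explicit override)
         *   appliedstate.hw_bg & DISKTHROTTLE != 0     (self BG also in flight)
         *   appliedstate.hw_disk != THROTTLE           (not self disk-throttled)
         *
         * => TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY is returned rather
         *    than ..._THROTTLE: such clients are only demoted to Utility
         *    unless some policy explicitly asked for full disk throttling.
         */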
 
+
 int
 proc_get_thread_selfdiskacc(void)
 {
        thread_t thread = current_thread();
 
-       if((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+       if((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (thread->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(thread->ext_actionstate.hw_disk);
-       if((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+       if (thread->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(thread->ext_appliedstate.hw_disk);
+       if((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
                return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
-       if (thread->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
-               return(thread->actionstate.hw_disk);
-       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+       if (thread->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS)
+               return(thread->appliedstate.hw_disk);
+       return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS);
 }
 
-int proc_apply_task_diskacc(task_t task, int policy)
+int 
+proc_apply_task_diskacc(task_t task, int policy)
 {
        task_t self = current_task();
 
        task_lock(task);
        if (task ==  self) {
-               task->actionstate.hw_disk = policy;
+               task->appliedstate.hw_disk = policy;
                task->policystate.hw_disk = policy;
        } else {
-               task->ext_actionstate.hw_disk = policy;
+               task->ext_appliedstate.hw_disk = policy;
                task->ext_policystate.hw_disk = policy;
        }
        task_unlock(task);
        return(0);
 }
 
-int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy)
+int 
+proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy)
 {
        thread_t thread;
 
@@ -1010,7 +1228,7 @@ int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy)
                task_lock(task);
                thread = task_findtid(task, tid);
                if (thread != NULL) {
-                       thread->ext_actionstate.hw_disk = policy;
+                       thread->ext_appliedstate.hw_disk = policy;
                        thread->ext_policystate.hw_disk = policy;
                }
                task_unlock(task);
@@ -1021,22 +1239,66 @@ int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy)
                return(0);
 }
 
-int
-proc_apply_thread_selfdiskacc(int policy)
+void
+proc_task_remove_throttle(task_t task)
 {
-       task_t task = current_task();
-       thread_t thread = current_thread();
+       thread_t        thread;
+       int importance = 0;
 
        task_lock(task);
-       thread->actionstate.hw_disk = policy;
-       thread->policystate.hw_disk = policy;
-       task_unlock(task);
-       return(0);
-}
 
-int 
-proc_denyinherit_policy(__unused task_t task)
-{
+
+       /* remove process-wide internal DBG application */
+       proc_restore_bgtaskpolicy_internal(task, 1, 0, BASEPRI_DEFAULT);
+       /* remove process-wide external DBG application */
+       proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT);
+
+       for (thread  = (thread_t)queue_first(&task->threads);
+                       !queue_end(&task->threads, (queue_entry_t)thread); ) {
+#if CONFIG_EMBEDDED
+               if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+                       /* restore previously set importance */
+                       importance = thread->saved_importance;
+                       thread->saved_importance = 0;
+               }
+#endif /* CONFIG_EMBEDDED */
+               /* remove thread level internal DBG application */
+               restore_bgthreadpolicy_locked(thread, 1, importance);
+               /* remove thread level external DBG application */
+               restore_bgthreadpolicy_locked(thread, 0, importance);
+               /* reset thread io policy */
+               thread->ext_appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS;
+               thread->appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS;
+               unthrottle_thread(thread->uthread);
+               thread = (thread_t)queue_next(&thread->task_threads);
+       }
+
+       /* reset task iopolicy */
+       task->ext_appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS;
+       task->appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS;
+       task->proc_terminate = 1;
+
+       task_unlock(task);
+}
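
proc_task_remove_throttle() is the termination-time teardown: it unwinds the task-wide and per-thread darwinbg applications, resets every disk policy to full access, and latches proc_terminate so later policy queries short-circuit (see the early goto out in proc_get_task_selfdiskacc_internal() above). A sketch of the intended call site in an exit path; the wrapper name here is hypothetical:

        /* Hypothetical exit-path call site (illustrative only): once a
         * process is committed to exiting, drop every throttle so that
         * teardown itself is not slowed by darwinbg or disk policies. */
        static void
        proc_exit_unthrottle(task_t task)
        {
                proc_task_remove_throttle(task);
                /* task->proc_terminate is now set: disk-access queries on
                 * this task return ..._FULLACCESS from here on */
        }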
+
+
+
+int
+proc_apply_thread_selfdiskacc(int policy)
+{
+       task_t task = current_task();
+       thread_t thread = current_thread();
+
+       task_lock(task);
+       thread->appliedstate.hw_disk = policy;
+       thread->policystate.hw_disk = policy;
+       task_unlock(task);
+       return(0);
+}
+
+int 
+proc_denyinherit_policy(__unused task_t task)
+{
        return(0);
 }
 
@@ -1051,18 +1313,28 @@ int
 proc_get_task_selfgpuacc_deny(void)
 {
        task_t task = current_task();
+#ifdef NOTYET
        thread_t thread = current_thread();
+#endif /* NOTYET */
 
-       if (((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->ext_actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+       if (((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->ext_appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
                return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
-       if (((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+       if (((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
                return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
-       if (((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->ext_actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+#ifdef NOTYET
+       /*
+        * Since background dispatch items running in a thread can also
+        * be denied access, we need to make sure there are no unintended
+        * consequences of background dispatch usage. Until this is
+        * hashed out, disable thread-level checking.
+        */
+       if (((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->ext_appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
                return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
-       if (((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+       if (((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
                return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
 
-       return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NORMAL);
+#endif /* NOTYET */
+       return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS);
 }
 
 int
@@ -1073,10 +1345,10 @@ proc_apply_task_gpuacc(task_t task, int policy)
 
        task_lock(task);
        if (task ==  self) {
-               task->actionstate.hw_gpu = policy;
+               task->appliedstate.hw_gpu = policy;
                task->policystate.hw_gpu = policy;
        } else {
-               task->ext_actionstate.hw_gpu = policy;
+               task->ext_appliedstate.hw_gpu = policy;
                task->ext_policystate.hw_gpu = policy;
        }
        task_unlock(task);
@@ -1103,10 +1375,64 @@ proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep,
        return(error);
 }
 
+/*
+ * Currently supported configurations for CPU limits.
+ *
+ *                                     Deadline-based CPU limit        Percentage-based CPU limit
+ * PROC_POLICY_RSRCACT_THROTTLE                ENOTSUP                         Task-wide scope only
+ * PROC_POLICY_RSRCACT_SUSPEND         Task-wide scope only            ENOTSUP
+ * PROC_POLICY_RSRCACT_TERMINATE       Task-wide scope only            ENOTSUP
+ * PROC_POLICY_RSRCACT_NOTIFY_KQ       Task-wide scope only            ENOTSUP
+ * PROC_POLICY_RSRCACT_NOTIFY_EXC      ENOTSUP                         Per-thread scope only
+ *
+ * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
+ * after the specified amount of wallclock time has elapsed.
+ *
+ * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
+ * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
+ * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
+ * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
+ *
+ * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
+ * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
+ * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
+ * but the potential consumer of the API at the time was insisting on wallclock time instead.
+ *
+ * Currently, requesting notification via an exception is the only way to get per-thread scope for a
+ * CPU limit. All other types of notifications force task-wide scope for the limit.
+ */
 int 
 proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline)
 {
        int error = 0;
+       int scope;
+
+       /*
+        * Enforce the matrix of supported configurations for policy, percentage, and deadline.
+        */
+       switch (policy) {
+       // If no policy is explicitly given, the default is to throttle.
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
+               if (deadline != 0)
+                       return (ENOTSUP);
+               scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
+               break;
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
+               if (percentage != 0)
+                       return (ENOTSUP);
+               scope = TASK_RUSECPU_FLAGS_DEADLINE;
+               break;
+       case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
+               if (deadline != 0)
+                       return (ENOTSUP);
+               scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
+               break;
+       default:
+               return (EINVAL);
+       }
 
        task_lock(task);
        if (task != current_task()) {
@@ -1114,11 +1440,47 @@ proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64
        } else {
                task->policystate.ru_cpu = policy;      
        }
-       error = task_set_cpuusage(task, percentage, interval, deadline);
+       error = task_set_cpuusage(task, percentage, interval, deadline, scope);
        task_unlock(task);
        return(error);
 }
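
To make the configuration matrix in the block comment above concrete, here is a sketch of legal and illegal requests; the task handle and the numbers are illustrative:

        /* Legal: throttle when the task as a whole exceeds 50% CPU over a
         * one-second refill interval (an interval of 0 defaults to 1s). */
        (void)proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE,
            50 /* percent */, 0 /* interval */, 0 /* deadline */);

        /* Legal: terminate the task 30 seconds of wallclock time from now. */
        (void)proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE,
            0, 0, 30 * NSEC_PER_SEC);

        /* ENOTSUP: THROTTLE combined with a deadline, or SUSPEND / TERMINATE /
         * NOTIFY_KQ combined with a percentage.  EINVAL: any other policy. */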
 
+int 
+proc_clear_task_ruse_cpu(task_t task)
+{
+       int error = 0;
+       int action;
+       void * bsdinfo = NULL;
+
+       task_lock(task);
+       if (task != current_task()) {
+               task->ext_policystate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;  
+       } else {
+               task->policystate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;      
+       }
+
+       error = task_clear_cpuusage_locked(task);
+       if (error != 0)
+               goto out;       
+
+       action = task->ext_appliedstate.ru_cpu;
+       if (task->ext_appliedstate.ru_cpu != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+               /* reset action */
+               task->ext_appliedstate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+       }
+       if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+               bsdinfo = task->bsd_info;
+               task_unlock(task);
+               proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
+               goto out1;
+       }
+
+out:
+       task_unlock(task);
+out1:
+       return(error);
+
+}
 
 /* used to apply resource limit related actions */
 static int
@@ -1144,11 +1506,14 @@ task_apply_resource_actions(task_t task, int type)
        /* only cpu actions for now */
        task_lock(task);
        
-       if (task->ext_actionstate.ru_cpu == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+       if (task->ext_appliedstate.ru_cpu == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
                /* apply action */
-               task->ext_actionstate.ru_cpu = task->ext_policystate.ru_cpu;
-               action = task->ext_actionstate.ru_cpu;
+               task->ext_appliedstate.ru_cpu = task->ext_policystate.ru_cpu;
+               action = task->ext_appliedstate.ru_cpu;
+       } else {
+               action = task->ext_appliedstate.ru_cpu;
        }
+
        if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
                bsdinfo = task->bsd_info;
                task_unlock(task);
@@ -1159,76 +1524,660 @@ task_apply_resource_actions(task_t task, int type)
        return(0);
 }
 
+/* For ledger hookups */
+static int
+task_get_cpuusage(task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep)
+{
+       *percentagep = task->rusage_cpu_percentage;
+       *intervalp = task->rusage_cpu_interval;
+       *deadlinep = task->rusage_cpu_deadline;
+
+       return(0);
+}
+
 int
-task_restore_resource_actions(task_t task, int type)
+task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope)
 {
-       int action;
-       void * bsdinfo = NULL;
-       
-       switch (type) {
-               case TASK_POLICY_CPU_RESOURCE_USAGE:
-                       break;
-               case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
-               case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
-               case TASK_POLICY_DISK_RESOURCE_USAGE:
-               case TASK_POLICY_NETWORK_RESOURCE_USAGE:
-               case TASK_POLICY_POWER_RESOURCE_USAGE:
-                       return(0);
+       uint64_t abstime = 0;
+       uint64_t save_abstime = 0;
+       uint64_t limittime = 0;
+       thread_t thread;
 
-               default:
-                       return(1);
-       };
+       lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
+
+       /* By default, refill once per second */
+       if (interval == 0)
+               interval = NSEC_PER_SEC;
+
+       if (percentage != 0) {
+               if (percentage > 100)
+                       percentage = 100;
+               limittime = (interval * percentage)/ 100;
+               nanoseconds_to_absolutetime(limittime, &abstime);
+               if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+                       /*
+                        * A per-thread CPU limit on a task generates an exception
+                        * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
+                        * exceeds the limit.
+                        */
+                       task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
+                       task->rusage_cpu_perthr_percentage = percentage;
+                       task->rusage_cpu_perthr_interval = interval;
+                       queue_iterate(&task->threads, thread, thread_t, task_threads) {
+                               set_astledger(thread);
+                       }
+               } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
+                       /*
+                        * Currently, a proc-wide CPU limit always blocks if the limit is
+                        * exceeded (LEDGER_ACTION_BLOCK).
+                        */
+                       task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
+                       task->rusage_cpu_percentage = percentage;
+                       task->rusage_cpu_interval = interval;
+
+                       ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime);
+                       ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
+                       ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
+               }
+       }
 
-       /* only cpu actions for now */
-       task_lock(task);
-       
-       action = task->ext_actionstate.ru_cpu;
-       if (task->ext_actionstate.ru_cpu != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
-               /* reset action */
-               task->ext_actionstate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+       if (deadline != 0) {
+               assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
+
+               /* if already in use, cancel and wait for it to clean out */
+               if (task->rusage_cpu_callt != NULL) {
+                       task_unlock(task);
+                       thread_call_cancel_wait(task->rusage_cpu_callt);
+                       task_lock(task);
+               }
+               if (task->rusage_cpu_callt == NULL) {
+                       task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
+               }
+               /* setup callout */
+               if (task->rusage_cpu_callt != 0) {
+                       task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
+                       task->rusage_cpu_deadline = deadline;
+
+                       nanoseconds_to_absolutetime(deadline, &abstime);
+                       save_abstime = abstime;
+                       clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
+                       thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
+               }
        }
-       if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
-               bsdinfo = task->bsd_info;
-               task_unlock(task);
-               proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
-       } else
-               task_unlock(task);
 
        return(0);
-
 }
 
-/* For ledger hookups */
-static int
-task_get_cpuusage(__unused task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep)
+int
+task_clear_cpuusage(task_t task)
 {
-       *percentagep = 0;
-       *intervalp = 0;
-       *deadlinep = 0;
+       int retval = 0;
 
-       return(0);
+       task_lock(task);
+       retval = task_clear_cpuusage_locked(task);
+       task_unlock(task);
+
+       return(retval);
 }
 
-static int
-task_set_cpuusage(__unused task_t task, __unused uint32_t percentage, __unused uint64_t interval, __unused uint64_t deadline)
+int
+task_clear_cpuusage_locked(task_t task)
 {
+       thread_call_t savecallt;
+       thread_t thread;
+
+       /* cancel percentage handling if set */
+       if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
+               task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;               
+               ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY);
+               task->rusage_cpu_percentage = 0;
+               task->rusage_cpu_interval = 0;
+       }
+
+       if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+               task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PERTHR_LIMIT;             
+               queue_iterate(&task->threads, thread, thread_t, task_threads) {
+                       set_astledger(thread);
+               }
+               task->rusage_cpu_perthr_percentage = 0;
+               task->rusage_cpu_perthr_interval = 0;
+
+       }
+
+       /* cancel deadline handling if set */
+       if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
+               task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
+               if (task->rusage_cpu_callt != 0) {
+                       savecallt = task->rusage_cpu_callt;
+                       task->rusage_cpu_callt = NULL;
+                       task->rusage_cpu_deadline = 0;
+                       task_unlock(task);
+                       thread_call_cancel_wait(savecallt);
+                       thread_call_free(savecallt);
+                       task_lock(task);
+               }
+       }
        return(0);
 }
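
One subtlety in the deadline teardown above: thread_call_cancel_wait() can block until a pending callout drains, and the callout (task_action_cpuusage() below) re-enters task_apply_resource_actions(), which takes the task lock. Dropping the lock across the wait avoids that deadlock, and snapshotting the pointer under the lock first guarantees exactly one free even if a setter races in. The shape of the pattern, annotated (an illustrative restatement, not new code):

        savecallt = task->rusage_cpu_callt;   /* snapshot under the task lock */
        task->rusage_cpu_callt = NULL;        /* nobody else can free it now  */
        task_unlock(task);                    /* the callout takes this lock  */
        thread_call_cancel_wait(savecallt);   /* may block until callout ends */
        thread_call_free(savecallt);
        task_lock(task);                      /* restore the caller's locking */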
 
 /* called by the ledger unit to enforce an action when resource usage criteria are met */
+void
+task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
+{
+       task_t task = (task_t)param0;
+       (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
+       return;
+}
+
+#if CONFIG_EMBEDDED
+/* return the appstate of a task */
 int
-task_action_cpuusage(task_t task)
+proc_lf_getappstate(task_t task)
 {
-       return(task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE));
+       return(task->appstate);
+
 }
 
+
+/* set the appstate of a task and apply appropriate actions */
+int 
+proc_lf_setappstate(task_t task, int state)
+{
+       int ret = 0, oldstate;
+       kern_return_t kret = KERN_SUCCESS;
+       int applywatch = 0, setbg = 0, setnetbg = 0;
+       int sethib_suspend = 0, sethib_resume = 0;
+
+       if (state == TASK_APPSTATE_NONE)
+               goto out;
+
+       /* valid states? */
+       switch (state) {
+               case TASK_APPSTATE_ACTIVE:
+               case TASK_APPSTATE_BACKGROUND:
+               case TASK_APPSTATE_NONUI:
+               case TASK_APPSTATE_INACTIVE:
+                       break;
+               default:
+                       ret = EINVAL;
+                       goto out;
+
+       }
+
+       task_lock(task);
+       oldstate = task->appstate;
+       if (oldstate == state) {
+               /* no changes */
+               goto out1;
+       }
+
+       switch(oldstate) {
+               case TASK_APPSTATE_ACTIVE:
+                       switch(state) {
+                               case TASK_APPSTATE_BACKGROUND:
+                                       /* moving from active to app background */
+                                       task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
+                                       proc_apply_bgtaskpolicy_internal(task, 1, 1);
+                                       /* watchers need update */
+                                       applywatch = 1;
+                                       setbg = 1;
+                                       /* set network part */
+                                       setnetbg = 1;
+                                       break;
+
+                               case TASK_APPSTATE_NONUI:
+                                       /* set no graphics */
+                                       task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       break;
+
+                               case TASK_APPSTATE_INACTIVE:
+                                       /* suspend the process */
+                                       kret = task_pidsuspend_locked(task);
+                                       if (kret != KERN_SUCCESS)
+                                               ret = EINVAL;
+                                       else
+                                               sethib_suspend = 1;
+                                               
+                                       break;
+                       }
+                       break;
+
+               case TASK_APPSTATE_BACKGROUND:
+                       switch(state) {
+                               case TASK_APPSTATE_ACTIVE:
+                                       /* remove app background */
+                                       ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT);
+                                       /* going from BG to active; watchers need update */
+                                       applywatch = 1;
+                                       setbg = 0;
+                                       /* set network part */
+                                       setnetbg = 1;
+                                       break;
+
+                               case TASK_APPSTATE_NONUI:
+                                       /* remove app background + no graphics */
+                                       task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT);
+                                       break;
+
+                               case TASK_APPSTATE_INACTIVE:
+                                       /* suspend and then remove app background */
+                                       kret = task_pidsuspend_locked(task);
+                                       if (kret != KERN_SUCCESS) {
+                                               ret = EINVAL;
+                                       } else {
+                                               ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT);
+                                               sethib_suspend = 1;
+                                       }
+                               
+                                       break;
+
+                       }
+                       break;
+
+               case TASK_APPSTATE_NONUI:
+                       switch(state) {
+                               case TASK_APPSTATE_ACTIVE:
+                                       /* restore graphics access */
+                                       task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                       task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                       break;
+
+                               case TASK_APPSTATE_BACKGROUND:
+                                       /* set app background */
+                                       task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
+                       
+                                       ret = proc_apply_bgtaskpolicy_internal(task, 1, 1);
+                                       if (ret == 0) {
+                                               task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                               task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                       }
+                                       /* watchers need update */
+                                       applywatch = 1;
+                                       setbg = 1;
+                                       /* set network part */
+                                       setnetbg = 1;
+                                       break;
+
+                               case TASK_APPSTATE_INACTIVE:
+                                       /* suspend & restore graphics access */
+                                       kret = task_pidsuspend_locked(task);
+                                       if (kret != KERN_SUCCESS) {
+                                               ret = EINVAL;
+                                       } else {
+                                               ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT);
+                                               task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                               task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT;
+                                               sethib_suspend = 1;
+                                       }
+                                       break;
+                       }
+                       break;
+
+               case TASK_APPSTATE_INACTIVE:
+                       switch(state) {
+                               case TASK_APPSTATE_ACTIVE:
+                                       /* resume process */
+                                       /* going from inactive to active */
+                                       break;
+
+                               case TASK_APPSTATE_BACKGROUND:
+                                       task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
+                                       ret = proc_apply_bgtaskpolicy_internal(task, 1, 1);
+                                       /* put in app background & resume process */
+                                       /* watchers need update */
+                                       applywatch = 1;
+                                       setbg = 1;
+                                       /* set network part */
+                                       setnetbg = 1;
+                                       break;
+
+                               case TASK_APPSTATE_NONUI:
+                                       /* remove graphics access and resume */
+                                       task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+                                       break;
+                       }
+                       /* pidresume drops the task lock, so there is no need for a locked version */
+                       task_unlock(task);
+                       kret = task_pidresume(task);
+                       task_lock(task);
+                       sethib_resume = 1;
+                       break;
+       }
+       /* set the new app state on the task */
+       task->appstate = state;
+out1:
+       task_unlock(task);
+       if (setnetbg != 0) {
+               /* apply network background */
+               if (setbg != 0)
+                       proc_apply_task_networkbg_internal(task->bsd_info, NULL);
+               else
+                       proc_restore_task_networkbg_internal(task->bsd_info, NULL);
+       }
+#if CONFIG_MEMORYSTATUS
+       if (sethib_suspend != 0)
+                       memorystatus_on_suspend(proc_pid(task->bsd_info));
+       if (sethib_resume != 0)
+                       memorystatus_on_resume(proc_pid(task->bsd_info));
+#endif /* CONFIG_MEMORYSTATUS */
+       /* if watchers need update, safe point to do that */
+       if (applywatch != 0)
+               apply_appstate_watchers(task, setbg);
+
+out:
+       return(ret);
+}
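
The state machine above condenses to the following transition matrix; this is a summary derived from the switch, with the ret/kret bookkeeping omitted:

        /*
         * old \ new   ACTIVE        BACKGROUND      NONUI          INACTIVE
         * ACTIVE      -             apply BG,       deny GPU       suspend
         *                           watchers
         * BACKGROUND  restore BG,   -               deny GPU,      suspend,
         *             watchers                      restore BG     restore BG
         * NONUI       restore GPU   apply BG,       -              suspend,
         *                           GPU default,                   restore BG,
         *                           watchers                       GPU default
         * INACTIVE    resume        resume,         resume,        -
         *                           apply BG,       deny GPU
         *                           watchers
         */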
+
+static void
+task_watch_lock(void)
+{
+       lck_mtx_lock(&task_watch_mtx);
+}
+
+static void
+task_watch_unlock(void)
+{
+       lck_mtx_unlock(&task_watch_mtx);
+}
+
+static void
+add_taskwatch_locked(task_t task, task_watch_t * twp)
+{
+       queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
+       task->num_taskwatchers++;
+
+}
+
+static void
+remove_taskwatch_locked(task_t task, task_watch_t * twp)
+{
+       queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
+       task->num_taskwatchers--;
+}
+
+
+int 
+proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
+{
+       thread_t self = current_thread();
+       thread_t target_thread = NULL;
+       int selfset = 0, ret = 0, setbg = 0;
+       task_watch_t *twp = NULL;
+       task_t task = TASK_NULL;
+
+
+       if ((tid == 0) || (tid == self->thread_id)) {
+               selfset = 1;
+               target_thread = self;
+               thread_reference(target_thread);
+       } else {
+               task_lock(curtask);
+               target_thread = task_findtid(curtask, tid);
+               if (target_thread != NULL)
+                       thread_reference(target_thread);
+               else {
+                       ret = ESRCH;
+                       goto out;
+               }
+                       
+               task_unlock(curtask);
+       }
+       
+       if (bind != 0) {
+               /* task is still active ? */
+               task_lock(target_task);
+               if (target_task->active == 0) {
+                       task_unlock(target_task);
+                       ret = ESRCH;
+                       goto out;
+               }
+               task_unlock(target_task);
+
+               twp = (task_watch_t *)kalloc(sizeof(task_watch_t));
+               if (twp == NULL) {
+                       /* task_watch_mtx is not held yet, so just bail */
+                       ret = ENOMEM;
+                       goto out;
+               }
+
+               bzero(twp, sizeof(task_watch_t));
+
+               task_watch_lock();
+
+               if (target_thread->taskwatch != NULL){
+                       /* already bound to another task */
+                       task_watch_unlock();
+
+                       kfree(twp, sizeof(task_watch_t));
+                       ret = EBUSY;
+                       goto out;
+               }
+
+               task_reference(target_task);
+
+               twp->tw_task = target_task;             /* holds the task reference */
+               twp->tw_thread = target_thread;         /* holds the thread reference */
+               twp->tw_state = target_task->appstate;
+               twp->tw_importance = target_thread->importance;
+       
+               add_taskwatch_locked(target_task, twp);
+
+               target_thread->taskwatch = twp;
+
+               if (target_task->appstate == TASK_APPSTATE_BACKGROUND)
+                       setbg = 1;
+
+               task_watch_unlock();
+
+               if (setbg != 0) {
+                       set_thread_appbg(target_thread, setbg, INT_MIN);
+               }
+
+               /* retain the thread reference as it is in twp */
+               target_thread = NULL;
+       } else {
+               /* unbind */            
+               task_watch_lock();
+               if ((twp = target_thread->taskwatch) != NULL) {
+                       task = twp->tw_task;
+                       target_thread->taskwatch = NULL;
+                       remove_taskwatch_locked(task, twp);
+
+                       task_watch_unlock();
+
+                       task_deallocate(task);                  /* drop task ref in twp */
+                       set_thread_appbg(target_thread, 0, twp->tw_importance);
+                       thread_deallocate(target_thread);       /* drop thread ref in twp */
+                       kfree(twp, sizeof(task_watch_t));
+               } else {
+                       task_watch_unlock();
+                       ret = 0;                /* return success if it is not already bound */
+                       goto out;
+               }
+       }
+out:
+       if (target_thread != NULL)
+               thread_deallocate(target_thread);       /* drop thread ref acquired in this routine */
+       return(ret);
+}
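
A usage sketch for the bind/unbind pair above; the watched_task handle is illustrative. Binding parks the thread's current importance in the task_watch_t; while the watched task is in TASK_APPSTATE_BACKGROUND the bound thread runs at importance INT_MIN, and unbinding (or thead_remove_taskwatch() at thread teardown) restores the saved value:

        /* Sketch: make the current thread track watched_task's app state. */
        int err;

        err = proc_lf_pidbind(current_task(), 0 /* 0 or own tid == self */,
            watched_task, 1 /* bind */);
        /* ... the thread now follows watched_task between BG and non-BG ... */
        if (err == 0)
                (void)proc_lf_pidbind(current_task(), 0, watched_task,
                    0 /* unbind */);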
+
+static void
+set_thread_appbg(thread_t thread, int setbg, int importance)
+{
+       /* TBD: ensure the proc for network is fine */
+       if (setbg == 0) {
+               restore_bgthreadpolicy_locked(thread, 0, importance);
+               proc_restore_task_networkbg_internal(thread->task->bsd_info, thread);
+        } else {
+               apply_bgthreadpolicy_external(thread);
+               proc_apply_task_networkbg_internal(thread->task->bsd_info, thread);
+       }
+}
+
+static void
+apply_appstate_watchers(task_t task, int setbg)
+{
+       int numwatchers = 0, i, j;
+       thread_watchlist_t * threadlist;
+       task_watch_t * twp;
+
+retry:
+       /* if no watchers on the list return */
+       if ((numwatchers = task->num_taskwatchers) == 0)
+               return;
+
+       threadlist = (thread_watchlist_t *)kalloc(numwatchers*sizeof(thread_watchlist_t));
+       if (threadlist == NULL)
+               return;
+
+       bzero(threadlist, numwatchers*sizeof(thread_watchlist_t));
+
+       task_watch_lock();
+       /* serialize application of app state changes */
+       if (task->watchapplying != 0) {
+               lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT);
+               task_watch_unlock();
+               kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+               goto retry;
+       }
+
+       if (numwatchers != task->num_taskwatchers) {
+               task_watch_unlock();
+               kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+               goto retry;
+       }
+       
+       task->watchapplying = 1;
+       i = 0;
+       queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
+
+               threadlist[i].thread = twp->tw_thread;
+               thread_reference(threadlist[i].thread);
+               if (setbg != 0) {
+                       twp->tw_importance = twp->tw_thread->importance;
+                       threadlist[i].importance = INT_MIN;
+               } else
+                       threadlist[i].importance = twp->tw_importance;
+               i++;
+               if (i >= numwatchers)
+                       break;
+       }
+       task_watch_unlock();
+
+       for (j = 0; j< i; j++) {
+               set_thread_appbg(threadlist[j].thread, setbg, threadlist[j].importance);
+               thread_deallocate(threadlist[j].thread);
+       }
+       kfree(threadlist, numwatchers*sizeof(thread_watchlist_t));
+
+
+       task_watch_lock();
+       task->watchapplying = 0;
+       thread_wakeup_one(&task->watchapplying);
+       task_watch_unlock();
+}
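
apply_appstate_watchers() uses a snapshot-and-retry pattern: the watcher list is sized and allocated without the lock, revalidated under task_watch_lock(), copied out with thread references taken, and only then are the policy updates applied with the lock dropped, since thread_policy_set_internal() may block. The watchapplying flag serializes concurrent appliers; either a racing applier or a changed watcher count sends the caller back to retry with a fresh allocation. In outline (illustrative):

        /*
         * retry:  n = count; list = kalloc(n)          (unlocked allocation)
         *         lock
         *           if (applying)   { sleep; unlock; kfree; goto retry; }
         *           if (n != count) { unlock; kfree; goto retry; }
         *           applying = 1; take refs on n threads
         *         unlock
         *         apply policy to the snapshot         (no lock held)
         *         lock; applying = 0; wakeup; unlock
         */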
+
+void
+thead_remove_taskwatch(thread_t thread)
+{
+       task_watch_t * twp;
+       int importance = 0;
+
+       task_watch_lock();
+       if ((twp = thread->taskwatch) != NULL) {
+               thread->taskwatch = NULL;
+               remove_taskwatch_locked(twp->tw_task, twp);
+       }
+       task_watch_unlock();
+       if (twp != NULL) {
+               thread_deallocate(twp->tw_thread);
+               task_deallocate(twp->tw_task);
+               importance = twp->tw_importance;
+               kfree(twp, sizeof(task_watch_t));
+               /* remove the thread and networkbg */
+               set_thread_appbg(thread, 0, importance);
+       }
+}
+
+void
+task_removewatchers(task_t task)
+{
+       int numwatchers = 0, i, j;
+       task_watch_t ** twplist = NULL;
+       task_watch_t * twp = NULL;
+
+retry:
+       if ((numwatchers = task->num_taskwatchers) == 0)
+               return;
+
+       twplist = (task_watch_t **)kalloc(numwatchers*sizeof(task_watch_t *));
+       if (twplist == NULL)
+               return;
+
+       bzero(twplist, numwatchers*sizeof(task_watch_t *));
+
+       task_watch_lock();
+       if (task->num_taskwatchers == 0) {
+               task_watch_unlock();
+               goto out;
+       }
+
+       if (numwatchers != task->num_taskwatchers) {
+               task_watch_unlock();
+               kfree(twplist, numwatchers*sizeof(task_watch_t *));
+               numwatchers = 0;
+               goto retry;
+       }
+       
+       i = 0;
+       while((twp = (task_watch_t *)dequeue_head(&task->task_watchers)) != NULL)
+       {
+               twplist[i] = twp;
+               task->num_taskwatchers--;       
+
+               /*
+                * Since the linkage is removed and thread state cleanup is already set up,
+                * remove the reference from the thread.
+                */
+               twp->tw_thread->taskwatch = NULL;       /* removed linkage, clear thread holding ref */
+               i++;
+               if ((task->num_taskwatchers == 0) || (i >= numwatchers))
+                       break;
+       }
+
+       task_watch_unlock();
+
+       for (j = 0; j< i; j++) {
+               
+               twp = twplist[j];
+               /* remove thread and network bg */
+               set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
+               thread_deallocate(twp->tw_thread);
+               task_deallocate(twp->tw_task);
+               kfree(twp, sizeof(task_watch_t));
+       }
+
+out:
+       kfree(twplist, numwatchers*sizeof(task_watch_t *));
+
+}
+#endif /* CONFIG_EMBEDDED */
+
+
 int 
 proc_disable_task_apptype(task_t task, int policy_subtype)
 {
        void * bsdinfo = NULL;
-       int setbg = 0;
        int ret = 0;
+       int setbg = 0;
+#if !CONFIG_EMBEDDED
        int maxpri = BASEPRI_DEFAULT;
+#endif /* !CONFIG_EMBEDDED */
 
        task_lock(task);
 
@@ -1248,34 +2197,41 @@ proc_disable_task_apptype(task_t task, int policy_subtype)
                default:
                        maxpri = BASEPRI_DEFAULT;
        }
-#endif
-                       
-       if (task->ext_actionstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) {
-                       switch (task->ext_actionstate.apptype) {
+
+
+#endif /* !CONFIG_EMBEDDED */
+
+       /* TAL apps are cleared with BG handling on first foreground application */
+       if (task->ext_appliedstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) {
+                       switch (task->ext_appliedstate.apptype) {
+#if !CONFIG_EMBEDDED
                                case PROC_POLICY_OSX_APPTYPE_TAL:
                                        /* disable foreground/background handling */
-                                       task->ext_actionstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                                       task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
                                        /* external BG application removal */
-                                       proc_restore_bgtaskpolicy_locked(task, 1, 1, maxpri);
+                                       proc_restore_bgtaskpolicy_internal(task, 1, 1, maxpri);
                                        bsdinfo = task->bsd_info;
                                        setbg = 0;
                                        break;
 
                                case PROC_POLICY_OSX_APPTYPE_DBCLIENT:
                                        /* disable foreground/background handling */
-                                       task->ext_actionstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+                                       task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
                                        /* internal BG application removal */
-                                       proc_restore_bgtaskpolicy_locked(task, 1, 0, maxpri);
+                                       proc_restore_bgtaskpolicy_internal(task, 1, 0, maxpri);
                                        bsdinfo = task->bsd_info;
                                        setbg = 0;
                                        break;
 
+#endif /* !CONFIG_EMBEDDED */
                                default:
                                        ret = EINVAL;
                                        break;
                        }
-       } else
+
+       } else {
                ret = EINVAL;
+       }
 
 out:
        task_unlock(task);
@@ -1300,20 +2256,22 @@ proc_enable_task_apptype(task_t task, int policy_subtype)
                goto out;
        }
 
-       if (task->ext_actionstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) {
+       if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) {
                switch (task->ext_policystate.apptype) {
+#if !CONFIG_EMBEDDED
                        case PROC_POLICY_OSX_APPTYPE_TAL:
                                 /* TAL policy is activated again */
-                               task->ext_actionstate.apptype = task->ext_policystate.apptype;
+                               task->ext_appliedstate.apptype = task->ext_policystate.apptype;
                                if (task->role == TASK_BACKGROUND_APPLICATION) {
                                        if (task->role == TASK_BACKGROUND_APPLICATION) {
-                                               proc_apply_bgtaskpolicy_locked(task, 1, 1);
+                                               proc_apply_bgtaskpolicy_internal(task, 1, 1);
                                                bsdinfo = task->bsd_info;
                                                setbg = 1;
                                        }
                                }
                                ret = 0;
                                break;
+#endif /* !CONFIG_EMBEDDED */
                        default:
                                ret = EINVAL;
                }
@@ -1329,3 +2287,18 @@ out:
        return(ret);
 }
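
The enable/disable pair above hinges on the split between ext_policystate (what apptype the process is configured as) and ext_appliedstate (whether that policy is currently in force): disable clears only the applied state, so a later enable can re-arm from the configured one. A standalone sketch of that state split (all names hypothetical, not xnu API):

    #include <errno.h>

    enum apptype { APPTYPE_NONE, APPTYPE_TAL };

    struct app_policy {
        enum apptype configured;   /* analogous to ext_policystate.apptype */
        enum apptype applied;      /* analogous to ext_appliedstate.apptype */
    };

    /* Disable: drop the applied policy but remember the configured one. */
    int policy_disable(struct app_policy *p)
    {
        if (p->applied == APPTYPE_NONE)
            return EINVAL;         /* nothing currently applied */
        p->applied = APPTYPE_NONE;
        return 0;
    }

    /* Enable: re-apply whatever the process was configured as. */
    int policy_enable(struct app_policy *p)
    {
        if (p->applied != APPTYPE_NONE)
            return EINVAL;         /* already applied */
        if (p->configured == APPTYPE_NONE)
            return EINVAL;
        p->applied = p->configured;
        return 0;
    }
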
 
+#if CONFIG_EMBEDDED
+int
+proc_setthread_saved_importance(thread_t thread, int importance)
+{
+       if ((thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON)  &&
+               (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL))
+       {
+               /* the thread is still backgrounded; save the importance for restore time */
+               thread->saved_importance = importance;
+
+               return(1);
+       } else
+               return(0);
+}
+#endif /* CONFIG_EMBEDDED */
index 3c3e5ce07def1844e7b1183c2c9a71e10ea49102..2738d385001223c9161c64b61e30fcb4068189fb 100644 (file)
@@ -92,6 +92,7 @@
 
 #include <machine/thread.h>
 #include <machine/pal_routines.h>
+#include <machine/limits.h>
 
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
@@ -161,6 +162,10 @@ int task_threadmax = CONFIG_THREAD_MAX;
 
 static uint64_t                thread_unique_id = 0;
 
+struct _thread_ledger_indices thread_ledgers = { -1 };
+static ledger_template_t thread_ledger_template = NULL;
+void init_thread_ledgers(void);
+
 void
 thread_bootstrap(void)
 {
@@ -196,7 +201,7 @@ thread_bootstrap(void)
        thread_template.promotions = 0;
        thread_template.pending_promoter_index = 0;
        thread_template.pending_promoter[0] =
-               thread_template.pending_promoter[1] = NULL;
+       thread_template.pending_promoter[1] = NULL;
 
        thread_template.realtime.deadline = UINT64_MAX;
 
@@ -257,14 +262,17 @@ thread_bootstrap(void)
        thread_template.syscalls_unix = 0;
        thread_template.syscalls_mach = 0;
 
-       thread_template.tkm_private.alloc = 0;
-       thread_template.tkm_private.free = 0;
-       thread_template.tkm_shared.alloc = 0;
-       thread_template.tkm_shared.free = 0;
-       thread_template.actionstate = default_task_null_policy;
-       thread_template.ext_actionstate = default_task_null_policy;
+       thread_template.t_ledger = LEDGER_NULL;
+       thread_template.t_threadledger = LEDGER_NULL;
+
+       thread_template.appliedstate = default_task_null_policy;
+       thread_template.ext_appliedstate = default_task_null_policy;
        thread_template.policystate = default_task_proc_policy;
        thread_template.ext_policystate = default_task_proc_policy;
+#if CONFIG_EMBEDDED
+       thread_template.taskwatch = NULL;
+       thread_template.saved_importance = 0;
+#endif /* CONFIG_EMBEDDED */
 
        init_thread = thread_template;
        machine_set_current_thread(&init_thread);
@@ -290,6 +298,8 @@ thread_init(void)
         *      per-thread structures necessary.
         */
        machine_thread_init();
+
+       init_thread_ledgers();
 }
 
 static void
@@ -354,6 +364,10 @@ thread_terminate_self(void)
 
        thread_policy_reset(thread);
 
+#if CONFIG_EMBEDDED
+       thead_remove_taskwatch(thread);
+#endif /* CONFIG_EMBEDDED */
+
        task = thread->task;
        uthread_cleanup(task, thread->uthread, task->bsd_info);
        threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
@@ -438,6 +452,11 @@ thread_deallocate(
        }
 #endif  /* MACH_BSD */   
 
+       if (thread->t_ledger)
+               ledger_dereference(thread->t_ledger);
+       if (thread->t_threadledger)
+               ledger_dereference(thread->t_threadledger);
+
        if (thread->kernel_stack != 0)
                stack_free(thread);
 
@@ -474,7 +493,11 @@ thread_terminate_daemon(void)
 
                task_lock(task);
                task->total_user_time += timer_grab(&thread->user_timer);
-               task->total_system_time += timer_grab(&thread->system_timer);
+               if (thread->precise_user_kernel_time) {
+                       task->total_system_time += timer_grab(&thread->system_timer);
+               } else {
+                       task->total_user_time += timer_grab(&thread->system_timer);
+               }
 
                task->c_switch += thread->c_switch;
                task->p_switch += thread->p_switch;
@@ -483,11 +506,6 @@ thread_terminate_daemon(void)
                task->syscalls_unix += thread->syscalls_unix;
                task->syscalls_mach += thread->syscalls_mach;
 
-               task->tkm_private.alloc += thread->tkm_private.alloc;
-               task->tkm_private.free += thread->tkm_private.free;
-               task->tkm_shared.alloc += thread->tkm_shared.alloc;
-               task->tkm_shared.free += thread->tkm_shared.free;
-
                queue_remove(&task->threads, thread, thread_t, task_threads);
                task->thread_count--;
 
@@ -669,7 +687,7 @@ thread_create_internal(
                return (KERN_FAILURE);
        }
 
-    new_thread->task = parent_task;
+       new_thread->task = parent_task;
 
        thread_lock_init(new_thread);
        wake_lock_init(new_thread);
@@ -716,6 +734,18 @@ thread_create_internal(
 
        task_reference_internal(parent_task);
 
+       if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
+               /*
+                * This task has a per-thread CPU limit; make sure this new thread
+                * gets its limit set too, before it gets out of the kernel.
+                */
+               set_astledger(new_thread);
+       }
+       new_thread->t_threadledger = LEDGER_NULL;       /* per thread ledger is not inherited */
+       new_thread->t_ledger = new_thread->task->ledger;
+       if (new_thread->t_ledger)
+               ledger_reference(new_thread->t_ledger);
+
        /* Cache the task's map */
        new_thread->map = parent_task->map;
 
@@ -759,6 +789,21 @@ thread_create_internal(
 #endif /* CONFIG_EMBEDDED */
        new_thread->importance =
                                        new_thread->priority - new_thread->task_priority;
+#if CONFIG_EMBEDDED
+       new_thread->saved_importance = new_thread->importance;
+       /* Apple iOS daemons start all threads in Darwin background */
+       if (parent_task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) {
+               /* Cannot use the generic routines here, so apply Darwin background directly */
+               new_thread->policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL;
+               /* set thread self backgrounding */
+               new_thread->appliedstate.hw_bg = new_thread->policystate.hw_bg;
+               /* priority will get recomputed suitably a bit later */
+               new_thread->importance = INT_MIN;
+               /* to avoid changes to many pri compute routines, set the effect of those here */
+               new_thread->priority = MAXPRI_THROTTLE;
+       }
+#endif /* CONFIG_EMBEDDED */
+
 #if defined(CONFIG_SCHED_TRADITIONAL)
        new_thread->sched_stamp = sched_tick;
        new_thread->pri_shift = sched_pri_shift;
@@ -774,16 +819,16 @@ thread_create_internal(
 
                kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);
 
-               KERNEL_DEBUG_CONSTANT(
-                                       TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
-                                                       (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
+                       (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);
 
                kdbg_trace_string(parent_task->bsd_info,
                                                        &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
 
-               KERNEL_DEBUG_CONSTANT(
-                                       TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
-                                                       dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                       TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
+                       dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
        }
 
        DTRACE_PROC1(lwp__create, thread_t, *out_thread);
@@ -1026,7 +1071,7 @@ kernel_thread_start(
        return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
 }
 
-#ifndef        __LP64__
+#if defined(__i386__)
 
 thread_t
 kernel_thread(
@@ -1048,7 +1093,7 @@ kernel_thread(
        return (thread);
 }
 
-#endif /* __LP64__ */
+#endif /* defined(__i386__) */
 
 kern_return_t
 thread_info_internal(
@@ -1270,14 +1315,29 @@ thread_read_times(
 {
        clock_sec_t             secs;
        clock_usec_t    usecs;
+       uint64_t                tval_user, tval_system;
 
-       absolutetime_to_microtime(timer_grab(&thread->user_timer), &secs, &usecs);
-       user_time->seconds = (typeof(user_time->seconds))secs;
-       user_time->microseconds = usecs;
+       tval_user = timer_grab(&thread->user_timer);
+       tval_system = timer_grab(&thread->system_timer);
 
-       absolutetime_to_microtime(timer_grab(&thread->system_timer), &secs, &usecs);
-       system_time->seconds = (typeof(system_time->seconds))secs;
-       system_time->microseconds = usecs;
+       if (thread->precise_user_kernel_time) {
+               absolutetime_to_microtime(tval_user, &secs, &usecs);
+               user_time->seconds = (typeof(user_time->seconds))secs;
+               user_time->microseconds = usecs;
+               
+               absolutetime_to_microtime(tval_system, &secs, &usecs);
+               system_time->seconds = (typeof(system_time->seconds))secs;
+               system_time->microseconds = usecs;
+       } else {
+               /* system_timer may represent either sys or user */
+               tval_user += tval_system;
+               absolutetime_to_microtime(tval_user, &secs, &usecs);
+               user_time->seconds = (typeof(user_time->seconds))secs;
+               user_time->microseconds = usecs;
+
+               system_time->seconds = 0;
+               system_time->microseconds = 0;
+       }
 }
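
When precise_user_kernel_time is off, ticks recorded in system_timer may actually be user time, so the function folds both timers into the user figure and reports zero system time. A standalone illustration of that reporting math (plain nanosecond arithmetic stands in for the kernel's mach absolute time conversion):

    #include <stdint.h>
    #include <stdio.h>

    /* Convert nanoseconds to (seconds, microseconds), playing the role
     * of absolutetime_to_microtime() in the kernel. */
    static void ns_to_microtime(uint64_t ns, uint64_t *secs, uint32_t *usecs)
    {
        *secs  = ns / 1000000000ULL;
        *usecs = (uint32_t)((ns % 1000000000ULL) / 1000ULL);
    }

    int main(void)
    {
        uint64_t user_ns = 1500000000, system_ns = 700000000;
        int precise = 0;                /* precise_user_kernel_time off */
        uint64_t secs; uint32_t usecs;

        if (!precise) {                 /* system_timer may really be user time */
            user_ns += system_ns;
            system_ns = 0;
        }

        ns_to_microtime(user_ns, &secs, &usecs);
        printf("user   %llu s %u us\n", (unsigned long long)secs, usecs);
        ns_to_microtime(system_ns, &secs, &usecs);
        printf("system %llu s %u us\n", (unsigned long long)secs, usecs);
        return 0;
    }
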
 
 kern_return_t
@@ -1369,6 +1429,128 @@ thread_wire(
     return (thread_wire_internal(host_priv, thread, wired, NULL));
 }
 
+static void
+thread_resource_exception(const void *arg0, __unused const void *arg1)
+{
+       thread_t thread = current_thread();
+       int code = (int)((uintptr_t)arg0 & ((int)-1));
+       
+       assert(thread->t_threadledger != LEDGER_NULL);
+
+       /*
+        * Disable the exception notification so we don't overwhelm
+        * the listener with an endless stream of redundant exceptions.
+        */
+       ledger_set_action(thread->t_threadledger, thread_ledgers.cpu_time,
+           LEDGER_ACTION_IGNORE);
+       ledger_disable_callback(thread->t_threadledger, thread_ledgers.cpu_time);
+
+       /* XXX code should eventually be a user-exported namespace of resources */
+       (void) task_exception_notify(EXC_RESOURCE, code, 0); 
+}
+
+void
+init_thread_ledgers(void) {
+       ledger_template_t t;
+       int idx;
+       
+       assert(thread_ledger_template == NULL);
+
+       if ((t = ledger_template_create("Per-thread ledger")) == NULL)
+               panic("couldn't create thread ledger template");
+
+       if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
+               panic("couldn't create cpu_time entry for thread ledger template");
+       }
+
+       if (ledger_set_callback(t, idx, thread_resource_exception,
+                               (void *)(uintptr_t)idx, NULL) < 0) {
+               panic("couldn't set thread ledger callback for cpu_time entry");
+       }
+
+       thread_ledgers.cpu_time = idx;
+       thread_ledger_template = t;
+}
+
+/*
+ * Set CPU usage limit on a thread.
+ *
+ * Calling with percentage of 0 will unset the limit for this thread.
+ */
+int
+thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
+{
+       thread_t        thread = current_thread(); 
+       ledger_t        l;
+       uint64_t        limittime = 0;
+       uint64_t        abstime = 0;
+
+       assert(percentage <= 100);
+
+       if (percentage == 0) {
+               /*
+                * Remove CPU limit, if any exists.
+                */
+               if (thread->t_threadledger != LEDGER_NULL) {
+                       /*
+                        * The only way to get a per-thread ledger is via CPU limits.
+                        */
+                       assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT));
+                       ledger_dereference(thread->t_threadledger);
+                       thread->t_threadledger = LEDGER_NULL;
+                       thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
+               }
+
+               return (0);
+       }
+
+       l = thread->t_threadledger;
+       if (l == LEDGER_NULL) {
+               /*
+                * This thread doesn't yet have a per-thread ledger, so create one with the CPU time entry active.
+                */
+               if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
+                       return (KERN_RESOURCE_SHORTAGE);
+
+               /*
+                * We are the first to create this thread's ledger, so only activate our entry.
+                */
+               ledger_entry_setactive(l, thread_ledgers.cpu_time);
+               thread->t_threadledger = l;
+       }
+
+       /*
+        * The limit is specified as a percentage of CPU over an interval in nanoseconds.
+        * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
+        */
+       limittime = (interval_ns * percentage) / 100;
+       nanoseconds_to_absolutetime(limittime, &abstime); 
+       ledger_set_limit(l, thread_ledgers.cpu_time, abstime);
+       /*
+        * Refill the thread's allotted CPU time every interval_ns nanoseconds.
+        */
+       ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
+
+       /*
+        * Ledgers support multiple actions for one ledger entry, so we do too.
+        */
+       if (action == THREAD_CPULIMIT_EXCEPTION) {
+               thread->options |= TH_OPT_PROC_CPULIMIT;
+               ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_EXCEPTION);
+       }
+
+       if (action == THREAD_CPULIMIT_BLOCK) {
+               thread->options |= TH_OPT_PRVT_CPULIMIT;
+               /* The per-thread ledger template by default has a callback for CPU time */
+               ledger_disable_callback(l, thread_ledgers.cpu_time);
+               ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
+       }
+
+       thread->t_threadledger = l;
+       return (0);
+}
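
A hedged usage sketch of the API above, from the context of the thread being limited (thread_set_cpulimit() acts on the calling thread): a 25% limit over a 50 ms window allows at most 12.5 ms of CPU per window, after which LEDGER_ACTION_BLOCK parks the thread until the next refill.

    /* Sketch only: limit the calling kernel thread to a quarter of a core. */
    static void limit_self_to_quarter_core(void)
    {
            uint64_t interval_ns = 50ULL * 1000 * 1000;     /* 50 ms refill period */

            /* limittime = (50 ms * 25) / 100 = 12.5 ms of CPU per period */
            (void) thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 25, interval_ns);
    }
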
+
 int            split_funnel_off = 0;
 lck_grp_t      *funnel_lck_grp = LCK_GRP_NULL;
 lck_grp_attr_t *funnel_lck_grp_attr;
@@ -1603,12 +1785,6 @@ vm_offset_t dtrace_get_kernel_stack(thread_t thread)
 
 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
 {
-#if STAT_TIME
-       if (thread != THREAD_NULL) {
-               return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer));
-       } else
-               return 0;
-#else
        if (thread != THREAD_NULL) {
                processor_t             processor = current_processor();
                uint64_t                                abstime = mach_absolute_time();
@@ -1620,7 +1796,6 @@ int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
                                (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
        } else
                return 0;
-#endif
 }
 
 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
index 916391593453e668af5021cd603043082f545020..b497fa3facfd3e7999dd50c42d37bc9e5726bf38 100644 (file)
@@ -147,6 +147,8 @@ struct thread {
 #define TH_OPT_VMPRIV          0x04            /* may allocate reserved memory */
 #define TH_OPT_DTRACE          0x08            /* executing under dtrace_probe */
 #define TH_OPT_SYSTEM_CRITICAL 0x10            /* Thread must always be allowed to run - even under heavy load */
+#define TH_OPT_PROC_CPULIMIT   0x20            /* Thread has a task-wide CPU limit applied to it */
+#define TH_OPT_PRVT_CPULIMIT   0x40            /* Thread has a thread-private CPU limit applied to it */
 
        /* Data updated during assert_wait/thread_wakeup */
        decl_simple_lock_data(,sched_lock)      /* scheduling lock (thread_lock()) */
@@ -203,6 +205,13 @@ struct thread {
 #define TH_SFLAG_PRI_UPDATE            0x0100          /* Updating priority */
 #define TH_SFLAG_EAGERPREEMPT          0x0200          /* Any preemption of this thread should be treated as if AST_URGENT applied */
 
+/*
+ * A thread can either be completely unthrottled, about to be throttled,
+ * throttled (TH_SFLAG_THROTTLED), or about to be unthrottled
+ */
+#define        TH_SFLAG_PENDING_THROTTLE_DEMOTION      0x1000  /* Pending sched_mode demotion */
+#define        TH_SFLAG_PENDING_THROTTLE_PROMOTION     0x2000  /* Pending sched_mode promotion */
+#define        TH_SFLAG_PENDING_THROTTLE_MASK          (TH_SFLAG_PENDING_THROTTLE_DEMOTION | TH_SFLAG_PENDING_THROTTLE_PROMOTION)
 
        integer_t                       sched_pri;                      /* scheduled (current) priority */
        integer_t                       priority;                       /* base priority */
@@ -268,6 +277,7 @@ struct thread {
        uint32_t                        ps_switch;              /* total pset switches */
 
        /* Timing data structures */
+       int                                     precise_user_kernel_time; /* precise user/kernel enabled for this thread */
        timer_data_t            user_timer;                     /* user mode timer */
        uint64_t                        user_timer_save;        /* saved user timer value */
        uint64_t                        system_timer_save;      /* saved system timer value */
@@ -382,13 +392,23 @@ struct thread {
                int64_t t_dtrace_vtime;
 #endif
 
-#define T_CHUD_MARKED          0x1             /* this thread is marked by CHUD */
-#define T_IN_CHUD                      0x2             /* this thread is already in a CHUD handler */
-#define THREAD_PMC_FLAG                0x4             /* Bit in "t_chud" signifying PMC interest */
                uint32_t    t_page_creation_count;
                clock_sec_t t_page_creation_time;
 
+#define T_CHUD_MARKED           0x01          /* this thread is marked by CHUD */
+#define T_IN_CHUD               0x02          /* this thread is already in a CHUD handler */
+#define THREAD_PMC_FLAG         0x04          /* Bit in "t_chud" signifying PMC interest */    
+#define T_AST_CALLSTACK         0x08          /* Thread scheduled to dump a
+                                              * callstack on its next
+                                              * AST */
+#define T_AST_NAME              0x10          /* Thread scheduled to dump
+                                              * its name on its next
+                                              * AST */
+#define T_NAME_DONE             0x20          /* Thread has previously
+                                              * recorded its name */
+
                uint32_t t_chud;        /* CHUD flags, used for Shark */
+               uint32_t chud_c_switch; /* last dispatch detection */
 
                integer_t mutex_count;  /* total count of locks held */
 
@@ -397,12 +417,16 @@ struct thread {
        /* Statistics accumulated per-thread and aggregated per-task */
        uint32_t                syscalls_unix;
        uint32_t                syscalls_mach;
-       zinfo_usage_store_t     tkm_private;    /* private kernel memory allocs/frees */
-       zinfo_usage_store_t     tkm_shared;     /* shared kernel memory allocs/frees */
-       struct process_policy ext_actionstate;  /* externally applied actions */
+       ledger_t                t_ledger;
+       ledger_t                t_threadledger; /* per thread ledger */
+       struct process_policy ext_appliedstate; /* externally applied actions */
        struct process_policy ext_policystate;  /* externally defined process policy states*/
-       struct process_policy actionstate;              /* self applied acions */
+       struct process_policy appliedstate;             /* self-applied actions */
        struct process_policy policystate;              /* process wide policy states */
+#if CONFIG_EMBEDDED
+       task_watch_t *  taskwatch;              /* task watch */
+       integer_t               saved_importance;               /* saved task-relative importance */
+#endif /* CONFIG_EMBEDDED */
 };
 
 #define ith_state              saved.receive.state
@@ -495,21 +519,6 @@ extern void                                stack_collect(void);
 extern void                            stack_init(void) __attribute__((section("__TEXT, initcode")));
 
 
-extern kern_return_t    thread_state_initialize(
-                                                       thread_t                                thread);
-
-extern kern_return_t   thread_setstatus(
-                                                       thread_t                                thread,
-                                                       int                                             flavor,
-                                                       thread_state_t                  tstate,
-                                                       mach_msg_type_number_t  count);
-
-extern kern_return_t   thread_getstatus(
-                                                       thread_t                                thread,
-                                                       int                                             flavor,
-                                                       thread_state_t                  tstate,
-                                                       mach_msg_type_number_t  *count);
-
 extern kern_return_t   thread_info_internal(
                                                        thread_t                                thread,
                                                        thread_flavor_t                 flavor,
@@ -588,8 +597,6 @@ extern void                     machine_thread_destroy(
 extern void                            machine_set_current_thread(
                                                        thread_t                        thread);
 
-extern void                    machine_thread_terminate_self(void);
-
 extern kern_return_t   machine_thread_get_kern_state(
                                                        thread_t                                thread,
                                                        thread_flavor_t                 flavor,
@@ -658,13 +665,13 @@ __END_DECLS
 
 __BEGIN_DECLS
 
-#ifndef        __LP64__
+#if defined(__i386__)
 
 extern thread_t                kernel_thread(
                                                task_t          task,
                                                void            (*start)(void));
 
-#endif /* __LP64__ */
+#endif /* defined(__i386__) */
 
 extern uint64_t                        thread_tid(
                                                thread_t thread);
@@ -680,6 +687,21 @@ __BEGIN_DECLS
 
 #ifdef XNU_KERNEL_PRIVATE
 
+extern kern_return_t    thread_state_initialize(
+                                                       thread_t                                thread);
+
+extern kern_return_t   thread_setstatus(
+                                                       thread_t                                thread,
+                                                       int                                             flavor,
+                                                       thread_state_t                  tstate,
+                                                       mach_msg_type_number_t  count);
+
+extern kern_return_t   thread_getstatus(
+                                                       thread_t                                thread,
+                                                       int                                             flavor,
+                                                       thread_state_t                  tstate,
+                                                       mach_msg_type_number_t  *count);
+
 extern kern_return_t   thread_create_workq(
                                                        task_t                  task,
                                                        thread_continue_t       thread_return,
@@ -688,6 +710,23 @@ extern kern_return_t       thread_create_workq(
 extern void    thread_yield_internal(
        mach_msg_timeout_t      interval);
 
+/*
+ * Thread-private CPU limits: apply a private CPU limit to this thread only. Available actions are:
+ * 
+ * 1) Block. Prevent CPU consumption of the thread from exceeding the limit.
+ * 2) Exception. Generate a resource consumption exception when the limit is exceeded.
+ */
+#define THREAD_CPULIMIT_BLOCK          0x1
+#define THREAD_CPULIMIT_EXCEPTION      0x2
+
+struct _thread_ledger_indices {
+       int cpu_time;
+};
+
+extern struct _thread_ledger_indices thread_ledgers;
+
+extern int thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns);
+
 typedef struct funnel_lock             funnel_t;
 
 #define THR_FUNNEL_NULL (funnel_t *)0
@@ -733,13 +772,16 @@ extern kern_return_t      thread_userstack(
                                                mach_vm_offset_t *,
                                                int *);
 
-kern_return_t  thread_entrypoint(
-                               thread_t,
-                               int,
-                               thread_state_t,
-                               unsigned int,
-                               mach_vm_offset_t *); 
+extern kern_return_t   thread_entrypoint(
+                                               thread_t,
+                                               int,
+                                               thread_state_t,
+                                               unsigned int,
+                                               mach_vm_offset_t *); 
 
+extern kern_return_t   thread_userstackdefault(
+                                               thread_t,
+                                               mach_vm_offset_t *);
 
 extern kern_return_t   thread_wire_internal(
                                                        host_priv_t             host_priv,
@@ -786,9 +828,13 @@ extern void                uthread_cred_free(void *);
 extern boolean_t       thread_should_halt(
                                                thread_t                thread);
 
+extern boolean_t       thread_should_abort(
+                                               thread_t);
+
 extern int is_64signalregset(void);
 
 void act_set_apc(thread_t);
+void act_set_kperf(thread_t);
 
 extern uint32_t dtrace_get_thread_predcache(thread_t);
 extern int64_t dtrace_get_thread_vtime(thread_t);
@@ -835,6 +881,7 @@ extern kern_return_t        kernel_thread_start(
 #ifdef KERNEL_PRIVATE
 void thread_set_eager_preempt(thread_t thread);
 void thread_clear_eager_preempt(thread_t thread);
+extern ipc_port_t convert_thread_to_port(thread_t);
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
index 455a0fb016cb13a1c84dce2dcbb3787d5a9155ef..d99ee186c07c9082add778e62c6f1d428385e2ac 100644 (file)
@@ -78,6 +78,8 @@
 
 #include <mach/rpc.h>
 
+#include <security/mac_mach_internal.h>
+
 void                   act_abort(thread_t);
 void                   install_special_handler_locked(thread_t);
 void                   special_handler_continue(void);
@@ -134,7 +136,7 @@ thread_terminate_internal(
        thread_mtx_unlock(thread);
 
        if (thread != current_thread() && result == KERN_SUCCESS)
-               thread_wait(thread);
+               thread_wait(thread, FALSE);
 
        return (result);
 }
@@ -236,7 +238,7 @@ thread_suspend(
        thread_mtx_unlock(thread);
 
        if (thread != self && result == KERN_SUCCESS)
-               thread_wait(thread);
+               thread_wait(thread, TRUE);
 
        return (result);
 }
@@ -575,7 +577,7 @@ thread_state_initialize(
                        thread_release(thread);
                }
                else
-            result = machine_thread_state_initialize( thread );
+                       result = machine_thread_state_initialize( thread );
        }
        else
                result = KERN_TERMINATED;
@@ -897,25 +899,26 @@ act_get_state(
     return (thread_get_state(thread, flavor, state, count));
 }
 
-void
-act_set_astbsd(
-       thread_t        thread)
+static void
+act_set_ast(
+           thread_t    thread,
+           ast_t ast)
 {
        spl_t           s = splsched();
        
        if (thread == current_thread()) {
-               thread_ast_set(thread, AST_BSD);
+               thread_ast_set(thread, ast);
                ast_propagate(thread->ast);
        }
        else {
                processor_t             processor;
 
                thread_lock(thread);
-               thread_ast_set(thread, AST_BSD);
+               thread_ast_set(thread, ast);
                processor = thread->last_processor;
-               if (    processor != PROCESSOR_NULL                                     &&
-                               processor->state == PROCESSOR_RUNNING           &&
-                               processor->active_thread == thread                      )
+               if ( processor != PROCESSOR_NULL            &&
+                    processor->state == PROCESSOR_RUNNING  &&
+                    processor->active_thread == thread      )
                        cause_ast_check(processor);
                thread_unlock(thread);
        }
@@ -923,28 +926,37 @@ act_set_astbsd(
        splx(s);
 }
 
+void
+act_set_astbsd(
+       thread_t        thread)
+{
+       act_set_ast( thread, AST_BSD );
+}
+
 void
 act_set_apc(
        thread_t        thread)
 {
-       spl_t           s = splsched();
-       
-       if (thread == current_thread()) {
-               thread_ast_set(thread, AST_APC);
-               ast_propagate(thread->ast);
-       }
-       else {
-               processor_t             processor;
+       act_set_ast( thread, AST_APC );
+}
 
-               thread_lock(thread);
-               thread_ast_set(thread, AST_APC);
-               processor = thread->last_processor;
-               if (    processor != PROCESSOR_NULL                                     &&
-                               processor->state == PROCESSOR_RUNNING           &&
-                               processor->active_thread == thread                      )
-                       cause_ast_check(processor);
-               thread_unlock(thread);
-       }
-       
-       splx(s);
+void
+act_set_kperf(
+       thread_t        thread)
+{
+       /* safety check */
+       if (thread != current_thread())
+               if( !ml_get_interrupts_enabled() )
+                       panic("unsafe act_set_kperf operation");
+
+       act_set_ast( thread, AST_KPERF );
+}
+
+#if CONFIG_MACF
+void
+act_set_astmacf(
+       thread_t        thread)
+{
+       act_set_ast( thread, AST_MACF);
 }
+#endif
index 93edbc4896bb51ca62e585f2c9728c2136a1829f..7d43919ae6cbb3610b92fa80cd5c518c7c1cd62c 100644 (file)
 
 #include <kern/thread_call.h>
 #include <kern/call_entry.h>
-
 #include <kern/timer_call.h>
 
+#include <libkern/OSAtomic.h>
+
 #include <sys/kdebug.h>
 
 
-static zone_t          thread_call_zone;
+static zone_t                  thread_call_zone;
+static struct wait_queue       daemon_wqueue;
 
 struct thread_call_group {
        queue_head_t            pending_queue;
        uint32_t                pending_count;
 
        queue_head_t            delayed_queue;
+       uint32_t                delayed_count;
 
        timer_call_data_t       delayed_timer;
+       timer_call_data_t       dealloc_timer;
 
        struct wait_queue       idle_wqueue;
-       struct wait_queue       daemon_wqueue;
        uint32_t                idle_count, active_count;
-};
-
-typedef struct thread_call_group       *thread_call_group_t;
-
-static struct thread_call_group                thread_call_group0;
-
-static boolean_t                       thread_call_daemon_awake;
-
-#define thread_call_thread_min 4
-
-#define internal_call_count    768
-
-static thread_call_data_t      internal_call_storage[internal_call_count];
-static queue_head_t                    thread_call_internal_queue;
-
-static __inline__ thread_call_t                _internal_call_allocate(void);
-
-static __inline__ void _internal_call_release(
-                                                       thread_call_t           call);
 
-static __inline__ boolean_t    _pending_call_enqueue(
-                                                               thread_call_t           call,
-                                                               thread_call_group_t     group),
-                                                       _delayed_call_enqueue(
-                                                               thread_call_t           call,
-                                                               thread_call_group_t     group,
-                                                               uint64_t                        deadline),
-                                                       _call_dequeue(
-                                                               thread_call_t           call,
-                                                               thread_call_group_t     group);
+       integer_t               pri;
+       uint32_t                target_thread_count;
+       uint64_t                idle_timestamp;
 
-static __inline__ void thread_call_wake(
-                                                       thread_call_group_t     group);
-
-static __inline__ void _set_delayed_call_timer(
-                                                       thread_call_t           call,
-                                                       thread_call_group_t     group);
-                                       
-static boolean_t       _remove_from_pending_queue(
-                                               thread_call_func_t              func,
-                                               thread_call_param_t             param0,
-                                               boolean_t                               remove_all),
-                                       _remove_from_delayed_queue(
-                                               thread_call_func_t              func,
-                                               thread_call_param_t             param0,
-                                               boolean_t                               remove_all);
+       uint32_t                flags;
+       sched_call_t            sched_call;
+};
 
-static void            thread_call_daemon(
-                                       thread_call_group_t             group),
-                               thread_call_thread(
-                                       thread_call_group_t             group);
+typedef struct thread_call_group       *thread_call_group_t;
 
-extern void            thread_call_delayed_timer(
-                                       timer_call_param_t              p0,
-                                       timer_call_param_t              p1);
+#define TCG_PARALLEL           0x01
+#define TCG_DEALLOC_ACTIVE     0x02
+
+#define THREAD_CALL_GROUP_COUNT                4
+#define THREAD_CALL_THREAD_MIN         4
+#define INTERNAL_CALL_COUNT            768
+#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * 1000 * 1000) /* 5 ms */
+#define THREAD_CALL_ADD_RATIO          4
+#define THREAD_CALL_MACH_FACTOR_CAP    3
+
+static struct thread_call_group        thread_call_groups[THREAD_CALL_GROUP_COUNT];
+static boolean_t               thread_call_daemon_awake;
+static thread_call_data_t      internal_call_storage[INTERNAL_CALL_COUNT];
+static queue_head_t            thread_call_internal_queue;
+static uint64_t                thread_call_dealloc_interval_abs;
+
+static __inline__ thread_call_t        _internal_call_allocate(void);
+static __inline__ void         _internal_call_release(thread_call_t call);
+static __inline__ boolean_t    _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
+static __inline__ boolean_t    _delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline);
+static __inline__ boolean_t    _call_dequeue(thread_call_t call, thread_call_group_t group);
+static __inline__ void         thread_call_wake(thread_call_group_t group);
+static __inline__ void         _set_delayed_call_timer(thread_call_t call, thread_call_group_t group);
+static boolean_t               _remove_from_pending_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
+static boolean_t               _remove_from_delayed_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
+static void                    thread_call_daemon(void *arg);
+static void                    thread_call_thread(thread_call_group_t group, wait_result_t wres);
+extern void                    thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
+static void                    thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
+static void                    thread_call_group_setup(thread_call_group_t group, thread_call_priority_t pri, uint32_t target_thread_count, boolean_t parallel);
+static void                    sched_call_thread(int type, thread_t thread);
+static void                    thread_call_start_deallocate_timer(thread_call_group_t group);
+static void                    thread_call_wait_locked(thread_call_t call);
 
 #define qe(x)          ((queue_entry_t)(x))
 #define TC(x)          ((thread_call_t)(x))
@@ -131,6 +123,7 @@ lck_mtx_t           thread_call_lock_data;
 lck_spin_t             thread_call_lock_data;
 #endif
 
+
 #define thread_call_lock_spin()                        \
        lck_mtx_lock_spin_always(&thread_call_lock_data)
 
@@ -138,6 +131,158 @@ lck_spin_t                thread_call_lock_data;
        lck_mtx_unlock_always(&thread_call_lock_data)
 
 
+static inline spl_t
+disable_ints_and_lock(void)
+{
+       spl_t s;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       return s;
+}
+
+static inline void 
+enable_ints_and_unlock(void)
+{
+       thread_call_unlock();
+       (void)spllo();
+}
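
Callers pair these helpers around queue manipulation. Note that in this version enable_ints_and_unlock() lowers to spllo() rather than restoring the level saved by disable_ints_and_lock(), so the returned spl_t goes unused; a sketch of the idiom:

    static void example_locked_update(void)
    {
            spl_t s;

            s = disable_ints_and_lock();    /* splsched() + thread_call_lock_spin() */
            /* ... inspect or modify the thread call queues ... */
            (void) s;                       /* unlock path drops to spllo(); s is unused */
            enable_ints_and_unlock();
    }
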
+
+
+static inline boolean_t
+group_isparallel(thread_call_group_t group)
+{
+       return ((group->flags & TCG_PARALLEL) != 0);
+}
+
+static boolean_t
+thread_call_group_should_add_thread(thread_call_group_t group) 
+{
+       uint32_t thread_count;
+
+       if (!group_isparallel(group)) {
+               if (group->pending_count > 0 && group->active_count == 0) {
+                       return TRUE;
+               }
+
+               return FALSE;
+       }
+
+       if (group->pending_count > 0) {
+               if (group->idle_count > 0) {
+                       panic("Pending work, but threads are idle?");
+               }
+
+               thread_count = group->active_count;
+
+               /*
+                * Add a thread if either there are no threads,
+                * the group has fewer than its target number of
+                * threads, or the amount of work is large relative
+                * to the number of threads.  In the last case, pay attention
+                * to the total load on the system, and back off if 
+                * to the total load on the system, and back off if
+                * it's high.
+               if ((thread_count == 0) ||
+                       (thread_count < group->target_thread_count) ||
+                       ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) && 
+                        (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
+                       return TRUE;
+               }
+       }
+                       
+       return FALSE;
+}
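
For a parallel group, the heuristic grows the pool when work backs up: with target_thread_count = 1 and 4 active threads, for example, a fifth thread is added only once pending_count exceeds THREAD_CALL_ADD_RATIO * 4 = 16, and only while sched_mach_factor is below THREAD_CALL_MACH_FACTOR_CAP. A simplified standalone restatement of the parallel-group predicate (the global load factor is stubbed; the serial case and the idle-thread sanity check are omitted):

    #include <stdbool.h>
    #include <stdint.h>

    #define THREAD_CALL_ADD_RATIO           4
    #define THREAD_CALL_MACH_FACTOR_CAP     3

    static int32_t fake_mach_factor = 1;    /* stand-in for sched_mach_factor */

    static bool parallel_group_should_add_thread(uint32_t pending,
        uint32_t active, uint32_t target)
    {
            if (pending == 0)
                    return false;           /* no work: never grow */

            return (active == 0) ||
                   (active < target) ||
                   ((pending > THREAD_CALL_ADD_RATIO * active) &&
                    (fake_mach_factor < THREAD_CALL_MACH_FACTOR_CAP));
    }
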
+
+static inline integer_t
+thread_call_priority_to_sched_pri(thread_call_priority_t pri) 
+{
+       switch (pri) {
+       case THREAD_CALL_PRIORITY_HIGH:
+               return BASEPRI_PREEMPT;
+       case THREAD_CALL_PRIORITY_KERNEL:
+               return BASEPRI_KERNEL;
+       case THREAD_CALL_PRIORITY_USER:
+               return BASEPRI_DEFAULT;
+       case THREAD_CALL_PRIORITY_LOW:
+               return DEPRESSPRI;
+       default:
+               panic("Invalid priority.");
+       }
+
+       return 0;
+}
+
+/* Lock held */
+static inline thread_call_group_t
+thread_call_get_group(
+               thread_call_t call)
+{
+       thread_call_priority_t  pri = call->tc_pri;
+
+       assert(pri == THREAD_CALL_PRIORITY_LOW ||
+                       pri == THREAD_CALL_PRIORITY_USER ||
+                       pri == THREAD_CALL_PRIORITY_KERNEL ||
+                       pri == THREAD_CALL_PRIORITY_HIGH);
+
+       return &thread_call_groups[pri];
+}
+
+static void
+thread_call_group_setup(
+               thread_call_group_t             group, 
+               thread_call_priority_t          pri,
+               uint32_t                        target_thread_count,
+               boolean_t                       parallel)
+{
+       queue_init(&group->pending_queue);
+       queue_init(&group->delayed_queue);
+
+       timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
+       timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
+
+       wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+
+       group->target_thread_count = target_thread_count;
+       group->pri = thread_call_priority_to_sched_pri(pri);
+
+       group->sched_call = sched_call_thread; 
+       if (parallel) {
+               group->flags |= TCG_PARALLEL;
+               group->sched_call = NULL;
+       } 
+}
+
+/*
+ * Simple wrapper for creating threads bound to 
+ * thread call groups.
+ */
+static kern_return_t
+thread_call_thread_create(
+               thread_call_group_t             group)
+{
+       thread_t thread;
+       kern_return_t result;
+
+       result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, group->pri, &thread);
+       if (result != KERN_SUCCESS) {
+               return result;
+       }
+
+       if (group->pri < BASEPRI_PREEMPT) {
+               /*
+                * New-style thread calls don't get to run to completion
+                * in the kernel if there are higher-priority threads
+                * available.
+                */
+               thread_set_eager_preempt(thread);
+       }
+
+       thread_deallocate(thread);
+       return KERN_SUCCESS;
+}
+
 /*
  *     thread_call_initialize:
  *
@@ -148,11 +293,9 @@ void
 thread_call_initialize(void)
 {
        thread_call_t                   call;
-       thread_call_group_t             group = &thread_call_group0;
        kern_return_t                   result;
-       thread_t                                thread;
-       int                                             i;
-       spl_t                                   s;
+       thread_t                        thread;
+       int                             i;
 
        i = sizeof (thread_call_data_t);
        thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
@@ -169,21 +312,21 @@ thread_call_initialize(void)
 #else
         lck_spin_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
 #endif
-       queue_init(&group->pending_queue);
-       queue_init(&group->delayed_queue);
 
-       s = splsched();
-       thread_call_lock_spin();
+       nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
+       wait_queue_init(&daemon_wqueue, SYNC_POLICY_FIFO);
 
-       timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE);
 
-       wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
-       wait_queue_init(&group->daemon_wqueue, SYNC_POLICY_FIFO);
+       disable_ints_and_lock();
 
        queue_init(&thread_call_internal_queue);
        for (
-               call = internal_call_storage;
-                       call < &internal_call_storage[internal_call_count];
+                       call = internal_call_storage;
+                       call < &internal_call_storage[INTERNAL_CALL_COUNT];
                        call++) {
 
                enqueue_tail(&thread_call_internal_queue, qe(call));
@@ -191,10 +334,9 @@ thread_call_initialize(void)
 
        thread_call_daemon_awake = TRUE;
 
-       thread_call_unlock();
-       splx(s);
+       enable_ints_and_unlock();
 
-       result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, group, BASEPRI_PREEMPT + 1, &thread);
+       result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread);
        if (result != KERN_SUCCESS)
                panic("thread_call_initialize");
 
@@ -207,7 +349,9 @@ thread_call_setup(
        thread_call_func_t              func,
        thread_call_param_t             param0)
 {
-       call_entry_setup(call, func, param0);
+       bzero(call, sizeof(*call));
+       call_entry_setup((call_entry_t)call, func, param0);
+       call->tc_pri = THREAD_CALL_PRIORITY_HIGH; /* Default priority */
 }
 
 /*
@@ -243,7 +387,7 @@ _internal_call_release(
     thread_call_t              call)
 {
     if (    call >= internal_call_storage                                              &&
-                   call < &internal_call_storage[internal_call_count]          )
+                   call < &internal_call_storage[INTERNAL_CALL_COUNT]          )
                enqueue_head(&thread_call_internal_queue, qe(call));
 }
 
@@ -265,10 +409,16 @@ _pending_call_enqueue(
 {
        queue_head_t            *old_queue;
 
-       old_queue = call_entry_enqueue_tail(call, &group->pending_queue);
+       old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
+
+       if (old_queue == NULL) {
+               call->tc_submit_count++;
+       }
 
        group->pending_count++;
 
+       thread_call_wake(group);
+
        return (old_queue != NULL);
 }
 
@@ -286,16 +436,18 @@ _pending_call_enqueue(
  */
 static __inline__ boolean_t
 _delayed_call_enqueue(
-    thread_call_t              call,
+       thread_call_t           call,
        thread_call_group_t     group,
        uint64_t                deadline)
 {
        queue_head_t            *old_queue;
 
-       old_queue = call_entry_enqueue_deadline(call, &group->delayed_queue, deadline);
+       old_queue = call_entry_enqueue_deadline(CE(call), &group->delayed_queue, deadline);
 
        if (old_queue == &group->pending_queue)
                group->pending_count--;
+       else if (old_queue == NULL) 
+               call->tc_submit_count++;
 
        return (old_queue != NULL);
 }
@@ -316,10 +468,13 @@ _call_dequeue(
 {
        queue_head_t            *old_queue;
 
-       old_queue = call_entry_dequeue(call);
+       old_queue = call_entry_dequeue(CE(call));
 
-       if (old_queue == &group->pending_queue)
-               group->pending_count--;
+       if (old_queue != NULL) {
+               call->tc_finish_count++;
+               if (old_queue == &group->pending_queue)
+                       group->pending_count--;
+       }
 
        return (old_queue != NULL);
 }
@@ -337,7 +492,7 @@ _set_delayed_call_timer(
     thread_call_t              call,
        thread_call_group_t     group)
 {
-    timer_call_enter(&group->delayed_timer, call->deadline, 0);
+    timer_call_enter(&group->delayed_timer, call->tc_call.deadline, 0);
 }
 
 /*
@@ -357,32 +512,32 @@ _remove_from_pending_queue(
     thread_call_param_t                param0,
     boolean_t                          remove_all)
 {
-       boolean_t                       call_removed = FALSE;
+       boolean_t                               call_removed = FALSE;
        thread_call_t                   call;
-       thread_call_group_t             group = &thread_call_group0;
-    
-    call = TC(queue_first(&group->pending_queue));
-    
-    while (!queue_end(&group->pending_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param0                  ) {
+       thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+
+       call = TC(queue_first(&group->pending_queue));
+
+       while (!queue_end(&group->pending_queue, qe(call))) {
+               if (call->tc_call.func == func &&
+                               call->tc_call.param0 == param0) {
                        thread_call_t   next = TC(queue_next(qe(call)));
-               
+
                        _call_dequeue(call, group);
 
                        _internal_call_release(call);
-           
+
                        call_removed = TRUE;
                        if (!remove_all)
                                break;
-               
+
                        call = next;
                }
                else    
                        call = TC(queue_next(qe(call)));
-    }
-    
-    return (call_removed);
+       }
+
+       return (call_removed);
 }
 
 /*
@@ -402,32 +557,32 @@ _remove_from_delayed_queue(
     thread_call_param_t                param0,
     boolean_t                          remove_all)
 {
-    boolean_t                          call_removed = FALSE;
-    thread_call_t                      call;
-       thread_call_group_t             group = &thread_call_group0;
-    
-    call = TC(queue_first(&group->delayed_queue));
-    
-    while (!queue_end(&group->delayed_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param0                  ) {
+       boolean_t                       call_removed = FALSE;
+       thread_call_t                   call;
+       thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+
+       call = TC(queue_first(&group->delayed_queue));
+
+       while (!queue_end(&group->delayed_queue, qe(call))) {
+               if (call->tc_call.func == func  &&
+                               call->tc_call.param0 == param0) {
                        thread_call_t   next = TC(queue_next(qe(call)));
-               
+
                        _call_dequeue(call, group);
-           
+
                        _internal_call_release(call);
-           
+
                        call_removed = TRUE;
                        if (!remove_all)
                                break;
-               
+
                        call = next;
                }
                else    
                        call = TC(queue_next(qe(call)));
-    }
-    
-    return (call_removed);
+       }
+
+       return (call_removed);
 }
 
 #ifndef        __LP64__
@@ -446,38 +601,34 @@ thread_call_func(
     thread_call_param_t                param,
     boolean_t                          unique_call)
 {
-    thread_call_t                      call;
-       thread_call_group_t             group = &thread_call_group0;
-    spl_t                                      s;
-    
-    s = splsched();
-    thread_call_lock_spin();
-    
-    call = TC(queue_first(&group->pending_queue));
-    
+       thread_call_t           call;
+       thread_call_group_t     group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+       spl_t                   s;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       call = TC(queue_first(&group->pending_queue));
+
        while (unique_call && !queue_end(&group->pending_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param                   ) {
+               if (call->tc_call.func == func && call->tc_call.param0 == param) {
                        break;
                }
-       
+
                call = TC(queue_next(qe(call)));
-    }
-    
-    if (!unique_call || queue_end(&group->pending_queue, qe(call))) {
+       }
+
+       if (!unique_call || queue_end(&group->pending_queue, qe(call))) {
                call = _internal_call_allocate();
-               call->func                      = func;
-               call->param0            = param;
-               call->param1            = NULL;
-       
+               call->tc_call.func      = func;
+               call->tc_call.param0    = param;
+               call->tc_call.param1    = NULL;
+
                _pending_call_enqueue(call, group);
-               
-               if (group->active_count == 0)
-                       thread_call_wake(group);
-    }
+       }
 
-    thread_call_unlock();
-    splx(s);
+       thread_call_unlock();
+       splx(s);
 }
 
 #endif /* __LP64__ */
@@ -490,29 +641,29 @@ thread_call_func(
  */
 void
 thread_call_func_delayed(
-    thread_call_func_t         func,
-    thread_call_param_t                param,
-    uint64_t                           deadline)
+               thread_call_func_t              func,
+               thread_call_param_t             param,
+               uint64_t                        deadline)
 {
-    thread_call_t                      call;
-       thread_call_group_t             group = &thread_call_group0;
-    spl_t                                      s;
-    
-    s = splsched();
-    thread_call_lock_spin();
-    
-    call = _internal_call_allocate();
-    call->func                 = func;
-    call->param0               = param;
-    call->param1               = 0;
-    
-    _delayed_call_enqueue(call, group, deadline);
-    
-    if (queue_first(&group->delayed_queue) == qe(call))
-       _set_delayed_call_timer(call, group);
-    
-    thread_call_unlock();
-    splx(s);
+       thread_call_t           call;
+       thread_call_group_t     group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+       spl_t                   s;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       call = _internal_call_allocate();
+       call->tc_call.func      = func;
+       call->tc_call.param0    = param;
+       call->tc_call.param1    = 0;
+
+       _delayed_call_enqueue(call, group, deadline);
+
+       if (queue_first(&group->delayed_queue) == qe(call))
+               _set_delayed_call_timer(call, group);
+
+       thread_call_unlock();
+       splx(s);
 }
 
 /*
@@ -529,29 +680,53 @@ thread_call_func_delayed(
  */
 boolean_t
 thread_call_func_cancel(
-    thread_call_func_t         func,
-    thread_call_param_t                param,
-    boolean_t                          cancel_all)
+               thread_call_func_t              func,
+               thread_call_param_t             param,
+               boolean_t                       cancel_all)
 {
-       boolean_t                       result;
-    spl_t                              s;
-    
-    s = splsched();
-    thread_call_lock_spin();
+       boolean_t       result;
+       spl_t           s;
 
-    if (cancel_all)
+       s = splsched();
+       thread_call_lock_spin();
+
+       if (cancel_all)
                result = _remove_from_pending_queue(func, param, cancel_all) |
-                                               _remove_from_delayed_queue(func, param, cancel_all);
+                       _remove_from_delayed_queue(func, param, cancel_all);
        else
                result = _remove_from_pending_queue(func, param, cancel_all) ||
-                                               _remove_from_delayed_queue(func, param, cancel_all);
-    
-    thread_call_unlock();
-    splx(s);
+                       _remove_from_delayed_queue(func, param, cancel_all);
+
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
 
+/*
+ * Allocate a thread call with a given priority.  Calls with importances
+ * other than THREAD_CALL_PRIORITY_HIGH run in threads with eager
+ * preemption enabled (i.e. they may be aggressively preempted by
+ * higher-priority threads which are not in the normal "urgent" bands).
+ */
+thread_call_t
+thread_call_allocate_with_priority(
+               thread_call_func_t              func,
+               thread_call_param_t             param0,
+               thread_call_priority_t          pri)
+{
+       thread_call_t call;
+
+       if (pri > THREAD_CALL_PRIORITY_LOW) {
+               panic("Invalid pri: %d\n", pri);
+       }
+
+       call = thread_call_allocate(func, param0);
+       call->tc_pri = pri;
+
+       return call;
+}
+
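+#if 0	/* Usage sketch (editor's illustration, not part of this change):
+	   my_work_fn is a hypothetical callback. */
+static void
+my_work_fn(thread_call_param_t p0, thread_call_param_t p1)
+{
+	/* a small amount of deferred work */
+}
+
+static thread_call_t
+my_alloc_example(void)
+{
+	/* kernel-importance worker; eagerly preemptible per the comment above */
+	return (thread_call_allocate_with_priority(my_work_fn, NULL,
+			THREAD_CALL_PRIORITY_KERNEL));
+}
+#endif
+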
 /*
  *     thread_call_allocate:
  *
@@ -559,41 +734,53 @@ thread_call_func_cancel(
  */
 thread_call_t
 thread_call_allocate(
-    thread_call_func_t         func,
-    thread_call_param_t                param0)
+               thread_call_func_t              func,
+               thread_call_param_t             param0)
 {
-    thread_call_t              call = zalloc(thread_call_zone);
+       thread_call_t   call = zalloc(thread_call_zone);
 
-       call_entry_setup(call, func, param0);
+       thread_call_setup(call, func, param0);
+       call->tc_refs = 1;
+       call->tc_flags = THREAD_CALL_ALLOC;
 
-    return (call);
+       return (call);
 }
 
 /*
  *     thread_call_free:
  *
- *     Free a callout entry.
+ *     Release a callout.  If the callout is currently
+ *     executing, it will be freed when all invocations
+ *     finish.
  */
 boolean_t
 thread_call_free(
-    thread_call_t              call)
+               thread_call_t           call)
 {
-    spl_t              s;
-    
-    s = splsched();
-    thread_call_lock_spin();
-    
-    if (call->queue != NULL) {
-           thread_call_unlock();
-           splx(s);
+       spl_t   s;
+       int32_t refs;
 
-           return (FALSE);
-    }
-    
-    thread_call_unlock();
-    splx(s);
-    
-       zfree(thread_call_zone, call);
+       s = splsched();
+       thread_call_lock_spin();
+
+       if (call->tc_call.queue != NULL) {
+               thread_call_unlock();
+               splx(s);
+
+               return (FALSE);
+       }
+
+       refs = --call->tc_refs;
+       if (refs < 0) {
+               panic("Refcount negative: %d\n", refs);
+       }       
+
+       thread_call_unlock();
+       splx(s);
+
+       if (refs == 0) {
+               zfree(thread_call_zone, call);
+       }
 
        return (TRUE);
 }
@@ -608,23 +795,22 @@ thread_call_free(
  */
 boolean_t
 thread_call_enter(
-    thread_call_t              call)
+               thread_call_t           call)
 {
-       boolean_t                               result = TRUE;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
-    
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
+
+       group = thread_call_get_group(call);
+
        s = splsched();
        thread_call_lock_spin();
-    
-    if (call->queue != &group->pending_queue) {
-       result = _pending_call_enqueue(call, group);
-               
-               if (group->active_count == 0)
-                       thread_call_wake(group);
+
+       if (call->tc_call.queue != &group->pending_queue) {
+               result = _pending_call_enqueue(call, group);
        }
 
-       call->param1 = 0;
+       call->tc_call.param1 = 0;
 
        thread_call_unlock();
        splx(s);
@@ -634,24 +820,23 @@ thread_call_enter(
 
 boolean_t
 thread_call_enter1(
-    thread_call_t                      call,
-    thread_call_param_t                param1)
+               thread_call_t                   call,
+               thread_call_param_t             param1)
 {
-       boolean_t                               result = TRUE;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
-    
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
+
+       group = thread_call_get_group(call);
+
        s = splsched();
        thread_call_lock_spin();
-    
-    if (call->queue != &group->pending_queue) {
-       result = _pending_call_enqueue(call, group);
-               
-               if (group->active_count == 0)
-                       thread_call_wake(group);
+
+       if (call->tc_call.queue != &group->pending_queue) {
+               result = _pending_call_enqueue(call, group);
        }
 
-       call->param1 = param1;
+       call->tc_call.param1 = param1;
 
        thread_call_unlock();
        splx(s);
@@ -670,12 +855,14 @@ thread_call_enter1(
  */
 boolean_t
 thread_call_enter_delayed(
-    thread_call_t              call,
-    uint64_t                   deadline)
+               thread_call_t           call,
+               uint64_t                        deadline)
 {
-       boolean_t                               result = TRUE;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
+       
+       group = thread_call_get_group(call);
 
        s = splsched();
        thread_call_lock_spin();
@@ -685,7 +872,7 @@ thread_call_enter_delayed(
        if (queue_first(&group->delayed_queue) == qe(call))
                _set_delayed_call_timer(call, group);
 
-       call->param1 = 0;
+       call->tc_call.param1 = 0;
 
        thread_call_unlock();
        splx(s);
@@ -695,13 +882,15 @@ thread_call_enter_delayed(
 
 boolean_t
 thread_call_enter1_delayed(
-    thread_call_t                      call,
-    thread_call_param_t                param1,
-    uint64_t                           deadline)
+               thread_call_t                   call,
+               thread_call_param_t             param1,
+               uint64_t                        deadline)
 {
-       boolean_t                               result = TRUE;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
+
+       group = thread_call_get_group(call);
 
        s = splsched();
        thread_call_lock_spin();
@@ -711,7 +900,7 @@ thread_call_enter1_delayed(
        if (queue_first(&group->delayed_queue) == qe(call))
                _set_delayed_call_timer(call, group);
 
-       call->param1 = param1;
+       call->tc_call.param1 = param1;
 
        thread_call_unlock();
        splx(s);
@@ -729,23 +918,61 @@ thread_call_enter1_delayed(
  */
 boolean_t
 thread_call_cancel(
-    thread_call_t              call)
+               thread_call_t           call)
 {
-       boolean_t                               result;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
-    
+       boolean_t               result;
+       thread_call_group_t     group;
+       spl_t                   s;
+
+       group = thread_call_get_group(call);
+
        s = splsched();
        thread_call_lock_spin();
 
        result = _call_dequeue(call, group);
-       
+
        thread_call_unlock();
        splx(s);
 
        return (result);
 }
 
+/*
+ * Cancel a thread call.  If it cannot be cancelled (i.e.
+ * is already in flight), waits for the most recent invocation
+ * to finish.  Note that if clients re-submit this thread call,
+ * it may still be pending or in flight when thread_call_cancel_wait
+ * returns, but all requests to execute this work item prior
+ * to the call to thread_call_cancel_wait will have finished.
+ */
+boolean_t
+thread_call_cancel_wait(
+               thread_call_t           call)
+{
+       boolean_t               result;
+       thread_call_group_t     group;
+
+       if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
+               panic("%s: Can't wait on thread call whose storage I don't own.", __FUNCTION__);
+       }
+
+       group = thread_call_get_group(call);
+
+       (void) splsched();
+       thread_call_lock_spin();
+
+       result = _call_dequeue(call, group);
+       if (result == FALSE) {
+               thread_call_wait_locked(call);
+       }
+
+       thread_call_unlock();
+       (void) spllo();
+
+       return result;
+}
+
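+#if 0	/* Teardown sketch (editor's illustration, not part of this change):
+	   quiesce a client-owned call before freeing the state it references.
+	   tc is a hypothetical thread_call_t from thread_call_allocate(). */
+static void
+my_teardown_example(thread_call_t tc)
+{
+	if (thread_call_cancel_wait(tc) == FALSE) {
+		/* could not dequeue; the prior invocation has now drained */
+	}
+	(void) thread_call_free(tc);
+}
+#endif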
+
 #ifndef        __LP64__
 
 /*
@@ -761,16 +988,18 @@ thread_call_is_delayed(
        thread_call_t           call,
        uint64_t                        *deadline)
 {
-       boolean_t                               result = FALSE;
-       thread_call_group_t             group = &thread_call_group0;
-       spl_t                                   s;
+       boolean_t                       result = FALSE;
+       thread_call_group_t             group;
+       spl_t                           s;
+
+       group = thread_call_get_group(call);
 
        s = splsched();
        thread_call_lock_spin();
 
-       if (call->queue == &group->delayed_queue) {
+       if (call->tc_call.queue == &group->delayed_queue) {
                if (deadline != NULL)
-                       *deadline = call->deadline;
+                       *deadline = call->tc_call.deadline;
                result = TRUE;
        }
 
@@ -791,65 +1020,137 @@ thread_call_is_delayed(
  *     create additional call threads.
  *
  *     Called with thread_call_lock held.
+ *
+ *     For the high-priority group, wakeup/creation is done only if there
+ *     are no threads currently running.
  */
 static __inline__ void
 thread_call_wake(
        thread_call_group_t             group)
 {
-       if (group->idle_count > 0 && wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
-               group->idle_count--; group->active_count++;
-       }
-       else
-       if (!thread_call_daemon_awake) {
-               thread_call_daemon_awake = TRUE;
-               wait_queue_wakeup_one(&group->daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
+       /* 
+        * New behavior: use threads if you've got 'em.
+        * Traditional behavior: wake only if no threads running.
+        */
+       if (group_isparallel(group) || group->active_count == 0) {
+               if (wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
+                       group->idle_count--; group->active_count++;
+
+                       if (group->idle_count == 0) {
+                               timer_call_cancel(&group->dealloc_timer);
+                               group->flags &= ~TCG_DEALLOC_ACTIVE;
+                       }
+               } else {
+                       if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
+                               thread_call_daemon_awake = TRUE;
+                               wait_queue_wakeup_one(&daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
+                       }
+               }
        }
 }
 
 /*
  *     sched_call_thread:
  *
- *     Call out invoked by the scheduler.
+ *     Call out invoked by the scheduler.  Used only for high-priority
+ *     thread call group.
  */
 static void
 sched_call_thread(
-       int                             type,
-__unused       thread_t                thread)
+               int                             type,
+               __unused        thread_t                thread)
 {
-       thread_call_group_t             group = &thread_call_group0;
+       thread_call_group_t             group;
+
+       group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; /* XXX */
 
        thread_call_lock_spin();
 
        switch (type) {
 
-       case SCHED_CALL_BLOCK:
-               if (--group->active_count == 0 && group->pending_count > 0)
-                       thread_call_wake(group);
-               break;
+               case SCHED_CALL_BLOCK:
+                       --group->active_count;
+                       if (group->pending_count > 0)
+                               thread_call_wake(group);
+                       break;
 
-       case SCHED_CALL_UNBLOCK:
-               group->active_count++;
-               break;
+               case SCHED_CALL_UNBLOCK:
+                       group->active_count++;
+                       break;
        }
 
        thread_call_unlock();
 }
 
+/* 
+ * Interrupts disabled, lock held; returns the same way. 
+ * Only called on thread calls whose storage we own.  Wakes up
+ * anyone who might be waiting on this work item and frees it
+ * if the client has so requested.
+ */
+static void
+thread_call_finish(thread_call_t call)
+{
+       boolean_t dowake = FALSE;
+
+       call->tc_finish_count++;
+       call->tc_refs--;
+
+       if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
+               dowake = TRUE;
+               call->tc_flags &= ~THREAD_CALL_WAIT;
+
+               /* 
+                * Dropping lock here because the sched call for the 
+                * high-pri group can take the big lock from under
+                * a thread lock.
+                */
+               thread_call_unlock();
+               thread_wakeup((event_t)call);
+               thread_call_lock_spin();
+       }
+
+       if (call->tc_refs == 0) {
+               if (dowake) {
+                       panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
+               }
+
+               enable_ints_and_unlock();
+
+               zfree(thread_call_zone, call);
+
+               (void)disable_ints_and_lock();
+       }
+
+}
+
 /*
  *     thread_call_thread:
  */
 static void
 thread_call_thread(
-       thread_call_group_t             group)
+               thread_call_group_t             group,
+               wait_result_t                   wres)
 {
-       thread_t                self = current_thread();
+       thread_t        self = current_thread();
+       boolean_t       canwait;
 
-       (void) splsched();
-       thread_call_lock_spin();
+       /*
+        * A wakeup with THREAD_INTERRUPTED indicates that 
+        * we should terminate.
+        */
+       if (wres == THREAD_INTERRUPTED) {
+               thread_terminate(self);
+
+               /* NOTREACHED */
+               panic("thread_terminate() returned?");
+       }
+
+       (void)disable_ints_and_lock();
 
-       thread_sched_call(self, sched_call_thread);
+       thread_sched_call(self, group->sched_call);
 
-    while (group->pending_count > 0) {
+       while (group->pending_count > 0) {
                thread_call_t                   call;
                thread_call_func_t              func;
                thread_call_param_t             param0, param1;
@@ -857,142 +1158,315 @@ thread_call_thread(
                call = TC(dequeue_head(&group->pending_queue));
                group->pending_count--;
 
-               func = call->func;
-               param0 = call->param0;
-               param1 = call->param1;
-       
-               call->queue = NULL;
+               func = call->tc_call.func;
+               param0 = call->tc_call.param0;
+               param1 = call->tc_call.param1;
+
+               call->tc_call.queue = NULL;
 
                _internal_call_release(call);
 
-               thread_call_unlock();
-               (void) spllo();
+               /*
+                * Can only do wakeups for thread calls whose storage
+                * we control.
+                */
+               if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
+                       canwait = TRUE;
+                       call->tc_refs++;        /* Delay free until we're done */
+               } else
+                       canwait = FALSE;
+
+               enable_ints_and_unlock();
 
                KERNEL_DEBUG_CONSTANT(
-                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
-                               func, param0, param1, 0, 0);
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
+                               VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
 
                (*func)(param0, param1);
 
                if (get_preemption_level() != 0) {
                        int pl = get_preemption_level();
                        panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
-                                 pl, func, param0, param1);
+                                       pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
                }
-               
+
                (void)thread_funnel_set(self->funnel_lock, FALSE);              /* XXX */
 
-               (void) splsched();
-               thread_call_lock_spin();
-    }
+               (void) disable_ints_and_lock();
+               
+               if (canwait) {
+                       /* Frees if so desired */
+                       thread_call_finish(call);
+               }
+       }
 
        thread_sched_call(self, NULL);
        group->active_count--;
 
-    if (group->idle_count < thread_call_thread_min) {
+       if (group_isparallel(group)) {
+               /*
+                * For new style of thread group, thread always blocks. 
+                * If we have more than the target number of threads,
+                * and this is the first to block, and it isn't active 
+                * already, set a timer for deallocating a thread if we 
+                * continue to have a surplus.
+                */
                group->idle_count++;
 
-               wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0);
-       
-               thread_call_unlock();
-               (void) spllo();
+               if (group->idle_count == 1) {
+                       group->idle_timestamp = mach_absolute_time();
+               }   
+
+               if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
+                               ((group->active_count + group->idle_count) > group->target_thread_count)) {
+                       group->flags |= TCG_DEALLOC_ACTIVE;
+                       thread_call_start_deallocate_timer(group);
+               }   
+
+               /* Wait for more work (or termination) */
+               wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0); 
+               if (wres != THREAD_WAITING) {
+                       panic("kcall worker unable to assert wait?");
+               }   
+
+               enable_ints_and_unlock();
 
                thread_block_parameter((thread_continue_t)thread_call_thread, group);
-               /* NOTREACHED */
-    }
+       } else {
+               if (group->idle_count < group->target_thread_count) {
+                       group->idle_count++;
 
-    thread_call_unlock();
-    (void) spllo();
-    
-    thread_terminate(self);
+                       wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */
+
+                       enable_ints_and_unlock();
+
+                       thread_block_parameter((thread_continue_t)thread_call_thread, group);
+                       /* NOTREACHED */
+               }
+       }
+
+       enable_ints_and_unlock();
+
+       thread_terminate(self);
        /* NOTREACHED */
 }
 
 /*
- *     thread_call_daemon:
+ *     thread_call_daemon: walk list of groups, allocating
+ *     threads if appropriate (as determined by 
+ *     thread_call_group_should_add_thread()).  
  */
 static void
-thread_call_daemon_continue(
-       thread_call_group_t             group)
+thread_call_daemon_continue(__unused void *arg)
 {
-       kern_return_t   result;
-       thread_t                thread;
-
-    (void) splsched();
-    thread_call_lock_spin();
-        
-       while (group->active_count == 0 && group->pending_count > 0) {
-               group->active_count++;
-
-               thread_call_unlock();
-               (void) spllo();
-       
-               result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, BASEPRI_PREEMPT, &thread);
-               if (result != KERN_SUCCESS)
-                       panic("thread_call_daemon");
+       int             i;
+       kern_return_t   kr;
+       thread_call_group_t group;
+
+       (void)disable_ints_and_lock();
+
+       /* Starting at zero happens to be high-priority first. */
+       for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
+               group = &thread_call_groups[i];
+               while (thread_call_group_should_add_thread(group)) {
+                       group->active_count++;
+
+                       enable_ints_and_unlock();
+
+                       kr = thread_call_thread_create(group);
+                       if (kr != KERN_SUCCESS) {
+                               /*
+                                * On failure, just pause for a moment and give up. 
+                                * We can try again later.
+                                */
+                               delay(10000); /* 10 ms */
+                               (void)disable_ints_and_lock();
+                               goto out;
+                       }
+
+                       (void)disable_ints_and_lock();
+               }
+       }
 
-               thread_deallocate(thread);
+out:
+       thread_call_daemon_awake = FALSE;
+       wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
 
-               (void) splsched();
-               thread_call_lock_spin();
-    }
+       enable_ints_and_unlock();
 
-    thread_call_daemon_awake = FALSE;
-    wait_queue_assert_wait(&group->daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
-    
-    thread_call_unlock();
-       (void) spllo();
-    
-       thread_block_parameter((thread_continue_t)thread_call_daemon_continue, group);
+       thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
        /* NOTREACHED */
 }
 
 static void
 thread_call_daemon(
-       thread_call_group_t             group)
+               __unused void    *arg)
 {
        thread_t        self = current_thread();
 
        self->options |= TH_OPT_VMPRIV;
        vm_page_free_reserve(2);        /* XXX */
-    
-    thread_call_daemon_continue(group);
-    /* NOTREACHED */
+
+       thread_call_daemon_continue(NULL);
+       /* NOTREACHED */
+}
+
+/*
+ * Schedule timer to deallocate a worker thread if we have a surplus 
+ * of threads (in excess of the group's target) and at least one thread
+ * is idle the whole time.
+ */
+static void
+thread_call_start_deallocate_timer(
+               thread_call_group_t group)
+{
+       uint64_t deadline;
+       boolean_t onqueue;
+
+       assert(group->idle_count > 0);
+
+       group->flags |= TCG_DEALLOC_ACTIVE;
+       deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
+       onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
+
+       if (onqueue) {
+               panic("Deallocate timer already active?");
+       }
 }
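+
+/*
+ * Editor's illustration (not part of this change): thread_call_dealloc_interval_abs
+ * is an absolute-time span; a plausible one-time initialization would be
+ *
+ *	nanoseconds_to_absolutetime(5 * NSEC_PER_SEC,
+ *			&thread_call_dealloc_interval_abs);
+ *
+ * where the five-second figure is an assumption for illustration only.
+ */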
 
 void
 thread_call_delayed_timer(
-       timer_call_param_t                              p0,
-       __unused timer_call_param_t             p1
+               timer_call_param_t              p0,
+               __unused timer_call_param_t     p1
 )
 {
-    thread_call_t                      call;
+       thread_call_t                   call;
        thread_call_group_t             group = p0;
-       boolean_t                               new_pending = FALSE;
        uint64_t                                timestamp;
 
        thread_call_lock_spin();
 
        timestamp = mach_absolute_time();
-    
-    call = TC(queue_first(&group->delayed_queue));
-    
-    while (!queue_end(&group->delayed_queue, qe(call))) {
-       if (call->deadline <= timestamp) {
+
+       call = TC(queue_first(&group->delayed_queue));
+
+       while (!queue_end(&group->delayed_queue, qe(call))) {
+               if (call->tc_call.deadline <= timestamp) {
                        _pending_call_enqueue(call, group);
-                       new_pending = TRUE;
                }
                else
                        break;
-           
+
                call = TC(queue_first(&group->delayed_queue));
-    }
+       }
 
        if (!queue_end(&group->delayed_queue, qe(call)))
                _set_delayed_call_timer(call, group);
 
-    if (new_pending && group->active_count == 0)
-               thread_call_wake(group);
+       thread_call_unlock();
+}
+
+/*
+ * Timer callback to tell a thread to terminate if
+ * we have an excess of threads and at least one has been
+ * idle for a long time.
+ */
+static void
+thread_call_dealloc_timer(
+               timer_call_param_t              p0,
+               __unused timer_call_param_t     p1)
+{
+       thread_call_group_t group = (thread_call_group_t)p0;
+       uint64_t now;
+       kern_return_t res;
+       boolean_t terminated = FALSE;
+       
+       thread_call_lock_spin();
+
+       now = mach_absolute_time();
+       if (group->idle_count > 0) {
+               if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
+                       terminated = TRUE;
+                       group->idle_count--;
+                       res = wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTED, -1);
+                       if (res != KERN_SUCCESS) {
+                               panic("Unable to wake up idle thread for termination?");
+                       }
+               }
+
+       }
+
+       /*
+        * If we still have an excess of threads, schedule another
+        * invocation of this function.
+        */
+       if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
+               /*
+                * If we killed someone just now, push out the
+                * next deadline.
+                */
+               if (terminated) {
+                       group->idle_timestamp = now;
+               }
 
-    thread_call_unlock();
+               thread_call_start_deallocate_timer(group);
+       } else {
+               group->flags &= ~TCG_DEALLOC_ACTIVE;
+       }
+
+       thread_call_unlock();
 }
+
+/*
+ * Wait for all requested invocations of a thread call prior to now
+ * to finish.  Can only be invoked on thread calls whose storage we manage.  
+ * Just waits for the finish count to catch up to the submit count we find
+ * at the beginning of our wait.
+ */
+static void
+thread_call_wait_locked(thread_call_t call)
+{
+       uint64_t submit_count;
+       wait_result_t res;
+
+       assert(call->tc_flags & THREAD_CALL_ALLOC);
+
+       submit_count = call->tc_submit_count;
+
+       while (call->tc_finish_count < submit_count) {
+               call->tc_flags |= THREAD_CALL_WAIT;
+
+               res = assert_wait(call, THREAD_UNINT);
+               if (res != THREAD_WAITING) {
+                       panic("Unable to assert wait?");
+               }
+
+               thread_call_unlock();
+               (void) spllo();
+
+               res = thread_block(NULL);
+               if (res != THREAD_AWAKENED) {
+                       panic("Awoken with %d?", res);
+               }
+       
+               (void) splsched();
+               thread_call_lock_spin();
+       }
+}
+
+/*
+ * Determine whether a thread call is either on a queue or
+ * currently being executed.
+ */
+boolean_t
+thread_call_isactive(thread_call_t call) 
+{
+       boolean_t active;
+
+       disable_ints_and_lock();
+       active = (call->tc_submit_count > call->tc_finish_count);
+       enable_ints_and_unlock();
+
+       return active;
+}
+
index aa38f0ddaa20f932cd52625b59a7d6136f373293..e2836e293dd1c2ec7f8a268165aacdd2ee4a34c5 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Declarations for thread-based callouts.
+
+/*!
+ @header thread_call.h
+ @discussion Facilities for executing work asynchronously.
  */
 
 #ifndef _KERN_THREAD_CALL_H_
 
 #include <sys/cdefs.h>
 
-typedef struct call_entry      *thread_call_t;
-typedef void                           *thread_call_param_t;
-typedef void                           (*thread_call_func_t)(
-                                                                       thread_call_param_t             param0,
-                                                                       thread_call_param_t             param1);
+struct thread_call;
+typedef struct thread_call *thread_call_t;
+
+typedef void *thread_call_param_t;
+typedef void (*thread_call_func_t)(
+                                       thread_call_param_t     param0,
+                                       thread_call_param_t     param1);
+/*!
+ @enum thread_call_priority_t
+ @discussion Thread call priorities should not be assumed to have any specific
+ numerical value; they should be interpreted as importances or roles for work
+ items, whose underlying scheduling the subsystem manages appropriately.
+ @constant THREAD_CALL_PRIORITY_HIGH Importance above everything but realtime.
+ Thread calls allocated with this priority execute at extremely high priority, 
+ above everything but realtime threads.  They are generally executed serially.
+ Though they may execute concurrently under some circumstances, no fan-out is implied.  
+ These work items should do very small amounts of work or risk disrupting system
+ responsiveness.
+ @constant THREAD_CALL_PRIORITY_KERNEL Importance similar to that of normal kernel
+ threads.
+ @constant THREAD_CALL_PRIORITY_USER Importance similar to that of normal user threads.
+ @constant THREAD_CALL_PRIORITY_LOW Very low importance.
+ */
+typedef enum {
+       THREAD_CALL_PRIORITY_HIGH       = 0,
+       THREAD_CALL_PRIORITY_KERNEL     = 1,
+       THREAD_CALL_PRIORITY_USER       = 2,
+       THREAD_CALL_PRIORITY_LOW        = 3
+} thread_call_priority_t;
+
 __BEGIN_DECLS
 
+/*!
+ @function thread_call_enter
+ @abstract Submit a thread call work item for immediate execution.
+ @discussion If the work item is already scheduled for delayed execution, and it has
+ not yet begun to run, that delayed invocation will be cancelled.  Note that if a
+ thread call is rescheduled from its own callback, then multiple invocations of the
+ callback may be in flight at the same time.
+ @result TRUE if the call was already pending for either delayed or immediate
+ execution, FALSE otherwise.
+ @param call The thread call to execute.
+ */
 extern boolean_t       thread_call_enter(
                                                thread_call_t           call);
-
+/*!
+ @function thread_call_enter1
+ @abstract Submit a thread call work item for immediate execution, with an extra parameter.
+ @discussion This routine is identical to thread_call_enter(), except that 
+ the second parameter to the callback is specified.
+ @result TRUE if the call was already pending for either delayed or immediate
+ execution, FALSE otherwise.
+ @param call The thread call to execute.
+ @param param1 Parameter to pass callback.
+ */
 extern boolean_t       thread_call_enter1(
-                                               thread_call_t                   call,
-                                               thread_call_param_t             param1);
-
+                                               thread_call_t           call,
+                                               thread_call_param_t     param1);
+
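+#if 0	/* Sketch (editor's illustration, not part of this change): passing a
+	   per-request context through param1; param0 was bound at allocation.
+	   struct my_request is hypothetical. */
+static void
+my_submit_example(thread_call_t tc, struct my_request *req)
+{
+	if (thread_call_enter1(tc, (thread_call_param_t)req)) {
+		/* an earlier submission was still pending; its param1 is replaced */
+	}
+}
+#endif
+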
+/*! 
+ @function thread_call_enter_delayed
+ @abstract Submit a thread call to be executed at some point in the future.
+ @discussion If the work item is already scheduled for delayed or immediate execution, 
+ and it has not yet begun to run, that invocation will be cancelled in favor of execution
+ at the newly specified time.  Note that if a thread call is rescheduled from its own callback, 
+ then multiple invocations of the callback may be in flight at the same time.
+ @result TRUE if the call was already pending for either delayed or immediate
+ execution, FALSE otherwise.
+ @param call The thread call to execute.
+ @param deadline Time, in absolute time units, at which to execute callback.
+ */
 extern boolean_t       thread_call_enter_delayed(
                                                thread_call_t           call,
-                                               uint64_t                        deadline);
-
+                                               uint64_t                deadline);
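+
+#if 0	/* Sketch (editor's illustration, not part of this change): a
+	   self-rearming one-second tick.  tick_call and period_abs are
+	   hypothetical; period_abs would come from
+	   nanoseconds_to_absolutetime(NSEC_PER_SEC, &period_abs). */
+static thread_call_t tick_call;
+static uint64_t period_abs;
+
+static void
+tick_fn(thread_call_param_t p0, thread_call_param_t p1)
+{
+	/* ... periodic work ... */
+	thread_call_enter_delayed(tick_call, mach_absolute_time() + period_abs);
+}
+#endif
+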
+/*! 
+ @function thread_call_enter1_delayed
+ @abstract Submit a thread call to be executed at some point in the future, with an extra parameter.
+ @discussion This routine is identical to thread_call_enter_delayed(),
+ except that a second parameter to the callback is specified.
+ @result TRUE if the call was already pending for either delayed or immediate
+ execution, FALSE otherwise.
+ @param call The thread call to execute.
+ @param param1 Second parameter to callback.
+ @param deadline Time, in absolute time units, at which to execute callback.
+ */
 extern boolean_t       thread_call_enter1_delayed(
-                                               thread_call_t                   call,
-                                               thread_call_param_t             param1,
-                                               uint64_t                                deadline);
-
+                                               thread_call_t           call,
+                                               thread_call_param_t     param1,
+                                               uint64_t                deadline);
+
+/*!
+ @function thread_call_cancel
+ @abstract Attempt to cancel a pending invocation of a thread call.
+ @discussion Attempt to cancel a thread call which has been scheduled
+ for execution with a thread_call_enter* variant.  If the call has not 
+ yet begun executing, the pending invocation will be cancelled and TRUE
+ will be returned.  If the work item has already begun executing,
+ thread_call_cancel will return FALSE immediately; the callback may be
+ about to run, currently running, or already done executing.
+ @result TRUE if the call was successfully cancelled, FALSE otherwise.
+ */
 extern boolean_t       thread_call_cancel(
                                                thread_call_t           call);
+/*!
+ @function thread_call_cancel_wait
+ @abstract Attempt to cancel a pending invocation of a thread call.  
+ If unable to cancel, wait for current invocation to finish.
+ @discussion Attempt to cancel a thread call which has been scheduled
+ for execution with a thread_call_enter* variant.  If the call has not 
+ yet begun executing, the pending invocation will be cancelled and TRUE
+ will be returned.  If the work item has already begun executing,
+ thread_call_cancel_wait waits for the most recent invocation to finish. When
+ called on a work item which has already finished, it will return FALSE immediately.
+ Note that this routine can only be used on thread calls set up with either
+ thread_call_allocate or thread_call_allocate_with_priority, and that invocations
+ of the thread call <i>after</i> the current invocation may be in flight when 
+ thread_call_cancel_wait returns.
+ @result TRUE if the call was successfully cancelled, FALSE otherwise.
+ */
+extern boolean_t       thread_call_cancel_wait(
+                                               thread_call_t           call);
 
+ /*!
+  @function thread_call_allocate
+  @abstract Allocate a thread call to execute with default (high) priority.
+  @discussion  Allocates a thread call that will run with properties of 
+  THREAD_CALL_PRIORITY_HIGH, binding the first parameter to the callback.
+  @param func Callback to invoke when thread call is scheduled.
+  @param param0 First argument to pass to callback.
+  @result Thread call which can be passed to thread_call_enter variants.
+  */
 extern thread_call_t   thread_call_allocate(
-                                                       thread_call_func_t              func,
-                                                       thread_call_param_t             param0);
-
-extern boolean_t               thread_call_free(
-                                                       thread_call_t           call);
+                                               thread_call_func_t      func,
+                                               thread_call_param_t     param0);
+
+ /*!
+  @function thread_call_allocate_with_priority
+  @abstract Allocate a thread call to execute with a specified priority.
+  @discussion Identical to thread_call_allocate, except that priority 
+  is specified by caller.
+  @param func Callback to invoke when thread call is scheduled.
+  @param param0 First argument to pass to callback.
+  @param pri Priority of item.
+  @result Thread call which can be passed to thread_call_enter variants.
+  */
+extern thread_call_t   thread_call_allocate_with_priority(
+                                               thread_call_func_t      func,
+                                               thread_call_param_t     param0,
+                                               thread_call_priority_t  pri);
+
+/*!
+ @function thread_call_free
+ @abstract Release a thread call.
+ @discussion Should only be used on thread calls allocated with thread_call_allocate
+ or thread_call_allocate_with_priority.  Once thread_call_free has been called,
+ no other operations may be performed on a thread call.  If the thread call is
+ currently pending, thread_call_free will return FALSE and will have no effect.
+ Calling thread_call_free from a thread call's own callback is safe; the work
+ item is not considered "pending" at that point.
+ @result TRUE if the thread call has been successfully released, else FALSE.
+ @param call The thread call to release.
+ */
+extern boolean_t       thread_call_free(
+                                               thread_call_t           call);
 
+/*!
+ @function thread_call_isactive
+ @abstract Determine whether a thread call is pending or currently executing.
+ @param call Thread call to examine.
+ @result TRUE if the thread call is either scheduled for execution (immediately
+ or at some point in the future) or is currently executing.
+ */
+boolean_t              thread_call_isactive(
+                                               thread_call_t call);
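+
+#if 0	/* Sketch (editor's illustration, not part of this change): a polling
+	   drain before release, assuming the caller no longer resubmits tc. */
+static void
+my_drain_example(thread_call_t tc)
+{
+	(void) thread_call_cancel(tc);
+	while (thread_call_isactive(tc))
+		delay(1000);			/* 1 ms */
+	(void) thread_call_free(tc);
+}
+#endif
+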
 __END_DECLS
 
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <kern/call_entry.h>
 
-typedef struct call_entry      thread_call_data_t;
+struct thread_call {
+       struct call_entry               tc_call;        /* Must be first */
+       uint64_t                        tc_submit_count;
+       uint64_t                        tc_finish_count;
+       thread_call_priority_t  tc_pri;
+
+       uint32_t                        tc_flags;
+       int32_t                         tc_refs;
+}; 
+
+#define THREAD_CALL_ALLOC              0x01
+#define THREAD_CALL_WAIT               0x02
+
+typedef struct thread_call thread_call_data_t;
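+
+/*
+ * Editor's note: tc_call being the first member is what permits the
+ * implementation's queue-element casts, e.g. a TC()-style macro such as
+ *
+ *	#define TC(x)	((thread_call_t)(x))
+ *
+ * This layout invariant must be preserved if fields are added.
+ */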
 
 extern void            thread_call_initialize(void);
 
@@ -100,22 +256,22 @@ __BEGIN_DECLS
 
 extern boolean_t       thread_call_is_delayed(
                                                thread_call_t           call,
-                                               uint64_t                        *deadline);
+                                               uint64_t                *deadline);
 
 extern void            thread_call_func(
                                        thread_call_func_t              func,
                                        thread_call_param_t             param,
-                                       boolean_t                               unique_call);
+                                       boolean_t                       unique_call);
 
 extern void            thread_call_func_delayed(
                                        thread_call_func_t              func,
                                        thread_call_param_t             param,
-                                       uint64_t                                deadline);
+                                       uint64_t                        deadline);
 
 extern boolean_t       thread_call_func_cancel(
-                                               thread_call_func_t              func,
-                                               thread_call_param_t             param,
-                                               boolean_t                               cancel_all);
+                                               thread_call_func_t      func,
+                                               thread_call_param_t     param,
+                                               boolean_t               cancel_all);
 
 #else  /* __LP64__ */
 
@@ -124,12 +280,12 @@ extern boolean_t  thread_call_func_cancel(
 extern void            thread_call_func_delayed(
                                        thread_call_func_t              func,
                                        thread_call_param_t             param,
-                                       uint64_t                                deadline);
+                                       uint64_t                        deadline);
 
 extern boolean_t       thread_call_func_cancel(
-                                               thread_call_func_t              func,
-                                               thread_call_param_t             param,
-                                               boolean_t                               cancel_all);
+                                               thread_call_func_t      func,
+                                               thread_call_param_t     param,
+                                               boolean_t               cancel_all);
 
 #endif /* XNU_KERNEL_PRIVATE */
 
index 7ed70a1511e6ad814c3ffec3144c0732ddc653ac..8108514f547f3d0d3c530dfb82b96eef45208b30 100644 (file)
@@ -164,12 +164,6 @@ thread_policy_set_internal(
                if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
                        thread->saved_mode = TH_MODE_REALTIME;
                }
-#if CONFIG_EMBEDDED
-               else if (thread->task_priority <= MAXPRI_THROTTLE) {
-                       thread->saved_mode = TH_MODE_REALTIME;
-                       thread->sched_flags |= TH_SFLAG_THROTTLED;              
-               }
-#endif
                else {
                        if (thread->sched_mode == TH_MODE_TIMESHARE) {
                                if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
@@ -293,55 +287,31 @@ thread_throttle(
        thread_t                thread,
        integer_t               task_priority)
 {
-       if (!(thread->sched_flags & TH_SFLAG_THROTTLED) && 
-               (task_priority <= MAXPRI_THROTTLE)) {
-
-               if (!((thread->sched_mode == TH_MODE_REALTIME) ||
-                         (thread->saved_mode == TH_MODE_REALTIME))) {
-                       return;
-               }
-
-               /* Demote to timeshare if throttling */
-               if (thread->sched_mode == TH_MODE_REALTIME)             
-               {
-                       thread->saved_mode = TH_MODE_REALTIME;
-
-                       if (thread->sched_mode == TH_MODE_TIMESHARE) {
-                               if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
-                                       sched_share_incr();
-                       }
+       if ((!(thread->sched_flags & TH_SFLAG_THROTTLED)
+                || (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_PROMOTION))
+                && (task_priority <= MAXPRI_THROTTLE)) {
+
+               /* Kill a promotion if it was in flight */
+               thread->sched_flags &= ~TH_SFLAG_PENDING_THROTTLE_PROMOTION;
+
+               if (!(thread->sched_flags & TH_SFLAG_THROTTLED)) {
+                       /*
+                        * Set the pending bit so that we can switch runqueues
+                        * (potentially) at a later time safely
+                        */
+                       thread->sched_flags |= TH_SFLAG_PENDING_THROTTLE_DEMOTION;
                }
-
-               /* TH_SFLAG_FAILSAFE and TH_SFLAG_THROTTLED are mutually exclusive,
-                * since a throttled thread is not realtime during the throttle
-                * and doesn't need the failsafe repromotion. We therefore clear
-                * the former and set the latter flags here.
-                */
-               thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
-               thread->sched_flags |= TH_SFLAG_THROTTLED;
-               
-               if (SCHED(supports_timeshare_mode)())
-                       thread->sched_mode = TH_MODE_TIMESHARE;
-               else
-                       thread->sched_mode = TH_MODE_FIXED;
        }
-       else if ((thread->sched_flags & TH_SFLAG_THROTTLED) &&
-                        (task_priority > MAXPRI_THROTTLE)) {
-
-               /* Promote back to real time if unthrottling */
-               if (!(thread->saved_mode == TH_MODE_TIMESHARE)) {
+       else if (((thread->sched_flags & TH_SFLAG_THROTTLED)
+                         || (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION))
+                         && (task_priority > MAXPRI_THROTTLE)) {
 
-                       thread->sched_mode = thread->saved_mode;
+               /* Kill a demotion if it was in flight */
+               thread->sched_flags &= ~TH_SFLAG_PENDING_THROTTLE_DEMOTION;
 
-                       if (thread->sched_mode == TH_MODE_TIMESHARE) {
-                               if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
-                                       sched_share_decr();
-                       }
-                       
-                       thread->saved_mode = TH_MODE_NONE;
+               if (thread->sched_flags & TH_SFLAG_THROTTLED) {
+                       thread->sched_flags |= TH_SFLAG_PENDING_THROTTLE_PROMOTION;
                }
-
-               thread->sched_flags &= ~TH_SFLAG_THROTTLED;
        }       
 }
 #endif
@@ -393,6 +363,7 @@ thread_policy_reset(
                }
        }
        else {
+               thread->sched_mode = thread->saved_mode;
                thread->saved_mode = TH_MODE_NONE;
                thread->sched_flags &= ~TH_SFLAG_DEMOTED_MASK;
        }
index 7cce6afe31c340f62fb45c2fb05ba5ab4c830f21..02a088597a59f889259899d02e96c9f8f468b6e0 100644 (file)
@@ -56,7 +56,6 @@
 /* 
  */
 
-#include <stat_time.h>
 #include <machine_timer_routines.h>
 
 #include <mach/kern_return.h>
 #include <kern/sched_prim.h>
 #include <kern/timer.h>
 
+#if CONFIG_EMBEDDED
+int precise_user_kernel_time = 0;
+#else
+int precise_user_kernel_time = 1;
+#endif
+
 /*
  *     timer_init initializes a timer.
  */
@@ -74,9 +79,7 @@ void
 timer_init(
        timer_t         timer)
 {
-#if    !STAT_TIME
        timer->tstamp = 0;
-#endif /* STAT_TIME */
 #if    defined(__LP64__)
        timer->all_bits = 0;
 #else
@@ -120,8 +123,6 @@ timer_advance(
 #endif         /* defined(__LP64__) */
 }
 
-#if    !STAT_TIME
-
 void
 timer_start(
        timer_t         timer,
@@ -188,5 +189,3 @@ thread_timer_event(
 }
 
 #endif /* MACHINE_TIMER_ROUTINES */
-
-#endif /* STAT_TIME */
index abbfcb5e398e3e5441817079a027f836cbfcde88..a353c6c29b7f336fffb75706c9b615840bcf4982 100644 (file)
 #ifndef        _KERN_TIMER_H_
 #define _KERN_TIMER_H_
 
-#include <stat_time.h>
-
 #include <kern/kern_types.h>
 
+/*
+ * Some platforms have very expensive timebase routines. An optimization
+ * is to avoid switching timers on kernel exit/entry, which results in all
+ * time billed to the system timer. However, when exposed to userspace,
+ * we report as user time to indicate that work was done on behalf of
+ * userspace.
+ *
+ * Although this policy is implemented as a global variable, we snapshot it
+ * at key points in the thread structure (when the thread is locked and
+ * executing in the kernel) to avoid imbalances.
+ */
+extern int precise_user_kernel_time;
+
+/*
+ * thread must be locked, or be the current executing thread, so that
+ * it doesn't transition from user to kernel while updating the
+ * thread-local value (or in kernel debugger context). In the future,
+ * we make take into account task-level or thread-level policy.
+ */
+#define use_precise_user_kernel_time(thread) ( precise_user_kernel_time ) 
+
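+#if 0	/* Illustrative sketch (editor's addition, not part of this change):
+	   how a kernel-entry path might consult the policy; when precise
+	   accounting is off, the user timer simply keeps running. */
+static inline void
+example_kernel_entry(thread_t thread)
+{
+	if (use_precise_user_kernel_time(thread))
+		timer_switch(&thread->user_timer, mach_absolute_time(),
+				&thread->system_timer);
+}
+#endif
+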
 /*
  *     Definitions for high resolution timers.  A check
  *     word on the high portion allows atomic updates.
  */
 
 struct timer {
-#if    !STAT_TIME
        uint64_t        tstamp;
-#endif /* STAT_TIME */
 #if    defined(__LP64__)
        uint64_t        all_bits;
 #else
@@ -87,32 +104,6 @@ typedef struct timer        timer_data_t, *timer_t;
  *     Exported kernel interface to timers
  */
 
-#if    STAT_TIME
-
-#include <kern/macro_help.h>
-
-/* Advance a timer by a 32 bit value */
-#define TIMER_BUMP(timer, ticks)                                                               \
-MACRO_BEGIN                                                                                                            \
-       uint32_t        old_low, low;                                                                   \
-                                                                                                                               \
-       old_low = (timer)->low_bits;                                                            \
-       low = old_low + (ticks);                                                                        \
-       if (low < old_low)                                                                                      \
-               timer_update((timer), (timer)->high_bits + 1, low);             \
-       else                                                                                                            \
-               (timer)->low_bits = low;                                                                \
-MACRO_END
-
-#define timer_start(timer, tstamp)
-#define timer_stop(timer, tstamp)
-#define timer_switch(timer, tstamp, new_timer)
-#define thread_timer_event(tstamp, new_timer)
-
-#else  /* STAT_TIME */
-
-#define        TIMER_BUMP(timer, ticks)
-
 /* Start a timer by setting the timestamp */
 extern void            timer_start(
                                        timer_t         timer,
@@ -134,8 +125,6 @@ extern void         thread_timer_event(
                                        uint64_t        tstamp,
                                        timer_t         new_timer);
 
-#endif /* STAT_TIME */
-
 /* Initialize a timer */
 extern void            timer_init(
                                        timer_t         timer);
index 83eb0e43ac25286868ce3c3eef04c2a7e8b79b2e..0d737dbbbee80e7a88650ff20872d7e9567579e8 100644 (file)
@@ -74,6 +74,15 @@ lck_grp_attr_t          timer_call_lck_grp_attr;
 #define MPQUEUE(x)     ((mpqueue_head_t *)(x))
 #define TIMER_CALL(x)  ((timer_call_t)(x))
 
+
+uint64_t past_deadline_timers;
+uint64_t past_deadline_deltas;
+uint64_t past_deadline_longest;
+uint64_t past_deadline_shortest = ~0ULL;
+enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
+
+uint64_t past_deadline_timer_adjustment;
+
 static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint32_t flags);
 boolean_t      mach_timer_coalescing_enabled = TRUE;
 
@@ -92,6 +101,7 @@ timer_call_initialize(void)
        lck_attr_setdefault(&timer_call_lck_attr);
        lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
        lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
+       nanotime_to_absolutetime(0, PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
 }
 
 
@@ -332,6 +342,22 @@ timer_call_enter_internal(
                deadline += slop;
        }
 
+#if    defined(__i386__) || defined(__x86_64__)        
+       uint64_t ctime = mach_absolute_time();
+       if (__improbable(deadline < ctime)) {
+               uint64_t delta = (ctime - deadline);
+
+               past_deadline_timers++;
+               past_deadline_deltas += delta;
+               if (delta > past_deadline_longest)
+                       past_deadline_longest = delta;
+               if (delta < past_deadline_shortest)
+                       past_deadline_shortest = delta;
+
+               deadline = ctime + past_deadline_timer_adjustment;
+               call->soft_deadline = deadline;
+       }
+#endif
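+       /*
+        * Worked example (editor's note): with the 10us adjustment above, a
+        * timer entered 3us after its deadline is re-aimed at ctime + 10us,
+        * and the 3us shortfall accumulates into past_deadline_deltas.
+        */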
        queue = timer_queue_assign(deadline);
 
        old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline);
@@ -469,10 +495,9 @@ timer_queue_expire(
                        simple_unlock(&call->lock);
                        timer_call_unlock(queue);
 
-                       KERNEL_DEBUG_CONSTANT(DECR_TIMER_CALLOUT | DBG_FUNC_START,
-                                             func,
-                                             param0,
-                                             param1, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                               DECR_TIMER_CALLOUT | DBG_FUNC_START,
+                               VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
 
 #if CONFIG_DTRACE && (DEVELOPMENT || DEBUG )
                        DTRACE_TMR3(callout__start, timer_call_func_t, func, 
@@ -488,10 +513,9 @@ timer_queue_expire(
                                                                                timer_call_param_t, param1);
 #endif
 
-                       KERNEL_DEBUG_CONSTANT(DECR_TIMER_CALLOUT | DBG_FUNC_END,
-                                             func,
-                                             param0,
-                                             param1, 0, 0);
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                               DECR_TIMER_CALLOUT | DBG_FUNC_END,
+                               VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
 
                        timer_call_lock_spin(queue);
                }
index 14cd087241cd4d1a29fc0576347d8695d63501a9..6bcabf8c1cfcc12672e7e1a2da49e7f208e4b300 100644 (file)
@@ -84,7 +84,6 @@ static boolean_t wait_queue_member_locked(
 
 static void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
 
-
 #define WAIT_QUEUE_MAX thread_max
 #define WAIT_QUEUE_SET_MAX task_max * 3
 #define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64
@@ -128,16 +127,21 @@ volatile WaitQueueLink *unused_except_for_debugging;
 
 struct wait_queue boot_wait_queue[1];
 __private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0];
-
 __private_extern__ uint32_t num_wait_queues = 1;
 
+#define        P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+
 static uint32_t
-compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
-       uint32_t hsize = (uint32_t)round_page_64((thread_max / 11) * sizeof(struct wait_queue));
-       uint32_t bhsize;
+compute_wait_hash_size(void)
+{
+       uint32_t hsize, queues;
        
-       if (PE_parse_boot_argn("wqsize", &bhsize, sizeof(bhsize)))
-               hsize = bhsize;
+       if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize)))
+               return (hsize);
+
+       queues = thread_max / 11;
+       hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE);
 
        return hsize;
 }
@@ -145,13 +149,37 @@ compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
 static void
 wait_queues_init(void)
 {
-       uint32_t        i, whsize;
+       uint32_t        i, whsize, qsz;
        kern_return_t   kret;
 
-       whsize = compute_wait_hash_size(processor_avail_count, machine_info.max_mem);
-       num_wait_queues = (whsize / ((uint32_t)sizeof(struct wait_queue))) - 1;
+       /*
+        * Determine the amount of memory we're willing to reserve for
+        * the waitqueue hash table
+        */
+       whsize = compute_wait_hash_size();
+
+       /* Determine the number of waitqueues we can fit. */
+       qsz = sizeof (struct wait_queue);
+       whsize = ROUNDDOWN(whsize, qsz);
+       num_wait_queues = whsize / qsz;
+
+       /*
+        * The hash algorithm requires that this be a power of 2, so we
+        * just mask off all the low-order bits.
+        */
+       for (i = 0; i < 31; i++) {
+               uint32_t bit = (1 << i);
+               if ((num_wait_queues & bit) == num_wait_queues)
+                       break;
+               num_wait_queues &= ~bit;
+       }
+       assert(num_wait_queues > 0);
+
+       /* Now determine how much memory we really need. */
+       whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE);
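
The masking loop above rounds num_wait_queues down to a power of two by clearing low-order bits until a single bit remains. An equivalent one-line formulation on compilers providing __builtin_clz (a sketch, not what the diff uses):

	static uint32_t
	round_down_pow2(uint32_t n)
	{
		/* Keep only the most-significant set bit; requires n > 0. */
		return 1u << (31 - __builtin_clz(n));
	}
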
 
-       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues,
+           whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
 
        if (kret != KERN_SUCCESS || wait_queues == NULL)
                panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize);
@@ -676,6 +704,60 @@ wait_queue_unlink_locked(
        WAIT_QUEUE_SET_CHECK(wq_set);
 }
 
+/*
+ *     Routine:        wait_queue_unlink_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and a set,
+ *             returning the linkage structure to the caller to
+ *             free later.
+ *     Conditions:
+ *             The wait queue being unlinked must be a member of the set queue
+ */
+kern_return_t
+wait_queue_unlink_nofree(
+       wait_queue_t wq,
+       wait_queue_set_t wq_set,
+       wait_queue_link_t *wqlp)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_link_t wql;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       s = splsched();
+       wait_queue_lock(wq);
+
+       q = &wq->wq_queue;
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+
+                       wql = (wait_queue_link_t)wq_element;
+                       
+                       if (wql->wql_setqueue == wq_set) {
+
+                               wqs_lock(wq_set);
+                               wait_queue_unlink_locked(wq, wq_set, wql);
+                               wqs_unlock(wq_set);
+                               wait_queue_unlock(wq);
+                               splx(s);
+                               *wqlp = wql;
+                               return KERN_SUCCESS;
+                       }
+               }
+               wq_element = (wait_queue_element_t)
+                               queue_next((queue_t) wq_element);
+       }
+       wait_queue_unlock(wq);
+       splx(s);
+       return KERN_NOT_IN_SET;
+}      
+
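The "nofree" variant lets the caller detach a linkage while interrupt-disabled spinlocks are held and release the memory later, outside any lock. A sketch of the intended call pattern, assuming wait_queue_link_free() is the matching release routine:

	wait_queue_link_t wql;

	if (wait_queue_unlink_nofree(wq, wq_set, &wql) == KERN_SUCCESS)
		wait_queue_link_free(wql);	/* freed later, no locks held */
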
 /*
  *     Routine:        wait_queue_unlink
  *     Purpose:
@@ -732,36 +814,97 @@ wait_queue_unlink(
 }      
 
 /*
- *     Routine:        wait_queue_unlink_all
+ *     Routine:        wait_queue_unlink_all_nofree_locked
  *     Purpose:
  *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures that were allocated internally
- *             are freed.  The others are the caller's responsibility.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
  *     Conditions:
- *             Nothing of interest locked.
+ *             Wait queue locked.
  */
 
-kern_return_t
-wait_queue_unlink_all(
-       wait_queue_t wq)
+static void
+wait_queue_unlink_all_nofree_locked(
+       wait_queue_t wq,
+       queue_t links)
 {
        wait_queue_element_t wq_element;
        wait_queue_element_t wq_next_element;
        wait_queue_set_t wq_set;
        wait_queue_link_t wql;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
        queue_t q;
+
+       q = &wq->wq_queue;
+
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               wq_next_element = (wait_queue_element_t)
+                            queue_next((queue_t) wq_element);
+
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+                       wql = (wait_queue_link_t)wq_element;
+                       wq_set = wql->wql_setqueue;
+                       wqs_lock(wq_set);
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wqs_unlock(wq_set);
+                       enqueue(links, &wql->wql_links);
+               }
+               wq_element = wq_next_element;
+       }
+}      
+
+/*
+ *     Routine:        wait_queue_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all_nofree(
+       wait_queue_t wq,
+       queue_t links)
+{
        spl_t s;
 
        if (!wait_queue_is_valid(wq)) {
                return KERN_INVALID_ARGUMENT;
        }
 
-       queue_init(links);
-
        s = splsched();
        wait_queue_lock(wq);
+       wait_queue_unlink_all_nofree_locked(wq, links);
+       wait_queue_unlock(wq);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
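For the "all" variants the caller supplies a queue head that collects every detached link; once the locks are dropped the queue is drained and each link freed. A sketch under the same wait_queue_link_free() assumption:

	queue_head_t links;

	queue_init(&links);
	if (wait_queue_unlink_all_nofree(wq, &links) == KERN_SUCCESS) {
		while (!queue_empty(&links))
			wait_queue_link_free((wait_queue_link_t) dequeue(&links));
	}
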
+/*
+ *     Routine:        wait_queue_unlink_all_locked
+ *     Purpose:
+ *             Remove the linkage between a locked wait queue and all its
+ *             sets and enqueue the allocated ones onto the links queue
+ *             provided.
+ *     Conditions:
+ *             Wait queue locked.
+ */
+static void
+wait_queue_unlink_all_locked(
+       wait_queue_t wq,
+       queue_t links)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_element_t wq_next_element;
+       wait_queue_set_t wq_set;
+       wait_queue_link_t wql;
+       queue_t q;
 
        q = &wq->wq_queue;
 
@@ -785,6 +928,38 @@ wait_queue_unlink_all(
                }
                wq_element = wq_next_element;
        }
+
+}
+
+
+/*
+ *     Routine:        wait_queue_unlink_all
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures that were allocated internally
+ *             are freed.  The others are the caller's responsibility.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all(
+       wait_queue_t wq)
+{
+       wait_queue_link_t wql;
+       queue_head_t links_queue_head;
+       queue_t links = &links_queue_head;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       queue_init(links);
+
+       s = splsched();
+       wait_queue_lock(wq);
+       wait_queue_unlink_all_locked(wq, links);
        wait_queue_unlock(wq);
        splx(s);
 
@@ -805,12 +980,70 @@ wait_subqueue_unlink_all(
 }
 
 
+/*
+ *     Routine:        wait_queue_set_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a set wait queue and all its
+ *             member wait queues and all the sets it may be a member of.
+ *             The links structures are returned for later freeing by the
+ *             caller.
+ *     Conditions:
+ *             The wait queue must be a set
+ */
+kern_return_t
+wait_queue_set_unlink_all_nofree(
+       wait_queue_set_t wq_set,
+       queue_t         links)
+{
+       wait_queue_link_t wql;
+       wait_queue_t wq;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+retry:
+       s = splsched();
+       wqs_lock(wq_set);
+
+       /* remove the wait queues that are members of our set */
+       q = &wq_set->wqs_setlinks;
+
+       wql = (wait_queue_link_t)queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wql)) {
+               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+               wq = wql->wql_queue;
+               if (wait_queue_lock_try(wq)) {
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wait_queue_unlock(wq);
+                       enqueue(links, &wql->wql_links);
+                       wql = (wait_queue_link_t)queue_first(q);
+               } else {
+                       wqs_unlock(wq_set);
+                       splx(s);
+                       delay(1);
+                       goto retry;
+               }
+       }
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links);
+
+       wqs_unlock(wq_set);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
 /*
  *     Routine:        wait_queue_set_unlink_all
  *     Purpose:
  *             Remove the linkage between a set wait queue and all its
- *             member wait queues. The link structures are freed for those
- *             links which were dynamically allocated.
+ *             member wait queues and all the sets it may be a member of.
+ *             The link structures are freed for those links which were
+ *             dynamically allocated.
  *     Conditions:
  *             The wait queue must be a set
  */
@@ -835,6 +1068,7 @@ retry:
        s = splsched();
        wqs_lock(wq_set);
 
+       /* remove the wait queues that are members of our set */
        q = &wq_set->wqs_setlinks;
 
        wql = (wait_queue_link_t)queue_first(q);
@@ -857,6 +1091,11 @@ retry:
                        goto retry;
                }
        }
+
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links);
+
        wqs_unlock(wq_set);
        splx(s);
 
index 42675a30b2bb11361132e23ab05363674ea94f05..fc91a60afbecfb60282d4de780004decdaab21e6 100644 (file)
 #include <mach/kern_return.h>          /* for kern_return_t */
 
 #include <kern/kern_types.h>           /* for wait_queue_t */
+#include <kern/queue.h>
 
 #include <sys/cdefs.h>
 
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <kern/lock.h>
-#include <kern/queue.h>
 #include <mach/branch_predicates.h>
 
 #include <machine/cpu_number.h>
@@ -271,12 +271,11 @@ static inline uint32_t wq_hash(char *key)
        hash ^= (hash >> 11);
        hash += (hash << 15);
 
+       hash &= (num_wait_queues - 1);
        return hash;
 }
 
-/* TBD: It should be possible to eliminate the divide here */
-#define       wait_hash(event)                                         \
-       (wq_hash((char *)&event) % (num_wait_queues))
+#define        wait_hash(event) wq_hash((char *)&event) 
 
 #endif /* MACH_KERNEL_PRIVATE */
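
Masking with (num_wait_queues - 1) can replace the old modulo only because wait_queues_init() now forces the table size to a power of two; with, say, 1024 queues the mask is 0x3ff, so every hash value is already a valid index. An illustrative check (not in the source):

	assert(wait_hash(event) < num_wait_queues);
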
 
@@ -335,6 +334,19 @@ extern kern_return_t wait_queue_set_unlink_one(
                        wait_queue_set_t set_queue,
                        wait_queue_link_t link);
 
+extern kern_return_t wait_queue_unlink_nofree(
+                       wait_queue_t wait_queue,
+                       wait_queue_set_t set_queue,
+                       wait_queue_link_t *wqlp);
+
+extern kern_return_t wait_queue_unlink_all_nofree(
+                       wait_queue_t wait_queue,
+                       queue_t links);
+
+extern kern_return_t wait_queue_set_unlink_all_nofree(
+                       wait_queue_set_t set_queue,
+                       queue_t links);
+
 extern wait_queue_link_t wait_queue_link_allocate(void);
 
 #endif /* XNU_KERNEL_PRIVATE */
index 1b4d1670711ecaf8e0217acc40e9c19986623e1c..a10ec384d0fba69e8c48752412af5b181cbf8105 100644 (file)
@@ -53,7 +53,6 @@
  * any improvements or extensions that they make and grant Carnegie Mellon
  * the rights to redistribute these changes.
  */
-#include <mach_kdb.h>
 /*
  * xpr silent tracing circular buffer.
  */
@@ -125,174 +124,3 @@ xpr(
        mp_enable_preemption();
 }
 
-void 
-xprbootstrap(void)
-{
-       vm_offset_t     addr;
-       vm_size_t       size;
-       kern_return_t   kr;
-
-       simple_lock_init(&xprlock, 0);
-       if (nxprbufs == 0)
-               return; /* assume XPR support not desired */
-
-       /* leave room at the end for a saved copy of xprptr */
-       size = nxprbufs * sizeof(struct xprbuf) + sizeof xprptr;
-
-       kr = kmem_alloc_kobject(kernel_map, &addr, size);
-       if (kr != KERN_SUCCESS)
-               panic("xprbootstrap");
-
-       if (xprenable) {
-               /*
-                *      If xprenable is set (the default) then we zero
-                *      the buffer so xpr_dump doesn't encounter bad pointers.
-                *      If xprenable isn't set, then we preserve
-                *      the original contents of the buffer.  This is useful
-                *      if memory survives reboots, so xpr_dump can show
-                *      the previous buffer contents.
-                */
-
-               (void) memset((void *) addr, 0, size);
-       }
-
-       xprbase = (struct xprbuf *) addr;
-       xprlast = &xprbase[nxprbufs];
-       xprptr = xprbase;       /* setting xprptr enables tracing */
-}
-
-int            xprinitial = 0;
-
-void
-xprinit(void)
-{
-       xprflags |= xprinitial;
-}
-
-#if    MACH_KDB
-#include <ddb/db_output.h>
-
-/*
- * Prototypes for functions called from the debugger
- */
-void
-xpr_dump(
-       struct xprbuf   *base,
-       int             nbufs);
-
-void
-xpr_search(
-       int     arg_index,
-       int     value);
-
-extern jmp_buf_t *db_recover;
-
-/*
- *     Print current content of xpr buffers (KDB's sake)
- *     Use stack order to make it understandable.
- *
- *     Called as "!xpr_dump" this dumps the kernel's xpr buffer.
- *     Called with arguments, it can dump xpr buffers in user tasks,
- *     assuming they use the same format as the kernel.
- */
-static spl_t xpr_dump_spl;
-static struct xprbuf *base;
-static int nbufs;
-void
-xpr_dump(
-       struct xprbuf   *_base,
-       int             _nbufs)
-{
-       jmp_buf_t db_jmpbuf;
-       jmp_buf_t *prev;
-       struct xprbuf *last, *ptr;
-       register struct xprbuf *x;
-       int i;
-
-       base = _base;
-       nbufs = _nbufs;
-
-       if (base == 0) {
-               base = xprbase;
-               nbufs = nxprbufs;
-       }
-
-       if (nbufs == 0)
-               return;
-
-       if (base == xprbase) {
-               xpr_dump_spl = splhigh();
-               simple_lock(&xprlock);
-       }
-
-       last = base + nbufs;
-       ptr = * (struct xprbuf **) last;
-
-       prev = db_recover;
-       if (_setjmp(db_recover = &db_jmpbuf) == 0)
-           for (x = ptr, i = 0; i < nbufs; i++) {
-               if (--x < base)
-                       x = last - 1;
-
-               if (x->msg == 0)
-                       break;
-
-               db_printf("<%d:%x:%x> ", x - base, x->cpuinfo, x->timestamp);
-               db_printf(x->msg, x->arg1,x->arg2,x->arg3,x->arg4,x->arg5);
-           }
-       db_recover = prev;
-
-       if (base == xprbase) {
-               simple_unlock(&xprlock);
-               splx(xpr_dump_spl);
-       }
-}
-
-/*
- * dump xpr table with a selection criteria.
- * argument number "arg_index" must equal "value"
- */
-
-void
-xpr_search(
-       int     arg_index,
-       int     value)
-{
-       jmp_buf_t db_jmpbuf;
-       jmp_buf_t *prev;
-       register struct xprbuf *x;
-       spl_t s;
-       int n;
-
-       if (!nxprbufs)
-               return;
-
-       s = splhigh();
-       simple_lock(&xprlock);
-
-       prev = db_recover;
-       if (_setjmp(db_recover = &db_jmpbuf) == 0) {
-           n = nxprbufs;
-
-           for (x = *(struct xprbuf **)xprlast ; n--; ) {
-               if (--x < xprbase)
-                       x = xprlast - 1;
-
-               if (x->msg == 0) {
-                       break;
-               }
-
-               if (*((&x->arg1)+arg_index) != value)
-                       continue;
-
-               db_printf("<%d:%d:%x> ", x - xprbase,
-                         x->cpuinfo, x->timestamp);
-               db_printf(x->msg, x->arg1,x->arg2,x->arg3,x->arg4,x->arg5);
-           }
-       }
-       db_recover = prev;
-
-       simple_unlock(&xprlock);
-       splx(s);
-}
-#endif /* MACH_KDB */
index 9d1afa5d6e84a9e430c8c7218737481dd28c24d0..dc9ea000ca8eba965a977f601faf39c06dbbbd61 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -64,8 +64,6 @@
  */
 #include <zone_debug.h>
 #include <zone_alias_addr.h>
-#include <norma_vm.h>
-#include <mach_kdb.h>
 
 #include <mach/mach_types.h>
 #include <mach/vm_param.h>
@@ -74,6 +72,7 @@
 #include <mach/task_server.h>
 #include <mach/machine/vm_types.h>
 #include <mach_debug/zone_info.h>
+#include <mach/vm_map.h>
 
 #include <kern/kern_types.h>
 #include <kern/assert.h>
@@ -92,6 +91,8 @@
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 
+#include <pexpert/pexpert.h>
+
 #include <machine/machparam.h>
 
 #include <libkern/OSDebug.h>
 /* 
  * Zone Corruption Debugging
  *
- * We provide three methods to detect use of a zone element after it's been freed.  These
- * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
+ * We use three methods to detect use of a zone element after it's been freed. These
+ * checks are enabled for every N'th element (counted per-zone) by specifying
+ * "zp-factor=N" as a boot-arg. To turn this feature off, set "zp-factor=0" or "-no-zp".
+ *
+ * (1) Range-check the free-list "next" pointer for sanity.
+ * (2) Store the pointer in two different words, one at the beginning of the freed element
+ *     and one at the end, and compare them against each other when re-using the element,
+ *     to detect modifications.
+ * (3) Poison the freed memory by overwriting it with 0xdeadbeef, and check it when the
+ *     memory is being reused to make sure it is still poisoned.
+ *
+ * As a result, each element (that is large enough to hold this data inside) must be marked
+ * as either "ZP_POISONED" or "ZP_NOT_POISONED" in the first integer within the would-be
+ * poisoned segment after the first free-list pointer.
  *
- * (1) Range-check the free-list "next" ptr for sanity.
- * (2) Store the ptr in two different words, and compare them against
- *     each other when re-using the zone element, to detect modifications.
- * (3) poison the freed memory by overwriting it with 0xdeadbeef.
+ * Performance slowdown is inversely proportional to the sampling factor N
+ * (as would be expected), with a 4-5% hit around N=1, down to ~0.3% at N=16 and just
+ * "noise" at N=32 and higher. You can expect to find a 100% reproducible
+ * bug in an average of N tries, with a standard deviation of about N, but you will probably
+ * want to set "zp-factor=1" or "-zp" if you are attempting to reproduce a known bug.
  *
- * The first two checks are fairly light weight and are enabled by specifying "-zc" 
- * in the boot-args.  If you want more aggressive checking for use-after-free bugs
- * and you don't mind the additional overhead, then turn on poisoning by adding
- * "-zp" to the boot-args in addition to "-zc".  If you specify -zp without -zc,
- * it still poisons the memory when it's freed, but doesn't check if the memory
- * has been altered later when it's reallocated.
+ *
+ * Zone corruption logging
+ *
+ * You can also track where corruptions come from by using the boot-arguments:
+ * "zlog=<zone name to log> -zc". Search for "Zone corruption logging" later in this
+ * document for more implementation and usage information.
+ */
+
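Concretely, a freed element that was selected for checking is laid out as follows (a sketch for 64-bit pointers; offsets scale with the element size, and elements too small for the full scheme carry only the marker word):

	/*
	 * offset 0              free-list "next" pointer
	 * offset 8              ZP_POISONED / ZP_NOT_POISONED marker
	 * offset 12 .. size-9   ZP_POISON (0xdeadbeef) fill
	 * offset size-8         backup copy of the "next" pointer
	 */
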
+#define ZP_POISON       0xdeadbeef
+#define ZP_POISONED     0xfeedface
+#define ZP_NOT_POISONED 0xbaddecaf
+
+#if CONFIG_EMBEDDED
+       #define ZP_DEFAULT_SAMPLING_FACTOR 0
+#else /* CONFIG_EMBEDDED */
+       #define ZP_DEFAULT_SAMPLING_FACTOR 16
+#endif /* CONFIG_EMBEDDED */
+
+uint32_t       free_check_sample_factor = 0;           /* set by zp-factor=N boot arg */
+boolean_t      corruption_debug_flag    = FALSE;       /* enabled by "-zc" boot-arg */
+
+/* 
+ * Zone checking helper macro.
+ */
+#define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
+
+/*
+ * Frees the specified element, which is within the specified zone. If this
+ * element should be poisoned and its free list checker should be set, both are
+ * done here. These checks will only be enabled if the element size is at least
+ * large enough to hold two vm_offset_t's and one uint32_t (to enable both types
+ * of checks).
+ */
+static inline void
+free_to_zone(zone_t zone, void *elem) {
+       /* get the index of the first uint32_t beyond the 'next' pointer */
+       unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t);
+       
+       /* should we run checks on this piece of memory? */
+       if (free_check_sample_factor != 0 &&
+           zone->free_check_count++ % free_check_sample_factor == 0 &&
+           zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {
+               zone->free_check_count = 1;
+               ((uint32_t *) elem)[i] = ZP_POISONED;
+               for (i++; i < zone->elem_size / sizeof(uint32_t); i++) {
+                       ((uint32_t *) elem)[i] = ZP_POISON;
+               }
+               ((vm_offset_t *) elem)[((zone->elem_size)/sizeof(vm_offset_t))-1] = zone->free_elements;
+       } else {
+               ((uint32_t *) elem)[i] = ZP_NOT_POISONED;
+       }
+       
+       /* maintain free list and decrement number of active objects in zone */
+       ((vm_offset_t *) elem)[0] = zone->free_elements;
+       zone->free_elements = (vm_offset_t) elem;
+       zone->count--;
+}
+
+/*
+ * Allocates an element from the specified zone, storing its address in the
+ * return argument. This function will look for corruptions revealed through zone
+ * poisoning and free list checks.
  */
+static inline void
+alloc_from_zone(zone_t zone, void **ret) {
+       void *elem = (void *) zone->free_elements;
+       if (elem != NULL) {
+               /* get the index of the first uint32_t beyond the 'next' pointer */
+               unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t);
+               
+               /* first int in data section must be ZP_POISONED or ZP_NOT_POISONED */
+               if (((uint32_t *) elem)[i] == ZP_POISONED &&
+                   zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {
+                       /* check the free list pointers */
+                       if (!is_kernel_data_addr(((vm_offset_t *) elem)[0]) ||
+                           ((vm_offset_t *) elem)[0] !=
+                           ((vm_offset_t *) elem)[(zone->elem_size/sizeof(vm_offset_t))-1]) {
+                               panic("a freed zone element has been modified in zone: %s",
+                                     zone->zone_name);
+                       }
+                       
+                       /* check for poisoning in free space */
+                       for (i++;
+                            i < zone->elem_size / sizeof(uint32_t) -
+                                sizeof(vm_offset_t) / sizeof(uint32_t);
+                            i++) {
+                               if (((uint32_t *) elem)[i] != ZP_POISON) {
+                                       panic("a freed zone element has been modified in zone: %s",
+                                             zone->zone_name);
+                               }
+                       }
+               } else if (((uint32_t *) elem)[i] != ZP_NOT_POISONED) {
+                       panic("a freed zone element has been modified in zone: %s",
+                             zone->zone_name);
+               }
+               
+               zone->count++;
+               zone->sum_count++;
+               zone->free_elements = ((vm_offset_t *) elem)[0];
+       }
+       *ret = elem;
+}
 
-boolean_t check_freed_element = FALSE;         /* enabled by -zc in boot-args */
-boolean_t zfree_clear = FALSE;                 /* enabled by -zp in boot-args */
 
 /*
  * Fake zones for things that want to report via zprint but are not actually zones.
@@ -131,26 +238,25 @@ struct fake_zone_info {
                      uint64_t *, int *, int *, int *);
 };
 
-static struct fake_zone_info fake_zones[] = {
+static const struct fake_zone_info fake_zones[] = {
        {
                .name = "kernel_stacks",
                .init = stack_fake_zone_init,
                .query = stack_fake_zone_info,
        },
-#if defined(__i386__) || defined (__x86_64__)
        {
                .name = "page_tables",
                .init = pt_fake_zone_init,
                .query = pt_fake_zone_info,
        },
-#endif /* i386 */
        {
                .name = "kalloc.large",
                .init = kalloc_fake_zone_init,
                .query = kalloc_fake_zone_info,
        },
 };
-unsigned int num_fake_zones = sizeof(fake_zones)/sizeof(fake_zones[0]);
+static const unsigned int num_fake_zones =
+       sizeof (fake_zones) / sizeof (fake_zones[0]);
 
 /*
  * Zone info options
@@ -159,61 +265,6 @@ boolean_t zinfo_per_task = FALSE;          /* enabled by -zinfop in boot-args */
 #define ZINFO_SLOTS 200                                /* for now */
 #define ZONES_MAX (ZINFO_SLOTS - num_fake_zones - 1)
 
-/* 
- * Allocation helper macros
- */
-#define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
-
-#define ADD_TO_ZONE(zone, element)                                     \
-MACRO_BEGIN                                                            \
-       if (zfree_clear)                                                \
-       {   unsigned int i;                                             \
-           for (i=0;                                                   \
-                i < zone->elem_size/sizeof(uint32_t);                  \
-                i++)                                                   \
-           ((uint32_t *)(element))[i] = 0xdeadbeef;                    \
-       }                                                               \
-       *((vm_offset_t *)(element)) = (zone)->free_elements;            \
-       if (check_freed_element) {                                      \
-               if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))     \
-                       ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
-                               (zone)->free_elements;                  \
-       }                                                               \
-       (zone)->free_elements = (vm_offset_t) (element);                \
-       (zone)->count--;                                                \
-MACRO_END
-
-#define REMOVE_FROM_ZONE(zone, ret, type)                                      \
-MACRO_BEGIN                                                                    \
-       (ret) = (type) (zone)->free_elements;                                   \
-       if ((ret) != (type) 0) {                                                \
-               if (check_freed_element) {                                      \
-                       if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) ||  \
-                           ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) &&  \
-                           ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
-                           ((vm_offset_t *)(ret))[0]))                         \
-                               panic("a freed zone element has been modified");\
-                       if (zfree_clear) {                                      \
-                               unsigned int ii;                                \
-                               for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
-                                    ii < (zone)->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
-                                        ii++)                                  \
-                                       if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
-                                               panic("a freed zone element has been modified");\
-                       }                                                       \
-               }                                                               \
-               (zone)->count++;                                                \
-               (zone)->sum_count++;                                            \
-               (zone)->free_elements = *((vm_offset_t *)(ret));                \
-       }                                                                       \
-MACRO_END
-
-#if    ZONE_DEBUG
-#define zone_debug_enabled(z) z->active_zones.next
-#define        ROUNDUP(x,y)            ((((x)+(y)-1)/(y))*(y))
-#define ZONE_DEBUG_OFFSET      ROUNDUP(sizeof(queue_chain_t),16) 
-#endif /* ZONE_DEBUG */
-
 /*
  * Support for garbage collection of unused zone pages
  *
@@ -255,7 +306,8 @@ void                zone_page_alloc(
                                vm_size_t       size);
 
 void           zone_page_free_element(
-                               zone_page_index_t       *free_page_list,
+                               zone_page_index_t       *free_page_head,
+                               zone_page_index_t       *free_page_tail,
                                vm_offset_t     addr,
                                vm_size_t       size);
 
@@ -277,12 +329,6 @@ void               zalloc_async(
 
 void           zone_display_zprint( void );
 
-#if    ZONE_DEBUG && MACH_KDB
-int            zone_count(
-                               zone_t          z,
-                               int             tail);
-#endif /* ZONE_DEBUG && MACH_KDB */
-
 vm_map_t       zone_map = VM_MAP_NULL;
 
 zone_t         zone_zone = ZONE_NULL;  /* the zone containing other zones */
@@ -298,16 +344,6 @@ zone_t             zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
 vm_offset_t    zdata;
 vm_size_t      zdata_size;
 
-#define lock_zone(zone)                                        \
-MACRO_BEGIN                                            \
-       lck_mtx_lock_spin(&(zone)->lock);                       \
-MACRO_END
-
-#define unlock_zone(zone)                              \
-MACRO_BEGIN                                            \
-       lck_mtx_unlock(&(zone)->lock);                  \
-MACRO_END
-
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                               \
        (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT);
@@ -354,15 +390,14 @@ lck_grp_t       zone_lck_grp;
 lck_grp_attr_t  zone_lck_grp_attr;
 lck_mtx_ext_t   zone_lck_ext;
 
-
 #if    !ZONE_ALIAS_ADDR
 #define from_zone_map(addr, size) \
        ((vm_offset_t)(addr) >= zone_map_min_address && \
         ((vm_offset_t)(addr) + size -1) <  zone_map_max_address)
 #else
 #define from_zone_map(addr, size) \
-       ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) >= zone_map_min_address && \
-        ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) <  zone_map_max_address)
+       ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) >= zone_map_min_address && \
+        ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) + size -1) <  zone_map_max_address)
 #endif
 
 /*
@@ -423,8 +458,9 @@ static char zone_name_to_log[MAX_ZONE_NAME] = "";   /* the zone name we're logging
  * records since going much larger than this tends to make the system unresponsive and unbootable on small
  * memory configurations.  The default value is 4000 records.
  */
+
 #if    defined(__LP64__)
-#define ZRECORDS_MAX           16000           /* Max records allowed in the log */
+#define ZRECORDS_MAX           128000          /* Max records allowed in the log */
 #else
 #define ZRECORDS_MAX           8000            /* Max records allowed in the log */
 #endif
@@ -518,7 +554,7 @@ extern boolean_t zlog_ready;
 
 /* 
  * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
- * allocations made by the zone allocator.  Every z_sample_factor allocations in each zone, we capture a
+ * allocations made by the zone allocator.  Every zleak_sample_factor allocations in each zone, we capture a
  * backtrace.  Every free, we examine the table and determine if the allocation was being tracked, 
  * and stop tracking it if it was being tracked.
  *
@@ -539,7 +575,7 @@ uint32_t    zleak_state = 0;                /* State of collection, as above */
 boolean_t      panic_include_ztrace    = FALSE;        /* Enable zleak logging on panic */
 vm_size_t      zleak_global_tracking_threshold;        /* Size of zone map at which to start collecting data */
 vm_size_t      zleak_per_zone_tracking_threshold;      /* Size a zone will have before we will collect data on it */
-unsigned int   z_sample_factor = 1000;                 /* Allocations per sample attempt */
+unsigned int   zleak_sample_factor     = 1000;         /* Allocations per sample attempt */
 
 /*
  * Counters for allocation statistics.
@@ -575,11 +611,8 @@ struct zallocation {
 };
 
 /* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
-#define ZLEAK_ALLOCATION_MAP_NUM       16384
-#define ZLEAK_TRACE_MAP_NUM            8192
-
-uint32_t zleak_alloc_buckets = ZLEAK_ALLOCATION_MAP_NUM;
-uint32_t zleak_trace_buckets = ZLEAK_TRACE_MAP_NUM;
+uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
+uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
 
 vm_size_t zleak_max_zonemap_size;
 
@@ -591,7 +624,7 @@ static struct ztrace*               ztraces;
 struct ztrace*                         top_ztrace;
 
 /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
-static lck_mtx_t                       zleak_lock;
+static lck_spin_t                      zleak_lock;
 static lck_attr_t                      zleak_lock_attr;
 static lck_grp_t                       zleak_lock_grp;
 static lck_grp_attr_t                  zleak_lock_grp_attr;
@@ -609,6 +642,15 @@ zleak_init(vm_size_t max_zonemap_size)
        zleak_global_tracking_threshold = max_zonemap_size / 2; 
        zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
 
+#if CONFIG_EMBEDDED
+       if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
+               zleak_enable_flag = TRUE;
+               printf("zone leak detection enabled\n");
+       } else {
+               zleak_enable_flag = FALSE;
+               printf("zone leak detection disabled\n");
+       }
+#else /* CONFIG_EMBEDDED */
        /* -zleakoff (flag to disable zone leak monitor) */
        if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
                zleak_enable_flag = FALSE;
@@ -617,12 +659,13 @@ zleak_init(vm_size_t max_zonemap_size)
                zleak_enable_flag = TRUE;
                printf("zone leak detection enabled\n");
        }
+#endif /* CONFIG_EMBEDDED */
        
        /* zfactor=XXXX (override how often to sample the zone allocator) */
-       if (PE_parse_boot_argn("zfactor", &z_sample_factor, sizeof(z_sample_factor))) {
-               printf("Zone leak factor override:%u\n", z_sample_factor);
+       if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
+               printf("Zone leak factor override:%u\n", zleak_sample_factor);
        }
-       
+
        /* zleak-allocs=XXXX (override number of buckets in zallocations) */
        if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
                printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets);
@@ -645,7 +688,7 @@ zleak_init(vm_size_t max_zonemap_size)
        lck_grp_attr_setdefault(&zleak_lock_grp_attr);
        lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
        lck_attr_setdefault(&zleak_lock_attr);
-       lck_mtx_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
+       lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
        
        if (zleak_enable_flag) {
                zleak_state = ZLEAK_STATE_ENABLED;
@@ -656,7 +699,7 @@ zleak_init(vm_size_t max_zonemap_size)
 
 /*
  * Support for kern.zleak.active sysctl - a simplified
- * simplified version of the zleak_state variable.
+ * version of the zleak_state variable.
  */
 int
 get_zleak_state(void)
@@ -686,14 +729,14 @@ zleak_activate(void)
        }
 
        /* Indicate that we're doing the setup */
-       lck_mtx_lock_spin(&zleak_lock);
+       lck_spin_lock(&zleak_lock);
        if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
-               lck_mtx_unlock(&zleak_lock);
+               lck_spin_unlock(&zleak_lock);
                return KERN_SUCCESS;
        }
 
        zleak_state |= ZLEAK_STATE_ACTIVATING;
-       lck_mtx_unlock(&zleak_lock);
+       lck_spin_unlock(&zleak_lock);
 
        /* Allocate and zero tables */
        retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
@@ -724,10 +767,10 @@ zleak_activate(void)
         * the tables and setting the active flag, because the zfree()
         * path accesses the table without a lock if we're active.
         */
-       lck_mtx_lock_spin(&zleak_lock);
+       lck_spin_lock(&zleak_lock);
        zleak_state |= ZLEAK_STATE_ACTIVE;
        zleak_state &= ~ZLEAK_STATE_ACTIVATING;
-       lck_mtx_unlock(&zleak_lock);
+       lck_spin_unlock(&zleak_lock);
        
        return 0;
 
@@ -736,10 +779,10 @@ fail:
         * If we fail to allocate memory, don't further tax
         * the system by trying again.
         */
-       lck_mtx_lock_spin(&zleak_lock);
+       lck_spin_lock(&zleak_lock);
        zleak_state |= ZLEAK_STATE_FAILED;
        zleak_state &= ~ZLEAK_STATE_ACTIVATING;
-       lck_mtx_unlock(&zleak_lock);
+       lck_spin_unlock(&zleak_lock);
 
        if (allocations_ptr != NULL) {
                kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
@@ -776,7 +819,7 @@ zleak_log(uintptr_t* bt,
                  vm_size_t allocation_size) 
 {
        /* Quit if there's someone else modifying the hash tables */
-       if (!lck_mtx_try_lock_spin(&zleak_lock)) {
+       if (!lck_spin_try_lock(&zleak_lock)) {
                z_total_conflicts++;
                return FALSE;
        }
@@ -796,7 +839,7 @@ zleak_log(uintptr_t* bt,
        if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
                z_alloc_collisions++;
                
-               lck_mtx_unlock(&zleak_lock);
+               lck_spin_unlock(&zleak_lock);
                return TRUE;
        }
        
@@ -812,7 +855,7 @@ zleak_log(uintptr_t* bt,
                trace->zt_collisions++;
                z_trace_collisions++;
                
-               lck_mtx_unlock(&zleak_lock);
+               lck_spin_unlock(&zleak_lock);
                return TRUE;
        } else if (trace->zt_size > 0) {
                /* Same trace, already added, so increment refcount */
@@ -858,7 +901,7 @@ zleak_log(uintptr_t* bt,
        if (top_ztrace->zt_size < trace->zt_size)
                top_ztrace = trace;
        
-       lck_mtx_unlock(&zleak_lock);
+       lck_spin_unlock(&zleak_lock);
        return TRUE;
 }
 
@@ -881,7 +924,7 @@ zleak_free(uintptr_t addr,
        
        if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
                /* if the allocation was the one, grab the lock, check again, then delete it */
-               lck_mtx_lock_spin(&zleak_lock);
+               lck_spin_lock(&zleak_lock);
                
                if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
                        struct ztrace *trace;
@@ -902,7 +945,7 @@ zleak_free(uintptr_t addr,
                        /* A NULL element means the allocation bucket is unused */
                        allocation->za_element = 0;
                }
-               lck_mtx_unlock(&zleak_lock);
+               lck_spin_unlock(&zleak_lock);
        }
 }
 
@@ -918,21 +961,23 @@ zleak_free(uintptr_t addr,
  * It's fast because it does no checking to make sure there isn't bad data.
  * Since it's only called from threads that we're going to keep executing,
  * if there's bad data we were going to die eventually.
- * This seems to work for x86 and X86_64.
- * ARMTODO: Test it on ARM, I think it will work but I can't test it.  If it works, remove the ifdef.
  * If this function is inlined, it doesn't record the frame of the function it's inside.
  * (because there's no stack frame!)
  */
+
 uint32_t
 fastbacktrace(uintptr_t* bt, uint32_t max_frames)
 {
-#if defined(__x86_64__) || defined(__i386__)
        uintptr_t* frameptr = NULL, *frameptr_next = NULL;
        uintptr_t retaddr = 0;
        uint32_t frame_index = 0, frames = 0;
        uintptr_t kstackb, kstackt;
+       thread_t cthread = current_thread();
 
-       kstackb = current_thread()->kernel_stack;
+       if (__improbable(cthread == NULL))
+               return 0;
+
+       kstackb = cthread->kernel_stack;
        kstackt = kstackb + kernel_stack_size;
        /* Load stack frame pointer (EBP on x86) into frameptr */
        frameptr = __builtin_frame_address(0);
@@ -965,9 +1010,6 @@ fastbacktrace(uintptr_t* bt, uint32_t max_frames)
                bt[frame_index++] = 0;
 
        return frames;
-#else
-       return OSBacktrace((void*)bt, max_frames);
-#endif
 }
 
 /* "Thomas Wang's 32/64 bit mix functions."  http://www.concentric.net/~Ttwang/tech/inthash.htm */
@@ -1001,8 +1043,8 @@ hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
        uintptr_t hash = 0;
        uintptr_t mask = max_size - 1;
 
-       while (--depth) {
-               hash += bt[depth];
+       while (depth) {
+               hash += bt[--depth];
        }
 
        hash = hash_mix(hash) & mask;
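
The loop change fixes an off-by-one: with depth == 3 the old "while (--depth)" form summed only bt[2] and bt[1], silently dropping the innermost frame bt[0]. The new decrement-inside form visits all three entries, equivalent to:

	uint32_t i;

	for (i = 0; i < depth; i++)
		hash += bt[i];
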
@@ -1053,6 +1095,7 @@ zinit(
                zdata_size -= sizeof(*z);
        } else
                z = (zone_t) zalloc(zone_zone);
+
        if (z == ZONE_NULL)
                return(ZONE_NULL);
 
@@ -1128,6 +1171,8 @@ use_this_allocation:
        z->noencrypt = FALSE;
        z->no_callout = FALSE;
        z->async_prio_refill = FALSE;
+       z->gzalloc_exempt = FALSE;
+       z->alignment_required = FALSE;
        z->prio_refill_watermark = 0;
        z->zone_replenish_thread = NULL;
 #if CONFIG_ZLEAKS
@@ -1165,8 +1210,7 @@ use_this_allocation:
        /*
         * Check if we should be logging this zone.  If so, remember the zone pointer.
         */
-
-        if (log_this_zone(z->zone_name, zone_name_to_log)) {
+       if (log_this_zone(z->zone_name, zone_name_to_log)) {
                zone_of_interest = z;
        }
 
@@ -1178,7 +1222,6 @@ use_this_allocation:
         * later on some other zone.  So note we may be allocating a buffer to log a zone other than the one being initialized
         * right now.
         */
-
        if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
                if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
 
@@ -1195,7 +1238,9 @@ use_this_allocation:
                        zone_of_interest = NULL;
                }
        }
-
+#if    CONFIG_GZALLOC  
+       gzalloc_zone_init(z);
+#endif
        return(z);
 }
 unsigned       zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated;
@@ -1306,10 +1351,10 @@ zcram(
 
        lock_zone(zone);
        while (size >= elem_size) {
-               ADD_TO_ZONE(zone, newmem);
+               free_to_zone(zone, (void *) newmem);
                if (from_zm)
                        zone_page_alloc(newmem, elem_size);
-               zone->count++;  /* compensate for ADD_TO_ZONE */
+               zone->count++;  /* compensate for free_to_zone */
                size -= elem_size;
                newmem += elem_size;
                zone->cur_size += elem_size;
@@ -1325,6 +1370,9 @@ zcram(
 void
 zone_steal_memory(void)
 {
+#if    CONFIG_GZALLOC
+       gzalloc_configure();
+#endif
        /* Request enough early memory to get to the pmap zone */
        zdata_size = 12 * sizeof(struct zone);
        zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size));
@@ -1375,35 +1423,30 @@ zone_bootstrap(void)
 {
        char temp_buf[16];
 
-#if 6094439
-       /* enable zone checks by default, to try and catch offenders... */
-#if 0
-       /* 7968354: turn "-zc" back off */
-       check_freed_element = TRUE;
-       /* 7995202: turn "-zp" back off */
-       zfree_clear = TRUE;
-#endif
-       
-       /* ... but allow them to be turned off explicitely */
-       if (PE_parse_boot_argn("-no_zc", temp_buf, sizeof (temp_buf))) {
-               check_freed_element = FALSE;
-       }
-       if (PE_parse_boot_argn("-no_zp", temp_buf, sizeof (temp_buf))) {
-               zfree_clear = FALSE;
+       if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
+               zinfo_per_task = TRUE;
        }
-#endif
 
-       /* see if we want freed zone element checking and/or poisoning */
-       if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
-               check_freed_element = TRUE;
+       /* do we want corruption-style debugging with zlog? */
+       if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
+               corruption_debug_flag = TRUE;
        }
+       
+       /* Set up zone poisoning */
 
-       if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
-               zfree_clear = TRUE;
+       free_check_sample_factor = ZP_DEFAULT_SAMPLING_FACTOR;
+
+       /* support for old zone poisoning boot-args */
+       if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
+               free_check_sample_factor = 1;
+       }
+       if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
+               free_check_sample_factor = 0;
        }
 
-       if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof (temp_buf))) {
-               zinfo_per_task = TRUE;
+       /* zp-factor=XXXX (override how often to poison freed zone elements) */
+       if (PE_parse_boot_argn("zp-factor", &free_check_sample_factor, sizeof(free_check_sample_factor))) {
+               printf("Zone poisoning factor override:%u\n", free_check_sample_factor);
        }
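
Taken together, the boot-args recognized after this change are zp-factor=N, -zp (shorthand for zp-factor=1), -no-zp, -zc, and -zinfop, plus the zlog=<zone name> argument described in the corruption-logging comment. For example, to poison every freed element and corruption-log a single zone (zone name illustrative):

	nvram boot-args="zp-factor=1 -zc zlog=kalloc.256"
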
 
        /*
@@ -1498,6 +1541,9 @@ zone_init(
        if (retval != KERN_SUCCESS)
                panic("zone_init: kmem_suballoc failed");
        zone_max = zone_min + round_page(max_zonemap_size);
+#if    CONFIG_GZALLOC
+       gzalloc_init(max_zonemap_size);
+#endif
        /*
         * Setup garbage collection information:
         */
@@ -1608,19 +1654,26 @@ zalloc_canblock(
        register zone_t zone,
        boolean_t canblock)
 {
-       vm_offset_t     addr;
-       kern_return_t retval;
+       vm_offset_t     addr = 0;
+       kern_return_t   retval;
        uintptr_t       zbt[MAX_ZTRACE_DEPTH];  /* used in zone leak logging and zone leak detection */
        int             numsaved = 0;
-       int                     i;
+       int             i;
        boolean_t       zone_replenish_wakeup = FALSE;
+       boolean_t       did_gzalloc;
 
+       did_gzalloc = FALSE;
 #if CONFIG_ZLEAKS
        uint32_t        zleak_tracedepth = 0;  /* log this allocation if nonzero */
 #endif /* CONFIG_ZLEAKS */
 
        assert(zone != ZONE_NULL);
-       
+
+#if    CONFIG_GZALLOC
+       addr = gzalloc_alloc(zone, canblock);
+       did_gzalloc = (addr != 0);
+#endif
+
        lock_zone(zone);
 
        /*
@@ -1632,10 +1685,10 @@ zalloc_canblock(
        
 #if CONFIG_ZLEAKS
        /* 
-        * Zone leak detection: capture a backtrace every z_sample_factor
+        * Zone leak detection: capture a backtrace every zleak_sample_factor
         * allocations in this zone. 
         */
-       if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
+       if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) {
                zone->zleak_capture = 1;
                
                /* Avoid backtracing twice if zone logging is on */
@@ -1646,10 +1699,12 @@ zalloc_canblock(
        }
 #endif /* CONFIG_ZLEAKS */
 
-       REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+       if (__probable(addr == 0))
+               alloc_from_zone(zone, (void **) &addr);
 
        if (zone->async_prio_refill &&
-       ((zone->cur_size - (zone->count * zone->elem_size)) < (zone->prio_refill_watermark * zone->elem_size))) {
+           ((zone->cur_size - (zone->count * zone->elem_size)) <
+           (zone->prio_refill_watermark * zone->elem_size))) {
                zone_replenish_wakeup = TRUE;
                zone_replenish_wakeups_initiated++;
        }
@@ -1698,6 +1753,11 @@ zalloc_canblock(
                                } else {
                                        unlock_zone(zone);
 
+                                       panic_include_zprint = TRUE;
+#if CONFIG_ZLEAKS
+                                       if (zleak_state & ZLEAK_STATE_ACTIVE)
+                                               panic_include_ztrace = TRUE;
+#endif /* CONFIG_ZLEAKS */
                                        panic("zalloc: zone \"%s\" empty.", zone->zone_name);
                                }
                        }
@@ -1749,7 +1809,7 @@ zalloc_canblock(
                                        retry++;
                                        
                                        if (retry == 2) {
-                                               zone_gc();
+                                               zone_gc(TRUE);
                                                printf("zalloc did gc\n");
                                                zone_display_zprint();
                                        }
@@ -1775,7 +1835,7 @@ zalloc_canblock(
                                zone->waiting = FALSE;
                                zone_wakeup(zone);
                        }
-                       REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+                       alloc_from_zone(zone, (void **) &addr);
                        if (addr == 0 &&
                                retval == KERN_RESOURCE_SHORTAGE) {
                                unlock_zone(zone);
@@ -1785,7 +1845,7 @@ zalloc_canblock(
                        }
                }
                if (addr == 0)
-                       REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+                       alloc_from_zone(zone, (void **) &addr);
        }
 
 #if CONFIG_ZLEAKS
@@ -1796,7 +1856,7 @@ zalloc_canblock(
                /* Sampling can fail if another sample is happening at the same time in a different zone. */
                if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
                        /* If it failed, roll back the counter so we sample the next allocation instead. */
-                       zone->zleak_capture = z_sample_factor;
+                       zone->zleak_capture = zleak_sample_factor;
                }
        }
 #endif /* CONFIG_ZLEAKS */                     
@@ -1815,16 +1875,16 @@ zalloc_canblock(
                 * depending on whether we're looking for the source of a zone leak or a zone corruption.  When looking
                 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
                 * among all the records.  So we look for an unused slot in the log and fill that in before overwriting
-                * an old entry.  When looking for a corrution however, it's better to have a chronological log of all
+                * an old entry.  When looking for a corruption however, it's better to have a chronological log of all
                 * the allocations and frees done in the zone so that the history of operations for a specific zone 
                 * element can be inspected.  So in this case, we treat the log as a circular buffer and overwrite the
                 * oldest entry whenever a new one needs to be added.
                 *
-                * The check_freed_element flag tells us what style of logging to do.  It's set if we're supposed to be
+                * The corruption_debug_flag flag tells us what style of logging to do.  It's set if we're supposed to be
                 * doing corruption style logging (indicated via -zc in the boot-args).
                 */
 
-               if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
+               if (!corruption_debug_flag && zrecords[zcurrent].z_element && zrecorded < log_records) {
 
                        /*
                         * If we get here, we're doing leak style logging and there's still some unused entries in
@@ -1832,8 +1892,8 @@ zalloc_canblock(
                         * starting at zcurrent and wrap-around if we reach the end of the buffer.  If the buffer
                         * is already full, we just fall through and overwrite the element indexed by zcurrent.
                         */
-       
-                      for (i = zcurrent; i < log_records; i++) {
+
+                       for (i = zcurrent; i < log_records; i++) {
                                if (zrecords[i].z_element == NULL) {
                                        zcurrent = i;
                                        goto empty_slot;
@@ -1877,11 +1937,11 @@ empty_slot:
                unlock_zone(zone);
                thread_call_enter(&zone->call_async_alloc);
                lock_zone(zone);
-               REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+               alloc_from_zone(zone, (void **) &addr);
        }
 
 #if    ZONE_DEBUG
-       if (addr && zone_debug_enabled(zone)) {
+       if (!did_gzalloc && addr && zone_debug_enabled(zone)) {
                enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
                addr += ZONE_DEBUG_OFFSET;
        }
@@ -1904,14 +1964,15 @@ empty_slot:
                thread_t thr = current_thread();
                task_t task;
                zinfo_usage_t zinfo;
+               vm_size_t sz = zone->elem_size;
 
                if (zone->caller_acct)
-                       thr->tkm_private.alloc += zone->elem_size;
+                       ledger_credit(thr->t_ledger, task_ledgers.tkm_private, sz);
                else
-                       thr->tkm_shared.alloc += zone->elem_size;
+                       ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, sz);
 
                if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
-                       OSAddAtomic64(zone->elem_size, (int64_t *)&zinfo[zone->index].alloc);
+                       OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].alloc);
        }
        return((void *)addr);
 }
@@ -1945,7 +2006,6 @@ zalloc_async(
        unlock_zone(((zone_t)p0));
 }
 
-
 /*
  *     zget returns an element from the specified zone
  *     and immediately returns nothing if there is nothing there.
@@ -1961,7 +2021,7 @@ void *
 zget(
        register zone_t zone)
 {
-       register vm_offset_t    addr;
+       vm_offset_t     addr;
        
 #if CONFIG_ZLEAKS
        uintptr_t       zbt[MAX_ZTRACE_DEPTH];          /* used for zone leak detection */
@@ -1977,13 +2037,13 @@ zget(
        /*
         * Zone leak detection: capture a backtrace
         */
-       if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
+       if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) {
                zone->zleak_capture = 1;
                zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
        }
 #endif /* CONFIG_ZLEAKS */
 
-       REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
+       alloc_from_zone(zone, (void **) &addr);
 #if    ZONE_DEBUG
        if (addr && zone_debug_enabled(zone)) {
                enqueue_tail(&zone->active_zones, (queue_entry_t)addr);
@@ -1999,7 +2059,7 @@ zget(
                /* Sampling can fail if another sample is happening at the same time in a different zone. */
                if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
                        /* If it failed, roll back the counter so we sample the next allocation instead. */
-                       zone->zleak_capture = z_sample_factor;
+                       zone->zleak_capture = zleak_sample_factor;
                }
        }
        
@@ -2026,8 +2086,9 @@ zfree(
        void            *addr)
 {
        vm_offset_t     elem = (vm_offset_t) addr;
-       void            *zbt[MAX_ZTRACE_DEPTH];                 /* only used if zone logging is enabled via boot-args */
+       void            *zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */
        int             numsaved = 0;
+       boolean_t       gzfreed = FALSE;
 
        assert(zone != ZONE_NULL);
 
@@ -2047,10 +2108,14 @@ zfree(
                panic("zfree: freeing to zone_zone breaks zone_gc!");
 #endif
 
+#if    CONFIG_GZALLOC  
+       gzfreed = gzalloc_free(zone, addr);
+#endif
+
        TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr);
 
-       if (zone->collectable && !zone->allows_foreign &&
-           !from_zone_map(elem, zone->elem_size)) {
+       if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign &&
+               !from_zone_map(elem, zone->elem_size))) {
 #if MACH_ASSERT
                panic("zfree: non-allocated memory in collectable zone!");
 #endif
@@ -2069,7 +2134,7 @@ zfree(
        if (DO_LOGGING(zone)) {
                int  i;
 
-               if (check_freed_element) {
+               if (corruption_debug_flag) {
 
                        /*
                         * We're logging to catch a corruption.  Add a record of this zfree operation
@@ -2116,7 +2181,7 @@ zfree(
 
 
 #if    ZONE_DEBUG
-       if (zone_debug_enabled(zone)) {
+       if (!gzfreed && zone_debug_enabled(zone)) {
                queue_t tmp_elem;
 
                elem -= ZONE_DEBUG_OFFSET;
@@ -2145,7 +2210,10 @@ zfree(
                        if (!pmap_kernel_va(this) || this == elem)
                                panic("zfree");
        }
-       ADD_TO_ZONE(zone, elem);
+
+       if (__probable(!gzfreed))
+               free_to_zone(zone, (void *) elem);
+
 #if MACH_ASSERT
        if (zone->count < 0)
                panic("zfree: count < 0!");
@@ -2178,14 +2246,15 @@ zfree(
                thread_t thr = current_thread();
                task_t task;
                zinfo_usage_t zinfo;
+               vm_size_t sz = zone->elem_size;
 
                if (zone->caller_acct)
-                       thr->tkm_private.free += zone->elem_size;
+                       ledger_debit(thr->t_ledger, task_ledgers.tkm_private, sz);
                else
-                       thr->tkm_shared.free += zone->elem_size;
+                       ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, sz);
+
                if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
-                       OSAddAtomic64(zone->elem_size,
-                                     (int64_t *)&zinfo[zone->index].free);
+                       OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].free);
        }
 }
 
@@ -2224,11 +2293,24 @@ zone_change(
                case Z_NOCALLOUT:
                        zone->no_callout = value;
                        break;
-#if MACH_ASSERT
+               case Z_GZALLOC_EXEMPT:
+                       zone->gzalloc_exempt = value;
+#if    CONFIG_GZALLOC
+                       gzalloc_reconfigure(zone);
+#endif
+                       break;
+               case Z_ALIGNMENT_REQUIRED:
+                       zone->alignment_required = value;
+#if    ZONE_DEBUG                      
+                       zone_debug_disable(zone);
+#endif
+#if    CONFIG_GZALLOC
+                       gzalloc_reconfigure(zone);
+#endif
+                       break;
                default:
                        panic("Zone_change: Wrong Item Type!");
                        /* break; */
-#endif
        }
 }
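
With the two new cases above, a zone presumably opts out of guard-mode tracking, or declares that its elements must not be shifted by debug padding, via zone_change (a sketch in kernel context; the constants are defined in zone.h further below):

    /* Sketch: configuring the new zone flags via zone_change(). */
    static void
    configure_my_zone(zone_t z)
    {
            zone_change(z, Z_GZALLOC_EXEMPT, TRUE);     /* not tracked in guard mode */
            zone_change(z, Z_ALIGNMENT_REQUIRED, TRUE); /* disables debug offsetting */
    }
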
 
@@ -2253,24 +2335,6 @@ zone_free_count(zone_t zone)
        return(free_count);
 }
 
-/*
- *     zprealloc preallocates wired memory, exanding the specified
- *      zone to the specified size
- */
-void
-zprealloc(
-       zone_t  zone,
-       vm_size_t size)
-{
-        vm_offset_t addr;
-
-       if (size != 0) {
-               if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS)
-                 panic("zprealloc");
-               zcram(zone, addr, size);
-       }
-}
-
 /*
  *  Zone garbage collection subroutines
  */
@@ -2419,7 +2483,8 @@ zone_page_alloc(
 
 void
 zone_page_free_element(
-       zone_page_index_t       *free_page_list,
+       zone_page_index_t       *free_page_head,
+       zone_page_index_t       *free_page_tail,
        vm_offset_t     addr,
        vm_size_t       size)
 {
@@ -2444,6 +2509,7 @@ zone_page_free_element(
                        --zp->collect_count;
                if (--zp->alloc_count == 0) {
                        vm_address_t        free_page_address;
+                       vm_address_t        prev_free_page_address;
 
                        zp->alloc_count  = ZONE_PAGE_UNUSED;
                        zp->collect_count = 0;
@@ -2454,8 +2520,16 @@ zone_page_free_element(
                         * storage for a page freelist
                         */
                        free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)i);
-                       *(zone_page_index_t *)free_page_address = *free_page_list;
-                       *free_page_list = i;
+                       *(zone_page_index_t *)free_page_address = ZONE_PAGE_INDEX_INVALID;
+
+                       if (*free_page_head == ZONE_PAGE_INDEX_INVALID) {
+                               *free_page_head = i;
+                               *free_page_tail = i;
+                       } else {
+                               prev_free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)(*free_page_tail));
+                               *(zone_page_index_t *)prev_free_page_address = i;
+                               *free_page_tail = i;
+                       }
                }
        }
 }
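
The new head/tail pair threads the freelist through the first word of each free page and appends at the tail, so pages come back in the order they were collected (which the reclaim loop in zone_gc below exploits to coalesce runs). A user-space sketch of the same linking scheme, with array indices standing in for page addresses (hypothetical names):

    #include <stdint.h>

    #define NPAGES          16
    #define INDEX_INVALID   ((uint16_t)0xffff)

    static uint16_t next_of[NPAGES];    /* stands in for the first word of each page */

    static void
    append_free_page(uint16_t *head, uint16_t *tail, uint16_t i)
    {
            next_of[i] = INDEX_INVALID;     /* the new tail terminates the list */
            if (*head == INDEX_INVALID) {
                    *head = i;              /* first page: head and tail both point at it */
            } else {
                    next_of[*tail] = i;     /* link the old tail forward */
            }
            *tail = i;
    }
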
@@ -2471,14 +2545,12 @@ struct zone_free_element {
  * Add a linked list of pages starting at base back into the zone
  * free list. Tail points to the last element on the list.
  */
-
 #define ADD_LIST_TO_ZONE(zone, base, tail)                             \
 MACRO_BEGIN                                                            \
        (tail)->next = (void *)((zone)->free_elements);                 \
-       if (check_freed_element) {                                      \
-               if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))     \
-                       ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
-                                        (zone)->free_elements;         \
+       if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {        \
+               ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] =    \
+                       (zone)->free_elements;                          \
        }                                                               \
        (zone)->free_elements = (unsigned long)(base);                  \
 MACRO_END
@@ -2486,15 +2558,13 @@ MACRO_END
 /*
  * Add an element to the chain pointed to by prev.
  */
-
-#define ADD_ELEMENT(zone, prev, elem)                                          \
+#define ADD_ELEMENT(zone, prev, elem)                                  \
 MACRO_BEGIN                                                            \
        (prev)->next = (elem);                                          \
-       if (check_freed_element) {                                      \
-               if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)))     \
-                       ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
-                                       (vm_offset_t)(elem);            \
-        }                                                              \
+       if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {        \
+               ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] =    \
+                       (vm_offset_t)(elem);                            \
+       }                                                               \
 MACRO_END
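
Both macros now mirror the next pointer into the last pointer-sized word of every sufficiently large element unconditionally (previously only under check_freed_element), so a consumer can compare the two copies and detect a scribbled freelist. A standalone sketch of that redundancy, assuming elements of at least two pointers plus a 32-bit check word (hypothetical names):

    #include <stdint.h>
    #include <stddef.h>

    static void
    set_next_sketch(void *elem, size_t elem_size, void *next)
    {
            ((void **)elem)[0] = next;                                  /* primary copy */
            if (elem_size >= 2 * sizeof(void *) + sizeof(uint32_t))
                    ((void **)elem)[elem_size / sizeof(void *) - 1] = next; /* backup copy */
    }

    static int
    next_is_sane_sketch(void *elem, size_t elem_size)
    {
            if (elem_size < 2 * sizeof(void *) + sizeof(uint32_t))
                    return 1;               /* too small to carry a backup */
            return ((void **)elem)[0] ==
                ((void **)elem)[elem_size / sizeof(void *) - 1];
    }
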
 
 struct {
@@ -2513,12 +2583,14 @@ struct {
  *     begins to run out of memory.
  */
 void
-zone_gc(void)
+zone_gc(boolean_t all_zones)
 {
        unsigned int    max_zones;
        zone_t                  z;
        unsigned int    i;
        zone_page_index_t zone_free_page_head;
+       zone_page_index_t zone_free_page_tail;
+       thread_t        mythread = current_thread();
 
        lck_mtx_lock(&zone_gc_lock);
 
@@ -2527,6 +2599,14 @@ zone_gc(void)
        z = first_zone;
        simple_unlock(&all_zones_lock);
 
+
+       /*
+        * it's ok to allow eager kernel preemption
+        * while holding a zone lock since it's taken
+        * as a spin lock (which prevents preemption)
+        */
+       thread_set_eager_preempt(mythread);
+
 #if MACH_ASSERT
        for (i = 0; i < zone_pages; i++) {
                struct zone_page_table_entry    *zp;
@@ -2536,24 +2616,26 @@ zone_gc(void)
        }
 #endif /* MACH_ASSERT */
 
-       zone_free_page_head = ZONE_PAGE_INDEX_INVALID;
-
        for (i = 0; i < max_zones; i++, z = z->next_zone) {
-               unsigned int                            n, m;
-               vm_size_t                                       elt_size, size_freed;
+               unsigned int                    n, m;
+               vm_size_t                       elt_size, size_freed;
                struct zone_free_element        *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail;
+               int                             kmem_frees = 0;
 
                assert(z != ZONE_NULL);
 
                if (!z->collectable)
                        continue;
 
+               if (all_zones == FALSE && z->elem_size < PAGE_SIZE)
+                       continue;
+
                lock_zone(z);
 
                elt_size = z->elem_size;
 
                /*
-                * Do a quick feasability check before we scan the zone: 
+                * Do a quick feasibility check before we scan the zone: 
                 * skip unless there is likelihood of getting pages back
                 * (i.e we need a whole allocation block's worth of free
                 * elements before we can garbage collect) and
@@ -2589,6 +2671,11 @@ zone_gc(void)
                prev = (void *)&scan;
                elt = scan;
                n = 0; tail = keep = NULL;
+
+               zone_free_page_head = ZONE_PAGE_INDEX_INVALID;
+               zone_free_page_tail = ZONE_PAGE_INDEX_INVALID;
+
+
                while (elt != NULL) {
                        if (from_zone_map(elt, elt_size)) {
                                zone_page_collect((vm_offset_t)elt, elt_size);
@@ -2676,6 +2763,7 @@ zone_gc(void)
                size_freed = 0;
                elt = scan;
                n = 0; tail = keep = NULL;
+
                while (elt != NULL) {
                        if (zone_page_collectable((vm_offset_t)elt, elt_size)) {
                                struct zone_free_element *next_elt = elt->next;
@@ -2688,8 +2776,7 @@ zone_gc(void)
                                 * list of free-able pages. So store elt->next because
                                 * "elt" may be scribbled over.
                                 */
-                               zone_page_free_element(&zone_free_page_head,
-                                                                               (vm_offset_t)elt, elt_size);
+                               zone_page_free_element(&zone_free_page_head, &zone_free_page_tail, (vm_offset_t)elt, elt_size);
 
                                elt = next_elt;
 
@@ -2760,29 +2847,67 @@ zone_gc(void)
                        zone_wakeup(z);
                }
                unlock_zone(z);
-       }
 
-       /*
-        * Reclaim the pages we are freeing.
-        */
 
-       while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
-               zone_page_index_t       zind = zone_free_page_head;
-               vm_address_t            free_page_address;
-#if    ZONE_ALIAS_ADDR
-               z = (zone_t)zone_virtual_addr((vm_map_address_t)z);
-#endif
-               /* Use the first word of the page about to be freed to find the next free page */
-               free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind);
-               zone_free_page_head = *(zone_page_index_t *)free_page_address;
+               if (zone_free_page_head == ZONE_PAGE_INDEX_INVALID)
+                       continue;
+
+               /*
+                * we don't want to allow eager kernel preemption while holding the
+                * various locks taken in the kmem_free path of execution
+                */
+               thread_clear_eager_preempt(mythread);
+
+               /*
+                * Reclaim the pages we are freeing.
+                */
+               while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
+                       zone_page_index_t       zind = zone_free_page_head;
+                       vm_address_t            free_page_address;
+                       int                     page_count;
+
+                       /*
+                        * Use the first word of the page about to be freed to find the next free page
+                        */
+                       free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind);
+                       zone_free_page_head = *(zone_page_index_t *)free_page_address;
+
+                       page_count = 1;
+
+                       while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) {
+                               zone_page_index_t       next_zind = zone_free_page_head;
+                               vm_address_t            next_free_page_address;
+
+                               next_free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)next_zind);
+
+                               if (next_free_page_address == (free_page_address - PAGE_SIZE)) {
+                                       free_page_address = next_free_page_address;
+                               } else if (next_free_page_address != (free_page_address + (PAGE_SIZE * page_count)))
+                                       break;
+
+                               zone_free_page_head = *(zone_page_index_t *)next_free_page_address;
+                               page_count++;
+                       }
+                       kmem_free(zone_map, free_page_address, page_count * PAGE_SIZE);
+
+                       zgc_stats.pgs_freed += page_count;
 
-               kmem_free(zone_map, free_page_address, PAGE_SIZE);
-               ++zgc_stats.pgs_freed;
+                       if (++kmem_frees == 32) {
+                               thread_yield_internal(1);
+                               kmem_frees = 0;
+                       }
+               }
+               thread_set_eager_preempt(mythread);
        }
+       thread_clear_eager_preempt(mythread);
 
        lck_mtx_unlock(&zone_gc_lock);
+
 }
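
The reclaim loop above no longer frees one page per kmem_free: it walks the index list, grows a run while the next page either directly precedes the current base or extends the top of the run, frees the whole run at once, and yields every 32 calls. A compact user-space sketch of the same run detection (hypothetical names):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE_SKETCH 4096u

    /* Coalesce an ordered array of n free-page addresses into maximal runs. */
    static void
    free_runs_sketch(const uintptr_t *pages, int n)
    {
            int i = 0;
            while (i < n) {
                    uintptr_t base = pages[i++];
                    int count = 1;
                    while (i < n) {
                            if (pages[i] == base - PAGE_SIZE_SKETCH)
                                    base = pages[i];        /* run grows downward */
                            else if (pages[i] != base + (uintptr_t)PAGE_SIZE_SKETCH * count)
                                    break;                  /* not contiguous: flush the run */
                            count++;
                            i++;
                    }
                    printf("free %d page(s) at %#lx\n", count, (unsigned long)base);
            }
    }
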
 
+extern vm_offset_t kmapoff_kaddr;
+extern unsigned int kmapoff_pgcnt;
+
 /*
  *     consider_zone_gc:
  *
@@ -2792,14 +2917,29 @@ zone_gc(void)
 void
 consider_zone_gc(boolean_t force)
 {
+       boolean_t all_zones = FALSE;
+
+       if (kmapoff_kaddr != 0) {
+               /*
+                * One-time reclaim of kernel_map resources we allocated in
+                * early boot.
+                */
+               (void) vm_deallocate(kernel_map,
+                   kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64);
+               kmapoff_kaddr = 0;
+       }
 
        if (zone_gc_allowed &&
            (zone_gc_allowed_by_time_throttle ||
             zone_gc_forced ||
             force)) {
+               if (zone_gc_allowed_by_time_throttle == TRUE) {
+                       zone_gc_allowed_by_time_throttle = FALSE;
+                       all_zones = TRUE;
+               }
                zone_gc_forced = FALSE;
-               zone_gc_allowed_by_time_throttle = FALSE; /* reset periodically */
-               zone_gc();
+
+               zone_gc(all_zones);
        }
 }
 
@@ -2814,6 +2954,8 @@ compute_zone_gc_throttle(void *arg __unused)
 }
 
 
+#if CONFIG_TASK_ZONE_INFO
+
 kern_return_t
 task_zone_info(
        task_t                  task,
@@ -2972,9 +3114,24 @@ task_zone_info(
        return KERN_SUCCESS;
 }
 
+#else  /* CONFIG_TASK_ZONE_INFO */
+
+kern_return_t
+task_zone_info(
+       __unused task_t         task,
+       __unused mach_zone_name_array_t *namesp,
+       __unused mach_msg_type_number_t *namesCntp,
+       __unused task_zone_info_array_t *infop,
+       __unused mach_msg_type_number_t *infoCntp)
+{
+       return KERN_FAILURE;
+}
+
+#endif /* CONFIG_TASK_ZONE_INFO */
+
 kern_return_t
 mach_zone_info(
-       host_t                  host,
+       host_priv_t             host,
        mach_zone_name_array_t  *namesp,
        mach_msg_type_number_t  *namesCntp,
        mach_zone_info_array_t  *infop,
@@ -2998,8 +3155,10 @@ mach_zone_info(
 
        if (host == HOST_NULL)
                return KERN_INVALID_HOST;
-
-       num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
+#if CONFIG_DEBUGGER_FOR_ZONE_INFO
+       if (!PE_i_can_has_debugger(NULL))
+               return KERN_INVALID_HOST;
+#endif
 
        /*
         *      We assume that zones aren't freed once allocated.
@@ -3122,7 +3281,7 @@ mach_zone_info(
  */
 kern_return_t
 host_zone_info(
-       host_t                  host,
+       host_priv_t             host,
        zone_name_array_t       *namesp,
        mach_msg_type_number_t  *namesCntp,
        zone_info_array_t       *infop,
@@ -3146,6 +3305,10 @@ host_zone_info(
 
        if (host == HOST_NULL)
                return KERN_INVALID_HOST;
+#if CONFIG_DEBUGGER_FOR_ZONE_INFO
+       if (!PE_i_can_has_debugger(NULL))
+               return KERN_INVALID_HOST;
+#endif
 
 #if defined(__LP64__)
        if (!thread_is_64bit(current_thread()))
@@ -3155,8 +3318,6 @@ host_zone_info(
                return KERN_NOT_SUPPORTED;
 #endif
 
-       num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
-
        /*
         *      We assume that zones aren't freed once allocated.
         *      We won't pick up any zones that are allocated later.
@@ -3260,6 +3421,19 @@ host_zone_info(
        return KERN_SUCCESS;
 }
 
+kern_return_t
+mach_zone_force_gc(
+       host_t host)
+{
+
+       if (host == HOST_NULL)
+               return KERN_INVALID_HOST;
+
+       consider_zone_gc(TRUE);
+
+       return (KERN_SUCCESS);
+}
+
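
mach_zone_force_gc is exposed as a Mach host interface, so a user-space tool can presumably force a full collection along these lines (a sketch; assumes the matching MIG-generated user stub):

    #include <mach/mach.h>
    #include <mach/mach_error.h>
    #include <stdio.h>

    int
    main(void)
    {
            kern_return_t kr = mach_zone_force_gc(mach_host_self());
            printf("mach_zone_force_gc: %s\n", mach_error_string(kr));
            return (kr == KERN_SUCCESS) ? 0 : 1;
    }
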
 extern unsigned int stack_total;
 extern unsigned long long stack_allocs;
 
@@ -3297,279 +3471,10 @@ void zone_display_zprint()
        printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total);
 }
 
-
-
-#if    MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <kern/kern_print.h>
-
-const char *zone_labels =
-"ENTRY       COUNT   TOT_SZ   MAX_SZ ELT_SZ ALLOC_SZ NAME";
-
-/* Forwards */
-void   db_print_zone(
-               zone_t          addr);
-
-#if    ZONE_DEBUG
-void   db_zone_check_active(
-               zone_t          zone);
-void   db_zone_print_active(
-               zone_t          zone);
-#endif /* ZONE_DEBUG */
-void   db_zone_print_free(
-               zone_t          zone);
-void
-db_print_zone(
-       zone_t          addr)
-{
-       struct zone zcopy;
-
-       zcopy = *addr;
-
-       db_printf("%8x %8x %8x %8x %6x %8x %s ",
-                 addr, zcopy.count, zcopy.cur_size,
-                 zcopy.max_size, zcopy.elem_size,
-                 zcopy.alloc_size, zcopy.zone_name);
-       if (zcopy.exhaustible)
-               db_printf("H");
-       if (zcopy.collectable)
-               db_printf("C");
-       if (zcopy.expandable)
-               db_printf("X");
-       if (zcopy.caller_acct)
-               db_printf("A");
-       db_printf("\n");
-}
-
-/*ARGSUSED*/
-void
-db_show_one_zone(db_expr_t addr, boolean_t have_addr,
-                __unused db_expr_t count, __unused char *modif)
-{
-       struct zone *z = (zone_t)((char *)0 + addr);
-
-       if (z == ZONE_NULL || !have_addr){
-               db_error("No Zone\n");
-               /*NOTREACHED*/
-       }
-
-       db_printf("%s\n", zone_labels);
-       db_print_zone(z);
-}
-
-/*ARGSUSED*/
-void
-db_show_all_zones(__unused db_expr_t addr, boolean_t have_addr, db_expr_t count,
-                 __unused char *modif)
-{
-       zone_t          z;
-       unsigned total = 0;
-
-       /*
-        * Don't risk hanging by unconditionally locking,
-        * risk of incoherent data is small (zones aren't freed).
-        */
-       have_addr = simple_lock_try(&all_zones_lock);
-       count = num_zones;
-       z = first_zone;
-       if (have_addr) {
-               simple_unlock(&all_zones_lock);
-       }
-
-       db_printf("%s\n", zone_labels);
-       for (  ; count > 0; count--) {
-               if (!z) {
-                       db_error("Mangled Zone List\n");
-                       /*NOTREACHED*/
-               }
-               db_print_zone(z);
-               total += z->cur_size,
-
-               have_addr = simple_lock_try(&all_zones_lock);
-               z = z->next_zone;
-               if (have_addr) {
-                       simple_unlock(&all_zones_lock);
-               }
-       }
-       db_printf("\nTotal              %8x", total);
-       db_printf("\n\nzone_gc() has reclaimed %d pages\n", zgc_stats.pgs_freed);
-}
-
-#if    ZONE_DEBUG
-void
-db_zone_check_active(
-       zone_t  zone)
-{
-       int count = 0;
-       queue_t tmp_elem;
-
-       if (!zone_debug_enabled(zone) || !zone_check)
-               return;
-       tmp_elem = queue_first(&zone->active_zones);
-       while (count < zone->count) {
-               count++;
-               if (tmp_elem == 0) {
-                       printf("unexpected zero element, zone=%p, count=%d\n",
-                               zone, count);
-                       assert(FALSE);
-                       break;
-               }
-               if (queue_end(tmp_elem, &zone->active_zones)) {
-                       printf("unexpected queue_end, zone=%p, count=%d\n",
-                               zone, count);
-                       assert(FALSE);
-                       break;
-               }
-               tmp_elem = queue_next(tmp_elem);
-       }
-       if (!queue_end(tmp_elem, &zone->active_zones)) {
-               printf("not at queue_end, zone=%p, tmp_elem=%p\n",
-                       zone, tmp_elem);
-               assert(FALSE);
-       }
-}
-
-void
-db_zone_print_active(
-       zone_t  zone)
-{
-       int count = 0;
-       queue_t tmp_elem;
-
-       if (!zone_debug_enabled(zone)) {
-               printf("zone %p debug not enabled\n", zone);
-               return;
-       }
-       if (!zone_check) {
-               printf("zone_check FALSE\n");
-               return;
-       }
-
-       printf("zone %p, active elements %d\n", zone, zone->count);
-       printf("active list:\n");
-       tmp_elem = queue_first(&zone->active_zones);
-       while (count < zone->count) {
-               printf("  %p", tmp_elem);
-               count++;
-               if ((count % 6) == 0)
-                       printf("\n");
-               if (tmp_elem == 0) {
-                       printf("\nunexpected zero element, count=%d\n", count);
-                       break;
-               }
-               if (queue_end(tmp_elem, &zone->active_zones)) {
-                       printf("\nunexpected queue_end, count=%d\n", count);
-                       break;
-               }
-               tmp_elem = queue_next(tmp_elem);
-       }
-       if (!queue_end(tmp_elem, &zone->active_zones))
-               printf("\nnot at queue_end, tmp_elem=%p\n", tmp_elem);
-       else
-               printf("\n");
-}
-#endif /* ZONE_DEBUG */
-
-void
-db_zone_print_free(
-       zone_t  zone)
-{
-       int count = 0;
-       int freecount;
-       vm_offset_t elem;
-
-       freecount = zone_free_count(zone);
-       printf("zone %p, free elements %d\n", zone, freecount);
-       printf("free list:\n");
-       elem = zone->free_elements;
-       while (count < freecount) {
-               printf("  0x%x", elem);
-               count++;
-               if ((count % 6) == 0)
-                       printf("\n");
-               if (elem == 0) {
-                       printf("\nunexpected zero element, count=%d\n", count);
-                       break;
-               }
-               elem = *((vm_offset_t *)elem);
-       }
-       if (elem != 0)
-               printf("\nnot at end of free list, elem=0x%x\n", elem);
-       else
-               printf("\n");
-}
-
-#endif /* MACH_KDB */
-
-
 #if    ZONE_DEBUG
 
 /* should we care about locks here ? */
 
-#if    MACH_KDB
-void *
-next_element(
-       zone_t          z,
-       void            *prev)
-{
-       char            *elt = (char *)prev;
-
-       if (!zone_debug_enabled(z))
-               return(NULL);
-       elt -= ZONE_DEBUG_OFFSET;
-       elt = (char *) queue_next((queue_t) elt);
-       if ((queue_t) elt == &z->active_zones)
-               return(NULL);
-       elt += ZONE_DEBUG_OFFSET;
-       return(elt);
-}
-
-void *
-first_element(
-       zone_t          z)
-{
-       char            *elt;
-
-       if (!zone_debug_enabled(z))
-               return(NULL);
-       if (queue_empty(&z->active_zones))
-               return(NULL);
-       elt = (char *)queue_first(&z->active_zones);
-       elt += ZONE_DEBUG_OFFSET;
-       return(elt);
-}
-
-/*
- * Second arg controls how many zone elements are printed:
- *   0 => none
- *   n, n < 0 => all
- *   n, n > 0 => last n on active list
- */
-int
-zone_count(
-       zone_t          z,
-       int             tail)
-{
-       void            *elt;
-       int             count = 0;
-       boolean_t       print = (tail != 0);
-
-       if (tail < 0)
-               tail = z->count;
-       if (z->count < tail)
-               tail = 0;
-       tail = z->count - tail;
-       for (elt = first_element(z); elt; elt = next_element(z, elt)) {
-               if (print && tail <= count)
-                       db_printf("%8x\n", elt);
-               count++;
-       }
-       assert(count == z->count);
-       return(count);
-}
-#endif /* MACH_KDB */
-
 #define zone_in_use(z)         ( z->count || z->free_elements )
 
 void
index 81322fd9f92bba554bc85f82e4e92b36597ead57..630a4fff0aaf331b5ae19bfea86dc4ada7c5f112 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <zone_debug.h>
-#include <mach_kdb.h>
 #include <kern/lock.h>
 #include <kern/locks.h>
 #include <kern/queue.h>
-#include <kern/call_entry.h>
+#include <kern/thread_call.h>
+
+#if    CONFIG_GZALLOC
+typedef struct gzalloc_data {
+       uint32_t        gzfc_index;
+       vm_offset_t     *gzfc;
+} gzalloc_data_t;
+#endif
 
 /*
  *     A zone is a collection of fixed size blocks for which there
@@ -111,27 +117,33 @@ struct zone {
        /* boolean_t */ async_pending :1,       /* asynchronous allocation pending? */
 #if CONFIG_ZLEAKS
        /* boolean_t */ zleak_on :1,    /* Are we collecting allocation information? */
-#endif /* ZONE_DEBUG */
+#endif /* CONFIG_ZLEAKS */
        /* boolean_t */ caller_acct: 1, /* do we account allocation/free to the caller? */  
        /* boolean_t */ doing_gc :1,    /* garbage collect in progress? */
        /* boolean_t */ noencrypt :1,
        /* boolean_t */ no_callout:1,
-       /* boolean_t */ async_prio_refill:1;
+       /* boolean_t */ async_prio_refill:1,
+       /* boolean_t */ gzalloc_exempt:1,
+       /* boolean_t */ alignment_required:1;
        int             index;          /* index into zone_info arrays for this zone */
        struct zone *   next_zone;      /* Link for all-zones list */
-       call_entry_data_t       call_async_alloc;       /* callout for asynchronous alloc */
+       thread_call_data_t call_async_alloc;    /* callout for asynchronous alloc */
        const char      *zone_name;     /* a name for the zone */
 #if    ZONE_DEBUG
        queue_head_t    active_zones;   /* active elements */
 #endif /* ZONE_DEBUG */
 
 #if CONFIG_ZLEAKS
-       uint32_t num_allocs;    /* alloc stats for zleak benchmarks */
+       uint32_t num_allocs;            /* alloc stats for zleak benchmarks */
        uint32_t num_frees;             /* free stats for zleak benchmarks */
-       uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */
+       uint32_t zleak_capture;         /* per-zone counter for capturing every N allocations */
 #endif /* CONFIG_ZLEAKS */
+       uint32_t free_check_count;      /* counter for poisoning/checking every N frees */
        vm_size_t       prio_refill_watermark;
        thread_t        zone_replenish_thread;
+#if    CONFIG_GZALLOC
+       gzalloc_data_t  gz;
+#endif /* CONFIG_GZALLOC */
 };
 
 /*
@@ -145,7 +157,7 @@ typedef struct zinfo_usage_store_t {
 } zinfo_usage_store_t;
 typedef zinfo_usage_store_t *zinfo_usage_t;
 
-extern void            zone_gc(void);
+extern void            zone_gc(boolean_t);
 extern void            consider_zone_gc(boolean_t);
 
 /* Steal memory for zone module */
@@ -178,23 +190,15 @@ extern void               stack_fake_zone_info(
 
 #if            ZONE_DEBUG
 
-#if            MACH_KDB
-
-extern void *  next_element(
-                               zone_t          z,
-                               void            *elt);
-
-extern void *  first_element(
-                               zone_t          z);
-
-#endif /* MACH_KDB */
-
 extern void            zone_debug_enable(
                                zone_t          z);
 
 extern void            zone_debug_disable(
                                zone_t          z);
 
+#define zone_debug_enabled(z) z->active_zones.next
+#define ROUNDUP(x,y)           ((((x)+(y)-1)/(y))*(y))
+#define ZONE_DEBUG_OFFSET      ROUNDUP(sizeof(queue_chain_t),16)
 #endif /* ZONE_DEBUG */
 
 #endif /* MACH_KERNEL_PRIVATE */
@@ -260,6 +264,8 @@ extern void         zone_prio_refill_configure(zone_t, vm_size_t);
 #define Z_NOCALLOUT    7       /* Don't asynchronously replenish the zone via
                                 * callouts
                                 */
+#define Z_ALIGNMENT_REQUIRED 8
+#define Z_GZALLOC_EXEMPT 9     /* Not tracked in guard allocation mode */
 /* Preallocate space for zone from zone map */
 extern void            zprealloc(
                                        zone_t          zone,
@@ -305,10 +311,31 @@ extern int get_zleak_state(void);
 #endif /* CONFIG_ZLEAKS */
 
 /* These functions used for leak detection both in zalloc.c and mbuf.c */
-extern uint32_t fastbacktrace(uintptr_t* bt, uint32_t max_frames);
-extern uintptr_t hash_mix(uintptr_t x);
-extern uint32_t hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size);
-extern uint32_t hashaddr(uintptr_t pt, uint32_t max_size);
+extern uint32_t fastbacktrace(uintptr_t* bt, uint32_t max_frames) __attribute__((noinline));
+extern uintptr_t hash_mix(uintptr_t);
+extern uint32_t hashbacktrace(uintptr_t *, uint32_t, uint32_t);
+extern uint32_t hashaddr(uintptr_t, uint32_t);
+
+#define lock_zone(zone)                                        \
+MACRO_BEGIN                                            \
+       lck_mtx_lock_spin(&(zone)->lock);               \
+MACRO_END
+
+#define unlock_zone(zone)                              \
+MACRO_BEGIN                                            \
+       lck_mtx_unlock(&(zone)->lock);                  \
+MACRO_END
+
+#if    CONFIG_GZALLOC
+void gzalloc_init(vm_size_t);
+void gzalloc_zone_init(zone_t);
+void gzalloc_configure(void);
+void gzalloc_reconfigure(zone_t);
+boolean_t gzalloc_enabled(void);
+
+vm_offset_t gzalloc_alloc(zone_t, boolean_t);
+boolean_t gzalloc_free(zone_t, void *);
+#endif /* CONFIG_GZALLOC */
 
 #endif /* XNU_KERNEL_PRIVATE */
 
diff --git a/osfmk/kperf/Makefile b/osfmk/kperf/Makefile
new file mode 100644 (file)
index 0000000..9ede0b0
--- /dev/null
@@ -0,0 +1,33 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+EXPORT_ONLY_FILES =    \
+                       context.h       \
+                       timetrigger.h   \
+                       pet.h           \
+                       filter.h        \
+                       kperfbsd.h      \
+                       action.h        \
+                       kperf.h
+
+
+INSTALL_MI_LIST = 
+
+# Export our headers
+EXPORT_MI_LIST = ${EXPORT_ONLY_FILES}
+
+# Don't install in non-local, though
+INSTALL_KF_MI_LIST = ""
+
+EXPORT_MI_DIR = kperf
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
+
+
diff --git a/osfmk/kperf/action.c b/osfmk/kperf/action.c
new file mode 100644 (file)
index 0000000..3f1b5e2
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Called from a trigger. Actually takes the data from the different
+ * modules and puts them in a buffer
+ */
+
+#include <mach/mach_types.h>
+#include <machine/machine_routines.h>
+// #include <libkern/libkern.h>
+#include <kern/kalloc.h>
+#include <kern/debug.h> /* panic */
+#include <kern/thread.h>
+#include <sys/errno.h>
+
+#include <chud/chud_xnu.h>
+#include <kperf/kperf.h>
+
+#include <kperf/buffer.h>
+#include <kperf/timetrigger.h>
+#include <kperf/threadinfo.h>
+#include <kperf/callstack.h>
+#include <kperf/sample.h>
+#include <kperf/filter.h>
+#include <kperf/action.h>
+#include <kperf/context.h>
+#include <kperf/ast.h>
+
+#define ACTION_MAX 32
+
+/* XXX: callback handler from chudxnu */
+/* FIXME: hook this up to something */
+//void (*kperf_thread_ast_handler)(thread_t);
+
+/* the list of different actions to take */
+struct action
+{
+       unsigned sample;
+};
+
+/* the list of actions */
+static unsigned actionc = 0;
+static struct action *actionv = NULL;
+
+
+/* Do the real work! */
+/* this can be called in any context ... right? */
+static kern_return_t
+kperf_sample_internal( struct kperf_sample *sbuf,
+              struct kperf_context *context,
+              unsigned sample_what, boolean_t pend_user )
+{
+       boolean_t enabled;
+       int did_ucallstack = 0, did_tinfo_extra = 0;
+
+       /* not much point continuing here, but what to do? return
+        * shutdown? cut a tracepoint and continue?
+        */
+       if( sample_what == 0 )
+               return SAMPLE_CONTINUE;
+
+       int is_kernel = (context->cur_pid == 0);
+
+       /*  an event occurred. Sample everything and dump it in a
+        *  buffer.
+        */
+
+       /* collect data from samplers */
+       if( sample_what & SAMPLER_TINFO ) {
+               kperf_threadinfo_sample( &sbuf->threadinfo, context );
+               
+               /* XXX FIXME This drops events when the thread is idle.
+                * This should be configurable. */
+               if (sbuf->threadinfo.runmode & 0x40)
+                       return SAMPLE_CONTINUE;
+       }
+
+       if( sample_what & SAMPLER_KSTACK )
+               kperf_kcallstack_sample( &sbuf->kcallstack, context );
+
+       /* sensitive ones */
+       if ( !is_kernel ) {
+               if( pend_user )
+               {
+                       if( sample_what & SAMPLER_USTACK )
+                               did_ucallstack = kperf_ucallstack_pend( context );
+
+                       if( sample_what & SAMPLER_TINFOEX )
+                               did_tinfo_extra = kperf_threadinfo_extra_pend( context );
+               }
+               else
+               {
+                       if( sample_what & SAMPLER_USTACK )
+                               kperf_ucallstack_sample( &sbuf->ucallstack, context );
+
+                       if( sample_what & SAMPLER_TINFOEX )
+                               kperf_threadinfo_extra_sample( &sbuf->tinfo_ex,
+                                                              context );
+               }
+       }
+
+       /* stash the data into the buffer
+        * interrupts off to ensure we don't get split
+        */
+       enabled = ml_set_interrupts_enabled(FALSE);
+
+       if ( pend_user )
+               BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_START, sample_what );
+
+       /* dump threadinfo */
+       if( sample_what & SAMPLER_TINFO )
+               kperf_threadinfo_log( &sbuf->threadinfo );
+
+       /* dump kcallstack */
+       if( sample_what & SAMPLER_KSTACK )
+               kperf_kcallstack_log( &sbuf->kcallstack );
+
+
+       /* dump user stuff */
+       if ( !is_kernel ) {
+               if ( pend_user )
+               {
+                       if ( did_ucallstack )
+                               BUF_INFO1( PERF_CS_UPEND, 0 );
+
+                       if ( did_tinfo_extra )
+                               BUF_INFO1( PERF_TI_XPEND, 0 );
+               }
+               else
+               {
+                       if( sample_what & SAMPLER_USTACK )
+                               kperf_ucallstack_log( &sbuf->ucallstack );
+
+                       if( sample_what & SAMPLER_TINFOEX )
+                               kperf_threadinfo_extra_log( &sbuf->tinfo_ex );
+               }
+       }
+
+       if ( pend_user )
+               BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_END, sample_what );
+
+       /* intrs back on */
+       ml_set_interrupts_enabled(enabled);
+
+       return SAMPLE_CONTINUE;
+}
+
+/* Translate actionid into sample bits and take a sample */
+kern_return_t
+kperf_sample( struct kperf_sample *sbuf,
+             struct kperf_context *context,
+              unsigned actionid, boolean_t pend_user )
+{
+       unsigned sample_what = 0;
+
+       /* check sampling is on, or panic */
+       if( kperf_sampling_status() == KPERF_SAMPLING_OFF )
+               panic("trigger fired while sampling off");
+       else if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN )
+               return SAMPLE_SHUTDOWN;
+
+       /* work out what to sample, if anything */
+       if( actionid >= actionc )
+               return SAMPLE_SHUTDOWN;
+
+       sample_what = actionv[actionid].sample;
+
+       return kperf_sample_internal( sbuf, context, sample_what, pend_user );
+}
+
+/* ast callback on a thread */
+void
+kperf_thread_ast_handler( thread_t thread )
+{
+       int r = 0;      /* initialized: r is logged on the error path below */
+       uint32_t t_chud;
+       unsigned sample_what = 0;
+       /* we know we're on a thread, so let's do stuff */
+       task_t task = NULL;
+
+       /* Don't sample if we are shutting down or off */
+       if( kperf_sampling_status() != KPERF_SAMPLING_ON )
+               return;
+
+       BUF_INFO1(PERF_AST_HNDLR | DBG_FUNC_START, thread);
+
+       /* FIXME: probably want a faster allocator here... :P */
+       struct kperf_sample *sbuf = kalloc( sizeof(*sbuf) );
+       if( sbuf == NULL )
+       {
+               /* FIXME: error code */
+               BUF_INFO1( PERF_AST_ERROR, 0 );
+               goto error;
+       }
+
+       /* make a context, take a sample */
+       struct kperf_context ctx;
+       ctx.cur_thread = thread;
+       ctx.cur_pid = -1;
+
+       task = chudxnu_task_for_thread(thread);
+       if(task)
+               ctx.cur_pid = chudxnu_pid_for_task(task);
+
+       /* decode the chud bits so we know what to sample */
+       t_chud = kperf_get_thread_bits(thread);
+       
+       if (t_chud & T_AST_NAME)
+               sample_what |= SAMPLER_TINFOEX;
+       
+       if (t_chud & T_AST_CALLSTACK)
+               sample_what |= SAMPLER_USTACK;
+
+       /* do the sample, just of the user stuff */
+       r = kperf_sample_internal( sbuf, &ctx, sample_what, FALSE );
+
+       /* free it again */
+       kfree( sbuf, sizeof(*sbuf) );
+
+error:
+       BUF_INFO1(PERF_AST_HNDLR | DBG_FUNC_END, r);
+
+}
+
+/* register AST bits */
+int
+kperf_ast_pend( thread_t cur_thread, uint32_t check_bits,
+               uint32_t set_bits )
+{
+       /* pend on the thread */
+       uint32_t t_chud, set_done = 0;
+       /* can only pend on the current thread */
+       if( cur_thread != chudxnu_current_thread() )
+               panic("pending to non-current thread");
+
+       /* get our current bits */
+       t_chud = kperf_get_thread_bits(cur_thread);
+
+       /* see if it's already been done or pended */
+       if( !(t_chud & check_bits ) )
+       {
+               /* set the bit on the thread */
+               t_chud |= set_bits;
+               kperf_set_thread_bits(cur_thread, t_chud);
+
+               /* set the actual AST */
+               kperf_set_thread_ast( cur_thread );
+
+               set_done = 1;
+       }
+
+       return set_done;
+
+//     BUF_INFO3( dbg_code, (uintptr_t)cur_thread, t_chud, set_done );
+}
+
+unsigned
+kperf_action_get_count(void)
+{
+       return actionc;
+}
+
+int
+kperf_action_set_samplers( unsigned actionid, uint32_t samplers )
+{
+       if( actionid >= actionc )
+               return EINVAL;
+
+       actionv[actionid].sample = samplers;
+
+       return 0;
+}
+
+int
+kperf_action_get_samplers( unsigned actionid, uint32_t *samplers_out )
+{
+       if( actionid >= actionc )
+               return EINVAL;
+
+       *samplers_out = actionv[actionid].sample;
+
+       return 0;
+}
+
+int
+kperf_action_set_count(unsigned count)
+{
+       struct action *new_actionv = NULL, *old_actionv = NULL;
+       unsigned old_count;
+
+       /* easy no-op */
+       if( count == actionc )
+               return 0;
+
+       /* TODO: allow shrinking? */
+       if( count < actionc )
+               return EINVAL;
+
+       /* cap it for good measure */
+       if( count > ACTION_MAX )
+               return EINVAL;
+
+       /* creating the action array for the first time. create a few
+        * more things, too.
+        */
+       if( actionc == 0 )
+       {
+               int r;
+               r = kperf_init();
+
+               if( r != 0 )
+                       return r;
+       }
+
+       /* create a new array */
+       new_actionv = kalloc( count * sizeof(*new_actionv) );
+       if( new_actionv == NULL )
+               return ENOMEM;
+
+       old_actionv = actionv;
+       old_count = actionc;
+
+       if( old_actionv != NULL )
+               bcopy( actionv, new_actionv, actionc * sizeof(*actionv) );
+
+       bzero( &new_actionv[actionc], (count - old_count) * sizeof(*actionv) );
+
+       actionv = new_actionv;
+       actionc = count;
+
+       if( old_actionv != NULL )
+               kfree( old_actionv, old_count * sizeof(*actionv) );
+
+       printf( "kperf: done the alloc\n" );
+
+       return 0;
+}
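
Putting the interfaces above together, configuring two actions presumably looks like this (a sketch in kernel context; the SAMPLER_* bits are defined in action.h below):

    /* Sketch: grow the action array, then assign sampler bits per action. */
    static int
    kperf_configure_actions_sketch(void)
    {
            int r = kperf_action_set_count(2);  /* grow-only allocation */
            if (r != 0)
                    return r;
            r = kperf_action_set_samplers(0, SAMPLER_KSTACK | SAMPLER_TINFO);
            if (r == 0)
                    r = kperf_action_set_samplers(1, SAMPLER_USTACK);
            return r;
    }
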
diff --git a/osfmk/kperf/action.h b/osfmk/kperf/action.h
new file mode 100644 (file)
index 0000000..71e91df
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* fwd decl */
+struct kperf_sample;
+struct kperf_context;
+
+
+/* bits for defining what to do on an action */
+#define SAMPLER_TINFO    (1<<0)
+#define SAMPLER_TINFOEX  (1<<1)
+#define SAMPLER_KSTACK   (1<<2)
+#define SAMPLER_USTACK   (1<<3)
+
+/*  Take a sample into "sbuf" using current thread "cur_thread" */
+extern kern_return_t kperf_sample( struct kperf_sample *sbuf, 
+                                  struct kperf_context*, 
+                                   unsigned actionid,
+                                   boolean_t pend_user );
+
+/* return codes from taking a sample
+ * either keep trigger, or something went wrong (or we're shutting down)
+ * so turn off.
+ */
+#define SAMPLE_CONTINUE (0)
+#define SAMPLE_SHUTDOWN (1)
+
+/* Get the sample buffer to use from interrupt handler context. Only
+ * valid in interrupt contexts.
+ */
+extern struct kperf_sample* kperf_intr_sample_buffer(void);
+
+/* Interface functions  */
+extern unsigned kperf_action_get_count(void);
+extern int kperf_action_set_count(unsigned count);
+
+extern int kperf_action_set_samplers( unsigned actionid,
+                                      uint32_t samplers );
+extern int kperf_action_get_samplers( unsigned actionid,
+                                      uint32_t *samplers_out );
+
+extern void
+kperf_thread_ast_handler( thread_t thread );
diff --git a/osfmk/kperf/ast.h b/osfmk/kperf/ast.h
new file mode 100644 (file)
index 0000000..897d549
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* pend ast bits on a thread */
+extern int kperf_ast_pend( thread_t, uint32_t, uint32_t );
diff --git a/osfmk/kperf/buffer.h b/osfmk/kperf/buffer.h
new file mode 100644 (file)
index 0000000..0bb0f09
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* wrapper around kdebug */
+
+#include <sys/kdebug.h>
+
+/* KDEBUG codes */
+#define PERF_CODE(SubClass, code) KDBG_CODE(DBG_PERF, SubClass, code)
+
+/* broad sub-classes */
+#define PERF_GENERIC    (0) 
+#define PERF_THREADINFO (1)
+#define PERF_CALLSTACK  (2)
+#define PERF_TIMER      (3)
+#define PERF_PET        (4)
+#define PERF_AST        (5)  /* not confusing at all */
+
+/* sub-class codes */
+#define PERF_GEN_CODE(code) PERF_CODE(PERF_GENERIC, code)
+#define PERF_GEN_EVENT      PERF_GEN_CODE(0)
+
+#define PERF_TI_CODE(code) PERF_CODE(PERF_THREADINFO, code)
+#define PERF_TI_SAMPLE     PERF_TI_CODE(0)
+#define PERF_TI_DATA       PERF_TI_CODE(1)
+#define PERF_TI_XSAMPLE    PERF_TI_CODE(2)
+#define PERF_TI_XPEND      PERF_TI_CODE(3)
+#define PERF_TI_XDATA      PERF_TI_CODE(4)
+
+#define PERF_CS_CODE(code) PERF_CODE(PERF_CALLSTACK, code)
+#define PERF_CS_KSAMPLE    PERF_CS_CODE(0)
+#define PERF_CS_UPEND      PERF_CS_CODE(1)
+#define PERF_CS_USAMPLE    PERF_CS_CODE(2)
+#define PERF_CS_KDATA      PERF_CS_CODE(3)
+#define PERF_CS_UDATA      PERF_CS_CODE(4)
+
+#define PERF_TM_CODE(code) PERF_CODE(PERF_TIMER, code)
+#define PERF_TM_ASCHED     PERF_TM_CODE(0)
+#define PERF_TM_SCHED      PERF_TM_CODE(1)
+#define PERF_TM_HNDLR      PERF_TM_CODE(2)
+
+#define PERF_PET_CODE(code) PERF_CODE(PERF_PET, code)
+#define PERF_PET_THREAD     PERF_PET_CODE(0)
+#define PERF_PET_ERROR      PERF_PET_CODE(1)
+#define PERF_PET_RUN        PERF_PET_CODE(2)
+#define PERF_PET_PAUSE      PERF_PET_CODE(3)
+#define PERF_PET_IDLE       PERF_PET_CODE(4)
+#define PERF_PET_SAMPLE     PERF_PET_CODE(5)
+
+#define PERF_AST_CODE(code) PERF_CODE(PERF_AST, code)
+#define PERF_AST_HNDLR      PERF_AST_CODE(0)
+#define PERF_AST_ERROR      PERF_AST_CODE(1)
+
+/* error sub-codes for trace data */
+enum
+{
+       ERR_TASK,
+       ERR_THREAD,
+       ERR_PID,
+       ERR_FRAMES,
+       ERR_GETSTACK,
+       ERR_NOMEM,
+};
+
+/* for logging information / debugging -- optional */
+#define BUF_INFO( id, a0, a1, a2, a3) KERNEL_DEBUG_CONSTANT(id,a0,a1,a2,a3,0)
+
+#define BUF_INFO1( id, a0 )         BUF_INFO(id, a0,  0,  0,  0 )
+#define BUF_INFO2( id, a0, a1 )     BUF_INFO(id, a0, a1,  0,  0 )
+#define BUF_INFO3( id, a0, a1, a2 ) BUF_INFO(id, a0, a1, a2,  0 )
+
+/* for logging actual data -- never compiled out */
+#define BUF_DATA( id, a0, a1, a2, a3) KERNEL_DEBUG_CONSTANT(id,a0,a1,a2,a3,0)
+
+/* code neatness */
+#define BUF_DATA1( id, a0 )         BUF_DATA(id, a0, 0, 0, 0 )
+#define BUF_DATA2( id, a0, a1 )     BUF_DATA(id, a0, a1, 0, 0 )
+#define BUF_DATA3( id, a0, a1, a2 ) BUF_DATA(id, a0, a1, a2, 0 )
diff --git a/osfmk/kperf/callstack.c b/osfmk/kperf/callstack.c
new file mode 100644 (file)
index 0000000..d0c1e39
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* Collect kernel callstacks */
+
+#include <mach/mach_types.h>
+#include <machine/machine_routines.h>  /* XXX: remove me */
+#include <kern/thread.h>
+
+#include <chud/chud_xnu.h>
+
+#include <kperf/buffer.h>
+#include <kperf/context.h>
+#include <kperf/callstack.h>
+#include <kperf/ast.h>
+
+static void
+callstack_sample( struct callstack *cs, 
+                  struct kperf_context *context,
+                  uint32_t is_user )
+{
+       kern_return_t kr;
+       mach_msg_type_number_t nframes; /* WTF with the type? */
+       uint32_t code;
+
+       if( is_user )
+               code = PERF_CS_USAMPLE;
+       else
+               code = PERF_CS_KSAMPLE;
+
+       BUF_INFO1( code, (uintptr_t)context->cur_thread );
+
+       /* fill out known flags */
+       cs->flags = 0;
+       if( !is_user )
+       {
+               cs->flags |= CALLSTACK_KERNEL;
+#ifdef __LP64__
+               cs->flags |= CALLSTACK_64BIT;
+#endif
+       }
+       else
+       {
+               /* FIXME: detect 32 vs 64-bit? */
+       }
+
+       /* collect the callstack */
+       nframes = MAX_CALLSTACK_FRAMES;
+       kr = chudxnu_thread_get_callstack64( context->cur_thread, 
+                                            cs->frames, 
+                                            &nframes,
+                                            is_user );
+
+       /* check for overflow */
+       if( kr == KERN_SUCCESS )
+       {
+               cs->flags |= CALLSTACK_VALID;
+               cs->nframes = nframes;
+       }
+       else if( kr == KERN_RESOURCE_SHORTAGE )
+       {
+               /* FIXME: more here */
+               cs->flags |= CALLSTACK_TRUNCATED;
+               cs->flags |= CALLSTACK_VALID;
+               cs->nframes = nframes;
+       }
+       else
+       {
+               BUF_INFO2(PERF_PET_ERROR, ERR_GETSTACK, kr);
+               cs->nframes = 0;
+       }
+
+       if( cs->nframes >= MAX_CALLSTACK_FRAMES )
+       {
+               /* necessary? */
+               BUF_INFO1(PERF_PET_ERROR, ERR_FRAMES);
+               cs->nframes = 0;
+       }
+
+}
+
+void
+kperf_kcallstack_sample( struct callstack *cs, struct kperf_context *context )
+{
+       callstack_sample( cs, context, 0 );
+}
+
+void
+kperf_ucallstack_sample( struct callstack *cs, struct kperf_context *context )
+{
+       callstack_sample( cs, context, 1 );
+}
+
+static void
+callstack_log( struct callstack *cs, uint32_t code )
+{
+       unsigned int i, j, n, of;
+
+       /* Header on the stack */
+       BUF_DATA2( code, cs->flags, cs->nframes );
+
+       /* look for how many batches of 4 */
+       n  = cs->nframes / 4;
+       of = cs->nframes % 4;
+       if( of != 0 )
+               n++;
+
+       /* print all the stack data, and zero the overflow */
+       for( i = 0; i < n; i++ )
+       {
+#define SCRUB_FRAME(x) (((x)<cs->nframes)?cs->frames[x]:0)
+               j = i * 4;
+               BUF_DATA ( code, 
+                          SCRUB_FRAME(j+0),
+                          SCRUB_FRAME(j+1),
+                          SCRUB_FRAME(j+2),
+                          SCRUB_FRAME(j+3) );
+#undef SCRUB_FRAME
+       }
+}
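+
+/* Worked example of the packing above: nframes == 10 logs the header
+ * plus three records -- frames 0-3, 4-7, then 8 and 9 with the last
+ * two slots zeroed by SCRUB_FRAME.
+ */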
+
+void
+kperf_kcallstack_log( struct callstack *cs )
+{
+       callstack_log( cs, PERF_CS_KDATA );
+}
+
+void
+kperf_ucallstack_log( struct callstack *cs )
+{
+       callstack_log( cs, PERF_CS_UDATA );
+}
+
+int
+kperf_ucallstack_pend( struct kperf_context * context )
+{
+       return kperf_ast_pend( context->cur_thread, T_AST_CALLSTACK,
+                              T_AST_CALLSTACK );
+}
+
+//     kr = chudxnu_thread_get_callstack(context->generic->threadID, 
+//              (uint32_t*)frames, &frameCount, !collectingSupervisorStack);
diff --git a/osfmk/kperf/callstack.h b/osfmk/kperf/callstack.h
new file mode 100644 (file)
index 0000000..3bfd964
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __AP_CALLSTACK_H__
+#define __AP_CALLSTACK_H__
+
+#define MAX_CALLSTACK_FRAMES (128)
+
+#define CALLSTACK_VALID     (1<<0)
+#define CALLSTACK_DEFERRED  (1<<1)
+#define CALLSTACK_64BIT     (1<<2)
+#define CALLSTACK_KERNEL    (1<<3)
+#define CALLSTACK_TRUNCATED (1<<4)
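+
+/* Example: a valid 64-bit kernel stack carries flags
+ * (CALLSTACK_VALID | CALLSTACK_64BIT | CALLSTACK_KERNEL) == 0xd.
+ */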
+
+struct callstack
+{
+       uint32_t flags;
+       uint32_t nframes;
+       uint64_t frames[MAX_CALLSTACK_FRAMES];
+};
+
+struct kperf_context;
+
+extern void kperf_kcallstack_sample( struct callstack *cs, struct kperf_context * );
+extern void kperf_kcallstack_log( struct callstack *cs );
+
+extern void kperf_ucallstack_sample( struct callstack *cs, struct kperf_context * );
+extern int kperf_ucallstack_pend( struct kperf_context * );
+extern void kperf_ucallstack_log( struct callstack *cs );
+
+
+#endif /* __AP_CALLSTACK_H__ */
diff --git a/osfmk/kperf/context.h b/osfmk/kperf/context.h
new file mode 100644 (file)
index 0000000..e06b9f9
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* context of what we're looking at */
+struct kperf_context
+{
+       /* who was running during the event */
+       int cur_pid;
+       thread_t cur_thread;
+
+       /* who caused the event */
+       unsigned trigger_type;
+       unsigned trigger_id;
+};
diff --git a/osfmk/kperf/filter.c b/osfmk/kperf/filter.c
new file mode 100644 (file)
index 0000000..1485d74
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*  Toy filtering. Allow system-wide or filtering on 4 PIDs */
+
+#include <mach/mach_types.h>
+#include <sys/types.h> /* NULL */
+// #include <libkern/libkern.h>
+
+#include <kperf/context.h>
+#include <kperf/filter.h>
+
+// Filter params... dodge for now
+#define NPIDS (4)
+int pid_list[NPIDS];
+
+// function to determine whether we should take a sample
+int
+kperf_filter_should_sample(struct kperf_context *context)
+{
+       int i, restricted = 0;
+
+       /* see if the pids are restricted */
+       for( i = 0; i < NPIDS; i++ )
+       {
+               if( context->cur_pid == pid_list[i] )
+                       return 1;
+
+               if( pid_list[i] != -1 )
+                       restricted = 1;
+       }
+
+       /* wasn't in the pid list, but something was */
+       if( restricted )
+               return 0;
+
+       /* not filtered, sample it */
+       return 1;
+}
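+
+/* Example: with pid_list all -1 (the initial state), every context
+ * samples; after pid_list[0] = 123 (a hypothetical pid), only pid 123
+ * samples.
+ */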
+
+/* check whether pid filtering is enabled */
+int
+kperf_filter_on_pid(void)
+{
+       int i;
+
+       for( i = 0; i < NPIDS; i++ )
+               if( pid_list[i] != -1 )
+                       return 1;
+
+       return 0;
+}
+
+/* create a list of pids to filter */
+void
+kperf_filter_pid_list( int *outcount, int **outv )
+{
+       int i, found = 0;
+
+       for( i = 0; i < NPIDS; i++ )
+               if( pid_list[i] != -1 )
+                       found = 1;
+
+       if( !found )
+       {
+               *outcount = 0;
+               *outv = NULL;
+               return;
+       }
+
+       /* just return our list */
+       *outcount = NPIDS;
+       *outv = pid_list;
+}
+
+/* free a list we created */
+void
+kperf_filter_free_pid_list( int *incount, int **inv )
+{
+       // no op
+       (void) incount;
+       (void) inv;
+}
+
+/* init the filters to nothing */
+void
+kperf_filter_init(void)
+{
+       int i;
+       for( i = 0; i < NPIDS; i++ )
+               pid_list[i] = -1;
+}
diff --git a/osfmk/kperf/filter.h b/osfmk/kperf/filter.h
new file mode 100644 (file)
index 0000000..655c4fd
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* so we can pull this in without the context header... */
+struct kperf_context;
+
+extern void kperf_filter_init(void);
+extern int kperf_filter_should_sample(struct kperf_context *context);
+
+extern int kperf_filter_on_pid(void);
+extern void kperf_filter_pid_list( int *outcount, int **outv );
+extern void kperf_filter_free_pid_list( int *incount, int **inv );
+
+extern int pid_list[];
diff --git a/osfmk/kperf/kperf.c b/osfmk/kperf/kperf.c
new file mode 100644 (file)
index 0000000..2d6fc40
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach/mach_types.h>
+#include <kern/thread.h>
+#include <kern/machine.h>
+#include <kern/kalloc.h>
+#include <sys/errno.h>
+
+#include <kperf/filter.h>
+#include <kperf/sample.h>
+#include <kperf/kperfbsd.h>
+#include <kperf/pet.h>
+#include <kperf/action.h>
+#include <kperf/kperf.h>
+#include <kperf/timetrigger.h>
+
+/** misc functions **/
+#include <chud/chud_xnu.h> /* XXX: should bust this out */
+
+static struct kperf_sample *intr_samplev = NULL;
+static unsigned intr_samplec = 0;
+static unsigned sampling_status = KPERF_SAMPLING_OFF;
+static unsigned kperf_initted = 0;
+
+
+extern void (*chudxnu_thread_ast_handler)(thread_t);
+
+struct kperf_sample*
+kperf_intr_sample_buffer(void)
+{
+       unsigned ncpu = chudxnu_cpu_number();
+
+       // XXX: assert?
+       if( ncpu >= intr_samplec )
+               return NULL;
+
+       return &intr_samplev[ncpu];
+}
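+
+/* Each CPU owns one preallocated slot in intr_samplev, so interrupt
+ * handlers can sample without allocating; kperf_init() below sizes the
+ * array to logical_cpu_max.
+ */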
+
+/* setup interrupt sample buffers */
+int
+kperf_init(void)
+{
+       unsigned ncpus = 0;
+
+       if( kperf_initted )
+               return 0;
+
+       /* get number of cpus */
+       ncpus = machine_info.logical_cpu_max;
+
+       /* make the CPU array 
+        * FIXME: cache alignment
+        */
+       intr_samplev = kalloc( ncpus * sizeof(*intr_samplev));
+
+       if( intr_samplev == NULL )
+               return ENOMEM;
+
+       /* clear it */
+       bzero( intr_samplev, ncpus * sizeof(*intr_samplev) );
+       
+       chudxnu_thread_ast_handler = kperf_thread_ast_handler;
+
+       /* we're done */
+       intr_samplec = ncpus;
+       kperf_initted = 1;
+
+       return 0;
+}
+
+
+/** kext start/stop functions **/
+kern_return_t kperf_start (kmod_info_t * ki, void * d);
+
+kern_return_t
+kperf_start (kmod_info_t * ki, void * d)
+{
+       (void) ki;
+       (void) d;
+
+       /* say hello */
+       printf( "aprof: kext starting\n" );
+
+       /* register modules */
+       // kperf_action_init();
+       kperf_filter_init();
+       kperf_pet_init();
+
+       /* register the sysctls */
+       //kperf_register_profiling();
+
+       return KERN_SUCCESS;
+}
+
+
+/* random misc-ish functions */
+uint32_t
+kperf_get_thread_bits( thread_t thread )
+{
+       return thread->t_chud;
+}
+
+void
+kperf_set_thread_bits( thread_t thread, uint32_t bits )
+{
+       thread->t_chud = bits;
+}
+
+/* mark an AST to fire on a thread */
+void
+kperf_set_thread_ast( thread_t thread )
+{
+       /* FIXME: only call this on current thread from an interrupt
+        * handler for now... 
+        */
+       if( thread != current_thread() )
+               panic( "unsafe AST set" );
+
+       act_set_kperf(thread);
+}
+
+unsigned
+kperf_sampling_status(void)
+{
+       return sampling_status;
+}
+
+int
+kperf_sampling_enable(void)
+{
+       /* already running! */
+       if( sampling_status == KPERF_SAMPLING_ON )
+               return 0;
+
+       if ( sampling_status != KPERF_SAMPLING_OFF )
+               panic( "kperf: sampling wasn't off" );
+
+       /* make sure interrupt tables and actions are initted */
+       if( !kperf_initted
+           || (kperf_action_get_count() == 0) )
+               return ECANCELED;
+
+       /* mark as running */
+       sampling_status = KPERF_SAMPLING_ON;
+
+       /* tell timers to enable */
+       kperf_timer_go();
+
+       return 0;
+}
+
+int
+kperf_sampling_disable(void)
+{
+       if( sampling_status != KPERF_SAMPLING_ON )
+               return 0;
+
+       /* mark as shutting down */
+       sampling_status = KPERF_SAMPLING_SHUTDOWN;
+
+       /* tell timers to disable */
+       kperf_timer_stop();
+
+       /* mark as off */
+       sampling_status = KPERF_SAMPLING_OFF;
+
+       return 0;
+}
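+
+/* Bring-up order (sketch): kperf_init(), give it at least one action
+ * (kperf_action_set_count()) and timer (kperf_timer_set_count()), then
+ * kperf_sampling_enable() -- which returns ECANCELED if no actions are
+ * configured.
+ */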
diff --git a/osfmk/kperf/kperf.h b/osfmk/kperf/kperf.h
new file mode 100644 (file)
index 0000000..1e1ab32
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* XXX: still needed? just access directly? */
+
+#define TRIGGER_TYPE_TIMER (0)
+#define TRIGGER_TYPE_PMI   (1)
+#define TRIGGER_TYPE_TRACE (2)
+
+extern uint32_t kperf_get_thread_bits( thread_t thread );
+extern void     kperf_set_thread_bits( thread_t thread, uint32_t bits );
+extern void     kperf_set_thread_ast( thread_t thread );
+
+#define KPERF_SAMPLING_OFF 0
+#define KPERF_SAMPLING_ON  1
+#define KPERF_SAMPLING_SHUTDOWN 2
+
+extern int kperf_init(void);
+extern unsigned kperf_sampling_status(void);
+extern int kperf_sampling_enable(void);
+extern int kperf_sampling_disable(void);
diff --git a/osfmk/kperf/kperf_arch.h b/osfmk/kperf/kperf_arch.h
new file mode 100644 (file)
index 0000000..dd6e319
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KPERF_ARCH_H
+#define _KPERF_ARCH_H
+
+/* per-arch header */
+#if defined(__x86_64__)
+#include "kperf/x86_64/kperf_arch.h"
+#else
+#error architecture not supported
+#endif
+
+/* common definitions */
+extern int kperf_mp_broadcast( void (*func)(void*), void *arg );
+
+#endif /* _KPERF_ARCH_H */
diff --git a/osfmk/kperf/kperfbsd.c b/osfmk/kperf/kperfbsd.c
new file mode 100644 (file)
index 0000000..6e626e4
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*  sysctl interface for parameters from user-land */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <libkern/libkern.h>
+
+#include <kperf/context.h>
+#include <kperf/action.h>
+#include <kperf/timetrigger.h>
+#include <kperf/pet.h>
+#include <kperf/filter.h>
+#include <kperf/kperfbsd.h>
+#include <kperf/kperf.h>
+
+#define REQ_SAMPLING        (1)
+#define REQ_ACTION_COUNT    (2)
+#define REQ_ACTION_SAMPLERS (3)
+#define REQ_TIMER_COUNT     (4)
+#define REQ_TIMER_PERIOD    (5)
+#define REQ_TIMER_PET       (6)
+
+
+static int
+sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int error = 0;
+    uint64_t inputs[2], retval;
+    unsigned timer, set = 0;
+    
+    /* get 2x 64-bit words */
+    error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) );
+    if(error)
+    {
+           printf( "error in\n" );
+           return (error);
+    }
+
+    /* setup inputs */
+    timer = (unsigned) inputs[0];
+    if( inputs[1] != ~0ULL )
+           set = 1;
+
+    printf( "%s timer: %u, inp[0] %llu\n", set ? "set" : "get", 
+            timer, inputs[0] );
+
+    if( set )
+    {
+           printf( "timer set period\n" );
+           error = kperf_timer_set_period( timer, inputs[1] );
+           if( error )
+                   return error;
+    }
+
+    error = kperf_timer_get_period(timer, &retval);
+    if(error)
+    {
+           printf( "error get period\n" );
+           return (error);
+    }
+
+    inputs[1] = retval;
+    
+    if( error == 0 )
+    {
+           error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) );
+           if( error )
+                   printf( "error out\n" );
+    }
+
+    return error;
+}
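+
+/* The two-word protocol above, from user space (sketch; the node is
+ * published below as "kperf.timer.period"):
+ *
+ *   uint64_t io[2] = { 0, ~0ULL };  // timer 0; ~0 means "get only"
+ *   size_t len = sizeof(io);
+ *   sysctlbyname( "kperf.timer.period", io, &len, io, sizeof(io) );
+ *   // io[1] now holds the period; pass a real period instead to set it.
+ */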
+
+static int
+sysctl_action_samplers( __unused struct sysctl_oid *oidp, 
+                        struct sysctl_req *req )
+{
+    int error = 0;
+    uint64_t inputs[3];
+    uint32_t retval;
+    unsigned actionid, set = 0;
+    
+    /* get 3x 64-bit words */
+    error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) );
+    if(error)
+    {
+           printf( "error in\n" );
+           return (error);
+    }
+
+    /* setup inputs */
+    set = (unsigned) inputs[0];
+    actionid = (unsigned) inputs[1];
+
+    if( set )
+    {
+           error = kperf_action_set_samplers( actionid, inputs[2] );
+           if( error )
+                   return error;
+    }
+
+    printf("set %d actionid %u samplers val %u\n", 
+           set, actionid, (unsigned) inputs[2] );
+
+    error = kperf_action_get_samplers(actionid, &retval);
+    if(error)
+    {
+           printf( "error get samplers\n" );
+           return (error);
+    }
+
+    inputs[2] = retval;
+    
+    if( error == 0 )
+    {
+           error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) );
+           if( error )
+                   printf( "error out\n" );
+    }
+
+    return error;
+}
+
+static int
+sysctl_sampling( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int error = 0;
+    uint32_t value = 0;
+    
+    /* get the old value and process it */
+    value = kperf_sampling_status();
+
+    /* copy out the old value, get the new value */
+    error = sysctl_handle_int(oidp, &value, 0, req);
+    if (error || !req->newptr)
+           return (error);
+
+    printf( "setting sampling to %d\n", value );
+
+    /* if that worked, and we're writing... */
+    if( value )
+           error = kperf_sampling_enable();
+    else
+           error = kperf_sampling_disable();
+
+    return error;
+}
+
+static int
+sysctl_action_count( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int error = 0;
+    uint32_t value = 0;
+    
+    /* get the old value and process it */
+    value = kperf_action_get_count();
+
+    /* copy out the old value, get the new value */
+    error = sysctl_handle_int(oidp, &value, 0, req);
+    if (error || !req->newptr)
+           return (error);
+
+    printf( "setting action count to %d\n", value );
+
+    /* if that worked, and we're writing... */
+    return kperf_action_set_count(value);
+}
+
+static int
+sysctl_timer_count( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int error = 0;
+    uint32_t value = 0;
+    
+    /* get the old value and process it */
+    value = kperf_timer_get_count();
+
+    /* copy out the old value, get the new value */
+    error = sysctl_handle_int(oidp, &value, 0, req);
+    if (error || !req->newptr)
+           return (error);
+
+    printf( "setting timer count to %d\n", value );
+
+    /* if that worked, and we're writing... */
+    return kperf_timer_set_count(value);
+}
+
+static int
+sysctl_timer_pet( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int error = 0;
+    uint32_t value = 0;
+    
+    /* get the old value and process it */
+    value = kperf_timer_get_petid();
+
+    /* copy out the old value, get the new value */
+    error = sysctl_handle_int(oidp, &value, 0, req);
+    if (error || !req->newptr)
+           return (error);
+
+    printf( "setting timer petid to %d\n", value );
+
+    /* if that worked, and we're writing... */
+    return kperf_timer_set_petid(value);
+}
+
+/*
+ * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp,         \
+ *                                void *arg1, int arg2,                 \
+ *                              struct sysctl_req *req )
+ */
+static int
+kperf_sysctl SYSCTL_HANDLER_ARGS
+{
+       // __unused struct sysctl_oid *unused_oidp = oidp;
+       (void)arg2;
+    
+       /* which request */
+       switch( (uintptr_t) arg1 )
+       {
+       case REQ_ACTION_COUNT:
+               return sysctl_action_count( oidp, req );
+       case REQ_ACTION_SAMPLERS:
+               return sysctl_action_samplers( oidp, req );
+       case REQ_TIMER_COUNT:
+               return sysctl_timer_count( oidp, req );
+       case REQ_TIMER_PERIOD:
+               return sysctl_timer_period( oidp, req );
+       case REQ_TIMER_PET:
+               return sysctl_timer_pet( oidp, req );
+       case REQ_SAMPLING:
+               return sysctl_sampling( oidp, req );
+
+#if 0
+       case REQ_TIMER:
+               return sysctl_timer_period( req );
+       case REQ_PET:
+               return sysctl_pet_period( req );
+#endif
+       default:
+               return ENOENT;
+       }
+}
+
+/* root kperf node */
+SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+            "kperf");
+
+/* action sub-section */
+SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+            "action");
+
+SYSCTL_PROC(_kperf_action, OID_AUTO, count,
+            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_ACTION_COUNT, 
+            sizeof(int), kperf_sysctl, "I", "Number of actions");
+
+SYSCTL_PROC(_kperf_action, OID_AUTO, samplers,
+            CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_ACTION_SAMPLERS, 
+            3*sizeof(uint64_t), kperf_sysctl, "UQ", 
+            "What to sample what a trigger fires an action");
+
+/* timer sub-section */
+SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+            "timer");
+
+SYSCTL_PROC(_kperf_timer, OID_AUTO, count,
+            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_TIMER_COUNT, 
+            sizeof(int), kperf_sysctl, "I", "Number of time triggers");
+
+SYSCTL_PROC(_kperf_timer, OID_AUTO, period,
+            CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_TIMER_PERIOD, 
+            2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and period");
+
+SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer,
+            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_TIMER_PET, 
+            sizeof(int), kperf_sysctl, "I", "Which timer ID does PET");
+
+/* misc */
+SYSCTL_PROC(_kperf, OID_AUTO, sampling,
+            CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+            (void*)REQ_SAMPLING, 
+            sizeof(int), kperf_sysctl, "I", "Sampling running");
+
+int legacy_mode = 1;
+SYSCTL_INT(_kperf, OID_AUTO, legacy_mode, CTLFLAG_RW, &legacy_mode, 0, "legacy_mode");
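+
+/* The declarations above publish: kperf.sampling, kperf.action.count,
+ * kperf.action.samplers, kperf.timer.count, kperf.timer.period,
+ * kperf.timer.pet_timer and kperf.legacy_mode.
+ */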
+
+#if 0
+SYSCTL_PROC(_kperf, OID_AUTO, timer_period, 
+            CTLFLAG_RW, (void*)REQ_TIMER, 
+            sizeof(uint64_t), kperf_sysctl, "QU", "nanoseconds");
+
+SYSCTL_PROC(_kperf, OID_AUTO, pet_period, 
+            CTLFLAG_RW, (void*)REQ_PET, 
+            sizeof(uint64_t), kperf_sysctl, "QU", "nanoseconds");
+
+/* FIXME: do real stuff */
+SYSCTL_INT(_kperf, OID_AUTO, filter_pid0, 
+           CTLFLAG_RW, &pid_list[0], 0, "");
+SYSCTL_INT(_kperf, OID_AUTO, filter_pid1, 
+           CTLFLAG_RW, &pid_list[1], 0, "");
+SYSCTL_INT(_kperf, OID_AUTO, filter_pid2, 
+           CTLFLAG_RW, &pid_list[2], 0, "");
+SYSCTL_INT(_kperf, OID_AUTO, filter_pid3, 
+           CTLFLAG_RW, &pid_list[3], 0, "");
+
+#endif
diff --git a/osfmk/kperf/kperfbsd.h b/osfmk/kperf/kperfbsd.h
new file mode 100644 (file)
index 0000000..8c58640
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
diff --git a/osfmk/kperf/pet.c b/osfmk/kperf/pet.c
new file mode 100644 (file)
index 0000000..3b039c0
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* all thread states code */
+#include <mach/mach_types.h>
+#include <IOKit/IOTypes.h>
+#include <IOKit/IOLocks.h>
+#include <sys/errno.h>
+
+#include <chud/chud_xnu.h>
+
+#include <kperf/buffer.h>
+#include <kperf/sample.h>
+#include <kperf/context.h>
+#include <kperf/action.h>
+#include <kperf/filter.h>
+#include <kperf/pet.h>
+#include <kperf/timetrigger.h>
+
+/* timer id to call back on */
+static unsigned pet_timerid = 0;
+
+/* action ID to call
+ * We also use this as the sync point for waiting, for no good reason
+ */
+static unsigned pet_actionid = 0;
+
+/* the actual thread pointer */
+static thread_t pet_thread = NULL;
+
+/* Lock on which to synchronise */
+static IOLock *pet_lock = NULL;
+
+/* where to sample data to */
+static struct kperf_sample pet_sample_buf;
+
+/* sample an actual, honest to god thread! */
+static void
+pet_sample_thread( thread_t thread )
+{
+       struct kperf_context ctx;
+       task_t task;
+
+       /* work out the context */
+       ctx.cur_thread = thread;
+       ctx.cur_pid = -1;
+
+       task = chudxnu_task_for_thread(thread);
+       if(task)
+               ctx.cur_pid = chudxnu_pid_for_task(task);
+
+       /* do the actual sample */
+       kperf_sample( &pet_sample_buf, &ctx, pet_actionid, false );
+}
+
+/* given a list of threads, preferably stopped, sample 'em! */
+static void
+pet_sample_thread_list( mach_msg_type_number_t threadc, thread_array_t threadv )
+{
+       unsigned int i;
+
+       for( i = 0; i < threadc; i++ )
+       {
+               thread_t thread = threadv[i];
+
+               if( !thread )
+                       /* XXX? */
+                       continue;
+
+               pet_sample_thread( thread );
+       }
+}
+
+/* given a task (preferably stopped), sample all the threads in it */
+static void
+pet_sample_task( task_t task )
+{
+       mach_msg_type_number_t threadc;
+       thread_array_t threadv;
+       kern_return_t kr;
+
+       kr = chudxnu_task_threads(task, &threadv, &threadc);
+       if( kr != KERN_SUCCESS )
+       {
+               BUF_INFO2(PERF_PET_ERROR, ERR_THREAD, kr);
+               return;
+       }
+
+       pet_sample_thread_list( threadc, threadv );
+
+       chudxnu_free_thread_list(&threadv, &threadc);
+}
+
+/* given a list of tasks, sample all the threads in 'em */
+static void
+pet_sample_task_list( int taskc, task_array_t taskv  )
+{
+       int i;
+       
+       for( i = 0; i < taskc; i++ )
+       {
+               kern_return_t kr;
+               task_t task = taskv[i];
+
+               /* FIXME: necessary? old code did this, our hacky
+                * filtering code does, too
+                */
+               if(!task) {
+                       continue;
+               }
+               
+               /* try and stop any task other than the kernel task */
+               if( task != kernel_task )
+               {
+                       kr = task_suspend( task );
+
+                       /* try the next task */
+                       if( kr != KERN_SUCCESS )
+                               continue;
+               }
+               
+               /* sample it */
+               pet_sample_task( task );
+
+               /* if it wasn't the kernel, resume it */
+               if( task != kernel_task )
+                       task_resume(task);
+       }
+}
+
+static void
+pet_sample_all_tasks(void)
+{
+       task_array_t taskv = NULL;
+       mach_msg_type_number_t taskc = 0;
+       kern_return_t kr;
+
+       kr = chudxnu_all_tasks(&taskv, &taskc);
+
+       if( kr != KERN_SUCCESS )
+       {
+               BUF_INFO2(PERF_PET_ERROR, ERR_TASK, kr);
+               return;
+       }
+
+       pet_sample_task_list( taskc, taskv );
+       chudxnu_free_task_list(&taskv, &taskc);
+}
+
+static void
+pet_sample_pid_filter(void)
+{
+       task_t *taskv = NULL;
+       int *pidv, pidc, i;
+       vm_size_t asize;
+
+       kperf_filter_pid_list( &pidc, &pidv );
+       if( pidc == 0  )
+       {
+               BUF_INFO2(PERF_PET_ERROR, ERR_PID, 0);
+               return;
+       }
+
+       asize = pidc * sizeof(task_t);
+       taskv = kalloc( asize );
+
+       if( taskv == NULL )
+               goto out;
+
+       /* convert the pid list into a task list */
+       for( i = 0; i < pidc; i++ )
+       {
+               int pid = pidv[i];
+               if( pid == -1 )
+                       taskv[i] = NULL;
+               else
+                       taskv[i] = chudxnu_task_for_pid(pid);
+       }
+
+       /* now sample the task list */
+       pet_sample_task_list( pidc, taskv );
+
+       kfree(taskv, asize);
+
+out:
+       kperf_filter_free_pid_list( &pidc, &pidv );
+}
+
+/* do the pet sample */
+static void
+pet_work_unit(void)
+{
+       int pid_filter;
+
+       /* check if we're filtering on pid  */
+       pid_filter = kperf_filter_on_pid();
+
+       if( pid_filter )
+       {
+               BUF_INFO1(PERF_PET_SAMPLE | DBG_FUNC_START, 1);
+               pet_sample_pid_filter();
+       }
+       else
+       {
+               /* otherwise sample everything */
+               BUF_INFO1(PERF_PET_SAMPLE | DBG_FUNC_START, 0);
+               pet_sample_all_tasks();
+       }
+
+       BUF_INFO1(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
+
+}
+
+/* sleep indefinitely */
+static void 
+pet_idle(void)
+{
+       IOLockLock(pet_lock);
+       IOLockSleep(pet_lock, &pet_actionid, THREAD_UNINT);
+       IOLockUnlock(pet_lock);
+}
+
+/* loop between sampling and waiting */
+static void
+pet_thread_loop( __unused void *param, __unused wait_result_t wr )
+{
+       BUF_INFO1(PERF_PET_THREAD, 1);
+
+       while(1)
+       {
+               BUF_INFO1(PERF_PET_IDLE, 0);
+               pet_idle();
+
+               BUF_INFO1(PERF_PET_RUN, 0);
+               pet_work_unit();
+
+               /* re-program the timer */
+               kperf_timer_pet_set( pet_timerid );
+
+               /* FIXME: break here on a condition? */
+       }
+}
+
+/* make sure the thread takes a new period value */
+void
+kperf_pet_timer_config( unsigned timerid, unsigned actionid )
+{
+       /* hold the lock so pet thread doesn't run while we do this */
+       IOLockLock(pet_lock);
+
+       BUF_INFO1(PERF_PET_THREAD, 3);
+
+       /* set values */
+       pet_timerid = timerid;
+       pet_actionid = actionid;
+
+       /* done */
+       IOLockUnlock(pet_lock);
+}
+
+/* make the thread run! */
+void
+kperf_pet_thread_go(void)
+{
+       /* Make the thread go */
+       IOLockWakeup(pet_lock, &pet_actionid, FALSE);
+}
+
+
+/* wait for the pet thread to finish a run */
+void
+kperf_pet_thread_wait(void)
+{
+       /* acquire the lock to ensure the thread is parked. */
+       IOLockLock(pet_lock);
+       IOLockUnlock(pet_lock);
+}
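+
+/* Putting the three calls together (sketch):
+ *   kperf_pet_timer_config( timerid, actionid ); // publish parameters
+ *   kperf_pet_thread_go();                       // wake the sampler
+ *   kperf_pet_thread_wait();                     // barrier until it parks,
+ *                                                // per the comment above
+ */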
+
+/* keep the pet thread around while we run */
+int
+kperf_pet_init(void)
+{
+       kern_return_t rc;
+       thread_t t;
+
+       if( pet_thread != NULL )
+               return 0;
+
+       /* make the sync point */
+       pet_lock = IOLockAlloc();
+       if( pet_lock == NULL )
+               return ENOMEM;
+
+       /* create the thread */
+       BUF_INFO1(PERF_PET_THREAD, 0);
+       rc = kernel_thread_start( pet_thread_loop, NULL, &t );
+       if( rc != KERN_SUCCESS )
+       {
+               IOLockFree( pet_lock );
+               pet_lock = NULL;
+               return ENOMEM;
+       }
+
+       /* OK! */
+       return 0;
+}
diff --git a/osfmk/kperf/pet.h b/osfmk/kperf/pet.h
new file mode 100644 (file)
index 0000000..9ffa736
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+/* create the pet thread */
+extern int kperf_pet_init(void);
+
+/* Kick the pet thread so it runs a sample of all threads */
+extern void kperf_pet_thread_go(void);
+
+/* ensure the pet thread has stopped sampling */
+extern void kperf_pet_thread_wait(void);
+
+/* tell pet the timer parameters */
+extern void kperf_pet_timer_config( unsigned timerid, unsigned actionid );
diff --git a/osfmk/kperf/sample.h b/osfmk/kperf/sample.h
new file mode 100644 (file)
index 0000000..5a87121
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+// what goes in a sample
+
+#include "threadinfo.h"
+#include "callstack.h"
+
+struct kperf_sample
+{
+       struct threadinfo threadinfo;
+       struct tinfo_ex   tinfo_ex;
+       struct callstack  kcallstack;
+       struct callstack  ucallstack;
+};
+
diff --git a/osfmk/kperf/threadinfo.c b/osfmk/kperf/threadinfo.c
new file mode 100644 (file)
index 0000000..88388c3
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+/*  Sample thread data */
+
+#include <mach/mach_types.h>
+#include <kern/thread.h> /* thread_* */
+#include <kern/debug.h> /* panic */
+// #include <sys/proc.h>
+
+#include <chud/chud_xnu.h>
+#include <kperf/kperf.h>
+
+#include <kperf/buffer.h>
+#include <kperf/context.h>
+#include <kperf/threadinfo.h>
+#include <kperf/ast.h>
+
+// kAppleProfileTriggerClientThreadModeIdle                            = 0x40, // TH_IDLE
+// #define TH_IDLE 0x40
+
+//kAppleProfileTriggerClientThreadModeNotIdle                          = kAppleProfileTriggerClientThreadModeIdle << 16, // !TH_IDLE
+#define TH_IDLE_N (TH_IDLE << 16)
+
+static uint64_t
+make_runmode(thread_t thread)
+{
+       /* CEG: This is a translation of
+        * AppleProfileGetRunModeOfThread below... kinda magic :/
+        */
+       const int mode = chudxnu_thread_get_scheduler_state(thread);
+       
+#if !TARGET_OS_EMBEDDED
+       if( 0 == mode)
+       {
+               return (chudxnu_thread_get_idle(thread) ? TH_IDLE : TH_IDLE_N);
+       }
+       else
+#endif
+               // Today we happen to know there's a one-to-one mapping.
+               return ((mode & 0xffff) | ((~mode & 0xffff) << 16));
+}
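+
+/* Worked example: mode == 0x5 (two state bits set) maps to
+ *   (0x5 & 0xffff) | ((~0x5 & 0xffff) << 16) == 0xfffa0005,
+ * i.e. scheduler bits in the low half, their complements in the high half.
+ */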
+
+
+/* code to collect current thread info */
+void
+kperf_threadinfo_sample(struct threadinfo *ti, struct kperf_context *context)
+{
+       thread_t cur_thread = context->cur_thread;
+       BUF_INFO1( PERF_TI_SAMPLE, (uintptr_t)cur_thread );
+
+       // fill out the fields
+       ti->pid = context->cur_pid;
+       ti->tid = thread_tid(cur_thread);
+       ti->dq_addr = thread_dispatchqaddr(cur_thread);
+       ti->runmode = make_runmode(cur_thread);
+}
+
+/* log an existing sample into the buffer */
+void
+kperf_threadinfo_log(struct threadinfo *ti)
+{
+       /* XXX: K64 only? */
+       BUF_DATA( PERF_TI_DATA, ti->pid, ti->tid, ti->dq_addr, ti->runmode );
+}
+
+/* 'extra' thread-info functions that are deferred 'til thread-context
+ * time
+ */
+void
+kperf_threadinfo_extra_sample(struct tinfo_ex *tex, struct kperf_context *context)
+{
+       thread_t cur_thread = context->cur_thread;
+       uint32_t t_chud;
+
+       /* can only pend on the current thread */
+       /* this is valid from PET mode... */
+       /*
+       if( cur_thread != chudxnu_current_thread() )
+               panic("pending to non-current thread");
+       */
+
+       /* get our current bits */
+       t_chud = kperf_get_thread_bits(cur_thread);
+
+       /* check if there's anything for us to do */
+       if( t_chud & T_AST_NAME )
+       {
+               BUF_INFO1( PERF_TI_XSAMPLE, (uintptr_t)cur_thread );
+
+               /* get the name out */
+#ifdef FIXME
+               /* need kperfbsd.c? */
+               proc_name( context->cur_pid, 
+                          &tex->p_comm[0], CHUD_MAXPCOMM );
+#endif
+
+               /* mark that it's done */
+               t_chud &= ~T_AST_NAME;
+               t_chud |= T_NAME_DONE;
+
+               kperf_set_thread_bits(cur_thread, t_chud);
+       }
+       else
+               /* empty string */
+               tex->p_comm[0] = '\0';
+
+}
+
+/* log it if there's anything useful there */
+void
+kperf_threadinfo_extra_log(struct tinfo_ex *tex)
+{
+       /* no data */
+       if( tex->p_comm[0] == '\0' )
+               return;
+
+       /* FIXME: log more */
+       BUF_DATA1( PERF_TI_XDATA, (uintptr_t)*(uintptr_t*)&tex->p_comm[0] );
+}
+
+/* pend a flag on a thread */
+int
+kperf_threadinfo_extra_pend(struct kperf_context *context)
+{
+       return kperf_ast_pend( context->cur_thread, T_NAME_DONE | T_AST_NAME,
+                              T_AST_NAME );
+}
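+
+/* AST name protocol (sketch): pend sets T_AST_NAME on the thread; when
+ * the AST fires, kperf_threadinfo_extra_sample() above copies the name,
+ * clears T_AST_NAME and sets T_NAME_DONE.
+ */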
+
+
+#if 0
+
+/* translated from the APF */
+
+APTIAKernelEntry_t *threadInfo = (APTIAKernelEntry_t*)(threadInfos + account->offset);
+
+context->timeStamp = mach_absolute_time();
+context->cpuNum = chudxnu_cpu_number();
+
+// record the process info from the callback context
+context->pid = chudxnu_current_pid();
+threadInfo->pid = context->generic->pid;
+
+// thread_tid is a thread_t to ID function in the kernel
+context->threadID = chudxnu_current_thread();
+threadInfo->tid = thread_tid(context->generic->threadID);
+
+// also a kernel function
+threadInfo->dispatch_queue_addr = thread_dispatchqaddr(context->generic->threadID);
+
+// see below
+threadInfo->runMode = AppleProfileGetRunModeOfThread(context->generic->threadID);
+
+
+/****** WTF is this?! *******/
+
+/*!enum AppleProfileTriggerClientThreadRunMode
+ *
+ * Specifies the thread mode in which to record samples.
+ */
+typedef enum { // Target Thread State - can be OR'd
+       // Basic Building Blocks:
+       // for Time Profile, use kAppleProfileTriggerClientThreadModeRunning (optionally with kAppleProfileTriggerClientThreadModeNotIdle).
+       // for Time Profile (All Thread States), use kAppleProfileTriggerClientThreadModeAny (or just don't specify any thread mode filters).
+       // for Time Profile (Blocked Threads), use kIOProfileTriggerClientThreadModeBlocked.
+       // etc...
+       
+       kAppleProfileTriggerClientThreadModeNone                                = 0x0,
+       
+       kAppleProfileTriggerClientThreadModeRunning                             = 0x1, // On a core
+       kAppleProfileTriggerClientThreadModeRunnable                    = 0x2, // TH_RUN
+       kAppleProfileTriggerClientThreadModeBlocked                             = 0x4, // TH_WAIT
+       kAppleProfileTriggerClientThreadModeUninterruptible             = 0x8, // TH_UNINT
+       kAppleProfileTriggerClientThreadModeSuspended                   = 0x10, // TH_SUSP
+       kAppleProfileTriggerClientThreadModeTerminating                 = 0x20, // TH_TERMINATE
+       kAppleProfileTriggerClientThreadModeIdle                                = 0x40, // TH_IDLE
+       
+       kAppleProfileTriggerClientThreadModeNotRunning                  = kAppleProfileTriggerClientThreadModeRunning << 16, // Not on a core
+       kAppleProfileTriggerClientThreadModeNotRunnable                 = kAppleProfileTriggerClientThreadModeRunnable << 16, // !TH_RUN
+       kAppleProfileTriggerClientThreadModeNotBlocked                  = kAppleProfileTriggerClientThreadModeBlocked << 16, // !TH_WAIT
+       kAppleProfileTriggerClientThreadModeNotUninterruptible  = kAppleProfileTriggerClientThreadModeUninterruptible << 16, // !TH_UNINT
+       kAppleProfileTriggerClientThreadModeNotSuspended                = kAppleProfileTriggerClientThreadModeSuspended << 16, // !TH_SUSP
+       kAppleProfileTriggerClientThreadModeNotTerminating              = kAppleProfileTriggerClientThreadModeTerminating << 16, // !TH_TERMINATE
+       kAppleProfileTriggerClientThreadModeNotIdle                             = kAppleProfileTriggerClientThreadModeIdle << 16, // !TH_IDLE
+       
+       kAppleProfileTriggerClientThreadModeAny                                 = (   kAppleProfileTriggerClientThreadModeRunning
+                                                                                                                               | kAppleProfileTriggerClientThreadModeNotRunning),
+} AppleProfileTriggerClientThreadRunMode;
+
+extern "C" AppleProfileTriggerClientThreadRunMode AppleProfileGetRunModeOfThread(thread_t thread) {    
+       const int mode = chudxnu_thread_get_scheduler_state(thread);
+       
+#if !TARGET_OS_EMBEDDED
+       if (0 == mode) {
+               return (chudxnu_thread_get_idle(thread) ? kAppleProfileTriggerClientThreadModeIdle : kAppleProfileTriggerClientThreadModeNotIdle);
+       } else
+#endif
+       return (AppleProfileTriggerClientThreadRunMode)((mode & 0xffff) | ((~mode & 0xffff) << 16)); // Today we happen to know there's a one-to-one mapping.
+}
+
+#endif
diff --git a/osfmk/kperf/threadinfo.h b/osfmk/kperf/threadinfo.h
new file mode 100644 (file)
index 0000000..e7bcaaf
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __AP_THREADINFO_H__
+#define __AP_THREADINFO_H__
+
+/* 'live' threadinfo */
+struct threadinfo
+{
+       uint64_t pid;
+       uint64_t tid;
+       uint64_t dq_addr;
+       uint64_t runmode;
+};
+
+/* extra info we sample out of bounds */
+#define CHUD_MAXPCOMM 16  /* copy from kernel somewhere :P */
+struct tinfo_ex
+{
+       char p_comm[CHUD_MAXPCOMM+1]; /* XXX: 16 + 1 */
+};
+
+struct kperf_context;
+extern void kperf_threadinfo_sample(struct threadinfo *ti, struct kperf_context *);
+extern void kperf_threadinfo_log(struct threadinfo *ti);
+
+extern void kperf_threadinfo_extra_sample(struct tinfo_ex *, struct kperf_context *);
+extern int kperf_threadinfo_extra_pend(struct kperf_context *);
+extern void kperf_threadinfo_extra_log(struct tinfo_ex *);
+
+#endif /* __AP_THREADINFO_H__ */
diff --git a/osfmk/kperf/timetrigger.c b/osfmk/kperf/timetrigger.c
new file mode 100644 (file)
index 0000000..643d63c
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* Manage time triggers */
+
+#include <mach/mach_types.h>
+#include <kern/cpu_data.h> /* current_thread() */
+#include <kern/kalloc.h>
+#include <sys/errno.h>
+
+#include <chud/chud_xnu.h>
+
+#include <kperf/kperf.h>
+#include <kperf/buffer.h>
+#include <kperf/context.h>
+#include <kperf/action.h>
+#include <kperf/timetrigger.h>
+#include <kperf/kperf_arch.h>
+#include <kperf/pet.h>
+
+/* represents a periodic timer */
+struct time_trigger
+{
+       struct timer_call tcall;
+       uint64_t period;
+       unsigned actionid;
+       volatile unsigned active;
+};
+
+/* the list of timers */
+static unsigned timerc = 0;
+static struct time_trigger *timerv;
+static unsigned pet_timer = 999;
+
+/* maximum number of timers we can construct */
+#define TIMER_MAX 16
+
+/* minimal interval for a timer (100usec in nsec) */
+#define MIN_TIMER (100000)
+
+static void
+kperf_timer_schedule( struct time_trigger *trigger, uint64_t now )
+{
+       uint64_t deadline;
+
+       BUF_INFO1(PERF_TM_SCHED, trigger->period);
+
+       /* calculate deadline */
+       deadline = now + trigger->period;
+       
+       /* re-schedule the timer, making sure we don't apply slop */
+       timer_call_enter( &trigger->tcall, deadline, TIMER_CALL_CRITICAL);
+}
+
+static void
+kperf_ipi_handler( void *param )
+{
+       int r;
+       struct kperf_sample *intbuf = NULL;
+       struct kperf_context ctx;
+       struct time_trigger *trigger = param;
+       task_t task = NULL;
+       
+       BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_START, 0);
+
+       /* In an interrupt, get the interrupt buffer for this CPU */
+       intbuf = kperf_intr_sample_buffer();
+
+       /* On a timer, we can see the "real" current thread */
+       ctx.cur_pid = 0; /* remove this? */
+       ctx.cur_thread = current_thread();
+
+       task = chudxnu_task_for_thread(ctx.cur_thread);
+       if (task)
+               ctx.cur_pid = chudxnu_pid_for_task(task);
+
+       /* who fired */
+       ctx.trigger_type = TRIGGER_TYPE_TIMER;
+       ctx.trigger_id = (unsigned)(trigger-timerv); /* compute timer number */
+
+       /* call the action -- kernel-only from interrupt, pend user */
+       r = kperf_sample( intbuf, &ctx, trigger->actionid, TRUE );
+       
+       BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_END, r);
+}
+
+static void
+kperf_timer_handler( void *param0, __unused void *param1 )
+{
+       struct time_trigger *trigger = param0;
+       unsigned ntimer = (unsigned)(trigger - timerv);
+
+       trigger->active = 1;
+
+       /* don't IPI the other CPUs if sampling is shutting down */
+       if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN )
+               goto deactivate;
+
+       /* ping all CPUs */
+       kperf_mp_broadcast( kperf_ipi_handler, trigger );
+
+       /* release the pet thread? */
+       if( ntimer == pet_timer )
+       {
+               /* timer re-enabled when thread done */
+               kperf_pet_thread_go();
+       }
+       else
+       {
+               /* re-enable the timer 
+                * FIXME: get the current time from elsewhere
+                */
+               uint64_t now = mach_absolute_time();
+               kperf_timer_schedule( trigger, now );
+       }
+
+deactivate:
+       trigger->active = 0;
+}
+
+/* program the timer from the pet thread */
+int
+kperf_timer_pet_set( unsigned timer )
+{
+       uint64_t now;
+       struct time_trigger *trigger = NULL;
+
+       if( timer != pet_timer )
+               panic( "PET setting with bogus ID\n" );
+
+       if( timer >= timerc )
+               return EINVAL;
+
+       /* CHECKME: we may have spent a long time in the PET thread,
+        * so it makes sense to take the time again.
+        */
+       now = mach_absolute_time();
+       trigger = &timerv[timer];
+
+       /* reprogram */
+       kperf_timer_schedule( trigger, now );
+
+       return 0;
+}
+
+
+/* turn on all the timers */
+extern int
+kperf_timer_go(void)
+{
+       unsigned i;
+       uint64_t now = mach_absolute_time();
+
+       for( i = 0; i < timerc; i++ )
+       {
+               if( timerv[i].period == 0 )
+                       continue;
+
+               kperf_timer_schedule( &timerv[i], now );
+       }
+
+       return 0;
+}
+
+
+extern int
+kperf_timer_stop(void)
+{
+       unsigned i;
+
+       for( i = 0; i < timerc; i++ )
+       {
+               if( timerv[i].period == 0 )
+                       continue;
+
+               while (timerv[i].active)
+                       ;
+
+               timer_call_cancel( &timerv[i].tcall );
+       }
+
+       /* wait for PET to stop, too */
+       kperf_pet_thread_wait();
+
+       return 0;
+}
+
+unsigned
+kperf_timer_get_petid(void)
+{
+       return pet_timer;
+}
+
+int
+kperf_timer_set_petid(unsigned timerid)
+{
+       struct time_trigger *trigger = NULL;
+
+       /* callers may set any ID; out-of-range values are handled below */
+       pet_timer = timerid;
+       
+       /* clear the PET configuration if the ID is out of range */
+       if( pet_timer >= timerc )
+       {
+               kperf_pet_timer_config( 0, 0 );
+
+               return 0;
+       }
+
+       /* update the values */
+       trigger = &timerv[pet_timer];
+       kperf_pet_timer_config( pet_timer, trigger->actionid );
+
+       return 0;
+}
+
+int
+kperf_timer_get_period( unsigned timer, uint64_t *period )
+{
+       printf( "get timer %u / %u\n", timer, timerc );
+
+       if( timer >= timerc )
+               return EINVAL;
+
+       *period = timerv[timer].period;
+
+       return 0;
+}
+
+int
+kperf_timer_set_period( unsigned timer, uint64_t period )
+{
+       printf( "set timer %u\n", timer );
+
+       if( timer >= timerc )
+               return EINVAL;
+
+       if( period < MIN_TIMER )
+               period = MIN_TIMER;
+
+       timerv[timer].period = period;
+
+       /* FIXME: re-program running timers? */
+
+       return 0;
+}
+
+unsigned
+kperf_timer_get_count(void)
+{
+       return timerc;
+}
+
+static void
+setup_timer_call( struct time_trigger *trigger )
+{
+       timer_call_setup( &trigger->tcall, kperf_timer_handler, trigger );
+}
+
+extern int
+kperf_timer_set_count(unsigned count)
+{
+       struct time_trigger *new_timerv = NULL, *old_timerv = NULL;
+       unsigned old_count, i;
+
+       /* easy no-op */
+       if( count == timerc )
+       {
+               printf( "already got %d timers\n", timerc );
+               return 0;
+       }
+
+       /* TODO: allow shrinking? */
+       if( count < timerc )
+               return EINVAL;
+
+       /* cap it for good measure */
+       if( count > TIMER_MAX )
+               return EINVAL;
+
+       /* creating the timer array for the first time. create a few
+        * more things, too.
+        */
+       if( timerc == 0 )
+       {
+               int r;
+
+               /* main kperf */
+               r = kperf_init();
+               if( r )
+                       return r;
+               
+               /* get the PET thread going */
+               r = kperf_pet_init();
+               if( r )
+                       return r;
+       }
+
+       /* create a new array */
+       new_timerv = kalloc( count * sizeof(*new_timerv) );
+       if( new_timerv == NULL )
+               return ENOMEM;
+
+       old_timerv = timerv;
+       old_count = timerc;
+
+       if( old_timerv != NULL )
+               bcopy( timerv, new_timerv, timerc * sizeof(*timerv) );
+
+       /* zero the new entries */
+       bzero( &new_timerv[timerc], (count - old_count) * sizeof(*new_timerv) );
+
+       /* setup the timer call info */
+       for( i = old_count; i < count; i++ )
+               setup_timer_call( &new_timerv[i] );
+
+       timerv = new_timerv;
+       timerc = count;
+
+       if( old_timerv != NULL )
+               kfree( old_timerv, old_count * sizeof(*timerv) );
+
+       printf( "kperf: done timer alloc, timerc %d\n", timerc );
+
+       return 0;
+}
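
Taken together, the exported calls form a small configuration API. A hedged sketch of a controller arming one timer; the 10 ms value is illustrative, and the assumption that periods are in mach_absolute_time() units (nanoseconds on x86) follows from kperf_timer_schedule() adding the period directly to mach_absolute_time():

        int
        configure_one_timer(void)
        {
                int err;

                err = kperf_timer_set_count(1);         /* allocate timerv[0] */
                if (err)
                        return err;

                err = kperf_timer_set_period(0, 10 * 1000 * 1000ULL); /* ~10 ms */
                if (err)
                        return err;

                return kperf_timer_go();                /* arm every timer */
        }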
diff --git a/osfmk/kperf/timetrigger.h b/osfmk/kperf/timetrigger.h
new file mode 100644 (file)
index 0000000..b0d67b0
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+// extern uint64_t timer_period;
+extern void kperf_timer_reprogram(void);
+extern void kperf_timer_reprogram_all(void);
+
+
+// return values from the action
+#define TIMER_REPROGRAM (0)
+#define TIMER_STOP (1)
+
+/* manage the timer list */
+extern unsigned kperf_timer_get_count(void);
+extern int kperf_timer_set_count(unsigned count);
+
+extern int kperf_timer_get_period( unsigned timer, uint64_t *period );
+extern int kperf_timer_set_period( unsigned timer, uint64_t period );
+
+extern int kperf_timer_go(void);
+extern int kperf_timer_stop(void);
+
+extern unsigned kperf_timer_get_petid(void);
+extern int kperf_timer_set_petid(unsigned count);
+
+/* so PET thread can re-arm the timer */
+extern int kperf_timer_pet_set( unsigned timer );
diff --git a/osfmk/kperf/x86_64/kperf_arch.h b/osfmk/kperf/x86_64/kperf_arch.h
new file mode 100644 (file)
index 0000000..7d361c7
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* arch-dependent wrapper for kperf */
+
diff --git a/osfmk/kperf/x86_64/kperf_mp.c b/osfmk/kperf/x86_64/kperf_mp.c
new file mode 100644 (file)
index 0000000..d4a1e8b
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <i386/mp.h>
+
+#include <kperf/kperf_arch.h>
+
+int
+kperf_mp_broadcast( void (*func)(void*), void *arg )
+{
+       mp_cpus_call( CPUMASK_ALL, ASYNC, func, arg );
+
+       return 0;
+}
index 770208eaad5db3123111555dea9de206f5ba81d6..b435d9794ba09e93e152f7fb2f07a77a18a2415f 100644 (file)
@@ -30,7 +30,6 @@ MIG_DEFS =    \
        host_notify_reply.defs \
        host_priv.defs \
        host_security.defs \
-       ledger.defs \
        lock_set.defs \
        mach_exc.defs \
        mach_host.defs \
@@ -74,7 +73,6 @@ MIG_UUHDRS = \
        clock_priv.h \
        host_priv.h \
        host_security.h \
-       ledger.h \
        lock_set.h \
        mach_host.h \
        mach_port.h \
@@ -128,7 +126,6 @@ DATAFILES = \
        sync_policy.h \
         syscall_sw.h \
        task_info.h \
-       task_ledger.h \
        task_policy.h \
        task_special_ports.h \
         thread_info.h \
@@ -251,7 +248,6 @@ MIG_KSHDRS = \
        exc_server.h \
        host_priv_server.h \
        host_security_server.h \
-       ledger_server.h \
        lock_set_server.h \
        mach_exc_server.h \
        mach_host_server.h \
@@ -277,7 +273,6 @@ MIG_KSSRC = \
        exc_server.c \
        host_priv_server.c \
        host_security_server.c \
-       ledger_server.c \
        lock_set_server.c \
        mach_exc_server.c \
        mach_host_server.c \
index 8de51072177ca2010569682e4544577aee4f8b86..fa15c23a1a9b621b104990641463407e1fb865bd 100644 (file)
@@ -32,8 +32,6 @@ CLOCK_REPLY_FILES = clock_reply.h clock_reply_user.c
 
 BOOTSTRAP_FILES = bootstrap_server.h bootstrap_server.c
 
-LEDGER_FILES = ledger_user.c ledger_server.h ledger_server.c
-
 SYNC_FILES = sync_server.h sync_server.c
 
 MACH_USER_FILES = mach_user.h mach_user.c
@@ -41,7 +39,7 @@ MACH_USER_FILES = mach_user.h mach_user.c
 OTHERS = ${MACH_FILES} ${MACH_PORT_FILES} \
        ${EXC_FILES} ${MACH_EXC_FILES} \
        ${MEMORY_OBJECT_FILES} ${MEMORY_OBJECT_DEFAULT_FILES} \
-       ${PROF_FILES} ${MACH_HOST_FILES} ${LEDGER_FILES} \
+       ${PROF_FILES} ${MACH_HOST_FILES} \
        ${CLOCK_FILES} ${CLOCK_REPLY_FILES} ${BOOTSTRAP_FILES} \
        ${BOOTSTRAP_FILES} ${SYNC_FILES} \
        ${MACH_USER_FILES} 
@@ -150,16 +148,6 @@ ${BOOTSTRAP_FILES}: mach/bootstrap.defs
                -server bootstrap_server.c              \
                $<
 
-.ORDER: ${LEDGER_FILES}
-
-${LEDGER_FILES}: mach/ledger.defs ${MACH_TYPES_DEFS}
-       ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} ${MIGKUFLAGS}      \
-               -header /dev/null                               \
-               -user ledger_user.c                             \
-               -sheader ledger_server.h                        \
-               -server ledger_server.c                         \
-               $<
-                  
 .ORDER: ${SYNC_FILES}
         
 ${SYNC_FILES}: mach/sync.defs 
index b0631f16beeeb1237b472197a3524f337b9cf3a9..96de421d261597005a31f57c220a394fc1332406 100644 (file)
@@ -95,6 +95,9 @@
  
 #define EXC_CRASH              10      /* Abnormal process exit */
 
+#define EXC_RESOURCE           11      /* Hit resource consumption limit */
+               /* The exact resource is identified in the code field. */
+
 /*
  *     Machine-independent exception behaviors
  */
 #define EXC_MASK_MACH_SYSCALL          (1 << EXC_MACH_SYSCALL)
 #define EXC_MASK_RPC_ALERT             (1 << EXC_RPC_ALERT)
 #define EXC_MASK_CRASH                 (1 << EXC_CRASH)
+#define EXC_MASK_RESOURCE              (1 << EXC_RESOURCE)
 
 #define EXC_MASK_ALL   (EXC_MASK_BAD_ACCESS |                  \
                         EXC_MASK_BAD_INSTRUCTION |             \
                         EXC_MASK_SYSCALL |                     \
                         EXC_MASK_MACH_SYSCALL |                \
                         EXC_MASK_RPC_ALERT |                   \
+                        EXC_MASK_RESOURCE |                    \
                         EXC_MASK_MACHINE)
 
 #ifdef KERNEL_PRIVATE
  * Machine independent codes for EXC_SOFTWARE
  * Codes 0x10000 - 0x1FFFF reserved for OS emulation (Unix) 
  * 0x10000 - 0x10002 in use for unix signals
+ * 0x20000 - 0x2FFFF reserved for MACF
  */
 #define        EXC_SOFT_SIGNAL         0x10003 /* Unix signal exceptions */
 
+#define        EXC_MACF_MIN            0x20000 /* MACF exceptions */
+#define        EXC_MACF_MAX            0x2FFFF
+
 #ifndef        ASSEMBLER
 
 #include <mach/port.h>
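
A user-space sketch of opting into the new exception type; task_set_exception_ports() and the remaining constants are the standard Mach exception API, and setting up exc_port is elided:

        #include <mach/mach.h>

        kern_return_t
        watch_resource_limits(mach_port_t exc_port)
        {
                /* deliver EXC_RESOURCE notifications for this task to exc_port */
                return task_set_exception_ports(mach_task_self(),
                                                EXC_MASK_RESOURCE,
                                                exc_port,
                                                EXCEPTION_DEFAULT,
                                                THREAD_STATE_NONE);
        }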
index 3be39868b9bba67accbf65f72637ebe32e79d47d..ac4997b221edd9df28d16a5ce0ff1b087cca4d0e 100644 (file)
@@ -265,18 +265,7 @@ routine    host_swap_exception_ports(
          out   old_behaviors   : exception_behavior_array_t, SameCount;
          out   old_flavors     : exception_flavor_array_t, SameCount);
 
-/*
- *     Loads a symbol table for an external file into the kernel debugger.
- *     The symbol table data is an array of characters.  It is assumed that
- *     the caller and the kernel debugger agree on its format.
- *      This call is only supported in MACH_DEBUG and MACH_KDB kernels,
- *      otherwise KERN_FAILURE is returned.
- */
-routine host_load_symbol_table(
-               host            : host_priv_t;
-               task            : task_t;
-               name            : symtab_name_t;
-               symtab          : pointer_t);
+skip; /* old host_load_symbol_table */
 
 /*
  *     Specify that the range of the virtual address space
index 755327c5c8e8d7dc76602b3e45e3b8a859910324..4b20372569cf7c8c03d138233601f65493664b80 100644 (file)
@@ -90,7 +90,8 @@
 #define HOST_CHUD_PORT                  (9 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_UNFREED_PORT              (10 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_AMFID_PORT                        (11 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_MAX_SPECIAL_PORT           (12 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_GSSD_PORT                 (12 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_MAX_SPECIAL_PORT           (13 + HOST_MAX_SPECIAL_KERNEL_PORT)
                                         /* room to grow here as well */
 
 /*
 #define host_set_amfid_port(host, port)        \
        (host_set_special_port((host), HOST_AMFID_PORT, (port)))
 
+#define host_get_gssd_port(host, port) \
+       (host_get_special_port((host),                  \
+       HOST_LOCAL_NODE, HOST_GSSD_PORT, (port)))
+
+#define host_set_gssd_port(host, port) \
+       (host_set_special_port((host), HOST_GSSD_PORT, (port)))
+
 #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */
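
The new macros follow the shape of the existing accessors. An in-kernel sketch of fetching the port; host_priv_self() as the host argument and the hypothetical caller are assumptions, not part of this diff:

        mach_port_t
        lookup_gssd_port(void)
        {
                mach_port_t gssd = MACH_PORT_NULL;

                if (host_get_gssd_port(host_priv_self(), &gssd) != KERN_SUCCESS)
                        return MACH_PORT_NULL;
                return gssd;
        }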
index 77fe4e404442fcc5e96283a6cce1b1530ad12fff..a9b4fbf0478dc17658aca193c73df0d4f00d1677 100644 (file)
@@ -63,7 +63,7 @@
  * No machine dependent types for the 80386
  */
 
-#define        EXC_TYPES_COUNT 11      /* incl. illegal exception 0 */
+#define        EXC_TYPES_COUNT 12      /* incl. illegal exception 0 */
 
 /*
  *     Codes and subcodes for 80386 exceptions.
index 6a356154e12b67f2322596591ab7cef67b8af9db..9830993b2ed0b7d87dcc060a74b4cd9cd15e87e6 100644 (file)
@@ -95,6 +95,8 @@ type vm_offset_t = natural_t;
 type vm_size_t = natural_t;
 #endif
 
+type mach_port_context_t = uint64_t;
+
 /*
  * The mach_vm_xxx_t types are sized to hold the
  * maximum pointer, offset, etc... supported on the
index 503f5ce633f841b5974f5b8e31ff54382aea6258..14549e8101778f77b53df590ebf0511cee37d585 100644 (file)
@@ -41,7 +41,7 @@
  */
 #ifdef __x86_64__
 #define DTRACE_LAB(p, n)               \
-   "__dtrace_probeDOLLAR" DTRACE_TOSTRING(%=__LINE__) DTRACE_STRINGIFY(_##p##___##n)
+   "__dtrace_probe$" DTRACE_TOSTRING(%=__LINE__) DTRACE_STRINGIFY(_##p##___##n)
 
 #define DTRACE_LABEL(p, n)             \
       ".section __DATA, __data\n\t"    \
index fb2ca164fb272e99f6cd0d2d0a8aea9c1c87ff2b..12eb226e2c93843e022e407c0af8c629d292381b 100644 (file)
 #define i386_btop(x)           ((ppnum_t)((x) >> I386_PGSHIFT))
 #define machine_btop(x)                i386_btop(x)
 #define i386_ptob(x)           (((pmap_paddr_t)(x)) << I386_PGSHIFT)
+#define machine_ptob(x)                i386_ptob(x)
 
 /*
  *     Round off or truncate to the nearest page.  These will work
 
 /* process-relative values (all 32-bit legacy only for now) */
 #define VM_MIN_ADDRESS         ((vm_offset_t) 0)
-#define VM_USRSTACK32          ((vm_offset_t) 0xC0000000)
+#define VM_USRSTACK32          ((vm_offset_t) 0xC0000000)      /* ASLR slides stack down by up to 1 MB */
 #define VM_MAX_ADDRESS         ((vm_offset_t) 0xFFE00000)
 
 
 #define KEXT_ALLOC_BASE(x)  ((x) - KEXT_ALLOC_MAX_OFFSET)
 #define KEXT_ALLOC_SIZE(x)  (KEXT_ALLOC_MAX_OFFSET - (x))
 
+#define VM_KERNEL_IS_KEXT(_o)                                                 \
+                (((vm_offset_t)(_o) >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) &&      \
+                 ((vm_offset_t)(_o) <  VM_MIN_KERNEL_ADDRESS))
+
 #else
 #error unsupported architecture
 #endif
 #define VM32_MIN_ADDRESS               ((vm32_offset_t) 0)
 #define VM32_MAX_ADDRESS               ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF))
 
+/*
+ * kalloc() parameters:
+ *
+ * Historically kalloc's underlying zones were power-of-2 sizes, with a
+ * KALLOC_MINSIZE of 16 bytes.  The allocator ensured that
+ * (sizeof == alignof) >= 16 for all kalloc allocations.
+ *
+ * Today kalloc may use zones with intermediate sizes, constrained by
+ * KALLOC_MINSIZE and a minimum alignment, expressed by KALLOC_LOG2_MINALIGN.
+ *
+ * The common alignment for LP64 is for longs and pointers i.e. 8 bytes.
+ */
+
 #if defined(__i386__)
 
+#define        KALLOC_MINSIZE          16      /* minimum allocation size */
+#define        KALLOC_LOG2_MINALIGN    4       /* log2 minimum alignment */
+
 #define LINEAR_KERNEL_ADDRESS  ((vm_offset_t) 0x00000000)
 
 #define VM_MIN_KERNEL_LOADED_ADDRESS   ((vm_offset_t) 0x00000000U)
 
 #elif defined(__x86_64__)
 
+#define        KALLOC_MINSIZE          16      /* minimum allocation size */
+#define        KALLOC_LOG2_MINALIGN    4       /* log2 minimum alignment */
+
 #define LINEAR_KERNEL_ADDRESS  ((vm_offset_t) 0x00000000)
 
 #define VM_MIN_KERNEL_LOADED_ADDRESS   ((vm_offset_t) 0xFFFFFF8000000000UL)
 
 #define NCOPY_WINDOWS 0
 
+
 #else
 #error unsupported architecture
 #endif
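
The net guarantee of the two constants above is that every kalloc allocation is at least KALLOC_MINSIZE bytes and aligned to 1 << KALLOC_LOG2_MINALIGN. A toy illustration of that rounding, not kalloc's actual zone-selection code:

        static unsigned long
        kalloc_min_round(unsigned long size)
        {
                unsigned long align = 1UL << 4;           /* KALLOC_LOG2_MINALIGN */

                if (size < 16)                            /* KALLOC_MINSIZE */
                        size = 16;
                return (size + align - 1) & ~(align - 1); /* round up to 16 */
        }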
 #define round_i386_to_vm(p)    (atop(round_page(i386_ptob(p))))
 #define vm_to_i386(p)          (i386_btop(ptoa(p)))
 
-#define PMAP_ENTER(pmap, virtual_address, page, protection, flags, wired) \
-       MACRO_BEGIN                                     \
-       pmap_t __pmap = (pmap);                         \
-       vm_page_t __page = (page);                      \
-       vm_prot_t __prot__ =  (protection);             \
-                                                       \
-       if (__pmap == kernel_pmap) {                    \
-               __prot__ |= VM_PROT_WRITE;              \
-       } else {                                        \
-               assert(!__page->encrypted);             \
-       }                                               \
-                                                       \
-       pmap_enter(                                     \
-               __pmap,                                 \
-               (virtual_address),                      \
-               __page->phys_page,                      \
-               __prot__,                               \
-               flags,                                  \
-               (wired)                                 \
-        );                                             \
-       MACRO_END
 
-#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection,    \
-                               flags, wired, options, result)          \
+#define PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op)    \
+       MACRO_BEGIN                                                     \
+               pmap_set_cache_attributes((mem)->phys_page, (cache_attr));      \
+               (object)->set_cache_attr = TRUE;                                \
+               (void) batch_pmap_op;                                   \
+       MACRO_END                                                       
+
+#define PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op)\
        MACRO_BEGIN                                                     \
-               result=KERN_SUCCESS;                                    \
-               PMAP_ENTER(pmap, virtual_address, page, protection,     \
-                               flags, wired);                          \
+       (void) user_page_list;                                          \
+       (void) num_pages;                                               \
+       (void) batch_pmap_op;                                           \
        MACRO_END
 
 #define IS_USERADDR64_CANONICAL(addr)                  \
index ef737ac4d9c00086b90365727eb2e938fb35c7a0..ecdc42070a7023250f43fe7bb80460b37bda5606 100644 (file)
@@ -129,6 +129,8 @@ typedef uint64_t            vm_map_offset_t;
 typedef uint64_t               vm_map_address_t;
 typedef uint64_t               vm_map_size_t;
 
+typedef mach_vm_address_t      mach_port_context_t;
+
 #ifdef MACH_KERNEL_PRIVATE
 
 #if VM32_SUPPORT
index a5c5587ff174ae22724dee50b5b971b04c3afdbf..99449b7bf37339e19c12fee709875dcd9e147504 100644 (file)
@@ -168,6 +168,15 @@ typedef struct kmod_info_64_v1 {
 /* Implementation now in libkern/OSKextLib.cpp. */
 extern void kmod_panic_dump(vm_offset_t * addr, unsigned int dump_cnt);
 
+#if CONFIG_DTRACE
+/*
+ * DTrace can take a flag indicating whether it should instrument
+ * probes immediately based on kernel symbols.  This per-kext
+ * flag overrides the system-wide mode in dtrace_modload().
+ */
+#define KMOD_DTRACE_FORCE_INIT 0x01
+#endif /* CONFIG_DTRACE */
+
 #endif    /* KERNEL_PRIVATE */
 
 
index 76367a99060746a7bb893d2934f29e25e1729c87..97aa09dbf04f2a203ea07cd7915cb163ef2529c5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -45,35 +45,4 @@ userprefix r_;
 #include <mach/std_types.defs>
 #include <mach/mach_types.defs>
 
-/*
- *     Create a subordinate ledger
- */
-routine ledger_create(
-               parent_ledger           : ledger_t;
-               ledger_ledger           : ledger_t;
-       out     new_ledger              : ledger_t;
-               transfer                : ledger_item_t);
-
-/*
- *     Destroy a ledger
- */
-routine ledger_terminate(
-               ledger                  : ledger_t);
-
-/*
- *     Transfer resources from a parent ledger to a child
- */
-routine ledger_transfer(
-               parent_ledger           : ledger_t;
-               child_ledger            : ledger_t;
-               transfer                : ledger_item_t);
-
-/*
- *     Return the ledger limit and balance
- */
-routine ledger_read(
-               ledger                  : ledger_t;
-       out     balance                 : ledger_item_t;
-       out     limit                   : ledger_item_t);
-
 /* vim: set ft=c : */
index 536cdce8353e343db9ea3ed51e299e5476a22fc6..184d1349bfe8a7521f0dd2991c132369847c299d 100644 (file)
@@ -172,7 +172,7 @@ routine     kmod_get_info(
  *     DEPRECATED!  Use mach_zone_info() instead.
  */
 routine host_zone_info(
-               host            : host_t;
+               host            : host_priv_t;
        out     names           : zone_name_array_t,
                                        Dealloc;
        out     info            : zone_info_array_t,
@@ -188,16 +188,8 @@ routine host_virtual_physical_table_info(
        out     info            : hash_info_bucket_array_t,
                                        Dealloc);
 
-/*
- *     Returns information about the global reverse hash table.
- *      This call is only valid on MACH_IPC_DEBUG kernels.
- *      Otherwise, KERN_FAILURE is returned.
- */
-routine host_ipc_hash_info(
-               host            : host_t;
-       out     info            : hash_info_bucket_array_t,
-                                       Dealloc);
 
+skip; /* was host_ipc_hash_info */
 skip; /* was enable_bluebox */
 skip; /* was disable_bluebox */
 
@@ -265,11 +257,22 @@ routine host_statistics64(
  *     address space sizes (unlike host_zone_info()).
  */
 routine mach_zone_info(
-               host            : host_t;
+               host            : host_priv_t;
        out     names           : mach_zone_name_array_t,
                                        Dealloc;
        out     info            : mach_zone_info_array_t,
                                        Dealloc);
 
+#ifdef PRIVATE
+/*
+ *     Forces a zone allocator garbage collection pass.
+ *     Pages with no in-use allocations are returned to
+ *     the VM system for re-use.
+ */    
+routine mach_zone_force_gc(
+               host            : host_t);
+#else
+skip;
+#endif
 
 /* vim: set ft=c : */
index 576f4119d19ce2912a1f4f1a6992c4d0ac48fcc2..12218bd0114b4ef76524329801c9e432dbb4a962 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,7 +35,6 @@
 #include <mach/exc_server.h>
 #include <mach/host_priv.h>
 #include <mach/host_security.h>
-#include <mach/ledger.h>
 #include <mach/lock_set.h>
 #include <mach/mach_exc_server.h>
 #include <mach/mach_host.h>
index 6c612758d1667305bb1238f364919f1af3e6db6e..2799790b61adf8b8e9454790bf90d44528789f27 100644 (file)
@@ -68,6 +68,10 @@ subsystem
 #endif /* KERNEL_SERVER */
          mach_port 3200;
 
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+       UserPrefix _kernelrpc_;
+#endif
+
 #include <mach/std_types.defs>
 #include <mach/mach_types.defs>
 #include <mach_debug/mach_debug_types.defs>
@@ -98,6 +102,9 @@ routine mach_port_type(
  *     Changes the name by which a port (or port set) is known to
  *     the target task.  The new name can't be in use.  The
  *     old name becomes available for recycling.
+ *
+ *     This interface is OBSOLETE and will always
+ *     return KERN_NOT_SUPPORTED.
  */
 
 routine mach_port_rename(
@@ -475,7 +482,12 @@ routine mach_port_extract_member(
 routine mach_port_get_context(
                task            : ipc_space_t;
                name            : mach_port_name_t;
-       out context             : mach_vm_address_t);
+#ifdef LIBSYSCALL_INTERFACE
+       out context             : mach_port_context_t
+#else
+       out context             : mach_vm_address_t
+#endif
+       );
 
 /*
  * Only valid for receive rights.
@@ -485,7 +497,12 @@ routine mach_port_get_context(
 routine mach_port_set_context(
                task            : ipc_space_t;
                name            : mach_port_name_t;
-               context         : mach_vm_address_t);
+#ifdef LIBSYSCALL_INTERFACE
+               context         : mach_port_context_t
+#else
+               context         : mach_vm_address_t
+#endif
+               );
 
 /*
  *     Return the type and address of the kernel object
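
The widened context type flows through mach_port_set_context()/mach_port_get_context() above, letting the holder of a receive right stash a 64-bit value on the port. A user-space sketch; the tagging caller is hypothetical:

        #include <mach/mach.h>
        #include <stdint.h>

        kern_return_t
        tag_receive_right(mach_port_t port, void *tag)
        {
                /* store a pointer-sized cookie on our own receive right */
                return mach_port_set_context(mach_task_self(), port,
                                             (mach_port_context_t)(uintptr_t)tag);
        }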
index 37ab3277fda194ceb75d0fecddd2bfd59861a323..915bf938109c0cff7cff2703248bcbb646fc2d9d 100644 (file)
@@ -139,6 +139,8 @@ extern kern_return_t semaphore_timedwait_signal_trap(
                                unsigned int sec,
                                clock_res_t nsec);
 
+#endif /* PRIVATE */
+
 extern kern_return_t clock_sleep_trap(
                                mach_port_name_t clock_name,
                                sleep_type_t sleep_type,
@@ -146,7 +148,74 @@ extern kern_return_t clock_sleep_trap(
                                int sleep_nsec,
                                mach_timespec_t *wakeup_time);
 
-#endif /* PRIVATE */
+extern kern_return_t _kernelrpc_mach_vm_allocate_trap(
+                               mach_port_name_t target,
+                               mach_vm_offset_t *addr,
+                               mach_vm_size_t size,
+                               int flags);
+
+extern kern_return_t _kernelrpc_mach_vm_deallocate_trap(
+                               mach_port_name_t target,
+                               mach_vm_address_t address,
+                               mach_vm_size_t size
+);
+
+extern kern_return_t _kernelrpc_mach_vm_protect_trap(
+                               mach_port_name_t target,
+                               mach_vm_address_t address,
+                               mach_vm_size_t size,
+                               boolean_t set_maximum,
+                               vm_prot_t new_protection
+);
+
+extern kern_return_t _kernelrpc_mach_port_allocate_trap(
+                               mach_port_name_t target,
+                               mach_port_right_t right,
+                               mach_port_name_t *name
+);
+
+
+extern kern_return_t _kernelrpc_mach_port_destroy_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name
+);
+
+extern kern_return_t _kernelrpc_mach_port_deallocate_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name
+);
+
+extern kern_return_t _kernelrpc_mach_port_mod_refs_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name,
+                               mach_port_right_t right,
+                               mach_port_delta_t delta
+);
+
+extern kern_return_t _kernelrpc_mach_port_move_member_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t member,
+                               mach_port_name_t after
+);
+
+extern kern_return_t _kernelrpc_mach_port_insert_right_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name,
+                               mach_port_name_t poly,
+                               mach_msg_type_name_t polyPoly
+);
+
+extern kern_return_t _kernelrpc_mach_port_insert_member_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name,
+                               mach_port_name_t pset
+);
+
+extern kern_return_t _kernelrpc_mach_port_extract_member_trap(
+                               mach_port_name_t target,
+                               mach_port_name_t name,
+                               mach_port_name_t pset
+);
 
 extern kern_return_t macx_swapon(
                                uint64_t filename,
@@ -300,7 +369,7 @@ extern mach_port_name_t host_self_trap(
                                struct host_self_trap_args *args);
 
 struct mach_msg_overwrite_trap_args {
-       PAD_ARG_(mach_vm_address_t, msg);
+       PAD_ARG_(user_addr_t, msg);
        PAD_ARG_(mach_msg_option_t, option);
        PAD_ARG_(mach_msg_size_t, send_size);
        PAD_ARG_(mach_msg_size_t, rcv_size);
@@ -308,7 +377,7 @@ struct mach_msg_overwrite_trap_args {
        PAD_ARG_(mach_msg_timeout_t, timeout);
        PAD_ARG_(mach_port_name_t, notify);
        PAD_ARG_8
-       PAD_ARG_(mach_vm_address_t, rcv_msg);  /* Unused on mach_msg_trap */
+       PAD_ARG_(user_addr_t, rcv_msg);  /* Unused on mach_msg_trap */
 };
 extern mach_msg_return_t mach_msg_trap(
                                struct mach_msg_overwrite_trap_args *args);
@@ -459,7 +528,7 @@ struct clock_sleep_trap_args{
        PAD_ARG_(sleep_type_t, sleep_type);
        PAD_ARG_(int, sleep_sec);
        PAD_ARG_(int, sleep_nsec);
-       PAD_ARG_(mach_vm_address_t, wakeup_time);
+       PAD_ARG_(user_addr_t, wakeup_time);
 };
 extern kern_return_t clock_sleep_trap(
                                struct clock_sleep_trap_args *args);
@@ -473,7 +542,7 @@ extern kern_return_t thread_switch(
                                struct thread_switch_args *args);
 
 struct mach_timebase_info_trap_args {
-       PAD_ARG_(mach_vm_address_t, info);
+       PAD_ARG_(user_addr_t, info);
 };
 extern kern_return_t mach_timebase_info_trap(
                                struct mach_timebase_info_trap_args *args);
@@ -505,11 +574,104 @@ extern kern_return_t mk_timer_arm_trap(
 
 struct mk_timer_cancel_trap_args {
     PAD_ARG_(mach_port_name_t, name);
-    PAD_ARG_(mach_vm_address_t, result_time);
+    PAD_ARG_(user_addr_t, result_time);
 };
 extern kern_return_t mk_timer_cancel_trap(
                                struct mk_timer_cancel_trap_args *args);
 
+struct _kernelrpc_mach_vm_allocate_trap_args {
+       PAD_ARG_(mach_port_name_t, target);     /* 1 word */
+       PAD_ARG_(user_addr_t, addr);            /* 1 word */
+       PAD_ARG_(mach_vm_size_t, size);         /* 2 words */
+       PAD_ARG_(int, flags);                   /* 1 word */
+};                                             /* Total: 5 */
+
+extern kern_return_t _kernelrpc_mach_vm_allocate_trap(
+                               struct _kernelrpc_mach_vm_allocate_trap_args *args);
+
+struct _kernelrpc_mach_vm_deallocate_args {
+       PAD_ARG_(mach_port_name_t, target);     /* 1 word */
+       PAD_ARG_(mach_vm_address_t, address);   /* 2 words */
+       PAD_ARG_(mach_vm_size_t, size);         /* 2 words */
+};                                             /* Total: 5 */
+extern kern_return_t _kernelrpc_mach_vm_deallocate_trap(
+                               struct _kernelrpc_mach_vm_deallocate_args *args);
+
+struct _kernelrpc_mach_vm_protect_args {
+       PAD_ARG_(mach_port_name_t, target);     /* 1 word */
+       PAD_ARG_(mach_vm_address_t, address);   /* 2 words */
+       PAD_ARG_(mach_vm_size_t, size);         /* 2 words */
+       PAD_ARG_(boolean_t, set_maximum);       /* 1 word */
+       PAD_ARG_(vm_prot_t, new_protection);    /* 1 word */
+};                                             /* Total: 7 */
+extern kern_return_t _kernelrpc_mach_vm_protect_trap(
+                               struct _kernelrpc_mach_vm_protect_args *args);
+
+struct _kernelrpc_mach_port_allocate_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_right_t, right);
+       PAD_ARG_(user_addr_t, name);
+};
+extern kern_return_t _kernelrpc_mach_port_allocate_trap(
+                               struct _kernelrpc_mach_port_allocate_args *args);
+
+
+struct _kernelrpc_mach_port_destroy_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+};
+extern kern_return_t _kernelrpc_mach_port_destroy_trap(
+                               struct _kernelrpc_mach_port_destroy_args *args);
+
+struct _kernelrpc_mach_port_deallocate_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+};
+extern kern_return_t _kernelrpc_mach_port_deallocate_trap(
+                               struct _kernelrpc_mach_port_deallocate_args *args);
+
+struct _kernelrpc_mach_port_mod_refs_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+       PAD_ARG_(mach_port_right_t, right);
+       PAD_ARG_(mach_port_delta_t, delta);
+};
+extern kern_return_t _kernelrpc_mach_port_mod_refs_trap(
+                               struct _kernelrpc_mach_port_mod_refs_args *args);
+
+struct _kernelrpc_mach_port_move_member_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, member);
+       PAD_ARG_(mach_port_name_t, after);
+};
+extern kern_return_t _kernelrpc_mach_port_move_member_trap(
+                               struct _kernelrpc_mach_port_move_member_args *args);
+
+struct _kernelrpc_mach_port_insert_right_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+       PAD_ARG_(mach_port_name_t, poly);
+       PAD_ARG_(mach_msg_type_name_t, polyPoly);
+};
+extern kern_return_t _kernelrpc_mach_port_insert_right_trap(
+                               struct _kernelrpc_mach_port_insert_right_args *args);
+
+struct _kernelrpc_mach_port_insert_member_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+       PAD_ARG_(mach_port_name_t, pset);
+};
+extern kern_return_t _kernelrpc_mach_port_insert_member_trap(
+                               struct _kernelrpc_mach_port_insert_member_args *args);
+
+struct _kernelrpc_mach_port_extract_member_args {
+       PAD_ARG_(mach_port_name_t, target);
+       PAD_ARG_(mach_port_name_t, name);
+       PAD_ARG_(mach_port_name_t, pset);
+};
+extern kern_return_t _kernelrpc_mach_port_extract_member_trap(
+                               struct _kernelrpc_mach_port_extract_member_args *args);
+
 /* not published to LP64 clients yet */
 struct iokit_user_client_trap_args {
        PAD_ARG_(void *, userClientRef);
index 0f36eeec56132332a6f555b8be078b4ee9352765..a013ff1ae06aa8c740101217a670e39da4d38d15 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -215,6 +215,8 @@ type thread_policy_t                = array[*:16] of integer_t;
                 * task audit token (8 ints)
                 * dyld info (2 64-bit ints and 1 int)
                 * task_extmod_info_t (8 64-bit ints)
+                * task_basic_info_64_2_t
+                * mach_task_basic_info_t (12 ints)
                 * If other task_info flavors are added, this
                 * definition may need to be changed. (See
                 * mach/task_info.h and mach/policy.h) */
@@ -404,6 +406,9 @@ type ledger_t = mach_port_t
 
 type ledger_array_t            = ^array[] of ledger_t;
 type ledger_item_t             = integer_t;
+                                 /* DEPRECATED */
+
+type ledger_amount_t           = int64_t;
 
 type security_token_t          = struct[2] of uint32_t;
 type audit_token_t             = struct[8] of uint32_t;
index e4e47f63af1bc36bdb3219a6017bccb186b50bb5..9c9a1afbbbffc410c62e2224b0d38558dce8e390 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -260,8 +260,12 @@ typedef exception_handler_array_t exception_port_arrary_t;
 #define CLOCK_NULL             ((clock_t) 0)
 #define UND_SERVER_NULL                ((UNDServerRef) 0)
 
-typedef natural_t              ledger_item_t;
-#define LEDGER_ITEM_INFINITY ((ledger_item_t) (~0))
+/* DEPRECATED */
+typedef natural_t      ledger_item_t;
+#define LEDGER_ITEM_INFINITY   ((ledger_item_t) (~0))
+
+typedef int64_t                ledger_amount_t;
+#define LEDGER_LIMIT_INFINITY ((ledger_amount_t)(((uint64_t)1 << 63) - 1))
 
 typedef mach_vm_offset_t       *emulation_vector_t;
 typedef char                   *user_subsystem_t;
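
LEDGER_LIMIT_INFINITY expands to ((uint64_t)1 << 63) - 1, i.e. 0x7fffffffffffffff, the largest positive ledger_amount_t. A compile-time check of that identity (C11 _Static_assert, shown only as an illustration):

        #include <stdint.h>

        _Static_assert((int64_t)(((uint64_t)1 << 63) - 1) == INT64_MAX,
                       "LEDGER_LIMIT_INFINITY is INT64_MAX");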
index ade3eaa61212383233c0702620371f7f62d1c92f..f4793f8e0f53f2349a030a8226582ed57ddaaafb 100644 (file)
@@ -76,6 +76,12 @@ subsystem
 #include <mach/mach_types.defs>
 #include <mach_debug/mach_debug_types.defs>
 
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+#define PREFIX(NAME) _kernelrpc_ ## NAME
+#else
+#define PREFIX(NAME) NAME
+#endif
+
 /*
  *     Allocate zero-filled memory in the address space
  *     of the target task, either at the specified address,
@@ -84,27 +90,50 @@ subsystem
  *     allocation actually took place is returned.
  */
 #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
-routine mach_vm_allocate(
+routine PREFIX(mach_vm_allocate) (
+               target          : vm_task_entry_t;
+       inout   address         : mach_vm_address_t;
+               size            : mach_vm_size_t;
+               flags           : int);
+
 #else
-routine vm_allocate(
-#endif
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+routine PREFIX(vm_allocate) (
                target          : vm_task_entry_t;
        inout   address         : mach_vm_address_t;
                size            : mach_vm_size_t;
                flags           : int);
 
+#endif
+
+#endif
+
+
 /*
  *     Deallocate the specified range from the virtual
  *     address space of the target virtual memory map.
  */
 #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
-routine mach_vm_deallocate(
+routine PREFIX(mach_vm_deallocate) (
+               target          : vm_task_entry_t;
+               address         : mach_vm_address_t;
+               size            : mach_vm_size_t);
+
 #else
-routine vm_deallocate(
-#endif
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+routine PREFIX(vm_deallocate) (
                target          : vm_task_entry_t;
                address         : mach_vm_address_t;
                size            : mach_vm_size_t);
+#endif
+
+#endif
 
 /*
  *     Set the current or maximum protection attribute
@@ -117,16 +146,31 @@ routine vm_deallocate(
  *     *permissions*.
  */
 #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
-routine mach_vm_protect(
+routine PREFIX(mach_vm_protect) (
+               target_task     : vm_task_entry_t;
+               address         : mach_vm_address_t;
+               size            : mach_vm_size_t;
+               set_maximum     : boolean_t;
+               new_protection  : vm_prot_t);
+
+
 #else
-routine vm_protect(
-#endif
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+
+routine PREFIX(vm_protect) (
                target_task     : vm_task_entry_t;
                address         : mach_vm_address_t;
                size            : mach_vm_size_t;
                set_maximum     : boolean_t;
                new_protection  : vm_prot_t);
 
+#endif
+
+#endif
+
 /*
  *     Set the inheritance attribute for the specified range
  *     of the virtual address space of the target address space.
index fbbe306100ff219846d045fb58e48fe5e45bb141..57a0c28e690ace66b824e4aa4dfc9e897a356e77 100644 (file)
@@ -60,6 +60,8 @@
 #ifndef        _MACH_MACHINE_H_
 #define _MACH_MACHINE_H_
 
+#ifndef __ASSEMBLER__
+
 #include <stdint.h>
 #include <mach/machine/vm_types.h>
 #include <mach/boolean.h>
@@ -348,6 +350,10 @@ __END_DECLS
 #define CPU_SUBTYPE_ARM_V5TEJ           ((cpu_subtype_t) 7)
 #define CPU_SUBTYPE_ARM_XSCALE         ((cpu_subtype_t) 8)
 #define CPU_SUBTYPE_ARM_V7             ((cpu_subtype_t) 9)
+#define CPU_SUBTYPE_ARM_V7F            ((cpu_subtype_t) 10) /* Cortex A9 */
+#define CPU_SUBTYPE_ARM_V7K            ((cpu_subtype_t) 12) /* Kirkwood40 */
+
+#endif /* !__ASSEMBLER__ */
 
 /*
  *     CPU families (sysctl hw.cpufamily)
index 846987cfd7907f3a685c23248cf19c959a244193..281ff1c9f05d2b777e0ffb7b882b1814c4d97163 100644 (file)
@@ -370,6 +370,7 @@ typedef struct memory_object_attr_info      memory_object_attr_info_data_t;
 #define MAP_MEM_WTHRU          3
 #define MAP_MEM_WCOMB          4       /* Write combining mode */
                                        /* aka store gather     */
+#define MAP_MEM_INNERWBACK     5
 
 #define GET_MAP_MEM(flags)     \
        ((((unsigned int)(flags)) >> 24) & 0xFF)
@@ -410,6 +411,7 @@ struct upl_page_info {
                speculative:1,  /* page is valid, but not yet accessed */
                cs_validated:1, /* CODE SIGNING: page was validated */
                cs_tainted:1,   /* CODE SIGNING: page is tainted */
+               needed:1,       /* page should be left in cache on abort */
                :0;             /* force to long boundary */
 #else
                opaque;         /* use upl_page_xxx() accessor funcs */
@@ -685,6 +687,7 @@ extern boolean_t    upl_device_page(upl_page_info_t *upl);
 extern boolean_t       upl_speculative_page(upl_page_info_t *upl, int index);
 extern void    upl_clear_dirty(upl_t upl, boolean_t value);
 extern void    upl_set_referenced(upl_t upl, boolean_t value);
+extern void    upl_range_needed(upl_t upl, int index, int count);
 
 __END_DECLS
 
index 195607585a7fd288d0e190c580fa0b8b0f10e018..eba414fc1e28d054d3252eb55b5b558718b20989 100644 (file)
@@ -78,6 +78,7 @@
 #include <mach/machine/vm_types.h>
 
 #include <sys/cdefs.h>
+#include <sys/appleapiopts.h>
 
 /*
  *  The timeout mechanism uses mach_msg_timeout_t values,
@@ -427,10 +428,9 @@ typedef struct
   mach_port_seqno_t            msgh_seqno;
   security_token_t             msgh_sender;
   audit_token_t                        msgh_audit;
-  mach_vm_address_t            msgh_context;
+  mach_port_context_t          msgh_context;
 } mach_msg_context_trailer_t;
 
-
 typedef struct
 {
   mach_port_name_t sender;
@@ -448,7 +448,7 @@ typedef struct
   mach_port_seqno_t             msgh_seqno;
   security_token_t              msgh_sender;
   audit_token_t                 msgh_audit;
-  mach_vm_address_t             msgh_context;
+  mach_port_context_t          msgh_context;
   int                          msgh_ad;
   msg_labels_t                  msgh_labels;
 } mach_msg_mac_trailer_t;
@@ -515,9 +515,19 @@ typedef union
 /*
  *  There is no fixed upper bound to the size of Mach messages.
  */
-
 #define        MACH_MSG_SIZE_MAX       ((mach_msg_size_t) ~0)
 
+#if defined(__APPLE_API_PRIVATE)
+/*
+ *  But architectural limits of a given implementation, or
+ *  transient conditions may cause unpredictable send failures
+ *  for messages larger than MACH_MSG_SIZE_RELIABLE.
+ *
+ *  In either case, waiting for memory is [currently] outside
+ *  the scope of send timeout values provided to IPC.
+ */
+#define        MACH_MSG_SIZE_RELIABLE  ((mach_msg_size_t) 256 * 1024)
+#endif
 /*
  *  Compatibility definitions, for code written
  *  when there was a msgh_kind instead of msgh_seqno.
@@ -621,7 +631,8 @@ typedef integer_t mach_msg_option_t;
  * It also makes things work properly if MACH_RCV_TRAILER_LABELS is ORed 
  * with one of the other options.
  */
-#define REQUESTED_TRAILER_SIZE(y)                              \
+
+#define REQUESTED_TRAILER_SIZE_NATIVE(y)                       \
        ((mach_msg_trailer_size_t)                              \
         ((GET_RCV_ELEMENTS(y) == MACH_RCV_TRAILER_NULL) ?      \
          sizeof(mach_msg_trailer_t) :                          \
@@ -637,6 +648,15 @@ typedef integer_t mach_msg_option_t;
              sizeof(mach_msg_mac_trailer_t) :                  \
             sizeof(mach_msg_max_trailer_t))))))))
 
+
+#ifdef XNU_KERNEL_PRIVATE
+
+#define REQUESTED_TRAILER_SIZE(is64, y) REQUESTED_TRAILER_SIZE_NATIVE(y)
+
+#else /* XNU_KERNEL_PRIVATE */
+#define REQUESTED_TRAILER_SIZE(y) REQUESTED_TRAILER_SIZE_NATIVE(y)
+#endif /* XNU_KERNEL_PRIVATE */
+
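
REQUESTED_TRAILER_SIZE_NATIVE() sizes the trailer from the trailer-elements field of the receive option, so a receiver can reserve exactly the trailer it asked for. A user-space sketch using the single-argument form and the standard trailer option macros:

        #include <mach/mach.h>

        mach_msg_trailer_size_t
        audit_trailer_size(void)
        {
                mach_msg_option_t opt = MACH_RCV_MSG
                    | MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0)
                    | MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AUDIT);

                return REQUESTED_TRAILER_SIZE(opt); /* == sizeof(mach_msg_audit_trailer_t) */
        }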
 /*
  *  Much code assumes that mach_msg_return_t == kern_return_t.
  *  This definition is useful for descriptive purposes.
index 31a454a60c407d43dc85cc057a9c4e4fa7bc7833..3d7655076ba4e13a32e068d952cd1b822b8fb70e 100644 (file)
 
 #if defined(MACH_KERNEL)
 
-#if defined(BSMALL_LATER)
-/* Really small configurations don't need type checking */
-#define __MigTypeCheck 0
-#else
+#if !defined(__MigTypeCheck)
 /* Turn MIG type checking on by default for kernel */
 #define __MigTypeCheck 1
 #endif
+
 #define __MigKernelSpecificCode 1
 #define _MIG_KERNEL_SPECIFIC_CODE_ 1
 
-/* Otherwise check legacy setting (temporary) */
-#elif defined(TypeCheck)  
-
-#define __MigTypeCheck TypeCheck
-
 #elif !defined(__MigTypeCheck)
 
-/* otherwise, default MIG type checking on - except in small configurations */
-#if defined(BSMALL)
-#define __MigTypeCheck 0
+#if defined(TypeCheck)
+/* use legacy setting (temporary) */
+#define __MigTypeCheck TypeCheck
 #else
+/* default MIG type checking on */
 #define __MigTypeCheck 1
 #endif
 
-#endif /* !defined(__MigTypeCheck) */
+#endif /* !defined(MACH_KERNEL) && !defined(__MigTypeCheck) */
 
 /*
  * Pack MIG message structs.
index cb64d3fc34247e5669982b951bca8b8288e91379..9baa731fe37045cf7ab7da0ff63150831a26bed9 100644 (file)
@@ -34,6 +34,8 @@
 
 #include <stdint.h>
 #include <sys/cdefs.h>
+#include <libkern/OSByteOrder.h>
+
 
 typedef struct {
     unsigned char       mig_vers;
@@ -65,20 +67,16 @@ typedef struct {
 
 extern NDR_record_t NDR_record;
 
-#if defined(BSMALL)
+/* NDR conversion off by default */
+
+#if !defined(__NDR_convert__)
 #define __NDR_convert__ 0
-#define __NDR_convert__int_rep__ 0
-#else
-#ifndef __NDR_convert__
-#define __NDR_convert__ 1
-#endif /* __NDR_convert__ */
+#endif /* !defined(__NDR_convert__) */
 
 #ifndef __NDR_convert__int_rep__
-#define __NDR_convert__int_rep__ 1
+#define __NDR_convert__int_rep__ __NDR_convert__
 #endif /* __NDR_convert__int_rep__ */
 
-#endif /* defined(BSMALL) */
-
 #ifndef __NDR_convert__char_rep__
 #define __NDR_convert__char_rep__ 0
 #endif /* __NDR_convert__char_rep__ */
@@ -103,8 +101,6 @@ extern NDR_record_t NDR_record;
 
 #if __NDR_convert__int_rep__
 
-#include <libkern/OSByteOrder.h>
-
 #define __NDR_READSWAP_assign(a, rs)   do { *(a) = rs(a); } while (0)
 
 #define __NDR_READSWAP__uint16_t(a)    OSReadSwapInt16((void *)a, 0)
index 29ced2a401993ab94f79af30a3d8299643aa4a69..2297ba7b7b9bbfb9736e1c21420985b3ae0f0f6b 100644 (file)
@@ -70,6 +70,7 @@
 #define SHARED_REGION_NESTING_MIN_ARM          ?
 #define SHARED_REGION_NESTING_MAX_ARM          ?
 
+
 #if defined(__i386__)
 #define SHARED_REGION_BASE                     SHARED_REGION_BASE_I386
 #define SHARED_REGION_SIZE                     SHARED_REGION_SIZE_I386
index bac3552d3b15b99c6c33af42cc868e1a76df5598..a3ef52610ba0634c0b8be349f70f6b70d8c874fc 100644 (file)
  *     table in <kern/syscall_sw.c>.
  */
 
+/* 
+ * i386 and x86_64 just load off the stack or use
+ * registers in order; no munging is required,
+ * and the number of args is ignored.  ARM loads args
+ * into registers beyond r3, unlike the normal 
+ * procedure call standard; we pad for 64-bit args.
+ */
+kernel_trap(_kernelrpc_mach_vm_allocate_trap,-10,5) /* 4 args, +1 for mach_vm_size_t */
+kernel_trap(_kernelrpc_mach_vm_deallocate_trap,-12,5) /* 3 args, +2 for mach_vm_size_t and mach_vm_address_t */
+kernel_trap(_kernelrpc_mach_vm_protect_trap,-14,7) /* 5 args, +2 for mach_vm_address_t and mach_vm_size_t */
+kernel_trap(_kernelrpc_mach_port_allocate_trap,-16,3)
+kernel_trap(_kernelrpc_mach_port_destroy_trap,-17,2)
+kernel_trap(_kernelrpc_mach_port_deallocate_trap,-18,2)
+kernel_trap(_kernelrpc_mach_port_mod_refs_trap,-19,4)
+kernel_trap(_kernelrpc_mach_port_move_member_trap,-20,3)
+kernel_trap(_kernelrpc_mach_port_insert_right_trap,-21,4)
+kernel_trap(_kernelrpc_mach_port_insert_member_trap,-22,3)
+kernel_trap(_kernelrpc_mach_port_extract_member_trap,-23,3)
+
 kernel_trap(mach_reply_port,-26,0)
 kernel_trap(thread_self_trap,-27,0)
 kernel_trap(task_self_trap,-28,0)
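
The third argument to kernel_trap() counts 32-bit argument slots rather than
formal parameters, which is what the "+1"/"+2" notes above are tallying. A
worked example for the allocate trap, assuming an ABI where each 64-bit value
occupies two slots (cf. the declaration in <mach/mach_traps.h>):

    kern_return_t _kernelrpc_mach_vm_allocate_trap(
        mach_port_name_t  target,  /* 1 slot */
        mach_vm_offset_t  *addr,   /* 1 slot (user pointer) */
        mach_vm_size_t    size,    /* 2 slots (64-bit) */
        int               flags);  /* 1 slot -> 5 slots total */
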
index a43dc6cb9a2e49cc98e9e2f29fe29736590b20ba..d62982d0c35c52383dba04b08c6f7d39be85e37d 100644 (file)
@@ -69,6 +69,8 @@
 #include <mach/machine/vm_types.h>
 #include <mach/time_value.h>
 #include <mach/policy.h>
+#include <mach/vm_statistics.h> /* for vm_extmod_statistics_data_t */
+#include <Availability.h>
 
 #include <sys/cdefs.h>
 
@@ -78,6 +80,7 @@
 typedef        natural_t       task_flavor_t;
 typedef        integer_t       *task_info_t;           /* varying array of int */
 
+/* Deprecated; use the per-structure _data_t types instead */
 #define        TASK_INFO_MAX   (1024)          /* maximum array size */
 typedef        integer_t       task_info_data_t[TASK_INFO_MAX];
 
@@ -87,6 +90,7 @@ typedef       integer_t       task_info_data_t[TASK_INFO_MAX];
 
 #pragma pack(4)
 
+/* Don't use this, use MACH_TASK_BASIC_INFO instead */
 #define TASK_BASIC_INFO_32      4       /* basic information */
 #define TASK_BASIC2_INFO_32      6
 
@@ -105,9 +109,7 @@ typedef struct task_basic_info_32       *task_basic_info_32_t;
 #define TASK_BASIC_INFO_32_COUNT   \
                 (sizeof(task_basic_info_32_data_t) / sizeof(natural_t))
 
-
-#define TASK_BASIC_INFO_64      5       /* 64-bit capable basic info */
-
+/* Don't use this, use MACH_TASK_BASIC_INFO instead */
 struct task_basic_info_64 {
         integer_t       suspend_count;  /* suspend count for task */
         mach_vm_size_t  virtual_size;   /* virtual memory size (bytes) */
@@ -120,12 +122,14 @@ struct task_basic_info_64 {
 };
 typedef struct task_basic_info_64       task_basic_info_64_data_t;
 typedef struct task_basic_info_64       *task_basic_info_64_t;
+
+#define TASK_BASIC_INFO_64      5       /* 64-bit capable basic info */
 #define TASK_BASIC_INFO_64_COUNT   \
                 (sizeof(task_basic_info_64_data_t) / sizeof(natural_t))
 
 
 /* localized structure - cannot be safely passed between tasks of differing sizes */
-
+/* Don't use this, use MACH_TASK_BASIC_INFO instead */
 struct task_basic_info {
         integer_t       suspend_count;  /* suspend count for task */
         vm_size_t       virtual_size;   /* virtual memory size (bytes) */
@@ -245,7 +249,8 @@ typedef struct task_dyld_info       *task_dyld_info_t;
 #define TASK_DYLD_ALL_IMAGE_INFO_32    0       /* format value */
 #define TASK_DYLD_ALL_IMAGE_INFO_64    1       /* format value */
 
-#define TASK_EXTMOD_INFO                       18
+
+#define TASK_EXTMOD_INFO                       19
 
 struct task_extmod_info {
        unsigned char   task_uuid[16];
@@ -256,8 +261,24 @@ typedef struct task_extmod_info    *task_extmod_info_t;
 #define TASK_EXTMOD_INFO_COUNT \
                (sizeof(task_extmod_info_data_t) / sizeof(natural_t))
 
-#pragma pack()
-
+/* Always 64-bit in user and kernel */
+#define MACH_TASK_BASIC_INFO     20         /* always 64-bit basic info */
+
+struct mach_task_basic_info {
+        mach_vm_size_t  virtual_size;       /* virtual memory size (bytes) */
+        mach_vm_size_t  resident_size;      /* resident memory size (bytes) */
+        mach_vm_size_t  resident_size_max;  /* maximum resident memory size (bytes) */
+        time_value_t    user_time;          /* total user run time for
+                                               terminated threads */
+        time_value_t    system_time;        /* total system run time for
+                                               terminated threads */
+        policy_t        policy;             /* default policy for new threads */
+        integer_t       suspend_count;      /* suspend count for task */
+};
+typedef struct mach_task_basic_info       mach_task_basic_info_data_t;
+typedef struct mach_task_basic_info       *mach_task_basic_info_t;
+#define MACH_TASK_BASIC_INFO_COUNT   \
+                (sizeof(mach_task_basic_info_data_t) / sizeof(natural_t))
 
 /*
  * Obsolete interfaces.
@@ -269,4 +290,6 @@ typedef struct task_extmod_info     *task_extmod_info_t;
 
 #define TASK_SCHED_INFO                        14
 
+#pragma pack()
+
 #endif /* _MACH_TASK_INFO_H_ */
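
Callers migrating off the deprecated 32-/64-bit variants would use the new
flavor along these lines (a minimal user-space sketch, error handling kept
short):

    #include <stdio.h>
    #include <mach/mach.h>

    mach_task_basic_info_data_t info;
    mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
    kern_return_t kr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
                                 (task_info_t)&info, &count);
    if (kr == KERN_SUCCESS)
        printf("resident: %llu bytes\n",
               (unsigned long long)info.resident_size);

Unlike task_basic_info, the mach_task_basic_info layout is the same in 32-
and 64-bit processes, per the "always 64-bit" comment above.
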
index ec980cfe22c39c6ab7547f4c11d500917c6cff24..fb08e1b7d66f1964f77d8dfadffe7cb1bfccedd8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,27 +82,12 @@ typedef     int     task_special_port_t;
  * Evolving and likely to change.
  */
 
-#define TASK_WIRED_LEDGER_PORT 5       /* Wired resource ledger for task. */
-
-#define TASK_PAGED_LEDGER_PORT 6       /* Paged resource ledger for task. */
-
 #define TASK_SEATBELT_PORT     7       /* Seatbelt compiler/DEM port for task. */
 
-#define TASK_GSSD_PORT         8       /* GSSD port for security context */
+/* PORT 8 was the GSSD TASK PORT, which has been transformed into a host port */
 
 #define TASK_ACCESS_PORT       9       /* Permission check for task_for_pid. */
 
-#define task_get_wired_ledger_port(task, port) \
-               (task_get_special_port((task), TASK_WIRED_LEDGER_PORT, (port)))
-
-#define task_set_wired_ledger_port(task, port) \
-               (task_set_special_port((task), TASK_WIRED_LEDGER_PORT, (port)))
-
-#define task_get_paged_ledger_port(task, port) \
-               (task_get_special_port((task), TASK_PAGED_LEDGER_PORT, (port)))
-
-#define task_set_paged_ledger_port(task, port) \
-               (task_set_special_port((task), TASK_PAGED_LEDGER_PORT, (port)))
 
 /*
  *     Definitions for ease of use
@@ -126,12 +111,6 @@ typedef    int     task_special_port_t;
 #define task_set_bootstrap_port(task, port)    \
                (task_set_special_port((task), TASK_BOOTSTRAP_PORT, (port)))
 
-#define task_get_gssd_port(task, port) \
-               (task_get_special_port((task), TASK_GSSD_PORT, (port)))
-
-#define task_set_gssd_port(task, port) \
-               (task_set_special_port((task), TASK_GSSD_PORT, (port)))
-
 #define task_get_task_access_port(task, port)  \
                (task_get_special_port((task), TASK_ACCESS_PORT, (port)))
 
index b59e795ef6b1e1719f6c35dd76e07c22fdb260c8..521d5886edfd2b23ba193b87d1f337def7e907e4 100644 (file)
@@ -71,6 +71,12 @@ subsystem
 #include <mach/mach_types.defs>
 #include <mach_debug/mach_debug_types.defs>
 
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+#define PREFIX(NAME) _kernelrpc_ ## NAME
+#else
+#define PREFIX(NAME) NAME
+#endif
+
 /*
  *      Returns information about the contents of the virtual
  *      address space of the target task at the specified
@@ -99,21 +105,33 @@ routine vm_region(
  *     of the specified size.  The address at which the
  *     allocation actually took place is returned.
  */
-routine vm_allocate(
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+routine PREFIX(vm_allocate)(
                target_task     : vm_task_entry_t;
        inout   address         : vm_address_t;
                size            : vm_size_t;
                flags           : int);
 
+#endif
+
 /*
  *     Deallocate the specified range from the virtual
  *     address space of the target task.
  */
-routine vm_deallocate(
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+routine PREFIX(vm_deallocate)(
                target_task     : vm_task_entry_t;
                address         : vm_address_t;
                size            : vm_size_t);
 
+#endif
+
 /*
  *     Set the current or maximum protection attribute
  *     for the specified range of the virtual address
@@ -124,12 +142,17 @@ routine vm_deallocate(
  *     Protections are specified as a set of {read, write, execute}
  *     *permissions*.
  */
-routine vm_protect(
+
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+skip;
+#else
+routine PREFIX(vm_protect)(
                target_task     : vm_task_entry_t;
                address         : vm_address_t;
                size            : vm_size_t;
                set_maximum     : boolean_t;
                new_protection  : vm_prot_t);
+#endif
 
 /*
  *     Set the inheritance attribute for the specified range
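
The PREFIX() device works because MIG runs .defs files through the C
preprocessor, so token pasting can rename the generated stubs per build
flavor:

    #define PREFIX(NAME) _kernelrpc_ ## NAME
    /* routine PREFIX(vm_allocate)(...)  ->  routine _kernelrpc_vm_allocate(...) */

Kernel and Libsyscall-interface builds keep the plain routine names; plain
user builds either skip a routine outright ("skip;" preserves its message ID
slot) or get a _kernelrpc_-prefixed stub, leaving the classic symbol free for
a Libsyscall wrapper that can route through the new traps.
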
index 468920caaaef13bbd2c7cfa825fceb3adb81184f..60e65de2414929e0f7cfb71dd26e7e3cc66cce78 100644 (file)
  */
 
 #if 1
-#define atop(x)        ((uint32_t)(x) >> PAGE_SHIFT)
-#define ptoa(x)        ((uint32_t)(x) << PAGE_SHIFT)
+#define atop(x)        ((vm_address_t)(x) >> PAGE_SHIFT)
+#define ptoa(x)        ((vm_address_t)(x) << PAGE_SHIFT)
 #else
 #define atop(x) (0UL = 0)
 #define ptoa(x) (0UL = 0)
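
The widened cast matters on 64-bit kernels: with the old uint32_t cast, any
address at or above 4 GB was truncated before the shift. A small worked
example with PAGE_SHIFT of 12:

    uint64_t addr = 0x123456789000ULL;
    /* old: ((uint32_t)addr) >> 12      == 0x56789      (high bits lost) */
    /* new: ((vm_address_t)addr) >> 12  == 0x123456789  on LP64          */
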
@@ -240,11 +240,41 @@ extern addr64_t   vm_last_addr;   /* Highest kernel virtual address known to the VM
 extern const vm_offset_t       vm_min_kernel_address;
 extern const vm_offset_t       vm_max_kernel_address;
 
+extern vm_offset_t             vm_kernel_stext;
+extern vm_offset_t             vm_kernel_etext;
+extern vm_offset_t             vm_kernel_base;
+extern vm_offset_t             vm_kernel_top;
+extern vm_offset_t             vm_kernel_slide;
+extern vm_offset_t             vm_kernel_addrperm;
+
+#define VM_KERNEL_IS_SLID(_o)                                                 \
+               (((vm_offset_t)(_o) >= vm_kernel_base) &&                      \
+                ((vm_offset_t)(_o) <  vm_kernel_top))
+/*
+ * VM_KERNEL_IS_KEXT is platform-specific, defined in <mach/machine/vm_param.h>.
+ * Set default if undefined.
+ */
+#ifndef        VM_KERNEL_IS_KEXT
+#define VM_KERNEL_IS_KEXT(_o)  (FALSE)
+#endif
+#define VM_KERNEL_UNSLIDE(_v)                                                 \
+               ((VM_KERNEL_IS_SLID(_v) ||                                     \
+                 VM_KERNEL_IS_KEXT(_v)) ?                                     \
+                       (vm_offset_t)(_v) - vm_kernel_slide :                  \
+                       (vm_offset_t)(_v))
+#define VM_KERNEL_SLIDE(_u)                                                   \
+               ((vm_offset_t)(_u) + vm_kernel_slide)
+
+#define        VM_KERNEL_ADDRPERM(_v)                                                  \
+               (((vm_offset_t)(_v) == 0) ?                                     \
+                       (vm_offset_t)(0) :                                      \
+                       (vm_offset_t)(_v) + vm_kernel_addrperm)
+
 #endif /* XNU_KERNEL_PRIVATE */
 
 extern vm_size_t       page_size;
 extern vm_size_t       page_mask;
-extern int                     page_shift;
+extern int             page_shift;
 
 /* We need a way to get rid of compiler warnings when we cast from   */
 /* a 64 bit value to an address (which may be 32 bits or 64-bits).   */
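
A hedged sketch of how the new macros are meant to be used: code that is
about to expose a kernel pointer outside the kernel (panic logs, debug
sysctls) strips the KASLR slide or applies the permutation first. The
variable names here are illustrative:

    /* report a kernel text address without leaking vm_kernel_slide */
    printf("fault at %p\n", (void *)VM_KERNEL_UNSLIDE(pc));

    /* obfuscate a heap pointer before handing it to user space */
    info->object_id = VM_KERNEL_ADDRPERM((vm_offset_t)object);

VM_KERNEL_UNSLIDE leaves addresses outside the slid kernel/kext ranges
untouched, so it is safe to apply unconditionally.
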
index 4d1b13a5675fbdbad482cd8553fe30dd77c26a9f..487549894c2eeca5f5d1f39c4ea4eec4d0b126b1 100644 (file)
@@ -328,12 +328,17 @@ typedef struct pmap_statistics    *pmap_statistics_t;
 #define VM_MEMORY_DYLIB        33
 #define VM_MEMORY_OBJC_DISPATCHERS 34
 
+/* Was a nested pmap (VM_MEMORY_SHARED_PMAP) which has now been unnested */
+#define        VM_MEMORY_UNSHARED_PMAP 35
+
+
 // Placeholders for now -- as we analyze the libraries and find how they
 // use memory, we can make these labels more specific.
 #define VM_MEMORY_APPKIT 40
 #define VM_MEMORY_FOUNDATION 41
 #define VM_MEMORY_COREGRAPHICS 42
-#define VM_MEMORY_CARBON 43
+#define VM_MEMORY_CORESERVICES 43
+#define VM_MEMORY_CARBON VM_MEMORY_CORESERVICES
 #define VM_MEMORY_JAVA 44
 #define VM_MEMORY_ATS 50
 #define VM_MEMORY_LAYERKIT 51
@@ -388,7 +393,7 @@ typedef struct pmap_statistics      *pmap_statistics_t;
 #define VM_MEMORY_COREPROFILE  71
 
 /* assetsd / MobileSlideShow memory */
-#define VM_MEMORY_ASSETSD    72
+#define VM_MEMORY_ASSETSD      72
 
 /* Reserve 240-255 for application */
 #define VM_MEMORY_APPLICATION_SPECIFIC_1 240
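
User code applies these tags when mapping anonymous memory so that tools such
as vmmap can attribute the region; a brief sketch using the long-standing
VM_MAKE_TAG() convention, where the tag rides in mmap's fd argument for
anonymous mappings:

    #include <sys/mman.h>
    #include <mach/vm_statistics.h>

    void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                   MAP_ANON | MAP_PRIVATE,
                   VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);

Existing binaries tagged VM_MEMORY_CARBON keep working, since the old name is
now an alias for VM_MEMORY_CORESERVICES.
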
index 2170671dd88e2423a0d9042ef36a787815177373..ca45cc170199d4b5757c59b6bc6d53f6149bd44d 100644 (file)
@@ -9,11 +9,12 @@ include $(MakeInc_def)
 
 
 DATAFILES = \
-       cpu_number.h    \
        cpu_capabilities.h      \
+       cpu_number.h    \
        io_map_entries.h \
        lock.h  \
        locks.h \
+       machine_cpuid.h \
        machine_routines.h      \
        pal_routines.h          \
        pal_hibernate.h         \
index d115217020b34de8119c97802aaf0d69f832cb63..2a525b0f2c7657bfc1ff4e874837769965fa26ea 100644 (file)
@@ -38,7 +38,7 @@
 #ifndef        __ASSEMBLER__
 
 extern void    commpage_populate( void );      /* called once during startup */
-
+extern void	commpage_text_populate( void );
 #endif /* __ASSEMBLER__ */
 
 #endif /* _MACHINE_COMMPAGE_H */
diff --git a/osfmk/machine/db_machdep.h b/osfmk/machine/db_machdep.h
deleted file mode 100644 (file)
index 76ce9b3..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _MACHINE_DB_MACHDEP_H
-#define _MACHINE_DB_MACHDEP_H
-
-#if defined (__i386__) || defined (__x86_64__)
-#include "i386/db_machdep.h"
-#else
-#error architecture not supported
-#endif
-
-#endif /* _MACHINE_DB_MACHDEP_H */
diff --git a/osfmk/machine/machine_cpuid.h b/osfmk/machine/machine_cpuid.h
new file mode 100644 (file)
index 0000000..ebe9396
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifdef KERNEL_PRIVATE
+
+#ifndef _MACHINE_CPUID_H
+#define _MACHINE_CPUID_H
+
+#error architecture not supported
+
+#endif /* _MACHINE_CPUID_H */
+
+#endif /* KERNEL_PRIVATE */
index f5a8948231b24e821dc7da48a1b22ce54f57713a..a637a1b93b5c7223ae5cd1e33bc862bef40d83b2 100644 (file)
@@ -73,8 +73,8 @@ uint64_t pmc_spin_timeout_count = 0;  /* Number of times where a PMC spin loop ca
        do { \
                kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
                        x, x->object, x->useCount, \
-                       x->methods.supports_context_switching ? \
-                       x->methods.supports_context_switching(x->object) : 0); \
+                       (x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
+                       1 : 0); \
        } while(0)
 
 static const char const * pmc_state_state_name(pmc_state_t state) {
@@ -190,8 +190,10 @@ static lck_grp_attr_t *pmc_lock_grp_attr;
 static lck_attr_t *pmc_lock_attr;
 
 /* PMC tracking queue locks */
-static lck_spin_t perf_monitor_queue_spin;             /* protects adding and removing from queue */
-static lck_spin_t perf_counters_queue_spin;            /* protects adding and removing from queue */
+
+static lck_mtx_t  cpu_monitor_queue_mutex;   /* protects per-cpu queues at initialisation time */
+static lck_spin_t perf_monitor_queue_spin;   /* protects adding and removing from queue */
+static lck_spin_t perf_counters_queue_spin;  /* protects adding and removing from queue */
 
 /* Reservation tracking queues lock */
 static lck_spin_t reservations_spin;
@@ -201,10 +203,13 @@ static lck_spin_t reservations_spin;
  *
  * Keeps track of registered perf monitors and perf counters
  */
-static queue_t perf_monitors_queue = NULL;
+
+static queue_head_t **cpu_monitor_queues = NULL;
+
+static queue_head_t *perf_monitors_queue = NULL;
 static volatile uint32_t perf_monitors_count = 0U;
 
-static queue_perf_counters_queue = NULL;
+static queue_head_t *perf_counters_queue = NULL;
 static volatile uint32_t perf_counters_count = 0U;
 
 /* 
@@ -218,16 +223,16 @@ static volatile uint32_t perf_counters_count = 0U;
  * every task and thread) to determine if/when a new reservation would
  * constitute a conflict.
  */
-static queue_t system_reservations = NULL;
+static queue_head_t *system_reservations = NULL;
 static volatile uint32_t system_reservation_count = 0U;
 
-static queue_task_reservations = NULL;
+static queue_head_t *task_reservations = NULL;
 static volatile uint32_t task_reservation_count = 0U;
 
-static queue_thread_reservations = NULL;
+static queue_head_t *thread_reservations = NULL;
 static volatile uint32_t thread_reservation_count = 0U;
 
-
 #if XNU_KERNEL_PRIVATE
 
 /*
@@ -248,6 +253,8 @@ static void init_pmc_locks(void) {
        lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
 
        lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
+
+       lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
 }
 
 /*
@@ -272,27 +279,28 @@ static void init_pmc_zones(void) {
  * registering and reserving individual pmcs and perf monitors.
  */
 static void init_pmc_queues(void) {
-       perf_monitors_queue = (queue_t)kalloc(sizeof(queue_t));
+    
+       perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
        assert(perf_monitors_queue);
 
        queue_init(perf_monitors_queue);
 
-       perf_counters_queue = (queue_t)kalloc(sizeof(queue_t));
+       perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
        assert(perf_counters_queue);
 
        queue_init(perf_counters_queue);
 
-       system_reservations = (queue_t)kalloc(sizeof(queue_t));
+	system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
        assert(system_reservations);
 
        queue_init(system_reservations);
 
-       task_reservations = (queue_t)kalloc(sizeof(queue_t));
+       task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
        assert(task_reservations);
 
        queue_init(task_reservations);
 
-       thread_reservations = (queue_t)kalloc(sizeof(queue_t));
+       thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
        assert(thread_reservations);
 
        queue_init(thread_reservations);
@@ -329,7 +337,7 @@ static void perf_monitor_free(void *pm) {
        zfree(perf_small_zone, pm);
 }
 
-static void perf_monitor_init(perf_monitor_t pm) {
+static void perf_monitor_init(perf_monitor_t pm, int cpu) {
        assert(pm);
 
        pm->object = NULL;
@@ -337,8 +345,13 @@ static void perf_monitor_init(perf_monitor_t pm) {
        bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
 
        pm->useCount = 1;       /* initial retain count of 1, for caller */
+       
+       pm->reservedCounters = 0;
+    
+       pm->cpu = cpu;
 
        pm->link.next = pm->link.prev = (queue_entry_t)NULL;
+       pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
 }
 
 /*
@@ -348,6 +361,13 @@ static void perf_monitor_init(perf_monitor_t pm) {
 static void perf_monitor_dequeue(perf_monitor_t pm) {
        lck_spin_lock(&perf_monitor_queue_spin);
        
+       if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
+               /* If this flag is set, the monitor is already validated to be 
+                * accessible from a single cpu only.
+                */
+               queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); 
+       }
+       
        /* 
         * remove the @pm object from the @perf_monitor_queue queue (it is of type
         * <perf_monitor_t> and has a field called @link that is the queue_link_t
@@ -364,13 +384,45 @@ static void perf_monitor_dequeue(perf_monitor_t pm) {
  * thereby registering it for use with the system.
  */
 static void perf_monitor_enqueue(perf_monitor_t pm) {
+    
+       lck_mtx_lock(&cpu_monitor_queue_mutex);
        lck_spin_lock(&perf_monitor_queue_spin);
 
+       if (pm->cpu >= 0) {
+               /* Deferred initialisation; saves memory and permits ml_get_max_cpus()
+                * to block until cpu initialisation is complete.
+                */
+               if (!cpu_monitor_queues) {
+                       uint32_t max_cpus;
+                       queue_head_t **queues;
+                       uint32_t i;
+               
+                       lck_spin_unlock(&perf_monitor_queue_spin);
+               
+                       max_cpus = ml_get_max_cpus();
+
+                       queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus);
+                       assert(queues);
+                       for (i = 0; i < max_cpus; i++) {
+                               queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
+                               assert(queue);
+                               queue_init(queue);
+                               queues[i] = queue;
+                       }
+               
+                       lck_spin_lock(&perf_monitor_queue_spin);
+               
+                       cpu_monitor_queues = queues;
+               }
+           
+               queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
+       }
+       
        queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
-
        perf_monitors_count++;
-
+       
        lck_spin_unlock(&perf_monitor_queue_spin);
+       lck_mtx_unlock(&cpu_monitor_queue_mutex);
 }
 
 /*
@@ -417,8 +469,7 @@ static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
        lck_spin_lock(&perf_monitor_queue_spin);
        
        queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
-               if(element && element->object == monitor) {
-                       /* We found it - reference the object. */
+               if(element->object == monitor) {
                        perf_monitor_reference(element);
                        found = element;
                        break;
@@ -432,8 +483,9 @@ static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
 
 /*
  * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
- * aassociated with.
+ * associated with.
  */
+
 static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
        assert(pm);
        assert(pmc);
@@ -546,9 +598,8 @@ static pmc_t pmc_find(pmc_object_t object) {
        pmc_t found = NULL;
 
        queue_iterate(perf_counters_queue, element, pmc_t, link) {
-               if(element && element->object == object) {
+               if(element->object == object) {
                        pmc_reference(element);
-
                        found = element;
                        break;
                }
@@ -750,8 +801,7 @@ static uint32_t pmc_accessible_core_count(pmc_t pmc) {
  * matches the new incoming one (for thread/task reservations only).  Will only
  * return TRUE if the task/thread matches.
  */
-static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t
-resv) {
+static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
        assert(queue);
        assert(resv);
 
@@ -759,62 +809,60 @@ resv) {
        pmc_reservation_t tmp = NULL;
 
        queue_iterate(queue, tmp, pmc_reservation_t, link) {
-               if(tmp) {
-                       if(tmp->pmc == resv->pmc) {
-                               /* PMC matches - make sure scope matches first */
-                               switch(PMC_FLAG_SCOPE(tmp->flags)) {
-                                       case PMC_FLAG_SCOPE_SYSTEM:
-                                               /*
-                                                * Found a reservation in system queue with same pmc - always a
-                                                * conflict.
-                                                */
-                                               ret = TRUE;
-                                               break;
-                                       case PMC_FLAG_SCOPE_THREAD:
+               if(tmp->pmc == resv->pmc) {
+                       /* PMC matches - make sure scope matches first */
+                       switch(PMC_FLAG_SCOPE(tmp->flags)) {
+                               case PMC_FLAG_SCOPE_SYSTEM:
+                                       /*
+                                        * Found a reservation in system queue with same pmc - always a
+                                        * conflict.
+                                        */
+                                       ret = TRUE;
+                                       break;
+                               case PMC_FLAG_SCOPE_THREAD:
+                                       /*
+                                        * Found one in thread queue with the same PMC as the
+                                        * argument. Only a conflict if argument scope isn't
+                                        * thread or system, or the threads match.
+                                        */
+                                       ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 
+                                               (tmp->thread == resv->thread);
+
+                                       if(!ret) {
                                                /*
-                                                * Found one in thread queue with the same PMC as the
-                                                * argument. Only a conflict if argument scope isn't
-                                                * thread or system, or the threads match.
+                                                * so far, no conflict - check that the pmc that is
+                                                * being reserved isn't accessible from more than
+                                                * one core, if it is, we need to say it's already
+                                                * taken.
                                                 */
-                                               ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 
-                                                       (tmp->thread == resv->thread);
-
-                                               if(!ret) {
-                                                       /*
-                                                        * so far, no conflict - check that the pmc that is
-                                                        * being reserved isn't accessible from more than
-                                                        * one core, if it is, we need to say it's already
-                                                        * taken.
-                                                        */
-                                                       if(1 != pmc_accessible_core_count(tmp->pmc)) {
-                                                               ret = TRUE;
-                                                       }
+                                               if(1 != pmc_accessible_core_count(tmp->pmc)) {
+                                                       ret = TRUE;
                                                }
-                                               break;
-                                       case PMC_FLAG_SCOPE_TASK:
-                                               /* 
-                                                * Follow similar semantics for task scope.
+                                       }
+                                       break;
+                               case PMC_FLAG_SCOPE_TASK:
+                                       /* 
+                                        * Follow similar semantics for task scope.
+                                        */
+
+                                       ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
+                                               (tmp->task == resv->task);
+                                       if(!ret) {
+                                               /*
+                                                * so far, no conflict - check that the pmc that is
+                                                * being reserved isn't accessible from more than
+                                                * one core, if it is, we need to say it's already
+                                                * taken.
                                                 */
-
-                                               ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
-                                                       (tmp->task == resv->task);
-                                               if(!ret) {
-                                                       /*
-                                                        * so far, no conflict - check that the pmc that is
-                                                        * being reserved isn't accessible from more than
-                                                        * one core, if it is, we need to say it's already
-                                                        * taken.
-                                                        */
-                                                       if(1 != pmc_accessible_core_count(tmp->pmc)) {
-                                                               ret = TRUE;
-                                                       }
+                                               if(1 != pmc_accessible_core_count(tmp->pmc)) {
+                                                       ret = TRUE;
                                                }
+                                       }
 
-                                               break;
-                               }
-
-                               if(ret) break;
+                                       break;
                        }
+
+                       if(ret) break;
                }
        }
 
@@ -823,7 +871,7 @@ resv) {
 
 /*
  * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be 
- * added to its target queue without createing conflicts (target queue is 
+ * added to its target queue without creating conflicts (target queue is 
  * determined by the reservation's scope flags). Further, this method returns
  * FALSE if any level contains a reservation for a PMC that can be accessed from
  * more than just 1 core, and the given reservation also wants the same PMC.
@@ -912,54 +960,50 @@ static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
 
        /* Check if the reservation can be added without conflicts */
        if(pmc_internal_reservation_validate_for_pmc(resv)) {
-               ret = TRUE;
-       }
-
-       if(ret) {
+           
                /* add reservation to appropriate scope */
                switch(PMC_FLAG_SCOPE(resv->flags)) {
+               case PMC_FLAG_SCOPE_SYSTEM:
+                       /* Simply add it to the system queue */
+                       pmc_internal_reservation_enqueue(system_reservations, resv);
+                       system_reservation_count++;
+                       
+                       lck_spin_unlock(&reservations_spin);
 
-                       /* System-wide counter */
-                       case PMC_FLAG_SCOPE_SYSTEM:
-                               /* Simply add it to the system queue */
-                               pmc_internal_reservation_enqueue(system_reservations, resv);
-                               system_reservation_count++;
-                               
-                               lck_spin_unlock(&reservations_spin);
-
-                               break;
+                       break;
 
-                       /* Task-switched counter */
-                       case PMC_FLAG_SCOPE_TASK:
-                               assert(resv->task);
+               case PMC_FLAG_SCOPE_TASK:
+                       assert(resv->task);
 
-                               /* Not only do we enqueue it in our local queue for tracking */
-                               pmc_internal_reservation_enqueue(task_reservations, resv);
-                               task_reservation_count++;
+                       /* Not only do we enqueue it in our local queue for tracking */
+                       pmc_internal_reservation_enqueue(task_reservations, resv);
+                       task_reservation_count++;
 
-                               lck_spin_unlock(&reservations_spin);
+                       lck_spin_unlock(&reservations_spin);
 
-                               /* update the task mask, and propagate it to existing threads */
-                               pmc_internal_update_task_flag(resv->task, TRUE);
-                               break;
+                       /* update the task mask, and propagate it to existing threads */
+                       pmc_internal_update_task_flag(resv->task, TRUE);
+                       break;
 
-                       /* Thread-switched counter */
-                       case PMC_FLAG_SCOPE_THREAD:
-                               assert(resv->thread);
+               /* Thread-switched counter */
+               case PMC_FLAG_SCOPE_THREAD:
+                       assert(resv->thread);
 
-                               /*
-                                * Works the same as a task-switched counter, only at
-                                * thread-scope
-                                */
+                       /*
+                        * Works the same as a task-switched counter, only at
+                        * thread-scope
+                        */
 
-                               pmc_internal_reservation_enqueue(thread_reservations, resv);
-                               thread_reservation_count++;
+                       pmc_internal_reservation_enqueue(thread_reservations, resv);
+                       thread_reservation_count++;
 
-                               lck_spin_unlock(&reservations_spin);
-                               
-                               pmc_internal_update_thread_flag(resv->thread, TRUE);
-                               break;
-                       }
+                       lck_spin_unlock(&reservations_spin);
+                       
+                       pmc_internal_update_thread_flag(resv->thread, TRUE);
+                       break;
+               }
+               
+               ret = TRUE;
        } else {
                lck_spin_unlock(&reservations_spin);
        }                       
@@ -993,8 +1037,6 @@ static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, vo
                                /* core_cnt = 0 really means all cpus */
                                mask = CPUMASK_ALL;
                        }
-                       
-                       /* Have each core run pmc_internal_reservation_stop_cpu asynchronously. */
                        mp_cpus_call(mask, ASYNC, action_func, reservation);
 #else
 #error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
@@ -1021,20 +1063,20 @@ static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
         * using the reservation's scope flags.
         */
 
+       /* Lock the global spin lock */
+       lck_spin_lock(&reservations_spin);
+
        switch(PMC_FLAG_SCOPE(resv->flags)) {
 
                case PMC_FLAG_SCOPE_SYSTEM:
-                       lck_spin_lock(&reservations_spin);
                        pmc_internal_reservation_dequeue(system_reservations, resv);
                        system_reservation_count--;
+                       
                        lck_spin_unlock(&reservations_spin);
+                       
                        break;
 
                case PMC_FLAG_SCOPE_TASK:
-                       
-                       /* Lock the global spin lock */
-                       lck_spin_lock(&reservations_spin);
-
                        /* remove from the global queue */
                        pmc_internal_reservation_dequeue(task_reservations, resv);
                        task_reservation_count--;
@@ -1044,11 +1086,10 @@ static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
 
                        /* Recalculate task's counter mask */
                        pmc_internal_update_task_flag(resv->task, FALSE);
+                       
                        break;
 
                case PMC_FLAG_SCOPE_THREAD:
-                       lck_spin_lock(&reservations_spin);
-
                        pmc_internal_reservation_dequeue(thread_reservations, resv);
                        thread_reservation_count--;
 
@@ -1489,11 +1530,6 @@ static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
                COUNTER_DEBUG("  [error] disable: 0x%x\n", ret);
        }
 
-       /*
-        * At this point, we're off the hardware, so we don't have to
-        * set_on_hardare(TRUE) if anything fails from here on.
-        */
-
        /* store the counter value into the reservation's stored count */
        ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
        if(KERN_SUCCESS != ret) {
@@ -1576,11 +1612,28 @@ static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
        
 }
 
+/*
+ * pmc_accessible_from_core will return TRUE if the given @pmc is directly
+ * (e.g., hardware) readable from the given logical core.
+ *
+ * NOTE: This method is interrupt safe.
+ */
+static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
+       boolean_t ret = FALSE;
+
+       assert(pmc);
+
+       ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
+
+       return ret;
+}
+
 static void pmc_internal_reservation_start_cpu(void * arg) {
        pmc_reservation_t reservation = (pmc_reservation_t)arg;
        
        assert(reservation);
        
+
        if (pmc_internal_reservation_matches_context(reservation)) {
                /* We are in context, but the reservation may have already had the context_in method run.  Attempt
                 * to set this cpu's bit in the active_last_context_in mask.  If we set it, call context_in.
@@ -1600,6 +1653,7 @@ static void pmc_internal_reservation_stop_cpu(void * arg) {
        
        assert(reservation);
        
+       
        if (pmc_internal_reservation_matches_context(reservation)) {
                COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
 
@@ -1703,6 +1757,7 @@ static void pmc_reservation_interrupt(void *target, void *refCon) {
  */
 kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
        perf_monitor_methods_t *methods) {
+       int cpu = -1;
 
        COUNTER_DEBUG("registering perf monitor %p\n", monitor);
 
@@ -1715,9 +1770,30 @@ kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
                return KERN_INVALID_ARGUMENT;
        }
 
+       /* If the monitor requires idle notifications, ensure that it is 
+        * accessible from a single core only.
+        */
+       if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
+               uint32_t *cores;
+               size_t core_cnt;
+           
+               if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) {
+                       /* 
+                        * Guard against disabled cores - monitors will always match and
+                        * attempt registration, irrespective of 'cpus=x' boot-arg.
+                        */
+                       if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) {
+                               cpu = cores[0];
+                       } else {
+                               return KERN_INVALID_ARGUMENT;
+                       }
+               }           
+       }
+
        /* All methods are required */
-       if(!methods->supports_context_switching || !methods->enable_counters ||
-               !methods->disable_counters) {
+	if(!methods->accessible_cores ||
+          !methods->enable_counters || !methods->disable_counters ||
+          !methods->on_idle || !methods->on_idle_exit) {
                return KERN_INVALID_ARGUMENT;
        }
 
@@ -1735,13 +1811,13 @@ kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
        }
 
        /* initialize the object */
-       perf_monitor_init(pm);
+       perf_monitor_init(pm, cpu);
 
        /* copy in the registration info */
        pm->object = monitor;
        memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
 
-       /* place it in the tracking queue */
+       /* place it in the tracking queues */
        perf_monitor_enqueue(pm);
 
        /* debug it */
@@ -1766,7 +1842,7 @@ kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
 
        perf_monitor_t pm = perf_monitor_find(monitor);
        if(pm) {
-               /* Remove it from the queue. */
+               /* Remove it from the queues. */
                perf_monitor_dequeue(pm);
 
                /* drop extra retain from find */
@@ -1903,6 +1979,16 @@ kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_obj
        return KERN_SUCCESS;
 }
 
+static void perf_monitor_reservation_add(perf_monitor_t monitor) {
+	assert(monitor);
+	OSIncrementAtomic(&(monitor->reservedCounters));
+}
+
+static void perf_monitor_reservation_remove(perf_monitor_t monitor) {
+	assert(monitor);
+	OSDecrementAtomic(&(monitor->reservedCounters));
+}
+
 #if 0
 #pragma mark -
 #pragma mark KPI
@@ -2089,10 +2175,8 @@ kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
 
        /* copy the bits out */
        queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
-               if(pmc) {
-                       /* copy out the pointer */
-                       array[count++] = pmc;
-               }
+               /* copy out the pointer */
+               array[count++] = pmc;
        }
 
        lck_spin_unlock(&perf_counters_queue_spin);
@@ -2227,22 +2311,6 @@ kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
        return ret;
 }
 
-/*
- * pmc_accessible_from_core will return TRUE if the given @pmc is directly
- * (e.g., hardware) readable from the given logical core.
- *
- * NOTE: This method is interrupt safe.
- */
-boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
-       boolean_t ret = FALSE;
-
-       assert(pmc);
-
-       ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
-
-       return ret;
-}
-
 static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
        assert(resv);
        assert(resv->pmc);
@@ -2318,7 +2386,7 @@ kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
                return KERN_FAILURE;
        }
 
-       /* Here's where we setup the PMI method (if needed) */
+       perf_monitor_reservation_add(pmc->monitor);
        
        *reservation = resv;
 
@@ -2346,7 +2414,7 @@ kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config,
                return KERN_INVALID_ARGUMENT;
        }
 
-       if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
+       if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
                COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
                return KERN_INVALID_ARGUMENT;
        }
@@ -2377,6 +2445,8 @@ kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config,
                return KERN_FAILURE;
        }
 
+       perf_monitor_reservation_add(pmc->monitor);
+
        *reservation = resv;
 
        return KERN_SUCCESS;
@@ -2402,7 +2472,7 @@ kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config,
                return KERN_INVALID_ARGUMENT;
        }
 
-       if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) {
+       if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
                COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
                return KERN_INVALID_ARGUMENT;
        }
@@ -2433,6 +2503,8 @@ kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config,
                return KERN_FAILURE;
        }
 
+       perf_monitor_reservation_add(pmc->monitor);
+
        *reservation = resv;
 
        return KERN_SUCCESS;
@@ -2632,6 +2704,8 @@ kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
                return KERN_INVALID_ARGUMENT;
        }
        
+       perf_monitor_reservation_remove(reservation->pmc->monitor);
+       
        /* Move the state machine */
        if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
                return KERN_FAILURE;
@@ -2661,6 +2735,56 @@ kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
        return KERN_SUCCESS;
 }
 
+/*
+ * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state.
+ */
+boolean_t pmc_idle(void) {
+       perf_monitor_t monitor = NULL;
+       queue_head_t *cpu_queue;
+
+       lck_spin_lock(&perf_monitor_queue_spin);
+       
+       if (cpu_monitor_queues) {
+               cpu_queue = cpu_monitor_queues[cpu_number()];
+       
+               queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
+                       perf_monitor_methods_t *methods = &(monitor->methods);
+                       if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {                 
+                               methods->on_idle(monitor->object);
+                       }
+               }
+       }
+
+       lck_spin_unlock(&perf_monitor_queue_spin);
+
+       return TRUE;
+}
+
+/*
+ * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state.
+ */
+boolean_t pmc_idle_exit(void) {
+       perf_monitor_t monitor = NULL;
+       queue_head_t *cpu_queue;
+
+       lck_spin_lock(&perf_monitor_queue_spin);
+       
+       if (cpu_monitor_queues) {
+               cpu_queue = cpu_monitor_queues[cpu_number()];
+       
+               queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
+                       perf_monitor_methods_t *methods = &(monitor->methods);
+                       if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {                 
+                               methods->on_idle_exit(monitor->object);
+                       }
+               }
+       }
+
+       lck_spin_unlock(&perf_monitor_queue_spin);
+
+       return TRUE;
+}
+
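
On the driver side, a monitor that sets PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS
would supply hooks along these lines (names assumed; a sketch, not the actual
AppleProfileFamily implementation):

    static void my_on_idle(perf_monitor_object_t pm) {
        /* core is about to idle: save counter state that
         * power-gating would otherwise lose */
    }

    static void my_on_idle_exit(perf_monitor_object_t pm) {
        /* core is awake again: restore and restart the counters */
    }

Both hooks are invoked from the per-CPU queue with perf_monitor_queue_spin
held, so they need to be quick and must not block.
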
 /*
  * pmc_context_switch performs all context switching necessary to save all pmc
  * state associated with @oldThread (and the task to which @oldThread belongs),
@@ -2673,43 +2797,37 @@ boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
        pmc_reservation_t resv = NULL;
        uint32_t cpuNum = cpu_number();
 
-       /* Out going thread: save pmc state */
        lck_spin_lock(&reservations_spin);
 
-       /* interate over any reservations */
-       queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
-               if(resv && oldThread == resv->thread) {
-
-                       /* check if we can read the associated pmc from this core. */
-                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               /* save the state At this point, if it fails, it fails. */
+       /* Save pmc states */
+       if (thread_reservation_count) {
+               queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
+                       if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
                                (void)pmc_internal_reservation_context_out(resv);
                        }
                }
        }
        
-       queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
-               if(resv && resv->task == oldThread->task) {
-                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               (void)pmc_internal_reservation_context_out(resv);
+       if (task_reservation_count) {
+               queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
+                       if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
+				(void)pmc_internal_reservation_context_out(resv);
                        }
                }
        }
        
-       /* Incoming task: restore */
-
-       queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
-               if(resv && resv->thread == newThread) {
-                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+       /* Restore */
+       if (thread_reservation_count) {
+               queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
+                       if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
                                (void)pmc_internal_reservation_context_in(resv);
                        }
                }
        }
-       
 
-       queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
-               if(resv && resv->task == newThread->task) {
-                       if(pmc_accessible_from_core(resv->pmc, cpuNum)) {
+       if (task_reservation_count) {
+               queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
+                       if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
                                (void)pmc_internal_reservation_context_in(resv);
                        }
                }
@@ -2792,11 +2910,6 @@ kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused,
        return KERN_FAILURE;
 }
 
-boolean_t pmc_accessible_from_core(pmc_t pmc __unused, 
-       uint32_t logicalCore __unused) {
-       return FALSE;
-}
-
 kern_return_t pmc_reserve(pmc_t pmc __unused, 
        pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
        return KERN_FAILURE;
index 72692fa54f1cd5398eb97ca35f02de230fc6ea43..789a5b223eabb9ceaffc98db3410ae7ab52e0660 100644 (file)
@@ -34,8 +34,6 @@ extern "C" {
 #include <mach/mach_time.h>
 #include <mach/mach_types.h>
 
-#include <libkern/version.h>
-
 /****************************************************************************
  * The four main object types
  *
@@ -85,12 +83,7 @@ typedef struct pmc_config *pmc_config_t;
  * to the IORegistry (this way only usable PMCs and Perf Monitors will be shown.)
  ****************************************************************************/
 
-/*!typedef
- * @abstract A pointer to a method that returns whether or not the given performance monitor driver supports context switched counters
- * @param pm A registered performance monitor driver object (see <link>perf_monitor_register</link>).
- * @result TRUE if the driver supports context switching, FALSE otherwise.
- */
-typedef boolean_t (*perfmon_supports_context_switch_method_t)(perf_monitor_object_t pm);
+typedef kern_return_t (*perfmon_get_accessible_cores_method_t)(perf_monitor_object_t pm, uint32_t **cores, size_t *coreCt);
 
 /*!typedef
  * @abstract A pointer to a method that enables a set of counters.
@@ -109,7 +102,14 @@ typedef kern_return_t (*perfmon_enable_counters_method_t)(perf_monitor_object_t
  */
 typedef kern_return_t (*perfmon_disable_counters_method_t)(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount);
 
-#define MACH_PERFMON_METHODS_VERSION 0
+typedef void (*perfmon_on_idle_method_t)(perf_monitor_object_t pm);
+typedef void (*perfmon_on_idle_exit_method_t)(perf_monitor_object_t pm);
+
+#define MACH_PERFMON_METHODS_VERSION 1
+
+#define PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING     0x1
+#define PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS    0x2
+#define PERFMON_FLAG_ALWAYS_ACTIVE                  0x4
 
 /*!struct perf_monitor_methods
  * @abstract A set of method pointers to be used when interacting with a performance monitor object
@@ -119,12 +119,16 @@ typedef kern_return_t (*perfmon_disable_counters_method_t)(perf_monitor_object_t
 typedef struct perf_monitor_methods {
        uint32_t perf_monitor_methods_version;  // Always set to MACH_PERFMON_METHODS_VERSION when writing driver kexts
        
-       // All methods are required.
-       perfmon_supports_context_switch_method_t supports_context_switching;
+       uint32_t flags;
+
+       perfmon_get_accessible_cores_method_t accessible_cores;
+
        perfmon_enable_counters_method_t enable_counters;
        perfmon_disable_counters_method_t disable_counters;
-}perf_monitor_methods_t;
 
+       perfmon_on_idle_method_t on_idle;
+	perfmon_on_idle_exit_method_t on_idle_exit;
+} perf_monitor_methods_t;
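
A driver kext targeting version 1 of this interface would now register along
these lines (a hedged sketch; the my_* callbacks are placeholders):

    perf_monitor_methods_t methods = {
        .perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION,
        .flags = PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING,
        .accessible_cores = my_accessible_cores,
        .enable_counters  = my_enable_counters,
        .disable_counters = my_disable_counters,
        .on_idle          = my_on_idle,
        .on_idle_exit     = my_on_idle_exit,
    };
    kern_return_t kr = perf_monitor_register(my_monitor_object, &methods);

Note that perf_monitor_register() in pmc.c rejects a methods table with any
null callback, so even monitors that never receive idle notifications must
supply on_idle/on_idle_exit stubs.
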
 
 /****************************************************************************
  * Method types for performance counter registration
@@ -233,7 +237,8 @@ typedef boolean_t (*pmc_is_accessible_from_logical_core_method_t)(pmc_object_t p
 
 /*!typedef 
  * @abstract A pointer to a method that returns an array of the logical cores from which a PMC can be accessed.
- * @discussion A pointer to a method that returns an array of the logical cores from which a PMC can be accessed. Resulting array of cores should not be released by xnu.
+ * @discussion A pointer to a method that returns an array of the logical cores from which a PMC can be accessed. 
+ * Resulting array of cores should not be released by xnu.
  * Implementations of this method type must be safe to call at interrupt context.
  * @param pmc A valid pmc object
  * @param cores A value-returned array of logical cores that can access the given PMC.
@@ -311,7 +316,7 @@ typedef struct pmc_methods {
        pmc_enable_method_t enable;
        pmc_open_method_t open;
        pmc_close_method_t close;
-}pmc_methods_t;
+} pmc_methods_t;
 
 /*
  * Kext interface Methods
@@ -338,14 +343,6 @@ typedef struct pmc_methods {
  * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance monitor
  * driver, KERN_INVALID_ARGUMENT if one or both of the arguments is null
  */
-
-/* Prevent older AppleProfileFamily kexts from loading on newer kernels.
- * Alas, C doesn't necessarily have a cleaner way to do the version number concatenation
- */
-#define PERF_REG_NAME1(a, b) a ## b
-#define PERF_REG_NAME(a, b) PERF_REG_NAME1(a, b)
-#define perf_monitor_register PERF_REG_NAME(perf_monitor_register_, VERSION_MAJOR)
-
 kern_return_t perf_monitor_register(perf_monitor_object_t monitor, perf_monitor_methods_t *methods);
 
 /*!fn
@@ -414,8 +411,14 @@ typedef struct perf_monitor {
        // reference counted
        uint32_t useCount;
        
-       // link to other perf monitors
+       uint32_t reservedCounters;
+
+       // A value of -1 here indicates independence from a particular core
+       int cpu;
+       
+       // links to other perf monitors
        queue_chain_t link;
+       queue_chain_t cpu_link;
 }*perf_monitor_t;
 
 /*!struct pmc
@@ -554,14 +557,17 @@ void pmc_free_config(pmc_t pmc, pmc_config_t config);
 
 /*!fn
  * @abstract Setup the configuration
- * @discussion Configurations for counter are architecture-neutral key-value pairs (8bit key, 64bit value).  Meanings of the keys and values are defined by the driver-writer and are listed in XML form available for interrogation via the CoreProfile framework. This method is not interrupt safe.
+ * @discussion Configurations for a counter are architecture-neutral key-value pairs (8-bit key, 64-bit value). Meanings of the keys and values are defined
+ * by the driver-writer and are listed in XML form, available for interrogation via the CoreProfile framework. This method is not interrupt safe.
  * @result KERN_SUCCESS on success. 
  */
 kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, uint8_t id, uint64_t value);
 
 /*!fn
  * @abstract Interrupt Threshold Setup
- * @discussion In order to configure a PMC to use PMI (cause an interrupt after so-many events occur), use this method, and provide a function to be called after the interrupt occurs, along with a reference context. PMC Threshold handler methods will have the pmc that generated the interrupt as the first argument when the interrupt handler is invoked, and the given  @refCon (which may be NULL) as the second.  This method is not interrupt safe.
+ * @discussion In order to configure a PMC to use PMI (cause an interrupt after a given number of events occur), use this method, and provide a function to be
+ * called after the interrupt occurs, along with a reference context. PMC threshold handler methods will have the pmc that generated the interrupt as
+ * the first argument when the interrupt handler is invoked, and the given @refCon (which may be NULL) as the second. This method is not interrupt safe.
  */
 kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, uint64_t threshold, pmc_interrupt_method_t method, void *refCon);
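
A hedged usage sketch of the PMI setup described above. The pmc_create_config allocator (the counterpart of pmc_free_config), the MY_EVENT_KEY constant, my_event_code, and my_refcon are assumptions; the handler shape (the interrupting pmc first, the refCon second) follows the discussion.

	static void my_pmi_handler(void *pmc, void *refCon)
	{
		/* invoked with the interrupting pmc and our refCon, at interrupt context */
	}

	pmc_config_t config = NULL;
	if (pmc_create_config(pmc, &config) == KERN_SUCCESS) {
		pmc_config_set_value(pmc, config, MY_EVENT_KEY, my_event_code);
		pmc_config_set_interrupt_threshold(pmc, config, 1000000ULL,
		                                   my_pmi_handler, my_refcon);
		/* a later successful pmc_reserve*() call consumes config */
	}
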
 
@@ -583,7 +589,8 @@ void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount);
 
 /*!fn
  * @abstract Finds pmcs by partial string matching.
- * @discussion This method returns a list of pmcs (similar to <link>pmc_get_pmc_list</link>) whose names match the given string up to it's length.  For example, searching for "ia32" would return pmcs "ia32gp0" and "ia32gp1". Results should be released by the caller using <link>pmc_free_pmc_list</link>
+ * @discussion This method returns a list of pmcs (similar to <link>pmc_get_pmc_list</link>) whose names match the given string up to its length.
+ * For example, searching for "ia32" would return pmcs "ia32gp0" and "ia32gp1". Results should be released by the caller using <link>pmc_free_pmc_list</link>.
  * @param name Partial string to search for.
  * @param pmcs Storage for the resultant pmc_t array pointer.
  * @param pmcCount Storage for the resultant count of pmc_t's.
@@ -599,21 +606,14 @@ const char *pmc_get_name(pmc_t pmc);
 
 /*!fn
  * @abstract Returns a list of logical cores from which the given pmc can be read from or written to.
- * @discussion This method can return a NULL list with count of 0 -- this indicates any core can read the given pmc. This method does not allocate the list, therefore callers should take care not to mutate or free the resultant list. This method is interrupt safe.
+ * @discussion This method can return a NULL list with a count of 0 -- this indicates any core can read the given pmc. This method does not allocate the list,
+ * therefore callers should take care not to mutate or free the resultant list. This method is interrupt safe.
  * @param pmc The PMC for which to return the cores that can read/write it.
  * @param logicalCores Storage for the pointer to the list.
  * @param logicalCoreCt Value-return number of elements in the returned list.  0 indicates all cores can read/write the given pmc.
  */
 kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, size_t *logicalCoreCt);
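
A small sketch of reading the accessible-core list while respecting the ownership rule above: the list belongs to the driver, so the caller only reads it and never frees or mutates it.

	uint32_t *cores = NULL;
	size_t coreCt = 0;

	if (pmc_get_accessible_core_list(pmc, &cores, &coreCt) == KERN_SUCCESS) {
		if (coreCt == 0) {
			/* NULL list / zero count: every logical core can access this pmc */
		} else {
			for (size_t i = 0; i < coreCt; i++) {
				/* cores[i] is an accessible logical core number; read-only */
			}
		}
	}
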
 
-/*!fn
- * @abstract Returns TRUE if the given logical core can read/write the given PMC.
- * @discussion This method is interrupt safe.
- * @param pmc The PMC to test
- * @param logicalCore The core from which to test.
- */
-boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore);
-
 /* 
  * BEGIN PMC Reservations
  *
@@ -623,7 +623,9 @@ boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore);
 
 /*!fn
  * @abstract Reserve a PMC for System-wide counting.
- * @discussion This method will attempt to reserve the given pmc at system-scope. It will configure the given pmc to count the event indicated by the given configuration object. This method consumes the given configuration object if the return value is KERN_SUCCESS - any other return value indicates the caller should free the configuration object via <link>pmc_free_config</link>. This method is not interrupt safe.
+ * @discussion This method will attempt to reserve the given pmc at system-scope. It will configure the given pmc to count the event indicated by the given 
+ * configuration object. This method consumes the given configuration object if the return value is KERN_SUCCESS - any other return value indicates the caller 
+ * should free the configuration object via <link>pmc_free_config</link>. This method is not interrupt safe.
  * @param pmc The PMC to reserve.
  * @param config The configuration object to use with the given pmc.
  * @param reservation A value-return reservation object to be used in pmc_reservation_* methods.
@@ -638,7 +640,8 @@ kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, pmc_reservation_t *res
 
 /*!fn
  * @abstract Reserve a PMC for task-wide counting.
- * @discussion This method will attempt to reserve the given pmc for task-wide counting. The resulting reservation will only count when the task is running on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve</link> in all other respects.
+ * @discussion This method will attempt to reserve the given pmc for task-wide counting. The resulting reservation will only count when the task is running 
+ * on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve</link> in all other respects.
  * @param pmc The PMC to reserve
  * @param config The configuration object to use.
  * @param task The task for which to enable the counter.
@@ -649,7 +652,8 @@ kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, task_t task, pmc_
 
 /*!fn
  * @abstract Reserve a PMC for thread-wide counting.
- * @discussion This method will attempt to reserve the given pmc for thread-wide counting. The resulting reservation will only count when the thread is running on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve_task</link> in all other respects.
+ * @discussion This method will attempt to reserve the given pmc for thread-wide counting. The resulting reservation will only count when the thread is 
+ * running on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve_task</link> in all other respects.
  * @param pmc The PMC to reserve
  * @param config The configuration object to use.
  * @param thread The thread for which to enable the counter.
@@ -660,21 +664,28 @@ kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, thread_t thread
 
 /*!fn
  * @abstract Start counting
- * @discussion This method instructs the given reservation to start counting as soon as possible. If the reservation is for a thread (or task) other than the current thread, or for a pmc that is not accessible from the current logical core, the reservation will start counting the next time the thread (or task) runs on a logical core than can access the pmc. This method is interrupt safe. If this method is called from outside of interrupt context, it may block.
+ * @discussion This method instructs the given reservation to start counting as soon as possible. If the reservation is for a thread (or task) other than the 
+ * current thread, or for a pmc that is not accessible from the current logical core, the reservation will start counting the next time the thread (or task) 
+ * runs on a logical core that can access the pmc. This method is interrupt safe. If this method is called from outside of interrupt context, it may block.
  * @param reservation The reservation to start counting
  */
 kern_return_t pmc_reservation_start(pmc_reservation_t reservation);
 
 /*!fn
  * @abstract Stop counting
- * @discussion This method instructs the given reservation to stop counting as soon as possible. If the reservation is for a thread (or task) other than the current thread, or for a pmc that is not accessible from the current logical core, the reservation will stop counting the next time the thread (or task) ceases to run on a logical core than can access the pmc. This method is interrupt safe. If called form outside of interrupt context, this method may block.
+ * @discussion This method instructs the given reservation to stop counting as soon as possible. If the reservation is for a thread (or task) other than the 
+ * current thread, or for a pmc that is not accessible from the current logical core, the reservation will stop counting the next time the thread (or task)
+ * ceases to run on a logical core that can access the pmc. This method is interrupt safe. If called from outside of interrupt context, this method may block.
  * @param reservation The reservation to stop counting
  */
 kern_return_t pmc_reservation_stop(pmc_reservation_t reservation);
 
 /*!fn
  * @abstract Read the counter value
- * @discussion This method will read the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be read directly from the hardware.  Otherwise, the value stored in the reservation is returned. This method is interrupt safe. If the caller is calling from outside of interrupt context, this method may block.
+ * @discussion This method will read the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently
+ * executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be read directly 
+ * from the hardware.  Otherwise, the value stored in the reservation is returned. This method is interrupt safe. If the caller is calling from outside of 
+ * interrupt context, this method may block.
  * @param reservation The reservation whose value to read.
  * @param value Value-return event count
  */
@@ -682,7 +693,10 @@ kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *valu
 
 /*!fn
  * @abstract Write the counter value
- * @discussion This method will write the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be written directly to the hardware.  Otherwise, the value stored in the reservation is overwritten. This method is interrupt safe. If the caller is calling from outside of interrupt context, this method may block.
+ * @discussion This method will write the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently 
+ * executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be written directly 
+ * to the hardware. Otherwise, the value stored in the reservation is overwritten. This method is interrupt safe. If the caller is calling from outside of 
+ * interrupt context, this method may block.
  * @param reservation The reservation to write.
  * @param value The event count to write
  */
@@ -690,7 +704,8 @@ kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t valu
 
 /*!fn
  * @abstract Free a reservation and all associated resources.
- * @discussion This method will free the resources associated with the given reservation and release the associated PMC back to general availability. If the reservation is currently counting, it will be stopped prior to release. This method is not interrupt safe.
+ * @discussion This method will free the resources associated with the given reservation and release the associated PMC back to general availability. 
+ * If the reservation is currently counting, it will be stopped prior to release. This method is not interrupt safe.
  * @param reservation The reservation to free
  */
 kern_return_t pmc_reservation_free(pmc_reservation_t reservation);
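
Putting the reservation calls together, a hedged sketch of one system-scope counting session; note the config-ownership rule from pmc_reserve (the configuration object is consumed on KERN_SUCCESS, otherwise the caller must free it).

	pmc_reservation_t resv = NULL;
	uint64_t count = 0;

	if (pmc_reserve(pmc, config, &resv) == KERN_SUCCESS) {
		/* the reservation now owns config; do not free it here */
		pmc_reservation_start(resv);
		/* ... workload of interest runs ... */
		pmc_reservation_stop(resv);
		pmc_reservation_read(resv, &count);
		pmc_reservation_free(resv);   /* releases the PMC; stops counting first if needed */
	} else {
		pmc_free_config(pmc, config); /* reserve failed, so we still own config */
	}
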
@@ -705,10 +720,38 @@ void pmc_bootstrap(void);
 
 /*!fn
  * @abstract Performs a pmc context switch.
- * @discussion This method will save all PMCs reserved for oldThread (and the task associated with oldThread), as well as restore all PMCs reserved for newThread (and the task associated with newThread). This method is for xnu-internal context switching routines only.
+ * @discussion This method will save all PMCs reserved for oldThread (and the task associated with oldThread), as well as restore all PMCs reserved 
+ * for newThread (and the task associated with newThread). This method is for xnu-internal context switching routines only.
  */
 boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread);
 
+/*!fn
+ * @abstract Called on per-core idle.
+ * @discussion This method notifies registered performance monitors of impending cpu idle, and can be used to save counter state.
+ */
+boolean_t pmc_idle(void);
+
+/*!fn
+ * @abstract Called on per-core wake from idle.
+ * @discussion This method notifies registered performance monitors of wake-up from the prior idle, and can be used to restore 
+ * previously saved counter configuration.
+ */
+boolean_t pmc_idle_exit(void);
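
A hypothetical sketch of where these notifications sit relative to a machine-dependent idle path; machine_idle_sketch is invented for illustration, and the real call sites live in xnu's platform idle loop.

	static void machine_idle_sketch(void)
	{
		(void)pmc_idle();        /* monitors flagged REQUIRES_IDLE_NOTIFICATIONS save counter state */
		/* ... enter the low-power state and wait for an interrupt ... */
		(void)pmc_idle_exit();   /* monitors restore the previously saved configuration */
	}
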
+
+#if defined(THREAD_PMC_FLAG)
+/* Allow inclusion from outside of MACH_KERNEL_PRIVATE scope. */
+
+/*!fn
+ * @abstract Returns true if thread has been marked for counting.
+ * @discussion Task-level reservations are propagated to child threads via thread_create_internal. Any mutation of task reservations forces a recalculation
+ * of t_chud (for the pmc flag) for all threads in that task. Consequently, we can simply check the current thread's flag against THREAD_PMC_FLAG.
+ */
+static inline boolean_t pmc_thread_eligible(thread_t t) {
+       return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE;
+}
+
+#endif /* THREAD_PMC_FLAG */
+
 #endif // XNU_KERNEL_PRIVATE
 
 #ifdef __cplusplus
index 3b2b64363543aacef7652a05844b62afb1f19f54..3fda07c40e83f712df7a63ba41832ace8b26be06 100644 (file)
@@ -16,8 +16,6 @@ INSTINC_SUBDIRS_I386 = \
 INSTINC_SUBDIRS_X86_64 = \
        x86_64
 
-INSTINC_SUBDIRS_ARM = \
-       arm
 
 EXPINC_SUBDIRS = \
        machine
@@ -25,8 +23,6 @@ EXPINC_SUBDIRS = \
 EXPINC_SUBDIRS_I386 = \
        i386
 
-EXPINC_SUBDIRS_ARM = \
-       arm
 
 EXPINC_SUBDIRS_X86_64 = \
        x86_64
index cd8dc83173b6daa18a65d8ccf5847cc0595e31a8..97d76cdf0191b447a938909802fb407f184cc910 100644 (file)
@@ -407,63 +407,8 @@ memory_object_control_uiomove(
                        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
                                break;
 
-                       /*
-                        * if we're in this routine, we are inside a filesystem's
-                        * locking model, so we don't ever want to wait for pages that have
-                        * list_req_pending == TRUE since it means that the
-                        * page is a candidate for some type of I/O operation,
-                        * but that it has not yet been gathered into a UPL...
-                        * this implies that it is still outside the domain
-                        * of the filesystem and that whoever is responsible for
-                        * grabbing it into a UPL may be stuck behind the filesystem
-                        * lock this thread owns, or trying to take a lock exclusively
-                        * and waiting for the readers to drain from a rw lock...
-                        * if we block in those cases, we will deadlock
-                        */
-                       if (dst_page->list_req_pending) {
-
-                               if (dst_page->absent) {
-                                       /*
-                                        * this is the list_req_pending | absent | busy case
-                                        * which originates from vm_fault_page... we want
-                                        * to fall out of the fast path and go back
-                                        * to the caller which will gather this page
-                                        * into a UPL and issue the I/O if no one
-                                        * else beats us to it
-                                        */
-                                       break;
-                               }
-                               if (dst_page->pageout || dst_page->cleaning) {
-                                       /*
-                                        * this is the list_req_pending | pageout | busy case
-                                        * or the list_req_pending | cleaning case...
-                                        * which originate from the pageout_scan and
-                                        * msync worlds for the pageout case and the hibernate
-                                        * pre-cleaning world for the cleaning case...
-                                        * we need to reset the state of this page to indicate
-                                        * it should stay in the cache marked dirty... nothing else we
-                                        * can do at this point... we can't block on it, we can't busy
-                                        * it and we can't clean it from this routine.
-                                        */
-                                       vm_page_lockspin_queues();
-
-                                       vm_pageout_queue_steal(dst_page, TRUE); 
-                                       vm_page_deactivate(dst_page);
-
-                                       vm_page_unlock_queues();
-                               }
-                               /*
-                                * this is the list_req_pending | cleaning case...
-                                * we can go ahead and deal with this page since
-                                * its ok for us to mark this page busy... if a UPL
-                                * tries to gather this page, it will block until the
-                                * busy is cleared, thus allowing us safe use of the page
-                                * when we're done with it, we will clear busy and wake
-                                * up anyone waiting on it, thus allowing the UPL creation
-                                * to finish
-                                */
 
-                       } else if (dst_page->busy || dst_page->cleaning) {
+                       if (dst_page->busy || dst_page->cleaning) {
                                /*
                                 * someone else is playing with the page... if we've
                                 * already collected pages into this run, go ahead
@@ -476,7 +421,11 @@ memory_object_control_uiomove(
                                PAGE_SLEEP(object, dst_page, THREAD_UNINT);
                                continue;
                        }
-
+                       if (dst_page->laundry) {
+                               dst_page->pageout = FALSE;
+                               
+                               vm_pageout_steal_laundry(dst_page, FALSE);
+                       }
                        /*
                         * this routine is only called when copying
                         * to/from real files... no need to consider
@@ -485,7 +434,7 @@ memory_object_control_uiomove(
                        assert(!dst_page->encrypted);
 
                        if (mark_dirty) {
-                               dst_page->dirty = TRUE;
+                               SET_PAGE_DIRTY(dst_page, FALSE);
                                if (dst_page->cs_validated && 
                                    !dst_page->cs_tainted) {
                                        /*
index 9233e644f0575a5187dc9fecc33486267afa9b06..4d4b968d20c7c1c71b50ccf2a9ea8f276c3f136a 100644 (file)
@@ -41,7 +41,6 @@
  *     Contiguous physical memory allocator.
  */
 
-#include <mach_kdb.h>
 #include <mach_counters.h>
 
 #include <mach/mach_types.h>
 extern kern_return_t
 cpm_allocate(vm_size_t size, vm_page_t *list, ppnum_t max_pnum, ppnum_t pnum_mask, boolean_t wire, int flags);
 
-/*
- *     CPM-specific event counters.
- */
-#define        VM_CPM_COUNTERS         (MACH_KDB && MACH_COUNTERS && VM_CPM)
-#if    VM_CPM_COUNTERS
-#define        cpm_counter(foo)        foo
-#else  /* VM_CPM_COUNTERS */
-#define        cpm_counter(foo)
-#endif /* VM_CPM_COUNTERS */
-
 #endif /* _VM_CPM_H_ */
index dd8197d7d70b2dbbfada6a86cfe2e807f76308a9..c6e9c8ef62ebc27d8b49b8deb77d617e84172ca2 100644 (file)
 
 #if CONFIG_FREEZE
 
-#include "default_freezer.h"
+#ifndef CONFIG_MEMORYSTATUS
+#error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
+#endif
+
+#include <vm/default_freezer.h>
 
 /*
  * Indicates that a page has been faulted back in.
  */
 #define FREEZER_OFFSET_ABSENT ((vm_object_offset_t)(-1))
 
+lck_grp_attr_t default_freezer_handle_lck_grp_attr;    
+lck_grp_t      default_freezer_handle_lck_grp;
+
+void
+default_freezer_init(void)
+{
+       lck_grp_attr_setdefault(&default_freezer_handle_lck_grp_attr);
+       lck_grp_init(&default_freezer_handle_lck_grp, "default_freezer_handle",
+                    &default_freezer_handle_lck_grp_attr);
+}
+
+
 /*
  * Create the mapping table that will
  * tell us the object/offset pair that
@@ -42,7 +59,7 @@
  * out or being brought back in.
  */
 
-void*
+default_freezer_mapping_table_t
 default_freezer_mapping_create(vm_object_t object, vm_offset_t offset)
 {
        default_freezer_mapping_table_t table;
@@ -57,13 +74,18 @@ default_freezer_mapping_create(vm_object_t object, vm_offset_t offset)
        table->object = object;
        table->offset = offset;
        
-       return (void*)table;
+       return table;
 }
 
+/*
+ * Table modifications and lookups are done behind
+ * the compact_object lock.
+ */
+
 void
-default_freezer_mapping_free(void **table, boolean_t all)
+default_freezer_mapping_free(default_freezer_mapping_table_t *table_p, boolean_t all)
 {      
-       default_freezer_mapping_table_t freezer_table = *((default_freezer_mapping_table_t *)table);
+       default_freezer_mapping_table_t freezer_table = *table_p;
        assert(freezer_table);
        
        if (all) {
@@ -79,31 +101,33 @@ default_freezer_mapping_free(void **table, boolean_t all)
  
 kern_return_t
 default_freezer_mapping_store(
-               default_freezer_mapping_table_t *table,
+               default_freezer_mapping_table_t table,
                memory_object_offset_t table_offset,
                memory_object_t memory_object,
                memory_object_offset_t offset)
 {
        default_freezer_mapping_table_entry_t entry;
        uint32_t index;
-       
-       assert(*table);
-       
-       if ((*table)->index >= MAX_FREEZE_TABLE_ENTRIES) {
-               vm_object_t compact_object = (*table)->object;
+
+       assert(table);
+
+       while (table->next) {
+               table = table->next;
+       }
+
+       if (table->index >= MAX_FREEZE_TABLE_ENTRIES) {
+               vm_object_t compact_object = table->object;
                default_freezer_mapping_table_t next;
                
                next = default_freezer_mapping_create(compact_object, table_offset);
                if (!next) {
                        return KERN_FAILURE;
                }
-               
-               (*table)->next = next;
-               *table = next;
+               table->next = next;
        }
 
-       index = (*table)->index++;
-       entry = &(*table)->entry[index];
+       index = table->index++;
+       entry = &table->entry[index];
 
        entry->memory_object = memory_object;
        entry->offset = offset;
@@ -165,15 +189,17 @@ default_freezer_mapping_update(
        return kr;
 }
 
+
+
 /*
  * Create a freezer memory object for this
- * vm object.
+ * vm object. This will be one of the vm
+ * objects that will pack the compact object.
  */
 void
 default_freezer_memory_object_create(
-                       vm_object_t object,
-                       vm_object_t compact_object,
-                       default_freezer_mapping_table_t table)
+                       vm_object_t     object,
+                       default_freezer_handle_t df_handle)
 {
 
        default_freezer_memory_object_t fo = NULL;
@@ -189,9 +215,10 @@ default_freezer_memory_object_create(
                assert (control != MEMORY_OBJECT_CONTROL_NULL);
 
                df_memory_object_init((memory_object_t)fo, control, 0);         
-               fo->fo_compact_object = compact_object;
-               fo->fo_table = table;
-               
+               fo->fo_df_handle = df_handle;
+
+               default_freezer_handle_reference_locked(fo->fo_df_handle);
+       
                object->pager = (memory_object_t)fo;
                object->pager_created = TRUE;
                object->pager_initialized = TRUE;
@@ -203,53 +230,110 @@ default_freezer_memory_object_create(
        }
 }
 
+kern_return_t
+default_freezer_pack(
+       unsigned int    *purgeable_count,
+       unsigned int    *wired_count,
+       unsigned int    *clean_count,
+       unsigned int    *dirty_count,
+       unsigned int    dirty_budget,
+       boolean_t       *shared,
+       vm_object_t     src_object,
+       default_freezer_handle_t df_handle)
+{
+       kern_return_t                   kr = KERN_SUCCESS;
+
+       if (df_handle) {
+               default_freezer_handle_lock(df_handle);
+       }
+
+       kr = vm_object_pack(purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared, src_object, df_handle);
+       
+       if (df_handle) {
+               default_freezer_handle_unlock(df_handle);
+       }
+
+       return kr;
+}
+
+/* 
+ * Called with freezer_handle locked.
+ * default_freezer_pack locks the handle, calls
+ * vm_object_pack which, in turn, will call
+ * default_freezer_pack_page().
+ */
 void
 default_freezer_pack_page(
                vm_page_t p, 
-               vm_object_t compact_object, 
-               vm_object_offset_t offset, 
-               void **table)
+               default_freezer_handle_t df_handle)
 {
 
-       default_freezer_mapping_table_t *freeze_table = (default_freezer_mapping_table_t *)table;
-       memory_object_t memory_object = p->object->pager;
-       
+       default_freezer_mapping_table_t freeze_table = NULL;
+       memory_object_t                 memory_object = NULL;
+       vm_object_t                     compact_object =  VM_OBJECT_NULL;
+
+       assert(df_handle);
+
+       compact_object = df_handle->dfh_compact_object;
+
+       assert(compact_object);
+
+       freeze_table =  df_handle->dfh_table;
+       memory_object = p->object->pager;
+
        if (memory_object == NULL) {
-               default_freezer_memory_object_create(p->object, compact_object, *freeze_table);
+               default_freezer_memory_object_create(p->object, df_handle);
                memory_object = p->object->pager;
        } else {
-               default_freezer_memory_object_t fo = (default_freezer_memory_object_t)memory_object;
-               if (fo->fo_compact_object == VM_OBJECT_NULL) {
-                       fo->fo_compact_object = compact_object;
-                       fo->fo_table = *freeze_table;
-               }
+               assert(df_handle == ((default_freezer_memory_object_t)memory_object)->fo_df_handle);
        }
-       
-       default_freezer_mapping_store(freeze_table, offset, memory_object, p->offset + p->object->paging_offset);
 
-       /* Remove from the original and insert into the compact destination object */
-       vm_page_rename(p, compact_object, offset, FALSE);
+       vm_object_lock(compact_object);
+       default_freezer_mapping_store(freeze_table, df_handle->dfh_compact_offset, memory_object, p->offset + p->object->paging_offset);
+       vm_page_rename(p, compact_object, df_handle->dfh_compact_offset, FALSE);
+       vm_object_unlock(compact_object);
+       
+       df_handle->dfh_compact_offset += PAGE_SIZE;
 }
 
 void
 default_freezer_unpack(
-               vm_object_t object, 
-               void **table)
+                default_freezer_handle_t df_handle)
 {
        
-       vm_page_t p = VM_PAGE_NULL;
-       uint32_t index = 0;
-       vm_object_t src_object = VM_OBJECT_NULL;
-       memory_object_t src_mem_object = MEMORY_OBJECT_NULL;
-       memory_object_offset_t  src_offset = 0;
-       vm_object_offset_t      compact_offset = 0;
-       default_freezer_memory_object_t fo = NULL;
-       default_freezer_memory_object_t last_memory_object_thawed = NULL;
-       default_freezer_mapping_table_t freeze_table = *(default_freezer_mapping_table_t *)table;
-
-       assert(freeze_table);
+       vm_page_t                               compact_page = VM_PAGE_NULL, src_page = VM_PAGE_NULL;
+       uint32_t                                index = 0;
+       vm_object_t                             src_object = VM_OBJECT_NULL;
+       vm_object_t                             compact_object = VM_OBJECT_NULL;
+       memory_object_t                         src_mem_object = MEMORY_OBJECT_NULL;
+       memory_object_offset_t                  src_offset = 0;
+       vm_object_offset_t                      compact_offset = 0;
+       default_freezer_memory_object_t         fo = NULL;
+       default_freezer_mapping_table_t         freeze_table = NULL;
+       boolean_t                               should_unlock_handle = FALSE;
+
+       assert(df_handle);
+
+       default_freezer_handle_lock(df_handle);
+       should_unlock_handle = TRUE;
+
+       freeze_table = df_handle->dfh_table;    
+       compact_object = df_handle->dfh_compact_object;
+
+       assert(compact_object);
+       assert(compact_object->alive);
+       assert(!compact_object->terminating);
+       assert(compact_object->pager_ready);
        
-       vm_object_lock(object);
+       /* Bring the pages back in */
+       if (vm_object_pagein(compact_object) != KERN_SUCCESS) {
+               if (should_unlock_handle) {
+                       default_freezer_handle_unlock(df_handle);
+               }
+               return;
+       }
+
+       vm_object_lock(compact_object);
        
        for (index = 0, compact_offset = 0; ; index++, compact_offset += PAGE_SIZE){
                if (index >= freeze_table->index) {
@@ -258,8 +342,8 @@ default_freezer_unpack(
                        table_next = freeze_table->next; 
                        
                        /* Free the tables as we go along */
-                       default_freezer_mapping_free((void**)&freeze_table, FALSE);
-                       
+                       default_freezer_mapping_free(&freeze_table, FALSE);
+               
                        if (table_next == NULL){
                                break;
                        }
@@ -281,8 +365,8 @@ default_freezer_unpack(
                src_offset = freeze_table->entry[index].offset;
                if (src_offset != FREEZER_OFFSET_ABSENT) {
                        
-                       p = vm_page_lookup(object, compact_offset);
-                       assert(p);
+                       compact_page = vm_page_lookup(compact_object, compact_offset);
+                       assert(compact_page);
 
                        fo = (default_freezer_memory_object_t)src_mem_object;
                
@@ -290,39 +374,37 @@ default_freezer_unpack(
        
                        /* Move back over from the freeze object to the original */
                        vm_object_lock(src_object);
-                       vm_page_rename(p, src_object, src_offset - src_object->paging_offset, FALSE);
+                       src_page = vm_page_lookup(src_object, src_offset - src_object->paging_offset);
+                       if (src_page != VM_PAGE_NULL){
+                               /*
+                                * We might be racing with a VM fault. 
+                                * So handle that gracefully.
+                                */
+                               assert(src_page->absent == TRUE);
+                               VM_PAGE_FREE(src_page);
+                       }
+                       vm_page_rename(compact_page, src_object, src_offset - src_object->paging_offset, FALSE);
                        vm_object_unlock(src_object);
                }
                
-               if (src_mem_object != ((memory_object_t)last_memory_object_thawed)){
-                       if (last_memory_object_thawed != NULL){
-                               last_memory_object_thawed->fo_compact_object = VM_OBJECT_NULL;
-                               last_memory_object_thawed->fo_table = NULL;
-                       }
-                       last_memory_object_thawed = (default_freezer_memory_object_t)src_mem_object;
-               }
        }
        
-       if (last_memory_object_thawed != NULL){
-               last_memory_object_thawed->fo_compact_object = VM_OBJECT_NULL;
-               last_memory_object_thawed->fo_table = NULL;
-       }
+       vm_object_unlock(compact_object);
        
-       vm_object_unlock(object);
-}
-
-vm_object_t
-default_freezer_get_compact_vm_object(void** table)
-{
-       default_freezer_mapping_table_t freeze_table = *((default_freezer_mapping_table_t *)table);
-       assert(freeze_table);
-       return ((vm_object_t)(freeze_table->object));
+       vm_object_deallocate(compact_object);
+       
+       if (should_unlock_handle) {
+               df_handle->dfh_table = NULL;
+               df_handle->dfh_compact_object = VM_OBJECT_NULL;
+               df_handle->dfh_compact_offset = 0;
+               default_freezer_handle_unlock(df_handle);
+       }
 }
 
 void
 df_memory_object_reference(__unused memory_object_t mem_obj)
 {
-       
+
        /* No-op */
 }
 
@@ -331,52 +413,65 @@ df_memory_object_deallocate(memory_object_t mem_obj)
 {
 
        default_freezer_memory_object_t fo = (default_freezer_memory_object_t)mem_obj;
-       vm_object_t compact_object = fo->fo_compact_object;
-       
+
        assert(fo);
        
-       if (compact_object != VM_OBJECT_NULL) {
+       if (fo->fo_df_handle != NULL) {
                
-               default_freezer_mapping_table_t fo_table = fo->fo_table;
+               default_freezer_mapping_table_t table = NULL;
                default_freezer_mapping_table_entry_t entry;
                boolean_t found = FALSE;
                uint32_t index = 0;
+               vm_object_t compact_object = VM_OBJECT_NULL;
                
-               vm_object_lock(compact_object);
-       
-               /* Remove from table */
-               while (1) {     
-                       if (index >= fo_table->index) {
-                               if (fo_table->next) {
-                                       fo_table = fo_table->next;
-                                       index = 0;
-                               } else {
-                                       /* End of tables */
-                                       break;
+               default_freezer_handle_lock(fo->fo_df_handle);
+
+               compact_object =  fo->fo_df_handle->dfh_compact_object;
+               table = fo->fo_df_handle->dfh_table;
+
+               if (compact_object == VM_OBJECT_NULL || table == NULL) {
+                       /*Nothing to do. A thaw must have cleared it all out.*/
+               } else {
+                       vm_object_lock(compact_object);
+               
+                       /* Remove from table */
+                       while (1) {     
+                               if (index >= table->index) {
+                                       if (table->next) {
+                                               table = table->next;
+                                               index = 0;
+                                       } else {
+                                               /* End of tables */
+                                               break;
+                                       }
                                }
-                       }
 
-                       entry = &fo_table->entry[index];
-                       if (mem_obj == entry->memory_object) {
-                               /* It matches, so clear the entry */
-                               if (!found) {
-                                       found = TRUE;
-                               } 
-                               entry->memory_object = MEMORY_OBJECT_NULL;
-                               entry->offset = 0;
-                       } else if (MEMORY_OBJECT_NULL != entry->memory_object) {
-                               /* We have a different valid object; we're done */
-                               if (found) {
-                                       break;
+                               entry = &table->entry[index];
+                               if (mem_obj == entry->memory_object) {
+                                       /* It matches, so clear the entry */
+                                       if (!found) {
+                                               found = TRUE;
+                                       } 
+                                       entry->memory_object = MEMORY_OBJECT_NULL;
+                                       entry->offset = 0;
+                               } else if (MEMORY_OBJECT_NULL != entry->memory_object) {
+                                       /* We have a different valid object; we're done */
+                                       if (found) {
+                                               break;
+                                       }
                                }
+                       
+                               index++;
                        }
                
-                       index++;
+                       vm_object_unlock(compact_object);
                }
-       
-               vm_object_unlock(compact_object);
+
+               if (default_freezer_handle_deallocate_locked(fo->fo_df_handle)) {
+                       default_freezer_handle_unlock(fo->fo_df_handle);
+               }       
        }
-       
+
        kfree(fo, sizeof(*fo));
 }
 
@@ -407,6 +502,7 @@ df_memory_object_terminate(memory_object_t mem_obj)
        return KERN_SUCCESS;
 }
 
+
 kern_return_t
 df_memory_object_data_request(
                memory_object_t mem_obj, 
@@ -420,29 +516,44 @@ df_memory_object_data_request(
        memory_object_offset_t  compact_offset = 0;
        memory_object_t pager = NULL;
        kern_return_t kr = KERN_SUCCESS;
+       boolean_t       drop_object_ref = FALSE;
 
        default_freezer_memory_object_t fo = (default_freezer_memory_object_t)mem_obj;
+       default_freezer_handle_t        df_handle = NULL;
 
-       src_object = memory_object_control_to_vm_object(fo->fo_pager_control);
-       compact_object = fo->fo_compact_object;
-       
-       if (compact_object != VM_OBJECT_NULL) {
+       df_handle = fo->fo_df_handle;
+
+       if (df_handle == NULL) {
+               kr = KERN_FAILURE;
+       } else {
+               default_freezer_handle_lock(df_handle);
                
-               vm_object_lock(compact_object);
+               src_object = memory_object_control_to_vm_object(fo->fo_pager_control);
+               compact_object = fo->fo_df_handle->dfh_compact_object;
        
-               kr = default_freezer_mapping_update(fo->fo_table,
-                                                       mem_obj,
-                                                       offset,
-                                                       &compact_offset,
-                                                       FALSE);
-                                               
-               vm_object_unlock(compact_object);
-       } else {
-               kr = KERN_FAILURE;
+               if (compact_object == NULL) {
+                       kr = KERN_FAILURE;
+               } else {        
+                       vm_object_lock(compact_object);
+                       vm_object_reference_locked(compact_object);
+                       drop_object_ref = TRUE;
+
+                       kr = default_freezer_mapping_update(fo->fo_df_handle->dfh_table,
+                                                               mem_obj,
+                                                               offset,
+                                                               &compact_offset,
+                                                               FALSE);
+                       vm_object_unlock(compact_object);
+               }
+               default_freezer_handle_unlock(df_handle);
        }
        
+
        if (length == 0){
                /*Caller is just querying to see if we have the page*/
+               if (drop_object_ref) {
+                       vm_object_deallocate(compact_object);
+               }
                return kr;
        }
 
@@ -466,30 +577,38 @@ df_memory_object_data_request(
                                                PAGE_SIZE, PAGE_SIZE, 
                                                &upl, NULL, &page_list_count,
                                                request_flags);
+               upl_range_needed(upl, 0, 1);
 
                upl_abort(upl, UPL_ABORT_UNAVAILABLE);
                upl_deallocate(upl);
                
+               if (drop_object_ref) {
+                       vm_object_deallocate(compact_object);
+               }
+
                return KERN_SUCCESS;
        }
 
-       vm_object_lock(compact_object);
+       assert(compact_object->alive);
+       assert(!compact_object->terminating);
+       assert(compact_object->pager_ready);
 
-       pager = (memory_object_t)compact_object->pager;
+       vm_object_lock(compact_object);
 
-       if (!compact_object->pager_ready || pager == MEMORY_OBJECT_NULL){
-               vm_object_unlock(compact_object);
-               return KERN_FAILURE;
-       }
-       
        vm_object_paging_wait(compact_object, THREAD_UNINT);
        vm_object_paging_begin(compact_object);
 
        compact_object->blocked_access = TRUE;
+       pager = (memory_object_t)compact_object->pager;
+
        vm_object_unlock(compact_object);
 
        ((vm_object_fault_info_t) fault_info)->io_sync = TRUE;
 
+       /*
+        * We have a reference on both the default_freezer
+        * memory object handle and the compact object.
+        */
        kr = dp_memory_object_data_request(pager,
                                        compact_offset,
                                        length,
@@ -497,7 +616,7 @@ df_memory_object_data_request(
                                        fault_info);
        if (kr == KERN_SUCCESS){
 
-               vm_page_t src_page = VM_PAGE_NULL, dst_page = VM_PAGE_NULL;
+               vm_page_t compact_page = VM_PAGE_NULL, dst_page = VM_PAGE_NULL;
 
                vm_object_lock(compact_object);
 
@@ -506,31 +625,42 @@ df_memory_object_data_request(
 
                vm_object_lock(src_object);
 
-               if ((src_page = vm_page_lookup(compact_object, compact_offset)) != VM_PAGE_NULL){
+               if ((compact_page = vm_page_lookup(compact_object, compact_offset)) != VM_PAGE_NULL){
                        
                        dst_page = vm_page_lookup(src_object, offset - src_object->paging_offset);
                        
-                       VM_PAGE_FREE(dst_page);
-                       vm_page_rename(src_page, src_object, offset - src_object->paging_offset, FALSE);
-                       
-                       if (default_freezer_mapping_update(fo->fo_table,
-                                                       mem_obj,
-                                                       offset,
-                                                       NULL,
-                                                       TRUE) != KERN_SUCCESS) {
-                               printf("Page for object: 0x%lx at offset: 0x%lx not found in table\n", (uintptr_t)src_object, (uintptr_t)offset);
+                       if (!dst_page->absent){
+                               /*
+                                * Someone raced us here and unpacked
+                                * the object behind us.
+                                * So cleanup before we return.
+                                */
+                               VM_PAGE_FREE(compact_page);
+                       } else {
+                               VM_PAGE_FREE(dst_page);
+                               vm_page_rename(compact_page, src_object, offset - src_object->paging_offset, FALSE);
+                               
+                               if (default_freezer_mapping_update(fo->fo_df_handle->dfh_table,
+                                                               mem_obj,
+                                                               offset,
+                                                               NULL,
+                                                               TRUE) != KERN_SUCCESS) {
+                                       printf("Page for object: 0x%lx at offset: 0x%lx not found in table\n", (uintptr_t)src_object, (uintptr_t)offset);
+                               }
+                               
+                               PAGE_WAKEUP_DONE(compact_page);
                        }
-                       
-                       PAGE_WAKEUP_DONE(src_page);
                } else {
                        printf("%d: default_freezer: compact_object doesn't have the page for object 0x%lx at offset 0x%lx \n", kr, (uintptr_t)compact_object, (uintptr_t)compact_offset);
-                       kr = KERN_FAILURE;
+                       kr = KERN_SUCCESS;
                }
                vm_object_unlock(src_object);
                vm_object_unlock(compact_object);
+               vm_object_deallocate(compact_object);
        } else {
                panic("%d: default_freezer TOC pointed us to default_pager incorrectly\n", kr);
        }
+       
        return kr;
 }
 
@@ -613,4 +743,111 @@ df_memory_object_data_reclaim(
        panic("df_memory_object_data_reclaim\n");
        return KERN_SUCCESS;
 }
+
+
+/*
+ * The freezer handle is used to make sure that
+ * we don't race against the lookup and termination
+ * of the compact object.
+ */
+
+void
+default_freezer_handle_lock(default_freezer_handle_t df_handle) {
+       lck_rw_lock_exclusive(&df_handle->dfh_lck);
+}
+
+void
+default_freezer_handle_unlock(default_freezer_handle_t df_handle) {
+       lck_rw_done(&df_handle->dfh_lck);
+}
+
+default_freezer_handle_t
+default_freezer_handle_allocate(void)
+{
+
+       default_freezer_handle_t                df_handle = NULL;
+       df_handle = kalloc(sizeof(struct default_freezer_handle));
+
+       if (df_handle) {
+               memset(df_handle, 0, sizeof(struct default_freezer_handle));
+               lck_rw_init(&df_handle->dfh_lck, &default_freezer_handle_lck_grp, NULL);
+               /* No one knows of this handle yet so no need to lock it. */
+               default_freezer_handle_reference_locked(df_handle);
+       } else {
+               panic("Failed to allocated default_freezer_handle structure\n");
+       }
+       return df_handle;
+}
+
+kern_return_t
+default_freezer_handle_init(
+       default_freezer_handle_t df_handle) 
+{
+       kern_return_t                           kr = KERN_SUCCESS;
+       vm_object_t                             compact_object = VM_OBJECT_NULL;
+
+       if (df_handle == NULL || df_handle->dfh_table != NULL) {
+               kr = KERN_FAILURE;
+       } else {
+               /* Create our compact object */
+               compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
+               if (!compact_object) {
+                       kr = KERN_FAILURE;
+               } else {
+                       df_handle->dfh_compact_object = compact_object;
+                       df_handle->dfh_compact_offset = 0;
+                       df_handle->dfh_table = default_freezer_mapping_create(df_handle->dfh_compact_object, df_handle->dfh_compact_offset);
+                       if (!df_handle->dfh_table) {
+                               kr = KERN_FAILURE;
+                       }       
+               }
+       }
+
+       return kr;
+}
+
+void
+default_freezer_handle_reference_locked(
+       default_freezer_handle_t df_handle)
+{
+       assert(df_handle);
+       df_handle->dfh_ref_count++;
+}
+
+void
+default_freezer_handle_deallocate(
+       default_freezer_handle_t df_handle)
+{
+       assert(df_handle);
+       default_freezer_handle_lock(df_handle);
+       if (default_freezer_handle_deallocate_locked(df_handle)) {
+               default_freezer_handle_unlock(df_handle);
+       }
+}
+
+boolean_t
+default_freezer_handle_deallocate_locked(
+       default_freezer_handle_t df_handle)
+{
+       boolean_t       should_unlock = TRUE;
+
+       assert(df_handle);
+       df_handle->dfh_ref_count--;
+       if (df_handle->dfh_ref_count == 0) {
+               lck_rw_destroy(&df_handle->dfh_lck, &default_freezer_handle_lck_grp);
+               kfree(df_handle, sizeof(struct default_freezer_handle));
+               should_unlock = FALSE;
+       }
+       return should_unlock;
+}
+
+void
+default_freezer_pageout(
+       default_freezer_handle_t df_handle)
+{
+       assert(df_handle);
+
+       vm_object_pageout(df_handle->dfh_compact_object);
+}
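
Taken together, the handle routines imply a freeze/thaw lifecycle along these lines. This is a hedged sketch only: the counter out-parameters (purgeable, wired, clean, dirty, shared), dirty_budget, and src_object are placeholders for the real caller's state, and error handling is elided.

	kern_return_t kr;
	default_freezer_handle_t dfh = default_freezer_handle_allocate();   /* ref count starts at 1 */

	if (default_freezer_handle_init(dfh) == KERN_SUCCESS) {
		/* freeze: pack the source object's pages into the compact object... */
		kr = default_freezer_pack(&purgeable, &wired, &clean, &dirty,
		                          dirty_budget, &shared, src_object, dfh);
		/* ...then push the compact object out through the default pager */
		default_freezer_pageout(dfh);

		/* thaw: page the compact object back in and rename each page home */
		default_freezer_unpack(dfh);
	}
	default_freezer_handle_deallocate(dfh);   /* drops the allocation reference */
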
+
 #endif /* CONFIG_FREEZE */
index 46730fd71791cf0a1829234bf6c8ee1bd568c5ab..f08de63a57cc0c4a477c507ebc7df4a8aed3f9ab 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-#if CONFIG_FREEZE
-
 #ifndef        _DEFAULT_FREEZER_H_
 #define _DEFAULT_FREEZER_H_
 
+#if CONFIG_FREEZE
+
 #ifdef MACH_KERNEL
 
 #include <default_pager/default_pager_internal.h>
@@ -41,6 +41,7 @@
 #include <mach/memory_object_server.h>
 #include <mach/upl.h>
 #include <mach/vm_map.h>
+#include <vm/vm_protos.h>
 #include <vm/memory_object.h>
 #include <vm/vm_pageout.h> 
 #include <vm/vm_map.h>
@@ -122,21 +123,35 @@ struct default_freezer_mapping_table {
 };
 typedef struct default_freezer_mapping_table_entry *default_freezer_mapping_table_entry_t;
 
+struct default_freezer_handle {
+       lck_rw_t                                dfh_lck;
+       uint32_t                                dfh_ref_count;
+       default_freezer_mapping_table_t         dfh_table;
+       vm_object_t                             dfh_compact_object;
+       vm_object_offset_t                      dfh_compact_offset;
+};
+typedef struct default_freezer_handle  *default_freezer_handle_t;
+
 struct default_freezer_memory_object{
        struct ipc_object_header        fo_pager_header;        /* fake ip_kotype() */
        memory_object_pager_ops_t       fo_pager_ops;           /* == &default_freezer_ops */
        memory_object_control_t         fo_pager_control;
-       vm_object_t                             fo_compact_object;
-       default_freezer_mapping_table_t         fo_table;
+       default_freezer_handle_t        fo_df_handle;
 };
 typedef struct default_freezer_memory_object *default_freezer_memory_object_t;
 
 
-__private_extern__ void*       default_freezer_mapping_create(vm_object_t, vm_offset_t);
+__private_extern__ void        default_freezer_handle_lock(default_freezer_handle_t);
+__private_extern__ void        default_freezer_handle_unlock(default_freezer_handle_t);
 
-__private_extern__ void                default_freezer_mapping_free(void**, boolean_t all);
+extern lck_grp_attr_t  default_freezer_handle_lck_grp_attr;    
+extern lck_grp_t       default_freezer_handle_lck_grp;
 
-__private_extern__  kern_return_t      default_freezer_mapping_store( default_freezer_mapping_table_t *,
+__private_extern__ default_freezer_mapping_table_t     default_freezer_mapping_create(vm_object_t, vm_offset_t);
+
+__private_extern__ void                default_freezer_mapping_free(default_freezer_mapping_table_t *table_p, boolean_t all);
+
+__private_extern__  kern_return_t      default_freezer_mapping_store( default_freezer_mapping_table_t ,
                                                                        memory_object_offset_t,
                                                                        memory_object_t,
                                                                        memory_object_offset_t );
@@ -147,14 +162,12 @@ __private_extern__ kern_return_t  default_freezer_mapping_update( default_freezer
                                                                        memory_object_offset_t *,
                                                                        boolean_t );
 
-__private_extern__  void       default_freezer_memory_object_create(vm_object_t, vm_object_t, default_freezer_mapping_table_t);
-
-__private_extern__  void       default_freezer_pack_page(vm_page_t, vm_object_t, vm_object_offset_t, void**);
+__private_extern__ void        default_freezer_handle_reference_locked(default_freezer_handle_t);
 
-__private_extern__  void       default_freezer_unpack(vm_object_t, void**);
+__private_extern__ boolean_t   default_freezer_handle_deallocate_locked(default_freezer_handle_t);
 
-__private_extern__ vm_object_t default_freezer_get_compact_vm_object(void**);
+__private_extern__ void        default_freezer_memory_object_create(vm_object_t, default_freezer_handle_t);
 
 #endif /* MACH_KERNEL */
-#endif /* DEFAULT_FREEZER_H */
 #endif /* CONFIG_FREEZE */
+#endif /* DEFAULT_FREEZER_H */
index de7baff299a1821cc16e4cf6e17ac9bc2c7c1c41..67b69df41c6ba61ca90f7760543715d8a8f80b67 100644 (file)
@@ -176,57 +176,12 @@ memory_object_lock_page(
             m, should_return, should_flush, prot, 0);
 
 
-       if (m->busy || m->cleaning) {
-               if (m->list_req_pending &&
-                   should_return == MEMORY_OBJECT_RETURN_NONE &&
-                   should_flush == TRUE) {
+       if (m->busy || m->cleaning)
+               return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 
-                       if (m->absent) {
-                               /*
-                                * this is the list_req_pending | absent | busy case
-                                * which originates from vm_fault_page. 
-                                * Combine that with should_flush == TRUE and we
-                                * have a case where we need to toss the page from
-                                * the object.
-                                */
-                               if (!VM_PAGE_WIRED(m)) {
-                                       return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
-                               } else {
-                                       return (MEMORY_OBJECT_LOCK_RESULT_DONE);
-                               }
-                       }
-                       if  (m->pageout || m->cleaning) {
-                               /*
-                                * if pageout is set, page was earmarked by vm_pageout_scan
-                                * to be cleaned and stolen... if cleaning is set, we're
-                                * pre-cleaning pages for a hibernate...
-                                * in either case, we're going
-                                * to take it back since we are being asked to
-                                * flush the page w/o cleaning it (i.e. we don't
-                                * care that it's dirty, we want it gone from
-                                * the cache) and we don't want to stall
-                                * waiting for it to be cleaned for 2 reasons...
-                                * 1 - no use paging it out since we're probably
-                                *     shrinking the file at this point or we no
-                                *     longer care about the data in the page
-                                * 2 - if we stall, we may cause a deadlock in
-                                *     the FS trying to acquire its locks
-                                *     on the VNOP_PAGEOUT path presuming that
-                                *     those locks are already held on the truncate
-                                *     path before calling through to this function
-                                *
-                                * so undo all of the state that vm_pageout_scan
-                                * hung on this page
-                                */
+       if (m->laundry)
+               vm_pageout_steal_laundry(m, FALSE);
 
-                               vm_pageout_queue_steal(m, FALSE);
-                               PAGE_WAKEUP_DONE(m);
-                       } else {
-                               panic("list_req_pending on page %p without absent/pageout/cleaning set\n", m);
-                       }
-               } else
-                       return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
-       }
        /*
         *      Don't worry about pages for which the kernel
         *      does not have any data.
@@ -262,8 +217,9 @@ memory_object_lock_page(
                 * for the page to go from the clean to the dirty state
                 * after we've made our decision
                 */
-               if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
-                       m->dirty = TRUE;
+               if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
+                       SET_PAGE_DIRTY(m, FALSE);
+               }
        } else {
                /*
                 * If we are decreasing permission, do it now;
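
Every direct m->dirty = TRUE store in this change becomes SET_PAGE_DIRTY(m, set_pmap_modified), centralizing the dirty transition. The macro's definition is not part of this hunk; a plausible minimal shape, inferred from the call sites — the pmap_set_modify() step is an assumption about what the second argument controls, not the actual xnu definition:

	/* Hedged sketch; not the actual xnu macro. */
	#define SET_PAGE_DIRTY(m, set_pmap_modified)			\
		MACRO_BEGIN						\
		vm_page_t __page__ = (m);				\
		if ((set_pmap_modified) && __page__->pmapped)		\
			pmap_set_modify(__page__->phys_page);		\
		__page__->dirty = TRUE;					\
		MACRO_END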
@@ -650,12 +606,6 @@ vm_object_update_extent(
                                data_cnt += PAGE_SIZE;
                                next_offset = offset + PAGE_SIZE_64;
 
-                               /*
-                                * Clean
-                                */
-                               m->list_req_pending = TRUE;
-                               m->cleaning = TRUE;
-
                                /*
                                 * wired pages shouldn't be flushed and
                                 * since they aren't on any queue,
@@ -667,10 +617,7 @@ vm_object_update_extent(
                                                /*
                                                 * add additional state for the flush
                                                 */
-                                               m->busy = TRUE;
                                                m->pageout = TRUE;
-
-                                               dwp->dw_mask |= DW_vm_page_wire;
                                        }
                                        /*
                                         * we used to remove the page from the queues at this
@@ -858,6 +805,7 @@ vm_object_update(
                fault_info.io_sync = FALSE;
                fault_info.cs_bypass = FALSE;
                fault_info.mark_zf_absent = FALSE;
+               fault_info.batch_pmap_op = FALSE;
 
                vm_object_paging_begin(copy_object);
 
@@ -1793,7 +1741,6 @@ host_default_memory_manager(
 
                thread_wakeup((event_t) &memory_manager_default);
 
-#ifndef CONFIG_FREEZE
                /*
                 * Now that we have a default pager for anonymous memory,
                 * reactivate all the throttled pages (i.e. dirty pages with
@@ -1803,7 +1750,6 @@ host_default_memory_manager(
                {
                        vm_page_reactivate_all_throttled();
                }
-#endif
        }
  out:
        lck_mtx_unlock(&memory_manager_default_lock);
index 76d7cb3056a09d90ea20088820fe792f1bca5c5d..26c26d6c0b2cd5af98ce5f50a2a28046105c62ab 100644 (file)
@@ -187,6 +187,7 @@ extern void         pmap_virtual_space(
  *     Routines to manage the physical map data structure.
  */
 extern pmap_t          pmap_create(    /* Create a pmap_t. */
+                               ledger_t        ledger,
                                vm_map_size_t   size,
 #ifdef __i386__
                                boolean_t       is_64bit);
@@ -204,6 +205,7 @@ extern void         pmap_enter(     /* Enter a mapping */
                                vm_map_offset_t v,
                                ppnum_t         pn,
                                vm_prot_t       prot,
+                               vm_prot_t       fault_type,
                                unsigned int    flags,
                                boolean_t       wired);
 
@@ -212,6 +214,7 @@ extern kern_return_t        pmap_enter_options(
                                           vm_map_offset_t v,
                                           ppnum_t pn,
                                           vm_prot_t prot,
+                                          vm_prot_t fault_type,
                                           unsigned int flags,
                                           boolean_t wired,
                                           unsigned int options);
@@ -374,24 +377,25 @@ extern kern_return_t      (pmap_attribute)(       /* Get/Set special memory
 /*
  *     Macro to be used in place of pmap_enter()
  */
-#define PMAP_ENTER(pmap, virtual_address, page, protection, flags, wired) \
+#define PMAP_ENTER(pmap, virtual_address, page, protection, fault_type, flags, wired) \
        MACRO_BEGIN                                                     \
        pmap_t          __pmap = (pmap);                                \
        vm_page_t       __page = (page);                                \
                                                                        \
        PMAP_ENTER_CHECK(__pmap, __page)                                \
-       pmap_enter(__pmap,                                      \
+       pmap_enter(__pmap,                                              \
                (virtual_address),                                      \
                __page->phys_page,                                      \
-                       (protection),                                   \
+               (protection),                                           \
+               (fault_type),                                           \
                (flags),                                                \
                (wired));                                               \
        MACRO_END
 #endif /* !PMAP_ENTER */
 
 #ifndef        PMAP_ENTER_OPTIONS
-#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection,    \
-                               flags, wired, options, result) \
+#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection, fault_type,        \
+                               flags, wired, options, result)          \
        MACRO_BEGIN                                                     \
        pmap_t          __pmap = (pmap);                                \
        vm_page_t       __page = (page);                                \
@@ -400,13 +404,41 @@ extern kern_return_t      (pmap_attribute)(       /* Get/Set special memory
        result = pmap_enter_options(__pmap,                             \
                (virtual_address),                                      \
                __page->phys_page,                                      \
-                       (protection),                                   \
+               (protection),                                           \
+               (fault_type),                                           \
                (flags),                                                \
                (wired),                                                \
-               options);                                       \
+               options);                                               \
        MACRO_END
 #endif /* !PMAP_ENTER_OPTIONS */
 
+#ifndef PMAP_SET_CACHE_ATTR
+#define PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op)            \
+       MACRO_BEGIN                                                             \
+               if (!batch_pmap_op) {                                           \
+                       pmap_set_cache_attributes(mem->phys_page, cache_attr);  \
+                       object->set_cache_attr = TRUE;                          \
+               }                                                               \
+       MACRO_END                                                       
+#endif /* PMAP_SET_CACHE_ATTR */
+
+#ifndef PMAP_BATCH_SET_CACHE_ATTR
+#define PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list,                      \
+                                       cache_attr, num_pages, batch_pmap_op)   \
+       MACRO_BEGIN                                                             \
+               if ((batch_pmap_op)) {                                          \
+                       unsigned int __page_idx=0;                              \
+                       while (__page_idx < (num_pages)) {                      \
+                               pmap_set_cache_attributes(                      \
+                                       user_page_list[__page_idx].phys_addr,   \
+                                       (cache_attr));                          \
+                               __page_idx++;                                   \
+                       }                                                       \
+                       (object)->set_cache_attr = TRUE;                        \
+               }                                                               \
+       MACRO_END
+#endif /* PMAP_BATCH_SET_CACHE_ATTR */
+
 #define PMAP_ENTER_CHECK(pmap, page)                                   \
 {                                                                      \
        if ((pmap) != kernel_pmap) {                                    \
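
The two new macros split cache-attribute maintenance between the single-page path and the batched UPL path. A hedged usage sketch, with VM_WIMG_IO as a placeholder attribute and user_page_list assumed to be the UPL's upl_page_info_t array (as its phys_addr member suggests):

	/* one page, outside any batched pmap operation */
	PMAP_SET_CACHE_ATTR(mem, object, VM_WIMG_IO, FALSE);

	/* an entire page list in one pass; the object is marked once */
	PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list,
				  VM_WIMG_IO, num_pages, TRUE);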
@@ -494,10 +526,14 @@ extern pmap_t     kernel_pmap;                    /* The kernel's map */
 #define VM_WIMG_MASK           0xFF
 
 #define VM_MEM_SUPERPAGE       0x100           /* map a superpage instead of a base page */
+#define VM_MEM_STACK           0x200
 
 #define PMAP_OPTIONS_NOWAIT    0x1             /* don't block, return 
                                                 * KERN_RESOURCE_SHORTAGE 
                                                 * instead */
+#define PMAP_OPTIONS_NOENTER   0x2             /* expand pmap if needed
+                                                * but don't enter mapping
+                                                */
 
 #if    !defined(__LP64__)
 extern vm_offset_t     pmap_extract(pmap_t pmap,
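
PMAP_OPTIONS_NOWAIT and the new PMAP_OPTIONS_NOENTER are independent bit options, and the fault path later in this diff uses them as a pair: attempt a non-blocking enter first and, on shortage, pre-expand the pmap without entering a mapping before re-driving the fault. Condensed from those hunks:

	kern_return_t	pe_result;

	/* don't sleep for pmap metadata while object locks are held */
	pe_result = pmap_enter_options(pmap, vaddr, m->phys_page,
				       prot, fault_type, 0, wired,
				       PMAP_OPTIONS_NOWAIT);

	if (pe_result == KERN_RESOURCE_SHORTAGE) {
		/* with every lock dropped: grow the page tables only... */
		(void) pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0,
					  PMAP_OPTIONS_NOENTER);
		/* ...then retry the fault from the top */
	}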
index ef46cfeca1ae96ca97175555d756216f51bc555d..a98fdbb3af325444e14a9e28c0b7b81810f9d19d 100644 (file)
@@ -357,6 +357,7 @@ apple_protect_pager_data_request(
        fault_info.stealth = TRUE;
        fault_info.io_sync = FALSE;
        fault_info.mark_zf_absent = FALSE;
+       fault_info.batch_pmap_op = FALSE;
        interruptible = fault_info.interruptible;
 
        pager = apple_protect_pager_lookup(mem_obj);
@@ -512,6 +513,7 @@ apple_protect_pager_data_request(
                           kernel_mapping,
                           src_page->phys_page,
                           VM_PROT_READ,
+                          VM_PROT_NONE,
                           0,
                           TRUE);
                /*
@@ -527,6 +529,7 @@ apple_protect_pager_data_request(
                           kernel_mapping + PAGE_SIZE_64,
                           dst_pnum,
                           VM_PROT_READ | VM_PROT_WRITE,
+                          VM_PROT_NONE,
                           0,
                           TRUE);
 
index ab281c92821544ffd75248bdb7c66da26de54dcb..6dc7767f59b4f254c4c80d8311b7d9893c869d68 100644 (file)
@@ -64,7 +64,6 @@
 
 #include <mach_cluster_stats.h>
 #include <mach_pagemap.h>
-#include <mach_kdb.h>
 #include <libkern/OSAtomic.h>
 
 #include <mach/mach_types.h>
@@ -141,10 +140,6 @@ extern unsigned int dp_pages_free, dp_pages_reserve;
 
 extern int cs_debug;
 
-#if    MACH_KDB
-extern struct db_watchpoint *db_watchpoint_list;
-#endif /* MACH_KDB */
-
 boolean_t current_thread_aborted(void);
 
 /* Forward declarations of internal routines. */
@@ -173,6 +168,7 @@ extern void vm_fault_classify_init(void);
 #endif
 
 unsigned long vm_pmap_enter_blocked = 0;
+unsigned long vm_pmap_enter_retried = 0;
 
 unsigned long vm_cs_validates = 0;
 unsigned long vm_cs_revalidates = 0;
@@ -233,7 +229,7 @@ vm_fault_cleanup(
        register vm_page_t      top_page)
 {
        vm_object_paging_end(object);
-       vm_object_unlock(object);
+       vm_object_unlock(object);
 
        if (top_page != VM_PAGE_NULL) {
                object = top_page->object;
@@ -493,7 +489,7 @@ vm_fault_deactivate_behind(
         for (n = 0; n < max_pages_in_run; n++) {
                m = vm_page_lookup(object, offset + run_offset + (n * pg_offset));
 
-               if (m && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) {
+               if (m && !m->laundry && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) {
                        page_run[pages_in_run++] = m;
                        pmap_clear_reference(m->phys_page);
                }
@@ -698,6 +694,12 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
 
                assert(!VM_PAGE_WIRED(m));
 
+               /*
+                * can't be on the pageout queue since we don't
+                * have a pager to try and clean to
+                */
+               assert(!m->pageout_queue);
+
                VM_PAGE_QUEUES_REMOVE(m);
 
                 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
@@ -705,11 +707,6 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
                 vm_page_throttled_count++;
 
                vm_page_unlock_queues();
-       } else {
-               if (current_thread()->t_page_creation_count > vm_page_creation_throttle) {
-                       m->zero_fill = TRUE;
-                       VM_ZF_COUNT_INCR();
-               }
        }
        return (my_fault);
 }
@@ -764,6 +761,7 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
  *             paging_in_progress reference.
  */
 unsigned int vm_fault_page_blocked_access = 0;
+unsigned int vm_fault_page_forced_retry = 0;
 
 vm_fault_return_t
 vm_fault_page(
@@ -799,12 +797,16 @@ vm_fault_page(
        vm_object_t             next_object;
        vm_object_t             copy_object;
        boolean_t               look_for_page;
+       boolean_t               force_fault_retry = FALSE;
        vm_prot_t               access_required = fault_type;
        vm_prot_t               wants_copy_flag;
        CLUSTER_STAT(int pages_at_higher_offsets;)
        CLUSTER_STAT(int pages_at_lower_offsets;)
        kern_return_t           wait_result;
        boolean_t               interruptible_state;
+       boolean_t               data_already_requested = FALSE;
+       vm_behavior_t           orig_behavior;
+       vm_size_t               orig_cluster_size;
        vm_fault_return_t       error;
        int                     my_fault;
        uint32_t                try_failed_count;
@@ -866,25 +868,6 @@ vm_fault_page(
        dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
 #endif
 
-
-#if    MACH_KDB
-               /*
-                *      If there are watchpoints set, then
-                *      we don't want to give away write permission
-                *      on a read fault.  Make the task write fault,
-                *      so that the watchpoint code notices the access.
-                */
-           if (db_watchpoint_list) {
-               /*
-                *      If we aren't asking for write permission,
-                *      then don't give it away.  We're using write
-                *      faults to set the dirty bit.
-                */
-               if (!(fault_type & VM_PROT_WRITE))
-                       *protection &= ~VM_PROT_WRITE;
-       }
-#endif /* MACH_KDB */
-
        interruptible = fault_info->interruptible;
        interruptible_state = thread_interrupt_level(interruptible);
  
@@ -986,116 +969,35 @@ vm_fault_page(
                                /*
                                 * The page is being brought in,
                                 * wait for it and then retry.
-                                *
-                                * A possible optimization: if the page
-                                * is known to be resident, we can ignore
-                                * pages that are absent (regardless of
-                                * whether they're busy).
                                 */
 #if TRACEFAULTPAGE
                                dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */
 #endif
-                               if (m->list_req_pending) {
-                                       /*
-                                        * "list_req_pending" means that the
-                                        * page has been marked for a page-in
-                                        * or page-out operation but hasn't been
-                                        * grabbed yet.
-                                        * Since whoever marked it
-                                        * "list_req_pending" might now be
-                                        * making its way through other layers
-                                        * of code and possibly blocked on locks
-                                        * that we might be holding, we can't
-                                        * just block on a "busy" and
-                                        * "list_req_pending" page or we might
-                                        * deadlock with that other thread.
-                                        * 
-                                        * [ For pages backed by a file on an
-                                        * HFS volume, we might deadlock with
-                                        * the HFS truncate lock, for example:
-                                        * A: starts a pageout or pagein
-                                        * operation and marks a page "busy",
-                                        * "list_req_pending" and either
-                                        * "pageout", "cleaning" or "absent".
-                                        * A: makes its way through the
-                                        * memory object (vnode) code.
-                                        * B: starts from the memory object
-                                        * side, via a write() on a file, for
-                                        * example.
-                                        * B: grabs some filesystem locks.
-                                        * B: attempts to grab the same page for
-                                        * its I/O.
-                                        * B: blocks here because the page is
-                                        * "busy".
-                                        * A: attempts to grab the filesystem
-                                        * lock we're holding.
-                                        * And we have a deadlock... ]
-                                        *
-                                        * Since the page hasn't been claimed
-                                        * by the other thread yet, it's fair
-                                        * for us to grab here.
-                                        */
-                                       if (m->absent) {
-                                               /*
-                                                * The page needs to be paged
-                                                * in.  We can do it here but we
-                                                * need to get rid of "m", the
-                                                * place holder page inserted by
-                                                * another thread who is also
-                                                * trying to page it in.  When
-                                                * that thread resumes, it will
-                                                * either wait for our page to
-                                                * arrive or it will find it
-                                                * already there.
-                                                */
-                                               VM_PAGE_FREE(m);
+                               wait_result = PAGE_SLEEP(object, m, interruptible);
 
-                                               /*
-                                                * Retry the fault.  We'll find
-                                                * that the page is not resident
-                                                * and initiate a page-in again.
-                                                */
-                                               continue;
-                                       }
-                                       if (m->pageout || m->cleaning) {
-                                               /*
-                                                * This page has been selected
-                                                * for a page-out but we want
-                                                * to bring it in.  Let's just
-                                                * cancel the page-out...
-                                                */
-                                               vm_pageout_queue_steal(m, FALSE);
-                                               /*
-                                                * ... and clear "busy" and
-                                                * wake up any waiters...
-                                                */
-                                               PAGE_WAKEUP_DONE(m);
-                                               /*
-                                                * ... and continue with the
-                                                * "fault" handling.
-                                                */
-                                       }
-                               } else {
-                                       wait_result = PAGE_SLEEP(object, m, interruptible);
-                                       XPR(XPR_VM_FAULT,
-                                           "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
-                                               object, offset,
-                                               m, 0, 0);
-                                       counter(c_vm_fault_page_block_busy_kernel++);
+                               XPR(XPR_VM_FAULT,
+                                   "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
+                                   object, offset,
+                                   m, 0, 0);
+                               counter(c_vm_fault_page_block_busy_kernel++);
 
-                                       if (wait_result != THREAD_AWAKENED) {
-                                               vm_fault_cleanup(object, first_m);
-                                               thread_interrupt_level(interruptible_state);
+                               if (wait_result != THREAD_AWAKENED) {
+                                       vm_fault_cleanup(object, first_m);
+                                       thread_interrupt_level(interruptible_state);
 
-                                               if (wait_result == THREAD_RESTART)
-                                                       return (VM_FAULT_RETRY);
-                                               else
-                                                       return (VM_FAULT_INTERRUPTED);
-                                       }
-                                       continue;
+                                       if (wait_result == THREAD_RESTART)
+                                               return (VM_FAULT_RETRY);
+                                       else
+                                               return (VM_FAULT_INTERRUPTED);
                                }
+                               continue;
                        }
+                       if (m->laundry) {
+                               m->pageout = FALSE;
 
+                               if (!m->cleaning) 
+                                       vm_pageout_steal_laundry(m, FALSE);
+                       }
                        if (m->phys_page == vm_page_guard_addr) {
                                /*
                                 * Guard page: off limits !
@@ -1253,7 +1155,10 @@ vm_fault_page(
                                                m->busy = TRUE;
 
                                                vm_page_lockspin_queues();
+
+                                               assert(!m->pageout_queue);
                                                VM_PAGE_QUEUES_REMOVE(m);
+
                                                vm_page_unlock_queues();
                                        }
                                        XPR(XPR_VM_FAULT,
@@ -1348,7 +1253,8 @@ vm_fault_page(
                                 * the page in the speculative queue.
                                 */
                                vm_page_lockspin_queues();
-                               VM_PAGE_QUEUES_REMOVE(m);
+                               if (m->speculative)
+                                       VM_PAGE_QUEUES_REMOVE(m);
                                vm_page_unlock_queues();
                        }
 
@@ -1416,14 +1322,17 @@ vm_fault_page(
                 * this object can provide the data or we're the top object...
                 * object is locked;  m == NULL
                 */
+               if (must_be_resident)
+                       goto dont_look_for_page;
+
                look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset) == TRUE) && !data_supply);
                
 #if TRACEFAULTPAGE
                dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);      /* (TEST/DEBUG) */
 #endif
-               if ((look_for_page || (object == first_object)) && !must_be_resident && !object->phys_contiguous) {
+               if (!look_for_page && object == first_object && !object->phys_contiguous) {
                        /*
-                        * Allocate a new page for this object/offset pair
+                        * Allocate a new page for this object/offset pair as a placeholder
                         */
                        m = vm_page_grab();
 #if TRACEFAULTPAGE
@@ -1436,9 +1345,14 @@ vm_fault_page(
 
                                return (VM_FAULT_MEMORY_SHORTAGE);
                        }
-                       vm_page_insert(m, object, offset);
+
+                       if (fault_info && fault_info->batch_pmap_op == TRUE) {
+                               vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE);
+                       } else {
+                               vm_page_insert(m, object, offset);
+                       }
                }
-               if (look_for_page && !must_be_resident) {
+               if (look_for_page) {
                        kern_return_t   rc;
 
                        /*
@@ -1523,12 +1437,8 @@ vm_fault_page(
                                }
                        }
                        if (m != VM_PAGE_NULL) {
-                               /*
-                                * Indicate that the page is waiting for data
-                                * from the memory manager.
-                                */
-                               m->list_req_pending = TRUE;
-                               m->absent = TRUE;
+                               VM_PAGE_FREE(m);
+                               m = VM_PAGE_NULL;
                        }
 
 #if TRACEFAULTPAGE
@@ -1577,6 +1487,45 @@ vm_fault_page(
                                object, offset, m,
                                access_required | wants_copy_flag, 0);
 
+                       if (object->copy == first_object) {
+                               /*
+                                * if we issue the memory_object_data_request in
+                                * this state, we are subject to a deadlock with
+                                * the underlying filesystem if it is trying to
+                                * shrink the file resulting in a push of pages
+                                * into the copy object...  that push will stall
+                                * on the placeholder page, and if the pushing thread
+                                * is holding a lock that is required on the pagein
+                                * path (such as a truncate lock), we'll deadlock...
+                                * to avoid this potential deadlock, we throw away
+                                * our placeholder page before calling memory_object_data_request
+                                * and force this thread to retry the vm_fault_page after
+                                * we have issued the I/O.  the second time through this path
+                                * we will find the page already in the cache (presumably still
+                                * busy waiting for the I/O to complete) and then complete
+                                * the fault w/o having to go through memory_object_data_request again
+                                */
+                               assert(first_m != VM_PAGE_NULL);
+                               assert(first_m->object == first_object);
+                                       
+                               vm_object_lock(first_object);
+                               VM_PAGE_FREE(first_m);
+                               vm_object_paging_end(first_object);
+                               vm_object_unlock(first_object);
+
+                               first_m = VM_PAGE_NULL;
+                               force_fault_retry = TRUE;
+
+                               vm_fault_page_forced_retry++;
+                       }
+
+                       if (data_already_requested == TRUE) {
+                               orig_behavior = fault_info->behavior;
+                               orig_cluster_size = fault_info->cluster_size;
+
+                               fault_info->behavior = VM_BEHAVIOR_RANDOM;
+                               fault_info->cluster_size = PAGE_SIZE;
+                       }
                        /*
                         * Call the memory manager to retrieve the data.
                         */
@@ -1587,6 +1536,12 @@ vm_fault_page(
                                access_required | wants_copy_flag,
                                (memory_object_fault_info_t)fault_info);
 
+                       if (data_already_requested == TRUE) {
+                               fault_info->behavior = orig_behavior;
+                               fault_info->cluster_size = orig_cluster_size;
+                       } else
+                               data_already_requested = TRUE;
+
 #if TRACEFAULTPAGE
                        dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
 #endif
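
Throwing away the placeholder and retrying means a single fault can pass through here twice; data_already_requested clamps clustering on the second pass so the repeated request stays a single-page, no-readahead I/O. Condensed, the save/restore pattern around the pager call is:

	if (data_already_requested == TRUE) {
		/* second pass: single page, no readahead */
		orig_behavior     = fault_info->behavior;
		orig_cluster_size = fault_info->cluster_size;
		fault_info->behavior     = VM_BEHAVIOR_RANDOM;
		fault_info->cluster_size = PAGE_SIZE;
	}

	rc = memory_object_data_request(object->pager,
		offset + object->paging_offset, PAGE_SIZE,
		access_required | wants_copy_flag,
		(memory_object_fault_info_t)fault_info);

	if (data_already_requested == TRUE) {
		fault_info->behavior     = orig_behavior;
		fault_info->cluster_size = orig_cluster_size;
	} else
		data_already_requested = TRUE;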
@@ -1615,6 +1570,13 @@ vm_fault_page(
 
                                return (VM_FAULT_INTERRUPTED);
                        }
+                       if (force_fault_retry == TRUE) {
+
+                               vm_fault_cleanup(object, first_m);
+                               thread_interrupt_level(interruptible_state);
+
+                               return (VM_FAULT_RETRY);
+                       }
                        if (m == VM_PAGE_NULL && object->phys_contiguous) {
                                /*
                                 * No page here means that the object we
@@ -1646,7 +1608,7 @@ vm_fault_page(
                         */
                        continue;
                }
-
+dont_look_for_page:
                /*
                 * We get here if the object has no pager, or an existence map 
                 * exists and indicates the page isn't present on the pager
@@ -1899,7 +1861,7 @@ vm_fault_page(
                         */
                        assert(copy_m->busy);
                        vm_page_insert(copy_m, object, offset);
-                       copy_m->dirty = TRUE;
+                       SET_PAGE_DIRTY(copy_m, TRUE);
 
                        m = copy_m;
                        /*
@@ -2111,17 +2073,80 @@ vm_fault_page(
                                vm_page_activate(copy_m);
                                vm_page_unlock_queues();
 
-                               copy_m->dirty = TRUE;
+                               SET_PAGE_DIRTY(copy_m, TRUE);
                                PAGE_WAKEUP_DONE(copy_m);
-                       } 
-                       else {
+
+                       } else if (copy_object->internal) {
+                               /*
+                                * For internal objects check with the pager to see
+                                * if the page already exists in the backing store.
+                                * If yes, then we can drop the copy page. If not,
+                                * then we'll activate it, mark it dirty and keep it
+                                * around.
+                                */
+                               
+                               kern_return_t kr = KERN_SUCCESS;
+
+                               memory_object_t copy_pager = copy_object->pager;
+                               assert(copy_pager != MEMORY_OBJECT_NULL);
+                               vm_object_paging_begin(copy_object);
+
+                               vm_object_unlock(copy_object);
+
+                               kr = memory_object_data_request(
+                                       copy_pager,
+                                       copy_offset + copy_object->paging_offset,
+                                       0, /* Only query the pager. */
+                                       VM_PROT_READ,
+                                       NULL);
+                               
+                               vm_object_lock(copy_object);
+
+                               vm_object_paging_end(copy_object);
+
+                               /*
+                                * Since we dropped the copy_object's lock,
+                                * check whether we'll have to deallocate 
+                                * the hard way.
+                                */
+                               if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) {
+                                       vm_object_unlock(copy_object);
+                                       vm_object_deallocate(copy_object);
+                                       vm_object_lock(object);
+
+                                       continue;
+                               }
+                               if (kr == KERN_SUCCESS) {
+                                       /*
+                                        * The pager has the page. We don't want to overwrite
+                                        * that page by sending this one out to the backing store.
+                                        * So we drop the copy page.
+                                        */
+                                       VM_PAGE_FREE(copy_m);
+
+                               } else {
+                                       /*
+                                        * The pager doesn't have the page. We'll keep this one
+                                        * around in the copy object. It might get sent out to 
+                                        * the backing store under memory pressure.      
+                                        */
+                                       vm_page_lockspin_queues();
+                                       assert(!m->cleaning);
+                                       vm_page_activate(copy_m);
+                                       vm_page_unlock_queues();
+
+                                       SET_PAGE_DIRTY(copy_m, TRUE);
+                                       PAGE_WAKEUP_DONE(copy_m);
+                               } 
+                       } else {
+                               
                                assert(copy_m->busy == TRUE);
                                assert(!m->cleaning);
 
                                /*
                                 * dirty is protected by the object lock
                                 */
-                               copy_m->dirty = TRUE;
+                               SET_PAGE_DIRTY(copy_m, TRUE);
 
                                /*
                                 * The page is already ready for pageout:
@@ -2159,6 +2184,7 @@ vm_fault_page(
                                 */
                                vm_object_lock(object);
                        }
+
                        /*
                         * Because we're pushing a page upward
                         * in the object tree, we must restart
@@ -2287,6 +2313,7 @@ vm_fault_enter(vm_page_t m,
               boolean_t change_wiring,
               boolean_t no_cache,
               boolean_t cs_bypass,
+              boolean_t *need_retry,
               int *type_of_fault)
 {
        kern_return_t   kr, pe_result;
@@ -2532,19 +2559,38 @@ vm_fault_enter(vm_page_t m,
                /* Prevent a deadlock by not
                 * holding the object lock if we need to wait for a page in
                 * pmap_enter() - <rdar://problem/7138958> */
-               PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, 0,
+               PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 0,
                                  wired, PMAP_OPTIONS_NOWAIT, pe_result);
 
                if(pe_result == KERN_RESOURCE_SHORTAGE) {
+
+                       if (need_retry) {
+                               /*
+                                * this will be non-null in the case where we hold the lock
+                                * on the top-object in this chain... we can't just drop
+                                * the lock on the object we're inserting the page into
+                                * and recall the PMAP_ENTER since we can still cause
+                                * a deadlock if one of the critical paths tries to 
+                                * acquire the lock on the top-object and we're blocked
+                                * in PMAP_ENTER waiting for memory... our only recourse
+                                * is to deal with it at a higher level where we can 
+                                * drop both locks.
+                                */
+                               *need_retry = TRUE;
+                               vm_pmap_enter_retried++;
+                               goto after_the_pmap_enter;
+                       }
                        /* The nonblocking version of pmap_enter did not succeed.
-                        * Use the blocking version instead. Requires marking
+                        * and we don't need to drop other locks and retry
+                        * at the level above us, so 
+                        * use the blocking version instead. Requires marking
                         * the page busy and unlocking the object */
                        boolean_t was_busy = m->busy;
                        m->busy = TRUE;
                        vm_object_unlock(m->object);
                        
-                       PMAP_ENTER(pmap, vaddr, m, prot, 0, wired);
-
+                       PMAP_ENTER(pmap, vaddr, m, prot, fault_type, 0, wired);
+                               
                        /* Take the object lock again. */
                        vm_object_lock(m->object);
                        
@@ -2582,7 +2628,7 @@ after_the_pmap_enter:
                        vm_page_deactivate(m);
                        vm_page_unlock_queues();
                } else {
-                       if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) {
+                       if (((!m->active && !m->inactive) || m->clean_queue || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) {
 
                                if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) {
                                        struct vpl      *lq;
@@ -2632,27 +2678,35 @@ after_the_pmap_enter:
                                /*
                                 * test again now that we hold the page queue lock
                                 */
-                               if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m)) {
+                               if (!VM_PAGE_WIRED(m)) {
+                                       if (m->clean_queue) {
+                                               VM_PAGE_QUEUES_REMOVE(m);
 
-                                       /*
-                                        * If this is a no_cache mapping and the page has never been
-                                        * mapped before or was previously a no_cache page, then we
-                                        * want to leave pages in the speculative state so that they
-                                        * can be readily recycled if free memory runs low.  Otherwise
-                                        * the page is activated as normal. 
-                                        */
+                                               vm_pageout_cleaned_reactivated++;
+                                               vm_pageout_cleaned_fault_reactivated++;
+                                       }
 
-                                       if (no_cache && (!previously_pmapped || m->no_cache)) {
-                                               m->no_cache = TRUE;
+                                       if ((!m->active && !m->inactive) || no_cache) {
+                                               /*
+                                                * If this is a no_cache mapping and the page has never been
+                                                * mapped before or was previously a no_cache page, then we
+                                                * want to leave pages in the speculative state so that they
+                                                * can be readily recycled if free memory runs low.  Otherwise
+                                                * the page is activated as normal. 
+                                                */
 
-                                               if (!m->speculative) 
-                                                       vm_page_speculate(m, FALSE);
+                                               if (no_cache && (!previously_pmapped || m->no_cache)) {
+                                                       m->no_cache = TRUE;
 
-                                       } else if (!m->active && !m->inactive)
-                                               vm_page_activate(m);
+                                                       if (!m->speculative) 
+                                                               vm_page_speculate(m, FALSE);
 
-                               }
+                                               } else if (!m->active && !m->inactive) {
 
+                                                       vm_page_activate(m);
+                                               }
+                                       }
+                               }
                                vm_page_unlock_queues();
                        }
                }
@@ -2714,13 +2768,15 @@ vm_fault(
        vm_prot_t               original_fault_type;
        struct vm_object_fault_info fault_info;
        boolean_t               need_collapse = FALSE;
+       boolean_t               need_retry = FALSE;
        int                     object_lock_type = 0;
        int                     cur_object_lock_type;
        vm_object_t             top_object = VM_OBJECT_NULL;
        int                     throttle_delay;
 
 
-       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                     (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
                              (int)((uint64_t)vaddr >> 32),
                              (int)vaddr,
                              (map == kernel_map),
@@ -2728,7 +2784,8 @@ vm_fault(
                              0);
 
        if (get_preemption_level() != 0) {
-               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
+               KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                                     (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
                                      (int)((uint64_t)vaddr >> 32),
                                      (int)vaddr,
                                      KERN_FAILURE,
@@ -2782,6 +2839,7 @@ RetryFault:
        fault_info.stealth = FALSE;
        fault_info.io_sync = FALSE;
        fault_info.mark_zf_absent = FALSE;
+       fault_info.batch_pmap_op = FALSE;
 
        /*
         * If the page is wired, we must fault for the current protection
@@ -2941,6 +2999,45 @@ RetryFault:
                                kr = KERN_ABORTED;
                                goto done;
                        }
+                       if (m->laundry) {
+                               if (object != cur_object) {
+                                       if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
+                                               cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+
+                                               vm_object_unlock(object);
+                                               vm_object_unlock(cur_object);
+
+                                               vm_map_unlock_read(map);
+                                               if (real_map != map)
+                                                       vm_map_unlock(real_map);
+
+                                               goto RetryFault;
+                                       }
+
+                               } else if (object_lock_type == OBJECT_LOCK_SHARED) {
+
+                                       object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+
+                                       if (vm_object_lock_upgrade(object) == FALSE) {
+                                               /*
+                                                * couldn't upgrade, so explicitly take the lock
+                                                * exclusively and go relookup the page since we
+                                                * will have dropped the object lock and
+                                                * a different thread could have inserted
+                                                * a page at this offset
+                                                * no need for a full retry since we're
+                                                * at the top level of the object chain
+                                                */
+                                               vm_object_lock(object);
+
+                                               continue;
+                                       }
+                               }
+                               m->pageout = FALSE;
+                               
+                               vm_pageout_steal_laundry(m, FALSE);
+                       }
+
                        if (m->phys_page == vm_page_guard_addr) {
                                /*
                                 * Guard page: let the slow path deal with it
@@ -3166,6 +3263,7 @@ FastPmapEnter:
                                                            change_wiring,
                                                            fault_info.no_cache,
                                                            fault_info.cs_bypass,
+                                                           (top_object != VM_OBJECT_NULL ? &need_retry : NULL),
                                                            &type_of_fault);
                                } else {
                                        kr = vm_fault_enter(m,
@@ -3177,6 +3275,7 @@ FastPmapEnter:
                                                            change_wiring,
                                                            fault_info.no_cache,
                                                            fault_info.cs_bypass,
+                                                           (top_object != VM_OBJECT_NULL ? &need_retry : NULL),
                                                            &type_of_fault);
                                }
 
@@ -3197,7 +3296,8 @@ FastPmapEnter:
                                if (need_collapse == TRUE)
                                        vm_object_collapse(object, offset, TRUE);
                                
-                               if (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT) {
+                               if (need_retry == FALSE &&
+                                   (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT)) {
                                        /*
                                         * evaluate access pattern and update state
                                         * vm_fault_deactivate_behind depends on the
@@ -3219,6 +3319,20 @@ FastPmapEnter:
                                if (real_map != map)
                                        vm_map_unlock(real_map);
 
+                               if (need_retry == TRUE) {
+                                       /*
+                                        * vm_fault_enter couldn't complete the PMAP_ENTER...
+                                        * at this point we don't hold any locks so it's safe
+                                        * to ask the pmap layer to expand the page table to
+                                        * accommodate this mapping... once expanded, we'll
+                                        * re-drive the fault which should result in vm_fault_enter
+                                        * being able to successfully enter the mapping this time around
+                                        */
+                                       (void)pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0, PMAP_OPTIONS_NOENTER);
+                                       
+                                       need_retry = FALSE;
+                                       goto RetryFault;
+                               }
                                goto done;
                        }
                        /*
@@ -3307,7 +3421,7 @@ FastPmapEnter:
                         */
                        vm_page_copy(cur_m, m);
                        vm_page_insert(m, object, offset);
-                       m->dirty = TRUE;
+                       SET_PAGE_DIRTY(m, FALSE);
 
                        /*
                         * Now cope with the source page and object
@@ -3779,6 +3893,7 @@ handle_copy_delay:
                                            change_wiring,
                                            fault_info.no_cache,
                                            fault_info.cs_bypass,
+                                           NULL,
                                            &type_of_fault);
                } else {
                        kr = vm_fault_enter(m,
@@ -3790,6 +3905,7 @@ handle_copy_delay:
                                            change_wiring,
                                            fault_info.no_cache,
                                            fault_info.cs_bypass,
+                                           NULL,
                                            &type_of_fault);
                }
                if (kr != KERN_SUCCESS) {
@@ -3926,7 +4042,8 @@ handle_copy_delay:
 done:
        thread_interrupt_level(interruptible_state);
 
-       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
+                             (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
                              (int)((uint64_t)vaddr >> 32),
                              (int)vaddr,
                              kr,
@@ -4041,6 +4158,7 @@ vm_fault_unwire(
        fault_info.io_sync = FALSE;
        fault_info.cs_bypass = FALSE;
        fault_info.mark_zf_absent = FALSE;
+       fault_info.batch_pmap_op = FALSE;
 
        /*
         *      Since the pages are wired down, we must be able to
@@ -4318,6 +4436,7 @@ vm_fault_wire_fast(
                            FALSE,
                            FALSE,
                            FALSE,
+                           NULL,
                            &type_of_fault);
 
 done:
@@ -4453,6 +4572,7 @@ vm_fault_copy(
        fault_info_src.io_sync = FALSE;
        fault_info_src.cs_bypass = FALSE;
        fault_info_src.mark_zf_absent = FALSE;
+       fault_info_src.batch_pmap_op = FALSE;
 
        fault_info_dst.interruptible = interruptible;
        fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL;
@@ -4464,6 +4584,7 @@ vm_fault_copy(
        fault_info_dst.io_sync = FALSE;
        fault_info_dst.cs_bypass = FALSE;
        fault_info_dst.mark_zf_absent = FALSE;
+       fault_info_dst.batch_pmap_op = FALSE;
 
        do { /* while (amount_left > 0) */
                /*
@@ -4689,7 +4810,7 @@ vm_fault_copy(
                                                  (vm_size_t)part_size);
                                if(!dst_page->dirty){
                                        vm_object_lock(dst_object);
-                                       dst_page->dirty = TRUE;
+                                       SET_PAGE_DIRTY(dst_page, TRUE);
                                        vm_object_unlock(dst_page->object);
                                }
 
@@ -4700,10 +4821,13 @@ vm_fault_copy(
                        if (result_page == VM_PAGE_NULL)
                                vm_page_zero_fill(dst_page);
                        else{
+                               vm_object_lock(result_page->object);
                                vm_page_copy(result_page, dst_page);
+                               vm_object_unlock(result_page->object);
+
                                if(!dst_page->dirty){
                                        vm_object_lock(dst_object);
-                                       dst_page->dirty = TRUE;
+                                       SET_PAGE_DIRTY(dst_page, TRUE);
                                        vm_object_unlock(dst_page->object);
                                }
                        }
@@ -4892,6 +5016,7 @@ vm_page_validate_cs_mapped(
 
        /* verify the SHA1 hash for this page */
        validated = cs_validate_page(blobs,
+                                    pager,
                                     offset + object->paging_offset,
                                     (const void *)kaddr,
                                     &tainted);
index 6d90a84b013b75d2ec3c3581d55858cc5ba829ed..878d140f13fa0e297d08a4cb889f417c968bece6 100644 (file)
@@ -164,6 +164,7 @@ extern kern_return_t vm_fault_enter(
        boolean_t change_wiring,
        boolean_t no_cache,
        boolean_t cs_bypass,
+       boolean_t *need_retry,
        int *type_of_fault);
 
 #endif /* MACH_KERNEL_PRIVATE */
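
Only a caller that still holds the lock on the top object of the shadow chain passes a real need_retry pointer; everyone else passes NULL and keeps the blocking fallback inside vm_fault_enter(). The caller-side pattern, condensed from the vm_fault() hunks above (argument names follow the surrounding code):

	boolean_t	need_retry = FALSE;

	kr = vm_fault_enter(m, pmap, vaddr, prot, fault_type,
			    wired, change_wiring, no_cache, cs_bypass,
			    (top_object != VM_OBJECT_NULL ? &need_retry : NULL),
			    &type_of_fault);

	if (need_retry == TRUE) {
		/* all locks dropped: pre-expand the pmap, then re-drive */
		(void) pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0,
					  PMAP_OPTIONS_NOENTER);
		need_retry = FALSE;
		goto RetryFault;
	}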
index cf29c82f6634cc73ccba7a12f06c8dd5d68c0f00..59af43c2692d5e7402c14e58a55493834baef0d5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -64,6 +64,7 @@
  */
 
 #include <mach/machine/vm_types.h>
+#include <mach/vm_map.h>
 #include <kern/zalloc.h>
 #include <kern/kalloc.h>
 #include <kern/kext_alloc.h>
@@ -88,8 +89,12 @@ const vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
 const vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS;
 
 boolean_t vm_kernel_ready = FALSE;
+boolean_t kmem_ready = FALSE;
 boolean_t zlog_ready = FALSE;
 
+vm_offset_t kmapoff_kaddr;
+unsigned int kmapoff_pgcnt;
+
 /*
  *     vm_mem_bootstrap initializes the virtual memory system.
  *     This is done only by the first cpu up.
@@ -107,7 +112,7 @@ vm_mem_bootstrap(void)
         *      From here on, all physical memory is accounted for,
         *      and we use only virtual addresses.
         */
-#define vm_mem_bootstrap_kprintf(x)
+#define vm_mem_bootstrap_kprintf(x) /* kprintf(x) */
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_page_bootstrap\n"));
        vm_page_bootstrap(&start, &end);
@@ -124,11 +129,25 @@ vm_mem_bootstrap(void)
 
        vm_kernel_ready = TRUE;
 
-       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_map_int\n"));
+       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_map_init\n"));
        vm_map_init();
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kmem_init\n"));
        kmem_init(start, end);
+       kmem_ready = TRUE;
+       /*
+        * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
+        * stack addresses. (With a 4K page and 9 bits of randomness, this
+        * eats at most 2M of VA from the map.)
+        */
+       if (!PE_parse_boot_argn("kmapoff", &kmapoff_pgcnt,
+           sizeof (kmapoff_pgcnt)))
+               kmapoff_pgcnt = early_random() & 0x1ff; /* 9 bits */
+
+       if (kmapoff_pgcnt > 0 &&
+           vm_allocate(kernel_map, &kmapoff_kaddr,
+           kmapoff_pgcnt * PAGE_SIZE_64, VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
+               panic("cannot vm_allocate %u kernel_map pages", kmapoff_pgcnt);
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n"));
        pmap_init();
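
The new kmapoff logic consumes a random number of pages (0 to 511, or a `kmapoff` boot-arg override) at the front of kernel_map, sliding every later heap, zone, and stack allocation by up to 2MB. The same probe can be imitated from user space; a minimal sketch using the Mach allocation API (the 9-bit constant mirrors the kernel's choice, and arc4random() stands in for early_random()):

    #include <mach/mach.h>
    #include <mach/mach_vm.h>
    #include <stdlib.h>

    static mach_vm_address_t slide_base;

    static void
    fuzz_address_space(void)
    {
            /* 9 bits of randomness: at most 511 pages (~2MB at 4K). */
            unsigned int pgcnt = arc4random() & 0x1ff;

            if (pgcnt > 0 &&
                mach_vm_allocate(mach_task_self(), &slide_base,
                    (mach_vm_size_t)pgcnt * vm_page_size,
                    VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
                    abort();    /* mirrors the kernel's panic() */
    }
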
@@ -158,21 +177,27 @@ vm_mem_bootstrap(void)
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling zone_init\n"));
        assert((vm_size_t) zsize == zsize);
-       zone_init((vm_size_t) zsize);                                           /* Allocate address space for zones */
-       
+       zone_init((vm_size_t) zsize);   /* Allocate address space for zones */
+
+       /* The vm_page_zone must be created prior to kalloc_init; that
+        * routine can trigger zalloc()s (e.g. for mutex statistic structure
+        * initialization). The vm_page_zone must exist to satisfy fictitious
+        * page allocations (which are used for guard pages by the guard
+        * mode zone allocator).
+        */
+       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_page_module_init\n"));
+       vm_page_module_init();
+
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kalloc_init\n"));
        kalloc_init();
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_fault_init\n"));
        vm_fault_init();
 
-       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_page_module_init\n"));
-       vm_page_module_init();
-
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling memory_manager_default_init\n"));
        memory_manager_default_init();
 
-       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling meory_object_control_bootstrap\n"));
+       vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling memory_object_control_bootstrap\n"));
        memory_object_control_bootstrap();
 
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling device_pager_bootstrap\n"));
index acd7d2a82bcd0f45035ac1248015d3fe0e4feeec..e35f70daf6339938ce091b8857b4a7d12deb6d03 100644 (file)
@@ -126,15 +126,16 @@ kmem_alloc_contig(
 
        if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) 
                return KERN_INVALID_ARGUMENT;
+
+       map_size = vm_map_round_page(size);
+       map_mask = (vm_map_offset_t)mask;
        
-       if (size == 0) {
+       /* Check for zero allocation size (either directly or via overflow) */
+       if (map_size == 0) {
                *addrp = 0;
                return KERN_INVALID_ARGUMENT;
        }
 
-       map_size = vm_map_round_page(size);
-       map_mask = (vm_map_offset_t)mask;
-
        /*
         *      Allocate a new object (if necessary) and the reference we
         *      will be donating to the map entry.  We must do this before
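
Both this hunk and the kernel_memory_allocate() hunk below reorder the checks: round the size first, then reject a zero result. The point is that vm_map_round_page() on a size near the top of the address range wraps to 0, so the old `size == 0` test passed and the allocator proceeded with a zero-length map_size. A standalone demonstration, assuming the usual mask-based rounding:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_MASK_64     ((uint64_t)(4096 - 1))
    /* Same shape as vm_map_round_page(): round up, wrapping on overflow. */
    #define round_page_64(x) (((uint64_t)(x) + PAGE_MASK_64) & ~PAGE_MASK_64)

    int
    main(void)
    {
            uint64_t huge = UINT64_MAX - 100;   /* caller-supplied size */

            /* Old order: huge != 0 passes, then the rounded size wraps. */
            printf("rounded = 0x%llx\n",
                (unsigned long long)round_page_64(huge));    /* prints 0 */

            /* New order: round first, then reject the zero result. */
            if (round_page_64(huge) == 0)
                    printf("rejected: zero or overflowing size\n");
            return 0;
    }
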
@@ -244,19 +245,21 @@ kernel_memory_allocate(
        int                     wired_page_count = 0;
        int                     i;
        int                     vm_alloc_flags;
+       vm_prot_t               kma_prot;
 
        if (! vm_kernel_ready) {
                panic("kernel_memory_allocate: VM is not ready");
        }
 
-       if (size == 0) {
-               *addrp = 0;
-               return KERN_INVALID_ARGUMENT;
-       }
        map_size = vm_map_round_page(size);
        map_mask = (vm_map_offset_t) mask;
        vm_alloc_flags = 0;
 
+       /* Check for zero allocation size (either directly or via overflow) */
+       if (map_size == 0) {
+               *addrp = 0;
+               return KERN_INVALID_ARGUMENT;
+       }
 
        /*
         * limit the size of a single extent of wired memory
@@ -406,6 +409,9 @@ kernel_memory_allocate(
                mem->busy = FALSE;
                pg_offset += PAGE_SIZE_64;
        }
+
+       kma_prot = VM_PROT_READ | VM_PROT_WRITE;
+
        for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
                if (wired_page_list == NULL)
                        panic("kernel_memory_allocate: wired_page_list == NULL");
@@ -422,7 +428,7 @@ kernel_memory_allocate(
                mem->wpmapped = TRUE;
 
                PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, 
-                          VM_PROT_READ | VM_PROT_WRITE, 0, TRUE);
+                          kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);
 
                if (flags & KMA_NOENCRYPT) {
                        bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
@@ -812,7 +818,7 @@ kmem_remap_pages(
            mem->pmapped = TRUE;
            mem->wpmapped = TRUE;
 
-           PMAP_ENTER(kernel_pmap, map_start, mem, protection, 0, TRUE);
+           PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);
 
            map_start += PAGE_SIZE;
            offset += PAGE_SIZE;
index 1c03bac0cc44be29cc3510e00696754fc93b1197..c4c8696d2615bb3f2b1f0be1453f8a46489991b9 100644 (file)
@@ -88,6 +88,7 @@ extern kern_return_t  kernel_memory_allocate(
 #define KMA_GUARD_LAST 0x20
 #define KMA_PERMANENT  0x40
 #define KMA_NOENCRYPT  0x80
+#define KMA_KSTACK     0x100
 
 extern kern_return_t kmem_alloc_contig(
                                vm_map_t        map,
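
KMA_KSTACK is a new caller hint: when set, the kernel_memory_allocate() hunk above passes VM_MEM_STACK to PMAP_ENTER so the pmap layer can treat stack mappings specially. A hedged sketch of a stack-style allocation combining it with the existing guard flags; the exact flag mix XNU's stack allocator uses is not shown in this diff:

    /* Sketch only: a guarded kernel stack allocation. */
    vm_offset_t   stack = 0;
    kern_return_t kr;

    kr = kernel_memory_allocate(kernel_map, &stack,
            kernel_stack_size + 2 * PAGE_SIZE,  /* stack + guard pages */
            0,                                  /* alignment mask */
            KMA_KSTACK | KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST);
    if (kr != KERN_SUCCESS)
            panic("stack allocation failed");
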
index 0ce07a4d9243d29bd08b5e03e27695a0273c7402..178a1cae06ada1f14cf517828e64c24fcae32d16 100644 (file)
@@ -94,7 +94,6 @@
 #include <ipc/ipc_port.h>
 #include <kern/sched_prim.h>
 #include <kern/misc_protos.h>
-#include <machine/db_machdep.h>
 #include <kern/xpr.h>
 
 #include <mach/vm_map_server.h>
 #include <vm/vm_shared_region.h>
 #include <vm/vm_map_store.h>
 
+extern u_int32_t random(void); /* from <libkern/libkern.h> */
 /* Internal prototypes
  */
 
@@ -288,11 +288,6 @@ static kern_return_t       vm_map_can_reuse(
        vm_map_offset_t start,
        vm_map_offset_t end);
 
-#if CONFIG_FREEZE
-struct default_freezer_table;
-__private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
-__private_extern__ void  default_freezer_mapping_free(void**, boolean_t all);  
-#endif
 
 /*
  * Macros to copy a vm_map_entry. We must be careful to correctly
@@ -303,6 +298,7 @@ __private_extern__ void  default_freezer_mapping_free(void**, boolean_t all);
  * wire count; it's used for map splitting and zone changing in
  * vm_map_copyout.
  */
+
 #define vm_map_entry_copy(NEW,OLD)     \
 MACRO_BEGIN                            \
 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;  \
@@ -313,6 +309,7 @@ boolean_t _vmec_reserved = (NEW)->from_reserved_zone;       \
        (NEW)->wired_count = 0;         \
        (NEW)->user_wired_count = 0;    \
        (NEW)->permanent = FALSE;       \
+       (NEW)->used_for_jit = FALSE;    \
        (NEW)->from_reserved_zone = _vmec_reserved;                     \
 MACRO_END
 
@@ -623,6 +620,8 @@ vm_map_init(
        void)
 {
        vm_size_t entry_zone_alloc_size;
+       const char *mez_name = "VM map entries";
+
        vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
                            PAGE_SIZE, "maps");
        zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
@@ -631,12 +630,12 @@ vm_map_init(
 #else
        entry_zone_alloc_size = PAGE_SIZE * 6;
 #endif
-       
        vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
                                  1024*1024, entry_zone_alloc_size,
-                                 "VM map entries");
+                                 mez_name);
        zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
        zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
+       zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 
        vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
                                   kentry_data_size * 64, kentry_data_size,
@@ -659,6 +658,7 @@ vm_map_init(
        zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
        zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
        zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
+       zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 
        zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
        zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
@@ -666,6 +666,10 @@ vm_map_init(
        lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
        lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
        lck_attr_setdefault(&vm_map_lck_attr);  
+
+#if CONFIG_FREEZE
+       default_freezer_init();
+#endif /* CONFIG_FREEZE */
 }
 
 void
@@ -688,6 +692,15 @@ vm_map_steal_memory(
 #else
        kentry_initial_pages = 6;
 #endif
+
+#if CONFIG_GZALLOC
+       /* If using the guard allocator, reserve more memory for the kernel
+        * reserved map entry pool.
+       */
+       if (gzalloc_enabled())
+               kentry_initial_pages *= 1024;
+#endif
+
        kentry_data_size = kentry_initial_pages * PAGE_SIZE;
        kentry_data = pmap_steal_memory(kentry_data_size);
 }
@@ -737,7 +750,7 @@ vm_map_create(
        result->max_offset = max;
        result->wiring_required = FALSE;
        result->no_zero_fill = FALSE;
-       result->mapped = FALSE;
+       result->mapped_in_other_pmaps = FALSE;
        result->wait_for_space = FALSE;
        result->switch_protect = FALSE;
        result->disable_vmentry_reuse = FALSE;
@@ -748,7 +761,7 @@ vm_map_create(
        result->color_rr = (color_seed++) & vm_color_mask;
        result->jit_entry_exists = FALSE;
 #if CONFIG_FREEZE
-       result->default_freezer_toc = NULL;
+       result->default_freezer_handle = NULL;
 #endif
        vm_map_lock_init(result);
        lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
@@ -798,7 +811,9 @@ _vm_map_entry_create(
        entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
 
        vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
-
+#if    MAP_ENTRY_CREATION_DEBUG
+       fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t)));
+#endif
        return(entry);
 }
 
@@ -812,7 +827,6 @@ _vm_map_entry_create(
  *     of the stores
  */
 #define        vm_map_entry_dispose(map, entry)                        \
-       vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);  \
        _vm_map_entry_dispose(&(map)->hdr, (entry))
 
 #define        vm_map_copy_entry_dispose(map, entry) \
@@ -949,8 +963,9 @@ vm_map_destroy(
                             flags, VM_MAP_NULL);
 
 #if CONFIG_FREEZE
-       if (map->default_freezer_toc){
-               default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
+       if (map->default_freezer_handle) {
+               default_freezer_handle_deallocate(map->default_freezer_handle);
+               map->default_freezer_handle = NULL;
        }
 #endif
        vm_map_unlock(map);
@@ -1321,6 +1336,8 @@ vm_map_find_space(
        new_entry->permanent = FALSE;
        new_entry->superpage_size = 0;
 
+       new_entry->used_for_jit = 0;
+
        new_entry->alias = 0;
        new_entry->zero_wired_pages = FALSE;
 
@@ -1400,7 +1417,7 @@ vm_map_pmap_enter(
                }
                type_of_fault = DBG_CACHE_HIT_FAULT;
                kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
-                                   VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
+                                   VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
                                    &type_of_fault);
 
                vm_object_unlock(object);
@@ -1445,6 +1462,58 @@ boolean_t vm_map_pmap_is_empty(
 #endif /* MACHINE_PMAP_IS_EMPTY */
 }
 
+#define MAX_TRIES_TO_GET_RANDOM_ADDRESS        1000
+kern_return_t
+vm_map_random_address_for_size(
+       vm_map_t        map,
+       vm_map_offset_t *address,
+       vm_map_size_t   size)
+{
+       kern_return_t   kr = KERN_SUCCESS;
+       int             tries = 0;
+       vm_map_offset_t random_addr = 0;
+       vm_map_offset_t hole_end;
+
+       vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
+       vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
+       vm_map_size_t   vm_hole_size = 0;
+       vm_map_size_t   addr_space_size;
+
+       addr_space_size = vm_map_max(map) - vm_map_min(map);
+
+       assert(page_aligned(size));
+
+       while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
+               random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
+               random_addr = trunc_page(vm_map_min(map) +
+                                        (random_addr % addr_space_size));
+
+               if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
+                       if (prev_entry == vm_map_to_entry(map)) {
+                               next_entry = vm_map_first_entry(map);
+                       } else {
+                               next_entry = prev_entry->vme_next;
+                       }
+                       if (next_entry == vm_map_to_entry(map)) {
+                               hole_end = vm_map_max(map);
+                       } else {
+                               hole_end = next_entry->vme_start;
+                       }
+                       vm_hole_size = hole_end - random_addr;
+                       if (vm_hole_size >= size) {
+                               *address = random_addr;
+                               break;
+                       }
+               }
+               tries++;
+       }
+
+       if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
+               kr = KERN_NO_SPACE;
+       }
+       return kr;
+}
+
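
vm_map_random_address_for_size() probes up to 1000 random page-aligned addresses, accepting the first one that does not land inside an existing entry and that fronts a hole large enough for the request; if every probe fails it reports KERN_NO_SPACE. The same probe-and-check idea in a self-contained user-space simulation (a toy sorted range list stands in for the map's entry list):

    #include <stdint.h>
    #include <stdlib.h>

    #define PAGE_SHIFT 12
    #define MAX_TRIES  1000

    struct range { uint64_t start, end; };  /* sorted, non-overlapping */

    /* Returns 0 and sets *out on success, -1 if all probes failed. */
    static int
    random_address_for_size(const struct range *used, int n,
        uint64_t map_min, uint64_t map_max, uint64_t size, uint64_t *out)
    {
            for (int tries = 0; tries < MAX_TRIES; tries++) {
                    uint64_t addr = map_min +
                        ((((uint64_t)arc4random()) << PAGE_SHIFT) %
                        (map_max - map_min));
                    addr &= ~(((uint64_t)1 << PAGE_SHIFT) - 1); /* trunc_page */

                    uint64_t hole_end = map_max;
                    int inside = 0;
                    for (int i = 0; i < n; i++) {
                            if (addr >= used[i].start && addr < used[i].end)
                                    inside = 1;     /* probe hit an entry */
                            else if (used[i].start > addr &&
                                used[i].start < hole_end)
                                    hole_end = used[i].start;
                    }
                    if (!inside && hole_end - addr >= size) {
                            *out = addr;
                            return 0;
                    }
            }
            return -1;      /* analog of KERN_NO_SPACE */
    }
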
 /*
  *     Routine:        vm_map_enter
  *
@@ -1489,6 +1558,7 @@ vm_map_enter(
        boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
        boolean_t               is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
        boolean_t               permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
+       boolean_t               entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
        unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
        char                    alias;
        vm_map_offset_t         effective_min_offset, effective_max_offset;
@@ -1522,7 +1592,7 @@ vm_map_enter(
 
 #if CONFIG_EMBEDDED
        if (cur_protection & VM_PROT_WRITE){
-               if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
+               if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
                        printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
                        cur_protection &= ~VM_PROT_EXECUTE;
                }
@@ -1616,11 +1686,22 @@ StartAgain: ;
                vm_map_lock(map);
                map_locked = TRUE;
                
-               if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
-                       result = KERN_INVALID_ARGUMENT;
-                       goto BailOut;
+               if (entry_for_jit) {
+                       if (map->jit_entry_exists) {
+                               result = KERN_INVALID_ARGUMENT;
+                               goto BailOut;
+                       }
+                       /*
+                        * Get a random start address.
+                        */
+                       result = vm_map_random_address_for_size(map, address, size);
+                       if (result != KERN_SUCCESS) {
+                               goto BailOut;
+                       }
+                       start = *address;
                }
 
+
                /*
                 *      Calculate the first possible address.
                 */
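
For VM_FLAGS_MAP_JIT requests, vm_map_enter() now enforces one JIT entry per map, places it at a random address via vm_map_random_address_for_size(), backs it with a fresh anonymous object (see the `purgable || entry_for_jit` hunk below), and pins inheritance to VM_INHERIT_NONE. From user space this path is reached through mmap()'s MAP_JIT flag on Darwin; a hedged sketch (on embedded systems the call is additionally gated by entitlements, not shown here):

    #include <stdio.h>
    #include <sys/mman.h>

    /* Sketch: request the per-process JIT region. Only one such
     * mapping is allowed per map (the jit_entry_exists check above),
     * and its placement is randomized by the kernel. */
    void *jit = mmap(NULL, 1 << 20,
            PROT_READ | PROT_WRITE | PROT_EXEC,
            MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
    if (jit == MAP_FAILED)
            perror("mmap");
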
@@ -1665,8 +1746,10 @@ StartAgain: ;
 
                        if (entry == NULL) {
                                vm_map_entry_t  tmp_entry;
-                               if (vm_map_lookup_entry(map, start, &tmp_entry))
+                               if (vm_map_lookup_entry(map, start, &tmp_entry)) {
+                                       assert(!entry_for_jit);
                                        start = tmp_entry->vme_end;
+                               }
                                entry = tmp_entry;
                        }
                }
@@ -1872,11 +1955,13 @@ StartAgain: ;
         *      semantics.
         */
 
-       if (purgable) {
+       if (purgable || entry_for_jit) {
                if (object == VM_OBJECT_NULL) {
                        object = vm_object_allocate(size);
                        object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
-                       object->purgable = VM_PURGABLE_NONVOLATILE;
+                       if (purgable) {
+                               object->purgable = VM_PURGABLE_NONVOLATILE;
+                       }
                        offset = (vm_object_offset_t)0;
                }
        } else if ((is_submap == FALSE) &&
@@ -1951,11 +2036,11 @@ StartAgain: ;
                                                        FALSE, FALSE,
                                                        cur_protection, max_protection,
                                                        VM_BEHAVIOR_DEFAULT,
-                                                       (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance, 
+                                                       (entry_for_jit)? VM_INHERIT_NONE: inheritance, 
                                                        0, no_cache,
                                                        permanent, superpage_size);
                        new_entry->alias = alias;
-                       if (flags & VM_FLAGS_MAP_JIT){
+                       if (entry_for_jit){
                                if (!(map->jit_entry_exists)){
                                        new_entry->used_for_jit = TRUE;
                                        map->jit_entry_exists = TRUE;
@@ -1973,8 +2058,10 @@ StartAgain: ;
                                use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
        #ifndef NO_NESTED_PMAP 
                                if (use_pmap && submap->pmap == NULL) {
+                                       ledger_t ledger = map->pmap->ledger;
                                        /* we need a sub pmap to nest... */
-                                       submap->pmap = pmap_create(0, submap_is_64bit);
+                                       submap->pmap = pmap_create(ledger, 0,
+                                           submap_is_64bit);
                                        if (submap->pmap == NULL) {
                                                /* let's proceed without nesting... */
                                        }
@@ -2048,9 +2135,9 @@ StartAgain: ;
         */
        if ((map->wiring_required)||(superpage_size)) {
                pmap_empty = FALSE; /* pmap won't be empty */
-               result = vm_map_wire(map, start, end,
+               kr = vm_map_wire(map, start, end,
                                     new_entry->protection, TRUE);
-               RETURN(result);
+               RETURN(kr);
        }
 
        if ((object != VM_OBJECT_NULL) &&
@@ -2321,16 +2408,20 @@ vm_map_enter_mem_object(
                                 * once it's been set and if we race, we'll
                                 * just end up setting it twice, which is OK.
                                 */
-                               if (submap->mapped == FALSE) {
+                               if (submap->mapped_in_other_pmaps == FALSE &&
+                                   vm_map_pmap(submap) != PMAP_NULL &&
+                                   vm_map_pmap(submap) !=
+                                   vm_map_pmap(target_map)) {
                                        /*
-                                        * This submap has never been mapped.
-                                        * Set its "mapped" flag now that it
-                                        * has been mapped.
-                                        * This happens only for the first ever
-                                        * mapping of a "submap".
+                                        * This submap is being mapped in a map
+                                        * that uses a different pmap.
+                                        * Set its "mapped_in_other_pmaps" flag
+                                        * to indicate that we now need to 
+                                        * remove mappings from all pmaps rather
+                                        * than just the submap's pmap.
                                         */
                                        vm_map_lock(submap);
-                                       submap->mapped = TRUE;
+                                       submap->mapped_in_other_pmaps = TRUE;
                                        vm_map_unlock(submap);
                                }
                                *address = map_addr;
@@ -2370,6 +2461,8 @@ vm_map_enter_mem_object(
                                wimg_mode = VM_WIMG_IO;
                        } else if (access == MAP_MEM_COPYBACK) {
                                wimg_mode = VM_WIMG_USE_DEFAULT;
+                       } else if (access == MAP_MEM_INNERWBACK) {
+                               wimg_mode = VM_WIMG_INNERWBACK;
                        } else if (access == MAP_MEM_WTHRU) {
                                wimg_mode = VM_WIMG_WTHRU;
                        } else if (access == MAP_MEM_WCOMB) {
@@ -2731,14 +2824,11 @@ vm_map_enter_cpm(
        kern_return_t           kr;
        vm_map_offset_t         va, start, end, offset;
 #if    MACH_ASSERT
-       vm_map_offset_t         prev_addr;
+       vm_map_offset_t         prev_addr = 0;
 #endif /* MACH_ASSERT */
 
        boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
 
-       if (!vm_allocate_cpm_enabled)
-               return KERN_FAILURE;
-
        if (size == 0) {
                *addr = 0;
                return KERN_SUCCESS;
@@ -2763,7 +2853,7 @@ vm_map_enter_cpm(
        cpm_obj = vm_object_allocate((vm_object_size_t)size);
        assert(cpm_obj != VM_OBJECT_NULL);
        assert(cpm_obj->internal);
-       assert(cpm_obj->size == (vm_object_size_t)size);
+       assert(cpm_obj->vo_size == (vm_object_size_t)size);
        assert(cpm_obj->can_persist == FALSE);
        assert(cpm_obj->pager_created == FALSE);
        assert(cpm_obj->pageout == FALSE);
@@ -2873,7 +2963,7 @@ vm_map_enter_cpm(
                type_of_fault = DBG_ZERO_FILL_FAULT;
 
                vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
-                              VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
+                              VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
                               &type_of_fault);
 
                vm_object_unlock(cpm_obj);
@@ -2888,8 +2978,8 @@ vm_map_enter_cpm(
                m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
                vm_object_unlock(cpm_obj);
                if (m == VM_PAGE_NULL)
-                       panic("vm_allocate_cpm:  obj 0x%x off 0x%x no page",
-                             cpm_obj, offset);
+                       panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
+                             cpm_obj, (uint64_t)offset);
                assert(m->tabled);
                assert(!m->busy);
                assert(!m->wanted);
@@ -2898,15 +2988,15 @@ vm_map_enter_cpm(
                assert(!m->absent);
                assert(!m->error);
                assert(!m->cleaning);
+               assert(!m->laundry);
                assert(!m->precious);
                assert(!m->clustered);
                if (offset != 0) {
                        if (m->phys_page != prev_addr + 1) {
-                               printf("start 0x%x end 0x%x va 0x%x\n",
-                                      start, end, va);
-                               printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
-                               printf("m 0x%x prev_address 0x%x\n", m,
-                                      prev_addr);
+                               printf("start 0x%llx end 0x%llx va 0x%llx\n",
+                                      (uint64_t)start, (uint64_t)end, (uint64_t)va);
+                               printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
+                               printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
                                panic("vm_allocate_cpm:  pages not contig!");
                        }
                }
@@ -2995,7 +3085,7 @@ vm_map_clip_unnest(
        pmap_unnest(map->pmap,
                    entry->vme_start,
                    entry->vme_end - entry->vme_start);
-       if ((map->mapped) && (map->ref_count)) {
+       if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
                /* clean up parent map/maps */
                vm_map_submap_pmap_clean(
                        map, entry->vme_start,
@@ -3004,6 +3094,9 @@ vm_map_clip_unnest(
                        entry->offset);
        }
        entry->use_pmap = FALSE;
+       if (entry->alias == VM_MEMORY_SHARED_PMAP) {
+               entry->alias = VM_MEMORY_UNSHARED_PMAP;
+       }
 }
 #endif /* NO_NESTED_PMAP */
 
@@ -3327,13 +3420,27 @@ vm_map_submap(
                entry->is_sub_map = TRUE;
                entry->object.sub_map = submap;
                vm_map_reference(submap);
-               submap->mapped = TRUE;
+               if (submap->mapped_in_other_pmaps == FALSE &&
+                   vm_map_pmap(submap) != PMAP_NULL &&
+                   vm_map_pmap(submap) != vm_map_pmap(map)) {
+                       /*
+                        * This submap is being mapped in a map
+                        * that uses a different pmap.
+                        * Set its "mapped_in_other_pmaps" flag
+                        * to indicate that we now need to 
+                        * remove mappings from all pmaps rather
+                        * than just the submap's pmap.
+                        */
+                       submap->mapped_in_other_pmaps = TRUE;
+               }
 
 #ifndef NO_NESTED_PMAP
                if (use_pmap) {
                        /* nest if platform code will allow */
                        if(submap->pmap == NULL) {
-                               submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
+                               ledger_t ledger = map->pmap->ledger;
+                               submap->pmap = pmap_create(ledger,
+                                               (vm_map_size_t) 0, FALSE);
                                if(submap->pmap == PMAP_NULL) {
                                        vm_map_unlock(map);
                                        return(KERN_NO_SPACE);
@@ -3948,11 +4055,11 @@ vm_map_wire_nested(
                                                      s, user_wire);
                                        return(KERN_FAILURE);
                                }
+                               vm_object_unlock(object);
                                if(real_map != lookup_map)
                                        vm_map_unlock(real_map);
                                vm_map_unlock_read(lookup_map);
                                vm_map_lock(map);
-                               vm_object_unlock(object);
 
                                /* we unlocked, so must re-lookup */
                                if (!vm_map_lookup_entry(map, 
@@ -4667,7 +4774,7 @@ vm_map_submap_pmap_clean(
                                entry->offset);
                } else {
 
-                       if((map->mapped) && (map->ref_count)
+                       if((map->mapped_in_other_pmaps) && (map->ref_count)
                           && (entry->object.vm_object != NULL)) {
                                vm_object_pmap_protect(
                                        entry->object.vm_object,
@@ -4700,7 +4807,7 @@ vm_map_submap_pmap_clean(
                                entry->object.sub_map,
                                entry->offset);
                } else {
-                       if((map->mapped) && (map->ref_count)
+                       if((map->mapped_in_other_pmaps) && (map->ref_count)
                           && (entry->object.vm_object != NULL)) {
                                vm_object_pmap_protect(
                                        entry->object.vm_object,
@@ -5077,7 +5184,7 @@ vm_map_delete(
                                            (addr64_t)entry->vme_start,
                                            entry->vme_end - entry->vme_start);
 #endif /* NO_NESTED_PMAP */
-                               if ((map->mapped) && (map->ref_count)) {
+                               if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
                                        /* clean up parent map/maps */
                                        vm_map_submap_pmap_clean(
                                                map, entry->vme_start,
@@ -5093,7 +5200,7 @@ vm_map_delete(
                        }
                } else if (entry->object.vm_object != kernel_object) {
                        object = entry->object.vm_object;
-                       if((map->mapped) && (map->ref_count)) {
+                       if((map->mapped_in_other_pmaps) && (map->ref_count)) {
                                vm_object_pmap_protect(
                                        object, entry->offset,
                                        entry->vme_end - entry->vme_start,
@@ -6774,7 +6881,7 @@ vm_map_copy_overwrite_aligned(
                                                            (addr64_t)entry->vme_start,
                                                            entry->vme_end - entry->vme_start);
 #endif /* NO_NESTED_PMAP */
-                                               if(dst_map->mapped) {
+                                               if(dst_map->mapped_in_other_pmaps) {
                                                        /* clean up parent */
                                                        /* map/maps */
                                                        vm_map_submap_pmap_clean(
@@ -6793,7 +6900,7 @@ vm_map_copy_overwrite_aligned(
                                        vm_map_deallocate(
                                                entry->object.sub_map);
                                } else {
-                                       if(dst_map->mapped) {
+                                       if(dst_map->mapped_in_other_pmaps) {
                                                vm_object_pmap_protect(
                                                        entry->object.vm_object,
                                                        entry->offset,
@@ -7377,7 +7484,7 @@ StartAgain: ;
                                type_of_fault = DBG_CACHE_HIT_FAULT;
 
                                vm_fault_enter(m, dst_map->pmap, va, prot, prot,
-                                              VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
+                                              VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
                                               &type_of_fault);
 
                                vm_object_unlock(object);
@@ -8212,7 +8319,7 @@ vm_map_fork_share(
                        if (override_nx(old_map, old_entry->alias) && prot)
                                prot |= VM_PROT_EXECUTE;
 
-                       if (old_map->mapped) {
+                       if (old_map->mapped_in_other_pmaps) {
                                vm_object_pmap_protect(
                                        old_entry->object.vm_object,
                                        old_entry->offset,
@@ -8374,6 +8481,7 @@ vm_map_fork_copy(
  */
 vm_map_t
 vm_map_fork(
+       ledger_t        ledger,
        vm_map_t        old_map)
 {
        pmap_t          new_pmap;
@@ -8384,11 +8492,11 @@ vm_map_fork(
        boolean_t       src_needs_copy;
        boolean_t       new_entry_needs_copy;
 
-       new_pmap = pmap_create((vm_map_size_t) 0,
+       new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
 #if defined(__i386__) || defined(__x86_64__)
                               old_map->pmap->pm_task_map != TASK_MAP_32BIT
 #else
-                              0
+#error Unknown architecture.
 #endif
                               );
 #if defined(__i386__)
@@ -8469,7 +8577,7 @@ vm_map_fork(
                                        (old_entry->vme_end -
                                         old_entry->vme_start),
                                        ((old_entry->is_shared 
-                                         || old_map->mapped)
+                                         || old_map->mapped_in_other_pmaps)
                                         ? PMAP_NULL :
                                         old_map->pmap),
                                        old_entry->vme_start,
@@ -8791,7 +8899,7 @@ submap_recurse:
 
                                prot = submap_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(map, submap_entry->alias) && prot)
+                               if (override_nx(old_map, submap_entry->alias) && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(
@@ -8800,7 +8908,7 @@ submap_recurse:
                                        submap_entry->vme_end - 
                                        submap_entry->vme_start,
                                        (submap_entry->is_shared 
-                                        || map->mapped) ?
+                                        || map->mapped_in_other_pmaps) ?
                                        PMAP_NULL : map->pmap,
                                        submap_entry->vme_start,
                                        prot);
@@ -8912,7 +9020,7 @@ submap_recurse:
 
        prot = entry->protection;
 
-       if (override_nx(map, entry->alias) && prot) {
+       if (override_nx(old_map, entry->alias) && prot) {
                /*
                 * HACK -- if not a stack, then allow execution
                 */
@@ -9031,6 +9139,7 @@ submap_recurse:
                fault_info->io_sync = FALSE;
                fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
                fault_info->mark_zf_absent = FALSE;
+               fault_info->batch_pmap_op = FALSE;
        }
 
        /*
@@ -10105,7 +10214,7 @@ vm_map_simplify_entry(
            (prev_entry->is_shared == FALSE) &&
            (this_entry->is_shared == FALSE)
                ) {
-               _vm_map_store_entry_unlink(&map->hdr, prev_entry);
+               vm_map_store_entry_unlink(map, prev_entry);
                assert(prev_entry->vme_start < this_entry->vme_end);
                this_entry->vme_start = prev_entry->vme_start;
                this_entry->offset = prev_entry->offset;
@@ -10451,6 +10560,7 @@ vm_map_willneed(
        fault_info.io_sync = FALSE;
        fault_info.cs_bypass = FALSE;
        fault_info.mark_zf_absent = FALSE;
+       fault_info.batch_pmap_op = FALSE;
 
        /*
         * The MADV_WILLNEED operation doesn't require any changes to the
@@ -10616,7 +10726,22 @@ vm_map_entry_is_reusable(
        if (object == VM_OBJECT_NULL) {
                return TRUE;
        }
-       if (object->ref_count == 1 &&
+       if (
+#if 0
+               /*
+                * Let's proceed even if the VM object is potentially
+                * shared.
+                * We check for this later when processing the actual
+                * VM pages, so the contents will be safe if shared.
+                * 
+                * But we can still mark this memory region as "reusable" to
+                * acknowledge that the caller did let us know that the memory
+                * could be re-used and should not be penalized for holding
+                * on to it.  This allows its "resident size" to not include
+                * the reusable range.
+                */
+           object->ref_count == 1 &&
+#endif
            object->wired_page_count == 0 &&
            object->copy == VM_OBJECT_NULL &&
            object->shadow == VM_OBJECT_NULL &&
@@ -10864,300 +10989,6 @@ vm_map_can_reuse(
 }
 
 
-
-#include <mach_kdb.h>
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#include <vm/vm_print.h>
-
-#define        printf  db_printf
-
-/*
- * Forward declarations for internal functions.
- */
-extern void vm_map_links_print(
-       struct vm_map_links     *links);
-
-extern void vm_map_header_print(
-       struct vm_map_header    *header);
-
-extern void vm_map_entry_print(
-       vm_map_entry_t          entry);
-
-extern void vm_follow_entry(
-       vm_map_entry_t          entry);
-
-extern void vm_follow_map(
-       vm_map_t                map);
-
-/*
- *     vm_map_links_print:     [ debug ]
- */
-void
-vm_map_links_print(
-       struct vm_map_links     *links)
-{
-       iprintf("prev = %08X  next = %08X  start = %016llX  end = %016llX\n",
-               links->prev,
-               links->next,
-               (unsigned long long)links->start,
-               (unsigned long long)links->end);
-}
-
-/*
- *     vm_map_header_print:    [ debug ]
- */
-void
-vm_map_header_print(
-       struct vm_map_header    *header)
-{
-       vm_map_links_print(&header->links);
-       iprintf("nentries = %08X, %sentries_pageable\n",
-               header->nentries,
-               (header->entries_pageable ? "" : "!"));
-}
-
-/*
- *     vm_follow_entry:        [ debug ]
- */
-void
-vm_follow_entry(
-       vm_map_entry_t entry)
-{
-       int shadows;
-
-       iprintf("map entry %08X\n", entry);
-
-       db_indent += 2;
-
-       shadows = vm_follow_object(entry->object.vm_object);
-       iprintf("Total objects : %d\n",shadows);
-
-       db_indent -= 2;
-}
-
-/*
- *     vm_map_entry_print:     [ debug ]
- */
-void
-vm_map_entry_print(
-       register vm_map_entry_t entry)
-{
-       static const char *inheritance_name[4] =
-               { "share", "copy", "none", "?"};
-       static const char *behavior_name[4] =
-               { "dflt", "rand", "seqtl", "rseqntl" };
-       
-       iprintf("map entry %08X - prev = %08X  next = %08X\n", entry, entry->vme_prev, entry->vme_next);
-
-       db_indent += 2;
-
-       vm_map_links_print(&entry->links);
-
-       iprintf("start = %016llX  end = %016llX - prot=%x/%x/%s\n",
-               (unsigned long long)entry->vme_start,
-               (unsigned long long)entry->vme_end,
-               entry->protection,
-               entry->max_protection,
-               inheritance_name[(entry->inheritance & 0x3)]);
-
-       iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
-               behavior_name[(entry->behavior & 0x3)],
-               entry->wired_count,
-               entry->user_wired_count);
-       iprintf("%sin_transition, %sneeds_wakeup\n",
-               (entry->in_transition ? "" : "!"),
-               (entry->needs_wakeup ? "" : "!"));
-
-       if (entry->is_sub_map) {
-               iprintf("submap = %08X - offset = %016llX\n",
-                       entry->object.sub_map,
-                       (unsigned long long)entry->offset);
-       } else {
-               iprintf("object = %08X  offset = %016llX - ",
-                       entry->object.vm_object,
-                       (unsigned long long)entry->offset);
-               printf("%sis_shared, %sneeds_copy\n",
-                      (entry->is_shared ? "" : "!"),
-                      (entry->needs_copy ? "" : "!"));
-       }
-
-       db_indent -= 2;
-}
-
-/*
- *     vm_follow_map:  [ debug ]
- */
-void
-vm_follow_map(
-       vm_map_t map)
-{
-       register vm_map_entry_t entry;
-
-       iprintf("task map %08X\n", map);
-
-       db_indent += 2;
-
-       for (entry = vm_map_first_entry(map);
-            entry && entry != vm_map_to_entry(map);
-            entry = entry->vme_next) {
-               vm_follow_entry(entry);
-       }
-
-       db_indent -= 2;
-}
-
-/*
- *     vm_map_print:   [ debug ]
- */
-void
-vm_map_print(
-       db_addr_t inmap)
-{
-       register vm_map_entry_t entry;
-       vm_map_t map;
-#if TASK_SWAPPER
-       char *swstate;
-#endif /* TASK_SWAPPER */
-
-       map = (vm_map_t)(long)
-               inmap;  /* Make sure we have the right type */
-
-       iprintf("task map %08X\n", map);
-
-       db_indent += 2;
-
-       vm_map_header_print(&map->hdr);
-
-       iprintf("pmap = %08X  size = %08X  ref = %d  hint = %08X  first_free = %08X\n",
-               map->pmap,
-               map->size,
-               map->ref_count,
-               map->hint,
-               map->first_free);
-
-       iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
-               (map->wait_for_space ? "" : "!"),
-               (map->wiring_required ? "" : "!"),
-               map->timestamp);
-
-#if    TASK_SWAPPER
-       switch (map->sw_state) {
-       case MAP_SW_IN:
-               swstate = "SW_IN";
-               break;
-       case MAP_SW_OUT:
-               swstate = "SW_OUT";
-               break;
-       default:
-               swstate = "????";
-               break;
-       }
-       iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
-#endif /* TASK_SWAPPER */
-
-       for (entry = vm_map_first_entry(map);
-            entry && entry != vm_map_to_entry(map);
-            entry = entry->vme_next) {
-               vm_map_entry_print(entry);
-       }
-
-       db_indent -= 2;
-}
-
-/*
- *     Routine:        vm_map_copy_print
- *     Purpose:
- *             Pretty-print a copy object for ddb.
- */
-
-void
-vm_map_copy_print(
-       db_addr_t       incopy)
-{
-       vm_map_copy_t copy;
-       vm_map_entry_t entry;
-
-       copy = (vm_map_copy_t)(long)
-               incopy; /* Make sure we have the right type */
-
-       printf("copy object 0x%x\n", copy);
-
-       db_indent += 2;
-
-       iprintf("type=%d", copy->type);
-       switch (copy->type) {
-       case VM_MAP_COPY_ENTRY_LIST:
-               printf("[entry_list]");
-               break;
-               
-       case VM_MAP_COPY_OBJECT:
-               printf("[object]");
-               break;
-               
-       case VM_MAP_COPY_KERNEL_BUFFER:
-               printf("[kernel_buffer]");
-               break;
-
-       default:
-               printf("[bad type]");
-               break;
-       }
-       printf(", offset=0x%llx", (unsigned long long)copy->offset);
-       printf(", size=0x%x\n", copy->size);
-
-       switch (copy->type) {
-       case VM_MAP_COPY_ENTRY_LIST:
-               vm_map_header_print(&copy->cpy_hdr);
-               for (entry = vm_map_copy_first_entry(copy);
-                    entry && entry != vm_map_copy_to_entry(copy);
-                    entry = entry->vme_next) {
-                       vm_map_entry_print(entry);
-               }
-               break;
-
-       case VM_MAP_COPY_OBJECT:
-               iprintf("object=0x%x\n", copy->cpy_object);
-               break;
-
-       case VM_MAP_COPY_KERNEL_BUFFER:
-               iprintf("kernel buffer=0x%x", copy->cpy_kdata);
-               printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
-               break;
-
-       }
-
-       db_indent -=2;
-}
-
-/*
- *     db_vm_map_total_size(map)       [ debug ]
- *
- *     return the total virtual size (in bytes) of the map
- */
-vm_map_size_t
-db_vm_map_total_size(
-       db_addr_t       inmap)
-{
-       vm_map_entry_t  entry;
-       vm_map_size_t   total;
-       vm_map_t map;
-
-       map = (vm_map_t)(long)
-               inmap;  /* Make sure we have the right type */
-
-       total = 0;
-       for (entry = vm_map_first_entry(map);
-            entry != vm_map_to_entry(map);
-            entry = entry->vme_next) {
-               total += entry->vme_end - entry->vme_start;
-       }
-
-       return total;
-}
-
-#endif /* MACH_KDB */
-
 /*
  *     Routine:        vm_map_entry_insert
  *
@@ -11357,7 +11188,7 @@ vm_map_remap_extract(
                                        if (override_nx(map, src_entry->alias) && prot)
                                                prot |= VM_PROT_EXECUTE;
 
-                                       if(map->mapped) {
+                                       if(map->mapped_in_other_pmaps) {
                                                vm_object_pmap_protect(
                                                        src_entry->object.vm_object,
                                                        src_entry->offset,
@@ -11405,6 +11236,14 @@ vm_map_remap_extract(
                 */
        RestartCopy:
                if (!copy) {
+                       /*
+                        * Cannot allow an entry describing a JIT
+                        * region to be shared across address spaces.
+                        */
+                       if (src_entry->used_for_jit == TRUE) {
+                               result = KERN_INVALID_ARGUMENT;
+                               break;
+                       }
                        src_entry->is_shared = TRUE;
                        new_entry->is_shared = TRUE;
                        if (!(new_entry->is_sub_map)) 
@@ -11440,7 +11279,7 @@ vm_map_remap_extract(
                                                       offset,
                                                       entry_size,
                                                       ((src_entry->is_shared 
-                                                        || map->mapped) ?
+                                                        || map->mapped_in_other_pmaps) ?
                                                        PMAP_NULL : map->pmap),
                                                       src_entry->vme_start,
                                                       prot);
@@ -11827,7 +11666,7 @@ StartAgain: ;
                         */
                        zap_map = vm_map_create(PMAP_NULL,
                                                start,
-                                               end - start,
+                                               end,
                                                map->hdr.entries_pageable);
                        if (zap_map == VM_MAP_NULL) {
                                return KERN_RESOURCE_SHORTAGE;
@@ -12937,8 +12776,9 @@ vm_map_is_64bit(
 }
 
 boolean_t
-vm_map_has_4GB_pagezero(
-               vm_map_t map)
+vm_map_has_hard_pagezero(
+               vm_map_t        map,
+               vm_map_offset_t pagezero_size)
 {
        /*
         * XXX FBDP
@@ -12950,7 +12790,7 @@ vm_map_has_4GB_pagezero(
         * VM map is being torn down, and when a new map is created via
         * load_machfile()/execve().
         */
-       return (map->min_offset >= 0x100000000ULL);
+       return (map->min_offset >= pagezero_size);
 }
 
 void
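
vm_map_has_4GB_pagezero() becomes vm_map_has_hard_pagezero() with the reservation size as an explicit parameter, so the same test serves any hard page-zero policy. The old behavior is just the 4GB instantiation; a short usage sketch:

    /* The old fixed check, expressed through the new interface: */
    if (vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL)) {
            /* map has a 64-bit style 4GB page-zero reservation */
    }

    /* Any non-empty hard page zero at all: */
    if (vm_map_has_hard_pagezero(map, (vm_map_offset_t)PAGE_SIZE)) {
            /* at least the first page is unmappable */
    }
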
@@ -12974,6 +12814,38 @@ vm_map_clear_4GB_pagezero(vm_map_t map)
 #endif
 }
 
+/*
+ * Raise a VM map's maximum offset.
+ */
+kern_return_t
+vm_map_raise_max_offset(
+       vm_map_t        map,
+       vm_map_offset_t new_max_offset)
+{
+       kern_return_t   ret;
+
+       vm_map_lock(map);
+       ret = KERN_INVALID_ADDRESS;
+
+       if (new_max_offset >= map->max_offset) {
+               if (!vm_map_is_64bit(map)) { 
+                       if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
+                               map->max_offset = new_max_offset;
+                               ret = KERN_SUCCESS;
+                       }
+               } else {
+                       if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
+                               map->max_offset = new_max_offset;
+                               ret = KERN_SUCCESS;
+                       }
+               }
+       }
+
+       vm_map_unlock(map);
+       return ret;
+}
+
+
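
vm_map_raise_max_offset() only ever grows a map's ceiling, clamping it to VM_MAX_ADDRESS for 32-bit maps and MACH_VM_MAX_ADDRESS for 64-bit ones; anything else returns KERN_INVALID_ADDRESS. A hedged usage sketch (the task-map caller is illustrative):

    kern_return_t kr;

    /* Sketch: widen a 64-bit task's usable VA to the architectural
     * maximum. Fails if the requested ceiling is below the current
     * max_offset or above MACH_VM_MAX_ADDRESS. */
    kr = vm_map_raise_max_offset(task->map,
            (vm_map_offset_t)MACH_VM_MAX_ADDRESS);
    if (kr != KERN_SUCCESS)
            printf("could not raise max offset\n");
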
 /*
  * Raise a VM map's minimum offset.
  * To strictly enforce "page zero" reservation.
@@ -13120,7 +12992,7 @@ kern_return_t vm_map_sign(vm_map_t map,
                /* Pull the dirty status from the pmap, since we cleared the 
                 * wpmapped bit */
                if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
-                       m->dirty = TRUE;
+                       SET_PAGE_DIRTY(m, FALSE);
                }
                
                /* On to the next page */
@@ -13140,6 +13012,7 @@ kern_return_t vm_map_freeze_walk(
                unsigned int *wired_count,
                unsigned int *clean_count,
                unsigned int *dirty_count,
+               unsigned int  dirty_budget,
                boolean_t *has_shared)
 {
        vm_map_entry_t entry;
@@ -13161,7 +13034,7 @@ kern_return_t vm_map_freeze_walk(
                        continue;
                }
 
-               vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
+               default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
                
                *purgeable_count += purgeable;
                *wired_count += wired;
@@ -13171,6 +13044,14 @@ kern_return_t vm_map_freeze_walk(
                if (shared) {
                        *has_shared = TRUE;
                }
+               
+               /* Adjust pageout budget and finish up if reached */
+               if (dirty_budget) {
+                       dirty_budget -= dirty;
+                       if (dirty_budget == 0) {
+                               break;
+                       }
+               }
        }
 
        vm_map_unlock_read(map);
@@ -13184,31 +13065,15 @@ kern_return_t vm_map_freeze(
                unsigned int *wired_count,
                unsigned int *clean_count,
                unsigned int *dirty_count,
+               unsigned int dirty_budget,
                boolean_t *has_shared)
 {      
        vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
-       vm_object_t compact_object = VM_OBJECT_NULL;
-       vm_object_offset_t offset = 0x0;
        kern_return_t kr = KERN_SUCCESS;
-       void *default_freezer_toc = NULL;
-       boolean_t cleanup = FALSE;
 
        *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
        *has_shared = FALSE;
 
-       /* Create our compact object */
-       compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
-       if (!compact_object) {
-               kr = KERN_FAILURE;
-               goto done;
-       }
-       
-       default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
-       if (!default_freezer_toc) {
-               kr = KERN_FAILURE;
-               goto done;
-       }
-
        /*
         * We need the exclusive lock here so that we can
         * block any page faults or lookups while we are
@@ -13216,20 +13081,19 @@ kern_return_t vm_map_freeze(
         */
        vm_map_lock(map);
 
-       if (map->default_freezer_toc != NULL){
+       if (map->default_freezer_handle == NULL) {      
+               map->default_freezer_handle = default_freezer_handle_allocate();
+       }
+       
+       if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
                /*
-                * This map has already been frozen.
+                * This can happen if the default_freezer_handle passed in
+                * is NULL, or if a table has already been allocated and
+                * associated with this handle, i.e. the map is already frozen.
                 */
-               cleanup = TRUE;
-               kr = KERN_SUCCESS;
                goto done;
        }
-
-       /* Get a mapping in place for the freezing about to commence */
-       map->default_freezer_toc = default_freezer_toc;
-
-       vm_object_lock(compact_object);
-
+       
        for (entry2 = vm_map_first_entry(map);
             entry2 != vm_map_to_entry(map);
             entry2 = entry2->vme_next) {
@@ -13241,13 +13105,21 @@ kern_return_t vm_map_freeze(
                        unsigned int purgeable, clean, dirty, wired;
                        boolean_t shared;
                
-                       vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
-                                                       src_object, compact_object, &default_freezer_toc, &offset);
+                       default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
+                                                       src_object, map->default_freezer_handle);
                                                                         
                        *purgeable_count += purgeable;
                        *wired_count += wired;
                        *clean_count += clean;
                        *dirty_count += dirty;
+                       
+                       /* Adjust pageout budget and finish up if reached */
+                       if (dirty_budget) {
+                               dirty_budget -= dirty;
+                               if (dirty_budget == 0) {
+                                       break;
+                               }
+                       }
 
                        if (shared) {
                                *has_shared = TRUE;
@@ -13255,61 +13127,36 @@ kern_return_t vm_map_freeze(
                }
        }
 
-       vm_object_unlock(compact_object);       
-       
        /* Finally, throw out the pages to swap */
-       vm_object_pageout(compact_object);
+       default_freezer_pageout(map->default_freezer_handle);
 
 done:
        vm_map_unlock(map);
-
-       /* Unwind if there was a failure */
-       if ((cleanup) || (KERN_SUCCESS != kr)) {
-               if (default_freezer_toc){
-                       default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
-               }
-               if (compact_object){
-                       vm_object_deallocate(compact_object);
-               }
-       }
        
        return kr;
 }
 
-__private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
-
-void
+kern_return_t
 vm_map_thaw(
        vm_map_t map)
 {
-       void **default_freezer_toc;
-       vm_object_t compact_object;
+       kern_return_t kr = KERN_SUCCESS;
 
        vm_map_lock(map);
 
-       if (map->default_freezer_toc == NULL){
+       if (map->default_freezer_handle == NULL) {
                /*
                 * This map is not in a frozen state.
                 */
+               kr = KERN_FAILURE;              
                goto out;
        }
-       
-       default_freezer_toc = &(map->default_freezer_toc);
-       
-       compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
-       
-       /* Bring the pages back in */
-       vm_object_pagein(compact_object);
-       
-       /* Shift pages back to their original objects */
-       vm_object_unpack(compact_object, default_freezer_toc);
 
-       vm_object_deallocate(compact_object);
-
-       map->default_freezer_toc = NULL;
-       
+       default_freezer_unpack(map->default_freezer_handle);    
 out:
        vm_map_unlock(map);
+       
+       return kr;
 }
 #endif
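The reworked freezer interface above moves per-map freezer state behind an
opaque default_freezer_handle, gives vm_map_freeze() a dirty_budget that caps
how many dirty pages are paged out per call (zero means uncapped), and makes
vm_map_thaw() return kern_return_t so callers can detect thawing a map that
was never frozen. A minimal caller sketch under those assumptions; the budget
value and the surrounding error handling are illustrative, not taken from
this commit:

	unsigned int purgeable = 0, wired = 0, clean = 0, dirty = 0;
	boolean_t has_shared = FALSE;
	kern_return_t kr;

	/* Freeze, paging out at most 256 dirty pages (illustrative cap). */
	kr = vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
			   256, &has_shared);

	/* ... later: thaw; KERN_FAILURE means the map was not frozen. */
	kr = vm_map_thaw(map);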
 
index f88bd545d91f33ca51269712ccfb76bdd17616c3..a89ad4d50d26cad2cddc8f673e945217684a26d9 100644 (file)
@@ -143,6 +143,7 @@ typedef union vm_map_object {
 } vm_map_object_t;
 
 #define named_entry_lock_init(object)  lck_mtx_init(&(object)->Lock, &vm_object_lck_grp, &vm_object_lck_attr)
+#define named_entry_lock_destroy(object)       lck_mtx_destroy(&(object)->Lock, &vm_object_lck_grp)
 #define named_entry_lock(object)               lck_mtx_lock(&(object)->Lock)
 #define named_entry_unlock(object)             lck_mtx_unlock(&(object)->Lock)   
 
@@ -238,9 +239,15 @@ struct vm_map_entry {
        /* boolean_t */         zero_wired_pages:1, /* zero out the wired pages of this entry if it is being deleted without unwiring them */
        /* boolean_t */         used_for_jit:1,
        /* boolean_t */ from_reserved_zone:1;   /* Allocated from
-                                                        * kernel reserved zone  */
+                                                * kernel reserved zone  */
        unsigned short          wired_count;    /* can be paged if = 0 */
        unsigned short          user_wired_count; /* for vm_wire */
+#if    DEBUG
+#define        MAP_ENTRY_CREATION_DEBUG (1)
+#endif 
+#if    MAP_ENTRY_CREATION_DEBUG
+       uintptr_t               vme_bt[16];
+#endif
 };
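On DEBUG kernels the new MAP_ENTRY_CREATION_DEBUG path reserves vme_bt[16]
in each map entry to hold the backtrace of the code that created it. A
hypothetical capture helper, assuming libkern's OSBacktrace(); the actual
hook inside the entry-allocation path is not shown in this hunk:

	#if MAP_ENTRY_CREATION_DEBUG
	#include <libkern/OSDebug.h>	/* OSBacktrace() */

	/* Hypothetical sketch: record the creator's backtrace in the entry. */
	static void
	vm_map_entry_record_creation(vm_map_entry_t entry)
	{
		(void) OSBacktrace((void **)&entry->vme_bt[0], 16);
	}
	#endif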
 
 /*
@@ -317,7 +324,7 @@ struct _vm_map {
        /* boolean_t */         wait_for_space:1, /* Should callers wait for space? */
        /* boolean_t */         wiring_required:1, /* All memory wired? */
        /* boolean_t */         no_zero_fill:1, /*No zero fill absent pages */
-       /* boolean_t */         mapped:1, /*has this map been mapped */
+       /* boolean_t */         mapped_in_other_pmaps:1, /*has this submap been mapped in maps that use a different pmap */
        /* boolean_t */         switch_protect:1, /*  Protect map from write faults while switched */
        /* boolean_t */         disable_vmentry_reuse:1, /*  All vm entries should keep using newer and higher addresses in the map */ 
        /* boolean_t */         map_disallow_data_exec:1, /* Disallow execution from data pages on exec-permissive architectures */
@@ -325,7 +332,7 @@ struct _vm_map {
        unsigned int            timestamp;      /* Version number */
        unsigned int            color_rr;       /* next color (not protected by a lock) */
 #if CONFIG_FREEZE
-       void                    *default_freezer_toc;
+       void                    *default_freezer_handle;
 #endif
        boolean_t               jit_entry_exists;
 } ;
@@ -701,6 +708,11 @@ extern kern_return_t       vm_map_copyin_object(
                                vm_object_size_t        size,
                                vm_map_copy_t           *copy_result); /* OUT */
 
+extern kern_return_t   vm_map_random_address_for_size(
+                               vm_map_t        map,
+                               vm_map_offset_t *address,
+                               vm_map_size_t   size);
+
 /* Enter a mapping */
 extern kern_return_t   vm_map_enter(
                                vm_map_t                map,
@@ -753,6 +765,7 @@ extern      kern_return_t   vm_map_read_user(
 
 /* Create a new task map using an existing task map as a template. */
 extern vm_map_t                vm_map_fork(
+                               ledger_t                ledger,
                                vm_map_t                old_map);
 
 /* Change inheritance */
@@ -982,11 +995,14 @@ extern void               vm_map_set_64bit(
 extern void            vm_map_set_32bit(
                                vm_map_t                map);
 
+extern boolean_t       vm_map_has_hard_pagezero(
+                               vm_map_t                map,
+                               vm_map_offset_t         pagezero_size);
+
 extern boolean_t       vm_map_is_64bit(
                                vm_map_t                map);
+#define vm_map_has_4GB_pagezero(map)   vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL)
 
-extern boolean_t       vm_map_has_4GB_pagezero(
-                               vm_map_t                map);
 
 extern void            vm_map_set_4GB_pagezero(
                                vm_map_t                map);
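The 4GB-specific predicate is generalized here: vm_map_has_hard_pagezero()
takes the page-zero size to test, and vm_map_has_4GB_pagezero() becomes a
macro over it. A hypothetical check for a 4 KB hard page zero under that
reading:

	/* Hypothetical: does this map enforce a hard 4 KB page zero? */
	if (vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x1000)) {
		/* presumably: the low 4 KB can never be mapped here */
	}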
@@ -994,6 +1010,10 @@ extern void               vm_map_set_4GB_pagezero(
 extern void            vm_map_clear_4GB_pagezero(
                                vm_map_t                map);
 
+extern kern_return_t   vm_map_raise_max_offset(
+       vm_map_t        map,
+       vm_map_offset_t new_max_offset);
+
 extern kern_return_t   vm_map_raise_min_offset(
        vm_map_t        map,
        vm_map_offset_t new_min_offset);
@@ -1078,12 +1098,17 @@ extern kern_return_t vm_map_sign(vm_map_t map,
 #endif
 
 #if CONFIG_FREEZE
+void   vm_map_freeze_thaw_init(void);
+void   vm_map_freeze_thaw(void);
+void   vm_map_demand_fault(void);
+
 extern kern_return_t vm_map_freeze_walk(
                vm_map_t map,
                unsigned int *purgeable_count,
                unsigned int *wired_count,
                unsigned int *clean_count,
                unsigned int *dirty_count,
+               unsigned int dirty_budget,
                boolean_t *has_shared);
 
 extern kern_return_t vm_map_freeze(
@@ -1092,9 +1117,10 @@ extern kern_return_t vm_map_freeze(
                unsigned int *wired_count,
                unsigned int *clean_count,
                unsigned int *dirty_count,
+               unsigned int dirty_budget,
                boolean_t *has_shared);
                 
-extern void vm_map_thaw(
+extern kern_return_t vm_map_thaw(
                 vm_map_t map);
 #endif
 
index ccfcd062fe550b562a7dff3957d6dc95025a00c9..b875fd651067df1a0b4664632d5f8574a9d491f0 100644 (file)
@@ -151,6 +151,7 @@ vm_map_store_entry_unlink( vm_map_t map, vm_map_entry_t entry)
        }
        
        _vm_map_store_entry_unlink(&VMEU_map->hdr, VMEU_entry);
+       vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);
        update_first_free_ll(VMEU_map, VMEU_first_free);
 #ifdef VM_MAP_STORE_USE_RB
        update_first_free_rb(VMEU_map, VMEU_first_free);
index 2f7d54e3c1d05b002742f6367b3b89505ad6adef..1cbe6926de2edbe2ac19fcdd7bbdc747e1d6a3bd 100644 (file)
@@ -72,6 +72,8 @@
 #include <mach/memory_object_control_server.h>
 #include <mach/vm_param.h>
 
+#include <mach/sdt.h>
+
 #include <ipc/ipc_types.h>
 #include <ipc/ipc_port.h>
 
 #include <vm/vm_protos.h>
 #include <vm/vm_purgeable_internal.h>
 
-#if CONFIG_EMBEDDED
-#include <sys/kern_memorystatus.h>
-#endif
-
 /*
  *     Virtual memory objects maintain the actual data
  *     associated with allocated virtual memory.  A given
@@ -1104,7 +1102,7 @@ vm_object_page_grab(
                p = next_p;
                next_p = (vm_page_t)queue_next(&next_p->listq);
 
-               if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->fictitious)
+               if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry || p->fictitious)
                        goto move_page_in_obj;
 
                if (p->pmapped || p->dirty || p->precious) {
@@ -1121,8 +1119,9 @@ vm_object_page_grab(
 
                                        if (refmod_state & VM_MEM_REFERENCED)
                                                p->reference = TRUE;
-                                       if (refmod_state & VM_MEM_MODIFIED)
-                                               p->dirty = TRUE;
+                                       if (refmod_state & VM_MEM_MODIFIED) {
+                                               SET_PAGE_DIRTY(p, FALSE);
+                                       }
                                }
                                if (p->dirty == FALSE && p->precious == FALSE) {
 
@@ -1130,8 +1129,9 @@ vm_object_page_grab(
 
                                        if (refmod_state & VM_MEM_REFERENCED)
                                                p->reference = TRUE;
-                                       if (refmod_state & VM_MEM_MODIFIED)
-                                               p->dirty = TRUE;
+                                       if (refmod_state & VM_MEM_MODIFIED) {
+                                               SET_PAGE_DIRTY(p, FALSE);
+                                       }
 
                                        if (p->dirty == FALSE)
                                                goto take_page;
@@ -1346,7 +1346,7 @@ vm_object_cache_evict(
 
                        object->vo_cache_pages_to_scan--;
 
-                       if (VM_PAGE_WIRED(p) || p->busy || p->cleaning) {
+                       if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry) {
                                queue_remove(&object->memq, p, vm_page_t, listq);
                                queue_enter(&object->memq, p, vm_page_t, listq);
 
@@ -1373,6 +1373,12 @@ vm_object_cache_evict(
                                p->reference = FALSE;
                                p->no_cache = FALSE;
 
+                               /*
+                                * we've already filtered out pages that are in the laundry
+                                * so if we get here, this page can't be on the pageout queue
+                                */
+                               assert(!p->pageout_queue);
+
                                VM_PAGE_QUEUES_REMOVE(p);
                                VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);
 
@@ -1833,7 +1839,7 @@ vm_object_reap_pages(
 restart_after_sleep:
        if (queue_empty(&object->memq))
                return;
-       loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;
+       loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);
 
        vm_page_lockspin_queues();
 
@@ -1859,38 +1865,13 @@ restart_after_sleep:
                        } else
                                mutex_pause(0);
 
-                       loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;
+                       loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);
 
                        vm_page_lockspin_queues();
                }
                if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {
 
-                       if (reap_type == REAP_DATA_FLUSH &&
-                           ((p->pageout == TRUE || p->cleaning == TRUE) && p->list_req_pending == TRUE)) {
-                               p->list_req_pending = FALSE;
-                               p->cleaning = FALSE;
-                               /*
-                                * need to drop the laundry count...
-                                * we may also need to remove it
-                                * from the I/O paging queue...
-                                * vm_pageout_throttle_up handles both cases
-                                *
-                                * the laundry and pageout_queue flags are cleared...
-                                */
-                               vm_pageout_throttle_up(p);
-
-                               if (p->pageout == TRUE) {
-                                       /*
-                                        * toss the wire count we picked up
-                                        * when we initially set this page up
-                                        * to be cleaned and stolen...
-                                        */
-                                       vm_page_unwire(p, TRUE);
-                                       p->pageout = FALSE;
-                               }
-                               PAGE_WAKEUP(p);
-
-                       } else if (p->busy || p->cleaning) {
+                       if (p->busy || p->cleaning) {
 
                                vm_page_unlock_queues();
                                /*
@@ -1903,6 +1884,11 @@ restart_after_sleep:
 
                                goto restart_after_sleep;
                        }
+                       if (p->laundry) {
+                               p->pageout = FALSE;
+
+                               vm_pageout_steal_laundry(p, TRUE);
+                       }
                }
                switch (reap_type) {
 
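Several hunks in this file now apply the same discipline to pages found in
the laundry, distilled below. Judging from the call sites, the boolean
argument appears to indicate whether the page-queues lock is already held
(TRUE here, FALSE in vm_object_range_op()); that is an inference, not
something this commit states:

	/*
	 * Distilled pattern from this commit: before reaping or dumping
	 * a laundry page, reclaim it from the pageout daemon so the
	 * daemon does not touch it after we free it.
	 */
	if (p->laundry) {
		p->pageout = FALSE;	/* no longer a pageout candidate */
		vm_pageout_steal_laundry(p, TRUE);
	}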
@@ -1920,15 +1906,29 @@ restart_after_sleep:
                        
                case REAP_PURGEABLE:
                        if (VM_PAGE_WIRED(p)) {
-                               /* can't purge a wired page */
+                               /*
+                                * can't purge a wired page
+                                */
                                vm_page_purged_wired++;
                                continue;
                        }
+                       if (p->laundry && !p->busy && !p->cleaning) {
+                               p->pageout = FALSE;
 
+                               vm_pageout_steal_laundry(p, TRUE);
+                       }
+                       if (p->cleaning || p->laundry) {
+                               /*
+                                * page is being acted upon,
+                                * so don't mess with it
+                                */
+                               vm_page_purged_others++;
+                               continue;
+                       }
                        if (p->busy) {
                                /*
                                 * We can't reclaim a busy page but we can
-                                * make it pageable (it's not wired) to make
+                                * make it more likely to be paged (it's not wired) to make
                                 * sure that it gets considered by
                                 * vm_pageout_scan() later.
                                 */
@@ -1937,14 +1937,6 @@ restart_after_sleep:
                                continue;
                        }
 
-                       if (p->cleaning || p->laundry || p->list_req_pending) {
-                               /*
-                                * page is being acted upon,
-                                * so don't mess with it
-                                */
-                               vm_page_purged_others++;
-                               continue;
-                       }
                        assert(p->object != kernel_object);
 
                        /*
@@ -1957,7 +1949,7 @@ restart_after_sleep:
                                 */
                                refmod_state = pmap_disconnect(p->phys_page);
                                if (refmod_state & VM_MEM_MODIFIED) {
-                                       p->dirty = TRUE;
+                                       SET_PAGE_DIRTY(p, FALSE);
                                }
                        }
                        if (p->dirty || p->precious) {
@@ -1989,15 +1981,14 @@ restart_after_sleep:
 
                        if ((p->dirty || p->precious) && !p->error && object->alive) {
 
-                               p->busy = TRUE;
-
-                               VM_PAGE_QUEUES_REMOVE(p);
-                               /*
-                                * flush page... page will be freed
-                                * upon completion of I/O
-                                */
-                               vm_pageout_cluster(p);
-
+                               if (!p->laundry) {
+                                       VM_PAGE_QUEUES_REMOVE(p);
+                                       /*
+                                        * flush page... page will be freed
+                                        * upon completion of I/O
+                                        */
+                                       vm_pageout_cluster(p, TRUE);
+                               }
                                vm_page_unlock_queues();
                                /*
                                 * free the pages reclaimed so far
@@ -2521,11 +2512,9 @@ deactivate_pages_in_object(
 
                        MARK_PAGE_HANDLED(*chunk_state, p);
        
-                       if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) {
+                       if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) {
                                int     clear_refmod;
        
-                               assert(!m->laundry);
-       
                                clear_refmod = VM_MEM_REFERENCED;
                                dwp->dw_mask = DW_clear_reference;
 
@@ -3092,6 +3081,7 @@ vm_object_copy_slowly(
        fault_info.io_sync = FALSE;
        fault_info.cs_bypass = FALSE;
        fault_info.mark_zf_absent = FALSE;
+       fault_info.batch_pmap_op = FALSE;
 
        for ( ;
            size != 0 ;
@@ -3149,11 +3139,6 @@ vm_object_copy_slowly(
                                result_page = _result_page;
 
                                /*
-                                *      We don't need to hold the object
-                                *      lock -- the busy page will be enough.
-                                *      [We don't care about picking up any
-                                *      new modifications.]
-                                *
                                 *      Copy the page to the new object.
                                 *
                                 *      POLICY DECISION:
@@ -3162,15 +3147,15 @@ vm_object_copy_slowly(
                                 *              of copying.
                                 */
 
-                               vm_object_unlock(result_page->object);
                                vm_page_copy(result_page, new_page);
+                               vm_object_unlock(result_page->object);
 
                                /*
                                 *      Let go of both pages (make them
                                 *      not busy, perform wakeup, activate).
                                 */
                                vm_object_lock(new_object);
-                               new_page->dirty = TRUE;
+                               SET_PAGE_DIRTY(new_page, FALSE);
                                PAGE_WAKEUP_DONE(new_page);
                                vm_object_unlock(new_object);
 
@@ -4758,7 +4743,19 @@ vm_object_do_bypass(
                        vm_object_res_reference(backing_object);
                }
 #endif /* TASK_SWAPPER */
+               /*
+                * vm_object_collapse (the caller of this function) is
+                * now called from contexts that may not guarantee that a
+                * valid reference is held on the object... w/o a valid
+                * reference, it is unsafe and unwise (you will definitely
+                * regret it) to unlock the object and then retake the lock
+                * since the object may be terminated and recycled in between.
+                * The "activity_in_progress" reference will keep the object
+                * 'stable'.
+                */
+               vm_object_activity_begin(object);
                vm_object_unlock(object);
+
                vm_object_unlock(backing_object);
                vm_object_deallocate(backing_object);
 
@@ -4770,6 +4767,7 @@ vm_object_do_bypass(
                 */
 
                vm_object_lock(object);
+               vm_object_activity_end(object);
        }
        
        object_bypasses++;
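The comment in the hunk above describes a reusable stabilization pattern;
distilled from the code it annotates, not new logic:

	vm_object_activity_begin(object);	/* pin: can't be terminated/recycled */
	vm_object_unlock(object);

	/* ... drop and retake other locks, deallocate backing_object ... */

	vm_object_lock(object);			/* still valid thanks to the pin */
	vm_object_activity_end(object);		/* unpin */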
@@ -5269,7 +5267,7 @@ vm_object_page_remove(
                for (; start < end; start += PAGE_SIZE_64) {
                        p = vm_page_lookup(object, start);
                        if (p != VM_PAGE_NULL) {
-                               assert(!p->cleaning && !p->pageout);
+                               assert(!p->cleaning && !p->pageout && !p->laundry);
                                if (!p->fictitious && p->pmapped)
                                        pmap_disconnect(p->phys_page);
                                VM_PAGE_FREE(p);
@@ -5282,7 +5280,7 @@ vm_object_page_remove(
                while (!queue_end(&object->memq, (queue_entry_t) p)) {
                        next = (vm_page_t) queue_next(&p->listq);
                        if ((start <= p->offset) && (p->offset < end)) {
-                               assert(!p->cleaning && !p->pageout);
+                               assert(!p->cleaning && !p->pageout && !p->laundry);
                                if (!p->fictitious && p->pmapped)
                                        pmap_disconnect(p->phys_page);
                                VM_PAGE_FREE(p);
@@ -5462,332 +5460,6 @@ vm_object_page_map(
        }
 }
 
-#include <mach_kdb.h>
-
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#include <vm/vm_print.h>
-
-#define printf kdbprintf
-
-extern boolean_t       vm_object_cached(
-                               vm_object_t object);
-
-extern void            print_bitstring(
-                               char byte);
-
-boolean_t      vm_object_print_pages = FALSE;
-
-void
-print_bitstring(
-       char byte)
-{
-       printf("%c%c%c%c%c%c%c%c",
-              ((byte & (1 << 0)) ? '1' : '0'),
-              ((byte & (1 << 1)) ? '1' : '0'),
-              ((byte & (1 << 2)) ? '1' : '0'),
-              ((byte & (1 << 3)) ? '1' : '0'),
-              ((byte & (1 << 4)) ? '1' : '0'),
-              ((byte & (1 << 5)) ? '1' : '0'),
-              ((byte & (1 << 6)) ? '1' : '0'),
-              ((byte & (1 << 7)) ? '1' : '0'));
-}
-
-boolean_t
-vm_object_cached(
-       __unused register vm_object_t object)
-{
-#if VM_OBJECT_CACHE
-       register vm_object_t o;
-
-       queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
-               if (object == o) {
-                       return TRUE;
-               }
-       }
-#endif
-       return FALSE;
-}
-
-#if    MACH_PAGEMAP
-/*
- *     vm_external_print:      [ debug ]
- */
-void
-vm_external_print(
-       vm_external_map_t       emap,
-       vm_object_size_t        size)
-{
-       if (emap == VM_EXTERNAL_NULL) {
-               printf("0  ");
-       } else {
-               vm_object_size_t existence_size = stob(size);
-               printf("{ size=%lld, map=[", (uint64_t) existence_size);
-               if (existence_size > 0) {
-                       print_bitstring(emap[0]);
-               }
-               if (existence_size > 1) {
-                       print_bitstring(emap[1]);
-               }
-               if (existence_size > 2) {
-                       printf("...");
-                       print_bitstring(emap[existence_size-1]);
-               }
-               printf("] }\n");
-       }
-       return;
-}
-#endif /* MACH_PAGEMAP */
-
-int
-vm_follow_object(
-       vm_object_t object)
-{
-       int count = 0;
-       int orig_db_indent = db_indent;
-
-       while (TRUE) {
-               if (object == VM_OBJECT_NULL) {
-                       db_indent = orig_db_indent;
-                       return count;
-               }
-
-               count += 1;
-
-               iprintf("object 0x%x", object);
-               printf(", shadow=0x%x", object->shadow);
-               printf(", copy=0x%x", object->copy);
-               printf(", pager=0x%x", object->pager);
-               printf(", ref=%d\n", object->ref_count);
-
-               db_indent += 2;
-               object = object->shadow;
-       }
-
-}
-
-/*
- *     vm_object_print:        [ debug ]
- */
-void
-vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
-               __unused db_expr_t arg_count, __unused char *modif)
-{
-       vm_object_t     object;
-       register vm_page_t p;
-       const char *s;
-
-       register int count;
-
-       object = (vm_object_t) (long) db_addr;
-       if (object == VM_OBJECT_NULL)
-               return;
-
-       iprintf("object 0x%x\n", object);
-
-       db_indent += 2;
-
-       iprintf("size=0x%x", object->vo_size);
-       printf(", memq_hint=%p", object->memq_hint);
-       printf(", ref_count=%d\n", object->ref_count);
-       iprintf("");
-#if    TASK_SWAPPER
-       printf("res_count=%d, ", object->res_count);
-#endif /* TASK_SWAPPER */
-       printf("resident_page_count=%d\n", object->resident_page_count);
-
-       iprintf("shadow=0x%x", object->shadow);
-       if (object->shadow) {
-               register int i = 0;
-               vm_object_t shadow = object;
-               while((shadow = shadow->shadow))
-                       i++;
-               printf(" (depth %d)", i);
-       }
-       printf(", copy=0x%x", object->copy);
-       printf(", shadow_offset=0x%x", object->vo_shadow_offset);
-       printf(", last_alloc=0x%x\n", object->last_alloc);
-
-       iprintf("pager=0x%x", object->pager);
-       printf(", paging_offset=0x%x", object->paging_offset);
-       printf(", pager_control=0x%x\n", object->pager_control);
-
-       iprintf("copy_strategy=%d[", object->copy_strategy);
-       switch (object->copy_strategy) {
-               case MEMORY_OBJECT_COPY_NONE:
-               printf("copy_none");
-               break;
-
-               case MEMORY_OBJECT_COPY_CALL:
-               printf("copy_call");
-               break;
-
-               case MEMORY_OBJECT_COPY_DELAY:
-               printf("copy_delay");
-               break;
-
-               case MEMORY_OBJECT_COPY_SYMMETRIC:
-               printf("copy_symmetric");
-               break;
-
-               case MEMORY_OBJECT_COPY_INVALID:
-               printf("copy_invalid");
-               break;
-
-               default:
-               printf("?");
-       }
-       printf("]");
-
-       iprintf("all_wanted=0x%x<", object->all_wanted);
-       s = "";
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
-               printf("%sinit", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
-               printf("%sready", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
-               printf("%spaging", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
-               printf("%slock", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
-               printf("%suncaching", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
-               printf("%scopy_call", s);
-               s = ",";
-       }
-       if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
-               printf("%scaching", s);
-               s = ",";
-       }
-       printf(">");
-       printf(", paging_in_progress=%d\n", object->paging_in_progress);
-       printf(", activity_in_progress=%d\n", object->activity_in_progress);
-
-       iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
-               (object->pager_created ? "" : "!"),
-               (object->pager_initialized ? "" : "!"),
-               (object->pager_ready ? "" : "!"),
-               (object->can_persist ? "" : "!"),
-               (object->pager_trusted ? "" : "!"),
-               (object->pageout ? "" : "!"),
-               (object->internal ? "internal" : "external"),
-               (object->temporary ? "temporary" : "permanent"));
-       iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
-               (object->alive ? "" : "!"),
-               ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
-               ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
-               ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
-               (object->shadowed ? "" : "!"),
-               (vm_object_cached(object) ? "" : "!"),
-               (object->private ? "" : "!"));
-       iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
-               (object->advisory_pageout ? "" : "!"),
-               (object->silent_overwrite ? "" : "!"));
-
-#if    MACH_PAGEMAP
-       iprintf("existence_map=");
-       vm_external_print(object->existence_map, object->vo_size);
-#endif /* MACH_PAGEMAP */
-#if    MACH_ASSERT
-       iprintf("paging_object=0x%x\n", object->paging_object);
-#endif /* MACH_ASSERT */
-
-       if (vm_object_print_pages) {
-               count = 0;
-               p = (vm_page_t) queue_first(&object->memq);
-               while (!queue_end(&object->memq, (queue_entry_t) p)) {
-                       if (count == 0) {
-                               iprintf("memory:=");
-                       } else if (count == 2) {
-                               printf("\n");
-                               iprintf(" ...");
-                               count = 0;
-                       } else {
-                               printf(",");
-                       }
-                       count++;
-
-                       printf("(off=0x%llX,page=%p)", p->offset, p);
-                       p = (vm_page_t) queue_next(&p->listq);
-               }
-               if (count != 0) {
-                       printf("\n");
-               }
-       }
-       db_indent -= 2;
-}
-
-
-/*
- *     vm_object_find          [ debug ]
- *
- *     Find all tasks which reference the given vm_object.
- */
-
-boolean_t vm_object_find(vm_object_t object);
-boolean_t vm_object_print_verbose = FALSE;
-
-boolean_t
-vm_object_find(
-       vm_object_t     object)
-{
-        task_t task;
-       vm_map_t map;
-       vm_map_entry_t entry;
-       boolean_t found = FALSE;
-
-       queue_iterate(&tasks, task, task_t, tasks) {
-               map = task->map;
-               for (entry = vm_map_first_entry(map);
-                        entry && entry != vm_map_to_entry(map);
-                        entry = entry->vme_next) {
-
-                       vm_object_t obj;
-
-                       /* 
-                        * For the time being skip submaps,
-                        * only the kernel can have submaps,
-                        * and unless we are interested in 
-                        * kernel objects, we can simply skip 
-                        * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
-                        * for a full solution.
-                        */
-                       if (entry->is_sub_map)
-                               continue;
-                       if (entry) 
-                               obj = entry->object.vm_object;
-                       else 
-                               continue;
-
-                       while (obj != VM_OBJECT_NULL) {
-                               if (obj == object) {
-                                       if (!found) {
-                                               printf("TASK\t\tMAP\t\tENTRY\n");
-                                               found = TRUE;
-                                       }
-                                       printf("0x%x\t0x%x\t0x%x\n", 
-                                                  task, map, entry);
-                               }
-                               obj = obj->shadow;
-                       }
-               }
-       }
-
-       return(found);
-}
-
-#endif /* MACH_KDB */
-
 kern_return_t
 vm_object_populate_with_private(
                vm_object_t             object,
@@ -5799,23 +5471,27 @@ vm_object_populate_with_private(
        vm_object_offset_t      base_offset;
 
 
-       if(!object->private)
+       if (!object->private)
                return KERN_FAILURE;
 
        base_page = phys_page;
 
        vm_object_lock(object);
-       if(!object->phys_contiguous) {
+
+       if (!object->phys_contiguous) {
                vm_page_t       m;
-               if((base_offset = trunc_page_64(offset)) != offset) {
+
+               if ((base_offset = trunc_page_64(offset)) != offset) {
                        vm_object_unlock(object);
                        return KERN_FAILURE;
                }
                base_offset += object->paging_offset;
-               while(size) {
+
+               while (size) {
                        m = vm_page_lookup(object, base_offset);
-                       if(m != VM_PAGE_NULL) {
-                               if(m->fictitious) {
+
+                       if (m != VM_PAGE_NULL) {
+                               if (m->fictitious) {
                                        if (m->phys_page != vm_page_guard_addr) {
 
                                                vm_page_lockspin_queues();
@@ -5824,16 +5500,16 @@ vm_object_populate_with_private(
 
                                                m->fictitious = FALSE;
                                                m->phys_page = base_page;
-                                               if(!m->busy) {
-                                                       m->busy = TRUE;
-                                               }
-                                               if(!m->absent) {
-                                                       m->absent = TRUE;
-                                               }
-                                               m->list_req_pending = TRUE;
                                        }
                                } else if (m->phys_page != base_page) {
-                                       if (m->pmapped) {
+
+                                       if ( !m->private) {
+                                               /*
+                                                * we'd leak a real page... that can't be right
+                                                */
+                                               panic("vm_object_populate_with_private - %p not private", m);
+                                       }
+                                       if (m->pmapped) {
                                                /*
                                                 * pmap call to clear old mapping
                                                 */
@@ -5841,17 +5517,12 @@ vm_object_populate_with_private(
                                        }
                                        m->phys_page = base_page;
                                }
-
-                               /*
-                                * ENCRYPTED SWAP:
-                                * We're not pointing to the same
-                                * physical page any longer and the
-                                * contents of the new one are not
-                                * supposed to be encrypted.
-                                * XXX What happens to the original
-                                * physical page. Is it lost ?
-                                */
-                               m->encrypted = FALSE;
+                               if (m->encrypted) {
+                                       /*
+                                        * we should never see this on a fictitious or private page
+                                        */
+                                       panic("vm_object_populate_with_private - %p encrypted", m);
+                               }
 
                        } else {
                                while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
@@ -5864,9 +5535,8 @@ vm_object_populate_with_private(
                                m->private = TRUE;
                                m->fictitious = FALSE;
                                m->phys_page = base_page;
-                               m->list_req_pending = TRUE;
-                               m->absent = TRUE;
                                m->unusual = TRUE;
+                               m->busy = FALSE;
 
                                vm_page_insert(m, object, base_offset);
                        }
@@ -5887,6 +5557,7 @@ vm_object_populate_with_private(
                object->vo_size = size;
        }
        vm_object_unlock(object);
+
        return KERN_SUCCESS;
 }
 
@@ -6444,7 +6115,7 @@ vm_object_purgable_control(
                        purgeable_q_t queue = vm_purgeable_object_remove(object);
                        assert(queue);
 
-                       vm_purgeable_token_delete_first(queue);
+                       vm_purgeable_token_delete_last(queue);
                        assert(queue->debug_count_objects>=0);
 
                        vm_page_unlock_queues();
@@ -6465,7 +6136,7 @@ vm_object_purgable_control(
                                refmod = pmap_disconnect(p->phys_page);
                                if ((refmod & VM_MEM_MODIFIED) &&
                                    !p->dirty) {
-                                       p->dirty = TRUE;
+                                       SET_PAGE_DIRTY(p, FALSE);
                                }
                        }
                }
@@ -6538,7 +6209,7 @@ vm_object_purgable_control(
 
                                /* Changing queue. Have to move token. */
                                vm_page_lock_queues();
-                               vm_purgeable_token_delete_first(old_queue);
+                               vm_purgeable_token_delete_last(old_queue);
                                result = vm_purgeable_token_add(queue);
                                vm_page_unlock_queues();
 
@@ -6566,7 +6237,7 @@ vm_object_purgable_control(
                                refmod = pmap_disconnect(p->phys_page);
                                if ((refmod & VM_MEM_MODIFIED) &&
                                    !p->dirty) {
-                                       p->dirty = TRUE;
+                                       SET_PAGE_DIRTY(p, FALSE);
                                }
                        }
                }
@@ -6583,7 +6254,7 @@ vm_object_purgable_control(
                                old_queue = vm_purgeable_object_remove(object);
                                assert(old_queue);
                                vm_page_lock_queues();
-                               vm_purgeable_token_delete_first(old_queue);
+                               vm_purgeable_token_delete_last(old_queue);
                                vm_page_unlock_queues();
                        }
                        (void) vm_object_purge(object);
@@ -7072,7 +6743,7 @@ extern int ignore_is_ssd;
 
 #if CONFIG_EMBEDDED
 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
-unsigned int preheat_pages_min = 8;
+unsigned int preheat_pages_min = 10;
 #else
 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
 unsigned int preheat_pages_min = 8;
@@ -7269,11 +6940,15 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
                pre_heat_size = max_length;
 
        if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) {
-               if (vm_page_free_count < vm_page_throttle_limit)
+
+               unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count;
+               
+               if (consider_free < vm_page_throttle_limit) {
                        pre_heat_size = trunc_page(pre_heat_size / 16);
-               else if (vm_page_free_count < vm_page_free_target)
+               } else if (consider_free < vm_page_free_target) {
                        pre_heat_size = trunc_page(pre_heat_size / 4);
-
+               }
+               
                if (pre_heat_size < min_ph_size_in_bytes)
                        pre_heat_size = min_ph_size_in_bytes;
        }
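To make the throttle-aware trimming concrete, a worked example with
hypothetical values:

	/*
	 * Suppose pre_heat_size = 1 MB and min_ph_size_in_bytes = 32 KB:
	 *   consider_free < vm_page_throttle_limit  ->  1 MB / 16 = 64 KB
	 *   consider_free < vm_page_free_target     ->  1 MB / 4  = 256 KB
	 * A result below 32 KB would be raised back to the floor.  Note
	 * that consider_free now includes vm_page_cleaned_count, so pages
	 * sitting on the new clean queue relieve the throttling.
	 */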
@@ -7406,6 +7081,8 @@ out:
        pre_heat_cluster[*length / PAGE_SIZE]++;
 
        vm_object_unlock(object);
+       
+       DTRACE_VM1(clustersize, vm_size_t, *length);
 }
 
 
@@ -7492,7 +7169,9 @@ vm_object_page_op(
                        /* if such violations occur we will assert sooner */
                        /* or later. */
                        assert(dst_page->busy || (ops & UPL_POP_BUSY));
-                       if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
+                       if (ops & UPL_POP_DIRTY) {
+                               SET_PAGE_DIRTY(dst_page, FALSE);
+                       }
                        if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
                        if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
                        if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
@@ -7611,12 +7290,7 @@ vm_object_range_op(
                dst_page = vm_page_lookup(object, offset);
                if (dst_page != VM_PAGE_NULL) {
                        if (ops & UPL_ROP_DUMP) {
-                               if (dst_page->list_req_pending) {
-                                       /*
-                                        * This page isn't on a UPL yet.
-                                        * So it's safe to steal it here and dump it.
-                                        */
-                               } else if (dst_page->busy || dst_page->cleaning) {
+                               if (dst_page->busy || dst_page->cleaning) {
                                        /*
                                         * someone else is playing with the 
                                         * page, we will have to wait
@@ -7630,6 +7304,11 @@ vm_object_range_op(
                                         */
                                        continue;
                                }
+                               if (dst_page->laundry) {
+                                       dst_page->pageout = FALSE;
+                                       
+                                       vm_pageout_steal_laundry(dst_page, FALSE);
+                               }
                                if (dst_page->pmapped == TRUE)
                                        pmap_disconnect(dst_page->phys_page);
 
@@ -7748,19 +7427,15 @@ vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
 
 #if CONFIG_FREEZE
 
-__private_extern__ void        default_freezer_pack_page(vm_page_t , vm_object_t , vm_object_offset_t, void**);
-__private_extern__ void        default_freezer_unpack(vm_object_t , void**);
-
 kern_return_t vm_object_pack(
-       unsigned int       *purgeable_count,
-       unsigned int       *wired_count,
-       unsigned int       *clean_count,
-       unsigned int       *dirty_count,
-       boolean_t          *shared,
-       vm_object_t         src_object,
-       vm_object_t         compact_object,
-       void                  **table,
-       vm_object_offset_t *offset)
+       unsigned int    *purgeable_count,
+       unsigned int    *wired_count,
+       unsigned int    *clean_count,
+       unsigned int    *dirty_count,
+       unsigned int    dirty_budget,
+       boolean_t       *shared,
+       vm_object_t     src_object,
+       struct default_freezer_handle *df_handle)
 {
        kern_return_t   kr = KERN_SUCCESS;
        
@@ -7777,8 +7452,8 @@ kern_return_t vm_object_pack(
        if (src_object->purgable == VM_PURGABLE_VOLATILE) {
                *purgeable_count = src_object->resident_page_count;
                
-               /* If the destination object is null, we're just walking the pages to discover how many can be hibernated */
-               if (VM_OBJECT_NULL != compact_object) {
+               /* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */
+               if (df_handle != NULL) {
                        purgeable_q_t queue;
                        /* object should be on a queue */
                        assert(src_object->objq.next != NULL &&
@@ -7794,7 +7469,7 @@ kern_return_t vm_object_pack(
        }
 
        if (src_object->ref_count == 1) {
-               vm_object_pack_pages(wired_count, clean_count, dirty_count, src_object, compact_object, table, offset);
+               vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle);
        } else {
                if (src_object->internal) {
                        *shared = TRUE;
@@ -7809,34 +7484,27 @@ done:
 
 void
 vm_object_pack_pages(
-       unsigned int       *wired_count,
-       unsigned int       *clean_count,
-       unsigned int       *dirty_count,
-       vm_object_t         src_object,
-       vm_object_t         compact_object,
-       void                  **table,
-       vm_object_offset_t *offset)
+       unsigned int            *wired_count,
+       unsigned int            *clean_count,
+       unsigned int            *dirty_count,
+       unsigned int            dirty_budget,
+       vm_object_t             src_object,
+       struct default_freezer_handle *df_handle)
 {
        vm_page_t p, next;
 
        next = (vm_page_t)queue_first(&src_object->memq);
 
-       /* Since this function is dual purpose in order that we can count
-        * the freezable pages as well as prepare them, assert that our 
-        * arguments are sane. Gnarly, but avoids code duplication. 
-        */
-       if (VM_OBJECT_NULL == compact_object){
-               assert(!table);
-               assert(!offset);
-       } else {
-               assert(table);
-               assert(offset);
-       }
-
        while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
                p = next;
                next = (vm_page_t)queue_next(&next->listq);
                
+               /* Finish up if we've hit our pageout limit */
+               if (dirty_budget && (dirty_budget == *dirty_count)) {
+                       break;
+               }
+               assert(!p->laundry);
+
                if (p->fictitious || p->busy ) 
                        continue;
                
@@ -7848,7 +7516,7 @@ vm_object_pack_pages(
                        continue;
                }
                
-               if (VM_OBJECT_NULL == compact_object) {
+               if (df_handle == NULL) {
                        if (p->dirty || pmap_is_modified(p->phys_page)) {
                                (*dirty_count)++;
                        } else {
@@ -7858,14 +7526,7 @@ vm_object_pack_pages(
                }
                
                if (p->cleaning) {
-                       p->busy = TRUE;
                        p->pageout = TRUE;
-                       p->dump_cleaning = TRUE;
-
-                       vm_page_lockspin_queues();
-                       vm_page_wire(p);
-                       vm_page_unlock_queues();
-
                        continue;
                }
 
@@ -7873,16 +7534,12 @@ vm_object_pack_pages(
                        int refmod_state;
                        refmod_state = pmap_disconnect(p->phys_page);
                        if (refmod_state & VM_MEM_MODIFIED) {
-                               p->dirty = TRUE;
+                               SET_PAGE_DIRTY(p, FALSE);
                        }
                }
                
                if (p->dirty) {
-                       p->busy = TRUE;
-               
-                       default_freezer_pack_page(p, compact_object, *offset, table);   
-                       *offset += PAGE_SIZE;
-
+                       default_freezer_pack_page(p, df_handle);        
                        (*dirty_count)++;
                }
                else {
@@ -7911,9 +7568,14 @@ vm_object_pageout(
                /* Throw to the pageout queue */
                vm_page_lockspin_queues();
 
-               VM_PAGE_QUEUES_REMOVE(p);
-               vm_pageout_cluster(p);
-
+               /*
+                * see if page is already in the process of
+                * being cleaned... if so, leave it alone
+                */
+               if (!p->laundry) {
+                       VM_PAGE_QUEUES_REMOVE(p);
+                       vm_pageout_cluster(p, TRUE);
+               }
                vm_page_unlock_queues();
        }
 
@@ -7953,26 +7615,4 @@ vm_object_pagein(
        
        return kr;
 }
-
-void
-vm_object_unpack(
-       vm_object_t compact_object,
-       void    **table)
-{
-       /*
-        * Future Work:
-        * Right now we treat the default freezer much like
-        * the default pager with respect to when it is
-        * created and terminated.
-        * But, in the future, we may want to terminate the
-        * default freezer at the very instant that an object
-        * has been completely re-filled with all it's previously
-        * paged-out pages.
-        * At that time we'll need to reset the object fields like
-        * "pager" and the associated "pager_{created,initialized,trusted}"
-        * fields right here.
-        */
-       default_freezer_unpack(compact_object, table);
-}
-
 #endif /* CONFIG_FREEZE */
index 0d21734af26937855460fd500efb708e467cf783..21bc4ddcb7b60ed88c6e246e8e53aaf8324393fd 100644 (file)
@@ -115,7 +115,8 @@ struct vm_object_fault_info {
        /* boolean_t */ io_sync:1,
        /* boolean_t */ cs_bypass:1,
        /* boolean_t */ mark_zf_absent:1,
-               __vm_object_fault_info_unused_bits:27;
+       /* boolean_t */ batch_pmap_op:1,
+               __vm_object_fault_info_unused_bits:26;
 };
 
 
@@ -477,7 +478,7 @@ __private_extern__ void     vm_object_res_deallocate(
        vm_object_lock_assert_shared(object);                           \
        assert((RLObject)->ref_count > 0);                              \
        OSAddAtomic(1, &(RLObject)->ref_count);         \
-       assert((RLObject)->ref_count > 1);                              \
+       assert((RLObject)->ref_count > 0);                              \
        /* XXX we would need an atomic version of the following ... */  \
        vm_object_res_reference(RLObject);                              \
        MACRO_END
@@ -699,39 +700,35 @@ __private_extern__ void           vm_object_reap_pages(
 #define REAP_DATA_FLUSH        3
 
 #if CONFIG_FREEZE
+struct default_freezer_handle;
 
 __private_extern__ kern_return_t 
 vm_object_pack(
-       unsigned int       *purgeable_count,
-       unsigned int       *wired_count,
-       unsigned int       *clean_count,
-       unsigned int       *dirty_count,
-       boolean_t          *shared,
-       vm_object_t         src_object,
-       vm_object_t         dst_object,
-       void                  **table,
-       vm_object_offset_t *offset);
+       unsigned int            *purgeable_count,
+       unsigned int            *wired_count,
+       unsigned int            *clean_count,
+       unsigned int            *dirty_count,
+       unsigned int            dirty_budget,
+       boolean_t               *shared,
+       vm_object_t             src_object,
+       struct default_freezer_handle *df_handle);
 
 __private_extern__ void
 vm_object_pack_pages(
-       unsigned int       *wired_count,
-       unsigned int       *clean_count,
-       unsigned int       *dirty_count,
-       vm_object_t         src_object,
-       vm_object_t         dst_object,
-       void                  **table,
-       vm_object_offset_t *offset);
-
-__private_extern__ void vm_object_pageout(
-    vm_object_t     object);
-
-__private_extern__  kern_return_t vm_object_pagein(
-       vm_object_t     object);
+       unsigned int            *wired_count,
+       unsigned int            *clean_count,
+       unsigned int            *dirty_count,
+       unsigned int            dirty_budget,
+       vm_object_t             src_object,
+       struct default_freezer_handle *df_handle);
 
-__private_extern__ void vm_object_unpack(
-       vm_object_t     object,
-       void          **table);
+__private_extern__ void
+vm_object_pageout(
+       vm_object_t     object);
 
+__private_extern__  kern_return_t
+vm_object_pagein(
+       vm_object_t     object);
 #endif /* CONFIG_FREEZE */
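As reshaped here, vm_object_pack() doubles as a counter: with a NULL
default_freezer_handle it only tallies what could be frozen, while a real
handle packs pages, stopping once dirty_budget dirty pages have been packed
(zero means uncapped). A sketch of the counting pass under those assumptions:

	unsigned int purgeable = 0, wired = 0, clean = 0, dirty = 0;
	boolean_t shared = FALSE;
	kern_return_t kr;

	/* Counting pass: NULL handle and no budget; no pages are packed. */
	kr = vm_object_pack(&purgeable, &wired, &clean, &dirty,
			    0, &shared, object, NULL);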
 
 /*
index 543a0c6f5d447fb68ec15c6a67491a316e93722b..b9888628df41cb2ec933f6426a0d1802de36fb1f 100644 (file)
@@ -179,19 +179,19 @@ struct vm_page {
         */
 #define local_id wire_count
        unsigned int    wire_count:16,  /* how many wired down maps use me? (O&P) */
-       /* boolean_t */ inactive:1,     /* page is in inactive list (P) */
-                       zero_fill:1,
-                       active:1,       /* page is in active list (P) */
+       /* boolean_t */ active:1,       /* page is in active list (P) */
+                       inactive:1,     /* page is in inactive list (P) */
+                       clean_queue:1,  /* page is in pre-cleaned list (P) */
+                       local:1,        /* page is in one of the local queues (P) */
+                       speculative:1,  /* page is in speculative list (P) */
+                       throttled:1,    /* pager is not responding (P) */
+                       free:1,         /* page is on free list (P) */
                        pageout_queue:1,/* page is on queue for pageout (P) */
-                       speculative:1,  /* page is on speculative list (P) */
                        laundry:1,      /* page is being cleaned now (P)*/
-                       free:1,         /* page is on free list (P) */
                        reference:1,    /* page has been used (P) */
                        gobbled:1,      /* page used internally (P) */
                        private:1,      /* Page should not be returned to
                                         *  the free list (P) */
-                       throttled:1,    /* pager is not responding (P) */
-                       local:1,
                        no_cache:1,     /* page is not to be cached and should
                                         * be reused ahead of other pages (P) */
                        __unused_pageq_bits:3;  /* 3 bits available here */
@@ -238,20 +238,13 @@ struct vm_page {
                                           page locked */
                        encrypted:1,    /* encrypted for secure swap (O) */
                        encrypted_cleaning:1,   /* encrypting page */
-                       list_req_pending:1, /* pagein/pageout alt mechanism */
-                                           /* allows creation of list      */
-                                           /* requests on pages that are   */
-                                           /* actively being paged.        */
-                       dump_cleaning:1,   /* set by the pageout daemon when */
-                                          /* a page being cleaned is       */
-                                          /* encountered and targeted as   */
-                                          /* a pageout candidate           */
                        cs_validated:1,    /* code-signing: page was checked */ 
                        cs_tainted:1,      /* code-signing: page is tainted */
                        reusable:1,
                        lopage:1,
                        slid:1,
-                       __unused_object_bits:7;  /* 7 bits available here */
+                       was_dirty:1,    /* was this page previously dirty? */
+                       __unused_object_bits:8;  /* 8 bits available here */
 
 #if __LP64__
        unsigned int __unused_padding;  /* Pad structure explicitly
@@ -405,7 +398,9 @@ queue_head_t        vm_page_queue_active;   /* active memory queue */
 extern
 queue_head_t   vm_page_queue_inactive; /* inactive memory queue for normal pages */
 extern
-queue_head_t   vm_page_queue_zf;       /* inactive memory queue for zero fill */
+queue_head_t    vm_page_queue_cleaned; /* clean-queue inactive memory */
+extern
+queue_head_t   vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
 extern
 queue_head_t   vm_page_queue_throttled;        /* memory queue for throttled pageout pages */
 
@@ -423,6 +418,8 @@ unsigned int        vm_page_active_count;   /* How many pages are active? */
 extern
 unsigned int   vm_page_inactive_count; /* How many pages are inactive? */
 extern
+unsigned int    vm_page_cleaned_count; /* How many pages are in the clean queue? */
+extern
 unsigned int   vm_page_throttled_count;/* How many inactives are throttled */
 extern
 unsigned int   vm_page_speculative_count;      /* How many speculative pages are unclaimed? */
@@ -439,6 +436,8 @@ uint32_t    vm_page_creation_throttle;      /* When to throttle new page creation */
 extern
 unsigned int   vm_page_inactive_target;/* How many do we want inactive? */
 extern
+unsigned int   vm_page_anonymous_min;  /* When it's ok to pre-clean */
+extern
 unsigned int   vm_page_inactive_min;   /* When to wake up pageout */
 extern
 unsigned int   vm_page_free_reserved;  /* How many pages reserved to do pageout */
@@ -563,6 +562,8 @@ extern void         vm_page_deactivate_internal(
                                        vm_page_t       page,
                                        boolean_t       clear_hw_reference);
 
+extern void            vm_page_enqueue_cleaned(vm_page_t page);
+
 extern void            vm_page_lru(
                                        vm_page_t       page);
 
@@ -593,7 +594,8 @@ extern void         vm_page_insert_internal(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset,
                                        boolean_t               queues_lock_held,
-                                       boolean_t               insert_in_hash);
+                                       boolean_t               insert_in_hash,
+                                       boolean_t               batch_pmap_op);
 
 extern void            vm_page_replace(
                                        vm_page_t               mem,
@@ -647,14 +649,44 @@ extern void               vm_page_free_prepare_object(
                                        vm_page_t       page,
                                        boolean_t       remove_from_hash);
 
-extern void            vm_check_memorystatus(void);
-
+#if CONFIG_JETSAM
+extern void memorystatus_update(unsigned int pages_avail);
+
+#define VM_CHECK_MEMORYSTATUS do { \
+       memorystatus_update(            \
+               vm_page_active_count +          \
+               vm_page_inactive_count +        \
+               vm_page_speculative_count +     \
+               vm_page_free_count +            \
+               (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count) \
+               ); \
+       } while(0)
+#else 
+#define VM_CHECK_MEMORYSTATUS do {} while(0)
+#endif
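
A condensed usage sketch (the wrapper function is hypothetical; VM_CHECK_MEMORYSTATUS, vm_page_free_list() and the vm_page_*_count globals are the interfaces shown in this diff): the macro is meant to be dropped in after any operation that changes the number of available pages, and on non-CONFIG_JETSAM kernels it compiles away to an empty statement, so call sites need no #if guards.

	/* hypothetical call site, for illustration only */
	static void
	example_free_pages_and_notify(vm_page_t freeq)
	{
		vm_page_free_list(freeq, TRUE);	/* return a batch of pages to the free list */
		VM_CHECK_MEMORYSTATUS;		/* let memorystatus/jetsam re-evaluate availability */
	}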
 
 /*
  *     Functions implemented as macros. m->wanted and m->busy are
  *     protected by the object lock.
  */
 
+#if CONFIG_EMBEDDED
+#define SET_PAGE_DIRTY(m, set_pmap_modified)                           \
+               MACRO_BEGIN                                             \
+               vm_page_t __page__ = (m);                               \
+               if (__page__->dirty == FALSE && (set_pmap_modified)) {  \
+                       pmap_set_modify(__page__->phys_page);           \
+               }                                                       \
+               __page__->dirty = TRUE;                                 \
+               MACRO_END
+#else /* CONFIG_EMBEDDED */
+#define SET_PAGE_DIRTY(m, set_pmap_modified)                           \
+               MACRO_BEGIN                                             \
+               vm_page_t __page__ = (m);                               \
+               __page__->dirty = TRUE;                                 \
+               MACRO_END
+#endif /* CONFIG_EMBEDDED */
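
A sketch of the two call patterns this commit uses (the first appears verbatim in vm_pageout.c below; nothing here is new API): pass FALSE for set_pmap_modified when the pmap modify state has just been collected, e.g. via pmap_disconnect(), and TRUE when the caller dirties the page without having consulted the pmap layer, so a CONFIG_EMBEDDED kernel can push the modify bit down first.

	/* refmod state is fresh from the pmap: no need to re-assert it */
	if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
		SET_PAGE_DIRTY(m, FALSE);
	}

	/* caller dirties the page itself, with no pmap information in hand */
	SET_PAGE_DIRTY(m, TRUE);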
+
 #define PAGE_ASSERT_WAIT(m, interruptible)                     \
                (((m)->wanted = TRUE),                          \
                 assert_wait((event_t) (m), (interruptible)))
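
PAGE_ASSERT_WAIT() only marks the page wanted and registers the wait; the caller still has to drop the object lock and block. A condensed sketch of the canonical pattern (illustrative; a PAGE_WAKEUP on the page is what ends the wait):

	PAGE_ASSERT_WAIT(m, THREAD_UNINT);	/* m->wanted = TRUE, wait registered */
	vm_object_unlock(object);		/* never block holding the object lock */
	thread_block(THREAD_CONTINUE_NULL);	/* sleep until PAGE_WAKEUP(m) */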
@@ -736,16 +768,24 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
  * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
  * 'wired' and local are ALWAYS mutually exclusive conditions.
  */
+
 #define VM_PAGE_QUEUES_REMOVE(mem)                             \
        MACRO_BEGIN                                             \
        VM_PAGE_QUEUES_ASSERT(mem, 1);                          \
        assert(!mem->laundry);                                  \
-       assert(!mem->pageout_queue);                            \
+/*                                                             \
+ *     if (mem->pageout_queue)                                 \
+ *             NOTE: VM_PAGE_QUEUES_REMOVE does not deal with removing pages from the pageout queue... \
+ *             the caller is responsible for determing if the page is on that queue, and if so, must   \
+ *             either first remove it (it needs both the page queues lock and the object lock to do    \
+ *             this via vm_pageout_steal_laundry), or avoid the call to VM_PAGE_QUEUES_REMOVE          \
+ */                                                            \
        if (mem->local) {                                       \
                struct vpl      *lq;                            \
                assert(mem->object != kernel_object);           \
                assert(!mem->inactive && !mem->speculative);    \
                assert(!mem->active && !mem->throttled);        \
+               assert(!mem->clean_queue);                      \
                assert(!mem->fictitious);                       \
                lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;        \
                VPL_LOCK(&lq->vpl_lock);                        \
@@ -760,6 +800,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
        else if (mem->active) {                                 \
                assert(mem->object != kernel_object);           \
                assert(!mem->inactive && !mem->speculative);    \
+               assert(!mem->clean_queue);                      \
                assert(!mem->throttled);                        \
                assert(!mem->fictitious);                       \
                queue_remove(&vm_page_queue_active,             \
@@ -773,17 +814,24 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
                assert(!mem->active && !mem->speculative);      \
                assert(!mem->throttled);                        \
                assert(!mem->fictitious);                       \
-               if (mem->zero_fill) {                           \
-                       queue_remove(&vm_page_queue_zf,         \
-                       mem, vm_page_t, pageq);                 \
-                       vm_zf_queue_count--;                    \
+               vm_page_inactive_count--;                       \
+               if (mem->clean_queue) {                         \
+                       queue_remove(&vm_page_queue_cleaned,    \
+                        mem, vm_page_t, pageq);                        \
+                       mem->clean_queue = FALSE;               \
+                       vm_page_cleaned_count--;                \
                } else {                                        \
-                       queue_remove(&vm_page_queue_inactive,   \
-                       mem, vm_page_t, pageq);                 \
+                       if (mem->object->internal) {            \
+                               queue_remove(&vm_page_queue_anonymous,  \
+                               mem, vm_page_t, pageq);         \
+                               vm_page_anonymous_count--;      \
+                       } else {                                \
+                               queue_remove(&vm_page_queue_inactive,   \
+                               mem, vm_page_t, pageq);         \
+                       }                                       \
+                       vm_purgeable_q_advance_all();           \
                }                                               \
                mem->inactive = FALSE;                          \
-               vm_page_inactive_count--;                       \
-               vm_purgeable_q_advance_all();                   \
        }                                                       \
                                                                \
        else if (mem->throttled) {                              \
@@ -819,12 +867,12 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
        assert(!mem->fictitious);                               \
        assert(!mem->laundry);                                  \
        assert(!mem->pageout_queue);                            \
-       if (mem->zero_fill) {                                   \
+       if (mem->object->internal) {                            \
                if (first == TRUE)                              \
-                       queue_enter_first(&vm_page_queue_zf, mem, vm_page_t, pageq);    \
+                       queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);     \
                else                                            \
-                       queue_enter(&vm_page_queue_zf, mem, vm_page_t, pageq);          \
-               vm_zf_queue_count++;                            \
+                       queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);           \
+               vm_page_anonymous_count++;                              \
        } else {                                                \
                if (first == TRUE)                              \
                        queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq); \
@@ -873,7 +921,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 #define DW_set_reference               0x800
 #define DW_move_page                   0x1000
 #define DW_VM_PAGE_QUEUES_REMOVE       0x2000
-#define DW_set_list_req_pending                0x4000
+#define DW_enqueue_cleaned             0x4000
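
DW_enqueue_cleaned takes over the bit retired with DW_set_list_req_pending: rather than flagging the page itself, callers now ask the delayed-work machinery to hand the page to vm_page_enqueue_cleaned() once the page-queue lock is held. The DW_* values are single-bit masks and are combined by OR-ing; an illustrative combination (not lifted from this commit):

	/* pull the page from its current queue, then put it on the cleaned queue */
	dwp->dw_mask |= (DW_VM_PAGE_QUEUES_REMOVE | DW_enqueue_cleaned);
	VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);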
 
 struct vm_page_delayed_work {
        vm_page_t       dw_m;
@@ -895,14 +943,6 @@ extern unsigned int vm_max_delayed_work_limit;
  * set, we need to set it and ask vm_page_do_delayed_work
  * to clear it and wakeup anyone that might have blocked on
  * it once we're done processing the page.
- *
- * additionally, we can't call vm_page_do_delayed_work with
- * list_req_pending == TRUE since it may need to 
- * drop the object lock before dealing
- * with this page and because list_req_pending == TRUE, 
- * busy == TRUE will NOT protect this page from being stolen
- * so clear list_req_pending and ask vm_page_do_delayed_work
- * to re-set it once it holds both the pageq and object locks
  */
 
 #define VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_cnt)             \
@@ -912,13 +952,9 @@ extern unsigned int vm_max_delayed_work_limit;
                if ( !(dwp->dw_mask & DW_vm_page_free))         \
                        dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); \
        }                                                       \
-       if (mem->list_req_pending) {                            \
-               mem->list_req_pending = FALSE;                  \
-               dwp->dw_mask |= DW_set_list_req_pending;        \
-       }                                                       \
        dwp->dw_m = mem;                                        \
        dwp++;                                                  \
-       dw_count++;                                             \
+       dw_cnt++;                                               \
        MACRO_END
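
A condensed sketch of the batching pattern these macros support (the loop is hypothetical; struct vm_page_delayed_work, VM_PAGE_ADD_DELAYED_WORK, vm_max_delayed_work_limit and vm_page_do_delayed_work() are the real interfaces, and DEFAULT_DELAYED_WORK_LIMIT is assumed to be this header's array-sizing constant): per-page work is gathered under the object lock and applied in one pass, so the page-queue lock is taken once per batch rather than once per page. Note that the macro advances both dwp and the count itself.

	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		dwp->dw_mask = DW_VM_PAGE_QUEUES_REMOVE;	/* illustrative choice of work */
		VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);	/* advances dwp and dw_count */

		if (dw_count >= (int)vm_max_delayed_work_limit) {
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);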
 
 extern vm_page_t vm_object_page_grab(vm_object_t);
index 28b3cb17293ee8ad45028258f043a169ef6fc36e..2bfd6e7a5967f705aeed9fd2cb99f4db0185eb9a 100644 (file)
@@ -68,7 +68,6 @@
 #include <debug.h>
 #include <mach_pagemap.h>
 #include <mach_cluster_stats.h>
-#include <mach_kdb.h>
 #include <advisory_pageout.h>
 
 #include <mach/mach_types.h>
@@ -95,8 +94,6 @@
 #include <machine/vm_tuning.h>
 #include <machine/commpage.h>
 
-#include <sys/kern_memorystatus.h>
-
 #include <vm/pmap.h>
 #include <vm/vm_fault.h>
 #include <vm/vm_map.h>
 /*
  * ENCRYPTED SWAP:
  */
-#include <../bsd/crypto/aes/aes.h>
+#include <libkern/crypto/aes.h>
 extern u_int32_t random(void); /* from <libkern/libkern.h> */
 
+extern int cs_debug;
+
 #if UPL_DEBUG
 #include <libkern/OSDebug.h>
 #endif
 
-extern void consider_pressure_events(void);
+#if VM_PRESSURE_EVENTS
+extern void consider_vm_pressure_events(void);
+#endif
 
 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
@@ -159,6 +160,14 @@ extern void consider_pressure_events(void);
 #define VM_PAGEOUT_IDLE_WAIT   10      /* milliseconds */
 #endif /* VM_PAGEOUT_IDLE_WAIT */
 
+#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
+#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED           1000    /* maximum pages considered before we issue a pressure event */
+#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
+
+#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
+#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS         5       /* seconds */
+#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
+
 unsigned int   vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
 unsigned int   vm_page_speculative_percentage = 5;
 
@@ -185,7 +194,7 @@ unsigned int        vm_page_speculative_percentage = 5;
  */
 
 #ifndef        VM_PAGE_INACTIVE_TARGET
-#define        VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 3)
+#define        VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 2)
 #endif /* VM_PAGE_INACTIVE_TARGET */
 
 /*
@@ -251,6 +260,8 @@ unsigned int        vm_page_speculative_percentage = 5;
 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM      100
 
 
+extern boolean_t hibernate_cleaning_in_progress;
+
 /*
  * Exported variable used to broadcast the activation of the pageout scan
  * Working Set uses this to throttle its use of pmap removes.  In this
@@ -264,10 +275,12 @@ unsigned int      vm_pageout_scan_event_counter = 0;
  * Forward declarations for internal routines.
  */
 
+static void vm_pressure_thread(void);
 static void vm_pageout_garbage_collect(int);
 static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
 static void vm_pageout_iothread_external(void);
 static void vm_pageout_iothread_internal(void);
+static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);
 
 extern void vm_pageout_continue(void);
 extern void vm_pageout_scan(void);
@@ -289,15 +302,6 @@ unsigned int vm_pageout_burst_inactive_throttle = 0;
 
 int    vm_upl_wait_for_pages = 0;
 
-/*
- *     Protection against zero fill flushing live working sets derived
- *     from existing backing store and files
- */
-unsigned int vm_accellerate_zf_pageout_trigger = 400;
-unsigned int zf_queue_min_count = 100;
-unsigned int vm_zf_queue_count = 0;
-
-uint64_t vm_zf_count __attribute__((aligned(8))) = 0;
 
 /*
  *     These variables record the pageout daemon's actions:
@@ -320,10 +324,27 @@ unsigned int vm_pageout_inactive_used = 0;        /* debugging */
 unsigned int vm_pageout_cache_evicted = 0;     /* debugging */
 unsigned int vm_pageout_inactive_clean = 0;    /* debugging */
 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
+
+unsigned int vm_pageout_freed_from_cleaned = 0;
+unsigned int vm_pageout_freed_from_speculative = 0;
+unsigned int vm_pageout_freed_from_inactive_clean = 0;
+
+unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
+unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
+
+unsigned int vm_pageout_cleaned_reclaimed = 0;         /* debugging; how many cleaned pages are reclaimed by the pageout scan */
+unsigned int vm_pageout_cleaned_reactivated = 0;       /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
+unsigned int vm_pageout_cleaned_reference_reactivated = 0;
+unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
+unsigned int vm_pageout_cleaned_fault_reactivated = 0;
+unsigned int vm_pageout_cleaned_commit_reactivated = 0;        /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
+unsigned int vm_pageout_cleaned_busy = 0;
+unsigned int vm_pageout_cleaned_nolock = 0;
+
 unsigned int vm_pageout_inactive_dirty_internal = 0;   /* debugging */
 unsigned int vm_pageout_inactive_dirty_external = 0;   /* debugging */
 unsigned int vm_pageout_inactive_deactivated = 0;      /* debugging */
-unsigned int vm_pageout_inactive_zf = 0;       /* debugging */
+unsigned int vm_pageout_inactive_anonymous = 0;        /* debugging */
 unsigned int vm_pageout_dirty_no_pager = 0;    /* debugging */
 unsigned int vm_pageout_purged_objects = 0;    /* debugging */
 unsigned int vm_stat_discard = 0;              /* debugging */
@@ -339,16 +360,22 @@ unsigned int vm_pageout_scan_active_throttled = 0;
 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
 unsigned int vm_pageout_scan_throttle = 0;                     /* debugging */
-unsigned int vm_pageout_scan_throttle_aborted = 0;             /* debugging */
 unsigned int vm_pageout_scan_burst_throttle = 0;               /* debugging */
 unsigned int vm_pageout_scan_empty_throttle = 0;               /* debugging */
 unsigned int vm_pageout_scan_deadlock_detected = 0;            /* debugging */
 unsigned int vm_pageout_scan_active_throttle_success = 0;      /* debugging */
 unsigned int vm_pageout_scan_inactive_throttle_success = 0;    /* debugging */
-unsigned int vm_pageout_inactive_external_forced_reactivate_count = 0; /* debugging */
+unsigned int vm_pageout_inactive_external_forced_reactivate_count = 0; /* debugging */
+unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;     /* debugging */
 unsigned int vm_page_speculative_count_drifts = 0;
 unsigned int vm_page_speculative_count_drift_max = 0;
 
+
+unsigned int vm_precleaning_aborted = 0;
+
+static boolean_t vm_pageout_need_to_refill_clean_queue = FALSE;
+static boolean_t vm_pageout_precleaning_delayed = FALSE;
+
 /*
  * Backing store throttle when BS is exhausted
  */
@@ -385,6 +412,12 @@ unsigned long vm_cs_validated_resets = 0;
 
 int    vm_debug_events = 0;
 
+#if CONFIG_MEMORYSTATUS
+extern int memorystatus_wakeup;
+#endif
+#if CONFIG_JETSAM
+extern int memorystatus_kill_top_proc_from_VM(void);
+#endif
 
 /*
  *     Routine:        vm_backing_store_disable
@@ -457,6 +490,7 @@ vm_pageout_object_terminate(
                assert(p->pageout);
                p->pageout = FALSE;
                assert(!p->cleaning);
+               assert(!p->laundry);
 
                offset = p->offset;
                VM_PAGE_FREE(p);
@@ -467,13 +501,6 @@ vm_pageout_object_terminate(
 
                if(m == VM_PAGE_NULL)
                        continue;
-               assert(m->cleaning);
-               /* used as a trigger on upl_commit etc to recognize the */
-               /* pageout daemon's subseqent desire to pageout a cleaning */
-               /* page.  When the bit is on the upl commit code will   */
-               /* respect the pageout bit in the target page over the  */
-               /* caller's page list indication */
-               m->dump_cleaning = FALSE;
 
                assert((m->dirty) || (m->precious) ||
                                (m->busy && m->cleaning));
@@ -483,9 +510,8 @@ vm_pageout_object_terminate(
                 * Also decrement the burst throttle (if external).
                 */
                vm_page_lock_queues();
-               if (m->laundry) {
+               if (m->laundry)
                        vm_pageout_throttle_up(m);
-               }
 
                /*
                 * Handle the "target" page(s). These pages are to be freed if
@@ -514,10 +540,11 @@ vm_pageout_object_terminate(
                         * can detect whether the page was redirtied during
                         * pageout by checking the modify state.
                         */
-                       if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
-                             m->dirty = TRUE;
-                       else
-                             m->dirty = FALSE;
+                       if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       } else {
+                               m->dirty = FALSE;
+                       }
 
                        if (m->dirty) {
                                CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
@@ -588,7 +615,7 @@ vm_pageout_object_terminate(
                        else            vm_pageout_cluster_cleaned++;
                        if (m->wanted)  vm_pageout_cluster_collisions++;
 #else
-                       m->dirty = 0;
+                       m->dirty = FALSE;
 #endif
                }
                if (m->encrypted_cleaning == TRUE) {
@@ -650,7 +677,7 @@ vm_pageclean_setup(
         * Mark original page as cleaning in place.
         */
        m->cleaning = TRUE;
-       m->dirty = TRUE;
+       SET_PAGE_DIRTY(m, FALSE);
        m->precious = FALSE;
 
        /*
@@ -697,7 +724,6 @@ vm_pageout_initialize_page(
 {
        vm_object_t             object;
        vm_object_offset_t      paging_offset;
-       vm_page_t               holding_page;
        memory_object_t         pager;
 
        XPR(XPR_VM_PAGEOUT,
@@ -740,21 +766,17 @@ vm_pageout_initialize_page(
                return;
        }
 
-       /* set the page for future call to vm_fault_list_request */
-       vm_object_paging_begin(object);
-       holding_page = NULL;
-
+       /*
+        * set the page for future call to vm_fault_list_request
+        */
        pmap_clear_modify(m->phys_page);
-       m->dirty = TRUE;
-       m->busy = TRUE;
-       m->list_req_pending = TRUE;
-       m->cleaning = TRUE;
+       SET_PAGE_DIRTY(m, FALSE);
        m->pageout = TRUE;
 
-       vm_page_lockspin_queues();
-       vm_page_wire(m);
-       vm_page_unlock_queues();
-
+       /*
+        * keep the object from collapsing or terminating
+        */
+       vm_object_paging_begin(object);
        vm_object_unlock(object);
 
        /*
@@ -797,7 +819,7 @@ struct {
  */
 
 void
-vm_pageout_cluster(vm_page_t m)
+vm_pageout_cluster(vm_page_t m, boolean_t pageout)
 {
        vm_object_t     object = m->object;
         struct         vm_pageout_queue *q;
@@ -816,27 +838,19 @@ vm_pageout_cluster(vm_page_t m)
        /*
         * Only a certain kind of page is appreciated here.
         */
-       assert(m->busy && (m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
-       assert(!m->cleaning && !m->pageout);
+       assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
+       assert(!m->cleaning && !m->pageout && !m->laundry);
 #ifndef CONFIG_FREEZE
        assert(!m->inactive && !m->active);
        assert(!m->throttled);
 #endif
 
        /*
-        * protect the object from collapse - 
-        * locking in the object's paging_offset.
+        * protect the object from collapse or termination
         */
-       vm_object_paging_begin(object);
+       vm_object_activity_begin(object);
 
-       /*
-        * set the page for future call to vm_fault_list_request
-        * page should already be marked busy
-        */
-       vm_page_wire(m);
-       m->list_req_pending = TRUE;
-       m->cleaning = TRUE;
-       m->pageout = TRUE;
+       m->pageout = pageout;
 
        if (object->internal == TRUE)
                q = &vm_pageout_queue_internal;
@@ -879,6 +893,11 @@ vm_pageout_throttle_up(
        assert(m->object != VM_OBJECT_NULL);
        assert(m->object != kernel_object);
 
+#if DEBUG
+       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+       vm_object_lock_assert_exclusive(m->object);
+#endif
+
        vm_pageout_throttle_up_count++;
 
        if (m->object->internal == TRUE)
@@ -894,10 +913,9 @@ vm_pageout_throttle_up(
               m->pageq.next = NULL;
               m->pageq.prev = NULL;
 
-              vm_object_paging_end(m->object);
+              vm_object_activity_end(m->object);
        }
-
-       if ( m->laundry == TRUE ) {
+       if (m->laundry == TRUE) {
 
               m->laundry = FALSE;
               q->pgo_laundry--;
@@ -910,6 +928,16 @@ vm_pageout_throttle_up(
                       q->pgo_draining = FALSE;
                       thread_wakeup((event_t) (&q->pgo_laundry+1));
               }
+              if (vm_pageout_precleaning_delayed == TRUE) {
+                      /*
+                       * since the pageout scan can return on laundry congestion, wake it up this way;
+                       * don't depend on pgo_throttled == TRUE to indicate that the pageout scan thread
+                       * is blocked on &q->pgo_laundry since the hibernation mechanism utilizes both
+                       * pgo_throttled and pgo_draining
+                       */
+                      vm_pageout_precleaning_delayed = FALSE;
+                      thread_wakeup((event_t)(&vm_page_free_wanted));
+              }
        }
 }
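
This new wakeup pairs with the precleaning logic added to vm_pageout_scan() later in this file: when the scan meets its free target but not its cleaned target and the laundry is congested, it sets vm_pageout_precleaning_delayed and returns, and the pageout thread then parks on &vm_page_free_wanted between scans (the actual block lives in the thread's continuation, outside this hunk). A condensed sketch of the handshake:

	/*
	 * waiter (pageout thread, between calls to vm_pageout_scan()):
	 *	vm_pageout_precleaning_delayed = TRUE;
	 *	assert_wait((event_t)&vm_page_free_wanted, THREAD_UNINT);
	 *	thread_block(...);
	 *
	 * waker (vm_pageout_throttle_up(), above, once the laundry drains):
	 *	vm_pageout_precleaning_delayed = FALSE;
	 *	thread_wakeup((event_t)&vm_page_free_wanted);
	 */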
 
@@ -965,6 +993,14 @@ compute_memory_pressure(
        vm_pageout_stat_now = vm_pageout_next;
 }
 
+
+/*
+ * IMPORTANT:
+ * mach_vm_ctl_page_free_wanted() is called indirectly, via
+ * mach_vm_pressure_monitor(), when taking a stackshot. Therefore, 
+ * it must be safe in the restricted stackshot context. Locks and/or 
+ * blocking are not allowable.
+ */
 unsigned int
 mach_vm_ctl_page_free_wanted(void)
 {
@@ -981,6 +1017,15 @@ mach_vm_ctl_page_free_wanted(void)
        return page_free_wanted;
 }
 
+
+/*
+ * IMPORTANT:
+ * mach_vm_pressure_monitor() is called when taking a stackshot, with 
+ * wait_for_pressure FALSE, so that code path must remain safe in the
+ * restricted stackshot context. No blocking or locks are allowable
+ * on that code path.
+ */
+
 kern_return_t
 mach_vm_pressure_monitor(
        boolean_t       wait_for_pressure,
@@ -1049,15 +1094,24 @@ mach_vm_pressure_monitor(
        return KERN_SUCCESS;
 }
 
-/* Page States: Used below to maintain the page state
-   before it's removed from it's Q. This saved state
-   helps us do the right accounting in certain cases
-*/
 
+
+/*
+ * function in BSD to apply I/O throttle to the pageout thread
+ */
+extern void vm_pageout_io_throttle(void);
+
+
+/*
+ * Page States: Used below to maintain the page state
+ * before it's removed from its queue. This saved state
+ * helps us do the right accounting in certain cases
+ */
 #define PAGE_STATE_SPECULATIVE         1
-#define PAGE_STATE_ZEROFILL            2
+#define PAGE_STATE_ANONYMOUS           2
 #define PAGE_STATE_INACTIVE            3
 #define PAGE_STATE_INACTIVE_FIRST      4
+#define PAGE_STATE_CLEAN      5
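
For reference, the mapping between a victim's source queue and the PAGE_STATE_* value recorded in page_prev_state, summarized from the selection loop later in this function:

	/*
	 *	victim taken from			page_prev_state
	 *	sq->age_q (speculative)			PAGE_STATE_SPECULATIVE
	 *	vm_page_queue_cleaned			PAGE_STATE_CLEAN
	 *	vm_page_queue_inactive			PAGE_STATE_INACTIVE
	 *	vm_page_queue_anonymous			PAGE_STATE_ANONYMOUS
	 *	speculative victim whose object lock	PAGE_STATE_INACTIVE_FIRST
	 *	  was missed, requeued at the head
	 *	  of the inactive queue
	 */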
 
 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)                                \
        MACRO_BEGIN                                                     \
@@ -1091,11 +1145,13 @@ struct flow_control {
         mach_timespec_t        ts;
 };
 
+uint32_t vm_pageout_considered_page = 0;
+
 
 /*
  *     vm_pageout_scan does the dirty work for the pageout daemon.
- *     It returns with vm_page_queue_free_lock held and
- *     vm_page_free_wanted == 0.
+ *     It returns with both vm_page_queue_free_lock and vm_page_queue_lock
+ *     held and vm_page_free_wanted == 0.
  */
 void
 vm_pageout_scan(void)
@@ -1121,11 +1177,11 @@ vm_pageout_scan(void)
        unsigned        int msecs = 0;
        vm_object_t     object;
        vm_object_t     last_object_tried;
-       uint64_t        zf_ratio;
-       uint64_t        zf_run_count;
        uint32_t        catch_up_count = 0;
        uint32_t        inactive_reclaim_run;
        boolean_t       forced_reclaim;
+       boolean_t       exceeded_burst_throttle;
+       boolean_t       grab_anonymous = FALSE;
        int             page_prev_state = 0;
        int             cache_evict_throttle = 0;
        uint32_t        vm_pageout_inactive_external_forced_reactivate_limit = 0;
@@ -1155,6 +1211,7 @@ vm_pageout_scan(void)
                                                    vm_page_inactive_count);
        inactive_reclaim_run = 0;
 
+       vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
 
        /*
         *      We want to gradually dribble pages from the active queue
@@ -1176,39 +1233,16 @@ vm_pageout_scan(void)
 
 Restart:
        assert(delayed_unlock!=0);
-       
-       /*
-        *      A page is "zero-filled" if it was not paged in from somewhere,
-        *      and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big.
-        *      Recalculate the zero-filled page ratio.  We use this to apportion
-        *      victimized pages between the normal and zero-filled inactive
-        *      queues according to their relative abundance in memory.  Thus if a task
-        *      is flooding memory with zf pages, we begin to hunt them down.
-        *      It would be better to throttle greedy tasks at a higher level,
-        *      but at the moment mach vm cannot do this.
-        */
-       {
-               uint64_t  total  = vm_page_active_count + vm_page_inactive_count;
-               uint64_t  normal = total - vm_zf_count;
 
-               /* zf_ratio is the number of zf pages we victimize per normal page */
-               
-               if (vm_zf_count < vm_accellerate_zf_pageout_trigger)
-                       zf_ratio = 0;
-               else if ((vm_zf_count <= normal) || (normal == 0))
-                       zf_ratio = 1;
-               else 
-                       zf_ratio = vm_zf_count / normal;
-                       
-               zf_run_count = 0;
-       }
-        
        /*
         *      Recalculate vm_page_inactivate_target.
         */
        vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
                                                          vm_page_inactive_count +
                                                          vm_page_speculative_count);
+
+       vm_page_anonymous_min = vm_page_inactive_target / 3;
+
        /*
         * don't want to wake the pageout_scan thread up every time we fall below
         * the targets... set a low water mark at 0.25% below the target
@@ -1223,8 +1257,6 @@ Restart:
        vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
                                                                vm_page_inactive_count);
 
-       vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
-
        object = NULL;
        last_object_tried = NULL;
        try_failed = FALSE;
@@ -1233,7 +1265,7 @@ Restart:
                catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
        else
                catch_up_count = 0;
-                   
+
        for (;;) {
                vm_page_t m;
 
@@ -1254,8 +1286,9 @@ Restart:
                /*
                 * Move pages from active to inactive if we're below the target
                 */
+               /* if we are trying to make clean, we need to make sure we actually have inactive - mj */
                if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
-                       goto done_moving_active_pages;
+                       goto done_moving_active_pages;
 
                if (object != NULL) {
                        vm_object_unlock(object);
@@ -1336,6 +1369,9 @@ Restart:
 
 done_moving_active_pages:
 
+               if (vm_page_cleaned_count < VM_PAGE_CLEANED_MIN && vm_page_anonymous_count > vm_page_anonymous_min)
+                       vm_pageout_need_to_refill_clean_queue = TRUE;
+
                if (vm_page_free_count + local_freed >= vm_page_free_target) {
                        if (object != NULL) {
                                vm_object_unlock(object);
@@ -1349,7 +1385,7 @@ done_moving_active_pages:
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
                                               vm_page_free_count, local_freed, delayed_unlock_limit, 2);
 
-                               vm_page_free_list(local_freeq, TRUE);
+                               vm_page_free_list(local_freeq, TRUE);
                                        
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
                                               vm_page_free_count, local_freed, 0, 2);
@@ -1358,6 +1394,16 @@ done_moving_active_pages:
                                local_freed = 0;
                                vm_page_lock_queues();
                        }
+                       /*
+                        * make sure the pageout I/O threads are running
+                        * throttled in case there are still requests 
+                        * in the laundry... since we have met our targets
+                        * we don't need the laundry to be cleaned in a timely
+                        * fashion... so let's avoid interfering with foreground
+                        * activity
+                        */
+                       vm_pageout_adjust_io_throttles(iq, eq, TRUE);
+
                        /*
                         * recalculate vm_page_inactivate_target
                         */
@@ -1377,19 +1423,18 @@ done_moving_active_pages:
                        lck_mtx_lock(&vm_page_queue_free_lock);
 
                        if ((vm_page_free_count >= vm_page_free_target) &&
+                           (vm_page_cleaned_count >= VM_PAGE_CLEANED_TARGET || vm_pageout_need_to_refill_clean_queue == FALSE) &&
                            (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
                                /*
                                 * done - we have met our target *and*
                                 * there is no one waiting for a page.
                                 */
-                               vm_page_unlock_queues();
-
-                               thread_wakeup((event_t) &vm_pageout_garbage_collect);
-
+                               vm_pageout_need_to_refill_clean_queue = FALSE;
+return_from_scan:
                                assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
 
                                VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
-                                              vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
+                                              vm_pageout_inactive, vm_pageout_inactive_used, vm_pageout_need_to_refill_clean_queue, 0);
                                VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
                                               vm_pageout_speculative_clean, vm_pageout_inactive_clean,
                                               vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
@@ -1524,14 +1569,15 @@ done_moving_active_pages:
                        cache_evict_throttle--;
 
 
+               exceeded_burst_throttle = FALSE;
                /*
                 * Sometimes we have to pause:
                 *      1) No inactive pages - nothing to do.
-                *      2) Flow control - default pageout queue is full
-                *      3) Loop control - no acceptable pages found on the inactive queue
+                *      2) Loop control - no acceptable pages found on the inactive queue
                 *         within the last vm_pageout_burst_inactive_throttle iterations
+                *      3) Flow control - default pageout queue is full
                 */
-               if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q)) {
+               if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
                        vm_pageout_scan_empty_throttle++;
                        msecs = vm_pageout_empty_wait;
                        goto vm_pageout_scan_delay;
@@ -1542,6 +1588,8 @@ done_moving_active_pages:
                                vm_page_speculative_count))) {
                        vm_pageout_scan_burst_throttle++;
                        msecs = vm_pageout_burst_wait;
+
+                       exceeded_burst_throttle = TRUE;
                        goto vm_pageout_scan_delay;
 
                } else if (VM_PAGE_Q_THROTTLED(iq) && 
@@ -1552,6 +1600,14 @@ done_moving_active_pages:
                        switch (flow_control.state) {
 
                        case FCS_IDLE:
+                               if ((vm_page_free_count + local_freed) < vm_page_free_target) {
+                                       if (vm_page_inactive_count - vm_page_anonymous_count > 0) {
+                                               grab_anonymous = FALSE;
+                                               goto consider_inactive;
+                                       }
+                                       if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
+                                               continue;
+                               }
 reset_deadlock_timer:
                                ts.tv_sec = vm_pageout_deadlock_wait / 1000;
                                ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
@@ -1590,7 +1646,6 @@ reset_deadlock_timer:
                                        vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
                                        vm_pageout_scan_deadlock_detected++;
                                        flow_control.state = FCS_DEADLOCK_DETECTED;
-
                                        thread_wakeup((event_t) &vm_pageout_garbage_collect);
                                        goto consider_inactive;
                                }
@@ -1609,8 +1664,6 @@ reset_deadlock_timer:
                                goto reset_deadlock_timer;
 
                        }
-                       vm_pageout_scan_throttle++;
-                       iq->pgo_throttled = TRUE;
 vm_pageout_scan_delay:
                        if (object != NULL) {
                                vm_object_unlock(object);
@@ -1624,7 +1677,7 @@ vm_pageout_scan_delay:
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
                                               vm_page_free_count, local_freed, delayed_unlock_limit, 3);
 
-                               vm_page_free_list(local_freeq, TRUE);
+                               vm_page_free_list(local_freeq, TRUE);
                                        
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
                                               vm_page_free_count, local_freed, 0, 3);
@@ -1636,13 +1689,86 @@ vm_pageout_scan_delay:
                                if (flow_control.state == FCS_DELAYED &&
                                    !VM_PAGE_Q_THROTTLED(iq)) {
                                        flow_control.state = FCS_IDLE;
-                                       vm_pageout_scan_throttle_aborted++;
                                        goto consider_inactive;
                                }
                        }
+                       
+                       if (vm_page_free_count >= vm_page_free_target) {
+                               /*
+                                * we're here because either
+                                *  1) someone else freed up some pages while we had
+                                *     the queues unlocked above or 
+                                *  2) we're precleaning and we haven't yet met
+                                *     our cleaned target
+                                * and we've hit one of the 3 conditions that 
+                                * cause us to pause the pageout scan thread
+                                *
+                                * since we already have enough free pages,
+                                * let's avoid stalling and return normally
+                                *
+                                * before we return, make sure the pageout I/O threads
+                                * are running throttled in case there are still requests 
+                                * in the laundry... since we have enough free pages
+                                * we don't need the laundry to be cleaned in a timely
+                                * fashion... so let's avoid interfering with foreground
+                                * activity
+                                *
+                                * we don't want to hold vm_page_queue_free_lock when
+                                * calling vm_pageout_adjust_io_throttles (since it
+                                * may cause other locks to be taken), we do the initial
+                                * check outside of the lock.  Once we take the lock,
+                                * we recheck the condition since it may have changed.
+                                * if it has, no problem, we will make the threads
+                                * non-throttled before actually blocking
+                                */
+                               vm_pageout_adjust_io_throttles(iq, eq, TRUE);
+                       }
+                       lck_mtx_lock(&vm_page_queue_free_lock);
 
+                       if (vm_page_free_count >= vm_page_free_target) {
+                               if (vm_page_cleaned_count < VM_PAGE_CLEANED_TARGET) {
+                                       vm_precleaning_aborted++;
+                                       vm_pageout_precleaning_delayed = TRUE;
+                               }
+                               goto return_from_scan;
+                       }
+                       lck_mtx_unlock(&vm_page_queue_free_lock);
+                       
+                       if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
+                               /*
+                                * we're most likely about to block due to one of
+                                * not be able to make forward progress with respect
+                                * not be able to make forward progress w/r
+                                * to providing new pages to the free queue,
+                                * so unthrottle the I/O threads in case we
+                                * have laundry to be cleaned... it needs
+                                * to be completed ASAP.
+                                *
+                                * even if we don't block, we want the io threads
+                                * running unthrottled since the sum of free +
+                                * clean pages is still under our free target
+                                */
+                               vm_pageout_adjust_io_throttles(iq, eq, FALSE);
+                       }
+                       if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
+                               /*
+                                * if we get here we're below our free target and
+                                * we don't have any inactive pages other than
+                                * we don't have any inactive pages other then
+                                * those in the clean queue...
+                                * however, we have pages on the clean queue that
+                                * can be moved to the free queue, so let's not
+                                * stall the pageout scan
+                                */
+                               flow_control.state = FCS_IDLE;
+                               goto consider_inactive;
+                       }
                        VM_CHECK_MEMORYSTATUS;
 
+                       if (flow_control.state != FCS_IDLE)
+                               vm_pageout_scan_throttle++;
+                       iq->pgo_throttled = TRUE;
+
                        assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
                        counter(c_vm_pageout_scan_block++);
 
@@ -1679,9 +1805,12 @@ consider_inactive:
                loop_count++;
                inactive_burst_count++;
                vm_pageout_inactive++;
-
-               /* Choose a victim. */
                
+               boolean_t pageout_making_free = ((vm_page_free_count + local_freed) < vm_page_free_target); /* TRUE if making free, FALSE if making clean */
+
+               /*
+                * Choose a victim.
+                */
                while (1) {     
                        m = NULL;
                        
@@ -1689,43 +1818,109 @@ consider_inactive:
                                assert(vm_page_throttled_count == 0);
                                assert(queue_empty(&vm_page_queue_throttled));
                        }
-
+                       
                        /*
-                        * The most eligible pages are ones we paged in speculatively,
-                        * but which have not yet been touched.
+                        * If we are still below the free target, try speculative
+                        * and clean queue pages.
                         */
-                       if ( !queue_empty(&sq->age_q) ) {
-                               m = (vm_page_t) queue_first(&sq->age_q);
+                       if (pageout_making_free) {
+                               /*
+                                * The most eligible pages are ones we paged in speculatively,
+                                * but which have not yet been touched.
+                                */
+                               if ( !queue_empty(&sq->age_q) ) {
+                                       m = (vm_page_t) queue_first(&sq->age_q);
 
-                               page_prev_state = PAGE_STATE_SPECULATIVE;
-                               break;
+                                       page_prev_state = PAGE_STATE_SPECULATIVE;
+                                       
+                                       break;
+                               }
+
+                               /*
+                                * Try a clean-queue inactive page, if we are still trying to fill the free list.
+                                */
+                               if ( !queue_empty(&vm_page_queue_cleaned) ) {
+                                       m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
+                    
+                                       page_prev_state = PAGE_STATE_CLEAN;
+                    
+                                       break;
+                               }
+
+                               if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous)) {
+
+                                       if ( !queue_empty(&vm_page_queue_inactive) ) {
+                                               m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+                               
+                                               page_prev_state = PAGE_STATE_INACTIVE;
+                                               if (vm_pageout_need_to_refill_clean_queue == TRUE)
+                                                       grab_anonymous = TRUE;
+                                               break;
+                                       }
+                               }
                        }
-                       /*
-                        * Time for a zero-filled inactive page?
-                        */
-                       if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
-                            queue_empty(&vm_page_queue_inactive)) {
-                               if ( !queue_empty(&vm_page_queue_zf) ) {
-                                       m = (vm_page_t) queue_first(&vm_page_queue_zf);
+                       if (vm_pageout_need_to_refill_clean_queue == TRUE) {
+                               if ( !queue_empty(&vm_page_queue_anonymous) ) {
+                                       m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
 
-                                       page_prev_state = PAGE_STATE_ZEROFILL;
-                                       zf_run_count++;
+                                       page_prev_state = PAGE_STATE_ANONYMOUS;
+                                       grab_anonymous = FALSE;
                                        break;
                                }
                        }
+
                        /*
-                        * It's either a normal inactive page or nothing.
+                        * if we've gotten here, we have no victim page.
+                        * if making clean, free the local freed list and return.
+                        * if making free, check to see if we've finished balancing the queues
+                        * yet; if we haven't, just continue; otherwise, panic
                         */
-                        if ( !queue_empty(&vm_page_queue_inactive) ) {
-                                m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+                       vm_page_unlock_queues();
                                
-                               page_prev_state = PAGE_STATE_INACTIVE;
-                                zf_run_count = 0;
-                               break;
-                        }
+                       if (object != NULL) {
+                               vm_object_unlock(object);
+                               object = NULL;
+                       }
+                       vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+                               
+                       if (local_freeq) {
+                               VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+                                              vm_page_free_count, local_freed, delayed_unlock_limit, 5);
+                                       
+                               vm_page_free_list(local_freeq, TRUE);
+                                       
+                               VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+                                              vm_page_free_count, local_freed, 0, 5);
+                                       
+                               local_freeq = NULL;
+                               local_freed = 0;
+                       }
+                       vm_page_lock_queues();
+                       delayed_unlock = 1;
+
+                       if (pageout_making_free == FALSE) {
+                               if (vm_pageout_need_to_refill_clean_queue == TRUE)
+                                       DTRACE_VM(novictimforclean);
 
-                        panic("vm_pageout: no victim");
+                               lck_mtx_lock(&vm_page_queue_free_lock);
+                               goto return_from_scan;
+
+                       }
+                       if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
+                               goto Restart;
+
+                       panic("vm_pageout: no victim");
+                       
+                       /* NOTREACHED */
                }
+               
+               /*
+                * we just found this page on one of our queues...
+                * it can't also be on the pageout queue, so safe
+                * to call VM_PAGE_QUEUES_REMOVE
+                */
+               assert(!m->pageout_queue);
+
                VM_PAGE_QUEUES_REMOVE(m);
 
                assert(!m->laundry);
@@ -1771,6 +1966,9 @@ consider_inactive:
 
                                vm_pageout_inactive_nolock++;
 
+                               if (page_prev_state == PAGE_STATE_CLEAN)
+                                       vm_pageout_cleaned_nolock++;
+
                                if (page_prev_state == PAGE_STATE_SPECULATIVE)
                                        page_prev_state = PAGE_STATE_INACTIVE_FIRST;
 
@@ -1788,14 +1986,17 @@ consider_inactive:
                                 */
                                m->object->scan_collisions++;
 
-                               if ( !queue_empty(&sq->age_q) )
-                                       m_want = (vm_page_t) queue_first(&sq->age_q);
-                               else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
-                                         queue_empty(&vm_page_queue_inactive)) {
-                                       if ( !queue_empty(&vm_page_queue_zf) )
-                                               m_want = (vm_page_t) queue_first(&vm_page_queue_zf);
-                               } else if ( !queue_empty(&vm_page_queue_inactive) ) {
-                                       m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
+                               if (pageout_making_free) {
+                                       if ( !queue_empty(&sq->age_q) )
+                                               m_want = (vm_page_t) queue_first(&sq->age_q);
+                                       else if (!queue_empty(&vm_page_queue_cleaned))
+                                               m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned);
+                                       else if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous))
+                                               m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
+                               }
+                               if (m_want == NULL && vm_pageout_need_to_refill_clean_queue == TRUE) {
+                                       if ( !queue_empty(&vm_page_queue_anonymous) )
+                                               m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous);
                                }
                                /*
                                 * this is the next object we're going to be interested in
@@ -1850,6 +2051,10 @@ consider_inactive:
                         *
                         */
                        vm_pageout_inactive_busy++;
+
+                       if (page_prev_state == PAGE_STATE_CLEAN)
+                               vm_pageout_cleaned_busy++;
+                       
 requeue_page:
                        switch (page_prev_state) {
 
@@ -1857,12 +2062,8 @@ requeue_page:
                                vm_page_speculate(m, FALSE);
                                break;
 
-                       case PAGE_STATE_ZEROFILL:
-                               m->zero_fill = TRUE;
-                               /*
-                                * fall through to add in the
-                                * inactive state
-                                */
+                       case PAGE_STATE_ANONYMOUS:
+                       case PAGE_STATE_CLEAN:
                        case PAGE_STATE_INACTIVE:
                                VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
                                break;
@@ -1890,7 +2091,7 @@ requeue_page:
                                vm_pageout_inactive_notalive++;
                        else
                                vm_pageout_inactive_error++;
-reclaim_page:
+reclaim_page:                  
                        if (vm_pageout_deadlock_target) {
                                vm_pageout_scan_inactive_throttle_success++;
                                vm_pageout_deadlock_target--;
@@ -1903,7 +2104,10 @@ reclaim_page:
                        } else {
                                DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
                        }
-                       vm_page_free_prepare_queues(m);
+                       assert(!m->cleaning);
+                       assert(!m->laundry);
+
+                       m->busy = TRUE;
 
                        /*
                         * remove page from object here since we're already
@@ -1919,6 +2123,13 @@ reclaim_page:
                        m->pageq.next = (queue_entry_t)local_freeq;
                        local_freeq = m;
                        local_freed++;
+                       
+                       if (page_prev_state == PAGE_STATE_SPECULATIVE)
+                               vm_pageout_freed_from_speculative++;
+                       else if (page_prev_state == PAGE_STATE_CLEAN)
+                               vm_pageout_freed_from_cleaned++;
+                       else
+                               vm_pageout_freed_from_inactive_clean++;
 
                        inactive_burst_count = 0;
 
@@ -1935,12 +2146,11 @@ reclaim_page:
                 */
                if (object->copy == VM_OBJECT_NULL) {
                        if (object->purgable == VM_PURGABLE_EMPTY) {
-                               m->busy = TRUE;
                                if (m->pmapped == TRUE) {
                                        /* unmap the page */
                                        refmod_state = pmap_disconnect(m->phys_page);
                                        if (refmod_state & VM_MEM_MODIFIED) {
-                                               m->dirty = TRUE;
+                                               SET_PAGE_DIRTY(m, FALSE);
                                        }
                                }
                                if (m->dirty || m->precious) {
@@ -1955,11 +2165,15 @@ reclaim_page:
 
                                /* just stick it back on! */
                                reactivated_this_call++;
+
+                               if (page_prev_state == PAGE_STATE_CLEAN)
+                                       vm_pageout_cleaned_volatile_reactivated++;
+
                                goto reactivate_page;
                        }
                }
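/*
 * Throughout this commit, bare `m->dirty = TRUE;` stores give way to
 * SET_PAGE_DIRTY(m, FALSE).  The macro's definition is not part of
 * this hunk; as a hedged sketch only, the point of funneling every
 * dirty-bit store through one helper is that extra bookkeeping (for
 * instance the m->was_dirty state the clean-queue accounting below
 * consults when crediting VM_STAT pageouts) cannot be missed at any
 * call site:
 */
static inline void
example_set_page_dirty(vm_page_t m)
{
	m->dirty = TRUE;
	m->was_dirty = TRUE;	/* assumed bookkeeping, illustrative only */
}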
 
-       consider_inactive_page:
+consider_inactive_page:
                if (m->busy) {
                        /*
                         * CAUTION CAUTION:
@@ -1994,37 +2208,54 @@ reclaim_page:
                  
                        if (refmod_state & VM_MEM_REFERENCED)
                                m->reference = TRUE;
-                       if (refmod_state & VM_MEM_MODIFIED)
-                               m->dirty = TRUE;
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       }
                }
-
+               
                /*
+                *   if (m->cleaning)
                 *      If already cleaning this page in place and it hasn't
-                *      been recently referenced, convert from
-                *      "adjacent" to "target". We can leave the page mapped,
-                *      and upl_commit_range will determine whether
-                *      to free or reactivate.
+                *      been recently referenced, just pull off the queue.
+                *      We can leave the page mapped, and upl_commit_range
+                *      will put it on the clean queue.
                 *
                 *      note: if m->encrypted_cleaning == TRUE, then
                 *              m->cleaning == TRUE
                 *      and we'll handle it here
+                *
+                *   if (m->pageout && !m->cleaning)
+                *      an msync INVALIDATE is in progress...
+                *      this page has been marked for destruction
+                *      after it has been cleaned,
+                *      but not yet gathered into a UPL
+                *      where 'cleaning' will be set...
+                *      just leave it off the paging queues
+                *
+                *   if (m->pageout && m->cleaning)
+                *      an msync INVALIDATE is in progress
+                *      and the UPL has already gathered this page...
+                *      just leave it off the paging queues
                 */
+               
+               /*
+                * a page with m->pageout set and still on the queues means that an
+                * MS_INVALIDATE is in progress on this page... leave it alone
+                */
+               if (m->pageout) {
+                       inactive_burst_count = 0;
+                       goto done_with_inactivepage;
+               }
+               
+               /* if cleaning, reactivate if referenced.  otherwise, just pull off queue */
                if (m->cleaning) {
-                       
                        if (m->reference == TRUE) {
                                reactivated_this_call++;
                                goto reactivate_page;
+                       } else {
+                               inactive_burst_count = 0;
+                               goto done_with_inactivepage;
                        }
-                       m->busy = TRUE;
-                       m->pageout = TRUE;
-                       m->dump_cleaning = TRUE;
-                       vm_page_wire(m);
-
-                       CLUSTER_STAT(vm_pageout_cluster_conversions++);
-
-                       inactive_burst_count = 0;
-
-                       goto done_with_inactivepage;
                }
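/*
 * The comment block above enumerates three states; restated as a
 * table (no new logic):
 *
 *   m->pageout   m->cleaning   meaning                       action here
 *   ----------   -----------   ---------------------------   -------------------
 *   TRUE         FALSE         msync INVALIDATE pending,     leave off the
 *                              page not yet in a UPL         paging queues
 *   TRUE         TRUE          msync INVALIDATE pending,     leave off the
 *                              page gathered into a UPL      paging queues
 *   FALSE        TRUE          clean-in-place in flight      reactivate if
 *                                                            referenced, else
 *                                                            pull off the queue
 */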
 
                if (m->reference || m->dirty) {
@@ -2049,6 +2280,10 @@ reclaim_page:
                                vm_pageout_inactive_force_reclaim++;
                        } else {
                                uint32_t isinuse;
+
+                               if (page_prev_state == PAGE_STATE_CLEAN)
+                                       vm_pageout_cleaned_reference_reactivated++;
+                               
 reactivate_page:
                                if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
                                     vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
@@ -2065,6 +2300,10 @@ reactivate_page:
                                        vm_page_activate(m);
                                        VM_STAT_INCR(reactivations);
                                }
+                               
+                               if (page_prev_state == PAGE_STATE_CLEAN)
+                                       vm_pageout_cleaned_reactivated++;
+
                                vm_pageout_inactive_used++;
                                inactive_burst_count = 0;
 
@@ -2077,8 +2316,9 @@ reactivate_page:
                         */
                        if ((refmod_state == -1) && !m->dirty && m->pmapped) {
                                refmod_state = pmap_get_refmod(m->phys_page);
-                               if (refmod_state & VM_MEM_MODIFIED)
-                                       m->dirty = TRUE;
+                               if (refmod_state & VM_MEM_MODIFIED) {
+                                       SET_PAGE_DIRTY(m, FALSE);
+                               }
                        }
                        forced_reclaim = TRUE;
                } else {
@@ -2156,14 +2396,18 @@ throttle_inactive:
                                 * b) The thread doing the writing is waiting for pages while holding the truncate lock
                                 * c) Most of the pages in the inactive queue belong to this file.
                                 */
-                               
-                               vm_page_activate(m);
+                               queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
+                               m->active = TRUE;
+                               vm_page_active_count++;
+
+                               vm_pageout_adjust_io_throttles(iq, eq, FALSE);
+
                                vm_pageout_inactive_external_forced_reactivate_count++;
                                vm_pageout_inactive_external_forced_reactivate_limit--;
 
                                if (vm_pageout_inactive_external_forced_reactivate_limit <= 0){
                                        vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
-#if CONFIG_EMBEDDED
+#if CONFIG_JETSAM
                                        /*
                                         * Possible deadlock scenario so request jetsam action
                                         */
@@ -2172,10 +2416,11 @@ throttle_inactive:
                                        object = VM_OBJECT_NULL;
                                        vm_page_unlock_queues();
 
-                                       if (jetsam_kill_top_proc(TRUE, kJetsamFlagsKilledVM) < 0){
+                                       if (memorystatus_kill_top_proc_from_VM() < 0){
                                                panic("vm_pageout_scan: Jetsam request failed\n");      
                                        }
 
+                                       vm_pageout_inactive_external_forced_jetsam_count++;
                                        vm_page_lock_queues();  
                                        delayed_unlock = 1;
 #endif
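/*
 * Lock discipline in the jetsam branch above, restated: the object
 * lock and the page-queues lock are both dropped before calling
 * memorystatus_kill_top_proc_from_VM(), which can block; on return
 * the queues lock is retaken and delayed_unlock is reset to 1 so the
 * batched free/unlock accounting starts over.
 */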
@@ -2191,12 +2436,7 @@ throttle_inactive:
                 * we've got a page that we can steal...
                 * eliminate all mappings and make sure
                 * we have the up-to-date modified state
-                * first take the page BUSY, so that no new
-                * mappings can be made
-                */
-               m->busy = TRUE;
-               
-               /*
+                *
                 * if we need to do a pmap_disconnect then we
                 * need to re-evaluate m->dirty since the pmap_disconnect
                 * provides the true state atomically... the 
@@ -2204,9 +2444,7 @@ throttle_inactive:
                 * and may have been dirtied at the last microsecond
                 *
                 * we also check for the page being referenced 'late'
-                * if it was, we first need to do a WAKEUP_DONE on it
-                * since we already set m->busy = TRUE, before 
-                * going off to reactivate it
+                * and reactivate it for that case
                 *
                 * Note that if 'pmapped' is FALSE then the page is not
                 * and has not been in any map, so there is no point calling
@@ -2216,8 +2454,9 @@ throttle_inactive:
                if (m->pmapped == TRUE) {
                        refmod_state = pmap_disconnect(m->phys_page);
 
-                       if (refmod_state & VM_MEM_MODIFIED)
-                               m->dirty = TRUE;
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       }
                        if (refmod_state & VM_MEM_REFERENCED) {
                                
                                /* If m->reference is already set, this page must have
@@ -2230,7 +2469,8 @@ throttle_inactive:
                                            ++reactivated_this_call >= reactivate_limit)
                                                vm_pageout_reactivation_limit_exceeded++;
                                        else {
-                                               PAGE_WAKEUP_DONE(m);
+                                               if (page_prev_state == PAGE_STATE_CLEAN)
+                                                       vm_pageout_cleaned_reference_reactivated++;
                                                goto reactivate_page;
                                        }
                                }
@@ -2250,10 +2490,71 @@ throttle_inactive:
                        if (page_prev_state == PAGE_STATE_SPECULATIVE)
                                vm_pageout_speculative_clean++;
                        else {
-                               if (page_prev_state == PAGE_STATE_ZEROFILL)
-                                       vm_pageout_inactive_zf++;
+                               if (page_prev_state == PAGE_STATE_ANONYMOUS)
+                                       vm_pageout_inactive_anonymous++;
+                               else if (page_prev_state == PAGE_STATE_CLEAN)
+                                       vm_pageout_cleaned_reclaimed++;
+
+                               if (m->was_dirty) {
+                                       /* page on clean queue used to be dirty; we should increment the vm_stat pageout count here */
+                                       VM_STAT_INCR(pageouts);
+                                       DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
+                               }
                                vm_pageout_inactive_clean++;
                        }
+                       /* FYI: (!pageout_making_free) == (!m->clean_queue && !m->speculative) */
+                       if (((vm_page_free_count + local_freed) >= vm_page_free_target) && !pageout_making_free) {
+                               
+                               assert(!m->clean_queue);
+                               assert(!m->speculative);
+                               
+                               /*
+                                * we have met our free page target and this page wasn't just pulled
+                                * from the clean or speculative queues, so put it on the clean queue
+                                */
+                               if (m->reference == TRUE) {
+                                       /*
+                                        * must have come through the forced reclaim path.
+                                        * we need to clear out the reference state in this case
+                                        * so that we don't just reactivate the page when we
+                                        * find it in the clean queue based on an old reference.
+                                        * if it gets re-referenced while on the queue, then
+                                        * the reactivation is justified
+                                        */
+                                       m->reference = FALSE;
+                                       pmap_clear_reference(m->phys_page);
+                               }
+                               
+                               vm_pageout_enqueued_cleaned_from_inactive_clean++;
+                               vm_page_enqueue_cleaned(m);
+                               
+                               inactive_burst_count = 0; /* we found a usable page on the inactive queue, hooray */
+                               
+                               goto done_with_inactivepage;
+
+                       }
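/*
 * Invariant behind the enqueue above, restated: a page placed on the
 * clean queue must look cold -- m->reference clear and the pmap
 * reference bit clear -- so that finding it referenced later, while
 * still on the clean queue, can only mean a genuinely new access and
 * therefore justifies reactivation.
 */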
+                       /*
+                        * OK, at this point we have found a page we are going to free.
+                        */
+
+#ifndef CONFIG_EMBEDDED
+
+#define VM_PRESSURE_INTERVAL_NS                250000000       /* nanoseconds; == .25 seconds */
+                       if (vm_pageout_need_to_refill_clean_queue == TRUE || page_prev_state == PAGE_STATE_CLEAN) {
+                               static uint64_t vm_pressure_last_time_ns = 0;
+                               uint64_t cur_time_ns = 0;
+                               absolutetime_to_nanoseconds(mach_absolute_time(), &cur_time_ns);
+                               if (cur_time_ns >= vm_pressure_last_time_ns + VM_PRESSURE_INTERVAL_NS) {
+                                       vm_pressure_last_time_ns = cur_time_ns;
+                                       thread_wakeup(&vm_pressure_thread);
+#if CONFIG_MEMORYSTATUS
+                                       /* Wake up idle-exit thread */
+                                       thread_wakeup((event_t)&memorystatus_wakeup);
+#endif
+                               }
+                       }
+#endif /* !CONFIG_EMBEDDED */
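/*
 * The #ifndef CONFIG_EMBEDDED block above is a simple rate limiter:
 * wake the pressure and memorystatus threads at most once per
 * VM_PRESSURE_INTERVAL_NS.  A self-contained sketch of the same idiom
 * (helper name is illustrative; the kernel inlines the test instead):
 */
static boolean_t
example_fire_at_most_every(uint64_t interval_ns, uint64_t *last_ns)
{
	uint64_t now_ns;

	/* convert the monotonic tick count to nanoseconds, as above */
	absolutetime_to_nanoseconds(mach_absolute_time(), &now_ns);

	if (now_ns >= *last_ns + interval_ns) {
		*last_ns = now_ns;		/* open the next quiet window */
		return TRUE;
	}
	return FALSE;				/* still within the quiet window */
}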
+
                        goto reclaim_page;
                }
 
@@ -2270,22 +2571,19 @@ throttle_inactive:
                        inactive_throttled = TRUE;
                }
 
-               if (inactive_throttled == TRUE) {
-                       /*
-                        * we set busy before issuing the pmap_disconnect,
-                        * so clear it and wakeup anyone that happened upon
-                        * it in that state
-                        */
-                       PAGE_WAKEUP_DONE(m);
+               if (inactive_throttled == TRUE)
                        goto throttle_inactive;
-               }
-
-               vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
-
-               vm_pageout_cluster(m);
+               
+               /*
+                * do NOT set the pageout bit!
+                * sure, we might need free pages, but this page is going to take time to become free 
+                * anyway, so we may as well put it on the clean queue first and take it from there later
+                * if necessary.  that way, we'll ensure we don't free up too much. -mj
+                */
+               vm_pageout_cluster(m, FALSE);
 
-               if (page_prev_state == PAGE_STATE_ZEROFILL)
-                       vm_pageout_inactive_zf++;
+               if (page_prev_state == PAGE_STATE_ANONYMOUS)
+                       vm_pageout_inactive_anonymous++;
                if (object->internal)
                        vm_pageout_inactive_dirty_internal++;
                else
@@ -2306,8 +2604,8 @@ done_with_inactivepage:
 
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
                                               vm_page_free_count, local_freed, delayed_unlock_limit, 4);
-
-                               vm_page_free_list(local_freeq, TRUE);
+                                       
+                               vm_page_free_list(local_freeq, TRUE);
                                
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
                                               vm_page_free_count, local_freed, 0, 4);
@@ -2320,6 +2618,8 @@ done_with_inactivepage:
 
                        delayed_unlock = 1;
                }
+               vm_pageout_considered_page++;
+               
                /*
                 * back to top of pageout scan loop
                 */
@@ -2358,7 +2658,7 @@ vm_page_free_reserve(
                vm_page_free_target = vm_page_free_min + 5;
 
        vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3);
-       vm_page_creation_throttle = vm_page_free_target / 2;
+       vm_page_creation_throttle = vm_page_free_target * 3;
 }
 
 /*
@@ -2370,12 +2670,18 @@ vm_pageout_continue(void)
 {
        DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
        vm_pageout_scan_event_counter++;
+
        vm_pageout_scan();
-       /* we hold vm_page_queue_free_lock now */
+       /*
+        * we hold both the vm_page_queue_free_lock
+        * and the vm_page_queues_lock at this point
+        */
        assert(vm_page_free_wanted == 0);
        assert(vm_page_free_wanted_privileged == 0);
        assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
+
        lck_mtx_unlock(&vm_page_queue_free_lock);
+       vm_page_unlock_queues();
 
        counter(c_vm_pageout_block++);
        thread_block((thread_continue_t)vm_pageout_continue);
@@ -2397,6 +2703,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
 {
        vm_page_t       m = NULL;
        vm_object_t     object;
+       vm_object_offset_t offset;
        memory_object_t pager;
        thread_t        self = current_thread();
 
@@ -2418,6 +2725,19 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                   m->pageout_queue = FALSE;
                   m->pageq.next = NULL;
                   m->pageq.prev = NULL;
+
+                  /*
+                   * grab a snapshot of the object and offset this
+                   * page is tabled in so that we can relookup this
+                   * page after we've taken the object lock - these
+                   * fields are stable while we hold the page queues lock
+                   * but as soon as we drop it, there is nothing to keep
+                   * this page in this object... we hold an activity_in_progress
+                   * on this object which will keep it from terminating
+                   */
+                  object = m->object;
+                  offset = m->offset;
+
                   vm_page_unlock_queues();
 
 #ifdef FAKE_DEADLOCK
@@ -2439,10 +2759,27 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                           }
                   }
 #endif
-                  object = m->object;
-
                   vm_object_lock(object);
 
+                  m = vm_page_lookup(object, offset);
+
+                  if (m == NULL ||
+                      m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
+                          /*
+                           * it's either the same page that someone else has
+                           * started cleaning (or it's finished cleaning or
+                           * been put back on the pageout queue), or
+                           * the page has been freed or we have found a
+                           * new page at this offset... in all of these cases
+                           * we merely need to release the activity_in_progress
+                           * we took when we put the page on the pageout queue
+                           */
+                          vm_object_activity_end(object);
+                          vm_object_unlock(object);
+
+                          vm_page_lockspin_queues();
+                          continue;
+                  }
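/*
 * The relookup above is the snapshot-and-revalidate pattern, restated
 * step by step (identifiers as in the code):
 *
 *   1. snapshot m->object and m->offset while holding the page-queues
 *      lock, under which those fields are stable;
 *   2. drop the queues lock and take the object lock (never the other
 *      way around, preserving lock ordering);
 *   3. vm_page_lookup(object, offset) to re-find the page, since it
 *      may have been freed, replaced, or claimed while unlocked;
 *   4. re-check the disqualifying states (busy / cleaning /
 *      pageout_queue / !laundry) and, if anything changed, bail out by
 *      releasing the activity_in_progress reference taken at queueing.
 */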
                   if (!object->pager_initialized) {
 
                           /*
@@ -2464,9 +2801,11 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                                    *   Should only happen if there is no
                                    *   default pager.
                                    */
+                                  m->pageout = FALSE;
+
                                   vm_page_lockspin_queues();
 
-                                  vm_pageout_queue_steal(m, TRUE);
+                                  vm_pageout_throttle_up(m);
                                   vm_page_activate(m);
                                   vm_pageout_dirty_no_pager++;
 
@@ -2475,9 +2814,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                                   /*
                                    *   And we are done with it.
                                    */
-                                  PAGE_WAKEUP_DONE(m);
-
-                                  vm_object_paging_end(object);
+                                  vm_object_activity_end(object);
                                   vm_object_unlock(object);
 
                                   vm_page_lockspin_queues();
@@ -2485,6 +2822,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                           }
                   }
                   pager = object->pager;
+
                   if (pager == MEMORY_OBJECT_NULL) {
                           /*
                            * This pager has been destroyed by either
@@ -2501,7 +2839,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                           } else {
                                   vm_page_lockspin_queues();
 
-                                  vm_pageout_queue_steal(m, TRUE);
+                                  vm_pageout_throttle_up(m);
                                   vm_page_activate(m);
                                   
                                   vm_page_unlock_queues();
@@ -2509,25 +2847,32 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                                   /*
                                    *   And we are done with it.
                                    */
-                                  PAGE_WAKEUP_DONE(m);
                           }
-                          vm_object_paging_end(object);
+                          vm_object_activity_end(object);
                           vm_object_unlock(object);
 
                           vm_page_lockspin_queues();
                           continue;
                   }
+#if 0
+                  /*
+                   * we don't hold the page queue lock
+                   * so this check isn't safe to make
+                   */
                   VM_PAGE_CHECK(m);
-                  vm_object_unlock(object);
+#endif
                   /*
-                   * we expect the paging_in_progress reference to have
-                   * already been taken on the object before it was added
-                   * to the appropriate pageout I/O queue... this will
-                   * keep the object from being terminated and/or the 
-                   * paging_offset from changing until the I/O has 
-                   * completed... therefore no need to lock the object to
-                   * pull the paging_offset from it.
-                   *
+                   * give back the activity_in_progress reference we
+                   * took when we queued up this page and replace it
+                   * with a paging_in_progress reference that will
+                   * also keep the paging offset from changing and
+                   * prevent the object from terminating
+                   */
+                  vm_object_activity_end(object);
+                  vm_object_paging_begin(object);
+                  vm_object_unlock(object);
+
+                   /*
                    * Send the data to the pager.
                    * any pageout clustering happens there
                    */
@@ -2544,20 +2889,14 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
                   vm_object_paging_end(object);
                   vm_object_unlock(object);
 
-                  vm_page_lockspin_queues();
-       }
-       assert_wait((event_t) q, THREAD_UNINT);
+                  vm_pageout_io_throttle();
 
-       if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
-               q->pgo_throttled = FALSE;
-               thread_wakeup((event_t) &q->pgo_laundry);
-       }
-       if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
-               q->pgo_draining = FALSE;
-               thread_wakeup((event_t) (&q->pgo_laundry+1));
+                  vm_page_lockspin_queues();
        }
        q->pgo_busy = FALSE;
        q->pgo_idle = TRUE;
+
+       assert_wait((event_t) q, THREAD_UNINT);
        vm_page_unlock_queues();
 
        thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
@@ -2565,6 +2904,47 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
 }
 
 
+
+static void
+vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
+{
+       uint32_t        policy;
+       boolean_t       set_iq = FALSE;
+       boolean_t       set_eq = FALSE;
+       
+       if (hibernate_cleaning_in_progress == TRUE)
+               req_lowpriority = FALSE;
+
+       if (iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority)
+               set_iq = TRUE;
+
+       if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
+               set_eq = TRUE;
+       
+       if (set_iq == TRUE || set_eq == TRUE) {
+
+               vm_page_unlock_queues();
+
+               if (req_lowpriority == TRUE) {
+                       policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE;
+                       DTRACE_VM(laundrythrottle);
+               } else {
+                       policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL;
+                       DTRACE_VM(laundryunthrottle);
+               }
+               if (set_iq == TRUE) {
+                       proc_apply_thread_diskacc(kernel_task, iq->pgo_tid, policy);
+                       iq->pgo_lowpriority = req_lowpriority;
+               }
+               if (set_eq == TRUE) {
+                       proc_apply_thread_diskacc(kernel_task, eq->pgo_tid, policy);
+                       eq->pgo_lowpriority = req_lowpriority;
+               }
+               vm_page_lock_queues();
+       }
+}
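/*
 * Hedged usage sketch -- a hypothetical call site; the real callers in
 * this commit include the throttle_inactive path in vm_pageout_scan.
 * The function expects the page-queues lock held on entry, drops it
 * internally around the proc_apply_thread_diskacc() calls, and retakes
 * it before returning:
 *
 *	vm_page_lock_queues();
 *	vm_pageout_adjust_io_throttles(&vm_pageout_queue_internal,
 *				       &vm_pageout_queue_external,
 *				       TRUE);	(TRUE == request low priority)
 *	vm_page_unlock_queues();
 */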
+
+
 static void
 vm_pageout_iothread_external(void)
 {
@@ -2572,11 +2952,22 @@ vm_pageout_iothread_external(void)
 
        self->options |= TH_OPT_VMPRIV;
 
+       DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);        
+       proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+
+       vm_page_lock_queues();
+
+       vm_pageout_queue_external.pgo_tid = self->thread_id;
+       vm_pageout_queue_external.pgo_lowpriority = TRUE;
+       vm_pageout_queue_external.pgo_inited = TRUE;
+
+       vm_page_unlock_queues();
+
        vm_pageout_iothread_continue(&vm_pageout_queue_external);
+
        /*NOTREACHED*/
 }
 
-
 static void
 vm_pageout_iothread_internal(void)
 {
@@ -2584,7 +2975,19 @@ vm_pageout_iothread_internal(void)
 
        self->options |= TH_OPT_VMPRIV;
 
+       DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
+       proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+
+       vm_page_lock_queues();
+
+       vm_pageout_queue_internal.pgo_tid = self->thread_id;
+       vm_pageout_queue_internal.pgo_lowpriority = TRUE;
+       vm_pageout_queue_internal.pgo_inited = TRUE;
+
+       vm_page_unlock_queues();
+
        vm_pageout_iothread_continue(&vm_pageout_queue_internal);
+
        /*NOTREACHED*/
 }
 
@@ -2598,28 +3001,67 @@ vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
        }
 }
 
+static void
+vm_pressure_thread(void) {
+       static boolean_t set_up_thread = FALSE;
+
+       if (set_up_thread) {
+#if VM_PRESSURE_EVENTS
+               consider_vm_pressure_events();
+#endif /* VM_PRESSURE_EVENTS */
+       }
+
+       set_up_thread = TRUE;
+       assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
+       thread_block((thread_continue_t)vm_pressure_thread);
+}
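/*
 * vm_pressure_thread above uses the standard parked-thread idiom:
 * assert_wait() on a private event, then thread_block() naming the
 * function itself as the continuation, so each wakeup re-enters from
 * the top with no stack state preserved -- which is why the static
 * flag is needed to skip the work on the very first pass, before any
 * thread_wakeup() has occurred.  Minimal sketch (illustrative names):
 */
static void
example_parked_thread(void)
{
	static boolean_t woken_at_least_once = FALSE;

	if (woken_at_least_once) {
		/* real work goes here, once per thread_wakeup() */
	}
	woken_at_least_once = TRUE;

	assert_wait((event_t) &example_parked_thread, THREAD_UNINT);
	thread_block((thread_continue_t) example_parked_thread);
	/* NOTREACHED */
}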
+
+uint32_t vm_pageout_considered_page_last = 0;
+
+/*
+ * called once per second via "compute_averages"
+ */
+void
+compute_pageout_gc_throttle()
+{
+       if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
+
+               vm_pageout_considered_page_last = vm_pageout_considered_page;
+
+               thread_wakeup((event_t) &vm_pageout_garbage_collect);
+       }
+}
+
+
 static void
 vm_pageout_garbage_collect(int collect)
 {
+
        if (collect) {
                boolean_t buf_large_zfree = FALSE;
+               boolean_t first_try = TRUE;
+
                stack_collect();
 
-               /*
-                * consider_zone_gc should be last, because the other operations
-                * might return memory to zones.
-                */
                consider_machine_collect();
-               if (consider_buffer_cache_collect != NULL) {
-                       buf_large_zfree = (*consider_buffer_cache_collect)(0);
-               }
-               consider_zone_gc(buf_large_zfree);
+
+               do {
+                       if (consider_buffer_cache_collect != NULL) {
+                               buf_large_zfree = (*consider_buffer_cache_collect)(0);
+                       }
+                       if (first_try == TRUE || buf_large_zfree == TRUE) {
+                               /*
+                                * consider_zone_gc should be last, because the other operations
+                                * might return memory to zones.
+                                */
+                               consider_zone_gc(buf_large_zfree);
+                       }
+                       first_try = FALSE;
+
+               } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
 
                consider_machine_adjust();
-               consider_pressure_events();
-               
        }
-
        assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
 
        thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
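/*
 * Shape of the reworked collection loop above, restated: the first
 * pass always runs consider_zone_gc() (it must come last within a
 * pass, since the other collectors may return memory to zones), and
 * the loop repeats only while the buffer-cache collector is still
 * producing large zone frees AND vm_page_free_count remains below
 * vm_page_free_target.
 */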
@@ -2708,6 +3150,10 @@ vm_pageout(void)
        vm_pageout_queue_external.pgo_busy = FALSE;
        vm_pageout_queue_external.pgo_throttled = FALSE;
        vm_pageout_queue_external.pgo_draining = FALSE;
+       vm_pageout_queue_external.pgo_lowpriority = FALSE;
+       vm_pageout_queue_external.pgo_tid = -1;
+       vm_pageout_queue_external.pgo_inited = FALSE;
+
 
        queue_init(&vm_pageout_queue_internal.pgo_pending);
        vm_pageout_queue_internal.pgo_maxlaundry = 0;
@@ -2716,7 +3162,9 @@ vm_pageout(void)
        vm_pageout_queue_internal.pgo_busy = FALSE;
        vm_pageout_queue_internal.pgo_throttled = FALSE;
        vm_pageout_queue_internal.pgo_draining = FALSE;
-
+       vm_pageout_queue_internal.pgo_lowpriority = FALSE;
+       vm_pageout_queue_internal.pgo_tid = -1;
+       vm_pageout_queue_internal.pgo_inited = FALSE;
 
        /* internal pageout thread started when default pager registered first time */
        /* external pageout and garbage collection threads started here */
@@ -2730,13 +3178,22 @@ vm_pageout(void)
        thread_deallocate(vm_pageout_external_iothread);
 
        result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
-                                             MINPRI_KERNEL,
+                                             BASEPRI_DEFAULT,
                                              &thread);
        if (result != KERN_SUCCESS)
                panic("vm_pageout_garbage_collect: create failed");
 
        thread_deallocate(thread);
 
+       result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
+                                               BASEPRI_DEFAULT,
+                                               &thread);
+
+       if (result != KERN_SUCCESS)
+               panic("vm_pressure_thread: create failed");
+
+       thread_deallocate(thread);
+
        vm_object_reaper_init();
 
 
@@ -2824,6 +3281,9 @@ upl_create(int type, int flags, upl_size_t size)
        upl->upl_commit_index = 0;
        bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
 
+       upl->uplq.next = 0;
+       upl->uplq.prev = 0;
+
        (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
 #endif /* UPL_DEBUG */
 
@@ -2841,7 +3301,7 @@ upl_destroy(upl_t upl)
        }
 
 #if UPL_DEBUG
-       {
+       if ( !(upl->flags & UPL_VECTOR)) {
                vm_object_t     object;
 
                if (upl->flags & UPL_SHADOWED) {
@@ -2851,6 +3311,8 @@ upl_destroy(upl_t upl)
                }
                vm_object_lock(object);
                queue_remove(&object->uplq, upl, upl_t, uplq);
+               vm_object_activity_end(object);
+               vm_object_collapse(object, 0, TRUE);
                vm_object_unlock(object);
        }
 #endif /* UPL_DEBUG */
@@ -2873,6 +3335,7 @@ upl_destroy(upl_t upl)
        }
        upl_lock_destroy(upl);
        upl->vector_upl = (vector_upl_t) 0xfeedbeef;
+
        if (upl->flags & UPL_INTERNAL) {
                kfree(upl,
                      sizeof(struct upl) + 
@@ -3080,6 +3543,7 @@ vm_object_upl_request(
        upl->offset = offset + object->paging_offset;
 
 #if UPL_DEBUG
+       vm_object_activity_begin(object);
        queue_enter(&object->uplq, upl, upl_t, uplq);
 #endif /* UPL_DEBUG */
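/*
 * Pairing note, restated: under UPL_DEBUG this commit takes an
 * activity_in_progress reference here, when the UPL is queued on
 * object->uplq, and releases it in upl_destroy() above when the UPL is
 * unqueued -- keeping the object from terminating for as long as the
 * debug queue references it.
 */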
 
@@ -3140,8 +3604,9 @@ vm_object_upl_request(
                                dst_page->fictitious ||
                                dst_page->absent ||
                                dst_page->error ||
-                              (VM_PAGE_WIRED(dst_page) && !dst_page->pageout && !dst_page->list_req_pending)) {
-
+                               dst_page->cleaning ||
+                               (VM_PAGE_WIRED(dst_page))) {
+                               
                                if (user_page_list)
                                        user_page_list[entry].phys_addr = 0;
 
@@ -3172,7 +3637,7 @@ vm_object_upl_request(
                                /*
                                 * we're only asking for DIRTY pages to be returned
                                 */
-                               if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
+                               if (dst_page->pageout || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
                                        /*
                                         * if we were the page stolen by vm_pageout_scan to be
                                         * cleaned (as opposed to a buddy being clustered in 
@@ -3188,13 +3653,11 @@ vm_object_upl_request(
                                 * this is a request for a PAGEOUT cluster and this page
                                 * is merely along for the ride as a 'buddy'... not only
                                 * does it have to be dirty to be returned, but it also
-                                * can't have been referenced recently... note that we've
-                                * already filtered above based on whether this page is
-                                * currently on the inactive queue or it meets the page
-                                * ticket (generation count) check
+                                * can't have been referenced recently...
                                 */
-                               if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED) || dst_page->throttled) && 
-                                    ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
+                               if ( (hibernate_cleaning_in_progress == TRUE ||
+                                     (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && 
+                                     ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
                                        goto check_busy;
                                }
 dont_return:
@@ -3202,15 +3665,29 @@ dont_return:
                                 * if we reach here, we're not to return
                                 * the page... go on to the next one
                                 */
+                               if (dst_page->laundry == TRUE) {
+                                       /*
+                                        * if we get here, the page is not 'cleaning' (filtered out above).
+                                        * since it has been referenced, remove it from the laundry
+                                        * so we don't pay the cost of an I/O to clean a page
+                                        * we're just going to take back
+                                        */
+                                       vm_page_lockspin_queues();
+
+                                       vm_pageout_steal_laundry(dst_page, TRUE);
+                                       vm_page_activate(dst_page);
+                                       
+                                       vm_page_unlock_queues();
+                               }
                                if (user_page_list)
                                        user_page_list[entry].phys_addr = 0;
 
                                goto try_next_page;
                        }
 check_busy:                    
-                       if (dst_page->busy && (!(dst_page->list_req_pending && (dst_page->pageout || dst_page->cleaning)))) {
-                               if (cntrl_flags & UPL_NOBLOCK) {
-                                       if (user_page_list)
+                       if (dst_page->busy) {
+                               if (cntrl_flags & UPL_NOBLOCK) {        
+                                       if (user_page_list)
                                                user_page_list[entry].phys_addr = 0;
 
                                        goto try_next_page;
@@ -3221,16 +3698,7 @@ check_busy:
                                 */
                                PAGE_SLEEP(object, dst_page, THREAD_UNINT);
 
-                               continue;
-                       }
-                       /*
-                        * Someone else already cleaning the page?
-                        */
-                       if ((dst_page->cleaning || dst_page->absent || VM_PAGE_WIRED(dst_page)) && !dst_page->list_req_pending) {
-                               if (user_page_list)
-                                       user_page_list[entry].phys_addr = 0;
-
-                               goto try_next_page;
+                               continue;
                        }
                        /*
                         * ENCRYPTED SWAP:
@@ -3281,23 +3749,15 @@ check_busy:
                         * were not counted in the initial
                         * vm_pageout_scan work
                         */
-                       if (dst_page->list_req_pending)
+                       if (dst_page->pageout)
                                encountered_lrp = TRUE;
-                       if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) {
+                       if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) {
                                if (encountered_lrp)
                                        CLUSTER_STAT(pages_at_higher_offsets++;)
                                else
                                        CLUSTER_STAT(pages_at_lower_offsets++;)
                        }
 #endif
-                       /*
-                        * Turn off busy indication on pending
-                        * pageout.  Note: we can only get here
-                        * in the request pending case.
-                        */
-                       dst_page->list_req_pending = FALSE;
-                       dst_page->busy = FALSE;
-
                        hw_dirty = refmod_state & VM_MEM_MODIFIED;
                        dirty = hw_dirty ? TRUE : dst_page->dirty;
 
@@ -3340,14 +3800,15 @@ check_busy:
                         */
                        vm_external_state_set(object->existence_map, dst_page->offset);
 #endif  /*MACH_PAGEMAP*/
-                       dst_page->dirty = dirty;
+                       if (dirty) {
+                               SET_PAGE_DIRTY(dst_page, FALSE);
+                       } else {
+                               dst_page->dirty = FALSE;
+                       }
 
                        if (!dirty)
                                dst_page->precious = TRUE;
 
-                       if (dst_page->pageout)
-                               dst_page->busy = TRUE;
-
                        if ( (cntrl_flags & UPL_ENCRYPT) ) {
                                /*
                                 * ENCRYPTED SWAP:
@@ -3367,16 +3828,8 @@ check_busy:
                                dst_page->encrypted_cleaning = TRUE;
                        }
                        if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
-                               /*
-                                * deny access to the target page
-                                * while it is being worked on
-                                */
-                               if ((!dst_page->pageout) && ( !VM_PAGE_WIRED(dst_page))) {
-                                       dst_page->busy = TRUE;
+                               if ( !VM_PAGE_WIRED(dst_page))
                                        dst_page->pageout = TRUE;
-
-                                       dwp->dw_mask |= DW_vm_page_wire;
-                               }
                        }
                } else {
                        if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
@@ -3427,70 +3880,33 @@ check_busy:
                        if (dst_page != VM_PAGE_NULL) {
 
                                if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
+                                       /*
+                                        * skip over pages already present in the cache
+                                        */
+                                       if (user_page_list)
+                                               user_page_list[entry].phys_addr = 0;
 
-                                       if ( !(dst_page->absent && dst_page->list_req_pending) ) {
-                                               /*
-                                                * skip over pages already present in the cache
-                                                */
-                                               if (user_page_list)
-                                                       user_page_list[entry].phys_addr = 0;
-
-                                               goto try_next_page;
-                                       }
+                                       goto try_next_page;
+                               }
+                               if (dst_page->fictitious) {
+                                       panic("need corner case for fictitious page");
                                }
-                               if ( !(dst_page->list_req_pending) ) {
-
-                                       if (dst_page->cleaning) {
-                                               /*
-                                                * someone else is writing to the page... wait...
-                                                */
-                                               PAGE_SLEEP(object, dst_page, THREAD_UNINT);
-
-                                               continue;
-                                       }
-                               } else {
-                                       if (dst_page->fictitious &&
-                                           dst_page->phys_page == vm_page_fictitious_addr) {
-                                               assert( !dst_page->speculative);
-                                               /*
-                                                * dump the fictitious page
-                                                */
-                                               dst_page->list_req_pending = FALSE;
-
-                                               VM_PAGE_FREE(dst_page);
 
-                                               dst_page = NULL;
+                               if (dst_page->busy || dst_page->cleaning) {
+                                       /*
+                                        * someone else is playing with the
+                                        * page.  We will have to wait.
+                                        */
+                                       PAGE_SLEEP(object, dst_page, THREAD_UNINT);
 
-                                       } else if (dst_page->absent) {
-                                               /*
-                                                * the default_pager case
-                                                */
-                                               dst_page->list_req_pending = FALSE;
-                                               PAGE_WAKEUP_DONE(dst_page);
+                                       continue;
+                               }
+                               if (dst_page->laundry) {
+                                       dst_page->pageout = FALSE;
 
-                                       } else if (dst_page->pageout || dst_page->cleaning) {
-                                               /*
-                                                * page was earmarked by vm_pageout_scan
-                                                * to be cleaned and stolen... we're going
-                                                * to take it back since we are not attempting
-                                                * to read that page and we don't want to stall
-                                                * waiting for it to be cleaned for 2 reasons...
-                                                * 1 - no use paging it out and back in
-                                                * 2 - if we stall, we may cause a deadlock in
-                                                *     the FS trying to acquire its locks
-                                                *     on the VNOP_PAGEOUT path presuming that
-                                                *     those locks are already held on the read
-                                                *     path before trying to create this UPL
-                                                *
-                                                * so undo all of the state that vm_pageout_scan
-                                                * hung on this page
-                                                */
-                                               vm_pageout_queue_steal(dst_page, FALSE);
-                                               PAGE_WAKEUP_DONE(dst_page);
-                                       }
+                                       vm_pageout_steal_laundry(dst_page, FALSE);
                                }
-                       }
-                       if (dst_page == VM_PAGE_NULL) {
+                       } else {
                                if (object->private) {
                                        /* 
                                         * This is a nasty wrinkle for users 
@@ -3580,18 +3996,6 @@ check_busy:
                                        dst_page->clustered = TRUE;
                                }
                        }
-                       if (dst_page->fictitious) {
-                               panic("need corner case for fictitious page");
-                       }
-                       if (dst_page->busy) {
-                               /*
-                                * someone else is playing with the
-                                * page.  We will have to wait.
-                                */
-                               PAGE_SLEEP(object, dst_page, THREAD_UNINT);
-
-                               continue;
-                       }
                        /*
                         * ENCRYPTED SWAP:
                         */
@@ -3698,7 +4102,7 @@ check_busy:
                        }
                        if (cntrl_flags & UPL_PRECIOUS) {
                                if (dst_page->object->internal) {
-                                       dst_page->dirty = TRUE;
+                                       SET_PAGE_DIRTY(dst_page, FALSE);
                                        dst_page->precious = FALSE;
                                } else {
                                        dst_page->precious = TRUE;
@@ -3719,6 +4123,7 @@ check_busy:
                        user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].precious  = dst_page->precious;
                        user_page_list[entry].device    = FALSE;
+                       user_page_list[entry].needed    = FALSE;
                        if (dst_page->clustered == TRUE)
                                user_page_list[entry].speculative = dst_page->speculative;
                        else
@@ -3979,7 +4384,6 @@ REDISCOVER_ENTRY:
                        if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE)
                                        *upl_size = MAX_UPL_SIZE * PAGE_SIZE;
                }
-
                /*
                 *      Create an object if necessary.
                 */
@@ -4023,7 +4427,7 @@ REDISCOVER_ENTRY:
                                vm_object_pmap_protect(local_object,
                                                       entry->offset,
                                                       entry->vme_end - entry->vme_start,
-                                                      ((entry->is_shared || map->mapped)
+                                                      ((entry->is_shared || map->mapped_in_other_pmaps)
                                                        ? PMAP_NULL
                                                        : map->pmap),
                                                       entry->vme_start,
@@ -4371,7 +4775,7 @@ process_upl_to_enter:
                        /* m->wpmapped = TRUE; */
                        assert(map==kernel_map);
        
-                       PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, 0, TRUE);
+                       PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, VM_PROT_NONE, 0, TRUE);
                }
                offset += PAGE_SIZE_64;
        }
@@ -4685,7 +5089,7 @@ process_upl_to_commit:
                                page_list[entry].phys_addr = 0;
 
                        if (flags & UPL_COMMIT_SET_DIRTY) {
-                               m->dirty = TRUE;
+                               SET_PAGE_DIRTY(m, FALSE);
                        } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                                m->dirty = FALSE;
 
@@ -4733,6 +5137,9 @@ process_upl_to_commit:
 
                        goto commit_next_page;
                }
+               if (page_list)
+                       page_list[entry].phys_addr = 0;
+
                /*
                 * make sure to clear the hardware
                 * modify or reference bits before
@@ -4743,87 +5150,79 @@ process_upl_to_commit:
                if (flags & UPL_COMMIT_CLEAR_DIRTY) {
                        m->dirty = FALSE;
 
-                       if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
-                           m->cs_validated && !m->cs_tainted) {
-                               /*
-                                * CODE SIGNING:
-                                * This page is no longer dirty
-                                * but could have been modified,
-                                * so it will need to be
-                                * re-validated.
-                                */
-                               m->cs_validated = FALSE;
-#if DEVELOPMENT || DEBUG
-                               vm_cs_validated_resets++;
-#endif
-                               pmap_disconnect(m->phys_page);
-                       }
                        clear_refmod |= VM_MEM_MODIFIED;
                }
-               if (page_list) {
-                       upl_page_info_t *p;
-
-                       p = &(page_list[entry]);
-
-                       if (p->phys_addr && p->pageout && !m->pageout) {
-                               m->busy = TRUE;
-                               m->pageout = TRUE;
-
-                               dwp->dw_mask |= DW_vm_page_wire;
+               if (m->laundry)
+                       dwp->dw_mask |= DW_vm_pageout_throttle_up;
 
-                       } else if (p->phys_addr &&
-                                  !p->pageout && m->pageout &&
-                                  !m->dump_cleaning) {
-                               m->pageout = FALSE;
+               if (VM_PAGE_WIRED(m))
+                       m->pageout = FALSE;
+               
+               if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+                   m->cs_validated && !m->cs_tainted) {
+                       /*
+                        * CODE SIGNING:
+                        * This page is no longer dirty
+                        * but could have been modified,
+                        * so it will need to be
+                        * re-validated.
+                        */
+                       m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+                       vm_cs_validated_resets++;
+#endif
+                       pmap_disconnect(m->phys_page);
+               }
+               if (m->overwriting) {
+                       /*
+                        * the (COPY_OUT_FROM == FALSE) request_page_list case
+                        */
+                       if (m->busy) {
                                m->absent = FALSE;
-                               m->overwriting = FALSE;
 
-                               dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
+                               dwp->dw_mask |= DW_clear_busy;
+                       } else {
+                               /*
+                                * alternate (COPY_OUT_FROM == FALSE) page_list case
+                                * Occurs when the original page was wired
+                                * at the time of the list request
+                                */
+                               assert(VM_PAGE_WIRED(m));
+
+                               dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
                        }
-                       page_list[entry].phys_addr = 0;
+                       m->overwriting = FALSE;
                }
-               m->dump_cleaning = FALSE;
+               if (m->encrypted_cleaning == TRUE) {
+                       m->encrypted_cleaning = FALSE;
 
-               if (m->laundry)
-                       dwp->dw_mask |= DW_vm_pageout_throttle_up;
+                       dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP;
+               }
+               m->cleaning = FALSE;
 
                if (m->pageout) {
-                       m->cleaning = FALSE;
-                       m->encrypted_cleaning = FALSE;
+                       /* 
+                        * With the clean queue enabled, UPL_PAGEOUT should
+                        * no longer set the pageout bit. Its pages now go
+                        * to the clean queue.
+                        */
+                       assert(!(flags & UPL_PAGEOUT));
+
                        m->pageout = FALSE;
 #if MACH_CLUSTER_STATS
                        if (m->wanted) vm_pageout_target_collisions++;
 #endif
-                       m->dirty = FALSE;
-
-                       if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
-                           m->cs_validated && !m->cs_tainted) {
-                               /*
-                                * CODE SIGNING:
-                                * This page is no longer dirty
-                                * but could have been modified,
-                                * so it will need to be
-                                * re-validated.
-                                */
-                               m->cs_validated = FALSE;
-#if DEVELOPMENT || DEBUG
-                               vm_cs_validated_resets++;
-#endif
-                               pmap_disconnect(m->phys_page);
-                       }
-
                        if ((flags & UPL_COMMIT_SET_DIRTY) ||
-                           (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)))
-                               m->dirty = TRUE;
-
-                       if (m->dirty) {
+                           (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) {
                                /*
                                 * page was re-dirtied after we started
                                 * the pageout... reactivate it since 
                                 * we don't know whether the on-disk
                                 * copy matches what is now in memory
                                 */
-                               dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP);
+                               SET_PAGE_DIRTY(m, FALSE);
+                               
+                               dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
 
                                if (upl->flags & UPL_PAGEOUT) {
                                        CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
@@ -4835,23 +5234,15 @@ process_upl_to_commit:
                                 * page has been successfully cleaned
                                 * go ahead and free it for other use
                                 */
-
                                if (m->object->internal) {
                                        DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
                                } else {
                                        DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
                                }
-                               dwp->dw_mask |= DW_vm_page_free;
-                               if (upl->flags & UPL_PAGEOUT) {
-                                       CLUSTER_STAT(vm_pageout_target_page_freed++;)
+                               m->dirty = FALSE;
+                               m->busy = TRUE;
 
-                                       if (page_list[entry].dirty) {
-                                               VM_STAT_INCR(pageouts);
-                                               DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
-                                               pgpgout_count++;
-                                       }
-                               }
+                               dwp->dw_mask |= DW_vm_page_free;
                        }
                        goto commit_next_page;
                }
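
Throughout this commit, raw stores of m->dirty = TRUE are replaced by the SET_PAGE_DIRTY() macro, as in the vm_object_iopl_request() hunks further down. A minimal sketch of the substitution, taken from those call sites; the role of the second argument (whether to also assert the pmap modified state) is an inference from how it is used, not something this diff states:

        if (refmod & VM_MEM_MODIFIED) {
                SET_PAGE_DIRTY(low_page, FALSE);        /* was: low_page->dirty = TRUE; */
        }
        if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
                SET_PAGE_DIRTY(dst_page, TRUE);         /* was: dst_page->dirty = TRUE; */
        }
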
@@ -4863,51 +5254,6 @@ process_upl_to_commit:
                else            vm_pageout_cluster_cleaned++;
                if (m->wanted)  vm_pageout_cluster_collisions++;
 #endif
-               m->dirty = FALSE;
-
-               if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
-                   m->cs_validated && !m->cs_tainted) {
-                       /*
-                        * CODE SIGNING:
-                        * This page is no longer dirty
-                        * but could have been modified,
-                        * so it will need to be
-                        * re-validated.
-                        */
-                       m->cs_validated = FALSE;
-#if DEVELOPMENT || DEBUG
-                       vm_cs_validated_resets++;
-#endif
-                       pmap_disconnect(m->phys_page);
-               }
-
-               if (m->overwriting) {
-                       /*
-                        * the (COPY_OUT_FROM == FALSE) request_page_list case
-                        */
-                       if (m->busy) {
-                               m->absent = FALSE;
-
-                               dwp->dw_mask |= DW_clear_busy;
-                       } else {
-                               /*
-                                * alternate (COPY_OUT_FROM == FALSE) page_list case
-                                * Occurs when the original page was wired
-                                * at the time of the list request
-                                */
-                               assert(VM_PAGE_WIRED(m));
-
-                               dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
-                       }
-                       m->overwriting = FALSE;
-               }
-               if (m->encrypted_cleaning == TRUE) {
-                       m->encrypted_cleaning = FALSE;
-
-                       dwp->dw_mask |= DW_clear_busy;
-               }
-               m->cleaning = FALSE;
-
                /*
                 * It is a part of the semantic of COPYOUT_FROM
                 * UPLs that a commit implies cache sync
@@ -4918,17 +5264,29 @@ process_upl_to_commit:
                if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
                        m->precious = FALSE;
 
-               if (flags & UPL_COMMIT_SET_DIRTY)
-                       m->dirty = TRUE;
+               if (flags & UPL_COMMIT_SET_DIRTY) {
+                       SET_PAGE_DIRTY(m, FALSE);
+               } else {
+                       m->dirty = FALSE;
+               }
+
+               /* with the clean queue on, move *all* cleaned pages to the clean queue */
+               if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
+                       pgpgout_count++;
+
+                       /* this page used to be dirty; now it's on the clean queue. */
+                       m->was_dirty = TRUE;
 
-               if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
+                       dwp->dw_mask |= DW_enqueue_cleaned;
+                       vm_pageout_enqueued_cleaned_from_inactive_dirty++;
+               } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
                        /*
                         * page coming back in from being 'frozen'...
                         * it was dirty before it was frozen, so keep it so
                         * the vm_page_activate will notice that it really belongs
                         * on the throttle queue and put it there
                         */
-                       m->dirty = TRUE;
+                       SET_PAGE_DIRTY(m, FALSE);
                        dwp->dw_mask |= DW_vm_page_activate;
 
                } else {
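
These commit loops never touch a page's queue state directly; they accumulate DW_* bits in dwp->dw_mask and flush them in batches under a single page-queues lock acquisition. A minimal sketch of the idiom, using names that appear in this file; dw_limit and VM_PAGE_ADD_DELAYED_WORK come from the surrounding function and vm_page.h respectively, and are assumptions here:

        dwp->dw_mask = 0;
        if (m->laundry)
                dwp->dw_mask |= DW_vm_pageout_throttle_up;      /* drop laundry throttle */
        if (!m->dirty && (upl->flags & UPL_PAGEOUT))
                dwp->dw_mask |= DW_enqueue_cleaned;             /* clean queue, see above */
        if (dwp->dw_mask) {
                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);     /* remember page + mask */
                if (dw_count >= dw_limit) {                     /* flush a full batch */
                        vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
                        dwp = &dw_array[0];
                        dw_count = 0;
                }
        }
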
@@ -5032,6 +5390,7 @@ commit_next_page:
                         * against this object
                         */
                        vm_object_activity_end(shadow_object);
+                       vm_object_collapse(shadow_object, 0, TRUE);
                } else {
                         /*
                         * we donated the paging reference to
@@ -5078,6 +5437,7 @@ upl_abort_range(
        int                     error,
        boolean_t               *empty) 
 {
+       upl_page_info_t         *user_page_list = NULL;
        upl_size_t              xfer_size, subupl_size = size;
        vm_object_t             shadow_object;
        vm_object_t             object;
@@ -5155,6 +5515,8 @@ process_upl_to_abort:
                lite_list = (wpl_array_t) 
                        ((((uintptr_t)upl) + sizeof(struct upl))
                        + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
+
+               user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
        } else {
                lite_list = (wpl_array_t) 
                        (((uintptr_t)upl) + sizeof(struct upl));
@@ -5190,17 +5552,21 @@ process_upl_to_abort:
 
        while (xfer_size) {
                vm_page_t       t, m;
+               unsigned int    pg_num;
+               boolean_t       needed;
 
-               dwp->dw_mask = 0;
+               pg_num = (unsigned int) (target_offset/PAGE_SIZE);
+               assert(pg_num == target_offset/PAGE_SIZE);
+
+               needed = FALSE;
 
+               if (user_page_list)
+                       needed = user_page_list[pg_num].needed;
+
+               dwp->dw_mask = 0;
                m = VM_PAGE_NULL;
 
                if (upl->flags & UPL_LITE) {
-                       unsigned int    pg_num;
-
-                       pg_num = (unsigned int) (target_offset/PAGE_SIZE);
-                       assert(pg_num == target_offset/PAGE_SIZE);
-                       
 
                        if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
                                lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
@@ -5249,7 +5615,7 @@ process_upl_to_abort:
                                        m->unusual = TRUE;
                                        must_free = FALSE;
                                }
-                               if (m->clustered) {
+                               if (m->clustered && needed == FALSE) {
                                        /*
                                         * This page was a part of a speculative
                                         * read-ahead initiated by the kernel
@@ -5311,13 +5677,6 @@ process_upl_to_abort:
                                         */
                                        dwp->dw_mask |= DW_clear_busy;
                                }
-                               if (m->pageout) {
-                                       assert(m->busy);
-                                       assert(m->wire_count == 1);
-                                       m->pageout = FALSE;
-
-                                       dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy);
-                               }
                                if (m->overwriting) {
                                        if (m->busy)
                                                dwp->dw_mask |= DW_clear_busy;
@@ -5340,7 +5699,7 @@ process_upl_to_abort:
 
                                        dwp->dw_mask |= DW_clear_busy;
                                }
-                               m->dump_cleaning = FALSE;
+                               m->pageout = FALSE;
                                m->cleaning = FALSE;
 #if    MACH_PAGEMAP
                                vm_external_state_clr(m->object->existence_map, m->offset);
@@ -5350,14 +5709,18 @@ process_upl_to_abort:
 
                                        dwp->dw_mask |= DW_vm_page_free;
                                } else {
-                                       if (error & UPL_ABORT_REFERENCE) {
-                                               /*
-                                                * we've been told to explictly
-                                                * reference this page... for 
-                                                * file I/O, this is done by
-                                                * implementing an LRU on the inactive q
-                                                */
-                                               dwp->dw_mask |= DW_vm_page_lru;
+                                       if (!(dwp->dw_mask & DW_vm_page_unwire)) {
+                                               if (error & UPL_ABORT_REFERENCE) {
+                                                       /*
+                                                        * we've been told to explicitly
+                                                        * reference this page... for 
+                                                        * file I/O, this is done by
+                                                        * implementing an LRU on the inactive q
+                                                        */
+                                                       dwp->dw_mask |= DW_vm_page_lru;
+
+                                               } else if (!m->active && !m->inactive && !m->speculative)
+                                                       dwp->dw_mask |= DW_vm_page_deactivate_internal;
                                        }
                                        dwp->dw_mask |= DW_PAGE_WAKEUP;
                                }
@@ -5432,6 +5795,7 @@ abort_next_page:
                         * against this object
                         */
                        vm_object_activity_end(shadow_object);
+                       vm_object_collapse(shadow_object, 0, TRUE);
                } else {
                         /*
                         * we donated the paging reference to
@@ -5490,6 +5854,21 @@ upl_commit(
        return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
 }
 
+void
+vm_object_set_pmap_cache_attr(
+               vm_object_t             object,
+               upl_page_info_array_t   user_page_list,
+               unsigned int            num_pages,
+               boolean_t               batch_pmap_op)
+{
+       unsigned int    cache_attr = 0;
+
+       cache_attr = object->wimg_bits & VM_WIMG_MASK;
+       assert(user_page_list);
+       if (cache_attr != VM_WIMG_USE_DEFAULT) {
+               PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
+       }
+}
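
vm_object_set_pmap_cache_attr() defers per-page WIMG setup so it can be issued as one batched pmap operation. Its caller in vm_object_iopl_request(), shown later in this diff, invokes it once after the page-population loop:

        if (dw_count)
                vm_page_do_delayed_work(object, &dw_array[0], dw_count);

        /* 'entry' pages were populated above; set cache attributes in one batch */
        vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE);
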
 
 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
 
@@ -5627,7 +6006,7 @@ vm_object_iopl_request(
 
        if (cntrl_flags & UPL_BLOCK_ACCESS) {
                /*
-                * The user requested that access to the pages in this URL
+                * The user requested that access to the pages in this UPL
                 * be blocked until the UPL is committed or aborted.
                 */
                upl->flags |= UPL_ACCESS_BLOCKED;
@@ -5635,6 +6014,7 @@ vm_object_iopl_request(
 
        if (object->phys_contiguous) {
 #if UPL_DEBUG
+               vm_object_activity_begin(object);
                queue_enter(&object->uplq, upl, upl_t, uplq);
 #endif /* UPL_DEBUG */
 
@@ -5676,6 +6056,7 @@ vm_object_iopl_request(
        }
  
 #if UPL_DEBUG
+       vm_object_activity_begin(object);
        queue_enter(&object->uplq, upl, upl_t, uplq);
 #endif /* UPL_DEBUG */
 
@@ -5768,6 +6149,7 @@ vm_object_iopl_request(
 
                        fault_info.interruptible = interruptible;
                        fault_info.cluster_size = xfer_size;
+                       fault_info.batch_pmap_op = TRUE;
 
                        vm_object_paging_begin(object);
 
@@ -5870,7 +6252,7 @@ vm_object_iopl_request(
 
                if (dst_page->cleaning) {
                        /*
-                        * Someone else is cleaning this page in place.as
+                        * Someone else is cleaning this page in place.
                         * In theory, we should be able to  proceed and use this
                         * page but they'll probably end up clearing the "busy"
                         * bit on it in upl_commit_range() but they didn't set
@@ -5883,6 +6265,11 @@ vm_object_iopl_request(
                        PAGE_SLEEP(object, dst_page, THREAD_UNINT);
                        continue;
                }
+               if (dst_page->laundry) {
+                       dst_page->pageout = FALSE;
+                       
+                       vm_pageout_steal_laundry(dst_page, FALSE);
+               }                       
                if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
                     dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
                        vm_page_t       low_page;
@@ -5927,8 +6314,9 @@ vm_object_iopl_request(
 
                        if (refmod & VM_MEM_REFERENCED)
                                low_page->reference = TRUE;
-                       if (refmod & VM_MEM_MODIFIED)
-                               low_page->dirty = TRUE;
+                       if (refmod & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(low_page, FALSE);
+                       }
 
                        vm_page_replace(low_page, object, dst_offset);
 
@@ -5960,8 +6348,9 @@ vm_object_iopl_request(
                 */
                dwp->dw_mask |= DW_set_reference;
 
-               if (!(cntrl_flags & UPL_COPYOUT_FROM))
-                       dst_page->dirty = TRUE;
+               if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
+                       SET_PAGE_DIRTY(dst_page, TRUE); 
+               }
 record_phys_addr:
                if (dst_page->busy)
                        upl->flags |= UPL_HAS_BUSY;
@@ -5980,6 +6369,7 @@ record_phys_addr:
                        user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].precious  = dst_page->precious;
                        user_page_list[entry].device    = FALSE;
+                       user_page_list[entry].needed    = FALSE;
                        if (dst_page->clustered == TRUE)
                                user_page_list[entry].speculative = dst_page->speculative;
                        else
@@ -6013,6 +6403,8 @@ record_phys_addr:
        if (dw_count)
                vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 
+       vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE);
+
        if (page_list_count != NULL) {
                if (upl->flags & UPL_INTERNAL)
                        *page_list_count = 0;
@@ -6093,6 +6485,7 @@ return_err:
 #endif
        if (! (upl->flags & UPL_KERNEL_OBJECT)) {
                vm_object_activity_end(object);
+               vm_object_collapse(object, 0, TRUE);
        }
        vm_object_unlock(object);
        upl_destroy(upl);
@@ -6179,6 +6572,27 @@ done:
        return retval;
 }
 
+void
+upl_range_needed(
+       upl_t           upl,
+       int             index,
+       int             count)
+{
+       upl_page_info_t *user_page_list;
+       int             size_in_pages;
+
+       if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
+               return;
+
+       size_in_pages = upl->size / PAGE_SIZE;
+
+       user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
+
+       while (count-- && index < size_in_pages)
+               user_page_list[index++].needed = TRUE;
+}
+
+
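upl_range_needed() pairs with the abort path above: a speculative (m->clustered) page is freed on abort only if nothing marked it needed. A sketch of the intended call pattern; the caller and n_pages are illustrative, not from this commit:

        boolean_t       empty;

        upl_range_needed(upl, 0, n_pages);      /* keep pages 0..n_pages-1 */
        upl_abort_range(upl, 0, upl->size,
                        UPL_ABORT_FREE_ON_EMPTY, &empty);
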
 /*
  * ENCRYPTED SWAP:
  *
@@ -6217,7 +6631,7 @@ done:
  * can call the encryption/decryption routines with a kernel
  * virtual address.  We keep this pool of pre-allocated kernel
  * virtual addresses so that we don't have to scan the kernel's
- * virtual address space each time we need to encrypt or decrypt
+ * virtual address space each time we need to encrypt or decrypt
  * a physical page.
  * It would be nice to be able to encrypt and decrypt in physical
  * mode but that might not always be more efficient...
@@ -6373,6 +6787,7 @@ vm_paging_map_object(
                                   page_map_offset,
                                   page,
                                   protection,
+                                  VM_PROT_NONE,
                                   0,
                                   TRUE);
                        vm_paging_objects_mapped++;
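
Both pmap_enter() hunks in vm_paging_map_object() reflect the widened signature: a fault-type argument now follows the mapping protection, and callers not servicing a fault pass VM_PROT_NONE. The pmap_steal_memory() hunk later in this commit shows the full call:

        pmap_enter(kernel_pmap, vaddr, phys_page,
                   VM_PROT_READ | VM_PROT_WRITE,        /* mapping protection */
                   VM_PROT_NONE,                        /* fault type: not a fault */
                   VM_WIMG_USE_DEFAULT, FALSE);
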
@@ -6462,6 +6877,7 @@ vm_paging_map_object(
                           *address + page_map_offset,
                           page,
                           protection,
+                          VM_PROT_NONE,
                           0,
                           TRUE);
        }
@@ -6538,7 +6954,7 @@ vm_paging_unmap_object(
  */
 #define SWAP_CRYPT_AES_KEY_SIZE        128     /* XXX 192 and 256 don't work ! */
 boolean_t              swap_crypt_ctx_initialized = FALSE;
-aes_32t                swap_crypt_key[8]; /* big enough for a 256 key */
+uint32_t               swap_crypt_key[8]; /* big enough for a 256-bit key */
 aes_ctx                        swap_crypt_ctx;
 const unsigned char    swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
 
@@ -6667,7 +7083,6 @@ vm_page_encrypt(
        }
 
        assert(page->busy);
-       assert(page->dirty || page->precious);
        
        if (page->encrypted) {
                /*
@@ -6676,6 +7091,8 @@ vm_page_encrypt(
                vm_page_encrypt_already_encrypted_counter++;
                return;
        }
+       assert(page->dirty || page->precious);
+
        ASSERT_PAGE_DECRYPTED(page);
 
        /*
@@ -7072,17 +7489,12 @@ vm_page_decrypt(
 
 #endif /* CRYPTO */
 
+/*
+ * page->object must be locked
+ */
 void
-vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked)
+vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
 {
-       boolean_t       pageout;
-
-       pageout = page->pageout;
-
-       page->list_req_pending = FALSE;
-       page->cleaning = FALSE;
-       page->pageout = FALSE;
-
        if (!queues_locked) {
                vm_page_lockspin_queues();
        }
@@ -7097,14 +7509,6 @@ vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked)
         */
        vm_pageout_throttle_up(page);
 
-       if (pageout == TRUE) {
-               /*
-                * toss the wire count we picked up
-                * when we intially set this page up
-                * to be cleaned...
-                */
-               vm_page_unwire(page, TRUE);
-       }
        vm_page_steal_pageout_page++;
 
        if (!queues_locked) {
@@ -7152,7 +7556,7 @@ vector_upl_deallocate(upl_t upl)
                        vector_upl->size = 0;
                        vector_upl->offset = 0;
                        kfree(vector_upl, sizeof(struct _vector_upl));
-                       vector_upl = (vector_upl_t)0xdeadbeef;
+                       vector_upl = (vector_upl_t)0xfeedfeed;
                }
                else
                        panic("vector_upl_deallocate was passed a non-vectored upl\n");
@@ -7166,7 +7570,7 @@ vector_upl_is_valid(upl_t upl)
 {
        if(upl &&  ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
                vector_upl_t vector_upl = upl->vector_upl;
-               if(vector_upl == NULL || vector_upl == (vector_upl_t)0xdeadbeef || vector_upl == (vector_upl_t)0xfeedbeef)
+               if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
                        return FALSE;
                else
                        return TRUE;
@@ -7495,6 +7899,9 @@ vm_page_slide(
        uint32_t                pageIndex = 0;
 
        assert(!page->slid);
+
+       if (page->error)
+               return KERN_FAILURE;
        
        /*
         * Take a paging-in-progress reference to keep the object
@@ -7548,6 +7955,15 @@ vm_page_slide(
        
        page->dirty = FALSE;
        pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+       
+       if (kr != KERN_SUCCESS || cs_debug > 1) {
+               printf("vm_page_slide(%p): "
+                      "obj %p off 0x%llx mobj %p moff 0x%llx\n",
+                      page,
+                      page->object, page->offset,
+                      page->object->pager,
+                      page->offset + page->object->paging_offset);
+       }
 
        if (kr == KERN_SUCCESS) {
                page->slid = TRUE;
@@ -7635,7 +8051,7 @@ vm_countdirtypages(void)
        vm_page_unlock_queues();
 
        vm_page_lock_queues();
-       m = (vm_page_t) queue_first(&vm_page_queue_zf);
+       m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
        do {
                if (m ==(vm_page_t )0) break;
 
@@ -7647,7 +8063,7 @@ vm_countdirtypages(void)
                m = (vm_page_t) queue_next(&m->pageq);
                if (m ==(vm_page_t )0) break;
 
-       } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
+       } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m));
        vm_page_unlock_queues();
 
        printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
@@ -7705,80 +8121,3 @@ int  upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
        return KERN_SUCCESS;
 }
 #endif /* UPL_DEBUG */
-
-
-
-#if    MACH_KDB
-#include <ddb/db_output.h>
-#include <ddb/db_print.h>
-#include <vm/vm_print.h>
-
-#define        printf  kdbprintf
-void           db_pageout(void);
-
-void
-db_vm(void)
-{
-
-       iprintf("VM Statistics:\n");
-       db_indent += 2;
-       iprintf("pages:\n");
-       db_indent += 2;
-       iprintf("activ %5d  inact %5d  free  %5d",
-               vm_page_active_count, vm_page_inactive_count,
-               vm_page_free_count);
-       printf("   wire  %5d  gobbl %5d\n",
-              vm_page_wire_count, vm_page_gobble_count);
-       db_indent -= 2;
-       iprintf("target:\n");
-       db_indent += 2;
-       iprintf("min   %5d  inact %5d  free  %5d",
-               vm_page_free_min, vm_page_inactive_target,
-               vm_page_free_target);
-       printf("   resrv %5d\n", vm_page_free_reserved);
-       db_indent -= 2;
-       iprintf("pause:\n");
-       db_pageout();
-       db_indent -= 2;
-}
-
-#if    MACH_COUNTERS
-extern int c_laundry_pages_freed;
-#endif /* MACH_COUNTERS */
-
-void
-db_pageout(void)
-{
-       iprintf("Pageout Statistics:\n");
-       db_indent += 2;
-       iprintf("active %5d  inactv %5d\n",
-               vm_pageout_active, vm_pageout_inactive);
-       iprintf("nolock %5d  avoid  %5d  busy   %5d  absent %5d\n",
-               vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
-               vm_pageout_inactive_busy, vm_pageout_inactive_absent);
-       iprintf("used   %5d  clean  %5d  dirty(internal)  %5d  dirty(external)  %5d\n",
-               vm_pageout_inactive_used, vm_pageout_inactive_clean,
-               vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
-#if    MACH_COUNTERS
-       iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
-#endif /* MACH_COUNTERS */
-#if    MACH_CLUSTER_STATS
-       iprintf("Cluster Statistics:\n");
-       db_indent += 2;
-       iprintf("dirtied   %5d   cleaned  %5d   collisions  %5d\n",
-               vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
-               vm_pageout_cluster_collisions);
-       iprintf("clusters  %5d   conversions  %5d\n",
-               vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
-       db_indent -= 2;
-       iprintf("Target Statistics:\n");
-       db_indent += 2;
-       iprintf("collisions   %5d   page_dirtied  %5d   page_freed  %5d\n",
-               vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
-               vm_pageout_target_page_freed);
-       db_indent -= 2;
-#endif /* MACH_CLUSTER_STATS */
-       db_indent -= 2;
-}
-
-#endif /* MACH_KDB */
index d8ddac6a760dd90b8f117f55d3e5271db0608059..d04bbe8cf8032261b25dc0eebfdeafbe062c7a2e 100644 (file)
 
 #include <sys/kdebug.h>
 
+#define VM_PAGE_CLEANED_TARGET 30000           /* 30000 pages ~= 117 MB at 4 KB/page */
+#define VM_PAGE_CLEANED_MIN    ((VM_PAGE_CLEANED_TARGET * 80) / 100)
+
+#define VM_PAGE_AVAILABLE_COUNT()              ((unsigned int)(vm_page_cleaned_count))
+
+/* externally manipulated counters */
+extern unsigned int vm_pageout_cleaned_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated;
+
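The sizing arithmetic behind these constants, assuming 4 KB pages (the page size is an assumption; the header does not state it):

        /*
         * VM_PAGE_CLEANED_TARGET = 30000 pages * 4 KB          ~= 117 MB
         * VM_PAGE_CLEANED_MIN    = (30000 * 80) / 100 = 24000  ~=  94 MB
         * (25600 pages * 4 KB = exactly 100 MB, the older reference point)
         */
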
 #if CONFIG_FREEZE
-extern boolean_t vm_freeze_enabled;
-#define VM_DYNAMIC_PAGING_ENABLED(port) ((vm_freeze_enabled == FALSE) && IP_VALID(port))
+extern boolean_t memorystatus_freeze_enabled;
+#define VM_DYNAMIC_PAGING_ENABLED(port) ((memorystatus_freeze_enabled == FALSE) && IP_VALID(port))
 #else
 #define VM_DYNAMIC_PAGING_ENABLED(port) IP_VALID(port)
 #endif
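
A sketch of how this predicate reads at a call site; memory_manager_default is the conventional default-pager port passed elsewhere in osfmk, an assumption here:

        if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
                /* a default pager is registered and freeze is disabled:
                 * dirty pages may be paged out dynamically */
        }
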
@@ -111,6 +119,8 @@ extern int  vm_debug_events;
 #define VM_UPL_PAGE_WAIT       0x120
 #define VM_IOPL_PAGE_WAIT      0x121
 
+#define VM_PRESSURE_EVENT      0x130
+
 #define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4)   \
        MACRO_BEGIN                                             \
        if (vm_debug_events) {                                  \
@@ -159,20 +169,8 @@ extern vm_page_t          vm_page_get_next(vm_page_t page);
 #include <vm/vm_page.h>
 
 extern unsigned int    vm_pageout_scan_event_counter;
-extern unsigned int    vm_zf_queue_count;
-
+extern unsigned int    vm_page_anonymous_count;
 
-extern uint64_t        vm_zf_count;
-
-#define VM_ZF_COUNT_INCR()                             \
-       MACRO_BEGIN                                     \
-       OSAddAtomic64(1, (SInt64 *) &vm_zf_count);      \
-       MACRO_END                                       \
-
-#define VM_ZF_COUNT_DECR()                             \
-       MACRO_BEGIN                                     \
-       OSAddAtomic64(-1, (SInt64 *) &vm_zf_count);     \
-       MACRO_END                                       \
 
 /*
  * must hold the page queues lock to
@@ -182,11 +180,14 @@ struct vm_pageout_queue {
         queue_head_t   pgo_pending;    /* laundry pages to be processed by pager's iothread */
         unsigned int   pgo_laundry;    /* current count of laundry pages on queue or in flight */
         unsigned int   pgo_maxlaundry;
+       uint64_t        pgo_tid;        /* thread ID of I/O thread that services this queue */
+       uint8_t         pgo_lowpriority; /* iothread is set to use low priority I/O */
 
         unsigned int   pgo_idle:1,     /* iothread is blocked waiting for work to do */
                        pgo_busy:1,     /* iothread is currently processing request from pgo_pending */
                        pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
                        pgo_draining:1,
+                       pgo_inited:1,
                        :0;
 };
 
@@ -208,7 +209,8 @@ extern void         vm_pageout_object_terminate(
                                        vm_object_t     object);
 
 extern void            vm_pageout_cluster(
-                                       vm_page_t       m);
+                                       vm_page_t       m,
+                                       boolean_t       pageout);
 
 extern void            vm_pageout_initialize_page(
                                        vm_page_t       m);
@@ -328,6 +330,12 @@ extern void vector_upl_get_iostate_byindex(upl_t, uint32_t, upl_offset_t*, upl_s
 extern upl_t vector_upl_subupl_byindex(upl_t , uint32_t);
 extern upl_t vector_upl_subupl_byoffset(upl_t , upl_offset_t*, upl_size_t*);
 
+extern void vm_object_set_pmap_cache_attr(
+               vm_object_t             object,
+               upl_page_info_array_t   user_page_list,
+               unsigned int            num_pages,
+               boolean_t               batch_pmap_op);
+
 extern kern_return_t vm_object_iopl_request(
        vm_object_t             object,
        vm_object_offset_t      offset,
@@ -399,7 +407,7 @@ decl_simple_lock_data(extern, vm_paging_lock)
  */
 extern unsigned int    vm_backing_store_low;
 
-extern void vm_pageout_queue_steal(
+extern void vm_pageout_steal_laundry(
        vm_page_t page, 
        boolean_t queues_locked);
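
The rename from vm_pageout_queue_steal() tracks the behavioral change above: the page is removed from its pageout queue with the laundry accounting adjusted, and no wire count is dropped. The vm_object_iopl_request() hunk earlier in this diff shows the calling convention (object locked, page queues not held):

        if (dst_page->laundry) {
                dst_page->pageout = FALSE;

                vm_pageout_steal_laundry(dst_page, FALSE);      /* FALSE: queues unlocked */
        }
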
        
diff --git a/osfmk/vm/vm_print.h b/osfmk/vm/vm_print.h
deleted file mode 100644 (file)
index 6decd44..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef VM_PRINT_H
-#define        VM_PRINT_H
-
-#include <vm/vm_map.h>
-#include <machine/db_machdep.h>
-
-extern void    vm_map_print(
-                       db_addr_t       map);
-
-extern void    vm_map_copy_print(
-                       db_addr_t       copy);
-
-#include <vm/vm_object.h>
-
-extern int     vm_follow_object(
-                       vm_object_t     object);
-
-extern void vm_object_print(db_expr_t, boolean_t, db_expr_t, char *);
-
-#include <vm/vm_page.h>
-
-extern void    vm_page_print(
-                       db_addr_t       p);
-
-#include <mach_pagemap.h>
-#if    MACH_PAGEMAP
-#include <vm/vm_external.h>
-extern void vm_external_print(
-                       vm_external_map_t       map,
-                       vm_object_size_t        size);
-#endif /* MACH_PAGEMAP */
-
-extern void    db_vm(void);
-
-extern vm_map_size_t db_vm_map_total_size(
-                       db_addr_t       map);
-
-#endif /* VM_PRINT_H */
index 53cf9e12a8c84f716e795a84b9831631b26c00a9..fa8c278724a4d2b1c20f642d9ff8305b0ea5a607 100644 (file)
@@ -68,15 +68,6 @@ extern int default_pager_init_flag;
 /*
  * osfmk
  */
-#ifndef _KERN_IPC_TT_H_        /* XXX FBDP */
-/* these should be exported cleanly from OSFMK since BSD needs them */
-extern ipc_port_t convert_task_to_port(
-       task_t          task);
-extern ipc_port_t convert_thread_to_port(
-       thread_t                thread);
-extern ipc_port_t convert_task_name_to_port(
-       task_name_t     task_name);
-#endif /* _KERN_IPC_TT_H_ */
 #ifndef _IPC_IPC_PORT_H_
 extern mach_port_name_t ipc_port_copyout_send(
        ipc_port_t      sright,
@@ -343,7 +334,38 @@ extern kern_return_t default_pager_memory_object_create(
 
 #if CONFIG_FREEZE
 extern unsigned int default_pager_swap_pages_free(void);
-#endif
+struct default_freezer_handle;
+struct vm_page;
+__private_extern__ void        default_freezer_init(void);
+__private_extern__ struct default_freezer_handle* default_freezer_handle_allocate(void);
+__private_extern__ kern_return_t
+default_freezer_handle_init(
+       struct  default_freezer_handle *df_handle);
+__private_extern__ void
+default_freezer_handle_deallocate(
+       struct default_freezer_handle *df_handle);
+__private_extern__ void
+default_freezer_pageout(
+       struct default_freezer_handle *df_handle);
+__private_extern__ kern_return_t
+default_freezer_pack(
+       unsigned int            *purgeable_count,
+       unsigned int            *wired_count,
+       unsigned int            *clean_count,
+       unsigned int            *dirty_count,
+       unsigned int            dirty_budget,
+       boolean_t               *shared,
+       vm_object_t             src_object,
+       struct default_freezer_handle *df_handle);
+__private_extern__ void
+default_freezer_unpack(
+       struct default_freezer_handle *df_handle);      
+__private_extern__ void
+default_freezer_pack_page(
+       struct vm_page* p,
+       struct default_freezer_handle *df_handle);
+
+#endif /* CONFIG_FREEZE */
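
A hypothetical freeze sequence for the handle API declared above; the ordering is inferred from the names, and src_object / dirty_budget stand in for caller-supplied values:

        struct default_freezer_handle *dfh;
        unsigned int purgeable, wired, clean, dirty;
        boolean_t shared;

        dfh = default_freezer_handle_allocate();
        if (default_freezer_handle_init(dfh) == KERN_SUCCESS) {
                default_freezer_pack(&purgeable, &wired, &clean, &dirty,
                                     dirty_budget, &shared, src_object, dfh);
                default_freezer_pageout(dfh);   /* push the packed pages to swap */
        }
        /* a later thaw would call default_freezer_unpack(dfh) */
        default_freezer_handle_deallocate(dfh);
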
 
 extern void   device_pager_reference(memory_object_t);
 extern void   device_pager_deallocate(memory_object_t);
@@ -410,6 +432,7 @@ extern void log_unnest_badness(vm_map_t, vm_map_offset_t, vm_map_offset_t);
 extern int cs_allow_invalid(struct proc *p);
 extern int cs_invalid_page(addr64_t vaddr);
 extern boolean_t cs_validate_page(void *blobs,
+                                 memory_object_t pager,
                                  memory_object_offset_t offset, 
                                  const void *data,
                                  boolean_t *tainted);
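
cs_validate_page() now also receives the pager, so a validation failure can be attributed to the backing object. A hedged sketch of a call site; this commit shows only the prototype, and blobs / kaddr are assumed names:

        boolean_t       tainted = FALSE;
        boolean_t       valid;

        valid = cs_validate_page(blobs,
                                 object->pager,                 /* new argument */
                                 offset + object->paging_offset,
                                 (const void *)kaddr,
                                 &tainted);
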
index f9f18a161c20dbc5dfc7d87ee62898608a61f4d2..61830edebdeb4c2de80118c2fbe5280e655aa90e 100644 (file)
@@ -99,7 +99,7 @@ vm_purgeable_token_check_queue(purgeable_q_t queue)
        {
                our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
                assert(our_inactive_count >= 0);
-               assert((uint32_t) our_inactive_count == vm_page_inactive_count);
+               assert((uint32_t) our_inactive_count == vm_page_inactive_count - vm_page_cleaned_count);
        }
 }
 #endif
@@ -321,6 +321,68 @@ vm_purgeable_token_remove_first(purgeable_q_t queue)
        return token;
 }
 
+static token_idx_t 
+vm_purgeable_token_remove_last(purgeable_q_t queue)
+{
+#if MACH_ASSERT
+       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+       
+       token_idx_t     token;
+       token = queue->token_q_tail;
+
+       assert(token);
+
+       if (token) {
+               assert(queue->token_q_head);
+
+               if (queue->token_q_tail == queue->token_q_head)
+                       assert(tokens[token].next == 0);
+
+               if (queue->token_q_unripe == 0) {
+                       /* we're removing a ripe token. decrease count */
+                       available_for_purge--;
+                       assert(available_for_purge >= 0);
+               } else if (queue->token_q_unripe == token) {
+                       /* we're removing the only unripe token */
+                       queue->token_q_unripe = 0;
+               }
+                       
+               if (token == queue->token_q_head) {
+                       /* token is the only one in the queue */
+                       queue->token_q_head = 0;
+                       queue->token_q_tail = 0;
+               } else {
+                       token_idx_t new_tail;
+
+                       for (new_tail = queue->token_q_head;
+                            tokens[new_tail].next != token && new_tail != 0;
+                            new_tail = tokens[new_tail].next) {
+                       }
+                       assert(tokens[new_tail].next == token);
+                       queue->token_q_tail = new_tail;
+                       tokens[new_tail].next = 0;
+               }
+
+               queue->new_pages += tokens[token].count;
+
+#if MACH_ASSERT
+               queue->debug_count_tokens--;
+               vm_purgeable_token_check_queue(queue);
+
+               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_DELETE)),
+                                     queue->type,
+                                     tokens[queue->token_q_head].count,        /* num pages on new
+                                                                                * first token */
+                                     token_new_pagecount,      /* num pages waiting for
+                                                                * next token */
+                                     available_for_purge,
+                                     0);
+#endif
+       }
+       return token;
+}
+
 /* 
  * Delete first token from queue. Return token to token queue.
  * Call with page queue locked. 
@@ -340,6 +402,21 @@ vm_purgeable_token_delete_first(purgeable_q_t queue)
        }
 }
 
+void
+vm_purgeable_token_delete_last(purgeable_q_t queue)
+{
+#if MACH_ASSERT
+       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+       token_idx_t     token = vm_purgeable_token_remove_last(queue);
+
+       if (token) {
+               /* stick removed token on free queue */
+               tokens[token].next = token_free_idx;
+               token_free_idx = token;
+       }
+}
+
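Like delete_first above, delete_last must run with the page-queues lock held; it lets a caller undo the most recently added token (that rollback motivation is an inference from the code, not stated in this commit):

        vm_page_lock_queues();
        vm_purgeable_token_delete_last(queue);  /* token goes back on the free list */
        vm_page_unlock_queues();
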
 
 /* Call with page queue locked. */
 void
index 4f720eb3940ace5d319a7aca2a1a8b2618e39dad..169aa660dbaaf3f890c93bec63857f9412cbce6d 100644 (file)
@@ -88,6 +88,7 @@ kern_return_t vm_purgeable_token_add(purgeable_q_t queue);
 
 /* enter with page queue locked */
 void vm_purgeable_token_delete_first(purgeable_q_t queue);
+void vm_purgeable_token_delete_last(purgeable_q_t queue);
 
 /*
  * decrement token counters.
index 0c0a34e04d1bd4a3e78b7cc3331a33b462e2c097..a548c0600eed6b49427c01f8d072b451a7f0bf0c 100644 (file)
 
 #include <IOKit/IOHibernatePrivate.h>
 
-
-#include <sys/kern_memorystatus.h>
-
 #include <sys/kdebug.h>
 
+boolean_t      hibernate_cleaning_in_progress = FALSE;
 boolean_t      vm_page_free_verify = TRUE;
 
 uint32_t       vm_lopage_free_count = 0;
@@ -259,6 +257,8 @@ unsigned int        vm_page_free_count_minimum;     /* debugging */
 zone_t vm_page_zone;
 vm_locks_array_t vm_page_locks;
 decl_lck_mtx_data(,vm_page_alloc_lock)
+lck_mtx_ext_t vm_page_alloc_lock_ext;
+
 unsigned int io_throttle_zero_fill;
 
 unsigned int   vm_page_local_q_count = 0;
@@ -266,6 +266,9 @@ unsigned int        vm_page_local_q_soft_limit = 250;
 unsigned int   vm_page_local_q_hard_limit = 500;
 struct vplq     *vm_page_local_q = NULL;
 
+/* N.B. Guard and fictitious pages must not
+ * be assigned a zero phys_page value.
+ */
 /*
  *     Fictitious pages don't have a physical address,
  *     but we must initialize phys_page to something.
@@ -296,11 +299,12 @@ ppnum_t vm_page_guard_addr = (ppnum_t) -2;
  */
 queue_head_t   vm_page_queue_active;
 queue_head_t   vm_page_queue_inactive;
-queue_head_t   vm_page_queue_zf;       /* inactive memory queue for zero fill */
+queue_head_t   vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
 queue_head_t   vm_page_queue_throttled;
 
 unsigned int   vm_page_active_count;
 unsigned int   vm_page_inactive_count;
+unsigned int   vm_page_anonymous_count;
 unsigned int   vm_page_throttled_count;
 unsigned int   vm_page_speculative_count;
 unsigned int   vm_page_wire_count;
@@ -319,6 +323,11 @@ unsigned int       vm_page_speculative_created = 0;
 unsigned int   vm_page_speculative_used = 0;
 #endif
 
+queue_head_t    vm_page_queue_cleaned;
+
+unsigned int   vm_page_cleaned_count = 0;
+unsigned int   vm_pageout_enqueued_cleaned = 0;
+
 uint64_t       max_valid_dma_address = 0xffffffffffffffffULL;
 ppnum_t                max_valid_low_ppnum = 0xffffffff;
 
@@ -334,10 +343,12 @@ unsigned int      vm_page_free_min = 0;
 unsigned int   vm_page_throttle_limit = 0;
 uint32_t       vm_page_creation_throttle = 0;
 unsigned int   vm_page_inactive_target = 0;
+unsigned int   vm_page_anonymous_min = 0;
 unsigned int   vm_page_inactive_min = 0;
 unsigned int   vm_page_free_reserved = 0;
 unsigned int   vm_page_throttle_count = 0;
 
+
 /*
  *     The VM system has a couple of heuristics for deciding
  *     that pages are "uninteresting" and should be placed
@@ -424,6 +435,7 @@ vm_page_init_lck_grp(void)
        lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
        lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
        lck_attr_setdefault(&vm_page_lck_attr);
+       lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
 }
 
 void
@@ -528,14 +540,12 @@ vm_page_bootstrap(
        m->unusual = FALSE;
        m->encrypted = FALSE;
        m->encrypted_cleaning = FALSE;
-       m->list_req_pending = FALSE;
-       m->dump_cleaning = FALSE;
        m->cs_validated = FALSE;
        m->cs_tainted = FALSE;
        m->no_cache = FALSE;
-       m->zero_fill = FALSE;
        m->reusable = FALSE;
        m->slid = FALSE;
+       m->was_dirty = FALSE;
        m->__unused_object_bits = 0;
 
 
@@ -570,8 +580,9 @@ vm_page_bootstrap(
        queue_init(&vm_lopage_queue_free);
        queue_init(&vm_page_queue_active);
        queue_init(&vm_page_queue_inactive);
+       queue_init(&vm_page_queue_cleaned);
        queue_init(&vm_page_queue_throttled);
-       queue_init(&vm_page_queue_zf);
+       queue_init(&vm_page_queue_anonymous);
 
        for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
                queue_init(&vm_page_queue_speculative[i].age_q);
@@ -588,9 +599,8 @@ vm_page_bootstrap(
        /*
         *      Steal memory for the map and zone subsystems.
         */
-
-       vm_map_steal_memory();
        zone_steal_memory();
+       vm_map_steal_memory();
 
        /*
         *      Allocate (and initialize) the virtual-to-physical
@@ -754,7 +764,7 @@ pmap_steal_memory(
 #endif
 
                pmap_enter(kernel_pmap, vaddr, phys_page,
-                          VM_PROT_READ|VM_PROT_WRITE, 
+                          VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
                                VM_WIMG_USE_DEFAULT, FALSE);
                /*
                 * Account for newly stolen memory
@@ -806,7 +816,18 @@ pmap_startup(
         */
        fill = 0;                                                               /* Assume no fill */
        if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;                   /* Set fill */
-       
+#if    DEBUG
+       /* This slows down booting the DEBUG kernel, particularly on
+        * large memory systems, but is worthwhile in deterministically
+        * trapping uninitialized memory usage.
+        */
+       if (fill == 0) {
+               fill = 1;
+               fillval = 0xDEB8F177;
+       }
+#endif
+       if (fill)
+               kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
        // -debug code remove
        if (2 == vm_himemory_mode) {
                // free low -> high so high is preferred
@@ -903,7 +924,7 @@ vm_page_module_init(void)
        zone_change(vm_page_zone, Z_EXPAND, FALSE);
        zone_change(vm_page_zone, Z_EXHAUST, TRUE);
        zone_change(vm_page_zone, Z_FOREIGN, TRUE);
-
+       zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
         /*
          * Adjust zone statistics to account for the real pages allocated
          * in vm_page_create(). [Q: is this really what we want?]
@@ -911,8 +932,6 @@ vm_page_module_init(void)
         vm_page_zone->count += vm_page_pages;
         vm_page_zone->sum_count += vm_page_pages;
         vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
-
-       lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
 }
 
 /*
@@ -973,7 +992,7 @@ vm_page_insert(
        vm_object_t             object,
        vm_object_offset_t      offset)
 {
-       vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
+       vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
 }
 
 void
@@ -982,7 +1001,8 @@ vm_page_insert_internal(
        vm_object_t             object,
        vm_object_offset_t      offset,
        boolean_t               queues_lock_held,
-       boolean_t               insert_in_hash)
+       boolean_t               insert_in_hash,
+       boolean_t               batch_pmap_op)
 {
        vm_page_bucket_t *bucket;
        lck_spin_t      *bucket_lock;
@@ -991,8 +1011,13 @@ vm_page_insert_internal(
         XPR(XPR_VM_PAGE,
                 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
                 object, offset, mem, 0,0);
-
+#if 0
+       /*
+        * we may not hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(mem);
+#endif
 
        if (object == vm_submap_object) {
                /* the vm_submap_object is only a placeholder for submaps */
@@ -1047,13 +1072,13 @@ vm_page_insert_internal(
                lck_spin_unlock(bucket_lock);
        }
 
-       {       unsigned int    cache_attr;
+       {       
+               unsigned int    cache_attr;
 
                cache_attr = object->wimg_bits & VM_WIMG_MASK;
 
                if (cache_attr != VM_WIMG_USE_DEFAULT) {
-                       pmap_set_cache_attributes(mem->phys_page, cache_attr);
-                       object->set_cache_attr = TRUE;
+                       PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
                }
        }
        /*
@@ -1118,7 +1143,13 @@ vm_page_replace(
        lck_spin_t      *bucket_lock;
        int             hash_id;
 
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(mem);
+#endif
        vm_object_lock_assert_exclusive(object);
 #if DEBUG
        if (mem->tabled || mem->object != VM_OBJECT_NULL)
@@ -1181,7 +1212,7 @@ vm_page_replace(
                 */
                vm_page_free_unlocked(found_m, FALSE);
        }
-       vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
+       vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
 }
 
 /*
@@ -1211,8 +1242,14 @@ vm_page_remove(
        vm_object_lock_assert_exclusive(mem->object);
        assert(mem->tabled);
        assert(!mem->cleaning);
+       assert(!mem->laundry);
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(mem);
-
+#endif
        if (remove_from_hash == TRUE) {
                /*
                 *      Remove from the object_object/offset hash table
@@ -1389,7 +1426,13 @@ vm_page_lookup(
        lck_spin_lock(bucket_lock);
 
        for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
+#if 0
+               /*
+                * we don't hold the page queue lock
+                * so this check isn't safe to make
+                */
                VM_PAGE_CHECK(mem);
+#endif
                if ((mem->object == object) && (mem->offset == offset))
                        break;
        }
@@ -1454,7 +1497,7 @@ vm_page_rename(
        vm_page_lockspin_queues();
 
        vm_page_remove(mem, TRUE);
-       vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
+       vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
 
        vm_page_unlock_queues();
 }
@@ -1932,9 +1975,9 @@ return_page_from_cpu_list:
         *      it doesn't really matter.
         */
        if ((vm_page_free_count < vm_page_free_min) ||
-           ((vm_page_free_count < vm_page_free_target) &&
-            ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
-               thread_wakeup((event_t) &vm_page_free_wanted);
+            ((vm_page_free_count < vm_page_free_target) &&
+             ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
+                thread_wakeup((event_t) &vm_page_free_wanted);
 
        VM_CHECK_MEMORYSTATUS;
        
@@ -1964,7 +2007,6 @@ vm_page_release(
        }
 //     dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);      /* (TEST/DEBUG) */
 
-
        pmap_clear_noencrypt(mem->phys_page);
 
        lck_mtx_lock_spin(&vm_page_queue_free_lock);
@@ -2204,7 +2246,6 @@ vm_page_free_prepare_queues(
        VM_PAGE_CHECK(mem);
        assert(!mem->free);
        assert(!mem->cleaning);
-       assert(!mem->pageout);
 #if DEBUG
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
        if (mem->free)
@@ -2213,18 +2254,18 @@ vm_page_free_prepare_queues(
        if (mem->object) {
                vm_object_lock_assert_exclusive(mem->object);
        }
-
        if (mem->laundry) {
                /*
                 * We may have to free a page while it's being laundered
                 * if we lost its pager (due to a forced unmount, for example).
-                * We need to call vm_pageout_throttle_up() before removing
-                * the page from its VM object, so that we can find out on
-                * which pageout queue the page is on.
+                * We need to call vm_pageout_steal_laundry() before removing
+                * the page from its VM object, so that we can remove it
+                * from its pageout queue and adjust the laundry accounting
                 */
-               vm_pageout_throttle_up(mem);
+               vm_pageout_steal_laundry(mem, TRUE);
                counter(++c_laundry_pages_freed);
        }
+               
        VM_PAGE_QUEUES_REMOVE(mem);     /* clears local/active/inactive/throttled/speculative */
 
        if (VM_PAGE_WIRED(mem)) {
@@ -2268,8 +2309,6 @@ vm_page_free_prepare_object(
                mem->phys_page = vm_page_fictitious_addr;
        }
        if ( !mem->fictitious) {
-               if (mem->zero_fill == TRUE)
-                       VM_ZF_COUNT_DECR();
                vm_page_init(mem, mem->phys_page, mem->lopage);
        }
 }
@@ -2315,196 +2354,163 @@ vm_page_free_unlocked(
        }
 }
 
+
 /*
  * Free a list of pages.  The list can be up to several hundred pages,
  * as blocked up by vm_pageout_scan().
  * The big win is not having to take the free list lock once
- * per page.  We sort the incoming pages into n lists, one for
- * each color.
+ * per page.
  */
 void
 vm_page_free_list(
-       vm_page_t       mem,
+       vm_page_t       freeq,
        boolean_t       prepare_object)
 {
+        vm_page_t      mem;
         vm_page_t      nxt;
-       int             pg_count = 0;
-       int             color;
-       int             inuse_list_head = -1;
+       vm_page_t       local_freeq;
+       int             pg_count;
 
-       queue_head_t    free_list[MAX_COLORS];
-       int             inuse[MAX_COLORS];
+       while (freeq) {
 
-       for (color = 0; color < (signed) vm_colors; color++) {
-               queue_init(&free_list[color]);
-       }
-       
-       while (mem) {
-               assert(!mem->inactive);
-               assert(!mem->active);
-               assert(!mem->throttled);
-               assert(!mem->free);
-               assert(!mem->speculative);
-               assert(!VM_PAGE_WIRED(mem));
-               assert(mem->pageq.prev == NULL);
+               pg_count = 0;
+               local_freeq = VM_PAGE_NULL;
+               mem = freeq;
 
-               nxt = (vm_page_t)(mem->pageq.next);
+               /*
+                * break up the processing into smaller chunks so
+                * that we can 'pipeline' the pages onto the
+                * free list w/o introducing too much
+                * contention on the global free queue lock
+                */
+               while (mem && pg_count < 64) {
+
+                       assert(!mem->inactive);
+                       assert(!mem->active);
+                       assert(!mem->throttled);
+                       assert(!mem->free);
+                       assert(!mem->speculative);
+                       assert(!VM_PAGE_WIRED(mem));
+                       assert(mem->pageq.prev == NULL);
+
+                       nxt = (vm_page_t)(mem->pageq.next);
                
-               if (prepare_object == TRUE)
-                       vm_page_free_prepare_object(mem, TRUE);
+                       if (vm_page_free_verify && !mem->fictitious && !mem->private) {
+                               assert(pmap_verify_free(mem->phys_page));
+                       }
+                       if (prepare_object == TRUE)
+                               vm_page_free_prepare_object(mem, TRUE);
 
-               if (vm_page_free_verify && !mem->fictitious && !mem->private) {
-                       assert(pmap_verify_free(mem->phys_page));
-               }
+                       if (!mem->fictitious) {
+                               assert(mem->busy);
 
-               if (!mem->fictitious) {
-                       assert(mem->busy);
-                       if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
-                           vm_lopage_free_count < vm_lopage_free_limit &&
-                           mem->phys_page < max_valid_low_ppnum) {
-                               mem->pageq.next = NULL;
-                               vm_page_release(mem);
-                       } else {
+                               if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
+                                   vm_lopage_free_count < vm_lopage_free_limit &&
+                                   mem->phys_page < max_valid_low_ppnum) {
+                                       mem->pageq.next = NULL;
+                                       vm_page_release(mem);
+                               } else {
+                                       /*
+                                        * IMPORTANT: we can't set the page "free" here
+                                        * because that would make the page eligible for
+                                        * a physically-contiguous allocation (see
+                                        * vm_page_find_contiguous()) right away (we don't
+                                        * hold the vm_page_queue_free lock).  That would
+                                        * cause trouble because the page is not actually
+                                        * in the free queue yet...
+                                        */
+                                       mem->pageq.next = (queue_entry_t)local_freeq;
+                                       local_freeq = mem;
+                                       pg_count++;
 
-                       /*
-                        * IMPORTANT: we can't set the page "free" here
-                        * because that would make the page eligible for
-                        * a physically-contiguous allocation (see
-                        * vm_page_find_contiguous()) right away (we don't
-                        * hold the vm_page_queue_free lock).  That would
-                        * cause trouble because the page is not actually
-                        * in the free queue yet...
-                        */
-                               color = mem->phys_page & vm_color_mask;
-                               if (queue_empty(&free_list[color])) {
-                                       inuse[color] = inuse_list_head;
-                                       inuse_list_head = color;
+                                       pmap_clear_noencrypt(mem->phys_page);
                                }
-                               queue_enter_first(&free_list[color],
-                                                 mem,
-                                                 vm_page_t,
-                                                 pageq);
-                               pg_count++;
-
-                               pmap_clear_noencrypt(mem->phys_page);
+                       } else {
+                               assert(mem->phys_page == vm_page_fictitious_addr ||
+                                      mem->phys_page == vm_page_guard_addr);
+                               vm_page_release_fictitious(mem);
                        }
-               } else {
-                       assert(mem->phys_page == vm_page_fictitious_addr ||
-                              mem->phys_page == vm_page_guard_addr);
-                       vm_page_release_fictitious(mem);
+                       mem = nxt;
                }
-               mem = nxt;
-       }
-       if (pg_count) {
-               unsigned int    avail_free_count;
-               unsigned int    need_wakeup = 0;
-               unsigned int    need_priv_wakeup = 0;
+               freeq = mem;
+
+               if ( (mem = local_freeq) ) {
+                       unsigned int    avail_free_count;
+                       unsigned int    need_wakeup = 0;
+                       unsigned int    need_priv_wakeup = 0;
          
-               lck_mtx_lock_spin(&vm_page_queue_free_lock);
+                       lck_mtx_lock_spin(&vm_page_queue_free_lock);
 
-               color = inuse_list_head;
-               
-               while( color != -1 ) {
-                       vm_page_t first, last;
-                       vm_page_t first_free;
+                       while (mem) {
+                               int     color;
+
+                               nxt = (vm_page_t)(mem->pageq.next);
 
-                       /*
-                        * Now that we hold the vm_page_queue_free lock,
-                        * it's safe to mark all pages in our local queue
-                        * as "free"...
-                        */
-                       queue_iterate(&free_list[color],
-                                     mem,
-                                     vm_page_t,
-                                     pageq) {
                                assert(!mem->free);
                                assert(mem->busy);
                                mem->free = TRUE;
-                       }
 
-                       /*
-                        * ... and insert our local queue at the head of
-                        * the global free queue.
-                        */
-                       first = (vm_page_t) queue_first(&free_list[color]);
-                       last = (vm_page_t) queue_last(&free_list[color]);
-                       first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
-                       if (queue_empty(&vm_page_queue_free[color])) {
-                               queue_last(&vm_page_queue_free[color]) =
-                                       (queue_entry_t) last;
-                       } else {
-                               queue_prev(&first_free->pageq) =
-                                       (queue_entry_t) last;
+                               color = mem->phys_page & vm_color_mask;
+                               queue_enter_first(&vm_page_queue_free[color],
+                                                 mem,
+                                                 vm_page_t,
+                                                 pageq);
+                               mem = nxt;
                        }
-                       queue_first(&vm_page_queue_free[color]) =
-                               (queue_entry_t) first;
-                       queue_prev(&first->pageq) =
-                               (queue_entry_t) &vm_page_queue_free[color];
-                       queue_next(&last->pageq) =
-                               (queue_entry_t) first_free;
-
-                       /* next color */
-                       color = inuse[color];
-               }
-               
-               vm_page_free_count += pg_count;
-               avail_free_count = vm_page_free_count;
-
-               if (vm_page_free_wanted_privileged > 0 &&
-                   avail_free_count > 0) {
-                       if (avail_free_count < vm_page_free_wanted_privileged) {
-                               need_priv_wakeup = avail_free_count;
-                               vm_page_free_wanted_privileged -=
-                                       avail_free_count;
-                               avail_free_count = 0;
-                       } else {
-                               need_priv_wakeup = vm_page_free_wanted_privileged;
-                               vm_page_free_wanted_privileged = 0;
-                               avail_free_count -=
-                                       vm_page_free_wanted_privileged;
+                       vm_page_free_count += pg_count;
+                       avail_free_count = vm_page_free_count;
+
+                       if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
+
+                               if (avail_free_count < vm_page_free_wanted_privileged) {
+                                       need_priv_wakeup = avail_free_count;
+                                       vm_page_free_wanted_privileged -= avail_free_count;
+                                       avail_free_count = 0;
+                               } else {
+                                       need_priv_wakeup = vm_page_free_wanted_privileged;
+                                       vm_page_free_wanted_privileged = 0;
+                                       avail_free_count -= vm_page_free_wanted_privileged;
+                               }
                        }
-               }
+                       if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
+                               unsigned int  available_pages;
 
-               if (vm_page_free_wanted > 0 &&
-                   avail_free_count > vm_page_free_reserved) {
-                       unsigned int  available_pages;
+                               available_pages = avail_free_count - vm_page_free_reserved;
 
-                       available_pages = (avail_free_count -
-                                          vm_page_free_reserved);
+                               if (available_pages >= vm_page_free_wanted) {
+                                       need_wakeup = vm_page_free_wanted;
+                                       vm_page_free_wanted = 0;
+                               } else {
+                                       need_wakeup = available_pages;
+                                       vm_page_free_wanted -= available_pages;
+                               }
+                       }
+                       lck_mtx_unlock(&vm_page_queue_free_lock);
 
-                       if (available_pages >= vm_page_free_wanted) {
-                               need_wakeup = vm_page_free_wanted;
-                               vm_page_free_wanted = 0;
-                       } else {
-                               need_wakeup = available_pages;
-                               vm_page_free_wanted -= available_pages;
+                       if (need_priv_wakeup != 0) {
+                               /*
+                                * There shouldn't be that many VM-privileged threads,
+                                * so let's wake them all up, even if we don't quite
+                                * have enough pages to satisfy them all.
+                                */
+                               thread_wakeup((event_t)&vm_page_free_wanted_privileged);
+                       }
+                       if (need_wakeup != 0 && vm_page_free_wanted == 0) {
+                               /*
+                                * We don't expect to have any more waiters
+                                * after this, so let's wake them all up at
+                                * once.
+                                */
+                               thread_wakeup((event_t) &vm_page_free_count);
+                       } else for (; need_wakeup != 0; need_wakeup--) {
+                               /*
+                                * Wake up one waiter per page we just released.
+                                */
+                               thread_wakeup_one((event_t) &vm_page_free_count);
                        }
-               }
-               lck_mtx_unlock(&vm_page_queue_free_lock);
 
-               if (need_priv_wakeup != 0) {
-                       /*
-                        * There shouldn't be that many VM-privileged threads,
-                        * so let's wake them all up, even if we don't quite
-                        * have enough pages to satisfy them all.
-                        */
-                       thread_wakeup((event_t)&vm_page_free_wanted_privileged);
+                       VM_CHECK_MEMORYSTATUS;
                }
-               if (need_wakeup != 0 && vm_page_free_wanted == 0) {
-                       /*
-                        * We don't expect to have any more waiters
-                        * after this, so let's wake them all up at
-                        * once.
-                        */
-                       thread_wakeup((event_t) &vm_page_free_count);
-               } else for (; need_wakeup != 0; need_wakeup--) {
-                       /*
-                        * Wake up one waiter per page we just released.
-                        */
-                       thread_wakeup_one((event_t) &vm_page_free_count);
-               }
-
-               VM_CHECK_MEMORYSTATUS;
        }
 }
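
The rewritten loop above trades one long critical section for many short ones: pages are staged on a private list no more than 64 at a time, and only once vm_page_queue_free_lock is held are they marked free and entered on the colored global queues (marking them free any earlier would let vm_page_find_contiguous() grab a page that is not actually on a queue yet). A standalone sketch of the same batching pattern, with toy types in place of the kernel's (a pthread mutex for the lck_mtx, one plain list for the colored queues):

/*
 * Illustrative only: stage up to BATCH pages privately, then
 * publish the whole batch under one short lock hold.
 */
#include <pthread.h>
#include <stddef.h>

#define BATCH 64

struct page {
        struct page *next;
        int          free;      /* may only be set under free_lock */
};

static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;
static struct page *global_freeq;

void
free_page_list(struct page *freeq)
{
        while (freeq) {
                struct page *local = NULL, *mem, *tail;
                int n = 0;

                /* stage a chunk without touching the shared lock */
                while (freeq && n < BATCH) {
                        mem = freeq;
                        freeq = freeq->next;
                        mem->next = local;
                        local = mem;
                        n++;
                }
                pthread_mutex_lock(&free_lock);
                /* only now is it safe to publish the pages as free */
                for (tail = local; ; tail = tail->next) {
                        tail->free = 1;
                        if (tail->next == NULL)
                                break;
                }
                tail->next = global_freeq;      /* splice batch in */
                global_freeq = local;
                pthread_mutex_unlock(&free_lock);
        }
}

The chunk size bounds the lock hold time, so waiters on the free queue (including the wakeup logic above) see the lock released every 64 pages instead of once per call.
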
 
@@ -2543,6 +2549,11 @@ vm_page_wire(
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 #endif
        if ( !VM_PAGE_WIRED(mem)) {
+
+               if (mem->pageout_queue) {
+                       mem->pageout = FALSE;
+                       vm_pageout_throttle_up(mem);
+               }
                VM_PAGE_QUEUES_REMOVE(mem);
 
                if (mem->object) {
@@ -2579,10 +2590,6 @@ vm_page_wire(
                if (mem->gobbled)
                        vm_page_gobble_count--;
                mem->gobbled = FALSE;
-               if (mem->zero_fill == TRUE) {
-                       mem->zero_fill = FALSE;
-                       VM_ZF_COUNT_DECR();
-               }
 
                VM_CHECK_MEMORYSTATUS;
                
@@ -2728,7 +2735,15 @@ vm_page_deactivate_internal(
                vm_page_gobble_count--;
                m->gobbled = FALSE;
        }
-       if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
+       /*
+        * if this page is currently on the pageout queue, we can't do the
+        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * and we can't remove it manually since we would need the object lock
+        * (which is not required here) to decrement the activity_in_progress
+        * reference which is held on the object while the page is in the pageout queue...
+        * just let the normal laundry processing proceed
+        */
+       if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
                return;
 
        if (!m->absent && clear_hw_reference == TRUE)
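
The pageout_queue test added here recurs in vm_page_activate(), vm_page_speculate(), vm_page_lru() and the new vm_page_enqueue_cleaned() below; vm_page_wire() is the one caller that instead strips the page out of the pageout queue first, via vm_pageout_throttle_up(). Reduced to a toy example, the guard looks like this (illustrative types, not kernel code):

#include <stdio.h>

struct xpage {
        unsigned pageout_queue:1, private_pg:1, fictitious:1;
};

/* stand-in for VM_PAGE_QUEUES_REMOVE plus a re-enqueue */
static void
requeue(struct xpage *m)
{
        (void)m;
        printf("requeued\n");
}

void
move_page(struct xpage *m)
{
        /*
         * A page owned by the pageout thread is left alone: pulling
         * it off the pageout queue would need the object lock to drop
         * the activity_in_progress reference, so the laundry path
         * requeues it itself once the I/O completes.
         */
        if (m->pageout_queue || m->private_pg || m->fictitious)
                return;
        requeue(m);
}
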
@@ -2740,9 +2755,6 @@ vm_page_deactivate_internal(
        if (!m->inactive) {
                VM_PAGE_QUEUES_REMOVE(m);
 
-               assert(!m->laundry);
-               assert(m->pageq.next == NULL && m->pageq.prev == NULL);
-
                if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
                    m->dirty && m->object->internal &&
                    (m->object->purgable == VM_PURGABLE_DENY ||
@@ -2764,6 +2776,54 @@ vm_page_deactivate_internal(
        }
 }
 
+/*
+ * vm_page_enqueue_cleaned
+ *
+ * Put the page on the cleaned queue, mark it cleaned, etc.
+ * Being on the cleaned queue (and having m->clean_queue set)
+ * does ** NOT ** guarantee that the page is clean!
+ *
+ * Call with the queues lock held.
+ */
+
+void vm_page_enqueue_cleaned(vm_page_t m)
+{
+       assert(m->phys_page != vm_page_guard_addr);
+#if DEBUG
+       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+       assert( !(m->absent && !m->unusual));
+
+       if (m->gobbled) {
+               assert( !VM_PAGE_WIRED(m));
+               if (!m->private && !m->fictitious)
+                       vm_page_wire_count--;
+               vm_page_gobble_count--;
+               m->gobbled = FALSE;
+       }
+       /*
+        * if this page is currently on the pageout queue, we can't do the
+        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * and we can't remove it manually since we would need the object lock
+        * (which is not required here) to decrement the activity_in_progress
+        * reference which is held on the object while the page is in the pageout queue...
+        * just let the normal laundry processing proceed
+        */
+       if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
+               return;
+
+       VM_PAGE_QUEUES_REMOVE(m);
+
+       queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
+       m->clean_queue = TRUE;
+       vm_page_cleaned_count++;
+
+       m->inactive = TRUE;
+       vm_page_inactive_count++;
+
+       vm_pageout_enqueued_cleaned++;
+}
+
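
A page on the cleaned queue is also accounted as inactive, so the function above bumps vm_page_cleaned_count and vm_page_inactive_count together; whoever dequeues the page must drop both. A minimal sketch of that paired bookkeeping (toy list and counters):

#include <assert.h>
#include <stddef.h>

struct cpage { struct cpage *next; unsigned clean_queue:1, inactive:1; };

static struct cpage *cleaned_head;
static unsigned cleaned_count, inactive_count;

static void
enqueue_cleaned(struct cpage *m)
{
        if (m->clean_queue)             /* already enqueued */
                return;
        m->next = cleaned_head;
        cleaned_head = m;
        m->clean_queue = 1;
        cleaned_count++;
        m->inactive = 1;                /* cleaned pages are a subset */
        inactive_count++;               /* of the inactive population */
        assert(cleaned_count <= inactive_count);
}
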
 /*
  *     vm_page_activate:
  *
@@ -2793,7 +2853,15 @@ vm_page_activate(
                vm_page_gobble_count--;
                m->gobbled = FALSE;
        }
-       if (m->private || m->fictitious)
+       /*
+        * if this page is currently on the pageout queue, we can't do the
+        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * and we can't remove it manually since we would need the object lock
+        * (which is not required here) to decrement the activity_in_progress
+        * reference which is held on the object while the page is in the pageout queue...
+        * just let the normal laundry processing proceed
+        */
+       if (m->pageout_queue || m->private || m->fictitious)
                return;
 
 #if DEBUG
@@ -2805,12 +2873,11 @@ vm_page_activate(
                DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
                DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
        }
-
+       
        VM_PAGE_QUEUES_REMOVE(m);
 
        if ( !VM_PAGE_WIRED(m)) {
-               assert(!m->laundry);
-               assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+
                if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 
                    m->dirty && m->object->internal && 
                    (m->object->purgable == VM_PURGABLE_DENY ||
@@ -2853,7 +2920,15 @@ vm_page_speculate(
 #endif
        assert( !(m->absent && !m->unusual));
 
-       if (m->private || m->fictitious)
+       /*
+        * if this page is currently on the pageout queue, we can't do the
+        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * and we can't remove it manually since we would need the object lock
+        * (which is not required here) to decrement the activity_in_progress
+        * reference which is held on the object while the page is in the pageout queue...
+        * just let the normal laundry processing proceed
+        */
+       if (m->pageout_queue || m->private || m->fictitious)
                return;
 
        VM_PAGE_QUEUES_REMOVE(m);               
@@ -2974,19 +3049,21 @@ vm_page_lru(
 #if DEBUG
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 #endif
-       if (m->active || m->reference)
-               return;
-
-       if (m->private || (VM_PAGE_WIRED(m)))
+       /*
+        * if this page is currently on the pageout queue, we can't do the
+        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * and we can't remove it manually since we would need the object lock
+        * (which is not required here) to decrement the activity_in_progress
+        * reference which is held on the object while the page is in the pageout queue...
+        * just let the normal laundry processing proceed
+        */
+       if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
                return;
 
        m->no_cache = FALSE;
 
        VM_PAGE_QUEUES_REMOVE(m);
 
-       assert(!m->laundry);
-       assert(m->pageq.next == NULL && m->pageq.prev == NULL);
-
        VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
 }
 
@@ -3160,7 +3237,14 @@ vm_page_part_zero_fill(
 {
        vm_page_t       tmp;
 
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(m);
+#endif
+
 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
        pmap_zero_part_page(m->phys_page, m_pa, len);
 #else
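
The same #if 0 treatment is applied to the VM_PAGE_CHECK() calls in vm_page_zero_fill(), vm_page_part_copy() and vm_page_copy() below: the check reads page state that only the page-queue lock stabilizes, and these callers do not hold it. A toy version of a check that refuses to run unlocked (ownership modeled as a flag; illustrative only):

#include <assert.h>

struct qlock { int owned; };

static struct qlock page_queue_lock;

#define DPAGE_MAGIC 0x5eaf00dU

struct dpage { unsigned magic; };

static void
page_check(const struct dpage *m)
{
        if (!page_queue_lock.owned)
                return;         /* caller cannot hold the lock: skip */
        assert(m->magic == DPAGE_MAGIC);
}
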
@@ -3198,8 +3282,13 @@ vm_page_zero_fill(
         XPR(XPR_VM_PAGE,
                 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
                 m->object, m->offset, m, 0,0);
-
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(m);
+#endif
 
 //     dbgTrace(0xAEAEAEAE, m->phys_page, 0);          /* (BRINGUP) */
        pmap_zero_page(m->phys_page);
@@ -3219,9 +3308,14 @@ vm_page_part_copy(
        vm_offset_t     dst_pa,
        vm_size_t       len)
 {
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(src_m);
        VM_PAGE_CHECK(dst_m);
-
+#endif
        pmap_copy_part_page(src_m->phys_page, src_pa,
                        dst_m->phys_page, dst_pa, len);
 }
@@ -3249,9 +3343,15 @@ vm_page_copy(
         src_m->object, src_m->offset, 
        dest_m->object, dest_m->offset,
        0);
-
+#if 0
+       /*
+        * we don't hold the page queue lock
+        * so this check isn't safe to make
+        */
        VM_PAGE_CHECK(src_m);
        VM_PAGE_CHECK(dest_m);
+#endif
+       vm_object_lock_assert_held(src_m->object);
 
        /*
         * ENCRYPTED SWAP:
@@ -3281,7 +3381,7 @@ vm_page_copy(
                src_m->busy = TRUE;
                (void) vm_page_slide(src_m, 0);
                assert(src_m->busy);
-               if(!was_busy) {
+               if (!was_busy) {
                        PAGE_WAKEUP_DONE(src_m);
                }
        }
@@ -3345,14 +3445,10 @@ _vm_page_print(
               (p->unusual ? "" : "!"),
               (p->encrypted ? "" : "!"),
               (p->encrypted_cleaning ? "" : "!"));
-       printf("  %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
-              (p->list_req_pending ? "" : "!"),
-              (p->dump_cleaning ? "" : "!"),
+       printf("  %scs_validated, %scs_tainted, %sno_cache\n",
               (p->cs_validated ? "" : "!"),
               (p->cs_tainted ? "" : "!"),
               (p->no_cache ? "" : "!"));
-       printf("  %szero_fill\n",
-              (p->zero_fill ? "" : "!"));
 
        printf("phys_page=0x%x\n", p->phys_page);
 }
@@ -3496,6 +3592,9 @@ vm_page_queues_assert(
        vm_page_t       mem,
        int             val)
 {
+#if DEBUG
+       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
        if (mem->free + mem->active + mem->inactive + mem->speculative +
            mem->throttled + mem->pageout_queue > (val)) {
                _vm_page_print(mem);
@@ -3506,6 +3605,7 @@ vm_page_queues_assert(
                assert(!mem->inactive);
                assert(!mem->speculative);
                assert(!mem->throttled);
+               assert(!mem->pageout_queue);
        }
 }
 #endif /* MACH_ASSERT */
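
vm_page_queues_assert() now also demands the page-queue lock, and it treats the queue flags as a sum: a page may sit on at most `val` of the mutually exclusive queues, with pageout_queue newly folded into both the sum and the val==0 case. The invariant in isolation (toy page, same arithmetic):

#include <assert.h>

struct qpage {
        unsigned free:1, active:1, inactive:1,
                 speculative:1, throttled:1, pageout_queue:1;
};

/* a page may be on at most 'expected' of the exclusive queues */
static void
queues_assert(const struct qpage *m, int expected)
{
        int on = m->free + m->active + m->inactive +
                 m->speculative + m->throttled + m->pageout_queue;

        assert(on <= expected);
        if (expected == 0)      /* off-queue pages carry no stale bits */
                assert(!m->free && !m->pageout_queue);
}
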
@@ -3665,8 +3765,7 @@ retry:
                } else if (VM_PAGE_WIRED(m) || m->gobbled ||
                           m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
                           m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
-                          m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
-                          m->pageout) {
+                          m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
                        /*
                         * page is in a transient state
                         * or a state we don't want to deal
@@ -3922,7 +4021,7 @@ did_consider:
                                    (VM_PAGE_WIRED(m1) || m1->gobbled ||
                                     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
                                     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
-                                    m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
+                                    m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
 
                                        if (locked_object) {
                                                vm_object_unlock(locked_object);
@@ -3958,8 +4057,9 @@ did_consider:
 
                                        if (refmod & VM_MEM_REFERENCED)
                                                m2->reference = TRUE;
-                                       if (refmod & VM_MEM_MODIFIED)
-                                               m2->dirty = TRUE;
+                                       if (refmod & VM_MEM_MODIFIED) {
+                                               SET_PAGE_DIRTY(m2, TRUE);
+                                       }
                                        offset = m1->offset;
 
                                        /*
@@ -3981,7 +4081,7 @@ did_consider:
                                        /*
                                         * now put the substitute page on the object
                                         */
-                                       vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
+                                       vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
 
                                        if (m2->reference)
                                                vm_page_activate(m2);
@@ -4143,9 +4243,9 @@ cpm_allocate(
         * determine need for wakeups
         */
        if ((vm_page_free_count < vm_page_free_min) ||
-           ((vm_page_free_count < vm_page_free_target) &&
-            ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
-               thread_wakeup((event_t) &vm_page_free_wanted);
+            ((vm_page_free_count < vm_page_free_target) &&
+             ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
+                thread_wakeup((event_t) &vm_page_free_wanted);
                
        VM_CHECK_MEMORYSTATUS;
        
@@ -4190,7 +4290,6 @@ vm_page_do_delayed_work(
        int             j;
        vm_page_t       m;
         vm_page_t       local_free_q = VM_PAGE_NULL;
-       boolean_t       dropped_obj_lock = FALSE;
 
        /*
         * pageout_scan takes the vm_page_lock_queues first
@@ -4218,28 +4317,11 @@ vm_page_do_delayed_work(
                        mutex_pause(j);
                        vm_page_lockspin_queues();
                }
-               dropped_obj_lock = TRUE;
        }
        for (j = 0; j < dw_count; j++, dwp++) {
 
                m = dwp->dw_m;
 
-               if (dwp->dw_mask & DW_set_list_req_pending) {
-                       m->list_req_pending = TRUE;
-
-                       if (dropped_obj_lock == TRUE) {
-                               /*
-                                * need to make sure anyone that might have
-                                * blocked on busy == TRUE when we dropped
-                                * the object lock gets a chance to re-evaluate
-                                * its state since we have several places
-                                * where we avoid potential deadlocks with
-                                * the filesystem by stealing pages with

-                                * list_req_pending == TRUE and busy == TRUE
-                                */
-                               dwp->dw_mask |= DW_PAGE_WAKEUP;
-                       }
-               }
                if (dwp->dw_mask & DW_vm_pageout_throttle_up)
                        vm_pageout_throttle_up(m);
 
@@ -4272,25 +4354,51 @@ vm_page_do_delayed_work(
                        }
                        else if (dwp->dw_mask & DW_vm_page_speculate)
                                vm_page_speculate(m, TRUE);
+                       else if (dwp->dw_mask & DW_enqueue_cleaned) {
+                               /*
+                                * if we didn't hold the object lock and did this,
+                                * we might disconnect the page, then someone might
+                                * soft fault it back in, then we would put it on the
+                                * cleaned queue, and so we would have a referenced (maybe even dirty)
+                                * page on that queue, which we don't want
+                                */
+                               int refmod_state = pmap_disconnect(m->phys_page);
+
+                               if ((refmod_state & VM_MEM_REFERENCED)) {
+                                       /*
+                                        * this page has been touched since it got cleaned; let's activate it
+                                        * if it hasn't already been
+                                        */
+                                       vm_pageout_enqueued_cleaned++;
+                                       vm_pageout_cleaned_reactivated++;
+                                       vm_pageout_cleaned_commit_reactivated++;
+
+                                       if (m->active == FALSE)
+                                               vm_page_activate(m);
+                               } else {
+                                       m->reference = FALSE;
+                                       vm_page_enqueue_cleaned(m);
+                               }
+                       }
                        else if (dwp->dw_mask & DW_vm_page_lru)
                                vm_page_lru(m);
-                       else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
-                               VM_PAGE_QUEUES_REMOVE(m);
-                       
+                       else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
+                               if ( !m->pageout_queue)
+                                       VM_PAGE_QUEUES_REMOVE(m);
+                       }
                        if (dwp->dw_mask & DW_set_reference)
                                m->reference = TRUE;
                        else if (dwp->dw_mask & DW_clear_reference)
                                m->reference = FALSE;
 
                        if (dwp->dw_mask & DW_move_page) {
-                               VM_PAGE_QUEUES_REMOVE(m);
+                               if ( !m->pageout_queue) {
+                                       VM_PAGE_QUEUES_REMOVE(m);
 
-                               assert(!m->laundry);
-                               assert(m->object != kernel_object);
-                               assert(m->pageq.next == NULL &&
-                                      m->pageq.prev == NULL);
+                                       assert(m->object != kernel_object);
 
-                               VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+                                       VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+                               }
                        }
                        if (dwp->dw_mask & DW_clear_busy)
                                m->busy = FALSE;
@@ -4308,38 +4416,6 @@ vm_page_do_delayed_work(
 
 }
 
-
-       
-
-void vm_check_memorystatus()
-{
-#if CONFIG_EMBEDDED
-       static boolean_t in_critical = FALSE;
-       static unsigned int last_memorystatus = 0;
-       unsigned int pages_avail;
-       
-       if (!kern_memorystatus_delta) {
-           return;
-       }
-       
-       pages_avail = (vm_page_active_count + 
-                     vm_page_inactive_count + 
-                     vm_page_speculative_count + 
-                     vm_page_free_count +
-                     (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
-       if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
-            (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
-            (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
-           kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
-           last_memorystatus = pages_avail;
-               
-           thread_wakeup((event_t)&kern_memorystatus_wakeup);
-               
-           in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
-       }
-#endif
-}
-
 kern_return_t
 vm_page_alloc_list(
        int     page_count,
@@ -4409,7 +4485,6 @@ extern boolean_t (* volatile consider_buffer_cache_collect)(int);
 static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
 static int  hibernate_flush_dirty_pages(void);
 static int  hibernate_flush_queue(queue_head_t *, int);
-static void hibernate_dirty_page(vm_page_t);
 
 void hibernate_flush_wait(void);
 void hibernate_mark_in_progress(void);
@@ -4477,46 +4552,6 @@ hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
        return (0);
 }
 
-static void
-hibernate_dirty_page(vm_page_t m)
-{
-       vm_object_t     object = m->object;
-        struct         vm_pageout_queue *q;
-
-#if DEBUG
-       lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
-#endif
-       vm_object_lock_assert_exclusive(object);
-
-       /*
-        * protect the object from collapse - 
-        * locking in the object's paging_offset.
-        */
-       vm_object_paging_begin(object);
-
-       m->list_req_pending = TRUE;
-       m->cleaning = TRUE;
-       m->busy = TRUE;
-
-       if (object->internal == TRUE)
-               q = &vm_pageout_queue_internal;
-       else
-               q = &vm_pageout_queue_external;
-
-        /* 
-        * pgo_laundry count is tied to the laundry bit
-        */
-       m->laundry = TRUE;
-       q->pgo_laundry++;
-
-       m->pageout_queue = TRUE;
-       queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
-       
-       if (q->pgo_idle == TRUE) {
-               q->pgo_idle = FALSE;
-               thread_wakeup((event_t) &q->pgo_pending);
-       }
-}
 
 static int
 hibernate_flush_queue(queue_head_t *q, int qcount)
@@ -4532,6 +4567,7 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
        struct  vm_pageout_queue *eq;
        struct  vm_pageout_queue *tq;
 
+       hibernate_cleaning_in_progress = TRUE;
 
        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
        
@@ -4595,7 +4631,7 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
                                vm_pageout_scan_wants_object = VM_OBJECT_NULL;
                        }
                }
-               if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
+               if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
                        /*
                         * page is not to be cleaned
                         * put it back on the head of its queue
@@ -4622,8 +4658,9 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
                if ( !m->dirty && m->pmapped) {
                        refmod_state = pmap_get_refmod(m->phys_page);
 
-                       if ((refmod_state & VM_MEM_MODIFIED))
-                               m->dirty = TRUE;
+                       if ((refmod_state & VM_MEM_MODIFIED)) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       }
                } else
                        refmod_state = 0;
 
@@ -4661,9 +4698,9 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
 
                                assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
 
-                       vm_page_unlock_queues();
+                               vm_page_unlock_queues();
 
-                       wait_result = thread_block(THREAD_CONTINUE_NULL);
+                               wait_result = thread_block(THREAD_CONTINUE_NULL);
 
                                vm_page_lock_queues();
 
@@ -4674,9 +4711,9 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
                                        break;
                                
                                if (--wait_count == 0) {
-                               hibernate_stats.hibernate_throttle_timeout++;
-                               retval = 1;
-                       }
+                                       hibernate_stats.hibernate_throttle_timeout++;
+                                       retval = 1;
+                               }
                        }
                        if (retval)
                                break;
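
The wait loop whose indentation is fixed here blocks on the queue's pgo_laundry count with a 1000 ms timeout and gives up after wait_count expirations, bumping hibernate_throttle_timeout. A user-space sketch of that bounded wait, with POSIX primitives standing in for assert_wait_timeout() and thread_block():

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  laundry_cv = PTHREAD_COND_INITIALIZER;
static int laundry;    /* producer does: laundry--; broadcast(cv); */

static bool
drain_laundry(int max_waits)
{
        pthread_mutex_lock(&q_lock);
        while (laundry > 0) {
                struct timespec ts;

                clock_gettime(CLOCK_REALTIME, &ts);
                ts.tv_sec += 1;         /* 1 s, like the 1000 ms wait */
                pthread_cond_timedwait(&laundry_cv, &q_lock, &ts);

                if (laundry > 0 && --max_waits == 0) {
                        pthread_mutex_unlock(&q_lock);
                        return false;   /* throttle timeout */
                }
        }
        pthread_mutex_unlock(&q_lock);
        return true;
}
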
@@ -4685,9 +4722,16 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
 
                        continue;
                }
+               /*
+                * we've already factored out pages in the laundry which
+                * means this page can't be on the pageout queue so it's
+                * safe to do the VM_PAGE_QUEUES_REMOVE
+                */
+                assert(!m->pageout_queue);
+
                VM_PAGE_QUEUES_REMOVE(m);
 
-               hibernate_dirty_page(m);
+               vm_pageout_cluster(m, FALSE);
 
                hibernate_stats.hibernate_found_dirty++;
 
@@ -4708,12 +4752,14 @@ next_pg:
                vm_object_unlock(l_object);
                l_object = NULL;
        }
-    vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+       vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 
        vm_page_unlock_queues();
 
        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
 
+       hibernate_cleaning_in_progress = FALSE;
+
        return (retval);
 }
 
@@ -4759,9 +4805,11 @@ hibernate_flush_dirty_pages()
        }
        if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
                return (1);
-       if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
+       if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
                return (1);
-       if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
+       if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
+               return (1);
+       if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
                return (1);
 
        if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
@@ -4935,7 +4983,7 @@ hibernate_consider_discard(vm_page_t m)
            hibernate_stats.cd_found_cleaning++;
             break;
        }
-       if (m->laundry || m->list_req_pending) {
+       if (m->laundry) {
            hibernate_stats.cd_found_laundry++;
             break;
        }
@@ -4945,8 +4993,9 @@ hibernate_consider_discard(vm_page_t m)
         
             if (refmod_state & VM_MEM_REFERENCED)
                 m->reference = TRUE;
-            if (refmod_state & VM_MEM_MODIFIED)
-                m->dirty = TRUE;
+            if (refmod_state & VM_MEM_MODIFIED) {
+               SET_PAGE_DIRTY(m, FALSE);
+           }
         }
    
         /*
@@ -4977,6 +5026,15 @@ hibernate_discard_page(vm_page_t m)
         */
         return;
 
+#if DEBUG
+    vm_object_t object = m->object;
+    if (!vm_object_lock_try(m->object))
+       panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
+#else
+    /* No need to lock page queue for token delete, hibernate_vm_unlock() 
+       makes sure these locks are uncontended before sleep */
+#endif /* !DEBUG */
+
     if (m->pmapped == TRUE) 
     {
         __unused int refmod_state = pmap_disconnect(m->phys_page);
@@ -4995,13 +5053,15 @@ hibernate_discard_page(vm_page_t m)
         assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
         purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
         assert(old_queue);
-        /* No need to lock page queue for token delete, hibernate_vm_unlock() 
-           makes sure these locks are uncontended before sleep */
         vm_purgeable_token_delete_first(old_queue);
         m->object->purgable = VM_PURGABLE_EMPTY;
     }
        
     vm_page_free(m);
+
+#if DEBUG
+    vm_object_unlock(object);
+#endif /* DEBUG */
 }
 
 /*
@@ -5020,10 +5080,11 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
     vm_page_t m;
     uint32_t pages = page_list->page_count;
     uint32_t count_zf = 0, count_throttled = 0;
-    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
+    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
     uint32_t count_wire = pages;
     uint32_t count_discard_active    = 0;
     uint32_t count_discard_inactive  = 0;
+    uint32_t count_discard_cleaned   = 0;
     uint32_t count_discard_purgeable = 0;
     uint32_t count_discard_speculative = 0;
     uint32_t i;
@@ -5034,6 +5095,18 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
 
     HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
 
+#if DEBUG
+        vm_page_lock_queues();
+       if (vm_page_local_q) {
+           for (i = 0; i < vm_page_local_q_count; i++) {
+               struct vpl      *lq;
+               lq = &vm_page_local_q[i].vpl_un.vpl;
+               VPL_LOCK(&lq->vpl_lock);
+           }
+       }
+#endif /* DEBUG */
+
+
     KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
 
     clock_get_uptime(&start);
@@ -5123,7 +5196,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
     }
 
-    queue_iterate( &vm_page_queue_zf,
+    queue_iterate( &vm_page_queue_anonymous,
                     m,
                     vm_page_t,
                    pageq )
@@ -5163,6 +5236,26 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
     }
 
+    queue_iterate( &vm_page_queue_cleaned,
+                    m,
+                    vm_page_t,
+                    pageq )
+    {
+        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 
+         && hibernate_consider_discard(m))
+        {
+            hibernate_page_bitset(page_list, TRUE, m->phys_page);
+           if (m->dirty)
+               count_discard_purgeable++;
+           else
+               count_discard_cleaned++;
+        }
+        else
+            count_cleaned++;
+       count_wire--;
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+    }
+
     for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
     {
        queue_iterate(&vm_page_queue_speculative[i].age_q,
@@ -5219,17 +5312,28 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
     hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
 
     hibernate_stats.cd_count_wire = count_wire;
-    hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
+    hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
 
     clock_get_uptime(&end);
     absolutetime_to_nanoseconds(end - start, &nsec);
     HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
 
-    HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n", 
-                pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
-                count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
+    HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
+               pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
+               count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
 
-    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
+    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
+
+#if DEBUG
+       if (vm_page_local_q) {
+           for (i = 0; i < vm_page_local_q_count; i++) {
+               struct vpl      *lq;
+               lq = &vm_page_local_q[i].vpl_un.vpl;
+               VPL_UNLOCK(&lq->vpl_lock);
+           }
+       }
+        vm_page_unlock_queues();
+#endif /* DEBUG */
 
     KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
 }
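
hibernate_page_bitset() and hibernate_page_bittst() track one bit per physical page number in the page_list and page_list_wired bitmaps. A flat illustrative version (the real bitmaps are organized in banks per physical memory range, which this sketch ignores):

#include <stdbool.h>
#include <stdint.h>

#define NPAGES (1u << 20)       /* placeholder population */

static uint32_t page_bits[NPAGES / 32];

static inline void
page_bitset(uint32_t ppn, bool on)
{
        uint32_t mask = 1u << (ppn & 31);

        if (on)
                page_bits[ppn >> 5] |= mask;
        else
                page_bits[ppn >> 5] &= ~mask;
}

static inline bool
page_bittst(uint32_t ppn)
{
        return (page_bits[ppn >> 5] >> (ppn & 31)) & 1u;
}
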
@@ -5244,12 +5348,24 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list)
     uint32_t  count_discard_active    = 0;
     uint32_t  count_discard_inactive  = 0;
     uint32_t  count_discard_purgeable = 0;
+    uint32_t  count_discard_cleaned   = 0;
     uint32_t  count_discard_speculative = 0;
 
+#if DEBUG
+        vm_page_lock_queues();
+       if (vm_page_local_q) {
+           for (i = 0; i < vm_page_local_q_count; i++) {
+               struct vpl      *lq;
+               lq = &vm_page_local_q[i].vpl_un.vpl;
+               VPL_LOCK(&lq->vpl_lock);
+           }
+       }
+#endif /* DEBUG */
+
     clock_get_uptime(&start);
 
-    m = (vm_page_t) queue_first(&vm_page_queue_zf);
-    while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
+    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
+    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
     {
         next = (vm_page_t) m->pageq.next;
         if (hibernate_page_bittst(page_list, m->phys_page))
@@ -5308,11 +5424,37 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list)
         m = next;
     }
 
+    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
+    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
+    {
+        next = (vm_page_t) m->pageq.next;
+        if (hibernate_page_bittst(page_list, m->phys_page))
+        {
+           if (m->dirty)
+               count_discard_purgeable++;
+           else
+               count_discard_cleaned++;
+            hibernate_discard_page(m);
+        }
+        m = next;
+    }
+
+#if DEBUG
+       if (vm_page_local_q) {
+           for (i = 0; i < vm_page_local_q_count; i++) {
+               struct vpl      *lq;
+               lq = &vm_page_local_q[i].vpl_un.vpl;
+               VPL_UNLOCK(&lq->vpl_lock);
+           }
+       }
+        vm_page_unlock_queues();
+#endif /* DEBUG */
+
     clock_get_uptime(&end);
     absolutetime_to_nanoseconds(end - start, &nsec);
-    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
+    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
                 nsec / 1000000ULL,
-                count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
+               count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
 }
 
 #endif /* HIBERNATION */
@@ -5367,64 +5509,3 @@ vm_page_info(
        return vm_page_bucket_count;
 }
 #endif /* MACH_VM_DEBUG */
-
-#include <mach_kdb.h>
-#if    MACH_KDB
-
-#include <ddb/db_output.h>
-#include <vm/vm_print.h>
-#define        printf  kdbprintf
-
-/*
- *     Routine:        vm_page_print [exported]
- */
-void
-vm_page_print(
-       db_addr_t       db_addr)
-{
-       vm_page_t       p;
-
-       p = (vm_page_t) (long) db_addr;
-
-       iprintf("page 0x%x\n", p);
-
-       db_indent += 2;
-
-       iprintf("object=0x%x", p->object);
-       printf(", offset=0x%x", p->offset);
-       printf(", wire_count=%d", p->wire_count);
-
-       iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
-               (p->local ? "" : "!"),
-               (p->inactive ? "" : "!"),
-               (p->active ? "" : "!"),
-               (p->throttled ? "" : "!"),
-               (p->gobbled ? "" : "!"),
-               (p->laundry ? "" : "!"),
-               (p->free ? "" : "!"),
-               (p->reference ? "" : "!"),
-               (p->encrypted ? "" : "!"));
-       iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
-               (p->busy ? "" : "!"),
-               (p->wanted ? "" : "!"),
-               (p->tabled ? "" : "!"),
-               (p->fictitious ? "" : "!"),
-               (p->private ? "" : "!"),
-               (p->precious ? "" : "!"));
-       iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
-               (p->absent ? "" : "!"),
-               (p->error ? "" : "!"),
-               (p->dirty ? "" : "!"),
-               (p->cleaning ? "" : "!"),
-               (p->pageout ? "" : "!"),
-               (p->clustered ? "" : "!"));
-       iprintf("%soverwriting, %srestart, %sunusual\n",
-               (p->overwriting ? "" : "!"),
-               (p->restart ? "" : "!"),
-               (p->unusual ? "" : "!"));
-
-       iprintf("phys_page=0x%x", p->phys_page);
-
-       db_indent -= 2;
-}
-#endif /* MACH_KDB */
index a5931c9980a18ac807be74484e97be31e7c047dc..80ded84bee66b4d452315926a9645b97e357823e 100644
@@ -647,7 +647,7 @@ vm_shared_region_create(
        }
 
        /* create a VM sub map and its pmap */
-       sub_map = vm_map_create(pmap_create(0, is_64bit),
+       sub_map = vm_map_create(pmap_create(NULL, 0, is_64bit),
                                0, size,
                                TRUE);
        if (sub_map == VM_MAP_NULL) {
@@ -851,13 +851,13 @@ vm_shared_region_undo_mappings(
        unsigned int            j = 0;
        vm_shared_region_t      shared_region = NULL;
        boolean_t               reset_shared_region_state = FALSE;
-       
+
        shared_region = vm_shared_region_get(current_task());
        if (shared_region == NULL) {
-               SHARED_REGION_TRACE_DEBUG(("Failed to undo mappings because of NULL shared region.\n"));
+               printf("Failed to undo mappings because of NULL shared region.\n");
                return;
        }
-
+       
 
        if (sr_map == NULL) {
                ipc_port_t              sr_handle;
@@ -968,7 +968,7 @@ vm_shared_region_map_file(
        mach_vm_offset_t        sr_base_address;
        unsigned int            i;
        mach_port_t             map_port;
-       mach_vm_offset_t        target_address;
+       vm_map_offset_t         target_address;
        vm_object_t             object;
        vm_object_size_t        obj_size;
        boolean_t               found_mapping_to_slide = FALSE;
@@ -1384,7 +1384,7 @@ vm_shared_region_sliding_valid(uint32_t slide) {
 
        if ((shared_region_completed_slide == TRUE) && slide) {
                if (slide != slide_info.slide) {
-                       SHARED_REGION_TRACE_DEBUG(("Only one shared region can be slid\n"));
+                       printf("Only one shared region can be slid\n");
                        kr = KERN_FAILURE;      
                } else if (slide == slide_info.slide) {
                        /*
@@ -1429,7 +1429,7 @@ vm_shared_region_slide_init(
        }
 
        if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
-               SHARED_REGION_TRACE_DEBUG(("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size));
+               printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
                kr = KERN_FAILURE;
                return kr;
        }
@@ -1619,7 +1619,8 @@ vm_shared_region_slide(vm_offset_t vaddr, uint32_t pageIndex)
                                                                 * to the upper 32 bits.
                                                                 * The sliding failed...
                                                                 */
-                                                               printf("vm_shared_region_slide() carry over\n");
+                                                               printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
+                                                                      i, j, b, slide, old_value, *ptr_to_slide);
                                                                return KERN_FAILURE;
                                                        }
                                                }
@@ -1643,6 +1644,17 @@ vm_named_entry_t commpage64_entry = NULL;
 vm_map_t commpage32_map = VM_MAP_NULL;
 vm_map_t commpage64_map = VM_MAP_NULL;
 
+ipc_port_t commpage_text32_handle = IPC_PORT_NULL;
+ipc_port_t commpage_text64_handle = IPC_PORT_NULL;
+vm_named_entry_t commpage_text32_entry = NULL;
+vm_named_entry_t commpage_text64_entry = NULL;
+vm_map_t commpage_text32_map = VM_MAP_NULL;
+vm_map_t commpage_text64_map = VM_MAP_NULL;
+
+user32_addr_t commpage_text32_location = (user32_addr_t) _COMM_PAGE32_TEXT_START;
+user64_addr_t commpage_text64_location = (user64_addr_t) _COMM_PAGE64_TEXT_START;
+
+#if defined(__i386__) || defined(__x86_64__)
 /*
  * Create a memory entry, VM submap and pmap for one commpage.
  */
@@ -1664,7 +1676,7 @@ _vm_commpage_init(
        if (kr != KERN_SUCCESS) {
                panic("_vm_commpage_init: could not allocate mem_entry");
        }
-       new_map = vm_map_create(pmap_create(0, FALSE), 0, size, TRUE);
+       new_map = vm_map_create(pmap_create(NULL, 0, FALSE), 0, size, TRUE);
        if (new_map == VM_MAP_NULL) {
                panic("_vm_commpage_init: could not allocate VM map");
        }
@@ -1679,6 +1691,42 @@ _vm_commpage_init(
                ("commpage: _init(0x%llx) <- %p\n",
                 (long long)size, *handlep));
 }
+#endif
+
+
+/*
+ * Initialize the comm text pages at boot time
+ */
+extern u_int32_t random(void);
+void
+vm_commpage_text_init(void)
+{
+       SHARED_REGION_TRACE_DEBUG(
+               ("commpage text: ->init()\n"));
+#if defined(__i386__) || defined(__x86_64__)
+       /* create the 32 bit comm text page */
+       unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
+       _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
+       commpage_text32_entry = (vm_named_entry_t) commpage_text32_handle->ip_kobject;
+       commpage_text32_map = commpage_text32_entry->backing.map;
+       commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
+       /* XXX if (cpu_is_64bit_capable()) ? */
+        /* create the 64-bit comm text page */
+	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding up to a 2MB range */
+        _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
+        commpage_text64_entry = (vm_named_entry_t) commpage_text64_handle->ip_kobject;
+        commpage_text64_map = commpage_text64_entry->backing.map;
+       commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
+
+       commpage_text_populate();
+#else
+#error Unknown architecture.
+#endif /* __i386__ || __x86_64__ */
+       /* populate the routines in here */
+       SHARED_REGION_TRACE_DEBUG(
+                ("commpage text: init() <-\n"));
+
+}
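
The text commpage base is slid by a random, page-aligned offset drawn from a fixed range (_PFZ32_SLIDE_RANGE / _PFZ64_SLIDE_RANGE pages). The arithmetic in isolation, with placeholder constants standing in for the real values:

#include <stdint.h>
#include <stdlib.h>

#define PAGE_SHIFT_SK   12
#define SLIDE_RANGE_SK  512                     /* pages; placeholder */
#define TEXT_BASE_SK    0x7fff00000000ULL       /* placeholder base */

static uint64_t
slid_text_base(void)
{
        /* pick a page-aligned offset inside the slide range */
        uint64_t offset =
            ((uint64_t)(rand() % SLIDE_RANGE_SK)) << PAGE_SHIFT_SK;

        return TEXT_BASE_SK + offset;
}

Randomizing only the text page's location, while keeping the data commpage fixed, pairs with the protection split below: the data page is mapped read-only and the slid text page read-execute.
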
 
 /*
  * Initialize the comm pages at boot time.
@@ -1689,6 +1737,7 @@ vm_commpage_init(void)
        SHARED_REGION_TRACE_DEBUG(
                ("commpage: -> init()\n"));
 
+#if defined(__i386__) || defined(__x86_64__)
        /* create the 32-bit comm page */
        _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
        commpage32_entry = (vm_named_entry_t) commpage32_handle->ip_kobject;
@@ -1700,6 +1749,8 @@ vm_commpage_init(void)
        commpage64_entry = (vm_named_entry_t) commpage64_handle->ip_kobject;
        commpage64_map = commpage64_entry->backing.map;
 
+#endif /* __i386__ || __x86_64__ */
+
        /* populate them according to this specific platform */
        commpage_populate();
        __commpage_setup = 1;
@@ -1722,9 +1773,9 @@ vm_commpage_enter(
        vm_map_t        map,
        task_t          task)
 {
-       ipc_port_t              commpage_handle;
-       vm_map_offset_t         commpage_address, objc_address;
-       vm_map_size_t           commpage_size, objc_size;
+       ipc_port_t              commpage_handle, commpage_text_handle;
+       vm_map_offset_t         commpage_address, objc_address, commpage_text_address;
+       vm_map_size_t           commpage_size, objc_size, commpage_text_size;
        int                     vm_flags;
        kern_return_t           kr;
 
@@ -1732,6 +1783,7 @@ vm_commpage_enter(
                ("commpage: -> enter(%p,%p)\n",
                 map, task));
 
+       commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
        /* the comm page is likely to be beyond the actual end of the VM map */
        vm_flags = VM_FLAGS_FIXED | VM_FLAGS_BEYOND_MAX;
 
@@ -1743,6 +1795,8 @@ vm_commpage_enter(
                commpage_size = _COMM_PAGE64_AREA_LENGTH;
                objc_size = _COMM_PAGE64_OBJC_SIZE;
                objc_address = _COMM_PAGE64_OBJC_BASE;
+               commpage_text_handle = commpage_text64_handle;
+               commpage_text_address = (vm_map_offset_t) commpage_text64_location;
        } else {
                commpage_handle = commpage32_handle;
                commpage_address =
@@ -1750,6 +1804,8 @@ vm_commpage_enter(
                commpage_size = _COMM_PAGE32_AREA_LENGTH;
                objc_size = _COMM_PAGE32_OBJC_SIZE;
                objc_address = _COMM_PAGE32_OBJC_BASE;
+               commpage_text_handle = commpage_text32_handle;
+               commpage_text_address = (vm_map_offset_t) commpage_text32_location;
        }
 
        if ((commpage_address & (pmap_nesting_size_min - 1)) == 0 &&
@@ -1757,7 +1813,6 @@ vm_commpage_enter(
                /* the commpage is properly aligned or sized for pmap-nesting */
                vm_flags |= VM_MAKE_TAG(VM_MEMORY_SHARED_PMAP);
        }
-
        /* map the comm page in the task's address space */
        assert(commpage_handle != IPC_PORT_NULL);
        kr = vm_map_enter_mem_object(
@@ -1769,8 +1824,8 @@ vm_commpage_enter(
                commpage_handle,
                0,
                FALSE,
-               VM_PROT_READ|VM_PROT_EXECUTE,
-               VM_PROT_READ|VM_PROT_EXECUTE,
+               VM_PROT_READ,
+               VM_PROT_READ,
                VM_INHERIT_SHARE);
        if (kr != KERN_SUCCESS) {
                SHARED_REGION_TRACE_ERROR(
@@ -1780,6 +1835,28 @@ vm_commpage_enter(
                         (long long)commpage_size, commpage_handle, kr));
        }
 
+       /* map the comm text page in the task's address space */
+       assert(commpage_text_handle != IPC_PORT_NULL);
+       kr = vm_map_enter_mem_object(
+               map,
+               &commpage_text_address,
+               commpage_text_size,
+               0,
+               vm_flags,
+               commpage_text_handle,
+               0,
+               FALSE,
+               VM_PROT_READ|VM_PROT_EXECUTE,
+               VM_PROT_READ|VM_PROT_EXECUTE,
+               VM_INHERIT_SHARE);
+       if (kr != KERN_SUCCESS) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("commpage text: enter(%p,0x%llx,0x%llx) "
+                        "commpage text %p mapping failed 0x%x\n",
+                        map, (long long)commpage_text_address,
+                        (long long)commpage_text_size, commpage_text_handle, kr));
+       }
+
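The net effect of this hunk is a W^X split of the comm area: the original commpage is now mapped read-only (the VM_PROT_EXECUTE bits were dropped above), while the routines live in a separate text page mapped read+execute at commpage_text_address. From userland nothing changes for data reads; a minimal sketch, assuming the usual cpu_capabilities.h layout (header path and macro shown for illustration):

    #include <stdint.h>
    #include <machine/cpu_capabilities.h>   /* _COMM_PAGE_VERSION et al. */

    /* Data such as the commpage version is read from the read-only page;
     * execute permission is no longer present (or needed) there. */
    static uint16_t commpage_version(void)
    {
            return *(volatile uint16_t *)_COMM_PAGE_VERSION;
    }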
        /*
         * Since we're here, we also pre-allocate some virtual space for the
         * Objective-C run-time, if needed...
index 51742f0b0053e900c6ff8f5245979331bf49ae80..cec44658f73d942329fb5d54e2793d03332a7683 100644 (file)
@@ -204,6 +204,7 @@ extern kern_return_t vm_shared_region_slide(
        vm_offset_t     vaddr, 
        uint32_t pageIndex);
 extern void vm_commpage_init(void);
+extern void vm_commpage_text_init(void);
 extern kern_return_t vm_commpage_enter(
        struct _vm_map          *map,
        struct task             *task);
index 4739455df9b04aaf1f62ad73bfb6cfa1019ec4f5..bb985060ff0ce4a0b17318c2ef1063d8cd79fb13 100644 (file)
@@ -412,6 +412,7 @@ swapfile_pager_data_request(
                           kernel_mapping,
                           dst_pnum,
                           VM_PROT_READ | VM_PROT_WRITE,
+                          VM_PROT_NONE,
                           0,
                           TRUE);
 
index 8271d71b2c1aa9499cbb151103c9aba7bb2fb6ee..05b51b4b20c04126bf13e8001b8df9fc279f80aa 100644 (file)
@@ -881,12 +881,17 @@ mach_vm_map(
        vm_prot_t               max_protection,
        vm_inherit_t            inheritance)
 {
+       kern_return_t           kr;
+       vm_map_offset_t         vmmaddr;
+
+       vmmaddr = (vm_map_offset_t) *address;
+
        /* filter out any kernel-only flags */
        if (flags & ~VM_FLAGS_USER_MAP)
                return KERN_INVALID_ARGUMENT;
 
-       return vm_map_enter_mem_object(target_map,
-                                      address,
+       kr = vm_map_enter_mem_object(target_map,
+                                      &vmmaddr,
                                       initial_size,
                                       mask,
                                       flags,
@@ -896,6 +901,9 @@ mach_vm_map(
                                       cur_protection,
                                       max_protection,
                                       inheritance);
+
+       *address = vmmaddr;
+       return kr;
 }
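The mach_vm_map() change above is a width-safety fix: the caller's address is a mach_vm_offset_t (always 64-bit), while vm_map_enter_mem_object() writes through a vm_map_offset_t pointer, so the value is now round-tripped through a correctly typed local instead of punning the pointer. A minimal sketch of the pattern (type names abbreviated here, not the kernel's definitions):

    #include <stdint.h>

    typedef uint64_t  user_addr64_t;   /* caller-visible, fixed 64-bit   */
    typedef uintptr_t map_addr_t;      /* width may vary with kernel cfg */

    static int inner(map_addr_t *a) { *a += 0x1000; return 0; }

    int outer(user_addr64_t *address)
    {
            map_addr_t local = (map_addr_t)*address; /* copy in at native width */
            int kr = inner(&local);                  /* inner writes the local  */
            *address = local;                        /* copy out, widening back */
            return kr;
    }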
 
 
@@ -1887,6 +1895,9 @@ mach_make_memory_entry_64(
                } else if (access == MAP_MEM_COPYBACK) {
                   SET_MAP_MEM(access, parent_entry->protection);
                   wimg_mode = VM_WIMG_USE_DEFAULT;
+               } else if (access == MAP_MEM_INNERWBACK) {
+                  SET_MAP_MEM(access, parent_entry->protection);
+                  wimg_mode = VM_WIMG_INNERWBACK;
                } else if (access == MAP_MEM_WTHRU) {
                   SET_MAP_MEM(access, parent_entry->protection);
                   wimg_mode = VM_WIMG_WTHRU;
@@ -1951,6 +1962,8 @@ mach_make_memory_entry_64(
                        wimg_mode = VM_WIMG_IO;
                } else if (access == MAP_MEM_COPYBACK) {
                        wimg_mode = VM_WIMG_USE_DEFAULT;
+               } else if (access == MAP_MEM_INNERWBACK) {
+                       wimg_mode = VM_WIMG_INNERWBACK;
                } else if (access == MAP_MEM_WTHRU) {
                        wimg_mode = VM_WIMG_WTHRU;
                } else if (access == MAP_MEM_WCOMB) {
@@ -2156,6 +2169,10 @@ redo_lookup:
                                                 */
                                                protections &= next_entry->max_protection;
                                        }
+                                       if ((next_entry->wired_count) &&
+                                           (map_entry->wired_count == 0)) {
+                                               break;
+                                       }
                                        if(((next_entry->max_protection) 
                                                & protections) != protections) {
                                                break;
@@ -2264,7 +2281,7 @@ redo_lookup:
                                        object, map_entry->offset,
                                        total_size,
                                        ((map_entry->is_shared 
-                                               || target_map->mapped)
+                                         || target_map->mapped_in_other_pmaps)
                                                        ? PMAP_NULL :
                                                        target_map->pmap),
                                        map_entry->vme_start,
@@ -2276,6 +2293,9 @@ redo_lookup:
 
                                vm_object_lock(shadow_object);
                                while (total_size) {
+                                   assert((next_entry->wired_count == 0) ||
+                                          (map_entry->wired_count));
+
                                   if(next_entry->object.vm_object == object) {
                                        vm_object_reference_locked(shadow_object);
                                        next_entry->object.vm_object 
@@ -2327,6 +2347,8 @@ redo_lookup:
                                wimg_mode = VM_WIMG_IO;
                        } else if (access == MAP_MEM_COPYBACK) {
                                wimg_mode = VM_WIMG_USE_DEFAULT;
+                       } else if (access == MAP_MEM_INNERWBACK) {
+                               wimg_mode = VM_WIMG_INNERWBACK;
                        } else if (access == MAP_MEM_WTHRU) {
                                wimg_mode = VM_WIMG_WTHRU;
                        } else if (access == MAP_MEM_WCOMB) {
@@ -2768,8 +2790,10 @@ mach_destroy_memory_entry(
        assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
 #endif /* MACH_ASSERT */
        named_entry = (vm_named_entry_t)port->ip_kobject;
-       lck_mtx_lock(&(named_entry)->Lock);
+
+       named_entry_lock(named_entry);
        named_entry->ref_count -= 1;
+
        if(named_entry->ref_count == 0) {
                if (named_entry->is_sub_map) {
                        vm_map_deallocate(named_entry->backing.map);
@@ -2778,12 +2802,13 @@ mach_destroy_memory_entry(
                        vm_object_deallocate(named_entry->backing.object);
                } /* else JMM - need to drop reference on pager in that case */
 
-               lck_mtx_unlock(&(named_entry)->Lock);
+               named_entry_unlock(named_entry);
+               named_entry_lock_destroy(named_entry);
 
                kfree((void *) port->ip_kobject,
                      sizeof (struct vm_named_entry));
        } else
-               lck_mtx_unlock(&(named_entry)->Lock);
+               named_entry_unlock(named_entry);
 }
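named_entry_lock(), named_entry_unlock() and named_entry_lock_destroy() replace the open-coded lck_mtx calls; presumably they are thin wrappers over the mutex embedded in struct vm_named_entry, along these lines (a sketch, not the actual definitions; the lock-group name is illustrative):

    #define named_entry_lock(entry)         lck_mtx_lock(&(entry)->Lock)
    #define named_entry_unlock(entry)       lck_mtx_unlock(&(entry)->Lock)
    #define named_entry_lock_destroy(entry) \
            lck_mtx_destroy(&(entry)->Lock, &vm_object_lck_grp)

The destroy step matters here: once ref_count hits zero the entry is kfree()d, so the mutex must be torn down before the memory is returned.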
 
 /* Allow manipulation of individual page state.  This is actually part of */
diff --git a/osfmk/x86_64/boot_pt.c b/osfmk/x86_64/boot_pt.c
new file mode 100644 (file)
index 0000000..392c3c1
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <i386/pmap.h>
+
+/*
+ * These pagetables are used during early processor startup during
+ * the transition from protected mode to 64-bit mode and the jump
+ * to high kernel address space.
+ *
+ * They are required to be at the base of the kernel and specifically
+ * the base of the special __HIB section.
+ *
+ * These tables are statically-defined as physical-zero-based.
+ * Startup code in start.s rebases these according to the actual physical
+ * base address. 
+ */
+
+/*
+ * NB: This must be located at the kernel's base address!
+ */
+#define PML4_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE)
+pml4_entry_t   BootPML4[PTE_PER_PAGE]
+               __attribute__((section("__HIB, __bootPT"))) = {
+       [0]                     = ((uint64_t)(PAGE_SIZE) | PML4_PROT),
+       [KERNEL_PML4_INDEX]     = ((uint64_t)(PAGE_SIZE) | PML4_PROT),
+};
+
+#define PDPT_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE)
+pdpt_entry_t   BootPDPT[PTE_PER_PAGE]
+               __attribute__((section("__HIB, __bootPT"))) = {
+       [0]     = ((uint64_t)(2*PAGE_SIZE) | PDPT_PROT), 
+       [1]     = ((uint64_t)(3*PAGE_SIZE) | PDPT_PROT), 
+       [2]     = ((uint64_t)(4*PAGE_SIZE) | PDPT_PROT), 
+       [3]     = ((uint64_t)(5*PAGE_SIZE) | PDPT_PROT), 
+};
+
+#if NPGPTD != 4
+#error Please update boot_pt.c to reflect the new value of NPGPTD
+#endif
+
+#if MACHINE_BOOTSTRAPPTD
+
+#define PDT_PROT (INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE)
+#define ID_MAP_2MEG(x) [(x)] = ((((uint64_t)(x)) << 21) | (PDT_PROT)),
+
+#define L0(x,n)         x(n)
+#define L1(x,n)         L0(x,n-1)     L0(x,n)
+#define L2(x,n)  L1(x,n-2)     L1(x,n)
+#define L3(x,n)  L2(x,n-4)     L2(x,n)
+#define L4(x,n)  L3(x,n-8)     L3(x,n)
+#define L5(x,n)  L4(x,n-16)    L4(x,n)
+#define L6(x,n)  L5(x,n-32)    L5(x,n)
+#define L7(x,n)  L6(x,n-64)    L6(x,n)
+#define L8(x,n)  L7(x,n-128)   L7(x,n)
+#define L9(x,n)  L8(x,n-256)   L8(x,n)
+#define L10(x,n) L9(x,n-512)   L9(x,n)
+#define L11(x,n) L10(x,n-1024) L10(x,n)
+
+#define FOR_0_TO_2047(x) L11(x,2047)
+
+pd_entry_t     BootPTD[2048]
+               __attribute__((section("__HIB, __bootPT"))) = {
+       FOR_0_TO_2047(ID_MAP_2MEG)
+};
+#endif /* MACHINE_BOOTSTRAPPTD */
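The L0..L11 ladder is a compile-time doubling trick: FOR_0_TO_2047(ID_MAP_2MEG) stamps out 2048 designated initializers, one 2 MB superpage each, identity-mapping the first 4 GB of physical memory. Expanded by hand, the table is equivalent to:

    pd_entry_t BootPTD[2048] = {
            [0]    = (0ULL    << 21) | PDT_PROT,  /* maps [0, 2MB)       */
            [1]    = (1ULL    << 21) | PDT_PROT,  /* maps [2MB, 4MB)     */
            /* ... one entry per 2MB slice ... */
            [2047] = (2047ULL << 21) | PDT_PROT,  /* maps [4GB-2MB, 4GB) */
    };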
index 50bc8b99110389c4b88b3dd2f63b371f99044d03..a4a62feaaffb3c41e62616c28465bae9696bc879 100644 (file)
@@ -27,7 +27,6 @@
  */
 #include <i386/asm.h>
 #include <assym.s>
-#include <mach_kdb.h>
 #include <i386/eflags.h>
 #include <i386/rtclock_asm.h>
 #include <i386/trap.h>
@@ -77,7 +76,6 @@
 #define        HNDL_UNIX_SCALL         EXT(hndl_unix_scall)
 #define        HNDL_MACH_SCALL         EXT(hndl_mach_scall)
 #define        HNDL_MDEP_SCALL         EXT(hndl_mdep_scall)
-#define        HNDL_DIAG_SCALL         EXT(hndl_diag_scall)
 #define        HNDL_DOUBLE_FAULT       EXT(hndl_double_fault)
 #define        HNDL_MACHINE_CHECK      EXT(hndl_machine_check)
 
@@ -158,7 +156,7 @@ L_dispatch:
        push    %rcx
        mov     EXT(pal_efi_saved_cr3)(%rip), %rcx
        mov     %rcx, %cr3
-       leaq    0(%rip), %rcx
+       leaq    (%rip), %rcx
        shr     $32, %rcx               /* splice the upper 32-bits of rip */
        shl     $32, %rsp               /* .. and the lower 32-bits of rsp */
        shrd    $32, %rcx, %rsp         /* to recover the full 64-bits of rsp */
@@ -181,8 +179,8 @@ L_64bit_dispatch:
        /*
         * Save segment regs - for completeness since they're not used.
         */
-       mov     %fs, R64_FS(%rsp)
-       mov     %gs, R64_GS(%rsp)
+       movl    %fs, R64_FS(%rsp)
+       movl    %gs, R64_GS(%rsp)
 
        /* Save general-purpose registers */
        mov     %rax, R64_RAX(%rsp)
@@ -240,10 +238,10 @@ L_32bit_dispatch: /* 32-bit user task */
        /*
         * Save segment regs
         */
-       mov     %ds, R32_DS(%rsp)
-       mov     %es, R32_ES(%rsp)
-       mov     %fs, R32_FS(%rsp)
-       mov     %gs, R32_GS(%rsp)
+       movl    %ds, R32_DS(%rsp)
+       movl    %es, R32_ES(%rsp)
+       movl    %fs, R32_FS(%rsp)
+       movl    %gs, R32_GS(%rsp)
 
        /*
         * Save general 32-bit registers
@@ -322,7 +320,7 @@ L_common_dispatch:
        mov     %gs:CPU_ACTIVE_THREAD, %rcx     /* Get the active thread */
        cmpq    $0, TH_PCB_IDS(%rcx)    /* Is there a debug register state? */
        je      3f
-       mov     $0, %rcx                /* If so, reset DR7 (the control) */
+       xor     %ecx, %ecx              /* If so, reset DR7 (the control) */
        mov     %rcx, %dr7
 3:
        incl    %gs:hwIntCnt(,%ebx,4)           // Bump the trap/intr count
@@ -340,7 +338,7 @@ Entry(ret_to_user)
        mov     %gs:CPU_ACTIVE_THREAD, %rdx
        movq    TH_PCB_IDS(%rdx),%rax   /* Obtain this thread's debug state */
        
-       cmpq    $0,%rax                 /* Is there a debug register context? */
+       test    %rax, %rax              /* Is there a debug register context? */
        je      2f                      /* branch if not */
        cmpl    $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
        jne     1f
@@ -431,21 +429,21 @@ L_32bit_return:
         */
        swapgs
 EXT(ret32_set_ds):     
-       movw    R32_DS(%rsp), %ds
+       movl    R32_DS(%rsp), %ds
 EXT(ret32_set_es):
-       movw    R32_ES(%rsp), %es
+       movl    R32_ES(%rsp), %es
 EXT(ret32_set_fs):
-       movw    R32_FS(%rsp), %fs
+       movl    R32_FS(%rsp), %fs
 EXT(ret32_set_gs):
-       movw    R32_GS(%rsp), %gs
+       movl    R32_GS(%rsp), %gs
 
        /* pop compat frame + trapno, trapfn and error */       
        add     $(ISC32_OFFSET)+8+8+8, %rsp
-        cmp    $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
+       cmpl    $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
                                        /* test for fast entry/exit */
-        je      L_fast_exit
+       je      L_fast_exit
 EXT(ret32_iret):
-        iretq                          /* return from interrupt */
+       iretq                           /* return from interrupt */
 
 L_fast_exit:
        pop     %rdx                    /* user return eip */
@@ -454,7 +452,7 @@ L_fast_exit:
        popf                            /* flags - carry denotes failure */
        pop     %rcx                    /* user return esp */
        sti                             /* interrupts enabled after sysexit */
-       sysexit                         /* 32-bit sysexit */
+       .byte 0x0f,0x35                 /* 32-bit sysexit */
 
 ret_to_kernel:
 #if DEBUG_IDT64
@@ -553,14 +551,6 @@ Entry(idt64_mdep_scall)
        pushq   $(MACHDEP_INT)
        jmp     L_32bit_entry_check
 
-       
-Entry(idt64_diag_scall)
-       swapgs                          /* switch to kernel gs (cpu_data) */
-       push    %rax                    /* save system call number */
-       PUSH_FUNCTION(HNDL_DIAG_SCALL)
-       pushq   $(DIAG_INT)
-       jmp     L_32bit_entry_check
-
 Entry(hi64_syscall)
 Entry(idt64_syscall)
 L_syscall_continue:
@@ -582,6 +572,7 @@ L_syscall_continue:
        movq    $(T_SYSCALL), ISF64_TRAPNO(%rsp)        /* trapno */
        leaq    HNDL_SYSCALL(%rip), %r11;
        movq    %r11, ISF64_TRAPFN(%rsp)
+       mov     ISF64_RFLAGS(%rsp), %r11        /* Avoid info leak: restore R11 */
        jmp     L_64bit_dispatch                /* this can only be a 64-bit task */
        
 /*
@@ -807,9 +798,9 @@ L_kernel_trap:
  *  24 ISF64_RIP:      rip
  *  32 ISF64_CS:       cs
  *  40 ISF64_RFLAGS:   rflags 
- *  48 ISF64_RSP:      rsp --> new trapno
- *  56 ISF64_SS:       ss  --> new trapfn
- *  64                 pad --> new errcode
+ *  48 ISF64_RSP:      rsp  <-- new trapno
+ *  56 ISF64_SS:       ss   <-- new trapfn
+ *  64                 pad8 <-- new errcode
  *  72                 user rip
  *  80                 user cs
  *  88                 user rflags
@@ -820,7 +811,7 @@ L_fault_iret:
        pop     %rax                    /* recover saved %rax */
        mov     %rax, ISF64_RIP(%rsp)   /* save rax (we don`t need saved rip) */
        mov     ISF64_TRAPNO(%rsp), %rax
-       mov     %rax, ISF64_TRAPNO(%rsp)/* put in user trap number */
+       mov     %rax, ISF64_RSP(%rsp)   /* put in user trap number */
        mov     ISF64_TRAPFN(%rsp), %rax
        mov     %rax, ISF64_SS(%rsp)    /* put in user trap function */
        mov     ISF64_ERR(%rsp), %rax   /* get error code */
@@ -1024,7 +1015,7 @@ Entry(hndl_allintrs)
        incl    %gs:CPU_INTERRUPT_LEVEL
 
        movq    %gs:CPU_INT_STATE, %rdi
-       
+
        CCALL(interrupt)                /* call generic interrupt routine */
 
        cli                             /* just in case we returned with intrs enabled */
@@ -1240,34 +1231,6 @@ Entry(hndl_mdep_scall)
         * always returns through thread_exception_return
         */
 
-
-Entry(hndl_diag_scall)
-       TIME_TRAP_UENTRY
-
-       movq    %gs:CPU_KERNEL_STACK,%rdi
-       xchgq   %rdi,%rsp                       /* switch to kernel stack */
-       
-       /* Check for active vtimers in the current task */
-       movq    %gs:CPU_ACTIVE_THREAD,%rcx      /* get current thread     */
-       movq    TH_TASK(%rcx),%rbx              /* point to current task  */
-       TASK_VTIMER_CHECK(%rbx,%rcx)
-
-       pushq   %rdi                    /* push pcb stack */
-
-       CCALL(diagCall)                 // Call diagnostics
-
-       cli                             // Disable interruptions just in case
-       cmpl    $0,%eax                 // What kind of return is this?
-       je      1f                      // - branch if bad (zero)
-       popq    %rsp                    // Get back the pcb stack
-       jmp     EXT(return_to_user)     // Normal return, do not check asts...
-1:
-       CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
-               // pass what would be the diag syscall
-               // error return - cause an exception
-       /* no return */
-
-
 /*
  * 64bit Tasks
  * System call entries via syscall only:
@@ -1305,6 +1268,7 @@ Entry(hndl_syscall)
        je      EXT(hndl_diag_scall64)
 
        /* Syscall class unknown */
+       sti
        CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
        /* no return */
 
@@ -1338,16 +1302,16 @@ Entry(hndl_mdep_scall64)
         * always returns through thread_exception_return
         */
 
-
 Entry(hndl_diag_scall64)
        pushq   %rdi                    // Push the previous stack
        CCALL(diagCall64)               // Call diagnostics
        cli                             // Disable interruptions just in case
-       cmpl    $0,%eax                 // What kind of return is this?
+       test    %eax, %eax              // What kind of return is this?
        je      1f                      // - branch if bad (zero)
        popq    %rsp                    // Get back the pcb stack
        jmp     EXT(return_to_user)     // Normal return, do not check asts...
 1:
+       sti
        CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
        /* no return */
 
index f2f26ce132d28d743070d19015f63137fddf0f41..2c1d334976e618c6722ed8e181a57c6845db716c 100644 (file)
@@ -34,19 +34,11 @@ USER_TRAP(0x04,idt64_into)
 USER_TRAP(0x05,idt64_bounds)
 TRAP(0x06,idt64_invop)
 TRAP(0x07,idt64_nofpu)
-#if    MACH_KDB
-TRAP_IST(0x08,idt64_db_task_dbl_fault)
-#else
 TRAP_IST(0x08,idt64_double_fault)
-#endif
 TRAP(0x09,idt64_fpu_over)
 TRAP(0x0a,idt64_inv_tss)
 TRAP_SPC(0x0b,idt64_segnp)
-#if    MACH_KDB
-TRAP_IST(0x0c,idt64_db_task_stk_fault)
-#else
 TRAP_SPC(0x0c,idt64_stack_fault)
-#endif
 TRAP_SPC(0x0d,idt64_gen_prot)
 TRAP_SPC(0x0e,idt64_page_fault)
 TRAP(0x0f,idt64_trap_0f)
@@ -172,8 +164,8 @@ USER_TRAP(0x7f, idt64_dtrace_ret) /* Required by dtrace "fasttrap" */
 USER_TRAP_SPC(0x80,idt64_unix_scall)
 USER_TRAP_SPC(0x81,idt64_mach_scall)
 USER_TRAP_SPC(0x82,idt64_mdep_scall)
-USER_TRAP_SPC(0x83,idt64_diag_scall)
 
+INTERRUPT(0x83)
 INTERRUPT(0x84)
 INTERRUPT(0x85)
 INTERRUPT(0x86)
index af3bac12a9877a3458378a626b216f75cc72d1db..f13db5aabdca101b63729cba05f7106e55266acf 100644 (file)
 
 #include <mach_rt.h>
 #include <platforms.h>
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
 #include <mach_kdp.h>
-#include <stat_time.h>
 #include <mach_assert.h>
 
 #include <sys/errno.h>
index 10a086542bcc289180d046c080b899689bf14380..b912c6d9b98277be64a22de20103c7d7559468b6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -582,7 +582,7 @@ flush_dcache64(addr64_t addr, unsigned count, int phys)
                dcache_incoherent_io_flush64(addr, count);
        }
        else {
-               uint32_t  linesize = cpuid_info()->cache_linesize;
+               uint64_t  linesize = cpuid_info()->cache_linesize;
                addr64_t  bound = (addr + count + linesize -1) & ~(linesize - 1);
                __mfence();
                while (addr < bound) {
@@ -646,6 +646,20 @@ kdp_register_callout(void)
 }
 #endif
 
+/*
+ * Return a uniformly distributed 64-bit random number.
+ *
+ * This interface should have minimal dependencies on kernel
+ * services, and thus be available very early in the life
+ * of the kernel.  But as a result, it may not be very random
+ * on all platforms.
+ */
+uint64_t
+early_random(void)
+{
+       return (ml_early_random());
+}
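A representative early-boot consumer (hypothetical sketch; the guard-masking detail is an assumption, not this commit's code) would be seeding the stack-check guard before the entropy pool exists:

    extern uint64_t early_random(void);

    /* Hypothetical: usable before the kernel entropy pool is up. */
    static void seed_stack_guard(unsigned long *guard)
    {
            *guard = (unsigned long)early_random();
            *guard &= ~0xFFUL;  /* e.g. force a NUL byte to stop string reads */
    }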
+
 #if !CONFIG_VMX
 int host_vmxon(boolean_t exclusive __unused)
 {
index e4f404b805959785b17f128b700a98150af49b49..cde29d95503a137ada336e07060159e8a924f91a 100644 (file)
  * which is in lowmem_vectors.s
  */
  
-/* 
- *     This is where we put constants, pointers, and data areas that must be accessed
- *     quickly through assembler.  They are designed to be accessed directly with 
- *     absolute addresses, not via a base register.  This is a global area, and not
- *     per processor.
- */
 #pragma pack(8)                /* Make sure the structure stays as we defined it */
 typedef struct lowglo {
 
        unsigned char   lgVerCode[8];           /* 0xffffff8000002000 System verification code */
-       uint64_t        lgZero[2];                              /* 0xffffff8000002008 Double constant 0 */
-       uint64_t        lgRsv010;                               /* 0xffffff8000002018 Reserved */
-       uint64_t        lgCHUDXNUfnStart;               /* 0xffffff8000002020 CHUD XNU function glue table */
-       uint64_t        lgRsv018;                               /* 0xffffff8000002028 Reserved */
-       uint64_t        lgVersion;                              /* 0xffffff8000002030 Pointer to kernel version string */
-       uint64_t        lgRsv020[280];                  /* 0xffffff8000002038 Reserved */
-       uint64_t        lgKmodptr;                              /* 0xffffff80000028f8 Pointer to kmod, debugging aid */
-       uint64_t        lgTransOff;                             /* 0xffffff8000002900 Pointer to kdp_trans_off, debugging aid */
-       uint64_t        lgReadIO;                               /* 0xffffff8000002908 Pointer to kdp_read_io, debugging aid */
-       uint64_t        lgDevSlot1;                             /* 0xffffff8000002910 For developer use */
-       uint64_t        lgDevSlot2;                             /* 0xffffff8000002918 For developer use */
-       uint64_t        lgOSVersion;                    /* 0xffffff8000002920 Pointer to OS version string */
-       uint64_t        lgRebootFlag;                   /* 0xffffff8000002928 Pointer to debugger reboot trigger */
-       uint64_t        lgManualPktAddr;                /* 0xffffff8000002930 Pointer to manual packet structure */
+       uint64_t        lgZero;                 /* 0xffffff8000002008 Double constant 0 */
+       uint64_t        lgStext;                /* 0xffffff8000002010 Start of kernel text */
+       uint64_t        lgRsv018;               /* 0xffffff8000002018 Reserved */
+       uint64_t        lgCHUDXNUfnStart;       /* 0xffffff8000002020 CHUD XNU function glue table */
+       uint64_t        lgRsv028;               /* 0xffffff8000002028 Reserved */
+       uint64_t        lgVersion;              /* 0xffffff8000002030 Pointer to kernel version string */
+       uint64_t        lgRsv038[280];          /* 0xffffff8000002038 Reserved */
+       uint64_t        lgKmodptr;              /* 0xffffff80000028f8 Pointer to kmod, debugging aid */
+       uint64_t        lgTransOff;             /* 0xffffff8000002900 Pointer to kdp_trans_off, debugging aid */
+       uint64_t        lgReadIO;               /* 0xffffff8000002908 Pointer to kdp_read_io, debugging aid */
+       uint64_t        lgDevSlot1;             /* 0xffffff8000002910 For developer use */
+       uint64_t        lgDevSlot2;             /* 0xffffff8000002918 For developer use */
+       uint64_t        lgOSVersion;            /* 0xffffff8000002920 Pointer to OS version string */
+       uint64_t        lgRebootFlag;           /* 0xffffff8000002928 Pointer to debugger reboot trigger */
+       uint64_t        lgManualPktAddr;        /* 0xffffff8000002930 Pointer to manual packet structure */
 
-       uint64_t        lgRsv49C[217];                  /* 0xffffff8000002938 Reserved - push to 1 page */
+       uint64_t        lgRsv938[217];          /* 0xffffff8000002938 Reserved - push to 1 page */
 } lowglo;
 #pragma pack()
 extern lowglo lowGlo;
diff --git a/osfmk/x86_64/lowmem_vectors.c b/osfmk/x86_64/lowmem_vectors.c
new file mode 100644 (file)
index 0000000..d1d1e7f
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * @OSF_COPYRIGHT@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ * 
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ * 
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ * 
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <platforms.h>
+#include <mach_kdp.h>
+#include <mach/vm_param.h>
+#include <x86_64/lowglobals.h>
+
+/* 
+ * on x86_64 the low mem vectors live here and get mapped to 0xffffff8000002000 at
+ * system startup time
+ */
+
+extern void    *version;
+extern void    *kmod;
+extern void    *kdp_trans_off;
+extern void    *kdp_read_io;
+extern void    *osversion;
+extern void    *flag_kdp_trigger_reboot;
+extern void    *manual_pkt;
+
+lowglo lowGlo __attribute__ ((aligned(PAGE_SIZE))) = {
+
+       .lgVerCode              = { 'C','a','t','f','i','s','h',' ' },
+
+       .lgCHUDXNUfnStart       = 0,
+
+       .lgVersion              = (uint64_t) &version,
+
+       .lgKmodptr              = (uint64_t) &kmod,
+
+#if MACH_KDP
+       .lgTransOff             = (uint64_t) &kdp_trans_off,
+       .lgReadIO               = (uint64_t) &kdp_read_io,
+#else
+       .lgTransOff             = 0,
+       .lgReadIO               = 0,
+#endif
+
+       .lgDevSlot1             = 0,
+       .lgDevSlot2             = 0,
+
+       .lgOSVersion            = (uint64_t) &osversion,
+
+#if MACH_KDP
+       .lgRebootFlag           = (uint64_t) &flag_kdp_trigger_reboot,
+       .lgManualPktAddr        = (uint64_t) &manual_pkt,
+#else
+       .lgRebootFlag           = 0,
+       .lgManualPktAddr        = 0,
+#endif 
+};
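Because lowGlo is page-aligned and aliased at a fixed kernel virtual address, debugging tools can locate these slots by offset alone. For instance (hypothetical reader; base and offset taken from the lowglobals.h comments, 0xffffff80000028f8 - 0xffffff8000002000 = 0x8f8):

    #include <stdint.h>

    #define LOWGLO_BASE  0xffffff8000002000ULL  /* fixed alias VA        */
    #define LG_KMODPTR   0x8f8                  /* lgKmodptr slot offset */

    /* Hypothetical kernel-context read of the kmod list pointer. */
    static uint64_t read_kmodptr(void)
    {
            return *(volatile uint64_t *)(LOWGLO_BASE + LG_KMODPTR);
    }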
diff --git a/osfmk/x86_64/lowmem_vectors.s b/osfmk/x86_64/lowmem_vectors.s
deleted file mode 100644 (file)
index 40133c4..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <platforms.h>
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-
-#include <i386/asm.h>
-#include <i386/proc_reg.h>
-#include <i386/postcode.h>
-#include <assym.s>
-
-
-/* 
- * on x86_64 the low mem vectors live here and get mapped to 0xffffff8000200000 at
- * system startup time
- */
-
-       .text
-       .align  12
-       .globl  EXT(lowGlo)
-EXT(lowGlo):
-
-       .ascii "Catfish "       /* +0x000 System verification code */
-       .quad   0                       /* +0x008 Double constant 0 */
-       .quad   0
-       .quad   0                       /* +0x018 Reserved */
-       .quad   0                               /* +0x020 Reserved */
-       .quad   0                               /* +0x028 Reserved */
-       .quad   EXT(version)    /* +0x030 Pointer to kernel version string */
-       .fill   560, 4, 0               /* +0x038 Reserved - rdar://problem/5783217 */
-       .quad   EXT(kmod)               /* +0x8f8 Pointer to kmod, debugging aid */
-#if MACH_KDP
-       .quad   EXT(kdp_trans_off)      /* +0x900 Pointer to kdp_trans_off, debugging aid */
-       .quad   EXT(kdp_read_io)        /* +0x908 Pointer to kdp_read_io, debugging aid */
-#else
-       .quad   0                       /* +0x900 Reserved */
-       .quad   0                       /* +0x908 Reserved */
-#endif
-       .quad   0                       /* +0x910 Reserved for developer use */
-       .quad   0                       /* +0x918 Reserved for developer use */
-       .quad   EXT(osversion)          /* +0x920 Pointer to osversion string */
-#if MACH_KDP
-       .quad   EXT(flag_kdp_trigger_reboot) /* +0x928 Pointer to debugger reboot trigger */
-       .quad   EXT(manual_pkt)              /* +0x930 Pointer to manual packet structure */
-#else
-       .quad   0                       /* +0x928 Reserved */
-       .quad   0                       /* +0x930 Reserved */
-#endif 
-       .fill   434, 4, 0       /* pad to 0x1000 (page size) - rdar://problem/5783217 */
index 1c74f9fc8b3ed40d2551799ab5f9c6cce492c861..3628875868bad43f22a33de71f2e6af3ac828b8f 100644 (file)
@@ -33,6 +33,7 @@
        
 #include <i386/postcode.h>
 #include <i386/apic.h>
+#include <i386/vmx/vmx_asm.h>
 #include <assym.s>
 
 /*
@@ -161,7 +162,7 @@ Lslow:
        hlt
        .data
 1:     String  "_rtc_nanotime_read() - slow algorithm not supported"
-
+       .text
 
 Entry(call_continuation)
        movq    %rdi,%rcx                       /* get continuation */
@@ -173,3 +174,86 @@ Entry(call_continuation)
        movq    %gs:CPU_ACTIVE_THREAD,%rdi
        call    EXT(thread_terminate)
 
+Entry(x86_init_wrapper)
+       xor     %rbp, %rbp
+       movq    %rsi, %rsp
+       callq   *%rdi
+
+       /*
+       * Generate a 64-bit quantity with possibly random characteristics, intended for use
+       * before the kernel entropy pool is available. The processor's RNG is used if
+       * available, and a value derived from the Time Stamp Counter is returned if not.
+       * Multiple invocations may result in well-correlated values if sourced from the TSC.
+       */
+Entry(ml_early_random)
+       mov     %rbx, %rsi
+       mov     $1, %eax
+       cpuid
+       mov     %rsi, %rbx
+       test    $(1 << 30), %ecx
+       jz      Lnon_rdrand
+       RDRAND_RAX              /* RAX := 64 bits of DRBG entropy */
+       jnc     Lnon_rdrand
+       ret
+Lnon_rdrand:
+       rdtsc /* EDX:EAX := TSC */
+       /* Distribute low order bits */
+       mov     %eax, %ecx
+       xor     %al, %ah
+       shl     $16, %rcx
+       xor     %rcx, %rax
+       xor     %eax, %edx
+
+       /* Incorporate ASLR entropy, if any */
+       lea     (%rip), %rcx
+       shr     $21, %rcx
+       movzbl  %cl, %ecx
+       shl     $16, %ecx
+       xor     %ecx, %edx
+
+       mov     %ah, %cl
+       ror     %cl, %edx /* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */
+       shl     $32, %rdx
+       xor     %rdx, %rax
+       mov     %cl, %al
+       ret
+       
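The capability test at the top (test $(1 << 30), %ecx after CPUID leaf 1) is the standard RDRAND feature check, CPUID.01H:ECX[30]. The same probe in C looks like this (a sketch of the assembly's check, not kernel code):

    #include <stdint.h>

    /* CPUID.01H:ECX bit 30 advertises RDRAND support. */
    static int have_rdrand(void)
    {
            uint32_t eax = 1, ebx, ecx, edx;
            __asm__ volatile ("cpuid"
                              : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
            return (ecx >> 30) & 1;
    }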
+#if CONFIG_VMX
+
+/*
+ *     __vmxon -- Enter VMX Operation
+ *     int __vmxon(addr64_t v);
+ */
+Entry(__vmxon)
+       FRAME
+       push    %rdi
+       
+       mov     $(VMX_FAIL_INVALID), %ecx
+       mov     $(VMX_FAIL_VALID), %edx
+       mov     $(VMX_SUCCEED), %eax
+       vmxon   (%rsp)
+       cmovcl  %ecx, %eax      /* CF = 1, ZF = 0 */
+       cmovzl  %edx, %eax      /* CF = 0, ZF = 1 */
+
+       pop     %rdi
+       EMARF
+       ret
+
+/*
+ *     __vmxoff -- Leave VMX Operation
+ *     int __vmxoff(void);
+ */
+Entry(__vmxoff)
+       FRAME
+       
+       mov     $(VMX_FAIL_INVALID), %ecx
+       mov     $(VMX_FAIL_VALID), %edx
+       mov     $(VMX_SUCCEED), %eax
+       vmxoff
+       cmovcl  %ecx, %eax      /* CF = 1, ZF = 0 */
+       cmovzl  %edx, %eax      /* CF = 0, ZF = 1 */
+
+       EMARF
+       ret
+
+#endif /* CONFIG_VMX */
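Per the Intel SDM convention, VMX instructions report status through the flags: CF=1 means VMfailInvalid, ZF=1 means VMfailValid (error code in the current VMCS), both clear means VMsucceed. The paired cmov instructions fold that into the return value; decoded in C (sketch, with the VMX_* codes assumed from vmx_asm.h as used above):

    static int vmx_decode_status(int cf, int zf)
    {
            if (cf) return VMX_FAIL_INVALID; /* CF=1: VMfailInvalid         */
            if (zf) return VMX_FAIL_VALID;   /* ZF=1: VMfailValid, see VMCS */
            return VMX_SUCCEED;              /* CF=ZF=0: VMsucceed          */
    }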
index 147372eff5f76acce3817936439816d491451c1e..2bc5bfab73952ee99cb883d6afe4c668130f4347 100644 (file)
@@ -89,7 +89,6 @@
  */
 
 #include <string.h>
-#include <mach_kdb.h>
 #include <mach_ldebug.h>
 
 #include <libkern/OSAtomic.h>
 #include <kern/thread.h>
 #include <kern/zalloc.h>
 #include <kern/queue.h>
+#include <kern/ledger.h>
 #include <kern/mach_param.h>
 
 #include <kern/lock.h>
 #include <i386/pmap_internal.h>
 #include <i386/pmap_pcid.h>
 
-#if    MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif /* MACH_KDB */
-
 #include <vm/vm_protos.h>
 
 #include <i386/mp.h>
 #include <i386/mp_desc.h>
+#include <libkern/kernel_mach_header.h>
+
+#include <pexpert/i386/efi.h>
 
 
 #ifdef IWANTTODEBUG
@@ -204,7 +200,7 @@ static struct vm_object kpdptobj_object_store;
  *     One byte per physical page.
  */
 char           *pmap_phys_attributes;
-unsigned int   last_managed_page = 0;
+ppnum_t                last_managed_page = 0;
 
 /*
  *     Amount of virtual memory mapped by one
@@ -226,9 +222,6 @@ pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
 struct pmap    kernel_pmap_store;
 pmap_t         kernel_pmap;
 
-pd_entry_t     high_shared_pde;
-pd_entry_t     commpage64_pde;
-
 struct zone    *pmap_zone;             /* zone of pmap structures */
 
 struct zone    *pmap_anchor_zone;
@@ -252,12 +245,11 @@ pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
 
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
+const boolean_t        pmap_disable_kheap_nx = FALSE;
+const boolean_t        pmap_disable_kstack_nx = FALSE;
+extern boolean_t doconstro_override;
 
+extern long __stack_chk_guard[];
 
 /*
  *     Map memory at initialization.  The physical addresses being
@@ -279,7 +271,7 @@ pmap_map(
        ps = PAGE_SIZE;
        while (start_addr < end_addr) {
                pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
-                          (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
+                          (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, TRUE);
                virt += ps;
                start_addr += ps;
        }
@@ -293,7 +285,8 @@ extern  vm_offset_t         sHIB;
 extern  vm_offset_t            eHIB;
 extern  vm_offset_t            stext;
 extern  vm_offset_t            etext;
-extern  vm_offset_t            sdata;
+extern  vm_offset_t            sdata, edata;
+extern  vm_offset_t            sconstdata, econstdata;
 
 extern void                    *KPTphys;
 
@@ -355,7 +348,7 @@ pmap_bootstrap(
 
        kernel_pmap = &kernel_pmap_store;
        kernel_pmap->ref_count = 1;
-       kernel_pmap->nx_enabled = FALSE;
+       kernel_pmap->nx_enabled = TRUE;
        kernel_pmap->pm_task_map = TASK_MAP_64BIT;
        kernel_pmap->pm_obj = (vm_object_t) NULL;
        kernel_pmap->dirbase = (pd_entry_t *)((uintptr_t)IdlePTD);
@@ -435,6 +428,27 @@ pmap_bootstrap(
        if (pmap_smep_enabled)
                printf("PMAP: Supervisor Mode Execute Protection enabled\n");
 
+#if    DEBUG
+       printf("Stack canary: 0x%lx\n", __stack_chk_guard[0]);
+       printf("ml_early_random(): 0x%qx\n", ml_early_random());
+#endif
+       boolean_t ptmp;
+       /* Check if the user has requested disabling stack or heap no-execute
+        * enforcement. These are "const" variables; that qualifier is cast away
+        * when altering them. The TEXT/DATA const sections are marked
+        * write protected later in the kernel startup sequence, so altering
+        * them is possible at this point, in pmap_bootstrap().
+        */
+       if (PE_parse_boot_argn("-pmap_disable_kheap_nx", &ptmp, sizeof(ptmp))) {
+               boolean_t *pdknxp = (boolean_t *) &pmap_disable_kheap_nx;
+               *pdknxp = TRUE;
+       }
+
+       if (PE_parse_boot_argn("-pmap_disable_kstack_nx", &ptmp, sizeof(ptmp))) {
+               boolean_t *pdknhp = (boolean_t *) &pmap_disable_kstack_nx;
+               *pdknhp = TRUE;
+       }
+
        boot_args *args = (boot_args *)PE_state.bootArgs;
        if (args->efiMode == kBootArgsEfiMode32) {
                printf("EFI32: kernel virtual space limited to 4GB\n");
@@ -552,7 +566,7 @@ pmap_init(void)
        for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
                if (pmptr->type != kEfiConventionalMemory)
                        continue;
-               unsigned int pn;
+               ppnum_t pn;
                for (pn = pmptr->base; pn <= pmptr->end; pn++) {
                        if (pn < last_pn) {
                                pmap_phys_attributes[pn] |= PHYS_MANAGED;
@@ -584,13 +598,12 @@ pmap_init(void)
        pmap_anchor_zone = zinit(PAGE_SIZE, task_max, PAGE_SIZE, "pagetable anchors");
        zone_change(pmap_anchor_zone, Z_NOENCRYPT, TRUE);
 
-#if    ZONE_DEBUG
        /* The anchor is required to be page aligned. Zone debugging adds
-        * padding which may violate that requirement. Disable it
-        * to avoid assumptions.
+        * padding which may violate that requirement. Tell the zone
+        * subsystem that alignment is required.
         */
-       zone_debug_disable(pmap_anchor_zone);
-#endif 
+
+       zone_change(pmap_anchor_zone, Z_ALIGNMENT_REQUIRED, TRUE);
 
        s = (vm_size_t) sizeof(struct pv_hashed_entry);
        pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
@@ -619,7 +632,41 @@ pmap_init(void)
         * Ensure the kernel's PML4 entry exists for the basement
         * before this is shared with any user.
         */
-       pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT);
+       pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT, PMAP_EXPAND_OPTIONS_NONE);
+}
+
+static
+void pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX, boolean_t ro) {
+       uint64_t ev = sv + nxrosz, cv = sv;
+       pd_entry_t *pdep;
+       pt_entry_t *ptep = NULL;
+
+       assert(((sv & 0xFFFULL) | (nxrosz & 0xFFFULL)) == 0);
+
+       for (pdep = pmap_pde(npmap, cv); pdep != NULL && (cv < ev);) {
+               uint64_t pdev = (cv & ~((uint64_t)PDEMASK));
+
+               if (*pdep & INTEL_PTE_PS) {
+                       if (NX)
+                               *pdep |= INTEL_PTE_NX;
+                       if (ro)
+                               *pdep &= ~INTEL_PTE_WRITE;
+                       cv += NBPD;
+                       cv &= ~((uint64_t) PDEMASK);
+                       pdep = pmap_pde(npmap, cv);
+                       continue;
+               }
+
+               for (ptep = pmap_pte(npmap, cv); ptep != NULL && (cv < (pdev + NBPD)) && (cv < ev);) {
+                       if (NX)
+                               *ptep |= INTEL_PTE_NX;
+                       if (ro)
+                               *ptep &= ~INTEL_PTE_WRITE;
+                       cv += NBPT;
+                       ptep = pmap_pte(npmap, cv);
+               }
+       }
+       DPRINTF("%s(0x%llx, 0x%llx, %u, %u): 0x%llx, 0x%llx\n", __FUNCTION__, sv, nxrosz, NX, ro, cv, ptep ? *ptep: 0);
 }
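pmap_mark_range() is the worker used below for segment permission lockdown; a typical call makes a page-aligned kernel range non-executable and read-only (sketch; seg_start and seg_size are placeholder names):

    /* Both address and size must be page-aligned (the assert enforces it). */
    pmap_mark_range(kernel_pmap, seg_start,
                    round_page_64(seg_size),
                    TRUE,   /* NX: strip execute */
                    TRUE);  /* ro: strip write   */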
 
 /*
@@ -667,50 +714,72 @@ pmap_init(void)
  * 4K pages covering [stext,etext] are coalesced as 2M large pages.
  * The now unused level-1 PTE pages are also freed.
  */
-extern uint32_t pmap_reserved_ranges;
+extern ppnum_t vm_kernel_base_page;
 void
 pmap_lowmem_finalize(void)
 {
        spl_t           spl;
        int             i;
 
-       /* Check the kernel is linked at the expected base address */
-       if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
-           I386_KERNEL_IMAGE_BASE_PAGE)
-               panic("pmap_lowmem_finalize() unexpected kernel base address");
-
        /*
         * Update wired memory statistics for early boot pages
         */
-       PMAP_ZINFO_PALLOC(bootstrap_wired_pages * PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);
 
        /*
-        * Free all pages in pmap regions below the base:
+        * Free pages in pmap regions below the base:
         * rdar://6332712
         *      We can't free all the pages to VM that EFI reports available.
         *      Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
         *      There's also a size miscalculation here: pend is one page less
         *      than it should be but this is not fixed to be backwards
         *      compatible.
-        *      Due to this current EFI limitation, we take only the first
-        *      entry in the memory region table. However, the loop is retained
-        *      (with the intended termination criteria commented out) in the
-        *      hope that some day we can free all low-memory ranges.
+        * This is important for KASLR because up to 256*2MB = 512MB of space
+        * has to be released to VM.
         */
        for (i = 0;
-//          pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
-            i < 1  && (pmap_reserved_ranges == 0);
+            pmap_memory_regions[i].end < vm_kernel_base_page;
             i++) {
-               vm_offset_t     pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
-               vm_offset_t     pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
-//             vm_offset_t     pend  = i386_ptob(pmap_memory_regions[i].end+1);
+               vm_offset_t     pbase = i386_ptob(pmap_memory_regions[i].base);
+               vm_offset_t     pend  = i386_ptob(pmap_memory_regions[i].end+1);
 
-               DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
+               DBG("pmap region %d [%p..[%p\n",
+                   i, (void *) pbase, (void *) pend);
+
+               if (pmap_memory_regions[i].attribute & EFI_MEMORY_KERN_RESERVED)
+                       continue;
+               /*
+                * rdar://6332712
+                * Adjust limits not to free pages in range 0xc0000-0xff000.
+                */
+               if (pbase >= 0xc0000 && pend <= 0x100000)
+                       continue;
+               if (pbase < 0xc0000 && pend > 0x100000) {
+                       /* page range entirely within region, free lower part */
+                       DBG("- ml_static_mfree(%p,%p)\n",
+                           (void *) ml_static_ptovirt(pbase),
+                           (void *) (0xc0000-pbase));
+                       ml_static_mfree(ml_static_ptovirt(pbase),0xc0000-pbase);
+                       pbase = 0x100000;
+               }
+               if (pbase < 0xc0000)
+                       pend = MIN(pend, 0xc0000);
+               if (pend  > 0x100000)
+                       pbase = MAX(pbase, 0x100000);
+               DBG("- ml_static_mfree(%p,%p)\n",
                    (void *) ml_static_ptovirt(pbase),
-                   (void *) (pend - pbase), i);
+                   (void *) (pend - pbase));
                ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
        }
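Concretely, with the rdar://6332712 carve-out: a region spanning [0x1000, 0x200000) is released as two pieces, [0x1000, 0xc0000) and [0x100000, 0x200000), while [0xc0000, 0x100000) stays resident; a region lying entirely inside the hole is skipped outright. The clamp logic reduces to (sketch, with free_range() standing in for ml_static_mfree() on virtual addresses):

    /* Free [pbase, pend) minus the sleep/wake-unsafe hole [0xc0000, 0x100000). */
    static void free_outside_hole(vm_offset_t pbase, vm_offset_t pend)
    {
            if (pbase < 0xc0000)
                    free_range(pbase, MIN(pend, 0xc0000));   /* below the hole */
            if (pend > 0x100000)
                    free_range(MAX(pbase, 0x100000), pend);  /* above the hole */
    }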
 
+       /* A final pass to get rid of all initial identity mappings to
+        * low pages.
+        */
+       DPRINTF("%s: Removing mappings from 0->0x%lx\n", __FUNCTION__, vm_kernel_base);
+
+       /* Remove all mappings past the descriptor aliases and low globals */
+       pmap_remove(kernel_pmap, LOWGLOBAL_ALIAS + PAGE_SIZE, vm_kernel_base);
+
        /*
         * If text and data are both 2MB-aligned,
         * we can map text with large-pages,
@@ -746,7 +815,7 @@ pmap_lowmem_finalize(void)
 
                        ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
                        if (ptep)
-                               pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
+                               pmap_store_pte(ptep, *ptep & ~INTEL_PTE_WRITE);
                }
        }
 
@@ -784,7 +853,7 @@ pmap_lowmem_finalize(void)
                        pde |= pte_phys;        /* take page frame from pte */
 
                        if (wpkernel)
-                               pde &= ~INTEL_PTE_RW;
+                               pde &= ~INTEL_PTE_WRITE;
                        DBG("pmap_store_pte(%p,0x%llx)\n",
                                (void *)pdep, pde);
                        pmap_store_pte(pdep, pde);
@@ -807,19 +876,98 @@ pmap_lowmem_finalize(void)
                pmap_kernel_text_ps = I386_LPGBYTES;
        }
 
-       /* map lowmem global page into fixed addr */
-       pt_entry_t *pte = NULL;
-       if (0 == (pte = pmap_pte(kernel_pmap,
-                                VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
-               panic("lowmem pte");
-       /* make sure it is defined on page boundary */
-       assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
-       pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
-                               | INTEL_PTE_REF
-                               | INTEL_PTE_MOD
-                               | INTEL_PTE_WIRED
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_RW);
+       boolean_t doconstro = TRUE;
+
+       (void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro));
+
+       if ((sconstdata | econstdata) & PAGE_MASK) {
+               kprintf("Const DATA misaligned 0x%lx 0x%lx\n", sconstdata, econstdata);
+               if ((sconstdata & PAGE_MASK) || (doconstro_override == FALSE))
+                       doconstro = FALSE;
+       }
+
+       if ((sconstdata > edata) || (sconstdata < sdata) || ((econstdata - sconstdata) >= (edata - sdata))) {
+               kprintf("Const DATA incorrect size 0x%lx 0x%lx 0x%lx 0x%lx\n", sconstdata, econstdata, sdata, edata);
+               doconstro = FALSE;
+       }
+
+       if (doconstro)
+               kprintf("Marking const DATA read-only\n");
+
+       vm_offset_t dva;
+
+       for (dva = sdata; dva < edata; dva += I386_PGBYTES) {
+               assert(((sdata | edata) & PAGE_MASK) == 0);
+               if ( (sdata | edata) & PAGE_MASK) {
+                       kprintf("DATA misaligned, 0x%lx, 0x%lx\n", sdata, edata);
+                       break;
+               }
+
+               pt_entry_t dpte, *dptep = pmap_pte(kernel_pmap, dva);
+
+               dpte = *dptep;
+
+               assert((dpte & INTEL_PTE_VALID));
+               if ((dpte & INTEL_PTE_VALID) == 0) {
+                       kprintf("Missing data mapping 0x%lx 0x%lx 0x%lx\n", dva, sdata, edata);
+                       continue;
+               }
+
+               dpte |= INTEL_PTE_NX;
+               if (doconstro && (dva >= sconstdata) && (dva < econstdata)) {
+                       dpte &= ~INTEL_PTE_WRITE;
+               }
+               pmap_store_pte(dptep, dpte);
+       }
+       kernel_segment_command_t * seg;
+       kernel_section_t         * sec;
+
+       for (seg = firstseg(); seg != NULL; seg = nextsegfromheader(&_mh_execute_header, seg)) {
+               if (!strcmp(seg->segname, "__TEXT") ||
+                   !strcmp(seg->segname, "__DATA")) {
+                       continue;
+               }
+               //XXX
+               if (!strcmp(seg->segname, "__KLD")) {
+                       continue;
+               }
+               if (!strcmp(seg->segname, "__HIB")) {
+                       for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
+                               if (sec->addr & PAGE_MASK)
+                                       panic("__HIB segment's sections misaligned");
+                               if (!strcmp(sec->sectname, "__text")) {
+                                       pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), FALSE, TRUE);
+                               } else {
+                                       pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), TRUE, FALSE);
+                               }
+                       }
+               } else {
+                       pmap_mark_range(kernel_pmap, seg->vmaddr, round_page_64(seg->vmsize), TRUE, FALSE);
+               }
+       }
+
+       /*
+        * If we're debugging, map the low global vector page at the fixed
+        * virtual address.  Otherwise, remove the mapping for this.
+        */
+       if (debug_boot_arg) {
+               pt_entry_t *pte = NULL;
+               if (0 == (pte = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS)))
+                       panic("lowmem pte");
+               /* make sure it is defined on page boundary */
+               assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
+               pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
+                                       | INTEL_PTE_REF
+                                       | INTEL_PTE_MOD
+                                       | INTEL_PTE_WIRED
+                                       | INTEL_PTE_VALID
+                                       | INTEL_PTE_WRITE
+                                       | INTEL_PTE_NX);
+       } else {
+               pmap_remove(kernel_pmap,
+                           LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
+       }
+       
        splx(spl);
        if (pmap_pcid_ncpus)
                tlb_flush_global();
@@ -908,6 +1056,7 @@ pmap_is_empty(
  */
 pmap_t
 pmap_create(
+       ledger_t                ledger,
            vm_map_size_t       sz,
            boolean_t           is_64bit)
 {
@@ -942,10 +1091,13 @@ pmap_create(
        p->ref_count = 1;
        p->nx_enabled = 1;
        p->pm_shared = FALSE;
+       ledger_reference(ledger);
+       p->ledger = ledger;
 
        p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
        if (pmap_pcid_ncpus)
                pmap_pcid_initialize(p);
+
        p->pm_pml4 = zalloc(pmap_anchor_zone);
 
        pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);
@@ -973,7 +1125,7 @@ pmap_create(
        kpml4 = kernel_pmap->pm_pml4;
        pml4[KERNEL_PML4_INDEX]    = kpml4[KERNEL_PML4_INDEX];
        pml4[KERNEL_KEXTS_INDEX]   = kpml4[KERNEL_KEXTS_INDEX];
-       pml4[KERNEL_PHYSMAP_INDEX] = kpml4[KERNEL_PHYSMAP_INDEX];
+       pml4[KERNEL_PHYSMAP_PML4_INDEX] = kpml4[KERNEL_PHYSMAP_PML4_INDEX];
 
        PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
                   p, is_64bit, 0, 0, 0);
@@ -1043,8 +1195,8 @@ pmap_destroy(pmap_t       p)
        vm_object_deallocate(p->pm_obj);
 
        OSAddAtomic(-inuse_ptepages,  &inuse_ptepages_count);
-       PMAP_ZINFO_PFREE(inuse_ptepages * PAGE_SIZE);
-
+       PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE);
+       ledger_dereference(p->ledger);
        zfree(pmap_zone, p);
 
        PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
@@ -1079,7 +1231,6 @@ pmap_remove_some_phys(
 
 }
 
-
 /*
  *     Set the physical protection on the
  *     specified range of this map as requested.
@@ -1142,19 +1293,14 @@ pmap_protect(
                                        continue;
 
                                if (prot & VM_PROT_WRITE)
-                                       pmap_update_pte(spte, *spte,
-                                               *spte | INTEL_PTE_WRITE);
+                                       pmap_update_pte(spte, 0, INTEL_PTE_WRITE);
                                else
-                                       pmap_update_pte(spte, *spte,
-                                               *spte & ~INTEL_PTE_WRITE);
+                                       pmap_update_pte(spte, INTEL_PTE_WRITE, 0);
 
                                if (set_NX)
-                                       pmap_update_pte(spte, *spte,
-                                               *spte | INTEL_PTE_NX);
+                                       pmap_update_pte(spte, 0, INTEL_PTE_NX);
                                else
-                                       pmap_update_pte(spte, *spte,
-                                               *spte & ~INTEL_PTE_NX);
-
+                                       pmap_update_pte(spte, INTEL_PTE_NX, 0);
                                num_found++;
                        }
                }
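
The rewritten calls above use a new clear/set form of pmap_update_pte. A hedged sketch of the semantics these call sites imply — clear the bits in the second argument, then set the bits in the third, presumably atomically (OSCompareAndSwap64 is from libkern/OSAtomic.h); this is illustrative, not the actual implementation:

    static inline void
    pmap_update_pte_sketch(pt_entry_t *pte, pt_entry_t clear, pt_entry_t set)
    {
            pt_entry_t opte, npte;
            do {
                    opte = *pte;
                    npte = (opte & ~clear) | set;   /* clear first, then set */
            } while (!OSCompareAndSwap64(opte, npte, (volatile uint64_t *)pte));
    }
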
@@ -1190,17 +1336,17 @@ pmap_map_block(
                cur_page_size =  PAGE_SIZE;
 
        for (page = 0; page < size; page+=cur_page_size/PAGE_SIZE) {
-               pmap_enter(pmap, va, pa, prot, attr, TRUE);
+               pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
                va += cur_page_size;
                pa+=cur_page_size/PAGE_SIZE;
        }
 }
 
-
-void
+kern_return_t
 pmap_expand_pml4(
        pmap_t          map,
-       vm_map_offset_t vaddr)
+       vm_map_offset_t vaddr,
+       unsigned int options)
 {
        vm_page_t       m;
        pmap_paddr_t    pa;
@@ -1213,9 +1359,11 @@ pmap_expand_pml4(
        /*
         *      Allocate a VM page for the pml4 page
         */
-       while ((m = vm_page_grab()) == VM_PAGE_NULL)
+       while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+               if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
+                       return KERN_RESOURCE_SHORTAGE;
                VM_PAGE_WAIT();
-
+       }
        /*
         *      put the page into the pmap's obj list so it
         *      can be found later.
@@ -1235,7 +1383,7 @@ pmap_expand_pml4(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj_pml4);
@@ -1251,8 +1399,8 @@ pmap_expand_pml4(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
 
 #if 0 /* DEBUG */
@@ -1276,13 +1424,11 @@ pmap_expand_pml4(
 
        PMAP_UNLOCK(map);
 
-       return;
+       return KERN_SUCCESS;
 }
 
-void
-pmap_expand_pdpt(
-                pmap_t map,
-                vm_map_offset_t vaddr)
+kern_return_t
+pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
 {
        vm_page_t       m;
        pmap_paddr_t    pa;
@@ -1293,14 +1439,19 @@ pmap_expand_pdpt(
        DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);
 
        while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
-               pmap_expand_pml4(map, vaddr);
+               kern_return_t pep4kr = pmap_expand_pml4(map, vaddr, options);
+               if (pep4kr != KERN_SUCCESS)
+                       return pep4kr;
        }
 
        /*
         *      Allocate a VM page for the pdpt page
         */
-       while ((m = vm_page_grab()) == VM_PAGE_NULL)
+       while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+               if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
+                       return KERN_RESOURCE_SHORTAGE;
                VM_PAGE_WAIT();
+       }
 
        /*
         *      put the page into the pmap's obj list so it
@@ -1321,7 +1472,7 @@ pmap_expand_pdpt(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj_pdpt);
@@ -1337,8 +1488,8 @@ pmap_expand_pdpt(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
 
 #if 0 /* DEBUG */
@@ -1362,7 +1513,7 @@ pmap_expand_pdpt(
 
        PMAP_UNLOCK(map);
 
-       return;
+       return KERN_SUCCESS;
 
 }
 
@@ -1383,10 +1534,11 @@ pmap_expand_pdpt(
  *     has been expanded enough.
  *     (We won't loop forever, since page tables aren't shrunk.)
  */
-void
+kern_return_t
 pmap_expand(
        pmap_t          map,
-       vm_map_offset_t vaddr)
+       vm_map_offset_t vaddr,
+       unsigned int options)
 {
        pt_entry_t              *pdp;
        register vm_page_t      m;
@@ -1406,15 +1558,19 @@ pmap_expand(
 
 
        while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
-               /* need room for another pde entry */
-               pmap_expand_pdpt(map, vaddr);
+               kern_return_t pepkr = pmap_expand_pdpt(map, vaddr, options);
+               if (pepkr != KERN_SUCCESS)
+                       return pepkr;
        }
 
        /*
         *      Allocate a VM page for the pde entries.
         */
-       while ((m = vm_page_grab()) == VM_PAGE_NULL)
+       while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+               if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
+                       return KERN_RESOURCE_SHORTAGE;
                VM_PAGE_WAIT();
+       }
 
        /*
         *      put the page into the pmap's obj list so it
@@ -1435,7 +1591,7 @@ pmap_expand(
 
        OSAddAtomic(1,  &inuse_ptepages_count);
        OSAddAtomic64(1,  &alloc_ptepages_count);
-       PMAP_ZINFO_PALLOC(PAGE_SIZE);
+       PMAP_ZINFO_PALLOC(map, PAGE_SIZE);
 
        /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
        vm_object_lock(map->pm_obj);
@@ -1452,8 +1608,8 @@ pmap_expand(
                VM_PAGE_FREE(m);
 
                OSAddAtomic(-1,  &inuse_ptepages_count);
-               PMAP_ZINFO_PFREE(PAGE_SIZE);
-               return;
+               PMAP_ZINFO_PFREE(map, PAGE_SIZE);
+               return KERN_SUCCESS;
        }
 
 #if 0 /* DEBUG */
@@ -1476,7 +1632,7 @@ pmap_expand(
 
        PMAP_UNLOCK(map);
 
-       return;
+       return KERN_SUCCESS;
 }
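
With the new options argument, pmap_expand and its helpers can fail fast instead of blocking in VM_PAGE_WAIT(). A hedged sketch of a caller that must not block; the retry policy shown is illustrative, not part of this change:

    kern_return_t kr = pmap_expand(map, vaddr, PMAP_EXPAND_OPTIONS_NOWAIT);
    if (kr == KERN_RESOURCE_SHORTAGE) {
            /* no free page and we may not wait: back out and let the
             * caller retry once memory pressure eases */
            return kr;
    }
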
 
 /* On K64 machines with more than 32GB of memory, pmap_steal_memory
@@ -1658,7 +1814,7 @@ pmap_collect(
                        VM_PAGE_FREE(m);
 
                        OSAddAtomic(-1,  &inuse_ptepages_count);
-                       PMAP_ZINFO_PFREE(PAGE_SIZE);
+                       PMAP_ZINFO_PFREE(p, PAGE_SIZE);
                    }
 
                    PMAP_LOCK(p);
@@ -1710,7 +1866,6 @@ pmap_pageable(
 #endif /* lint */
 }
 
-
 void 
 invalidate_icache(__unused vm_offset_t addr,
                  __unused unsigned     cnt,
@@ -2015,6 +2170,10 @@ pmap_flush_tlbs(pmap_t   pmap, vm_map_offset_t startv, vm_map_offset_t endv)
                }
        }
 
+       if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
+               panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
+       }
+
        PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
            pmap, cpus_to_signal, startv, endv, 0);
 }
@@ -2054,3 +2213,114 @@ pmap_update_interrupt(void)
         PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
                   0, 0, 0, 0, 0);
 }
+
+#include <mach/mach_vm.h>      /* mach_vm_region_recurse() */
+/* Scan kernel pmap for W+X PTEs, scan kernel VM map for W+X map entries
+ * and identify ranges with mismatched VM permissions and PTE permissions
+ */
+kern_return_t
+pmap_permissions_verify(pmap_t ipmap, vm_map_t ivmmap, vm_offset_t sv, vm_offset_t ev) {
+       vm_offset_t cv = sv;
+       kern_return_t rv = KERN_SUCCESS;
+       uint64_t skip4 = 0, skip2 = 0;
+
+       sv &= ~PAGE_MASK_64;
+       ev &= ~PAGE_MASK_64;
+       while (cv < ev) {
+               if (__improbable((cv > 0x00007FFFFFFFFFFFULL) &&
+                       (cv < 0xFFFF800000000000ULL))) {
+                       cv = 0xFFFF800000000000ULL;
+               }
+               /* Potential inconsistencies from not holding pmap lock
+                * but harmless for the moment.
+                */
+               if (((cv & PML4MASK) == 0) && (pmap64_pml4(ipmap, cv) == 0)) {
+                       if ((cv + NBPML4) > cv)
+                               cv += NBPML4;
+                       else
+                               break;
+                       skip4++;
+                       continue;
+               }
+               if (((cv & PDMASK) == 0) && (pmap_pde(ipmap, cv) == 0)) {
+                       if ((cv + NBPD) > cv)
+                               cv += NBPD;
+                       else
+                               break;
+                       skip2++;
+                       continue;
+               }
+
+               pt_entry_t *ptep = pmap_pte(ipmap, cv);
+               if (ptep && (*ptep & INTEL_PTE_VALID)) {
+                       if (*ptep & INTEL_PTE_WRITE) {
+                               if (!(*ptep & INTEL_PTE_NX)) {
+                                       kprintf("W+X PTE at 0x%lx, P4: 0x%llx, P3: 0x%llx, P2: 0x%llx, PT: 0x%llx, VP: %u\n", cv, *pmap64_pml4(ipmap, cv), *pmap64_pdpt(ipmap, cv), *pmap64_pde(ipmap, cv), *ptep, pmap_valid_page((ppnum_t)(i386_btop(pte_to_pa(*ptep)))));
+                                       rv = KERN_FAILURE;
+                               }
+                       }
+               }
+               cv += PAGE_SIZE;
+       }
+       kprintf("Completed pmap scan\n");
+       cv = sv;
+
+       struct vm_region_submap_info_64 vbr;
+       mach_msg_type_number_t vbrcount = 0;
+       mach_vm_size_t  vmsize;
+       vm_prot_t       prot;
+       uint32_t nesting_depth = 0;
+       kern_return_t kret;
+       
+       while (cv < ev) {
+               
+               for (;;) {
+                       vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
+                       if ((kret = mach_vm_region_recurse(ivmmap,
+                                   (mach_vm_address_t *) &cv, &vmsize, &nesting_depth, 
+                                       (vm_region_recurse_info_t)&vbr,
+                                       &vbrcount)) != KERN_SUCCESS) {
+                               break;
+                       }
+
+                       if (vbr.is_submap) {
+                               nesting_depth++;
+                               continue;
+                       } else {
+                               break;
+                       }
+               }
+
+               if (kret != KERN_SUCCESS)
+                       break;
+
+               prot = vbr.protection;
+
+               if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
+                       kprintf("W+X map entry at address 0x%lx\n", cv);
+                       rv = KERN_FAILURE;
+               }
+
+               if (prot) {
+                       vm_offset_t pcv;
+                       for (pcv = cv; pcv < cv + vmsize; pcv += PAGE_SIZE) {
+                               pt_entry_t *ptep = pmap_pte(ipmap, pcv);
+                               vm_prot_t tprot;
+
+                               if ((ptep == NULL) || !(*ptep & INTEL_PTE_VALID))
+                                       continue;
+                               tprot = VM_PROT_READ;
+                               if (*ptep & INTEL_PTE_WRITE)
+                                       tprot |= VM_PROT_WRITE;
+                               if ((*ptep & INTEL_PTE_NX) == 0)
+                                       tprot |= VM_PROT_EXECUTE;
+                               if (tprot != prot) {
+                                       kprintf("PTE/map entry permissions mismatch at address 0x%lx, pte: 0x%llx, protection: 0x%x\n", pcv, *ptep, prot);
+                                       rv = KERN_FAILURE;
+                               }
+                       }
+               }
+               cv += vmsize;
+       }
+       return rv;
+}
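
A hedged usage sketch for the verifier added above — the bounds are the usual kernel-space constants and the call site is illustrative:

    kern_return_t kr = pmap_permissions_verify(kernel_pmap, kernel_map,
        VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
    if (kr != KERN_SUCCESS)
            kprintf("W+X or mismatched kernel mappings detected\n");
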
index 8ca246de3c1c9ed18996a46ff790df3758556b7d..22045a2d526eb03171c96662aeb3b834f86429e1 100644 (file)
@@ -57,7 +57,7 @@
  */
 
 #include <platforms.h>
-#include <mach_kdb.h>
+#include <debug.h>
 
 #include <i386/asm.h>
 #include <i386/proc_reg.h>
 
 /*
  * Interrupt and bootup stack for initial processor.
+ * Note: we switch to a dynamically allocated interrupt stack once VM is up.
  */
 
-       /* in the __HIB section since the hibernate restore code uses this stack. */
+/* in the __HIB section since the hibernate restore code uses this stack. */
        .section __HIB, __data
        .align  12
 
@@ -95,17 +96,6 @@ EXT(gIOHibernateRestoreStackEnd):
 
        .section __DATA, __data
 
-/*
- * Stack for last-gasp double-fault handler.
- */
-       .align  12
-       .globl  EXT(df_task_stack)
-EXT(df_task_stack):
-       .space  INTSTACK_SIZE
-       .globl  EXT(df_task_stack_end)
-EXT(df_task_stack_end):
-
-
 /*
  * Stack for machine-check handler.
  */
@@ -116,6 +106,25 @@ EXT(mc_task_stack):
        .globl  EXT(mc_task_stack_end)
 EXT(mc_task_stack_end):
 
+       /* Must not clobber EDI */
+#define SWITCH_TO_64BIT_MODE                                    \
+       movl    $(CR4_PAE),%eax         /* enable PAE */        ;\
+       movl    %eax,%cr4                                       ;\
+       movl    $MSR_IA32_EFER,%ecx                             ;\
+       rdmsr                                                   ;\
+       /* enable long mode, NX */                              ;\
+       orl     $(MSR_IA32_EFER_LME | MSR_IA32_EFER_NXE),%eax   ;\
+       wrmsr                                                   ;\
+       movl    $EXT(BootPML4),%eax                             ;\
+       movl    %eax,%cr3                                       ;\
+       movl    %cr0,%eax                                       ;\
+       orl     $(CR0_PG|CR0_WP),%eax   /* enable paging */     ;\
+       movl    %eax,%cr0                                       ;\
+       /* "The Aussie Maneuver" ("Myria" variant) */           ;\
+       pushl $(0xcb<<24)|KERNEL64_CS /* reload CS with 0x08 */ ;\
+       call .-1                                                ;\
+       .code64
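
On a hedged reading, the closing pushl/call pair works as follows: the pushl leaves KERNEL64_CS on the stack with 0xcb (the lret opcode) as the immediate's top byte; "call .-1" pushes a return address and jumps to that 0xcb byte inside the instruction stream, so the far return pops the return address and KERNEL64_CS together, reloading %cs with a 64-bit code segment and resuming at the instruction after the call, now executing in long mode.
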
+
 /*
  * BSP CPU start here.
  *     eax points to kernbootstruct
@@ -123,51 +132,17 @@ EXT(mc_task_stack_end):
  * Environment:
  *     protected mode, no paging, flat 32-bit address space.
  *     (Code/data/stack segments have base == 0, limit == 4G)
- */
-
-#define SWITCH_TO_64BIT_MODE \
-       movl    $(CR4_PAE),%eax         /* enable PAE */                ;\
-       movl    %eax,%cr4                                                                       ;\
-       movl    $MSR_IA32_EFER,%ecx                                                     ;\
-       rdmsr                                                                                           ;\
-       orl     $MSR_IA32_EFER_LME,%eax /* enable long mode */  ;\
-       wrmsr                                                                                           ;\
-       movl    $INITPT_SEG_BASE,%eax                                           ;\
-       movl    %eax,%cr3                                                                       ;\
-       movl    %cr0,%eax                                                                       ;\
-       orl     $(CR0_PG|CR0_WP),%eax   /* enable paging */             ;\
-       movl    %eax,%cr0                                                                       ;\
-       /* "The Aussie Maneuver" ("Myria" variant) */           ;\
-       pushl $(0xcb<<24)|KERNEL64_CS /* reload CS with 0x08 */ ;\
-       call .-1                                                                                        ;\
-       .code64
-
-/*
- * [ We used to have a reason for the following statement; ]
- * [ but the issue has been fixed. The line is true        ]
- * [ nevertheless, therefore it should remain there.       ]
- * This proves that Little Endian is superior to Big Endian.
  */
        
+.code32
        .text
+       .section __HIB, __text
        .align  ALIGN
        .globl  EXT(_start)
-       .globl  EXT(_pstart)
+       .globl  EXT(pstart)
 LEXT(_start)
-LEXT(_pstart)
-
-       .code32
+LEXT(pstart)
 
-#if 0
-       mov $0x3f8, %dx
-       mov $0x4D, %al; out %al, %dx
-       mov $0x49, %al; out %al, %dx
-       mov $0x53, %al; out %al, %dx
-       mov $0x54, %al; out %al, %dx
-       mov $0x0D, %al; out %al, %dx
-       mov $0x0A, %al; out %al, %dx
-#endif
-       
 /*
  * Here we do the minimal setup to switch from 32 bit mode to 64 bit long mode.
  *
@@ -177,8 +152,13 @@ LEXT(_pstart)
  *     |                       |
  *     | Kernel text/data      |
  *     |                       |
- *     ------------------------- Kernel start addr
+ *     |-----------------------| Kernel text base addr - 2MB-aligned
+ *     | padding               |
+ *     |-----------------------|
+ *     | __HIB section         |
+ *     |-----------------------| Page-aligned
  *     |                       |
+ *     | padding               |
  *     |                       |
  *     ------------------------- 0
  *
@@ -186,14 +166,31 @@ LEXT(_pstart)
        mov     %eax, %edi      /* save kernbootstruct */
 
        /* Use low 32-bits of address as 32-bit stack */
-       movl $EXT(low_eintstack), %esp
+       movl    $EXT(low_eintstack), %esp
        
+       POSTCODE(PSTART_ENTRY)
+
        /*
         * Set up segmentation
         */
        movl    $EXT(protected_mode_gdtr), %eax
        lgdtl   (%eax)
 
+       /*
+        * Rebase Boot page tables to kernel base address.
+        */
+       movl    $EXT(BootPML4), %eax                    // Level 4:
+       add     %eax, 0*8+0(%eax)                       //  - 1:1
+       add     %eax, KERNEL_PML4_INDEX*8+0(%eax)       //  - kernel space
+
+       movl    $EXT(BootPDPT), %edx                    // Level 3:
+       add     %eax, 0*8+0(%edx)
+       add     %eax, 1*8+0(%edx)
+       add     %eax, 2*8+0(%edx)
+       add     %eax, 3*8+0(%edx)
+
+       POSTCODE(PSTART_REBASE)
+
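
A hedged reading of the rebase above, rendered as illustrative C: the boot page tables are linked with zero-based entries, and adding the tables' runtime address (held in %eax) slides each entry to wherever the booter actually placed the kernel:

    uint64_t slide = (uint64_t)(uintptr_t)BootPML4;  /* %eax above; illustrative */
    BootPML4[0]                 += slide;            /* 1:1 mapping      */
    BootPML4[KERNEL_PML4_INDEX] += slide;            /* kernel space     */
    for (int i = 0; i < 4; i++)
            BootPDPT[i] += slide;                    /* level-3 entries  */
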
 /* the following code is shared by the master CPU and all slave CPUs */
 L_pstart_common:
        /*
@@ -209,16 +206,56 @@ L_pstart_common:
        mov     %ax, %fs
        mov     %ax, %gs
 
+       test    %edi, %edi /* Populate stack canary on BSP */
+       jz      Lvstartshim
+
+       mov     $1, %eax
+       cpuid
+       test    $(1 << 30), %ecx
+       jz      Lnon_rdrand
+       RDRAND_RAX              /* RAX := 64 bits of DRBG entropy */
+       jnc     Lnon_rdrand     /* TODO: complain if DRBG fails at this stage */
+
+Lstore_random_guard:
+       xor     %ah, %ah        /* Security: zero second byte of stack canary */
+       movq    %rax, ___stack_chk_guard(%rip)
+       /* %edi = boot_args_start if BSP */
+Lvstartshim:   
+
+       POSTCODE(PSTART_VSTART)
+
        /* %edi = boot_args_start */
        
-       leaq _vstart(%rip), %rcx
-       movq $0xffffff8000000000, %rax  /* adjust the pointer to be up high */
-       or %rax, %rsp                   /* and stack pointer up there too */
-       or %rcx, %rax
-       andq $0xfffffffffffffff0, %rsp  /* align stack */
-       xorq %rbp, %rbp                 /* zero frame pointer */
-       callq *%rax
-
+       leaq    _vstart(%rip), %rcx
+       movq    $0xffffff8000000000, %rax       /* adjust pointer up high */
+       or      %rax, %rsp                      /* and stack pointer up there */
+       or      %rcx, %rax
+       andq    $0xfffffffffffffff0, %rsp       /* align stack */
+       xorq    %rbp, %rbp                      /* zero frame pointer */
+       callq   *%rax
+
+Lnon_rdrand:
+       rdtsc /* EDX:EAX := TSC */
+       /* Distribute low order bits */
+       mov     %eax, %ecx
+       xor     %al, %ah
+       shl     $16, %rcx
+       xor     %rcx, %rax
+       xor     %eax, %edx
+
+       /* Incorporate ASLR entropy, if any */
+       lea     (%rip), %rcx
+       shr     $21, %rcx
+       movzbl  %cl, %ecx
+       shl     $16, %ecx
+       xor     %ecx, %edx
+
+       mov     %ah, %cl
+       ror     %cl, %edx /* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */
+       shl     $32, %rdx
+       xor     %rdx, %rax
+       mov     %cl, %al
+       jmp     Lstore_random_guard
 /*
  * AP (slave) CPUs enter here.
  *
@@ -232,18 +269,11 @@ LEXT(slave_pstart)
        .code32
        cli                             /* disable interrupts, so we don`t */
                                        /* need IDT for a while */
-       POSTCODE(SLAVE_PSTART_ENTRY)
+       POSTCODE(SLAVE_PSTART)
 
        movl    $EXT(mp_slave_stack) + PAGE_SIZE, %esp
 
-       /* set up identity mapping of page tables */
-       movl    $INITPT_SEG_BASE,%eax
-       movl    (KERNEL_PML4_INDEX*8)(%eax), %esi
-       movl    %esi, (0)(%eax)
-       movl    (KERNEL_PML4_INDEX*8+4)(%eax), %esi
-       movl    %esi, (0+4)(%eax)
-
-       movl    $0, %edi                /* "no kernbootstruct" */
+       xor     %edi, %edi              /* AP, no "kernbootstruct" */
 
        jmp     L_pstart_common         /* hop a ride to vstart() */
 
@@ -252,13 +282,13 @@ LEXT(slave_pstart)
 
 .section __HIB, __text
 /*
-This code is linked into the kernel but part of the "__HIB" section, which means
-its used by code running in the special context of restoring the kernel text and data
-from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
-it calls or references (ie. hibernate_restore_phys_page())
-needs to be careful to only touch memory also in the "__HIB" section.
-*/
-
+ * This code is linked into the kernel but part of the "__HIB" section,
+ * which means it's used by code running in the special context of restoring
+ * the kernel text and data from the hibernation image read by the booter.
+ * hibernate_kernel_entrypoint() and everything it calls or references
+ * (i.e. hibernate_restore_phys_page()) need to be careful to only touch
+ * memory also in the "__HIB" section.
+ */
 
        .align  ALIGN
        .globl  EXT(hibernate_machine_entrypoint)
@@ -266,54 +296,35 @@ needs to be careful to only touch memory also in the "__HIB" section.
 LEXT(hibernate_machine_entrypoint)
        movl    %eax, %edi /* regparm(1) calling convention */
 
-       /* restore gdt */
-       mov     $(SLEEP_SEG_BASE)+20, %eax // load saved_gdt, this may break
+       /* Use low 32-bits of address as 32-bit stack */
+       movl $EXT(low_eintstack), %esp
+       
+       /*
+        * Set up GDT
+        */
+       movl    $EXT(master_gdtr), %eax
        lgdtl   (%eax)
 
-       /* setup the protected mode segment registers */
-       mov             $KERNEL_DS, %eax
-       movw    %ax, %ds
-       movw    %ax, %es
-       movw    %ax, %ss
-       xor             %eax,%eax
-       movw    %ax, %fs
-       movw    %ax, %gs
+       /* Switch to 64-bit on the Boot PTs */
+       SWITCH_TO_64BIT_MODE
 
-       /* set up the page tables to use BootstrapPTD 
-        * as done in idle_pt.c, but this must be done programatically */
-       mov $(INITPT_SEG_BASE + PAGE_SIZE), %eax
-       mov $(INITPT_SEG_BASE + 2*PAGE_SIZE | INTEL_PTE_WRITE | INTEL_PTE_VALID), %ecx
-       mov $0x0, %edx
-       mov     %ecx, (0*8+0)(%eax)
-       mov %edx, (0*8+4)(%eax)
-       add     $(PAGE_SIZE), %ecx
-       mov %ecx, (1*8+0)(%eax)
-       mov %edx, (1*8+4)(%eax)
-       add     $(PAGE_SIZE), %ecx
-       mov %ecx, (2*8+0)(%eax)
-       mov %edx, (2*8+4)(%eax)
-       add     $(PAGE_SIZE), %ecx
-       mov %ecx, (3*8+0)(%eax)
-       mov %edx, (3*8+4)(%eax)
-
-       /* Temporary stack */
-       mov     $(REAL_MODE_BOOTSTRAP_OFFSET + PROT_MODE_START), %esp
+       leaq    EXT(hibernate_kernel_entrypoint)(%rip),%rcx
 
-       SWITCH_TO_64BIT_MODE
+       /* adjust the pointers to be up high */
+       movq    $0xffffff8000000000, %rax
+       orq     %rax, %rsp
+       orq     %rcx, %rax
 
-       leaq EXT(hibernate_kernel_entrypoint)(%rip),%rcx
-       leaq EXT(gIOHibernateRestoreStackEnd)(%rip),%rsp        /* switch to the bootup stack */
-       movq $0xffffff8000000000, %rax  /* adjust the pointer to be up high */
-       orq %rax, %rsp                  /* and stack pointer up there too :D */
-       orq %rcx, %rax                  /* put entrypoint in %rax */
        /* %edi is already filled with header pointer */
-       xorl %esi, %esi /* zero 2nd arg */
-       xorl %edx, %edx /* zero 3rd arg */
-       xorl %ecx, %ecx /* zero 4th arg */
-       andq $0xfffffffffffffff0, %rsp  /* align stack */
-                                       /* (future-proofing, stack should already be aligned) */
-       xorq %rbp, %rbp                 /* zero frame pointer */
-       call *%rax /* call instead of jmp to keep the required stack alignment */
+       xorl    %esi, %esi                      /* zero 2nd arg */
+       xorl    %edx, %edx                      /* zero 3rd arg */
+       xorl    %ecx, %ecx                      /* zero 4th arg */
+       andq    $0xfffffffffffffff0, %rsp       /* align stack */
+
+       /* call instead of jmp to keep the required stack alignment */
+       xorq    %rbp, %rbp                      /* zero frame pointer */
+       call    *%rax
+
        /* NOTREACHED */
        hlt
 
@@ -325,41 +336,11 @@ LEXT(hibernate_machine_entrypoint)
 #include <i386/acpi.h>
 
 
-
-
-#define        PA(addr)        (addr)
-
 /*
  * acpi_wake_start
- *
- * The code from acpi_wake_start to acpi_wake_end is copied to
- * memory below 1MB.  The firmware waking vector is updated to
- * point at acpi_wake_start in low memory before sleeping.
  */
 
 .section __TEXT,__text
-.text
-.align 12      /* Page align for single bcopy_phys() */
-.code32
-.globl EXT(acpi_wake_prot)
-EXT(acpi_wake_prot):
-       /* protected mode, paging disabled */
-
-       /* jump to acpi_temp_alloc (stored in saved_tmp) */
-       mov $(SLEEP_SEG_BASE)+16, %eax 
-       mov (%eax), %ecx // Load acpi_temp_reloc from saved_eip
-       jmp     *%ecx
-acpi_temp_reloc:
-       mov $(SLEEP_SEG_BASE)+16, %esp  /* setup stack for 64bit */
-
-       SWITCH_TO_64BIT_MODE
-
-       lea Lwake_64(%rip), %rax
-       movq $0xffffff8000000000, %rdx
-       orq     %rdx, %rax
-       jmp *%rax
-.code32
-
 .code64
 
 /*
@@ -404,6 +385,8 @@ ENTRY(acpi_sleep_cpu)
        mov     %rax, saved_cr0(%rip)
        mov     %cr2, %rax
        mov     %rax, saved_cr2(%rip)
+       mov     %cr3, %rax
+       mov     %rax, saved_cr3(%rip)
        mov     %cr4, %rax
        mov     %rax, saved_cr4(%rip)
 
@@ -431,13 +414,6 @@ ENTRY(acpi_sleep_cpu)
        sidt    saved_idt(%rip)
        str     saved_tr(%rip)
 
-       /*
-        * When system wakes up, the real mode wake handler will revert to
-        * protected mode, then jump to the address stored at saved_eip.
-        */
-       leaq    acpi_temp_reloc(%rip), %rax
-       mov             %eax, saved_eip(%rip)
-
        /*
         * Call ACPI function provided by the caller to sleep the platform.
         * This call will not return on success.
@@ -449,48 +425,47 @@ ENTRY(acpi_sleep_cpu)
        /* sleep failed, no cpu context lost */
        jmp     wake_restore
 
+.section __HIB, __text
+.code32
+.globl EXT(acpi_wake_prot)
+EXT(acpi_wake_prot):
+       /* protected mode, paging disabled */
+       movl    $EXT(low_eintstack), %esp
+
+       SWITCH_TO_64BIT_MODE
+
+       jmp     Lwake_64
+
+.section __TEXT,__text
+.code64
+
 .globl EXT(acpi_wake_prot_entry)
 EXT(acpi_wake_prot_entry):
        POSTCODE(ACPI_WAKE_PROT_ENTRY)
-       /* Entry from the hibernate code in iokit/Kernel/IOHibernateRestoreKernel.c
-        *
-        * Reset the first 4 PDE's to point to entries in IdlePTD, as done in
-        * Idle_PTs_init() during startup */
-       leaq    _IdlePDPT(%rip), %rax
-       movq    _IdlePTD(%rip), %rcx
-       mov             %ecx, %ecx /* zero top 32bits of %rcx */
-       orq             $(INTEL_PTE_WRITE|INTEL_PTE_VALID), %rcx
-       movq    %rcx, 0x0(%rax)
-       add             $0x1000, %rcx
-       movq    %rcx, 0x8(%rax)
-       add             $0x1000, %rcx
-       movq    %rcx, 0x10(%rax)
-       add             $0x1000, %rcx
-       movq    %rcx, 0x18(%rax)
-       mov     %cr3, %rax
-       mov     %rax, %cr3
-       
+       /* Return from hibernate code in iokit/Kernel/IOHibernateRestoreKernel.c
+        */
 Lwake_64:
        /*
         * restore cr4, PAE and NXE states in an orderly fashion
         */
-       mov             saved_cr4(%rip), %rcx
-       mov             %rcx, %cr4
-
-       mov             $(MSR_IA32_EFER), %ecx                  /* MSR number in ecx */
-       rdmsr                                           /* MSR value return in edx: eax */
-       or              $(MSR_IA32_EFER_NXE), %eax              /* Set NXE bit in low 32-bits */
-       wrmsr                                           /* Update Extended Feature Enable reg */
+       mov     saved_cr4(%rip), %rcx
+       mov     %rcx, %cr4
 
-       /* restore kernel GDT */
-       lgdt    EXT(protected_mode_gdtr)(%rip)
+       mov     $(MSR_IA32_EFER), %ecx          /* MSR number in ecx */
+       rdmsr                                   /* MSR value in edx:eax */
+       or      $(MSR_IA32_EFER_NXE), %eax      /* Set NXE bit in low 32-bits */
+       wrmsr                                   /* Update */
 
        movq    saved_cr2(%rip), %rax
-       mov             %rax, %cr2
+       mov     %rax, %cr2
 
        /* restore CR0, paging enabled */
-       mov             saved_cr0(%rip), %rax
-       mov             %rax, %cr0
+       mov     saved_cr0(%rip), %rax
+       mov     %rax, %cr0
+
+       /* restore the page tables */
+       mov     saved_cr3(%rip), %rax
+       mov     %rax, %cr3
 
        /* protected mode, paging enabled */
        POSTCODE(ACPI_WAKE_PAGED_ENTRY)
@@ -500,7 +475,8 @@ Lwake_64:
        movw    %ax, %ss
        movw    %ax, %ds
 
-       /* restore local and interrupt descriptor tables */
+       /* restore descriptor tables */
+       lgdt    saved_gdt(%rip)
        lldt    saved_ldt(%rip)
        lidt    saved_idt(%rip)
 
@@ -580,7 +556,7 @@ wake_restore:
        .byte   0x15                            ;\
        .long   address-EXT(real_mode_bootstrap_base)
 
-.section __TEXT,__text
+.section __HIB, __text
 .align 12      /* Page align for single bcopy_phys() */
 .code32
 Entry(real_mode_bootstrap_base)
@@ -603,7 +579,7 @@ Entry(real_mode_bootstrap_base)
        movw    %ax, %ds
        movw    %ax, %es
        movw    %ax, %ss
-       xor             %eax,%eax
+       xor     %eax,%eax
        movw    %ax, %fs
        movw    %ax, %gs
 
@@ -613,20 +589,22 @@ Entry(real_mode_bootstrap_base)
        jmp     *%ecx
 
 Entry(protected_mode_gdtr)
-       .short  160             /* limit (8*6 segs) */
+       .short  160             /* limit (8*20 segs) */
        .quad   EXT(master_gdt)
 
 Entry(real_mode_bootstrap_end)
 
 /* Save area used across sleep/wake */
-.section __SLEEP, __data
+.section __HIB, __data
 .align 2
 
-temp_stack: .quad 0
-                       .quad 0
-saved_eip:     .long 0
+/* gdtr for real address of master_gdt in HIB (not the aliased address) */
+Entry(master_gdtr)                     
+               .word 160               /* limit (8*20 segs) */
+               .quad EXT(master_gdt)
+
 saved_gdt:     .word 0
-                       .quad 0
+               .quad 0
 saved_rsp:     .quad 0
 saved_es:      .word 0
 saved_fs:      .word 0
@@ -634,6 +612,7 @@ saved_gs:   .word 0
 saved_ss:      .word 0
 saved_cr0:     .quad 0
 saved_cr2:     .quad 0
+saved_cr3:     .quad 0
 saved_cr4:     .quad 0
 saved_idt:     .word 0
                .quad 0
index 7731f838826ec384fa8ec5ceea737d36db3a2672..83387854484fc132f968975756eca6b04c68b3fa 100644 (file)
@@ -85,6 +85,7 @@ ident         PEXPERT
 options                MACH_PE         # Objective-C support           # <mach_pe>
 options                MACH_KERNEL
 options                DEBUG           # general debugging code        # <debug>
+options                MACH_ASSERT     #                               # <mach_assert>
 options                CONFIG_DTRACE   # dtrace support                # <config_dtrace>
 
 options                PANIC_INFO      # want kernel panic info        # <panic_info>
index 9283af22659703ac9fcefaa34bb490edaf761875..9f0004250836b6acd4524fcbca64e1a69ae953c8 100644 (file)
@@ -5,7 +5,7 @@
 #
 #  RELEASE     = [ intel mach mach_pe panic_info config_dtrace ]
 #  PROFILE     = [ RELEASE profile ]
-#  DEBUG       = [ RELEASE debug ]
+#  DEBUG       = [ RELEASE debug mach_assert ]
 #
 #  EMBEDDED    = [ intel mach mach_pe panic_info ]
 #  DEVELOPMENT = [ EMBEDDED ]
index 06a9defdf38b6f3e83ad2e7de7d6116c431a2aab..482f105be649582515356661d27a84bbb4dcf9a0 100644 (file)
@@ -42,9 +42,11 @@ $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile 
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)      \
                MAKEFILES=$(TARGET)/$(PEXPERT_KERNEL_CONFIG)/Makefile   \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(PEXPERT_KERNEL_CONFIG)  \
index 3ad1855bc9f5c2d26f34aea8320df831030f83f1..8c11d39339ba3fa83b637d63e4a3fc7283f82bb2 100644 (file)
@@ -1,5 +1,4 @@
 #
-OPTIONS/mach_kdb                               optional mach_kdb
 OPTIONS/panic_info                             optional panic_info
 OPTIONS/config_dtrace           optional config_dtrace
 
index 6ca4fa102ab25a9afd0e105f5dba9ef944c56139..6bc636010b55c43fab84b22437c51064246ca81a 100644 (file)
 static boolean_t isargsep( char c);
 #if !CONFIG_EMBEDDED
 static int argstrcpy(char *from, char *to);
-#endif 
+#endif
 static int argstrcpy2(char *from,char *to, unsigned maxlen);
 static int argnumcpy(int val, void *to, unsigned maxlen);
 static int getval(char *s, int *val);
 
 extern int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize);
 
+
 struct i24 {
        int32_t i24 : 24;
        int32_t _pad : 8;
@@ -71,7 +72,7 @@ PE_parse_boot_argn(
 {
        char *args;
        char *cp, c;
-       unsigned int i;
+       uintptr_t i;
        int val;
        boolean_t arg_boolean;
        boolean_t arg_found;
index dc3ea9ddc05f7d0a6a3830b13cbc4c1bb7a38143..d78bed0bf68ff0f6acef7319f99d43c175a4f4bd 100644 (file)
@@ -127,11 +127,18 @@ GetNextChild(RealDTEntry sibling)
 static const char *
 GetNextComponent(const char *cp, char *bp)
 {
+       size_t length = 0;
+       char *origbp = bp;
+
        while (*cp != 0) {
                if (*cp == kDTPathNameSeparator) {
                        cp++;
                        break;
                }
+               if (++length > kDTMaxEntryNameLength) {
+                       *origbp = '\0';
+                       return cp;
+               }
                *bp++ = *cp++;
        }
        *bp = 0;
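
With the bounds check above, an overlong path component can no longer overrun the caller's buffer. A hedged illustration, assuming the usual DTEntryNameBuf caller:

    DTEntryNameBuf buf;   /* kDTMaxEntryNameLength + 1 == 64 bytes */
    /* a component longer than kDTMaxEntryNameLength now yields an empty
     * string in buf (the lookup simply fails to match) instead of writing
     * past the end of buf */
    cp = GetNextComponent(cp, buf);
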
index 21978a2a78831fe90be720a0a6b440427704cfe1..1ef6c7c7b3b21feb4fb15c29f4a9569ec3f8746f 100644 (file)
@@ -201,6 +201,15 @@ void PE_init_platform(boolean_t vm_initialized, void * _args)
         PE_state.video.v_scale      = (kBootArgsFlagHiDPI & args->flags) ? 2 : 1;
         strlcpy(PE_state.video.v_pixelFormat, "PPPPPPPP",
                sizeof(PE_state.video.v_pixelFormat));
+
+#ifdef  kBootArgsFlagHiDPI
+       if (args->flags & kBootArgsFlagHiDPI)
+                PE_state.video.v_scale = kPEScaleFactor2x;
+       else
+                PE_state.video.v_scale = kPEScaleFactor1x;
+#else
+       PE_state.video.v_scale = kPEScaleFactor1x;
+#endif
     }
 
     if (!vm_initialized) {
index e68a85588c713f22fb0f434f8792ac25acb531f8..d6fd3d9f8ff24beefdfb920ecf637ae46d42920e 100644 (file)
@@ -57,11 +57,11 @@ typedef char DTPropertyNameBuf[32];
 
 /* Entry Name Definitions (Entry Names are C-Strings)*/
 enum {
-       kDTMaxEntryNameLength           = 31    /* Max length of a C-String Entry Name (terminator not included) */
+       kDTMaxEntryNameLength           = 63    /* Max length of a C-String Entry Name (terminator not included) */
 };
 
 /* length of DTEntryNameBuf = kDTMaxEntryNameLength +1*/
-typedef char DTEntryNameBuf[32];
+typedef char DTEntryNameBuf[kDTMaxEntryNameLength+1];
 
 
 /* Entry*/
index b347de0e37093f9f1590976b855040e2cc4b81be..369c2c477747cf30ef6d29c9fe86008d0bbb16d0 100644 (file)
@@ -121,8 +121,9 @@ typedef struct boot_icon_element boot_icon_element;
 #define kBootArgsEfiMode32              32
 #define kBootArgsEfiMode64              64
 
-#define kBootArgsFlagRebootOnPanic     1
-#define kBootArgsFlagHiDPI             2
+/* Bitfields for boot_args->flags */
+#define kBootArgsFlagRebootOnPanic     (1 << 0)
+#define kBootArgsFlagHiDPI             (1 << 1)
 
 typedef struct boot_args {
     uint16_t    Revision;      /* Revision of boot_args structure */
@@ -152,7 +153,7 @@ typedef struct boot_args {
     uint64_t    efiRuntimeServicesVirtualPageStart; /* virtual address of defragmented runtime pages */
 
     uint32_t    efiSystemTable;   /* physical address of system table in runtime area */
-    uint32_t    __reserved2;
+    uint32_t    kslide;
 
     uint32_t    performanceDataStart; /* physical address of log */
     uint32_t    performanceDataSize;
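
On a hedged reading, the repurposed kslide field is where the booter now reports the kernel ASLR slide, which pairs with the boot-page-table rebasing added to start.s earlier in this commit.
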
index b57d59edb2a57a74ffb3dc7cc7fae61648fc8cd5..31209ff7865822a0813e2435c9b41439feae64f3 100644 (file)
@@ -58,8 +58,6 @@ void PE_init_platform(
        void *args);
 
 
-
-
 void PE_init_kprintf(
        boolean_t vm_initialized);
 
@@ -172,6 +170,13 @@ enum {
     kDebugTypeSerial  = 2 
 };
 
+/*  Scale factor values for PE_Video.v_scale */
+enum {
+   kPEScaleFactorUnknown = 0,
+   kPEScaleFactor1x      = 1,
+   kPEScaleFactor2x      = 2
+};
+
 struct PE_Video {
         unsigned long   v_baseAddr;     /* Base address of video memory */
         unsigned long   v_rowBytes;     /* Number of bytes per pixel row */
index 8d159899021b6cd267ca3f9584bd2d4223013b1d..33009253588f9c0970faa19c6ef03e279fc13265 100644 (file)
@@ -60,8 +60,15 @@ options              CONFIG_LCTX     # Login Context
 
 options                CONFIG_DTRACE   # dtrace support        # <config_dtrace>
 
+options                VM_PRESSURE_EVENTS              # <vm_pressure_events>
+
 options                CONFIG_NO_PANIC_STRINGS                 # <no_panic_str>
 options                CONFIG_NO_PRINTF_STRINGS                # <no_printf_str>
 options                CONFIG_NO_KPRINTF_STRINGS               # <no_kprintf_str>
 options                CONFIG_FSE      # file system events    # <config_fse>
 options                CONFIG_TRIGGERS # trigger vnodes        # <config_triggers>
+options                CONFIG_EXT_RESOLVER     # e.g. memberd  # <config_ext_resolver>
+
+options                SECURE_KERNEL                           # <secure_kernel>
+options                DEBUG           #       # <debug>
+options                MACH_ASSERT     #       # <mach_assert>
index dd4fb5f6942fbbe45675daa213df183aed805fe2..60bcfbe5e7e6ae24ae4e2db02b2a76cf41800a4e 100644 (file)
@@ -1,6 +1,6 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace audit ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace audit vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
 #  DEBUG       = [ RELEASE debug ]
 #
@@ -20,6 +20,8 @@ options               CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
 options                CONFIG_FSE
 options                CONFIG_TRIGGERS
+options                CONFIG_VFS_FUNNEL
+options                CONFIG_EXT_RESOLVER
 #options       CONFIG_MACF_SOCKET
 #options       CONFIG_MACF_NET
 #options       CONFIG_MACF_ALWAYS_LABEL_MBUF
index d362cf0490ddeeb9fff585d03814dcc37f39fea6..4483af782c7264a741707e7ac06178829676f4b1 100644 (file)
@@ -1,8 +1,8 @@
 ######################################################################
 #
-#  RELEASE     = [ intel mach libkerncpp config_dtrace audit ]
+#  RELEASE     = [ intel mach libkerncpp config_dtrace audit vm_pressure_events ]
 #  PROFILE     = [ RELEASE profile ]
-#  DEBUG       = [ RELEASE debug ]
+#  DEBUG       = [ RELEASE debug mach_assert ]
 #
 #  EMBEDDED    = [ intel mach libkerncpp audit ]
 #  DEVELOPMENT = [ EMBEDDED ]
@@ -17,6 +17,7 @@ options               CONFIG_MACF                     # Mandatory Access Control Framework
 options                CONFIG_MACF_SOCKET_SUBSET       # MACF subset of socket support
 options                CONFIG_FSE
 options                CONFIG_TRIGGERS
+options                CONFIG_EXT_RESOLVER
 #options       CONFIG_MACF_SOCKET
 #options       CONFIG_MACF_NET
 #options       CONFIG_MACF_ALWAYS_LABEL_MBUF
index bdb8f33f837e44371657647aef662a5efe0c346c..3bab0d1cef0ab8821e721ffeb91cfb96c7dbe78a 100644 (file)
@@ -42,9 +42,11 @@ $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 
 do_all: $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile
        $(_v)next_source=$(subst conf/,,$(SOURCE));                     \
+       next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH));         \
        ${MAKE} -C $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)     \
                MAKEFILES=$(TARGET)/$(SECURITY_KERNEL_CONFIG)/Makefile  \
                SOURCE=$${next_source}                  \
+               RELATIVE_SOURCE_PATH=$${next_relsource}                 \
                TARGET=$(TARGET)                                        \
                INCL_MAKEDEP=FALSE      \
                KERNEL_CONFIG=$(SECURITY_KERNEL_CONFIG) \
index 3e0cf7a89a89da2a10295c3d139e131e3c0c3c21..480d1a30bca141ffb2822adf1a0f61ec87a79c89 100644 (file)
@@ -147,7 +147,8 @@ struct user64_mac {
 #define MAC_PROC_CHECK_SUSPEND                 0
 #define MAC_PROC_CHECK_RESUME                  1
 #define MAC_PROC_CHECK_HIBERNATE               2
-#define MAC_PROC_CHECK_SHUTDOWN_SOCKETS        3
+#define MAC_PROC_CHECK_SHUTDOWN_SOCKETS                3
+#define MAC_PROC_CHECK_PIDBIND                 4
 
 #ifndef KERNEL
 /*
index 33dd044578e772d20f6118353239eec72ac8db25..ae808e27796654e5ec484e6b767785569f1ce6de 100644 (file)
@@ -88,6 +88,7 @@
 #include <sys/kauth.h>
 #include <sys/sysproto.h>
 
+#include <mach/exception_types.h>
 #include <mach/vm_types.h>
 #include <mach/vm_prot.h>
 
@@ -123,7 +124,11 @@ SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 SYSCTL_NODE(_security, OID_AUTO, mac, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
     "TrustedBSD MAC policy controls");
 
-
+#if DEBUG
+#define SECURITY_MAC_CTLFLAGS CTLFLAG_RW | CTLFLAG_LOCKED
+#else
+#define SECURITY_MAC_CTLFLAGS CTLFLAG_RD | CTLFLAG_LOCKED
+#endif
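
The effect of this macro, applied to every sysctl converted below, is that the mac_*_enforce knobs remain writable on DEBUG kernels but become read-only otherwise.
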
 
 /*
  * Declare that the kernel provides MAC support, version 1.  This permits
@@ -163,7 +168,7 @@ int mac_late = 0;
  */
 #if CONFIG_MACF_NET
 unsigned int mac_label_mbufs   = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, SECURITY_MAC_CTLFLAGS,
        &mac_label_mbufs, 0, "Label all MBUFs");
 #endif
 
@@ -180,87 +185,68 @@ SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW | CTLFLAG_LOCKED,
  * be a problem.
  */
 unsigned int   mac_label_vnodes = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, labelvnodes, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, labelvnodes, SECURITY_MAC_CTLFLAGS,
     &mac_label_vnodes, 0, "Label all vnodes");
 
 
 unsigned int   mac_mmap_revocation = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation, SECURITY_MAC_CTLFLAGS,
     &mac_mmap_revocation, 0, "Revoke mmap access to files on subject "
     "relabel");
 
 unsigned int   mac_mmap_revocation_via_cow = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation_via_cow, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation_via_cow, SECURITY_MAC_CTLFLAGS,
     &mac_mmap_revocation_via_cow, 0, "Revoke mmap access to files via "
     "copy-on-write semantics, or by removing all write access");
 
 unsigned int mac_device_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, device_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, device_enforce, SECURITY_MAC_CTLFLAGS,
           &mac_device_enforce, 0, "Enforce MAC policy on device operations");
 
-unsigned int mac_file_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, file_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mac_file_enforce, 0, "Enforce MAC policy on file operations");
-
-unsigned int mac_iokit_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, iokit_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mac_file_enforce, 0, "Enforce MAC policy on IOKit operations");
-
 unsigned int   mac_pipe_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, pipe_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, pipe_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_pipe_enforce, 0, "Enforce MAC policy on pipe operations");
 
 unsigned int   mac_posixsem_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, posixsem_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, posixsem_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_posixsem_enforce, 0, "Enforce MAC policy on POSIX semaphores");
 
 unsigned int mac_posixshm_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, posixshm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, posixshm_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_posixshm_enforce, 0, "Enforce MAC policy on Posix Shared Memory");
 
 unsigned int   mac_proc_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, proc_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, proc_enforce, SECURITY_MAC_CTLFLAGS,
           &mac_proc_enforce, 0, "Enforce MAC policy on process operations");
 
 unsigned int mac_socket_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, socket_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, socket_enforce, SECURITY_MAC_CTLFLAGS,
        &mac_socket_enforce, 0, "Enforce MAC policy on socket operations");
 
 unsigned int   mac_system_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, system_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, system_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_system_enforce, 0, "Enforce MAC policy on system-wide interfaces");
 
 unsigned int   mac_sysvmsg_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, sysvmsg_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, sysvmsg_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_sysvmsg_enforce, 0, "Enforce MAC policy on System V IPC message queues");
 
 unsigned int   mac_sysvsem_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, sysvsem_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, sysvsem_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_sysvsem_enforce, 0, "Enforce MAC policy on System V IPC semaphores");
 
 unsigned int   mac_sysvshm_enforce = 1;
-SYSCTL_INT(_security_mac, OID_AUTO, sysvshm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_INT(_security_mac, OID_AUTO, sysvshm_enforce, SECURITY_MAC_CTLFLAGS,
     &mac_sysvshm_enforce, 0, "Enforce MAC policy on System V Shared Memory");
 
 unsigned int   mac_vm_enforce = 1;
-SYSCTL_INT(_security_mac, OID_AUTO, vm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_INT(_security_mac, OID_AUTO, vm_enforce, SECURITY_MAC_CTLFLAGS,
           &mac_vm_enforce, 0, "Enforce MAC policy on VM operations");
 
 unsigned int   mac_vnode_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, vnode_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_UINT(_security_mac, OID_AUTO, vnode_enforce, SECURITY_MAC_CTLFLAGS,
           &mac_vnode_enforce, 0, "Enforce MAC policy on vnode operations");
 
-
-#if CONFIG_MACF_MACH
-unsigned int   mac_port_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, port_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &mac_port_enforce, 0, "Enforce MAC policy on Mach port operations");
-
-unsigned int   mac_task_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, task_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &mac_task_enforce, 0, "Enforce MAC policy on Mach task operations");
-#endif
-
 #if CONFIG_AUDIT
 /*
  * mac_audit_data_zone is the zone used for data pushed into the audit
@@ -2254,6 +2240,61 @@ __mac_get_mount(proc_t p __unused, struct __mac_get_mount_args *uap,
        return mac_mount_label_get(mp, uap->mac_p);
 }
 
+/*
+ * mac_schedule_userret()
+ *
+ * Schedule a callback to the mpo_thread_userret hook. The mpo_thread_userret
+ * hook is called just before the thread exits the kernel in ast_taken().
+ *
+ * Returns:     0              Success
+ *             !0              Not successful
+ */
+int
+mac_schedule_userret(void)
+{
+
+       act_set_astmacf(current_thread());
+       return (0);
+}
+
+/*
+ * mac_do_machexc()
+ *
+ * Deliver a Mach exception.  This should only be done from the
+ * mpo_thread_userret callback.
+ *
+ * params:     code            exception code
+ *             subcode         exception subcode
+ *             flags           flags:
+ *                             MAC_DOEXCF_TRACED  Only do exception if being
+ *
+ *
+ * Returns:     0              Success
+ *             !0              Not successful
+ */
+int
+mac_do_machexc(int64_t code, int64_t subcode, uint32_t flags)
+{
+       mach_exception_data_type_t  codes[EXCEPTION_CODE_MAX];
+       proc_t p = current_proc();
+
+       /* Only allow exception codes in MACF's reserved range. */
+       if ((code < EXC_MACF_MIN) || (code > EXC_MACF_MAX))
+               return (1);
+
+       if (flags & MAC_DOEXCF_TRACED &&
+           !(p->p_lflag & P_LTRACED && (p->p_lflag & P_LPPWAIT) == 0))
+               return (0);
+
+       /* Send the Mach exception */
+       codes[0] = (mach_exception_data_type_t)code;
+       codes[1] = (mach_exception_data_type_t)subcode;
+
+       return (bsd_exception(EXC_SOFTWARE, codes, 2) != KERN_SUCCESS);
+}
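
A hedged sketch of how a policy module might pair these two calls; the policy function names are illustrative:

    /* from any kernel context: defer work to the return to user space */
    static void
    my_policy_flag_thread(void)
    {
            (void) mac_schedule_userret();
    }

    /* mpo_thread_userret hook: deliver a MACF-reserved Mach exception,
     * but only if the process is being ptrace()'ed */
    static void
    my_policy_thread_userret(__unused struct thread *thread)
    {
            (void) mac_do_machexc(EXC_MACF_MIN, 0, MAC_DOEXCF_TRACED);
    }
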
+
 #else /* MAC */
 
 int
@@ -2404,4 +2445,18 @@ __mac_get_mount(proc_t p __unused,
 
        return (ENOSYS);
 }
+
+int
+mac_schedule_userret(void)
+{
+
+       return (1);
+}
+
+int
+mac_do_machexc(int64_t code __unused, int64_t subcode __unused, uint32_t flags __unused)
+{
+
+       return (1);
+}
 #endif /* !MAC */
index 20780b249958344fc193c9de105429af5ca71ba2..7d0f15a103ecd8067f49eb7762d0e0aa1d0b0d11 100644 (file)
@@ -116,6 +116,7 @@ struct thread;
 struct timespec;
 struct ucred;
 struct uio;
+struct uthread;
 struct vfs_attr;
 struct vfs_context;
 struct vnode;
@@ -168,6 +169,7 @@ void        mac_cred_label_destroy(kauth_cred_t cred);
 int    mac_cred_label_externalize_audit(proc_t p, struct mac *mac);
 void   mac_cred_label_free(struct label *label);
 void   mac_cred_label_init(kauth_cred_t cred);
+int    mac_cred_label_compare(struct label *a, struct label *b);
 void   mac_cred_label_update(kauth_cred_t cred, struct label *newlabel);
 int    mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred,
            struct vnode *vp, struct label *scriptvnodelabel,
@@ -310,7 +312,8 @@ void        mac_posixsem_label_init(struct pseminfo *psem);
 int    mac_posixshm_check_create(kauth_cred_t cred, const char *name);
 int    mac_posixshm_check_mmap(kauth_cred_t cred, struct pshminfo *pshm,
            int prot, int flags);
-int    mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *pshm);
+int    mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *pshm,
+           int fflags);
 int    mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *pshm);
 int    mac_posixshm_check_truncate(kauth_cred_t cred, struct pshminfo *pshm,
            off_t s);
@@ -334,6 +337,7 @@ int mac_proc_check_getaudit(proc_t proc);
 int    mac_proc_check_getauid(proc_t proc);
 int     mac_proc_check_getlcid(proc_t proc1, proc_t proc2,
            pid_t pid);
+int     mac_proc_check_ledger(proc_t curp, proc_t target, int op);
 int    mac_proc_check_map_anon(proc_t proc, user_addr_t u_addr,
            user_size_t u_size, int prot, int flags, int *maxprot);
 int    mac_proc_check_mprotect(proc_t proc,
@@ -401,6 +405,7 @@ int mac_system_check_swapon(kauth_cred_t cred, struct vnode *vp);
 int    mac_system_check_sysctl(kauth_cred_t cred, int *name,
            u_int namelen, user_addr_t oldctl, user_addr_t oldlenp, int inkernel,
            user_addr_t newctl, size_t newlen);
+int    mac_system_check_kas_info(kauth_cred_t cred, int selector);
 void   mac_sysvmsg_label_associate(kauth_cred_t cred,
            struct msqid_kernel *msqptr, struct msg *msgptr);
 void   mac_sysvmsg_label_init(struct msg *msgptr);
@@ -445,6 +450,10 @@ void       mac_sysvshm_label_associate(kauth_cred_t cred,
 void   mac_sysvshm_label_destroy(struct shmid_kernel *shmsegptr);
 void   mac_sysvshm_label_init(struct shmid_kernel* shmsegptr);
 void   mac_sysvshm_label_recycle(struct shmid_kernel *shmsegptr);
+struct label * mac_thread_label_alloc(void);
+void   mac_thread_label_destroy(struct uthread *uthread);
+void   mac_thread_label_free(struct label *label);
+void   mac_thread_label_init(struct uthread *uthread);
 int    mac_vnode_check_access(vfs_context_t ctx, struct vnode *vp,
            int acc_mode);
 int    mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp);
index 2b823bab90088999e47bc4c3b961f3385ded10a2..1f57fdc79a1ede102ffd7d9aad408180ce44cd07 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -71,8 +71,6 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 
-#include <net/if.h>
-#include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
index 6e8ae3d2a803af504fc42c2b7c3ede4ec55449a5..6ca8b699ecf6bc637a981758c18e80978fdf3d91 100644 (file)
@@ -184,8 +184,6 @@ extern struct mac_policy_list mac_policy_list;
  * at all in the system.
  */
 extern unsigned int mac_device_enforce;
-extern unsigned int mac_file_enforce;
-extern unsigned int mac_iokit_enforce;
 extern unsigned int mac_pipe_enforce;
 extern unsigned int mac_posixsem_enforce;
 extern unsigned int mac_posixshm_enforce;
@@ -198,11 +196,6 @@ extern unsigned int mac_sysvshm_enforce;
 extern unsigned int mac_vm_enforce;
 extern unsigned int mac_vnode_enforce;
 
-#if CONFIG_MACF_MACH
-extern unsigned int mac_port_enforce;
-extern unsigned int mac_task_enforce;
-#endif
-
 #if CONFIG_MACF_NET
 extern unsigned int mac_label_mbufs;
 #endif
index 799f0fac0054c4615b9b22a96c6ffa2f7f953dad..5393c750dc52779ccbd207bf536f0566abe7b97e 100644 (file)
@@ -60,6 +60,15 @@ int mac_port_label_compute(struct label *subj, struct label *obj,
     const char *serv, struct label *out);
 int mac_port_check_method(task_t task, struct label *sub, struct label *obj, int msgid);
 
+/* mac_do_machexc() flags */
+#define        MAC_DOEXCF_TRACED       0x01    /* Only do Mach exception if
+                                          being ptrace()'ed */
+struct uthread;
+int    mac_do_machexc(int64_t code, int64_t subcode, uint32_t flags __unused);
+int    mac_schedule_userret(void);
+struct label *mac_thread_get_threadlabel(struct thread *thread);
+struct label *mac_thread_get_uthreadlabel(struct uthread *uthread);
+
 #if CONFIG_MACF
 void mac_policy_init(void);
 void mac_policy_initmach(void);
@@ -106,6 +115,10 @@ int mac_port_label_internalize(struct label *label, char *string);
 void   mac_task_label_update(struct label *cred, struct label *task);
 int    mac_port_check_service(struct label *subj, struct label *obj,
            const char *serv, const char *perm);
+
+/* threads */
+void   act_set_astmacf(struct thread *);
+void   mac_thread_userret(struct thread *);
 #endif /* MAC */
 
 #endif /* !_SECURITY_MAC_MACH_INTERNAL_H_ */
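These declarations give policies a way to raise a Mach exception from BSD context (mac_do_machexc(), gated on ptrace() via MAC_DOEXCF_TRACED) and to defer work to the next return to user space (mac_schedule_userret(), serviced through the new thread hooks later in this change). A minimal sketch of a policy using both, assuming a hypothetical violation handler; the code/subcode values are illustrative and the header is kernel-internal:

    #include <security/mac_mach_internal.h>

    /* Hypothetical enforcement path in a policy module. */
    static void
    mypolicy_note_violation(void)
    {
        /* Ask for a callback on the next return to user space;
         * mpo_thread_userret will run at that point. */
        (void) mac_schedule_userret();

        /* Also raise a Mach exception, but only if the offending
         * process is being ptrace()'ed (MAC_DOEXCF_TRACED).
         * 0x1000/0 are illustrative code/subcode values. */
        (void) mac_do_machexc(0x1000, 0, MAC_DOEXCF_TRACED);
    }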
index 836be3cc0612ffce4f8234554abdcbe6ece5a7bd..91439381351e513d92851cf7bb50674779e469c8 100644 (file)
@@ -2881,6 +2881,7 @@ typedef int mpo_posixshm_check_mmap_t(
   @param cred Subject credential
   @param ps Pointer to shared memory information structure
   @param shmlabel Label associated with the shared memory region
+  @param fflags shm_open(2) open flags ('fflags' encoded)
 
   Determine whether the subject identified by the credential can open
   the POSIX shared memory region.
@@ -2891,7 +2892,8 @@ typedef int mpo_posixshm_check_mmap_t(
 typedef int mpo_posixshm_check_open_t(
        kauth_cred_t cred,
        struct pshminfo *ps,
-       struct label *shmlabel
+       struct label *shmlabel,
+       int fflags
 );
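The new fflags argument lets a policy distinguish read-only from read-write opens of a region. A minimal sketch of a conforming hook, assuming a hypothetical root-only-write rule; FREAD/FWRITE from <sys/fcntl.h> are the 'fflags' encoding the parameter documentation refers to:

    #include <sys/errno.h>
    #include <sys/fcntl.h>              /* FREAD/FWRITE ('fflags' encoding) */
    #include <sys/kauth.h>
    #include <security/mac_policy.h>

    /* Hypothetical rule: only root may open a pshm region for writing. */
    static int
    mypolicy_posixshm_check_open(kauth_cred_t cred, struct pshminfo *ps,
        struct label *shmlabel, int fflags)
    {
    #pragma unused(ps, shmlabel)
        if ((fflags & FWRITE) && kauth_cred_getuid(cred) != 0)
            return (EPERM);
        return (0);
    }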
 /**
   @brief Access control check for POSIX shared memory stat
@@ -3123,6 +3125,25 @@ typedef int mpo_proc_check_getlcid_t(
        struct proc *p,
        pid_t pid
 );
+/**
+  @brief Access control check for retrieving ledger information
+  @param cred Subject credential
+  @param target Object process
+  @param op ledger operation
+
+  Determine if ledger(2) system call is permitted.
+
+  The information returned by this system call is similar to what is
+  already exposed via process listings and similar interfaces.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_proc_check_ledger_t(
+       kauth_cred_t cred,
+       struct proc *target,
+       int op
+);
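A sketch of a policy supplying this hook, assuming a hypothetical superuser-only rule (the meaning of individual op values is defined by ledger(2), not by the framework):

    /* Hypothetical rule: only the superuser may issue ledger(2)
     * operations against processes other than the caller itself. */
    static int
    mypolicy_proc_check_ledger(kauth_cred_t cred, struct proc *target, int op)
    {
    #pragma unused(op)
        if (target != current_proc() && !kauth_cred_issuser(cred))
            return (EPERM);
        return (0);
    }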
 /**
   @brief Access control check for mmap MAP_ANON
   @param proc User process requesting the memory
@@ -4082,6 +4103,22 @@ typedef int mpo_system_check_sysctl_t(
        user_addr_t newvalue,   /* NULLOK */
        size_t newlen
 );
+/**
+  @brief Access control check for kas_info
+  @param cred Subject credential
+  @param selector Category of information to return. See kas_info.h
+
+  Determine whether the subject identified by the credential can perform
+  introspection of the kernel address space layout for
+  debugging/performance analysis.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_system_check_kas_info_t(
+       kauth_cred_t cred,
+       int selector
+);
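A sketch of a policy restricting this introspection, assuming a hypothetical superuser-only rule; selector values come from kas_info.h, as the parameter documentation notes:

    /* Hypothetical rule: only the superuser may inspect the kernel
     * address-space layout, regardless of selector. */
    static int
    mypolicy_system_check_kas_info(kauth_cred_t cred, int selector)
    {
    #pragma unused(selector)
        return (kauth_cred_issuser(cred) ? 0 : EPERM);
    }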
 /**
   @brief Create a System V message label
   @param cred Subject credential
@@ -4723,6 +4760,38 @@ typedef void mpo_task_label_update_t(
        struct label *cred,
        struct label *task
 );
+/**
+  @brief Perform MAC-related events when a thread returns to user space
+  @param thread Mach (not BSD) thread that is returning
+
+  This entry point permits policy modules to perform MAC-related
+  events when a thread returns to user space, via a system call
+  return or trap return.
+*/
+typedef void mpo_thread_userret_t(
+       struct thread *thread
+);
+/**
+  @brief Initialize per thread label
+  @param label New label to initialize
+
+  Initialize the label for a newly instantiated thread.
+  Sleeping is permitted.
+*/
+typedef void mpo_thread_label_init_t(
+       struct label *label
+);
+/**
+  @brief Destroy thread label
+  @param label The label to be destroyed
+
+  Destroy a user thread label.  Since the user thread
+  is going out of scope, policy modules should free any internal
+  storage associated with the label so that it may be destroyed.
+*/
+typedef void mpo_thread_label_destroy_t(
+       struct label *label
+);
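Together these three hooks give a policy per-thread state plus a callback at every return to user space. A sketch under the usual slot-allocation convention (mypolicy_slot would be filled in via mpc_field_off at registration, and mac_label_get()/mac_label_set() are the standard slot accessors); mac_thread_get_threadlabel() is the kernel-internal accessor added in this change, used here purely for illustration:

    static int mypolicy_slot;   /* assigned via mpc_field_off at registration */

    static void
    mypolicy_thread_label_init(struct label *label)
    {
        /* Sleeping is permitted here, so a WAITOK allocation would be
         * safe; this sketch just zeroes the policy's slot. */
        mac_label_set(label, mypolicy_slot, 0);
    }

    static void
    mypolicy_thread_label_destroy(struct label *label)
    {
        /* Free any per-thread storage; nothing to do for a scalar slot. */
        mac_label_set(label, mypolicy_slot, 0);
    }

    static void
    mypolicy_thread_userret(struct thread *thread)
    {
        /* Count returns to user space in the per-thread label. */
        struct label *label = mac_thread_get_threadlabel(thread);

        mac_label_set(label, mypolicy_slot,
            mac_label_get(label, mypolicy_slot) + 1);
    }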
 /**
   @brief Check vnode access
   @param cred Subject credential
@@ -5967,7 +6036,7 @@ typedef void mpo_reserved_hook_t(void);
 /*!
   \struct mac_policy_ops
 */
-#define MAC_POLICY_OPS_VERSION 11 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 13 /* inc when new reserved slots are taken */
 struct mac_policy_ops {
        mpo_audit_check_postselect_t            *mpo_audit_check_postselect;
        mpo_audit_check_preselect_t             *mpo_audit_check_preselect;
@@ -6278,7 +6347,7 @@ struct mac_policy_ops {
        mpo_vnode_check_uipc_connect_t          *mpo_vnode_check_uipc_connect;
        mac_proc_check_run_cs_invalid_t         *mpo_proc_check_run_cs_invalid;
        mpo_proc_check_suspend_resume_t         *mpo_proc_check_suspend_resume;
-       mpo_reserved_hook_t                     *mpo_reserved12;
+       mpo_thread_userret_t                    *mpo_thread_userret;
        mpo_iokit_check_set_properties_t        *mpo_iokit_check_set_properties;
        mpo_system_check_chud_t                 *mpo_system_check_chud;
        mpo_vnode_check_searchfs_t              *mpo_vnode_check_searchfs;
@@ -6287,11 +6356,11 @@ struct mac_policy_ops {
        mpo_proc_check_map_anon_t               *mpo_proc_check_map_anon;
        mpo_vnode_check_fsgetpath_t             *mpo_vnode_check_fsgetpath;
        mpo_iokit_check_open_t                  *mpo_iokit_check_open;
+       mpo_proc_check_ledger_t                 *mpo_proc_check_ledger;
        mpo_vnode_notify_rename_t               *mpo_vnode_notify_rename;
-       mpo_reserved_hook_t                     *mpo_reserved14;
-       mpo_reserved_hook_t                     *mpo_reserved15;
-       mpo_reserved_hook_t                     *mpo_reserved16;
-       mpo_reserved_hook_t                     *mpo_reserved17;
+       mpo_thread_label_init_t                 *mpo_thread_label_init;
+       mpo_thread_label_destroy_t              *mpo_thread_label_destroy;
+       mpo_system_check_kas_info_t     *mpo_system_check_kas_info;
        mpo_reserved_hook_t                     *mpo_reserved18;
        mpo_reserved_hook_t                     *mpo_reserved19;
        mpo_reserved_hook_t                     *mpo_reserved20;
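The version bump from 11 to 13 reflects the newly claimed reserved slots (mpo_thread_userret replacing mpo_reserved12; mpo_proc_check_ledger, mpo_thread_label_init, mpo_thread_label_destroy, and mpo_system_check_kas_info taking further reserved slots), so policies adopting these hooks should build against MAC_POLICY_OPS_VERSION 13. A sketch of wiring the hypothetical hooks from the sketches above into the table and registering, assuming the standard mac_policy_register() entry point:

    static struct mac_policy_ops mypolicy_ops = {
        .mpo_posixshm_check_open   = mypolicy_posixshm_check_open,
        .mpo_proc_check_ledger     = mypolicy_proc_check_ledger,
        .mpo_system_check_kas_info = mypolicy_system_check_kas_info,
        .mpo_thread_label_init     = mypolicy_thread_label_init,
        .mpo_thread_label_destroy  = mypolicy_thread_label_destroy,
        .mpo_thread_userret        = mypolicy_thread_userret,
    };

    static struct mac_policy_conf mypolicy_conf = {
        .mpc_name           = "mypolicy",
        .mpc_fullname       = "Hypothetical example policy",
        .mpc_ops            = &mypolicy_ops,
        .mpc_loadtime_flags = MPC_LOADTIME_FLAG_UNLOADOK,
        .mpc_field_off      = &mypolicy_slot,
    };

    static mac_policy_handle_t mypolicy_handle;

    /* Called from the policy kext's start routine. */
    static int
    mypolicy_start(void)
    {
        return (mac_policy_register(&mypolicy_conf, &mypolicy_handle, NULL));
    }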
index f6cc28e568d6baf4a8b8f59d142290ef7df151f5..f2ffd9daf3b81a11d4c2dd8a63c64a929aa4ced5 100644 (file)
@@ -136,14 +136,14 @@ mac_posixshm_check_create(kauth_cred_t cred, const char *name)
 }
 
 int
-mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *shm)
+mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *shm, int fflags)
 {
        int error = 0;
 
        if (!mac_posixshm_enforce)
                return 0;
 
-       MAC_CHECK(posixshm_check_open, cred, shm, shm->pshm_label);
+       MAC_CHECK(posixshm_check_open, cred, shm, shm->pshm_label, fflags);
 
        return (error);
 }
index 631b468a90ad1614bf202b57ec39fc8d161a235c..18fbdeccabb5bd0b8a6932777a8c06cd87991715 100644 (file)
 #include <sys/proc_internal.h>
 #include <sys/kauth.h>
 #include <sys/imgact.h>
+#include <mach/mach_types.h>
 
 #include <security/mac_internal.h>
+#include <security/mac_mach_internal.h>
 
 #include <bsd/security/audit/audit.h>
 
@@ -102,6 +104,12 @@ mac_cred_label_free(struct label *label)
        mac_labelzone_free(label);
 }
 
+int
+mac_cred_label_compare(struct label *a, struct label *b)
+{
+       return (bcmp(a, b, sizeof (*a)) == 0);
+}
+
 int
 mac_cred_label_externalize_audit(struct proc *p, struct mac *mac)
 {
@@ -590,3 +598,75 @@ mac_proc_check_suspend_resume(proc_t curp, int sr)
 
        return (error);
 }
+
+int
+mac_proc_check_ledger(proc_t curp, proc_t proc, int ledger_op)
+{
+       kauth_cred_t cred;
+       int error = 0;
+
+       if (!mac_proc_enforce ||
+           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+               return (0);
+
+       cred = kauth_cred_proc_ref(curp);
+       MAC_CHECK(proc_check_ledger, cred, proc, ledger_op);
+       kauth_cred_unref(&cred);
+
+       return (error);
+}
+
+struct label *
+mac_thread_label_alloc(void)
+{
+       struct label *label;
+
+       label = mac_labelzone_alloc(MAC_WAITOK);
+       if (label == NULL)
+               return (NULL);
+       MAC_PERFORM(thread_label_init, label);
+       return (label);
+}
+
+void
+mac_thread_label_init(struct uthread *uthread)
+{
+       uthread->uu_label = mac_thread_label_alloc();
+}
+
+void
+mac_thread_label_free(struct label *label)
+{
+       MAC_PERFORM(thread_label_destroy, label);
+       mac_labelzone_free(label);
+}
+
+void
+mac_thread_label_destroy(struct uthread *uthread)
+{
+
+       mac_thread_label_free(uthread->uu_label);
+       uthread->uu_label = NULL;
+}
+
+void
+mac_thread_userret(struct thread *td)
+{
+
+       MAC_PERFORM(thread_userret, td);
+}
+
+struct label *
+mac_thread_get_uthreadlabel(struct uthread *uthread)
+{
+
+       return (uthread->uu_label);
+}
+
+struct label *
+mac_thread_get_threadlabel(struct thread *thread)
+{
+       struct uthread *uthread = get_bsdthread_info(thread);
+
+       return (mac_thread_get_uthreadlabel(uthread));
+}
index 45c7daef68b4fa7d0495a3b89590ec7fbaa9ddad..32acf01f56bbb3307c0e09e861dd0ee3c98006eb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/sysctl.h>
 #include <sys/kpi_socket.h>
 
-#include <net/if.h>
-#include <net/if_var.h>
-
-#include <netinet/in.h>
-#include <netinet/ip_var.h>
-
 #include <security/mac_internal.h>
 
 #if CONFIG_MACF_SOCKET
index 8089caac842809cc53b8d41e5d335729e772af89..f3de4ca13460c9a50cf222868a861c8e92e831f3 100644 (file)
@@ -192,3 +192,16 @@ mac_system_check_sysctl(kauth_cred_t cred, int *name, u_int namelen,
 
        return (error);
 }
+
+int
+mac_system_check_kas_info(kauth_cred_t cred, int selector)
+{
+       int error;
+
+       if (!mac_system_enforce)
+               return (0);
+
+       MAC_CHECK(system_check_kas_info, cred, selector);
+
+       return (error);
+}
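For reference, the system call this check gates looks like the following from user space; a denial from a registered policy surfaces as the errno chosen by mpo_system_check_kas_info. A sketch assuming the selector constant published in kas_info.h for this release:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/kas_info.h>   /* KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR */

    int
    main(void)
    {
        uint64_t slide = 0;
        size_t size = sizeof (slide);

        /* Fails with the policy's errno if the MAC check denies access. */
        if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) != 0) {
            perror("kas_info");
            return (1);
        }
        printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
        return (0);
    }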
index 7cc5561a2ac119cd49fc09bd3dbd2193ce4d7067..ba8e50fceaea4b823b1dd68562e0314e65013023 100644 (file)
@@ -1221,7 +1221,7 @@ mac_mount_label_associate(vfs_context_t ctx, struct mount *mp)
        }
 
        MAC_PERFORM(mount_label_associate, cred, mp, mp->mnt_mntlabel);
-#if MAC_DEBUG
+#if DEBUG
        printf("MAC Framework enabling %s support: %s -> %s (%s)\n",
                mp->mnt_flag & MNT_MULTILABEL ? "multilabel" : "singlelabel", 
                mp->mnt_vfsstat.f_mntfromname,
index 5b659a8333d633ba40d878981f940f5ec25ff1c6..4ee81c42734b41ecd03383bcb0184ee7f1722207 100644 (file)
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <err.h>
+#include <unistd.h>
 
 #include <pthread.h>
 #include <mach/mach.h>
@@ -419,7 +420,7 @@ server(void *serverarg)
                } 
 #else
                if (kev[0].data != args.port)
-                       printf("kevent64(MACH_PORT_NULL) port name (0x%x) != expected (0x%x)\n", kev[0].data, args.port);
+                       printf("kevent64(MACH_PORT_NULL) port name (%lld) != expected (0x%x)\n", kev[0].data, args.port);
 
                args.req_msg->msgh_bits = 0;
                args.req_msg->msgh_size = args.req_size;
@@ -470,6 +471,7 @@ server(void *serverarg)
                        }
                }
        }
+       return NULL;
 }
 
 static inline void
@@ -535,6 +537,7 @@ calibrate_client_work(void)
                        printf("calibration_count=%d calibration_usec=%d\n",
                                calibration_count, calibration_usec);
        }
+        return NULL;
 }
 
 static void *
@@ -549,6 +552,7 @@ client_work(void)
        if (client_delay) {
                usleep(client_delay);
        }
+       return NULL;
 }
 
 void *client(void *threadarg) 
@@ -558,7 +562,7 @@ void *client(void *threadarg)
        mach_msg_header_t *req, *reply; 
        mach_port_t bsport, servport;
        kern_return_t ret;
-       long server_num = (long) threadarg;
+       int server_num = (int) threadarg;
        void *ints = malloc(sizeof(u_int32_t) * num_ints);
 
        if (verbose) 
@@ -655,7 +659,7 @@ void *client(void *threadarg)
        }
 
        free(ints);
-       return;
+       return NULL;
 }
 
 static void
@@ -670,12 +674,12 @@ thread_spawn(thread_id_t *thread, void *(fn)(void *), void *arg) {
                if (ret != 0)
                        err(1, "pthread_create()");
                if (verbose)
-                       printf("created pthread 0x%x\n", thread->tid);
+                       printf("created pthread %p\n", thread->tid);
        } else {
                thread->pid = fork();
                if (thread->pid == 0) {
                        if (verbose)
-                               printf("calling 0x%x(0x%x)\n", fn, arg);
+                               printf("calling %p(%p)\n", fn, arg);
                        fn(arg);
                        exit(0);
                }
@@ -689,10 +693,10 @@ thread_join(thread_id_t *thread) {
        if (threaded) {
                kern_return_t   ret;
                if (verbose)
-                       printf("joining thread 0x%x\n", thread->tid);
+                       printf("joining thread %p\n", thread->tid);
                ret = pthread_join(thread->tid, NULL);
                if (ret != KERN_SUCCESS)
-                       err(1, "pthread_join(0x%x)", thread->tid);
+                       err(1, "pthread_join(%p)", thread->tid);
        } else {
                int     stat;
                if (verbose)
@@ -820,8 +824,8 @@ int main(int argc, char *argv[])
        double dsecs = (double) deltatv.tv_sec + 
                1.0E-6 * (double) deltatv.tv_usec;
 
-       printf(" in %u.%03u seconds\n",  
-                       deltatv.tv_sec, deltatv.tv_usec/1000);
+       printf(" in %ld.%03u seconds\n",  
+              (long)deltatv.tv_sec, deltatv.tv_usec/1000);
        printf("  throughput in messages/sec:     %g\n",
                        (double)totalmsg / dsecs);
        printf("  average message latency (usec): %2.3g\n", 
index 44389b35d28ae95a2690a9936ed3c5149b5e1b0a..590ac04b1f9eb7da2d22e6bff37e66a74c89e4d8 100644 (file)
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <err.h>
+#include <unistd.h>
 
 #include <pthread.h>
 #include <mach/mach.h>
@@ -417,6 +418,7 @@ server(void *serverarg)
                        }
                }
        }
+       return NULL;
 }
 
 static inline void
@@ -482,6 +484,7 @@ calibrate_client_work(void)
                        printf("calibration_count=%d calibration_usec=%d\n",
                                calibration_count, calibration_usec);
        }
+       return NULL;
 }
 
 static void *
@@ -496,6 +499,7 @@ client_work(void)
        if (client_delay) {
                usleep(client_delay);
        }
+       return NULL;
 }
 
 void *client(void *threadarg) 
@@ -505,7 +509,7 @@ void *client(void *threadarg)
        mach_msg_header_t *req, *reply; 
        mach_port_t bsport, servport;
        kern_return_t ret;
-       long server_num = (long) threadarg;
+       int server_num = (int) threadarg;
        void *ints = malloc(sizeof(u_int32_t) * num_ints);
 
        if (verbose) 
@@ -602,7 +606,7 @@ void *client(void *threadarg)
        }
 
        free(ints);
-       return;
+       return NULL;
 }
 
 static void
@@ -617,12 +621,12 @@ thread_spawn(thread_id_t *thread, void *(fn)(void *), void *arg) {
                if (ret != 0)
                        err(1, "pthread_create()");
                if (verbose)
-                       printf("created pthread 0x%x\n", thread->tid);
+                       printf("created pthread %p\n", thread->tid);
        } else {
                thread->pid = fork();
                if (thread->pid == 0) {
                        if (verbose)
-                               printf("calling 0x%x(0x%x)\n", fn, arg);
+                               printf("calling %p(%p)\n", fn, arg);
                        fn(arg);
                        exit(0);
                }
@@ -636,10 +640,10 @@ thread_join(thread_id_t *thread) {
        if (threaded) {
                kern_return_t   ret;
                if (verbose)
-                       printf("joining thread 0x%x\n", thread->tid);
+                       printf("joining thread %p\n", thread->tid);
                ret = pthread_join(thread->tid, NULL);
                if (ret != KERN_SUCCESS)
-                       err(1, "pthread_join(0x%x)", thread->tid);
+                       err(1, "pthread_join(%p)", thread->tid);
        } else {
                int     stat;
                if (verbose)
index ded5378812a30e0cdbcb5287015a3a9e4e084c78..1f6e025ad89f483ea7e12a6623819044db9252dc 100644 (file)
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 void mystart(void) __asm__("mystart");
 
 void mystart(void) {
index 1acf0d49315cfe92efff868ec4c9c24989e37b17..5dfcd6bb74dbc007ae5467d60d7b0c631b348373 100644 (file)
@@ -6,6 +6,7 @@
 #include <mach-o/ldsyms.h>
 #include <mach-o/dyld_images.h>
 #include <stdlib.h>
+#include <sys/sysctl.h>
 
 __attribute__((constructor))
 void init(int argc, const char *argv[], const char *envp[], const char *appl[], void *vars __attribute__((unused))) {
index d7d5f6a5b0edb6042e8c2244fbdd51246835be9f..79a2bf602364c31aceb0609f8a2b458b3e1c50fd 100644 (file)
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <err.h>
 #include <pthread.h>
+#include <spawn.h>
 
 extern char **environ;
 
index de49c7daf812ff824ff0a243e1e58f2b29097915..8b38345869bfdd6c868ea384093c089385b92b52 100755 (executable)
@@ -20,6 +20,7 @@ runs the libMicro test suite excluding the lmbench tests and gives you a text fi
 gives you a html file comparing two runs.
 
 *** To run libMicro testsuite with stepper disabled ***
+*** For Desktop use the coreos_bench script ***
 
 To get a more consistent result of libMicro benchmark run, we need to disable the 
 stepper to prevent it from causing wide variations in results. See rdar://6243819 
@@ -39,6 +40,10 @@ which provides '/usr/local/bin/pstates'.
 2) 'coreos_bench' script is used exactly like the 'bench' script. All the usage examples for 
 'bench' script in this readme file also holds true for 'coreos_bench' script. 
 
 *** Makefile ***
 
 The Makefile invokes Makefile.Darwin which invokes Makefile.com.Darwin.
@@ -62,8 +67,11 @@ ARCH                 defaults to  i386
        the makefile will automatically build with ARCH_FLAG="-arch i386 -arch x86_64" and put the results in bin-fat
 
         to build for ARM architecture,
-        first set an environment variable 'SDKROOT' to point to iPhone sdk
-                make ARCH=ARM_ARCH where ARM_ARCH can be armv6 or armv7
+        first set the environment variable 'SDKROOT' to point to the iPhone internal SDK.
+	For example:
+		$ export SDKROOT="/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.Internal.sdk/"
+	then build with:
+                make ARCH=ARM_ARCH where ARM_ARCH can be, e.g., armv6 or armv7
         this will put the results in bin-ARM_ARCH
 
        to build with only two of the architectures see below
@@ -99,6 +107,9 @@ system then needs to be rebooted.
 The shell script "bench" will run all the benchmarks, or you can
 pass it a parameter to run a single benchmark, e.g.
 
+*** To run libMicro on the embedded platform, use the "embd_bench" script. 'embd_bench' is used exactly like the 'bench' script, and all the usage examples for
+the 'bench' script in this readme file also hold true for 'embd_bench'. ***
+
        bench lmbench_bw_unix
 
 By default the script will run only the libMicro testsuite excluding the lmbench tests. 
index 877beb36d8de2e515cdc565d3f835db94ed31dd0..d9dad443e15d762f8ccf2b18bbfdb80c1db1c5ee 100644 (file)
@@ -72,16 +72,21 @@ BINS=               $(ALL:%=bin-$(ARCH)/%) bin-$(ARCH)/tattle
        wrapper.sh      \
        README
 
+ifeq "$(Embedded)" "YES"
+SEMOP_FLAG= 
+endif
+
 default $(ALL) run cstyle lint tattle: $(BINS)
        @cp bench.sh bench
        @cp coreos_bench.sh coreos_bench
+       @cp embd_bench.sh embd_bench
        @cp multiview.sh multiview
        @cp wrapper.sh wrapper
        @cp create_stuff.sh create_stuff
        @cp benchDS.sh benchDS
        @cp od_account_create.sh od_account_create
        @cp od_account_delete.sh od_account_delete
-       @chmod +x bench coreos_bench create_stuff multiview wrapper benchDS od_account_create od_account_delete
+       @chmod +x bench coreos_bench embd_bench create_stuff multiview wrapper benchDS od_account_create od_account_delete
        @mkdir -p bin-$(ARCH); cd bin-$(ARCH); MACH=$(ARCH) $(MAKE) -f ../Makefile.`uname -s` ARCH=$(ARCH) UNAME_RELEASE=`uname -r | sed 's/\./_/g'` $@
        @echo "code signing all the binaries under bin-$(ARCH) and apple/bin-$(ARCH)"
        @for file in $(abspath bin-$(ARCH)/*) $(abspath apple/bin-$(ARCH)/*);do        \
@@ -94,7 +99,7 @@ default $(ALL) run cstyle lint tattle: $(BINS)
 .PHONY: clean clean_subdirs clean_$(SUBDIRS)
 
 clean: clean_subdirs
-       rm -rf bin bin-* wrapper multiview create_stuff bench tattle benchDS od_account_create od_account_delete coreos_bench
+       rm -rf bin bin-* wrapper multiview create_stuff bench tattle benchDS od_account_create od_account_delete coreos_bench embd_bench
 
 clean_subdirs:
        for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean; done
index d113fc4f2e0b3f1160d045791a9d3a1c4fda8231..9d4e006088c3ac933f0bc86091912fa8181875b9 100644 (file)
@@ -53,14 +53,23 @@ endif
 OPT_FLAG=      -Os
 SEMOP_FLAG=    -DUSE_SEMOP
 
+ifeq "$(Embedded)" "YES"
+SEMOP_FLAG= 
+endif
+
 ###
 ###CFLAGS=             -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###extra_CFLAGS=       -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###
 CFLAGS+=               $(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+
 ifeq "$(Embedded)" "YES"
-CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
+#CFLAGS+=               $(OPT_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+CFLAGS+= -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
 endif
+
+
+
 extra_CFLAGS=  $(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 CPPFLAGS=              $(SEMOP_FLAG) -D_REENTRANT -Wall
 MATHLIB=       -lm
index fe5e573bf2cd5932a0211f14c363fcbc12a4fcfb..9ef0e27cf1ed2bba2bad14bb64b3e2d28fc189b3 100644 (file)
@@ -53,15 +53,21 @@ endif
 ### OPT_FLAG value was modified from '-g' to '-Os' as part of the fix for radar 7508837
 OPT_FLAG=      -Os
 SEMOP_FLAG=    -DUSE_SEMOP
+ifeq "$(Embedded)" "YES"
+SEMOP_FLAG= 
+endif
 
 ###
 ###CFLAGS=             -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###extra_CFLAGS=       -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
-### Added -DUSE_GETHRTIME to CFLAGS and extra_CFLAGS as part of the fix for radar 7508837
+###
 CFLAGS+=               $(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+
 ifeq "$(Embedded)" "YES"
-CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
+#CFLAGS+=               $(OPT_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+CFLAGS+= -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
 endif
+
 extra_CFLAGS=  $(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 CPPFLAGS=              $(SEMOP_FLAG) -D_REENTRANT -Wall
 MATHLIB=       -lm
index a26d1287167ed5f3dc9245e7deac5deb991795f3..210cf37a56f60408081acf059bf58441cd339546 100644 (file)
@@ -26,6 +26,7 @@
 # Use is subject to license terms.
 #
 
+Embedded=$(shell tconf --test TARGET_OS_EMBEDDED)
 
 ALL =                  \
                create_file     \
@@ -52,7 +53,6 @@ ALL =                         \
                posix_spawn             \
                trivial                 \
                vm_allocate \
-               od_query_create_with_node   \
                mbr_check_service_membership  \
                getpwnam                \
                mbr_check_membership    \
@@ -63,3 +63,9 @@ ALL =                         \
                getaddrinfo_host        \
                getaddrinfo_port        \
                getgrnam
+
+# Compile the following test on the desktop platform only
+ifeq "$(Embedded)" "NO"
+ALL += od_query_create_with_node
+endif
+
index a862cbd8623f4a0ba8bb7bf86fb458240e3aff6d..b153b43dd4fed970f0ca97d5431271cbe63ff752 100644 (file)
@@ -621,8 +621,6 @@ pwrite              $OPTS -N "pwrite_n1k"   -s 1k   -I 100          -f /dev/null
 pwrite         $OPTS -N "pwrite_n10k"  -s 10k  -I 100          -f /dev/null 
 pwrite         $OPTS -N "pwrite_n100k" -s 100k -I 100          -f /dev/null 
 
-mmap           $OPTS -N "mmap_z8k"     -l 8k   -I 1000 -B 50   -f /dev/zero
-mmap           $OPTS -N "mmap_z128k"   -l 128k -I 2000 -B 100  -f /dev/zero
 mmap           $OPTS -N "mmap_t8k"     -l 8k   -I 1000         -f $TFILE
 mmap           $OPTS -N "mmap_t128k"   -l 128k -I 1000         -f $TFILE
 mmap           $OPTS -N "mmap_u8k"     -l 8k   -I 1000         -f $VFILE
@@ -631,8 +629,7 @@ mmap                $OPTS -N "mmap_a8k"     -l 8k   -I 200          -f MAP_ANON
 mmap           $OPTS -N "mmap_a128k"   -l 128k -I 200          -f MAP_ANON
 
 
-mmap           $OPTS -N "mmap_rz8k"    -l 8k   -I 2000 -r      -f /dev/zero
-mmap           $OPTS -N "mmap_rz128k"  -l 128k -I 2000 -r      -f /dev/zero
+
 mmap           $OPTS -N "mmap_rt8k"    -l 8k   -I 2000 -r      -f $TFILE
 mmap           $OPTS -N "mmap_rt128k"  -l 128k -I 20000 -r     -f $TFILE
 mmap           $OPTS -N "mmap_ru8k"    -l 8k   -I 2000 -r      -f $VFILE
@@ -640,8 +637,7 @@ mmap                $OPTS -N "mmap_ru128k"  -l 128k -I 20000 -r     -f $VFILE
 mmap           $OPTS -N "mmap_ra8k"    -l 8k   -I 2000 -r      -f MAP_ANON
 mmap           $OPTS -N "mmap_ra128k"  -l 128k -I 20000 -r     -f MAP_ANON
 
-mmap           $OPTS -N "mmap_wz8k"    -l 8k   -I 5000 -w      -B 50 -f /dev/zero
-mmap           $OPTS -N "mmap_wz128k"  -l 128k -I 50000 -w     -B 50 -f /dev/zero
+
 mmap           $OPTS -N "mmap_wt8k"    -l 8k   -I 5000 -w      -f $TFILE
 mmap           $OPTS -N "mmap_wt128k"  -l 128k -I 50000 -w     -f $TFILE
 mmap           $OPTS -N "mmap_wu8k"    -l 8k   -I 5000 -w      -f $VFILE
@@ -649,8 +645,7 @@ mmap                $OPTS -N "mmap_wu128k"  -l 128k -I 500000 -w    -f $VFILE
 mmap           $OPTS -N "mmap_wa8k"    -l 8k   -I 3000 -w      -f MAP_ANON
 mmap           $OPTS -N "mmap_wa128k"  -l 128k -I 50000 -w     -f MAP_ANON
 
-munmap         $OPTS -N "unmap_z8k"    -l 8k   -I 500          -f /dev/zero
-munmap         $OPTS -N "unmap_z128k"  -l 128k -I 500  -B 100  -f /dev/zero
+
 munmap         $OPTS -N "unmap_t8k"    -l 8k   -I 500          -f $TFILE
 munmap         $OPTS -N "unmap_t128k"  -l 128k -I 500          -f $TFILE
 munmap         $OPTS -N "unmap_u8k"    -l 8k   -I 500          -f $VFILE
@@ -658,8 +653,7 @@ munmap              $OPTS -N "unmap_u128k"  -l 128k -I 500          -f $VFILE
 munmap         $OPTS -N "unmap_a8k"    -l 8k   -I 500          -f MAP_ANON
 munmap         $OPTS -N "unmap_a128k"  -l 128k -I 500          -f MAP_ANON
 
-munmap         $OPTS -N "unmap_rz8k"   -l 8k   -I 1000 -r      -f /dev/zero
-munmap         $OPTS -N "unmap_rz128k" -l 128k -I 2000 -r      -B 100 -f /dev/zero
+
 munmap         $OPTS -N "unmap_rt8k"   -l 8k   -I 1000 -r      -f $TFILE
 munmap         $OPTS -N "unmap_rt128k" -l 128k -I 3000 -r      -f $TFILE
 munmap         $OPTS -N "unmap_ru8k"   -l 8k   -I 1000 -r      -f $VFILE
@@ -669,8 +663,7 @@ munmap              $OPTS -N "unmap_ra128k" -l 128k -I 2000 -r      -f MAP_ANON
 
 connection     $OPTS -N "conn_connect"         -B 256  -c
 
-munmap         $OPTS -N "unmap_wz8k"   -l 8k   -I 1000 -w      -f /dev/zero
-munmap         $OPTS -N "unmap_wz128k" -l 128k -I 8000 -w      -B 100 -f /dev/zero
+
 munmap         $OPTS -N "unmap_wt8k"   -l 8k   -I 1000 -w      -f $TFILE
 munmap         $OPTS -N "unmap_wt128k" -l 128k -I 10000        -w      -f $TFILE
 munmap         $OPTS -N "unmap_wu8k"   -l 8k   -I 1000 -w      -f $VFILE
@@ -678,7 +671,6 @@ munmap              $OPTS -N "unmap_wu128k" -l 128k -I 50000        -w -B 10        -f $VFILE
 munmap         $OPTS -N "unmap_wa8k"   -l 8k   -I 1000 -w      -f MAP_ANON
 munmap         $OPTS -N "unmap_wa128k" -l 128k -I 10000        -w      -f MAP_ANON
 
-
 mprotect       $OPTS -N "mprot_z8k"    -l 8k  -I 300                   -f /dev/zero
 mprotect       $OPTS -N "mprot_z128k"  -l 128k -I 500          -f /dev/zero
 mprotect       $OPTS -N "mprot_wz8k"   -l 8k   -I 500  -w      -f /dev/zero
diff --git a/tools/tests/libMicro/embd_bench.sh b/tools/tests/libMicro/embd_bench.sh
new file mode 100644 (file)
index 0000000..7b61d0f
--- /dev/null
@@ -0,0 +1,815 @@
+#!/bin/sh
+#
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms
+# of the Common Development and Distribution License
+# (the "License").  You may not use this file except
+# in compliance with the License.
+#
+# You can obtain a copy of the license at
+# src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL
+# HEADER in each file and include the License file at
+# usr/src/OPENSOLARIS.LICENSE.  If applicable,
+# add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your
+# own identifying information: Portions Copyright [yyyy]
+# [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+
+# usage function - defines all the options that can be given to this script.
+function usage {
+       echo "Usage"
+       echo "$0 [-l] [-h] [name of test]"
+       echo "-l               : This option runs the lmbench tests along with the default libmicro tests."
+       echo "-h               : Help. This option displays information on how to run the script. "
+       echo "[name of test]   : This option runs only the test that is specified"
+       echo ""
+       echo "Examples"
+	echo "$0               : This is the default execution. This will run only the default libMicro tests."
+       echo "$0 -l            : This will run the lmbench tests too "
+       echo "$0 getppid       : This will run only the getppid tests"
+       exit
+       
+}
+
+if [ $# -eq 1 ]
+then 
+	lmbench=2    # to check if only a single test is to be run, e.g., ./embd_bench.sh getppid
+else
+       lmbench=0    # to run the default libMicro tests, without the lmbench tests.
+fi
+
+while getopts "lh" OPT_LIST
+do
+       case $OPT_LIST in 
+               l) lmbench=1;;    # to run the libmicro tests including the lmbench tests.
+               h) usage;;
+               *) usage;;
+       esac
+done
+
+if [ -w / ]; then
+	echo "/ is mounted read/write"
+else
+	echo "ERROR: the test requires that the / directory be read/writable; please remount it with: 'mount -uw /'"
+       exit 1
+fi
+
+
+tattle="./tattle"
+
+bench_version=0.4.0
+libmicro_version=`$tattle -V`
+
+case $libmicro_version in
+$bench_version)
+       ;;
+*)
+       echo "ERROR: libMicro version doesn't match 'bench' script version"
+       exit 1
+esac
+
+TMPROOT=/private/tmp/libmicro.$$
+VARROOT=/private/var/tmp/libmicro.$$
+mkdir -p $TMPROOT
+mkdir -p $VARROOT
+trap "rm -rf $TMPROOT $VARROOT && exit" 0 2
+
+TFILE=$TMPROOT/data
+IFILE=$TMPROOT/ifile
+TDIR1=$TMPROOT/0/1/2/3/4/5/6/7/8/9
+TDIR2=$TMPROOT/1/2/3/4/5/6/7/8/9/0
+VFILE=$VARROOT/data
+VDIR1=$VARROOT/0/1/2/3/4/5/6/7/8/9
+VDIR2=$VARROOT/1/2/3/4/5/6/7/8/9/0
+
+
+OPTS="-E -C 200 -L -S -W"
+
+dd if=/dev/zero of=$TFILE bs=1024k count=10 2>/dev/null
+dd if=/dev/zero of=$VFILE bs=1024k count=10 2>/dev/null
+mkdir -p $TDIR1 $TDIR2
+mkdir -p $VDIR1 $VDIR2
+
+touch $IFILE
+/usr/bin/touch /private/var/tmp/lmbench
+
+
+# produce benchmark header for easier comparisons
+
+hostname=`uname -n`
+
+if [ -f /usr/sbin/psrinfo ]; then
+       p_count=`psrinfo|wc -l`
+       p_mhz=`psrinfo -v | awk '/operates/{print $6 "MHz"; exit }'`
+       p_type=`psrinfo -vp 2>/dev/null | awk '{if (NR == 3) {print $0; exit}}'` 
+       p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+fi
+
+if [ -f /proc/cpuinfo ]; then
+       p_count=`egrep processor /proc/cpuinfo | wc -l`
+       p_mhz=`awk -F: '/cpu MHz/{printf("%5.0f00Mhz\n",$2/100); exit}' /proc/cpuinfo`
+       p_type=`awk -F: '/model name/{print $2; exit}' /proc/cpuinfo`
+       p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+else
+## Mac OS X specific stuff
+# first, get ugly output, in case pretty output isn't available
+#
+       p_count=`sysctl -n hw.physicalcpu`
+       p_mhz=`sysctl -n hw.cpufrequency`
+       p_type=`sysctl -n hw.model`
+
+if [ -x /usr/sbin/system_profiler ]; then
+       # <rdar://4655981> requires this hunk of work-around
+       # grep the XML for the characteristic we need. The key appears twice, so grep for the useful key (with 'string')
+       # use sed to strip off the <string></string> and the tabs in front of the string.  So much work for so little result.
+       #
+               p_mhz=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+                       grep -A1 current_processor_speed | grep string | \
+                       sed -E 's/<string>(.+)<\/string>/\1/' | sed 's- --g'`
+               p_type=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+                       grep -A1 cpu_type | grep string | \
+                       sed -E 's/<string>(.+)<\/string>/\1/' | sed 's- --g'`
+fi
+
+# look for en0 (usually ethernet); if that isn't there, try en1 (usually wireless); else give up
+	p_ipaddr=`ipconfig getpacket en0 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr=`ipconfig getpacket en1 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	fi
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr="unknown"
+	fi
+fi
+
+printf "\n\n!Libmicro_#:   %30s\n" $libmicro_version
+printf "!Options:      %30s\n" "$OPTS"
+printf "!Machine_name: %30s\n" "$hostname"
+printf "!OS_name:      %30s\n" `uname -s`
+printf "!OS_release:   %30s\n" `sw_vers -productVersion`
+printf "!OS_build:     %30.18s\n" "`sw_vers -buildVersion`"
+printf "!Processor:    %30s\n" `arch`
+printf "!#CPUs:        %30s\n" $p_count
+printf "!CPU_MHz:      %30s\n" "$p_mhz"
+printf "!CPU_NAME:     %30s\n" "$p_type"
+printf "!IP_address:   %30s\n" "$p_ipaddr"
+printf "!Run_by:       %30s\n" $LOGNAME
+printf "!Date:        %30s\n" "`date '+%D %R'`"
+printf "!Compiler:     %30s\n" `$tattle -c`
+printf "!Compiler Ver.:%30s\n" "`$tattle -v`"
+printf "!sizeof(long): %30s\n" `$tattle -s`
+printf "!extra_CFLAGS: %30s\n" "`$tattle -f`"
+printf "!TimerRes:     %30s\n\n\n" "`$tattle -r`"
+bin_dir="$TMPROOT/bin"
+
+mkdir -p $bin_dir
+cp bin-*/exec_bin $bin_dir/$A
+
+cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A
+
+newline=0
+
+#
+# Everything below the while loop is input for the while loop
+# if you have any tests which can't run in the while loop, put
+# them above this comment
+#
+while read A B
+do
+       # $A contains the command, $B contains the arguments
+       # we echo blank lines and comments
+       # we skip anything which fails to match *$1* (useful
+       # if we only want to test one case, but a nasty hack)
+
+       case $A in
+       \#*)
+               echo "$A $B"
+               newline=1
+               continue
+               ;;
+       
+       "")
+               if [ $newline -eq 1 ]
+               then
+                       newline=0
+                       echo
+                       echo
+               fi
+
+               continue
+               ;;
+
+       *$1*)
+               # Default execution without the lmbench tests. 
+               # checks if there is no argument passed by the user.
+               if [  $lmbench -eq 0 ]
+               then
+                       string=lmbench
+                       if [ "${A:0:7}" == "$string" ]
+                       then
+                               continue
+                       fi
+               fi
+                       
+               ;;
+       
+       *)              
+               if [ $lmbench -ne 1 ]
+               then
+                       continue
+               fi
+               ;;
+       esac
+
+       if [ ! -f $bin_dir/$A ]
+       then
+               cp bin-*/$A $bin_dir/$A
+       fi
+
+       echo
+
+       (cd $TMPROOT && eval "bin/$A $B")
+
+       echo
+       echo
+done <<.
+
+#
+# Obligatory null system call: use very short time
+# for default since SuSe implements this "syscall" in userland
+#
+
+getpid         $OPTS -N "getpid" -I 5
+getppid                $OPTS -N "getppid" -I 5
+
+getenv         $OPTS -N "getenv"       -s 100 -I 100    
+getenv         $OPTS -N "getenvT2"     -s 100 -I 100   -T 2 
+
+gettimeofday   $OPTS -N "gettimeofday"          
+
+log            $OPTS -N "log"  -I 20   -B 300000        
+exp            $OPTS -N "exp"  -I 20   -B 100000        
+lrand48                $OPTS -N "lrand48"
+
+memset         $OPTS -N "memset_10"    -s 10   -I 10 
+memset         $OPTS -N "memset_256"   -s 256  -I 20
+memset         $OPTS -N "memset_256_u" -s 256   -a 1 -I 20 
+memset         $OPTS -N "memset_1k"    -s 1k    -I 100 -B 2000
+memset         $OPTS -N "memset_4k"    -s 4k    -I 250 -B 500
+memset         $OPTS -N "memset_4k_uc" -s 4k    -u -I 400
+
+memset         $OPTS -N "memset_10k"   -s 10k  -I 600 -B 500
+memset         $OPTS -N "memset_1m"    -s 1m   -I 200000
+memset         $OPTS -N "memset_10m"   -s 10m -I 2000000 
+memset         $OPTS -N "memsetP2_10m" -s 10m -P 2 -I 2000000 
+
+memrand                $OPTS -N "memrand"      -s 40m -B 10000
+
+# This is an elided test and is not ported yet.
+# Check Makefile.Darwin for the list of elided tests
+# cachetocache $OPTS -N "cachetocache" -s 100k -T 2 -I 200
+
+isatty         $OPTS -N "isatty_yes"   
+isatty         $OPTS -N "isatty_no"  -f $IFILE
+
+malloc         $OPTS -N "malloc_10"    -s 10    -g 10 -I 50
+malloc         $OPTS -N "malloc_100"   -s 100   -g 10 -I 50
+malloc         $OPTS -N "malloc_1k"    -s 1k    -g 10 -I 50
+malloc         $OPTS -N "malloc_10k"   -s 10k   -g 10 -I 50
+malloc         $OPTS -N "malloc_100k"  -s 100k  -g 10 -I 2000
+
+malloc         $OPTS -N "mallocT2_10"    -s 10   -g 10 -T 2 -I 200
+malloc         $OPTS -N "mallocT2_100"   -s 100  -g 10 -T 2 -I 200
+malloc         $OPTS -N "mallocT2_1k"    -s 1k   -g 10 -T 2 -I 200
+malloc         $OPTS -N "mallocT2_10k"   -s 10k  -g 10 -T 2 -I 200
+malloc         $OPTS -N "mallocT2_100k"  -s 100k -g 10 -T 2 -I 10000
+
+close          $OPTS -N "close_bad"            -B 96           -b
+close          $OPTS -N "close_tmp"            -B 64           -f $TFILE
+close          $OPTS -N "close_usr"            -B 64           -f $VFILE
+close          $OPTS -N "close_zero"           -B 64           -f /dev/zero
+close_tcp      $OPTS -N "close_tcp"            -B 32  
+
+memcpy         $OPTS -N "memcpy_10"    -s 10   -I 10 
+memcpy         $OPTS -N "memcpy_1k"    -s 1k   -I 50
+memcpy         $OPTS -N "memcpy_10k"   -s 10k  -I 800
+memcpy         $OPTS -N "memcpy_1m"    -s 1m   -I 500000
+memcpy         $OPTS -N "memcpy_10m"   -s 10m  -I 5000000
+
+strcpy         $OPTS -N "strcpy_10"    -s 10   -I 5 
+strcpy         $OPTS -N "strcpy_1k"    -s 1k   -I 100
+
+strlen         $OPTS -N "strlen_10"    -s 10   -I 5
+strlen         $OPTS -N "strlen_1k"    -s 1k   -I 100
+
+strchr         $OPTS -N "strchr_10"    -s 10   -I 5
+strchr         $OPTS -N "strchr_1k"    -s 1k   -I 200
+strcmp         $OPTS -N "strcmp_10"    -s 10   -I 10
+strcmp         $OPTS -N "strcmp_1k"    -s 1k   -I 200
+
+strcasecmp     $OPTS -N "scasecmp_10"  -s 10 -I 50 -B 2000
+strcasecmp     $OPTS -N "scasecmp_1k"  -s 1k -I 20000 -B 100
+
+strtol         $OPTS -N "strtol"      -I 20      
+
+# This is an elided test and is not ported yet.     
+# Check Makefile.Darwin for the list of elided tests
+# getcontext   $OPTS -N "getcontext"  -I 100
+
+# This is an elided test and is not ported yet.     
+# Check Makefile.Darwin for the list of elided tests
+# setcontext   $OPTS -N "setcontext"  -I 100
+
+mutex          $OPTS -N "mutex_st"     -I 10
+mutex          $OPTS -N "mutex_mt"     -t -I 10        
+mutex          $OPTS -N "mutex_T2"     -T 2  -I 100
+
+longjmp                $OPTS -N "longjmp"      -I 10
+siglongjmp     $OPTS -N "siglongjmp"   -I 20
+
+getrusage      $OPTS -N "getrusage"    -I 200
+
+times          $OPTS -N "times"        -I 200
+time           $OPTS -N "time"         -I 50
+localtime_r    $OPTS -N "localtime_r"  -I 200  
+strftime       $OPTS -N "strftime" -I 10000 -B 100 
+
+mktime         $OPTS -N "mktime"       -I 500   
+mktime         $OPTS -N "mktimeT2" -T 2 -I 1000 
+
+cascade_mutex  $OPTS -N "c_mutex_1"    -I 50
+cascade_mutex  $OPTS -N "c_mutex_10"   -T 10 -I 5000
+cascade_mutex  $OPTS -N "c_mutex_200"  -T 200  -I 2000000
+
+cascade_cond   $OPTS -N "c_cond_1"     -I 100
+cascade_cond   $OPTS -N "c_cond_10"    -T 10   -I 3000
+cascade_cond   $OPTS -N "c_cond_200"   -T 200  -I 2000000
+
+cascade_lockf  $OPTS -N "c_lockf_1"    -I 1000 
+cascade_lockf  $OPTS -N "c_lockf_10"   -P 10 -I 50000
+#cascade_lockf $OPTS -N "c_lockf_200"  -P 200 -I 5000000
+
+
+
+cascade_flock  $OPTS -N "c_flock"      -I 1000 
+cascade_flock  $OPTS -N "c_flock_10"   -P 10   -I 50000
+#cascade_flock $OPTS -N "c_flock_200"  -P 200  -I 5000000
+
+
+
+cascade_fcntl  $OPTS -N "c_fcntl_1"    -I 2000         
+cascade_fcntl  $OPTS -N "c_fcntl_10"   -P 10 -I 20000
+#cascade_fcntl $OPTS -N "c_fcntl_200"  -P 200  -I 5000000
+
+
+file_lock      $OPTS -N "file_lock"   -I 1000         
+
+getsockname    $OPTS -N "getsockname"  -I 100
+getpeername    $OPTS -N "getpeername"  -I 100
+
+chdir          $OPTS -N "chdir_tmp"    -I 2000         $TDIR1 $TDIR2
+chdir          $OPTS -N "chdir_usr"    -I 2000         $VDIR1 $VDIR2
+
+chdir          $OPTS -N "chgetwd_tmp"  -I 3000 -g $TDIR1 $TDIR2
+chdir          $OPTS -N "chgetwd_usr"  -I 3000 -g $VDIR1 $VDIR2
+
+realpath       $OPTS -N "realpath_tmp" -I 3000         -f $TDIR1
+realpath       $OPTS -N "realpath_usr" -I 3000 -f $VDIR1
+
+stat           $OPTS -N "stat_tmp" -I 1000             -f $TFILE
+stat           $OPTS -N "stat_usr" -I 1000             -f $VFILE
+
+lmbench_stat           $OPTS -N "lmbench_stat_tmp" -I 1000             -f $TFILE
+lmbench_stat           $OPTS -N "lmbench_stat_usr" -I 10000 -B 100             -f /private/var/tmp/lmbench
+
+#
+# lmbench uses a touched empty file in /private/var/tmp
+# libMicro uses a 1M file in a directory off /private/var/tmp
+# performance difference is ~ 0.2 usecs/call
+#
+# why? - walking the dir tree, empty file vs. non-empty file, non-empty dir
+# in the case of libMicro, etc., etc.
+#
+
+lmbench_stat           $OPTS -N "lmbench_stat_usr - Default" -I 10000 -B 100   -f /private/var/tmp/lmbench
+
+lmbench_fstat          $OPTS -N "lmbench_fstat_tmp" -I 1000            -f $TFILE
+lmbench_fstat          $OPTS -N "lmbench_fstat_usr" -I 10000 -B 100            -f /private/var/tmp/lmbench
+
+# see stat test to understand why we are using /private/var/tmp/lmbench
+
+lmbench_fstat          $OPTS -N "lmbench_fstat_usr - Default" -I 10000 -B 100  -f /private/var/tmp/lmbench
+
+lmbench_openclose      $OPTS -N "lmbench_openclose - Default" -I 10000 -B 100  -f /private/var/tmp/lmbench
+
+lmbench_select_file $OPTS -N "lmbench_select_file_10"  -n 10  -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_100" -n 100 -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_250" -n 250 -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_500" -n 500 -B 100
+
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_10"  -n 10  -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_100" -n 100 -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_250" -n 250 -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_500" -n 500 -B 100
+
+fcntl          $OPTS -N "fcntl_tmp"    -I 100  -f $TFILE
+fcntl          $OPTS -N "fcntl_usr"    -I 100  -f $VFILE
+fcntl_ndelay   $OPTS -N "fcntl_ndelay" -I 100  
+
+lseek          $OPTS -N "lseek_t8k"    -s 8k   -I 50   -f $TFILE
+lseek          $OPTS -N "lseek_u8k"    -s 8k   -I 50   -f $VFILE
+
+open           $OPTS -N "open_tmp"             -B 256          -f $TFILE
+open           $OPTS -N "open_usr"             -B 256          -f $VFILE
+open           $OPTS -N "open_zero"            -B 256          -f /dev/zero
+
+dup            $OPTS -N "dup"                  -B 512   
+
+socket         $OPTS -N "socket_u"             -B 256
+socket         $OPTS -N "socket_i"             -B 256          -f PF_INET
+
+socketpair     $OPTS -N "socketpair"           -B 256
+
+setsockopt     $OPTS -N "setsockopt"           -I 200
+
+bind           $OPTS -N "bind"                 -B 100
+
+listen         $OPTS -N "listen"               -B 100
+
+#connection    $OPTS -N "connection"           -B 256 
+
+poll           $OPTS -N "poll_10"      -n 10   -I 500
+poll           $OPTS -N "poll_100"     -n 100  -I 1000
+poll           $OPTS -N "poll_1000"    -n 1000 -I 5000
+
+poll           $OPTS -N "poll_w10"     -n 10   -I 500          -w 1
+poll           $OPTS -N "poll_w100"    -n 100  -I 2000         -w 10
+poll           $OPTS -N "poll_w1000"   -n 1000 -I 40000        -w 100
+
+select         $OPTS -N "select_10"    -n 10   -I 500
+select         $OPTS -N "select_100"   -n 100  -I 1000
+select         $OPTS -N "select_1000"  -n 1000 -I 5000
+
+select         $OPTS -N "select_w10"   -n 10   -I 500          -w 1
+select         $OPTS -N "select_w100"  -n 100  -I 2000         -w 10
+select         $OPTS -N "select_w1000" -n 1000 -I 40000        -w 100
+
+semop          $OPTS -N "semop" -I 200
+
+sigaction      $OPTS -N "sigaction" -I 100
+signal         $OPTS -N "signal" -I 1000
+sigprocmask    $OPTS -N "sigprocmask" -I 200
+
+lmbench_lat_sig_install        $OPTS -N "lmbench_siginstall"
+# sigcatch and sigsend need to be evaluated together
+# lmbench framework will allow multiple measurements within the same
+# benchmark test which allow them to factor out the cost of sending
+# a signal from catching one
+#
+# for our purposes sigcatch results - sigsend results yield
+# lmbench sig handler overhead measurements
+lmbench_lat_sig_catch  $OPTS -N "lmbench_sigcatch" 
+lmbench_lat_sig_send   $OPTS -N "lmbench_sigsend" 
+
+
+pthread_create  $OPTS -N "pthread_8"           -B 8
+pthread_create  $OPTS -N "pthread_32"          -B 32
+pthread_create  $OPTS -N "pthread_128"         -B 128
+pthread_create  $OPTS -N "pthread_512"         -B 512
+
+fork           $OPTS -N "fork_10"              -B 10
+#fork          $OPTS -N "fork_100"             -B 100  -C 100
+
+#fork          $OPTS -N "fork_1000"            -B 1000 -C 50
+
+exit           $OPTS -N "exit_10"              -B 10
+##exit         $OPTS -N "exit_100"             -B 100
+
+#exit          $OPTS -N "exit_1000"            -B 1000 -C 50
+
+exit           $OPTS -N "exit_10_nolibc"       -e -B 10
+
+exec           $OPTS -N "exec" -B 10
+
+posix_spawn    $OPTS -N "posix_spawn" -B 10
+
+system         $OPTS -N "system" -I 1000000
+
+recurse                $OPTS -N "recurse"              -B 512
+
+read           $OPTS -N "read_t1k"     -s 1k -B 50                     -f $TFILE
+read           $OPTS -N "read_t10k"    -s 10k  -B 16           -f $TFILE
+read           $OPTS -N "read_t100k"   -s 100k -B 4            -f $TFILE
+
+read           $OPTS -N "read_u1k"     -s 1k   -B 50           -f $VFILE
+read           $OPTS -N "read_u10k"    -s 10k  -B 16           -f $VFILE
+read           $OPTS -N "read_u100k"   -s 100k -B 4            -f $VFILE
+
+read           $OPTS -N "read_z1k"     -s 1k   -B 100          -f /dev/zero 
+read           $OPTS -N "read_z10k"    -s 10k  -B 30           -f /dev/zero 
+read           $OPTS -N "read_z100k"   -s 100k -B 4            -f /dev/zero 
+read           $OPTS -N "read_zw100k"  -s 100k -B 4         -w -f /dev/zero 
+
+lmbench_read           $OPTS -N "read_t1b"     -s 1 -B 50                      -f $TFILE
+lmbench_read           $OPTS -N "read_t1k"     -s 1k -B 50                     -f $TFILE
+lmbench_read           $OPTS -N "read_t10k"    -s 10k  -B 16           -f $TFILE
+lmbench_read           $OPTS -N "read_t100k"   -s 100k -B 4            -f $TFILE
+
+lmbench_read           $OPTS -N "read_u1b"     -s 1    -B 50           -f $VFILE
+lmbench_read           $OPTS -N "read_u1k"     -s 1k   -B 50           -f $VFILE
+lmbench_read           $OPTS -N "read_u10k"    -s 10k  -B 16           -f $VFILE
+lmbench_read           $OPTS -N "read_u100k"   -s 100k -B 4            -f $VFILE
+
+lmbench_read           $OPTS -N "read_z1b - Default"   -s 1    -B 100          -f /dev/zero 
+lmbench_read           $OPTS -N "read_z1k"     -s 1k   -B 100          -f /dev/zero 
+lmbench_read           $OPTS -N "read_z10k"    -s 10k  -B 30           -f /dev/zero 
+lmbench_read           $OPTS -N "read_z100k"   -s 100k -B 4            -f /dev/zero 
+lmbench_read           $OPTS -N "read_zw100k"  -s 100k -B 4         -w -f /dev/zero 
+
+write          $OPTS -N "write_t1k"    -s 1k   -B 50           -f $TFILE
+write          $OPTS -N "write_t10k"   -s 10k  -B 25           -f $TFILE
+write          $OPTS -N "write_t100k"  -s 100k -B 4            -f $TFILE
+
+write          $OPTS -N "write_u1k"    -s 1k   -B 50           -f $VFILE
+write          $OPTS -N "write_u10k"   -s 10k  -B 25           -f $VFILE
+write          $OPTS -N "write_u100k"  -s 100k -B 4            -f $VFILE
+
+write          $OPTS -N "write_n1k"    -s 1k   -I 100 -B 0     -f /dev/null 
+write          $OPTS -N "write_n10k"   -s 10k  -I 100 -B 0     -f /dev/null 
+write          $OPTS -N "write_n100k"  -s 100k -I 100 -B 0     -f /dev/null 
+
+lmbench_write          $OPTS -N "lmbench_write_t1b"    -s 1    -B 50           -f $TFILE
+lmbench_write          $OPTS -N "lmbench_write_t1k"    -s 1k   -B 50           -f $TFILE
+lmbench_write          $OPTS -N "lmbench_write_t10k"   -s 10k  -B 25           -f $TFILE
+lmbench_write          $OPTS -N "lmbench_write_t100k"  -s 100k -B 4            -f $TFILE
+
+lmbench_write          $OPTS -N "lmbench_write_u1b"    -s 1    -B 50           -f $VFILE
+lmbench_write          $OPTS -N "lmbench_write_u1k"    -s 1k   -B 50           -f $VFILE
+lmbench_write          $OPTS -N "lmbench_write_u10k"   -s 10k  -B 25           -f $VFILE
+lmbench_write          $OPTS -N "lmbench_write_u100k"  -s 100k -B 4            -f $VFILE
+
+lmbench_write          $OPTS -N "lmbench_write_n1b - Default"  -s 1    -I 100 -B 0     -f /dev/null 
+lmbench_write          $OPTS -N "lmbench_write_n1k"    -s 1k   -I 100 -B 0     -f /dev/null 
+lmbench_write          $OPTS -N "lmbench_write_n10k"   -s 10k  -I 100 -B 0     -f /dev/null 
+lmbench_write          $OPTS -N "lmbench_write_n100k"  -s 100k -I 100 -B 0     -f /dev/null 
+
+writev         $OPTS -N "writev_t1k"   -s 1k   -B 20           -f $TFILE
+writev         $OPTS -N "writev_t10k"  -s 10k  -B 4            -f $TFILE
+writev         $OPTS -N "writev_t100k" -s 100k                 -f $TFILE
+
+writev         $OPTS -N "writev_u1k"   -s 1k   -B 20           -f $VFILE
+writev         $OPTS -N "writev_u10k"  -s 10k  -B 4            -f $VFILE
+writev         $OPTS -N "writev_u100k" -s 100k                 -f $VFILE
+
+writev         $OPTS -N "writev_n1k"   -s 1k   -I 100 -B 0     -f /dev/null 
+writev         $OPTS -N "writev_n10k"  -s 10k  -I 100 -B 0     -f /dev/null 
+writev         $OPTS -N "writev_n100k" -s 100k -I 100 -B 0     -f /dev/null 
+
+pread          $OPTS -N "pread_t1k"    -s 1k   -I 300          -f $TFILE
+pread          $OPTS -N "pread_t10k"   -s 10k  -I 1000         -f $TFILE
+pread          $OPTS -N "pread_t100k"  -s 100k -I 10000        -f $TFILE
+
+pread          $OPTS -N "pread_u1k"    -s 1k   -I 300          -f $VFILE
+pread          $OPTS -N "pread_u10k"   -s 10k  -I 1000         -f $VFILE
+pread          $OPTS -N "pread_u100k"  -s 100k -I 10000        -f $VFILE
+
+pread          $OPTS -N "pread_z1k"    -s 1k   -I 300          -f /dev/zero 
+pread          $OPTS -N "pread_z10k"   -s 10k  -I 1000         -f /dev/zero 
+pread          $OPTS -N "pread_z100k"  -s 100k -I 2000 -f /dev/zero 
+pread          $OPTS -N "pread_zw100k" -s 100k -w -I 10000     -f /dev/zero 
+
+pwrite         $OPTS -N "pwrite_t1k"   -s 1k   -I 500          -f $TFILE
+pwrite         $OPTS -N "pwrite_t10k"  -s 10k  -I 1000         -f $TFILE
+pwrite         $OPTS -N "pwrite_t100k" -s 100k -I 10000        -f $TFILE
+
+pwrite         $OPTS -N "pwrite_u1k"   -s 1k   -I 500          -f $VFILE
+pwrite         $OPTS -N "pwrite_u10k"  -s 10k  -I 1000         -f $VFILE
+pwrite         $OPTS -N "pwrite_u100k" -s 100k -I 20000        -f $VFILE
+
+pwrite         $OPTS -N "pwrite_n1k"   -s 1k   -I 100          -f /dev/null 
+pwrite         $OPTS -N "pwrite_n10k"  -s 10k  -I 100          -f /dev/null 
+pwrite         $OPTS -N "pwrite_n100k" -s 100k -I 100          -f /dev/null 
+
+
+mmap           $OPTS -N "mmap_t8k"     -l 8k   -I 1000         -f $TFILE
+mmap           $OPTS -N "mmap_t128k"   -l 128k -I 1000         -f $TFILE
+mmap           $OPTS -N "mmap_u8k"     -l 8k   -I 1000         -f $VFILE
+mmap           $OPTS -N "mmap_u128k"   -l 128k -I 1000         -f $VFILE
+mmap           $OPTS -N "mmap_a8k"     -l 8k   -I 200          -f MAP_ANON
+mmap           $OPTS -N "mmap_a128k"   -l 128k -I 200          -f MAP_ANON
+
+
+
+mmap           $OPTS -N "mmap_rt8k"    -l 8k   -I 2000 -r      -f $TFILE
+mmap           $OPTS -N "mmap_rt128k"  -l 128k -I 20000 -r     -f $TFILE
+mmap           $OPTS -N "mmap_ru8k"    -l 8k   -I 2000 -r      -f $VFILE
+mmap           $OPTS -N "mmap_ru128k"  -l 128k -I 20000 -r     -f $VFILE
+mmap           $OPTS -N "mmap_ra8k"    -l 8k   -I 2000 -r      -f MAP_ANON
+mmap           $OPTS -N "mmap_ra128k"  -l 128k -I 20000 -r     -f MAP_ANON
+
+
+mmap           $OPTS -N "mmap_wt8k"    -l 8k   -I 5000 -w      -f $TFILE
+mmap           $OPTS -N "mmap_wt128k"  -l 128k -I 50000 -w     -f $TFILE
+mmap           $OPTS -N "mmap_wu8k"    -l 8k   -I 5000 -w      -f $VFILE
+mmap           $OPTS -N "mmap_wu128k"  -l 128k -I 500000 -w    -f $VFILE
+mmap           $OPTS -N "mmap_wa8k"    -l 8k   -I 3000 -w      -f MAP_ANON
+mmap           $OPTS -N "mmap_wa128k"  -l 128k -I 50000 -w     -f MAP_ANON
+
+
+munmap         $OPTS -N "unmap_t8k"    -l 8k   -I 500          -f $TFILE
+munmap         $OPTS -N "unmap_t128k"  -l 128k -I 500          -f $TFILE
+munmap         $OPTS -N "unmap_u8k"    -l 8k   -I 500          -f $VFILE
+munmap         $OPTS -N "unmap_u128k"  -l 128k -I 500          -f $VFILE
+munmap         $OPTS -N "unmap_a8k"    -l 8k   -I 500          -f MAP_ANON
+munmap         $OPTS -N "unmap_a128k"  -l 128k -I 500          -f MAP_ANON
+
+
+munmap         $OPTS -N "unmap_rt8k"   -l 8k   -I 1000 -r      -f $TFILE
+munmap         $OPTS -N "unmap_rt128k" -l 128k -I 3000 -r      -f $TFILE
+munmap         $OPTS -N "unmap_ru8k"   -l 8k   -I 1000 -r      -f $VFILE
+munmap         $OPTS -N "unmap_ru128k" -l 128k -I 3000 -r      -f $VFILE
+munmap         $OPTS -N "unmap_ra8k"   -l 8k   -I 1000 -r      -f MAP_ANON
+munmap         $OPTS -N "unmap_ra128k" -l 128k -I 2000 -r      -f MAP_ANON
+
+connection     $OPTS -N "conn_connect"         -B 256  -c
+
+
+munmap         $OPTS -N "unmap_wt8k"   -l 8k   -I 1000 -w      -f $TFILE
+munmap         $OPTS -N "unmap_wt128k" -l 128k -I 10000        -w      -f $TFILE
+munmap         $OPTS -N "unmap_wu8k"   -l 8k   -I 1000 -w      -f $VFILE
+munmap         $OPTS -N "unmap_wu128k" -l 128k -I 50000        -w -B 10        -f $VFILE
+munmap         $OPTS -N "unmap_wa8k"   -l 8k   -I 1000 -w      -f MAP_ANON
+munmap         $OPTS -N "unmap_wa128k" -l 128k -I 10000        -w      -f MAP_ANON
+
+mprotect       $OPTS -N "mprot_z8k"    -l 8k  -I 300                   -f /dev/zero
+mprotect       $OPTS -N "mprot_z128k"  -l 128k -I 500          -f /dev/zero
+mprotect       $OPTS -N "mprot_wz8k"   -l 8k   -I 500  -w      -f /dev/zero
+mprotect       $OPTS -N "mprot_wz128k" -l 128k -I 1000 -w      -f /dev/zero
+mprotect       $OPTS -N "mprot_twz8k"  -l 8k   -I 1000 -w -t   -f /dev/zero
+mprotect       $OPTS -N "mprot_tw128k" -l 128k -I 2000 -w -t   -f /dev/zero
+mprotect       $OPTS -N "mprot_tw4m"   -l 4m   -w -t -B 1  -f /dev/zero
+
+pipe           $OPTS -N "pipe_pst1"    -s 1    -I 1000 -x pipe -m st
+pipe           $OPTS -N "pipe_pmt1"    -s 1    -I 8000 -x pipe -m mt
+pipe           $OPTS -N "pipe_pmp1"    -s 1    -I 8000 -x pipe -m mp
+pipe           $OPTS -N "pipe_pst4k"   -s 4k   -I 1000 -x pipe -m st
+pipe           $OPTS -N "pipe_pmt4k"   -s 4k   -I 8000 -x pipe -m mt
+pipe           $OPTS -N "pipe_pmp4k"   -s 4k   -I 8000 -x pipe -m mp
+
+pipe           $OPTS -N "pipe_sst1"    -s 1    -I 1000 -x sock -m st
+pipe           $OPTS -N "pipe_smt1"    -s 1    -I 8000 -x sock -m mt
+pipe           $OPTS -N "pipe_smp1"    -s 1    -I 8000 -x sock -m mp
+pipe           $OPTS -N "pipe_sst4k"   -s 4k   -I 1000 -x sock -m st
+pipe           $OPTS -N "pipe_smt4k"   -s 4k   -I 8000 -x sock -m mt
+pipe           $OPTS -N "pipe_smp4k"   -s 4k   -I 8000 -x sock -m mp
+
+pipe           $OPTS -N "pipe_tst1"    -s 1    -I 1000 -x tcp  -m st
+pipe           $OPTS -N "pipe_tmt1"    -s 1    -I 8000 -x tcp  -m mt
+pipe           $OPTS -N "pipe_tmp1"    -s 1    -I 8000 -x tcp  -m mp
+pipe           $OPTS -N "pipe_tst4k"   -s 4k   -I 1000 -x tcp  -m st
+pipe           $OPTS -N "pipe_tmt4k"   -s 4k   -I 8000 -x tcp  -m mt
+pipe           $OPTS -N "pipe_tmp4k"   -s 4k   -I 8000 -x tcp  -m mp
+
+#connection    $OPTS -N "conn_accept"          -B 256      -a
+
+lmbench_bw_unix -B 11 -L -W
+
+lmbench_bw_mem $OPTS -N lmbench_bcopy_512 -s 512 -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_1k -s 1k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_2k -s 2k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_4k -s 4k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_8k -s 8k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_16k -s 16k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_32k -s 32k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_64k -s 64k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_128k -s 128k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_256k -s 256k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_512k -s 512k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_1m -s 1m -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bzero_512 -s 512 -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_1k -s 1k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_2k -s 2k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_4k -s 4k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_8k -s 8k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_16k -s 16k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_32k -s 32k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_64k -s 64k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_128k -s 128k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_256k -s 256k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_512k -s 512k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_1m -s 1m -x bzero
+lmbench_bw_mem $OPTS -N lmbench_fcp_512 -s 512 -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_1k -s 1k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_2k -s 2k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_4k -s 4k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_8k -s 8k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_16k -s 16k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_32k -s 32k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_64k -s 64k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_128k -s 128k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_256k -s 256k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_512k -s 512k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_1m -s 1m -x fcp
+lmbench_bw_mem $OPTS -N lmbench_cp_512 -s 512 -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_1k -s 1k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_2k -s 2k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_4k -s 4k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_8k -s 8k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_16k -s 16k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_32k -s 32k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_64k -s 64k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_128k -s 128k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_256k -s 256k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_512k -s 512k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_1m -s 1m -x cp
+lmbench_bw_mem $OPTS -N lmbench_frd_512 -s 512 -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_1k -s 1k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_2k -s 2k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_4k -s 4k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_8k -s 8k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_16k -s 16k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_32k -s 32k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_64k -s 64k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_128k -s 128k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_256k -s 256k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_512k -s 512k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_1m -s 1m -x frd
+lmbench_bw_mem $OPTS -N lmbench_rd_512 -s 512 -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_1k -s 1k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_2k -s 2k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_4k -s 4k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_8k -s 8k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_16k -s 16k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_32k -s 32k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_64k -s 64k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_128k -s 128k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_256k -s 256k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_512k -s 512k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_1m -s 1m -x rd
+lmbench_bw_mem $OPTS -N lmbench_fwr_512 -s 512 -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_1k -s 1k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_2k -s 2k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_4k -s 4k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_8k -s 8k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_16k -s 16k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_32k -s 32k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_64k -s 64k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_128k -s 128k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_256k -s 256k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_512k -s 512k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_1m -s 1m -x fwr
+lmbench_bw_mem $OPTS -N lmbench_wr_512 -s 512 -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_1k -s 1k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_2k -s 2k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_4k -s 4k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_8k -s 8k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_16k -s 16k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_32k -s 32k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_64k -s 64k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_128k -s 128k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_256k -s 256k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_512k -s 512k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_1m -s 1m -x wr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_512 -s 512 -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_1k -s 1k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_2k -s 2k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_4k -s 4k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_8k -s 8k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_16k -s 16k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_32k -s 32k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_64k -s 64k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_128k -s 128k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_256k -s 256k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_512k -s 512k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_1m -s 1m -x rdwr
+
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512 -s 512 -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1k -s 1k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_2k -s 2k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_4k -s 4k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_8k -s 8k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_16k -s 16k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_32k -s 32k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_64k -s 64k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_128k -s 128k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_256k -s 256k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512k -s 512k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1m -s 1m -f $TFILE
+
+.
index b85bb89119cad41a835a01a02db64a04a5952149..c6b1e6f48a16782821867cf0850a0c19fb2e7042 100644 (file)
@@ -42,7 +42,7 @@ int getdirentries_test( void * the_argp )
        char *                          my_pathp = NULL;
        char *                          my_bufp = NULL;
        char *                          my_file_namep;
-       unsigned long           my_base;
+       long                            my_base;
        unsigned long           my_count;
        unsigned long           my_new_state;
        fsobj_id_t                      my_obj_id;
index 792e78f00f6e2a4032cc3320f107ade243a28c11..37e1ae62152fff4a24c12675a0d54d897413c535 100644 (file)
@@ -156,7 +156,7 @@ int commpage_data_tests( void * the_argp )
 #endif /* __i386__ || __x86_64__ */
         
        /* These fields are not implemented for all architectures */
-#ifdef _COMM_PAGE_SCHED_GEN
+#if defined(_COMM_PAGE_SCHED_GEN) && !TARGET_OS_EMBEDDED
        uint32_t preempt_count1, preempt_count2;
        uint64_t count;
 
@@ -189,8 +189,11 @@ int commpage_data_tests( void * the_argp )
                goto fail;
        }
 
+       /* We shouldn't be supporting userspace processor_start/processor_exit on embedded */
+#if !TARGET_OS_EMBEDDED
        ret = active_cpu_test();
        if (ret) goto fail;
+#endif /* !TARGET_OS_EMBEDDED */
 #endif /* _COMM_PAGE_ACTIVE_CPUS */
 
 #ifdef _COMM_PAGE_PHYSICAL_CPUS
@@ -289,8 +292,8 @@ int active_cpu_test(void)
        processor_t             *processor_list;                
        host_name_port_t        host;
        struct processor_basic_info     processor_basic_info;
-       int                     cpu_count;
-       int                     data_count;
+       mach_msg_type_number_t  cpu_count;
+       mach_msg_type_number_t  data_count;
        int                     i;
 
 
diff --git a/tools/tests/xnu_quick_test/content_protection_test.c b/tools/tests/xnu_quick_test/content_protection_test.c
new file mode 100644 (file)
index 0000000..9f2cceb
--- /dev/null
@@ -0,0 +1,922 @@
+#include "tests.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+
+#include <IOKit/IOKitLib.h>
+#include <Kernel/IOKit/crypto/AppleKeyStoreDefs.h>
+#include <Kernel/sys/content_protection.h>
+
+/* Note that this test (due to the need to lock/unlock the device on demand, and the
+   need to manipulate the passcode) has the unfortunate effect of linking xnu_quick_test
+   to the IOKit Framework. */
+
+/* TODO: Change the test to use a single cleanup label. */
+
+#define CPT_IO_SIZE      4096
+#define CPT_AKS_BUF_SIZE 256
+#define CPT_MAX_PASS_LEN 64
+
+#define GET_PROT_CLASS(fd)             fcntl((fd), F_GETPROTECTIONCLASS)
+#define SET_PROT_CLASS(fd, prot_class) fcntl((fd), F_SETPROTECTIONCLASS, (prot_class))
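+
+/* F_GETPROTECTIONCLASS and F_SETPROTECTIONCLASS are the fcntl(2) commands that
+   query and set a file's content protection class on a per-fd basis. */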
+
+#define PRINT_LOCK_FAIL   printf("%s, line %d: failed to lock the device.\n", cpt_fail_header, __LINE__);
+#define PRINT_UNLOCK_FAIL printf("%s, line %d: failed to unlock the device.\n", cpt_fail_header, __LINE__);
+
+extern char g_target_path[PATH_MAX];
+
+char * cpt_fail_header = "Content protection test failed";
+char * keystorectl_path = "/usr/local/bin/keystorectl";
+
+/* Shamelessly ripped from keystorectl routines; a wrapper for invoking the AKS user client. */
+int apple_key_store(uint32_t command,
+                    uint64_t * inputs,
+                    uint32_t input_count,
+                    void * input_structs,
+                    size_t input_struct_count,
+                    uint64_t * outputs,
+                    uint32_t * output_count)
+{
+       int result = -1;
+       io_connect_t connection = IO_OBJECT_NULL;
+       io_registry_entry_t apple_key_bag_service = IO_OBJECT_NULL;
+       kern_return_t k_result = KERN_FAILURE;
+       IOReturn io_result = kIOReturnError;
+
+       apple_key_bag_service = IOServiceGetMatchingService(kIOMasterPortDefault, IOServiceMatching(kAppleKeyStoreServiceName));
+
+       if (apple_key_bag_service == IO_OBJECT_NULL)
+       {
+               printf("FAILURE: failed to match kAppleKeyStoreServiceName.\n");
+               goto end;
+       }
+
+       k_result = IOServiceOpen(apple_key_bag_service, mach_task_self(), 0, &connection);
+
+       if (k_result != KERN_SUCCESS)
+       {
+               printf("FAILURE: failed to open AppleKeyStore.\n");
+               goto end;
+       }
+
+       k_result = IOConnectCallMethod(connection, kAppleKeyStoreUserClientOpen, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL);
+
+       if (k_result != KERN_SUCCESS)
+       {
+               printf("FAILURE: call to AppleKeyStore method kAppleKeyStoreUserClientOpen failed.\n");
+               goto close;
+       }
+
+       io_result = IOConnectCallMethod(connection, command, inputs, input_count, input_structs, input_struct_count, outputs, output_count, NULL, NULL);
+
+       if (io_result != kIOReturnSuccess)
+       {
+               printf("FAILURE: call to AppleKeyStore method %d failed.\n", command);
+               goto close;
+       }
+
+       result = 0;
+
+close:
+       /* Close the user client connection, not the registry entry we matched. */
+       IOServiceClose(connection);
+
+end:
+       return(result);
+}
+
+#ifndef   KEYBAG_ENTITLEMENTS
+/* Just a wrapper around forking to exec keystorectl for commands requiring entitlements. */
+int keystorectl(char * const command[])
+{
+       int child_result = -1;
+       int result = -1;
+       pid_t child = -1;
+
+       child = fork();
+
+       if (child == -1)
+       {
+               printf("FAILURE: failed to fork.\n");
+               goto end;
+       }
+       else if (child == 0)
+       {
+               /* TODO: This keeps keystorectl from bombarding us with key state changes, but
+                  there must be a better way of doing this; killing stderr is a bit nasty,
+                  and if keystorectl fails, we want all the information we can get. */
+               fclose(stderr);
+               fclose(stdin);
+               execv(keystorectl_path, command);
+               printf("FAILURE: child failed to execv keystorectl, errno = %s.\n",
+                 strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       if ((waitpid(child, &child_result, 0) != child) || WEXITSTATUS(child_result))
+       {
+               printf("FAILURE: keystorectl failed.\n");
+               result = -1;
+       }
+       else
+       {
+               result = 0;
+       }
+
+end:
+       return(result);
+}
+#endif /* KEYBAG_ENTITLEMENTS */
+
+/* Code based on Mobile Key Bag; specifically MKBDeviceSupportsContentProtection
+   and MKBDeviceFormattedForContentProtection. */
+/* We want to verify that we support content protection, and that
+   we are formatted for it. */
+int supports_content_prot()
+{
+       int local_result = -1;
+       int result = -1;
+       uint32_t buffer_size = 1;
+       char buffer[buffer_size];
+       io_registry_entry_t defaults = IO_OBJECT_NULL;
+       kern_return_t k_result = KERN_FAILURE;
+       struct statfs statfs_results;
+
+       defaults = IORegistryEntryFromPath(kIOMasterPortDefault, kIODeviceTreePlane ":/defaults");
+
+       if (defaults == IO_OBJECT_NULL)
+       {
+               printf("FAILURE: failed to find defaults registry entry.\n");
+               goto end;
+       }
+
+       k_result = IORegistryEntryGetProperty(defaults, "content-protect", buffer, &buffer_size);
+
+       if (k_result != KERN_SUCCESS)
+       {       /* This isn't a failure; it means the entry doesn't exist, so we assume CP
+                  is unsupported. */
+               result = 0;
+               goto end;
+       }
+
+       /* At this point, we SUPPORT content protection... but are we formatted for it? */
+       /* This is ugly; we should be testing the file system we'll be testing in, not
+          just /tmp/. */
+       local_result = statfs(g_target_path, &statfs_results);
+
+       if (local_result == -1)
+       {
+               printf("FAILURE: failed to statfs the test directory, errno = %s.\n",
+                 strerror(errno));
+       }
+       else if (statfs_results.f_flags & MNT_CPROTECT)
+       {
+               result = 1;
+       }
+       else
+       {       /* This isn't a failure, it means the filesystem isn't formatted for CP. */
+               result = 0;
+       }
+
+end:
+       return(result);
+}
+
+#if 0
+int device_lock_state()
+{
+       /* TODO: Actually implement this. */
+       /* We fail if a passcode already exists, and the methods being used to lock/unlock
+          the device in this test appear to be synchronous... do we need this function? */
+       int result = -1;
+
+       return(result);
+}
+#endif
+
+int lock_device()
+{
+       int result = -1;
+
+#ifdef    KEYBAG_ENTITLEMENTS
+       /* If we're entitled, we can lock the device ourselves. */
+       uint64_t inputs[] = {device_keybag_handle};
+       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
+       result = apple_key_store(kAppleKeyStoreKeyBagLock, inputs, input_count, NULL, 0, NULL, NULL);
+#else
+       /* If we aren't entitled, we'll need to use keystorectl to lock the device. */
+       /* keystorectl seems to have a bus error (though it locks successfully) unless
+          lock is passed an argument, so we'll also pass it the empty string. */
+       char * const keystorectl_args[] = {keystorectl_path, "lock", "", NULL};
+       result = keystorectl(keystorectl_args);
+#endif /* KEYBAG_ENTITLEMENTS */
+
+       return(result);
+}
+
+int unlock_device(char * passcode)
+{
+       int result = -1;
+
+#ifdef    KEYBAG_ENTITLEMENTS
+       /* If we're entitled, we can unlock the device ourselves. */
+       uint64_t inputs[] = {device_keybag_handle};
+       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
+       size_t input_struct_count = 0;
+
+       if ((passcode == NULL) || ((input_struct_count = strnlen(passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
+       {
+               passcode = "";
+               input_struct_count = 0;
+       }
+
+       result = apple_key_store(kAppleKeyStoreKeyBagUnlock, inputs, input_count, passcode, input_struct_count, NULL, NULL);
+#else
+       /* If we aren't entitled, we'll need to use keystorectl to unlock the device. */
+       if ((passcode == NULL) || (strnlen(passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
+       {
+               passcode = "";
+       }
+
+       char * const keystorectl_args[] = {keystorectl_path, "unlock", passcode, NULL};
+       result = keystorectl(keystorectl_args);
+#endif /* KEYBAG_ENTITLEMENTS */
+
+       return(result);
+}
+
+int set_passcode(char * new_passcode, char * old_passcode)
+{
+       int result = -1;
+
+#ifdef    KEYBAG_ENTITLEMENTS
+       /* If we're entitled, we can set the passcode ourselves. */
+       uint64_t inputs[] = {device_keybag_handle};
+       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
+       void * input_structs = NULL;
+       size_t input_struct_count = 0;
+       char buffer[CPT_AKS_BUF_SIZE];
+       char * buffer_ptr = buffer;
+       uint32_t old_passcode_len = 0;
+       uint32_t new_passcode_len = 0;
+
+       if ((old_passcode == NULL) || ((old_passcode_len = strnlen(old_passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
+       {
+               old_passcode = "";
+               old_passcode_len = 0;
+       }
+
+       if ((new_passcode == NULL) || ((new_passcode_len = strnlen(new_passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
+       {
+               new_passcode = "";
+               new_passcode_len = 0;
+       }
+
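+       /* Pack the change-password request for AKS. The layout is inferred from
+          keystorectl rather than any documented interface: a leading count (2
+          passcodes), then each passcode as a 4-byte length followed by its bytes,
+          rounded up to a 4-byte boundary. */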
+       *((uint32_t *) buffer_ptr) = ((uint32_t) 2);
+       buffer_ptr += sizeof(uint32_t);
+       *((uint32_t *) buffer_ptr) = old_passcode_len;
+       buffer_ptr += sizeof(uint32_t);
+       memcpy(buffer_ptr, old_passcode, old_passcode_len);
+       buffer_ptr += ((old_passcode_len + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1));
+       *((uint32_t *) buffer_ptr) = new_passcode_len;
+       buffer_ptr += sizeof(uint32_t);
+       memcpy(buffer_ptr, new_passcode, new_passcode_len);
+       buffer_ptr += ((new_passcode_len + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1));
+       input_structs = buffer;
+       input_struct_count = (buffer_ptr - buffer);
+
+       result = apple_key_store(kAppleKeyStoreKeyBagSetPasscode, inputs, input_count, input_structs, input_struct_count, NULL, NULL);
+#else
+       /* If we aren't entitled, we'll need to use keystorectl to set the passcode. */
+       if ((old_passcode == NULL) || (strnlen(old_passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
+       {
+               old_passcode = "";
+       }
+
+       if ((new_passcode == NULL) || (strnlen(new_passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
+       {
+               new_passcode = "";
+       }
+
+       char * const keystorectl_args[] = {keystorectl_path, "change-password", old_passcode, new_passcode, NULL};
+       result = keystorectl(keystorectl_args);
+#endif /* KEYBAG_ENTITLEMENTS */
+
+       return(result);
+}
+
+int clear_passcode(char * passcode)
+{
+       /* For the moment, this will set the passcode to the empty string (a known value);
+          this will most likely need to change, or running this test may ruin everything(tm). */
+       int result = -1;
+
+       result = set_passcode(NULL, passcode);
+
+       return(result);
+}
+
+#if 0
+/* Determines if we will try to test class C semantics. */
+int unlocked_since_boot()
+{
+       /* TODO: Actually implement this. */
+       /* The actual semantics for CP mean that even with this primitive, we would need
+          to set a passcode and then reboot the device in order to test this; this function
+          will probably be rather worthless as a result. */
+       int result = 1;
+
+       return(result);
+}
+#endif
+
+/* If the device has a passcode when we want to test it, things are going to go wrong.
+   As such, we'll assume the device never has a passcode.
+   No, not even then.
+   Or we could just try "" to ""; it works. */
+int has_passcode()
+{
+       int result = -1;
+
+       result = set_passcode(NULL, NULL);
+
+       return(result);
+}
+
+int content_protection_test(void * argp)
+{
+       #pragma unused (argp)
+       int init_result = 0;
+       int local_result = -1;
+       int test_result = -1;
+       int fd = -1;
+       int dir_fd = -1;
+       int subdir_fd = -1;
+       int new_prot_class = -1;
+       int old_prot_class = -1;
+       int current_byte = 0;
+       char filepath[PATH_MAX];
+       char dirpath[PATH_MAX];
+       char subdirpath[PATH_MAX];
+       char rd_buffer[CPT_IO_SIZE];
+       char wr_buffer[CPT_IO_SIZE];
+       char * passcode = "IAmASecurePassword";
+
+       /* Do some initial setup (names). */
+       bzero(filepath, PATH_MAX);
+       bzero(dirpath, PATH_MAX);
+       bzero(subdirpath, PATH_MAX);
+
+       /* This is just easier than checking each result individually. */
+       init_result |= (strlcat(filepath, g_target_path, PATH_MAX) == PATH_MAX);
+       init_result |= (strlcat(filepath, "/", PATH_MAX) == PATH_MAX);
+       init_result |= (strlcpy(dirpath, filepath, PATH_MAX) == PATH_MAX);
+       init_result |= (strlcat(filepath, "cpt_test_file", PATH_MAX) == PATH_MAX);
+       init_result |= (strlcat(dirpath, "cpt_test_dir/", PATH_MAX) == PATH_MAX);
+       init_result |= (strlcpy(subdirpath, dirpath, PATH_MAX) == PATH_MAX);
+       init_result |= (strlcat(subdirpath, "cpt_test_subdir/", PATH_MAX) == PATH_MAX);
+
+       if (init_result)
+       {       /* If any of the initialization failed, we're just going to fail now. */
+               printf("%s, line %d: failed to initialize test strings.\n",
+                 cpt_fail_header, __LINE__);
+               goto end;
+       }
+
+       local_result = supports_content_prot();
+
+       if (local_result == -1)
+       {
+               printf("%s, line %d: failed to determine if content protection is supported.\n",
+                 cpt_fail_header, __LINE__);
+               goto end;
+       }
+       else if (local_result == 0)
+       {       /* If we don't support content protection at the moment, pass the test. */
+               printf("This device does not support or is not formatted for content protection.\n");
+               test_result = 0;
+               goto end;
+       }
+
+       /* If we support content protection, we'll need to be able to set the passcode. */
+       local_result = has_passcode();
+
+       if (local_result == -1)
+       {
+               printf("%s, line %d: the device appears to have a passcode.\n",
+                 cpt_fail_header, __LINE__);
+               goto end;
+       }
+
+       if (set_passcode(passcode, NULL))
+       {
+               printf("%s, line %d: failed to set a new passcode.\n",
+                 cpt_fail_header, __LINE__);
+               goto end;
+       }
+
+       fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0666);
+
+       if (fd == -1)
+       {
+               printf("%s, line %d: failed to create the test file, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto remove_passcode;
+       }
+
+       /* Ensure we can freely read and change protection classes when unlocked. */
+       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_F; new_prot_class++)
+       {
+               old_prot_class = GET_PROT_CLASS(fd);
+
+               if (old_prot_class == -1)
+               {
+                       printf("%s, line %d: failed to get protection class when unlocked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               if (SET_PROT_CLASS(fd, new_prot_class))
+               {
+                       printf("%s, line %d: failed to change protection class from %d to %d during unlock, errno = %s.\n",
+                         cpt_fail_header, __LINE__, old_prot_class, new_prot_class, strerror(errno));
+                       goto cleanup_file;
+               }
+       }
+
+       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_D))
+       {
+               printf("%s, line %d: failed to change protection class from F to D when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_file;
+       }
+
+       /* Try making a class A file while locked. */
+       if (lock_device())
+       {
+               PRINT_LOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       if (!SET_PROT_CLASS(fd, PROTECTION_CLASS_A))
+       {
+               printf("%s, line %d: was able to change protection class from D to A when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       if (unlock_device(passcode))
+       {
+               PRINT_UNLOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       /* Attempt opening/IO to a class A file while unlocked. */
+       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_A))
+       {
+               printf("%s, line %d: failed to change protection class from D to A when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_file;
+       }
+
+       close(fd);
+       fd = open(filepath, O_RDWR | O_CLOEXEC);
+
+       if (fd == -1)
+       {
+               printf("%s, line %d: failed to open a class A file when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto remove_file;
+       }
+
+       /* TODO: Write specific data we can check for.
+          If we're going to do that, the write scheme should be deliberately ugly. */
+       current_byte = 0;
+
+       while (current_byte < CPT_IO_SIZE)
+       {
+               local_result = pwrite(fd, &wr_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to write to class A file when unlocked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               current_byte += local_result;
+       }       
+
+       current_byte = 0;
+
+       while (current_byte < CPT_IO_SIZE)
+       {
+               local_result = pread(fd, &rd_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to read from class A file when unlocked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               current_byte += local_result;
+       }
+
+       /* Again, but now while locked; and try to change the file class as well. */
+       if (lock_device())
+       {
+               PRINT_LOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       if (pread(fd, rd_buffer, CPT_IO_SIZE, 0) > 0)
+       {
+               printf("%s, line %d: was able to read from a class A file when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       if (pwrite(fd, wr_buffer, CPT_IO_SIZE, 0) > 0)
+       {
+               printf("%s, line %d: was able to write to a class A file when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       if (!SET_PROT_CLASS(fd, PROTECTION_CLASS_D))
+       {
+               printf("%s, line %d: was able to change protection class from A to D when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       /* Try to open and truncate the file. */
+       close(fd);
+       fd = open(filepath, O_RDWR | O_TRUNC | O_CLOEXEC);
+
+       if (fd != -1)
+       {
+               printf("%s, line %d: was able to open and truncate a class A file when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       /* Try to open the file */
+       fd = open(filepath, O_RDWR | O_CLOEXEC);
+
+       if (fd != -1)
+       {
+               printf("%s, line %d: was able to open a class A file when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       /* What about class B files? */
+       if (unlock_device(passcode))
+       {
+               PRINT_UNLOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       fd = open(filepath, O_RDWR | O_CLOEXEC);
+
+       if (fd == -1)
+       {
+               printf("%s, line %d: was unable to open a class A file when unlocked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_D))
+       {
+               printf("%s, line %d: failed to change protection class from A to D when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_file;
+       }
+
+       if (lock_device())
+       {
+               PRINT_LOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       /* Can we change a file to class B (effectively creating one) while locked? */
+       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_B))
+       {
+               printf("%s, line %d: failed to change protection class from D to B when locked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_file;
+       }
+
+       /* We should also be able to read/write to the file descriptor while it is open. */
+       current_byte = 0;
+
+       while (current_byte < CPT_IO_SIZE)
+       {
+               local_result = pwrite(fd, &wr_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to write to new class B file when locked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               current_byte += local_result;
+       }
+
+       current_byte = 0;
+
+       while (current_byte < CPT_IO_SIZE)
+       {
+               local_result = pread(fd, &rd_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to read from new class B file when locked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               current_byte += local_result;
+       }
+
+       /* We should not be able to open a class B file under lock. */
+       close(fd);
+       fd = open(filepath, O_RDWR | O_CLOEXEC);
+
+       if (fd != -1)
+       {
+               printf("%s, line %d: was able to open a class B file when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       unlink(filepath);
+
+       /* We still need to test directory semantics. */
+       if (mkdir(dirpath, 0777) == -1)
+       {
+               printf("%s, line %d: failed to create a new directory when locked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto remove_passcode;
+       }
+
+       /* The newly created directory should have no explicit protection class, i.e. class D. */
+       dir_fd = open(dirpath, O_RDONLY | O_CLOEXEC);
+
+       if (dir_fd == -1)
+       {
+               printf("%s, line %d: failed to open an unclassed directory when locked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto remove_dir;
+       }
+
+       if (GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_D)
+       {
+               printf("%s, line %d: newly created directory had a non-D protection class.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_dir;
+       }
+
+       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_A))
+       {
+               printf("%s, line %d: was unable to change a directory from class D to class A during lock.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_dir;
+       }
+
+       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_D))
+       {
+               printf("%s, line %d: failed to change a directory from class A to class D during lock, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_dir;
+       }
+
+       /* Do all files created in the directory properly inherit the directory's protection class? */
+       if ((strlcpy(filepath, dirpath, PATH_MAX) == PATH_MAX) || (strlcat(filepath, "cpt_test_file", PATH_MAX) == PATH_MAX))
+       {
+               printf("%s, line %d: failed to construct the path for a file in the directory.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_dir;
+       }
+
+       if (unlock_device(passcode))
+       {
+               PRINT_UNLOCK_FAIL;
+               goto cleanup_dir;
+       }
+
+       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_E; new_prot_class++)
+       {
+               old_prot_class = GET_PROT_CLASS(dir_fd);
+               
+               if (old_prot_class == -1)
+               {
+                       printf("%s, line %d: failed to get the protection class for the directory, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_dir;
+               }
+
+               if (SET_PROT_CLASS(dir_fd, new_prot_class))
+               {
+                       printf("%s, line %d: failed to change the protection class for the directory from %d to %d, errno = %s.\n",
+                         cpt_fail_header, __LINE__, old_prot_class, new_prot_class, strerror(errno));
+                       goto cleanup_dir;
+               }
+
+               fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0666);
+
+               if (fd == -1)
+               {
+                       printf("%s, line %d: failed to create a file in a class %d directory when unlocked, errno = %s.\n",
+                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
+                       goto cleanup_dir;
+               }
+
+               local_result = GET_PROT_CLASS(fd);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to get the new file's protection class, errno = %s.\n",
+                         cpt_fail_header, __LINE__, strerror(errno));
+                       goto cleanup_file;
+               }
+               else if (local_result != new_prot_class)
+               {
+                       printf("%s, line %d: new file did not inherit the directory's protection class.\n",
+                         cpt_fail_header, __LINE__);
+                       goto cleanup_file;
+               }
+
+               close(fd);
+               unlink(filepath);
+       }
+
+       /* Do we disallow creation of a class F directory? */
+       if (!SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_F))
+       {
+               printf("%s, line %d: creation of a class F directory did not fail as expected.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_dir;
+       }
+
+       /* And are class A and class B semantics followed for when we create these files during lock? */
+       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_A))
+       {
+               printf("%s, line %d: failed to change directory class from F to A when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_dir;
+       }
+
+       if (lock_device())
+       {
+               PRINT_LOCK_FAIL;
+               goto cleanup_dir;
+       }
+
+       fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0666);
+
+       if (fd != -1)
+       {
+               printf("%s, line %d: was able to create a new file in a class A directory when locked.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       if (unlock_device(passcode))
+       {
+               PRINT_UNLOCK_FAIL;
+               goto cleanup_dir;
+       }
+
+       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_B))
+       {
+               printf("%s, line %d: failed to change directory class from A to B when unlocked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_dir;
+       }
+
+       if (lock_device())
+       {
+               PRINT_LOCK_FAIL;
+               goto cleanup_dir;
+       }
+
+       fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0666);
+
+       if (fd == -1)
+       {
+               printf("%s, line %d: failed to create new file in class B directory when locked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_dir;
+       }
+
+       local_result = GET_PROT_CLASS(fd);
+
+       if (local_result == -1)
+       {
+               printf("%s, line %d: failed to get protection class for a new file when locked, errno = %s.\n",
+                 cpt_fail_header, __LINE__, strerror(errno));
+               goto cleanup_file;
+       }
+       else if (local_result != PROTECTION_CLASS_B)
+       {
+               printf("%s, line %d: new file in class B directory did not inherit protection class.\n",
+                 cpt_fail_header, __LINE__);
+               goto cleanup_file;
+       }
+
+       /* What happens when we try to create new subdirectories? */
+       if (unlock_device(passcode))
+       {
+               PRINT_UNLOCK_FAIL;
+               goto cleanup_file;
+       }
+
+       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_E; new_prot_class++)
+       {
+               if (SET_PROT_CLASS(dir_fd, new_prot_class))
+               {
+                       printf("%s, line %d: failed to change directory to class %d, errno = %s.\n",
+                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               local_result = mkdir(subdirpath, 0777);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to create subdirectory in class %d directory, errno = %s.\n",
+                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
+                       goto cleanup_file;
+               }
+
+               subdir_fd = open(subdirpath, O_RDONLY | O_CLOEXEC);
+
+               if (subdir_fd == -1)
+               {
+                       printf("%s, line %d: failed to open subdirectory in class %d directory, errno = %s.\n",
+                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
+                       goto remove_subdir;
+               }
+
+               local_result = GET_PROT_CLASS(subdir_fd);
+
+               if (local_result == -1)
+               {
+                       printf("%s, line %d: failed to get class of new subdirectory of class %d directory, errno = %s.\n",
+                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
+                       goto cleanup_subdir;
+               }
+               else if (local_result != new_prot_class)
+               {
+                       printf("%s, line %d: new subdirectory had different class than class %d parent.\n",
+                         cpt_fail_header, __LINE__, new_prot_class);
+                       goto cleanup_subdir;
+               }
+
+               close(subdir_fd);
+               rmdir(subdirpath);
+       }
+
+       /* If we've made it this far, the test was successful. */
+       test_result = 0;
+
+cleanup_subdir:
+       close(subdir_fd);
+
+remove_subdir:
+       rmdir(subdirpath);
+
+cleanup_file:
+       close(fd);
+
+remove_file:
+       unlink(filepath);
+
+cleanup_dir:
+       close(dir_fd);
+
+remove_dir:
+       rmdir(dirpath);
+
+remove_passcode:
+       /* Try to unlock the device (no ramifications if it isn't locked when we try) and remove the passcode. */
+       if (unlock_device(passcode))
+       {
+               printf("WARNING: failed to unlock the device.\n");
+       }
+
+       if (clear_passcode(passcode))
+       {
+               printf("WARNING: failed to clear the passcode.\n");
+       }
+
+end:
+       return(test_result);
+}
+
index 8cd7c03168e5a2ad44b4fc0a4c9e8237c0d8c93b..e6c1229ab6a7d8b3c884307259cbb9ede957971b 100644 (file)
@@ -63,7 +63,7 @@ int expected[4] = {
 };
 
 
-main(int argc, char *argv[])
+int main(int argc, char *argv[])
 {
        int (*func)();
        int result, test;
index 206116042deeba61afda88a81ef3e197640c7303..c9b1357890012a66d4aa22b7f7680e4d970e977c 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <stdio.h>
+#include <unistd.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
 
index d1ca1574d18b0a23ba32ea804c517507c989670d..5c526f2463cf0eb0a5846fd3159ffe240dfdf142 100644 (file)
@@ -43,6 +43,7 @@
 #include <time.h>
 #include <grp.h>
 #include <unistd.h>
+#include <ctype.h>
 #include <sys/mount.h>
 #include <sys/param.h>
 #include <sys/select.h>
@@ -106,8 +107,8 @@ struct test_entry   g_tests[] =
        {1, &directory_tests, NULL, "getattrlist, getdirentriesattr, setattrlist"},
 #if !TARGET_OS_EMBEDDED
        {1, &getdirentries_test, NULL, "getdirentries"},
-#endif
        {1, &exchangedata_test, NULL, "exchangedata"},
+#endif
        {1, &searchfs_test, NULL, "searchfs"},
        {1, &sema2_tests, NULL, "sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait"},
        {1, &sema_tests, NULL, "semctl, semget, semop"},
@@ -124,6 +125,11 @@ struct test_entry   g_tests[] =
        {1, &atomic_fifo_queue_test, NULL, "OSAtomicFifoEnqueue, OSAtomicFifoDequeue"},
 #endif
        {1, &sched_tests, NULL, "Scheduler tests"},
+#if TARGET_OS_EMBEDDED
+       {1, &content_protection_test, NULL, "Content protection tests"},
+#endif
+       {1, &pipes_test, NULL, "Pipes tests"},
+       {1, &kaslr_test, NULL, "KASLR tests"},
        {0, NULL, NULL, "last one"}
 };
 
@@ -132,7 +138,9 @@ static void list_all_tests( void );
 static void mark_tests_to_run( long my_start, long my_end );
 static int parse_tests_to_run( int argc, const char * argv[], int * indexp );
 static void usage( void );
+#if !TARGET_OS_EMBEDDED
 static int setgroups_if_single_user(void);
+#endif
 static const char *current_arch( void );
 
 /* globals */
@@ -269,23 +277,23 @@ g_testbots_active = 1;
 #endif
        /* Code added to run xnu_quick_test under testbots */
        if ( g_testbots_active == 1 ) {
-       printf("[TEST] xnu_quick_test \n");     /* Declare the beginning of test suite */
+               printf("[TEST] xnu_quick_test \n");     /* Declare the beginning of test suite */
        }
-    
+
+#if !TARGET_OS_EMBEDDED    
        /* Populate groups list if we're in single user mode */
        if (setgroups_if_single_user()) {
                return 1;
        }
-    
+#endif
        if ( list_the_tests != 0 ) {
                list_all_tests( );
                return 0;
        }
 #if !TARGET_OS_EMBEDDED
        if (g_xilog_active == 1) {      
-               logRef = XILogOpenLogExtended( logPath, "xnu_quick_test", "com.apple.coreos", 
-                                                                               config, xml, echo, NULL, "ResultOwner", 
-                                                                               "com.apple.coreos", NULL );
+               logRef = XILogOpenLogExtended( logPath, "xnu_quick_test", "com.apple.coreos", config, xml, 
+                                               echo, NULL, "ResultOwner", "com.apple.coreos", NULL );
                if( logRef == NULL )  {
                        fprintf(stderr,"Couldn't create log: %s",logPath);
                        exit(-1);
@@ -304,9 +312,6 @@ g_testbots_active = 1;
        printf( "Current architecture is %s\n", current_arch() );
 
        /* Code added to run xnu_quick_test under testbots */
-        if ( g_testbots_active == 1 ) {
-        printf("[PASS] xnu_quick_test started\n");     
-        }
                
        /* run each test that is marked to run in our table until we complete all of them or
         * hit the maximum number of failures.
@@ -325,6 +330,11 @@ g_testbots_active = 1;
                        XILogMsg( "test #%d - %s \n", (i + 1), my_testp->test_infop );
                }
 #endif
+
+               if ( g_testbots_active == 1 ) {
+                       printf("[BEGIN] %s \n", my_testp->test_infop);
+               }
+
                printf( "test #%d - %s \n", (i + 1), my_testp->test_infop );
                fflush(stdout);
                my_err = my_testp->test_routine( my_testp->test_input );
@@ -347,7 +357,7 @@ g_testbots_active = 1;
                                printf( "\n Reached the maximum number of failures - Aborting xnu_quick_test. \n" );
                                /* Code added to run xnu_quick_test under testbots */
                                if ( g_testbots_active == 1 ) {
-                               printf("[FAIL] %s \n", my_testp->test_infop);
+                                       printf("[FAIL] %s \n", my_testp->test_infop);
                                }       
                                goto exit_this_routine;
                        }
@@ -369,7 +379,7 @@ g_testbots_active = 1;
 #endif
                /* Code added to run xnu_quick_test under testbots */
                if ( g_testbots_active == 1 ) {
-               printf("[PASS] %s \n", my_testp->test_infop);
+                       printf("[PASS] %s \n", my_testp->test_infop);
                }       
        }
        
@@ -573,6 +583,7 @@ static void usage( void )
 
 } /* usage */
 
+#if !TARGET_OS_EMBEDDED
 /* This is a private API between Libinfo, Libc, and the DirectoryService daemon.
  * Since we are trying to determine if an external provider will back group
  * lookups, we can use this, without relying on additional APIs or tools
@@ -629,6 +640,7 @@ setgroups_if_single_user(void)
 
        return retval;
 }
+#endif
 
 static const char *current_arch( void )
 {
index 5544164758240580330cc20ee01d7df09044abc8..9dbf1631b4c66633fded99414e351ec434117a8a 100644 (file)
@@ -2,17 +2,21 @@ SDKROOT ?= /
 Product=$(shell tconf --product)
 Embedded=$(shell tconf --test TARGET_OS_EMBEDDED)
 
+SDKVERSION=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1)
+
 ifeq "$(Embedded)" "YES"
 XILogFLAG =
 SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path)
-CFLAGS += -isysroot $(SDKPATH)
-LIBFLAGS += -isysroot $(SDKPATH)
+CFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION)
+LIBFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION)
 else
 XILogFLAG = -framework XILog
+CFLAGS += -mmacosx-version-min=$(SDKVERSION)
+LIBFLAGS += -mmacosx-version-min=$(SDKVERSION)
 endif
 
-HOSTCC = gcc
-CC = xcrun -sdk $(SDKROOT) gcc
+HOSTCC = cc
+CC = xcrun -sdk $(SDKROOT) cc
 
 ifdef RC_BUILDIT
 DOING_BUILDIT=yes
@@ -42,7 +46,7 @@ else
                # this hack should be removed once tconf gets
                # <rdar://problem/6618734>
                ifeq "$(Product)" "iPhone"
-               ARCH=armv6
+               ARCH=armv7
                endif
                ifeq "$(Product)" "AppleTV"
                ARCH=i386
@@ -57,18 +61,26 @@ else
 endif
 
 
-CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
+CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS) -Wno-deprecated-declarations
 LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders  -F/AppleInternal/Library/Frameworks/ $(XILogFLAG)
 
+# The current implementation of the content protection test requires IOKit.
+ifeq "$(Product)" "iPhone"
+LIBFLAGS += -framework IOKit
+endif
+
 MY_OBJECTS = $(OBJROOT)/main.o $(OBJROOT)/memory_tests.o $(OBJROOT)/misc.o \
                         $(OBJROOT)/sema_tests.o $(OBJROOT)/shared_memory_tests.o \
                         $(OBJROOT)/socket_tests.o $(OBJROOT)/tests.o \
                         $(OBJROOT)/xattr_tests.o $(OBJROOT)/kqueue_tests.o \
                         $(OBJROOT)/machvm_tests.o $(OBJROOT)/commpage_tests.o \
-                        $(OBJROOT)/atomic_fifo_queue_test.o $(OBJROOT)/sched_tests.o
+                        $(OBJROOT)/atomic_fifo_queue_test.o $(OBJROOT)/sched_tests.o \
+                        $(OBJROOT)/pipes_tests.o
 
 ifneq "$(Product)" "iPhone"
 MY_OBJECTS += $(OBJROOT)/32bit_inode_tests.o
+else
+MY_OBJECTS += $(OBJROOT)/content_protection_test.o
 endif
 
 # In networked home directories, the chown will fail; we notice and print a helpful message
@@ -112,9 +124,9 @@ ifeq "$(Product)" "MacOSX"
 
 endif
 ifeq "$(Product)" "iPhone"
-       $(CC) -arch armv6 -isysroot $(SDKROOT) $(CFLAGS) helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm
-       $(CC) $(LIBFLAGS) -arch armv6 -isysroot $(SDKROOT) $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm
-       $(CC) $(MY_ARCH) -isysroot $(SDKROOT)   helpers/arch.c -o $(DSTROOT)/helpers/arch
+       $(CC) $(CFLAGS) helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm
+       $(CC) $(LIBFLAGS) $(CFLAGS) $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm
+       $(CC) $(MY_ARCH) $(CFLAGS) helpers/arch.c -o $(DSTROOT)/helpers/arch
 endif
        
        
@@ -136,7 +148,7 @@ $(OBJROOT)/memory_tests.o : memory_tests.c tests.h
 # misc.o has to be built 3-way for the helpers to link
 $(OBJROOT)/misc.o : misc.c tests.h
 ifeq "$(Product)" "iPhone"
-       $(CC) -arch armv6 $(CFLAGS) -c misc.c   -o $@
+       $(CC) -arch armv7 $(CFLAGS) -c misc.c   -o $@
 else
        $(CC) -arch i386 -arch x86_64 $(CFLAGS) -c misc.c   -o $@
 endif
@@ -174,6 +186,11 @@ $(OBJROOT)/commpage_tests.o : commpage_tests.c tests.h
 $(OBJROOT)/atomic_fifo_queue_test.o : atomic_fifo_queue_test.c tests.h
        $(CC) $(CFLAGS) -c atomic_fifo_queue_test.c    -o $@
 
+$(OBJROOT)/content_protection_test.o : content_protection_test.c tests.h
+       $(CC) $(CFLAGS) -c content_protection_test.c -o $@
+
+$(OBJROOT)/pipes_tests.o : pipes_tests.c tests.h
+       $(CC) $(CFLAGS) -c pipes_tests.c -o $@
 
 ifndef DOING_BUILDIT
 .PHONY : clean
index dc8675087475ebf93428539dcb1513393aadfdfe..03e31a456d704c51bf0fe2c5c5361ea1c97907dc 100644 (file)
@@ -49,16 +49,14 @@ crashcount(char *namebuf1, char *namebuf2)
        char            *crash_file_pfx = "xnu_quick_test";
        int             crash_file_pfxlen = strlen(crash_file_pfx);
        struct stat     sb;
-       DIR             *dirp1, *dirp2;
+       DIR             *dirp1 = NULL, *dirp2 = NULL;
        struct dirent   *dep1, *dep2;
        int             count = 0;
 
-       /* If we can't open the directory, it hasn't been created */
-       if ((dirp1 = opendir(crashdir1)) == NULL) {
-               return( 0 );
-       }
+       /* If we can't open the directory, dirp1 will be NULL */
+       dirp1 = opendir(crashdir1);
 
-       while((dep1 = readdir(dirp1)) != NULL) {
+       while(dirp1 != NULL && ((dep1 = readdir(dirp1)) != NULL)) {
                if (strncmp(crash_file_pfx, dep1->d_name, crash_file_pfxlen))
                        continue;
                /* record each one to get the last one */
@@ -70,14 +68,14 @@ crashcount(char *namebuf1, char *namebuf2)
                count++;
        }
 
-       closedir(dirp1);
+       if (dirp1 != NULL)
+               closedir(dirp1);
 
-       /* If we can't open the directory, it hasn't been created */
-        if ((dirp2 = opendir(crashdir2)) == NULL) {
-                return( 0 );
-        }
+#if !TARGET_OS_EMBEDDED
+       /* If we can't open the directory, dirp2 will be NULL */
+        dirp2 = opendir(crashdir2);
 
-        while((dep2 = readdir(dirp2)) != NULL) {
+        while(dirp2 != NULL && (dep2 = readdir(dirp2)) != NULL) {
                 if (strncmp(crash_file_pfx, dep2->d_name, crash_file_pfxlen))
                         continue;
                 /* record each one to get the last one */
@@ -88,10 +86,10 @@ crashcount(char *namebuf1, char *namebuf2)
                 }
                 count++;
         }
-
-        closedir(dirp2);
-
-       return( count/2 );
+       if (dirp2 != NULL)
+               closedir(dirp2);
+#endif
+       return( count );
 }
 
 
@@ -155,6 +153,8 @@ int memory_tests( void * the_argp )
         * Find out how many crashes there have already been; if it's not
         * zero, then don't even attempt this test.
         */
+        my_namebuf1[0] = '\0';
+        my_namebuf2[0] = '\0';
        if ((my_crashcount = crashcount(my_namebuf1, my_namebuf2)) != 0) {
                printf( "memtest aborted: can not distinguish our expected crash from \n");
                printf( "%d existing crashes including %s \n", my_crashcount, my_namebuf2);
@@ -406,23 +406,27 @@ exit_child:
         * Find out how many crashes there have already been; if it's not
         * one, then don't even attempt this test.
         */
-       if ((my_crashcount = crashcount(my_namebuf1, my_namebuf2)) != 1) {
+       my_namebuf1[0] = '\0';
+       my_namebuf2[0] = '\0';
+       my_crashcount = crashcount(my_namebuf1, my_namebuf2);
+       if (!(my_crashcount == 1 || my_crashcount == 2)) {
                printf( "child did not crash as expected \n");
-               printf( "saw %d crashes including %s \n", my_crashcount, my_namebuf2);
+               printf( "saw %d crashes including %s \n", my_crashcount, my_namebuf1);
                goto test_failed_exit;
        }
 
        /* post-remove the expected crash report */
-       if (unlink(my_namebuf1)) {
+       if (unlink(my_namebuf1) && !(errno == ENOENT || errno == ENOTDIR)) {
                printf("unlink of expected crash report '%s' failed \n", my_namebuf1);
                goto test_failed_exit;
        }
-
-        if (unlink(my_namebuf2)) {
+#if !TARGET_OS_EMBEDDED
+       /* /Library/Logs/DiagnosticReports/ does not exist on embedded targets. */
+        if (unlink(my_namebuf2) && !(errno == ENOENT || errno == ENOTDIR)) {
                 printf("unlink of expected crash report '%s' failed \n", my_namebuf2);
                 goto test_failed_exit;
         }
-
+#endif
        /* make sure shared page got modified in child */
        if ( strcmp( my_test_page_p, "parent data child data" ) != 0 ) {
                printf( "minherit did not work correctly - shared page looks wrong \n" );
index 9545bf1406f11ee0c1d353614e37bc904c6b0f5c..5e37062115684ecf6001754dd4d8f6c16109db53 100644 (file)
@@ -139,6 +139,7 @@ int create_file_with_name( char *the_target_dirp, char *the_namep, int remove_ex
                        printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
                        goto failure_exit;
                }
+               fcntl( my_fd, F_FULLFSYNC );
                close( my_fd );
        } 
        goto routine_exit;
@@ -319,9 +320,9 @@ int get_architecture()
        char *errmsg = NULL;
 
        errmsg = "sysctlbyname() failed when getting hw.cputype";
-       if (my_err = sysctlbyname("hw.cputype", NULL, &length, NULL, 0)) goto finished; /* get length of data */
+       if ((my_err = sysctlbyname("hw.cputype", NULL, &length, NULL, 0))) goto finished;       /* get length of data */
        if (length != sizeof(buf))                                       goto finished;
-       if (my_err = sysctlbyname("hw.cputype", &buf, &length, NULL, 0)) goto finished; /* copy data */
+       if ((my_err = sysctlbyname("hw.cputype", &buf, &length, NULL, 0))) goto finished; /* copy data */
        switch (buf) {
        case CPU_TYPE_X86:
        case CPU_TYPE_X86_64:
diff --git a/tools/tests/xnu_quick_test/pipes_tests.c b/tools/tests/xnu_quick_test/pipes_tests.c
new file mode 100644 (file)
index 0000000..c87f94d
--- /dev/null
@@ -0,0 +1,880 @@
+/* Pipe buffer unit tests
+ *
+ * The main goal of this code is to facilitate the construction,
+ * running, result logging and clean up of a test suite, taking care
+ * of all the scaffolding. A test suite is a sequence of very targeted
+ * unit tests, each running as a separate process to isolate its
+ * address space.
+ * A unit test is abstracted as a unit_test_t structure, consisting of
+ * a test function and a logging identifier. A test suite is a suite_t
+ * structure, consisting of a unit_test_t array, a logging identifier,
+ * and fixture set up and tear down functions.
+ * Test suites are created dynamically. Each of its unit tests runs in
+ * its own fork()d process, with the fixture set up and tear down
+ * running before and after each test. The parent process will log a
+ * pass result if the child exits normally, and a fail result in any
+ * other case (non-zero exit status, abnormal signal). The suite
+ * results are then aggregated and logged, and finally the test suite
+ * is destroyed.
+ * Everything is logged to stdout in the standard Testbot format, which
+ * can be easily converted to Munin or SimonSays logging
+ * format. Logging is factored out as much as possible for future
+ * flexibility. In our particular case, a unit test is logged as a
+ * Testbot Test Case ([BEGIN]/[PASS]/[FAIL]), and a test suite is
+ * logged as a Testbot Test ([TEST]). This is confusing but
+ * unfortunately cannot be avoided for compatibility. Suite results
+ * are aggregated after the [SUMMARY] keyword.
+ * The included test suites cover the various pipe buffer operations 
+ * with dynamic expansion.
+ *
+ * Vishal Patel (vishal_patel@apple.com)
+ */
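+
+/* A minimal usage sketch of the framework's public interface declared
+ * below; the test names and bodies here are illustrative placeholders,
+ * not part of this suite:
+ *
+ *     void my_set_up() { }    // fixture set up, runs before each test
+ *     void my_test() { assert(1 + 1 == 2, 1, "arithmetic is broken"); }
+ *     void my_tear_down() { } // fixture tear down, runs after each test
+ *
+ *     UnitTests my_tests = {
+ *         { "1. trivial arithmetic", my_test },
+ *     };
+ *     run_suite(my_set_up, my_tests, my_tear_down, "my suite %d", 1);
+ *     log_aggregated_results();
+ */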
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <errno.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/sysctl.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>           /* waitpid(), WIFEXITED() et al. */
+#include <pthread.h>            /* pthread_create()/pthread_join() */
+#include <mach/boolean.h>       /* boolean_t, TRUE/FALSE */
+#include <mach/vm_param.h>      /* PAGE_SIZE */
+#include <dispatch/dispatch.h>
+
+/**************************/
+/**************************/
+/* Unit Testing Framework */
+/**************************/
+/**************************/                                   
+
+/*********************/
+/* Private interface */
+/*********************/
+
+static const char frameworkname[] = "pipes_unitester";
+
+/* Type for test, fixture set up and fixture tear down functions. */
+typedef void (*test_fn_t)();
+
+/* Unit test structure. */
+typedef struct {
+     const char *name;
+     test_fn_t test;
+} unit_test_t;
+
+/* Test suite structure. */
+typedef struct {
+     const char *name;
+     int numoftests;
+     test_fn_t set_up;
+     unit_test_t *tests;
+     test_fn_t tear_down;
+} suite_t;
+
+int _quietness = 0;
+unsigned int _timeout = 0;
+int _expected_signal = 0;
+
+struct {
+     uintmax_t numoftests;
+     uintmax_t passed_tests;
+} results = { 0, 0 };
+
+void logr(char *format, ...) __printflike(1, 2);
+
+static void die(int condition, const char *culprit)
+{
+     if (condition) {
+         printf("%s: %s error: %s.\n", frameworkname, culprit,
+                strerror(errno));
+         exit(1);
+     }
+}
+
+static void die_on_stdout_error()
+{
+     die(ferror(stdout), "stdout");
+}
+
+/* Individual test result logging. */
+void logr(char *format, ...)
+{
+     if (_quietness <= 1) {
+         va_list ap;
+         
+         va_start(ap, format);
+         vprintf(format, ap);
+         va_end(ap);
+         die_on_stdout_error();
+     }
+}
+
+static suite_t *create_suite(const char *name, int numoftests,
+                            test_fn_t set_up, unit_test_t *tests,
+                            test_fn_t tear_down)
+{
+     suite_t *suite =  (suite_t *)malloc(sizeof(suite_t));
+     die(suite == NULL, "malloc()");
+
+     suite->name = name;
+     suite->numoftests = numoftests;
+     suite->set_up = set_up;
+     suite->tests = tests;
+     suite->tear_down = tear_down;
+     return suite;
+}
+
+static void destroy_suite(suite_t *suite)
+{
+     free(suite);
+}
+
+static void log_suite_info(suite_t *suite)
+{
+     logr("[TEST] %s\n", suite->name);
+     logr("Number of tests: %d\n\n", suite->numoftests);
+}
+
+static void log_suite_results(suite_t *suite, int passed_tests)
+{
+     results.numoftests += (uintmax_t)suite->numoftests;
+     results.passed_tests += (uintmax_t)passed_tests;
+}
+
+static void log_test_info(unit_test_t *unit_test)
+{
+     logr("[BEGIN] %s\n", unit_test->name);
+}
+
+static void log_test_result(unit_test_t *unit_test,
+                           boolean_t test_passed)
+{
+     logr("[%s] %s\n\n", test_passed ? "PASS" : "FAIL",
+         unit_test->name);
+}
+
+/* Handler for test time out. */
+static void alarm_handler(int signo)
+{
+     write(1,"Child process timed out.\n",
+          strlen("Child process timed out.\n"));
+     _Exit(6);
+}
+
+/* Run a test with fixture set up and teardown, while enforcing the
+ * time out constraint. */
+static void run_test(suite_t *suite, unit_test_t *unit_test)
+{
+     struct sigaction alarm_act;
+
+     log_test_info(unit_test);
+     alarm_act.sa_handler = alarm_handler;
+     sigemptyset(&alarm_act.sa_mask);
+     alarm_act.sa_flags = 0;
+     die(sigaction(SIGALRM, &alarm_act, NULL) != 0, "sigaction()");
+     alarm(_timeout);
+     
+     suite->set_up();
+     unit_test->test();
+     suite->tear_down();
+}
+
+/* Check a child return status. */
+static boolean_t child_terminated_normally(int child_status)
+{
+     boolean_t normal_exit = FALSE;
+     
+     if (WIFEXITED(child_status)) {
+         int exit_status = WEXITSTATUS(child_status);
+         if (exit_status) {
+              printf("Child process unexpectedly exited with code "
+                     "%d.\n", exit_status);
+         } else if (!_expected_signal) {
+              normal_exit = TRUE;
+         }
+     } else if (WIFSIGNALED(child_status)) {
+         int signal = WTERMSIG(child_status);
+         if (signal == _expected_signal) {
+              if (_quietness <= 0) {
+                   printf("Child process died with expected signal "
+                          "%d.\n", signal);
+              }
+              normal_exit = TRUE;
+         } else {
+              printf("Child process unexpectedly died with signal "
+                     "%d.\n", signal);
+         }            
+     } else {
+         printf("Child process unexpectedly did not exit nor "
+                "die.\n");
+     }
+     die_on_stdout_error();
+     return normal_exit;
+}
+
+/* Run a test in its own process, and report the result. */
+static boolean_t child_test_passed(suite_t *suite,
+                                  unit_test_t *unit_test)
+{
+     int test_status;
+
+     pid_t test_pid = fork();
+     die(test_pid == -1, "fork()");
+     if (!test_pid) {
+         run_test(suite, unit_test);
+         exit(0);
+     }
+     while (waitpid(test_pid, &test_status, 0) != test_pid) {
+         continue;
+     }
+     boolean_t test_result = child_terminated_normally(test_status);
+     log_test_result(unit_test, test_result);
+     return test_result;
+}
+
+/* Run each test in a suite, and report the results. */
+static int count_passed_suite_tests(suite_t *suite)
+{
+     int passed_tests = 0;
+     int i;
+     
+     for (i = 0; i < suite->numoftests; i++) {
+         passed_tests += child_test_passed(suite,
+                                           &(suite->tests[i]));
+     }
+     return passed_tests;
+}
+
+/********************/
+/* Public interface */
+/********************/
+
+#define DEFAULT_TIMEOUT 5U
+#define DEFAULT_QUIETNESS 1
+
+/* Local replacement for assert(3); wrapped in do/while(0) so it
+ * behaves as a single statement in if/else chains. */
+#define assert(condition, exit_status, ...)    \
+     do {                                      \
+         if (!(condition)) {                   \
+              _fatal(__FILE__, __LINE__, __func__,  \
+                     (exit_status),  __VA_ARGS__);  \
+         }                                     \
+     } while (0)
+
+/* Include in tests whose expected outcome is a specific signal. */
+#define expect_signal(signal)                          \
+     struct sigaction _act;                            \
+     _act.sa_handler = expected_signal_handler;                \
+     sigemptyset(&_act.sa_mask);                       \
+     _act.sa_flags = 0;                                        \
+     assert(sigaction((signal), &_act, NULL) == 0, 1,  \
+           "sigaction() error: %s.", strerror(errno));
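+
+/* Illustrative use (hypothetical test, not part of this suite): a test
+ * whose expected outcome is SIGPIPE can install the handler so the
+ * child logs the signal and exits cleanly when it arrives:
+ *
+ *     void my_sigpipe_test() {
+ *         expect_signal(SIGPIPE);
+ *         ... provoke a write to a closed pipe ...
+ *     }
+ */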
+
+#define run_suite(set_up, tests, tear_down, ...)               \
+     _run_suite((sizeof(tests)/sizeof(tests[0])),              \
+               (set_up), (tests), (tear_down), __VA_ARGS__)    
+
+typedef unit_test_t UnitTests[];
+
+void _fatal(const char *file, int line, const char *function,
+           int exit_status, const char *format, ...)
+     __printflike(5, 6);
+void _run_suite(int numoftests, test_fn_t set_up, UnitTests tests,
+               test_fn_t tear_down, const char *format, ...)
+     __printflike(5, 6);
+void logv(char *format, ...) __printflike(1, 2);
+
+void _fatal(const char *file, int line, const char *function,
+           int exit_status, const char *format, ...)
+{
+     va_list ap;
+     
+     va_start(ap, format);
+     vprintf(format, ap);
+     printf("\n");
+     printf("Assert failed in file %s, function %s(), line %d.\n",
+           file, function, line);
+     va_end(ap);
+     exit(exit_status);
+}
+void _run_suite(int numoftests, test_fn_t set_up, UnitTests tests,
+               test_fn_t tear_down, const char *format, ...)
+{
+     va_list ap;
+     char *name;
+     
+     va_start(ap, format);
+     die(vasprintf(&name, format, ap) == -1, "vasprintf()");
+     va_end(ap);
+     suite_t *suite = create_suite(name, numoftests, set_up, tests,
+                                  tear_down);
+     log_suite_info(suite);
+     log_suite_results(suite, count_passed_suite_tests(suite));
+     free(name);
+     destroy_suite(suite);
+}
+
+/* Signal handler for tests expected to terminate with a specific
+ * signal. */
+void expected_signal_handler(int signo)
+{
+     write(1,"Child process received expected signal.\n",
+          strlen("Child process received expected signal.\n"));
+     _Exit(0);
+}
+
+/* Setters and getters for various test framework global
+ * variables. Should only be used outside of the test, set up and tear
+ * down functions. */
+
+/* Time out constraint for running a single test. */
+void set_timeout(unsigned int time)
+{
+     _timeout = time;
+}
+
+unsigned int get_timeout()
+{
+     return _timeout;
+}
+
+/* Expected signal for a test, default is 0. */
+void set_expected_signal(int signal)
+{
+     _expected_signal = signal;
+}
+
+int get_expected_signal()
+{
+     return _expected_signal;
+}
+
+/* Logging verbosity. */
+void set_quietness(int value)
+{
+     _quietness = value;
+}
+
+int get_quietness()
+{
+     return _quietness;
+}
+
+/* For fixture set up and tear down functions, and unit tests. */
+void do_nothing() {
+}
+
+/* Verbose (default) logging. */
+void logv(char *format, ...)
+{
+     if (get_quietness() <= 0) {
+         va_list ap;
+         
+         va_start(ap, format);
+         vprintf(format, ap);
+         va_end(ap);
+         die_on_stdout_error();
+     }
+}
+
+void log_aggregated_results()
+{
+     printf("[SUMMARY] Aggregated Test Results\n");
+     printf("Total: %ju\n", results.numoftests);
+     printf("Passed: %ju\n", results.passed_tests);
+     printf("Failed: %ju\n\n", results.numoftests
+           - results.passed_tests);
+     die_on_stdout_error();
+}
+
+/*******************************/
+/*******************************/
+/* Pipes buffer unit testing   */
+/*******************************/
+/*******************************/
+
+static const char progname[] = "pipes_unitester";
+
+static void die_on_error(int condition, const char *culprit)
+{
+     assert(!condition, 1, "%s: %s error: %s.", progname, culprit,
+           strerror(errno));
+}
+
+/*******************************/
+/* Usage and option processing */
+/*******************************/
+
+static void usage(int exit_status)
+{
+     printf("Usage : %s\n", progname);
+     exit(exit_status);
+} 
+
+static void die_on_invalid_value(int condition,
+                                const char *value_string)
+{
+     if (condition) {
+         printf("%s: invalid value: %s.\n", progname, value_string);
+         usage(1);
+     }
+}
+
+/* Convert a storage unit suffix into an exponent. */
+static int strtoexp(const char *string)
+{
+     if (string[0] == '\0') {
+         return 0;
+     }
+     
+     char first_letter =  toupper(string[0]);
+     char prefixes[] = "BKMGTPE";
+     const int numofprefixes = strlen(prefixes);
+     prefixes[numofprefixes] = first_letter;
+     int i = 0;
+
+     while (prefixes[i] != first_letter) {
+         i++;
+     }
+     die_on_invalid_value(i >= numofprefixes || (string[1] != '\0' &&
+                                                (toupper(string[1])
+                                                 != 'B' || string[2]
+                                                 != '\0')), string);
+     return 10 * i;
+}
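+
+/* Worked examples for the sketch above (exponents are powers of two):
+ *   strtoexp("")   == 0     bytes
+ *   strtoexp("K")  == 10    1 << 10 == 1024
+ *   strtoexp("MB") == 20    1 << 20 == 1048576
+ * Anything else, e.g. "X" or "KBB", fails die_on_invalid_value(). */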
+
+static void process_options(int argc, char *argv[])
+{
+     int opt;
+     char *endptr;
+  
+     setvbuf(stdout, NULL, _IONBF, 0);
+
+     set_timeout(DEFAULT_TIMEOUT);
+     set_quietness(DEFAULT_QUIETNESS);
+     
+     while ((opt = getopt(argc, argv, "t:vqh")) != -1) {
+         switch (opt) {
+         case 't': 
+              errno = 0;
+              set_timeout(strtoul(optarg, &endptr, 0));
+              die_on_invalid_value(errno == ERANGE || *endptr != '\0'
+                                   || endptr == optarg, optarg);
+              break;
+         case 'q':
+              set_quietness(get_quietness() + 1);
+              break;
+         case 'v':
+              set_quietness(0);
+              break;
+         case 'h':
+              usage(0);
+              break;
+         default:
+              usage(1);
+              break;
+         }
+     }
+}
+
+/*********************************/
+/* Various function declarations */
+/*********************************/
+
+void initialize_data(int *ptr, int len);
+
+int verify_data(int *base, int *target, int len);
+
+void clear_data(int *ptr, int len);
+
+/*******************************/
+/* Arrays for test suite loops */
+/*******************************/
+
+#define BUFMAX 20000
+#define BUFMAXLEN (BUFMAX * sizeof(int))
+
+const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE,PAGE_SIZE*2,PAGE_SIZE*4};
+static const int bufsizes[] = { 128, 512, 1024, 2048, 4096, 16384  };
+
+int data[BUFMAX],readbuf[BUFMAX];
+int pipefd[2] = {0,0};
+
+typedef int * pipe_t;
+
+struct thread_work_data {
+       pipe_t p;
+       unsigned int total_bytes;
+       unsigned int chunk_size;
+};
+
+void * reader_thread(void *ptr);
+void * writer_thread(void *ptr);
+
+dispatch_semaphore_t r_sem, w_sem;
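+
+/* Reader/writer handshake: create_threads() below starts w_sem at 1
+ * and r_sem at 0, so the writer produces the first chunk; each thread
+ * then signals the other's semaphore after handling a chunk. */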
+
+unsigned long current_buf_size=0;
+
+/*************************************/
+/* Global variables set up functions */
+/*************************************/
+
+
+void initialize_data(int *ptr, int len)
+{
+        int i;
+        if (!ptr || len <=0 )
+                return;
+
+        for (i = 0; i < len; i ++)
+                ptr[i] = i;
+}
+
+void clear_data(int *ptr, int len)
+{
+
+        int i;
+        if (!ptr)
+                return;
+        for (i = 0; i < len; i++)
+                ptr[i]=0;
+}
+
+int verify_data(int *base, int *target, int len)
+{
+        int i = 0;
+        
+        if (!base || !target)
+                return 0;
+        
+        for (i = 0; i < len; i++){
+                if (base[i] != target[i])
+                        return 0;
+        }
+
+        return 1;
+}
+
+void initialize_data_buffer()
+{
+       initialize_data(data, BUFMAX);
+       initialize_data(readbuf, BUFMAX);
+}
+
+/*******************************/
+/* Core read/write helper functions */
+/*******************************/
+
+ssize_t read_whole_buffer(pipe_t p, void *scratch_buf, int size);
+ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size);
+ssize_t pipe_write_data(pipe_t p, void *src_buf, int size);
+
+ssize_t read_whole_buffer(pipe_t p, void *scratch_buf, int size)
+{
+       int fd = p[0];
+	logv("reading whole buffer from fd %d, size %d\n", fd, size);
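+	/* Note: pread(2) does not support pipes and fails with ESPIPE,
+	 * so a -1 return here is expected when p is a pipe. */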
+       int retval = pread(fd, scratch_buf, size, 0);
+       if (retval == -1 ){
+               logv("Error reading whole buffer. (%d) %s\n",errno, strerror(errno));
+       }
+       return retval;
+
+}
+
+ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size)
+{
+       int fd = p[0];
+       //logv("reading from pipe %d, for size %d", fd, size);
+       int retval = read(fd, dest_buf, size);
+       if (retval == -1) {
+		logv("Error reading from buffer. (%d) %s\n", errno, strerror(errno));
+       }
+       return retval;
+}
+
+ssize_t pipe_write_data(pipe_t p, void *src_buf, int size)
+{
+       int fd = p[1];
+       //logv("writing to pipe %d, for size %d", fd, size);
+       int retval = write(fd, src_buf, size);
+       if (retval == -1) {
+		logv("Error writing to buffer. (%d) %s\n", errno, strerror(errno));
+       }
+       return retval;
+}
+
+
+void * reader_thread(void *ptr)
+{
+       struct thread_work_data *m;
+       m = (struct thread_work_data *) ptr;
+       int i = m->total_bytes/m->chunk_size;
+       int retval, data_idx=0;
+       while (i > 0){
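+		/* The literal 8000 is an absolute dispatch_time_t that
+		 * lies in the past, so this wait (and the matching one
+		 * in writer_thread()) is effectively a non-blocking
+		 * poll; the blocking pipe read/write itself provides
+		 * the real synchronization. */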
+               dispatch_semaphore_wait(r_sem, 8000);
+               retval = pipe_read_data(m->p, &readbuf[data_idx], m->chunk_size);
+		assert(retval == m->chunk_size, 1, "Pipe read returned a different number of bytes");
+               data_idx +=m->chunk_size;
+               //logv("RD %d \n", m->chunk_size);
+               dispatch_semaphore_signal(w_sem);
+               i--;
+       }
+       return 0;
+}
+
+void * writer_thread(void *ptr)
+{
+       struct thread_work_data *m;
+       m = (struct thread_work_data *)ptr;
+       int i = m->total_bytes/m->chunk_size;
+       int retval, data_idx=0;
+       while ( i > 0 ){
+
+               dispatch_semaphore_wait(w_sem, 8000);
+               //logv("WR %d \n", m->chunk_size);
+               retval=pipe_write_data(m->p, &data[data_idx], m->chunk_size);
+                assert(retval == m->chunk_size, 1, "Pipe write failed");
+               data_idx +=m->chunk_size;
+               dispatch_semaphore_signal(r_sem);
+               i--;
+       }
+       return 0;
+}
+
+
+void create_threads(struct thread_work_data *rdata, struct thread_work_data *wdata){
+
+       pthread_t thread1, thread2;
+       r_sem = dispatch_semaphore_create(0);
+       w_sem = dispatch_semaphore_create(1);
+       int iret1, iret2;
+       void * thread_ret1 =0;
+       void * thread_ret2 =0;
+	/* Create independent threads, each of which executes its work function */
+
+	iret1 = pthread_create( &thread1, NULL, reader_thread, (void*) rdata);
+	iret2 = pthread_create( &thread2, NULL, writer_thread, (void*) wdata);
+	assert(iret1 == 0 && iret2 == 0, 1, "pthread_create() failed");
+
+	pthread_join( thread1, &thread_ret1);
+	pthread_join( thread2, &thread_ret2);
+       assert(thread_ret1 == 0, 1, "Reader Thread Failed");
+       assert(thread_ret2 == 0, 1, "Writer Thread Failed");
+}
+
+
+/*******************************/
+/* Pipes unit test functions   */
+/*******************************/
+void test_pipebuffer_setup ()
+{
+
+       logv("Setting up buffers data and readbuf\n");
+       clear_data(data, BUFMAX);
+       clear_data(readbuf, BUFMAX);
+       logv("Initializing buffers data and readbuf\n");
+       initialize_data(data, BUFMAX);
+       initialize_data(readbuf, BUFMAX);
+       logv("verifying data for correctness\n");
+       die_on_error(!verify_data(data, readbuf, BUFMAX), "data initialization");
+       clear_data(readbuf, BUFMAX);
+}
+
+void test_pipe_create(){
+       int pipefds[2] = {0,0};
+       pipe_t p = pipefds;
+       int err = pipe(p);
+       if ( err ){
+               logv("error opening pipes (%d) %s", errno, strerror(errno));
+               return;
+       }
+
+       die_on_error(0 != close(pipefds[0]), "close()");
+       die_on_error(0 != close(pipefds[1]), "close()");
+}
+
+void test_pipe_write_single_byte(){
+       int pipefds[2] = { 0 , 0 };
+       pipe_t p = pipefds;
+       die_on_error( 0 != pipe(p), "pipe()");
+       initialize_data_buffer();
+       int i = 0,retval;
+       for ( ; i < current_buf_size; i++){
+               if ( i > 16384){
+                       logv("cannot fill continuously beyond 16K.");
+                       break;
+               }
+               retval=pipe_write_data(p, &data[i], 1);
+               assert(retval == 1, 1, "Pipe write failed");
+       }
+
+       close(p[0]);
+       close(p[1]);
+}
+
+void test_pipe_single_read_write(){
+       int pipefds[2] = { 0 , 0 };
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+       struct thread_work_data d = { p, current_buf_size, 1};
+       create_threads(&d, &d);
+        assert(verify_data(data, readbuf, current_buf_size), 1, "data verification failed");
+        close(p[0]);
+        close(p[1]);
+
+}
+
+void test_pipe_single_read_2write(){
+       int pipefds[2] = { 0 , 0 };
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+       struct thread_work_data rd = { p, current_buf_size, 1};
+       struct thread_work_data wd = { p, current_buf_size, 2};
+       create_threads(&rd, &wd);
+        assert(verify_data(data, readbuf, current_buf_size), 1, "data verification failed");
+        close(p[0]);
+        close(p[1]);
+
+}
+
+void test_pipe_expansion_buffer(){
+       int pipefds[2] = { 0 , 0 };
+       int iter = 0;
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+       for ( iter=0; iter < sizeof(pipesize_blocks)/sizeof(unsigned int); iter++){
+               assert(pipesize_blocks[iter] == pipe_write_data(p, &data[0], pipesize_blocks[iter] ), 1, "expansion write failed");
+               assert(pipesize_blocks[iter] == pipe_read_data(p, &readbuf[0], pipesize_blocks[iter]+200), 1, "reading from expanded data failed");
+       /*      logv("finished round for size %u \n", pipesize_blocks[iter]); */
+       }
+        assert(verify_data(data, readbuf, current_buf_size), 1, "data verification failed");
+        close(p[0]);
+        close(p[1]);
+
+}
+
+void test_pipe_initial_big_allocation(){
+        int pipefds[2] = { 0 , 0 };
+        int iter = 0;
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+        assert(current_buf_size == pipe_write_data(p, &data[0], current_buf_size ), 1, "initial big allocation failed");
+        assert(current_buf_size == pipe_read_data(p, &readbuf[0], current_buf_size+200), 1, "reading from initial big write failed");
+        assert(verify_data(data, readbuf, current_buf_size), 1, "big pipe initial allocation - not able to verify data");
+        close(p[0]);
+        close(p[1]);
+
+}
+
+void test_pipe_cycle_small_writes(){
+        int pipefds[2] = { 0 , 0 };
+        int iter = 0;
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+       int buf_size = current_buf_size / 2;
+        
+       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
+        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
+        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
+        
+       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
+        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
+        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
+        
+       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
+        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
+        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
+        
+       close(p[0]);
+        close(p[1]);
+
+}
+void test_pipe_moving_data(){
+        int pipefds[2] = { 0 , 0 };
+        int iter = 0;
+        pipe_t p = pipefds;
+        die_on_error( 0 != pipe(p), "pipe()");
+        initialize_data_buffer();
+       int buf_size = current_buf_size / 2;
+       if (buf_size > PAGE_SIZE)
+               buf_size = PAGE_SIZE;
+        
+       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
+        logv("write of size =%d\n", buf_size);
+       assert(buf_size == pipe_write_data(p, &data[buf_size/sizeof(int)], buf_size ), 1, "cycle  write failed");
+        logv("write of size =%d\n", buf_size*2);
+       assert(buf_size == pipe_write_data(p, &data[(buf_size*2)/sizeof(int)], buf_size ), 1, "cycle  write failed");
+        logv("write of size =%d\n", buf_size*3);
+        assert((3*buf_size) == pipe_read_data(p, &readbuf[0], (3*buf_size)+200), 1, "reading from cycle read failed");
+        assert(verify_data(data, readbuf, (3*buf_size)/sizeof(int)), 1, "data verification failed");
+        
+       close(p[0]);
+        close(p[1]);
+
+}
+    
+
+/***************/
+/* Pipe suites */
+/***************/
+
+void run_pipe_basic_tests()
+{
+     int sizes_idx;
+     int numofsizes = sizeof(bufsizes)/sizeof(int);
+
+     logv("running tests for %d different sizes \n", numofsizes);
+
+     UnitTests pipe_basic_tests = {
+         { "1. create buffer and verify both reads/writes are valid",
+           test_pipebuffer_setup },
+         { "2. open and close pipes", test_pipe_create },
+         { "3. single byte write to full", test_pipe_write_single_byte},
+         { "4. single byte read/write in sync", test_pipe_single_read_write},
+         { "5. single byte read/2write in sync", test_pipe_single_read_2write},
+         { "6. expansion from existing size", test_pipe_expansion_buffer},
+         { "7. initial big allocation " , test_pipe_initial_big_allocation},
+         { "8. cycle_small_writes " ,test_pipe_cycle_small_writes },
+         { "9. test moving data " ,test_pipe_moving_data }
+     };
+     for (sizes_idx = 0; sizes_idx < numofsizes; sizes_idx++) {
+         current_buf_size = bufsizes[sizes_idx];
+         run_suite(do_nothing,
+                   pipe_basic_tests,
+                   do_nothing, "pipe create base test "
+                   "Size: 0x%jx (%ju)",
+                   (uintmax_t)bufsizes[sizes_idx],
+                   (uintmax_t)bufsizes[sizes_idx]);
+     }
+}
+
+
+int pipes_test(void *the_argp)
+{
+     set_quietness(2);
+     run_pipe_basic_tests();
+     //log_aggregated_results();
+     return results.numoftests - results.passed_tests;
+}
+
+/*
+ * Retain the old main function so issues can be debugged in the tests
+ * themselves, independent of the xnu_quick_test framework or the
+ * system.
+ */
+int main_nonuse(int argc, char *argv[])
+{
+     process_options(argc, argv);
+     
+     run_pipe_basic_tests();
+     
+     log_aggregated_results();
+     return 0;
+}
index 00433d5ba82c34e8ee88388b10afc329e97676d6..e9a34380ff3c6b22c474336b407962b9442e256c 100644 (file)
@@ -210,7 +210,7 @@ int socket_tests( void * the_argp )
                }
 #endif
                
-#if 1
+#if !TARGET_OS_EMBEDDED
                /* sendfile test. Open libsystem, set up some headers, and send it */
                struct sf_hdtr          my_sf_hdtr;
                int                                     my_libsys_fd;
@@ -328,7 +328,7 @@ int socket_tests( void * the_argp )
                }
 #endif
 
-#if 1
+#if !TARGET_OS_EMBEDDED
                size_t neededBytes = 11;
                        
                /* Check for sendfile output */
index 2d79c6be58667f933f8ce51b950da893de73fa0f..cf2867e8ac541b294af28353630753e3f95f8c4b 100644 (file)
@@ -12,6 +12,7 @@
 #include <sys/msg.h>           /* for message queue tests */
 #include <sys/syscall.h>       /* for get / settid */
 #include <sys/sysctl.h>                /* for determining hw */
+#include <sys/kas_info.h>      /* for kas_info() */
 #include <AvailabilityMacros.h>        /* for determination of Mac OS X version (tiger, leopard, etc.) */
 #include <libkern/OSByteOrder.h> /* for OSSwap32() */
 #include <mach/mach.h>
@@ -893,12 +894,19 @@ test_passed_exit:
  */
 int access_chmod_fchmod_test( void * the_argp )
 {
-       int                     my_err;
-       int                     my_fd = -1;
+       int             error_occurred;
+       int             my_err;
+       int             my_fd = -1;
+
        char *          my_pathp = NULL;
-       uid_t euid,ruid;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
+
+       uid_t           euid,ruid;
+       struct stat     my_sb;
+
+       FILE *          file_handle;
+
+       kern_return_t   my_kr;
+
 
         my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
         if(my_kr != KERN_SUCCESS){
@@ -963,49 +971,77 @@ int access_chmod_fchmod_test( void * the_argp )
        
        
 	/*  another test for the access system call  -- refer to radar #6725311 */
-       
-       system("touch /tmp/me");
-       system("echo local | sudo touch /tmp/notme");
-       
+
+#if !TARGET_OS_EMBEDDED        
+
+       /*
+        * This test makes sure that the access system call does not give the current user extra
+        * permissions on files the current user does not own. From radar #6725311, this could
+        * happen when the current user calls access() on a file owned by the current user in
+        * the same directory as the other files not owned by the current user.
+        * 
+        * Note: This test expects that the effective uid (euid) is set to root.
+        *
+        */
+
+       /* Create a file that root owns  */
+       file_handle = fopen(FILE_NOTME, "w");
+       fclose(file_handle);
+
+       /* Currently running as root (through setreuid manipulation), switch to running as the current user. */
        euid = geteuid();
        ruid = getuid();
-       //printf("effective user id is %d: and real user id is %d: \n", (int)euid, (int)ruid);
        setreuid(ruid, ruid);
-       //printf("effective user id is %d: and real user id is %d: \n", (int)geteuid, (int)getuid);
+
+       /* Create a file that the current user owns  */
+       file_handle = fopen(FILE_ME, "w");
+       fclose(file_handle);
+
+       error_occurred = 0;
+
+       /* Try to remove the file owned by root (this should fail). */
        my_err = unlink(FILE_NOTME);
+
        if (my_err < 0) {
                my_err = errno;
        }
+
        if (my_err == 0) {
-               printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n" );
-               goto test_failed_exit;
+               printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n");
+               error_occurred = 1;
        } else {
                printf("Status: First attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-                       
-               if (true) {
-                       my_err = access(FILE_ME, _DELETE_OK);
-            if (my_err < 0) {
-                my_err = errno;
-            }
-                       //printf("Status: access('" FILE_ME "') = %d - %s.\n", my_err, strerror( my_err ));
-          fprintf(stderr, "Status: access('" FILE_ME "') = %d\n", my_err);
-               }
+
+               /* Set _DELETE_OK on a file that the current user owns */
+               access(FILE_ME, _DELETE_OK);
+
+		/* Try to remove the file owned by root again (should fail with EACCES [13]) */
                my_err = unlink(FILE_NOTME);
-        if (my_err < 0) {
-            my_err = errno;
-        }
-        if (my_err == 0) {
+
+               if (my_err < 0) {
+                   my_err = errno;
+               }
+
+               if (my_err == 0) {
                        printf("Failed: Second attempt deleted '" FILE_NOTME "'!\n");
-            //fprintf(stderr, "Failed: Second attempt deleted '" FILE_NOTME "'!\n");
-                       goto test_failed_exit;
-        } else {
+                       error_occurred = 1;
+               } else if (my_err == 13) {
                        printf("Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-           // fprintf(stderr, "Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d\n", my_err);
-                       
-        }
+               } else {
+                       printf("Failed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
+                       error_occurred = 1;
+               }
        }
+
+       /* Reset to running as root */
        setreuid(ruid, euid);
-       //printf("effective user id is %d: and real user id is %d    ---1: \n", euid, ruid);
+
+       if(error_occurred == 1) {
+               goto test_failed_exit;
+       }
+
+#endif
+
        /* end of test*/
        
        
@@ -1052,6 +1088,26 @@ test_passed_exit:
        return( my_err );
 }
 
+#if !TARGET_OS_EMBEDDED
+static bool _prime_groups(void)
+{
+       /*
+        * prime groups with a known list to ensure consistent test behavior
+        */
+       
+       gid_t   my_exp_groups[] = { getegid(), 20, 61, 12 };
+       int             my_err;
+
+       my_err = setgroups( ( sizeof(my_exp_groups) / sizeof(*my_exp_groups) ), &my_exp_groups[0] );
+       if ( my_err == -1 ) {
+               printf( "initial setgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
+               return false;
+       }
+
+       return true;
+}
+#endif
+
 /*  **************************************************************************************************************
  *     Test chown, fchown, lchown, lstat, readlink, symlink system calls.
  *  **************************************************************************************************************
@@ -1103,6 +1159,10 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp )
                goto test_failed_exit;
        }
        
+       if ( !_prime_groups() ) {
+               goto test_failed_exit;
+       }
+       
        /* set up by getting a list of groups */
        my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
        
@@ -2114,6 +2174,10 @@ int groups_test( void * the_argp )
        my_real_gid = getgid( );
        my_effective_gid = getegid( );
 
+       if ( !_prime_groups() ) {
+               goto test_failed_exit;
+       }
+       
        /* start by getting list of groups the current user belongs to */
        my_orig_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
 
@@ -3357,6 +3421,7 @@ int fcntl_test( void * the_argp )
        close( my_newfd );
        my_newfd = -1;
 
+#if !TARGET_OS_EMBEDDED /* This section of the test is specific to the desktop platform; refer to <rdar://problem/8850905> */
        /* While we're here, dup it via an open of /dev/fd/<fd> .. */
 
        {
@@ -3385,7 +3450,7 @@ int fcntl_test( void * the_argp )
        }
        close ( my_newfd );
        my_newfd = -1;
-
+#endif
        my_err = 0;
        goto test_passed_exit;
 
@@ -4418,6 +4483,7 @@ typedef struct packed_result * packed_result_p;
 
 int searchfs_test( void * the_argp )
 {
+#if !TARGET_OS_EMBEDDED
        int                                             my_err, my_items_found = 0, my_ebusy_count;
        char *                                  my_pathp = NULL;
     unsigned long                      my_matches;
@@ -4612,6 +4678,10 @@ test_passed_exit:
                vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
         }
        return( my_err );
+#else
+       printf( "\t--> Not supported on EMBEDDED TARGET\n" );
+       return 0;
+#endif
 }
 
 
@@ -4623,7 +4693,6 @@ test_passed_exit:
  */
 int aio_tests( void * the_argp )
 {
-#if !TARGET_OS_EMBEDDED
        int                                     my_err, i;
        char *                          my_pathp;
        struct aiocb *          my_aiocbp;
@@ -4888,10 +4957,6 @@ test_passed_exit:
                }
        }
        return( my_err );
-#else
-       printf( "\t--> Not supported on EMBEDDED TARGET\n" );
-       return 0;
-#endif
 }
 
 
@@ -5072,6 +5137,80 @@ test_passed_exit:
        return my_err;
 }
 
+/*  **************************************************************************************************************
+ *     Test KASLR-related functionality
+ *  **************************************************************************************************************
+ */
+int kaslr_test( void * the_argp )
+{
+       int result = 0;
+       uint64_t slide = 0;
+       size_t size;
+       int slide_enabled;
+
+       size = sizeof(slide_enabled);
+       result = sysctlbyname("kern.slide", &slide_enabled, &size, NULL, 0);
+       if (result != 0) {
+               printf("sysctlbyname(\"kern.slide\") failed with errno %d\n", errno);
+               goto test_failed_exit;
+       }
+
+       /* Test positive case first */
+       size = sizeof(slide);
+       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size);
+       if (result == 0) {
+               /* syscall supported, slide must be non-zero if running latest xnu and KASLR is enabled */
+               if (slide_enabled && (slide == 0)) {
+                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) reported slide of 0x%016llx\n", slide);
+                       goto test_failed_exit;
+               }
+               if (size != sizeof(slide)) {
+                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) reported size of %lu\n", size);
+                       goto test_failed_exit;
+               }
+       } else {
+               /* Only ENOTSUP is allowed. If so, assume all calls will be unsupported */
+               if (errno == ENOTSUP) {
+                       return 0;
+               } else {
+                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) returned unexpected errno (errno %d)\n", errno);
+                       goto test_failed_exit;
+               }
+       }
+       
+       /* Negative cases for expected failures */
+       size = sizeof(slide);
+       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL /* EFAULT */, &size);
+       if ((result == 0) || (errno != EFAULT)) {
+               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size) returned unexpected success or errno (result %d errno %d)\n", result, errno);
+               goto test_failed_exit;
+       }
+
+       size = sizeof(slide) + 1; /* EINVAL */
+       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size);
+       if ((result == 0) || (errno != EINVAL)) {
+               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size+1) returned unexpected success or errno (result %d errno %d)\n", result, errno);
+               goto test_failed_exit;
+       }
+
+       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL /* EFAULT */, NULL /* EFAULT */);
+       if ((result == 0) || (errno != EFAULT)) {
+               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, NULL) returned unexpected success or errno (result %d errno %d)\n", result, errno);
+               goto test_failed_exit;
+       }
+
+       size = sizeof(slide);
+       result = kas_info(KAS_INFO_MAX_SELECTOR /* EINVAL */, &slide, &size);
+       if ((result == 0) || (errno != EINVAL)) {
+               printf("kas_info(KAS_INFO_MAX_SELECTOR, &slide, &size) returned unexpected success or errno (result %d errno %d)\n", result, errno);
+               goto test_failed_exit;
+       }
+
+       return 0;
+
+test_failed_exit:
+       return -1;
+}
 
 #if TEST_SYSTEM_CALLS 
 
index 53b346804d9f128e28a604f8b36692f5f4dcb78d..6edbaa9b66f101df0d4efa69c113841d671af034 100644 (file)
@@ -55,8 +55,9 @@
                                         * Random values used by execve tests to 
                                         * determine architecture of machine.
                                         */
-#define        FILE_NOTME      "/tmp/notme"    /* file in /tm not owned by me */
-#define        FILE_ME         "/tmp/me"       /* file in /tmp owned by me */
+
+#define FILE_NOTME "/private/tmp/notme"                /* file in /private/tmp not owned by the current user */
+#define FILE_ME "/private/tmp/me"              /* file in /private/tmp owned by the current user */
 
 typedef int (*test_rtn_t)(void *);
 
@@ -72,6 +73,7 @@ int create_file_with_name( char *the_pathp, char *the_namep, int remove_existing
 int create_random_name( char *the_pathp, int do_open );
 int directory_tests( void * the_argp );
 int do_execve_test(char * path, char * argv[], void * envpi, int killwait);
+int do_spawn_test(int arch, int shouldfail);
 int dup_test( void * the_argp );
 int exchangedata_test( void * the_argp );
 int execve_kill_vfork_test( void * the_argp );
@@ -118,6 +120,9 @@ int statfs_32bit_inode_tests( void * the_argp );
 int commpage_data_tests( void * the_argp );
 int atomic_fifo_queue_test( void * the_argp );
 int sched_tests( void * the_argp );
+int content_protection_test( void * the_argp );
+int pipes_test( void * the_argp );
+int kaslr_test( void * the_argp );
 
 struct test_entry 
 {